# Schedule Model 1/11/18

In [10]:
# SCIP Model Interface
from pyscipopt import Model, quicksum
import numpy as np

At the heart of every model is a "Model" instance. It has methods to add variables (which we can specify as binary), add constraints, specify an objective, and, finally, optimize.

In [11]:
# Create the SCIP model instance; the variables, constraints and objective
# defined in the cells below are all added to this object.
m = Model("Schedule") # Model name is optional

Now let's focus on using our data. We have sets of students and courses; parameters for the first-, second-, and third-choice schedules; and a one-dimensional parameter for the max enrollment of each course. Let's consider some fake test data:

In [12]:
# Map each student number (1 through 5) to a placeholder student name ("s1" ... "s5")
STUDENTS = {number: "s" + str(number) for number in range(1, 6)}

In [13]:
# Map each course number (1 through 12) to a placeholder course name ("c1" ... "c12")
COURSES = {number: "c" + str(number) for number in range(1, 13)}

In this example, we have 12 courses, which we will say are divided into three periods, so there are 4 courses per period.

In [14]:
# function to generate fake preferences for a student
def gen_pref(num_courses, num_periods):
    """Generate one random fake schedule preference for a single student.

    Courses are assumed to be ordered by period: the first
    ``num_courses / num_periods`` courses are in the first period, the next
    group in the second period, and so on. In every period exactly one
    course is chosen with ``np.random.randint``, so the result depends on
    numpy's global random state (seed it for reproducible output).

    Parameters
    ----------
    num_courses : int
        Total number of courses; must be a multiple of ``num_periods``.
    num_periods : int
        Number of periods the courses are split into.

    Returns
    -------
    list of int
        Flat 0/1 list of length ``num_courses`` with exactly one 1 in each
        consecutive group of ``num_courses / num_periods`` entries.

    Raises
    ------
    ValueError
        If ``num_courses`` is not evenly divisible by ``num_periods``.
    """
    if num_courses % num_periods != 0:
        raise ValueError("Must have correct divisibility")

    # ensure all ints (the original assigned to a misspelled name here, so
    # num_courses was never actually converted)
    num_courses = int(num_courses)
    num_periods = int(num_periods)
    per_period = num_courses // num_periods

    pref = []
    for _ in range(num_periods):
        # one randint call per period, same as before, so seeded runs
        # reproduce the same preferences
        block = [0] * per_period
        block[np.random.randint(per_period)] = 1
        pref.extend(block)
    return pref
            

In [15]:
# Each ScheduleK parameter is a matrix indexed over students (rows) and courses (columns)
np.random.seed(1)

NUM_PERIODS = 3
n_students, n_courses = len(STUDENTS), len(COURSES)

# The matrices are filled one after another (all of Schedule1, then Schedule2,
# then Schedule3) so the seeded random stream is consumed in a fixed order.
Schedule1 = np.zeros((n_students, n_courses))
for row in Schedule1:
    row[:] = gen_pref(n_courses, NUM_PERIODS)

Schedule2 = np.zeros((n_students, n_courses))
for row in Schedule2:
    row[:] = gen_pref(n_courses, NUM_PERIODS)

Schedule3 = np.zeros((n_students, n_courses))
for row in Schedule3:
    row[:] = gen_pref(n_courses, NUM_PERIODS)

# Re-Do With Justina's Real Data

In [16]:
import pandas as pd

In [46]:
# Load the first-choice preference table from the CSV file and preview its first rows.
# NOTE(review): the preview shows two leading "Unnamed" columns. The cells below
# skip only the first column, so "Unnamed: 0" is carried along as if it were a
# course column -- confirm that this is intended.
s1 = pd.read_csv("FirstChoiceBinary.csv")
s1.head()

Unnamed: 0.1,Unnamed: 0,P17th-8th Grade Art,P1Advanced Algebra & Trigonometry,P1Constitutional Law/Government,P1Evolutionary Biology,P1Facing History,P1French B,P1HS English,P1HS English (TBA),P1HS PE,...,Period 8 (First Choice)_French A,Period 8 (First Choice)_HS PE,Period 8 (First Choice)_In-Depth Spanish,Period 8 (First Choice)_Latin American Studies,Period 8 (First Choice)_MS Chemistry,Period 8 (First Choice)_MS English & Social Studies,Period 8 (First Choice)_MS PE,Period 8 (First Choice)_Other,Period 8 (First Choice)_Probability & Statistics,Period 8 (First Choice)_US History & Lit: The 20th Century
0,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,1,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,0,0
2,2,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,5,0,0,0,0,0,1,0,0,0,...,0,0,0,0,1,0,0,0,0,0


In [18]:
# Make COURSES dictionary: 1-based position -> column label, skipping the first CSV column
# NOTE(review): the s1 preview shows "Unnamed: 0" as the second column, so it
# becomes COURSES[1] here -- confirm whether that column is really a course.
COURSES = dict(enumerate(s1.columns[1:], start=1))

Notice that here students are identified only by a number, which is part of a simple sequence, so the dictionary is not especially useful here.

In [26]:
# Make STUDENTS dictionary: 1-based row position -> value in the first CSV column
STUDENTS = dict(enumerate(s1.iloc[:, 0], start=1))

Now we can use the meat of the matrix to fill in the schedule parameters

In [52]:
# First-choice matrix: one row per student, one column per CSV column after the
# first, converted to a fresh float array in a single step.
Schedule1 = np.array(s1.iloc[:, 1:], dtype=float)

# Let's look at the other files

In [57]:
# Load the second- and third-choice preference tables from their CSV files.
s2 = pd.read_csv("SecondChoiceBinary.csv")
s3 = pd.read_csv("ThirdChoiceBinary.csv")

In [60]:
# Verify everything is the same: the three preference tables must share one
# column layout, because the model indexes all of them by column position.
if len(s1.columns) == len(s2.columns) == len(s3.columns):
    print("Same number of columns")
else:
    print("WARNING: the files do not have the same number of columns")

# `==` between pandas Index objects is element-wise and cannot be used as an
# `if` condition (it raises ValueError); Index.equals returns a single bool.
if s1.columns.equals(s2.columns) and s2.columns.equals(s3.columns):
    print("The columns are in the same order")
else:
    print("WARNING: the column labels or their order differ between the files")

94

In [None]:
# Second- and third-choice matrices, built the same way as Schedule1: every
# CSV column after the first, one row per student, as fresh float arrays.
Schedule2 = np.array(s2.iloc[:, 1:], dtype=float)

Schedule3 = np.array(s3.iloc[:, 1:], dtype=float)

In [11]:
# Max class size
# Earlier per-course capacity lists (12 entries each), left commented out:
#MAX_SIZE = [1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2]
#MAX_SIZE = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
# A single constant capacity, used as the right-hand side of every capacity constraint.
MAX_SIZE = 30

The above is all the data we need; now let's specify the variables in the model. We can keep track of the variables using a dictionary. Using the model instance, we can add variables with "m.addVar()". This returns a pointer to the variable, which we will add to the dictionary. We require the variable to be binary by including the option "vtype='B'".

Our variables are indexed over student and "preference", so each student has three variables, one for each schedule, indicating the schedule to which they are assigned.

In [8]:
X = {} # variable dictionary: (student number, schedule rank) -> binary SCIP variable
for i in range(1, len(STUDENTS)+1):
    for j in [1,2,3]:
        # str() keeps this working when the student ids are numbers (as loaded
        # from the CSV) and not only when they are strings like "s1".
        name = str(STUDENTS[i]) + " pref" + str(j)
        X[i,j] = m.addVar(name, vtype='B')

Now we can focus on adding the constraints:
$$ \sum_{s \in S} \left( x_{s,1}\,S1_{s,c} + x_{s,2}\,S2_{s,c} + x_{s,3}\,S3_{s,c} \right) \le \text{max}_c \qquad \forall c \in C $$
which enforces that the capacity of each class is not exceeded. To do this sum we can use the pyscipopt function "quicksum".

Lastly, we need to ensure that a student is only assigned to one schedule:
$$ x_{s, 1} + x_{s,2} + x_{s, 3} = 1 \qquad \forall s \in S $$


In [12]:
# Capacity Constraint: for every course column, the enrollment implied by the
# chosen schedules (summed over all students) may not exceed MAX_SIZE.
student_numbers = range(1, len(STUDENTS) + 1)
for course_idx in range(len(COURSES)):
    enrollment = quicksum(
        X[sid, 1] * Schedule1[sid - 1, course_idx]
        + X[sid, 2] * Schedule2[sid - 1, course_idx]
        + X[sid, 3] * Schedule3[sid - 1, course_idx]
        for sid in student_numbers
    )
    m.addCons(enrollment <= MAX_SIZE)

## NOTE: in the above, unlike in the test model, we are using a constant max class size.

In [13]:
# Assignment Constraint: every student gets exactly one of their three schedules
for sid in range(1, len(STUDENTS) + 1):
    m.addCons(quicksum(X[sid, rank] for rank in (1, 2, 3)) == 1)

Lastly, we can add the objective to the model:
$$ \text{max}_x \sum_{s \in S} 3x_{s,1} + 2x_{s,2} + x_{s,3} $$

In [14]:
# Objective: reward first choices with 3 points, second choices with 2 and
# third choices with 1, summed over all students. The third term previously
# used X[s,1] again, which gave third-choice assignments no weight at all.
m.setObjective(quicksum(3*X[s,1] + 2*X[s,2] + X[s,3] for s in STUDENTS), "maximize")

Now that we have fully set up the model, we can solve it, and attempt to extract the results.

In [15]:
# Solve/Optimize: run the solver on the model built in the cells above.
m.optimize()

In [18]:
# Take a look at solution: print the schedule each student was assigned to,
# or the solver status if no optimal solution was found.
if m.getStatus() == "optimal":
    for i in range(1, len(STUDENTS) +1):
        for j in [1,2,3]:
            v = m.getVal(X[i,j])
            # Solver values are floats and may be 0.9999... rather than exactly
            # 1, so threshold instead of testing for equality.
            if v > 0.5:
                print("Student", str(i), "is assigned to schedule", str(j))
else:
    print("We have not found an optimal solution\n the problem is", m.getStatus())

Student 1 is assigned to schedule 1
Student 2 is assigned to schedule 2
Student 3 is assigned to schedule 1
Student 4 is assigned to schedule 1
Student 5 is assigned to schedule 1
