# Schedule Model 1/11/18

In [152]:
# SCIP Model Interface
from pyscipopt import Model, quicksum
import numpy as np

At the heart of every model is a "Model" instance. It has methods to add variables (which we can specify as binary), add constraints, specify an objective, and, finally, optimize.

In [153]:
# Create the SCIP model instance; the variables, constraints and objective
# defined in later cells are all added to this object.
m = Model("Schedule") # Model name is optional

Now let's focus on using our data. We have sets of students and courses; parameters for the first-, second-, and third-choice schedules; and a one-dimensional parameter for the max enrollment of each course. Let's consider some fake test data:

In [154]:
# Fake roster: student number (1 through 5) -> label "s<number>"
STUDENTS = {number: "s" + str(number) for number in range(1, 6)}

In [155]:
# Fake catalogue: course number (1 through 12) -> label "c<number>"
COURSES = {number: "c" + str(number) for number in range(1, 13)}

In this example, we have 12 courses, which we will say are divided into three periods, so there are 4 courses per period.

In [156]:
# function to generate fake preferences for a student
def gen_pref(num_courses, num_periods):
    """Generate a random fake preference vector for one student.

    Courses are assumed to be ordered by period: the first
    ``num_courses // num_periods`` entries belong to the first period, the
    next group to the second period, and so on.  In every period exactly one
    course is picked at random with ``np.random.randint``.

    Parameters
    ----------
    num_courses : int
        Total number of courses; must be a multiple of ``num_periods``.
    num_periods : int
        Number of periods the courses are split into.

    Returns
    -------
    list of int
        Flat 0/1 list of length ``num_courses`` holding exactly
        ``num_periods`` ones (one per period).

    Raises
    ------
    Exception
        If ``num_courses`` is not divisible by ``num_periods``.
    """
    if num_courses % num_periods != 0:
        raise Exception("Must have correct divisibility")

    num_courses = int(num_courses)
    num_periods = int(num_periods)  # ensure all ints
    per_period = num_courses // num_periods

    pref = []
    for _ in range(num_periods):
        # one random draw per period, then a one-hot block for that period
        chosen = np.random.randint(per_period)
        pref.extend(1 if slot == chosen else 0 for slot in range(per_period))
    return pref
            

In [157]:
# Fake choice matrices: one row per student, one column per course.
# Schedule1 / Schedule2 / Schedule3 hold the first, second and third choices.
np.random.seed(1)  # fixed seed so the generated rows are repeatable

NUM_PERIODS = 3

Schedule1 = np.zeros((len(STUDENTS), len(COURSES)))
for row in Schedule1:
    # each `row` is a view into Schedule1, so this fills the matrix in place
    row[:] = gen_pref(len(COURSES), NUM_PERIODS)

Schedule2 = np.zeros((len(STUDENTS), len(COURSES)))
for row in Schedule2:
    row[:] = gen_pref(len(COURSES), NUM_PERIODS)

Schedule3 = np.zeros((len(STUDENTS), len(COURSES)))
for row in Schedule3:
    row[:] = gen_pref(len(COURSES), NUM_PERIODS)

# Re-Do With Justina's Real Data

In [158]:
import pandas as pd

In [159]:
# Load the first-choice matrix and preview its first rows.
# NOTE(review): the preview shows two leading "Unnamed" columns, while the
# cells below skip only the first column -- confirm that the second one is
# really meant to be treated as a course column.
s1 = pd.read_csv("Resources/FirstChoiceBinary.csv")
s1.head()

Unnamed: 0.1,Unnamed: 0,P1_7th-8th Grade Art,P1_Advanced Algebra & Trigonometry,P1_Algebra A,P1_Constitutional Law/Government,P1_Evolutionary Biology,P1_Facing History,P1_French B,P1_HS English (TBA),P1_HS PE,...,P8_HS PE,P8_In-Depth Spanish,P8_Latin American Studies,P8_MS Chemistry,P8_MS English & Social Studies,P8_MS PE,P8_Other,P8_PE,P8_Probability & Statistics,P8_US History & Lit: The 20th Century
0,0,0,0,0.0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,1
1,1,0,0,0.0,0,0,0,0,0,1,...,0,0,1,0,0,0,0,0.0,0,0
2,2,1,0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,0
3,4,0,0,0.0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0.0,0,0
4,5,0,0,0.0,0,0,0,1,0,0,...,0,0,0,1,0,0,0,0.0,0,0


In [160]:
# Make COURSES dictionary: every column label after the first maps to itself
COURSES = {course: course for course in s1.columns[1:]}

Notice that here students are identified only by a number, which is part of a simple sequence, so the dictionary is not especially useful here.

In [161]:
# Make STUDENTS dictionary: every value in the first column maps to itself
STUDENTS = {student: student for student in s1.iloc[:, 0]}

Now we can use the meat of the matrix to fill in the schedule parameters

In [162]:
# First-choice matrix: all rows of s1, every column except the first,
# copied into a float array (one row per student).
Schedule1 = np.array(s1.iloc[:, 1:], dtype=float)

# Let's look at the other files

In [163]:
# Load the second- and third-choice matrices; the following cells check that
# their columns line up with those of s1.
s2 = pd.read_csv("Resources/SecondChoiceBinary.csv")
s3 = pd.read_csv("Resources/ThirdChoiceBinary.csv")

In [164]:
# Verify everything is the same: all three frames must have equally many columns
if len(s1.columns) == len(s2.columns) == len(s3.columns):
    print("Same number of columns")


Same number of columns


In [165]:
# NOTE: `s3.shape` is not the last expression of the cell, so its value is
# not displayed; only the element-wise comparison of the s2 and s3 column
# labels below is printed.
s3.shape
print(s2.columns == s3.columns)

[ True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True  True  True  True  True
  True  True  True  True  True  True  True  True]


In [166]:
# Positions at which the three files do not all carry the same column label
not_matching = [
    idx
    for idx in range(len(s1.columns))
    if not (s1.columns[idx] == s2.columns[idx] == s3.columns[idx])
]
if not_matching:
    print("The following indicies do not match")
    print(not_matching)
else:
    print("Columns Match")

Columns Match


In [167]:
# Second- and third-choice matrices: all rows, every column except the first,
# copied into float arrays (one row per student).
Schedule2 = np.array(s2.iloc[:, 1:], dtype=float)

Schedule3 = np.array(s3.iloc[:, 1:], dtype=float)

In [168]:
# Max class size
# Earlier hard-coded alternatives, left disabled (presumably caps for the
# 12-course fake data -- TODO confirm):
#MAX_SIZE = [1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 2]
#MAX_SIZE = [2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2]
#MAX_SIZE = 24
# Course sizes are read from file instead.
course_sizes = pd.read_csv("Resources/CourseSize.csv")

In [169]:
# The column labelled '0' is used as the course name and the column
# labelled 'Size' as that course's size.
course_names = course_sizes['0']
sizes = course_sizes['Size']

In [170]:
# Map each course name to the size found on the same row of the size table
MAX_SIZES = {course_names[row]: sizes[row] for row in range(len(course_names))}

In [171]:
# Number of courses that have a size entry
len(MAX_SIZES) #len(COURSES)

91

In [172]:
# Change max sizes: courses whose name contains "Other" are capped at 100,
# every remaining course at 25 (only existing keys are reassigned)
for course_name in MAX_SIZES:
    MAX_SIZES[course_name] = 100 if "Other" in course_name else 25

The above is all the data we need; now let's specify the variables in the model. We can keep track of the variables using a dictionary. Using the model instance, we can add variables with "m.addVar()". This returns a pointer to the variable, which we will add to the dictionary. We require the variable to be binary by including the option "vtype='B'".

Our variables are indexed over student and "preference", so each student has three variables, one for each schedule, indicating the schedule to which they are assigned.

In [173]:
X = {} # variable dictionary: (student position, preference 1/2/3) -> binary variable
# Students are numbered 1..len(STUDENTS) by their position in STUDENTS, which
# is how the constraint cells index X and the schedule rows (row = position - 1).
# Walking the dictionary values positionally avoids assuming that the
# dictionary keys themselves are exactly 1..len(STUDENTS).
for i, student in enumerate(STUDENTS.values(), start=1):
    for j in [1,2,3]:
        name = str(student) + " pref" + str(j)
        X[i,j] = m.addVar(name, vtype='B')

Now we can focus on adding the constraints:
$$ \sum_{s \in S} \left( x_{s,1}\,S1_{s,c} + x_{s,2}\,S2_{s,c} + x_{s,3}\,S3_{s,c} \right) \le \text{max}_c \qquad \forall c \in C $$
which enforces that the capacity of each class is not exceeded. To do this sum we can use the pyscipopt function "quicksum".

Lastly, we need to ensure that a student is only assigned to one schedule:
$$ x_{s, 1} + x_{s,2} + x_{s, 3} = 1 \qquad \forall s \in S $$


In [174]:
# Capacity Constraint
# NOTE(review): MAX_SIZES is a dict with one entry per course, so the summed
# expression is compared against the whole dictionary here; as recorded in
# this cell's output, that raises NotImplementedError. The following cell
# compares against a single course's entry instead.
for c in range(len(COURSES)):
    m.addCons(quicksum(X[s,1]*Schedule1[s-1,c] + X[s,2]*Schedule2[s-1,c] + X[s,3]*Schedule3[s-1,c]
                       for s in range(1, len(STUDENTS)+1)) <= MAX_SIZES)

NotImplementedError: 

In [175]:
# Capacity Constraint with Different Max sizes
# For every course column c, the number of students whose assigned schedule
# contains that course must not exceed the course's entry in MAX_SIZES.
# COURSES holds one entry per course column, in column order (dicts keep
# insertion order); its values are taken positionally so the lookup works
# whether COURSES is keyed by course number or by course name.
course_by_column = list(COURSES.values())
for c in range(len(COURSES)):
    m.addCons(quicksum(X[s,1]*Schedule1[s-1,c] + X[s,2]*Schedule2[s-1,c] + X[s,3]*Schedule3[s-1,c]
                       for s in range(1, len(STUDENTS)+1)) <= MAX_SIZES[course_by_column[c]])

## NOTE: in the above, different from the test model, we are using a constant max class size.

In [176]:
# Assignment Constraint: the three schedule variables of each student sum to 1
for student_pos in range(1, len(STUDENTS) + 1):
    m.addCons(X[student_pos, 1] + X[student_pos, 2] + X[student_pos, 3] == 1)

Lastly, we can add the objective to the model:
$$ \text{max}_x \sum_{s \in S} 3x_{s,1} + 2x_{s,2} + x_{s,3} $$

# Add Grades

In [189]:
# Load the grades file; it is read without a header row (header=None).
gdf = pd.read_csv("Resources/Grades.csv", header=None)

In [190]:
key = gdf.iloc[:, 0]
grades = gdf.iloc[:, 1]
# Pair the n-th key of STUDENTS with the n-th grade row, so that GRADES is
# keyed exactly like STUDENTS
GRADES = {student: grades[row] for row, student in enumerate(STUDENTS)}

In [192]:
# Number of rows in the first column of the grades file
len(key)

246

In [193]:
# Make another dictionary that maps seniors (grade 12) and eighth graders to 2
# and everyone else to zero
GRADE_MAP = {
    s: (2 if GRADES[s] == 8 or GRADES[s] == 12 else 0)
    for s in GRADES
}

In [194]:
# Objective
# Reward 3 / 2 / 1 for assigning a student their first / second / third
# choice, plus a GRADE_MAP bonus when the first choice is granted.
# X is keyed by student position (1..len(STUDENTS)), as in the constraint
# cells, while GRADE_MAP is keyed by the STUDENTS keys -- so each student's
# position is used for X and the key for GRADE_MAP.
m.setObjective(quicksum(3*X[pos,1] + 2*X[pos,2] + X[pos,3]
                        for pos in range(1, len(STUDENTS)+1)) +
               quicksum(GRADE_MAP[s]*X[pos,1]
                        for pos, s in enumerate(STUDENTS, start=1)), "maximize")

Now that we have fully set up the model, we can solve it, and attempt to extract the results.

In [195]:
# Solve/Optimize: run the solver on the model built above; the next cell
# checks the resulting status with m.getStatus().
m.optimize()

In [196]:
# Take a look at solution
if m.getStatus() == "optimal":
    for i in range(1, len(STUDENTS) +1):
        for j in [1,2,3]:
            v = m.getVal(X[i,j])
            # Solution values come back as floats and may differ from 1 by a
            # small numerical tolerance, so test against a threshold rather
            # than for exact equality.
            if v > 0.5:
                print("Student", str(i), "is assigned to schedule", str(j))
else:
    print("We have not found an optimal solution\n the problem is", m.getStatus())

Student 1 is assigned to schedule 2
Student 2 is assigned to schedule 1
Student 3 is assigned to schedule 3
Student 4 is assigned to schedule 1
Student 5 is assigned to schedule 1
Student 6 is assigned to schedule 1
Student 7 is assigned to schedule 1
Student 8 is assigned to schedule 1
Student 9 is assigned to schedule 1
Student 10 is assigned to schedule 1
Student 11 is assigned to schedule 1
Student 12 is assigned to schedule 2
Student 13 is assigned to schedule 1
Student 14 is assigned to schedule 1
Student 15 is assigned to schedule 1
Student 16 is assigned to schedule 1
Student 17 is assigned to schedule 1
Student 18 is assigned to schedule 2
Student 19 is assigned to schedule 1
Student 20 is assigned to schedule 1
Student 21 is assigned to schedule 1
Student 22 is assigned to schedule 1
Student 23 is assigned to schedule 1
Student 24 is assigned to schedule 1
Student 25 is assigned to schedule 1
Student 26 is assigned to schedule 1
Student 27 is assigned to schedule 1
Student 28