The problem involves a number of students and a number of projects. 

Any student can be assigned to any project, incurring a cost that may vary depending on the particular student-project pairing. 

The number of projects is greater than the number of students.

A project that can accept N students is represented as N separate projects, each given the same rank by any student who chooses that project. 

In [109]:
import numpy as np
import random
from scipy.optimize import linear_sum_assignment
import csv

# Student Project Assignment

In [110]:
# Problem size used by the cells below.
n_students = 10   # number of students; also used to check that every student received a project
n_projects = 15   # number of projects passed to build_cost_matrix (before the last two are duplicated)
n_choices = 5     # number of ranked choices generated per student by build_cost_matrix

In [111]:


def build_cost_matrix(n_students, n_projects, choices_per_student=None):
    """
    Create a random student project selection form as a cost matrix.

    Each student ranks ``choices_per_student`` *distinct* projects
    1 (favourite), 2, 3, ...  Every project the student did not select gets
    a cost of 10.  Projects are drawn from a linearly increasing weight
    distribution, so higher-numbered projects are more popular and project 0
    (weight 0) is never selected.

    The last two project columns are then repeated at the end of each row to
    represent two projects that can each accept 2 students.

    Parameters
    ----------
    n_students : int
        Number of rows (students) to generate.
    n_projects : int
        Number of distinct projects, before the last two are duplicated.
    choices_per_student : int, optional
        Number of ranked choices per student.  When omitted, the
        module-level ``n_choices`` is used.

    Returns
    -------
    numpy.ndarray
        One row per student; for ``n_projects >= 2`` each row has
        ``n_projects + 2`` entries.

    Raises
    ------
    ValueError
        If more choices are requested than there are projects with a
        non-zero selection weight.
    """
    if choices_per_student is None:
        choices_per_student = n_choices  # fall back to the notebook-level setting

    unselected_cost = 10  # suitably high number for all unselected projects

    # Linearly increasing popularity; the first project has weight 0.
    weights = np.linspace(0, 1, n_projects).tolist()

    selectable = sum(1 for w in weights if w > 0)
    if choices_per_student > selectable:
        raise ValueError(
            f"cannot rank {choices_per_student} distinct projects: "
            f"only {selectable} projects have a non-zero selection weight"
        )

    cost_matrix = []

    for _ in range(n_students):
        selections = [unselected_cost] * n_projects

        # Draw WITHOUT replacement: a project that has already been ranked is
        # removed from the pool, so a later rank can never overwrite an
        # earlier one and every student ends up with a full set of ranks.
        candidates = list(range(n_projects))
        remaining_weights = list(weights)

        for rank in range(1, choices_per_student + 1):
            pick = random.choices(range(len(candidates)), weights=remaining_weights)[0]
            selections[candidates.pop(pick)] = rank
            remaining_weights.pop(pick)

        # repeat the last two entries to represent two projects that can each accept 2 students
        selections = selections + selections[-2:]

        cost_matrix.append(selections)

    return np.array(cost_matrix)

Build student preference matrix

In [112]:
# # build student project selection form using random values
# cost_matrix = build_cost_matrix(n_students, n_projects)

# build student project selection form by importing
# newline='' leaves line-ending handling to the csv module, as its documentation requires
with open('data/student_preference_matrices/student_preference_matrix_0.csv', 'r', newline='') as f:
    # convert from string; blank lines in the file come back as empty rows and
    # are skipped so that the matrix stays rectangular
    cost_matrix = [[int(i) for i in j] for j in csv.reader(f) if j]

# The file is fully read at this point, so the remaining steps run after it is closed.
cost_matrix = [[10 if i == 0 else i for i in j] for j in cost_matrix]  # replace zeros (unselected) with 10

cost_matrix = [j + j[-2:] for j in cost_matrix]  # repeat last two entries to simulate 2 projects with student capacity 2

cost_matrix = np.array(cost_matrix)

print('\ncost matrix\n', np.matrix(cost_matrix))


cost matrix
 [[10 10 10  5 10 10 10 10  4 10 10 10  1 10  3 10  3]
 [10 10 10 10 10 10 10 10 10 10  1  4 10  3  5  3  5]
 [10 10 10 10  2 10 10 10 10  1  4  5 10 10 10 10 10]
 [10 10 10 10 10 10 10  2 10 10 10  1 10  5  4  5  4]
 [10  4 10 10 10  5 10  3 10 10 10 10 10  2 10  2 10]
 [10 10  3 10 10  5 10 10 10  1  2 10 10 10  4 10  4]
 [10 10 10 10 10 10 10  3 10  1 10  4  2  5 10  5 10]
 [10 10  2 10 10 10 10  3 10  4 10  1 10  5 10  5 10]
 [10 10 10 10  2 10 10 10  1 10 10  5  4 10  3 10  3]
 [10 10 10 10 10 10 10 10  2  3 10  4 10  5  1  5  1]]


Find solution

In [113]:
# Solve the assignment problem on the cost matrix.
# studentID holds the row (student) index and projectID the matching column
# (project slot) index of each assignment. Because the last two columns of
# the cost matrix are duplicates of the two columns before them, a column
# index in the duplicated range refers to the second place on one of those
# projects.
studentID, projectID = linear_sum_assignment(cost_matrix) # find optimised assignment

Display solution

In [114]:
# Indicator matrix with the same shape as the cost matrix:
# 1 where a student (row) is assigned to a project slot (column), 0 elsewhere.
assign_matrix = np.zeros(cost_matrix.shape)
assign_matrix[studentID, projectID] = 1        # mark every assignment in one indexed write

# check every student has a project
print(
    '\ntotal_projects_assigned=',
    np.sum(assign_matrix),
    '\nequal to number of students=',
    np.sum(assign_matrix)==n_students,
)


total_projects_assigned= 10.0 
equal to number of students= True


In [115]:
# Matrix showing the cost of each assignment (0 where nothing is assigned).
# It is rebuilt from a mask instead of using an in-place `*=`, so running this
# cell more than once gives the same result rather than multiplying the costs
# in again on every run.
assign_matrix = np.where(assign_matrix != 0, cost_matrix, 0).astype(float)

# Rank (cost) each student gave to the project they were assigned, in row order.
ranks = assign_matrix[np.nonzero(assign_matrix)]

print('\nassign matrix\n',np.matrix(np.absolute(assign_matrix)))
print('\nstudents', studentID)
print('project ID (this model)', projectID)
print('project rank', ranks)
print('\n\nsum of all assignments (should be as low as possible)=', ranks.sum())


assign matrix
 [[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 2. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 3. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0.]]

students [0 1 2 3 4 5 6 7 8 9]
project ID (this model) [12 10  4 11 13  9  7  2  8 14]
project rank [1. 1. 2. 1. 2. 1. 3. 2. 1. 1.]


sum of all assignments (should be as low as possible)= 15.0
