In [None]:
Generates n instances of a basic students project selection matrix and saves each as a csv file. 

Students may rank 5 selected projects. 

All unselected projects are ranked 0 unless otherwise specified. 

In [8]:
import numpy as np
import random
from scipy.optimize import linear_sum_assignment
import csv

In [9]:
n_students = 10
n_projects = 15
n_choices = 5

In [10]:
def build_cost_matrix(n_students, n_projects, unselected=0):
    """
    Creates a student project selection form
    Unselected projects indicated by zero by default
    """
    students = list(range(n_students))
    projects = list(range(n_projects))

    cost_matrix = []
    
    for s in students:
        selections = [unselected for p in projects] # assigns a suitably high number to all unselected projects
        
        # selects n ranked choices per student for projects selected randomly from a weighted probability distribution
        for n in range(1, n_choices+1):
            
            element = random.choices(np.arange(n_projects), weights=np.linspace(0, 1, n_projects))
            
            selections[element[0]] = n
            
        # repeat the last two entries to represent two projects that can each accept 2 students 
        # selections = selections + selections[-2:] 
            
        cost_matrix.append(selections)
    
    return np.array(cost_matrix)

In [11]:
def build_ideal_cost_matrix(n_students, n_projects):
    
    """
    Creates an ideal student project selection form where each 
    student has uniquely ranked their first choice 1
    Unselected projects indicated by zero
    """

    A = np.identity(n_students, dtype=int)                          # ensure all students have uniquely chosen first choice

    B = np.zeros((n_students, n_projects-n_students), dtype=int)    # add extra projects

    cost_matrix = np.hstack((A,B))                                  # combine to form project selection matrix

    cost_matrix=cost_matrix.tolist()

    for i in range(len(cost_matrix)):

        # selects n ranked choices per student 
        for n in range(2, n_choices+1):

            element = random.choices(np.arange(n_projects), weights=np.linspace(0, 1, n_projects))

            while cost_matrix[i][element[0]]>0:
                element = random.choices(np.arange(n_projects), weights=np.linspace(0, 1, n_projects))

            cost_matrix[i][element[0]]=n

Generate multiple sample data sets to use to test SPA algorithms 

In [None]:
for i in range(5):

    # build student project selection form
    cost_matrix = build_cost_matrix(n_students, n_projects)        
    
    # store form 
    with open(f'data/student_preference_matrices/student_preference_matrix_{i}.csv', 'w') as f:   
        
        w = csv.writer(f)                  # writer object

        w.writerows(cost_matrix)           # list to row

Generate an ideal data set to test SPA algorithms 

In [12]:


# build student project selection form
cost_matrix = build_ideal_cost_matrix(n_students, n_projects) 

# store form 
with open(f'data/student_preference_matrices/student_preference_matrix_ideal.csv', 'w') as f:   

    w = csv.writer(f)                  # writer object

    w.writerows(cost_matrix)           # list to row

TypeError: writerows() argument must be iterable