## Pitzer College FYS Placement Program

__Authors: Brian Simpkins and Professor Sarah Gilman__

__Written in the fall of 2022__

In [1]:
# First let's set up some imports
import numpy as np
import pandas as pd
import unicodedata as ud
import math
from random import sample

In [29]:
# Now let's collect our data
student_data = "C:/Users/Brian/Documents/2022Fall/FYSplacement/data/FYS Fall 2021 Results - Active Commits 20210719-171403_deidentified.csv"
df = pd.read_csv(student_data)

# Separate forms by completion
completed_forms = df[df["Completed Form"] != "No"]
incomplete_forms = df[df["Completed Form"] != "Yes"]
num_students = len(completed_forms)

# Remove df to free memory
del df

In [30]:
# Convert all classes to normalized unicode
completed_forms["Preference 1"] = [ud.normalize("NFKC", x) for x in completed_forms["Preference 1"]]
completed_forms["Preference 2"] = [ud.normalize("NFKC", x) for x in completed_forms["Preference 2"]]
completed_forms["Preference 3"] = [ud.normalize("NFKC", x) for x in completed_forms["Preference 3"]]
completed_forms["Preference 4"] = [ud.normalize("NFKC", x) for x in completed_forms["Preference 4"]]
completed_forms["Preference 5"] = [ud.normalize("NFKC", x) for x in completed_forms["Preference 5"]]

In [4]:
# Gather a list of all classes
classes = set()
classes.update(completed_forms["Preference 1"])
classes.update(completed_forms["Preference 2"])
classes.update(completed_forms["Preference 3"])
classes.update(completed_forms["Preference 4"])
classes.update(completed_forms["Preference 5"])

In [5]:
# Determine class size
large_class_size = math.ceil(num_students / len(classes))
small_class_size = large_class_size - 1
small_class_num = len(classes) * large_class_size - num_students

In [6]:
# Build class - preference dictionary
class_pref = {x: {"1":[], "n1":0, "2":[], "n2":0, "3":[], "n3":0, "4":[], "n4":0, "5":[], "n5":0} for x in classes}

for i in range(len(completed_forms["CX ID"])):
    # add student id to class_pref dict
    class_pref[completed_forms["Preference 1"].iloc[i]]["1"].append(completed_forms["CX ID"].iloc[i])
    class_pref[completed_forms["Preference 1"].iloc[i]]["n1"] += 1
    class_pref[completed_forms["Preference 2"].iloc[i]]["n2"] += 1
    class_pref[completed_forms["Preference 2"].iloc[i]]["2"].append(completed_forms["CX ID"].iloc[i])
    class_pref[completed_forms["Preference 3"].iloc[i]]["n3"] += 1
    class_pref[completed_forms["Preference 3"].iloc[i]]["3"].append(completed_forms["CX ID"].iloc[i])
    class_pref[completed_forms["Preference 4"].iloc[i]]["n4"] += 1
    class_pref[completed_forms["Preference 4"].iloc[i]]["4"].append(completed_forms["CX ID"].iloc[i])
    class_pref[completed_forms["Preference 5"].iloc[i]]["n5"] += 1
    class_pref[completed_forms["Preference 5"].iloc[i]]["5"].append(completed_forms["CX ID"].iloc[i])

# Convert to dataframe
class_pref = pd.DataFrame(class_pref).T

In [7]:
# Determine popularity at given preference depth
class_pref["top2"] = class_pref["n1"] + class_pref["n2"]
class_pref["top3"] = class_pref["top2"] + class_pref["n3"]
class_pref["top4"] = class_pref["top3"] + class_pref["n4"]
class_pref["top5"] = class_pref["top4"] + class_pref["n5"]

class_pref = class_pref.sort_values(["top3"])

In [8]:
# If there aren't enough students to fill a class, reorder and try again!
def place_students(class_preferences):

    num_small_classes = small_class_num

    student_assignments = {}

    # Iterate through the classes, starting with the least popular
    for class_num, curr_class in enumerate(class_preferences.index):

        # Determine the class size - less popular classes will have the smaller class sizes
        if num_small_classes > 0:
            curr_size = small_class_size
            num_small_classes -= 1
        else:
            curr_size = large_class_size

        # Give open spots to students who requested, giving greater weight to higher preference
        curr_preference = 1
        while curr_size > 0 and curr_preference <= 5:
            # Get all students with the current preference level
            curr_students = class_preferences.loc[curr_class][str(curr_preference)]
            # Remove students who have already been placed
            curr_students = [student for student in curr_students if student not in student_assignments]
            # If there are more students at this preference level than we are looking for
            if len(curr_students) > curr_size:
                curr_students = sample(curr_students, curr_size)
            
            for student in curr_students:
                # Assign student
                student_assignments[student] = curr_class
                # Fill one slot
                curr_size -= 1
            
            curr_preference += 1
        
        # If we went through all preferences and the class still isn't filled, swap the class up and try again!
        if curr_size > 0:
            print("Class " + curr_class + " cannot be filled. Swapping up and trying again!")
            class_preferences = class_preferences.iloc[np.r_[0: class_num - 1, class_num, class_num - 1, class_num + 1 : len(class_preferences)]]
            return place_students(class_preferences)
    
    return student_assignments


In [9]:
# get a score for how "happy" people are in their classes
def get_score(student_assignments):
    total = 0
    worst_placement = 0

    for student, course in student_assignments.items():
        prefs = completed_forms.loc[completed_forms["CX ID"] == student].values.tolist()[0][-5:]
        assigned_class = student_assignments[student]
        total += prefs.index(assigned_class)

        worst_placement = max(worst_placement, prefs.index(assigned_class))
    
    ave_happiness = total / num_students
    return ave_happiness, worst_placement

In [10]:
# get a really good student assignment
min_student_assignments = {}
min_student_happiness = 5
min_worst_placement = 5

# iterate 500 times, make sure everyone is happy!
for i in range(100):
    student_assigments = place_students(class_pref.copy())
    happiness, worst_placement = get_score(student_assigments)
    if worst_placement <= min_worst_placement and happiness <= min_student_happiness:
        min_student_assignments = student_assigments
        min_worst_placement = worst_placement
        min_student_happiness = happiness

Class Borowski, Thomas - "Is There a Science of Dreaming?" cannot be filled. Swapping up and trying again!
Class Guillermo, Steffanie - "Social Identity" cannot be filled. Swapping up and trying again!
Class Borowski, Thomas - "Is There a Science of Dreaming?" cannot be filled. Swapping up and trying again!
Class Guillermo, Steffanie - "Social Identity" cannot be filled. Swapping up and trying again!
Class Borowski, Thomas - "Is There a Science of Dreaming?" cannot be filled. Swapping up and trying again!
Class Guillermo, Steffanie - "Social Identity" cannot be filled. Swapping up and trying again!
Class Guillermo, Steffanie - "Social Identity" cannot be filled. Swapping up and trying again!
Class Borowski, Thomas - "Is There a Science of Dreaming?" cannot be filled. Swapping up and trying again!
Class Guillermo, Steffanie - "Social Identity" cannot be filled. Swapping up and trying again!
Class Guillermo, Steffanie - "Social Identity" cannot be filled. Swapping up and trying again!
Cl

In [27]:
completed_forms.columns[-5:]

Index(['Preference 1', 'Preference 2', 'Preference 3', 'Preference 4',
       'Preference 5'],
      dtype='object')

In [33]:
output = completed_forms.drop(columns = completed_forms.columns[-5:])
new_col = output["CX ID"].apply(lambda x : student_assigments[x])
output.insert(1, "Assignment", new_col)
output.to_csv(".", index = False)

PermissionError: [Errno 13] Permission denied: '.'