## Pitzer College FYS Placement Program

__Authors: Brian Simpkins and Professor Sarah Gilman__

__Written in the fall of 2022__

In [None]:
# First let's set up some imports
import numpy as np
import pandas as pd
import unicodedata as ud
import math
from random import sample
import tqdm

In [None]:
# Now let's collect our data
student_data = "C:/Users/Brian/Documents/2022Fall/FYSplacement/data/FYS Fall 2022 Results - Active Commits 20220712-180011_deidentified.csv"
df = pd.read_csv(student_data)

# Total head count, taken before the frame is split so that students who did
# not complete the form are included.
num_students = len(df)

# Separate forms by completion.  A row whose "Completed Form" value is neither
# "Yes" nor "No" passes both filters and therefore appears in both frames.
# The explicit .copy() makes each subset an independent frame, so the column
# assignments done on completed_forms later in the notebook write to a real
# frame instead of raising pandas' SettingWithCopyWarning.
completed_forms = df[df["Completed Form"] != "No"].copy()
incomplete_forms = df[df["Completed Form"] != "Yes"].copy()

# Remove df to free memory
del df

In [None]:
# Convert all classes to normalized unicode.  NFKC folds compatibility-
# equivalent characters into a single form; every preference column gets the
# same treatment.
for pref_column in (
    "Preference 1",
    "Preference 2",
    "Preference 3",
    "Preference 4",
    "Preference 5",
):
    completed_forms[pref_column] = [
        ud.normalize("NFKC", class_name)
        for class_name in completed_forms[pref_column]
    ]

In [None]:
# Gather the set of every class that appears in any preference column
classes = set()
for pref_column in (
    "Preference 1",
    "Preference 2",
    "Preference 3",
    "Preference 4",
    "Preference 5",
):
    classes.update(completed_forms[pref_column])

In [None]:
# Determine class size.
# Students are spread as evenly as possible: "large" classes hold the rounded-up
# average, "small" classes hold one fewer.
num_classes = len(classes)
full_groups, remainder = divmod(num_students, num_classes)

# Rounded-up average number of students per class
large_class_size = full_groups + 1 if remainder else full_groups
small_class_size = large_class_size - 1

# How many classes must be small so that the seat total equals num_students
small_class_num = num_classes * large_class_size - num_students

In [None]:
# Build class - preference dictionary.
# For every class, key "k" holds the CX IDs of the students who listed the
# class as their k-th preference and key "nk" holds how many there are.
class_pref = {
    class_name: {
        "1": [], "n1": 0,
        "2": [], "n2": 0,
        "3": [], "n3": 0,
        "4": [], "n4": 0,
        "5": [], "n5": 0,
    }
    for class_name in classes
}

# Walk each preference column alongside the student ids
student_ids = completed_forms["CX ID"].to_numpy()
for rank in ("1", "2", "3", "4", "5"):
    choices = completed_forms["Preference " + rank].to_numpy()
    for student_id, chosen_class in zip(student_ids, choices):
        slot = class_pref[chosen_class]
        # add student id to class_pref dict and bump the matching counter
        slot[rank].append(student_id)
        slot["n" + rank] += 1

# Convert to dataframe (transposed so each class becomes a row)
class_pref = pd.DataFrame(class_pref).T

In [None]:
# Determine popularity at given preference depth:
# "topK" is the number of students who ranked the class anywhere in their
# first K preferences, built up as a running total of n1..nK.
running_total = class_pref["n1"]
for depth in ("2", "3", "4", "5"):
    running_total = running_total + class_pref["n" + depth]
    class_pref["top" + depth] = running_total

# Least requested classes (across all five preferences) come first
class_pref = class_pref.sort_values(["top5"])

In [None]:
# Placed students are recorded in this dict (student -> assigned class); a
# student that appears as a key can't be placed anymore
student_assignments = dict()

# If our ordering of classes means one can't be filled, swap it with the one above it
def swap_up(df, i1):
    """Swap the row at position ``i1`` with the row directly above it.

    Both the cell values and the index labels of the two rows are exchanged,
    so every label keeps pointing at its own data after the swap.  (Swapping
    only the values through ``iloc`` would leave the labels in place and
    attach each label to the other row's data.)

    Args:
        df: DataFrame to reorder.  It is modified in place.
        i1: Position of the row to move up.  Negative positions count from
            the end, as with ``iloc``.

    Returns:
        The same DataFrame object, for convenience.

    Raises:
        IndexError: If ``i1`` is out of range or refers to the first row,
            which has no row above it to swap with.
    """
    num_rows = len(df)
    if i1 < 0:
        i1 += num_rows
    if not 1 <= i1 < num_rows:
        # Without this guard i1 == 0 would give i2 == -1 and silently swap
        # the first row with the last one.
        raise IndexError(
            f"cannot swap up row {i1} of a frame with {num_rows} rows"
        )

    # One row above
    i2 = i1 - 1

    # Exchange the cell values ...
    row1 = df.iloc[i1].copy()
    row2 = df.iloc[i2].copy()
    df.iloc[i1] = row2
    df.iloc[i2] = row1

    # ... and move the index labels along with them.
    order = list(range(num_rows))
    order[i1], order[i2] = order[i2], order[i1]
    df.index = df.index.take(order)

    return df

# We probably should remember classes that we had to swap to the top. 
# If we see the same one twice then it can't be filled properly.

# If there aren't enough students to fill a class, reorder and try again!
def place_students(class_preferences):
    """Greedily assign students to classes in the row order of the frame.

    Classes are visited in the order of ``class_preferences.index``.  The
    first ``small_class_num`` classes visited get ``small_class_size`` seats
    and all later ones get ``large_class_size`` seats.  For each class the
    preference columns "1" to "5" are tried in that order; students already
    present in ``student_assignments`` are skipped, and when more candidates
    remain than seats a random subset is drawn with ``random.sample``.

    The module-level ``student_assignments`` dict is emptied at the start and
    filled with ``student -> class`` entries as a side effect.

    NOTE: the step that should fill a class still short of students after all
    five preference levels is not implemented yet.  When that happens the
    function returns immediately and the remaining classes are not processed.

    Args:
        class_preferences: DataFrame indexed by class, whose columns "1".."5"
            hold the lists of students requesting the class at that rank.

    Returns:
        None.
    """
    # Every attempt starts from an empty placement table
    student_assignments.clear()

    small_classes_left = small_class_num

    # Iterate through the classes in the order given by the frame
    for curr_class in tqdm.tqdm(class_preferences.index):

        # Classes visited first take the smaller class size
        if small_classes_left > 0:
            open_seats = small_class_size
            small_classes_left -= 1
        else:
            open_seats = large_class_size

        requests = class_preferences.loc[curr_class]

        # Hand out seats one preference level at a time, best preference first
        for preference in range(1, 6):
            if open_seats <= 0:
                break

            # Students at this preference level who are not placed yet
            candidates = [
                student
                for student in requests[str(preference)]
                if student not in student_assignments
            ]

            # More candidates than seats: pick the lucky ones at random
            if len(candidates) > open_seats:
                candidates = sample(candidates, open_seats)

            for student in candidates:
                student_assignments[student] = curr_class
            open_seats -= len(candidates)

        # If we went through all preferences and the class still isn't filled
        if open_seats > 0:
            # TODO: iterate through the remaining students, check their
            # preferences, and if one of their preferred classes contains a
            # student that wants this class, substitute the students.
            return


place_students(class_pref.copy())


In [None]:
# Scratch check of the row swap on a copy of class_pref: move the row at
# position i1 up by one place, labels included.
df = class_pref.copy()
i1 = 16

i2 = i1 - 1

# Row positions in their new order: everything before i2, then i1, then i2,
# then everything after i1.  The tail has to start at i1 + 1 and run through
# len(df); starting it at i1 and stopping at len(df) - 1 would repeat row i1
# and drop the final row.
df = df.iloc[np.r_[0:i2, i1, i2, i1 + 1:len(df)]]