In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
import ast
import math
import random
import seaborn as sns


In [None]:
# Load the survey export and keep only its first eight columns.
preferences = pd.read_csv('Fall_clinical_placements.csv').iloc[:, :8].copy()

display(preferences)
def string_to_num(choice):
    """Translate a survey answer into its numeric rank.

    Returns 1 for 'First Choice', 2 for 'Second Choice', and 3 for any
    other value.
    """
    if choice == 'First Choice':
        return 1
    return 2 if choice == 'Second Choice' else 3
    
# Replace the free-text answers with their encoded values. The table lists
# every spelling handled here, and is applied in this order.
answer_codes = [
    ('First Choice', 1),
    ('Second Choice', 2),
    ('Third Chice', 3),
    ('Third Choice', 3),
    ('third Chice', 3),
    ('Med Surg', '[1,2]'),
    ('Psych', '[2,1]'),
]
for raw_answer, code in answer_codes:
    preferences.mask(preferences == raw_answer, code, inplace=True)

print(preferences.columns)
# Columns after 'Name' and 'r' are renamed to "[rotation,placement]" strings.
preferences.columns = ['Name', 'r', '[0,0]', '[0,1]', '[0,2]', '[1,0]', '[1,1]', '[1,2]']


In [None]:
# Sort students by name and renumber the rows 0..n-1. Without the reset the
# frame keeps its pre-sort index labels, so a label-based lookup such as
# preferences['r'][i] would return rows in the original (unsorted) order while
# positional consumers see the sorted order.
preferences = preferences.sort_values('Name').reset_index(drop=True)

In [None]:
# Preparing data

N = 2 # num rotations
M = 3 # num placements
num_students = preferences.shape[0]

# S[i, rotation, placement] is student i's encoded choice for that slot.
# Rows are filled positionally, i.e. in the frame's current row order.
S = np.zeros((num_students, N, M))
for col in preferences.columns[2:]:
    # Column names are "[rotation,placement]" strings; parse them to indices.
    rotation_idx = ast.literal_eval(col)
    S[:, rotation_idx[0], rotation_idx[1]] = preferences[col]

# r[i] is student i's rotation ranking, parsed from the list literal in 'r'.
# Iterate positionally (not by index label) so r stays aligned with S even
# when the frame's index labels no longer match its row order after sorting.
r = np.zeros((num_students, N))
for i, ranking in enumerate(preferences['r']):
    r[i] = ast.literal_eval(ranking)

# Students at these row positions are given one preference point.
# NOTE(review): the positions are hard-coded; confirm they refer to the
# name-sorted row order used for S and r.
pp = np.zeros((num_students, 1))
pp[[9,11,13,18,26]] = 1


print(f'S: {S.shape}')
print(f'r: {r.shape}')
print(f'pp: {pp.shape}')

In [None]:
def G(S, r, pp):
    """Turn raw choices into per-slot scores.

    For every student ``i``, rotation ``row`` and placement ``col`` the score
    is the sum of:

    * a choice term from ``S[i, row, col]``: 1 -> 0, 2 -> -1, 3 -> -3;
    * a rotation term from ``r[i, row]``: rank 1 -> +2, rank 2 -> +1,
      rank 3 -> 0;
    * a preference-point term ``ceil((pp[i] - (rank - 1)) / n_rotations)``.

    The preference-point term uses true division. Flooring first (``//``)
    would make the ceiling a no-op and would subtract one from students who
    have zero preference points.

    Parameters
    ----------
    S : array of shape (students, rotations, placements)
    r : array indexed as ``r[i, row]``, the rank a student gave each rotation
    pp : array with one preference-point value per student, either shape
        (students,) or (students, 1)

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``S``.

    Unrecognised choices or rotation ranks print a message and contribute
    nothing to the score.
    """
    n_students, n_rotations, n_placements = S.shape
    choice_score = {1: 0, 2: -1, 3: -3}
    rank_bonus = {1: 2, 2: 1, 3: 0}

    S_prime = np.zeros((n_students, n_rotations, n_placements))
    for i in range(n_students):
        # Extract a plain scalar so math.ceil never receives a size-1 array.
        points = float(np.ravel(pp[i])[0])
        for row in range(n_rotations):
            rotation_rank = r[i, row]
            for col in range(n_placements):
                val = 0
                preference = S[i, row, col]

                # Choice term
                if preference in choice_score:
                    val += choice_score[preference]
                else:
                    print('Not a valid preference')

                # Rotation ranking and preference points
                if rotation_rank in rank_bonus:
                    val += rank_bonus[rotation_rank]
                    val += math.ceil((points - (rotation_rank - 1)) / n_rotations)
                else:
                    print('Not a valid rotation rank')

                S_prime[i, row, col] = val
    return S_prime

# Score every (student, rotation, placement) slot from the choices S,
# the rotation rankings r and the preference points pp.
S_prime = G(S,r,pp)

In [None]:
# restrictions[rotation, placement] is the [rotation, placement] slot that
# clashes with that placement, or an empty list when nothing clashes.
restrictions = np.empty((2, 3), dtype=object)
restrictions[0, 0] = [1, 0]  # Monday Med Surg (0,0)
restrictions[0, 1] = []      # Friday Med Surg (0,1)
restrictions[0, 2] = [1, 2]  # Saturday Med Surg (0,2)
restrictions[1, 0] = [0, 0]  # Monday Psych (1,0)
restrictions[1, 1] = []      # Thursday Psych (1,1)
restrictions[1, 2] = [0, 2]  # Saturday Psych (1,2)

restrictions


In [None]:
# Preparing data for algorithm

n_entries = num_students * N * M

# just holds the score for a placement
preference_list = np.zeros((n_entries,), dtype=int)

# holds metadata about placement (student_id, rotation, placement)
preference_list_meta_data = np.zeros((n_entries, 3), dtype=int)

# np.ndindex walks (student, rotation, placement) with the last index
# changing fastest, matching a student -> rotation -> placement nested loop.
for flat_idx, (student, rotation, placement) in enumerate(np.ndindex(num_students, N, M)):
    preference_list_meta_data[flat_idx] = [student, rotation, placement]
    preference_list[flat_idx] = S_prime[student][rotation, placement]

In [None]:
# Remove saturday's for Avi
# Drops the (student, rotation, placement) entries listed below for student 5.

fri_sat_list = np.asarray([(5,0,1), (5,0,2), (5,1,2)])
# fri_sat_list = np.asarray([(5,0,2), (5,1,2)])

# Compare every metadata row against every listed triple at once; a row is
# excluded when it equals any one of them in all three fields.
is_excluded = (
    (preference_list_meta_data[:, None, :] == fri_sat_list[None, :, :])
    .all(axis=2)
    .any(axis=1)
)
preference_mask = ~is_excluded
preference_list_meta_data = preference_list_meta_data[preference_mask]
preference_list = preference_list[preference_mask]
preference_list

In [None]:
def _greedy_fill(scores, meta, capacity):
    """Run one greedy pass, taking the highest-scoring remaining entry first."""
    # Descending order of score (argsort is ascending, so reverse it).
    order = np.argsort(scores)[::-1]
    open_scores = scores[order]
    open_meta = meta[order]

    # Seats still free in each (rotation, placement) slot.
    slots_left = np.full((N, M), capacity)
    placements = {}
    score = 0

    while open_scores.shape[0] > 0:
        student, rotation, placement = open_meta[0]

        if slots_left[rotation, placement] <= 0:
            # Slot is full: discard this entry only and move on.
            open_scores = open_scores[1:]
            open_meta = open_meta[1:]
            continue

        score += open_scores[0]
        placements.setdefault(f'[{rotation},{placement}]', []).append(student)
        slots_left[rotation, placement] -= 1

        # The student is now settled for this rotation: drop their other
        # entries for it.
        keep = ~((open_meta[:, 0] == student) & (open_meta[:, 1] == rotation))
        open_scores = open_scores[keep]
        open_meta = open_meta[keep]

        # Drop the entry that clashes with the slot just assigned, if any.
        conflict = restrictions[rotation, placement]
        if len(conflict) == 2:
            keep = ~(
                (open_meta[:, 0] == student)
                & (open_meta[:, 1] == conflict[0])
                & (open_meta[:, 2] == conflict[1])
            )
            open_scores = open_scores[keep]
            open_meta = open_meta[keep]
        elif len(conflict) > 2:
            print('Too many conflicts, needs implementation')

    return placements, score


def place(preferences, preferences_meta, capacity=10, max_attempts=1000):
    """Greedily assign every student one placement per rotation.

    Entries are processed from highest to lowest score. A pass counts as
    complete when it made ``num_students * N`` assignments. After an
    incomplete pass the entries are shuffled and the pass is repeated.

    Parameters
    ----------
    preferences : 1-D array of scores, one per candidate entry.
    preferences_meta : array of shape (entries, 3) holding
        (student, rotation, placement) for each score.
    capacity : seats available in every (rotation, placement) slot.
    max_attempts : upper bound on greedy passes, so an unsatisfiable input
        fails loudly instead of looping forever.

    Returns
    -------
    (placements, score)
        ``placements`` maps "[rotation,placement]" to the list of students
        assigned there; ``score`` is the sum of the scores of the
        assignments made.

    Raises
    ------
    RuntimeError
        If no complete assignment is found within ``max_attempts`` passes.

    Reads the module-level ``num_students``, ``N``, ``M`` and
    ``restrictions``.
    """
    required = num_students * N
    placements, score = {}, 0
    if required == 0:
        return placements, score

    for _ in range(max_attempts):
        placements, score = _greedy_fill(preferences, preferences_meta, capacity)
        if sum(len(group) for group in placements.values()) >= required:
            return placements, score

        # Incomplete pass: reshuffle the entries before trying again.
        order = list(range(preferences.shape[0]))
        random.shuffle(order)
        preferences = preferences[order]
        preferences_meta = preferences_meta[order]

    raise RuntimeError(
        f'No complete placement found after {max_attempts} attempts'
    )
        
        
        
    

In [None]:
# Repeat the greedy placement on differently ordered inputs and keep the
# highest-scoring result.
best_placements = None
# Start below any reachable score so the first run is always recorded,
# even when every total is zero or negative.
best_score = float('-inf')
for _ in tqdm(range(100)):
    # Randomize order of preferences
    shuffle_idxs = np.arange(preference_list.shape[0])
    random.shuffle(shuffle_idxs)
    # Distinct names: do not rebind the `preferences` DataFrame used by
    # earlier cells.
    shuffled_preferences = preference_list[shuffle_idxs]
    shuffled_meta = preference_list_meta_data[shuffle_idxs]
    # Pass the shuffled arrays; passing the unshuffled ones would make
    # every iteration start from the same input order.
    placements, score = place(shuffled_preferences, shuffled_meta)
    if score > best_score:
        best_score = score
        best_placements = placements


In [None]:
# Display the best assignment kept by the search loop
# (a dict mapping "[rotation,placement]" to the students placed there).
best_placements

In [None]:
# Now we will figure out who got their first, second and third choices
def find_which_choice(placements, S):
    """Look up the stated choice behind every assignment.

    ``placements`` maps "[rotation,placement]" strings to lists of student
    indices. For each assigned student the value ``S[student, rotation,
    placement]`` is collected: rotation 0 goes to the first returned list
    and rotation 1 to the second. Any other rotation prints a message and
    is skipped.

    Returns ``(med_surg_placement_ranking, psych_placement_ranking)``.
    """
    med_surg_placement_ranking, psych_placement_ranking = [], []
    for slot_key, students in placements.items():
        for student in students:
            slot = ast.literal_eval(slot_key)
            rotation = slot[0]
            if rotation not in (0, 1):
                print(f'oops, {rotation} is not right!')
                continue
            bucket = med_surg_placement_ranking if rotation == 0 else psych_placement_ranking
            bucket.append(S[student, rotation, slot[1]])
    return med_surg_placement_ranking, psych_placement_ranking

# Stated choice (from S) behind each assignment in the best result, split by rotation.
med_surg_placement_ranking, psych_placement_ranking = find_which_choice(best_placements, S)


            

In [None]:
# Tally how many psych assignments fall on each choice value:
# one printed row per value, as (value, count).
unique, counts = np.unique(np.asarray(psych_placement_ranking), return_counts=True)
print(np.column_stack((unique, counts)))

In [None]:
# Tally how many med surg assignments fall on each choice value:
# one printed row per value, as (value, count).
unique, counts = np.unique(np.asarray(med_surg_placement_ranking), return_counts=True)
print(np.column_stack((unique, counts)))

In [None]:
# Now comparing to Joe's placements

# Load data: drop the last column and treat blanks as 0
actual_placements = pd.read_csv('actual_fall_clinical_placements.csv').iloc[:, :-1]
actual_placements = actual_placements.fillna(0)

# Replace strings with integer representation (applied in this order)
site_codes = [
    ('PIH', 3),
    ('VPH', 2),
    ('Cedars', 1),
    ('Mon LADMC', 1),
    ('MON LADMC', 1),
    ('Thurs Gateways', 2),
    ('SAT GATEWAYS', 3),
]
for site_name, site_code in site_codes:
    actual_placements.mask(actual_placements == site_name, site_code, inplace=True)

# Add in student id's: after sorting, the fresh 0..n-1 row number becomes
# the 'index' column.
actual_placements = (
    actual_placements
    .sort_values('Summer preference points')
    .reset_index(drop=True)
    .reset_index()
)
display(actual_placements.head())
student_list = np.asarray(actual_placements['index'])

# Convert to index representation (codes are 1-based, indices 0-based)
med_surg_placement = np.asarray(actual_placements['Med/Surg Choice 1'], dtype=int) - 1
psych_placement = np.asarray(actual_placements['Psych Choice 1'], dtype=int) - 1



In [None]:
# Build (student, rotation, placement) rows for the actual placements.
# The row count comes from the data instead of a hard-coded 29, so the cell
# keeps working when the number of students changes.
n_actual = len(student_list)
med_surg_meta = np.column_stack((student_list, np.zeros(n_actual), med_surg_placement))
psych_meta = np.column_stack((student_list, np.ones(n_actual), psych_placement))

In [None]:
# Get med surg placement scores
# For each actual med surg row, find the position of the identical
# (student, rotation, placement) row in preference_list_meta_data.
ms_points_idx = np.zeros(len(med_surg_meta), dtype=int)
for i, entry in enumerate(med_surg_meta):
    matches = np.flatnonzero((preference_list_meta_data == entry).all(axis=1))
    # Exactly one match is required. Take the scalar explicitly rather than
    # relying on implicit conversion of a size-1 array.
    if matches.size != 1:
        raise ValueError(
            f'Expected exactly one preference entry for {entry.tolist()}, '
            f'found {matches.size}'
        )
    ms_points_idx[i] = matches[0]

In [None]:
# Get psych placement scores
# For each actual psych row, find the position of the identical
# (student, rotation, placement) row in preference_list_meta_data.
psych_points_idx = np.zeros(len(psych_meta), dtype=int)
for i, entry in enumerate(psych_meta):
    matches = np.flatnonzero((preference_list_meta_data == entry).all(axis=1))
    # Exactly one match is required. Take the scalar explicitly rather than
    # relying on implicit conversion of a size-1 array.
    if matches.size != 1:
        raise ValueError(
            f'Expected exactly one preference entry for {entry.tolist()}, '
            f'found {matches.size}'
        )
    psych_points_idx[i] = matches[0]

In [None]:
# Total score: sum of the scores at the matched psych and med surg positions
preference_list[psych_points_idx].sum() + preference_list[ms_points_idx].sum()

In [None]:
# Now we will figure out who got their first, second and third choices
def find_which_choice_joe(med_surg, psych, S):
    """Look up the stated choice behind each row of two placement tables.

    ``med_surg`` and ``psych`` are iterables of (student, rotation,
    placement) triples. Each component is cast to ``int`` before indexing
    ``S``.

    Returns ``(med_surg_rankings, psych_rankings)`` as two lists, in input
    order.
    """
    def stated_choices(rows):
        return [S[int(row[0]), int(row[1]), int(row[2])] for row in rows]

    return stated_choices(med_surg), stated_choices(psych)

# Stated choice (from S) behind each of the actual placements, split by rotation.
joe_med_surg_placement_ranking, joe_psych_placement_ranking = find_which_choice_joe(med_surg_meta, psych_meta, S)


In [None]:
# Print (choice value, count) tables for the psych rotation:
# the actual placements first, then the algorithm's.
for heading, rankings in (
    ('Joes Psych Placements', joe_psych_placement_ranking),
    ('\n Alg Psych Placements', psych_placement_ranking),
):
    print(heading)
    unique, counts = np.unique(np.asarray(rankings), return_counts=True)
    print(np.column_stack((unique, counts)))

In [None]:
# Print (choice value, count) tables for the med surg rotation:
# the actual placements first, then the algorithm's.
for heading, rankings in (
    ('Joes Med Surg Placements', joe_med_surg_placement_ranking),
    ('\n Alg Med Surg Placements', med_surg_placement_ranking),
):
    print(heading)
    unique, counts = np.unique(np.asarray(rankings), return_counts=True)
    print(np.column_stack((unique, counts)))

In [None]:
# Long-format frames for plotting: one row per assignment, labelled by source.
# Label counts come from the list lengths rather than a hard-coded 29, so the
# labels always line up with the rankings they describe.
med_surg_df = pd.DataFrame({
    'ranking': list(joe_med_surg_placement_ranking) + list(med_surg_placement_ranking),
    'label': ['Joe'] * len(joe_med_surg_placement_ranking)
             + ['Algorithm'] * len(med_surg_placement_ranking),
})
psych_df = pd.DataFrame({
    'ranking': list(joe_psych_placement_ranking) + list(psych_placement_ranking),
    'label': ['Joe'] * len(joe_psych_placement_ranking)
             + ['Algorithm'] * len(psych_placement_ranking),
})

In [None]:
# Count of assignments per choice value, Joe vs. the algorithm.
# Each title names the frame actually plotted; the first two were swapped.
sns.countplot(data=med_surg_df, x = 'ranking', hue='label', palette='colorblind')
plt.title('Med Surg Placement Comparison')
plt.show()

sns.countplot(data=psych_df, x = 'ranking', hue='label', palette='colorblind')
plt.title('Psych Placement Comparison')
plt.show()

sns.countplot(data=pd.concat([med_surg_df, psych_df]), x = 'ranking', hue='label', palette='colorblind')
plt.title('Total Placement Comparison')
plt.show()