In [None]:
'''
https://en.wikipedia.org/wiki/Assignment_problem
https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.linear_sum_assignment.html

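The order of the lists matters (e.g., student 1 comes before student 2). In case of a tie, the preferences of the earlier list take precedence.
Each value is the rank the student gives to the topic in that column (1 = most preferred, lower is better), e.g.
student1    [3, 2, 1] -> student 1 prefers topic 3, then topic 2, and lastly topic 1 (C > B > A)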

example 1:
weight_of topic1 topic2 topic3
student1    [3, 2, 1], 
student2    [1, 3, 2], 
student3    [2, 1, 3]
->
adjusted (plus one) array indices of best solution: [3 1 2]
S1 gets topic3, S2 gets topic 1, S3 gets topic 2

example 2:
not an issue, if more topics than students
[4,3,1,2], 
[2,4,1,3], 
[2,1,3,4], 
->
adjusted (plus one) array indices of best solution: [3 1 2]

example 3:
if there are fewer topics than students and each topic can be assigned more than once:
repeat each topic's column as many times as the topic can be assigned, e.g., topic1 can be chosen 4 times and topic2 3 times
[1,1,1,1,2,2,2]
[2,2,2,2,1,1,1]
'''

In [101]:
import ast

import numpy as np
import pandas as pd
from scipy.optimize import linear_sum_assignment

# --- Configuration -----------------------------------------------------------
# Input workbook and output workbook. To switch to another course, activate the
# matching pair of file_location / save_location lines.
# NOTE(review): these are absolute, user-specific paths; the notebook only runs
# on a machine where they exist.
file_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Master-Thesis.xlsx"
# file_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Bachelor-Thesis.xlsx"
# file_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Masterseminar.xlsx"
# file_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Proseminar.xlsx"

save_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Master-Thesis_assigned.xlsx"
# save_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Bachelor-Thesis_assigned.xlsx"
# save_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Masterseminar_assigned.xlsx"
# save_location = r"C:\Users\g21728\Desktop\WS21\Übersicht_Teilnehmer_Proseminar_assigned.xlsx"

# --- Load participants -------------------------------------------------------
df = pd.read_excel(file_location, sheet_name="Tabelle1")

# Drop students who withdrew ("zurückgetreten" / "Rücktritt" in the preference
# cell). na=True also drops rows whose preference cell is empty: they cannot be
# parsed into a list below. (The previous `== False` comparison dropped them
# too, but only as a side effect of NaN == False being False.)
# astype(bool) guarantees a real boolean mask so that `~` is a logical NOT.
withdrawn = (
    df["Themen-Präferenzen"]
    .str.contains("zurückgetreten|Rücktritt", regex=True, na=True)
    .astype(bool)
)
# reset_index gives the remaining students a gap-free 0..n-1 index, so that
# positional logic further down (row i == student i) stays valid after filtering.
df = df[~withdrawn].reset_index(drop=True)

# The preference lists are stored as text such as "[3, 2, 1]". Strip spaces and
# parse them into real Python lists. ast.literal_eval only accepts literals, so
# unlike eval() it cannot execute code that someone typed into the spreadsheet.
df["Themen-Präferenzen"] = (
    df["Themen-Präferenzen"]
    .str.replace(" ", "", regex=False)
    .apply(ast.literal_eval)
)

# convert float columns to int
# df.dtypes # datatype of columns
df["Matrnr"] = df["Matrnr"].astype("Int64")  # nullable Int64 keeps missing values as <NA>

# df

In [102]:
# Solve the student -> topic assignment and attach supervisor / title data.
#
# Example input for a quick manual check (3 students, 4 topics):
# preferences = [
#     [4,3,1,2], 
#     [2,4,1,3], 
#     [2,1,3,4], 
#     ]

preferences = df['Themen-Präferenzen'].to_list()

# Cost matrix: one row per student, one column per topic; the entries are the
# ranks read from the sheet (the solver minimises their sum).
cost = np.array(preferences)
if cost.ndim != 2:
    raise ValueError(
        "Preference lists must be non-empty and all of the same length "
        f"(got an array of shape {cost.shape})."
    )

n_students, n_topics = cost.shape
if n_students > n_topics:
    # linear_sum_assignment would silently leave students without a topic and
    # the column assignment below would fail with a length mismatch.
    raise ValueError(
        f"{n_students} students but only {n_topics} topic columns: repeat the "
        "columns of topics that may be assigned more than once."
    )

row_ind, col_ind = linear_sum_assignment(cost)
adj_ind = (col_ind + 1).tolist()  # 1-based topic numbers as plain Python ints

print(f"Available Topics: {len(preferences[0])}")
print(f"Assigned Topics: {len(col_ind)}")
# Count the rows actually used; the DataFrame index is not a reliable count
# once withdrawn students have been filtered out.
print(f"Number of students: {len(preferences)}")
print(f"array indices of best solution: {col_ind}")
print(f"adjusted (plus one) indices of best solution: {adj_ind}\n")


# Report per student which topic was assigned and which rank they gave it.
for student, topic in zip(row_ind, col_ind):
    selected_topic = topic + 1
    pref_of_selected_topic = preferences[student][topic]
    print(f"Student {student+1}: Assigned topic {selected_topic} (selected as #{pref_of_selected_topic})")
print(f"\nTotal Sum of preferences (lowest sum possible): {cost[row_ind, col_ind].sum()}\n")

df["scipy.optimize.linear_sum_assignment"] = adj_ind # copy assigned topics to column

# merge assigned topics with Betreuer sheet
df2 = pd.read_excel(file_location, sheet_name="Themen")
# df2

# Left join keeps every student; the assigned topic number is matched against
# the "Thema" column of the topics sheet. Both sheets carry a "Betreuer"
# column, hence the _x / _y suffixes produced by the merge.
df3 = df.merge(df2, how="left", left_on="scipy.optimize.linear_sum_assignment", right_on="Thema")
df3["Betreuer_x"] = df3["Betreuer_y"]  # take the supervisor from the topics sheet
df3 = df3.rename(columns={"Betreuer_x": "Betreuer"})
df3["Thema DE"] = df3["Titel DE"]
# Keyword `columns=` instead of a positional axis: the positional form was
# removed in pandas 2.0.
df3 = df3.drop(columns=["Titel DE", "Thema", "Betreuer_y"])

# df3

Available Topics: 12
Assigned Topics: 10
Number of students: 10
array indices of best solution: [7 5 0 8 2 1 4 6 9 3]
adjusted (plus one) indices of best solution: [8, 6, 1, 9, 3, 2, 5, 7, 10, 4]

Student 1: Assigned topic 8 (selected as #2)
Student 2: Assigned topic 6 (selected as #2)
Student 3: Assigned topic 1 (selected as #2)
Student 4: Assigned topic 9 (selected as #1)
Student 5: Assigned topic 3 (selected as #1)
Student 6: Assigned topic 2 (selected as #2)
Student 7: Assigned topic 5 (selected as #1)
Student 8: Assigned topic 7 (selected as #1)
Student 9: Assigned topic 10 (selected as #4)
Student 10: Assigned topic 4 (selected as #1)

Total Sum of preferences (lowest sum possible): 17





In [98]:
# Write the merged assignment table to the output workbook as a single sheet
# named "Final"; the DataFrame index is not written.
with pd.ExcelWriter(save_location) as writer:
    df3.to_excel(writer, sheet_name="Final", index=False)

In [None]:
# Optional manual adjustment of the preference weights.
# Run this cell once, then re-run the assignment cell so that it picks up the
# adjusted lists from df["Themen-Präferenzen"].
prefe = df['Themen-Präferenzen'].to_list() # current preference lists (not mutated below)

# prefer X first students by overweighting their first choice
preferred_number = 1 # only referenced by the commented-out experiments below
# prefer first students by adding X to each other student -> doesnt work
# for i in range(len(prefe)):
#     if i >= preferred_number:
#         x = list(map(lambda x:x+1, prefe[i])) # add 1 to each preference weight for students after preferred students
#         df.loc[i, "Themen-Präferenzen"] = str(x)
#     else:
#         df.loc[i, "Themen-Präferenzen"] = str(prefe[i])

# first choice of preferred students gets -1 weight -> more likely that preferred students get their first choice
# for i in range(len(prefe)):
#     if i < preferred_number:
#         index_min_value = prefe[i].index(min(prefe[i]))
#         prefe[i][index_min_value] -= 1 # adjust weight of first choice by minus one
#         print(prefe[i])
#         df.loc[i, "Themen-Präferenzen"] = str(prefe[i])
#     else:
#         df.loc[i, "Themen-Präferenzen"] = str(prefe[i])
# df["Themen-Präferenzen"] = df["Themen-Präferenzen"].apply(eval) # lists are stored as strings in pandas -> convert with apply(eval)

# ignore a specific topic of all students (otherwise unpopular topic gets ignored if more topics available than students)
DROP_LAST_TOPIC = True   # remove the last topic column from every list
IGNORED_TOPIC_INDEX = 5  # 0-based index of the topic to ignore (5 -> topic 6)
IGNORE_WEIGHT = 99       # rank high enough that the solver avoids the topic

# Guard against re-running: dropping the last topic is not idempotent, and a
# second run would silently remove one more topic. The flag lives on the
# DataFrame itself, so reloading the data (a new df) clears it automatically.
if df.attrs.get("preferences_adjusted", False):
    print("Preferences were already adjusted; reload the data before adjusting again.")
else:
    adjusted = []
    for weights in prefe:
        # Work on a copy: the original list objects are shared with the
        # DataFrame and with any list obtained earlier via to_list().
        new_weights = list(weights[:-1]) if DROP_LAST_TOPIC else list(weights)
        if not 0 <= IGNORED_TOPIC_INDEX < len(new_weights):
            raise IndexError(
                f"Cannot ignore topic index {IGNORED_TOPIC_INDEX}: "
                f"preference list has only {len(new_weights)} entries."
            )
        new_weights[IGNORED_TOPIC_INDEX] = IGNORE_WEIGHT
        # Alternative: new_weights.pop(IGNORED_TOPIC_INDEX) would remove the
        # topic column entirely instead of penalising it.
        adjusted.append(new_weights)

    # Wrap in a Series so pandas stores one list per row instead of trying to
    # interpret the nested lists as a 2-D block.
    df['Themen-Präferenzen'] = pd.Series(adjusted, index=df.index)
    df.attrs["preferences_adjusted"] = True


# preferences = df['Themen-Präferenzen'].to_list()
# print(preferences)
df