# Setup

In [1]:
import pandas as pd
# from collections import defaultdict
# from operator import itemgetter
import numpy as np
# import re
# from fuzzywuzzy import process, fuzz

# Notebook-wide display settings: never truncate rows or columns, and render
# floats with three decimals.
display_options = {
    'display.max_rows': None,
    'display.max_columns': None,
    'display.float_format': '{:.3f}'.format,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [2]:
# Input workbooks and run configuration.
dsd = "DSD2324_overview (2).xlsx"  # course offering, one row per class (loaded into dsd_df)
preferences = "Teaching Preferences Questionnaire_S1_2023-24_raw.xlsx"  # questionnaire answers
contract = "ta_contract_v2.xlsx"  # current TA contracts; NOTE: this name is later rebound to the loaded DataFrame
bs_weights = "bs_courses_weights_FILLED.xlsx"  # not read in this part of the notebook
term = "S1"  # "S1" keeps TERM in S1/T1/T2; any other value keeps S2/T3/T4

# DEMAND - practical lessons

In [3]:
# Load the course offering workbook (pandas reads the first sheet by default).
dsd_df = pd.read_excel(dsd)

In [4]:
# Preview the raw offering rows.
dsd_df.head()

Unnamed: 0,ORDER,TERM,CYCLE,COURSE CODE,COURSE NAME,LANGUAGE,CLASS,SLOTS,WEIGHT,PROGRAM,FORMAT,INSTITUTIONAL ID,FACULTY ID,FACULTY NAME,FACULTY EMAIL,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,LAST UPDATED AT
0,1,S1,BSC,1104,Seminário de Economia Europeia,EN,T301A,35,1.0,,NORMAL,16699,146,João Amador,jamador@novasbe.pt,,,,,
1,2,S1,BSC,1104,Seminário de Economia Europeia,EN,T302A,35,1.0,,NORMAL,16699,146,João Amador,jamador@novasbe.pt,,,,,
2,3,S1,BSC,1104,Seminário de Economia Europeia,EN,T303A,35,1.0,,NORMAL,16699,146,João Amador,jamador@novasbe.pt,,,,,
3,4,S2,BSC,1104,Seminário de Economia Europeia,EN,T301A,40,1.0,,NORMAL,16699,146,João Amador,jamador@novasbe.pt,,,,,
4,5,S2,BSC,1104,Seminário de Economia Europeia,EN,T302A,40,1.0,,NORMAL,16699,146,João Amador,jamador@novasbe.pt,,,,,


In [5]:
# Composite course key "<code> || <name> || <term> || <language>"; the same
# layout is produced for the questionnaire columns so the two can be merged.
key_parts = [dsd_df[column].astype(str) for column in ("COURSE CODE", "COURSE NAME", "TERM", "LANGUAGE")]
dsd_df["course"] = key_parts[0].str.cat(key_parts[1:], sep=" || ")

# Keep only the teaching periods that belong to the configured half of the year.
selected_terms = ["S1", "T1", "T2"] if term == "S1" else ["S2", "T3", "T4"]
dsd_df = dsd_df[dsd_df['TERM'].isin(selected_terms)]

In [6]:
# Per-course aggregation: number of classes (rows) and total student slots.
# The string alias 'sum' is used instead of np.sum: pandas already dispatched
# np.sum to its own groupby sum and warns (pandas >= 2.1) that this implicit
# substitution will go away, so the alias keeps today's behaviour explicitly.
agg_functions = {
    'CLASS': 'count',
    'SLOTS': 'sum',
}

# One row per (course name, term, code, language) with human-readable headers.
output_1 = (
    dsd_df.groupby(['COURSE NAME', 'TERM', 'COURSE CODE', 'LANGUAGE'])
    .agg(agg_functions)
    .reset_index()
    .rename(columns={'CLASS': 'Nº CLASSES', 'SLOTS': 'Nº STUDENTS'})
)

output_1.head()

Unnamed: 0,COURSE NAME,TERM,COURSE CODE,LANGUAGE,Nº CLASSES,Nº STUDENTS
0,Advanced Data Analysis,S1,2597,EN,2,140
1,Advanced Financial Management,S1,2581,EN,4,360
2,Advanced Marketing,S1,2430,EN,8,480
3,Advanced Maths and Stats for Economics,T1,4508,EN,1,999
4,Advanced Methods,S1,270107,EN,1,15


In [7]:
# Sanity check: rows per teaching period after the term filter (NaN counted too).
dsd_df.TERM.value_counts(dropna=False)

S1    291
T2    131
T1    128
Name: TERM, dtype: int64

In [8]:
# Demand per composite course key: class count and total student slots.
course_demand = (
    dsd_df.groupby(['course'])
    .agg(agg_functions)
    .rename(columns={'CLASS': 'number_classes', 'SLOTS': 'number_students'})
    .reset_index()
)

course_demand.head()

Unnamed: 0,course,number_classes,number_students
0,107789 || Innovation for Impact || T2 || EN,2,140
1,1104 || Seminário de Economia Europeia || S1 |...,3,105
2,1108 || Economia do Desenvolvimento || S1 || EN,2,70
3,1112 || Economia Industrial || S1 || EN,3,126
4,1113 || Economia Global I || S1 || EN,2,84


# SUPPLY - TA hours (current contract)

In [9]:
# `contract` starts out as the workbook path and is rebound below to the loaded
# table. Remember the path the first time through so this cell can be re-run
# without restarting the kernel (otherwise read_excel would receive a DataFrame).
if isinstance(contract, str):
    contract_path = contract
contract = pd.read_excel(contract_path)

# E-mails are the join key everywhere else, so normalise their case once here.
contract["TA"] = contract["TA"].str.lower()

# Zero contracts: remember who they are (they are also removed from the
# questionnaire answers later), then drop them from the contract table.
zero_contracts = contract[contract['contract'] == 0]["TA"].unique()
contract = contract[contract['contract'] != 0]

In [10]:
# E-mails of TAs whose current contract is 0.
zero_contracts

array(['silvia.guerra@novasbe.pt', 'joana.matos@novasbe.pt'], dtype=object)

In [11]:
# Check for duplicates: number of rows that are exact repeats of an earlier row
# (all columns equal) and have a non-null TA. Expected to be 0.
contract[contract.duplicated()].TA.count()

0

In [12]:
# Distinct e-mails with a non-zero contract; used later to spot respondents without one.
contract_emails = contract.TA.unique()

In [13]:
# Preview the cleaned contract table.
contract.head()

Unnamed: 0,TA,NAME,contract
0,abrao.kulaif@novasbe.pt,Abrão Kulaif,0.5
1,afonso.ribeiro@novasbe.pt,Afonso Ribeiro,0.5
2,afonso.serrano@novasbe.pt,Afonso Serrano,0.25
3,afonso.castro@novasbe.pt,Afonso Castro,0.5
4,afonso.duarte@novasbe.pt,Afonso Teixeira Duarte,0.175


# SUPPLY - TA preferences

## A. Preferences

In [14]:
# Load the questionnaire (column names are on the second spreadsheet row),
# order submissions newest-first and expose the e-mail column under the same
# "TA" key used by the contract table.
preferences_df = (
    pd.read_excel(preferences, header=1)
    .sort_values(by='End Date', ascending=False)
    .rename(columns={'Please write your E-mail @novasbe.pt': 'TA'})
)

# Lower-case the e-mails so they compare equal to the contract e-mails.
preferences_df['TA'] = preferences_df['TA'].str.lower()

# Lookup table: questionnaire column name -> zero-based column position.
column_df = pd.DataFrame({'Column Name': preferences_df.columns,
                          'Column Number': range(len(preferences_df.columns))})

# Ignore answers from TAs whose current contract is zero.
preferences_df = preferences_df[~preferences_df["TA"].isin(zero_contracts)]

# Reconcile TAs who submitted the questionnaire more than once.
# preferences_df is already ordered newest-first, so keep='first' below always
# means "most recent submission".

# Flag every row whose e-mail appears more than once (keep=False marks all copies)
duplicates_mask = preferences_df.duplicated(subset='TA', keep=False)
preferences_duplicates = preferences_df[duplicates_mask]
preferences_duplicates = preferences_duplicates.sort_values(by='End Date', ascending=False)

# Most recent submission of each repeated TA (may have empty preference columns)
preferences_duplicates_last = preferences_duplicates.drop_duplicates(subset='TA', keep='first').copy()

# Rows with at least one non-null value in column positions 31:81 or 83:347
# NOTE(review): these positional ranges presumably are the Bachelor and Masters
# preference columns - confirm against the questionnaire layout.
value_mask = preferences_duplicates.iloc[:, 31:81].notnull().any(axis=1) | preferences_duplicates.iloc[:, 83:347].notnull().any(axis=1)
preferences_duplicates_values = preferences_duplicates[value_mask]
# Most recent submission of each repeated TA that actually contains preferences
preferences_duplicates_values = preferences_duplicates_values.drop_duplicates(subset='TA', keep='first').copy()

# Keep only TAs who submitted exactly once
preferences_df = preferences_df[~duplicates_mask]

# Among those, collapse rows sharing a "Full Name", keeping the most recent one
# (covers the same person answering under different e-mails, ex. Franziska wrong)
preferences_df = preferences_df.drop_duplicates(subset='Full Name', keep='first')

# Start the final table from the latest submission of each repeated TA
preferences_df_final = preferences_duplicates_last.copy()

# Names of the columns at positions 31:81 and 83:347 (same ranges as value_mask)
preference_columns = preferences_duplicates_values.columns[31:81].tolist() + preferences_duplicates_values.columns[83:347].tolist()

# Overwrite the preference columns of the latest submission with those of the
# latest submission that has values. Both frames are indexed by TA (the column
# is kept as well); the right-hand side is a plain array, so rows are matched by
# the order of preferences_duplicates_values.index used in the .loc selection.
preferences_df_final.set_index('TA', inplace=True, drop=False)
preferences_duplicates_values.set_index('TA', inplace=True, drop=False)
preferences_df_final.loc[preferences_duplicates_values.index, preference_columns] = preferences_duplicates_values[preference_columns].values

# Append the single-submission TAs
preferences_df_final = pd.concat([preferences_df_final, preferences_df])

# Order newest-first again after the concat
preferences_df_final.sort_values(by='End Date', ascending=False, inplace=True)

# Replace the TA/original index with a fresh 0..n-1 index
preferences_df_final.reset_index(drop=True, inplace=True)

# Final pass on "Full Name": keep the most recent row per name (ex. Franziska wrong).
# Index labels of dropped rows are not reused, so the index can have gaps afterwards.
preferences_df_final.drop_duplicates(subset='Full Name', keep='first', inplace=True)

# Translate every questionnaire preference column into a composite course key.
# The column name is split on ' || '; its first piece is split again on ' - '.
# The key is "<4th dash piece> || <5th dash piece> || <2nd pipe piece> ||
# <3rd pipe piece up to its first ' - '>".
mapping = {}
for column_name in preference_columns:
    pipe_parts = column_name.split(' || ')
    dash_parts = pipe_parts[0].split(' - ')
    course_id = " || ".join(
        [dash_parts[3], dash_parts[4], pipe_parts[1], pipe_parts[2].split(' - ')[0]]
    )
    mapping[column_name] = course_id

# Rename the columns using the mapping
preferences_df_final.rename(columns=mapping, inplace=True)

# Drop the columns at positions 30, 81 and 82 (lists of courses, redundant)
preferences_df_final.drop(columns=preferences_df_final.columns[[30, 81, 82]], inplace=True)

# OUTPUT #2
# Column position 19 holds the answer to "Do you intend to continue your
# collaboration with Nova SBE next semester as Teaching Assistant?".
# TAs who answered "No" are listed with the columns at positions 17, 18 and 20;
# the last of those is relabelled "Comments".
is_leaving = preferences_df_final.iloc[:, 19] == "No"
ta_exits = preferences_df_final[is_leaving].iloc[:, [17, 18, 20]]
ta_exits = ta_exits.rename(columns={ta_exits.columns[-1]: "Comments"}).sort_values("Full Name")

# ta_exits.to_excel("ta_exits_v1.xlsx", index=False)
ta_exits_list = ta_exits.TA.unique()

# Everyone else (any answer other than "No", including no answer) stays in the pool.
preferences_df_final = preferences_df_final[preferences_df_final.iloc[:, 19] != "No"]

In [15]:
# Resolve the hard-coded column positions by question text, using the
# name -> position table built from the raw questionnaire columns.
column_names = column_df['Column Name']


def _column_number(row_mask):
    """Return the 'Column Number' of the first column_df row selected by row_mask."""
    return column_df[row_mask].iloc[0]["Column Number"]


column_17 = _column_number(column_names == "Full Name")
column_18 = _column_number(column_names == "TA")

continue_str = "Do you intend to continue your collaboration with Nova SBE next semester"
continue_just_str = "Please write here a short justification on why you do not intend to continue"
bs_or_ms_str = "Do you prefer to be assigned to Bachelor’s or Master's courses?"
load_availability_str = "What is your availability in terms of workload and contract percentage for the next semester?"

column_19 = _column_number(column_names.str.startswith(continue_str))
column_20 = _column_number(column_names.str.startswith(continue_just_str))
column_22 = _column_number(column_names.str.startswith(bs_or_ms_str))
column_27 = _column_number(column_names.str.startswith(load_availability_str))

# Bachelor block: the header column, with the per-course columns starting right after it.
bs_str = "Please choose below your teaching preferences for Bachelor Courses."
column_30 = _column_number(column_names.str.startswith(bs_str))
column_31 = column_30 + 1

# Masters block: the per-course columns start two positions after the header column.
ms_str = "Please choose below your teaching preferences for Masters Courses (grading)."
column_81 = _column_number(column_names.str.startswith(ms_str))
column_83 = column_81 + 2

# print(column_17, column_18, column_19, column_20, column_22, column_27, column_30, column_31, column_81, column_83)

In [16]:
# Report how many distinct TAs answered "No" to continuing next semester.
print(f"Number of TAs who are leaving: {len(ta_exits_list)}")

Number of TAs who are leaving: 17


In [17]:
# Check that the e-mail can be used as key/ID: number of rows with a missing TA (expected 0)
preferences_df_final["TA"].isnull().sum()

0

In [18]:
# Check there are no duplicate names: rows whose "Full Name" already appeared (expected 0)
preferences_df_final[preferences_df_final["Full Name"].duplicated()]["Response ID"].count()

0

In [19]:
# Check there are no duplicate answers: rows whose TA e-mail already appeared (expected 0)
preferences_df_final[preferences_df_final["TA"].duplicated()]["Response ID"].count()

0

In [20]:
# Respondents remaining after removing zero contracts, repeats and exits.
responded_number = len(preferences_df_final)
print(f"Number of TAs willing to continue who responded: {responded_number}")
display(preferences_df_final[["TA", "Full Name"]].head())

Number of TAs willing to continue who responded: 175


Unnamed: 0,TA,Full Name
0,beatriz.f.ferreira@novasbe.pt,Beatriz Frazão Ferreira
1,sara.b.lopes@novasbe.pt,Sara de Louxembourg Henriques Borges Lopes
3,marcio.silva@novasbe.pt,Marcio Nunes da Silva
4,ana.lourenco@novasbe.pt,Ana Filipa Duarte Lourenço
5,fabian.wassmann@novasbe.pt,Fabian Wassmann


In [21]:
# OUTPUT #N
# Respondents with a non-null value in the last column (free-text observations),
# shown with the columns at positions 17 and 18 plus that last column.
preferences_df_final[~preferences_df_final.iloc[:,-1].isna()].iloc[:, [17, 18, -1]]

Unnamed: 0,Full Name,TA,Please write below your observations:
0,Beatriz Frazão Ferreira,beatriz.f.ferreira@novasbe.pt,Only for the course of Business Strategy and P...
3,Marcio Nunes da Silva,marcio.silva@novasbe.pt,"I can only handle one course per semester, sin..."
8,Andre Daniel Vinhas Nunes,andre.nunes@novasbe.pt,"""Economics of education"" and ""Competition Poli..."
9,Franziska Hittmair,hittmair.franziska@novasbe.pt,Global Strategic Management for CEMS
18,Valter Miguel Sobral Nóbrega,valter.nobrega@novasbe.pt,Wiseflow preference
21,Afonso Maria Jardim Gonçalves Teixeira Duarte,afonso.duarte@novasbe.pt,I would like to be assistant also in Risk Mana...
24,Gonçalo Maria de Abreu Peixoto Cordeiro de Sousa,goncalo.sousa@novasbe.pt,I would like to continue to work with Prof Ema...
31,Rafael de Almeida Sequeira,rafael.sequeira@novasbe.pt,If I am allocated to Investments (full course)...
34,Maria José Rodrigues Aniceto,maria.aniceto@novasbe.pt,I would like to keep grading financial managem...
35,Afonso Saraiva Câmara Leme,afonso.leme@novasbe.pt,First option is Economics of Education (shows ...


In [22]:
# OUTPUT #N
# Respondents whose e-mail is not among the non-zero contracts.
preferences_df_final[~preferences_df_final["TA"].isin(contract_emails)][["TA", "Full Name"]]

Unnamed: 0,TA,Full Name
63,dd@novasbe.pt,dddd
85,pedrobrinca@novasbe.pt,Pedro Brinca
115,xxx@novasbe.pt,xxx
120,aa@novasbe.pt,aaa
140,test@novasbe.pt,test


In [23]:
# Reshape the wide questionnaire (one column per course) into long format:
# one row per (TA, course) with the stated preference rank.

# Course columns: everything from position 30 up to (excluding) the last column.
course_columns = preferences_df_final.columns[30:-1]

# Translation of the answer in column position 22 (Bachelor/Masters preference).
translation_mapping = {
    "Masters' Courses": 2,
    "Bachelors' Courses": 0,
    "Indifferent": 1,
    pd.NaT: 1  # Assuming NaN values should also be considered "Indifferent"
}

# Collect one small frame per course and concatenate once at the end, instead of
# growing adapted_df inside the loop and re-running the post-processing on the
# whole frame at every iteration.
course_frames = []
processed_courses = set()

for course in course_columns:
    # Each course label is handled once
    if course in processed_courses:
        continue
    processed_courses.add(course)

    # Other columns whose label ends with this course label are treated as
    # duplicates of it
    duplicate_columns = [col for col in course_columns if col != course and col.endswith(course)]

    # Row-wise forward fill, then take the last column: the right-most non-null
    # answer among the course column and its duplicates
    combined_column = preferences_df_final[[course] + duplicate_columns].ffill(axis=1).iloc[:, -1]

    non_null_mask = combined_column.notnull()
    if not non_null_mask.any():
        # Nobody ranked this course; it contributes no rows
        continue
    non_null_df = preferences_df_final[non_null_mask]

    course_frames.append(pd.DataFrame({
        "TA": non_null_df["TA"],
        "course": course,
        "preference": combined_column[non_null_mask],
        "preference_type": non_null_df.iloc[:, 22].map(translation_mapping),
    }))

if course_frames:
    adapted_df = pd.concat(course_frames, ignore_index=True)
else:
    adapted_df = pd.DataFrame(columns=["TA", "course", "preference", "preference_type"])

# Series.map leaves NaN for values it cannot find in the dict, and a missing
# answer (NaN) is not guaranteed to match the pd.NaT key above. Apply the
# intended "Indifferent" default explicitly, like the other answer mappings in
# this notebook do with .fillna(...).
adapted_df['preference_type'] = adapted_df['preference_type'].fillna(1).astype(int)

# Create the 'masters_course' column: 0 when the first token of the key (the
# course code) starts with '1', else 1.
# NOTE(review): a code such as 107789 also starts with '1' and is flagged as a
# Bachelor course by this rule - confirm that is intended.
adapted_df['masters_course'] = adapted_df['course'].apply(lambda x: 0 if x.split(' ')[0].startswith('1') else 1)

# Convert "preference" column to integers
adapted_df['preference'] = adapted_df['preference'].astype(np.int8)

# Remove preferences above 5
adapted_df = adapted_df[adapted_df['preference'] <= 5].reset_index(drop=True)

In [24]:
# Check there are no preferences above 5 or below 1 (prints the min, then the max)
print(adapted_df['preference'].min())
print(adapted_df['preference'].max())

1
5


In [25]:
# TAs with at least one row in adapted_df, i.e. at least one ranked course kept
# after the "preference <= 5" filter.
completed_preferences = adapted_df["TA"].unique()
completed_preferences_number = len(completed_preferences)
print(f"Number of TAs with completed preferences: {completed_preferences_number}")
# display(completed_preferences)

Number of TAs with completed preferences: 162


In [26]:
# OUTPUT #6
# Long-format preferences without the last column (masters_course).
adapted_df.iloc[:,:-1].head()

Unnamed: 0,TA,course,preference,preference_type
0,pedro.freitas@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,1,0
1,marlon.e.francisco@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,5,1
2,luis.pereira@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,5,0
3,stefano.grancini@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,3,1
4,joao.quelhas@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,5,0


In [27]:
# Spot check: all preference rows of a single TA.
adapted_df[adapted_df["TA"]=="bernardo.costa@novasbe.pt"]

Unnamed: 0,TA,course,preference,preference_type,masters_course
178,bernardo.costa@novasbe.pt,1312 || Data Analysis and Probability || S1 || EN,3,1,0
191,bernardo.costa@novasbe.pt,1313 || Statistics for Economics and Managemen...,2,1,0
200,bernardo.costa@novasbe.pt,1314 || Econometrics || S1 || EN,4,1,0
207,bernardo.costa@novasbe.pt,1318 || Introduction to Programming || S1 || EN,1,1,0
517,bernardo.costa@novasbe.pt,2477 || Introduction to Programming || S1 || EN,2,1,1
579,bernardo.costa@novasbe.pt,2597 || Advanced Data Analysis || S1 || EN,1,1,1
587,bernardo.costa@novasbe.pt,2609 || Data Visualization for Business Analyt...,4,1,1
636,bernardo.costa@novasbe.pt,2659 || Data Curation for Business Analytics |...,3,1,1


## B. New contract (preferences)

In [28]:
# TAs to chase: they hold a non-zero contract, did not announce they are
# leaving, and have no usable preferences on file.
has_no_preferences = ~contract.TA.isin(completed_preferences)
is_not_leaving = ~contract.TA.isin(ta_exits_list)
contact_list = contract[has_no_preferences & is_not_leaving]

print(len(contact_list))
display(contact_list)
contact_list.to_excel("ta_contact_list_v1.xlsx", index=False)

34


Unnamed: 0,TA,NAME,contract
1,afonso.ribeiro@novasbe.pt,Afonso Ribeiro,0.5
3,afonso.castro@novasbe.pt,Afonso Castro,0.5
20,ana.silva@novasbe.pt,Ana Rita Tavares,0.2
24,andre.tome@novasbe.pt,André Tomé,0.1
32,beatriz.marques@novasbe.pt,Beatriz Marques,0.25
33,beatriz.bento@novasbe.pt,Beatriz Bento,0.125
42,catarina.m.grosso@novasbe.pt,Catarina Grosso,0.9
47,claudia.castico@novasbe.pt,Cláudia Castiço,0.375
54,daniele.gamberoni@novasbe.pt,Daniele Gamberoni,0.2
56,diogo.v.rebelo@novasbe.pt,Diogo Bebiano Rebelo,0.4


In [29]:
def round_to_closest(value):
    """Clamp a requested contract fraction to the range [0.1, 0.5].

    Despite the name, no rounding is performed (rounding to multiples of 0.125
    was disabled in the original code); the value is only capped.

    Parameters
    ----------
    value : float or missing
        Requested contract level as a fraction (0.25 means 25 %).

    Returns
    -------
    float
        ``np.nan`` for missing input, otherwise ``value`` limited to at most
        0.5 and at least 0.1.
    """
    if pd.isnull(value):
        return np.nan
    # Both caps must be applied to the same running value. Previously the lower
    # cap was computed from the raw input again, which discarded the upper cap.
    return max(min(value, 0.5), 0.1)

# Clean the "load_requested" column
def clean_percentage(value):
    """Parse a contract-percentage answer into a number expressed in percent.

    The caller divides the result by 100 to obtain a fraction, so every branch
    returns percent units (50 means 50 %). Previously numeric input was divided
    by 100 here as well, which made numeric answers 100 times too small.

    Parameters
    ----------
    value : str, int, float or missing
        Raw questionnaire answer.

    Returns
    -------
    number
        * missing input is returned unchanged;
        * strings: all digits are concatenated; "100" gives 100, otherwise the
          first two digits are the integer part and the remaining digits the
          decimals ("37.5%" -> 37.5, "50 %" -> 50.0). Strings with no digit or
          with a single digit give ``np.nan``;
        * int/float input is returned as a float, unchanged in magnitude;
        * any other type is returned as is.
    """
    if pd.isnull(value):
        return value

    if isinstance(value, str):
        # Keep only the digits; purely alphabetic or empty text leaves nothing.
        digits = ''.join(filter(str.isdigit, value))

        if digits == '':
            return np.nan
        if digits == '100':
            return 100
        if len(digits) >= 2:
            return float(digits[:2] + '.' + digits[2:])
        # A lone digit is ambiguous and is treated as unusable.
        return np.nan

    if isinstance(value, (int, float)):
        # NOTE(review): assumes numeric cells hold percent values such as 50,
        # like the text answers; confirm no cell stores a fraction such as 0.5.
        return float(value)

    return value

def decrease_contract_level(value):
    """Return the contract level lowered by one step of 0.125."""
    step = 0.125
    lowered = value - step
    return lowered


# Answer -> code translations for three questionnaire columns.
# NOTE: `mapping` reuses the name of the column-rename dict built earlier in the
# notebook; from here on it refers to the workload answers below.

# Workload question: 1 = wants more, 0 = keep the same, -1 = wants less.
mapping = {
    "I want to increase the contract percentage/workload in the next semester (please specify the desired contract percentage level)": 1,
    "I want to keep the same contract percentage/workload as this semester": 0,
    "I want to reduce the contract percentage/workload in the next semester (please specify the desired contract percentage level)": -1,
    pd.NaT: 0
}

# Applied to the column stored as 'master_student': 1 only for Masters students
# who will still be taking at least one course.
mapping_21 = {
    "Yes, I am a PhD student": 0,
    "Yes, I will be a Masters student but not doing any courses, only the Work Project": 0,
    "Yes, I will be a Masters student and I will be doing at least one more course": 1,
    "No": 0,
    pd.NaT: 0
}

# Applied to the column stored as 'PhD_restrictions': 1 for either kind of
# declared workload constraint.
mapping_23 = {
    "Yes, I have some other constraints that limit my teaching hours/workload (please specify the reason and the limit)": 1,
    "Yes, I have a FCT scholarship that limits my weekly teaching hours to 4h per week": 1,
    "No": 0,
    pd.NaT: 0
}

################################################################################################################################################

# Respondents who answered the workload question (column position 27), reduced
# to the six columns needed to derive their requested contract.
mask = preferences_df_final.iloc[:, 27].notna()
new_contract = preferences_df_final[mask].iloc[:, [18, 21, 23, 27, 28, 29]]
new_contract.columns = ['TA', 'master_student', 'PhD_restrictions', 'change_load', 'new_contract_decreased_load', 'new_contract_increased_load']

# Translate the categorical answers into codes; unknown answers count as 0 for
# the two flag columns.
new_contract['change_load'] = new_contract['change_load'].map(mapping)
for flag_column, flag_mapping in (('master_student', mapping_21), ('PhD_restrictions', mapping_23)):
    new_contract[flag_column] = new_contract[flag_column].map(flag_mapping).fillna(0).astype(int)

# Convert TA column to lowercase
new_contract['TA'] = new_contract['TA'].str.lower()

# Parse the two free-text percentage answers and scale them by 1/100.
requested_columns = ['new_contract_decreased_load', 'new_contract_increased_load']
for requested_column in requested_columns:
    new_contract[requested_column] = new_contract[requested_column].apply(clean_percentage) / 100

# A single "load_requested": the row mean of whichever of the two answers is
# present, then capped by round_to_closest.
new_contract['load_requested'] = new_contract[requested_columns].mean(axis=1).apply(round_to_closest)

# The two source columns are no longer needed.
new_contract = new_contract.drop(columns=requested_columns)

In [30]:
# OUTPUT #3
# Rows whose change_load is not 0, ordered by e-mail.
new_contract[new_contract.change_load !=0].sort_values("TA")

Unnamed: 0,TA,master_student,PhD_restrictions,change_load,load_requested
21,afonso.duarte@novasbe.pt,0,0,1,0.5
53,afonso.quintela@novasbe.pt,0,0,1,
162,anastasiia.smirnova@novasbe.pt,0,0,1,0.5
8,andre.nunes@novasbe.pt,0,0,1,0.5
118,antonio.santos@novasbe.pt,0,1,-1,0.25
138,claudia.vaqueiro@novasbe.pt,0,0,-1,0.1
83,constanca.b.roquette@novasbe.pt,0,0,1,0.375
176,daniel.silva@novasbe.pt,0,0,1,0.5
17,daniela.afonso@novasbe.pt,0,0,-1,0.25
182,diogo.lima@novasbe.pt,0,0,1,0.25


In [31]:
# Check new_contract_load is formatted correctly
# new_contract.load_requested.value_counts()

In [32]:
# Count the respondents who asked for an increase (1) or a decrease (-1).
wants_change = new_contract.change_load.isin([1,-1])
number_TAS = len(new_contract[wants_change])
print(f"Number of TAs who want to change their contract: {number_TAS}")

Number of TAs who want to change their contract: 47


In [33]:
# Attach the questionnaire-derived request (if any) to every current contract.
all_contracts = contract.merge(new_contract, how="left", on="TA")

# Work on the rows whose change_load is not 0. This includes TAs without a
# questionnaire answer (change_load is NaN); they fall through to "keep the
# current contract" below.
filtered_contracts = all_contracts[all_contracts['change_load'] != 0].copy()

# Decrease requests: use the requested level, or one step (0.125) below the
# current contract when no level was given.
# NOTE(review): for contracts at or below 0.125 this fallback yields a value
# <= 0 - confirm whether a floor should apply.
wants_decrease = filtered_contracts['change_load'] == -1
filtered_contracts.loc[wants_decrease, 'new_contract'] = filtered_contracts['load_requested']
filtered_contracts.loc[wants_decrease & filtered_contracts['load_requested'].isnull(), 'new_contract'] = (
    # Column-wise apply always returns a Series, even when the frame is empty
    filtered_contracts['contract'].apply(decrease_contract_level)
)

# Increase requests (change_load == 1) are not applied: those rows keep their
# current contract through the fill below.

# Fill NaN values with the original contract value. Assign the result back
# instead of calling fillna(inplace=True) on a column selection, which is
# deprecated and has no effect under pandas Copy-on-Write.
filtered_contracts['new_contract'] = filtered_contracts['new_contract'].fillna(filtered_contracts['contract'])

# Create a new column "new_contract" in the original DataFrame with NaN values
all_contracts['new_contract'] = np.nan

# Update the "new_contract" column in the original DataFrame with the filtered
# values (aligned on the index); everyone else keeps the current contract.
all_contracts.update(filtered_contracts[['new_contract']])
all_contracts['new_contract'] = all_contracts['new_contract'].fillna(all_contracts['contract'])

# Drop emails which currently do not have a contract (ex. pedro.brinca)
all_contracts = all_contracts[all_contracts.contract.notna()]

In [34]:
# Check there are no TAs without a current contract (expected: empty list)
all_contracts[all_contracts.contract.isnull()].TA.to_list()

[]

In [35]:
# Keep only the columns needed for the supply side of the market.
all_contracts = all_contracts[["TA", "new_contract", "master_student"]]

## C. Merge SUPPLY and DEMAND

In [36]:
# Attach each TA's contract data to their preference rows. indicator=True adds a
# "_merge" column telling whether a contract row was found for the TA.
ta_preferences = adapted_df.merge(all_contracts, how="left", on="TA", indicator=True)
ta_preferences.head()

Unnamed: 0,TA,course,preference,preference_type,masters_course,new_contract,master_student,_merge
0,pedro.freitas@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,1,0,0,0.375,0.0,both
1,marlon.e.francisco@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,5,1,0,0.5,0.0,both
2,luis.pereira@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,5,0,0,0.5,1.0,both
3,stefano.grancini@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,3,1,0,0.25,0.0,both
4,joao.quelhas@novasbe.pt,1104 || Seminar in European Economics || S1 || EN,5,0,0,0.375,0.0,both


In [37]:
# Preference rows whose TA has no contract row (expected to be empty); the
# merge indicator column is removed once it has been inspected.
has_no_contract = ta_preferences['_merge'] != 'both'
non_matching_values = ta_preferences[has_no_contract]
ta_preferences = ta_preferences.drop(columns=["_merge"])
display(non_matching_values)

Unnamed: 0,TA,course,preference,preference_type,masters_course,new_contract,master_student,_merge


In [38]:
# Attach demand (classes, students) to every preference row by exact course key;
# "_merge" records whether the key was found in course_demand.
market = ta_preferences.merge(course_demand, how="left", on="course", indicator=True)

# market['number_classes'] = pd.to_numeric(market['number_classes'], errors='coerce').astype(pd.Int64Dtype())
# market['number_students'] = pd.to_numeric(market['number_students'], errors='coerce').astype(pd.Int64Dtype())

In [39]:
# Courses from the questionnaire whose exact key was not found in course_demand.
non_matching_values = market[market['_merge'] != 'both']
market.drop(columns=["_merge"], inplace=True)

# Split the composite key back into its four parts on both sides, so that
# looser matches (by code, by name, ...) can be attempted afterwards.
# The separator is a regular expression, hence the escaped pipes; it is written
# as a raw string because "\|" is not a valid escape in a normal string literal
# (DeprecationWarning today, SyntaxWarning from Python 3.12).
course_part_columns = ["course_code", "course_name", "period", "language"]
course_separator = r" \|\| "

non_matching_courses = non_matching_values[["course"]].drop_duplicates()
non_matching_courses = non_matching_courses.copy()
non_matching_courses[course_part_columns] = non_matching_courses["course"].str.split(course_separator, expand=True)

course_demand_extended = course_demand.copy()
course_demand_extended[course_part_columns] = course_demand["course"].str.split(course_separator, expand=True)
course_demand_extended = course_demand_extended[["course", "course_code", "course_name", "period", "language"]]

In [40]:
# Try progressively looser keys to pair each unmatched questionnaire course with
# a course from the offering. Each stage only sees what earlier stages left over.
# A fourth stage on (course_code, language) is deliberately not used: it could
# pair courses from different semesters.
match_stages = (
    ["course_code", "period", "language"],
    ["course_name", "period", "language"],
    ["course_code", "period"],
)
unmatched_columns = ["course", "course_name", "course_code", "period", "language"]

matched_frames = []
still_unmatched = non_matching_courses
for stage_keys in match_stages:
    # Non-key columns coming from the offering get the "_new" suffix
    merged_courses = pd.merge(still_unmatched, course_demand_extended, on=stage_keys, how="left", suffixes=("", "_new"))
    found = merged_courses["course_new"].notna()
    matched_frames.append(merged_courses[found][["course", "course_new"]])
    still_unmatched = merged_courses[~found][unmatched_columns]

concatenated_matches = pd.concat(matched_frames)

# Replace the questionnaire key by the matched offering key where one was found.
# NOTE(review): if a course matched several offering rows in a stage, this merge
# repeats the corresponding market rows - confirm that is acceptable.
market = pd.merge(market, concatenated_matches[["course", "course_new"]], on=["course"], how="left")
# Assign the result back instead of fillna(inplace=True) on a column selection,
# which is deprecated and has no effect under pandas Copy-on-Write.
market["course_new"] = market["course_new"].fillna(market["course"])
market = market.drop(columns=["course"]).rename(columns={"course_new": "course"})

# Look the demand up again with the corrected keys and use it to fill the rows
# that had no demand from the first, exact-key merge.
merged_market = pd.merge(market, course_demand[["course", "number_classes", "number_students"]], on="course", how="left", suffixes=("", "_demand"))
for demand_column in ("number_classes", "number_students"):
    merged_market[demand_column] = merged_market[demand_column].fillna(merged_market[demand_column + "_demand"])

# Drop the unnecessary columns
merged_market = merged_market.drop(columns=["number_classes_demand", "number_students_demand"])

In [41]:
# OUTPUT #4
# Courses that still have no demand figures after the fallback matching.
lacks_demand = merged_market.number_classes.isna() | merged_market.number_students.isna()
no_matches_final = merged_market.loc[lacks_demand, ["course"]].drop_duplicates()
display(no_matches_final)

# Those courses cannot be sized, so their rows leave the market.
merged_market = merged_market.dropna(subset=["number_classes", "number_students"])

Unnamed: 0,course
243,2134 || Economics of Education || T1 || EN
348,2260 || Nova Students Portfolio || A || EN
376,2329 || Consumer and Managerial Decision Makin...
381,2330 || Consumer Behavior || T2 || EN
488,2452 || Business Strategy and Practice || T2 |...
581,2604 || Brand Management for Hospitality || T2...
590,2621 || Algorithmic Governance || T1 || EN
605,2642 || Principles of Shopper Marketing || T2 ...
609,2645 || Tourism Marketing || S1 || EN
629,2653 || Impact Makers || S1 || EN


## D. Compute capacity

In [42]:
# Create the "semester" column: 1 when the third ' || ' part of the course key
# (the teaching period) starts with 'S', else 0.
merged_market['semester'] = merged_market['course'].apply(lambda x: 1 if x.split(' || ')[2].startswith('S') else 0)
# merged_market['ms_capacity'] = merged_market['new_contract'] * 36

# Not the last expression of the cell, so this preview is not displayed.
merged_market.head()
def calculate_weight(row):
    """Return the weight of a master's course row, or NaN for any other row.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row passed by ``apply(axis=1)``)
        Must provide the keys 'semester', 'masters_course' and
        'number_students'.

    Returns
    -------
    float
        ``number_students * factor / 16 / 36`` for rows with
        ``masters_course == 1``, where ``factor`` is 2.33 when
        ``semester == 1`` and 1.25 when ``semester == 0``.
        ``np.nan`` for every other combination of flags.
    """
    # NOTE(review): 2.33, 1.25, 16 and 36 are undocumented constants — confirm
    # their meaning and consider moving them to the configuration cell.
    if row['semester'] == 1 and row['masters_course'] == 1:
        return ((row['number_students'] * 2.33) / 16) / 36
    if row['semester'] == 0 and row['masters_course'] == 1:
        return ((row['number_students'] * 1.25) / 16) / 36
    # Explicit missing value: the original evaluated `pd.NaT` without returning
    # it, so the function silently returned None here.
    return np.nan

# Compute the 'weight' column row by row (axis=1 passes each row to calculate_weight).
merged_market['weight'] = merged_market.apply(calculate_weight, axis=1)

# Disabled: earlier variant that blanked separate 'ms_capacity' / 'ms_weight'
# columns for rows with masters_course == 0.
# merged_market.loc[merged_market['masters_course'] == 0, 'ms_capacity'] = np.nan
# merged_market.loc[merged_market['masters_course'] == 0, 'ms_weight'] = np.nan

In [43]:
# Spot check: all merged_market rows for a single TA.
merged_market[merged_market.TA=="bernardo.costa@novasbe.pt"]

Unnamed: 0,TA,preference,preference_type,masters_course,new_contract,master_student,number_classes,number_students,course,semester,weight
178,bernardo.costa@novasbe.pt,3,1,0,0.5,0.0,2.0,90.0,1312 || Análise de Dados e Probabilidade || S1...,1,
191,bernardo.costa@novasbe.pt,2,1,0,0.5,0.0,8.0,340.0,1313 || Estatística para Economia e Gestão || ...,1,
200,bernardo.costa@novasbe.pt,4,1,0,0.5,0.0,2.0,76.0,1314 || Econometria || S1 || EN,1,
207,bernardo.costa@novasbe.pt,1,1,0,0.5,0.0,6.0,270.0,1318 || Programação || S1 || EN,1,
517,bernardo.costa@novasbe.pt,2,1,1,0.5,0.0,4.0,320.0,2477 || Introduction to Programming || S1 || EN,1,1.294
579,bernardo.costa@novasbe.pt,1,1,1,0.5,0.0,2.0,140.0,2597 || Advanced Data Analysis || S1 || EN,1,0.566
587,bernardo.costa@novasbe.pt,4,1,1,0.5,0.0,2.0,140.0,2609 || Data Visualization for Business Analyt...,0,0.304
636,bernardo.costa@novasbe.pt,3,1,1,0.5,0.0,2.0,130.0,2659 || Data Curation for Business Analytics |...,0,0.282


In [44]:
# Row counts per masters_course flag; dropna=False also counts missing flags.
merged_market.masters_course.value_counts(dropna=False)

1    404
0    243
Name: masters_course, dtype: int64

### Export the course list for manual weight input

In [45]:
# masters_course is 0 when the first space-separated token of the course key
# starts with "1", and 1 otherwise.
course_demand_extended['masters_course'] = course_demand_extended['course'].apply(
    lambda x: 0 if x.split(' ')[0].startswith('1') else 1
)

# Template for manual entry: only the masters_course == 0 rows, without the
# flag column and with an empty "weight" column to be filled in by hand.
course_demand_extended_bs = (
    course_demand_extended
    .loc[course_demand_extended['masters_course'] == 0]
    .drop(columns=["masters_course"])
    .assign(weight="")
)
course_demand_extended_bs.to_excel("course_demand_extended_bs.xlsx", index=False)

### Read the manually entered weights

In [46]:
# Load the filled-in weights file and rescale every weight by 0.125.
# NOTE(review): the 0.125 factor is undocumented — confirm what unit it converts to.
bs_weights_df = (
    pd.read_excel(bs_weights)[["course", "weight"]]
    .assign(weight=lambda frame: frame["weight"] * 0.125)
)
# Distribution of the rescaled weights, missing values included.
bs_weights_df["weight"].value_counts(dropna=False)

0.125    89
0.333     9
0.167     3
0.250     1
Name: weight, dtype: int64

In [47]:
# Left-join the weights from bs_weights_df onto the market by course key.
# The incoming weight column is suffixed "_bs", and the indicator column
# "_merge" records whether each market row found a match.
final_market = merged_market.merge(
    bs_weights_df,
    how="left",
    on="course",
    suffixes=("", "_bs"),
    indicator=True,
)
final_market.head()

Unnamed: 0,TA,preference,preference_type,masters_course,new_contract,master_student,number_classes,number_students,course,semester,weight,weight_bs,_merge
0,pedro.freitas@novasbe.pt,1,0,0,0.375,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,,0.125,both
1,marlon.e.francisco@novasbe.pt,5,1,0,0.5,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,,0.125,both
2,luis.pereira@novasbe.pt,5,0,0,0.5,1.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,,0.125,both
3,stefano.grancini@novasbe.pt,3,1,0,0.25,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,,0.125,both
4,joao.quelhas@novasbe.pt,5,0,0,0.375,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,,0.125,both


In [48]:
# Check there are no BS courses without weight. A course counts as unweighted
# when it found no match in the weights file OR matched a row whose weight was
# left blank (the export template starts with an empty "weight" column).
# The displayed frame should be empty.
final_market[
    (final_market["masters_course"] == 0)
    & ((final_market["_merge"] != "both") | final_market["weight_bs"].isna())
]

Unnamed: 0,TA,preference,preference_type,masters_course,new_contract,master_student,number_classes,number_students,course,semester,weight,weight_bs,_merge


In [49]:
# Rows without a computed weight take the value from "weight_bs".
final_market["weight"] = final_market["weight"].fillna(final_market["weight_bs"])

# The merge helper columns are not needed once the weights are combined.
final_market.drop(columns=["_merge", "weight_bs"], inplace=True)

# Rename "new_contract" to "capacity".
final_market.rename(columns={"new_contract": "capacity"}, inplace=True)

In [50]:
# Preview final_market after the weights were combined and 'new_contract' was renamed to 'capacity'.
final_market.head()

Unnamed: 0,TA,preference,preference_type,masters_course,capacity,master_student,number_classes,number_students,course,semester,weight
0,pedro.freitas@novasbe.pt,1,0,0,0.375,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,0.125
1,marlon.e.francisco@novasbe.pt,5,1,0,0.5,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,0.125
2,luis.pereira@novasbe.pt,5,0,0,0.5,1.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,0.125
3,stefano.grancini@novasbe.pt,3,1,0,0.25,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,0.125
4,joao.quelhas@novasbe.pt,5,0,0,0.375,0.0,3.0,105.0,1104 || Seminário de Economia Europeia || S1 |...,1,0.125


In [51]:
# Spot check: all final_market rows for a single TA.
final_market[final_market.TA=="bernardo.costa@novasbe.pt"]

Unnamed: 0,TA,preference,preference_type,masters_course,capacity,master_student,number_classes,number_students,course,semester,weight
178,bernardo.costa@novasbe.pt,3,1,0,0.5,0.0,2.0,90.0,1312 || Análise de Dados e Probabilidade || S1...,1,0.333
191,bernardo.costa@novasbe.pt,2,1,0,0.5,0.0,8.0,340.0,1313 || Estatística para Economia e Gestão || ...,1,0.125
200,bernardo.costa@novasbe.pt,4,1,0,0.5,0.0,2.0,76.0,1314 || Econometria || S1 || EN,1,0.125
207,bernardo.costa@novasbe.pt,1,1,0,0.5,0.0,6.0,270.0,1318 || Programação || S1 || EN,1,0.333
493,bernardo.costa@novasbe.pt,2,1,1,0.5,0.0,4.0,320.0,2477 || Introduction to Programming || S1 || EN,1,1.294
555,bernardo.costa@novasbe.pt,1,1,1,0.5,0.0,2.0,140.0,2597 || Advanced Data Analysis || S1 || EN,1,0.566
561,bernardo.costa@novasbe.pt,4,1,1,0.5,0.0,2.0,140.0,2609 || Data Visualization for Business Analyt...,0,0.304
601,bernardo.costa@novasbe.pt,3,1,1,0.5,0.0,2.0,130.0,2659 || Data Curation for Business Analytics |...,0,0.282


In [52]:
# Spot check: every final_market row for a single course key.
final_market[final_market.course=='2193 || Behavioral Economics and Finance || T1 || EN']  # alternative key to inspect: "2597 || Advanced Data Analysis || S1 || EN"

Unnamed: 0,TA,preference,preference_type,masters_course,capacity,master_student,number_classes,number_students,course,semester,weight
272,fabian.wassmann@novasbe.pt,5,2,1,0.125,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13
273,svitlana.sivitskaya@novasbe.pt,3,1,1,0.25,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13
274,galina.vysotskaya@novasbe.pt,4,2,1,0.25,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13
275,luis.morais@novasbe.pt,3,2,1,0.3,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13
276,ruben.bento@novasbe.pt,5,1,1,0.5,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13
277,iqra.hamid@novasbe.pt,2,2,1,0.5,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13
278,priya.krishnan@novasbe.pt,1,2,1,0.5,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13
279,joao.pato@novasbe.pt,4,2,1,0.25,0.0,1.0,60.0,2193 || Behavioral Economics and Finance || T1...,0,0.13


In [53]:
# Distinct TAs that still have at least one row in final_market.
tas = final_market.TA.unique()

# Sorted unique entries of completed_preferences that do not appear in tas.
# NOTE(review): completed_preferences is defined outside this section —
# presumably the TAs who completed the questionnaire; confirm.
missing_elements = np.setdiff1d(completed_preferences, tas)

# OUTPUT #N: TAs affected by unmatched courses
print(missing_elements)

['beatriz.f.ferreira@novasbe.pt' 'mariana.pires@novasbe.pt']


In [54]:
# Weight of the row whose index label is 0.
final_market.weight[0]

0.125

In [55]:
# Distribution of the final weights; dropna=False also counts missing weights.
final_market.weight.value_counts(dropna=False)

0.125    191
0.217     37
0.333     36
0.174     36
0.324     27
0.130     26
0.347     22
2.306     21
1.294     20
0.098     17
0.566     15
0.141     14
0.243     13
0.087     12
0.260     12
0.152     10
0.250     10
0.109     10
0.358      9
0.607      9
0.485      9
0.586      9
0.651      8
0.210      6
0.304      6
6.068      6
0.167      6
1.942      5
0.119      5
0.434      4
1.456      4
0.222      4
0.111      3
0.202      3
0.809      3
0.182      3
0.195      3
0.521      3
0.971      2
0.239      2
0.293      1
0.283      1
7.160      1
0.282      1
0.002      1
0.162      1
Name: weight, dtype: int64

# Matching algorithm

In [56]:
# Map each TA to their capacity, built from the distinct (TA, capacity) pairs.
# If a TA appears with several capacities, the last pair wins.
ta_dict = dict(
    final_market[['TA', 'capacity']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)

In [57]:
# Map each course key to its weight, built from the distinct (course, weight)
# pairs. If a course appears with several weights, the last pair wins.
courses_dict = dict(
    final_market[['course', 'weight']]
    .drop_duplicates()
    .itertuples(index=False, name=None)
)

In [58]:
# final_preferences = final_market[["TA", "preference_type", "preference", "course", "semester", "master_student", "masters_course"]]
# final_preferences = final_preferences.sort_values(by=["course", "preference_type", "preference"], ascending=[True, False, True])
# final_preferences.head()

In [59]:
# len(final_preferences.course.unique())

In [60]:
# course_preference_counts = final_preferences.groupby(['course', 'preference']).size().reset_index(name='count')

# # Filter out the courses with more than one observation for preference == 1
# filtered_courses = course_preference_counts.loc[(course_preference_counts['preference'] == 1) & (course_preference_counts['count'] == 1), 'course']
# final_preferences_filtered = final_preferences[final_preferences['course'].isin(filtered_courses)]

# # Print the filtered DataFrame
# print(len(final_preferences_filtered.course.unique()))

In [61]:
# Master's-course rows, restricted to rows where master_student == 0.
ms_courses = final_market[(final_market['masters_course'] == 1) & (final_market['master_student'] == 0)]

# course -> weight for the master's courses, from the distinct pairs.
ms_courses_dict = ms_courses[['course','weight']].drop_duplicates()
ms_courses_dict = dict(zip(ms_courses_dict['course'], ms_courses_dict['weight']))

# All rows with masters_course == 0.
bs_courses = final_market[final_market['masters_course'] == 0]

# course -> weight for those rows. The mapping is built from the de-duplicated
# pairs, mirroring ms_courses_dict; the original zipped the full bs_courses
# frame instead, which left the drop_duplicates() result unused.
bs_courses_dict = bs_courses[['course','weight']].drop_duplicates()
bs_courses_dict = dict(zip(bs_courses_dict['course'], bs_courses_dict['weight']))

In [62]:
# Master's list: within each course, higher preference_type first, then lower
# preference value first.
ms_final_preferences = (
    ms_courses[["TA", "preference_type", "preference", "course", "semester"]]
    .sort_values(by=["course", "preference_type", "preference"], ascending=[True, False, True])
)

# Bachelor's list: within each course, lower preference_type first, then lower
# preference value first.
bs_final_preferences = (
    bs_courses[["TA", "preference_type", "preference", "course", "semester"]]
    .sort_values(by=["course", "preference_type", "preference"], ascending=True)
)

In [63]:
# Number of distinct courses in each preference list (missing keys counted too).
print(ms_final_preferences["course"].nunique(dropna=False))
print(bs_final_preferences["course"].nunique(dropna=False))

107
46


In [64]:
# Number of rows per (course, preference) pair in the master's list.
course_preference_counts = (
    ms_final_preferences
    .groupby(['course', 'preference'])
    .size()
    .reset_index(name='count')
)
# Courses with exactly one row at preference == 1.
filtered_courses = course_preference_counts.loc[
    course_preference_counts['preference'].eq(1) & course_preference_counts['count'].eq(1),
    'course',
]
# Keep every preference row that belongs to one of those courses.
ms_final_preferences_filtered = ms_final_preferences[ms_final_preferences['course'].isin(filtered_courses)]

# Number of distinct courses that remain.
print(ms_final_preferences_filtered['course'].nunique(dropna=False))

36


In [65]:
# Number of rows per (course, preference) pair in the bachelor's list.
course_preference_counts = (
    bs_final_preferences
    .groupby(['course', 'preference'])
    .size()
    .reset_index(name='count')
)
# Courses with exactly one row at preference == 1.
filtered_courses = course_preference_counts.loc[
    course_preference_counts['preference'].eq(1) & course_preference_counts['count'].eq(1),
    'course',
]
# Keep every preference row that belongs to one of those courses.
bs_final_preferences_filtered = bs_final_preferences[bs_final_preferences['course'].isin(filtered_courses)]

# Number of distinct courses that remain.
print(bs_final_preferences_filtered['course'].nunique(dropna=False))

16


In [66]:
# Display translation_mapping (defined outside this section).
# NOTE(review): presumably the encoding behind preference_type — confirm.
translation_mapping

{"Masters' Courses": 2, "Bachelors' Courses": 0, 'Indifferent': 1, NaT: 1}

In [67]:
# Preview the filtered master's preference list.
ms_final_preferences_filtered.head()

Unnamed: 0,TA,preference_type,preference,course,semester
278,priya.krishnan@novasbe.pt,2,1,2193 || Behavioral Economics and Finance || T1...,0
277,iqra.hamid@novasbe.pt,2,2,2193 || Behavioral Economics and Finance || T1...,0
275,luis.morais@novasbe.pt,2,3,2193 || Behavioral Economics and Finance || T1...,0
274,galina.vysotskaya@novasbe.pt,2,4,2193 || Behavioral Economics and Finance || T1...,0
279,joao.pato@novasbe.pt,2,4,2193 || Behavioral Economics and Finance || T1...,0


In [68]:
# Rows of the filtered master's list whose preference_type is 0.
ms_final_preferences_filtered[ms_final_preferences_filtered.preference_type==0]

Unnamed: 0,TA,preference_type,preference,course,semester
281,joao.quelhas@novasbe.pt,0,1,2194 || History of Economic Analysis || T2 || EN,0
280,pedro.coelho@novasbe.pt,0,2,2194 || History of Economic Analysis || T2 || EN,0
283,diogo.guerreiro@novasbe.pt,0,2,2194 || History of Economic Analysis || T2 || EN,0
285,diogo.lima@novasbe.pt,0,2,2194 || History of Economic Analysis || T2 || EN,0
298,ana.lourenco@novasbe.pt,0,4,2222 || Financial Statement Analysis || T1 || EN,0
309,ana.lourenco@novasbe.pt,0,1,2232 || Applied Corporate Finance || S1 || EN,1
390,ana.marques@novasbe.pt,0,2,2375 || Corporate Social Responsibility || T1 ...,0
437,paulo.rocha@novasbe.pt,0,5,2434 || Statistics II || T1 || EN,0
458,jose.ferrao@novasbe.pt,0,4,2440 || Big Data Analysis || T2 || EN,0
522,joao.almeida@novasbe.pt,0,1,2490 || Geoeconomics and International Relatio...,0


In [69]:
# ta_allocations = []

# for _, row in final_preferences_filtered.iterrows():
#     ta = row['TA']
#     pref_type = row['preference_type']
#     course = row['course']
#     ta_capacity = ta_dict[ta]
#     course_weight = courses_dict[course]
    
#     # Give priority to TAs who want MS courses
#     if pref_type == 1 or pref_type == 2:
#         # Check if course can be allocated
#         if course_weight > 0:
#             # Check if TA still has capacity
#             if  ta_capacity > 0:
#                 allocated_weight = min(course_weight, ta_capacity)
#                 # Allocate course to TA
#                 ta_allocations.append((ta, course, allocated_weight))
#                 ta_dict[ta] -= allocated_weight
#                 courses_dict[course] -= allocated_weight
#             else:
#                 try:
#                     final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['ta'] != ta]
#                 except:
#                     continue
#         else:
#             final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['course'] != course]

NameError: name 'final_preferences_filtered' is not defined

In [None]:
# ta_allocations = []

# for _, row in final_preferences_filtered.iterrows():
#     ta = row['TA']
#     pref_type = row['preference_type']
#     course = row['course']
#     student = row['master_student']
#     ms_course = row['masters_course']

#     ta_capacity = ta_dict[ta]
#     course_weight = courses_dict[course]

#     # MS courses
#     if ms_course == 1 and student == 0:
#         # Give priority to TAs who want MS courses
#         if pref_type == 1 or pref_type == 2:
#             # Check if course can be allocated
#             if course_weight > 0:
#                 # Check if TA still has capacity
#                 if  ta_capacity > 0:
#                     allocated_weight = min(course_weight, ta_capacity)
#                     # Allocate course to TA
#                     ta_allocations.append((ta, course, allocated_weight))
#                     ta_dict[ta] -= allocated_weight
#                     courses_dict[course] -= allocated_weight
#                 else:
#                     try:
#                         final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['ta'] != ta]
#                     except:
#                         continue
#             else:
#                 final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['course'] != course]
#         else:
#             continue
    
#     elif ms_course == 0:
#         # Give priority to TAs who want BS courses
#         if pref_type == 0:
#             # Check if course can be allocated
#             if course_weight > 0:
#                 # Check if TA still has capacity
#                 if  ta_capacity > 0:
#                     allocated_weight = min(course_weight, ta_capacity)
#                     # Allocate course to TA
#                     ta_allocations.append((ta, course, allocated_weight))
#                     ta_dict[ta] -= allocated_weight
#                     courses_dict[course] -= allocated_weight
#                 else:
#                     try:
#                         final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['ta'] != ta]
#                     except:
#                         continue
#             else:
#                 final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['course'] != course]
#         else:
#             continue
#     else:
#         if course_weight > 0:
#             # Check if TA still has capacity
#             if  ta_capacity > 0:
#                 allocated_weight = min(course_weight, ta_capacity)
#                 # Allocate course to TA
#                 ta_allocations.append((ta, course, allocated_weight))
#                 ta_dict[ta] -= allocated_weight
#                 courses_dict[course] -= allocated_weight
#             else:
#                 try:
#                     final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['ta'] != ta]
#                 except:
#                     continue
#         else:
#             final_preferences_filtered = final_preferences_filtered[final_preferences_filtered['course'] != course]            

In [None]:
# Number of allocations made so far.
# NOTE(review): within the visible cells, ta_allocations is only assigned in
# commented-out code — confirm it is defined before this cell on a fresh
# Restart & Run All.
len(ta_allocations)

24

In [None]:
# Show the allocation list.
# NOTE(review): within the visible cells, ta_allocations is only assigned in
# commented-out code — confirm it exists on a fresh kernel run.
ta_allocations

[('isabel.silva.carvalho@novasbe.pt',
  '2463 || Advanced Topics in Human Resources Management || T2 || EN',
  0.2921006944444445),
 ('catarina.leitao@novasbe.pt',
  '2473 || Negotiation || T1 || EN',
  0.13020833333333334),
 ('carlos.marques@novasbe.pt',
  '2481 || Product Design and Development || T1 || EN',
  0.2170138888888889),
 ('joao.costa@novasbe.pt',
  '2490 || Geoeconomics and International Relations || T1 || EN',
  0.2387152777777778),
 ('renato.goncalves@novasbe.pt',
  '2491 || Data Visualization || T2 || EN',
  0.1),
 ('luis.morais@novasbe.pt',
  '2491 || Data Visualization || T2 || EN',
  0.16041666666666668),
 ('maria.m.carvalho@novasbe.pt',
  '2496 || Strategic Foresight and Scenario Planning || T1 || EN',
  0.1953125),
 ('carlota.lagoa@novasbe.pt',
  '2500 || Performance and Progress || T2 || EN',
  0.125),
 ('radek.oros@novasbe.pt',
  '2585 || Evidence-Based Practices for Wellbeing || S1 || EN',
  0.18203125),
 ('paulo.ruiz@novasbe.pt', '2597 || Advanced Data Analysis

In [None]:
# ms_courses_dict_filtered = {k: v for k, v in ms_courses_dict.items() if v == 0}
# len(ms_courses_dict_filtered)

In [None]:
# ms_courses_dict_filtered = {k: v for k, v in ms_ta_dict.items() if v < 5}
# len(ms_courses_dict_filtered)
# ms_courses_dict_filtered