In [2]:
import pandas as pd
import numpy as np
import datetime as dt
import os

'''
1. Important (the quality of my schedule significantly influences my happiness, but I will not leave over it)
2. Never have to teach in the mornings before 10 am.
3. Never have to teach in the evenings after 6 pm.
4. Schedule all teaching during either M/W only or T/H only.
'''

'''
Simulate professor preferences and total time per week to be able to teach
Preference for time of day
Total number of hours able to teach

Assume professors fill out a survey indicating:
- Courses able to teach
- Preference score of 0, 1, 2 for time of day for Morning, Afternoon, Evening (Block out times from 12:00 to 12:30 for lunch)
- Total number of available hours for teaching per week (could be based on Employment Status and Role)
- Maximum number of hours a professor can teach is 35 hours a week
- Order of most hours is:
    - Full Time Clinical
    - Full Time 
- Preference for MW, TH as 0, 1, 2

Assumptions for data as given:
Classes offered for the term and number of sections
Class length for given section is fixed
Professor provides total number of hours can teach
Professor provides classes able to teach
'''

'\nSimulate professor preferences and total time per week to be able to teach\nPreference for time of day\nTotal number of hours able to teach\n\nAssume professors fill out a survey indicating:\n- Courses able to teach\n- Preference score of 0, 1, 2 for time of day for Morning, Afternoon, Evening (Block out times from 12:00 to 12:30 for lunch)\n- Total number of available hours for teaching per week (could be based on Employment Status and Role)\n- Maximum number of hours a professor can teach is 35 hours a week\n- Order of most hours is:\n    - Full Time Clinical\n    - Full Time \n- Preference for MW, TH as 0, 1, 2\n\nAssumptions for data as given:\nClasses offered for the term and number of sections\nClass length for given section is fixed\nProfessor provides total number of hours can teach\nProfessor provides classes able to teach\n'

In [3]:
# Load the two input workbooks from the current working directory.
# The paths are built with os.path.join rather than by appending '\<name>':
# '\M' and '\p' are invalid string escapes (Python warns about them), and a
# hard-coded backslash separator only resolves on Windows.
data_dir = os.getcwd()

# Columns of the enrollment workbook that are kept for the rest of the notebook.
enroll_columns = ['Course', 'Section', 'Course Prefix', 'Course Suffix', 'Department',
                  'First Instructor', 'First Instructor UID', 'First Begin Time',
                  'First Days', 'First End Time', 'Term', 'Max Units', 'First Room']
enrolls = pd.read_excel(
    os.path.join(data_dir, 'Marshall_Course_Enrollment_1516_1617.xlsx')
)[enroll_columns]

prof_info = pd.read_excel(os.path.join(data_dir, 'prof_info.xlsx'))
# Positional rename: assumes prof_info.xlsx has exactly these ten columns in
# this order -- TODO confirm against the workbook.
prof_info.columns = ['Name', 'Department', 'Room', 'Building', 'Employment Status', 'Role',
                     'Promotion', 'First_Name', 'Last_Name', 'First Instructor UID']

def _class_length_hours(begin, end):
    """Return the duration between two clock times in hours, rounded to the nearest half hour.

    `begin` and `end` must expose `.hour` and `.minute` (e.g. `datetime.time`).
    """
    hours = (end.hour - begin.hour) + (end.minute - begin.minute) / 60
    return np.round(2 * hours) / 2


# Attach professor attributes to every enrollment row that names an instructor.
has_instructor = enrolls['First Instructor UID'].notna()
prof_match = enrolls[has_instructor].merge(prof_info, on='First Instructor UID', how='inner')
prof_match = prof_match[['Course', 'Name', 'First Instructor UID', 'First Begin Time', 'First Days',
                         'First End Time', 'Employment Status', 'Role', 'Promotion', 'Section',
                         'Term', 'Max Units', 'First Room']]

# Keep only sections with a real start time (drop missing values and 'TBA' placeholders).
begin_time = prof_match['First Begin Time']
has_start = begin_time.notna() & (begin_time != 'TBA')
prof_match = prof_match[has_start].reset_index(drop=True)

# Compute every class length in one pass instead of writing one cell at a time
# through .loc inside an iterrows() loop.
prof_match['Class Length'] = pd.Series(
    [_class_length_hours(begin, end)
     for begin, end in zip(prof_match['First Begin Time'], prof_match['First End Time'])],
    index=prof_match.index,
    dtype=float,
)

# Total class hours per professor and term, largest totals first.
prof_pref = (
    prof_match.groupby(['First Instructor UID', 'Term'])['Class Length']
    .sum()
    .reset_index()
    .sort_values('Class Length', ascending=False)
)

# One string per professor listing every distinct course they appear with in
# prof_match (across all terms), formatted as the list repr without brackets.
# Building the lookup once and mapping it avoids (a) rescanning prof_match for
# every professor and (b) assigning strings into a float NaN column, which
# recent pandas versions deprecate as an incompatible-dtype assignment.
courses_by_prof = prof_match.groupby('First Instructor UID')['Course'].agg(
    lambda courses: str(courses.drop_duplicates().tolist()).replace('[', '').replace(']', '')
)
prof_pref['Available Courses'] = prof_pref['First Instructor UID'].map(courses_by_prof)

# Attach employment status and role. The lookup is de-duplicated before the
# merge so the join does not emit one row per section only to drop them again.
prof_roles = prof_match[['First Instructor UID', 'Employment Status', 'Role']].drop_duplicates()
prof_pref = prof_pref.merge(prof_roles, on='First Instructor UID', how='inner')

In [324]:
# Preview the first rows only rather than rendering the whole table.
prof_match.head(10)

Unnamed: 0,Course,Name,First Instructor UID,First Begin Time,First Days,First End Time,Employment Status,Role,Promotion,Section,Term,Max Units,First Room,Class Length
0,ACCT-370,Merle Hopkins,3.78335e+09,10:00:00,F,11:50:00,Full-Time,Clinical,Full,14029,20153,0.0,SLH200,2.0
1,ACCT-370,Merle Hopkins,3.78335e+09,08:00:00,MW,09:50:00,Full-Time,Clinical,Full,14025,20153,4.0,ACC303,2.0
2,ACCT-370,Merle Hopkins,3.78335e+09,10:00:00,MW,11:50:00,Full-Time,Clinical,Full,14026,20153,4.0,ACC303,2.0
3,ACCT-370,Merle Hopkins,3.78335e+09,12:00:00,MW,13:50:00,Full-Time,Clinical,Full,14027,20153,4.0,ACC303,2.0
4,BUAD-285B,Merle Hopkins,3.78335e+09,12:00:00,TH,13:50:00,Full-Time,Clinical,Full,14522,20153,2.0,ACC201,2.0
5,GSBA-520,Merle Hopkins,3.78335e+09,18:30:00,T,21:30:00,Full-Time,Clinical,Full,15650,20153,3.0,ACC303,3.0
6,ACCT-370,Merle Hopkins,3.78335e+09,10:00:00,F,11:50:00,Full-Time,Clinical,Full,14029,20161,0.0,HOH EDI,2.0
7,ACCT-370,Merle Hopkins,3.78335e+09,10:00:00,MW,11:50:00,Full-Time,Clinical,Full,14028,20161,4.0,ACC310,2.0
8,ACCT-370,Merle Hopkins,3.78335e+09,12:00:00,MW,13:50:00,Full-Time,Clinical,Full,14025,20161,4.0,ACC310,2.0
9,ACCT-415,Merle Hopkins,3.78335e+09,08:00:00,MW,09:50:00,Full-Time,Clinical,Full,14100,20161,4.0,ACC205,2.0


In [329]:
# Simulate a time-of-day preference for every row of prof_pref: each row gets
# the scores 0, 1 and 2 assigned once each across Morning / Afternoon / Evening.
# A dedicated, seeded generator makes the exported CSV reproducible on re-run.
SIM_SEED = 0
rng = np.random.RandomState(SIM_SEED)
n_rows = len(prof_pref)

morning = rng.choice(3, p=[0.5, 0.3, 0.2], size=n_rows)
afternoon = rng.choice(3, p=[0.2, 0.2, 0.6], size=n_rows)

# Resolve collisions between the two independent draws: a colliding afternoon
# score is reset to 0, then bumped to 1 if the morning score is itself 0.
afternoon = np.where(afternoon == morning, 0, afternoon)
afternoon = np.where(afternoon == morning, afternoon + 1, afternoon)

# Evening takes whichever of 0, 1, 2 is left (the three scores sum to 3).
evening = 3 - afternoon - morning

# Sanity check: every row must be a permutation of (0, 1, 2).
assert (np.sort(np.column_stack([morning, afternoon, evening]), axis=1) == [0, 1, 2]).all()

prof_pref['Morning Rank'] = morning
prof_pref['Afternoon Rank'] = afternoon
prof_pref['Evening Rank'] = evening

# index=False already omits the index, so no reset_index() is needed before writing.
prof_pref.to_csv('Simulated Prof Preferences.csv', index = False)

In [323]:
# For each time-of-day column, print how many rows (with a non-null Role)
# received each score.
for rank_col in ('Morning Rank', 'Afternoon Rank', 'Evening Rank'):
    score_counts = prof_pref.groupby([rank_col])['Role'].count()
    print(score_counts)

Morning Rank
0    385
1    228
2    146
Name: Role, dtype: int64
Afternoon Rank
0    204
1    187
2    368
Name: Role, dtype: int64
Evening Rank
0    170
1    344
2    245
Name: Role, dtype: int64
