**Another question: what happens when we make sure that there is at least 30 mins between each class?**

Load the CSV I scraped from the UNC-CH registrar's course PDF.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

fall19_courses = pd.read_csv("UNCfall19courses.csv")

# "Time:" looks like "12:30 - 13:45"; splitting on ':' / ',' / '-' yields the
# four pieces hour, minute, hour, minute (with stray spaces around the dash).
time_parts = fall19_courses["Time:"].str.split(r'[:,-]', expand=True)

# Keep the pieces as zero-padded text ("0930") instead of casting to int:
# an int cast drops leading zeros, so a time such as "00:30" would become 30
# and no longer parse unambiguously with "%H%M".
fall19_courses["start_time"] = (time_parts[0] + time_parts[1]).str.strip()
fall19_courses["end_time"] = (time_parts[2] + time_parts[3]).str.strip()

# Rows whose "Time:" did not split into both a start and an end come out as
# NaN above; drop them. The explicit copy keeps the later column assignments
# from writing into a view of the raw frame.
fall19_courses = fall19_courses.dropna(subset=['start_time', 'end_time']).copy()

# working with datetime (only a time of day is parsed, so the date part
# defaults to 1900-01-01)
for time_col in ("start_time", "end_time"):
    fall19_courses[time_col] = pd.to_datetime(fall19_courses[time_col], format="%H%M")

In [2]:
# Keep only courses that have an assigned building and meeting days.
fall19_courses = fall19_courses[fall19_courses['Bldg:']!='TBA']
fall19_courses = fall19_courses[fall19_courses['Days:']!='TBA']

# sort and reset index
# fall19_courses = fall19_courses.groupby(['Bldg:', 'Room:','Days:','start_time','end_time'])
# fall19_courses.head()
# print(fall19_courses.count())
# fall19_courses = fall19_courses.groupby(['Bldg:']).size().reset_index(name='Count')

# add index for each class
# The old row labels (with gaps left by the filters above) are discarded and a
# fresh 0..n-1 numbering is exposed as an 'index' column. The previous
# `.reset_index().reset_index()` + `drop(['level_0', 'level_1'], 1)` could not
# run on a fresh kernel: no 'level_1' column is ever created, and pandas >= 2.0
# rejects the positional axis argument. Dropping any existing 'index' column
# first makes the cell safe to re-run.
fall19_courses = (
    fall19_courses
    .drop(columns='index', errors='ignore')
    .reset_index(drop=True)
    .reset_index()
)
fall19_courses.head()

Unnamed: 0,index,Bldg:,Room:,Days:,Time:,Class Enrl Tot:,start_time,end_time
0,0,Gardner,308,TuTh,12:30 - 13:45,43,1900-01-01 12:30:00,1900-01-01 13:45:00
1,1,Phillips,247,TuTh,09:30 - 10:45,42,1900-01-01 09:30:00,1900-01-01 10:45:00
2,2,Peabody,311,TuTh,14:00 - 15:15,43,1900-01-01 14:00:00,1900-01-01 15:15:00
3,3,Peabody,306,TuTh,14:00 - 15:15,36,1900-01-01 14:00:00,1900-01-01 15:15:00
4,4,Carolina Hall,220,MWF,08:00 - 08:50,32,1900-01-01 08:00:00,1900-01-01 08:50:00


**Optional: remove classes with 50 or more (or 100 or more) students.**

In [3]:
# Optional filters -- uncomment one to drop large classes before the analysis.
# fall19_courses = fall19_courses[fall19_courses['Class Enrl Tot:'] < 100]
# fall19_courses = fall19_courses[fall19_courses['Class Enrl Tot:'] < 50]

# Count rows by summing the boolean mask. The earlier `.count()[0]` relied on
# positional `[]` access to a label-indexed Series, which newer pandas
# deprecates. Thresholds are inclusive (> 99 means 100 or more), mirroring the
# `< 100` / `< 50` filters above.
enrollment = fall19_courses["Class Enrl Tot:"]
print(f'Courses with more than 100 students: {int((enrollment > 99).sum())}')
print(f'Courses with more than 50 students: {int((enrollment > 49).sum())}')


Courses with more than 100 students: 178
Courses with more than 50 students: 370


**Split the courses by day of the week into a separate DataFrame for each day.**

(Note: I could include a groupby to ignore cross listed courses so we avoid double counting these courses. However, this would also remove labs that are listed in the same classroom but actually take place in a laboratory building.)

In [4]:
def courses_on(courses, day_code):
    """Return the courses that meet on one weekday, ordered room by room.

    Parameters
    ----------
    courses : pandas.DataFrame
        Course table with 'Days:', 'Bldg:', 'Room:' and 'start_time' columns.
    day_code : str
        Day abbreviation as it appears inside 'Days:' ('M', 'Tu', 'W', 'Th', 'F').

    Returns
    -------
    pandas.DataFrame
        A new frame holding the rows whose 'Days:' text contains `day_code`,
        sorted by building, room, then start time. Row labels are preserved.
    """
    # regex=False: the code is matched literally.
    # na=False: a missing 'Days:' value counts as "does not meet" instead of
    # producing a NaN mask, which pandas refuses to index with.
    meets_that_day = courses['Days:'].str.contains(day_code, regex=False, na=False)
    return courses[meets_that_day].sort_values(by=['Bldg:', 'Room:', 'start_time'])


monday_courses = courses_on(fall19_courses, 'M')
tuesday_courses = courses_on(fall19_courses, 'Tu')
wednesday_courses = courses_on(fall19_courses, 'W')
thursday_courses = courses_on(fall19_courses, 'Th')
friday_courses = courses_on(fall19_courses, 'F')


# optional: save to csv
# standard_day_list = [monday_courses, tuesday_courses, wednesday_courses, thursday_courses, friday_courses]
# days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday']
# for index, course in enumerate(standard_day_list):
#     course.to_csv(f'standard_{days[index]}_yes50.csv')

**Now, I add 30 minutes between each class.**

I consider each classroom in turn and check whether each class starts at least 30 minutes after the previous class in that room ends. If it does not (the gap is less than 30 minutes), I push its start and end times later by the shortfall, and then move on to the next class using the adjusted end time.


In [5]:
from datetime import datetime, timedelta

# Minimum idle time, in minutes, required between two classes in one room.
MIN_GAP_MINUTES = 30


def spread_classes(day_courses, min_gap_minutes=MIN_GAP_MINUTES):
    """Delay classes in place so a room is free for `min_gap_minutes` between them.

    The rows are walked in order. Whenever a class is in the same building and
    room as the row before it and starts less than `min_gap_minutes` after
    that previous class ends, its 'start_time' and 'end_time' are both moved
    later by the shortfall. The shifted end time is what the next class is
    compared against, so delays cascade through the rest of the room's day.

    Parameters
    ----------
    day_courses : pandas.DataFrame
        One day's courses with 'Bldg:', 'Room:', 'start_time' and 'end_time'
        columns, already sorted by building, room and start time. Row labels
        are used for the write-back, so they are assumed to be unique.
    min_gap_minutes : int, default MIN_GAP_MINUTES
        Required gap between consecutive classes in the same room.

    Returns
    -------
    pandas.DataFrame
        The same `day_courses` object, modified in place. An empty frame is
        returned unchanged.
    """
    prev_bldg = prev_room = prev_end = None

    for label, row in day_courses.iterrows():
        bldg, room = row['Bldg:'], row['Room:']
        start, end = row['start_time'], row['end_time']

        # The first row has no predecessor; after that, only a class in the
        # very same room can conflict with the one before it.
        same_room = prev_end is not None and room == prev_room and bldg == prev_bldg
        if same_room:
            # gap between classes in minutes (can be negative when they overlap)
            gap = (start - prev_end).total_seconds() / 60
            if gap < min_gap_minutes:
                shift = timedelta(minutes=int(min_gap_minutes - int(gap)))
                start = start + shift
                end = end + shift
                day_courses.at[label, 'start_time'] = start
                day_courses.at[label, 'end_time'] = end

        # Remember this class (with any shift applied) for the next row.
        prev_bldg, prev_room, prev_end = bldg, room, end

    return day_courses


day_list = [monday_courses, tuesday_courses, wednesday_courses, thursday_courses, friday_courses]
for day_courses in day_list:
    spread_classes(day_courses)

In [6]:
# Preview Monday after the spacing pass: start_time/end_time hold the adjusted
# times, while the 'Time:' text column still shows the originally listed slot.
monday_courses.head()

Unnamed: 0,index,Bldg:,Room:,Days:,Time:,Class Enrl Tot:,start_time,end_time
2928,2928,Alumni,203,MWF,09:05 - 09:55,18,1900-01-01 09:05:00,1900-01-01 09:55:00
99,99,Alumni,203,MWF,10:10 - 11:00,22,1900-01-01 10:25:00,1900-01-01 11:15:00
2890,2890,Alumni,203,MWF,11:15 - 12:05,15,1900-01-01 11:45:00,1900-01-01 12:35:00
131,131,Alumni,203,M,12:20 - 13:10,20,1900-01-01 13:05:00,1900-01-01 13:55:00
228,228,Alumni,203,M,13:25 - 15:55,8,1900-01-01 14:25:00,1900-01-01 16:55:00


Save as CSV file

In [7]:
# optional: uncomment to write each day's spaced-out schedule to its own csv
# days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday']
# for index, course in enumerate(day_list):
#     course.to_csv(f'spread_{days[index]}_yes50.csv')