### This notebook will create a spreadsheet for course-semesters

Course semesters have the following form:

| code-semester  | code    | semester | name               | prof        | redundant_codes  | prereqs   | student_ids  |
|----------------|---------|----------|--------------------|-------------|------------------|-----------|--------------|
| ENGR100 0304FA | ENGR100 | 0304FA   | Engineering Basics | Paul Ruvalo | [FND100, FND103] | [ENGR101] | [15, 43, 32] |

In [1]:
import pandas as pd

# Read the raw enrollment records and preview the first few rows.
enrollment_csv_path = '../data/olincourseenrollment0214.csv'
data = pd.read_csv(enrollment_csv_path)
data.head()

Unnamed: 0,status,grad_class,id,semester,sex,grade,major_main,major_sub,code,Unnamed: 9,course_main,course_sub,professor,term,year,course_fullname,major_fullname,code-semester
0,GRAD,2007,602,0304FA,M,FF,Undeclared,,AHS1101,2,History and Society,Bodies in Motion: Migration...,"Fleischmann, Fritz",FA,3,History and Society Bodies in Motion: Migratio...,Undeclared,AHS1101 0304FA
1,GRAD,2007,602,0304FA,M,FF,Undeclared,,FND1210,1,Physical Foundations ofEngineering I,,"Zastavker, Yevgeniya",FA,3,Physical Foundations ofEngineering I,Undeclared,FND1210 0304FA
2,GRAD,2007,602,0304FA,M,FF,Undeclared,,FND1312,1,Mathematical Foundations ofEngineering I: Calc...,,"Tilley, Burt S.",FA,3,Mathematical Foundations ofEngineering I: Calc...,Undeclared,FND1312 0304FA
3,GRAD,2007,602,0304FA,M,FF,Undeclared,,FND1410,1,Foundations of EngineeringProject I,Mechanical Nature,"Linder, Benjamin",FA,3,Foundations of EngineeringProject I Mechanical...,Undeclared,FND1410 0304FA
4,GRAD,2007,602,0304FA,M,FF,Undeclared,,FND1510,3,Intro to Modeling & Control,,"Pratt, Gill",FA,3,Intro to Modeling & Control,Undeclared,FND1510 0304FA


In [2]:
# Peek at the 'code-semester' value of the row labelled 0.
data.loc[0, 'code-semester']

'AHS1101 0304FA'

In [3]:
# Bucket the enrollment rows by their 'code-semester' value.
grouped = data.groupby(by='code-semester')

In [4]:
from collections import OrderedDict

# Output schema: one column per field of a course-semester record.
columns = [
    'code-semester',
    'code',
    'semester',
    'name',
    'prof',
    'redundant_codes',
    'prereqs',
    'student_ids',
]

# Column-oriented accumulator: column name -> list holding one value per group.
course_semesters = OrderedDict((column, []) for column in columns)

# Each item of `grouped` is a (code-semester key, registrations sub-frame) pair.
for code_semester, registrations in grouped:
    # Course-level fields are read from the group's first registration row.
    first_registration = registrations.iloc[0]

    record = {
        'code-semester': code_semester,
        'code': first_registration['code'],
        'semester': first_registration['semester'],
        'name': first_registration['course_fullname'],
        'prof': first_registration['professor'],
        'redundant_codes': [],  # TODO: figure out what the redundant codes are
        'prereqs': [],  # TODO: figure out what the prereqs are
        # De-duplicated ids appearing in this group's registrations.
        'student_ids': registrations['id'].unique().tolist(),
    }

    for column in columns:
        course_semesters[column].append(record[column])

course_semesters_df = pd.DataFrame(course_semesters)

In [5]:
# Preview the first ten rows of the assembled table.
course_semesters_df.head(n=10)

Unnamed: 0,code-semester,code,semester,name,prof,redundant_codes,prereqs,student_ids
0,AHS CAP SPR 0607FA,AHS CAP SPR,0607FA,AHS CapstoneSpring Pre-registration,"Martello, Robert",[],[],"[123, 663, 393, 227, 160, 551, 320, 684, 661, ..."
1,AHS CAP SPR 0708FA,AHS CAP SPR,0708FA,AHS CapstoneSpring Pre-registration,"Lynch, Caitrin",[],[],"[535, 709, 906, 131, 877]"
2,AHS1101 0203FA,AHS1101,0203FA,History and Society Bodies in Motion: Migratio...,"Fleischmann, Fritz",[],[],"[625, 899, 198, 945, 148, 589, 395, 477, 158, ..."
3,AHS1101 0304FA,AHS1101,0304FA,History and Society Bodies in Motion: Migratio...,"Fleischmann, Fritz",[],[],"[602, 393, 631, 779, 227, 127, 868, 160, 19, 6..."
4,AHS1102 0203FA,AHS1102,0203FA,Arts and Humanities The Quest,"Tropp, Martin",[],[],"[458, 291, 253, 207, 455, 698, 72, 849, 237, 8..."
5,AHS1102 0304FA,AHS1102,0304FA,Arts and Humanities,"Tropp, Martin",[],[],"[856, 861, 123, 663, 469, 724, 551, 320, 684, ..."
6,AHS1105 0203SP,AHS1105,0203SP,"Arts, Humanities, Social ScienceFoundation wit...","Kelly, Kathleen",[],[],"[305, 554, 262, 803, 3, 409, 268, 494, 523, 43..."
7,AHS1110 0203FA,AHS1110,0203FA,"History of TechnologyFoundation: Technology, S...","Martello, Robert",[],[],"[537, 367, 744, 468, 388, 756, 618, 644, 542, ..."
8,AHS1110 0304SP,AHS1110,0304SP,"History of TechnologyFoundation: Technology, S...","Martello, Robert",[],[],"[663, 393, 236, 857, 669, 320, 615, 539, 950, ..."
9,AHS1111 0304SP,AHS1111,0304SP,Responsive Drawing and VisualThinking,"Donis-Keller, Helen",[],[],"[393, 236, 631, 148, 695, 684, 242, 175, 897, ..."


In [6]:
# Write the table to disk, leaving out the DataFrame index.
course_semesters_csv_path = '../data/course-semesters.csv'
course_semesters_df.to_csv(course_semesters_csv_path, index=False)