#### Data Analysis with Pandas

In [6]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [7]:
# read all 5 pages of the schedule into a single pandas dataframe
# figure out which are grad and which are undergrad
# display the undergrad 8am classes
# display the undergrad friday classes


In [12]:
# pd.read_html hands back a list; the schedule DataFrame is its first element
website = 'https://ischool.syr.edu/classes/'
datalist = pd.read_html(io=website)
page_1 = datalist[0]

In [26]:
# Build and show the URL of each of the five schedule pages (page=1 .. page=5)
website = 'https://ischool.syr.edu/classes/?page='
for i in range(1, 6):
    link = f'{website}{i}'
    print(link)

https://ischool.syr.edu/classes/?page=1
https://ischool.syr.edu/classes/?page=2
https://ischool.syr.edu/classes/?page=3
https://ischool.syr.edu/classes/?page=4
https://ischool.syr.edu/classes/?page=5


In [30]:
# Read every schedule page and combine them into a single DataFrame.
# The per-page frames are collected in a list and concatenated once:
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop re-copies all accumulated rows on every iteration.

website = 'https://ischool.syr.edu/classes/?page='
pages = []

for i in range(1, 6):
    link = website + str(i)
    data = pd.read_html(link)   # list of DataFrames parsed from the page
    pages.append(data[0])       # the schedule is the first one

# ignore_index=True renumbers the rows 0..n-1 across all pages
classes = pd.concat(pages, ignore_index=True)
classes.sample(5)

# To set all nine column names at once:
# classes.columns = ['Course','Section','ClassNo','Credits','Title','Instructor','Time','Days','Room']

Unnamed: 0,Course,Section,Class,Credits,Title,Instructor(s),Time,Day,Room(s)
39,IST300,M002,37853,3.0,Information Security Policy,James Enwright,6:30pm - 7:50pm,TuTh,Hall of Languages 114
28,IST233,M006,37666,3.0,LAB: Intro to Computer Networking,S Bruce Boardman,1:50pm - 2:45pm,F,Hinds Hall 027
10,IDS660,M003,37759,3.0,Spring Break in Silicon Valley,John DuRoss Liddy,12:00am - 12:00am,,
172,IST664,M001,37701,3.0,Natural Language Processing,Lu Xiao,2:00pm - 4:45pm,Th,Hinds Hall 010
198,IST707,M405,37909,3.0,Data Analytics,Jeremy P Bolton,12:00am - 4:00pm,Su,Online Online


In [29]:
# (number of rows, number of columns) of the combined schedule frame
classes.shape

(244, 9)

In [127]:
# Undergrad courses are numbered 0-499 and grad courses 500 and up, but the
# schedule has no course-number column, so we engineer one from the course code.

# Normalise the scraped headers to the names used by the rest of the notebook.
# A mapping-based rename leaves already-renamed columns alone, so this cell
# can be re-run safely and does not depend on a rename done in another cell.
classes = classes.rename(columns={
    'Class': 'ClassNo',
    'Instructor(s)': 'Instructor',
    'Day': 'Days',
    'Room(s)': 'Room',
})

# Course codes look like 'IST352': everything after the first three
# characters is taken as the course number (kept as text for display).
course_num = classes['Course'].str[3:]

# Compare as numbers, not strings: lexicographically '1000' < '500' and
# '99' >= '500', which would put such courses in the wrong group.
# Anything that is not a number becomes NaN and keeps the 'none' type.
course_level = pd.to_numeric(course_num, errors='coerce')

# .assign returns a new frame and .loc writes into it directly, avoiding the
# chained assignment (frame['Type'][mask] = ...) that may write to a temporary copy.
classes = classes.assign(Course_num=course_num, Type='none')
classes.loc[course_level < 500, 'Type'] = 'Undergrad'
# NOTE(review): 'Grand' looks like a typo for 'Grad'; the label is kept
# unchanged so recorded outputs and any code filtering on it stay valid.
classes.loc[course_level >= 500, 'Type'] = 'Grand'

# Put the engineered columns right after the course code.
classes = classes[['Course','Course_num','Type','Section','ClassNo','Credits','Title','Instructor','Time','Days','Room']]

classes.sample(5)

Unnamed: 0,Course,Course_num,Type,Section,ClassNo,Credits,Title,Instructor,Time,Days,Room
232,IST810,810,Grand,M002,37710,2.0,Practicum in Research,Steven B Sawyer,9:30am - 11:25am,Tu,Hinds Hall 120
116,IST605,605,Grand,M405,37788,3.0,Reference& Info Literacy Svces,Amanda Bucher Albert,12:00am - 8:30pm,W,Online Online
199,IST707,707,Grand,M406,37910,3.0,Data Analytics,Ying Lin,12:00am - 10:30pm,W,Online Online
64,IST349,349,Undergrad,M001,37634,3.0,Human Comp. Interaction,Natalie C. LoRusso,12:30pm - 1:50pm,TuTh,Hinds Hall 111
89,IST455,455,Undergrad,M001,42590,3.0,Enterprise IT Consultation,Frank Jr Marullo,3:30pm - 4:50pm,TuTh,Hinds Hall 011


In [128]:
# tally how many class sections fall under each course type
classes['Type'].value_counts()

Grand        146
Undergrad     98
Name: Type, dtype: int64

In [140]:
# Undergrad classes that meet on a Friday: 'F' appears somewhere in Days
# (str.find gives -1 when it is absent), and the course type is 'Undergrad'.
on_friday = classes['Days'].str.find('F') >= 0
is_undergrad = classes['Type'] == 'Undergrad'
fridays = classes[on_friday & is_undergrad]
fridays.sample(5)

Unnamed: 0,Course,Course_num,Type,Section,ClassNo,Credits,Title,Instructor,Time,Days,Room
15,IST195,195,Undergrad,M004,37659,3.0,LAB: Information Technologies,Jeff Rubin,10:35am - 11:30am,F,Hinds Hall 010
6,IDS403,403,Undergrad,M001,37649,1.0,Startup Sandbox,John DuRoss Liddy,2:15pm - 5:05pm,F,Syracuse Technology Garden
7,IDS460,460,Undergrad,M002,37755,3.0,Entretech - NYC,John DuRoss Liddy,12:00am - 12:00am,MTuWThF,Courtyard Marriott Man Midtwn Online Online
18,IST195,195,Undergrad,M007,37662,3.0,LAB: Information Technologies,Jeff Rubin,1:50pm - 2:45pm,F,Hinds Hall 010
16,IST195,195,Undergrad,M005,37660,3.0,LAB: Information Technologies,Jeff Rubin,11:40am - 12:35pm,F,Hinds Hall 010


In [144]:
# Friday undergrad classes, leaving out the sections whose title starts with 'LAB:'
is_lab = fridays['Title'].str.startswith('LAB:')
friday_no_lab = fridays[~is_lab]
friday_no_lab

Unnamed: 0,Course,Course_num,Type,Section,ClassNo,Credits,Title,Instructor,Time,Days,Room
6,IDS403,403,Undergrad,M001,37649,1.0,Startup Sandbox,John DuRoss Liddy,2:15pm - 5:05pm,F,Syracuse Technology Garden
7,IDS460,460,Undergrad,M002,37755,3.0,Entretech - NYC,John DuRoss Liddy,12:00am - 12:00am,MTuWThF,Courtyard Marriott Man Midtwn Online Online
8,IDS460,460,Undergrad,M003,37756,3.0,Spring Break in Silicon Valley,John DuRoss Liddy,12:00am - 12:00am,MTuWThFSu,"Online Online Residence Inn by Marr, San Mat"
48,IST337,337,Undergrad,M001,37848,1.0,IM&T Support Practicum,Jeffrey Fouts,2:15pm - 5:00pm,F,Hinds Hall 117
99,IST486,486,Undergrad,M002,42625,3.0,Social Media in the Organiz.,Kristy Lee Hochenberger,9:30am - 10:25am,MWF,Hinds Hall 021


In [145]:
# Undergrad classes whose Time starts with '8:00am'.
# The notebook's goal is the *undergrad* 8am classes, so the course type is
# part of the filter. na=False treats a missing Time as "not 8am" instead of
# leaving NaN in the mask, which pandas refuses to index with.
starts_at_eight = classes['Time'].str.startswith('8:00am', na=False)
eight_am = classes[starts_at_eight & (classes['Type'] == 'Undergrad')]
eight_am

Unnamed: 0,Course,Course_num,Type,Section,ClassNo,Credits,Title,Instructor,Time,Days,Room
68,IST352,352,Undergrad,M006,37688,3.0,Info Analysis of Org. Systems,Alexander Corsello,8:00am - 9:20am,TuTh,Hinds Hall 117
91,IST466,466,Undergrad,M001,37621,3.0,Prof Issues/Info Mgmt & Tech,Bruce Kingma,8:00am - 9:20am,MW,Hinds Hall 243
