In [49]:
import pandas as pd 
import numpy as np
import sqlite3
conn = sqlite3.connect('CourseData2.db')
c = conn.cursor()

In [50]:
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [51]:
%%sql
sqlite:///CourseData2.db

'Connected: @CourseData2.db'

In [52]:
%%sql
--Dropping tables for when we need to restart and clear CourseData database-- 
--Drop ERD tables--
DROP TABLE IF EXISTS SECTION;
DROP TABLE IF EXISTS CATALOG;
DROP TABLE IF EXISTS INSTRUCTOR;
DROP TABLE IF EXISTS COURSE_MEETING;

--Drop import tables--
DROP TABLE IF EXISTS import_course;
DROP TABLE IF EXISTS import_course_meeting;
DROP TABLE IF EXISTS import_course_catalog;

 * sqlite:///CourseData2.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.


[]

## Create Tables (Import and ERD) ##

In [53]:
%%sql
--Creating import_catalog table-- 

CREATE TABLE 'import_course_catalog' (
    program_code TEXT NOT NULL,
    program_name VARCHAR NOT NULL,
    catalog_id VARCHAR NOT NULL,
    course_title VARCHAR NOT NULL, 
    credits VARCHAR NOT NULL,
    prereqs VARCHAR, 
    coreqs VARCHAR,
    fees VARCHAR,
    attributes VARCHAR,
    description VARCHAR 
);

 * sqlite:///CourseData2.db
Done.


[]

In [54]:
%%sql
--Creating import_course_meeting table-- 

CREATE TABLE 'import_course_meeting' (
    term VARCHAR NOT NULL,
    crn INTEGER NOT NULL,
    location VARCHAR NOT NULL,
    day TEXT NOT NULL,
    start VARCHAR NOT NULL,
    end VARCHAR NOT NULL 
);

 * sqlite:///CourseData2.db
Done.


[]

In [55]:
%%sql
--Creating import_course table-- 

CREATE TABLE 'import_course' (
    term VARCHAR NOT NULL,
    crn INTEGER NOT NULL,
    catalog_id VARCHAR NOT NULL,
    section VARCHAR NOT NULL,
    credits VARCHAR,
    title VARCHAR, 
    meetings VARCHAR,
    timecodes VARCHAR, 
    primary_instructor TEXT,
    cap INTEGER, 
    act INTEGER, 
    rem INTEGER 
);

 * sqlite:///CourseData2.db
Done.


[]

In [56]:
%%sql
-- Creating table INSTRUCTOR with surrogate primary key-- 

CREATE TABLE 'INSTRUCTOR' (
    InID INTEGER NOT NULL PRIMARY KEY,
    Name TEXT NOT NULL
);

--Creating table CATALOG with CatID as the primary key-- 

CREATE TABLE 'CATALOG'(
    CatalogClassifier INTEGER NOT NULL PRIMARY KEY,
    CatID VARCHAR,
    Title VARCHAR NOT NULL,
    Description TEXT,
    Credits TEXT NOT NULL,
    PName TEXT NOT NULL,
    PCode TEXT,
    PREREQS TEXT,
    COREQS TEXT,
    FEES INTEGER,
    Attribute TEXT
    
);

--Creating table SECTION with surrogate primary key-- 

CREATE TABLE 'SECTION'(
    SID INTEGER NOT NULL PRIMARY KEY,
    CRN INTEGER NOT NULL,
    Term TEXT NOT NULL,
    Section VARCHAR NOT NULL,
    Cap INTEGER,
    Act INTEGER,
    Rem INTEGER,
    CatID VARCHAR,
    CatalogClassifier INTEGER, 
    InID INTEGER,
    FOREIGN KEY (CatalogClassifier) REFERENCES CATALOG(CatalogClassifier),
    FOREIGN KEY (InID) REFERENCES INSTRUCTOR(InID)
);

--Creating table COURSE_MEETING with CatalogClassifier as the Primary Key-- 

CREATE TABLE 'COURSE_MEETING' (
    MID INTEGER NOT NULL PRIMARY KEY,
    CRN INTEGER NOT NULL,
    Term TEXT NOT NULL,
    Start TEXT NOT NULL,
    Location VARCHAR NOT NULL,
    Day TEXT NOT NULL,
    End TEXT NOT NULL,
    FOREIGN KEY (CRN) REFERENCES SECTION(CRN),
    FOREIGN KEY (Term) REFERENCES SECTION(Term)
);

 * sqlite:///CourseData2.db
Done.
Done.
Done.
Done.


[]

## Populate the Import Tables with the Data from the csv files ##

**Creating dataframes for the import course**

In [57]:
## populating import_course table
fall = ['SourceData/Fall2014/courses.csv','SourceData/Fall2015/courses.csv', 'SourceData/Fall2016/courses.csv',
        'SourceData/Fall2017/courses.csv','SourceData/Fall2018/courses.csv']
dff0 = pd.read_csv(fall[0])
dff1 = pd.read_csv(fall[1])
dff2 = pd.read_csv(fall[2])    
dff3 = pd.read_csv(fall[3])
dff4 = pd.read_csv(fall[4])

In [58]:
df_fall = pd.concat([dff0, dff1, dff2, dff3, dff4], axis=0)
df_fall.sample(5)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
1234,Fall2018,75188,PS 0115L,03,1.0,General Physics I Lab,"[{'days': 'W', 'times': '0200pm-0430pm', 'date...",['W 0200pm-0430pm 09/04-12/10 BNW 162'],Bidyut B. Das,24,22,2
32,Fall2016,78767,AC 0343,03,3.0,Federal Income Taxation I,"[{'days': 'MR', 'times': '1100am-1215pm', 'dat...",['MR 1100am-1215pm 09/06-12/12 DSB 105'],Ahmed M. Ebrahim,31,31,0
724,Fall2014,72086,HI 0274,A,3.0,Historical Perspectives on Contemporary Global...,"[{'days': 'TF', 'times': '0930am-1045am', 'dat...",['TF 0930am-1045am 09/02-12/08 CNS 104'],David W. McFadden,20,18,2
189,Fall2015,76955,BB 0115,01,0.0,SHRM Learning Systems,"[{'days': 'T', 'times': '0600pm-0900pm', 'date...",['T 0600pm-0900pm 09/15-12/01 BNW 334'],Raymond T. Lamanna,20,16,4
54,Fall2018,74240,AY 0110,01,3.0,Biological Anthropology,"[{'days': 'W', 'times': '0200pm-0430pm', 'date...",['W 0200pm-0430pm 09/04-12/10 DMH 349'],Blaire Hensley-Marschand,28,28,0


In [59]:
df_fall.describe()

Unnamed: 0,crn,cap,act,rem
count,7486.0,7486.0,7486.0,7486.0
mean,74523.769169,18.39487,16.310446,2.219076
std,2512.545835,14.41373,9.863494,17.175757
min,70001.0,0.0,-1.0,-75.0
25%,72213.0,12.0,8.0,0.0
50%,74848.5,19.0,16.0,1.0
75%,76698.75,25.0,24.0,5.0
max,79215.0,500.0,90.0,999.0


In [60]:
spring = ['SourceData/Spring2015/courses.csv','SourceData/Spring2016/courses.csv', 'SourceData/Spring2017/courses.csv',
        'SourceData/Spring2018/courses.csv','SourceData/Spring2019/courses.csv', 'SourceData/SpringBreak2017/courses.csv']
dfs0 = pd.read_csv(spring[0])
dfs1 = pd.read_csv(spring[1])
dfs2 = pd.read_csv(spring[2])    
dfs3 = pd.read_csv(spring[3])
dfs4 = pd.read_csv(spring[4])
dfs5 = pd.read_csv(spring[5])

In [61]:
df_spring = pd.concat([dfs0,dfs1,dfs2,dfs3,dfs4,dfs5], axis=0)
df_spring.sample(5)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
620,Spring2018,34543,HI 0010,10,3.0,Origins of the Modern World Since 1500,"[{'days': 'R', 'times': '0500pm-0730pm', 'date...",['R 0500pm-0730pm 01/16-05/01 CNS 104'],Elizabeth Hohl,25,25,0
446,Spring2017,31004,EN 0012,4,3.0,Texts and Contexts II: Writing About Literature,"[{'days': 'MR', 'times': '0200pm-0315pm', 'dat...",['MR 0200pm-0315pm 01/17-05/02 DMH 331'],Timothy Smith,19,19,0
274,Spring2017,31175,CO 0309,1,3.0,Research Projects in Communication: Capstone,"[{'days': 'MW', 'times': '0630pm-0745pm', 'dat...",['MW 0630pm-0745pm 01/17-05/02 DMH 348'],Colleen E. Arendt,16,14,2
153,Spring2017,31915,BI 0171P,9,0.0,General Biology II PLG,"[{'days': 'R', 'times': '0300pm-0350pm', 'date...",['R 0300pm-0350pm 01/17-05/02 BNW 319'],TBA,10,10,0
861,Spring2018,34506,MA 0300,1,3.0,Topics in Discrete Mathematics,"[{'days': 'TF', 'times': '1230pm-0120pm', 'dat...","['TF 1230pm-0120pm 01/16-05/01 BNW 124', 'W 01...",Janet Striuli,20,4,16


In [62]:
df_spring.describe()

Unnamed: 0,crn,cap,act,rem
count,7122.0,7122.0,7122.0,7122.0
mean,35212.001966,18.123982,15.658804,2.465178
std,2618.670948,10.87664,10.342885,6.4971
min,31001.0,0.0,0.0,-47.0
25%,32773.25,11.0,7.0,0.0
50%,34835.0,19.0,16.0,1.0
75%,37693.75,25.0,24.0,4.0
max,41702.0,100.0,90.0,89.0


In [63]:
summer = ['SourceData/Summer2015/courses.csv', 'SourceData/Summer2016/courses.csv',
        'SourceData/Summer2017/courses.csv','SourceData/Summer2018/courses.csv']
dfsm0 = pd.read_csv(summer[0])
dfsm1 = pd.read_csv(summer[1])
dfsm2 = pd.read_csv(summer[2])    
dfsm3 = pd.read_csv(summer[3])

In [64]:
df_summer = pd.concat([dfsm0,dfsm1,dfsm2,dfsm3], axis=0)
df_summer.sample(5)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
247,Summer2018,58397,OM 0400,OL1,3.0,Principles of Business Analytics,[],[],Neal A. Lewis,20,17,3
116,Summer2017,57148,IN 0107,01N,0.0,Color Design,"[{'days': 'TR', 'times': '0930am-1230pm', 'dat...","['TR 0930am-1230pm 05/30-06/08 DLH 107', 'T 09...",Robert A. Hardy,3,3,0
201,Summer2015,55141,NS 0272C,D,0.0,Geriatric Nursing Clinical,"[{'days': 'MTW', 'times': '0700am-0200pm', 'da...",['MTW 0700am-0200pm 07/13-07/22 LCH '],Mary Anne Caserta,8,7,1
120,Summer2018,58441,IS 0550,01,3.0,Business Analytics and Big Data Management,"[{'days': 'MR', 'times': '0600pm-0930pm', 'dat...","['MR 0600pm-0930pm 06/04-06/30 DSB 108', 'S 08...",Jie Tao,20,5,15
72,Summer2015,55009,EC 0011,A,3.0,Introduction to Microeconomics,"[{'days': 'MTWRF', 'times': '0930am-1230pm', '...",['MTWRF 0930am-1230pm 07/06-07/17 DMH 350'],Philip J. Lane,0,15,-15


In [65]:
df_summer.describe()

Unnamed: 0,crn,cap,act,rem
count,1224.0,1224.0,1224.0,1224.0
mean,56704.305556,14.843954,10.913399,3.928922
std,1143.898206,10.771969,9.124774,6.881239
min,55001.0,0.0,-1.0,-24.0
25%,55382.75,6.0,5.0,0.0
50%,56517.5,16.0,9.0,3.0
75%,58008.5,20.0,15.0,9.0
max,58477.0,60.0,59.0,24.0


In [66]:
winter = ['SourceData/Winter2015/courses.csv', 'SourceData/Winter2016/courses.csv',
        'SourceData/Winter2017/courses.csv','SourceData/Winter2018/courses.csv']
dfw0 = pd.read_csv(winter[0])
dfw1 = pd.read_csv(winter[1])
dfw2 = pd.read_csv(winter[2])    
dfw3 = pd.read_csv(winter[3])

In [67]:
df_winter = pd.concat([dfw0,dfw1,dfw2,dfw3], axis=0)
df_winter.sample(5)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
3,Winter2018,11806,BI 0075,OL1,3.000,Ecology and Society,[],[],Thomas B. Cunningham,24,22,2
22,Winter2018,11814,PO 0133,OL1,3.000,United States Foreign Policy,[],[],Lucrecia Garcia Iommi,20,11,9
15,Winter2016,11601,IS 0500,01,3.000,Information Systems and Database Management,"[{'days': 'MTWR', 'times': '0600pm-0930pm', 'd...","['MTWR 0600pm-0930pm 01/04-01/16 DSB 107', 'S ...",Yasin Ozcelik,20,11,9
4,Winter2018,11801,BU 0211,01,3.000,Legal Environment of Business,"[{'days': 'MTWRF', 'times': '0830am-0430pm', '...",['MTWRF 0830am-0430pm 01/04-01/10 DSB 109'],Sharlene A. McEvoy,25,15,10
20,Winter2018,11824,NS 0687,01,1.000-6.000,DNP Immersion,[],[],Joyce Muriel Shea,30,3,27


In [68]:
df_winter.describe()

Unnamed: 0,crn,cap,act,rem
count,105.0,105.0,105.0,105.0
mean,11524.266667,17.133333,12.342857,4.790476
std,355.382575,7.612069,6.58499,6.207463
min,10893.0,0.0,0.0,-3.0
25%,11603.0,15.0,9.0,0.0
50%,11633.0,20.0,12.0,4.0
75%,11802.0,20.0,18.0,9.0
max,11841.0,30.0,25.0,27.0


In [69]:
df_course = pd.concat([df_fall, df_spring, df_summer, df_winter], axis=0)
df_course.sample(5)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
18,Summer2017,57229,AE 0293,01,3.0,Ethics of War and Peace,"[{'days': 'MTR', 'times': '0645pm-0955pm', 'da...",['MTR 0645pm-0955pm 07/10-08/04 DSB 104'],Norma Schmidt,25,10,15
207,Summer2018,58210,NS 0314C,02,0.0,Maternal and Newborn Nursing Clinical,"[{'days': 'MWF', 'times': '0700am-0200pm', 'da...",['MWF 0700am-0200pm 06/18-06/29 STAMH '],Kelly Ann Sullivan,7,7,0
1257,Spring2017,31053,PY 0576,01,3.0,Practicum I: Assessment and Group Process,"[{'days': 'T', 'times': '0200pm-0400pm', 'date...",['T 0200pm-0400pm 01/17-05/12 CNS 10'],Paula Gill Lopez,4,4,0
639,Spring2019,38039,GD 0202,01,3.0,Graphic Design II: Clients and Collaboration,"[{'days': 'M', 'times': '0200pm-0430pm', 'date...",['M 0200pm-0430pm 01/22-04/30 XVR 15'],Matthew P. Durand,14,12,2
390,Fall2014,70244,MD 0300,A,3.0,Introduction to Educational Technology,"[{'days': 'M', 'times': '0455pm-0655pm', 'date...",['M 0455pm-0655pm 09/02-12/08 DMH 148'],Jay L. Rozgonyi,18,7,11


In [70]:
df_course.describe()

Unnamed: 0,crn,cap,act,rem
count,15937.0,15937.0,15937.0,15937.0
mean,55172.299178,17.992784,15.578591,2.477317
std,19308.879968,12.671906,10.10769,12.709781
min,10893.0,0.0,-1.0,-75.0
25%,35426.0,10.0,7.0,0.0
50%,57145.0,19.0,15.0,1.0
75%,74541.0,25.0,23.0,5.0
max,79215.0,500.0,90.0,999.0


In [71]:
df_course.to_sql('import_course', conn, if_exists='append', index=False)

In [72]:
%%sql
SELECT *
FROM import_course
LIMIT 10;

 * sqlite:///CourseData2.db
Done.


term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
Fall2014,70384,AC 0011,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0800am-0915am 09/02-12/08 DSB 105'],Michael P. Coyne,0,31,-31
Fall2014,70385,AC 0011,C02,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0930am-1045am 09/02-12/08 DSB 105'],Michael P. Coyne,0,31,-31
Fall2014,70382,AC 0011,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 1230pm-0145pm 09/02-12/08 DSB 105'],Michael P. Coyne,0,31,-31
Fall2014,70291,AC 0011,C04,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1100am-1215pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1100am-1215pm 09/02-12/08 DSB 111'],Rebecca I. Bloch,0,29,-29
Fall2014,70350,AC 0011,C05,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1230pm-0145pm 09/02-12/08 DSB 111'],Rebecca I. Bloch,0,30,-30
Fall2014,70381,AC 0011,C06,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '0330pm-0445pm', 'dates': '09/02-12/08', 'location': 'DSB 110A'}]",['MR 0330pm-0445pm 09/02-12/08 DSB 110A'],Rebecca I. Bloch,0,31,-31
Fall2014,70383,AC 0011,G,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 0930am-1045am 09/02-12/08 DSB 111'],Paul Caster,30,31,-1
Fall2014,70391,AC 0011,H,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 110A'}]",['TF 0930am-1045am 09/02-12/08 DSB 110A'],Jo Ann Drusbosky,30,32,-2
Fall2014,71105,AC 0011,I,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '1100am-1215pm', 'dates': '09/02-12/08', 'location': 'DSB 110A'}]",['TF 1100am-1215pm 09/02-12/08 DSB 110A'],Jo Ann Drusbosky,30,33,-3
Fall2014,71123,AC 0011,J,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0200pm-0315pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0200pm-0315pm 09/02-12/08 DSB 105'],Jo Ann Drusbosky,30,32,-2


**Creating dataframes for the import course_meeting**

In [73]:
cm_fall = ['SourceData/Fall2014/course_meetings.csv', 'SourceData/Fall2015/course_meetings.csv',
           'SourceData/Fall2016/course_meetings.csv', 'SourceData/Fall2017/course_meetings.csv',
          'SourceData/Fall2018/course_meetings.csv']
dfcmf1 = pd.read_csv(cm_fall[0])
dfcmf2  = pd.read_csv(cm_fall[1])
dfcmf3  = pd.read_csv(cm_fall[2])
dfcmf4  = pd.read_csv(cm_fall[3])
dfcmf5  = pd.read_csv(cm_fall[4])

In [74]:
df_cmfall = pd.concat([dfcmf1, dfcmf2, dfcmf3, dfcmf4, dfcmf5], axis=0)
df_cmfall.sample(5)

Unnamed: 0,term,crn,location,day,start,end
23293,Fall2017,71410,BNW 335,R,2017-10-12T12:30:00,2017-10-12T13:20:00
6031,Fall2014,70215,CNS 303,R,2014-09-18T09:30:00,2014-09-18T10:45:00
28606,Fall2018,75671,BNW 138,T,2018-11-13T11:00:00,2018-11-13T12:15:00
2037,Fall2015,76788,BNW 137,M,2015-12-07T15:30:00,2015-12-07T16:45:00
17663,Fall2018,75729,BNW 137,M,2018-10-08T18:30:00,2018-10-08T21:00:00


In [75]:
cm_spring = ['SourceData/Spring2015/course_meetings.csv',
           'SourceData/Spring2016/course_meetings.csv', 'SourceData/Spring2017/course_meetings.csv',
          'SourceData/Spring2018/course_meetings.csv', 'SourceData/Spring2019/course_meetings.csv']
dfcms1 = pd.read_csv(cm_spring[0])
dfcms2  = pd.read_csv(cm_spring[1])
dfcms3  = pd.read_csv(cm_spring[2])
dfcms4  = pd.read_csv(cm_spring[3])
dfcms5  = pd.read_csv(cm_spring[4])


In [76]:
df_cmspring = pd.concat([dfcms1, dfcms2, dfcms3, dfcms4, dfcms5], axis=0)
df_cmspring.sample(5)

Unnamed: 0,term,crn,location,day,start,end
12478,Spring2016,37770,CNS 104,R,2016-04-07T09:30:00,2016-04-07T10:20:00
14130,Spring2017,32215,DLH 107,F,2017-02-10T09:30:00,2017-02-10T12:00:00
11222,Spring2017,31159,DSB 114,T,2017-04-11T09:30:00,2017-04-11T10:45:00
19781,Spring2019,37672,NHS 301,T,2019-02-26T08:30:00,2019-02-26T14:00:00
4369,Spring2016,38306,BNW 254,R,2016-02-04T10:00:00,2016-02-04T10:50:00


In [77]:
cm_springbreak = ['SourceData/SpringBreak2017/course_meetings.csv']

dfcmsb1 = pd.read_csv(cm_springbreak[0])


In [78]:
cm_summer = ['SourceData/Summer2015/course_meetings.csv',
           'SourceData/Summer2016/course_meetings.csv', 'SourceData/Summer2017/course_meetings.csv',
          'SourceData/Summer2018/course_meetings.csv']
dfcmsu1 = pd.read_csv(cm_summer[0])
dfcmsu2  = pd.read_csv(cm_summer[1])
dfcmsu3  = pd.read_csv(cm_summer[2])
dfcmsu4  = pd.read_csv(cm_summer[3])

In [79]:
df_cmsummer = pd.concat([dfcmsu1, dfcmsu2, dfcmsu3, dfcmsu4], axis=0)
df_cmspring.sample(5)

Unnamed: 0,term,crn,location,day,start,end
18501,Spring2015,33915,JGS 25,R,2015-03-05T14:00:00,2015-03-05T15:15:00
1599,Spring2015,32289,BLM LL105,M,2015-03-02T12:00:00,2015-03-02T12:50:00
24349,Spring2015,32763,CNS 202,R,2015-04-23T08:00:00,2015-04-23T09:15:00
13679,Spring2018,34934,DSB 115,W,2018-01-31T18:30:00,2018-01-31T21:30:00
11424,Spring2019,38762,XVR 15,M,2019-02-11T17:00:00,2019-02-11T19:30:00


In [80]:
cm_winter = ['SourceData/Winter2015/course_meetings.csv',
           'SourceData/Winter2016/course_meetings.csv', 'SourceData/Winter2017/course_meetings.csv',
          'SourceData/Winter2018/course_meetings.csv']
dfcmw1 = pd.read_csv(cm_winter[0])
dfcmw2  = pd.read_csv(cm_winter[1])
dfcmw3  = pd.read_csv(cm_winter[2])
dfcmw4  = pd.read_csv(cm_winter[3])

In [81]:
df_cmwinter = pd.concat([dfcmw1, dfcmw2, dfcmw3, dfcmw4], axis=0)
df_cmwinter.sample(5)

Unnamed: 0,term,crn,location,day,start,end
77,Winter2015,10908,DSB 111,M,2015-01-12T18:00:00,2015-01-12T21:30:00
68,Winter2016,11617,DSB 109,S,2016-01-09T08:30:00,2016-01-09T16:30:00
176,Winter2017,11726,CNS 108,R,2017-01-12T16:30:00,2017-01-12T19:30:00
75,Winter2017,11720,CNS 202,R,2017-01-05T09:00:00,2017-01-05T15:00:00
40,Winter2018,11834,DSB 106,R,2018-01-11T09:00:00,2018-01-11T17:30:00


In [82]:
df_coursemeeting = pd.concat([df_cmfall,df_cmspring,dfcmsb1,df_cmsummer,df_cmwinter],axis=0)

In [83]:
df_coursemeeting.to_sql('import_course_meeting', conn, if_exists='append', index=False)

In [84]:
%%sql
SELECT *
FROM import_course_meeting
LIMIT 10;

 * sqlite:///CourseData2.db
Done.


term,crn,location,day,start,end
Fall2014,70384,DSB 105,T,2014-09-02T08:00:00,2014-09-02T09:15:00
Fall2014,70384,DSB 105,F,2014-09-05T08:00:00,2014-09-05T09:15:00
Fall2014,70384,DSB 105,T,2014-09-09T08:00:00,2014-09-09T09:15:00
Fall2014,70384,DSB 105,F,2014-09-12T08:00:00,2014-09-12T09:15:00
Fall2014,70384,DSB 105,T,2014-09-16T08:00:00,2014-09-16T09:15:00
Fall2014,70384,DSB 105,F,2014-09-19T08:00:00,2014-09-19T09:15:00
Fall2014,70384,DSB 105,T,2014-09-23T08:00:00,2014-09-23T09:15:00
Fall2014,70384,DSB 105,F,2014-09-26T08:00:00,2014-09-26T09:15:00
Fall2014,70384,DSB 105,T,2014-09-30T08:00:00,2014-09-30T09:15:00
Fall2014,70384,DSB 105,F,2014-10-03T08:00:00,2014-10-03T09:15:00


**Creating the dataframes for import_course_catalog**

In [85]:
cc2017_2018 = ['SourceData/Catalogs/CourseCatalog2017_2018.csv']
dfcc17_18 = pd.read_csv(cc2017_2018[0])
dfcc17_18.sample(5)

Unnamed: 0,program_code,program_name,catalog_id,course_title,credits,prereqs,coreqs,fees,attributes,description
870,RS,Religious Studies,RS 0241,Encountering God in Medieval Christian Thought,3 Credits,RS 0101.,,,CARS Catholic Studies: Religious Studies,This course presents a historical overview of ...
19,AR,Arabic,AR 0210,Intermediate Modern Standard Arabic I,3 Credits,AR 0111.,,,,"This course, the first in a two-semester seque..."
1074,IS,Information Systems,IS 0399,Independent Study,3 Credits,,,,,Students pursue topics of special interest thr...
255,IT,Italian,IT 0377,Internship,3 Credits,,,,ISIF Italian Studies: Italy-Focused,The internship program gives students first-ha...
1984,EN,English,EN 0142,Myths and Legends of Ireland and Britain,3 Credits,"EN 0011, EN 0012.",,,E_BF English Literature Before 1800,This course studies the literature of early me...


In [86]:
cc2018_2019 = ['SourceData/Catalogs/CourseCatalog2018_2019.csv']
dfcc18_19 = pd.read_csv(cc2018_2019[0])
dfcc18_19.sample(5)

Unnamed: 0,program_code,program_name,catalog_id,course_title,credits,prereqs,coreqs,fees,attributes,description
2131,EN,English,EN 0263,African American Women Writers,3 Credits,One 100-level English literature course.,,,"ASEN American Studies: Literature, ASUP Americ...",This course offers a survey of writing by Afri...
1850,CO,Communication,CO 0239,Consumer Culture,3 Credits,CO 0130.,,,UDIV U.S. Diversity,This course explores how social meanings are c...
396,RLD,Reading and Language Development,RLD 0529,Principles of Structured Literacy II: Essentia...,3 Credits,RLD 0429 or SE 0429.,,,,This course covers vocabulary and comprehensio...
2175,EN,English,EN 0336,Seminar on Toni Morrison,3 Credits,One 100-level or 200-level English literature ...,,,"ASEN American Studies: Literature, BSFC Black ...",A comprehensive study of the works of Toni Mor...
1606,CI,Chinese,CI 0211,Intermediate Chinese II,3 Credits,CI 0210.,,,,"This course, a continuation of"


In [87]:
df_catalog = pd.concat([dfcc17_18,dfcc18_19], axis=0)
df_catalog.sample(5)

Unnamed: 0,program_code,program_name,catalog_id,course_title,credits,prereqs,coreqs,fees,attributes,description
1213,HI,History,HI 0216,Rise of the British Empire,3 Credits,HI 0010 or CL 0115 or CL 0116.,,,"H_BF History Before 1750, H_EU European History",This course examines the history of Britain an...
1724,EN,English,EN 0116,Rome in the Cultural Imagination,3 Credits,"EN 0011, EN 0012.",,,"E_BF English Literature Before 1800, ISIF Ital...",The city of Rome has been a source of wonder a...
2137,HI,History,HI 0391,The Meanings of History,3 Credits,HI 0010 or CL 0115 or CL 0116.,,,"H_AF History After 1750, H_EU European History...",This upper-division seminar for juniors and se...
1147,ME,Mechanical Engineering,ME 0382,Independent Study: Advanced Mechanical Project,1-3 Credits,Completion of non-elective mechanical engineer...,,,,During this design course emphasizing individu...
1917,ED,Education,ED 0529,Critical Literacies Beyond the Classroom Walls,3 Credits,Teaching certification.,,,,Designed for K-12 teachers and community leade...


In [88]:
df_catalog.to_sql('import_course_catalog', conn, if_exists='append', index=False)

In [89]:
%%sql
SELECT *
FROM import_course_catalog
LIMIT 10;

 * sqlite:///CourseData2.db
Done.


program_code,program_name,catalog_id,course_title,credits,prereqs,coreqs,fees,attributes,description
AN,Asian Studies,AN 0301,Independent Study,1-3 Credits,,,,,Students undertake an individualized program of study in consultation with a director from the Asian studies faculty.
AN,Asian Studies,AN 0310,Asian Studies Seminar,3 Credits,,,,,"This seminar examines selected topics concerning Asia. This course is taught in conjunction with another 100-300 level course from a rotation of course offerings. Consult the Asian Studies director to identify the conjoined course for a given semester. The seminar concentrates on topics within the parameters of the conjoined course syllabus but adds research emphasis. Students registered for this course must complete a research project, to include 300-level research, in addition to the regular research requirements of the conjoined course, and a 25-50 page term paper in substitution of some portion of the conjoined course requirements, as determined by the instructor. Open to juniors and seniors only."
BU,Business,BU 0211,Legal Environment of Business,3 Credits,Junior standing.,,,,"This course examines the broad philosophical as well as practical nature and function of the legal system, and introduces students to the legal and social responsibilities of business. The course includes an introduction to the legal system, the federal courts, Constitutional law, the United States Supreme Court, the civil process, and regulatory areas such as employment discrimination, protection of the environment, and corporate governance and securities markets."
BU,Business,BU 0220,Environmental Law and Policy,3 Credits,,,,"EVME Environmental Studies Major Elective, EVPE Environmental Studies Elective, EVSS Environmental Studies: Social Science, MGEL Management: General Elective","This course surveys issues arising out of federal laws designed to protect the environment and manage resources. It considers in detail the role of the Environmental Protection Agency in the enforcement of environmental policies arising out of such laws as the National Environmental Policy Act, the Clean Water Act, and the Clear Air Act, among others. The course also considers the impact of Congress, political parties, bureaucracy, and interest groups in shaping environmental policy, giving special attention to the impact of environmental regulation on business and private property rights."
BU,Business,BU 0311,"The Law of Contracts, Sales, and Property",3 Credits,BU 0211.,,,,"This course examines the components of common law contracts including the concepts of offer and acceptance, consideration, capacity and legality, assignment of rights and delegation of duties, as well as discharge of contracts. The course covers Articles 2 and 2A of the Uniform Commercial Code relating to leases, sales of goods, and warranties. The course also considers personal and real property, and bailments."
BU,Business,BU 0312,The Law of Business Organizations and Financial Transactions,3 Credits,BU 0211.,,,,"This course offers an analysis of legal principles related to the law of agency, sole proprietorships, partnerships, corporations, limited liability companies, and other business forms. The second half of the course addresses several sections of the Uniform Commercial Code, such as negotiable instruments, bank collections and deposits and secured transactions. Finally, the course examines the law of suretyship, debtor-creditor relationships, and bankruptcy."
BU,Business,BU 0320,Employment Law and Discrimination in the Workplace,3 Credits,,,,"MGEL Management: General Elective, UDIV U.S. Diversity","This course examines a variety of legal issues related to the workplace including the doctrine of employment at will, employee privacy, and the history and development of labor unions and the legal protections afforded by the National Labor Relations Act. A study of the role of the Civil Rights Act of 1964 and the Equal Employment Opportunity Commission in eradicating discrimination based on race, sex, religion, national origin, age, and disability occupies a major portion of the course. Other employment issues include affirmative action, worker safety, and compensation."
BU,Business,BU 0391,Seminar in Business Law and Ethics,3 Credits,"AE 0291, BU 0211, two additional courses in law or applied ethics.",,,,This interdisciplinary study of these two aspects of the business environment is cross-listed as
BL,Black Studies,BL 0101,Black Lives Matter,3 Credits,,,,"ASGW American Studies: Gateway, BSFC Black Studies Focus Course, BSSS Black Studies: Social and Behavioral Sciences, PJST Peace and Justice Studies, UDIV U.S. Diversity","In the context of Ferguson, Charleston, and other national crises, this course responds to the call of students from our campus community to raise questions about and critically reflect upon the failures of democracy to recognize the value of Black Life. This course employs collective thinking, teaching, and research to focus on questions surrounding race, structural inequality, and violence. It examines the historical, geographical, cultural, social, and political ways in which race has been configured and deployed in the United States. Various faculty will bring to bear their respective scholarly lenses so that students understand race and racism across intellectual disciplines."
BL,Black Studies,BL 0398,Independent Study,1-3 Credits,,,,BSCP Black Studies Capstone Course,"Upon request and by agreement with a professor in the program, a Black Studies minor may conduct a one-semester independent study on a defined research topic or field of study."


## Populate ERD Tables with data from Import Tables ##

In [90]:
%%sql

--Populating Course_Meeting table using import_course_meeting data-- 

INSERT INTO Course_Meeting (CRN, Term, Start, Location, Day, End)
SELECT DISTINCT crn, term, start, location, day, end 
FROM import_course_meeting;

 * sqlite:///CourseData2.db
Done.


[]

In [91]:
%%sql 

--Checking data population of Course_Meeting table-- 

SELECT * 
FROM Course_Meeting
LIMIT 5;

 * sqlite:///CourseData2.db
Done.


MID,CRN,Term,Start,Location,Day,End
1,70384,Fall2014,2014-09-02T08:00:00,DSB 105,T,2014-09-02T09:15:00
2,70384,Fall2014,2014-09-05T08:00:00,DSB 105,F,2014-09-05T09:15:00
3,70384,Fall2014,2014-09-09T08:00:00,DSB 105,T,2014-09-09T09:15:00
4,70384,Fall2014,2014-09-12T08:00:00,DSB 105,F,2014-09-12T09:15:00
5,70384,Fall2014,2014-09-16T08:00:00,DSB 105,T,2014-09-16T09:15:00


In [92]:
%%sql 

--Populating Catalog table using import_course_catalog data--  

INSERT INTO CATALOG (CatID, Title, Description, Credits, PName, PCode, PREREQS, COREQS, FEES, Attribute)
SELECT DISTINCT catalog_id, course_title, description, credits, program_name, program_code, prereqs, coreqs, fees, attributes
FROM import_course_catalog;

 * sqlite:///CourseData2.db
Done.


[]

In [93]:
%%sql 

--Checking data population of Catalog table-- 

SELECT * 
FROM CATALOG
LIMIT 5;

 * sqlite:///CourseData2.db
Done.


CatalogClassifier,CatID,Title,Description,Credits,PName,PCode,PREREQS,COREQS,FEES,Attribute
1,AN 0301,Independent Study,Students undertake an individualized program of study in consultation with a director from the Asian studies faculty.,1-3 Credits,Asian Studies,AN,,,,
2,AN 0310,Asian Studies Seminar,"This seminar examines selected topics concerning Asia. This course is taught in conjunction with another 100-300 level course from a rotation of course offerings. Consult the Asian Studies director to identify the conjoined course for a given semester. The seminar concentrates on topics within the parameters of the conjoined course syllabus but adds research emphasis. Students registered for this course must complete a research project, to include 300-level research, in addition to the regular research requirements of the conjoined course, and a 25-50 page term paper in substitution of some portion of the conjoined course requirements, as determined by the instructor. Open to juniors and seniors only.",3 Credits,Asian Studies,AN,,,,
3,BU 0211,Legal Environment of Business,"This course examines the broad philosophical as well as practical nature and function of the legal system, and introduces students to the legal and social responsibilities of business. The course includes an introduction to the legal system, the federal courts, Constitutional law, the United States Supreme Court, the civil process, and regulatory areas such as employment discrimination, protection of the environment, and corporate governance and securities markets.",3 Credits,Business,BU,Junior standing.,,,
4,BU 0220,Environmental Law and Policy,"This course surveys issues arising out of federal laws designed to protect the environment and manage resources. It considers in detail the role of the Environmental Protection Agency in the enforcement of environmental policies arising out of such laws as the National Environmental Policy Act, the Clean Water Act, and the Clear Air Act, among others. The course also considers the impact of Congress, political parties, bureaucracy, and interest groups in shaping environmental policy, giving special attention to the impact of environmental regulation on business and private property rights.",3 Credits,Business,BU,,,,"EVME Environmental Studies Major Elective, EVPE Environmental Studies Elective, EVSS Environmental Studies: Social Science, MGEL Management: General Elective"
5,BU 0311,"The Law of Contracts, Sales, and Property","This course examines the components of common law contracts including the concepts of offer and acceptance, consideration, capacity and legality, assignment of rights and delegation of duties, as well as discharge of contracts. The course covers Articles 2 and 2A of the Uniform Commercial Code relating to leases, sales of goods, and warranties. The course also considers personal and real property, and bailments.",3 Credits,Business,BU,BU 0211.,,,


In [94]:
%%sql 

--Populating Instructor table using import_courses data--  

INSERT INTO Instructor (Name)
SELECT DISTINCT primary_instructor
FROM import_course
WHERE primary_instructor <> 'TBA' AND primary_instructor NOT LIKE 'primary_instructor' AND primary_instructor NOT LIKE '%/';

 * sqlite:///CourseData2.db
Done.


[]

In [95]:
%%sql 

--Checking data population of Instructor table-- 

SELECT * 
FROM Instructor
LIMIT 5;

 * sqlite:///CourseData2.db
Done.


InID,Name
1,Michael P. Coyne
2,Rebecca I. Bloch
3,Paul Caster
4,Jo Ann Drusbosky
5,Arleen N. Kardos


In [96]:
%%sql 

--Populating the SECTION table using import_course data--
--Joining Instructor table using import_course and instructor table--

INSERT INTO SECTION (CRN, Term, Section, Cap, Act, Rem, CatID, InID)
SELECT crn, term, section, cap, act, rem, catalog_id, InID
FROM import_course
    JOIN Instructor ON (import_course.primary_instructor = Instructor.Name)
;

 * sqlite:///CourseData2.db
Done.


[]

In [97]:
%%sql 

--Checking data population of Section table--

SELECT * 
FROM Section
LIMIT 10;

 * sqlite:///CourseData2.db
Done.


SID,CRN,Term,Section,Cap,Act,Rem,CatID,CatalogClassifier,InID
1,70384,Fall2014,C01,0,31,-31,AC 0011,,1
2,70385,Fall2014,C02,0,31,-31,AC 0011,,1
3,70382,Fall2014,C03,0,31,-31,AC 0011,,1
4,70291,Fall2014,C04,0,29,-29,AC 0011,,2
5,70350,Fall2014,C05,0,30,-30,AC 0011,,2
6,70381,Fall2014,C06,0,31,-31,AC 0011,,2
7,70383,Fall2014,G,30,31,-1,AC 0011,,3
8,70391,Fall2014,H,30,32,-2,AC 0011,,4
9,71105,Fall2014,I,30,33,-3,AC 0011,,4
10,71123,Fall2014,J,30,32,-2,AC 0011,,4


## Drop Import Tables ##

In [98]:
%%sql
--Drop import tables--
DROP TABLE IF EXISTS import_course;
DROP TABLE IF EXISTS import_course_meeting;
DROP TABLE IF EXISTS import_course_catalog;

 * sqlite:///CourseData2.db
Done.
Done.
Done.


[]