In [20]:
import pandas as pd 
import numpy as np
import sqlite3
# Open the SQLite database file used by this notebook. The same connection
# object is handed to DataFrame.to_sql further down for the CSV loads.
conn = sqlite3.connect(database='CourseData.db')

# Cursor bound to that connection.
c = conn.cursor()

## Run this to set up %sql and %%sql magics

In [3]:
# Load the sql extension that provides the %sql and %%sql magics used below.
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [4]:
%%sql
sqlite:///CourseData.db

'Connected: @CourseData.db'

## Drop Import Tables For Any Future Rerun of Code

In [77]:
%%sql
-- Remove the courses import table so this notebook can be rerun cleanly.
DROP TABLE IF EXISTS courses;

 * sqlite:///CourseData.db
Done.


[]

In [78]:
%%sql
-- Remove the cmeetings import table so this notebook can be rerun cleanly.
DROP TABLE IF EXISTS cmeetings;

 * sqlite:///CourseData.db
Done.


[]

In [79]:
%%sql
-- Remove the catalog import table so this notebook can be rerun cleanly.
DROP TABLE IF EXISTS catalog;

 * sqlite:///CourseData.db
Done.


[]

## Drop ERD Tables For Any Future Rerun of Code

In [135]:
%%sql
-- Remove the catalog_program ERD table before it is created again below.
DROP TABLE IF EXISTS catalog_program;

 * sqlite:///CourseData.db
Done.


[]

In [136]:
%%sql
-- Remove the catalog_course ERD table before it is created again below.
DROP TABLE IF EXISTS catalog_course;

 * sqlite:///CourseData.db
Done.


[]

In [137]:
%%sql
-- Remove the term ERD table before it is created again below.
DROP TABLE IF EXISTS term;

 * sqlite:///CourseData.db
Done.


[]

In [138]:
%%sql
-- Remove the course_offering ERD table before it is created again below.
DROP TABLE IF EXISTS course_offering;

 * sqlite:///CourseData.db
Done.


[]

In [139]:
%%sql
-- Remove the instructor ERD table before it is created again below.
DROP TABLE IF EXISTS instructor;

 * sqlite:///CourseData.db
Done.


[]

In [140]:
%%sql
-- Remove the course_meeting ERD table before it is created again below.
DROP TABLE IF EXISTS course_meeting;

 * sqlite:///CourseData.db
Done.


[]

In [141]:
%%sql
-- Remove the TIME_CODE ERD table before it is created again below.
-- The table is created with an underscore in its name, so the drop must
-- use time_code; dropping timecode left the real table in place and made
-- the later CREATE TABLE fail on a rerun.
DROP TABLE IF EXISTS time_code;

 * sqlite:///CourseData.db
Done.


[]

In [142]:
%%sql
-- Remove the location ERD table before it is created again below.
DROP TABLE IF EXISTS location;

 * sqlite:///CourseData.db
Done.


[]

## Create Import Tables

In [82]:
%%sql
-- Import table that receives the course catalog CSV rows as loaded.
CREATE TABLE 'catalog' (
    program_code  TEXT     NOT NULL,
    program_name  VARCHAR  NOT NULL,
    catalog_id    VARCHAR  NOT NULL,
    course_title  VARCHAR  NOT NULL,
    credits       VARCHAR  NOT NULL,
    -- The remaining columns are optional in the import data.
    prereqs       VARCHAR,
    coreqs        VARCHAR,
    fees          VARCHAR,
    attributes    VARCHAR,
    description   VARCHAR
);

 * sqlite:///CourseData.db
Done.


[]

In [81]:
%%sql
-- Import table that receives the course meeting CSV rows as loaded.
CREATE TABLE 'cmeetings' (
    term      VARCHAR  NOT NULL,
    crn       INTEGER  NOT NULL,
    location  VARCHAR  NOT NULL,
    day       TEXT     NOT NULL,
    -- Meeting start and end are kept as text exactly as they are imported.
    start     VARCHAR  NOT NULL,
    end       VARCHAR  NOT NULL
);

 * sqlite:///CourseData.db
Done.


[]

In [80]:
%%sql
-- Import table that receives the per-term courses CSV rows as loaded.
CREATE TABLE 'courses' (
    term                VARCHAR  NOT NULL,
    crn                 INTEGER  NOT NULL,
    catalog_id          VARCHAR  NOT NULL,
    section             VARCHAR  NOT NULL,
    -- Everything below is nullable in the import table.
    credits             VARCHAR,
    title               VARCHAR,
    meetings            VARCHAR,
    timecodes           VARCHAR,
    primary_instructor  TEXT,
    cap                 INTEGER,
    act                 INTEGER,
    rem                 INTEGER
);

 * sqlite:///CourseData.db
Done.


[]

## Create Database Tables From ERD

In [143]:
%%sql
-- ERD table for catalog programs, keyed by the CP_ID integer primary key.
CREATE TABLE CATALOG_PROGRAM (
    CP_ID         INTEGER  PRIMARY KEY,
    PROGRAM_CODE  TEXT     NOT NULL,
    PROGRAM_NAME  TEXT     NOT NULL,
    COURSE_TITLE  TEXT
);

 * sqlite:///CourseData.db
Done.


[]

In [144]:
%%sql
-- ERD table for catalog courses, keyed by the CC_ID integer primary key.
CREATE TABLE CATALOG_COURSE (
    CC_ID         INTEGER  PRIMARY KEY,
    CRN           INTEGER  NOT NULL,
    COURSE_TITLE  TEXT     NOT NULL,
    PRE_REQS      TEXT,
    CO_REQS       TEXT,
    -- Foreign key to the owning CATALOG_PROGRAM row.
    CP_ID         INTEGER  NOT NULL REFERENCES CATALOG_PROGRAM (CP_ID)
);

 * sqlite:///CourseData.db
Done.


[]

In [145]:
%%sql
-- ERD table for terms, keyed by the TERM_ID integer primary key.
CREATE TABLE TERM (
    TERM_ID    INTEGER  PRIMARY KEY,
    TERM_NAME  TEXT     NOT NULL,
    -- NOTE(review) CATALOG_COURSE.CRN is declared without PRIMARY KEY or
    -- UNIQUE, which SQLite expects of a parent key once foreign key
    -- enforcement is switched on. Confirm the intended parent column.
    CRN        INTEGER  NOT NULL REFERENCES CATALOG_COURSE (CRN)
);

 * sqlite:///CourseData.db
Done.


[]

In [148]:
%%sql
-- ERD table for course offerings with CRN, section, title and credits.
CREATE TABLE COURSE_OFFERING (
    CO_ID INTEGER PRIMARY KEY,
    CRN INTEGER NOT NULL,
    SECTION TEXT NOT NULL,
    -- The type and trailing comma were missing on this column, so SQLite
    -- read the following line as part of its type name and the CREDITS
    -- column was never created.
    COURSE_TITLE TEXT,
    CREDITS INTEGER NOT NULL,
    -- NOTE(review) CATALOG_COURSE.CRN is not declared PRIMARY KEY or UNIQUE,
    -- which SQLite expects of a parent key once foreign key enforcement is
    -- switched on. Confirm the intended parent column.
    FOREIGN KEY (CRN) REFERENCES CATALOG_COURSE (CRN)
);

 * sqlite:///CourseData.db
Done.


[]

In [149]:
%%sql
-- ERD table for instructors, keyed by the I_ID integer primary key.
CREATE TABLE INSTRUCTOR (
    I_ID   INTEGER  PRIMARY KEY,
    NAME   TEXT     NOT NULL,
    CRN    INTEGER  NOT NULL,
    -- Foreign key to the COURSE_OFFERING row this instructor is tied to.
    CO_ID  INTEGER  NOT NULL REFERENCES COURSE_OFFERING (CO_ID)
);

 * sqlite:///CourseData.db
Done.


[]

In [150]:
%%sql
-- ERD table for course meetings with enrollment counts and start and end.
CREATE TABLE COURSE_MEETING (
    CM_ID INTEGER PRIMARY KEY,
    CAP INTEGER NOT NULL,
    ACT INTEGER,
    REM INTEGER,
    START TEXT NOT NULL,
    END TEXT NOT NULL,
    CRN INTEGER NOT NULL,
    -- The parent table is named COURSE_OFFERING; the previous spelling with
    -- a trailing S pointed at a table that is never created.
    -- NOTE(review) COURSE_OFFERING.CRN is not declared PRIMARY KEY or UNIQUE,
    -- which SQLite expects of a parent key once foreign key enforcement is
    -- switched on. Confirm whether CO_ID should be the parent key instead.
    FOREIGN KEY (CRN) REFERENCES COURSE_OFFERING (CRN)
);

 * sqlite:///CourseData.db
Done.


[]

In [153]:
%%sql
-- ERD table for time codes, keyed by the TC_ID integer primary key.
CREATE TABLE TIME_CODE (
    TC_ID       INTEGER  PRIMARY KEY,
    START_TIME  TEXT     NOT NULL,
    END_TIME    TEXT     NOT NULL,
    -- Foreign key to the COURSE_OFFERING row this time code belongs to.
    CO_ID       INTEGER  NOT NULL REFERENCES COURSE_OFFERING (CO_ID)
);

 * sqlite:///CourseData.db
Done.


[]

In [155]:
%%sql
-- ERD table for meeting locations, keyed by the L_ID integer primary key.
CREATE TABLE LOCATION (
    L_ID           INTEGER  PRIMARY KEY,
    BUILDING_NAME  TEXT     NOT NULL,
    -- Foreign key to the COURSE_MEETING row held at this location.
    CM_ID          INTEGER  NOT NULL REFERENCES COURSE_MEETING (CM_ID)
);

 * sqlite:///CourseData.db
Done.


[]

## Import CSV Files

#### Import Courses CSVs

In [49]:
# Courses CSV export for each fall term, Fall2014 through Fall2018.
fall = [
    'SourceData/Fall2014/courses.csv',
    'SourceData/Fall2015/courses.csv',
    'SourceData/Fall2016/courses.csv',
    'SourceData/Fall2017/courses.csv',
    'SourceData/Fall2018/courses.csv',
]

# Read every file into its own frame, in the same order as the paths.
fall_0, fall_1, fall_2, fall_3, fall_4 = [pd.read_csv(path) for path in fall]

In [50]:
# Courses CSV export for each winter term, Winter2015 through Winter2018.
winter = [
    'SourceData/Winter2015/courses.csv',
    'SourceData/Winter2016/courses.csv',
    'SourceData/Winter2017/courses.csv',
    'SourceData/Winter2018/courses.csv',
]

# Read every file into its own frame, in the same order as the paths.
winter_0, winter_1, winter_2, winter_3 = [pd.read_csv(path) for path in winter]

In [51]:
# Courses CSV export for each spring term, plus the SpringBreak2017 term.
spring = [
    'SourceData/Spring2015/courses.csv',
    'SourceData/Spring2016/courses.csv',
    'SourceData/Spring2017/courses.csv',
    'SourceData/Spring2018/courses.csv',
    'SourceData/Spring2019/courses.csv',
    'SourceData/SpringBreak2017/courses.csv',
]

# Read every file into its own frame, in the same order as the paths.
spring_0, spring_1, spring_2, spring_3, spring_4, spring_5 = [
    pd.read_csv(path) for path in spring
]

In [52]:
# Courses CSV export for each summer term, Summer2015 through Summer2018.
summer = [
    'SourceData/Summer2015/courses.csv',
    'SourceData/Summer2016/courses.csv',
    'SourceData/Summer2017/courses.csv',
    'SourceData/Summer2018/courses.csv',
]

# Read every file into its own frame, in the same order as the paths.
summer_0, summer_1, summer_2, summer_3 = [pd.read_csv(path) for path in summer]

In [53]:
# Stack the fall-term course frames row-wise into a single frame.
df_fall = pd.concat(
    (fall_0, fall_1, fall_2, fall_3, fall_4),
    axis="index",
)

In [55]:
# Stack the winter-term course frames row-wise into a single frame.
df_winter = pd.concat(
    (winter_0, winter_1, winter_2, winter_3),
    axis="index",
)

In [56]:
# Stack the spring-term course frames row-wise into a single frame.
df_spring = pd.concat(
    (spring_0, spring_1, spring_2, spring_3, spring_4, spring_5),
    axis="index",
)

In [57]:
# Stack the summer-term course frames row-wise into a single frame.
df_summer = pd.concat(
    (summer_0, summer_1, summer_2, summer_3),
    axis="index",
)

In [83]:
# Combine the four seasonal frames into the full courses frame that is
# loaded into the courses import table.
df_course = pd.concat(
    (df_fall, df_winter, df_spring, df_summer),
    axis="index",
)

In [58]:
# Preview the first row of the combined fall courses frame.
df_fall.head(n=1)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
0,Fall2014,70384,AC 0011,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dat...",['TF 0800am-0915am 09/02-12/08 DSB 105'],Michael P. Coyne,0,31,-31


In [59]:
# Preview the first row of the combined winter courses frame.
df_winter.head(n=1)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
0,Winter2015,10913,AC 0555,1,3.0,Issues in Internal Audit,"[{'days': 'MTWRF', 'times': '1000am-0600pm', '...","['MTWRF 1000am-0600pm 01/05-01/17 DSB 115', 'S...",Dawn W Massey,18,10,8


In [60]:
# Preview the first row of the combined spring courses frame.
df_spring.head(n=1)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
0,Spring2015,32436,AC 0011,A,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '0200pm-0315pm', 'dat...",['MR 0200pm-0315pm 01/20-04/30 DSB 108'],Dawn W Massey,28,28,0


In [61]:
# Preview the first row of the combined summer courses frame.
df_summer.head(n=1)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
0,Summer2015,55230,AC 0203,A,3.0,Intermediate Accounting I,"[{'days': 'MW', 'times': '0600pm-0930pm', 'dat...","['MW 0600pm-0930pm 05/18-06/24 DSB 106', 'TR 0...",Scott M Brenner,25,9,16


In [84]:
# Preview the first row of the full courses frame before loading it.
df_course.head(n=1)

Unnamed: 0,term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
0,Fall2014,70384,AC 0011,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dat...",['TF 0800am-0915am 09/02-12/08 DSB 105'],Michael P. Coyne,0,31,-31


In [85]:
# Append the combined courses rows to the existing courses import table;
# the DataFrame index is not written as a column.
df_course.to_sql(
    name='courses',
    con=conn,
    if_exists='append',
    index=False,
)

In [87]:
%%sql
-- Spot check one row of the courses import table after the load.
SELECT *
  FROM courses
 LIMIT 1;

 * sqlite:///CourseData.db
Done.


term,crn,catalog_id,section,credits,title,meetings,timecodes,primary_instructor,cap,act,rem
Fall2014,70384,AC 0011,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0800am-0915am 09/02-12/08 DSB 105'],Michael P. Coyne,0,31,-31


#### Import Course Meetings CSVs

In [62]:
# Course-meeting CSV export for each fall term, Fall2014 through Fall2018.
# The list has its own name so that fall_2, the Fall2016 courses frame
# loaded earlier, is not overwritten with a list of paths; that rebinding
# made the earlier df_fall concat cell fail when re-run after this cell.
fall_meeting_paths = [
    'SourceData/Fall2014/course_meetings.csv',
    'SourceData/Fall2015/course_meetings.csv',
    'SourceData/Fall2016/course_meetings.csv',
    'SourceData/Fall2017/course_meetings.csv',
    'SourceData/Fall2018/course_meetings.csv',
]

# Read every file into its own frame, in the same order as the paths.
fall_2_0, fall_2_1, fall_2_2, fall_2_3, fall_2_4 = [
    pd.read_csv(path) for path in fall_meeting_paths
]

In [63]:
# Course-meeting CSV export for each winter term, Winter2015 through
# Winter2018. The list has its own name so that winter_2, the Winter2017
# courses frame loaded earlier, is not overwritten with a list of paths;
# that rebinding made the earlier df_winter concat cell fail when re-run
# after this cell.
winter_meeting_paths = [
    'SourceData/Winter2015/course_meetings.csv',
    'SourceData/Winter2016/course_meetings.csv',
    'SourceData/Winter2017/course_meetings.csv',
    'SourceData/Winter2018/course_meetings.csv',
]

# Read every file into its own frame, in the same order as the paths.
winter_2_0, winter_2_1, winter_2_2, winter_2_3 = [
    pd.read_csv(path) for path in winter_meeting_paths
]

In [64]:
# Course-meeting CSV export for each spring term, plus SpringBreak2017.
# The list has its own name so that spring_2, the Spring2017 courses frame
# loaded earlier, is not overwritten with a list of paths; that rebinding
# made the earlier df_spring concat cell fail when re-run after this cell.
spring_meeting_paths = [
    'SourceData/Spring2015/course_meetings.csv',
    'SourceData/Spring2016/course_meetings.csv',
    'SourceData/Spring2017/course_meetings.csv',
    'SourceData/Spring2018/course_meetings.csv',
    'SourceData/Spring2019/course_meetings.csv',
    'SourceData/SpringBreak2017/course_meetings.csv',
]

# Read every file into its own frame, in the same order as the paths.
spring_2_0, spring_2_1, spring_2_2, spring_2_3, spring_2_4, spring_2_5 = [
    pd.read_csv(path) for path in spring_meeting_paths
]

In [66]:
# Course-meeting CSV export for each summer term, Summer2015 through
# Summer2018. The list has its own name so that summer_2, the Summer2017
# courses frame loaded earlier, is not overwritten with a list of paths;
# that rebinding made the earlier df_summer concat cell fail when re-run
# after this cell.
summer_meeting_paths = [
    'SourceData/Summer2015/course_meetings.csv',
    'SourceData/Summer2016/course_meetings.csv',
    'SourceData/Summer2017/course_meetings.csv',
    'SourceData/Summer2018/course_meetings.csv',
]

# Read every file into its own frame, in the same order as the paths.
summer_2_0, summer_2_1, summer_2_2, summer_2_3 = [
    pd.read_csv(path) for path in summer_meeting_paths
]

In [69]:
# Stack the fall-term course-meeting frames row-wise into a single frame.
df_fall_2 = pd.concat(
    (fall_2_0, fall_2_1, fall_2_2, fall_2_3, fall_2_4),
    axis="index",
)

In [70]:
# Stack the winter-term course-meeting frames row-wise into a single frame.
df_winter_2 = pd.concat(
    (winter_2_0, winter_2_1, winter_2_2, winter_2_3),
    axis="index",
)

In [72]:
# Stack the spring-term course-meeting frames row-wise into a single frame.
df_spring_2 = pd.concat(
    (spring_2_0, spring_2_1, spring_2_2, spring_2_3, spring_2_4, spring_2_5),
    axis="index",
)

In [73]:
# Stack the summer-term course-meeting frames row-wise into a single frame.
df_summer_2 = pd.concat(
    (summer_2_0, summer_2_1, summer_2_2, summer_2_3),
    axis="index",
)

In [88]:
# Combine the four seasonal meeting frames into the full frame that is
# loaded into the cmeetings import table.
df_cmeetings = pd.concat(
    (df_fall_2, df_winter_2, df_spring_2, df_summer_2),
    axis="index",
)

In [68]:
# Preview the first row of the combined fall course-meetings frame.
df_fall_2.head(n=1)

Unnamed: 0,term,crn,location,day,start,end
0,Fall2014,70384,DSB 105,T,2014-09-02T08:00:00,2014-09-02T09:15:00


In [74]:
# Preview the first row of the combined winter course-meetings frame.
df_winter_2.head(n=1)

Unnamed: 0,term,crn,location,day,start,end
0,Winter2015,10913,DSB 115,M,2015-01-05T10:00:00,2015-01-05T18:00:00


In [75]:
# Preview the first row of the combined spring course-meetings frame.
df_spring_2.head(n=1)

Unnamed: 0,term,crn,location,day,start,end
0,Spring2015,32436,DSB 108,R,2015-01-22T14:00:00,2015-01-22T15:15:00


In [76]:
# Preview the first row of the combined summer course-meetings frame.
df_summer_2.head(n=1)

Unnamed: 0,term,crn,location,day,start,end
0,Summer2015,55230,DSB 106,M,2015-05-18T18:00:00,2015-05-18T21:30:00


In [89]:
# Preview the first row of the full course-meetings frame before loading it.
df_cmeetings.head(n=1)

Unnamed: 0,term,crn,location,day,start,end
0,Fall2014,70384,DSB 105,T,2014-09-02T08:00:00,2014-09-02T09:15:00


In [90]:
# Append the combined meeting rows to the existing cmeetings import table;
# the DataFrame index is not written as a column.
df_cmeetings.to_sql(
    name='cmeetings',
    con=conn,
    if_exists='append',
    index=False,
)

In [91]:
%%sql
-- Spot check one row of the cmeetings import table after the load.
SELECT *
  FROM cmeetings
 LIMIT 1;

 * sqlite:///CourseData.db
Done.


term,crn,location,day,start,end
Fall2014,70384,DSB 105,T,2014-09-02T08:00:00,2014-09-02T09:15:00


#### Import CourseCatalog CSVs

In [93]:
# Single-entry list holding the path of the 2017-2018 course catalog CSV.
catalog2017 = [
    'SourceData/Catalogs/CourseCatalog2017_2018.csv',
]

# Load that catalog file into a frame.
catalog2017_2018 = pd.read_csv(filepath_or_buffer=catalog2017[0])

In [94]:
# Single-entry list holding the path of the 2018-2019 course catalog CSV.
catalog2018 = [
    'SourceData/Catalogs/CourseCatalog2018_2019.csv',
]

# Load that catalog file into a frame.
catalog2018_2019 = pd.read_csv(filepath_or_buffer=catalog2018[0])

In [95]:
# Stack the two catalog years row-wise into the frame that is loaded into
# the catalog import table.
df_catalog = pd.concat(
    (catalog2017_2018, catalog2018_2019),
    axis="index",
)

In [97]:
# Append the combined catalog rows to the existing catalog import table;
# the DataFrame index is not written as a column.
df_catalog.to_sql(
    name='catalog',
    con=conn,
    if_exists='append',
    index=False,
)

In [98]:
%%sql
-- Spot check one row of the catalog import table after the load.
SELECT *
  FROM catalog
 LIMIT 1;

 * sqlite:///CourseData.db
Done.


program_code,program_name,catalog_id,course_title,credits,prereqs,coreqs,fees,attributes,description
AN,Asian Studies,AN 0301,Independent Study,1-3 Credits,,,,,Students undertake an individualized program of study in consultation with a director from the Asian studies faculty.


#### Insert Data to ERD Tables

Write SELECT queries that transform the data in the import tables
to match the ERD tables.

Use `INSERT INTO tablename SELECT ...` to populate the ERD tables.