In [2]:
%load_ext sql
import pandas as pd
import sqlite3

## Create the Database

In [3]:
# Point the ipython-sql magic (%sql / %%sql) at the SQLite file CourseData.db.
%sql sqlite:///CourseData.db 
# Open a second, plain sqlite3 connection to the same file; the pandas
# to_sql() calls in the import cells below write through this handle.
conn = sqlite3.connect('CourseData.db')

Create all tables in the database and link them with foreign keys

In [4]:
%%sql
-- Rebuild the course schema from scratch.
-- TERMS, PROGRAMS, INSTRUCTORS and LOCATIONS are lookup (parent) tables.
-- Every FOREIGN KEY is declared on the child table and points at the primary
-- key of its parent, because SQLite requires the referenced column to be
-- PRIMARY KEY or UNIQUE.

-- Drop child tables before their parents so the drops also succeed when
-- foreign key enforcement is switched on.
DROP TABLE IF EXISTS SECTIONMEETINGS;
DROP TABLE IF EXISTS SECTION;
DROP TABLE IF EXISTS CATALOGCOURSE;
DROP TABLE IF EXISTS LOCATIONS;
DROP TABLE IF EXISTS INSTRUCTORS;
DROP TABLE IF EXISTS PROGRAMS;
DROP TABLE IF EXISTS TERMS;

-- Lookup table keyed by term_code
CREATE TABLE TERMS (
    term_code INTEGER PRIMARY KEY,
    term TEXT NOT NULL
);

-- Lookup table keyed by program_code
CREATE TABLE PROGRAMS (
    program_code TEXT NOT NULL PRIMARY KEY,
    program_name TEXT NOT NULL
);

-- Lookup table keyed by instructor_id
CREATE TABLE INSTRUCTORS (
    instructor_id INTEGER NOT NULL PRIMARY KEY,
    instructor_name TEXT NOT NULL
);

-- Lookup table keyed by location_code
CREATE TABLE LOCATIONS (
    location_code INTEGER PRIMARY KEY,
    building TEXT,
    room_number INTEGER
);

-- Catalog entries, each tied to one program and one term
CREATE TABLE CATALOGCOURSE (
    catalog_id TEXT NOT NULL PRIMARY KEY,
    course_title TEXT NOT NULL,
    credits INTEGER,
    prereqs TEXT,
    coreqs TEXT,
    fees INTEGER,
    attributes TEXT,
    description TEXT,
    program_code TEXT NOT NULL,
    term_code INTEGER NOT NULL,
    FOREIGN KEY (program_code) REFERENCES PROGRAMS (program_code),
    FOREIGN KEY (term_code) REFERENCES TERMS (term_code)
);

-- Sections keyed by crn, each tied to a catalog entry and an instructor
CREATE TABLE SECTION (
    crn INTEGER PRIMARY KEY,
    title TEXT,
    section TEXT,
    credits INTEGER,
    cap INTEGER,
    act INTEGER,
    remaining INTEGER,
    time_codes TEXT,
    catalog_id TEXT NOT NULL,
    instructor_id INTEGER NOT NULL,
    FOREIGN KEY (catalog_id) REFERENCES CATALOGCOURSE (catalog_id),
    FOREIGN KEY (instructor_id) REFERENCES INSTRUCTORS (instructor_id)
);

-- Meeting rows for a section. The column named end is quoted because END is
-- an SQL keyword. The column name itself is unchanged.
CREATE TABLE SECTIONMEETINGS (
    crn INTEGER,
    start TEXT,
    "end" TEXT,
    day TEXT,
    location_code INTEGER,
    FOREIGN KEY (crn) REFERENCES SECTION (crn),
    FOREIGN KEY (location_code) REFERENCES LOCATIONS (location_code)
);

 * sqlite:///CourseData.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.


[]

Import the raw catalog and per-term section CSV data into IMPORT_* staging tables

In [7]:
# Load every catalog-year CSV into the IMPORT_CATALOGCOURSE staging table,
# tagging each row with the catalog year it came from.
catalog_years = ['2017_2018', '2018_2019']

for position, cat_year in enumerate(catalog_years):
    filepath = 'SourceData/Catalogs/CourseCatalog'+cat_year+'.csv'
    data = pd.read_csv(filepath)
    data['cat_year'] = cat_year
    # Replace the staging table on the first file and append afterwards, so
    # re-running this cell rebuilds the table instead of duplicating its rows.
    write_mode = 'replace' if position == 0 else 'append'
    data.to_sql('IMPORT_CATALOGCOURSE',conn,if_exists=write_mode,index=False)

In [12]:
# Load the per-term section and meeting CSVs into the IMPORT_SECTION and
# IMPORT_SECTIONMEETINGS staging tables.
terms = ['Fall2014','Fall2015','Fall2016','Fall2017','Fall2018',
         'Spring2015','Spring2016','Spring2017','Spring2018','Spring2019',
         'SpringBreak2017',
         'Summer2015','Summer2016','Summer2017','Summer2018',
         'Winter2015','Winter2016','Winter2017','Winter2018']

for position, term in enumerate(terms):
    # Replace both staging tables on the first term and append afterwards, so
    # re-running this cell rebuilds the tables instead of duplicating rows.
    write_mode = 'replace' if position == 0 else 'append'

    filepath = 'SourceData/'+term+'/courses.csv'
    data = pd.read_csv(filepath)
    data.to_sql('IMPORT_SECTION',conn,if_exists=write_mode,index=False)

    filepath = 'SourceData/'+term+'/course_meetings.csv'
    data = pd.read_csv(filepath)
    data.to_sql('IMPORT_SECTIONMEETINGS',conn,if_exists=write_mode,index=False)

In [10]:
%%sql
-- Total rows versus fully distinct rows in IMPORT_CATALOGCOURSE.
-- Equal numbers mean the import holds no exact duplicate rows.
SELECT
    all_rows.n AS RawCount,
    unique_rows.n AS DistinctCount
FROM
    (SELECT COUNT(*) AS n FROM IMPORT_CATALOGCOURSE) AS all_rows
CROSS JOIN
    (SELECT COUNT(*) AS n
       FROM (SELECT DISTINCT * FROM IMPORT_CATALOGCOURSE)) AS unique_rows;

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
4440,4440


In [9]:
%%sql
-- Total rows versus fully distinct rows in IMPORT_SECTION.
-- Equal numbers mean the import holds no exact duplicate rows.
SELECT
    all_rows.n AS RawCount,
    unique_rows.n AS DistinctCount
FROM
    (SELECT COUNT(*) AS n FROM IMPORT_SECTION) AS all_rows
CROSS JOIN
    (SELECT COUNT(*) AS n
       FROM (SELECT DISTINCT * FROM IMPORT_SECTION)) AS unique_rows;

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
15937,15937


In [13]:
%%sql
-- Total rows versus fully distinct rows in IMPORT_SECTIONMEETINGS.
-- A lower distinct number means exact duplicate rows were imported.
SELECT
    all_rows.n AS RawCount,
    unique_rows.n AS DistinctCount
FROM
    (SELECT COUNT(*) AS n FROM IMPORT_SECTIONMEETINGS) AS all_rows
CROSS JOIN
    (SELECT COUNT(*) AS n
       FROM (SELECT DISTINCT * FROM IMPORT_SECTIONMEETINGS)) AS unique_rows;

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
284907,284847
