In [1]:
%load_ext sql
import pandas as pd
import sqlite3

## Create database

In [2]:
# Point the %sql / %%sql magics at the SQLite database file CourseData.db.
%sql sqlite:///CourseData.db
# Open a second, plain sqlite3 connection to the same file for use from Python code
# (the pandas to_sql calls later in the notebook write through this handle).
conn = sqlite3.connect('CourseData.db')

## Create tables

In [3]:
%%sql

-- PROGRAM table, keyed by Program_id, code and name are both required
DROP TABLE IF EXISTS PROGRAM;
CREATE TABLE PROGRAM (
    Program_id   INTEGER PRIMARY KEY,
    Program_code TEXT    NOT NULL,
    Program_name TEXT    NOT NULL
);

-- LOCATION table, keyed by Location_id, with optional capacity, building and room
DROP TABLE IF EXISTS LOCATION;
CREATE TABLE LOCATION (
    Location_id INTEGER PRIMARY KEY,
    Capacity    INTEGER,
    Building    TEXT,
    Room        TEXT
);


-- COURSE table, keyed by Course_id
-- Required columns are Class_id, Program_id, CatalogID and Course_title,
-- the remaining descriptive columns are optional
-- NOTE(review) CLASS also declares a Course_id foreign key back to COURSE, so the
-- two tables reference each other. Confirm the Class_id column here is intended.
DROP TABLE IF EXISTS COURSE;
CREATE TABLE COURSE (
    Course_id     INTEGER PRIMARY KEY,
    Class_id      INTEGER NOT NULL,
    Program_id    INTEGER NOT NULL,
    CatalogID     TEXT    NOT NULL,
    Course_title  TEXT    NOT NULL,
    Description   TEXT,
    Credits       REAL,
    Attributes    TEXT,
    Prerequisites TEXT,
    Corequisites  TEXT,
    Fees          TEXT,
    FOREIGN KEY (Program_id) REFERENCES PROGRAM(Program_id),
    FOREIGN KEY (Class_id)   REFERENCES CLASS(Class_id)
);


-- INSTRUCTOR table, keyed by Instructor_id, only the name is required
-- NOTE(review) Phone is declared INTEGER. Confirm the source values are purely
-- numeric, otherwise TEXT may be the better fit.
DROP TABLE IF EXISTS INSTRUCTOR;
CREATE TABLE INSTRUCTOR (
    Instructor_id INTEGER PRIMARY KEY,
    Name          TEXT    NOT NULL,
    Phone         INTEGER,
    Email         TEXT
);

-- CLASS table, keyed by Class_id
-- Each row points at a COURSE row (required) and an INSTRUCTOR row (optional)
-- Term, Section_id, Crncode and the Cap / Act / Rem counts are required
-- The instructor foreign key must name the real parent column Instructor_id.
-- A misspelled parent column is accepted when the table is created but fails
-- with a foreign key mismatch once foreign key enforcement is switched on.
DROP TABLE IF EXISTS CLASS;
CREATE TABLE CLASS (
    Class_id      INTEGER PRIMARY KEY,
    Course_id     INTEGER NOT NULL,
    Section_id    TEXT    NOT NULL,
    Instructor_id INTEGER,
    Term          TEXT    NOT NULL,
    Crncode       INTEGER NOT NULL,
    Cap           INTEGER NOT NULL,
    Act           INTEGER NOT NULL,
    Rem           INTEGER NOT NULL,
    Timecode      TEXT,
    FOREIGN KEY (Course_id)     REFERENCES COURSE(Course_id),
    FOREIGN KEY (Instructor_id) REFERENCES INSTRUCTOR(Instructor_id)
);

-- MEETING table, keyed by Meeting_id
-- Every meeting must reference a CLASS row and a LOCATION row
-- and must carry a start, an end and a day value
DROP TABLE IF EXISTS MEETING;
CREATE TABLE MEETING (
    Meeting_id  INTEGER PRIMARY KEY,
    Class_id    INTEGER NOT NULL,
    Location_id INTEGER NOT NULL,
    Start       TEXT    NOT NULL,
    End         TEXT    NOT NULL,
    Day         TEXT    NOT NULL,
    FOREIGN KEY (Class_id)    REFERENCES CLASS(Class_id),
    FOREIGN KEY (Location_id) REFERENCES LOCATION(Location_id)
);

-- Lookup table that maps each term to the catalog year it belongs to
-- Only terms from Fall2017 onward are listed because catalogs are not
-- available for the earlier years
DROP TABLE IF EXISTS TERM_CATALOG_YEAR;
CREATE TABLE TERM_CATALOG_YEAR (
    CatalogYear TEXT NOT NULL,
    Term        TEXT NOT NULL
);

-- Seed rows, one (term, catalog year) pair per line
INSERT INTO TERM_CATALOG_YEAR (Term, CatalogYear) VALUES
    ('Fall2017',   '2017_2018'),
    ('Winter2018', '2017_2018'),
    ('Spring2018', '2017_2018'),
    ('Summer2018', '2017_2018'),
    ('Fall2018',   '2018_2019'),
    ('Winter2019', '2018_2019'),
    ('Spring2019', '2018_2019');


 * sqlite:///CourseData.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
7 rows affected.


[]

# Extracting Data

In [4]:
# Catalog Data
# Load each catalog year's course CSV into the IMPORT_CATALOG_COURSES staging
# table, tagging every row with the catalog year it was read from.
catalog_years = ['2017_2018', '2018_2019']

# Start from an empty staging table: the loop below appends, so without this
# reset every re-run of the cell would add a second copy of every row.
conn.execute('DROP TABLE IF EXISTS IMPORT_CATALOG_COURSES')
conn.commit()

for cat_year in catalog_years:
    filepath = 'SourceData/Catalogs/CourseCatalog'+cat_year+'.csv'
    data = pd.read_csv(filepath)
    # The CSV itself does not say which catalog it is; record it as a column.
    data['cat_year'] = cat_year
    data.to_sql('IMPORT_CATALOG_COURSES',conn,if_exists='append',index=False)
    


In [5]:
# Course Offering and Course Meeting Data
# For every term folder under SourceData/, load courses.csv into the
# IMPORT_COURSE_OFFERINGS staging table and course_meetings.csv into the
# IMPORT_COURSE_MEETINGS staging table.
terms = ['Fall2014','Fall2015','Fall2016','Fall2017','Fall2018',
         'Spring2015','Spring2016','Spring2017','Spring2018','Spring2019',
         'SpringBreak2017',
         'Summer2015','Summer2016','Summer2017','Summer2018',
         'Winter2015','Winter2016','Winter2017','Winter2018']

# Start from empty staging tables: the loop below appends, so without this
# reset every re-run of the cell would add a second copy of every row.
conn.execute('DROP TABLE IF EXISTS IMPORT_COURSE_OFFERINGS')
conn.execute('DROP TABLE IF EXISTS IMPORT_COURSE_MEETINGS')
conn.commit()

for term in terms:
    filepath = 'SourceData/'+term+'/courses.csv'
    data = pd.read_csv(filepath)
    data.to_sql('IMPORT_COURSE_OFFERINGS',conn,if_exists='append',index=False)

    filepath = 'SourceData/'+term+'/course_meetings.csv'
    data = pd.read_csv(filepath)
    data.to_sql('IMPORT_COURSE_MEETINGS',conn,if_exists='append',index=False)

In [6]:
%%sql
-- Catalog courses import check
-- Compares the total row count with the count of fully distinct rows,
-- equal numbers mean no row is an exact duplicate of another
SELECT
    (SELECT COUNT(*) FROM IMPORT_CATALOG_COURSES) AS RawCount,
    (SELECT COUNT(*) FROM (SELECT DISTINCT * FROM IMPORT_CATALOG_COURSES)) AS DistinctCount;

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
4440,4440


In [7]:
%%sql
-- Course offerings import check
-- Compares the total row count with the count of fully distinct rows,
-- equal numbers mean no row is an exact duplicate of another
SELECT
    (SELECT COUNT(*) FROM IMPORT_COURSE_OFFERINGS) AS RawCount,
    (SELECT COUNT(*) FROM (SELECT DISTINCT * FROM IMPORT_COURSE_OFFERINGS)) AS DistinctCount;

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
15937,15937


In [8]:
%%sql
-- Course meetings import check
-- Compares the total row count with the count of fully distinct rows,
-- a lower distinct count means some rows are exact duplicates
SELECT
    (SELECT COUNT(*) FROM IMPORT_COURSE_MEETINGS) AS RawCount,
    (SELECT COUNT(*) FROM (SELECT DISTINCT * FROM IMPORT_COURSE_MEETINGS)) AS DistinctCount;

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
284907,284847


In [9]:
%%sql
-- List the imported meetings that appear more than once
-- for the same term, CRN, location, day and start value
SELECT term, crn, location, day, start
FROM IMPORT_COURSE_MEETINGS
GROUP BY term, crn, location, day, start
HAVING COUNT(*) > 1;

 * sqlite:///CourseData.db
Done.


term,crn,location,day,start
Fall2014,73073,MCA 102,M,2014-09-08T18:30:00
Fall2014,73073,MCA 102,M,2014-09-15T18:30:00
Fall2014,73073,MCA 102,M,2014-09-22T18:30:00
Fall2014,73073,MCA 102,M,2014-09-29T18:30:00
Fall2014,73073,MCA 102,M,2014-10-06T18:30:00
Fall2014,73073,MCA 102,M,2014-10-20T18:30:00
Fall2014,73073,MCA 102,M,2014-10-27T18:30:00
Fall2014,73073,MCA 102,M,2014-11-03T18:30:00
Fall2014,73073,MCA 102,M,2014-11-10T18:30:00
Fall2014,73073,MCA 102,M,2014-11-17T18:30:00
