In [1]:
%load_ext sql
import sqlite3
import pandas as pd

In [2]:
# Two handles are opened on the same SQLite file:
#   - the %sql / %%sql magics connect through the sqlite:/// URL below
#   - `conn` is a plain sqlite3 connection, used later by pandas `to_sql`
%sql sqlite:///CourseData.db
conn = sqlite3.connect('CourseData.db')

In [3]:
%%sql
DROP TABLE IF EXISTS INSTRUCTORS;
-- Lookup table of instructor names, recreated from scratch each time this cell runs.
CREATE TABLE INSTRUCTORS (
    INSTRUCTOR_ID   INTEGER PRIMARY KEY,  -- surrogate key, assigned by SQLite when omitted from an INSERT
    INSTRUCTOR_NAME TEXT    NOT NULL
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [4]:
%%sql
DROP TABLE IF EXISTS TERMS;
-- Term table keyed by a surrogate id. CRN is the only nullable column.
CREATE TABLE TERMS (
    TERM_ID  INTEGER PRIMARY KEY,
    SEMESTER TEXT    NOT NULL,
    YEAR     INTEGER NOT NULL,
    CRN      INTEGER
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [5]:
%%sql
DROP TABLE IF EXISTS PROGRAMS;
-- Academic programs: a surrogate id plus the program code and its display name.
CREATE TABLE PROGRAMS (
    PROGRAM_ID   INTEGER PRIMARY KEY,
    PROGRAM_CODE TEXT    NOT NULL,
    PROGRAM_NAME TEXT    NOT NULL
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [6]:
%%sql
DROP TABLE IF EXISTS COURSES;
-- Catalog description of a course as offered in a given term.
-- PROGRAM_ID links each course to its row in PROGRAMS.
CREATE TABLE COURSES(
    COURSE_ID INTEGER PRIMARY KEY,
    CATALOG_ID TEXT,
    TERM TEXT NOT NULL,
    TITLE TEXT NOT NULL,
    PREREQ TEXT,
    COREQ TEXT,
    FEES TEXT,
    ATTRIBUTES TEXT,
    DESCRIPTION TEXT,
    PROGRAM_ID INTEGER NOT NULL,
    -- The parent table is PROGRAMS (plural). SQLite does not check the parent
    -- table name when the child table is created, so a misspelled name is
    -- accepted silently and only fails once foreign keys are enforced.
    FOREIGN KEY (PROGRAM_ID)
        REFERENCES PROGRAMS (PROGRAM_ID)
        ON UPDATE CASCADE
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [7]:
%%sql
DROP TABLE IF EXISTS COURSE_OFFERINGS;
-- One row per course section offered in a term, with enrollment figures
-- and an optional link to the primary instructor.
CREATE TABLE COURSE_OFFERINGS(
    COURSE_OFFERING_ID INTEGER PRIMARY KEY,
    CRN INTEGER NOT NULL,
    SECTION INTEGER NOT NULL,
    CREDITS INTEGER NOT NULL,
    CAPACITY INTEGER NOT NULL,
    ACTUAL INTEGER NOT NULL,
    REMAINING INTEGER NOT NULL,
    TERM TEXT NOT NULL,
    CATALOG_ID INTEGER NOT NULL,
    PRIMARY_INSTRUCTOR_ID INTEGER,
    -- NOTE(review): COURSES.CATALOG_ID is declared TEXT and is not UNIQUE, while
    -- this column is INTEGER. Confirm the intended parent key before turning on
    -- foreign key enforcement.
    FOREIGN KEY (CATALOG_ID)
        REFERENCES COURSES(CATALOG_ID)
        ON UPDATE CASCADE,
    -- The parent table is INSTRUCTORS (plural), matching the table created earlier.
    FOREIGN KEY (PRIMARY_INSTRUCTOR_ID)
        REFERENCES INSTRUCTORS(INSTRUCTOR_ID)
        ON UPDATE CASCADE
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [8]:
%%sql
DROP TABLE IF EXISTS LOCATIONS;
-- Room lookup: a surrogate id plus the room label text.
CREATE TABLE LOCATIONS (
    LOCATION_ID INTEGER PRIMARY KEY,
    ROOM_ID     TEXT
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [9]:
%%sql
DROP TABLE IF EXISTS COURSE_MEETINGS;
-- One row per scheduled class meeting (term, room label, day, start and end time).
CREATE TABLE COURSE_MEETINGS(
    MEETING_ID INTEGER PRIMARY KEY,
    TERM TEXT NOT NULL,
    LOCATION TEXT NOT NULL,
    DAY TEXT NOT NULL,
    START TEXT NOT NULL,
    END TEXT NOT NULL,
    CRN INTEGER NOT NULL,
    -- Holds LOCATIONS.LOCATION_ID (that is what the loader writes here),
    -- so it is typed INTEGER and references that key.
    ROOM_ID INTEGER,
    -- The parent table is COURSE_OFFERINGS (plural).
    -- NOTE(review): COURSE_OFFERINGS.CRN is not declared UNIQUE. Confirm the
    -- intended parent key before turning on foreign key enforcement.
    FOREIGN KEY (CRN)
        REFERENCES COURSE_OFFERINGS (CRN)
        ON UPDATE CASCADE,
    FOREIGN KEY (ROOM_ID)
        REFERENCES LOCATIONS (LOCATION_ID)
        ON UPDATE CASCADE
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [10]:
%%sql
DROP TABLE IF EXISTS TERM_CATALOGYEAR;
-- Maps a term label to the catalog year label it belongs to.
CREATE TABLE TERM_CATALOGYEAR (
    TERM         TEXT,
    CATALOG_YEAR TEXT
);

 * sqlite:///CourseData.db
Done.
Done.


[]

In [11]:
%%sql
INSERT INTO TERM_CATALOGYEAR (TERM, CATALOG_YEAR)
VALUES
    -- catalog year 2017_2018
    ('Fall2017',   '2017_2018'),
    ('Winter2018', '2017_2018'),
    ('Spring2018', '2017_2018'),
    ('Summer2018', '2017_2018'),
    -- catalog year 2018_2019
    ('Fall2018',   '2018_2019'),
    ('Winter2019', '2018_2019'),
    ('Spring2019', '2018_2019');

 * sqlite:///CourseData.db
7 rows affected.


[]

In [12]:
# Import catalog data
# Each catalog-year CSV is staged into IMPORT_CAT_COURSES, tagged with its year.
catalog_year = ['2017_2018', '2018_2019']

for position, year in enumerate(catalog_year):
    filepath = 'SourceData/Catalogs/CourseCatalog'+year+'.csv'
    catalog_data = pd.read_csv(filepath)
    catalog_data['year'] = year
    # The first file replaces any staging table left behind by an earlier run,
    # so re-executing this cell does not duplicate rows; later files append.
    write_mode = 'replace' if position == 0 else 'append'
    catalog_data.to_sql('IMPORT_CAT_COURSES', conn, if_exists=write_mode, index=False)

In [13]:
# Import Course meetings data and course offerings data
# Every term folder contributes courses.csv and course_meetings.csv, staged into
# IMPORT_COURSE_DATA and IMPORT_COURSEMEETINGS_DATA respectively.
terms = ['Fall2014','Fall2015','Fall2016','Fall2017','Fall2018',
         'Spring2015','Spring2016','Spring2017','Spring2018','Spring2019',
         'SpringBreak2017','Summer2015','Summer2016','Summer2017','Summer2018',
         'Winter2015','Winter2016','Winter2017','Winter2018']
for position, term in enumerate(terms):
    # The first term replaces staging tables left behind by an earlier run, so
    # re-executing this cell does not duplicate rows; later terms append.
    write_mode = 'replace' if position == 0 else 'append'

    filepath = 'SourceData/'+term+'/courses.csv'
    term_data = pd.read_csv(filepath)
    term_data.to_sql('IMPORT_COURSE_DATA', conn, if_exists=write_mode, index=False)

    filepath = 'SourceData/'+term+'/course_meetings.csv'
    term_data = pd.read_csv(filepath)
    term_data.to_sql('IMPORT_COURSEMEETINGS_DATA', conn, if_exists=write_mode, index=False)

In [14]:
%%sql
SELECT
    -- Total staged rows next to the number of distinct rows.
    -- Equal counts mean no row is an exact duplicate of another.
    (SELECT COUNT(*) FROM IMPORT_COURSE_DATA) AS RAW_COUNT,
    (SELECT COUNT(*)
       FROM (SELECT DISTINCT * FROM IMPORT_COURSE_DATA)) AS DISTINCT_COUNT;

 * sqlite:///CourseData.db
Done.


RAW_COUNT,DISTINCT_COUNT
15937,15937


In [15]:
%%sql
SELECT
    -- Total staged catalog rows next to the number of distinct rows.
    -- Equal counts mean no row is an exact duplicate of another.
    (SELECT COUNT(*) FROM IMPORT_CAT_COURSES) AS RAW_COUNT,
    (SELECT COUNT(*)
       FROM (SELECT DISTINCT * FROM IMPORT_CAT_COURSES)) AS DISTINCT_COUNT;

 * sqlite:///CourseData.db
Done.


RAW_COUNT,DISTINCT_COUNT
4440,4440


In [16]:
%%sql
SELECT
    -- Total staged meeting rows next to the number of distinct rows.
    -- A lower distinct count means some rows are exact duplicates.
    (SELECT COUNT(*) FROM IMPORT_COURSEMEETINGS_DATA) AS RAW_COUNT,
    (SELECT COUNT(*)
       FROM (SELECT DISTINCT * FROM IMPORT_COURSEMEETINGS_DATA)) AS DISTINCT_COUNT;

 * sqlite:///CourseData.db
Done.


RAW_COUNT,DISTINCT_COUNT
284907,284847


In [17]:
# Import data into INSTRUCTORS table

In [18]:
%%sql
DELETE FROM INSTRUCTORS;

-- Reload with one row per distinct primary instructor name. The TBA placeholder
-- and any value containing a slash are left out.
INSERT INTO INSTRUCTORS (INSTRUCTOR_NAME)
SELECT DISTINCT src.primary_instructor
FROM IMPORT_COURSE_DATA AS src
WHERE src.primary_instructor <> 'TBA'
  AND src.primary_instructor NOT LIKE '%/%';

 * sqlite:///CourseData.db
0 rows affected.
1095 rows affected.


[]

In [19]:
%%sql
SELECT INSTRUCTOR_ID, INSTRUCTOR_NAME
FROM INSTRUCTORS
-- Show only the first ten rows as a quick sanity check.
LIMIT 10;

 * sqlite:///CourseData.db
Done.


INSTRUCTOR_ID,INSTRUCTOR_NAME
1,Michael P. Coyne
2,Rebecca I. Bloch
3,Paul Caster
4,Jo Ann Drusbosky
5,Arleen N. Kardos
6,Scott M Brenner
7,Kevin C. Cassidy
8,Bruce Bradford
9,Milo W. Peck
10,Stephen E. Yost


In [20]:
# Import data into PROGRAMS table

In [21]:
%%sql
DELETE FROM PROGRAMS;

-- Reload with one row per distinct (code, name) pair found in the catalog
-- staging table, inserted in program code order.
INSERT INTO PROGRAMS (PROGRAM_CODE, PROGRAM_NAME)
SELECT DISTINCT cat.program_code, cat.program_name
FROM IMPORT_CAT_COURSES AS cat
ORDER BY cat.program_code;

 * sqlite:///CourseData.db
0 rows affected.
83 rows affected.


[]

In [22]:
%%sql
SELECT PROGRAM_ID, PROGRAM_CODE, PROGRAM_NAME
FROM PROGRAMS
-- Show only the first ten rows as a quick sanity check.
LIMIT 10;

 * sqlite:///CourseData.db
Done.


PROGRAM_ID,PROGRAM_CODE,PROGRAM_NAME
1,AC,Accounting
2,AE,Applied Ethics
3,AH,Art History
4,AN,Asian Studies
5,AR,Arabic
6,AS,American Studies
7,AY,Anthropology
8,BB,Business
9,BEN,Bioengineering
10,BI,Biology


In [23]:
# Import data into LOCATIONS table

In [24]:
%%sql
DELETE FROM LOCATIONS;

-- Reload with one row per distinct meeting location label, inserted in label order.
INSERT INTO LOCATIONS (ROOM_ID)
SELECT DISTINCT mtg.location
FROM IMPORT_COURSEMEETINGS_DATA AS mtg
ORDER BY mtg.location;

 * sqlite:///CourseData.db
0 rows affected.
207 rows affected.


[]

In [25]:
%%sql
SELECT LOCATION_ID, ROOM_ID
FROM LOCATIONS
-- Show only the first ten rows as a quick sanity check.
LIMIT 10;

 * sqlite:///CourseData.db
Done.


LOCATION_ID,ROOM_ID
1,BCC 200
2,BD
3,BH
4,BH BY ARR
5,BLM 112
6,BLM LL105
7,BNW 124
8,BNW 127
9,BNW 128
10,BNW 129B


In [26]:
# Import data into COURSE_MEETINGS table

In [27]:
%%sql
DELETE FROM COURSE_MEETINGS;

-- Copy every staged meeting row and attach the LOCATION_ID whose ROOM_ID label
-- matches the meeting location. The LEFT JOIN keeps meetings with no match.
INSERT INTO COURSE_MEETINGS (TERM, LOCATION, DAY, START, END, CRN, ROOM_ID)
SELECT term, location, day, start, end, crn, loc.LOCATION_ID
FROM IMPORT_COURSEMEETINGS_DATA AS mtg
    LEFT JOIN LOCATIONS AS loc
        ON mtg.Location = loc.ROOM_ID;

 * sqlite:///CourseData.db
0 rows affected.
284907 rows affected.


[]

In [28]:
%%sql
SELECT MEETING_ID, TERM, LOCATION, DAY, START, END, CRN, ROOM_ID
FROM COURSE_MEETINGS
-- Show only the first ten rows as a quick sanity check.
LIMIT 10;

 * sqlite:///CourseData.db
Done.


MEETING_ID,TERM,LOCATION,DAY,START,END,CRN,ROOM_ID
1,Fall2014,DSB 105,T,2014-09-02T08:00:00,2014-09-02T09:15:00,70384,99
2,Fall2014,DSB 105,F,2014-09-05T08:00:00,2014-09-05T09:15:00,70384,99
3,Fall2014,DSB 105,T,2014-09-09T08:00:00,2014-09-09T09:15:00,70384,99
4,Fall2014,DSB 105,F,2014-09-12T08:00:00,2014-09-12T09:15:00,70384,99
5,Fall2014,DSB 105,T,2014-09-16T08:00:00,2014-09-16T09:15:00,70384,99
6,Fall2014,DSB 105,F,2014-09-19T08:00:00,2014-09-19T09:15:00,70384,99
7,Fall2014,DSB 105,T,2014-09-23T08:00:00,2014-09-23T09:15:00,70384,99
8,Fall2014,DSB 105,F,2014-09-26T08:00:00,2014-09-26T09:15:00,70384,99
9,Fall2014,DSB 105,T,2014-09-30T08:00:00,2014-09-30T09:15:00,70384,99
10,Fall2014,DSB 105,F,2014-10-03T08:00:00,2014-10-03T09:15:00,70384,99


# Import data into COURSE_OFFERINGS table

In [29]:
%%sql
DELETE FROM COURSE_OFFERINGS;

-- Copy every staged offering. PRIMARY_INSTRUCTOR_ID must hold the INSTRUCTORS
-- surrogate key, not the instructor name text, so the name is resolved through
-- a join. The LEFT JOIN keeps offerings whose instructor has no INSTRUCTORS row
-- (for example the TBA placeholder) and leaves the id NULL for them.
INSERT INTO COURSE_OFFERINGS (CRN, SECTION, CREDITS, CAPACITY, ACTUAL, REMAINING, CATALOG_ID, TERM,
                             PRIMARY_INSTRUCTOR_ID)
SELECT crn, section, credits, cap, act, rem, catalog_id, term, INSTRUCTORS.INSTRUCTOR_ID
FROM IMPORT_COURSE_DATA
    LEFT JOIN INSTRUCTORS
        ON (INSTRUCTORS.INSTRUCTOR_NAME = IMPORT_COURSE_DATA.primary_instructor);

 * sqlite:///CourseData.db
0 rows affected.
15937 rows affected.


[]

# Import data into COURSES table

In [34]:
%%sql
DELETE FROM COURSES;

-- Reload with the distinct combinations of offered course, term and catalog
-- text, resolving the catalog program code to its PROGRAMS id.
-- NOTE(review): the catalog join matches on catalog_id only, so a course present
-- in more than one catalog year can produce several rows per term. Confirm
-- whether TERM_CATALOGYEAR was meant to narrow this.
INSERT INTO COURSES (CATALOG_ID, TERM, TITLE, PREREQ, COREQ, FEES, ATTRIBUTES, DESCRIPTION, PROGRAM_ID)
SELECT DISTINCT
    offered.catalog_id,
    term,
    title,
    prereqs,
    coreqs,
    fees,
    attributes,
    description,
    prog.PROGRAM_ID
FROM IMPORT_COURSE_DATA AS offered
    JOIN IMPORT_CAT_COURSES AS cat
        ON cat.catalog_id = offered.catalog_id
    JOIN PROGRAMS AS prog
        ON prog.PROGRAM_CODE = cat.program_code;

 * sqlite:///CourseData.db
30486 rows affected.
8847 rows affected.


[]

In [35]:
%%sql
SELECT COUNT(*) FROM
(
    -- Same distinct projection that feeds the COURSES load, counted as a cross-check.
    SELECT DISTINCT
        offered.catalog_id,
        term,
        title,
        prereqs,
        coreqs,
        fees,
        attributes,
        description,
        prog.PROGRAM_ID
    FROM IMPORT_COURSE_DATA AS offered
        JOIN IMPORT_CAT_COURSES AS cat
            ON cat.catalog_id = offered.catalog_id
        JOIN PROGRAMS AS prog
            ON prog.PROGRAM_CODE = cat.program_code
);

 * sqlite:///CourseData.db
Done.


COUNT(*)
8847
