In [1]:
%load_ext sql
import sqlite3
import pandas as pd

In [2]:
%sql sqlite:///CourseDataWarehouse.db

'Connected: @CourseDataWarehouse.db'

# 5. Create and Build Datawarehouse 

## A. Created a fact table and dimensions to outline our datawarehouse
![ERD](Course_Facts_ERD.png)

## B. Created SQL Tables that aligned with our datawarehouse

### 1. Instructors Table

In [3]:
%%sql
DROP TABLE IF EXISTS INSTRUCTORS;

CREATE TABLE INSTRUCTORS(
    INSTRUCTOR_ID INTEGER PRIMARY KEY,
    INSTRUCTOR_NAME TEXT NOT NULL);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

### 2. Programs Table

In [4]:
%%sql
DROP TABLE IF EXISTS PROGRAMS;
CREATE TABLE PROGRAMS(
    PROGRAM_ID INTEGER PRIMARY KEY,
    PROGRAM_CODE TEXT NOT NULL,
    PROGRAM_NAME TEXT NOT NULL);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

### 3. Locations Table

In [5]:
%%sql
DROP TABLE IF EXISTS LOCATIONS;
CREATE TABLE LOCATIONS(
    LOCATION_ID INTEGER PRIMARY KEY,
    ROOM_ID TEXT);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

### 4. Time Slices Table

In [6]:
%%sql
DROP TABLE IF EXISTS TIME_SLICES;
CREATE TABLE TIME_SLICES(
    TIMECODE INTEGER PRIMARY KEY,
    DAY TEXT,
    START TEXT,
    END TEXT);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

### 5. Calendar Dates Table

In [7]:
%%sql
DROP TABLE IF EXISTS CALENDAR_DATES;
CREATE TABLE CALENDAR_DATES(
    TERM TEXT PRIMARY KEY
);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

### 6. Course Fact Table

In [8]:
%%sql
DROP TABLE IF EXISTS COURSE_FACTS;
CREATE TABLE COURSE_FACTS(
    CATALOG_ID TEXT NOT NULL,
    TITLE TEXT NOT NULL,
    PREREQ TEXT,
    COREQ TEXT,
    SECTION TEXT,
    CAPACITY INTEGER,
    ACTUAL INTEGER,
    REMAINING INTEGER,
    CREDITS INTEGER,
    TIMECODE INTEGER NOT NULL,
    TERM TEXT NOT NULL,
    LOCATION_ID INTEGER NOT NULL,
    INSTRUCTOR_ID INTEGER NOT NULL,
    PROGRAM_ID INTEGER NOT NULL,
    FOREIGN KEY (TIMECODE)
        REFERENCES TIME_SLICES (TIMECODE)
        ON UPDATE CASCADE,
    FOREIGN KEY (TERM)
        REFERENCES CALENDAR_DATES (TERM)
        ON UPDATE CASCADE,
    FOREIGN KEY (LOCATION_ID)
        REFERENCES LOCATIONS (LOCATION_ID)
        ON UPDATE CASCADE
    FOREIGN KEY (INSTRUCTOR_ID)
        REFERENCES INSTRUCTORS (INSTRUCTOR_ID)
        ON UPDATE CASCADE,
    FOREIGN KEY (PROGRAM_ID)
        REFERENCES PROGRAMS (PROGRAM_ID)
        ON UPDATE CASCADE
);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

# 6. Insert Data from Database into Datawarehouse Tables

## A. Attached database to pull data from

In [9]:
%sql ATTACH DATABASE 'CourseData.db' as 'CourseData';

 * sqlite:///CourseDataWarehouse.db
Done.


[]

## B. Inserted Data into tables through select queries 

### 1. Load data into programs

In [10]:
%%sql
SELECT *
FROM CourseData.PROGRAMS
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


PROGRAM_ID,PROGRAM_CODE,PROGRAM_NAME
1,AC,Accounting
2,AE,Applied Ethics
3,AH,Art History
4,AN,Asian Studies
5,AR,Arabic
6,AS,American Studies
7,AY,Anthropology
8,BB,Business
9,BEN,Bioengineering
10,BI,Biology


In [11]:
%%sql

INSERT INTO PROGRAMS (PROGRAM_CODE, PROGRAM_NAME)
SELECT PROGRAM_CODE, PROGRAM_NAME
FROM CourseData.PROGRAMS 

 * sqlite:///CourseDataWarehouse.db
83 rows affected.


[]

### 2. Load data into Instructors

In [12]:
%%sql
SELECT COUNT(*)
FROM (SELECT * FROM CourseData.INSTRUCTORS)

 * sqlite:///CourseDataWarehouse.db
Done.


COUNT(*)
1095


In [13]:
%%sql
INSERT INTO INSTRUCTORS (INSTRUCTOR_NAME)
SELECT INSTRUCTOR_NAME
FROM CourseData.INSTRUCTORS

 * sqlite:///CourseDataWarehouse.db
1095 rows affected.


[]

In [14]:
%%sql
SELECT *
FROM INSTRUCTORS 
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


INSTRUCTOR_ID,INSTRUCTOR_NAME
1,Michael P. Coyne
2,Rebecca I. Bloch
3,Paul Caster
4,Jo Ann Drusbosky
5,Arleen N. Kardos
6,Scott M Brenner
7,Kevin C. Cassidy
8,Bruce Bradford
9,Milo W. Peck
10,Stephen E. Yost


### 3. Load data into Locations

In [15]:
%%sql
SELECT COUNT(*)
FROM (SELECT * FROM CourseData.LOCATIONS)

 * sqlite:///CourseDataWarehouse.db
Done.


COUNT(*)
207


In [16]:
%%sql 
INSERT INTO LOCATIONS (ROOM_ID)
SELECT ROOM_ID
FROM CourseData.LOCATIONS

 * sqlite:///CourseDataWarehouse.db
207 rows affected.


[]

In [17]:
%%sql
SELECT *
FROM LOCATIONS
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


LOCATION_ID,ROOM_ID
1,BCC 200
2,BD
3,BH
4,BH BY ARR
5,BLM 112
6,BLM LL105
7,BNW 124
8,BNW 127
9,BNW 128
10,BNW 129B


### 4. Load data into Time Slices

In [18]:
%%sql
SELECT DISTINCT DAY,substr(start,12,8) as StartTime, substr(end,12,8) as EndTime 
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


DAY,StartTime,EndTime
T,08:00:00,09:15:00
F,08:00:00,09:15:00
T,09:30:00,10:45:00
F,09:30:00,10:45:00
T,12:30:00,13:45:00
F,12:30:00,13:45:00
R,11:00:00,12:15:00
M,11:00:00,12:15:00
R,12:30:00,13:45:00
M,12:30:00,13:45:00


In [19]:
%%sql

DELETE FROM TIME_SLICES;

INSERT INTO TIME_SLICES ( DAY, START, END)
SELECT DISTINCT DAY,substr(start,12,8) as StartTime, substr(end,12,8) as EndTime 
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
1090 rows affected.


[]

In [20]:
%%sql
SELECT COUNT(*)
FROM (SELECT DISTINCT * FROM TIME_SLICES);

SELECT DISTINCT *
FROM TIME_SLICES
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


TIMECODE,DAY,START,END
1,T,08:00:00,09:15:00
2,F,08:00:00,09:15:00
3,T,09:30:00,10:45:00
4,F,09:30:00,10:45:00
5,T,12:30:00,13:45:00
6,F,12:30:00,13:45:00
7,R,11:00:00,12:15:00
8,M,11:00:00,12:15:00
9,R,12:30:00,13:45:00
10,M,12:30:00,13:45:00


In [21]:
%%sql
DROP TABLE IF EXISTS TEMP_TABLE;
CREATE TABLE TEMP_TABLE(
    TIMECODE INTEGER,
    MEETING_ID);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

In [22]:
%%sql 
INSERT INTO TEMP_TABLE (MEETING_ID, TIMECODE)
SELECT DISTINCT MEETING_ID, TIMECODE
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)
    JOIN TIME_SLICES ON (TIME_SLICES.DAY = COURSE_MEETINGS.DAY AND TIME_SLICES.START =substr(COURSE_MEETINGS.start,12,8)  AND TIME_SLICES.END =substr(COURSE_MEETINGS.end,12,8) )

 * sqlite:///CourseDataWarehouse.db
284907 rows affected.


[]

In [23]:
%%sql
SELECT *
FROM TEMP_TABLE
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


TIMECODE,MEETING_ID
1,1
2,2
1,3
2,4
1,5
2,6
1,7
2,8
1,9
2,10


In [24]:
%%sql
SELECT DISTINCT MEETING_ID, TIMECODE
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)
    JOIN TIME_SLICES ON (TIME_SLICES.DAY = COURSE_MEETINGS.DAY AND TIME_SLICES.START =substr(COURSE_MEETINGS.start,12,8)  AND TIME_SLICES.END =substr(COURSE_MEETINGS.end,12,8) )
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


MEETING_ID,TIMECODE
1,1
2,2
3,1
4,2
5,1
6,2
7,1
8,2
9,1
10,2


### 5. Load data into Calendar Dates Table

In [25]:
%%sql
SELECT DISTINCT TERM
FROM CourseData.Course_Offerings
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


TERM
Fall2014
Fall2015
Fall2016
Fall2017
Fall2018
Spring2015
Spring2016
Spring2017
Spring2018
Spring2019


In [26]:
%%sql

DELETE FROM CALENDAR_DATES;

INSERT INTO CALENDAR_DATES (TERM)
SELECT DISTINCT TERM
FROM CourseData.Course_Offerings

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
19 rows affected.


[]

In [27]:
%%sql
SELECT *
FROM CALENDAR_DATES
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


TERM
Fall2014
Fall2015
Fall2016
Fall2017
Fall2018
Spring2015
Spring2016
Spring2017
Spring2018
Spring2019


### 6. Load data into Course Facts Table

In [28]:
%%sql
--SELECT DISTINCT CATALOG_ID, TITLE, PREREQ, COREQ, SECTION, CAPACITY, ACTUAL, REMAINING, CREDITS, CALENDAR_DATES.TERM, LOCATIONS.LOCATION_ID, INSTRUCTORS.INSTRUCTOR_ID, PROGRAMS.PROGRAM_ID, TIME_SLICES.DAY, TIME_SLICES.START, TIME_SLICES.END
--FROM CourseData.Courses
    --JOIN CourseData.Course_Offerings USING (CATALOG_ID)
    --JOIN CALENDAR_DATES ON (CALENDAR_DATES.TERM = CourseData.Course_Offerings.TERM)
    --JOIN CourseData.Course_Meetings USING (COURSE_OFFERING_ID)
    --JOIN LOCATIONS ON (LOCATIONS.LOCATION_ID = CourseData.Course_Meetings.Location_ID)
    --JOIN INSTRUCTORS ON (INSTRUCTORS.INSTRUCTOR_ID = CourseData.Course_Offerings.Primary_Instructor_ID)
    --JOIN PROGRAMS ON (PROGRAMS.PROGRAM_ID = CourseData.Courses.Program_ID)
    --JOIN TEMP_TABLE ON (TEMP_TABLE.MEETING_ID = CourseData.Course_Meetings.MEETING_ID)
    --JOIN TIME_SLICES ON (TIME_SLICES.TIMECODE = TEMP_TABLE.TIMECODE)
--LIMIT 50;


 * sqlite:///CourseDataWarehouse.db
0 rows affected.


[]

In [33]:
%%sql
--SELECT DISTINCT CATALOG_ID, TITLE, PREREQ, COREQ, SECTION, CAPACITY, ACTUAL, REMAINING, CREDITS, CALENDAR_DATES.TERM, LOCATIONS.LOCATION_ID, INSTRUCTORS.INSTRUCTOR_ID, PROGRAMS.PROGRAM_ID, TIME_SLICES.TIMECODE
--FROM CourseData.Courses
    --JOIN CourseData.Course_Offerings USING (CATALOG_ID)
    --JOIN CALENDAR_DATES ON (CALENDAR_DATES.TERM = CourseData.Course_Offerings.TERM)
    --JOIN CourseData.Course_Meetings USING (COURSE_OFFERING_ID)
    --JOIN LOCATIONS ON (LOCATIONS.LOCATION_ID = CourseData.Course_Meetings.Location_ID)
    --JOIN INSTRUCTORS ON (INSTRUCTORS.INSTRUCTOR_ID = CourseData.Course_Offerings.Primary_Instructor_ID)
    --JOIN PROGRAMS ON (PROGRAMS.PROGRAM_ID = CourseData.Courses.Program_ID)
    --JOIN TEMP_TABLE ON (TEMP_TABLE.MEETING_ID = CourseData.Course_Meetings.MEETING_ID)
    --JOIN TIME_SLICES ON (TIME_SLICES.TIMECODE = TEMP_TABLE.TIMECODE)
--LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.


[]

In [30]:
%%sql
DELETE FROM COURSE_FACTS;



 * sqlite:///CourseDataWarehouse.db
0 rows affected.


[]

In [31]:
%%sql
INSERT INTO COURSE_FACTS(CATALOG_ID, TITLE, PREREQ, COREQ, SECTION, CAPACITY, ACTUAL, REMAINING, CREDITS, TERM, LOCATION_ID, INSTRUCTOR_ID, PROGRAM_ID, TIMECODE)
SELECT DISTINCT CATALOG_ID, TITLE, PREREQ, COREQ, SECTION, CAPACITY, ACTUAL, REMAINING, CREDITS, CALENDAR_DATES.TERM, LOCATIONS.LOCATION_ID, INSTRUCTORS.INSTRUCTOR_ID, PROGRAMS.PROGRAM_ID, TIME_SLICES.TIMECODE
FROM CourseData.Courses
    JOIN CourseData.Course_Offerings USING (CATALOG_ID)
    JOIN CALENDAR_DATES ON (CALENDAR_DATES.TERM = CourseData.Course_Offerings.TERM)
    JOIN CourseData.Course_Meetings USING (COURSE_OFFERING_ID)
    JOIN LOCATIONS ON (LOCATIONS.LOCATION_ID = CourseData.Course_Meetings.Location_ID)
    JOIN INSTRUCTORS ON (INSTRUCTORS.INSTRUCTOR_ID = CourseData.Course_Offerings.Primary_Instructor_ID)
    JOIN PROGRAMS ON (PROGRAMS.PROGRAM_ID = CourseData.Courses.Program_ID)
    JOIN TEMP_TABLE ON (TEMP_TABLE.MEETING_ID = CourseData.Course_Meetings.MEETING_ID)
    JOIN TIME_SLICES ON (TIME_SLICES.TIMECODE = TEMP_TABLE.TIMECODE)
;

 * sqlite:///CourseDataWarehouse.db
25317 rows affected.


[]

In [32]:
%%sql
-- Why wont this work when I count the section column?
SELECT DISTINCT CATALOG_ID, TERM,SECTION, DAY, START, END
FROM COURSE_FACTS
    JOIN TEMP_TABLE USING (TIMECODE)
    JOIN TIME_SLICES USING (TIMECODE)
WHERE CATALOG_ID LIKE '%FI%' and TERM = 'Spring2019'

 * sqlite:///CourseDataWarehouse.db
Done.


CATALOG_ID,TERM,SECTION,DAY,START,END
FI 0101,Spring2019,1,R,14:00:00,15:15:00
FI 0101,Spring2019,1,M,14:00:00,15:15:00
FI 0101,Spring2019,1,T,14:00:00,15:15:00
FI 0101,Spring2019,2,R,15:30:00,16:45:00
FI 0101,Spring2019,2,M,15:30:00,16:45:00
FI 0101,Spring2019,2,T,15:30:00,16:45:00
FI 0101,Spring2019,3,R,08:00:00,09:15:00
FI 0101,Spring2019,3,M,08:00:00,09:15:00
FI 0101,Spring2019,3,T,08:00:00,09:15:00
FI 0101,Spring2019,4,R,11:00:00,12:15:00
