In [1]:
%load_ext sql
import sqlite3
import pandas as pd

In [2]:
# Point the %sql magic at the SQLite warehouse file (path is relative to the notebook's working directory).
%sql sqlite:///CourseDataWarehouse.db

'Connected: @CourseDataWarehouse.db'

# 5. Create and Build the Data Warehouse

## A. Design a fact table and dimension tables that outline our data warehouse
![ERD](Course_Facts_ERD.png)

## B. Create SQL tables that match the data warehouse design

In [None]:
%%sql
DROP TABLE IF EXISTS INSTRUCTORS;

/* Instructor dimension: an integer key plus a mandatory instructor name. */
CREATE TABLE INSTRUCTORS (
    INSTRUCTOR_ID   INTEGER PRIMARY KEY,
    INSTRUCTOR_NAME TEXT    NOT NULL
);

In [None]:
%%sql
DROP TABLE IF EXISTS PROGRAMS;

/* Program dimension: an integer key with a mandatory code and name. */
CREATE TABLE PROGRAMS (
    PROGRAM_ID   INTEGER PRIMARY KEY,
    PROGRAM_CODE TEXT    NOT NULL,
    PROGRAM_NAME TEXT    NOT NULL
);

In [None]:
%%sql
DROP TABLE IF EXISTS LOCATIONS;

/* Location dimension: an integer key and an optional (nullable) room identifier. */
CREATE TABLE LOCATIONS (
    LOCATION_ID INTEGER PRIMARY KEY,
    ROOM_ID     TEXT
);

In [None]:
%%sql
DROP TABLE IF EXISTS TIME_SLICES;

/* Time dimension: one row per (DAY, START, END) combination, keyed by TIMECODE.
   All three descriptive columns are nullable text. */
CREATE TABLE TIME_SLICES (
    TIMECODE INTEGER PRIMARY KEY,
    DAY      TEXT,
    START    TEXT,
    END      TEXT
);

In [None]:
%%sql
DROP TABLE IF EXISTS CALENDAR_DATES;

/* Calendar dimension: the term label itself is the primary key. */
CREATE TABLE CALENDAR_DATES (TERM TEXT PRIMARY KEY);

In [None]:
%%sql
DROP TABLE IF EXISTS COURSE_FACTS;

/* Fact table: descriptive course/section attributes and enrollment measures,
   plus one foreign key per dimension (time slice, term, location, instructor,
   program).
   NOTE(review): SQLite enforces FOREIGN KEY clauses only when
   PRAGMA foreign_keys is ON for the connection - confirm whether that is
   intended here. */
CREATE TABLE COURSE_FACTS(
    CATALOG_ID TEXT NOT NULL,
    TITLE TEXT NOT NULL,
    PREREQ TEXT,
    COREQ TEXT,
    SECTION TEXT,
    CAPACITY INTEGER,
    ACTUAL INTEGER,
    REMAINING INTEGER,
    CREDITS INTEGER,
    -- INTEGER so the column type matches TIME_SLICES.TIMECODE (INTEGER PRIMARY KEY)
    TIMECODE INTEGER NOT NULL,
    TERM TEXT NOT NULL,
    LOCATION_ID INTEGER NOT NULL,
    INSTRUCTOR_ID INTEGER NOT NULL,
    PROGRAM_ID INTEGER NOT NULL,
    FOREIGN KEY (TIMECODE)
        REFERENCES TIME_SLICES (TIMECODE)
        ON UPDATE CASCADE,
    FOREIGN KEY (TERM)
        REFERENCES CALENDAR_DATES (TERM)
        ON UPDATE CASCADE,
    FOREIGN KEY (LOCATION_ID)
        REFERENCES LOCATIONS (LOCATION_ID)
        ON UPDATE CASCADE,
    FOREIGN KEY (INSTRUCTOR_ID)
        REFERENCES INSTRUCTORS (INSTRUCTOR_ID)
        ON UPDATE CASCADE,
    FOREIGN KEY (PROGRAM_ID)
        REFERENCES PROGRAMS (PROGRAM_ID)
        ON UPDATE CASCADE
);

In [None]:
%sql ATTACH DATABASE 'CourseData.db' as 'CourseData';

In [None]:
%%sql
/* Peek at the first rows of the source PROGRAMS table before copying it. */
SELECT src.*
FROM CourseData.PROGRAMS AS src
LIMIT 10;

In [None]:
%%sql
DELETE FROM PROGRAMS;

/* Copy program code and name from the source database into the PROGRAMS dimension.
   The DELETE above empties the table first so that re-running this cell does not
   append a second copy of every row (same pattern as the TIME_SLICES and
   CALENDAR_DATES loads).
   NOTE(review): PROGRAM_ID is not copied, so SQLite assigns new keys; the fact
   queries later compare PROGRAMS.PROGRAM_ID with the source Courses.Program_ID,
   which presumably relies on both numbering schemes lining up - confirm. */
INSERT INTO PROGRAMS (PROGRAM_CODE, PROGRAM_NAME)
SELECT PROGRAM_CODE, PROGRAM_NAME
FROM CourseData.PROGRAMS;

In [None]:
%%sql
/* Row count of the source INSTRUCTORS table. */
SELECT COUNT(*)
FROM CourseData.INSTRUCTORS;

In [None]:
%%sql
DELETE FROM INSTRUCTORS;

/* Copy instructor names from the source database into the INSTRUCTORS dimension.
   The DELETE above empties the table first so that re-running this cell does not
   append duplicate rows.
   NOTE(review): INSTRUCTOR_ID is not copied, so SQLite assigns new keys; the fact
   queries later compare INSTRUCTORS.INSTRUCTOR_ID with the source
   Course_Offerings.Primary_Instructor_ID, which presumably relies on both
   numbering schemes lining up - confirm. */
INSERT INTO INSTRUCTORS (INSTRUCTOR_NAME)
SELECT INSTRUCTOR_NAME
FROM CourseData.INSTRUCTORS;

In [None]:
%%sql
/* Spot-check the first rows loaded into the INSTRUCTORS dimension. */
SELECT ins.*
FROM INSTRUCTORS AS ins
LIMIT 10;

In [None]:
%%sql
/* Row count of the source LOCATIONS table. */
SELECT COUNT(*)
FROM CourseData.LOCATIONS;

In [None]:
%%sql
DELETE FROM LOCATIONS;

/* Copy room identifiers from the source database into the LOCATIONS dimension.
   The DELETE above empties the table first so that re-running this cell does not
   append duplicate rows.
   NOTE(review): LOCATION_ID is not copied, so SQLite assigns new keys; the fact
   queries later compare LOCATIONS.LOCATION_ID with the source
   Course_Meetings.Location_ID, which presumably relies on both numbering
   schemes lining up - confirm. */
INSERT INTO LOCATIONS (ROOM_ID)
SELECT ROOM_ID
FROM CourseData.LOCATIONS;

In [None]:
%%sql
/* Spot-check the first rows loaded into the LOCATIONS dimension. */
SELECT loc.*
FROM LOCATIONS AS loc
LIMIT 10;

In [None]:
%%sql
/* Preview the distinct (day, start, end) combinations found in the source meetings.
   substr(x, 12, 8) keeps 8 characters starting at position 12 of the stored value
   (presumably the HH:MM:SS part of a timestamp - confirm against the source data). */
SELECT DISTINCT
    DAY,
    substr(start, 12, 8) AS StartTime,
    substr(end, 12, 8)   AS EndTime
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)
LIMIT 10;

In [None]:
%%sql
DELETE FROM TIME_SLICES;

/* Rebuild the time dimension from scratch: one row per distinct
   (day, start, end) combination in the source meetings. TIMECODE is not
   supplied, so SQLite assigns it. substr(x, 12, 8) keeps 8 characters starting
   at position 12 of the stored start/end value. */
INSERT INTO TIME_SLICES (DAY, START, END)
SELECT DISTINCT
    DAY,
    substr(start, 12, 8) AS StartTime,
    substr(end, 12, 8)   AS EndTime
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)

In [None]:
%%sql
/* Spot-check the first rows loaded into the TIME_SLICES dimension. */
SELECT DISTINCT ts.*
FROM TIME_SLICES AS ts
LIMIT 10;

In [None]:
%%sql
DROP TABLE IF EXISTS TEMP_TABLE;

/* Helper table pairing a source MEETING_ID with a TIME_SLICES.TIMECODE.
   MEETING_ID is intentionally left without a declared type, as in the
   original definition. */
CREATE TABLE TEMP_TABLE (
    TIMECODE   INTEGER,
    MEETING_ID
);

In [None]:
%%sql
DELETE FROM TEMP_TABLE;

/* Map every source meeting to the TIMECODE of its matching time slice.
   The DELETE above empties the helper table first so that re-running this cell
   does not append duplicate (MEETING_ID, TIMECODE) pairs.
   A meeting matches a slice when day, start and end are all equal after the same
   substr(x, 12, 8) extraction used to build TIME_SLICES.
   NOTE(review): "=" never matches NULL, so meetings with a NULL day/start/end
   get no row here and will drop out of the fact load - confirm that is intended. */
INSERT INTO TEMP_TABLE (MEETING_ID, TIMECODE)
SELECT DISTINCT MEETING_ID, TIMECODE
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)
    JOIN TIME_SLICES
        ON  TIME_SLICES.DAY   = COURSE_MEETINGS.DAY
        AND TIME_SLICES.START = substr(COURSE_MEETINGS.start, 12, 8)
        AND TIME_SLICES.END   = substr(COURSE_MEETINGS.end, 12, 8);

In [None]:
%%sql
/* Spot-check the meeting-to-timecode pairs in the helper table. */
SELECT tmp.*
FROM TEMP_TABLE AS tmp
LIMIT 10;

In [None]:
%%sql
/* Preview of the meeting-to-timecode mapping computed directly from the source
   tables: a meeting matches a slice when day, start and end are all equal. */
SELECT DISTINCT MEETING_ID, TIMECODE
FROM CourseData.COURSE_OFFERINGS
    JOIN CourseData.COURSE_MEETINGS USING (COURSE_OFFERING_ID)
    JOIN TIME_SLICES
        ON  TIME_SLICES.DAY   = COURSE_MEETINGS.DAY
        AND TIME_SLICES.START = substr(COURSE_MEETINGS.start, 12, 8)
        AND TIME_SLICES.END   = substr(COURSE_MEETINGS.end, 12, 8)
LIMIT 10;

In [None]:
%%sql
/* Preview the distinct TERM values present in the source course offerings. */
SELECT DISTINCT off.TERM
FROM CourseData.Course_Offerings AS off
LIMIT 10;

In [None]:
%%sql
DELETE FROM CALENDAR_DATES;

/* Rebuild the calendar dimension from scratch: one row per distinct TERM
   appearing in the source course offerings. */
INSERT INTO CALENDAR_DATES (TERM)
SELECT DISTINCT off.TERM
FROM CourseData.Course_Offerings AS off

In [None]:
%%sql
/* Spot-check the first rows loaded into the CALENDAR_DATES dimension. */
SELECT cal.*
FROM CALENDAR_DATES AS cal
LIMIT 10;

In [None]:
%%sql
/* Preview of the fact rows with the time slice expanded into DAY/START/END.
   Source courses, offerings and meetings are joined to each warehouse dimension;
   TEMP_TABLE supplies the meeting-to-timecode mapping. */
SELECT DISTINCT
    CATALOG_ID, TITLE, PREREQ, COREQ, SECTION,
    CAPACITY, ACTUAL, REMAINING, CREDITS,
    cal.TERM,
    loc.LOCATION_ID,
    ins.INSTRUCTOR_ID,
    prog.PROGRAM_ID,
    ts.DAY, ts.START, ts.END
FROM CourseData.Courses AS c
    JOIN CourseData.Course_Offerings AS o USING (CATALOG_ID)
    JOIN CALENDAR_DATES AS cal ON cal.TERM = o.TERM
    JOIN CourseData.Course_Meetings AS m USING (COURSE_OFFERING_ID)
    JOIN LOCATIONS AS loc ON loc.LOCATION_ID = m.Location_ID
    JOIN INSTRUCTORS AS ins ON ins.INSTRUCTOR_ID = o.Primary_Instructor_ID
    JOIN PROGRAMS AS prog ON prog.PROGRAM_ID = c.Program_ID
    JOIN TEMP_TABLE AS tmp ON tmp.MEETING_ID = m.MEETING_ID
    JOIN TIME_SLICES AS ts ON ts.TIMECODE = tmp.TIMECODE
LIMIT 10;


In [None]:
%%sql
/* Preview of the exact column set that is loaded into COURSE_FACTS:
   same joins as the fact load, limited to the first 10 rows. */
SELECT DISTINCT
    CATALOG_ID, TITLE, PREREQ, COREQ, SECTION,
    CAPACITY, ACTUAL, REMAINING, CREDITS,
    cal.TERM,
    loc.LOCATION_ID,
    ins.INSTRUCTOR_ID,
    prog.PROGRAM_ID,
    ts.TIMECODE
FROM CourseData.Courses AS c
    JOIN CourseData.Course_Offerings AS o USING (CATALOG_ID)
    JOIN CALENDAR_DATES AS cal ON cal.TERM = o.TERM
    JOIN CourseData.Course_Meetings AS m USING (COURSE_OFFERING_ID)
    JOIN LOCATIONS AS loc ON loc.LOCATION_ID = m.Location_ID
    JOIN INSTRUCTORS AS ins ON ins.INSTRUCTOR_ID = o.Primary_Instructor_ID
    JOIN PROGRAMS AS prog ON prog.PROGRAM_ID = c.Program_ID
    JOIN TEMP_TABLE AS tmp ON tmp.MEETING_ID = m.MEETING_ID
    JOIN TIME_SLICES AS ts ON ts.TIMECODE = tmp.TIMECODE
LIMIT 10;

In [None]:
%%sql
DELETE FROM COURSE_FACTS;

/* Rebuild the fact table from scratch. Source courses, offerings and meetings
   are joined to each warehouse dimension; TEMP_TABLE maps a meeting to its
   TIMECODE. Because every join is an inner join, rows without a match in any
   dimension are not loaded. */
INSERT INTO COURSE_FACTS (
    CATALOG_ID, TITLE, PREREQ, COREQ, SECTION,
    CAPACITY, ACTUAL, REMAINING, CREDITS,
    TERM, LOCATION_ID, INSTRUCTOR_ID, PROGRAM_ID, TIMECODE
)
SELECT DISTINCT
    CATALOG_ID, TITLE, PREREQ, COREQ, SECTION,
    CAPACITY, ACTUAL, REMAINING, CREDITS,
    cal.TERM,
    loc.LOCATION_ID,
    ins.INSTRUCTOR_ID,
    prog.PROGRAM_ID,
    ts.TIMECODE
FROM CourseData.Courses AS c
    JOIN CourseData.Course_Offerings AS o USING (CATALOG_ID)
    JOIN CALENDAR_DATES AS cal ON cal.TERM = o.TERM
    JOIN CourseData.Course_Meetings AS m USING (COURSE_OFFERING_ID)
    JOIN LOCATIONS AS loc ON loc.LOCATION_ID = m.Location_ID
    JOIN INSTRUCTORS AS ins ON ins.INSTRUCTOR_ID = o.Primary_Instructor_ID
    JOIN PROGRAMS AS prog ON prog.PROGRAM_ID = c.Program_ID
    JOIN TEMP_TABLE AS tmp ON tmp.MEETING_ID = m.MEETING_ID
    JOIN TIME_SLICES AS ts ON ts.TIMECODE = tmp.TIMECODE;