# Term Project Spring 2020 - Team Game Cancelled
___

## Course Data Warehouse Test

<b> Table of Contents: </b>
<br> [1. Star Schema](#000)
<br> [2. Set Up - SQL and Database Warehouse](#100)
<br> [3. Creating Tables from Star Schema](#200)
<br> [4. Load Data into the Tables ](#300)
<br> [5.](#400)
<br> [6.](#500)
___

<a id = "000"> <h2> 1. Star Schema </h2> </a>
_Design and build data warehouse called CourseDataWarehouse.db._

![Star Schema](James/files/StarSchema.png)

In [None]:
## name - primaryinstructor 
#ADD updated image ^^

<a id = "100"> <h2> 2. Set Up - SQL and Database Warehouse </h2> </a>

### Imports

In [1]:
%load_ext sql
import pandas as pd
import sqlite3

In [2]:
# Do we still need this part?
%sql sqlite:///CourseDataWarehouse.db
conn = sqlite3.connect('CourseDataWarehouse.db')

In [3]:
%%sql
ATTACH DATABASE 'CourseData.db' AS CourseData;

 * sqlite:///CourseDataWarehouse.db
Done.


[]

<a id = "200"> <h2> 3. Create Tables from Star Schema </h2> </a>

In [4]:
%%sql

-- Instructors Dimension table
DROP TABLE IF EXISTS INSTRUCTORS_DIM;
CREATE TABLE INSTRUCTORS_DIM (
    InstructorID INTEGER PRIMARY KEY,
    Name TEXT
);

-- Location Dimension table
DROP TABLE IF EXISTS LOCATION_DIM;
CREATE TABLE LOCATION_DIM (
    LocationID INTEGER PRIMARY KEY,
    Location TEXT NOT NULL
);

-- Timecode Dimension table
DROP TABLE IF EXISTS TIMECODE_DIM;
CREATE TABLE TIMECODE_DIM (
    TimecodeID INTEGER PRIMARY KEY,
    Day TEXT,
    Start TEXT,
    End Text
);

-- Programs Dimension table
DROP TABLE IF EXISTS PROGRAMS_DIM;
CREATE TABLE PROGRAMS_DIM (
    ProgramID INTEGER PRIMARY KEY,
    ProgramCode TEXT NOT NULL,
    ProgramName TEXT NOT NULL
);


-- Course Catalogs Dimension table
DROP TABLE IF EXISTS COURSE_CATALOGS_DIM;
CREATE TABLE COURSE_CATALOGS_DIM(
    CourseID INTEGER PRIMARY KEY,
    CatalogYear TEXT NOT NULL,
    CatalogID TEXT NOT NULL,
    CourseTitle TEXT NOT NULL,
    Credits TEXT NOT NULL
);

-- Course Meetings Fact table
DROP TABLE IF EXISTS COURSE_MEETINGS_FACT;
CREATE TABLE COURSE_MEETINGS_FACT(
    CourseMeetingID INTEGER,
    CatalogID INTEGER,
    ProgramID INTEGER,
    InstructorID INTEGER,
    CourseID INTEGER,
    CourseOfferingID INTEGER,
    TimecodeID INTEGER,
    TermsID INTEGER,
    Term TEXT,
    LocationID INTEGER,
    CRN INTEGER,
    PrimaryInstructorID TEXT,
    Capacity INTEGER,
    Actual INTEGER,
    Remaining INTEGER,
    Credits INTEGER,
    FOREIGN KEY (TimecodeID) REFERENCES TIMECODE_DIM(TimecodeID),
    FOREIGN KEY (TermsID) REFERENCES TERMS_DIM(TermsID),
    FOREIGN KEY (InstructorID) REFERENCES INSTRUCTOR_DIM(InstructorID),
    FOREIGN KEY (ProgramID) REFERENCES PROGRAMS_DIM(ProgramID),
    FOREIGN KEY (CourseID) REFERENCES COURSE_CATALOGS_DIM(CourseID),
    FOREIGN KEY (LocationID) REFERENCES LOCATION_DIM(LocationID)

    
    
);


 * sqlite:///CourseDataWarehouse.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.


[]

In [5]:
-- Terms Dimension table
DROP TABLE IF EXISTS TERMS_DIM;
CREATE TABLE TERMS_DIM (
    TermsID INTEGER PRIMARY KEY,
    Term TEXT
);

SyntaxError: invalid syntax (<ipython-input-5-bd2ed31f5957>, line 1)

<a id = "300"> <h2> 3. Load Data into our Tables </h2> </a>

### Rollup Queries

_We use rollup queries to populate our tables._

In [6]:
%%sql
DELETE FROM INSTRUCTORS_DIM;

INSERT INTO INSTRUCTORS_DIM
SELECT InstructorID, Name
FROM CourseData.INSTRUCTORS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
1095 rows affected.


[]

In [7]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM INSTRUCTORS_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM INSTRUCTORS_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
1095,1095


_Testing the data loaded into Coursedata Warehouse_

In [8]:
%%sql
DELETE FROM LOCATION_DIM;

INSERT INTO LOCATION_DIM
SELECT DISTINCT LocationID, Location
FROM CourseData.LOCATION;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
207 rows affected.


[]

In [9]:
%%sql 
SELECT *
FROM LOCATION_DIM
LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
Done.


LocationID,Location
1,BCC 200
2,BD
3,BH
4,BH BY ARR
5,BLM 112


In [10]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM LOCATION_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM LOCATION_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
207,207


In [11]:
%%sql
DELETE FROM TIMECODE_DIM;

INSERT INTO TIMECODE_DIM(Day, Start, End)
SELECT DISTINCT Day, StartDateTime, EndDateTime
FROM CourseData.COURSE_MEETINGS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
39298 rows affected.


[]

In [29]:
%%sql
DROP TABLE IF EXISTS ClassTimecode;
CREATE TABLE ClassTimecode AS 
SELECT CourseMeetingID, TimecodeID
FROM CourseDate.COURSE_MEETINGS JOIN TIMECODE_DIM ON (CourseData.Meeting.Day = TIMECODE_DIM.Day AND substr(CourseData.COURSE_MEETINGS.StartDateTime,12) = TIMECODE_DIM.StartTime AND substr(CourseData.Meeting.EndDateTime,12) = TIMECODE_DIM.EndDateTime);


 * sqlite:///CourseDataWarehouse.db
Done.
(sqlite3.OperationalError) no such table: CourseDate.COURSE_MEETINGS
[SQL: CREATE TABLE ClassTimecode AS 
SELECT CourseMeetingID, TimecodeID
FROM CourseDate.COURSE_MEETINGS JOIN TIMECODE_DIM ON (CourseData.Meeting.Day = TIMECODE_DIM.Day AND substr(CourseData.COURSE_MEETINGS.StartDateTime,12) = TIMECODE_DIM.StartTime AND substr(CourseData.Meeting.EndDateTime,12) = TIMECODE_DIM.EndDateTime);]
(Background on this error at: http://sqlalche.me/e/e3q8)


In [13]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM TIMECODE_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM TIMECODE_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
39298,39298


In [14]:
%%sql
SELECT *
FROM TIMECODE_DIM
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


TimecodeID,Day,Start,End
1,T,2014-09-02T08:00:00,2014-09-02T09:15:00
2,F,2014-09-05T08:00:00,2014-09-05T09:15:00
3,T,2014-09-09T08:00:00,2014-09-09T09:15:00
4,F,2014-09-12T08:00:00,2014-09-12T09:15:00
5,T,2014-09-16T08:00:00,2014-09-16T09:15:00
6,F,2014-09-19T08:00:00,2014-09-19T09:15:00
7,T,2014-09-23T08:00:00,2014-09-23T09:15:00
8,F,2014-09-26T08:00:00,2014-09-26T09:15:00
9,T,2014-09-30T08:00:00,2014-09-30T09:15:00
10,F,2014-10-03T08:00:00,2014-10-03T09:15:00


In [15]:
%%sql
DELETE FROM PROGRAMS_DIM;

INSERT INTO PROGRAMS_DIM
SELECT ProgramID, ProgramCode, ProgramName
FROM CourseData.PROGRAMS; 

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
83 rows affected.


[]

In [16]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM PROGRAMS) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM PROGRAMS)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
83,83


In [17]:
%%sql
DELETE FROM TERMS_DIM;

INSERT INTO TERMS_DIM(Term)
SELECT DISTINCT Term
FROM CourseData.COURSE_OFFERINGS;

 * sqlite:///CourseDataWarehouse.db
(sqlite3.OperationalError) no such table: TERMS_DIM
[SQL: DELETE FROM TERMS_DIM;]
(Background on this error at: http://sqlalche.me/e/e3q8)


In [18]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM TERMS_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM TERMS_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
(sqlite3.OperationalError) no such table: TERMS_DIM
[SQL: -- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM TERMS_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM TERMS_DIM)) as 'DistinctCount';]
(Background on this error at: http://sqlalche.me/e/e3q8)


In [19]:
%%sql
SELECT *
FROM TERMS_DIM
;

 * sqlite:///CourseDataWarehouse.db
(sqlite3.OperationalError) no such table: TERMS_DIM
[SQL: SELECT *
FROM TERMS_DIM
;]
(Background on this error at: http://sqlalche.me/e/e3q8)


In [20]:
%%sql
DELETE FROM COURSE_CATALOGS_DIM;

INSERT INTO COURSE_CATALOGS_DIM
SELECT CourseID, CatalogYear, CatalogID, CourseTitle, Credits 
FROM CourseData.COURSE_CATALOGS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
4440 rows affected.


[]

In [21]:
%%sql
SELECT DISTINCT *
FROM COURSE_CATALOGS_DIM
LIMIT (5);

 * sqlite:///CourseDataWarehouse.db
Done.


CourseID,CatalogYear,CatalogID,CourseTitle,Credits
1,2017_2018,AN 0301,Independent Study,1-3 Credits
2,2017_2018,AN 0310,Asian Studies Seminar,3 Credits
3,2017_2018,BU 0211,Legal Environment of Business,3 Credits
4,2017_2018,BU 0220,Environmental Law and Policy,3 Credits
5,2017_2018,BU 0311,"The Law of Contracts, Sales, and Property",3 Credits


In [22]:
%%sql

SELECT *
FROM 
LOCATION_DIM

LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
Done.


LocationID,Location
1,BCC 200
2,BD
3,BH
4,BH BY ARR
5,BLM 112


In [None]:
## do we need termID?


# James adding in sql comments in next two cells (4/27 6pm)

In [None]:
%%sql
--select query to see what the info will look like before we insert into the fact table

SELECT *
FROM CourseData.COURSE_MEETINGS
   JOIN CourseData.COURSE_OFFERINGS USING (CourseOfferingID)
    JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,INSTR(CourseData.COURSE_OFFERINGS.CatalogID," ")-1))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFFERINGS.PrimaryInstructorID)
    LEFT JOIN LOCATION_DIM USING (LocationID)
--      LEFT JOIN TIMECODE_DIM ON (TIMECODE_DIM.Day = CourseData.COURSE_MEETINGS.Day)

WHERE Term = 'Fall2017'
LIMIT 5;


__CREATING COURSE MEETINGS FACT TABLE__

In [None]:
%%sql
DELETE FROM COURSE_MEETINGS_FACT;

INSERT INTO COURSE_MEETINGS_FACT(CatalogID,ProgramID,CourseID,CourseOfferingID,TimecodeID,LocationID,CRN,PrimaryInstructorID,Capacity,Actual,Remaining,Credits)
SELECT DISTINCT COURSE_OFFERINGS.CatalogID, PROGRAMS_DIM.ProgramID,COURSE_OFFERINGS.CourseID, COURSE_OFFERINGS.CourseOfferingID, TIMECODE_DIM.TimecodeID, LOCATION_DIM.LocationID, COURSE_OFFERINGS.CRN, CourseData.COURSE_OFFERINGS.PrimaryInstructorID, COURSE_OFFERINGS.Capacity, COURSE_OFFERINGS.Actual, COURSE_OFFERINGS.Remaining, COURSE_OFFERINGS.Credits

FROM CourseData.COURSE_MEETINGS
    JOIN CourseData.COURSE_OFFERINGS ON (CourseData.COURSE_OFFERINGS.CourseOfferingID = CourseData.COURSE_MEETINGS.CourseOfferingID)
    LEFT JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,2))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFFERINGS.PrimaryInstructorID)
    LEFT JOIN LOCATION_DIM ON (LOCATION_DIM.LocationID = CourseData.COURSE_MEETINGS.LocationID)
    LEFT JOIN TIMECODE_DIM ON (TIMECODE_DIM.Day = CourseData.COURSE_MEETINGS.Day)
    LEFT JOIN COURSE_CATALOGS_DIM ON (COURSE_CATALOGS_DIM.CourseID = CourseData.COURSE_CATALOGS.CourseID);


In [None]:
%%sql

SELECT *
FROM COURSE_MEETINGS_FACT

WHERE TimecodeID = '7'
LIMIT 5;


In [None]:
%%sql
vacuum;

In [None]:
%%sql
DELETE FROM COURSE_MEETINGS_FACT;

INSERT INTO COURSE_MEETINGS_FACT(CatalogID,ProgramID,CourseID,CourseOfferingID,TimecodeID,LocationID,CRN,PrimaryInstructorID,Capacity,Actual,Remaining,Credits)
SELECT DISTINCT COURSE_OFFERINGS.CatalogID, PROGRAMS_DIM.ProgramID,COURSE_OFFERINGS.CourseID, COURSE_OFFERINGS.CourseOfferingID, TIMECODE_DIM.TimecodeID, LOCATION_DIM.LocationID, COURSE_OFFERINGS.CRN, CourseData.COURSE_OFFERINGS.PrimaryInstructorID, COURSE_OFFERINGS.Capacity, COURSE_OFFERINGS.Actual, COURSE_OFFERINGS.Remaining, COURSE_OFFERINGS.Credits

FROM CourseData.COURSE_MEETINGS
    JOIN CourseData.COURSE_OFFERINGS USING (CourseOfferingID)
    JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,2))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFFERINGS.PrimaryInstructorID)
    LEFT JOIN LOCATION_DIM USING (LocationID)
    LEFT JOIN TIMECODE_DIM ON (TIMECODE_DIM.Day = CourseData.COURSE_MEETINGS.Day)
    LEFT JOIN COURSE_CATALOGS_DIM USING (CourseID, CatalogID);