# Term Project Spring 2020 - Team Game Cancelled
___

## Course Data Warehouse Test

<b> Table of Contents: </b>
<br> [1. Star Schema](#000)
<br> [2. Set Up - SQL and Database Warehouse](#100)
<br> [3. Creating Tables from Star Schema](#200)
<br> [4. Load Data into the Tables ](#300)
<br> [5.](#400)
<br> [6.](#500)
___

<a id = "000"> <h2> 1. Star Schema </h2> </a>
_Design and build data warehouse called CourseDataWarehouse.db._

![Star Schema](James/files/StarSchema.png)

In [1]:
#ADD updated image ^^

<a id = "100"> <h2> 2. Set Up - SQL and Database Warehouse </h2> </a>

### Imports

In [2]:
%load_ext sql
import pandas as pd
import sqlite3

In [3]:
# Do we still need this part?
%sql sqlite:///CourseDataWarehouse.db
conn = sqlite3.connect('CourseDataWarehouse.db')

In [4]:
%%sql
ATTACH DATABASE 'CourseData.db' AS CourseData;

 * sqlite:///CourseDataWarehouse.db
Done.


[]

In [5]:
%%sql

SELECT *
FROM CourseData.LOCATION
GROUP BY (LocationID)
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


LocationID
BCC 200
BD
BH
BH BY ARR
BLM 112
BLM LL105
BNW 124
BNW 127
BNW 128
BNW 129B


<a id = "200"> <h2> 3. Create Tables from Star Schema </h2> </a>

In [6]:
%%sql

-- Instructors Dimension table
DROP TABLE IF EXISTS INSTRUCTORS_DIM;
CREATE TABLE INSTRUCTORS_DIM (
    InstructorID INTEGER PRIMARY KEY,
    Name TEXT NOT NULL
);

-- Location Dimension table
DROP TABLE IF EXISTS LOCATION_DIM;
CREATE TABLE LOCATION_DIM (
    LocationID TEXT PRIMARY KEY
);

-- Timecode Dimension table
DROP TABLE IF EXISTS TIMECODE_DIM;
CREATE TABLE TIMECODE_DIM (
    TimecodeID INTEGER PRIMARY KEY,
    Day TEXT,
    Start TEXT,
    End Text
);

-- Programs Dimension table
DROP TABLE IF EXISTS PROGRAMS_DIM;
CREATE TABLE PROGRAMS_DIM (
    ProgramID INTEGER PRIMARY KEY,
    ProgramCode TEXT NOT NULL,
    ProgramName TEXT NOT NULL
);

-- Terms Dimension table
DROP TABLE IF EXISTS TERMS_DIM;
CREATE TABLE TERMS_DIM (
    TermsID INTEGER PRIMARY KEY,
    Term TEXT NOT NULL
);

-- Course Catalogs Dimension table
DROP TABLE IF EXISTS COURSE_CATALOGS_DIM;
CREATE TABLE COURSE_CATALOGS_DIM(
    CourseID INTEGER PRIMARY KEY,
    CatalogYear TEXT NOT NULL,
    CatalogID TEXT NOT NULL,
    CourseTitle TEXT NOT NULL,
    Credits TEXT NOT NULL,
    Prequisites TEXT,
    Corequisites TEXT,
    Attributes TEXT
);

-- Course Meetings Fact table
DROP TABLE IF EXISTS COURSE_MEETINGS_FACT;
CREATE TABLE COURSE_MEETINGS_FACT(
    CourseMeetingID INTEGER PRIMARY KEY,
    CatalogID TEXT NOT NULL,
    ProgramID TEXT NOT NULL,
    InstructorID INTEGER NOT NULL,
    CourseID INTEGER NOT NULL,
    CourseOfferingID INTEGER NOT NULL,
    TimecodeID INTEGER,
    TermsID INTEGER,
    Term TEXT NOT NULL,
    LocationID TEXT NOT NULL,
    CRN INTEGER NOT NULL,
    PrimaryInstructorID INTEGER,
    Capacity INTEGER NOT NULL,
    Actual INTEGER NOT NULL,
    Remaining INTEGER NOT NULL,
    Credits INTEGER NOT NULL,
    FOREIGN KEY (TimecodeID) REFERENCES TIMECODE_DIM(TimecodeID),
    FOREIGN KEY (Term) REFERENCES TERMS_DIM(Term)
    
);


 * sqlite:///CourseDataWarehouse.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.


[]

<a id = "300"> <h2> 3. Load Data into our Tables </h2> </a>

### Rollup Queries

_We use rollup queries to populate our tables._

In [7]:
%%sql
DELETE FROM INSTRUCTORS_DIM;

INSERT INTO INSTRUCTORS_DIM
SELECT InstructorID, Name
FROM CourseData.INSTRUCTORS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
1095 rows affected.


[]

In [8]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM INSTRUCTORS_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM INSTRUCTORS_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
1095,1095


_Testing the data loaded into Coursedata Warehouse_

In [9]:
%%sql
DELETE FROM LOCATION_DIM;

INSERT INTO LOCATION_DIM
SELECT DISTINCT LocationID
FROM CourseData.LOCATION;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
207 rows affected.


[]

In [10]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM LOCATION_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM LOCATION_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
207,207


In [11]:
%%sql
DELETE FROM TIMECODE_DIM;

INSERT INTO TIMECODE_DIM(Day, Start, End)
SELECT DISTINCT Day, StartDateTime, EndDateTime
FROM CourseData.COURSE_MEETINGS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
39298 rows affected.


[]

In [12]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM TIMECODE_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM TIMECODE_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
39298,39298


In [13]:
%%sql
SELECT *
FROM TIMECODE_DIM
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


TimecodeID,Day,Start,End
1,T,2014-09-02T08:00:00,2014-09-02T09:15:00
2,F,2014-09-05T08:00:00,2014-09-05T09:15:00
3,T,2014-09-09T08:00:00,2014-09-09T09:15:00
4,F,2014-09-12T08:00:00,2014-09-12T09:15:00
5,T,2014-09-16T08:00:00,2014-09-16T09:15:00
6,F,2014-09-19T08:00:00,2014-09-19T09:15:00
7,T,2014-09-23T08:00:00,2014-09-23T09:15:00
8,F,2014-09-26T08:00:00,2014-09-26T09:15:00
9,T,2014-09-30T08:00:00,2014-09-30T09:15:00
10,F,2014-10-03T08:00:00,2014-10-03T09:15:00


In [14]:
%%sql
DELETE FROM PROGRAMS_DIM;

INSERT INTO PROGRAMS_DIM
SELECT ProgramID, ProgramCode, ProgramName
FROM CourseData.PROGRAMS; 

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
83 rows affected.


[]

In [15]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM PROGRAMS) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM PROGRAMS)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
83,83


In [16]:
%%sql
DELETE FROM TERMS_DIM;

INSERT INTO TERMS_DIM(Term)
SELECT DISTINCT Term
FROM CourseData.COURSE_OFFERINGS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
19 rows affected.


[]

In [17]:
%%sql
-- Record Counts for Course Meetings
SELECT 
    (SELECT Count(*) FROM TERMS_DIM) as 'RawCount',
    (SELECT Count(*) FROM (SELECT DISTINCT * FROM TERMS_DIM)) as 'DistinctCount';

 * sqlite:///CourseDataWarehouse.db
Done.


RawCount,DistinctCount
19,19


In [18]:
%%sql
SELECT *
FROM TERMS_DIM
;

 * sqlite:///CourseDataWarehouse.db
Done.


TermsID,Term
1,Fall2014
2,Fall2015
3,Fall2016
4,Fall2017
5,Fall2018
6,Spring2015
7,Spring2016
8,Spring2017
9,Spring2018
10,Spring2019


In [19]:
%%sql
DELETE FROM COURSE_CATALOGS_DIM;

INSERT INTO COURSE_CATALOGS_DIM
SELECT CourseID, CatalogYear, CatalogID, CourseTitle, Credits, Prequisites, Corequisites, Attributes 
FROM CourseData.COURSE_CATALOGS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
4440 rows affected.


[]

In [20]:
%%sql
SELECT DISTINCT *
FROM COURSE_CATALOGS_DIM
LIMIT (5);

 * sqlite:///CourseDataWarehouse.db
Done.


CourseID,CatalogYear,CatalogID,CourseTitle,Credits,Prequisites,Corequisites,Attributes
1,2017_2018,AN 0301,Independent Study,1-3 Credits,,,
2,2017_2018,AN 0310,Asian Studies Seminar,3 Credits,,,
3,2017_2018,BU 0211,Legal Environment of Business,3 Credits,Junior standing.,,
4,2017_2018,BU 0220,Environmental Law and Policy,3 Credits,,,"EVME Environmental Studies Major Elective, EVPE Environmental Studies Elective, EVSS Environmental Studies: Social Science, MGEL Management: General Elective"
5,2017_2018,BU 0311,"The Law of Contracts, Sales, and Property",3 Credits,BU 0211.,,


In [21]:
%%sql

SELECT *
FROM 
LOCATION_DIM

LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
Done.


LocationID
BCC 200
BD
BH
BH BY ARR
BLM 112


In [36]:
## do we need termID?


# James adding in sql comments in next two cells (4/27 6pm)

In [35]:
%%sql
--select query to see what the info will look like before we insert into the fact table

SELECT *
FROM CourseData.COURSE_MEETINGS
   JOIN CourseData.COURSE_OFFERINGS USING (CourseOfferingID)
    JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,INSTR(CourseData.COURSE_OFFERINGS.CatalogID," ")-1))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFFERINGS.PrimaryInstructorID)
    LEFT JOIN LOCATION_DIM USING (LocationID)
    LEFT JOIN TIMECODE_DIM ON (TIMECODE_DIM.Day = CourseData.COURSE_MEETINGS.Day)

WHERE Term = 'Fall2017'
LIMIT 5;


 * sqlite:///CourseDataWarehouse.db
Done.


CourseMeetingID,CourseOfferingID,CRN,LocationID,Day,StartDateTime,EndDateTime,CourseID,Term,CRN_1,CatalogID,Section,Credits,Title,Timecodes,PrimaryInstructorID,Capacity,Actual,Remaining,ProgramID,ProgramCode,ProgramName,InstructorID,Name,TimecodeID,Day_1,Start,End
164495,8706,71188,DSB 110A,R,2017-09-07T11:00:00,2017-09-07T12:15:00,113,Fall2017,71188,AC 0011,01C,3.0,Introduction to Financial Accounting,['MR 1100am-1215pm 09/05-12/11 DSB 110A'],Rebecca I. Bloch,0,28,-28,1,AC,Accounting,2,Rebecca I. Bloch,1142,R,2014-08-28T18:00:00,2014-08-28T21:30:00
164495,8706,71188,DSB 110A,R,2017-09-07T11:00:00,2017-09-07T12:15:00,113,Fall2017,71188,AC 0011,01C,3.0,Introduction to Financial Accounting,['MR 1100am-1215pm 09/05-12/11 DSB 110A'],Rebecca I. Bloch,0,28,-28,1,AC,Accounting,2,Rebecca I. Bloch,2474,R,2014-09-04T08:00:00,2014-09-04T08:50:00
164495,8706,71188,DSB 110A,R,2017-09-07T11:00:00,2017-09-07T12:15:00,113,Fall2017,71188,AC 0011,01C,3.0,Introduction to Financial Accounting,['MR 1100am-1215pm 09/05-12/11 DSB 110A'],Rebecca I. Bloch,0,28,-28,1,AC,Accounting,2,Rebecca I. Bloch,574,R,2014-09-04T08:00:00,2014-09-04T09:15:00
164495,8706,71188,DSB 110A,R,2017-09-07T11:00:00,2017-09-07T12:15:00,113,Fall2017,71188,AC 0011,01C,3.0,Introduction to Financial Accounting,['MR 1100am-1215pm 09/05-12/11 DSB 110A'],Rebecca I. Bloch,0,28,-28,1,AC,Accounting,2,Rebecca I. Bloch,2822,R,2014-09-04T08:00:00,2014-09-04T09:50:00
164495,8706,71188,DSB 110A,R,2017-09-07T11:00:00,2017-09-07T12:15:00,113,Fall2017,71188,AC 0011,01C,3.0,Introduction to Financial Accounting,['MR 1100am-1215pm 09/05-12/11 DSB 110A'],Rebecca I. Bloch,0,28,-28,1,AC,Accounting,2,Rebecca I. Bloch,2736,R,2014-09-04T08:00:00,2014-09-04T15:00:00


In [31]:
%%sql
-- James and Mike working with the Fact Table
-- fact table should have 16 columns but we have 28 from the query above^

DELETE FROM COURSE_MEETINGS_FACT;

INSERT INTO COURSE_MEETINGS_FACT
SELECT DISTINCT *
FROM CourseData.COURSE_MEETINGS
   JOIN CourseData.COURSE_OFFERINGS USING (CourseOfferingID)
    JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,INSTR(CourseData.COURSE_OFFERINGS.CatalogID," ")-1))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFFERINGS.PrimaryInstructorID)
    LEFT JOIN LOCATION_DIM USING (LocationID)
    LEFT JOIN TIMECODE_DIM ON (TIMECODE_DIM.Day = CourseData.COURSE_MEETINGS.Day)

WHERE Term = 'Fall2017'
LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
(sqlite3.OperationalError) table COURSE_MEETINGS_FACT has 16 columns but 28 values were supplied
[SQL: INSERT INTO COURSE_MEETINGS_FACT
SELECT DISTINCT *
FROM CourseData.COURSE_MEETINGS
   JOIN CourseData.COURSE_OFFERINGS USING (CourseOfferingID)
    JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,INSTR(CourseData.COURSE_OFFERINGS.CatalogID," ")-1))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFFERINGS.PrimaryInstructorID)
    LEFT JOIN LOCATION_DIM USING (LocationID)
    LEFT JOIN TIMECODE_DIM ON (TIMECODE_DIM.Day = CourseData.COURSE_MEETINGS.Day)

WHERE Term = 'Fall2017'
LIMIT 5;]
(Background on this error at: http://sqlalche.me/e/e3q8)


In [None]:
# James and Mike did not change the cells below

In [24]:
## do we need to make a course_meeting_fact table

In [25]:
%%sql
--DELETE FROM COURSE_MEETINGS_FACT;

--INSERT INTO COURSE_MEETINGS_FACT(CourseMeetingID,CatalogID,ProgramID,InstructorID,CourseID,CourseOfferingID,LocationID,CRN,InstructorID,Capacity,Actual,Remaining,Credits)
--SELECT DISTINCT COURSE_MEETINGS.CourseMeetingID, CourseData.COURSE_OFFERINGS.CatalogID, PROGRAMS.ProgramID, INSTRUCTORS.InstructorID, COURSE_CATALOGS.CourseID, COURSE_OFFERINGS.CourseOfferingID, LOCATION.LocationID, COURSE_MEETINGS.CRN, INSTRUCTORS.InstructorID, COURSE_OFFERINGS.Capacity, COURSE_OFFERINGS.Actual, COURSE_OFFERINGS.Remaining, COURSE_OFFERINGS.Credits

FROM CourseData.COURSE_MEETINGS
   JOIN CourseData.COURSE_OFFERINGS USING (CourseOfferingID)
    JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,INSTR(CourseData.COURSE_OFFERINGS.CatalogID," ")-1))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFFERINGS.PrimaryInstructorID)
--    LEFT JOIN LOCATION_DIM USING (LocationID)
--    LEFT JOIN TIMECODE_DIM ON (TIMECODE_DIM.Day = CourseData.COURSE_MEETINGS.Day)

WHERE Term = '2017'
LIMIT 5;


 * sqlite:///CourseDataWarehouse.db
(sqlite3.OperationalError) near "FROM": syntax error
[SQL: --DELETE FROM COURSE_MEETINGS_FACT;

--INSERT INTO COURSE_MEETINGS_FACT(CourseMeetingID,CatalogID,ProgramID,InstructorID,CourseID,CourseOfferingID,LocationID,CRN,InstructorID,Capacity,Actual,Remaining,Credits)
--SELECT DISTINCT COURSE_MEETINGS.CourseMeetingID, CourseData.COURSE_OFFERINGS.CatalogID, PROGRAMS.ProgramID, INSTRUCTORS.InstructorID, COURSE_CATALOGS.CourseID, COURSE_OFFERINGS.CourseOfferingID, LOCATION.LocationID, COURSE_MEETINGS.CRN, INSTRUCTORS.InstructorID, COURSE_OFFERINGS.Capacity, COURSE_OFFERINGS.Actual, COURSE_OFFERINGS.Remaining, COURSE_OFFERINGS.Credits

FROM CourseData.COURSE_MEETINGS
   JOIN CourseData.COURSE_OFFERINGS USING (CourseOfferingID)
    JOIN PROGRAMS_DIM ON (PROGRAMS_DIM.ProgramCode = SUBSTR(CourseData.COURSE_OFFERINGS.CatalogID,1,INSTR(CourseData.COURSE_OFFERINGS.CatalogID," ")-1))
    LEFT JOIN INSTRUCTORS_DIM ON (INSTRUCTORS_DIM.Name = CourseData.COURSE_OFF