## Create CourseDataWarehouse.db

In [11]:
%load_ext sql
import pandas as pd
import sqlite3 as sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [12]:
# Point the %sql / %%sql magics at the warehouse database file; the later
# %%sql cells report this connection string in their output.
%sql sqlite:///CourseDataWarehouse.db
# Open a separate sqlite3 connection to the same file (sqlite3 is imported as `sql`).
conn_dw = sql.connect('CourseDataWarehouse.db')

In [13]:
%%sql

-- Create the fact table of the star schema.
-- Its key columns share names with the primary keys of the dimension tables
-- created in this cell. The Total* columns are measures that are filled in
-- by the FACT_TABLE insert cell.
DROP TABLE IF EXISTS FACT_TABLE;
CREATE TABLE FACT_TABLE (
    -- keys
    CourseMeetingID     INTEGER NOT NULL,
    CourseOfferingID    INTEGER,
    CourseID            INTEGER,
    LocationID          INTEGER,
    InstructorID        INTEGER,
    -- measures
    TotalCatalogs       INTEGER,
    TotalClassSections  INTEGER,
    TotalInstructors    INTEGER,
    TotalClassrooms     INTEGER
);

-- Create the CATALOG_DIMENSION table, keyed by CourseID.
-- It is filled by the CATALOG_DIMENSION insert cell from
-- orig_db.CATALOG_COURSES joined to orig_db.PROGRAMS.
DROP TABLE IF EXISTS CATALOG_DIMENSION;
CREATE TABLE CATALOG_DIMENSION (
    CourseID     INTEGER PRIMARY KEY,
    -- required columns
    CatalogID    TEXT    NOT NULL,
    ProgramID    INTEGER NOT NULL,
    ProgramCode  TEXT    NOT NULL,
    ProgramName  TEXT    NOT NULL,
    Credits      TEXT    NOT NULL,
    CourseTitle  TEXT    NOT NULL,
    -- optional columns
    Fees         TEXT,
    Attributes   TEXT,
    Description  TEXT,
    Prereqs      TEXT,
    Coreqs       TEXT
);

-- Create the LOCATION_DIMENSION table, keyed by LocationID.
-- It is filled by the LOCATION_DIMENSION insert cell from orig_db.COURSE_LOCATIONS.
DROP TABLE IF EXISTS LOCATION_DIMENSION;
CREATE TABLE LOCATION_DIMENSION (
    LocationID    INTEGER PRIMARY KEY,
    LocationCode  TEXT    NOT NULL
);

-- Create the TIME_DIMENSION table, keyed by CourseMeetingID.
-- CourseOfferingID is declared INTEGER so that it has the same declared type
-- as the CourseOfferingID columns of COURSE_DIMENSION and FACT_TABLE.
-- The start time, end time and date columns are stored as TEXT substrings
-- produced by the TIME_DIMENSION insert cell.
DROP TABLE IF EXISTS TIME_DIMENSION;
CREATE TABLE TIME_DIMENSION (
    CourseMeetingID INTEGER PRIMARY KEY,
    CourseOfferingID INTEGER NOT NULL,
    Timecodes TEXT,
    CourseStartTime TEXT,
    CourseEndTime TEXT,
    CourseDate TEXT,
    Year TEXT,
    Term TEXT,
    CatalogYear TEXT
);

-- Create the COURSE_DIMENSION table, keyed by CourseOfferingID.
-- It is filled by the COURSE_DIMENSION insert cell from orig_db.COURSE_OFFERINGS.
-- NOTE(review) Capacity, Actual and Remaining are declared TEXT - confirm
-- whether a numeric type was intended.
DROP TABLE IF EXISTS COURSE_DIMENSION;
CREATE TABLE COURSE_DIMENSION (
    CourseOfferingID  INTEGER PRIMARY KEY,
    CRN               TEXT NOT NULL,
    Section           TEXT NOT NULL,
    Title             TEXT NOT NULL,
    Capacity          TEXT NOT NULL,
    Actual            TEXT,
    Remaining         TEXT
);

-- Create the INSTRUCTOR_DIMENSION table, keyed by InstructorID.
-- It is filled by the INSTRUCTOR_DIMENSION insert cell from orig_db.COURSE_INSTRUCTORS.
DROP TABLE IF EXISTS INSTRUCTOR_DIMENSION;
CREATE TABLE INSTRUCTOR_DIMENSION (
    InstructorID  INTEGER PRIMARY KEY,
    Name          TEXT    NOT NULL
);


 * sqlite:///CourseDataWarehouse.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.


[]

## Insert data from CourseData.db into CourseDataWarehouse.db

![StarSchemaERD](StarSchemaERD.png)

In [14]:
%%sql
attach database 'CourseData.db' as orig_db

 * sqlite:///CourseDataWarehouse.db
(sqlite3.OperationalError) database orig_db is already in use
[SQL: attach database 'CourseData.db' as orig_db]
(Background on this error at: http://sqlalche.me/e/e3q8)


In [15]:
%%sql
DELETE FROM CATALOG_DIMENSION;

-- The DELETE above empties the table so that this cell can be re-run.
-- Load the distinct catalog course rows together with the code and name of
-- the program they are joined to on ProgramID.
INSERT INTO CATALOG_DIMENSION (
    CourseID, CatalogID, ProgramID, ProgramCode, ProgramName, Credits,
    CourseTitle, Fees, Attributes, Description, Prereqs, Coreqs
)
SELECT DISTINCT
    CourseID, CatalogID, ProgramID, ProgramCode, ProgramName, Credits,
    CourseTitle, Fees, Attributes, Description, Prereqs, Coreqs
FROM orig_db.CATALOG_COURSES
INNER JOIN orig_db.PROGRAMS USING (ProgramID);

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
4440 rows affected.


[]

In [16]:
%%sql
DELETE FROM LOCATION_DIMENSION;

-- The DELETE above empties the table so that this cell can be re-run.
-- Copy every distinct LocationID and LocationCode pair from the source database.
INSERT INTO LOCATION_DIMENSION (LocationID, LocationCode)
SELECT DISTINCT
    LocationID,
    LocationCode
FROM orig_db.COURSE_LOCATIONS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
207 rows affected.


[]

In [17]:
%%sql
DELETE FROM TIME_DIMENSION;

-- The DELETE above empties the table so that this cell can be re-run.
-- Load course offerings joined to their meetings. CatalogYear comes from an
-- optional (LEFT JOIN) lookup on Term.
-- CourseStartTime and CourseEndTime are characters 12 to 16 of the date-time
-- text, CourseDate is its first 10 characters, and Year is the last 4
-- characters of Term.
INSERT INTO TIME_DIMENSION (
    CourseMeetingID, CourseOfferingID, Timecodes,
    CourseStartTime, CourseEndTime, CourseDate,
    Year, Term, CatalogYear
)
SELECT
    CourseMeetingID,
    CourseOfferingID,
    Timecodes,
    substr(StartDateTime, 12, 5) AS CourseStartTime,
    substr(EndDateTime, 12, 5)   AS CourseEndTime,
    substr(StartDateTime, 1, 10) AS CourseDate,
    substr(Term, -4)             AS Year,
    Term,
    CatalogYear
FROM orig_db.COURSE_OFFERINGS
INNER JOIN orig_db.COURSE_MEETINGS USING (CourseOfferingID)
LEFT JOIN orig_db.TERM_TO_CATALOG_YEAR USING (Term);

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
284847 rows affected.


[]

In [18]:
%%sql
DELETE FROM COURSE_DIMENSION;

-- The DELETE above empties the table so that this cell can be re-run.
-- Copy the offering columns straight from the source COURSE_OFFERINGS table.
INSERT INTO COURSE_DIMENSION (
    CourseOfferingID, CRN, Section, Title, Capacity, Actual, Remaining
)
SELECT
    CourseOfferingID,
    CRN,
    Section,
    Title,
    Capacity,
    Actual,
    Remaining
FROM orig_db.COURSE_OFFERINGS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
15937 rows affected.


[]

In [19]:
%%sql
DELETE FROM INSTRUCTOR_DIMENSION;

-- The DELETE above empties the table so that this cell can be re-run.
-- Copy InstructorID and Name straight from the source COURSE_INSTRUCTORS table.
INSERT INTO INSTRUCTOR_DIMENSION (InstructorID, Name)
SELECT
    InstructorID,
    Name
FROM orig_db.COURSE_INSTRUCTORS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
1095 rows affected.


[]

In [20]:
%%sql
DELETE FROM FACT_TABLE;

-- The DELETE above empties the table so that this cell can be re-run.
-- Load the key columns for each joined offering, meeting and location row.
-- The four Total* measures are uncorrelated scalar subqueries, so every
-- inserted row receives the same overall distinct counts.
-- NOTE(review) no selected column is qualified with TERM_TO_CATALOG_YEAR,
-- CATALOG_COURSES or PROGRAMS - confirm whether those joins are still needed.
INSERT INTO FACT_TABLE (
    CourseMeetingID, CourseOfferingID, CourseID, LocationID, InstructorID,
    TotalCatalogs, TotalClassSections, TotalInstructors, TotalClassrooms
)
SELECT
    CourseMeetingID,
    COURSE_OFFERINGS.CourseOfferingID,
    COURSE_OFFERINGS.CourseID,
    LocationID,
    InstructorID,
    (SELECT COUNT(DISTINCT CatalogID) FROM orig_db.COURSE_OFFERINGS)      AS TotalCatalogs,
    (SELECT COUNT(DISTINCT Section) FROM orig_db.COURSE_OFFERINGS)        AS TotalClassSections,
    (SELECT COUNT(DISTINCT InstructorID) FROM orig_db.COURSE_INSTRUCTORS) AS TotalInstructors,
    (SELECT COUNT(DISTINCT LocationID) FROM orig_db.COURSE_LOCATIONS)     AS TotalClassrooms
FROM orig_db.COURSE_OFFERINGS
LEFT JOIN orig_db.TERM_TO_CATALOG_YEAR
    USING (Term)
LEFT JOIN orig_db.CATALOG_COURSES
    ON  CATALOG_COURSES.CatalogID = COURSE_OFFERINGS.CatalogID
    AND CATALOG_COURSES.CatalogYear = TERM_TO_CATALOG_YEAR.CatalogYear
LEFT JOIN orig_db.PROGRAMS
    USING (ProgramID)
LEFT JOIN orig_db.COURSE_INSTRUCTORS
    ON InstructorID = PrimaryInstructorID
INNER JOIN orig_db.COURSE_MEETINGS
    USING (CourseOfferingID)
INNER JOIN orig_db.COURSE_LOCATIONS
    USING (LocationID);


 * sqlite:///CourseDataWarehouse.db
0 rows affected.
284847 rows affected.


[]