## Import modules

In [2]:
%load_ext sql
import pandas as pd
import sqlite3 as sql

## 1. Create CourseDataWarehouse.db


In [3]:
# Connect the %sql magic to the SQLite file CourseDataWarehouse.db (relative path).
# The later %%sql cells in this notebook run against this connection.
%sql sqlite:///CourseDataWarehouse.db

'Connected: @CourseDataWarehouse.db'

<img src="StarSchemaERD.png" alt="StarSchemaERD" width="550"/>

## 2. Create DataWarehouse tables

- Create tables that match the star schema shown in the ERD above.

- Drop each table before creating it, so the cell can be re-run without "table already exists" errors or leftover rows from a previous run.


In [4]:
%%sql

-- Create the fact table, holding the dimension keys plus four Total measure columns.
-- No PRIMARY KEY or FOREIGN KEY constraints are declared on this table.
DROP TABLE IF EXISTS FACT_TABLE;
CREATE TABLE FACT_TABLE (
    -- Keys that match the dimension tables (CourseID and InstructorID may be NULL)
    CourseMeetingID    INTEGER NOT NULL,
    CourseOfferingID   INTEGER NOT NULL,
    CourseID           INTEGER,
    LocationID         INTEGER NOT NULL,
    InstructorID       INTEGER,
    -- Measures
    TotalCatalogs      INTEGER,
    TotalClassSections INTEGER,
    TotalInstructors   INTEGER,
    TotalClassrooms    INTEGER
);

-- Create the CATALOG_DIMENSION table, keyed by CourseID.
-- It combines catalog course attributes with the program they belong to.
DROP TABLE IF EXISTS CATALOG_DIMENSION;
CREATE TABLE CATALOG_DIMENSION (
    CourseID     INTEGER PRIMARY KEY,
    CatalogID    TEXT    NOT NULL,
    -- Program columns
    ProgramID    INTEGER NOT NULL,
    ProgramCode  TEXT    NOT NULL,
    ProgramName  TEXT    NOT NULL,
    -- Required course columns
    Credits      TEXT    NOT NULL,
    CourseTitle  TEXT    NOT NULL,
    -- Optional course columns
    Fees         TEXT,
    Attributes   TEXT,
    Description  TEXT,
    Prereqs      TEXT,
    Coreqs       TEXT
);

-- Create the LOCATION_DIMENSION table, keyed by LocationID.
-- Each row pairs a location key with its required location code.
DROP TABLE IF EXISTS LOCATION_DIMENSION;
CREATE TABLE LOCATION_DIMENSION (
    LocationID   INTEGER PRIMARY KEY,
    LocationCode TEXT    NOT NULL
);

-- Create the TIME_DIMENSION table, keyed by CourseMeetingID.
-- CourseOfferingID is declared INTEGER so that it has the same type affinity as
-- the CourseOfferingID columns of FACT_TABLE and COURSE_DIMENSION.
-- All remaining columns are stored as TEXT and are optional.
DROP TABLE IF EXISTS TIME_DIMENSION;
CREATE TABLE TIME_DIMENSION (
    CourseMeetingID INTEGER PRIMARY KEY,
    CourseOfferingID INTEGER NOT NULL,
    Timecodes TEXT,
    CourseStartTime TEXT,
    CourseEndTime TEXT,
    CourseDate TEXT,
    Year TEXT,
    Term TEXT,
    CatalogYear TEXT
);

-- Create the COURSE_DIMENSION table, keyed by CourseOfferingID.
-- NOTE(review) Capacity, Actual and Remaining are declared TEXT although the
-- names suggest numeric counts, confirm against the source data.
DROP TABLE IF EXISTS COURSE_DIMENSION;
CREATE TABLE COURSE_DIMENSION (
    CourseOfferingID INTEGER PRIMARY KEY,
    -- Required columns
    CRN              TEXT    NOT NULL,
    Section          TEXT    NOT NULL,
    Title            TEXT    NOT NULL,
    Capacity         TEXT    NOT NULL,
    -- Optional columns
    Actual           TEXT,
    Remaining        TEXT
);

-- Create the INSTRUCTOR_DIMENSION table, keyed by InstructorID.
-- Each row pairs an instructor key with the required instructor name.
DROP TABLE IF EXISTS INSTRUCTOR_DIMENSION;
CREATE TABLE INSTRUCTOR_DIMENSION (
    InstructorID INTEGER PRIMARY KEY,
    Name         TEXT    NOT NULL
);


 * sqlite:///CourseDataWarehouse.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.


[]

## 3. Load data into CourseDataWarehouse.db

- We use the `ATTACH DATABASE` statement to attach CourseData.db to the current connection under the schema name `CourseDataDB`, so that its tables can be queried as `CourseDataDB.TABLE_NAME`.

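- Most dimension tables need no data transformation; their data is loaded as-is from CourseData.db into the respective dimension. The exception is TIME_DIMENSION, whose start time, end time, date, and year columns are derived with `substr`.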


In [5]:
%%sql
-- Attach the source database file CourseData.db under the schema name CourseDataDB.
ATTACH DATABASE 'CourseData.db' AS CourseDataDB;

 * sqlite:///CourseDataWarehouse.db
Done.


[]

In [6]:
%%sql
-- Reload CATALOG_DIMENSION from the attached source database (schema CourseDataDB).
-- Existing rows are deleted first. Each catalog course is then combined with
-- its program through ProgramID, and duplicate result rows are removed.
DELETE FROM CATALOG_DIMENSION;

INSERT INTO CATALOG_DIMENSION (
    CourseID, CatalogID, ProgramID, ProgramCode, ProgramName, Credits,
    CourseTitle, Fees, Attributes, Description, Prereqs, Coreqs
)
SELECT DISTINCT
    CourseID,
    CatalogID,
    ProgramID,
    ProgramCode,
    ProgramName,
    Credits,
    CourseTitle,
    Fees,
    Attributes,
    Description,
    Prereqs,
    Coreqs
FROM CourseDataDB.CATALOG_COURSES
INNER JOIN CourseDataDB.PROGRAMS USING (ProgramID);

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
4440 rows affected.


[]

In [7]:
%%sql
-- Reload LOCATION_DIMENSION from the attached source database (schema CourseDataDB).
-- Existing rows are deleted first, then the distinct key and code pairs are copied.
DELETE FROM LOCATION_DIMENSION;

INSERT INTO LOCATION_DIMENSION (LocationID, LocationCode)
SELECT DISTINCT
    LocationID,
    LocationCode
FROM CourseDataDB.COURSE_LOCATIONS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
207 rows affected.


[]

In [8]:
%%sql
-- Reload TIME_DIMENSION from course meetings joined to their course offerings.
-- Start and end clock times are characters 12 to 16 of the datetime text, the
-- date is the first 10 characters, and Year is the last 4 characters of Term.
-- NOTE(review) this assumes a fixed-width datetime layout and a Term value that
-- ends in a 4 digit year, confirm against CourseData.db.
DELETE FROM TIME_DIMENSION;

INSERT INTO TIME_DIMENSION (
    CourseMeetingID, CourseOfferingID, Timecodes,
    CourseStartTime, CourseEndTime, CourseDate,
    Year, Term, CatalogYear
)
SELECT
    CourseMeetingID,
    CourseOfferingID,
    Timecodes,
    substr(StartDateTime, 12, 5) AS CourseStartTime,
    substr(EndDateTime, 12, 5)   AS CourseEndTime,
    substr(StartDateTime, 1, 10) AS CourseDate,
    substr(Term, -4)             AS Year,
    COURSE_OFFERINGS.Term,
    CatalogYear
FROM CourseDataDB.COURSE_OFFERINGS
INNER JOIN CourseDataDB.COURSE_MEETINGS USING (CourseOfferingID)
-- Offerings whose Term has no catalog year mapping keep a NULL CatalogYear
LEFT JOIN CourseDataDB.TERM_TO_CATALOG_YEAR USING (Term);

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
284847 rows affected.


[]

In [9]:
%%sql
-- Reload COURSE_DIMENSION from the attached source database (schema CourseDataDB).
-- Existing rows are deleted first, then every course offering is copied unchanged.
DELETE FROM COURSE_DIMENSION;

INSERT INTO COURSE_DIMENSION (
    CourseOfferingID, CRN, Section, Title, Capacity, Actual, Remaining
)
SELECT
    CourseOfferingID,
    CRN,
    Section,
    Title,
    Capacity,
    Actual,
    Remaining
FROM CourseDataDB.COURSE_OFFERINGS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
15937 rows affected.


[]

In [10]:
%%sql
-- Reload INSTRUCTOR_DIMENSION from the attached source database (schema CourseDataDB).
-- Existing rows are deleted first, then every instructor row is copied unchanged.
DELETE FROM INSTRUCTOR_DIMENSION;

INSERT INTO INSTRUCTOR_DIMENSION (InstructorID, Name)
SELECT
    InstructorID,
    Name
FROM CourseDataDB.COURSE_INSTRUCTORS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
1095 rows affected.


[]

In [11]:
%%sql
-- Reload FACT_TABLE from the attached source database (schema CourseDataDB).
-- The four Total columns come from uncorrelated scalar subqueries, so every
-- fact row carries the same database-wide distinct counts.
-- NOTE(review) TERM_TO_CATALOG_YEAR, CATALOG_COURSES and PROGRAMS are joined but
-- appear not to contribute any selected column. Also, the final inner join on
-- LocationID presumably drops offerings that have no meeting, which would make
-- the LEFT JOIN to COURSE_MEETINGS behave like an inner join. Confirm both.
DELETE FROM FACT_TABLE;

INSERT INTO FACT_TABLE (
    CourseMeetingID, CourseOfferingID, CourseID, LocationID, InstructorID,
    TotalCatalogs, TotalClassSections, TotalInstructors, TotalClassrooms
)
SELECT
    COURSE_MEETINGS.CourseMeetingID,
    COURSE_OFFERINGS.CourseOfferingID,
    COURSE_OFFERINGS.CourseID,
    COURSE_MEETINGS.LocationID,
    InstructorID,
    (SELECT COUNT(DISTINCT CatalogID)
       FROM CourseDataDB.COURSE_OFFERINGS)   AS TotalCatalogs,
    (SELECT COUNT(DISTINCT Section)
       FROM CourseDataDB.COURSE_OFFERINGS)   AS TotalClassSections,
    (SELECT COUNT(DISTINCT InstructorID)
       FROM CourseDataDB.COURSE_INSTRUCTORS) AS TotalInstructors,
    (SELECT COUNT(DISTINCT LocationID)
       FROM CourseDataDB.COURSE_LOCATIONS)   AS TotalClassrooms
FROM CourseDataDB.COURSE_OFFERINGS
LEFT JOIN CourseDataDB.TERM_TO_CATALOG_YEAR
    USING (Term)
LEFT JOIN CourseDataDB.CATALOG_COURSES
    ON (CATALOG_COURSES.CatalogID = COURSE_OFFERINGS.CatalogID
        AND CATALOG_COURSES.CatalogYear = TERM_TO_CATALOG_YEAR.CatalogYear)
LEFT JOIN CourseDataDB.PROGRAMS
    USING (ProgramID)
-- The primary instructor of the offering supplies InstructorID
LEFT JOIN CourseDataDB.COURSE_INSTRUCTORS
    ON (InstructorID = PrimaryInstructorID)
LEFT JOIN CourseDataDB.COURSE_MEETINGS
    USING (CourseOfferingID)
INNER JOIN CourseDataDB.COURSE_LOCATIONS
    USING (LocationID);


 * sqlite:///CourseDataWarehouse.db
0 rows affected.
284847 rows affected.


[]