# Data Warehouse

In [1]:
%load_ext sql
import pandas as pd
import sqlite3

In [2]:
%sql sqlite:///CourseDataWarehouse.db

'Connected: @CourseDataWarehouse.db'

# Warehouse ERD

![Course DataWarehouse ERD](CourseDataWarehouseERD.png)

# Creating Dimension Tables and Fact Table

## Course Catalog Dimension Table

In [3]:
%%sql
-- Course Catalogs table
DROP TABLE IF EXISTS CATALOG_COURSES_DIM;
CREATE TABLE CATALOG_COURSES_DIM (
    CourseID INTEGER PRIMARY KEY,
    CatalogYear TEXT NOT NULL,
    CatalogID TEXT NOT NULL,
    ProgramID INTEGER,
    CourseTitle TEXT NOT NULL,
    Credits TEXT NOT NULL,
    Prereqs TEXT,
    Coreqs TEXT,
    Fees TEXT,
    Attributes TEXT,
    Description TEXT
    
);


 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

## Programs Dimension Table

In [4]:
%%sql
-- Programs table as Dimension_Table
DROP TABLE IF EXISTS PROGRAMS_DIM;
CREATE TABLE PROGRAMS_DIM (
    ProgramID INTEGER PRIMARY KEY,
    ProgramCode TEXT NOT NULL,
    ProgramName TEXT NOT NULL
);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

## Locations Dimension Table

In [5]:
%%sql
-- Locations table
DROP TABLE IF EXISTS LOCATIONS_DIM;
CREATE TABLE LOCATIONS_DIM (
    LocationID INTEGER PRIMARY KEY,
    LocationCode TEXT NOT NULL
);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

## Instructors Dimension Table

In [6]:
%%sql
-- Instructors table
DROP TABLE IF EXISTS INSTRUCTORS_DIM;
CREATE TABLE INSTRUCTORS_DIM (
    InstructorID INTEGER PRIMARY KEY,
    Name TEXT NOT NULL
);


 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

## Fact Table

In [7]:
%%sql 
DROP TABLE IF EXISTS COURSE_MEETING_FACTS;
CREATE TABLE COURSE_MEETING_FACTS(
    CourseMeetingID INTEGER PRIMARY KEY,
    CourseOfferingID INTEGER NOT NULL,
    LocationID INTEGER NOT NULL,
    StartDateTime TEXT NOT NULL,
    EndDateTime TEXT NOT NULL,
    CatalogID TEXT NOT NULL,
    CourseID INTEGER,
    ProgramID INTEGER,
    CourseTitle TEXT NOT NULL,
    CatalogYear TEXT,
    PrimaryInstructorID INTEGER,
    Term TEXT NOT NULL,
    CRN INTEGER NOT NULL,
    Section TEXT NOT NULL,
    Credits REAL,
    Timecodes TEXT
);

    

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

# Loading Data

In [8]:
%%sql
ATTACH DATABASE 'CourseData.db' as Course_Info;

 * sqlite:///CourseDataWarehouse.db
Done.


[]

## Insert Data Into Programs

In [9]:
%%sql
INSERT INTO PROGRAMS_DIM(ProgramID, ProgramName,ProgramCode)
    SELECT DISTINCT ProgramID, ProgramName,ProgramCode
    FROM COURSE_INFO.PROGRAMS;
SELECT * 
FROM PROGRAMS_DIM
LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
83 rows affected.
Done.


ProgramID,ProgramCode,ProgramName
1,AC,Accounting
2,AE,Applied Ethics
3,AH,Art History
4,AN,Asian Studies
5,AR,Arabic


## Insert Data Into Instructors

In [10]:
%%sql
INSERT INTO INSTRUCTORS_DIM(Name)
    SELECT DISTINCT Name
    FROM COURSE_INFO.INSTRUCTORS;
SELECT * 
FROM INSTRUCTORS_DIM 
LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
1095 rows affected.
Done.


InstructorID,Name
1,Aaron K. Perkus
2,Aaron Quinn Weinstein
3,Aaron R. Van Dyke
4,Abdel Illah A. Douda
5,Adam E. Rugg


## Insert Data Into Locations

In [11]:
%%sql
INSERT INTO LOCATIONS_DIM(LOCATIONID, LOCATIONCODE)
    SELECT LOCATIONID, LOCATIONCODE
    FROM COURSE_INFO.LOCATIONS;
SELECT * 
FROM LOCATIONS_DIM 
LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
207 rows affected.
Done.


LocationID,LocationCode
1,BCC 200
2,BD
3,BH
4,BH BY ARR
5,BLM 112


## Insert Data Into Course Catalog

In [12]:
%%sql
INSERT INTO CATALOG_COURSES_DIM(COURSEID, CATALOGID, PROGRAMID, CATALOGYEAR, COURSETITLE, CREDITS, PREREQS, COREQS, FEES, ATTRIBUTES, DESCRIPTION)
    SELECT COURSEID, CATALOGID, PROGRAMID, CATALOGYEAR, COURSETITLE, CREDITS, PREREQS, COREQS, FEES, ATTRIBUTES, DESCRIPTION
    FROM COURSE_INFO.CATALOG_COURSES;
SELECT *
FROM CATALOG_COURSES_DIM
LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
4440 rows affected.
Done.


CourseID,CatalogYear,CatalogID,ProgramID,CourseTitle,Credits,Prereqs,Coreqs,Fees,Attributes,Description
1,2017_2018,AN 0301,4,Independent Study,1-3 Credits,,,,,Students undertake an individualized program of study in consultation with a director from the Asian studies faculty.
2,2017_2018,AN 0310,4,Asian Studies Seminar,3 Credits,,,,,"This seminar examines selected topics concerning Asia. This course is taught in conjunction with another 100-300 level course from a rotation of course offerings. Consult the Asian Studies director to identify the conjoined course for a given semester. The seminar concentrates on topics within the parameters of the conjoined course syllabus but adds research emphasis. Students registered for this course must complete a research project, to include 300-level research, in addition to the regular research requirements of the conjoined course, and a 25-50 page term paper in substitution of some portion of the conjoined course requirements, as determined by the instructor. Open to juniors and seniors only."
3,2017_2018,BU 0211,12,Legal Environment of Business,3 Credits,Junior standing.,,,,"This course examines the broad philosophical as well as practical nature and function of the legal system, and introduces students to the legal and social responsibilities of business. The course includes an introduction to the legal system, the federal courts, Constitutional law, the United States Supreme Court, the civil process, and regulatory areas such as employment discrimination, protection of the environment, and corporate governance and securities markets."
4,2017_2018,BU 0220,12,Environmental Law and Policy,3 Credits,,,,"EVME Environmental Studies Major Elective, EVPE Environmental Studies Elective, EVSS Environmental Studies: Social Science, MGEL Management: General Elective","This course surveys issues arising out of federal laws designed to protect the environment and manage resources. It considers in detail the role of the Environmental Protection Agency in the enforcement of environmental policies arising out of such laws as the National Environmental Policy Act, the Clean Water Act, and the Clear Air Act, among others. The course also considers the impact of Congress, political parties, bureaucracy, and interest groups in shaping environmental policy, giving special attention to the impact of environmental regulation on business and private property rights."
5,2017_2018,BU 0311,12,"The Law of Contracts, Sales, and Property",3 Credits,BU 0211.,,,,"This course examines the components of common law contracts including the concepts of offer and acceptance, consideration, capacity and legality, assignment of rights and delegation of duties, as well as discharge of contracts. The course covers Articles 2 and 2A of the Uniform Commercial Code relating to leases, sales of goods, and warranties. The course also considers personal and real property, and bailments."


## Insert Data into Fact Table 

In [13]:
%%sql 
DELETE FROM COURSE_MEETING_FACTS;
INSERT INTO COURSE_MEETING_FACTS (COURSEMEETINGID, COURSEOFFERINGID, LOCATIONID, STARTDATETIME, ENDDATETIME, CATALOGID, COURSEID, PROGRAMID, COURSETITLE, CATALOGYEAR, PRIMARYINSTRUCTORID, TERM, CRN, SECTION, CREDITS,TIMECODES)
SELECT COURSEMEETINGID, COURSEOFFERINGID, LOCATIONID, STARTDATETIME, ENDDATETIME, COURSE_OFFERINGS.CATALOGID, COURSEID, PROGRAMS_DIM.PROGRAMID, COURSE_OFFERINGS.TITLE, CATALOGYEAR, PRIMARYINSTRUCTORID, TERM, CRN, SECTION, COURSE_OFFERINGS.CREDITS,TIMECODES
FROM COURSE_OFFERINGS
    JOIN COURSE_INFO.COURSE_MEETINGS USING (COURSEOFFERINGID)
    LEFT JOIN PROGRAMS_DIM ON SUBSTR(COURSE_OFFERINGS.CATALOGID,1, INSTR(COURSE_OFFERINGS.CATALOGID, " ")-1) = PROGRAMS_DIM.PROGRAMCODE
    LEFT JOIN INSTRUCTORS_DIM ON PRIMARYINSTRUCTORID = INSTRUCTORID
    LEFT JOIN CATALOG_COURSES_DIM USING (COURSEID)
;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
284847 rows affected.


[]

In [14]:
%%sql
SELECT DISTINCT CATALOGID, TERM, CRN, TITLE
FROM Course_Info.INSTRUCTORS
    JOIN Course_Info.COURSE_OFFERINGS ON (Course_Info.INSTRUCTORS.INSTRUCTORID=Course_Info.COURSE_OFFERINGS.PRIMARYINSTRUCTORID)
LIMIT 5;

 * sqlite:///CourseDataWarehouse.db
Done.


CatalogID,Term,CRN,Title
AC 0011,Fall2014,70384,Introduction to Financial Accounting
AC 0011,Fall2014,70385,Introduction to Financial Accounting
AC 0011,Fall2014,70382,Introduction to Financial Accounting
AC 0011,Fall2014,70291,Introduction to Financial Accounting
AC 0011,Fall2014,70350,Introduction to Financial Accounting


In [15]:
%%sql
SELECT PRIMARYINSTRUCTORID, COUNT(DISTINCT COURSEID) as COURSECOUNT
FROM Course_Info.COURSE_OFFERINGS
GROUP BY PRIMARYINSTRUCTORID
HAVING COUNT(*) = 1
LIMIT 5;


 * sqlite:///CourseDataWarehouse.db
Done.


PrimaryInstructorID,COURSECOUNT
42,0
80,0
111,0
117,0
134,0
