## **COURSE DATA WAREHOUSE**

In [1]:
%load_ext sql
import sqlite3
%sql sqlite:///CourseDataWarehouse.db

'Connected: @CourseDataWarehouse.db'

In [2]:
%%sql
ATTACH DATABASE 'CourseData.db' AS coursedata;

 * sqlite:///CourseDataWarehouse.db
Done.


[]

# **Data Warehouse Design**

### **1. Create *dimention* tables**

In [3]:
%%sql
-- Instructor table
DROP TABLE IF EXISTS INSTRUCTOR1;
CREATE TABLE INSTRUCTOR1 (
    Instructor_id INTEGER PRIMARY KEY,
    Name TEXT NOT NULL,
    Phone INTEGER,
    Email TEXT
);

-- Program table
DROP TABLE IF EXISTS PROGRAM1;
CREATE TABLE PROGRAM1 (
    Program_id INTEGER PRIMARY KEY,
    Program_code TEXT NOT NULL,
    Program_name TEXT NOT NULL
);

-- Term table
DROP TABLE IF EXISTS TERM1;
CREATE TABLE TERM1 (
    Term_id INTEGER PRIMARY KEY,
    Term TEXT NOT NULL
);

-- Location table
DROP TABLE IF EXISTS LOCATION1;
CREATE TABLE LOCATION1 (
    Location_id INTEGER PRIMARY KEY,
    Location TEXT NOT NULL
);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.


[]

In [4]:
%%sql
DELETE FROM INSTRUCTOR1;
INSERT INTO INSTRUCTOR1(Instructor_id, Name, Phone, Email ) 
    SELECT Instructor_id, Name, Phone, Email 
    FROM coursedata.INSTRUCTOR;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
1095 rows affected.


[]

%%sql
PRAGMA table_info();

In [5]:
%%sql
DELETE FROM PROGRAM1;
INSERT INTO PROGRAM1(Program_id, Program_code, Program_name) 
    SELECT Program_id, Program_code, Program_name 
    FROM coursedata.PROGRAM;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
83 rows affected.


[]

In [6]:
%%sql
DELETE FROM TERM1;
INSERT INTO TERM1(Term) 
    SELECT DISTINCT Term 
    FROM coursedata.CLASS;

 * sqlite:///CourseDataWarehouse.db
0 rows affected.
19 rows affected.


[]

In [28]:
%%sql
SELECT *
FROM TERM1

 * sqlite:///CourseDataWarehouse.db
Done.


Term_id,Term
1,Fall2014
2,Fall2015
3,Fall2016
4,Fall2017
5,Fall2018
6,Spring2015
7,Spring2016
8,Spring2017
9,Spring2018
10,Spring2019


In [None]:
%%sql
DELETE FROM LOCATION1;
INSERT INTO LOCATION1(Location_id, Location) 
SELECT Location_id, Location
    FROM coursedata.LOCATION;

### **2. Create and modify COURSE table**

In [8]:
%%sql
DROP TABLE IF EXISTS COURSE1;
CREATE TABLE COURSE1 AS SELECT * 
    FROM coursedata.COURSE 

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

%%sql
DROP TABLE IF EXISTS COURSE1;
CREATE TABLE COURSE1 AS SELECT * 
    FROM coursedata.COURSE 
    WHERE Catalog_year = 
        (SELECT DISTINCT(Catalog_year) 
             FROM coursedata.COURSE)

In [9]:
%%sql
PRAGMA table_info(COURSE1);

 * sqlite:///CourseDataWarehouse.db
Done.


cid,name,type,notnull,dflt_value,pk
0,Course_id,INT,0,,0
1,Catalog_year,TEXT,0,,0
2,Program_id,INT,0,,0
3,CatalogID,TEXT,0,,0
4,Course_title,TEXT,0,,0
5,Description,TEXT,0,,0
6,Credits,REAL,0,,0
7,Attributes,TEXT,0,,0
8,Prerequisites,TEXT,0,,0
9,Corequisites,TEXT,0,,0


In [10]:
%%sql
PRAGMA foreign_keys=off;

--BEGIN TRANSACTION;

ALTER TABLE COURSE1 RENAME TO COURSE1_OLD;

CREATE TABLE COURSE1 (
    Course_id INTEGER PRIMARY KEY,
    CatalogID TEXT NOT NULL,
    Course_title TEXT NOT NULL, 
    Description TEXT,
    Program_id INTEGER,
    Credits REAL,
    Attributes TEXT,
    Prerequisites TEXT,
    Corequisites TEXT);

INSERT INTO COURSE1(Course_id, CatalogID, Course_title, Description, Program_id, Credits, Attributes, Prerequisites, Corequisites)
  SELECT Course_id, CatalogID, Course_title, Description, Program_id, Credits, Attributes, Prerequisites, Corequisites
  FROM COURSE1_OLD;

--COMMIT;

PRAGMA foreign_keys=on;

 * sqlite:///CourseDataWarehouse.db
Done.
Done.
Done.
4440 rows affected.
Done.


[]

In [11]:
%%sql
PRAGMA table_info(COURSE1);

 * sqlite:///CourseDataWarehouse.db
Done.


cid,name,type,notnull,dflt_value,pk
0,Course_id,INTEGER,0,,1
1,CatalogID,TEXT,1,,0
2,Course_title,TEXT,1,,0
3,Description,TEXT,0,,0
4,Program_id,INTEGER,0,,0
5,Credits,REAL,0,,0
6,Attributes,TEXT,0,,0
7,Prerequisites,TEXT,0,,0
8,Corequisites,TEXT,0,,0


In [12]:
%%sql
DROP TABLE COURSE1_OLD

 * sqlite:///CourseDataWarehouse.db
Done.


[]

In [13]:
%%sql
SELECT * 
FROM COURSE1
WHERE (Course_id = 1766)

 * sqlite:///CourseDataWarehouse.db
Done.


Course_id,CatalogID,Course_title,Description,Program_id,Credits,Attributes,Prerequisites,Corequisites
1766,BB 0139,CFP Capstone Course,This course will engage the student in critical thinking and decision-making about personal financial management topics in the context of the financial planning process. You will gain the hands-on experience of taking a client from start to finish with their personal financial plan.,8,0 Credits,,"BB 0130, BB 0132, BB 0134, BB 0136, BB 0138, BB 0140.",


## Creating ***facts*** table CLASS_STATISTICS

In [14]:
%%sql
-- Class table
DROP TABLE IF EXISTS CLASS_STATISTICS;
CREATE TABLE CLASS_STATISTICS (
    ClassStat_id INTEGER PRIMARY KEY,
    Term_id INTEGER,
    Instructor_id INTEGER,
    Program_id INTEGER,
    Course_id INTEGER,
    Location_id INTEGER,
    crn INTEGER,
    Cap INTEGER,
    Act INTEGER,
    Rem INTEGER,
    Meeting_times INTEGER,
    Meeting_hours INTEGER,
    FOREIGN KEY (Term_id) REFERENCES TERM1(Term_id)
    FOREIGN KEY (Instructor_id) REFERENCES INSTRUCTOR1(Instructor_id)
    FOREIGN KEY (Program_id) REFERENCES PROGRAM1(Program_id)
    FOREIGN KEY (Location_id) REFERENCES LOCATION1(Location_id)
    FOREIGN KEY (Course_id) REFERENCES COURSE1(Course_id)
);

 * sqlite:///CourseDataWarehouse.db
Done.
Done.


[]

In [31]:
%%sql
DELETE FROM CLASS_STATISTICS;
INSERT INTO CLASS_STATISTICS (Term_id, Instructor_id, Program_id, Course_id, Location_id, crn, Cap, Act, Rem, Meeting_times, Meeting_hours)
    SELECT DISTINCT TERM1.Term_id, coursedata.CLASS.Instructor_id, coursedata.COURSE.Program_id, coursedata.COURSE.Course_id,
            coursedata.MEETING.Location_id,  crn, Cap, Act, Rem,
        
            COUNT(coursedata.MEETING.Class_id) AS Meeting_times, 
           sum(strftime('%s',coursedata.MEETING.End) - strftime('%s',coursedata.MEETING.Start))/3600 AS Meeting_hours
        
    FROM coursedata.CLASS
        LEFT JOIN coursedata.COURSE ON (coursedata.COURSE.CatalogID = coursedata.CLASS.CatalogID)
        LEFT JOIN coursedata.MEETING ON (coursedata.CLASS.Class_id = coursedata.MEETING.Class_id)
        LEFT JOIN TERM1 USING(Term)
        WHERE coursedata.COURSE.Catalog_year <> '2017_2018'
    GROUP BY coursedata.MEETING.Class_id
 
        
        
       
        
        

 * sqlite:///CourseDataWarehouse.db
12773 rows affected.
12773 rows affected.


[]

%%sql
ALTER TABLE CLASS_STATISTICS
ADD COLUMN Term_id;

%%sql
INSERT INTO CLASS_STATISTICS (Term_id)
SELECT Term_id
    FROM TERM1 
    LEFT JOIN TERM1 ON (CLASS_STATISTICS.Term = TERM1.Term)

In [27]:
%%sql
SELECT *
FROM CLASS_STATISTICS
LIMIT 100



 * sqlite:///CourseDataWarehouse.db
Done.


ClassStat_id,Term_id,Instructor_id,Program_id,Course_id,Location_id,crn,Cap,Act,Rem,Meeting_times,Meeting_hours
1,1,,4,1,,71858,0,0,0,0,
2,1,744.0,1,113,99.0,70384,0,31,-31,26,32.0
3,1,744.0,1,113,99.0,70385,0,31,-31,26,32.0
4,1,744.0,1,113,99.0,70382,0,31,-31,26,32.0
5,1,871.0,1,113,107.0,70291,0,29,-29,26,32.0
6,1,871.0,1,113,107.0,70350,0,30,-30,26,32.0
7,1,871.0,1,113,105.0,70381,0,31,-31,26,32.0
8,1,829.0,1,113,107.0,70383,30,31,-1,26,32.0
9,1,447.0,1,113,105.0,70391,30,32,-2,26,32.0
10,1,447.0,1,113,105.0,71105,30,33,-3,26,32.0


In [23]:
%%sql 
SELECT *
    FROM CLASS
    WHERE (crn = 70384)


 * sqlite:///CourseDataWarehouse.db
Done.


Class_id,Course_id,Section,Instructor_id,CatalogID,Course_title,Term,crn,Cap,Act,Rem,Timecode
1,,C01,744,AC 0011,Introduction to Financial Accounting,Fall2014,70384,0,31,-31,['TF 0800am-0915am 09/02-12/08 DSB 105']


In [22]:
%%sql 
SELECT *
    FROM CLASS_STATISTICS
    WHERE (crn = 113)

 * sqlite:///CourseDataWarehouse.db
Done.


ClassStat_id,Term_id,Instructor_id,Program_id,Course_id,Location_id,crn,Cap,Act,Rem,Meeting_times,Meeting_hours


%%sql
DELETE FROM CLASS_STATISTICS;
INSERT INTO CLASS_STATISTICS (Term, Instructor_id, Program_id, Course_id, Location_id, crn, Cap, Act, Rem, Meeting_times, Meeting_hours)
    SELECT DISTINCT coursedata.CLASS.Term, coursedata.CLASS.Instructor_id, coursedata.COURSE.Program_id, coursedata.COURSE.Course_id,
            coursedata.MEETING.Location_id,  crn, Cap, Act, Rem,
        
            COUNT(coursedata.MEETING.Class_id) AS Meeting_times, 
           sum(strftime('%H:%M',coursedata.MEETING.End) - strftime('%H:%M',coursedata.MEETING.Start))AS Meeting_hours
        
    FROM coursedata.CLASS
        LEFT JOIN coursedata.COURSE ON (coursedata.COURSE.CatalogID = coursedata.CLASS.CatalogID)
        LEFT JOIN coursedata.MEETING ON (coursedata.CLASS.Class_id = coursedata.MEETING.Class_id)
        WHERE coursedata.COURSE.Catalog_year <> '2018_2019'
    GROUP BY coursedata.MEETING.Class_id
 
        
        
       
        

%%sql
--DELETE FROM CLASS_STATISTICS;
--INSERT INTO CLASS_STATISTICS (Term_id, Instructor_id, Program_id, Location_id, Course_id, crn, Cap, Act, Rem)
    SELECT DISTINCT coursedata.CLASS.Term, coursedata.CLASS.Instructor_id, coursedata.COURSE.Program_id, coursedata.COURSE.Course_id, coursedata.MEETING.Location_id,  crn, Cap, Act, Rem, COUNT(coursedata.MEETING.Class_id) AS Meeting_times 
    FROM coursedata.CLASS
        LEFT JOIN coursedata.COURSE ON (coursedata.CLASS.Course_id = COURSE.Course_id)
        LEFT JOIN coursedata.MEETING ON (CLASS.Class_id = MEETING.Class_id)
        GROUP BY coursedata.MEETING.Class_id
            LIMIT 500
        
       
        

In [None]:
%%sql
DELETE FROM CLASS_STATISTICS;
INSERT INTO CLASS_STATISTICS (Term_id, Instructor_id, Program_id, Course_id, Location_id, crn, Cap, Act, Rem, Meeting_times, Meeting_hours)
    SELECT DISTINCT TERM1.Term_id, coursedata.CLASS.Instructor_id, coursedata.COURSE.Program_id, coursedata.COURSE.Course_id,
            coursedata.MEETING.Location_id,  crn, Cap, Act, Rem,
        
            COUNT(coursedata.MEETING.Class_id) AS Meeting_times, 
           sum(strftime('%s',coursedata.MEETING.End) - strftime('%s',coursedata.MEETING.Start))/3600 AS Meeting_hours
        
    FROM coursedata.CLASS
        LEFT JOIN coursedata.COURSE ON (coursedata.COURSE.CatalogID = coursedata.CLASS.CatalogID)
        LEFT JOIN coursedata.MEETING ON (coursedata.CLASS.Class_id = coursedata.MEETING.Class_id)
        LEFT JOIN TERM1 USING(Term)
        WHERE coursedata.COURSE.Catalog_year <> '2018_2019'
    GROUP BY coursedata.MEETING.Class_id
 
        
        
       