# Data Warehouse
## 1. Load SQLite and Database
- 'CourseData.db'
- note the 3 backslashes

In [1]:
%reload_ext sql
%sql sqlite:///CourseData.db

'Connected: @CourseData.db'

In [2]:
%%sql
DROP TABLE IF EXISTS CourseData;

 * sqlite:///CourseData.db
Done.


[]

## DROP Tables 

In [3]:
%%sql
sqlite:///CourseData.db
DROP TABLE IF EXISTS COURSEDIM;
DROP TABLE IF EXISTS COURSEFACTS;
DROP TABLE IF EXISTS TEACHERS;
DROP TABLE IF EXISTS LOCATION;

Done.
Done.
Done.
Done.


[]

## Create Tables
- primary key is generated on every table by naming new surrogate key/field that isn't already in .csv files.
- don't need NOT NULL for PK because it's implied in SQLite.


In [8]:
%%sql
CREATE TABLE COURSEDIM (
    catalog_id TEXT PRIMARY KEY,
    title TEXT,
    meetings TEXT,
    timecodes TEXT,
    program_code TEXT,    
    program_name TEXT,
    prereqs TEXT,    
    coreqs TEXT,
    fees TEXT,
    attributes TEXT,    
    description TEXT
);

 * sqlite:///CourseData.db
Done.


[]

In [None]:
%%sql
CREATE TABLE LOCATION (
 LID INTEGER PRIMARY KEY,
 location TEXT NOT NULL
);

In [9]:
%%sql
CREATE TABLE TEACHERS (
 TID INTEGER PRIMARY KEY,
 primary_instructor TEXT NOT NULL
);

 * sqlite:///CourseData.db
Done.


[]

In [10]:
%%sql
CREATE TABLE COURSEFACTS (
                 crn SMALLINT,
                 term VARCAR(20),
                 credits INTEGER,
                 cap INTEGER,
                 act INTEGER,
                 rem INTEGER,
                 LID INTEGER,
                catalog_ID INTEGER,
                LID INTEGER,
                TID INTEGER,
                FOREIGN KEY(catalog_ID) REFERENCES COURSEDIM(catalog_id),
                 FOREIGN KEY(LID) REFERENCES LOCATION(LID),
                 FOREIGN KEY(TID) REFERENCES COURSEDIM(catalog_id)
);

 * sqlite:///CourseData.db
Done.


[]

# 4. Do INSERT Statements

In [12]:
%%sql
INSERT INTO COURSECATALOGS (catalog_id, program_code, program_name, course_title, prereqs, coreqs, fees, attributes, description) 
SELECT DISTINCT catalog_id, program_code, program_name, course_title, prereqs, coreqs, fees, attributes, description
FROM import_Course_Catalog;

 * sqlite:///CourseData.db
2221 rows affected.


[]

In [13]:
%%sql
SELECT * FROM COURSECATALOGS
limit 2;

 * sqlite:///CourseData.db
Done.


CCID,catalog_id,program_code,program_name,course_title,prereqs,coreqs,fees,attributes,description
1,AN 0301,AN,Asian Studies,Independent Study,,,,,Students undertake an individualized program of study in consultation with a director from the Asian studies faculty.
2,AN 0310,AN,Asian Studies,Asian Studies Seminar,,,,,"This seminar examines selected topics concerning Asia. This course is taught in conjunction with another 100-300 level course from a rotation of course offerings. Consult the Asian Studies director to identify the conjoined course for a given semester. The seminar concentrates on topics within the parameters of the conjoined course syllabus but adds research emphasis. Students registered for this course must complete a research project, to include 300-level research, in addition to the regular research requirements of the conjoined course, and a 25-50 page term paper in substitution of some portion of the conjoined course requirements, as determined by the instructor. Open to juniors and seniors only."


# INSERT TEACHERS

In [14]:
%%sql
DELETE FROM TEACHERS;

INSERT INTO TEACHERS  (primary_instructor)
SELECT DISTINCT primary_instructor
    FROM import_courses
    ;

 * sqlite:///CourseData.db
0 rows affected.
1105 rows affected.


[]

In [15]:
%%sql
SELECT * FROM TEACHERS
limit 2;

 * sqlite:///CourseData.db
Done.


TID,primary_instructor
1,Michael P. Coyne
2,Rebecca I. Bloch


# INSERT COURSEMEETINGS

In [16]:
%%sql 
INSERT INTO COURSEMEETINGS ( crn, term, location, day, start, end)
SELECT DISTINCT crn, term, location, day, start, end
FROM import_course_meetings


 * sqlite:///CourseData.db
311142 rows affected.


[]

In [17]:
%%sql
SELECT * FROM COURSEMEETINGS
LIMIT 10;

 * sqlite:///CourseData.db
Done.


CID,crn,term,location,day,start,end
1,70384,Fall2014,DSB 105,T,2014-09-02T08:00:00,2014-09-02T09:15:00
2,70384,Fall2014,DSB 105,F,2014-09-05T08:00:00,2014-09-05T09:15:00
3,70384,Fall2014,DSB 105,T,2014-09-09T08:00:00,2014-09-09T09:15:00
4,70384,Fall2014,DSB 105,F,2014-09-12T08:00:00,2014-09-12T09:15:00
5,70384,Fall2014,DSB 105,T,2014-09-16T08:00:00,2014-09-16T09:15:00
6,70384,Fall2014,DSB 105,F,2014-09-19T08:00:00,2014-09-19T09:15:00
7,70384,Fall2014,DSB 105,T,2014-09-23T08:00:00,2014-09-23T09:15:00
8,70384,Fall2014,DSB 105,F,2014-09-26T08:00:00,2014-09-26T09:15:00
9,70384,Fall2014,DSB 105,T,2014-09-30T08:00:00,2014-09-30T09:15:00
10,70384,Fall2014,DSB 105,F,2014-10-03T08:00:00,2014-10-03T09:15:00


# INSERT COURSES
- _key:_ do NOT need primary_instructor in courses table since we're going to join in the teachers table that has this value.  We are simply telling it what to JOIN these tables on, but primary_instructor does not need to be in both tables.

- still need to add additional FKs

In [18]:
%%sql
INSERT INTO COURSES (crn, term,catalog_id, section, credits, title, meetings, timecodes, cap, act, rem, TID)
SELECT DISTINCT crn, term, catalog_id, section, credits, title, meetings, timecodes, cap, act, rem, TID
FROM import_courses
JOIN TEACHERS ON TEACHERS.primary_instructor = import_courses.primary_instructor;


 * sqlite:///CourseData.db
15938 rows affected.


[]

In [19]:
%%sql
SELECT * FROM COURSES
LIMIT 5;

 * sqlite:///CourseData.db
Done.


crn,term,catalog_id,section,credits,title,meetings,timecodes,cap,act,rem,TID
70384,Fall2014,AC 0011,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0800am-0915am 09/02-12/08 DSB 105'],0,31,-31,1
70385,Fall2014,AC 0011,C02,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0930am-1045am 09/02-12/08 DSB 105'],0,31,-31,1
70382,Fall2014,AC 0011,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 1230pm-0145pm 09/02-12/08 DSB 105'],0,31,-31,1
70291,Fall2014,AC 0011,C04,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1100am-1215pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1100am-1215pm 09/02-12/08 DSB 111'],0,29,-29,2
70350,Fall2014,AC 0011,C05,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1230pm-0145pm 09/02-12/08 DSB 111'],0,30,-30,2


In [20]:
%%sql
SELECT catalog_id, primary_instructor 
FROM COURSES
JOIN TEACHERS USING (TID)
limit 10;

 * sqlite:///CourseData.db
Done.


catalog_id,primary_instructor
AC 0011,Michael P. Coyne
AC 0011,Michael P. Coyne
AC 0011,Michael P. Coyne
AC 0011,Rebecca I. Bloch
AC 0011,Rebecca I. Bloch
AC 0011,Rebecca I. Bloch
AC 0011,Paul Caster
AC 0011,Jo Ann Drusbosky
AC 0011,Jo Ann Drusbosky
AC 0011,Jo Ann Drusbosky


In [21]:
%%sql
SELECT * FROM COURSES
limit 5
;


 * sqlite:///CourseData.db
Done.


crn,term,catalog_id,section,credits,title,meetings,timecodes,cap,act,rem,TID
70384,Fall2014,AC 0011,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0800am-0915am 09/02-12/08 DSB 105'],0,31,-31,1
70385,Fall2014,AC 0011,C02,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0930am-1045am 09/02-12/08 DSB 105'],0,31,-31,1
70382,Fall2014,AC 0011,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 1230pm-0145pm 09/02-12/08 DSB 105'],0,31,-31,1
70291,Fall2014,AC 0011,C04,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1100am-1215pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1100am-1215pm 09/02-12/08 DSB 111'],0,29,-29,2
70350,Fall2014,AC 0011,C05,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1230pm-0145pm 09/02-12/08 DSB 111'],0,30,-30,2


In [22]:
%%sql
SELECT TID FROM TEACHERS
limit 3;

 * sqlite:///CourseData.db
Done.


TID
1
2
3


# 5. Run SELECT Queries

In [23]:
%reload_ext sql
%sql sqlite:///CourseData.db

'Connected: @CourseData.db'

## SELECT Query for all unique classrooms 
- only included those with a character lenght of 7 (some had 2, 8, etc.)
- can't figure out how to get a distinct location when including additinonal fields.

In [24]:
%%sql
SELECT DISTINCT 
location
FROM COURSEMEETINGS
WHERE LENGTH(location) = 7
ORDER BY location
LIMIT 20
;

 * sqlite:///CourseData.db
Done.


location
BCC 200
BLM 112
BNW 124
BNW 127
BNW 128
BNW 130
BNW 131
BNW 133
BNW 136
BNW 137


## SELECT Query displaying all courses in MSBA Program '18-'19
- still have to get rid of blank by making defining description as NOT NULL.
- is there an easier way?

## core classes

In [26]:
%%sql
SELECT course_title AS Course, program_name AS Program, catalog_id AS Code, description AS Description
FROM COURSES
WHERE catalog_id = 'BA 0505'
LIMIT 12

 * sqlite:///CourseData.db
(sqlite3.OperationalError) no such column: course_title [SQL: "SELECT course_title AS Course, program_name AS Program, catalog_id AS Code, description AS Description\nFROM COURSES\nWHERE catalog_id = 'BA 0505'\nLIMIT 12"] (Background on this error at: http://sqlalche.me/e/e3q8)


In [34]:
%%sql
SELECT prereqs AS Required, attributes AS Attributes, course_title AS Course, program_name AS Program, catalog_id AS Code, description AS Description
FROM COURSECATALOGS
WHERE Prereqs > 0
ORDER BY prereqs
-DESC
LIMIT 5;

 * sqlite:///CourseData.db
Done.


Required,Attributes,Course,Program,Code,Description
prereqs,attributes,course_title,program_name,catalog_id,description
"junior or senior standing, three environmental studies program courses, and program approval.","EVCA Environmental Studies: Capstone, EVPE Environmental Studies Elective",Independent Study,Environmental Studies,EV 0299,A student may conduct a one-semester independent study on a defined research topic or field of study under the supervision of a professor in the Environmental Studies Program. Credit requires prior approval by the Professor which whom the student will work as well as the Director of the Environmental Studies Program. Students must have a GPA of 3.0 or higher.
Two previous courses in Philosophy.,CAOT Catholic Studies: Non-Religious Studies,Metaphysics,Philosophy,PH 0320,"This course concerns itself with being and our knowledge of being, developing in student minds an operative habit of viewing reality in its ultimate context."
Two courses in music or theatre.,VPC2 Visual and Performing Arts Core: Applied Course,Performing Arts Administration Principles and Practices,Music,MU 0306,"This course explores the fundamental principles associated with not-for-profit performing arts organizations. This course is for all arts students, as the study of arts administration core principles sets a foundation of essential knowledge vital for employment within a non-profit arts organization. Such training also is for practicing artists and those with for-profit intentions. Students will come away with knowledge and skills, as well as a strong self-awareness of their leadership and management capacities. Previously VPA 0306."
Two courses in Philosophy.,,Plato,Philosophy,PH 0300,"This course covers central ontological and epistemological themes in selected early, middle, and late Platonic dialogues, paying particular attention to Plato's inclination to identify virtue with knowledge."


In [50]:
%%sql
SELECT t.primary_instructor, COUNT(c.catalog_id) AS courses_taught
FROM COURSES as c
JOIN TEACHERS as t ON c.TID = t.TID


 * sqlite:///CourseData.db
Done.


primary_instructor,courses_taught
Michael P. Coyne,15938


# JOIN Testing

In [26]:
%%sql
SELECT *
FROM COURSES
LIMIT 7;

 * sqlite:///CourseData.db
Done.


crn,term,catalog_id,section,credits,title,meetings,timecodes,cap,act,rem,TID
70384,Fall2014,AC 0011,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0800am-0915am 09/02-12/08 DSB 105'],0,31,-31,1
70385,Fall2014,AC 0011,C02,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0930am-1045am 09/02-12/08 DSB 105'],0,31,-31,1
70382,Fall2014,AC 0011,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 1230pm-0145pm 09/02-12/08 DSB 105'],0,31,-31,1
70291,Fall2014,AC 0011,C04,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1100am-1215pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1100am-1215pm 09/02-12/08 DSB 111'],0,29,-29,2
70350,Fall2014,AC 0011,C05,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 1230pm-0145pm 09/02-12/08 DSB 111'],0,30,-30,2
70381,Fall2014,AC 0011,C06,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '0330pm-0445pm', 'dates': '09/02-12/08', 'location': 'DSB 110A'}]",['MR 0330pm-0445pm 09/02-12/08 DSB 110A'],0,31,-31,2
70383,Fall2014,AC 0011,G,3.0,Introduction to Financial Accounting,"[{'days': 'MR', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",['MR 0930am-1045am 09/02-12/08 DSB 111'],30,31,-1,3


%%sql
SELECT t.TID, t.primary_instructor, c.catalog_id
FROM COURSES as c
JOIN TEACHERS as t ON c.TID = t.TID
LIMIT 100;

# Dr. Huntley
### What Courses has Dr. Huntley Taught?

In [27]:
%%sql
SELECT t.primary_instructor, c.catalog_id
FROM COURSES as c
JOIN TEACHERS as t ON c.TID = t.TID
WHERE primary_instructor like "Christopher L.%"
Limit 10;

 * sqlite:///CourseData.db
Done.


primary_instructor,catalog_id
Christopher L. Huntley,IS 0100
Christopher L. Huntley,IS 0135
Christopher L. Huntley,IS 0320
Christopher L. Huntley,IS 0100
Christopher L. Huntley,IS 0100
Christopher L. Huntley,IS 0135
Christopher L. Huntley,IS 0320
Christopher L. Huntley,IS 0135
Christopher L. Huntley,IS 0240
Christopher L. Huntley,IS 0585C


### How Many Courses has Dr. Huntley Taught?

In [28]:
%%sql
SELECT DISTINCT t.primary_instructor, COUNT(c.catalog_id) AS courses_taught
FROM COURSES as c
JOIN TEACHERS as t ON c.TID = t.TID
WHERE primary_instructor like "Christopher L.%"
Limit 10;

 * sqlite:///CourseData.db
Done.


primary_instructor,courses_taught
Christopher L. Huntley,31


### What Courses has Dr. Massey Taught?

In [29]:
%%sql
SELECT t.primary_instructor, c.catalog_id
FROM COURSES as c
JOIN TEACHERS as t ON c.TID = t.TID
WHERE primary_instructor like "Dawn W%"
Limit 10;

 * sqlite:///CourseData.db
Done.


primary_instructor,catalog_id
Dawn W Massey,AC 0570
Dawn W Massey,AC 0590
Dawn W Massey,AC 0590
Dawn W Massey,AC 0590
Dawn W Massey,AC 0570
Dawn W Massey,AC 0590
Dawn W Massey,AC 0590
Dawn W Massey,AC 0590
Dawn W Massey,AC 0570
Dawn W Massey,AC 0570


### How Many Courses has Dr. Massey Taught?

In [30]:
%%sql
SELECT DISTINCT t.primary_instructor, COUNT(c.catalog_id) AS courses_taught
FROM COURSES as c
JOIN TEACHERS as t ON c.TID = t.TID
WHERE primary_instructor like "Dawn%"
Limit 10;

 * sqlite:///CourseData.db
Done.


primary_instructor,courses_taught
Dawn W Massey,52


In [31]:
%%sql
SELECT TID, primary_instructor FROM TEACHERS
limit 10;


 * sqlite:///CourseData.db
Done.


TID,primary_instructor
1,Michael P. Coyne
2,Rebecca I. Bloch
3,Paul Caster
4,Jo Ann Drusbosky
5,Arleen N. Kardos
6,Scott M Brenner
7,Kevin C. Cassidy
8,Bruce Bradford
9,Milo W. Peck
10,Stephen E. Yost


In [32]:
%%sql
SELECT c.term, cm.start, cm.location, c.catalog_id
FROM COURSES AS c
JOIN COURSEMEETINGS AS cm
LIMIT 20;

 * sqlite:///CourseData.db
Done.


term,start,location,catalog_id
Fall2014,2014-09-02T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-05T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-09T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-12T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-16T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-19T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-23T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-26T08:00:00,DSB 105,AC 0011
Fall2014,2014-09-30T08:00:00,DSB 105,AC 0011
Fall2014,2014-10-03T08:00:00,DSB 105,AC 0011


In [33]:
%%sql
SELECT cm.start, c.catalog_id
FROM COURSES AS c
JOIN COURSEMEETINGS AS cm
LIMIT 10;

 * sqlite:///CourseData.db
Done.


start,catalog_id
2014-09-02T08:00:00,AC 0011
2014-09-05T08:00:00,AC 0011
2014-09-09T08:00:00,AC 0011
2014-09-12T08:00:00,AC 0011
2014-09-16T08:00:00,AC 0011
2014-09-19T08:00:00,AC 0011
2014-09-23T08:00:00,AC 0011
2014-09-26T08:00:00,AC 0011
2014-09-30T08:00:00,AC 0011
2014-10-03T08:00:00,AC 0011


Have to finish insterting COURSES column

In [34]:
%%sql
SELECT TID, primary_instructor FROM TEACHERS
limit 10;

 * sqlite:///CourseData.db
Done.


TID,primary_instructor
1,Michael P. Coyne
2,Rebecca I. Bloch
3,Paul Caster
4,Jo Ann Drusbosky
5,Arleen N. Kardos
6,Scott M Brenner
7,Kevin C. Cassidy
8,Bruce Bradford
9,Milo W. Peck
10,Stephen E. Yost


In [35]:
%%sql
SELECT course_title AS Course,  catalog_id AS Code, program_name AS Program, 
FROM COURSECATALOGS
WHERE credits = "6 credits"
LIMIT 50;

 * sqlite:///CourseData.db
(sqlite3.OperationalError) near "FROM": syntax error [SQL: 'SELECT course_title AS Course,  catalog_id AS Code, program_name AS Program, \nFROM COURSECATALOGS\nWHERE credits = "6 credits"\nLIMIT 50;'] (Background on this error at: http://sqlalche.me/e/e3q8)


In [36]:
%%sql
VACUUM;

 * sqlite:///CourseData.db
Done.


[]