## Import modules & set database connection

In [3]:
# Load the `sql` IPython extension, which provides the %sql / %%sql magics used in later cells.
%load_ext sql
import pandas as pd
# NOTE(review): this alias has the same name as the `sql` extension loaded above, which is easy to confuse.
import sqlite3 as sql

# Connect the %sql magic to the SQLite file CourseData.db (presumably relative to the working directory — confirm).
%sql sqlite:///CourseData.db

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


'Connected: @CourseData.db'

## 1. Domain Integrity

- This check ensures that our data types and constraints are correctly defined.

- We use PRAGMA to display each table's metadata. This information is used to double-check that domain integrity is met and that our data is defined accurately.

In [5]:
%%sql
-- Domain-integrity check for COURSE_OFFERINGS: list each column's declared type,
-- NOT NULL flag, default value and primary-key position.
-- Only the last statement of a %%sql cell is rendered, so the cell holds just the
-- PRAGMA; a full-table SELECT ahead of it would never be displayed.
PRAGMA table_info(COURSE_OFFERINGS);

 * sqlite:///CourseData.db
Done.
Done.


cid,name,type,notnull,dflt_value,pk
0,CourseOfferingID,INTEGER,0,,1
1,CourseID,INTEGER,0,,0
2,CatalogID,TEXT,1,,0
3,Term,TEXT,1,,0
4,CRN,INTEGER,1,,0
5,Section,TEXT,1,,0
6,Credits,REAL,0,,0
7,Title,TEXT,1,,0
8,Timecodes,TEXT,0,,0
9,PrimaryInstructorID,INTEGER,0,,0


In [6]:
%%sql
-- Domain-integrity check for CATALOG_COURSES: list each column's declared type,
-- NOT NULL flag, default value and primary-key position.
-- Only the last statement of a %%sql cell is rendered, so the cell holds just the
-- PRAGMA; a full-table SELECT ahead of it would never be displayed.
PRAGMA table_info(CATALOG_COURSES);

 * sqlite:///CourseData.db
Done.
Done.


cid,name,type,notnull,dflt_value,pk
0,CourseID,INTEGER,0,,1
1,CatalogYear,TEXT,1,,0
2,CatalogID,TEXT,1,,0
3,ProgramID,INTEGER,0,,0
4,CourseTitle,TEXT,1,,0
5,Credits,TEXT,1,,0
6,Prereqs,TEXT,0,,0
7,Coreqs,TEXT,0,,0
8,Fees,TEXT,0,,0
9,Attributes,TEXT,0,,0


In [7]:
%%sql
-- Domain-integrity check for COURSE_MEETINGS: list each column's declared type,
-- NOT NULL flag, default value and primary-key position.
-- Only the last statement of a %%sql cell is rendered, so the cell holds just the
-- PRAGMA; a full-table SELECT ahead of it would never be displayed.
PRAGMA table_info(COURSE_MEETINGS);

 * sqlite:///CourseData.db
Done.
Done.


cid,name,type,notnull,dflt_value,pk
0,CourseMeetingID,INTEGER,0,,1
1,CourseOfferingID,INTEGER,1,,0
2,LocationID,INTEGER,1,,0
3,StartDateTime,TEXT,1,,0
4,EndDateTime,TEXT,1,,0


## 2. Entity Integrity

- There should be 4440 Catalog Entries, 15937 Course Offerings, and 284847 Course Meetings.

- Our entity integrity check confirms that the number of rows in each table matches the number of rows that we loaded into our CourseData.db.

In [9]:
%%sql

-- Return a single row holding the row count of each of the three core tables.
SELECT
    (SELECT COUNT(*) FROM CATALOG_COURSES)  AS CatalogCourses,
    (SELECT COUNT(*) FROM COURSE_OFFERINGS) AS CourseOfferings,
    (SELECT COUNT(*) FROM COURSE_MEETINGS)  AS CourseMeetings;

 * sqlite:///CourseData.db
Done.


CatalogCourses,CourseOfferings,CourseMeetings
4440,15937,284847


## 3. Relational Integrity

- This check ensures that all of our tables can be joined and queried appropriately using PK → FK pairs.

- We use a query that requires every table in our ERD to be joined in order to confirm its relational integrity.

In [10]:
%%sql

-- Relational-integrity smoke test: start from COURSE_OFFERINGS and join every other
-- table through its key column(s), then show a few meetings taught by one instructor.
SELECT CourseID,Name as ProfessorName,TERM_TO_CATALOG_YEAR.Term,Section,Title,ProgramName,
       substr(StartDateTime,1,10) as CourseDate,substr(StartDateTime,-8) as CourseStartTime,
       substr(EndDateTime,-8) as CourseEndTime,LocationCode as Location,substr(Term,-4) as Year 
FROM COURSE_OFFERINGS
    LEFT JOIN COURSE_INSTRUCTORS ON (COURSE_OFFERINGS.PrimaryInstructorID = COURSE_INSTRUCTORS.InstructorID)
    LEFT JOIN TERM_TO_CATALOG_YEAR USING(Term)
    LEFT JOIN CATALOG_COURSES USING(CourseID)
    LEFT JOIN PROGRAMS USING(ProgramID)
    LEFT JOIN COURSE_MEETINGS USING(CourseOfferingID)
    LEFT JOIN COURSE_LOCATIONS USING(LocationID)
-- Filtering on the instructor name drops offerings with no matching instructor row.
WHERE Name LIKE '%Huntley'
-- Term is text such as 'Spring2019', so sorting on Term alone is alphabetical by
-- season name rather than by recency. Sort on the 4-character year suffix first and
-- keep the Term ordering within a year; the offering id and meeting start act as
-- tie-breakers so the rows picked by LIMIT are reproducible between runs.
ORDER BY substr(Term,-4) DESC, Term DESC, CourseOfferingID, StartDateTime
LIMIT 5;

 * sqlite:///CourseData.db
Done.


CourseID,ProfessorName,Term,Section,Title,ProgramName,CourseDate,CourseStartTime,CourseEndTime,Location,Year
2683,Christopher L. Huntley,Spring2019,2,Operations Management,Operations Management,2019-01-24,11:00:00,12:15:00,DSB 110B,2019
2683,Christopher L. Huntley,Spring2019,2,Operations Management,Operations Management,2019-01-28,11:00:00,12:15:00,DSB 110B,2019
2683,Christopher L. Huntley,Spring2019,2,Operations Management,Operations Management,2019-01-31,11:00:00,12:15:00,DSB 110B,2019
2683,Christopher L. Huntley,Spring2019,2,Operations Management,Operations Management,2019-02-04,11:00:00,12:15:00,DSB 110B,2019
2683,Christopher L. Huntley,Spring2019,2,Operations Management,Operations Management,2019-02-07,11:00:00,12:15:00,DSB 110B,2019
