# __Course Data ETL:__

__Import Packages__

In [1]:
%load_ext sql
import pandas as pd
import sqlite3

In [2]:
# Open two handles onto the same SQLite file, CourseData.db:
#   - the %sql magic connection, used by every %%sql cell below
#   - `conn`, a plain sqlite3 connection handed to pandas `to_sql` when staging the CSVs
%sql sqlite:///CourseData.db
conn = sqlite3.connect('CourseData.db')

__Create Tables__

In [3]:
%%sql
-- Lookup tables that hold no foreign keys. Each one is dropped and rebuilt so this cell can be rerun.

-- One row per instructor name.
DROP TABLE IF EXISTS Professor;
CREATE TABLE Professor (
    ProfessorID  INTEGER PRIMARY KEY,
    Name         TEXT    NOT NULL
);

-- One row per meeting location string.
DROP TABLE IF EXISTS Location;
CREATE TABLE Location (
    LocationID  INTEGER PRIMARY KEY,
    Location    TEXT    NOT NULL
);

-- One row per academic program, identified by its name and code.
DROP TABLE IF EXISTS Program;
CREATE TABLE Program (
    ProgramID    INTEGER PRIMARY KEY,
    ProgramName  TEXT    NOT NULL,
    ProgramCode  TEXT    NOT NULL
);
-- One row per course entry in a given catalog year.
DROP TABLE IF EXISTS CatalogCourse;
CREATE TABLE CatalogCourse (
    CatalogCourseID INTEGER PRIMARY KEY,
    CatalogYear TEXT NOT NULL,
    CatalogID TEXT NOT NULL,
    -- Filled with Program.ProgramID values by the load step
    ProgramID INTEGER,
    CourseTitle TEXT NOT NULL,
    -- The catalog stores credits as free text such as 3 Credits or 1-3 Credits,
    -- so the column is declared TEXT instead of REAL to match what is loaded
    Credits TEXT,
    Prereqs TEXT,
    Coreqs TEXT,
    Fees TEXT,
    Attributes TEXT,
    Description TEXT
);
-- One row per class offering, meaning a CRN within a term.
DROP TABLE IF EXISTS Class;
CREATE TABLE Class (
    ClassID          INTEGER PRIMARY KEY,
    CatalogID        TEXT    NOT NULL,
    -- The load step leaves this NULL when no catalog entry matches the class
    CatalogCourseID  INTEGER,
    -- The load step leaves this NULL when the instructor is not in Professor
    ProfessorID      INTEGER,
    Term             TEXT    NOT NULL,
    CRN              INTEGER NOT NULL,
    Section          TEXT    NOT NULL,
    Credits          INTEGER,
    Title            TEXT    NOT NULL,
    -- Raw schedule text copied from the source file as is
    Timecodes        TEXT,
    Meetings         TEXT,
    -- Seat counts copied from the cap, act and rem source columns
    Capacity         INTEGER NOT NULL,
    Actual           INTEGER NOT NULL,
    Remaining        INTEGER NOT NULL
);
-- One row per individual meeting of a class.
DROP TABLE IF EXISTS Meeting;
CREATE TABLE Meeting (
    MeetingID   INTEGER PRIMARY KEY,
    -- Both of these are filled from Class and Location by the load step
    ClassID     INTEGER NOT NULL,
    LocationID  INTEGER NOT NULL,
    Day         TEXT,
    StartTime   TEXT    NOT NULL,
    EndTime     TEXT    NOT NULL
);

-- Maps each term to the catalog year whose course entries apply to it.
-- Term is the primary key so that a term can map to only one catalog year,
-- which keeps the join in the Class load from duplicating class rows.
DROP TABLE IF EXISTS TermCatalogYear;
CREATE TABLE TermCatalogYear (
    CatalogYear TEXT NOT NULL,
    Term TEXT NOT NULL PRIMARY KEY
);

-- Only terms covered by the two imported catalogs are listed here.
INSERT INTO TermCatalogYear (Term, CatalogYear) VALUES
    ('Fall2017',   '2017_2018'),
    ('Winter2018', '2017_2018'),
    ('Spring2018', '2017_2018'),
    ('Summer2018', '2017_2018'),
    ('Fall2018',   '2018_2019'),
    ('Winter2019', '2018_2019'),
    ('Spring2019', '2018_2019');

 * sqlite:///CourseData.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
7 rows affected.


[]

## Extract from CSV

In [4]:
# Class & Meeting CSVs
# Every term folder under SourceData/ provides a courses.csv and a
# course_meetings.csv; each file is appended to its staging table.

terms = ['Fall2014','Fall2015','Fall2016','Fall2017','Fall2018',
         'Spring2015','Spring2016','Spring2017','Spring2018','Spring2019',
         'SpringBreak2017',
         'Summer2015','Summer2016','Summer2017','Summer2018',
         'Winter2015','Winter2016','Winter2017','Winter2018']

# CSV file name paired with the staging table that receives its rows.
staging_targets = [('courses.csv', 'import_courses'),
                   ('course_meetings.csv', 'import_course_meetings')]

# Start from empty staging tables. The loop below appends, so rows left behind
# by an earlier or interrupted run would otherwise be loaded a second time.
for csv_name, table_name in staging_targets:
    conn.execute('DROP TABLE IF EXISTS ' + table_name)
conn.commit()

for term in terms:
    for csv_name, table_name in staging_targets:
        filepath = 'SourceData/'+term+'/'+csv_name
        data = pd.read_csv(filepath)
        data.to_sql(table_name,conn,if_exists='append',index=False)

In [5]:
%%sql
-- Compare the raw row count of the staged courses with the count after removing exact duplicate rows.
WITH distinct_rows AS (
    SELECT DISTINCT * FROM import_courses
)
SELECT
    (SELECT COUNT(*) FROM import_courses) AS RawCount,
    (SELECT COUNT(*) FROM distinct_rows)  AS DistinctCount

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
15937,15937


In [6]:
%%sql
-- Compare the raw row count of the staged meetings with the count after removing exact duplicate rows.
WITH distinct_rows AS (
    SELECT DISTINCT * FROM import_course_meetings
)
SELECT
    (SELECT COUNT(*) FROM import_course_meetings) AS RawCount,
    (SELECT COUNT(*) FROM distinct_rows)          AS DistinctCount

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
284907,284847


In [7]:
# Catalog CSV
# One CSV per catalog year; each is tagged with its year and appended to import_catalog.
catalog_years = ['2017_2018', '2018_2019']

# Start from an empty staging table. The loop below appends, so rows left behind
# by an earlier or interrupted run would otherwise be loaded a second time.
conn.execute('DROP TABLE IF EXISTS import_catalog')
conn.commit()

for cat_year in catalog_years:
    filepath = 'SourceData/Catalogs/CourseCatalog'+cat_year+'.csv'
    data = pd.read_csv(filepath)
    # The CSV is stamped with its catalog year here; CatalogCourse.CatalogYear is loaded from this column.
    data['cat_year'] = cat_year
    data.to_sql('import_catalog',conn,if_exists='append',index=False)

In [8]:
%%sql
-- Compare the raw row count of the staged catalog with the count after removing exact duplicate rows.
WITH distinct_rows AS (
    SELECT DISTINCT * FROM import_catalog
)
SELECT
    (SELECT COUNT(*) FROM import_catalog) AS RawCount,
    (SELECT COUNT(*) FROM distinct_rows)  AS DistinctCount;

 * sqlite:///CourseData.db
Done.


RawCount,DistinctCount
4440,4440


## Import Data into ERD Tables

- Ordered by entity strength: load the tables with no foreign keys first. Before loading a table, make sure the tables referenced by its foreign keys have already been loaded.

In [9]:
%%sql

-- Rebuild Professor from the distinct primary instructors of the staged courses.
DELETE FROM Professor;

-- The TBA placeholder and any instructor value containing a slash are left out.
INSERT INTO Professor (Name)
SELECT DISTINCT ic.primary_instructor
FROM import_courses AS ic
WHERE ic.primary_instructor <> 'TBA'
  AND ic.primary_instructor NOT LIKE '%/%';

 * sqlite:///CourseData.db
0 rows affected.
1095 rows affected.


[]

In [10]:
%%sql
SELECT *
FROM Professor
LIMIT 10;

 * sqlite:///CourseData.db
Done.


ProfessorID,Name
1,Michael P. Coyne
2,Rebecca I. Bloch
3,Paul Caster
4,Jo Ann Drusbosky
5,Arleen N. Kardos
6,Scott M Brenner
7,Kevin C. Cassidy
8,Bruce Bradford
9,Milo W. Peck
10,Stephen E. Yost


In [11]:
%%sql

-- Rebuild Location from the distinct location strings of the staged meetings.
DELETE FROM Location;

INSERT INTO Location (Location)
SELECT DISTINCT icm.location
FROM import_course_meetings AS icm;

 * sqlite:///CourseData.db
0 rows affected.
207 rows affected.


[]

In [12]:
%%sql
SELECT *
FROM Location
LIMIT 10;

 * sqlite:///CourseData.db
Done.


LocationID,Location
1,DSB 105
2,DSB 111
3,DSB 110A
4,DSB 108
5,DSB 110B
6,DSB 104
7,DSB 112
8,DSB 109
9,DSB 115
10,DSB 2109A


In [13]:
%%sql

-- Rebuild Program from the distinct name and code pairs found in the staged catalog.
DELETE FROM Program;

INSERT INTO Program (ProgramName, ProgramCode)
SELECT DISTINCT
    cat.program_name,
    cat.program_code
FROM import_catalog AS cat;

 * sqlite:///CourseData.db
0 rows affected.
83 rows affected.


[]

In [14]:
%%sql
SELECT *
FROM Program
LIMIT 10;

 * sqlite:///CourseData.db
Done.


ProgramID,ProgramName,ProgramCode
1,Asian Studies,AN
2,Business,BU
3,Black Studies,BL
4,Bioengineering,BEN
5,Arabic,AR
6,Anthropology,AY
7,American Studies,AS
8,Japanese,JA
9,Art History,AH
10,Applied Ethics,AE


In [15]:
%%sql

-- Rebuild CatalogCourse from the staged catalog, resolving each row to its ProgramID.
DELETE FROM CatalogCourse;

-- Program rows are distinct name and code pairs, so the join matches on both columns.
-- Matching on the code alone would duplicate a catalog row whenever one code
-- appears with more than one program name.
INSERT INTO CatalogCourse (CatalogID, CatalogYear, ProgramID, CourseTitle, Credits, Prereqs, Coreqs, Fees, Attributes, Description)
SELECT DISTINCT catalog_id, cat_year, ProgramID, course_title, credits, prereqs, coreqs, fees, attributes, description
FROM import_catalog
    JOIN Program ON (program_code = ProgramCode AND program_name = ProgramName);

 * sqlite:///CourseData.db
0 rows affected.
4440 rows affected.


[]

In [16]:
%%sql
SELECT *
FROM CatalogCourse
LIMIT 10;

 * sqlite:///CourseData.db
Done.


CatalogCourseID,CatalogYear,CatalogID,ProgramID,CourseTitle,Credits,Prereqs,Coreqs,Fees,Attributes,Description
1,2017_2018,AN 0301,1,Independent Study,1-3 Credits,,,,,Students undertake an individualized program of study in consultation with a director from the Asian studies faculty.
2,2017_2018,AN 0310,1,Asian Studies Seminar,3 Credits,,,,,"This seminar examines selected topics concerning Asia. This course is taught in conjunction with another 100-300 level course from a rotation of course offerings. Consult the Asian Studies director to identify the conjoined course for a given semester. The seminar concentrates on topics within the parameters of the conjoined course syllabus but adds research emphasis. Students registered for this course must complete a research project, to include 300-level research, in addition to the regular research requirements of the conjoined course, and a 25-50 page term paper in substitution of some portion of the conjoined course requirements, as determined by the instructor. Open to juniors and seniors only."
3,2017_2018,BU 0211,2,Legal Environment of Business,3 Credits,Junior standing.,,,,"This course examines the broad philosophical as well as practical nature and function of the legal system, and introduces students to the legal and social responsibilities of business. The course includes an introduction to the legal system, the federal courts, Constitutional law, the United States Supreme Court, the civil process, and regulatory areas such as employment discrimination, protection of the environment, and corporate governance and securities markets."
4,2017_2018,BU 0220,2,Environmental Law and Policy,3 Credits,,,,"EVME Environmental Studies Major Elective, EVPE Environmental Studies Elective, EVSS Environmental Studies: Social Science, MGEL Management: General Elective","This course surveys issues arising out of federal laws designed to protect the environment and manage resources. It considers in detail the role of the Environmental Protection Agency in the enforcement of environmental policies arising out of such laws as the National Environmental Policy Act, the Clean Water Act, and the Clear Air Act, among others. The course also considers the impact of Congress, political parties, bureaucracy, and interest groups in shaping environmental policy, giving special attention to the impact of environmental regulation on business and private property rights."
5,2017_2018,BU 0311,2,"The Law of Contracts, Sales, and Property",3 Credits,BU 0211.,,,,"This course examines the components of common law contracts including the concepts of offer and acceptance, consideration, capacity and legality, assignment of rights and delegation of duties, as well as discharge of contracts. The course covers Articles 2 and 2A of the Uniform Commercial Code relating to leases, sales of goods, and warranties. The course also considers personal and real property, and bailments."
6,2017_2018,BU 0312,2,The Law of Business Organizations and Financial Transactions,3 Credits,BU 0211.,,,,"This course offers an analysis of legal principles related to the law of agency, sole proprietorships, partnerships, corporations, limited liability companies, and other business forms. The second half of the course addresses several sections of the Uniform Commercial Code, such as negotiable instruments, bank collections and deposits and secured transactions. Finally, the course examines the law of suretyship, debtor-creditor relationships, and bankruptcy."
7,2017_2018,BU 0320,2,Employment Law and Discrimination in the Workplace,3 Credits,,,,"MGEL Management: General Elective, UDIV U.S. Diversity","This course examines a variety of legal issues related to the workplace including the doctrine of employment at will, employee privacy, and the history and development of labor unions and the legal protections afforded by the National Labor Relations Act. A study of the role of the Civil Rights Act of 1964 and the Equal Employment Opportunity Commission in eradicating discrimination based on race, sex, religion, national origin, age, and disability occupies a major portion of the course. Other employment issues include affirmative action, worker safety, and compensation."
8,2017_2018,BU 0391,2,Seminar in Business Law and Ethics,3 Credits,"AE 0291, BU 0211, two additional courses in law or applied ethics.",,,,This interdisciplinary study of these two aspects of the business environment is cross-listed as
9,2017_2018,BL 0101,3,Black Lives Matter,3 Credits,,,,"ASGW American Studies: Gateway, BSFC Black Studies Focus Course, BSSS Black Studies: Social and Behavioral Sciences, PJST Peace and Justice Studies, UDIV U.S. Diversity","In the context of Ferguson, Charleston, and other national crises, this course responds to the call of students from our campus community to raise questions about and critically reflect upon the failures of democracy to recognize the value of Black Life. This course employs collective thinking, teaching, and research to focus on questions surrounding race, structural inequality, and violence. It examines the historical, geographical, cultural, social, and political ways in which race has been configured and deployed in the United States. Various faculty will bring to bear their respective scholarly lenses so that students understand race and racism across intellectual disciplines."
10,2017_2018,BL 0398,3,Independent Study,1-3 Credits,,,,BSCP Black Studies Capstone Course,"Upon request and by agreement with a professor in the program, a Black Studies minor may conduct a one-semester independent study on a defined research topic or field of study."


- The `CatalogYear` column is populated from `cat_year`, the column that was added in the for loop while importing the catalog CSVs.

In [17]:
%%sql

-- Rebuild Class from the staged courses, resolving the instructor and the catalog entry where possible.
DELETE FROM Class;

-- All three joins are LEFT JOINs, so a class is kept even when no professor,
-- no catalog year for its term, or no catalog entry can be matched.
-- The unmatched key columns are then stored as NULL.
INSERT INTO Class (
    CatalogID, CatalogCourseID, ProfessorID,
    Term, CRN, Section, Credits, Title,
    Timecodes, Meetings,
    Capacity, Actual, Remaining
)
SELECT DISTINCT
    ic.catalog_id,
    cc.CatalogCourseID,
    prof.ProfessorID,
    ic.term,
    ic.crn,
    ic.section,
    ic.credits,
    ic.title,
    ic.timecodes,
    ic.meetings,
    ic.cap,
    ic.act,
    ic.rem
FROM import_courses AS ic
    LEFT JOIN Professor AS prof
        ON (prof.Name = ic.primary_instructor)
    LEFT JOIN TermCatalogYear AS tcy
        ON (tcy.Term = ic.term)
    LEFT JOIN CatalogCourse AS cc
        ON (cc.CatalogID = ic.catalog_id AND cc.CatalogYear = tcy.CatalogYear);

 * sqlite:///CourseData.db
0 rows affected.
15937 rows affected.


[]

In [18]:
%%sql
SELECT *
FROM Class
LIMIT 10;

 * sqlite:///CourseData.db
Done.


ClassID,CatalogID,CatalogCourseID,ProfessorID,Term,CRN,Section,Credits,Title,Timecodes,Meetings,Capacity,Actual,Remaining
1,AC 0011,,1,Fall2014,70384,C01,3,Introduction to Financial Accounting,['TF 0800am-0915am 09/02-12/08 DSB 105'],"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",0,31,-31
2,AC 0011,,1,Fall2014,70385,C02,3,Introduction to Financial Accounting,['TF 0930am-1045am 09/02-12/08 DSB 105'],"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",0,31,-31
3,AC 0011,,1,Fall2014,70382,C03,3,Introduction to Financial Accounting,['TF 1230pm-0145pm 09/02-12/08 DSB 105'],"[{'days': 'TF', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",0,31,-31
4,AC 0011,,2,Fall2014,70291,C04,3,Introduction to Financial Accounting,['MR 1100am-1215pm 09/02-12/08 DSB 111'],"[{'days': 'MR', 'times': '1100am-1215pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",0,29,-29
5,AC 0011,,2,Fall2014,70350,C05,3,Introduction to Financial Accounting,['MR 1230pm-0145pm 09/02-12/08 DSB 111'],"[{'days': 'MR', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",0,30,-30
6,AC 0011,,2,Fall2014,70381,C06,3,Introduction to Financial Accounting,['MR 0330pm-0445pm 09/02-12/08 DSB 110A'],"[{'days': 'MR', 'times': '0330pm-0445pm', 'dates': '09/02-12/08', 'location': 'DSB 110A'}]",0,31,-31
7,AC 0011,,3,Fall2014,70383,G,3,Introduction to Financial Accounting,['MR 0930am-1045am 09/02-12/08 DSB 111'],"[{'days': 'MR', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 111'}]",30,31,-1
8,AC 0011,,4,Fall2014,70391,H,3,Introduction to Financial Accounting,['TF 0930am-1045am 09/02-12/08 DSB 110A'],"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 110A'}]",30,32,-2
9,AC 0011,,4,Fall2014,71105,I,3,Introduction to Financial Accounting,['TF 1100am-1215pm 09/02-12/08 DSB 110A'],"[{'days': 'TF', 'times': '1100am-1215pm', 'dates': '09/02-12/08', 'location': 'DSB 110A'}]",30,33,-3
10,AC 0011,,4,Fall2014,71123,J,3,Introduction to Financial Accounting,['TF 0200pm-0315pm 09/02-12/08 DSB 105'],"[{'days': 'TF', 'times': '0200pm-0315pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",30,32,-2


In [19]:
%%sql

-- Rebuild Meeting from the staged meetings, attaching each one to its class and location.
DELETE FROM Meeting;

-- A staged meeting is matched to its class on term and CRN.
-- DISTINCT collapses staged rows that are exact duplicates of each other.
INSERT INTO Meeting (ClassID, LocationID, Day, StartTime, EndTime)
SELECT DISTINCT
    cls.ClassID,
    loc.LocationID,
    mtg.day,
    mtg."start",
    mtg."end"
FROM import_course_meetings AS mtg
    JOIN Class AS cls USING (Term, CRN)
    LEFT JOIN Location AS loc ON (loc.Location = mtg.location);

 * sqlite:///CourseData.db
0 rows affected.
284847 rows affected.


[]

In [20]:
%%sql
SELECT *
FROM Meeting
LIMIT 10;

 * sqlite:///CourseData.db
Done.


MeetingID,ClassID,LocationID,Day,StartTime,EndTime
1,1,1,T,2014-09-02T08:00:00,2014-09-02T09:15:00
2,1,1,F,2014-09-05T08:00:00,2014-09-05T09:15:00
3,1,1,T,2014-09-09T08:00:00,2014-09-09T09:15:00
4,1,1,F,2014-09-12T08:00:00,2014-09-12T09:15:00
5,1,1,T,2014-09-16T08:00:00,2014-09-16T09:15:00
6,1,1,F,2014-09-19T08:00:00,2014-09-19T09:15:00
7,1,1,T,2014-09-23T08:00:00,2014-09-23T09:15:00
8,1,1,F,2014-09-26T08:00:00,2014-09-26T09:15:00
9,1,1,T,2014-09-30T08:00:00,2014-09-30T09:15:00
10,1,1,F,2014-10-03T08:00:00,2014-10-03T09:15:00


### __4. Integrity Checks__

__Domain Integrity__

__Entity Integrity:__


There should be 4440 Catalog Entries, 15937 Course Offerings, and 284847 Course Meetings (the distinct row counts of the imported staging tables).

In [21]:
%%sql
-- Row counts of the three loaded tables, to compare against the expected totals.
WITH
    meeting_total AS (SELECT COUNT(*) AS n FROM Meeting),
    catalog_total AS (SELECT COUNT(*) AS n FROM CatalogCourse),
    class_total   AS (SELECT COUNT(*) AS n FROM Class)
SELECT
    meeting_total.n AS CourseMeetings,
    catalog_total.n AS CatalogCourses,
    class_total.n   AS CourseOfferings
FROM meeting_total, catalog_total, class_total;

 * sqlite:///CourseData.db
Done.


CourseMeetings,CatalogCourses,CourseOfferings
284847,4440,15937


__Relational Integrity__

In [22]:
%%sql
-- Spot check of the joins, listing the classes of every professor whose name ends in Huntley.
-- The filter on Name drops classes without a matching professor even though the join is a LEFT JOIN.
SELECT
    CatalogCourseID,
    cc.CatalogYear  AS CatalogYear,
    cls.Term        AS Term,
    cls.CRN         AS CRN,
    cls.Section     AS Section,
    cls.CatalogID   AS CatalogID,
    cls.Title       AS Title,
    cls.Capacity    AS Capacity,
    cls.Actual      AS Actual,
    cls.Remaining   AS Remaining,
    substr(cls.Term, -4) AS Year
FROM Class AS cls
    LEFT JOIN Professor AS prof ON (cls.ProfessorID = prof.ProfessorID)
    LEFT JOIN CatalogCourse AS cc USING (CatalogCourseID)
    LEFT JOIN Program AS prog USING (ProgramID)
WHERE prof.Name LIKE '%Huntley'
ORDER BY Year, Term DESC, CatalogID, Section;

 * sqlite:///CourseData.db
Done.


CatalogCourseID,CatalogYear,Term,CRN,Section,CatalogID,Title,Capacity,Actual,Remaining,Year
,,Fall2014,70369,E,IS 0100,Intro to Information Systems,25,26,-1,2014
,,Fall2014,73060,A,IS 0135,Fundamentals of Web Design,25,26,-1,2014
,,Fall2014,73061,A,IS 0320,Systems Design and Implementation,25,16,9,2014
,,Fall2015,75231,E,IS 0100,Intro to Information Systems,29,28,1,2015
,,Fall2015,75246,F,IS 0100,Intro to Information Systems,29,28,1,2015
,,Fall2015,76388,A,IS 0135,Fundamentals of Web Design,25,21,4,2015
,,Fall2015,76389,A,IS 0320,Systems Design and Implementation,25,13,12,2015
,,Spring2016,38780,01,IS 0585,Contemporary Topics: Information Systems and Data,20,15,5,2016
,,Spring2016,37253,B,OM 0101,Operations Management,29,28,1,2016
,,Spring2016,37254,C,OM 0101,Operations Management,29,29,0,2016


In [23]:
%%sql
-- Spot check that CRN 39006 in Spring2019 resolves to one ClassID that has meetings attached.
-- MeetingID goes through MIN() and Term is part of the grouping, because SQLite takes
-- the value of an ungrouped bare column from an arbitrary row of the group.
SELECT Term, ClassID, MIN(MeetingID) AS MeetingID
FROM Class JOIN Meeting USING (ClassID)
WHERE CRN = 39006 AND Term = 'Spring2019'
GROUP BY Term, ClassID;

 * sqlite:///CourseData.db
Done.


Term,ClassID,MeetingID
Spring2019,13345,253416


In [24]:
%%sql
-- List every meeting of CRN 39006 in Spring2019.
-- The term is written as a single-quoted string literal. Double quotes denote an
-- identifier and are only read as a string through a SQLite compatibility fallback.
SELECT ClassID, MeetingID, StartTime
FROM Class JOIN Meeting USING (ClassID)
WHERE CRN = 39006 AND Term = 'Spring2019';

 * sqlite:///CourseData.db
Done.


ClassID,MeetingID,StartTime
13345,253416,2019-01-29T18:30:00
13345,253417,2019-02-05T18:30:00
13345,253418,2019-02-12T18:30:00
13345,253419,2019-02-26T18:30:00
13345,253420,2019-03-05T18:30:00
13345,253421,2019-03-12T18:30:00
13345,253422,2019-02-02T09:00:00
13345,253423,2019-02-16T09:00:00
13345,253424,2019-03-09T09:00:00


### __5. Empty Imported Tables:__

In [25]:
%%sql
-- Remove every row from the three staging tables now that the ERD tables are loaded.
DELETE FROM import_courses;
DELETE FROM import_course_meetings;
DELETE FROM import_catalog;

 * sqlite:///CourseData.db
15937 rows affected.
284907 rows affected.
4440 rows affected.


[]

In [26]:
%%sql
-- Drop the three staging tables once the ERD tables are loaded.
-- IF EXISTS lets this cell be rerun, or run after a partial load, without failing on a missing table.
DROP TABLE IF EXISTS import_courses;
DROP TABLE IF EXISTS import_course_meetings;
DROP TABLE IF EXISTS import_catalog;

 * sqlite:///CourseData.db
Done.
Done.
Done.


[]

In [27]:
%%sql
-- Rebuild the database file so the space freed by the dropped staging tables is released.
VACUUM;

 * sqlite:///CourseData.db
Done.


[]