# 1. Load SQLite and Database
- 'CourseData.db'
- note the 3 backslashes

In [33]:
%reload_ext sql
%sql sqlite:///CourseData.db

'Connected: @CourseData.db'

In [34]:
%%sql
DROP TABLE IF EXISTS CourseData;

 * sqlite:///CourseData.db
Done.


[]

# safety precautions
- drop tables to start from scratch

In [35]:
%%sql
sqlite:///CourseData.db
DROP TABLE IF EXISTS COURSECATALOGS;
DROP TABLE IF EXISTS COURSES
DROP TABLE IF EXISTS COURSEMEETINGS;

Done.
(sqlite3.OperationalError) near "DROP": syntax error [SQL: 'DROP TABLE IF EXISTS COURSES\nDROP TABLE IF EXISTS COURSEMEETINGS;'] (Background on this error at: http://sqlalche.me/e/e3q8)


# 2. Do COUNT statements for 3 .csv Files
- ensure .csv name spelling is exact

In [36]:
%%sql
SELECT Count(*) FROM import_course_meetings;

 * sqlite:///CourseData.db
Done.


Count(*)
317339


In [37]:
%%sql
SELECT Count(*) FROM import_courses;

 * sqlite:///CourseData.db
Done.


Count(*)
15955


In [38]:
%%sql
SELECT catalog_id FROM import_Course_Catalog limit 1;


 * sqlite:///CourseData.db
Done.


catalog_id
AN 0301


In [7]:
%%sql
SELECT Count(*) FROM import_Course_Catalog;

 * sqlite:///CourseData.db
Done.


Count(*)
4441


# 3. CREATE Tables
- primary key is generated on every table by naming new surrogate key/field that isn't already in .csv files.
- don't need NOT NULL for PK because it's implied in SQLite.


### COURSECATALOGS

In [8]:
%%sql
CREATE TABLE COURSECATALOGS (
    CCID INTEGER PRIMARY KEY,
    catalog_id TEXT,
    program_code TEXT,    
    program_name TEXT,
    course_title TEXT,
    prereqs TEXT,    
    coreqs TEXT,
    fees TEXT,
    attributes TEXT,    
    description TEXT
);

 * sqlite:///CourseData.db
Done.


[]

### COURSES

In [10]:
%%sql
CREATE TABLE COURSES (
                 CID INTEGER PRIMARY KEY,
                 catalog_id TEXT,
                 crn INTEGER,
                 term TEXT,
                 section TEXT,
                 credits TEXT,
                 title TEXT,
                 meetings TEXT,
                 timecodes TEXT,
                 cap TEXT,
                 act TEXT,
                 rem TEXT,
                 TID INTEGER,      
);

 * sqlite:///CourseData.db
Done.


[]

In [31]:
%%sql
CREATE TABLE COURSEMEETINGS (
    CMID INTEGER PRIMARY KEY,
    term TEXT,
    crn TEXT,
    location TEXT NOT NULL,	
    day TEXT NOT NULL,
    start TEXT NOT NULL,
    end TEXT NOT NULL
);

 * sqlite:///CourseData.db
Done.


[]

# 4. Do INSERT Statements

In [12]:
%%sql
INSERT INTO COURSECATALOGS (catalog_id, program_code, program_name, course_title, prereqs, coreqs, fees, attributes, description) 
SELECT DISTINCT catalog_id, program_code, program_name, course_title, prereqs, coreqs, fees, attributes, description
FROM import_Course_Catalog;

 * sqlite:///CourseData.db
2221 rows affected.


[]

In [13]:
%%sql
SELECT * FROM COURSECATALOGS
limit 2;

 * sqlite:///CourseData.db
Done.


CCID,catalog_id,program_code,program_name,course_title,prereqs,coreqs,fees,attributes,description
1,AN 0301,AN,Asian Studies,Independent Study,,,,,Students undertake an individualized program of study in consultation with a director from the Asian studies faculty.
2,AN 0310,AN,Asian Studies,Asian Studies Seminar,,,,,"This seminar examines selected topics concerning Asia. This course is taught in conjunction with another 100-300 level course from a rotation of course offerings. Consult the Asian Studies director to identify the conjoined course for a given semester. The seminar concentrates on topics within the parameters of the conjoined course syllabus but adds research emphasis. Students registered for this course must complete a research project, to include 300-level research, in addition to the regular research requirements of the conjoined course, and a 25-50 page term paper in substitution of some portion of the conjoined course requirements, as determined by the instructor. Open to juniors and seniors only."


# INSERT TEACHERS

In [14]:
%%sql
DELETE FROM TEACHERS;

INSERT INTO TEACHERS  (primary_instructor)
SELECT DISTINCT primary_instructor
    FROM import_courses
    ;

 * sqlite:///CourseData.db
0 rows affected.
1105 rows affected.


[]

In [15]:
%%sql
SELECT * FROM TEACHERS
limit 2;

 * sqlite:///CourseData.db
Done.


TID,primary_instructor
,Michael P. Coyne
,Rebecca I. Bloch


# INSERT COURSEMEETINGS

In [16]:
%%sql 
INSERT INTO COURSEMEETINGS (CID, location, day, start, end)
SELECT DISTINCT
   CID, location, day, start, end
    FROM import_course_meetings
     JOIN COURSES ON (import_course_meetings.crn =COURSES.crn AND import_course_meetings.term=COURSES.term)
                ;

 * sqlite:///CourseData.db
(sqlite3.OperationalError) no such table: COURSEMEETINGS [SQL: 'INSERT INTO COURSEMEETINGS (CID, location, day, start, end)\nSELECT DISTINCT\n   CID, location, day, start, end\n    FROM import_course_meetings\n     JOIN COURSES ON (import_course_meetings.crn =COURSES.crn AND import_course_meetings.term=COURSES.term)\n                ;'] (Background on this error at: http://sqlalche.me/e/e3q8)


In [17]:
%%sql
SELECT * FROM COURSEMEETINGS
LIMIT 10;

 * sqlite:///CourseData.db
(sqlite3.OperationalError) no such table: COURSEMEETINGS [SQL: 'SELECT * FROM COURSEMEETINGS\nLIMIT 10;'] (Background on this error at: http://sqlalche.me/e/e3q8)


# INSERT COURSES
- _key:_ do NOT need primary_instructor in courses table since we're going to join in the teachers table that has this value.  We are simply telling it what to JOIN these tables on, but primary_instructor does not need to be in both tables.

- still need to add additional FKs

In [18]:
%%sql


INSERT INTO COURSES (TID, catalog_id, crn, term, section, credits, title, meetings, timecodes, cap, act, rem)
SELECT DISTINCT TID, catalog_id, crn, term, section, credits, title, meetings, timecodes, cap, act, rem
              
                FROM import_courses
                JOIN TEACHERS ON TEACHERS.primary_instructor = import_courses.primary_instructor
                ;


 * sqlite:///CourseData.db
15938 rows affected.


[]

In [19]:
%%sql
SELECT TID, catalog_id, primary_instructor 
FROM COURSES
JOIN TEACHERS USING (TID)
limit 10;

 * sqlite:///CourseData.db
Done.


TID,catalog_id,primary_instructor


In [29]:
%%sql
SELECT * FROM COURSES
limit 5
;


 * sqlite:///CourseData.db
Done.


CID,catalog_id,crn,term,section,credits,title,meetings,timecodes,cap,act,rem,TID
1,AC 0011,70382,Fall2014,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 1230pm-0145pm 09/02-12/08 DSB 105'],0,31,-31,
2,AC 0011,70384,Fall2014,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0800am-0915am 09/02-12/08 DSB 105'],0,31,-31,
3,AC 0011,70385,Fall2014,C02,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0930am-1045am 09/02-12/08 DSB 105'],0,31,-31,
4,AC 0011,75241,Fall2015,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0200pm-0315pm', 'dates': '09/01-12/07', 'location': 'DSB 115'}]",['TF 0200pm-0315pm 09/01-12/07 DSB 115'],0,32,-32,
5,AC 0011,75243,Fall2015,C04,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/01-12/07', 'location': 'DSB 111'}]",['TF 0800am-0915am 09/01-12/07 DSB 111'],0,35,-35,


In [21]:
%%sql
SELECT TID FROM TEACHERS
limit 5;

 * sqlite:///CourseData.db
Done.


TID
""
""
""
""
""


# 5. Run SELECT Queries

In [22]:
%reload_ext sql
%sql sqlite:///CourseData.db

'Connected: @CourseData.db'

## SELECT Query for all unique classrooms 
- only included those with a character lenght of 7 (some had 2, 8, etc.)
- can't figure out how to get a distinct location when including additinonal fields.

In [23]:
%%sql
SELECT DISTINCT 
location
FROM COURSEMEETINGS
WHERE LENGTH(location) = 7
ORDER BY location
LIMIT 20
;

 * sqlite:///CourseData.db
(sqlite3.OperationalError) no such table: COURSEMEETINGS [SQL: 'SELECT DISTINCT \nlocation\nFROM COURSEMEETINGS\nWHERE LENGTH(location) = 7\nORDER BY location\nLIMIT 20\n;'] (Background on this error at: http://sqlalche.me/e/e3q8)


## SELECT Query displaying all courses in MSBA Program '18-'19
- still have to get rid of blank by making defining description as NOT NULL.
- is there an easier way?

In [24]:
%%sql
SELECT course_title AS Course, program_name AS Program, catalog_id AS Code, description AS Description
FROM COURSECATALOGS
WHERE program_name = 'Information Systems'
AND
Code LIKE "IS 05%"
ORDER BY program_name
LIMIT 1;

 * sqlite:///CourseData.db
Done.


Course,Program,Code,Description
Information Systems and Database Management,Information Systems,IS 0500,"This course introduces the basic concepts and tools relevant to information systems and database management, and their enabling roles in business strategies and operations. Case studies are used to facilitate discussions of practical applications and issues involving strategic alignments of organizations, resource allocation, integration, planning, and analysis of cost, benefit and performance in light of the big data challenges. Specific emphases involve database design and implementation and emerging strategies and technologies such as business intelligence, big data management, web security, and online business analytics."


# JOIN Testing

In [25]:
%%sql
SELECT *
FROM COURSES
LIMIT 7;

 * sqlite:///CourseData.db
Done.


CID,catalog_id,crn,term,section,credits,title,meetings,timecodes,cap,act,rem,TID
1,AC 0011,70382,Fall2014,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '1230pm-0145pm', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 1230pm-0145pm 09/02-12/08 DSB 105'],0,31,-31,
2,AC 0011,70384,Fall2014,C01,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0800am-0915am 09/02-12/08 DSB 105'],0,31,-31,
3,AC 0011,70385,Fall2014,C02,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/02-12/08', 'location': 'DSB 105'}]",['TF 0930am-1045am 09/02-12/08 DSB 105'],0,31,-31,
4,AC 0011,75241,Fall2015,C03,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0200pm-0315pm', 'dates': '09/01-12/07', 'location': 'DSB 115'}]",['TF 0200pm-0315pm 09/01-12/07 DSB 115'],0,32,-32,
5,AC 0011,75243,Fall2015,C04,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0800am-0915am', 'dates': '09/01-12/07', 'location': 'DSB 111'}]",['TF 0800am-0915am 09/01-12/07 DSB 111'],0,35,-35,
6,AC 0011,75244,Fall2015,C05,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0930am-1045am', 'dates': '09/01-12/07', 'location': 'DSB 111'}]",['TF 0930am-1045am 09/01-12/07 DSB 111'],0,36,-36,
7,AC 0011,77416,Fall2016,08,3.0,Introduction to Financial Accounting,"[{'days': 'TF', 'times': '0330pm-0445pm', 'dates': '09/06-12/12', 'location': 'DSB 112'}]",['TF 0330pm-0445pm 09/06-12/12 DSB 112'],29,27,2,


In [26]:
%%sql
SELECT t.primary_instructor, c.catalog_id
FROM COURSES as c
JOIN TEACHERS as t ON c.TID = t.TI;

 * sqlite:///CourseData.db
(sqlite3.OperationalError) no such column: t.TI [SQL: 'SELECT t.primary_instructor, c.catalog_id\nFROM COURSES as c\nJOIN TEACHERS as t ON c.TID = t.TI;'] (Background on this error at: http://sqlalche.me/e/e3q8)


In [27]:
%%sql
SELECT TID, primary_instructor FROM TEACHERS
limit 10;


 * sqlite:///CourseData.db
Done.


TID,primary_instructor
,Michael P. Coyne
,Rebecca I. Bloch
,Paul Caster
,Jo Ann Drusbosky
,Arleen N. Kardos
,Scott M Brenner
,Kevin C. Cassidy
,Bruce Bradford
,Milo W. Peck
,Stephen E. Yost


Have to finish insterting COURSES column

In [28]:
%%sql
VACUUM;

 * sqlite:///CourseData.db
Done.


[]