In [1]:
%load_ext sql
import pandas as pd
import sqlite3 as sql

# Point the %sql / %%sql magics at the SQLite database file CourseData.db.
%sql sqlite:///CourseData.db
# Second, separate sqlite3 connection to the same file. Here `sql` is the
# sqlite3 module alias from the import above, not the ipython-sql extension.
# The pandas to_sql calls in the import cells write through this connection.
conn = sql.connect('CourseData.db')

![Team %%sql Project ERD](CourseDataERD.png)

In [4]:
%%sql

-- Schema setup. Every table is dropped and recreated, so running this cell
-- resets the tables listed here to empty (plus the seed rows at the bottom).
-- NOTE(review) no FOREIGN KEY constraints are declared anywhere in this cell.
-- The relationships suggested by the column names are not enforced by SQLite.

-- Create PROGRAMS table
DROP TABLE IF EXISTS PROGRAMS;
CREATE TABLE PROGRAMS (
    ProgramID INTEGER PRIMARY KEY,
    ProgramCode TEXT NOT NULL,
    ProgramName TEXT NOT NULL
);

-- Create COURSE_CATALOG table
-- ProgramID presumably refers to PROGRAMS.ProgramID (TODO confirm).
-- Credits is TEXT here but REAL in COURSE_OFFERINGS (TODO confirm this is intended).
DROP TABLE IF EXISTS COURSE_CATALOG;
CREATE TABLE COURSE_CATALOG (
    CourseID INTEGER PRIMARY KEY,
    CatalogYear TEXT NOT NULL,
    CatalogID TEXT NOT NULL,
    ProgramID INTEGER,
    CourseTitle TEXT NOT NULL,
    Credits TEXT NOT NULL,
    Prereqs TEXT,
    Coreqs TEXT,
    Fees TEXT,
    Attributes TEXT,
    Description TEXT
);

-- Create COURSE_INSTRUCTORS table
DROP TABLE IF EXISTS COURSE_INSTRUCTORS;
CREATE TABLE COURSE_INSTRUCTORS (
    InstructorID INTEGER PRIMARY KEY,
    Name TEXT NOT NULL
);

-- Create COURSE_OFFERINGS table
-- CourseID and PrimaryInstructorID are nullable and presumably refer to
-- COURSE_CATALOG.CourseID and COURSE_INSTRUCTORS.InstructorID (TODO confirm).
DROP TABLE IF EXISTS COURSE_OFFERINGS;
CREATE TABLE COURSE_OFFERINGS (
    CourseOfferingID INTEGER PRIMARY KEY,
    CourseID INTEGER,
    CatalogID TEXT NOT NULL,
    Term TEXT NOT NULL,
    CRN INTEGER NOT NULL,
    Section TEXT NOT NULL,
    Credits REAL,
    Title TEXT NOT NULL,
    Timecodes TEXT,
    PrimaryInstructorID INTEGER,
    Capacity INTEGER NOT NULL,
    Actual INTEGER NOT NULL,
    Remaining INTEGER NOT NULL
);

-- Create COURSE_LOCATIONS table
DROP TABLE IF EXISTS COURSE_LOCATIONS;
CREATE TABLE COURSE_LOCATIONS (
    LocationID INTEGER PRIMARY KEY,
    LocationCode TEXT NOT NULL
);

-- Create COURSE_MEETINGS table
-- CourseOfferingID and LocationID are required and presumably refer to
-- COURSE_OFFERINGS and COURSE_LOCATIONS (TODO confirm).
-- Start and end timestamps are stored as TEXT.
DROP TABLE IF EXISTS COURSE_MEETINGS;
CREATE TABLE COURSE_MEETINGS (
    CourseMeetingID INTEGER PRIMARY KEY,
    CourseOfferingID INTEGER NOT NULL,
    LocationID INTEGER NOT NULL,
    StartDateTime TEXT NOT NULL,
    EndDateTime TEXT NOT NULL
);

-- Create TERM_TO_CATALOG_YEAR table
-- Lookup from a term name to its catalog year. No primary key or uniqueness
-- constraint is declared on either column.
DROP TABLE IF EXISTS TERM_TO_CATALOG_YEAR;
CREATE TABLE TERM_TO_CATALOG_YEAR (
    CatalogYear TEXT NOT NULL,
    Term TEXT NOT NULL
);

-- Seed the lookup with seven terms across two catalog years. The column list
-- is given as (Term, CatalogYear), the reverse of the table definition order.
INSERT INTO TERM_TO_CATALOG_YEAR (Term, CatalogYear) VALUES 
('Fall2017','2017_2018'),('Winter2018','2017_2018'),('Spring2018','2017_2018'),('Summer2018','2017_2018'),('Fall2018','2018_2019'),
('Winter2019','2018_2019'),('Spring2019','2018_2019');

 * sqlite:///CourseData.db
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
Done.
7 rows affected.


[]

In [None]:
# Building blocks for the per-term CSV paths.
# src_path ends with '/' and both filenames start with '/', so a term folder
# name is presumably meant to sit between them:
#   src_path + <term> + course_csv_filename  ->  'SourceData/<term>/courses.csv'
# (TODO confirm; concatenating src_path directly with a filename yields '//').
src_path = 'SourceData/'
course_csv_filename = '/courses.csv'
meeting_csv_filename = '/course_meetings.csv'

In [None]:
# Import the course catalog CSVs (one file per catalog year) into the staging
# table IMPORT_COURSE_CATALOG, tagging each row with its catalog year.
catalog_years = ['2017_2018', '2018_2019']

# Reset the staging table first: to_sql below appends, so without this every
# re-run of the cell (or Restart & Run All) would duplicate all catalog rows.
with conn:
    conn.execute('DROP TABLE IF EXISTS IMPORT_COURSE_CATALOG')

# Collect the per-year frames and concatenate once after the loop.
# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop is quadratic anyway.
catalog_frames = []
for catalog_year in catalog_years:
    file_path = 'SourceData/Catalogs/CourseCatalog' + catalog_year + '.csv'
    df_catalogs = pd.read_csv(file_path)
    df_catalogs['cat_year'] = catalog_year  # remember which catalog the row came from
    catalog_frames.append(df_catalogs)
    # append: the table is created by the first file and extended by the rest
    df_catalogs.to_sql('IMPORT_COURSE_CATALOG', conn, if_exists='append', index=False)

# Combined frame kept only for optional CSV export / visual inspection.
# Frames are stacked latest-read first, matching the row order the previous
# append-based code produced; the original per-file indexes are kept.
df_catalogs_export_data_csv = (
    pd.concat(catalog_frames[::-1]) if catalog_frames else pd.DataFrame()
)

In [None]:
# Terms to import. Each term is expected to have its own folder under src_path
# containing a courses CSV and a course-meetings CSV.
# NOTE(review): 'Winter2019' is seeded in TERM_TO_CATALOG_YEAR but is not in
# this list - confirm whether a SourceData/Winter2019 folder should be imported.
semesters = ['Fall2014','Winter2015','Spring2015','Summer2015','Fall2015','Winter2016',
             'Spring2016','Summer2016','Fall2016','Winter2017','Spring2017','SpringBreak2017',
             'Summer2017','Fall2017','Winter2018','Spring2018','Summer2018','Fall2018',
             'Spring2019'
            ]

# Reset both staging tables first: to_sql below appends, so without this every
# re-run of the cell (or Restart & Run All) would duplicate all imported rows.
with conn:
    conn.execute('DROP TABLE IF EXISTS IMPORT_COURSE_OFFERINGS')
    conn.execute('DROP TABLE IF EXISTS IMPORT_COURSE_MEETINGS')

# Load every term's offerings and meetings into the staging tables.
# Paths are built from the path variables defined in the configuration cell
# (src_path, course_csv_filename, meeting_csv_filename) instead of repeating
# the literals; the resulting paths are the same 'SourceData/<term>/...' ones.
for semester in semesters:
    file_path = src_path + semester + course_csv_filename
    df_course_offerings = pd.read_csv(file_path)
    df_course_offerings.to_sql('IMPORT_COURSE_OFFERINGS', conn, if_exists='append', index=False)

    file_path = src_path + semester + meeting_csv_filename
    df_course_meetings = pd.read_csv(file_path)
    df_course_meetings.to_sql('IMPORT_COURSE_MEETINGS', conn, if_exists='append', index=False)

In [None]:
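# Optional: export to CSV for visual inspection (df_course_offerings and df_course_meetings hold only the last term read by the loop above)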
#df_course_offerings.to_csv('Data_Dump_course.csv', index=None)
#df_course_meetings.to_csv('Data_Dump_meeting.csv', index=None)
#df_catalogs_export_data_csv.to_csv('Data_Dump_catalogs.csv', index=None)
