# Data Warehouse Utilization

In [1]:
%load_ext sql

In [2]:
%%sql
sqlite:///CourseDataWarehouse.db

'Connected: @CourseDataWarehouse.db'

## Which classroom is the most utilized, and what programs hold classes there? (Done)

In [3]:
%%sql
SELECT location, COUNT(course_id) as Loc_count
FROM LOCATIONS_DW 
    JOIN CLASS_FACTS_DW USING (location_id)
    JOIN COURSES_DW USING (course_id)
GROUP BY location 
ORDER BY Loc_count DESC
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


location,Loc_count
DSB 112,171
BNW 341,160
DSB 109,158
NHS 301,158
CNS 101,157
DSB 106,157
CNS 108,154
BNW 137,152
CNS 106,149
DSB 108,148


In [4]:
%%sql
SELECT program_name AS Program, count(catalog_id) AS Program_Count
FROM COURSES_DW 
    JOIN CLASS_FACTS_DW USING (course_id)
    JOIN PROGRAMS_DW USING (program_id)
    JOIN LOCATIONS_DW USING (location_id)
    WHERE location = 'DSB 112'
    GROUP BY program_name
    ORDER BY Program_count DESC; 

 * sqlite:///CourseDataWarehouse.db
Done.


Program,Program_Count
Accounting,63
Marketing,38
Management,26
Finance,21
Operations Management,10
Information Systems,8
Taxation,5


## Which classes go over capacity the most frequently?

Problem- why are some professors teaching multiple sections of the same course? 6 sections of AC 0012 by same prof? check if accurate...

In [5]:
%%sql
SELECT DISTINCT Course_title, COUNT(Course_title) AS Cap_count
FROM (SELECT DISTINCT COURSES_DW.CatalogYear, term, section, professor_id, COURSES_DW.Course_id, Catalog_id, Course_title, Timecodes, Cap, Actual, Remaining
FROM COURSES_DW    
    JOIN CLASS_FACTS_DW USING (course_id)
    JOIN TIMECODES_DW USING(Timecode_id)
    WHERE Remaining < 0 AND Cap != 0
    ORDER BY professor_id)
GROUP BY course_title
ORDER BY Cap_count DESC
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


Course_title,Cap_count
Introduction to Management Accounting,13
Operations Management,13
Principles of Marketing,13
Introduction to Information Systems,11
Marketing Research,10
Business Strategies in the Global Environment,8
Health Assessment Lab,7
Honors Seminar,7
Introduction to Finance,7
Introduction to Management,7


## Which professors have the most diverse courseload? (DONE)

In [6]:
%%sql
SELECT professor_id, Name, COUNT(DISTINCT Course_id) AS CourseCount
FROM PROFESSORS_DW
    JOIN CLASS_FACTS_DW USING (professor_id)
    JOIN COURSES_DW USING (course_id)
    GROUP BY professor_id, Name
    ORDER BY CourseCount DESC
    LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


Professor_id,Name,CourseCount
161,Jeffrey N. Denenberg,18
110,Michael P. Pagano,17
65,Aaron R. Van Dyke,16
111,Qin Zhang,15
511,Amalia I. Rusu,15
576,Shannon P. Gerry,14
67,Diane J. Brousseau,13
84,John R. Miecznikowski,13
93,Amanda S. Harper-Leatherman,13
117,Virginia A. Kelly,13


In [7]:
%%sql
SELECT DISTINCT term, catalog_id, course_title
FROM COURSES_DW
    JOIN CLASS_FACTS_DW USING (course_id)
    WHERE professor_id = '161'; 

 * sqlite:///CourseDataWarehouse.db
Done.


Term,Catalog_id,Course_title
Fall2017,ECE 0461,Green Power Generation
Fall2017,EE 0213L,Electric Circuits Lab
Fall2017,EE 0231,Introduction to Electronics Circuits and Devices
Fall2017,EE 0231L,Electronics Circuits Lab
Fall2017,EE 0361,Green Power Generation
Fall2018,BEN 0331,Biomedical Signal Processing
Fall2018,CR 0331,Biomedical Signal Processing
Fall2018,EE 0213L,Electric Circuits Lab
Fall2018,EE 0231L,Electronics Circuits Lab
Spring2018,CR 0245L,Digital Design I Lab


## On which day are the most/least classes held? (DONE)

In [8]:
%%sql
SELECT DISTINCT Day, count(Day) as Date_Count
FROM TIMECODES_DW
    JOIN CLASS_FACTS_DW USING (timecode_id)
    JOIN COURSES_DW USING (course_id)
GROUP BY Day
ORDER BY Date_Count DESC;

 * sqlite:///CourseDataWarehouse.db
Done.


Day,Date_Count
T,2853
R,2083
M,2077
W,1648
F,1460
S,163
U,32


## During which time slot are the most classes held?

In [9]:
%%sql
SELECT DISTINCT Class_time, COUNT(Class_time) AS Class_count
FROM (SELECT (Start ||'-'|| End) AS Class_time
FROM TIMECODES_DW)
GROUP BY Class_time
ORDER BY Class_count DESC
LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


Class_time,Class_count
07:00:00-14:00:00,7
08:00:00-16:00:00,7
09:00:00-13:00:00,7
08:00:00-10:30:00,6
08:00:00-12:00:00,6
08:00:00-16:30:00,6
08:30:00-14:00:00,6
08:30:00-15:30:00,6
08:30:00-16:30:00,6
08:30:00-17:00:00,6


In [10]:
%%sql
SELECT SUM(Class_time)
FROM (SELECT (Start ||'-'|| End) AS Class_time
FROM TIMECODES_DW)
--GROUP BY Class_time
--ORDER BY Class_count DESC;

 * sqlite:///CourseDataWarehouse.db
Done.


SUM(Class_time)
13659.0


In [11]:
%%sql
SELECT COUNT(*)
FROM (SELECT DISTINCT Start, End 
    FROM TIMECODES_DW);

 * sqlite:///CourseDataWarehouse.db
Done.


COUNT(*)
379


In [12]:
%%sql
SELECT DISTINCT Start, End, Day, course_title
FROM TIMECODES_DW
    JOIN CLASS_FACTS_DW USING (timecode_id)
    JOIN COURSES_DW USING (course_id)
WHERE Start = "07:00:00" AND End = "14:00:00";

 * sqlite:///CourseDataWarehouse.db
Done.


Start,End,Day,Course_title
07:00:00,14:00:00,T,Geriatric Nursing Clinical
07:00:00,14:00:00,W,Geriatric Nursing Clinical
07:00:00,14:00:00,F,Geriatric Nursing Clinical
07:00:00,14:00:00,F,Mental Health Nursing Clinical
07:00:00,14:00:00,W,Mental Health Nursing Clinical
07:00:00,14:00:00,T,Mental Health Nursing Clinical
07:00:00,14:00:00,M,Mental Health Nursing Clinical
07:00:00,14:00:00,M,Medical Surgical Nursing I Clinical
07:00:00,14:00:00,W,Medical Surgical Nursing I Clinical
07:00:00,14:00:00,R,Medical Surgical Nursing I Clinical


## Which professor has taught the most classes? students? (use actual, not cap)

Comment- double check if this is right. Might be over counting bc of same problem as first question

In [13]:
%%sql
SELECT DISTINCT Professor_id AS ID, program_name AS Program, Name, COUNT(Course_title) AS Courses_Taught
FROM PROFESSORS_DW    
    JOIN CLASS_FACTS_DW USING (professor_id)
    JOIN PROGRAMS_DW USING (program_id)
    JOIN COURSES_DW USING (course_id)
    GROUP BY ID
    ORDER BY Courses_Taught DESC
    LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


ID,Program,Name,Courses_Taught
984,Accounting,Victor E. Hartman,72
374,Nursing,Mary M Murphy,67
9,Accounting,Milo W. Peck,63
457,Physics,Bidyut B. Das,57
842,Spanish,Lauren S. Gaskill,56
383,Nursing,Laura A. Conklin,54
379,Nursing,Kelly Ann Sullivan,51
14,Accounting,Patricia M. Poli,47
251,Sociology,Rose P. Rodrigues,46
398,Nursing,Martha Ann Boulton,46


In [13]:
%%sql
SELECT DISTINCT course_id, Offering_id, term, day, start, end, section, catalog_id, course_title, timecodes
FROM COURSES_DW
    JOIN CLASS_FACTS_DW USING (course_id)
    JOIN TIMECODES_DW USING(timecode_id)
    WHERE professor_id = '984'; 

 * sqlite:///CourseDataWarehouse.db
Done.


Course_id,Offering_id,Term,Day,Start,End,Section,Catalog_id,Course_title,Timecodes
363,11773,Spring2018,T,18:00:00,20:00:00,1,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 112', 'T 0600pm-1000pm 03/27-03/27 DSB 112', 'W 0300pm-0900pm 03/28-03/28 DSB 112', 'T 0600pm-0900pm 04/03-04/03 DSB 112', 'T 0600pm-1000pm 04/17-04/17 DSB 112', 'W 0100pm-1000pm 04/18-04/18 DSB 112', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 112', 'T 0600pm-1000pm 05/01-05/01 DSB 112', 'W 0300pm-0900pm 05/02-05/02 DSB 109', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11773,Spring2018,T,18:00:00,21:00:00,1,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 112', 'T 0600pm-1000pm 03/27-03/27 DSB 112', 'W 0300pm-0900pm 03/28-03/28 DSB 112', 'T 0600pm-0900pm 04/03-04/03 DSB 112', 'T 0600pm-1000pm 04/17-04/17 DSB 112', 'W 0100pm-1000pm 04/18-04/18 DSB 112', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 112', 'T 0600pm-1000pm 05/01-05/01 DSB 112', 'W 0300pm-0900pm 05/02-05/02 DSB 109', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11773,Spring2018,T,18:00:00,22:00:00,1,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 112', 'T 0600pm-1000pm 03/27-03/27 DSB 112', 'W 0300pm-0900pm 03/28-03/28 DSB 112', 'T 0600pm-0900pm 04/03-04/03 DSB 112', 'T 0600pm-1000pm 04/17-04/17 DSB 112', 'W 0100pm-1000pm 04/18-04/18 DSB 112', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 112', 'T 0600pm-1000pm 05/01-05/01 DSB 112', 'W 0300pm-0900pm 05/02-05/02 DSB 109', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11773,Spring2018,W,13:00:00,22:00:00,1,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 112', 'T 0600pm-1000pm 03/27-03/27 DSB 112', 'W 0300pm-0900pm 03/28-03/28 DSB 112', 'T 0600pm-0900pm 04/03-04/03 DSB 112', 'T 0600pm-1000pm 04/17-04/17 DSB 112', 'W 0100pm-1000pm 04/18-04/18 DSB 112', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 112', 'T 0600pm-1000pm 05/01-05/01 DSB 112', 'W 0300pm-0900pm 05/02-05/02 DSB 109', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11773,Spring2018,W,15:00:00,21:00:00,1,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 112', 'T 0600pm-1000pm 03/27-03/27 DSB 112', 'W 0300pm-0900pm 03/28-03/28 DSB 112', 'T 0600pm-0900pm 04/03-04/03 DSB 112', 'T 0600pm-1000pm 04/17-04/17 DSB 112', 'W 0100pm-1000pm 04/18-04/18 DSB 112', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 112', 'T 0600pm-1000pm 05/01-05/01 DSB 112', 'W 0300pm-0900pm 05/02-05/02 DSB 109', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11773,Spring2018,W,18:00:00,21:00:00,1,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 112', 'T 0600pm-1000pm 03/27-03/27 DSB 112', 'W 0300pm-0900pm 03/28-03/28 DSB 112', 'T 0600pm-0900pm 04/03-04/03 DSB 112', 'T 0600pm-1000pm 04/17-04/17 DSB 112', 'W 0100pm-1000pm 04/18-04/18 DSB 112', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 112', 'T 0600pm-1000pm 05/01-05/01 DSB 112', 'W 0300pm-0900pm 05/02-05/02 DSB 109', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11774,Spring2018,R,07:55:00,11:55:00,2,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 107', 'W 0755am-0155pm 03/28-03/28 DSB 112', 'R 0755am-1155am 03/29-03/29 DSB 2109A', 'T 0600pm-0900pm 04/03-04/03 DSB 107', 'W 0755am-0455pm 04/18-04/18 DSB 112', 'R 0755am-1155am 04/19-04/19 DSB 2109A', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 107', 'W 0755am-0155pm 05/02-05/02 DSB 2109A', 'R 0755am-1155am 05/03-05/03 DSB 2109A', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11774,Spring2018,T,18:00:00,20:00:00,2,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 107', 'W 0755am-0155pm 03/28-03/28 DSB 112', 'R 0755am-1155am 03/29-03/29 DSB 2109A', 'T 0600pm-0900pm 04/03-04/03 DSB 107', 'W 0755am-0455pm 04/18-04/18 DSB 112', 'R 0755am-1155am 04/19-04/19 DSB 2109A', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 107', 'W 0755am-0155pm 05/02-05/02 DSB 2109A', 'R 0755am-1155am 05/03-05/03 DSB 2109A', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11774,Spring2018,T,18:00:00,21:00:00,2,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 107', 'W 0755am-0155pm 03/28-03/28 DSB 112', 'R 0755am-1155am 03/29-03/29 DSB 2109A', 'T 0600pm-0900pm 04/03-04/03 DSB 107', 'W 0755am-0455pm 04/18-04/18 DSB 112', 'R 0755am-1155am 04/19-04/19 DSB 2109A', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 107', 'W 0755am-0155pm 05/02-05/02 DSB 2109A', 'R 0755am-1155am 05/03-05/03 DSB 2109A', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"
363,11774,Spring2018,W,07:55:00,13:55:00,2,AC 0565,Forensic Accounting,"['T 0600pm-0800pm 03/20-03/20 DSB 107', 'W 0755am-0155pm 03/28-03/28 DSB 112', 'R 0755am-1155am 03/29-03/29 DSB 2109A', 'T 0600pm-0900pm 04/03-04/03 DSB 107', 'W 0755am-0455pm 04/18-04/18 DSB 112', 'R 0755am-1155am 04/19-04/19 DSB 2109A', 'T 0600pm-0800pm 04/24-04/24 DSB 112', 'W 0600pm-0900pm 04/25-04/25 DSB 107', 'W 0755am-0155pm 05/02-05/02 DSB 2109A', 'R 0755am-1155am 05/03-05/03 DSB 2109A', 'T 0600pm-0800pm 05/08-05/08 DSB 112']"


In [12]:
%%sql
SELECT Professor_id AS ID, program_name AS Program, Name, SUM(actual) AS Students_Taught
FROM PROFESSORS_DW    
    JOIN CLASS_FACTS_DW USING (professor_id)
    JOIN PROGRAMS_DW USING (program_id)
    JOIN COURSES_DW USING (course_id)
    GROUP BY ID
    ORDER BY Students_Taught DESC
    LIMIT 10;

 * sqlite:///CourseDataWarehouse.db
Done.


ID,Program,Name,Students_Taught
374,Nursing,Mary M Murphy,3249
380,Nursing,Katherine Marie Saracino,1611
9,Accounting,Milo W. Peck,1598
51,Biology,Christine Earls,1319
52,Biology,Anthony J. DeCristofaro,1190
457,Physics,Bidyut B. Das,1121
307,Marketing,John E. Neal,1110
984,Accounting,Victor E. Hartman,1077
297,Management,Paul P. Daulerio,1058
496,Religious Studies,John E. Thiel,1008


## Which classes are attracting <10 students? (which classes are the least popular--- ratio?)

In [None]:
%%sql
-- Find classes where actual is <10 
-- Also get the count of each value
SELECT Course_title as Course, Catalog_id, Name, COURSES_DW.CatalogYear, Term, Actual, Cap, round((Actual*1.0/Cap*1.0),2) AS Ratio
    FROM COURSES_DW
    JOIN CLASS_FACTS_DW USING (course_id)
    JOIN PROFESSORS_DW USING (professor_id)
    JOIN TIMECODES_DW USING (timecode_id)
    JOIN PROGRAMS_DW USING (program_id)
    WHERE Actual < 10 AND Ratio <0.3
    GROUP BY Course_title
    ORDER BY Actual DESC

LIMIT 20;