# Student Course Report Analysis


In [16]:
# Imports
import pandas as pd
import os
import re
import json
import matplotlib.pyplot as plt
import functools

from scipy import stats
from os.path import join, dirname
from pymongo import MongoClient
from pprint import pprint
from dotenv import load_dotenv
from IPython.display import display, JSON, HTML

# Load environment variables
load_dotenv(join(os.path.abspath(""), "../.env.local"))

pass

In [17]:
# Connect to database
mongo_client = MongoClient(os.environ.get("MONGO_URI"))
db = mongo_client["george-beta"]

# Load data
professors = pd.DataFrame(db["professors"].find({}))
courses = pd.DataFrame(db["courses"].find({}))
sections = pd.DataFrame(db["sections"].find({}))

In [18]:
# The question text for each number
question_texts = {
    1: "My overall rating of the quality of this course is",
    2: "My overall rating of the instructor's teaching is",
    3: "The educational value of the assigned work was",
    4: "The instructor's organization of the course was",
    5: "The instructor's clarity in communicating course objectives was",
    6: "The instructor's skill in providing understandable explanations was",
    7: "The amount I learned from the course was",
    8: "The intellectual challenge presented by the course was",
    9: "The instructor's personal interest in helping students learn was",
    10: "The instructor stimulated my interest in the subject matter",
    11: "The amount of reading, homework, and other assigned work was",
    12: "The instructor was well prepared to teach class.",
    13: "The instructor encouraged students to ask questions.",
    14: "The instructor treated students with respect.",
    15: "Instructor feedback on exams/assignments was timely and helpful.",
    16: "The exams and/or evaluations were good measures of the material covered.",
    17: "My grades were determined in a fair and impartial manner.",
    18: "What grade do you think you will receive in this course?",
    19: "On average, what were the total hours spent in each 7-day week OUTSIDE of formally scheduled class time in work related to this course (including studying, reading, writing, homework, rehearsal, etc.)?"
}

# Categories of questions (subjective)
question_categories = {
    "course_quality": [1, 2, 3, 4, 5, 6],
    "course_difficulty": [7, 8, 9, 10, 11, 19],
    "instructor_quality": [12, 13, 14, 15, 16],
    "grades": [17, 18]
}

In [19]:
# Calculate the average response of every question in a category
def add_category(row, questions):
    category_sum = 0
    for question in row.report:
        if question["questionNumber"] not in questions:
            continue
        #print(question["questionNumber"])
        responses = question["responses"]
        weighted_responses = [x*(i+1) for i, x in enumerate(responses)]

        if sum(responses) == 0:
            return None
        else:
            mean_response = sum(weighted_responses) / sum(responses)

        category_sum += mean_response
        #print(responses, weighted_responses, mean_response)
    
    category_mean = category_sum / len(questions)
    
    return category_mean

for category, questions in question_categories.items():
    sections[category] = sections.apply(lambda row: add_category(row, questions), axis=1)

def get_department(dept):
    return sections[sections.apply(lambda r: re.search(f"^{dept}", r["courseId"]) is not None, axis=1 )]



In [26]:
departments = ["CS", "ECE", "RBE", "ME", "AE", "MA", "BME"]

for d in departments:
    df = get_department(d)["course_difficulty"]
    print(df.to_numpy())
    ttest = stats.ttest_ind(df.to_numpy(), get_department("CS")["course_difficulty"].to_numpy(), equal_var=False)
    print(f"{d.ljust(4)} vs CS ttest: {ttest}")
    print(f"{d.ljust(4)}: mean: {df.mean():.3f}, stddev: {df.std():.3f}")

display(get_department("RBE").sort_values(by=["course_difficulty"], ascending=False))


[3.63402602 3.66905806 3.29924304 3.47006051 3.57977855 3.82160934
 3.65976914 3.67995468 3.20243902 3.01765813 2.87301587 3.39880363
 3.19193416 3.57462229 3.39880363 3.19193416 3.57462229 4.08597204
 3.45954106 3.53176407 3.60346216 4.22550476 3.31393678 3.38131313
 3.95632343 3.66873476 3.54320119 4.33547283 4.3232591  4.26287298
 3.74388889 2.90598291 3.62962963 3.52083333 3.11904762 3.59166667
 2.79924242 3.47435897 3.85714286 3.60263158 3.14801865 3.76785714
 3.49491138 3.78156463 3.43949624 3.85416667 3.3627451  4.55178372
 4.27916667 4.62780329 3.92592593 3.65555556 3.8107313  3.84444444
 4.08194066 4.24099617 4.32621758 4.25555556 4.35634278 3.82546853
 3.64391534 3.69791667 3.36375661 3.32027778 3.80207781 3.5939958
 4.30357143 4.2206083  3.52592593 3.28787879 3.97619048 3.16666667
 4.25595238 3.67094017 4.2979798  4.13333333 3.64583333 3.49733028
 3.97904483 3.97222222 3.88425926 3.65       4.33333333 4.30876068
 4.33333333 4.33333333 4.30876068 4.33333333 4.33333333 4.30876

Unnamed: 0,_id,professorId,professorName,courseId,courseName,term,sectionType,report,course_quality,course_difficulty,instructor_quality,grades
1881,6245a7b30008c19b8aeb75e9,941964,"Bertozzi, Nicholas",RBE 596,ROBOTICS ENGINEERING PRACTICUM,201902_D,Practicum,"[{'questionNumber': 1, 'responses': [0, 0, 0, ...",5.000000,5.000000,5.000000,3.000000
5543,6245afd40008c19b8aeb8437,915536,"Radhakrishnan, Pradeep",RBE 4322,MDLNG & ANALY OF MECHATRNC SYS,202102_C,Lab,"[{'questionNumber': 1, 'responses': [0, 0, 0, ...",4.625000,4.736111,4.500000,3.250000
3207,6245aad10008c19b8aeb7b17,1020938,"Lewin, Gregory",RBE 2002,UNIFIED ROBOTICS II: SENSING,202001_B,Lab,"[{'questionNumber': 1, 'responses': [0, 0, 3, ...",4.371212,4.730303,4.709091,3.568182
1857,6245a7aa0008c19b8aeb75d1,779069,"Popovic, Marko",RBE 595,ST:SPACE & PLANETARY ROBOTICS,201901_A,Lecture,"[{'questionNumber': 1, 'responses': [0, 0, 0, ...",4.722222,4.666667,4.666667,3.666667
1845,6245a7a50008c19b8aeb75c5,779069,"Popovic, Marko",RBE 595,ST:SPACE & PLANETARY ROBOTICS,201901_A,Lecture,"[{'questionNumber': 1, 'responses': [0, 0, 0, ...",4.722222,4.666667,4.666667,3.666667
...,...,...,...,...,...,...,...,...,...,...,...,...
5599,6245afec0008c19b8aeb846f,948302,"Morato, Carlos",RBE 595,ST: Artificial Intel for AV,202102_D,ADLN,"[{'questionNumber': 1, 'responses': [4, 1, 2, ...",1.981481,2.384259,2.222222,2.611111
1882,6245a7b40008c19b8aeb75ea,915217,"Nafziger, John",RBE 596,ROBOTICS ENGINEERING PRACTICUM,201902_D,Practicum,"[{'questionNumber': 1, 'responses': [0, 0, 0, ...",5.000000,,5.000000,3.000000
3239,6245aae10008c19b8aeb7b37,941964,"Bertozzi, Nicholas",RBE 596,ROBOTICS ENGINEERING PRACTICUM,202001_B,Practicum,"[{'questionNumber': 1, 'responses': [0, 0, 0, ...",5.000000,,5.000000,3.000000
5647,6245affe0008c19b8aeb849f,719331,"Agheli Hajiabadi, Mohammad Mahdi",RBE 596,ROBOTICS ENGINEERING PRACTICUM,202102_D,Practicum,"[{'questionNumber': 1, 'responses': [0, 0, 0, ...",4.833333,,5.000000,3.000000


In [3]:
def is_prime(n):
    for x in range(2, int(n**.5 + 1)):
        if n%x == 0: 
            return False

    return True

for x in range(41):
    if not is_prime(x**2 + x + 41):
        print(x)

40
