# CMSC320 Final Project
## Regression test of the trend in average GPA of UMD CMSC courses over the last two decades
### Is there grade inflation, grade deflation, or neither?

Group Members: Jihyo Park, Calvin Pham

In [95]:
import requests # type: ignore
import pandas as pd # type: ignore
import numpy as np # type: ignore
import json

Getting the CMSC course data from the PlanetTerp API. This gives us the list of courses for which we will then gather detailed semester-by-semester grade data.

In [96]:
# One-time download of the CMSC course list from the PlanetTerp API, saved to
# cmsc_data.json. The code is left commented out; the preprocessing cell reads
# the saved file instead of calling the API. Uncomment to refresh the file.
# NOTE(review): the host used here (planetterp.com/api/v1) differs from the one
# used by the per-course grades request (api.planetterp.com/v1) -- confirm
# which one is current before re-running.
# cmsc_data = requests.get(f"https://planetterp.com/api/v1/courses?department=CMSC").json()
# with open('cmsc_data.json', 'w') as f:
#     json.dump(cmsc_data, f)

Preprocessing the data

In [97]:
# Load the saved course list and order it by course number.
course_list = pd.read_json('cmsc_data.json').sort_values(by=['course_number'])

# Restrict the analysis to 100-, 200-, 300- and 400-level courses, i.e. course
# numbers whose first character is 1, 2, 3 or 4.
is_100_to_400_level = course_list['course_number'].str.startswith(('1', '2', '3', '4'))

# Columns that are not needed for the GPA analysis.
unused_columns = ['professors', 'description', 'department', 'name', 'is_recent', 'geneds', 'title']

cmsc_df = (
    course_list[is_100_to_400_level]
    .drop(columns=unused_columns)
    # Discard every course that has a missing value in any remaining column
    # (not only in average_gpa).
    .dropna()
)

cmsc_df.head(10)

Unnamed: 0,average_gpa,course_number,credits
2,3.496197,100,1.0
39,2.397251,106,4.0
16,2.527072,122,3.0
40,2.48799,131,4.0
41,2.602482,132,4.0
88,3.365746,132H,4.0
9,2.651282,198D,1.0
10,0.8,198E,1.0
4,2.471644,216,4.0
11,2.668913,250,4.0


In [98]:
# Full course identifiers (department prefix + course number), e.g. the number
# '132H' becomes 'CMSC132H'. These are used as the per-course file/API keys.
courses = ['CMSC' + number for number in cmsc_df['course_number'].tolist()]

Getting detailed grade data from the PlanetTerp API for each course

In [99]:
# One-time download of the detailed grade records for every course in
# `courses`, saved as one JSON file per course under ./data/ (named after the
# course identifier). The code is left commented out; uncomment to refresh
# the saved files.
# NOTE(review): open() does not create directories, so ./data/ presumably has
# to exist before this is run -- confirm.
# for x in courses:
#     grade_data = requests.get(f"https://api.planetterp.com/v1/grades?course={x}").json()
#     with open(f'./data/{x}.json', 'w') as f:
#         json.dump(grade_data, f)

Preprocessing the data

In [100]:
# Grade points awarded for each letter grade. Note that A+ and A are both
# worth 4.0 points in this mapping.
gpa_map = {
    "A+": 4.0,
    "A": 4.0,
    "A-": 3.7,
    "B+": 3.3,
    "B": 3.0,
    "B-": 2.7,
    "C+": 2.3,
    "C": 2.0,
    "C-": 1.7,
    "D+": 1.3,
    "D": 1.0,
    "D-": 0.7,
    "F": 0.0,
}

# Letter grades (a live view of the mapping's keys) and their grade points,
# both in the mapping's insertion order so that position i of `gpas` belongs
# to position i of `grades`.
grades = gpa_map.keys()
gpas = list(gpa_map.values())

In [101]:
# Build one DataFrame with a row per (course, semester) holding the number of
# letter-graded students and their average GPA.
#
# Reads   : ./data/<course>.json for every entry of `courses`
# Uses    : `grades` (letter-grade column names) and `gpas` (matching points)
# Produces: `df` with columns semester, course, students, avg_gpa and a fresh
#           0..n-1 index.

# A plain list is a well-defined column selector even if `grades` is a dict
# keys view.
grade_columns = list(grades)
grade_points = np.asarray(gpas, dtype=float)

# Collect the per-course frames and concatenate once at the end; calling
# pd.concat on a growing frame inside the loop re-copies all earlier rows on
# every iteration.
semester_frames = []

for course_code in courses:
    grade_data = pd.read_json(f'./data/{course_code}.json')
    if grade_data.empty:
        # No grade records for this course: it contributes no rows.
        continue

    # Pool all sections/professors of the course within each semester.
    # "W" and "Other" are dropped, so they count neither as students nor
    # towards the GPA.
    grade_data = (
        grade_data
        .drop(columns=['professor', 'section', "Other", "W"])
        .groupby(['semester'])
        .sum()
        .reset_index()
    )

    # Number of students who received a letter grade in the semester.
    grade_data['students'] = grade_data[grade_columns].sum(axis=1)
    # Semesters without any letter grade would divide by zero below.
    grade_data = grade_data[grade_data['students'] != 0].copy()

    # Average GPA = sum(count_g * points_g) / students, computed for all
    # semesters at once as a matrix-vector product instead of row by row.
    total_points = grade_data[grade_columns].to_numpy(dtype=float) @ grade_points
    grade_data['avg_gpa'] = total_points / grade_data['students'].to_numpy(dtype=float)

    semester_frames.append(
        grade_data
        .drop(columns=grade_columns)
        # Overwrites the summed 'course' column (keeping its position) with
        # the course identifier.
        .assign(course=course_code)
    )

# ignore_index=True yields the same 0..n-1 index that reset_index() followed
# by dropping the old 'index' column would give.
df = pd.concat(semester_frames, ignore_index=True) if semester_frames else pd.DataFrame()
df.head(20)

Unnamed: 0,semester,course,students,avg_gpa
0,201208,CMSC100,23,3.826087
1,201308,CMSC100,39,3.2
2,201408,CMSC100,36,3.669444
3,201508,CMSC100,46,3.382609
4,201608,CMSC100,38,3.331579
5,201708,CMSC100,32,3.853125
6,201808,CMSC100,30,3.92
7,202108,CMSC100,68,3.907353
8,202201,CMSC100,25,3.388
9,202208,CMSC100,105,3.545714
