# Learning Standards Analysis

**! This notebook file may contain confidential information; clear all outputs before making it public !**

Simeon Wong  
MAT188 2023F at the University of Toronto

In [None]:
import pandas as pd
import statsmodels.api as sm
import statsmodels.formula.api as smf
import numpy as np
import wwparse

In [None]:
# lookup table of learning standards; it is joined to the scores below on
# the 'webwork_set' and 'problem_num' columns
lsref = pd.read_csv('webwork_learning_standards.csv')

# parse the webwork HTML export into a table of scores
# (save_csv=False presumably skips writing a CSV copy -- confirm against wwparse)
scores = wwparse.parse_html('mat188-2023f-ww1.html', save_csv=False)

# left join: every score row is kept, and rows with no matching entry in
# lsref end up with missing learning-standard columns
scores = pd.merge(scores, lsref, on=['webwork_set', 'problem_num'], how='left')

###
# derived columns used by the analysis cells further down

# a problem counts as correct when its score is strictly above 75
# (assumes scores are on a 0-100 scale -- TODO confirm)
scores['correct'] = scores['score'].gt(75)

# skill = second '-'-separated field of the standard code, kept as a categorical
scores['skill'] = scores['standard'].str.split('-').str[1].astype('category')

# remove rows without a score (presumably problems the student never attempted)
scores.dropna(subset=['score'], inplace=True)
###

# preview the first few rows
scores.head()

In [None]:
# how many problems are students getting correct within each learning standard?
# `correct` is already set in the preprocessing cell (score > 75), so it is reused
# here instead of being recomputed with a second copy of the threshold.
# summing the boolean flag counts the correct problems per (student, standard) pair;
# rows with a missing standard are excluded by groupby's default dropna=True.
scores.groupby(['login_name', 'standard'])['correct'].sum().reset_index()

In [None]:
# average score for each learning standard, returned as a two-column table
# (standard, score) rather than a Series indexed by standard
(
    scores
    .groupby('standard')['score']
    .agg('mean')
    .reset_index()
)

In [None]:
# are students taking more attempts (performing worse) in one skill vs the others?
# linear mixed model: n_incor (presumably the number of incorrect attempts -- confirm
# against the wwparse output) as a function of skill, with a random intercept for
# each student (grouped by login_name)
(
    smf.mixedlm(
        formula='n_incor ~ skill',
        data=scores,
        groups=scores['login_name'],
    )
    .fit()
    .summary()
)