# Start Here

In [1]:
!pip install yaspin




[notice] A new release of pip is available: 24.0 -> 24.1.2
[notice] To update, run: C:\Users\Andrew\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.8_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [2]:
# Static lookup tables of project-relative locations, keyed by short name.
# The wrangler, model and prepper entries are extensionless; ".py" is
# appended where they are checked.
paths = dict(ASSIST09="data/ASSISTments2009/")
wranglers = dict(ASSIST09="data/ASSISTments2009/assistments09_wrangler")
models = dict(IRT="models/IRT/IRT")  # might not need this
prepper_path = "data/dataPrepper"

In [3]:
import os

def checkExists(path):
    """Report whether the file at ``path`` exists.

    Prints a "found ..." line when the path exists. When it does not, prints
    a "couldn't find ..." line followed by the ``jupyter nbconvert`` command
    that would generate the script from the notebook sharing its base name.
    Nothing is created or removed by this function.

    Args:
        path: Path of the expected script, e.g. ``"data/dataPrepper.py"``.

    Returns:
        True if ``path`` exists, otherwise False.
    """
    if os.path.exists(path):
        print("found " + path)
        return True

    # Swap whatever extension the path carries for ".ipynb" instead of
    # slicing off a fixed number of characters, which only worked for ".py".
    notebook_path = os.path.splitext(path)[0] + ".ipynb"
    print("couldn't find " + path)
    print(f"Try running: !jupyter nbconvert --to script {notebook_path}")
    return False

In [4]:
# Temporary safeguard while development still happens in notebooks:
# confirm that every necessary notebook has been exported to a .py script.
required_scripts = [prepper_path, *models.values(), *wranglers.values()]

# Build the full list before reducing it so every script is checked (and
# reported) even after the first missing one is encountered.
allExists = all([checkExists(script + ".py") for script in required_scripts])

if not allExists:
    print("\nMissing critical .py file(s), see above")

found data/dataPrepper.py
found models/IRT/IRT.py
found data/ASSISTments2009/assistments09_wrangler.py


In [5]:
#!jupyter nbconvert --to script data/dataPrepper.ipynb

In [6]:
# Menu tables: the key is what the user types, the value is the name used
# by the rest of the notebook.
modelList = {
    "1": "IRT",
}

datasetList = {
    "1": "ASSIST09",
}

runTypeList = {
    "1": "basic"
}


def _select_option(prompt, options):
    """Prompt until the user enters one of the keys in ``options``.

    Args:
        prompt: Heading shown above the numbered menu.
        options: Mapping of menu key (as typed by the user) to option name.

    Returns:
        The value in ``options`` for the key the user entered.
    """
    menu = prompt + "\n" + "\n".join(f'{k}.{v}' for k, v in options.items())
    while True:
        # Surrounding whitespace is ignored so "1 " still selects option 1.
        choice = input(menu).strip()
        if choice in options:
            return options[choice]
        # Re-prompt instead of failing with a KeyError on a mistyped entry.
        print(f"'{choice}' is not a valid choice; enter one of: {', '.join(options)}")


model = _select_option("Select a model to test:", modelList)
dataset = _select_option("Select a dataset to test on:", datasetList)
runType = _select_option("Select a run condition:", runTypeList)

print(f"Performing test run of {model} using {dataset} in {runType} format")

Select a model to test:
1.IRT 1
Select a dataset to test on:
1.ASSIST09 1
Select a run condition:
1.basic 1


Performing test run of IRT using ASSIST09 in basic format


In [7]:
from data import dataPrepper as prep
from yaspin import yaspin
from models.IRT import IRT

# Build the inputs for the selected model/dataset/run type, showing a
# spinner while the prepper runs. `data` is iterated by the evaluation cell.
with yaspin(text="Preparing Data...") as spinner:
    prepared = prep.prepareData(model=model, dataset=dataset, runType=runType)
    Q, data = prepared
    spinner.ok("✔️ ")

✔️  Preparing Data...


In [8]:
# Dispatch table: model name -> function that trains/evaluates that model.
# The evaluation cell calls the selected entry as f(train, test).
modelFuncs = dict(IRT=IRT.run_IRT)

In [9]:
import numpy as np
import time
from datetime import datetime


def _mean_student_score(frame):
    """Return the mean over students of each student's mean 'score'."""
    return frame.groupby('user_id')["score"].mean().mean()


# Average correct/incorrect ratio per student for every data configuration
# (original note: this is expected to be ~0.50).
avg_train_score, avg_test_score = [], []
for split in data:
    avg_train_score.append(_mean_student_score(split["train"]))
    avg_test_score.append(_mean_student_score(split["test"]))

# Identifying fields come first; their insertion order fixes the column
# order of a newly created results file.
result_object = {
    "model": model,
    "runType": runType,
    "dataset": dataset,
    "test_correct_ratio": np.mean(avg_test_score),
    "train_correct_ratio": np.mean(avg_train_score),
}

accs, aucs, maes, rmses, times = [], [], [], [], []

# Run the selected model once per data configuration, recording its metrics
# and the wall-clock duration of each call.
with yaspin(text="Evaluating...") as spinner:
    for fold in data:
        began = time.time()
        scores = modelFuncs[model](fold["train"], fold["test"])
        elapsed = time.time() - began

        acc, auc, mae, rmse = scores
        accs.append(acc)
        aucs.append(auc)
        maes.append(mae)
        rmses.append(rmse)
        times.append(elapsed)
    spinner.ok("✔️ ")

# Summarise: all means first, then all standard deviations, then timing.
metric_history = {"ACC": accs, "AUC": aucs, "MAE": maes, "RMSE": rmses}
for metric_name, values in metric_history.items():
    result_object[metric_name] = np.mean(values)
for metric_name, values in metric_history.items():
    result_object[metric_name + "_std"] = np.std(values)
result_object['avg_train_duration'] = np.mean(times)
result_object['performed_at'] = datetime.today().strftime('%Y-%m-%d %H:%M:%S')

⠙ Evaluating... 

evaluating:  94%|████████████████████████████████████████████████████████▌   | 66612/70674 [00:00<00:00, 664190.28it/s]

⠹ Evaluating... 

evaluating: 100%|████████████████████████████████████████████████████████████| 70674/70674 [00:00<00:00, 695808.27it/s]

Trained on IRT for RMSE: 0.490564, MAE: 0.411051, ACC: 0.628732, AUC: 0.628740




                
⠼ Evaluating... 

evaluating:   0%|                                                                            | 0/69768 [00:00<?, ?it/s]

⠴ Evaluating... 

evaluating: 100%|████████████████████████████████████████████████████████████| 69768/69768 [00:00<00:00, 691481.84it/s]

⠦ Evaluating... or RMSE: 0.491868, MAE: 0.411144, ACC: 0.633428, AUC: 0.626271





⠇ Evaluating... 

evaluating:   0%|                                                                            | 0/68971 [00:00<?, ?it/s]

⠏ Evaluating... 

evaluating: 100%|████████████████████████████████████████████████████████████| 68971/68971 [00:00<00:00, 692515.86it/s]

Trained on IRT for RMSE: 0.492899, MAE: 0.410087, ACC: 0.632309, AUC: 0.625618





⠧ Evaluating... 

evaluating:   0%|                                                                            | 0/68125 [00:00<?, ?it/s]

⠇ Evaluating... 

evaluating: 100%|████████████████████████████████████████████████████████████| 68125/68125 [00:00<00:00, 676896.49it/s]

Trained on IRT for RMSE: 0.492899, MAE: 0.411765, ACC: 0.631912, AUC: 0.621786





⠇ Evaluating... 

evaluating:   0%|                                                                            | 0/67320 [00:00<?, ?it/s]

⠏ Evaluating... 

evaluating: 100%|████████████████████████████████████████████████████████████| 67320/67320 [00:00<00:00, 582021.09it/s]

⠋ Evaluating... 




Trained on IRT for RMSE: 0.491566, MAE: 0.410679, ACC: 0.632145, AUC: 0.626594
✔️  Evaluating...


In [10]:
import pandas as pd

# Persist this run's summary row to the results CSV.
results_path = "results.csv"

if os.path.exists(results_path):
    # Load earlier results and drop any row recorded for the same
    # model / run type / dataset combination, so the new row replaces it.
    previous = pd.read_csv(results_path)
    is_same_config = (
        (previous['model'] == model)
        & (previous['runType'] == runType)
        & (previous['dataset'] == dataset)
    )
    kept = previous[~is_same_config]
    fresh = pd.DataFrame(result_object, index=[1])
    df = pd.concat([kept, fresh], ignore_index=True)
else:
    # No results file yet: start one containing only this run.
    df = pd.DataFrame([result_object])

print(df)
df.to_csv(results_path, index=False)







  model runType   dataset  test_correct_ratio  train_correct_ratio       ACC  \
0   IRT   basic  ASSIST09             0.60004             0.600131  0.631705   

        AUC       MAE      RMSE  avg_train_duration         performed_at  \
0  0.625802  0.410945  0.491959           48.941547  2024-07-24 10:19:50   

    ACC_std   AUC_std   MAE_std  RMSE_std  
0  0.001575  0.002265  0.000553  0.000881  
