In [0]:
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss
from sklearn.linear_model import LogisticRegression
from scipy.sparse import load_npz, hstack, csr_matrix
import pandas as pd
import numpy as np

In [9]:
from google.colab import drive
drive.mount('/gdrive')

Drive already mounted at /gdrive; to attempt to forcibly remount, call drive.mount("/gdrive", force_remount=True).


In [0]:
!cp "/gdrive/My Drive/Colab Notebooks/data/assistments12/X-IRT.npz" .
!cp "/gdrive/My Drive/Colab Notebooks/data/assistments12/q_mat.npz" .

In [0]:
# Load sparsely encoded datasets
X = csr_matrix(load_npz('X-IRT.npz'))
all_users = np.unique(X[:,0].toarray().flatten())
y = X[:,3].toarray().flatten()
qmat = load_npz("q_mat.npz")

In [0]:
# Student-level train-test split
kf = KFold(n_splits=5, shuffle=True)
splits = kf.split(all_users)


In [13]:
for run_id, (i_user_train, i_user_test) in enumerate(splits):
  users_train = all_users[i_user_train]
  users_test = all_users[i_user_test]
  
  X_train = X[np.where(np.isin(X[:,0].toarray().flatten(),users_train))]
  y_train = X_train[:,3].toarray().flatten()
  X_test = X[np.where(np.isin(X[:,0].toarray().flatten(),users_test))]
  y_test = X_test[:,3].toarray().flatten()
  print('fitting...')
  # model = LogisticRegression(solver="lbfgs", max_iter=400)
  model = LogisticRegression(solver="liblinear", max_iter=400)
  model.fit(X_train[:,5:], y_train) # the 5 first columns are the non-sparse dataset
  y_pred_test = model.predict_proba(X_test[:,5:])[:, 1]
  
  print(y_test)
  print(y_pred_test)
  ACC = accuracy_score(y_test, np.round(y_pred_test))
  print('ACC', ACC)
  AUC = roc_auc_score(y_test, y_pred_test)
  print('auc', AUC)
  NLL = log_loss(y_test, y_pred_test)
  print('nll', NLL)



fitting...
[1. 0. 0. ... 1. 1. 1.]
[0.83854772 0.548136   0.507902   ... 0.87551147 0.89181362 0.90565919]
ACC 0.7162960293322644
auc 0.7009618403268341
nll 0.5614215277885163
fitting...
[0. 1. 1. ... 1. 0. 0.]
[0.31407212 0.6050211  0.43505484 ... 0.51934352 0.50448033 0.42451824]
ACC 0.7231778812516587
auc 0.702742504620788
nll 0.5530072932780884
fitting...
[0. 1. 1. ... 1. 0. 0.]
[0.70988352 0.73643821 0.70103492 ... 0.77277853 0.84897564 0.79194259]
ACC 0.718331190116923
auc 0.7008133839462631
nll 0.5581835907761848
fitting...
[1. 1. 0. ... 1. 0. 1.]
[0.70201828 0.7990999  0.56906997 ... 0.73896987 0.80887529 0.8081351 ]
ACC 0.7200704192060632
auc 0.7023322210300871
nll 0.5573703062658302
fitting...
[1. 1. 1. ... 1. 0. 1.]
[0.83509916 0.45687093 0.80547224 ... 0.52265792 0.49199069 0.57810085]
ACC 0.7174168751346068
auc 0.7017377355534176
nll 0.5605451920664767
