In [131]:
import json
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
import tensorflow as tf
import pprint as pp
import math
import random
import matplotlib.pyplot as plt
import os
from collections import Counter
%matplotlib inline
np.set_printoptions(threshold=np.nan)
import pickle
def save_obj(obj, name ):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f)
        
def load_obj(name ):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [132]:
'''
Loading data from file
'''
filepath = "../data/student_vectors_n_task_10_n_limit_10000.json"
student_vectors = json.load(open(filepath))
#filepath2 = "../../../student_vectors_n_task_10_n_limit_100000.json"

In [133]:
'''
Generate one hot encoding from task IDs
'''
#Collect task IDs
task_ids = []
for i in student_vectors:
    for j in student_vectors[i]:
        if j['task_id'] not in task_ids:
            task_ids.append(j['task_id'])

#pre-processing for using MultiLabelBinarizer
temp_ids = [[x] for x in task_ids]

#generating encodings
enc = MultiLabelBinarizer()
task_ids_1hot = (enc.fit_transform(temp_ids)).astype(float)
task_ids_dict = dict(zip(task_ids, task_ids_1hot))

In [155]:
'''
Split train and test student_vectors
'''
split = int(0.8*len(student_vectors))
train_student_vectors = {}
test_student_vectors = {}

for idx,keys in enumerate(student_vectors):
    if(idx < split):
        train_student_vectors[keys] = student_vectors[keys]
    else:
        test_student_vectors[keys] = student_vectors[keys]
length_interaction_vector = 20

In [220]:
'''
Gather Training Data from train_student_vectors
train_X, train_Y, train_Seqlen
'''
train_sequences = {}
train_sequences['overall'] = []
incorrect_vec = np.zeros((len(task_ids)), dtype=np.float)
temp_seqlen = {}
temp_seqlen['overall'] = []

for i in task_ids: #go per task IDs
    train_sequences[i] = []
    temp_seqlen[i] = []
    for j in train_student_vectors: #go per student
        temp = [] #one student sequence
        temp.append(np.concatenate([incorrect_vec,incorrect_vec])) #for getting first output
        for k in train_student_vectors[j]: #per question
            if(k['second_try'] == False and k['task_id'] == i):
                if(k['correct'] == True):
                    temp.append(np.concatenate([task_ids_dict[k['task_id']],incorrect_vec]))
                else:
                    temp.append(np.concatenate([incorrect_vec,task_ids_dict[k['task_id']]]))
        if(len(temp) > 1):
            train_sequences['overall'].append(temp)
            train_sequences[i].append(temp)
            random.shuffle(train_sequences['overall'])
            temp_seqlen['overall'].append(len(temp)-1)
            temp_seqlen[i].append(len(temp)-1)

print("Maximum sequence length is: "+ str(max(temp_seqlen['overall'])))

Maximum sequence length is: 56


In [222]:
'''continued
Gather Training Data
train_X, train_Y, train_Seqlen, train_mask
'''
train_X = {}
train_Y = {}
train_Seqlen = {}
train_mask = {}

for i in task_ids:
    train_X[i] = np.zeros(shape=(len(train_sequences[i]),max(temp_seqlen[i]),len(train_sequences[i][0][0])),dtype=float)
    train_Y[i] = np.zeros(shape=(len(train_sequences[i]),max(temp_seqlen[i]),len(train_sequences[i][0][0])),dtype=float)
    train_Seqlen[i] = np.zeros(shape=(len(train_sequences[i])),dtype=int)
    train_mask[i] = np.zeros(shape=(len(train_sequences[i]),1),dtype=int)
    for idx, seq in enumerate(train_sequences[i]):
        vec1 = np.concatenate([task_ids_dict[i],incorrect_vec])
        vec2 = np.concatenate([incorrect_vec,task_ids_dict[i]])
        if(train_sequences[i][idx][1].all() == vec1.all() or train_sequences[i][idx][1].all() == vec2.all()):
            leng = len(train_sequences[i][idx])
            train_Seqlen[i][idx] = leng-1
            train_mask[i][idx] = [leng-1]
            for pos in range(leng-1):
                train_X[i][idx][pos] = train_sequences[i][idx][pos]
                train_Y[i][idx][pos] = train_sequences[i][idx][pos+1]

train_X['overall'] = np.zeros(shape =(len(train_sequences['overall']), max(temp_seqlen['overall']), len(train_sequences['overall'][0][0])),dtype=float)
train_Y['overall'] = np.zeros(shape =(len(train_sequences['overall']), max(temp_seqlen['overall']), len(train_sequences['overall'][0][0])),dtype=float)
train_Seqlen['overall'] = np.zeros(shape=(len(train_sequences['overall'])),dtype=int)
train_mask['overall'] = np.zeros(shape=(len(train_sequences['overall']),1),dtype=int)

for idx, seq in enumerate(train_sequences['overall']):
    leng = len(seq)
    train_Seqlen['overall'][idx] = leng-1
    train_mask['overall'][idx] = [leng-1]
    for pos in range(leng-1):
        train_X['overall'][idx][pos] = train_sequences['overall'][idx][pos]
        train_Y['overall'][idx][pos] = train_sequences['overall'][idx][pos+1]

In [160]:
'''
Gathering test data for evaluation metric 1 from test_student_vectors
1) Input one skill at a time and get predictions for each separately
a- calculate 10 separate AUCs
b- concatenate separate predictions to calculate 1 AUC

test_1_X, test_1_Y, test_1_Seqlen
'''
test_sequences = {}
incorrect_vec = np.zeros((len(task_ids)), dtype=np.float)
temp_seqlen = {}
temp_seqlen['overall'] = []
test_sequences['overall'] = []

for i in task_ids: #go per task IDs
    temp_seqlen[i] = []
    test_sequences[i] = []
    for j in test_student_vectors: #go per student
        temp = [] #one student sequence
        temp.append(np.concatenate([incorrect_vec,incorrect_vec])) #for getting first output
        for k in test_student_vectors[j]: #per question
            if(k['second_try'] == False and k['task_id'] == i):
                if(k['correct'] == True):
                    temp.append(np.concatenate([task_ids_dict[k['task_id']],incorrect_vec]))
                else:
                    temp.append(np.concatenate([incorrect_vec,task_ids_dict[k['task_id']]]))
        if(len(temp) > 1):
            test_sequences[i].append(temp)
            test_sequences['overall'].append(temp)
            random.shuffle(test_sequences['overall'])
            temp_seqlen['overall'].append(len(temp)-1)
            temp_seqlen[i].append(len(temp)-1)

print("Maximum sequence length is: "+ str(max(temp_seqlen['overall'])))

Maximum sequence length is: 39


In [161]:
'''continued
Gathering test data for evaluation metric 1
test_1_X, test_1_Y, test_1_Seqlen
'''
test_1_X = {}
test_1_Y = {}
test_1_Seqlen = {}
test_1_mask = {}

for i in task_ids:
    test_1_X[i] = np.zeros(shape=(len(test_sequences[i]),max(temp_seqlen[i]),len(test_sequences[i][0][0])),dtype=float)
    test_1_Y[i] = np.zeros(shape=(len(test_sequences[i]),max(temp_seqlen[i]),len(test_sequences[i][0][0])),dtype=float)
    test_1_Seqlen[i] = np.zeros(shape=(len(test_sequences[i])),dtype=int)
    test_1_mask[i] = np.zeros(shape=(len(test_sequences[i]),1),dtype=int)

    for idx, seq in enumerate(test_sequences[i]): #go per student
        vec1 = np.concatenate([task_ids_dict[i],incorrect_vec])
        vec2 = np.concatenate([incorrect_vec,task_ids_dict[i]])
        if(test_sequences[i][idx][1].all() == vec1.all() or test_sequences[i][idx][1].all() == vec2.all()):
            leng = len(test_sequences[i][idx])
            test_1_Seqlen[i][idx] = leng-1
            test_1_mask[i][idx] = [leng-1]
            for pos in range(leng-1):
                test_1_X[i][idx][pos] = test_sequences[i][idx][pos]
                test_1_Y[i][idx][pos] = test_sequences[i][idx][pos+1]

test_1_X['overall'] = np.zeros(shape=(len(test_sequences['overall']),max(temp_seqlen['overall']),len(test_sequences['overall'][0][0])),dtype=float)
test_1_Y['overall'] = np.zeros(shape=(len(test_sequences['overall']),max(temp_seqlen['overall']),len(test_sequences['overall'][0][0])),dtype=float)
test_1_Seqlen['overall'] = np.zeros(shape=(len(test_sequences['overall'])),dtype=int)
test_1_mask['overall'] = np.zeros(shape=(len(test_sequences['overall']),1),dtype=int)
for idx, seq in enumerate(test_sequences['overall']):
    leng = len(seq)
    test_1_Seqlen['overall'][idx] = leng-1
    test_1_mask['overall'][idx] = [leng-1]
    for pos in range(leng-1):
        test_1_X['overall'][idx][pos] = test_sequences['overall'][idx][pos]
        test_1_Y['overall'][idx][pos] = test_sequences['overall'][idx][pos+1]

In [162]:
'''
Gathering test data for evaluation metric 2 from test_student_vectors
2) Input the natural sequence of students and get predictions for the same
a- calculate 1 AUC with natural sequence predictions
b- filter predictions per skill, and calculate 10 separate AUCs

test_2_X, test_2_Y, test_2_Seqlen
'''
test_sequences_2 = {}
incorrect_vec = np.zeros((len(task_ids)), dtype=np.float)
temp_seqlen = {}
temp_seqlen['overall'] = []
test_sequences_2['overall'] = []

#first lets get a natural sequence in overall!, rest part will be done after getting predictions from the model
for j in test_student_vectors: #go per student
    temp = [] #one student sequence
    temp.append(np.concatenate([incorrect_vec,incorrect_vec])) #for getting first output
    for k in test_student_vectors[j]: #per question
        if(k['second_try'] == False):
            if(k['correct'] == True):
                temp.append(np.concatenate([task_ids_dict[k['task_id']],incorrect_vec]))
            else:
                temp.append(np.concatenate([incorrect_vec,task_ids_dict[k['task_id']]]))
    if(len(temp) > 1):
        test_sequences_2['overall'].append(temp)
        random.shuffle(test_sequences_2['overall'])
        temp_seqlen['overall'].append(len(temp)-1)

print("Maximum sequence length is: "+ str(max(temp_seqlen['overall'])))

Maximum sequence length is: 85


In [163]:
'''continued
Gathering test data for evaluation metric 2
test_2_X, test_2_Y, test_2_Seqlen
'''
test_2_X = {}
test_2_Y = {}
test_2_Seqlen = {}
test_2_mask = {}

test_2_X['overall'] = np.zeros(shape=(len(test_sequences_2['overall']),max(temp_seqlen['overall']),len(test_sequences_2['overall'][0][0])),dtype=float)
test_2_Y['overall'] = np.zeros(shape=(len(test_sequences_2['overall']),max(temp_seqlen['overall']),len(test_sequences_2['overall'][0][0])),dtype=float)
test_2_Seqlen['overall'] = np.zeros(shape=(len(test_sequences_2['overall'])),dtype=int)
test_2_mask['overall'] = np.zeros(shape=(len(test_sequences_2['overall']),1),dtype=int)
for idx, seq in enumerate(test_sequences_2['overall']):
    leng = len(seq)
    test_2_Seqlen['overall'][idx] = leng-1
    test_2_mask['overall'][idx] = [leng-1]
    for pos in range(leng-1):
        test_2_X['overall'][idx][pos] = test_sequences_2['overall'][idx][pos]
        test_2_Y['overall'][idx][pos] = test_sequences_2['overall'][idx][pos+1]

In [371]:
'''
let's define AUC functions
'''
import math
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve

def calculate_auc (y_true, y_pred, sequence_lengths=[], plot=False, debug=False, idx = 0):
    if sequence_lengths == []:
        con1_y_true = np.zeros([len(y_true)])
        con1_y_pred = np.zeros([len(y_true)])
        index_two = idx
        right = 0
        index_one = index_two + int(length_interaction_vector/2)
        for i in range(len(y_true)): #go up to sequence length
            if(np.argmax(y_true[i]) == index_one):
                print("incorrect true label")
                con1_y_true[i] = 0.
                con1_y_pred[i] = 1.0 - y_pred[i][index_one]
            elif (np.argmax(y_true[i]) == index_two):
                print("correct true label")
                right += 1
                con1_y_true[i] = 1.
                con1_y_pred[i] = y_pred[i][index_two]
        return [roc_auc_score(con1_y_true, con1_y_pred),con1_y_pred,con1_y_true, (right/len(y_true))]
    else:
        con_y_true = np.zeros([sum(sequence_lengths), length_interaction_vector])
        con_y_pred = np.zeros([sum(sequence_lengths), length_interaction_vector],dtype=np.float)
        index = 0
        for i in range(len(y_true)): #per sequence
            for j in range(sequence_lengths[i]): #up to the sequence length
                con_y_true[index] = y_true[i][j]
                con_y_pred[index] = y_pred[i][j]
                index += 1
        con1_y_true = np.zeros([sum(sequence_lengths)])
        con1_y_pred = np.zeros([sum(sequence_lengths)])
        right = 0
        for l in range(sum(sequence_lengths)): # go per interaction vector
            index_one = np.argmax(con_y_true[l]) #detect its indices!, index_two => correct
            if(index_one >= int(length_interaction_vector/2)):
                index_two = index_one - int(length_interaction_vector/2)
            else:
                index_two = index_one
                index_one = index_one + int(length_interaction_vector/2)
            if(np.argmax(con_y_true[l]) == index_one): #true is incorrect
                con1_y_true[l] = 0.
                con1_y_pred[l] = 1.0 - con_y_pred[l][index_one]
            elif(np.argmax(con_y_true[l]) == index_two):
                right += 1
                con1_y_true[l] = 1.
                con1_y_pred[l] = con_y_pred[l][index_two]
        debug=False
        if(debug):
            print(np.c_[con1_y_true,con1_y_pred])
        fpr, tpr, thresholds = roc_curve(con1_y_true, con1_y_pred)
        #print("tpr: "+str(tpr) + ", fpr: "+str(fpr) + ", thresholds: "+str(thresholds))
        if(plot):
            return [roc_auc_score(con1_y_true, con1_y_pred),fpr,tpr]
        else:
            return [roc_auc_score(con1_y_true, con1_y_pred),(right)/sum(sequence_lengths)]

In [176]:
'''
Filter Predictions per skill and calculate separate AUCs
For evaluation algorithm 2b
'''
def extract_2(true, predictions):
    print(true)

In [154]:
'''
load and save model functions
'''
def loadmodel(session, saver, checkpoint_dir):
    session.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    if ckpt and ckpt.model_checkpoint_path:
        ckpt_name = os.path.basename(ckpt.model_checkpoint_path)
        saver.restore(session, os.path.join(checkpoint_dir, ckpt_name))
        print("Model restored successfully")
        return int(ckpt_name[6:])
    else:
        print("No pre-trained model exists, starting from the beginning!")
        return 0

def save(session, saver, checkpoint_dir, step):
    dir1 = os.path.join(checkpoint_dir, "model")
    saver.save(session, dir1, global_step=step)

In [345]:
'''
Build the model
'''
num_units = 50
lr = 0.001
tf.reset_default_graph()
tf.set_random_seed(1234)

#defining placeholders
x = tf.placeholder(tf.float32, [None, None, length_interaction_vector])
y = tf.placeholder(tf.float32, [None, None, length_interaction_vector])
seqlen_tf = tf.placeholder(tf.float32,[None])

#variables
converged = tf.Variable(0,trainable=False)

#dynamic RNN definition
def dynamicRNN(x):
    rnn_cell = tf.nn.rnn_cell.BasicLSTMCell(num_units)
    outputs, states = tf.nn.dynamic_rnn(rnn_cell, x, dtype=tf.float32,sequence_length=seqlen_tf)
    out_size = int(length_interaction_vector / 2)
    outputs = tf.contrib.layers.fully_connected(outputs, out_size, activation_fn = tf.nn.sigmoid, weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False))
    opposites = tf.subtract(tf.ones(tf.shape(outputs)),outputs)
    outputs1 = tf.concat([outputs,opposites],2)
    return outputs1

#making predictions
pred = dynamicRNN(x)
pred = pred*y
# Define loss and optimizer
cost1 = tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=y)
mask = tf.cast(tf.sequence_mask(lengths = train_mask['overall'], maxlen = max(train_Seqlen['overall'])), tf.float32)
cost1 = tf.multiply(cost1,tf.transpose(mask, perm=[0, 2, 1]))
cost1 = tf.reduce_sum(cost1, 1)
cost1 /= tf.cast(train_mask['overall'],tf.float32)
cost = tf.reduce_mean(cost1)
optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(cost)

saver = tf.train.Saver(max_to_keep=5)
saver_url = 'saved_models/4_combined_model_D_small/model.ckpt'

In [331]:
'''
All the recorders
'''
predictions = {}
for i in ['train','test1','test2']:
    predictions[i] = {}
    for j in task_ids:
        predictions[i][j] = []
    predictions[i]['overall'] = []

In [347]:
'''
Training the model
'''
display_step = 20
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    cost_prev = 1.0
    stop = False
    step = loadmodel(sess, saver,saver_url)
    print("converged = "+str(converged.eval()))
    if(sess.run(converged) == 1):
        print("Model has already converged! Stop training")
        stop = True
        
    
    while(stop == False):
        # train on train_X['overall']
        sess.run(optimizer, feed_dict={x: train_X['overall'], y: train_Y['overall'], seqlen_tf: train_Seqlen['overall']})
        step += 1
        if step % display_step == 0 or step == 1:
            # first lets check convergence
            loss = sess.run(cost, feed_dict={x: train_X['overall'], y: train_Y['overall'], seqlen_tf: train_Seqlen['overall']})
            cost_current = loss
            if cost_prev - cost_current <= 0.00005:
                stop = True
                sess.run(converged.assign(1))
                print("Model has converged!")
            else:
                cost_prev = cost_current
                
            # save the model
            #save(sess, saver, saver_url, step)

            # report the AUC scores till now
            # train AUC
            predict = sess.run(pred, feed_dict={x: train_X['overall'], y: train_Y['overall'], seqlen_tf: train_Seqlen['overall']})
            predictions['train']['overall'].append(predict)
            print("Training AUC (similar to 1b): "+str(calculate_auc(train_Y['overall'], predict, train_Seqlen['overall'])))
            print("Training AUC (similar to 1a): ")
            for i in task_ids:
                predict = sess.run(pred, feed_dict={x: train_X[i], y: train_Y[i], seqlen_tf: train_Seqlen[i]})
                predictions['train'][i].append(predict)
                print(str(i)+": "+str(calculate_auc(train_Y[i], predict, sequence_lengths= train_Seqlen[i])))
            
            # test AUC 1
            predict = sess.run(pred, feed_dict={x: test_1_X['overall'], y: test_1_Y['overall'], seqlen_tf: test_1_Seqlen['overall']})
            predictions['test1']['overall'].append(predict)
            print("Test AUC 1b: "+str(calculate_auc(test_1_Y['overall'], predict, test_1_Seqlen['overall'])))
            print("Test AUC 1a: ")
            for i in task_ids:
                predict = sess.run(pred, feed_dict={x: test_1_X[i], y: test_1_Y[i], seqlen_tf: test_1_Seqlen[i]})
                predictions['test1'][i].append(predict)
                print(str(i)+": "+str(calculate_auc(test_1_Y[i], predict, test_1_Seqlen[i])))
            
            # test AUC 2
            predict = sess.run(pred, feed_dict={x: test_2_X['overall'], y: test_2_Y['overall'], seqlen_tf: test_2_Seqlen['overall']})
            predictions['test2']['overall'].append(predict)
            print("Test AUC 2a: "+str(calculate_auc(test_2_Y['overall'], predict, test_2_Seqlen['overall'])))
#             print("Test AUC 2b: ")
            #extract_2(test_2_Y['overall'], predict)
            print("\n\n\n")

No pre-trained model exists, starting from the beginning!
converged = 0


  # Remove the CWD from sys.path while we load stuff.


Training AUC (similar to 1b): 0.532386272530059
Training AUC (similar to 1a): 
p7cfRPp-kQ.partb: 0.4176430607416092
Ok-iIHxjgx.partb: 0.6774267332132992
1zsCldT4p8.set1: 0.5169500183186315
DebcfZEEmI.proper_fractions: 0.45470743595275087
9wRCzK1G7F.partb: 0.5237828389792547
1zsCldT4p8.set2: 0.44822613216577506
nl-M69Ez9k.parta: 0.6716490562794464
kvig7fcCVc.partb: 0.34885097958241734
Ok-iIHxjgx.parta: 0.32306255835667597
hyei4uD81i.parta: 0.36158862072454967
Test AUC 1b: 0.5160816973115523
Test AUC 1a: 
p7cfRPp-kQ.partb: 0.5244224872674359
Ok-iIHxjgx.partb: 0.7043184289209025
1zsCldT4p8.set1: 0.41515326009922043
DebcfZEEmI.proper_fractions: 0.418241458677624
9wRCzK1G7F.partb: 0.5216552056942936
1zsCldT4p8.set2: 0.48960964753999764
nl-M69Ez9k.parta: 0.634175342670961
kvig7fcCVc.partb: 0.31746651785714286
Ok-iIHxjgx.parta: 0.32078858641358643
hyei4uD81i.parta: 0.4570061226071457
Test AUC 2a: 0.49738922953193365




Training AUC (similar to 1b): 0.5718729854580326
Training AUC (similar to

Test AUC 2a: 0.5877458903330284






KeyboardInterrupt: 

In [381]:
predictions['test1']['1zsCldT4p8.set1'][-1][5][:test_1_Seqlen['1zsCldT4p8.set1'][5]]

array([[6.2713712e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [8.9657325e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00],
       [9.9700743e-01, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00, 0.000000

In [380]:
test_1_Y['1zsCldT4p8.set1'][5][:test_1_Seqlen['1zsCldT4p8.set1'][5]]

array([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]])

In [265]:
sum(test_1_Seqlen['1zsCldT4p8.set2'][:12])

64

In [268]:
test_1_Seqlen['1zsCldT4p8.set2'][9]

4

In [379]:
pat = 5
calculate_auc(test_1_Y['1zsCldT4p8.set1'][pat][:test_1_Seqlen['1zsCldT4p8.set1'][pat]], predictions['test1']['1zsCldT4p8.set1'][-1][pat][:test_1_Seqlen['1zsCldT4p8.set1'][pat]], idx = 0)

correct true label
correct true label
correct true label
correct true label
incorrect true label
incorrect true label
incorrect true label
incorrect true label
incorrect true label


[0.0, array([0.62713712, 0.89657325, 0.99700743, 0.999874  , 0.99994671,
        0.99995971, 0.99996436, 0.99996614, 0.99996674]), array([1., 1., 1., 1., 0., 0., 0., 0., 0.]), 0.4444444444444444]

In [354]:
for i in task_ids:
    print(i)

p7cfRPp-kQ.partb
Ok-iIHxjgx.partb
1zsCldT4p8.set1
DebcfZEEmI.proper_fractions
9wRCzK1G7F.partb
1zsCldT4p8.set2
nl-M69Ez9k.parta
kvig7fcCVc.partb
Ok-iIHxjgx.parta
hyei4uD81i.parta
