## Lab 4: RNN modeling of behavior and performance

### Starter Code

In [1]:
# Starter code 1
import pandas as pd
import numpy as np
import json

import scipy

filename = 'skill_builder_data_corrected.csv'
df = pd.read_csv(filename, encoding='ISO-8859-1', low_memory=False)
df = df[(df['original'] == 1) & (df['attempt_count'] == 1) & ~(df['skill_name'].isnull())]

In [8]:
# Starter code 2
response_df = pd.read_csv('correct.tsv', sep='\t').drop('Unnamed: 0', axis=1)
skill_df = pd.read_csv('skill.tsv', sep='\t').drop('Unnamed: 0', axis=1)
assistment_df = pd.read_csv('assistment_id.tsv', sep='\t').drop('Unnamed: 0', axis=1)
skill_dict = {}
with open('skill_dict.json', 'r', encoding='utf-8') as f:
    loaded = json.load(f)
    for k, v in loaded.items():
        skill_dict[k] = int(v)

skill_num = len(skill_dict) + 1 # including 0

def one_hot(skill_matrix, vocab_size):
    '''
    params:
        skill_matrix: 2-D matrix (student, skills)
        vocal_size: size of the vocabulary
    returns:
        a ndarray with a shape like (student, sequence_len, vocab_size)
    '''
    seq_len = skill_matrix.shape[1]
    result = np.zeros((skill_matrix.shape[0], seq_len, vocab_size))
    for i in range(skill_matrix.shape[0]):
        result[i, np.arange(seq_len), skill_matrix[i]] = 1.
    return result

def dkt_one_hot(skill_matrix, response_matrix, vocab_size):
    seq_len = skill_matrix.shape[1]
    skill_response_array = np.zeros((skill_matrix.shape[0], seq_len, 2 * vocab_size))
    for i in range(skill_matrix.shape[0]):
        skill_response_array[i, np.arange(seq_len), 2 * skill_matrix[i] + response_matrix[i]] = 1.
    return skill_response_array

def preprocess(skill_df, response_df, skill_num):
    skill_matrix = skill_df.iloc[:, 1:].values
    # print(skill_matrix)
    response_array = response_df.iloc[:, 1:].values
    skill_array = one_hot(skill_matrix, skill_num)

    skill_response_array = dkt_one_hot(skill_matrix, response_array, skill_num)
    return skill_array, response_array, skill_response_array
    
skill_array, response_array, skill_response_array = preprocess(skill_df, response_df, skill_num)

[[[0. 1. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 0. 1. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 1. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 1. 0. ... 0. 0. 0.]
  [0. 1. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 ...

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]

 [[0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  ...
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]
  [0. 0. 0. ... 0. 0. 0.]]]


In [3]:
# Starter code 3

# Import required packages
import keras
from keras.layers import Input, Dense, LSTM, TimeDistributed, Lambda, multiply
from keras.models import Model
from tensorflow.keras.optimizers import RMSprop, Adam  # changed from "from keras.optimizers import RMSprop, Adam"
from keras.preprocessing.sequence import pad_sequences
from keras import backend as K

# sequence prediction model (behavior)
def build_skill2skill_model(input_shape, lstm_dim=32, dropout=0.0):
    input = Input(shape=input_shape, name='input_skills')
    lstm = LSTM(lstm_dim, 
                return_sequences=True, 
                dropout=dropout,
                name='lstm_layer')(input)
    output = TimeDistributed(Dense(input_shape[-1], activation='softmax'), name='probability')(lstm)
    model = Model(inputs=[input], outputs=[output])
    adam = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, decay=0.0)
    model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

def reduce_dim(x):
    x = K.max(x, axis=-1, keepdims=True)
    return x

# performance prediction model (DKT)
def build_dkt_model(input_shape, lstm_dim=32, dropout=0.0):
    input_skills = Input(shape=input_shape, name='input_skills')
    lstm = LSTM(lstm_dim, 
                return_sequences=True, 
                dropout=dropout,
                name='lstm_layer')(input_skills)
    dense = TimeDistributed(Dense(int(input_shape[-1]/2), activation='sigmoid'), name='probability_for_each')(lstm)
    
    skill_next = Input(shape=(input_shape[0], int(input_shape[1]/2)), name='next_skill_tested')
    merged = multiply([dense, skill_next], name='multiply')
    reduced = Lambda(reduce_dim, output_shape=(input_shape[0], 1), name='reduce_dim')(merged)
    
    model = Model(inputs=[input_skills, skill_next], outputs=[reduced])
    adam = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07, decay=0.0)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])
    model.summary()
    return model

In [6]:
# Starter code 4

# Sample Usage: skill2skill
print('skill2skill')
skill2skill_model = build_skill2skill_model((99, skill_num), lstm_dim=64)

# Sample Usage: DKT
print('dkt')
dkt_model = build_dkt_model((99, 2 * skill_num), lstm_dim=64)
    

# train skill2skill
skill2skill_model.fit(skill_array[:, 0:-1], 
                      skill_array[:, 1:],
                      epochs=20, 
                      batch_size=32, 
                      shuffle=True,
                      validation_split=0.2)

# train DKT 
dkt_model.fit([skill_response_array[:, 0:-1], skill_array[:, 1:]],
              response_array[:, 1:, np.newaxis],
              epochs=20, 
              batch_size=32, 
              shuffle=True,
              validation_split=0.2)

skill2skill
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_skills (InputLayer)   [(None, 99, 111)]         0         
                                                                 
 lstm_layer (LSTM)           (None, 99, 64)            45056     
                                                                 
 probability (TimeDistribute  (None, 99, 111)          7215      
 d)                                                              
                                                                 
Total params: 52,271
Trainable params: 52,271
Non-trainable params: 0
_________________________________________________________________
dkt
Model: "model_3"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_skills (InputLa

#### 0. Data exploration/Pre-processing

In [4]:
# Exploring the datasets
students_df_skill = skill_df.student_id.unique().tolist()
students_df_response = response_df.student_id.unique().tolist()
students_assistment_df = assistment_df. student_id.unique().tolist()

df_reduced = df.groupby('user_id').filter(lambda x : len(x)>100)
students_df_reduced = df_reduced.user_id.unique().tolist()

(students_df_reduced == students_df_skill) and (students_df_reduced == students_df_response) and (students_df_reduced == students_assistment_df)

True

#### 1. What were the 5 most common and 5 least common skills in this dataset? What percentage of responses are associated with the most common skill?

In [5]:
# Find most and least common skills
mc_skills = df['skill_name'].value_counts()[:5].index.tolist()
lc_skills = df['skill_name'].value_counts()[-5:].index.tolist()
print('The 5 most common skills are:')
for sk in mc_skills:
    print(sk)
print('\nThe 5 least common skills are:')
for sk in lc_skills:
    print(sk)
    
# Find percentage of responses associated with the most common skill
perc_resp = 100*len(df.loc[df['skill_name']==mc_skills[0]].index)/len(df.index)
print('\nThe percentage of responses associated with the most common skill is: %5.2f' % perc_resp,'%')

The 5 most common skills are:
Equation Solving Two or Fewer Steps
Conversion of Fraction Decimals Percents
Addition and Subtraction Fractions
Addition and Subtraction Integers
Ordering Fractions

The 5 least common skills are:
Distributive Property
Finding Slope From Situation
Finding Slope from Ordered Pairs
Recognize Quadratic Pattern
Reading a Ruler or Scale

The percentage of responses associated with the most common skill is:  5.77 %


#### 2. Train the sequence prediction model using a randomly selected 70% (training set) of students' data and predict on the remaining 30% (test set). What was the overall accuracy of skill prediction in the test set? What were the top 5 hardest and easiest to predict skills? Describe the metric you chose to represent hard/easy prediction. 

In [6]:
# 70% of data used as training data
test_prop = 0.7

# Split user_ids/student_ids into 70% training and 30% test data
idx_split = np.array(skill_df.student_id.unique())
np.random.seed(42)
np.random.shuffle(idx_split)
train_students = idx_split[:int(test_prop * len(idx_split))]
test_students = idx_split[int(test_prop * len(idx_split)):]

# Split response_df
train_response_df = response_df.loc[response_df['student_id'].isin(train_students)].reset_index(drop = True)
test_response_df = response_df.loc[response_df['student_id'].isin(test_students)].reset_index(drop = True)

# Split skill_df
train_skill_df = skill_df.loc[skill_df['student_id'].isin(train_students)].reset_index(drop = True)
test_skill_df = skill_df.loc[skill_df['student_id'].isin(test_students)].reset_index(drop = True)

# Checking that the ratio of training data to test data is adequate
print('Training data is %5.3f' % (train_skill_df.shape[0]/skill_df.shape[0]),'% of total data')
print('Testing data is %5.3f' % (test_skill_df.shape[0]/skill_df.shape[0]),'% of total data')

Training data is 0.699 % of total data
Testing data is 0.301 % of total data


In [7]:
# Create RNN input arrays from training data
skill_array, response_array, skill_response_array = preprocess(train_skill_df, train_response_df, skill_num)

# Train sequence prediction model on training set
skill2skill_model = build_skill2skill_model((99, skill_num), lstm_dim=64)
skill2skill_model.fit(skill_array[:, 0:-1], 
                      skill_array[:, 1:],
                      epochs=20, 
                      batch_size=32, 
                      shuffle=True,
                      validation_split=0.2)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_skills (InputLayer)   [(None, 99, 111)]         0         
                                                                 
 lstm_layer (LSTM)           (None, 99, 64)            45056     
                                                                 
 probability (TimeDistribute  (None, 99, 111)          7215      
 d)                                                              
                                                                 
Total params: 52,271
Trainable params: 52,271
Non-trainable params: 0
_________________________________________________________________
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x14ddb413bb0>

In [8]:
# Create RNN input arrays from testing data
test_skill_array, test_response_array, test_skill_response_array = preprocess(test_skill_df, test_response_df, skill_num)

# Make a prediction on testing data
test_predict = skill2skill_model.predict(test_skill_array[:, 0:-1])
print("Shape of prediction on test set:", test_predict.shape, ' = (student, time step, skill) \n')

# Evaulate model prediction accuracy on test data
results = skill2skill_model.evaluate(test_skill_array[:, 0:-1], test_skill_array[:, 1:])
print('\nThe overall accuracy of skill prediction in the test set is: %5.2f' % (100*results[1]),'%')

Shape of prediction on test set: (176, 99, 111)  = (student, time step, skill) 


The overall accuracy of skill prediction in the test set is: 66.80 %


#### My metric for finding the top 5 hardest and easiest to predict skills is as follows:
First I find total number of times that each skill was predicted correctly. Then I divide this by number by the total occurences of this skill in the test set. This gives me the proportion of times that each skill was predicted correctly, relative to the number of times it actually came up in the test data.

Limitation: more than 5 skills have a prediction accuracy of zero.

Possible solutions: (1) Taking into account the n best predicted skills; (2) using better hyperparameters

In [9]:
# Create 1-hot representation of predictions
test_predict_onehot = np.zeros_like(test_predict)
for i in range(test_predict.shape[0]):
    for j in range(test_predict.shape[1]):
        test_predict_onehot[i,j,np.argmax([test_predict[i,j,:]])] = 1

test_actual = test_skill_array[:, 1:]

# initialize arrays
pred_corr = np.zeros(skill_num) # total occurences of correct predictions per skill
percent_pred_corr = np.zeros(skill_num) # nr of correct predictions / nr of occurences of skill

# compute percent_pred_corr for each skill
for k in range(test_actual.shape[2]): # loop over skills
    for i in range(test_actual.shape[0]): # loop over students
        for j in range(test_actual.shape[1]): # loop over time steps
            if test_actual[i,j,k] == 1 and test_predict_onehot[i,j,k] == 1:
                pred_corr[k] = pred_corr[k] + 1
    nr_actual = sum(sum(test_actual[:,:,k]))
    if nr_actual != 0:
        percent_pred_corr[k] = pred_corr[k]/nr_actual

# Find 5 hardest skills (lowest percent_pred_corr) and 5 easiest skills (highest percent_pred_corr) to predict
idx = percent_pred_corr.argsort()
hardest = idx[:5]
easiest = idx[-5:]
easiest = easiest[::-1]

print('The 5 hardest skills to predict are:')
for i in range(5):
    print(list(skill_dict.keys())[hardest[i]])
    
print('\nThe 5 easiest skills to predict are:')
for i in range(5):
    print(list(skill_dict.keys())[easiest[i]])

The 5 hardest skills to predict are:
Box and Whisker
Surface Area Rectangular Prism
Surface Area Cylinder
Area Triangle
Area Trapezoid

The 5 easiest skills to predict are:
Circle Graph
Fraction Of
Equation Solving More Than Two Steps
Equivalent Fractions
Venn Diagram


In [10]:
# Refined metric: looking at n best predicted skills (work in progress)
n = 4

# Create weighted multi-hot representation of predictions
test_predict_multihot = np.zeros_like(test_predict)
for i in range(test_predict.shape[0]): 
    for j in range(test_predict.shape[1]):
        ind = np.argpartition(test_predict, -n)[-n:] # Find indices of 4 highest prediction probabilities
        for h in range(n):
            test_predict_multihot[i,j,ind[h]] = h # Put 4 for first predicted skill, 
                                                # 3 for second predicted skill, ...

test_actual = test_skill_array[:, 1:]

# initialize arrays
pred_corr = np.zeros(skill_num) # weighted count of correct/partially correct predictions
percent_pred_corr = np.zeros(skill_num) # nr of correct predictions / nr of occurences of skill

# compute percent_pred_corr for each skill
for k in range(test_actual.shape[2]): # loop over skills
    for i in range(test_actual.shape[0]): # loop over students
        for j in range(test_actual.shape[1]): # loop over time steps
            if test_actual[i,j,k] == 1 and test_predict_onehot[i,j,k] != 1:
                pred_corr[k] = pred_corr[k] + test_predict_multihot[i,j,k]
    nr_actual = sum(sum(test_actual[:,:,k]))*4
    if nr_actual != 0:
        percent_pred_corr[k] = pred_corr[k]/nr_actual

# Find 5 hardest skills (lowest percent_pred_corr) and 5 easiest skills (highest percent_pred_corr) to predict
idx = percent_pred_corr.argsort()
hardest = idx[:5]
easiest = idx[-5:]
easiest = easiest[::-1]

print('The 5 hardest skills to predict are:')
for i in range(5):
    print(list(skill_dict.keys())[hardest[i]])
    
print('\nThe 5 easiest skills to predict are:')
for i in range(5):
    print(list(skill_dict.keys())[easiest[i]])

KeyboardInterrupt: 

#### 3. Modify parameters of the network to increase accuracy (e.g. number of hidden nodes, optimizer, number of RNN layers, number of epochs, creating a validation set and stopping training when the validation set accuracy decreases). What were your accuracy results with respect to the hyper parameters you tuned?

In [11]:
# Create RNN input arrays from training and testing data
skill_array, response_array, skill_response_array = preprocess(train_skill_df, train_response_df, skill_num)
test_skill_array, test_response_array, test_skill_response_array = preprocess(test_skill_df, test_response_df, skill_num)

# Specify different hyperparemeter values
epochs = [10,20,40]
lstm_dims = [32,64,128]
dropouts = [0.0,0.2,0.4]

results = np.zeros((len(epochs),len(lstm_dims),len(dropouts)))

# Train sequence prediction model on training set with different hyperparemeters
for ep in range(len(epochs)):
    for dim in range(len(lstm_dims)):
        for do in range(len(dropouts)):
            
            skill2skill_model = build_skill2skill_model((99, skill_num), lstm_dim=lstm_dims[dim], dropout=dropouts[do])
            skill2skill_model.fit(skill_array[:, 0:-1], 
                      skill_array[:, 1:],
                      epochs=epochs[ep], 
                      batch_size=32, 
                      shuffle=True,
                      validation_split=0.2)
            result = skill2skill_model.evaluate(test_skill_array[:, 0:-1], test_skill_array[:, 1:])
            results[ep,dim,do] = result[1]

Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_skills (InputLayer)   [(None, 99, 111)]         0         
                                                                 
 lstm_layer (LSTM)           (None, 99, 32)            18432     
                                                                 
 probability (TimeDistribute  (None, 99, 111)          3663      
 d)                                                              
                                                                 
Total params: 22,095
Trainable params: 22,095
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Para

In [12]:
hyper_10_epochs = pd.DataFrame(results[0,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])
hyper_20_epochs = pd.DataFrame(results[1,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])
hyper_40_epochs = pd.DataFrame(results[2,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])

print('epochs=10\n',hyper_10_epochs)
print('\nepochs=20\n',hyper_20_epochs)
print('\nepochs=40\n',hyper_40_epochs)

epochs=10
              lstm_dim=32  lstm_dim=64  lstm_dim=128
dropout=0.0     0.346935     0.215852      0.259814
dropout=0.2     0.450872     0.472567      0.475666
dropout=0.4     0.600953     0.603593      0.580808

epochs=20
              lstm_dim=32  lstm_dim=64  lstm_dim=128
dropout=0.0     0.437041     0.555441      0.496442
dropout=0.2     0.671488     0.665404      0.634871
dropout=0.4     0.735709     0.733299      0.722624

epochs=40
              lstm_dim=32  lstm_dim=64  lstm_dim=128
dropout=0.0     0.699839     0.701102      0.703111
dropout=0.2     0.774966     0.766816      0.758379
dropout=0.4     0.804637     0.802514      0.778294


The best accuracy (80.05%) is achieved with 40 epochs, a 32-dimenional LSTM layer, and a dropout of 0.4

#### 4. Train a performance prediction model (DKT) using the same 70/30% split and report the accuracy and AUC of prediction on the 30%

In [13]:
# Create RNN input arrays from training and testing data
skill_array, response_array, skill_response_array = preprocess(train_skill_df, train_response_df, skill_num)
test_skill_array, test_response_array, test_skill_response_array = preprocess(test_skill_df, test_response_df, skill_num)

# Initialize and fit DKT model
dkt_model = build_dkt_model((99, 2 * skill_num), lstm_dim=64)
dkt_model.fit([skill_response_array[:, 0:-1], skill_array[:, 1:]],
              response_array[:, 1:, np.newaxis],
              epochs=20, 
              batch_size=32, 
              shuffle=True,
              validation_split=0.2)

# Evaulate model prediction accuracy on test data
result_dkt = dkt_model.evaluate([skill_response_array[:, 0:-1], skill_array[:, 1:]],
              response_array[:, 1:, np.newaxis])

print('\nThe overall accuracy of skill prediction in the test set is: %5.2f' % (100*result_dkt[1]),'%')

Model: "model_28"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_skills (InputLayer)      [(None, 99, 222)]    0           []                               
                                                                                                  
 lstm_layer (LSTM)              (None, 99, 64)       73472       ['input_skills[0][0]']           
                                                                                                  
 probability_for_each (TimeDist  (None, 99, 111)     7215        ['lstm_layer[0][0]']             
 ributed)                                                                                         
                                                                                                  
 next_skill_tested (InputLayer)  [(None, 99, 111)]   0           []                        

#### 5. Tune the hyper parameters of this model to improve accuracy and report your improvement with respect to the tuned parameters. Which lead to the most significant improvement?

In [14]:
# Specify different hyperparemeter values
epochs = [1,2,4]
lstm_dims = [32,64,128]
dropouts = [0.0,0.2,0.4]

results_dkt = np.zeros((len(epochs),len(lstm_dims),len(dropouts)))

# Train sequence prediction model on training set with different hyperparemeters
for ep in range(len(epochs)):
    for dim in range(len(lstm_dims)):
        for do in range(len(dropouts)):
            dkt_model = build_dkt_model((99, 2 * skill_num), lstm_dim=lstm_dims[dim], dropout=dropouts[do])
            dkt_model.fit([skill_response_array[:, 0:-1], skill_array[:, 1:]],
                          response_array[:, 1:, np.newaxis],
                          epochs=epochs[ep], 
                          batch_size=32, 
                          shuffle=True,
                          validation_split=0.2)
            result_dkt = dkt_model.evaluate([skill_response_array[:, 0:-1], skill_array[:, 1:]],
                          response_array[:, 1:, np.newaxis])
            results_dkt[ep,dim,do] = result_dkt[1]

hyper_10_epochs = pd.DataFrame(results_dkt[0,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])
hyper_20_epochs = pd.DataFrame(results_dkt[1,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])
hyper_40_epochs = pd.DataFrame(results_dkt[2,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])

print('epochs=10\n',hyper_10_epochs)
print('\nepochs=20\n',hyper_20_epochs)
print('\nepochs=40\n',hyper_40_epochs)

Model: "model_29"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_skills (InputLayer)      [(None, 99, 222)]    0           []                               
                                                                                                  
 lstm_layer (LSTM)              (None, 99, 32)       32640       ['input_skills[0][0]']           
                                                                                                  
 probability_for_each (TimeDist  (None, 99, 111)     3663        ['lstm_layer[0][0]']             
 ributed)                                                                                         
                                                                                                  
 next_skill_tested (InputLayer)  [(None, 99, 111)]   0           []                        

The best accuracy (85.22%) is achieved with epochs = 40, a 32-dimenional LSTM layer, and a dropout of 0.4

#### 9. Re-tune the sequence and performance predictions models to use the assignment ID instead of skill ID as the input. Keep the output the same as it was before. Compare accuracies bewteen the two different inputs for each model. (work in progress)

In [16]:
## Sequence prediction
# Preperations
assistment_df = pd.read_csv('assistment_id.tsv', sep='\t').drop('Unnamed: 0', axis=1)
assistment_dff = assistment_df
assistment_list = df[['assistment_id']].to_numpy().tolist()
assistment_list[:] = np.unique(assistment_list)
assistment_list.sort()

assistment_num = len(assistment_list)

#assignment_list = [str(x) for x in assignment_list]
assistment_dict = {}
counter = 0
for i in assistment_list:
    counter = counter+1
    assistment_dict[i] = counter

assistment_df=assistment_df.replace(assistment_dict)

# Split assignment_df into training and testing data set
train_assistment_df = assistment_df.loc[skill_df['student_id'].isin(train_students)].reset_index(drop = True)
test_assistment_df = assistment_df.loc[skill_df['student_id'].isin(test_students)].reset_index(drop = True)

# Create input arrays from training and testing data
assistment_array, response_array, assistment_response_array = preprocess(train_assistment_df, train_response_df, assistment_num)
test_assistment_array, test_response_array, test_assistment_response_array = preprocess(test_assistment_df, test_response_df, assistment_num)

In [19]:
# Specify different hyperparemeter values
epochs = [10,20,40]
lstm_dims = [32,64,128]
dropouts = [0.0,0.2,0.4]

results_assistment = np.zeros((len(epochs),len(lstm_dims),len(dropouts)))

# Train sequence prediction model on training set with different hyperparemeters
for ep in range(len(epochs)):
    for dim in range(len(lstm_dims)):
        for do in range(len(dropouts)):
            skill2skill_model = build_skill2skill_model((99, assistment_num), lstm_dim=lstm_dims[dim], dropout=dropouts[do])
            skill2skill_model.fit(assistment_array[:, 0:-1], 
                      assistment_array[:, 1:],
                      epochs=epochs[ep], 
                      batch_size=32, 
                      shuffle=True,
                      validation_split=0.2)
            result_assistment = skill2skill_model.evaluate(test_assistment_array[:, 0:-1], test_assistment_array[:, 1:])
            results_assistment[ep,dim,do] = result_assistment[1]

hyper_10_epochs = pd.DataFrame(results_assistment[0,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])
hyper_20_epochs = pd.DataFrame(results_assistment[1,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])
hyper_40_epochs = pd.DataFrame(results_assistment[2,:,:], columns=['lstm_dim=32','lstm_dim=64','lstm_dim=128'], index=['dropout=0.0','dropout=0.2','dropout=0.4'])

print('epochs=10\n',hyper_10_epochs)
print('\nepochs=20\n',hyper_20_epochs)
print('\nepochs=40\n',hyper_40_epochs)

Model: "model_58"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_skills (InputLayer)   [(None, 99, 14617)]       0         
                                                                 
 lstm_layer (LSTM)           (None, 99, 32)            1875200   
                                                                 
 probability (TimeDistribute  (None, 99, 14617)        482361    
 d)                                                              
                                                                 
Total params: 2,357,561
Trainable params: 2,357,561
Non-trainable params: 0
_________________________________________________________________


MemoryError: Unable to allocate 1.76 GiB for an array with shape (326, 99, 14617) and data type float32