### Confirm we can use a GPU to run the model

In [7]:
import tensorflow as tf

In [8]:
gpus = tf.config.list_physical_devices('GPU')
logical_gpus = tf.config.experimental.list_logical_devices('GPU')
print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")

0 Physical GPUs, 0 Logical GPU


In [9]:
from tensorflow.python.client import device_lib

In [10]:
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 13805922394068669191]

In [11]:
tf.test.gpu_device_name()

''

In [12]:
tf.config.list_physical_devices("GPU")

[]

### Define constants

In [26]:
data = "data/kuze_data/evaluations_per_ans_with_taxonomy_ids_PPL.csv"
factorized_taxonomies = "data/kuze_data/factorized_math_taxonomies.csv"
verbose = 1
best_model_weights = "weights/bestmodel"
log_dir = "logs"
optimizer = "adam"
lstm_units = 100
batch_size = 32
epochs = 100
dropout_rate = 0.3
test_fraction = 0.2
validation_fraction = 0.2

### Pre-processing

In [14]:
import sys

sys.path.append('/home/grenouille/Documents/jenga/final_project/code/kuze_dkt_imp')

In [15]:
from deepkt import deepkt, data_util, metrics

dataset, length, nb_features, nb_taxonomies = data_util.load_dataset(data, factorized_taxonomies, batch_size=batch_size, shuffle=True)

train_set, test_set, val_set = data_util.split_dataset(dataset=dataset, total_size=length, test_fraction=test_fraction, val_fraction=validation_fraction)

set_size = length * batch_size

test_set_size = (set_size * test_fraction)

val_set_size = (set_size - test_set_size) * validation_fraction

train_set_size = set_size - test_set_size - val_set_size

print("============== Data Summary ==============")
print("Total number of students: %d" % set_size)
print("Training set size: %d" % train_set_size)
print("Validation set size: %d" % val_set_size)
print("Testing set size: %d" % test_set_size)
print("Number of skills: %d" % nb_taxonomies)
print("Number of features in the input: %d" % nb_features)
print("========================================= ")

  if (await self.run_code(code, result,  async_=asy)):


Total number of students: 1120
Training set size: 716
Validation set size: 179
Testing set size: 224
Number of skills: 350
Number of features in the input: 351


### Building the model

In [16]:
student_model = deepkt.DKTModel(
        nb_features=nb_features,
        nb_taxonomies=nb_taxonomies,
        hidden_units=lstm_units,
        dropout_rate=dropout_rate)

student_model.compile(
        optimizer=optimizer,
        metrics=[
            metrics.BinaryAccuracy(),
            metrics.AUC(),
            metrics.Precision(),
            metrics.Recall()
        ])

student_model.summary()

Model: "DKTModel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inputs (InputLayer)          [(None, None, 351)]       0         
_________________________________________________________________
masking (Masking)            (None, None, 351)         0         
_________________________________________________________________
lstm (LSTM)                  (None, None, 100)         180800    
_________________________________________________________________
outputs (TimeDistributed)    (None, None, 350)         35350     
Total params: 216,150
Trainable params: 216,150
Non-trainable params: 0
_________________________________________________________________


### Train the model

In [17]:
import tensorflow as tf
history = student_model.fit(
    dataset=train_set,
    epochs=epochs,
    verbose=verbose,
    validation_data=val_set,
    callbacks=[
        tf.keras.callbacks.CSVLogger(f"{log_dir}/train.log"),
        tf.keras.callbacks.ModelCheckpoint(best_model_weights, save_best_only=True, save_weights_only=True),
        tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    ]
)



### Load the model with the best validation loss

In [18]:
student_model.load_weights(best_model_weights)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fd1556abbe0>

### Test the model

In [19]:
result = student_model.evaluate(test_set, verbose=verbose)



In [20]:
result

{'loss': 0.07880068570375443,
 'binary_accuracy': 0.5855172276496887,
 'auc': 0.6138414144515991,
 'precision': 0.5748977661132812,
 'recall': 0.5092509984970093}

In [21]:
student_model.save('student_prediction/student_prediction_model')



INFO:tensorflow:Assets written to: student_prediction/student_prediction_model/assets


INFO:tensorflow:Assets written to: student_prediction/student_prediction_model/assets
