# Udacity - Machine Learning Engineer Nanodegree
## Capstone Project
### Title: Development of an LSTM Network to Predict Students’ Answers on Exam Questions

### Implementation of Deep Knowledge Tracing (DKT):
#### Part 1: Define constants

In [9]:
# --- File locations ---------------------------------------------------------
dataset = "data/ASSISTments_skill_builder_data.csv"  # path of the input dataset
best_model_file = "saved_models/ASSISTments.best.model.weights.hdf5"  # file the model is saved to
train_log = "logs/dktmodel.train.log"  # file receiving the training log
eval_log = "logs/dktmodel.eval.log"  # file receiving the testing log

# --- Data split -------------------------------------------------------------
testing_rate = 0.2  # portion of the data held out for testing
validation_rate = 0.2  # portion of the training data held out for validation

# --- Model and training settings --------------------------------------------
lstm_units = 250  # number of LSTM units
dropout_rate = 0.6  # dropout rate
optimizer = "adagrad"  # optimizer to use
batch_size = 20  # batch size
epochs = 100  # number of epochs to train
verbose = 1  # verbosity level, one of {0, 1, 2}

#### Part 2: Pre-processing

In [10]:
# Import only the two helpers this cell uses rather than `from Utils import *`,
# so it is obvious where each name comes from and nothing else leaks into the
# notebook namespace.
from Utils import read_file, split_dataset

# NOTE: on entry `dataset` is the dataset path defined in Part 1; it is rebound
# here to the value returned by read_file. Re-run the Part 1 cell before
# re-running this one, otherwise read_file receives the loaded data instead of
# the path.
dataset, num_skills = read_file(dataset)

# Split into training / validation / testing inputs (X_*) and targets (y_*).
X_train, X_val, X_test, y_train, y_val, y_test = split_dataset(dataset, validation_rate, testing_rate)

# Report the size of each split and the number of skills.
print("======== Data Summary ========")
print("Data size: %d" % len(dataset))
print("Training data size: %d" % len(X_train))
print("Validation data size: %d" % len(X_val))
print("Testing data size: %d" % len(X_test))
print("Number of skills: %d" % num_skills)
print("==============================")

Data size: 4163
Training data size: 2665
Validation data size: 666
Testing data size: 832
Number of skills: 123


#### Part 3: Building the model

In [12]:
from StudentModel import DKTModel, DataGenerator

# Number of leading entries of each split that are fed to the generators.
# NOTE: with the value 10 the generators see only the first 10 entries of each
# split, not the full splits summarised in Part 2. Set to None to use every
# entry (`seq[:None]` is the whole sequence).
sample_limit = 10

# Create generators for training/validation/testing
train_gen = DataGenerator(X_train[:sample_limit], y_train[:sample_limit], num_skills, batch_size)
val_gen = DataGenerator(X_val[:sample_limit], y_val[:sample_limit], num_skills, batch_size)
test_gen = DataGenerator(X_test[:sample_limit], y_test[:sample_limit], num_skills, batch_size)

# Create the model; the skill count and feature dimension are read back from
# the training generator so the two objects agree.
student_model = DKTModel(
    num_skills=train_gen.num_skills,
    num_features=train_gen.feature_dim,
    optimizer=optimizer,
    hidden_units=lstm_units,
    batch_size=batch_size,
    dropout_rate=dropout_rate,
)

#### Part 4: Train the Model

In [13]:
# Fit the model on the training generator, passing the validation generator,
# the epoch count, the verbosity level and the two output paths (best-model
# file and training log). Whatever fit() returns is kept in `history`.
history = student_model.fit(
    train_gen,
    epochs=epochs,
    val_gen=val_gen,
    verbose=verbose,
    filepath_bestmodel=best_model_file,
    filepath_log=train_log,
)

==== Training Started ====
Epoch 1/100

Epoch 00001: val_loss improved from inf to 0.65795, saving model to saved_models/ASSISTments.best.model.weights.hdf5
Epoch 2/100

Epoch 00002: val_loss did not improve
Epoch 3/100

Epoch 00003: val_loss improved from 0.65795 to 0.62957, saving model to saved_models/ASSISTments.best.model.weights.hdf5
Epoch 4/100

Epoch 00004: val_loss did not improve
Epoch 5/100

Epoch 00005: val_loss improved from 0.62957 to 0.62381, saving model to saved_models/ASSISTments.best.model.weights.hdf5
Epoch 6/100

Epoch 00006: val_loss did not improve
Epoch 7/100

Epoch 00007: val_loss did not improve
Epoch 8/100

Epoch 00008: val_loss improved from 0.62381 to 0.61989, saving model to saved_models/ASSISTments.best.model.weights.hdf5
Epoch 9/100

Epoch 00009: val_loss improved from 0.61989 to 0.61674, saving model to saved_models/ASSISTments.best.model.weights.hdf5
Epoch 10/100

Epoch 00010: val_loss did not improve
Epoch 11/100

Epoch 00011: val_loss did not improve

#### Part 5: Load the Model with the Best Validation Loss

In [14]:
# Load the weights stored at `best_model_file` -- the same path that was passed
# to fit() as `filepath_bestmodel` -- into the model before testing.
student_model.load_weights(best_model_file)

#### Part 6: Test the Model

In [15]:
# Evaluate on the testing generator, requesting the 'auc', 'acc' and 'pre'
# metrics and passing the evaluation log path. The return value is kept in
# `result`.
result = student_model.evaluate(
    test_gen,
    metrics=['auc','acc','pre'],
    verbose=verbose,
    filepath_log=eval_log,
)

==== Evaluation Started ====
==== Evaluation Done ====
