In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
import keras
import copy
import tensorflow as tf
import seaborn as sn
import pandas as pd
import keras.backend as K
from keras.models import Sequential
from keras.layers.core import Activation, Dense
from keras.layers import Flatten, LSTM, Masking
from keras.models import Model
from keras.layers import Input
from sklearn import metrics
from model_helper import *
from pathlib import Path

## 1. Load in dataset

In [None]:
# Anchor every path to the absolute working directory. A bare `Path()` is the
# relative path '.', and the parent of '.' is '.' again, so `cwd.parent` would
# silently stay in the current directory instead of moving one level up.
cwd = Path.cwd()

# The preprocessed splits are read from the `preprocessing` directory that sits
# next to the current working directory.
preprocessing_dir = cwd.parent / 'preprocessing'
training_set_path = preprocessing_dir / 'training_seq_n_12_rmrp0'
dev_set_path = preprocessing_dir / 'dev_seq_n_12_rmrp0'
test_set_path = preprocessing_dir / 'test_seq_n_12_rmrp0'

In [None]:
# Deserialize the training, dev and test splits, in that order.
# NOTE(review): pickle can execute arbitrary code while loading; only point
# these paths at files produced by a trusted preprocessing run.
loaded_splits = []
for split_path in (training_set_path, dev_set_path, test_set_path):
    with open(split_path, 'rb') as split_file:
        loaded_splits.append(pickle.load(split_file))
training_set, dev_set, test_set = loaded_splits

In [None]:
# Turn the 'X' and 'Y' entries of every split into NumPy arrays.
X_train, Y_train = (np.array(training_set[field]) for field in ('X', 'Y'))
X_dev, Y_dev = (np.array(dev_set[field]) for field in ('X', 'Y'))
X_test, Y_test = (np.array(test_set[field]) for field in ('X', 'Y'))

# Print the training labels as a quick visual check.
print(Y_train)

## 2. Define GradeNet Model

In [None]:
# Seed NumPy and TensorFlow before any layer is created.
np.random.seed(0)
tf.random.set_seed(0)

# Every sample is a sequence of 12 timesteps with 22 features per timestep.
inputs = Input(shape = (12, 22))
# Mask computed from the inputs with mask_value 0.; it is handed to the first
# LSTM so masked timesteps are skipped there.
mask = Masking(mask_value = 0.).compute_mask(inputs)

# First recurrent layer. `return_sequences` has to be the boolean True: the
# string 'True' only works by being truthy and is stored verbatim in the layer
# config. No `input_shape` is passed because `Input` already fixes the shape.
lstm0 = LSTM(20, activation='tanh', kernel_initializer='glorot_normal', return_sequences = True)(
    inputs, mask = mask)

# Stack of Dense layers applied to the LSTM output sequence.
dense1 = Dense(100, activation='relu', kernel_initializer='glorot_normal')(lstm0)
dense2 = Dense(80, activation='relu', kernel_initializer='glorot_normal')(dense1)
dense3 = Dense(75, activation='relu', kernel_initializer='glorot_normal')(dense2)
dense4 = Dense(50, activation='relu', kernel_initializer='glorot_normal')(dense3)
dense5 = Dense(20, activation='relu', kernel_initializer='glorot_normal')(dense4)
dense6 = Dense(10, activation='relu', kernel_initializer='glorot_normal')(dense5)

# Auxiliary 10-way softmax head on the flattened dense6 output. It is only
# referenced by the (currently disabled) custom loss.
flat = Flatten()(dense6)
softmax2 = Dense(10, activation='softmax', name = 'softmax2')(flat)

# Main branch: two more LSTMs followed by two Dense layers and the final head.
lstm1 = LSTM(20, activation='tanh', kernel_initializer='glorot_normal', return_sequences = True)(dense6)
lstm2 = LSTM(20, activation='tanh', kernel_initializer='glorot_normal')(lstm1)
dense7 = Dense(15, activation='relu', kernel_initializer='glorot_normal')(lstm2)
dense8 = Dense(15, activation='relu', kernel_initializer='glorot_normal')(dense7)
# The final head gets its own name; reusing 'softmax2' would clash with the
# auxiliary head as soon as both heads are placed in one model.
# NOTE(review): confirm previously saved weight files still load after the rename.
softmax3 = Dense(10, activation='softmax', name = 'softmax3')(dense8)

def custom_loss(layer):
    """Build a two-term sparse categorical cross-entropy loss.

    Args:
        layer: tensor of class probabilities that is scored against the same
            labels as the model prediction.

    Returns:
        A function ``loss(y_true, y_pred)`` that adds the cross-entropy of
        ``y_pred`` and the cross-entropy of ``layer`` (both against
        ``y_true``) and averages the sum over the last axis.
    """
    def loss(y_true, y_pred):
        # Term for the prediction passed in by the caller.
        prediction_term = K.sparse_categorical_crossentropy(y_true, y_pred)
        # Term for the tensor captured when the loss was built.
        captured_term = K.sparse_categorical_crossentropy(y_true, layer)
        combined = prediction_term + captured_term
        return K.mean(combined, axis=-1)
    return loss

# The trainable model maps the sequence input to the final softmax head.
GradeNet = Model(inputs=[inputs], outputs=[softmax3])

# TODO: loss=custom_loss(softmax2) was tried here but breaks training; plain
# sparse categorical cross-entropy is used until that is resolved.
GradeNet.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

## 2-1. Training of GradeNet
### To load pretrained weights, please skip to 2-2.

In [None]:
# Collects the object returned by every GradeNet.fit call in the training
# cells; re-running this cell discards the histories gathered so far.
history_GradeNet_all: list = []

In [None]:
# First training phase: 10 rounds of 10 epochs, keeping one history per round.
phase1_class_weight = {0:1, 1:1, 2:2, 3: 2, 4: 1, 5: 4, 6:2, 7: 4, 8: 8, 9: 8}
for round_index in range(10):
    history_GradeNet = GradeNet.fit(
        X_train, Y_train,
        epochs=10,
        batch_size=256,
        validation_data=(X_dev, Y_dev),
        class_weight=phase1_class_weight)
    history_GradeNet_all.append(history_GradeNet)

In [None]:
# Second training phase: same schedule (10 rounds of 10 epochs) with a
# different set of class weights.
phase2_class_weight = {0:1, 1:1, 2:2, 3: 4, 4: 1, 5: 4, 6: 8, 7: 8, 8: 8, 9: 8}
for round_index in range(10):
    history_GradeNet = GradeNet.fit(
        X_train, Y_train,
        epochs=10,
        batch_size=256,
        validation_data=(X_dev, Y_dev),
        class_weight=phase2_class_weight)
    history_GradeNet_all.append(history_GradeNet)

### Plot training history

In [None]:
# Plot the accumulated training histories and keep the returned package so it
# can be pickled in the save cell.
# NOTE(review): plot_history is not defined in this notebook; presumably it
# comes from `from model_helper import *` - confirm.
GradeNet_history_package = plot_history(history_GradeNet_all, 'GradeNet')

### Save training results

In [None]:
# Persist the training-history package and the trained weights.
# NOTE(review): the weights are written to "GradeNet.h5" relative to the
# working directory, while the loading cell reads
# cwd.parent / 'model' / 'GradeNet.h5' - confirm both resolve to the same file.
# NOTE(review): where save_pickle writes is not visible here; verify it matches
# the history path used by the loading cell.
save_pickle(GradeNet_history_package, 'GradeNet_train_history')
GradeNet.save_weights("GradeNet.h5")

## 2-2. Loading pretrained GradeNet

In [None]:
# Restore the pretrained weights from the `model` directory.
pretrained_dir = cwd.parent / 'model'
GradeNet.load_weights(pretrained_dir / 'GradeNet.h5')

# Restore the pickled training history stored in the same directory.
history_path = pretrained_dir / 'GradeNet_train_history'
with open(history_path, 'rb') as history_file:
    GradeNet_history_package = pickle.load(history_file)

# Re-draw the training curves from the stored package.
plot_history_package(GradeNet_history_package, 'GradeNet')

## 3. Analyze GradeNet Performance

### Confusion Matrix

In [None]:
# Predicted class = index of the largest output probability for each sample.
train_pred_classes = GradeNet.predict(X_train).argmax(axis=1)
plot_confusion_matrix(Y_train, train_pred_classes, title = 'Confusion matrix of GradeNet(Training set)')

In [None]:
# Predicted class = index of the largest output probability for each sample.
dev_pred_classes = GradeNet.predict(X_dev).argmax(axis=1)
plot_confusion_matrix(Y_dev, dev_pred_classes, title = 'Confusion matrix of GradeNet(Dev set)')

In [None]:
# Predicted class = index of the largest output probability for each sample.
test_pred_classes = GradeNet.predict(X_test).argmax(axis=1)
plot_confusion_matrix(Y_test, test_pred_classes, title = 'Confusion matrix of GradeNet(Test set)')

### F1 score

In [None]:
# Macro-averaged F1 of the arg-max predictions on the training set.
train_pred_classes = GradeNet.predict(X_train).argmax(axis=1)
F1_train = metrics.f1_score(Y_train, train_pred_classes, average = 'macro')
print(F1_train)

In [None]:
# Macro-averaged F1 of the arg-max predictions on the dev set.
dev_pred_classes = GradeNet.predict(X_dev).argmax(axis=1)
F1_dev = metrics.f1_score(Y_dev, dev_pred_classes, average = 'macro')
print(F1_dev)

In [None]:
# Macro-averaged F1 of the arg-max predictions on the test set.
test_pred_classes = GradeNet.predict(X_test).argmax(axis=1)
F1_test = metrics.f1_score(Y_test, test_pred_classes, average = 'macro')
print(F1_test)

### Exact accuracy and rough (+/-1) accuracy

In [None]:
# compute_accuracy's result is indexed below: entry 0 is reported as the exact
# accuracy and entry 1 as the +/-1 accuracy.
train_pred_classes = GradeNet.predict(X_train).argmax(axis=1)
accuracy_train = compute_accuracy(Y_train, train_pred_classes)
print("Exactly accuracy rate of training set = %s" % accuracy_train[0])
print("+/-1 Accuracy rate of training set= %s" % accuracy_train[1])

In [None]:
# compute_accuracy's result is indexed below: entry 0 is reported as the exact
# accuracy and entry 1 as the +/-1 accuracy.
dev_pred_classes = GradeNet.predict(X_dev).argmax(axis=1)
accuracy_dev = compute_accuracy(Y_dev, dev_pred_classes)
print("Exactly accuracy rate of dev set = %s" % accuracy_dev[0])
print("+/-1 Accuracy rate of dev set = %s" % accuracy_dev[1])

In [None]:
# compute_accuracy's result is indexed below: entry 0 is reported as the exact
# accuracy and entry 1 as the +/-1 accuracy.
test_pred_classes = GradeNet.predict(X_test).argmax(axis=1)
accuracy_test = compute_accuracy(Y_test, test_pred_classes)
print("Exactly accuracy rate of test set = %s" % accuracy_test[0])
print("+/-1 Accuracy rate of test set = %s" % accuracy_test[1])

### KL divergence

In [None]:
# KL divergence between the one-hot encoded labels (depth 10) and the model's
# predicted probabilities, reported per split.
kl = tf.keras.losses.KLDivergence()

train_one_hot = tf.one_hot(Y_train.astype(int), depth = 10)
kld_train = kl(train_one_hot, GradeNet.predict(X_train)).numpy()
print(kld_train)

dev_one_hot = tf.one_hot(Y_dev.astype(int), depth = 10)
kld_dev = kl(dev_one_hot, GradeNet.predict(X_dev)).numpy()
print(kld_dev)

test_one_hot = tf.one_hot(Y_test.astype(int), depth = 10)
kld_test = kl(test_one_hot, GradeNet.predict(X_test)).numpy()
print(kld_test)

### Mean absolute error

In [None]:
# Mean absolute error between the labels and the arg-max predicted class,
# reported per split.
train_abs_error = np.abs(Y_train - GradeNet.predict(X_train).argmax(axis=1))
mae_train = np.mean(train_abs_error)
print(mae_train)

dev_abs_error = np.abs(Y_dev - GradeNet.predict(X_dev).argmax(axis=1))
mae_dev = np.mean(dev_abs_error)
print(mae_dev)

test_abs_error = np.abs(Y_test - GradeNet.predict(X_test).argmax(axis=1))
mae_test = np.mean(test_abs_error)
print(mae_test)

### Classification report

In [None]:
# scikit-learn classification report (4 digits) for each split, computed from
# the arg-max predicted classes.
train_pred_classes = GradeNet.predict(X_train).argmax(axis=1)
sk_report_train = metrics.classification_report(y_true=Y_train, y_pred=train_pred_classes, digits=4)
print(sk_report_train)

dev_pred_classes = GradeNet.predict(X_dev).argmax(axis=1)
sk_report_dev = metrics.classification_report(y_true=Y_dev, y_pred=dev_pred_classes, digits=4)
print(sk_report_dev)

test_pred_classes = GradeNet.predict(X_test).argmax(axis=1)
sk_report_test = metrics.classification_report(y_true=Y_test, y_pred=test_pred_classes, digits=4)
print(sk_report_test)

In [None]:
# Macro-averaged one-vs-rest ROC AUC for each split, computed from the full
# predicted probability vectors rather than the arg-max classes.
train_pred_probs = GradeNet.predict(X_train)
AUC_train = metrics.roc_auc_score(Y_train, train_pred_probs, multi_class='ovr', average='macro')
print(AUC_train)

dev_pred_probs = GradeNet.predict(X_dev)
AUC_dev = metrics.roc_auc_score(Y_dev, dev_pred_probs, multi_class='ovr', average='macro')
print(AUC_dev)

test_pred_probs = GradeNet.predict(X_test)
AUC_test = metrics.roc_auc_score(Y_test, test_pred_probs, multi_class='ovr', average='macro')
print(AUC_test)