In [None]:
# Execute the dataset-loading notebook in this kernel's namespace.
# NOTE(review): presumably this is what defines `df`, `dictionary`, `proc_doc_col`,
# `label_col` and `pd`, which are used below but never defined here - confirm.
%run ./../data/load-dataset.ipynb
# Execute the callback helper notebook.
# NOTE(review): presumably the source of `KerasEpochCallback`, which is used when
# the model is built but is not imported anywhere in this notebook - confirm.
%run ./../various/_epoch-callback.ipynb
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay
from tensorflow import SparseTensor
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [None]:
# Number of epochs handed to model.fit; the plotting cells also use it for the x axis.
ITERS: int = 10

# Seed passed as `random_state` to train_test_split so the split is repeatable.
RANDOM_SEED: int = 0

In [None]:
# Split the two selected columns of the dataset frame into parallel arrays:
# the pre-processed documents and their class labels.
corpus, labels = df[[proc_doc_col, label_col]].T.to_numpy()

# Bag-of-words encoding: one entry per document, each an iterable of
# (column, value) pairs as produced by `dictionary.doc2bow`.
X = list(map(dictionary.doc2bow, corpus))

# One-hot targets: row i of the 2x2 identity matrix encodes class i.
class_ids = [int(label) for label in labels]
y = np.eye(2)[class_ids]

# Default train/test proportions; the seed keeps the partition stable across runs.
train_samples, test_samples, train_labels, test_labels = train_test_split(
    X, y, random_state=RANDOM_SEED
)

In [None]:
def _bow_to_sparse_tensor(samples, num_features):
    """Pack bag-of-words samples into a 2-D SparseTensor, one row per sample.

    Args:
        samples: sequence of documents, each an iterable of (column, value) pairs.
        num_features: number of columns of the resulting tensor.

    Returns:
        A SparseTensor with dense shape (len(samples), num_features) whose
        indices are in canonical row-major order.
    """
    # Keyed by (row, col) so that each cell keeps exactly one value.
    cells = {(row, col): val for row, sample in enumerate(samples) for col, val in sample}
    # reshape keeps the index array rank-2 even when there are no non-zero cells.
    indices = np.array(list(cells.keys()), dtype=np.int64).reshape(-1, 2)
    tensor = SparseTensor(indices=indices, values=list(cells.values()),
                          dense_shape=(len(samples), num_features))
    # Sparse ops expect row-major index order; make that explicit instead of
    # relying on the order in which the pairs happened to be produced.
    return tf.sparse.reorder(tensor)


# Same conversion for both splits; the feature width is the dictionary size.
train_tensor = _bow_to_sparse_tensor(train_samples, len(dictionary))
test_tensor = _bow_to_sparse_tensor(test_samples, len(dictionary))

In [None]:
# Seed TensorFlow before any layer is created so that the randomly initialised
# weights and the dropout masks repeat across "Restart & Run All".
tf.random.set_seed(RANDOM_SEED)

# Feed-forward classifier over sparse bag-of-words rows:
# sparse input -> 128 ReLU units -> 50% dropout -> 2 sigmoid outputs.
model = Sequential()
# `shape` is given as a tuple (the documented form) rather than a bare int.
model.add(Input(shape=(len(dictionary),), sparse=True))
model.add(Dense(128, activation=tf.nn.relu))
model.add(Dropout(0.5))
# NOTE(review): the two sigmoid outputs are scored independently by
# binary_crossentropy; softmax + categorical_crossentropy is the usual pairing
# for one-hot, mutually exclusive classes. Left unchanged to keep results comparable.
model.add(Dense(2, activation=tf.nn.sigmoid))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Two callbacks wrapping model.evaluate on the training and the test split.
# NOTE(review): KerasEpochCallback is defined outside this notebook; presumably it
# calls end_func(*end_args, **end_kwargs) at the end of every epoch and stores
# the return values in `.end_results` (read back after training) - confirm.
training_set_hist = KerasEpochCallback(end_func=model.evaluate,
                                       end_args=(train_tensor, train_labels),
                                       end_kwargs={'verbose': False})
test_set_hist = KerasEpochCallback(end_func=model.evaluate,
                                   end_args=(test_tensor, test_labels),
                                   end_kwargs={'verbose': False})
model.summary()

In [None]:
# Train for ITERS epochs; both callbacks are handed to Keras and their
# collected `.end_results` are read back below.
training_hist = model.fit(
    train_tensor,
    train_labels,
    epochs=ITERS,
    batch_size=32,
    callbacks=[training_set_hist, test_set_hist],
)

# Per-epoch (loss, accuracy) pairs from three sources: the running values
# reported by fit itself, and the two callbacks' results.
# NOTE(review): each callback result is unpacked below as a (loss, accuracy)
# pair, presumably the return value of model.evaluate - confirm.
fit_history = training_hist.history
training_metrics = np.array(list(zip(fit_history['loss'], fit_history['accuracy'])))
training_set_metrics = np.array(training_set_hist.end_results)
test_set_metrics = np.array(test_set_hist.end_results)

# One record per epoch, combining the three (loss, accuracy) pairs.
results = []
for fit_pair, train_pair, test_pair in zip(training_metrics, training_set_metrics, test_set_metrics):
    fit_loss, fit_acc = fit_pair
    train_loss, train_acc = train_pair
    test_loss, test_acc = test_pair
    results.append({
        'Training Loss': fit_loss,
        'Training Accuracy': fit_acc,
        'Training Set Loss': train_loss,
        'Training Set Accuracy': train_acc,
        'Test Set Loss': test_loss,
        'Test Set Accuracy': test_acc,
    })

results_df = pd.DataFrame(results)
results_df.index += 1  # number the epochs from 1 instead of 0

loss_columns = ['Training Loss', 'Training Set Loss', 'Test Set Loss']
accuracy_columns = ['Training Accuracy', 'Training Set Accuracy', 'Test Set Accuracy']

# Styled table as the cell output: per column, the best epoch is green and the
# worst is red (highest accuracy / lowest loss counts as best).
(
    results_df[['Training Loss', 'Training Accuracy',
                'Training Set Loss', 'Training Set Accuracy',
                'Test Set Loss', 'Test Set Accuracy']]
    .style
    .highlight_min(subset=accuracy_columns, color='lightcoral')
    .highlight_max(subset=accuracy_columns, color='lightgreen')
    .highlight_min(subset=loss_columns, color='lightgreen')
    .highlight_max(subset=loss_columns, color='lightcoral')
)

In [None]:
# Loss per epoch from the three measurements (column 0 of each metrics array).
# Epochs are numbered from 1 to match the index of the results table, and are
# derived from the recorded history rather than ITERS so the plot still works
# if ITERS is edited without re-running training.
epochs = np.arange(1, len(training_metrics) + 1)

# A dedicated figure/axes pair avoids drawing onto whatever figure is current.
fig, ax = plt.subplots()
ax.plot(epochs, training_metrics[:, 0], '-', label="During Training")
ax.plot(epochs, training_set_metrics[:, 0], '--', label="On Training Set")
ax.plot(epochs, test_set_metrics[:, 0], ':', label="On Test Set")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Loss")
ax.legend()
fig.tight_layout()

In [None]:
# Accuracy per epoch from the three measurements (column 1 of each metrics array).
# Epochs are numbered from 1 to match the index of the results table, and are
# derived from the recorded history rather than ITERS so the plot still works
# if ITERS is edited without re-running training.
epochs = np.arange(1, len(training_metrics) + 1)

# A dedicated figure/axes pair avoids drawing onto whatever figure is current.
fig, ax = plt.subplots()
ax.plot(epochs, training_metrics[:, 1], '-', label="During Training")
ax.plot(epochs, training_set_metrics[:, 1], '--', label="On Training Set")
ax.plot(epochs, test_set_metrics[:, 1], ':', label="On Test Set")
ax.set_xlabel("Epoch")
ax.set_ylabel("Accuracy")
ax.set_title("Accuracy")
ax.legend()
fig.tight_layout()

In [None]:
# Score the held-out samples; one row of two outputs per sample.
test_preds = model.predict(test_tensor)

# Collapse model outputs and one-hot targets to class indices in one vectorised call.
true_preds = np.argmax(test_preds, axis=1)
true_labels = np.argmax(test_labels, axis=1)

# Row-normalised confusion matrix. `labels` pins the class order explicitly so
# it always lines up with `display_labels`, even if one class happens to be
# absent from the test split.
# NOTE(review): assumes class 0 is 'reliable' and class 1 is 'unreliable' -
# confirm against the dataset's label encoding.
ConfusionMatrixDisplay.from_predictions(true_labels, true_preds, labels=[0, 1], normalize='true',
                                        cmap=plt.cm.Blues, display_labels=('reliable', 'unreliable'))