In [None]:
# Execute the helper notebooks in this kernel so the names they define become
# available to the cells below.
# NOTE(review): presumably provides `df`, `proc_doc_col`, `label_col` (and `pd`),
# which are used below but not defined in this notebook — confirm.
%run ./../data/load-dataset.ipynb
# NOTE(review): presumably provides `d2v_model` — confirm.
%run ./../doc2vec/_load-d2v-model.ipynb
# NOTE(review): presumably provides `KerasEpochCallback` — confirm.
%run ./../various/_epoch_callback.ipynb
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD

In [None]:
# Number of training epochs passed to `model.fit`.
ITERS = 10

# Seed shared by the train/test split and the random number generators below.
# Set to None to leave every generator unseeded.
RANDOM_SEED = 0
if RANDOM_SEED is not None:
    import os
    import random

    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this
    # assignment only influences processes spawned from this kernel, not the
    # hash randomisation of the kernel that is already running.
    os.environ['PYTHONHASHSEED'] = str(RANDOM_SEED)
    # Seed all three generators, not only TensorFlow's: Keras and NumPy-based
    # code can draw from Python's `random` and NumPy's global RNG as well.
    random.seed(RANDOM_SEED)
    np.random.seed(RANDOM_SEED)
    tf.random.set_seed(RANDOM_SEED)

In [None]:
# Pull the processed documents and their labels out of the dataset frame.
doc_label_frame = df[[proc_doc_col, label_col]]
corpus, labels = doc_label_frame.T.values

# One doc2vec vector per document, looked up by positional index.
# NOTE(review): assumes the model's document tags are 0..len(corpus)-1 in
# the same order as the rows of `df` — confirm against the d2v training code.
X = np.array([d2v_model.dv[doc_idx] for doc_idx, _ in enumerate(corpus)])

# One-hot encode the two classes by indexing rows of a 2x2 identity matrix.
one_hot_rows = np.eye(2)
y = np.array([one_hot_rows[int(label)] for label in labels])

train_samples, test_samples, train_labels, test_labels = train_test_split(
    X, y, random_state=RANDOM_SEED
)

In [None]:
# Feed-forward classifier over the document vectors: one hidden ReLU layer
# with dropout, followed by a 2-unit sigmoid output matching the one-hot labels.
model = Sequential()
model.add(Dense(100, input_dim=X.shape[1], activation=tf.nn.relu))
model.add(Dropout(0.5))
model.add(Dense(2, activation=tf.nn.sigmoid))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Callbacks that evaluate the model on the train and test splits.
# NOTE(review): KerasEpochCallback is defined outside this notebook; presumably
# it calls end_func(*end_args, **end_kwargs) after every epoch and collects the
# return values in `.end_results` — confirm in _epoch_callback.ipynb.
train_eval_hist = KerasEpochCallback(end_func=model.evaluate,
                                     end_args=(train_samples, train_labels),
                                     end_kwargs={'verbose': False})
# A stray `ii` token after `end_func=model.evaluate,` used to make this cell a
# SyntaxError; it has been removed.
test_eval_hist = KerasEpochCallback(end_func=model.evaluate,
                                    end_args=(test_samples, test_labels),
                                    end_kwargs={'verbose': False})
model_hist = model.fit(train_samples, train_labels, epochs=ITERS, batch_size=32,
                       callbacks=[train_eval_hist, test_eval_hist])

In [None]:
# Per-epoch (loss, accuracy) pairs from three sources:
#   * "Training": the values Keras recorded in the fit history,
#   * "Train" / "Test": the results collected by the two evaluation callbacks.
training_metrics = np.array([
    [loss, accuracy]
    for loss, accuracy in zip(model_hist.history['loss'], model_hist.history['accuracy'])
])
train_eval_metrics = np.array(train_eval_hist.end_results)
test_eval_metrics = np.array(test_eval_hist.end_results)

metric_columns = ['Training Loss', 'Training Accuracy',
                  'Train Loss', 'Train Accuracy',
                  'Test Loss', 'Test Accuracy']
accuracy_columns = ['Training Accuracy', 'Train Accuracy', 'Test Accuracy']
loss_columns = ['Training Loss', 'Train Loss', 'Test Loss']

# One record per epoch; the table is built from the record list in one go.
data = []
for epoch_idx in range(len(training_metrics)):
    (fit_loss, fit_acc), (tr_loss, tr_acc), (te_loss, te_acc) = (
        training_metrics[epoch_idx],
        train_eval_metrics[epoch_idx],
        test_eval_metrics[epoch_idx],
    )
    data.append(dict(zip(metric_columns,
                         (fit_loss, fit_acc, tr_loss, tr_acc, te_loss, te_acc))))

results = pd.DataFrame(data)
results.index = results.index + 1  # label rows by 1-based epoch number

# Green marks the best value in each column (highest accuracy / lowest loss),
# coral marks the worst.
styled_results = results[metric_columns].style
styled_results = styled_results.highlight_min(subset=accuracy_columns, color='lightcoral')
styled_results = styled_results.highlight_max(subset=accuracy_columns, color='lightgreen')
styled_results = styled_results.highlight_min(subset=loss_columns, color='lightgreen')
styled_results = styled_results.highlight_max(subset=loss_columns, color='lightcoral')
styled_results

In [None]:
# 'true' is forwarded to scikit-learn's `normalize` argument below.
normalize = 'true'

# Collapse the 2-column model outputs and the one-hot labels to class indices
# by taking the position of the largest entry in each row.
predicted_scores = model.predict(test_samples)
true_preds = list(np.argmax(predicted_scores, axis=1))
true_labels = list(np.argmax(test_labels, axis=1))

ConfusionMatrixDisplay.from_predictions(
    true_labels,
    true_preds,
    normalize=normalize,
    cmap=plt.cm.Blues,
    display_labels=('reliable', 'unreliable'),
)