In [None]:
# Execute two sibling notebooks inside this kernel before anything else runs.
# The cells below use names that this notebook never defines or imports itself
# (df, proc_doc_col, label_col, dictionary, SILENT, pd, Activation,
# BasicSparseNeuralNetwork), so they are presumably provided by these two runs
# -- TODO confirm which notebook supplies which name.
%run ./../data/load-dataset.ipynb
%run ./../various/basic-neural-network.ipynb
%matplotlib inline

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
# Base number of training iterations; the training cell requests ITERS + 1 epochs.
ITERS = 100

# Single seed reused for the train/test split and for the network's random_seed.
RANDOM_SEED = 0

In [None]:
# Pull the preprocessed-document column and the label column out of the frame,
# one array per column.
corpus, labels = df[[proc_doc_col, label_col]].to_numpy().T

# Encode every document with dictionary.doc2bow and wrap each result in its own
# array; X stays a plain Python list with one entry per document.
X = [np.array(bow) for bow in map(dictionary.doc2bow, corpus)]

# One-hot encode the labels by selecting rows of the 2x2 identity matrix,
# so label 0 -> [1, 0] and label 1 -> [0, 1].
y = np.eye(2)[[int(label) for label in labels]]

# Seeded split; the test fraction is left at scikit-learn's default.
train_samples, test_samples, train_labels, test_labels = train_test_split(
    X, y, random_state=RANDOM_SEED
)

In [None]:
# ---- Hyperparameters (kept as notebook-level names) -------------------------
hidden_layer_size = 100
# ITERS + 1 epochs: if fit() yields once per epoch (TODO confirm), the results
# frame has rows 0..ITERS and the every-10th-row summary below ends on row ITERS.
iterations = ITERS + 1
alpha = 1e-3
batch_size = 32
hid_activation = Activation.RELU
out_activation = Activation.SIGMOID
dropout = 0.6
normalize = True
input_to_binary = True
skip_remaining = True
is_generator = True
random_seed = RANDOM_SEED

# ---- Model ------------------------------------------------------------------
# Input size is the number of entries in `dictionary`.
model = BasicSparseNeuralNetwork(
    len(dictionary),
    hidden_layer_size,
    epochs=iterations,
    alpha=alpha,
    batch_size=batch_size,
    skip_remaining=skip_remaining,
    hid_activation=hid_activation,
    out_activation=out_activation,
    dropout=dropout,
    normalize=normalize,
    input_to_binary=input_to_binary,
    is_generator=is_generator,
    random_seed=random_seed,
)

# ---- Training loop ----------------------------------------------------------
# fit() is consumed as an iterable; after every step the model is scored on both
# splits and one record is stored. evaluate() is unpacked as a (loss, accuracy) pair.
# The loop variables (e.g. test_preds) remain defined after the loop and hold the
# values of the final iteration.
data = []
for _ in tqdm(model.fit(train_samples, train_labels), disable=SILENT, total=iterations):
    train_preds = model.predict(train_samples)
    test_preds = model.predict(test_samples)
    train_loss, train_accuracy = model.evaluate(train_preds, train_labels)
    test_loss, test_accuracy = model.evaluate(test_preds, test_labels)
    data.append({
        'Train Predictions': train_preds,
        'Test Predictions': test_preds,
        'Train Loss (MSE)': train_loss,
        'Test Loss (MSE)': test_loss,
        'Train Accuracy': train_accuracy,
        'Test Accuracy': test_accuracy,
    })

# ---- Summary table ----------------------------------------------------------
results = pd.DataFrame(data)

accuracy_cols = ['Train Accuracy', 'Test Accuracy']
loss_cols = ['Train Loss (MSE)', 'Test Loss (MSE)']

# Show every 10th row; per column, the best value is green and the worst is red
# (high accuracy / low loss count as best).
(
    results[['Train Loss (MSE)', 'Train Accuracy', 'Test Loss (MSE)', 'Test Accuracy']]
    .iloc[::10]
    .style
    .highlight_min(subset=accuracy_cols, color='lightcoral')
    .highlight_max(subset=accuracy_cols, color='lightgreen')
    .highlight_min(subset=loss_cols, color='lightgreen')
    .highlight_max(subset=loss_cols, color='lightcoral')
)

In [None]:
# 'true' asks scikit-learn to normalise the matrix over the true classes (rows).
# NOTE: this rebinds the notebook-level `normalize`, which the training cell
# sets to a boolean model option.
normalize = 'true'

# Collapse one-hot style vectors to class indices. `test_preds` is the value left
# over from the last iteration of the training loop.
true_preds = list(map(np.argmax, test_preds))
true_labels = list(map(np.argmax, test_labels))

# Class index 0 is displayed as 'reliable' and index 1 as 'unreliable'.
ConfusionMatrixDisplay.from_predictions(
    true_labels,
    true_preds,
    normalize=normalize,
    cmap=plt.cm.Blues,
    display_labels=('reliable', 'unreliable'),
)