In [None]:
########################################################################################################################
# Filename: Generate_ANN_Results.ipynb
#
# Purpose: Generate results from different ANN models trained on paragraph 
#          classification task
#
# Author(s): Bobby (Robert) Lumpkin
#
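# Library Dependencies: numpy, pandas, tensorflow, tensorflow_addons, scikit-learn, sklearn_json, bpmll,
#                       threshold_learning (local module in ../ThresholdFunctionLearning)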
########################################################################################################################

# Generate and Save Results from Different ANN Methods

In [1]:
import numpy as np
import pandas as pd
import os
import json
import ast
import random
import tensorflow as tf
import tensorflow_addons as tfa
from bpmll import bp_mll_loss
import sklearn_json as skljson
from sklearn.model_selection import train_test_split
from sklearn import metrics
import sys
## Set the working directory to 'ANN Results' so the relative paths used throughout this notebook resolve.
## The default below is the original author's local path; set the ANN_RESULTS_DIR environment variable
## to run the notebook from another location without editing this cell.
ANN_RESULTS_DIR = os.environ.get(
    'ANN_RESULTS_DIR',
    'C:\\Users\\rober\\OneDrive\\Documents\\STAT 6500\\Project\\NewsArticleClassification\\codes\\ANN Results'
)
os.chdir(ANN_RESULTS_DIR)

## Append the path to the ThresholdFunctionLearning directory (relative to the working directory set above)
## to the interpreter's search path so that the threshold_learning library can be imported
sys.path.append('../ThresholdFunctionLearning')
from threshold_learning import predict_test_labels_binary    ## Import the 'predict_test_labels_binary()' function from the 
from threshold_learning import predict_labels_binary         ## threshold_learning library

## Models on Reduced Dataset (each instance has at least one label)

In [2]:
## Load the reduced tfidf dataset (the 'with' block closes the file as soon as the JSON has been parsed)
with open('../BP-MLL Text Categorization/tfidf_trainTest_data_reduced.json') as file_object:
    tfidf_data_reduced = json.load(file_object)

## Convert each stored train/test split to a numpy array
X_train_hasLabel = np.array(tfidf_data_reduced['X_train_hasLabel'])
X_test_hasLabel = np.array(tfidf_data_reduced['X_test_hasLabel'])
Y_train_hasLabel = np.array(tfidf_data_reduced['Y_train_hasLabel'])
Y_test_hasLabel = np.array(tfidf_data_reduced['Y_test_hasLabel'])

### Feed-Forward Cross-Entropy Network

In [3]:
## Start by defining and compiling the cross-entropy loss network (bpmll used later)
tf.random.set_seed(123)    ## Set TensorFlow's global random seed before the layers are created
num_labels = 13            ## Number of units in the sigmoid output layer

## Architecture: Dense(32, relu) -> Dropout(0.5) -> Dense(num_labels, sigmoid)
model_ce_FF = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword for the Adam step size ('lr' is a deprecated alias)
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

## Hamming loss in multilabel mode with a 0.5 threshold; this same 'metric' object is also passed to
## the compile() calls of the models defined in later cells
metric = tfa.metrics.HammingLoss(mode = 'multilabel', threshold = 0.5)

model_ce_FF.compile(optimizer = optim_func,
              loss = 'binary_crossentropy', metrics = metric
              )

In [4]:
## Re-seed TensorFlow, then train the cross-entropy feed-forward network for 100 epochs.
## The test split is supplied as validation data so val_loss / val_hamming_loss are logged per epoch.
tf.random.set_seed(123)
history_ce_FF_lr001 = model_ce_FF.fit(
    x = X_train_hasLabel,
    y = Y_train_hasLabel,
    validation_data = (X_test_hasLabel, Y_test_hasLabel),
    epochs = 100,
    verbose = 2
)

Epoch 1/100
6/6 - 2s - loss: 0.6967 - hamming_loss: 0.4655 - val_loss: 0.6730 - val_hamming_loss: 0.4073
Epoch 2/100
6/6 - 0s - loss: 0.6624 - hamming_loss: 0.3916 - val_loss: 0.6541 - val_hamming_loss: 0.3427
Epoch 3/100
6/6 - 0s - loss: 0.6365 - hamming_loss: 0.3422 - val_loss: 0.6353 - val_hamming_loss: 0.2990
Epoch 4/100
6/6 - 0s - loss: 0.6105 - hamming_loss: 0.3003 - val_loss: 0.6157 - val_hamming_loss: 0.2736
Epoch 5/100
6/6 - 0s - loss: 0.5861 - hamming_loss: 0.2802 - val_loss: 0.5954 - val_hamming_loss: 0.2509
Epoch 6/100
6/6 - 0s - loss: 0.5562 - hamming_loss: 0.2517 - val_loss: 0.5747 - val_hamming_loss: 0.2308
Epoch 7/100
6/6 - 0s - loss: 0.5355 - hamming_loss: 0.2290 - val_loss: 0.5533 - val_hamming_loss: 0.2281
Epoch 8/100
6/6 - 0s - loss: 0.5093 - hamming_loss: 0.2111 - val_loss: 0.5323 - val_hamming_loss: 0.2142
Epoch 9/100
6/6 - 0s - loss: 0.4780 - hamming_loss: 0.1936 - val_loss: 0.5123 - val_hamming_loss: 0.2002
Epoch 10/100
6/6 - 0s - loss: 0.4527 - hamming_loss: 0.

In [5]:
## Convert the training history of the cross-entropy feed-forward network to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is kept commented out,
## presumably so that previously saved results are not overwritten on re-run.
history_ce_FF_lr001_df = pd.DataFrame(data = history_ce_FF_lr001.history)
#with open("Reduced Data Eval Metrics/Cross Entropy Feed Forward/history_ce_FF_lr001.json", "w") as outfile: 
#    history_ce_FF_lr001_df.to_json(outfile)

In [6]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## presumably the interval searched for thresholds -- confirm in threshold_learning
Y_train_pred = model_ce_FF.predict(X_train_hasLabel)
Y_test_pred = model_ce_FF.predict(X_test_hasLabel)

## predict_test_labels_binary() receives the training predictions and labels, the test predictions and t_range,
## and returns the binary test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train_hasLabel, Y_test_pred, t_range
)
ce_FF_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

### Feed-Forward BP-MLL Network

In [7]:
## Start by defining and compiling the bp-mll loss network 
## (relies on 'num_labels' and 'metric' defined in the cross-entropy network cell above)
tf.random.set_seed(123)    ## Set TensorFlow's global random seed before the layers are created

## Architecture: Dense(32, relu) -> Dropout(0.5) -> Dense(num_labels, sigmoid)
model_bpmll_FF = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword for the Adam step size ('lr' is a deprecated alias)
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

## Compile with the bp_mll_loss function imported from the bpmll package
model_bpmll_FF.compile(optimizer = optim_func,
              loss = bp_mll_loss, metrics = metric
              )

In [8]:
## Re-seed TensorFlow, then train the bp-mll feed-forward network for 100 epochs.
## The test split is supplied as validation data so val_loss / val_hamming_loss are logged per epoch.
tf.random.set_seed(123)
history_bpmll_FF_lr001 = model_bpmll_FF.fit(
    x = X_train_hasLabel,
    y = Y_train_hasLabel,
    validation_data = (X_test_hasLabel, Y_test_hasLabel),
    epochs = 100,
    verbose = 2
)

Epoch 1/100
6/6 - 1s - loss: 0.9794 - hamming_loss: 0.3596 - val_loss: 0.9620 - val_hamming_loss: 0.4353
Epoch 2/100
6/6 - 0s - loss: 0.9434 - hamming_loss: 0.4270 - val_loss: 0.9439 - val_hamming_loss: 0.4038
Epoch 3/100
6/6 - 0s - loss: 0.9161 - hamming_loss: 0.3934 - val_loss: 0.9250 - val_hamming_loss: 0.3802
Epoch 4/100
6/6 - 0s - loss: 0.8863 - hamming_loss: 0.3571 - val_loss: 0.9059 - val_hamming_loss: 0.3531
Epoch 5/100
6/6 - 0s - loss: 0.8618 - hamming_loss: 0.3536 - val_loss: 0.8866 - val_hamming_loss: 0.3304
Epoch 6/100
6/6 - 0s - loss: 0.8277 - hamming_loss: 0.3252 - val_loss: 0.8678 - val_hamming_loss: 0.2990
Epoch 7/100
6/6 - 0s - loss: 0.8053 - hamming_loss: 0.3055 - val_loss: 0.8489 - val_hamming_loss: 0.2788
Epoch 8/100
6/6 - 0s - loss: 0.7880 - hamming_loss: 0.2898 - val_loss: 0.8311 - val_hamming_loss: 0.2570
Epoch 9/100
6/6 - 0s - loss: 0.7562 - hamming_loss: 0.2745 - val_loss: 0.8144 - val_hamming_loss: 0.2439
Epoch 10/100
6/6 - 0s - loss: 0.7367 - hamming_loss: 0.

In [32]:
## Convert the training history of the bp-mll feed-forward network to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is kept commented out,
## presumably so that previously saved results are not overwritten on re-run.
history_bpmll_FF_lr001_df = pd.DataFrame(data = history_bpmll_FF_lr001.history)
#with open("Reduced Data Eval Metrics/BPMLL Feed Forward/history_bpmll_FF_lr001.json", "w") as outfile: 
#    history_bpmll_FF_lr001_df.to_json(outfile)

In [9]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## presumably the interval searched for thresholds -- confirm in threshold_learning
Y_train_pred = model_bpmll_FF.predict(X_train_hasLabel)
Y_test_pred = model_bpmll_FF.predict(X_test_hasLabel)

## predict_test_labels_binary() receives the training predictions and labels, the test predictions and t_range,
## and returns the binary test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train_hasLabel, Y_test_pred, t_range
)
bpmll_FF_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

### Bidirectional LSTM Recurrent Network

In [10]:
## Load the pre-processed data (the 'with' block closes the file as soon as the JSON has been parsed)
with open('../RNN Text Categorization/RNN_data_dict_reduced.json') as file_object_reduced:
    RNN_data_dict_reduced = json.load(file_object_reduced)

## json.load() yields a string here; ast.literal_eval() parses that string into the actual dictionary
RNN_data_dict_reduced = ast.literal_eval(RNN_data_dict_reduced)

## Convert each stored split to a numpy array.
## NOTE: Y_train_hasLabel / Y_test_hasLabel replace the arrays of the same name loaded from the tfidf file earlier.
train_padded_hasLabel = np.array(RNN_data_dict_reduced['train_padded_hasLabel'])
test_padded_hasLabel = np.array(RNN_data_dict_reduced['test_padded_hasLabel'])
Y_train_hasLabel = np.array(RNN_data_dict_reduced['Y_train_hasLabel'])
Y_test_hasLabel = np.array(RNN_data_dict_reduced['Y_test_hasLabel'])

In [11]:
## Define the bidirectional LSTM RNN architecture
## (relies on 'metric' defined in the cross-entropy feed-forward network cell)
tf.random.set_seed(123)     ## Set TensorFlow's global random seed before the layers are created
num_labels = 13             ## Number of units in the sigmoid output layer
max_length = 100            ## Passed to the Embedding layer as input_length
num_unique_words = 2711     ## Passed to the Embedding layer as its input dimension

## Architecture: Embedding(num_unique_words, 32) -> Bidirectional(LSTM(16)) -> Dense(num_labels, sigmoid)
model_bpmll_biLSTM = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences = False, return_state = False)),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword for the Adam step size ('lr' is a deprecated alias)
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

model_bpmll_biLSTM.compile(loss = bp_mll_loss, optimizer = optim_func, metrics = metric)

In [12]:
## Re-seed TensorFlow, then train the bp-mll bidirectional LSTM for 100 epochs on the padded sequences.
## The test split is supplied as validation data so val_loss / val_hamming_loss are logged per epoch.
tf.random.set_seed(123)
history_bpmll_RNN_lr001 = model_bpmll_biLSTM.fit(
    x = train_padded_hasLabel,
    y = Y_train_hasLabel,
    validation_data = (test_padded_hasLabel, Y_test_hasLabel),
    epochs = 100,
    verbose = 2
)

Epoch 1/100
6/6 - 6s - loss: 0.9963 - hamming_loss: 0.3636 - val_loss: 0.9916 - val_hamming_loss: 0.3759
Epoch 2/100
6/6 - 0s - loss: 0.9869 - hamming_loss: 0.3431 - val_loss: 0.9820 - val_hamming_loss: 0.3549
Epoch 3/100
6/6 - 0s - loss: 0.9746 - hamming_loss: 0.3475 - val_loss: 0.9679 - val_hamming_loss: 0.3584
Epoch 4/100
6/6 - 0s - loss: 0.9561 - hamming_loss: 0.3479 - val_loss: 0.9450 - val_hamming_loss: 0.3514
Epoch 5/100
6/6 - 0s - loss: 0.9259 - hamming_loss: 0.3230 - val_loss: 0.9044 - val_hamming_loss: 0.3042
Epoch 6/100
6/6 - 0s - loss: 0.8743 - hamming_loss: 0.3046 - val_loss: 0.8490 - val_hamming_loss: 0.3007
Epoch 7/100
6/6 - 0s - loss: 0.8240 - hamming_loss: 0.2985 - val_loss: 0.8097 - val_hamming_loss: 0.3007
Epoch 8/100
6/6 - 0s - loss: 0.7903 - hamming_loss: 0.2985 - val_loss: 0.7843 - val_hamming_loss: 0.3007
Epoch 9/100
6/6 - 0s - loss: 0.7678 - hamming_loss: 0.2985 - val_loss: 0.7658 - val_hamming_loss: 0.3007
Epoch 10/100
6/6 - 0s - loss: 0.7521 - hamming_loss: 0.

In [33]:
## Convert the training history of the bp-mll bidirectional LSTM to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is kept commented out,
## presumably so that previously saved results are not overwritten on re-run.
history_bpmll_RNN_lr001_df = pd.DataFrame(data = history_bpmll_RNN_lr001.history)
#with open("Reduced Data Eval Metrics/BPMLL RNN/history_bpmll_RNN_lr001.json", "w") as outfile: 
#    history_bpmll_RNN_lr001_df.to_json(outfile)

In [13]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## presumably the interval searched for thresholds -- confirm in threshold_learning
Y_train_pred = model_bpmll_biLSTM.predict(train_padded_hasLabel)
Y_test_pred = model_bpmll_biLSTM.predict(test_padded_hasLabel)

## predict_test_labels_binary() receives the training predictions and labels, the test predictions and t_range,
## and returns the binary test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train_hasLabel, Y_test_pred, t_range
)
bpmll_RNN_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

In [16]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Gather the test set hamming losses of the reduced-dataset models
##                                               (obtained with learned threshold functions) into a one-row df.
##                                               The .json export at the bottom is kept commented out.
val_hamming_loss_withThreshold_lr001_df = pd.DataFrame(
    data = {
        'ce_FF_lr001' : ce_FF_withThreshold,
        'bpmll_FF_lr001' : bpmll_FF_withThreshold,
        'bpmll_RNN_lr001' : bpmll_RNN_withThreshold,
    },
    index = [0]
)

#with open("Reduced Data Eval Metrics/val_hamming_loss_withThreshold_lr001.json", "w") as outfile: 
#    val_hamming_loss_withThreshold_lr001_df.to_json(outfile)

In [15]:
## Display the one-row dataframe of test set hamming losses (with learned thresholds) built in the previous cell
val_hamming_loss_withThreshold_lr001_df

Unnamed: 0,ce_FF_lr001,bpmll_FF_lr001,bpmll_RNN_lr001
0,0.185315,0.257867,0.204545


## Models on Full Dataset (some instances have no labels)

In [17]:
## Load the full tfidf dataset (the 'with' block closes the file as soon as the JSON has been parsed)
with open('../BP-MLL Text Categorization/tfidf_trainTest_data.json') as file_object:
    tfidf_data_full = json.load(file_object)

## Convert each stored train/test split to a numpy array
X_train = np.array(tfidf_data_full['X_train'])
X_test = np.array(tfidf_data_full['X_test'])
Y_train = np.array(tfidf_data_full['Y_train'])
Y_test = np.array(tfidf_data_full['Y_test'])

### Feed-Forward Cross-Entropy Network

In [18]:
## Use same architecture as the previous cross-entropy feed-forward network and train on full dataset
## (relies on 'metric' defined in the reduced-dataset cross-entropy network cell)
tf.random.set_seed(123)    ## Set TensorFlow's global random seed before the layers are created
num_labels = 13            ## Number of units in the sigmoid output layer

## Architecture: Dense(32, relu) -> Dropout(0.5) -> Dense(num_labels, sigmoid)
model_ce_FF_full = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword for the Adam step size ('lr' is a deprecated alias)
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

model_ce_FF_full.compile(optimizer = optim_func,
              loss = 'binary_crossentropy', metrics = metric
              )

In [19]:
## Re-seed TensorFlow, then train the cross-entropy feed-forward network on the full dataset for 100 epochs.
## The test split is supplied as validation data so val_loss / val_hamming_loss are logged per epoch.
tf.random.set_seed(123)
history_ce_FF_lr001_full = model_ce_FF_full.fit(
    x = X_train,
    y = Y_train,
    validation_data = (X_test, Y_test),
    epochs = 100,
    verbose = 2
)

Epoch 1/100
7/7 - 1s - loss: 0.6951 - hamming_loss: 0.3726 - val_loss: 0.6761 - val_hamming_loss: 0.4183
Epoch 2/100
7/7 - 0s - loss: 0.6628 - hamming_loss: 0.3898 - val_loss: 0.6565 - val_hamming_loss: 0.3510
Epoch 3/100
7/7 - 0s - loss: 0.6383 - hamming_loss: 0.3398 - val_loss: 0.6372 - val_hamming_loss: 0.3093
Epoch 4/100
7/7 - 0s - loss: 0.6048 - hamming_loss: 0.2823 - val_loss: 0.6169 - val_hamming_loss: 0.2764
Epoch 5/100
7/7 - 0s - loss: 0.5818 - hamming_loss: 0.2669 - val_loss: 0.5955 - val_hamming_loss: 0.2596
Epoch 6/100
7/7 - 0s - loss: 0.5500 - hamming_loss: 0.2419 - val_loss: 0.5735 - val_hamming_loss: 0.2420
Epoch 7/100
7/7 - 0s - loss: 0.5212 - hamming_loss: 0.2177 - val_loss: 0.5510 - val_hamming_loss: 0.2292
Epoch 8/100
7/7 - 0s - loss: 0.4909 - hamming_loss: 0.2070 - val_loss: 0.5297 - val_hamming_loss: 0.2115
Epoch 9/100
7/7 - 0s - loss: 0.4653 - hamming_loss: 0.1824 - val_loss: 0.5110 - val_hamming_loss: 0.1955
Epoch 10/100
7/7 - 0s - loss: 0.4542 - hamming_loss: 0.

In [20]:
## Convert the training history of the full-dataset cross-entropy feed-forward network to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is kept commented out,
## presumably so that previously saved results are not overwritten on re-run.
history_ce_FF_lr001_full_df = pd.DataFrame(data = history_ce_FF_lr001_full.history)
#with open("Full Data Eval Metrics/Cross Entropy Feed Forward/history_ce_FF_lr001_full.json", "w") as outfile: 
#    history_ce_FF_lr001_full_df.to_json(outfile)

In [21]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## presumably the interval searched for thresholds -- confirm in threshold_learning
Y_train_pred = model_ce_FF_full.predict(X_train)
Y_test_pred = model_ce_FF_full.predict(X_test)

## predict_test_labels_binary() receives the training predictions and labels, the test predictions and t_range,
## and returns the binary test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train, Y_test_pred, t_range
)
ce_FF_full_withThreshold = metrics.hamming_loss(y_true = Y_test, y_pred = test_labels_binary)

### LSTM Recurrent Network

In [22]:
## Load the pre-processed data (the 'with' block closes the file as soon as the JSON has been parsed)
with open('../RNN Text Categorization/RNN_data_dict.json') as file_object:
    RNN_data_dict = json.load(file_object)

## json.load() yields a string here; ast.literal_eval() parses that string into the actual dictionary
RNN_data_dict = ast.literal_eval(RNN_data_dict)

## Convert each stored split to a numpy array.
## NOTE: Y_train / Y_test replace the arrays of the same name loaded from the full tfidf file earlier.
train_padded = np.array(RNN_data_dict['train_padded'])
test_padded = np.array(RNN_data_dict['test_padded'])
Y_train = np.array(RNN_data_dict['Y_train'])
Y_test = np.array(RNN_data_dict['Y_test'])

In [23]:
## Define the LSTM RNN architecture
## (relies on 'metric' from the first cross-entropy network cell and on 'num_unique_words' / 'max_length'
##  from the bidirectional LSTM cell of the reduced-dataset section)
## NOTE(review): num_unique_words was set alongside the reduced dataset -- confirm that it also covers every
##               word index present in the full-dataset padded sequences.
tf.random.set_seed(123)    ## Set TensorFlow's global random seed before the layers are created
num_labels = 13            ## Number of units in the sigmoid output layer

## Architecture: Embedding(num_unique_words, 32) -> LSTM(16) -> Dense(num_labels, sigmoid)
model_LSTM_full = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = max_length),
    tf.keras.layers.LSTM(16, return_sequences = False, return_state = False),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword for the Adam step size ('lr' is a deprecated alias)
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

model_LSTM_full.compile(loss = 'binary_crossentropy', optimizer = optim_func, metrics = metric)

In [24]:
## Re-seed TensorFlow, then train the cross-entropy LSTM on the full dataset's padded sequences for 100 epochs.
## The test split is supplied as validation data so val_loss / val_hamming_loss are logged per epoch.
tf.random.set_seed(123)
history_ce_RNN_lr001_full = model_LSTM_full.fit(
    x = train_padded,
    y = Y_train,
    validation_data = (test_padded, Y_test),
    epochs = 100,
    verbose = 2
)

Epoch 1/100
7/7 - 3s - loss: 0.6923 - hamming_loss: 0.4090 - val_loss: 0.6865 - val_hamming_loss: 0.3421
Epoch 2/100
7/7 - 0s - loss: 0.6830 - hamming_loss: 0.2950 - val_loss: 0.6763 - val_hamming_loss: 0.2043
Epoch 3/100
7/7 - 0s - loss: 0.6716 - hamming_loss: 0.1864 - val_loss: 0.6616 - val_hamming_loss: 0.1723
Epoch 4/100
7/7 - 0s - loss: 0.6542 - hamming_loss: 0.1792 - val_loss: 0.6368 - val_hamming_loss: 0.1723
Epoch 5/100
7/7 - 0s - loss: 0.6246 - hamming_loss: 0.1792 - val_loss: 0.5958 - val_hamming_loss: 0.1723
Epoch 6/100
7/7 - 0s - loss: 0.5810 - hamming_loss: 0.1792 - val_loss: 0.5540 - val_hamming_loss: 0.1723
Epoch 7/100
7/7 - 0s - loss: 0.5462 - hamming_loss: 0.1792 - val_loss: 0.5280 - val_hamming_loss: 0.1723
Epoch 8/100
7/7 - 0s - loss: 0.5233 - hamming_loss: 0.1792 - val_loss: 0.5068 - val_hamming_loss: 0.1723
Epoch 9/100
7/7 - 0s - loss: 0.5037 - hamming_loss: 0.1792 - val_loss: 0.4890 - val_hamming_loss: 0.1723
Epoch 10/100
7/7 - 0s - loss: 0.4869 - hamming_loss: 0.

In [25]:
## Convert the training history of the full-dataset cross-entropy LSTM to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is kept commented out,
## presumably so that previously saved results are not overwritten on re-run.
history_ce_RNN_lr001_full_df = pd.DataFrame(data = history_ce_RNN_lr001_full.history)
#with open("Full Data Eval Metrics/Cross Entropy RNN/history_ce_RNN_lr001_full.json", "w") as outfile: 
#    history_ce_RNN_lr001_full_df.to_json(outfile)

In [26]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## presumably the interval searched for thresholds -- confirm in threshold_learning
Y_train_pred = model_LSTM_full.predict(train_padded)
Y_test_pred = model_LSTM_full.predict(test_padded)

## predict_test_labels_binary() receives the training predictions and labels, the test predictions and t_range,
## and returns the binary test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train, Y_test_pred, t_range
)
ce_RNN_full_withThreshold = metrics.hamming_loss(y_true = Y_test, y_pred = test_labels_binary)

In [27]:
## Display the test set hamming loss of the full-dataset LSTM computed with the learned threshold function
ce_RNN_full_withThreshold

0.17307692307692307

In [29]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Collect the test set hamming losses for the models 
##                                               with learned threshold functions into a df and write to .json file
val_hamming_loss_withThreshold_lr001_df = pd.DataFrame({'ce_FF_full_lr001' : ce_FF_full_withThreshold,
                                                        'ce_RNN_full_lr001' : ce_RNN_full_withThreshold},
                                                        index = [0])

#with open("Full Data Eval Metrics/val_hamming_loss_withThreshold_lr001.json", "w") as outfile: 
#    val_hamming_loss_withThreshold_lr001_df.to_json(outfile)

In [30]:
## Display the one-row dataframe of full-dataset test set hamming losses built in the previous cell
val_hamming_loss_withThreshold_lr001_df

Unnamed: 0,ce_FF_full_lr001,ce_RNN_full_lr001
0,0.178686,0.173077
