In [None]:
########################################################################################################################
# Filename: Generate_ANN_Results.ipynb
#
# Purpose: Generate results from different ANN models trained on paragraph 
#          classification task
#
# Author(s): Bobby (Robert) Lumpkin
#
# Library Dependencies: numpy, pandas, tensorflow, tensorflow_addons, bpmll, scikit-learn, sklearn_json
########################################################################################################################

# Generate and Save Results from Different ANN Methods

In [1]:
import numpy as np
import pandas as pd
import os
import json
import ast
import random
import tensorflow as tf
import tensorflow_addons as tfa
from bpmll import bp_mll_loss
import sklearn_json as skljson
from sklearn.model_selection import train_test_split
from sklearn import metrics
import sys
## Set the working directory to 'ANN Results' so the relative paths used below resolve.
## The location can be overridden through the ANN_RESULTS_DIR environment variable; the
## original author's absolute path is kept as the default. If the chosen directory does
## not exist on this machine, the current working directory is left unchanged.
_ann_results_dir = os.environ.get(
    'ANN_RESULTS_DIR',
    'C:\\Users\\rober\\OneDrive\\Documents\\STAT 6500\\Project\\NewsArticleClassification\\codes\\ANN Results'
)
if os.path.isdir(_ann_results_dir):
    os.chdir(_ann_results_dir)

sys.path.append('../ThresholdFunctionLearning')    ## Append path to the ThresholdFunctionLearning directory (relative to the
                                                   ## working directory) to the interpreter's search path
from threshold_learning import predict_test_labels_binary    ## Import the 'predict_test_labels_binary()' function from the 
from threshold_learning import predict_labels_binary         ## threshold_learning library

## Models on Reduced Dataset (each instance has at least one label)

In [2]:
## Load the reduced tfidf dataset
## The file is opened in a 'with' block so the handle is closed as soon as parsing finishes
with open('../BP-MLL Text Categorization/tfidf_trainTest_data_reduced.json') as file_object:
    tfidf_data_reduced = json.load(file_object)

## Convert each stored train/test entry into a numpy array
X_train_hasLabel = np.array(tfidf_data_reduced['X_train_hasLabel'])
X_test_hasLabel = np.array(tfidf_data_reduced['X_test_hasLabel'])
Y_train_hasLabel = np.array(tfidf_data_reduced['Y_train_hasLabel'])
Y_test_hasLabel = np.array(tfidf_data_reduced['Y_test_hasLabel'])

### Feed-Forward Cross-Entropy Network

In [3]:
## Start by defining and compiling the cross-entropy loss network (bpmll used later)
tf.random.set_seed(123)    ## Fix the TensorFlow seed before the layers are created
num_labels = 13            ## Number of units in the sigmoid output layer

## Architecture: 32-unit ReLU hidden layer -> dropout (rate 0.5) -> sigmoid output layer
model_ce_FF = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

## Hamming loss with outputs thresholded at 0.5; this same metric object is also
## passed to the compile calls of the models defined in later cells
metric = tfa.metrics.HammingLoss(mode = 'multilabel', threshold = 0.5)

## NOTE(review): 'categorical_crossentropy' is normally paired with a single-label softmax
## output; with independent sigmoid outputs 'binary_crossentropy' is the usual choice.
## Left unchanged because saved results depend on it -- confirm the loss is intentional.
model_ce_FF.compile(optimizer = optim_func,
              loss = 'categorical_crossentropy', metrics = metric
              )

In [4]:
## Re-seed TensorFlow immediately before training, then fit for 100 epochs; the test
## split is supplied as validation data and verbose = 2 prints one line per epoch
tf.random.set_seed(123)
history_ce_FF_lr001 = model_ce_FF.fit(
    x = X_train_hasLabel,
    y = Y_train_hasLabel,
    epochs = 100,
    verbose = 2,
    validation_data = (X_test_hasLabel, Y_test_hasLabel),
)

Epoch 1/100
6/6 - 3s - loss: 8.4710 - hamming_loss: 0.4803 - val_loss: 8.2122 - val_hamming_loss: 0.4685
Epoch 2/100
6/6 - 0s - loss: 8.1678 - hamming_loss: 0.4694 - val_loss: 8.0670 - val_hamming_loss: 0.4703
Epoch 3/100
6/6 - 0s - loss: 8.0293 - hamming_loss: 0.4672 - val_loss: 7.9268 - val_hamming_loss: 0.4747
Epoch 4/100
6/6 - 0s - loss: 7.9066 - hamming_loss: 0.4580 - val_loss: 7.7977 - val_hamming_loss: 0.4712
Epoch 5/100
6/6 - 0s - loss: 7.8850 - hamming_loss: 0.4624 - val_loss: 7.6922 - val_hamming_loss: 0.4773
Epoch 6/100
6/6 - 0s - loss: 7.7712 - hamming_loss: 0.4563 - val_loss: 7.6317 - val_hamming_loss: 0.4843
Epoch 7/100
6/6 - 0s - loss: 7.9109 - hamming_loss: 0.4650 - val_loss: 7.6316 - val_hamming_loss: 0.4948
Epoch 8/100
6/6 - 0s - loss: 8.1664 - hamming_loss: 0.4760 - val_loss: 7.7078 - val_hamming_loss: 0.5096
Epoch 9/100
6/6 - 0s - loss: 8.2705 - hamming_loss: 0.4821 - val_loss: 7.8541 - val_hamming_loss: 0.5157
Epoch 10/100
6/6 - 0s - loss: 8.6326 - hamming_loss: 0.

In [31]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Tabulate the recorded training history as a dataframe.
##    The .json export below is kept commented out (presumably to protect files already on disk).
history_ce_FF_lr001_df = pd.DataFrame(data = history_ce_FF_lr001.history)
#with open("Reduced Data Eval Metrics/Cross Entropy Feed Forward/history_ce_FF_lr001.json", "w") as outfile: 
#    history_ce_FF_lr001_df.to_json(outfile)

In [5]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## Range handed to the threshold learner

## Model outputs for the train and test inputs
Y_train_pred = model_ce_FF.predict(X_train_hasLabel)
Y_test_pred = model_ce_FF.predict(X_test_hasLabel)

## predict_test_labels_binary returns a pair: binary test labels and (presumably) the fitted threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred,
    Y_train_hasLabel,
    Y_test_pred,
    t_range,
)

## Hamming loss of the binarized test predictions against the true test labels
ce_FF_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

### Feed-Forward BP-MLL Network

In [6]:
## Start by defining and compiling the bp-mll loss network 
## ('num_labels' and 'metric' are taken from the cross-entropy network cell above)
tf.random.set_seed(123)    ## Fix the TensorFlow seed before the layers are created

## Architecture: 32-unit ReLU hidden layer -> dropout (rate 0.5) -> sigmoid output layer
model_bpmll_FF = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

## Train with the bp_mll_loss function imported from the bpmll package
model_bpmll_FF.compile(optimizer = optim_func,
              loss = bp_mll_loss, metrics = metric
              )

In [7]:
## Re-seed TensorFlow immediately before training, then fit for 100 epochs; the test
## split is supplied as validation data and verbose = 2 prints one line per epoch
tf.random.set_seed(123)
history_bpmll_FF_lr001 = model_bpmll_FF.fit(
    x = X_train_hasLabel,
    y = Y_train_hasLabel,
    epochs = 100,
    verbose = 2,
    validation_data = (X_test_hasLabel, Y_test_hasLabel),
)

Epoch 1/100
6/6 - 2s - loss: 0.9794 - hamming_loss: 0.5370 - val_loss: 0.9620 - val_hamming_loss: 0.4353
Epoch 2/100
6/6 - 0s - loss: 0.9434 - hamming_loss: 0.4270 - val_loss: 0.9439 - val_hamming_loss: 0.4038
Epoch 3/100
6/6 - 0s - loss: 0.9161 - hamming_loss: 0.3934 - val_loss: 0.9250 - val_hamming_loss: 0.3802
Epoch 4/100
6/6 - 0s - loss: 0.8863 - hamming_loss: 0.3571 - val_loss: 0.9059 - val_hamming_loss: 0.3531
Epoch 5/100
6/6 - 0s - loss: 0.8618 - hamming_loss: 0.3536 - val_loss: 0.8866 - val_hamming_loss: 0.3304
Epoch 6/100
6/6 - 0s - loss: 0.8277 - hamming_loss: 0.3252 - val_loss: 0.8678 - val_hamming_loss: 0.2990
Epoch 7/100
6/6 - 0s - loss: 0.8053 - hamming_loss: 0.3055 - val_loss: 0.8489 - val_hamming_loss: 0.2788
Epoch 8/100
6/6 - 0s - loss: 0.7880 - hamming_loss: 0.2898 - val_loss: 0.8311 - val_hamming_loss: 0.2570
Epoch 9/100
6/6 - 0s - loss: 0.7562 - hamming_loss: 0.2745 - val_loss: 0.8144 - val_hamming_loss: 0.2439
Epoch 10/100
6/6 - 0s - loss: 0.7367 - hamming_loss: 0.

In [32]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Tabulate the recorded training history as a dataframe.
##    The .json export below is kept commented out (presumably to protect files already on disk).
history_bpmll_FF_lr001_df = pd.DataFrame(data = history_bpmll_FF_lr001.history)
#with open("Reduced Data Eval Metrics/BPMLL Feed Forward/history_bpmll_FF_lr001.json", "w") as outfile: 
#    history_bpmll_FF_lr001_df.to_json(outfile)

In [8]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## Range handed to the threshold learner

## Model outputs for the train and test inputs
Y_train_pred = model_bpmll_FF.predict(X_train_hasLabel)
Y_test_pred = model_bpmll_FF.predict(X_test_hasLabel)

## predict_test_labels_binary returns a pair: binary test labels and (presumably) the fitted threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred,
    Y_train_hasLabel,
    Y_test_pred,
    t_range,
)

## Hamming loss of the binarized test predictions against the true test labels
bpmll_FF_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

### Bidirectional LSTM Recurrent Network

In [9]:
## Load the pre-processed data
## The file is opened in a 'with' block so the handle is closed as soon as parsing finishes
with open('../RNN Text Categorization/RNN_data_dict_reduced.json') as file_object_reduced:
    RNN_data_dict_reduced = json.load(file_object_reduced)

## The parsed JSON value is passed through ast.literal_eval, i.e. the file evidently
## stores the dictionary as a string containing a Python literal
RNN_data_dict_reduced = ast.literal_eval(RNN_data_dict_reduced)

## Convert the stored entries to numpy arrays; note that Y_train_hasLabel / Y_test_hasLabel
## are rebound here, replacing the arrays loaded from the tfidf file earlier
train_padded_hasLabel = np.array(RNN_data_dict_reduced['train_padded_hasLabel'])
test_padded_hasLabel = np.array(RNN_data_dict_reduced['test_padded_hasLabel'])
Y_train_hasLabel = np.array(RNN_data_dict_reduced['Y_train_hasLabel'])
Y_test_hasLabel = np.array(RNN_data_dict_reduced['Y_test_hasLabel'])

In [10]:
## Define the bidirectional LSTM RNN architecture
## ('metric' is the HammingLoss object created in the cross-entropy network cell above)
tf.random.set_seed(123)    ## Fix the TensorFlow seed before the layers are created
num_labels = 13            ## Number of units in the sigmoid output layer
max_length = 100           ## Passed to the Embedding layer as input_length
num_unique_words = 2711    ## Passed to the Embedding layer as its input dimension

## Architecture: 32-dim embedding -> bidirectional LSTM (16 units, final output only) -> sigmoid output layer
model_bpmll_biLSTM = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences = False, return_state = False)),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

model_bpmll_biLSTM.compile(loss = bp_mll_loss, optimizer = optim_func, metrics = metric)

In [11]:
## Re-seed TensorFlow immediately before training, then fit for 100 epochs on the padded
## sequences; the test split is supplied as validation data and verbose = 2 prints one line per epoch
tf.random.set_seed(123)
history_bpmll_RNN_lr001 = model_bpmll_biLSTM.fit(
    x = train_padded_hasLabel,
    y = Y_train_hasLabel,
    epochs = 100,
    verbose = 2,
    validation_data = (test_padded_hasLabel, Y_test_hasLabel),
)

Epoch 1/100
6/6 - 9s - loss: 0.9963 - hamming_loss: 0.3636 - val_loss: 0.9916 - val_hamming_loss: 0.3759
Epoch 2/100
6/6 - 0s - loss: 0.9869 - hamming_loss: 0.3431 - val_loss: 0.9820 - val_hamming_loss: 0.3549
Epoch 3/100
6/6 - 0s - loss: 0.9746 - hamming_loss: 0.3475 - val_loss: 0.9679 - val_hamming_loss: 0.3584
Epoch 4/100
6/6 - 0s - loss: 0.9561 - hamming_loss: 0.3479 - val_loss: 0.9450 - val_hamming_loss: 0.3514
Epoch 5/100
6/6 - 0s - loss: 0.9259 - hamming_loss: 0.3230 - val_loss: 0.9044 - val_hamming_loss: 0.3042
Epoch 6/100
6/6 - 0s - loss: 0.8743 - hamming_loss: 0.3046 - val_loss: 0.8490 - val_hamming_loss: 0.3007
Epoch 7/100
6/6 - 0s - loss: 0.8240 - hamming_loss: 0.2985 - val_loss: 0.8097 - val_hamming_loss: 0.3007
Epoch 8/100
6/6 - 0s - loss: 0.7903 - hamming_loss: 0.2985 - val_loss: 0.7843 - val_hamming_loss: 0.3007
Epoch 9/100
6/6 - 0s - loss: 0.7678 - hamming_loss: 0.2985 - val_loss: 0.7658 - val_hamming_loss: 0.3007
Epoch 10/100
6/6 - 0s - loss: 0.7521 - hamming_loss: 0.

In [33]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Tabulate the recorded training history as a dataframe.
##    The .json export below is kept commented out (presumably to protect files already on disk).
history_bpmll_RNN_lr001_df = pd.DataFrame(data = history_bpmll_RNN_lr001.history)
#with open("Reduced Data Eval Metrics/BPMLL RNN/history_bpmll_RNN_lr001.json", "w") as outfile: 
#    history_bpmll_RNN_lr001_df.to_json(outfile)

In [12]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## Range handed to the threshold learner

## Model outputs for the padded train and test sequences
Y_train_pred = model_bpmll_biLSTM.predict(train_padded_hasLabel)
Y_test_pred = model_bpmll_biLSTM.predict(test_padded_hasLabel)

## predict_test_labels_binary returns a pair: binary test labels and (presumably) the fitted threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred,
    Y_train_hasLabel,
    Y_test_pred,
    t_range,
)

## Hamming loss of the binarized test predictions against the true test labels
bpmll_RNN_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

In [27]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Gather the test set hamming losses of the three
##    reduced-data models (with learned threshold functions) into a one-row df.
##    The .json export below is kept commented out.
reduced_withThreshold_losses = {
    'ce_FF_lr001' : ce_FF_withThreshold,
    'bpmll_FF_lr001' : bpmll_FF_withThreshold,
    'bpmll_RNN_lr001' : bpmll_RNN_withThreshold,
}
val_hamming_loss_withThreshold_lr001_df = pd.DataFrame(reduced_withThreshold_losses, index = [0])

#with open("Reduced Data Eval Metrics/val_hamming_loss_withThreshold_lr001.json", "w") as outfile: 
#    val_hamming_loss_withThreshold_lr001_df.to_json(outfile)

In [26]:
## Display the one-row dataframe of test set hamming losses built in the previous cell
val_hamming_loss_withThreshold_lr001_df

Unnamed: 0,ce_FF_lr001,bpmll_FF_lr001,bpmll_RNN_lr001
0,0.667832,0.257867,0.204545


## Models on Full Dataset (some instances have no labels)

In [15]:
## Load the full tfidf dataset
## The file is opened in a 'with' block so the handle is closed as soon as parsing finishes
with open('../BP-MLL Text Categorization/tfidf_trainTest_data.json') as file_object:
    tfidf_data_full = json.load(file_object)

## Convert each stored train/test entry into a numpy array
X_train = np.array(tfidf_data_full['X_train'])
X_test = np.array(tfidf_data_full['X_test'])
Y_train = np.array(tfidf_data_full['Y_train'])
Y_test = np.array(tfidf_data_full['Y_test'])

### Feed-Forward Cross-Entropy Network

In [16]:
## Use same architecture as the previous cross-entropy feed-forward network and train on full dataset
## ('metric' is the HammingLoss object created in the first cross-entropy network cell)
tf.random.set_seed(123)    ## Fix the TensorFlow seed before the layers are created
num_labels = 13            ## Number of units in the sigmoid output layer

## Architecture: 32-unit ReLU hidden layer -> dropout (rate 0.5) -> sigmoid output layer
model_ce_FF_full = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

## NOTE(review): 'categorical_crossentropy' is normally paired with a single-label softmax
## output; with independent sigmoid outputs 'binary_crossentropy' is the usual choice.
## Left unchanged because saved results depend on it -- confirm the loss is intentional.
model_ce_FF_full.compile(optimizer = optim_func,
              loss = 'categorical_crossentropy', metrics = metric
              )

In [17]:
## Re-seed TensorFlow immediately before training, then fit for 100 epochs; the test
## split is supplied as validation data and verbose = 2 prints one line per epoch
tf.random.set_seed(123)
history_ce_FF_lr001_full = model_ce_FF_full.fit(
    x = X_train,
    y = Y_train,
    epochs = 100,
    verbose = 2,
    validation_data = (X_test, Y_test),
)

Epoch 1/100
7/7 - 1s - loss: 7.5654 - hamming_loss: 0.3860 - val_loss: 7.6821 - val_hamming_loss: 0.4688
Epoch 2/100
7/7 - 0s - loss: 7.3172 - hamming_loss: 0.4687 - val_loss: 7.5401 - val_hamming_loss: 0.4704
Epoch 3/100
7/7 - 0s - loss: 7.1500 - hamming_loss: 0.4651 - val_loss: 7.4061 - val_hamming_loss: 0.4720
Epoch 4/100
7/7 - 0s - loss: 6.9570 - hamming_loss: 0.4580 - val_loss: 7.2831 - val_hamming_loss: 0.4720
Epoch 5/100
7/7 - 0s - loss: 7.0566 - hamming_loss: 0.4492 - val_loss: 7.1935 - val_hamming_loss: 0.4840
Epoch 6/100
7/7 - 0s - loss: 6.8512 - hamming_loss: 0.4675 - val_loss: 7.1488 - val_hamming_loss: 0.4848
Epoch 7/100
7/7 - 0s - loss: 6.7843 - hamming_loss: 0.4647 - val_loss: 7.1674 - val_hamming_loss: 0.4864
Epoch 8/100
7/7 - 0s - loss: 7.1565 - hamming_loss: 0.4496 - val_loss: 7.2623 - val_hamming_loss: 0.4952
Epoch 9/100
7/7 - 0s - loss: 7.3134 - hamming_loss: 0.4536 - val_loss: 7.4056 - val_hamming_loss: 0.5096
Epoch 10/100
7/7 - 0s - loss: 7.8009 - hamming_loss: 0.

In [34]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Tabulate the recorded training history as a dataframe.
##    The .json export below is kept commented out (presumably to protect files already on disk).
history_ce_FF_lr001_full_df = pd.DataFrame(data = history_ce_FF_lr001_full.history)
#with open("Full Data Eval Metrics/Cross Entropy Feed Forward/history_ce_FF_lr001_full.json", "w") as outfile: 
#    history_ce_FF_lr001_full_df.to_json(outfile)

In [18]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## Range handed to the threshold learner

## Model outputs for the train and test inputs
Y_train_pred = model_ce_FF_full.predict(X_train)
Y_test_pred = model_ce_FF_full.predict(X_test)

## predict_test_labels_binary returns a pair: binary test labels and (presumably) the fitted threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred,
    Y_train,
    Y_test_pred,
    t_range,
)

## Hamming loss of the binarized test predictions against the true test labels
ce_FF_full_withThreshold = metrics.hamming_loss(y_true = Y_test, y_pred = test_labels_binary)

### LSTM Recurrent Network

In [19]:
## Load the pre-processed data
## The file is opened in a 'with' block so the handle is closed as soon as parsing finishes
with open('../RNN Text Categorization/RNN_data_dict.json') as file_object:
    RNN_data_dict = json.load(file_object)

## The parsed JSON value is passed through ast.literal_eval, i.e. the file evidently
## stores the dictionary as a string containing a Python literal
RNN_data_dict = ast.literal_eval(RNN_data_dict)

## Convert the stored entries to numpy arrays; note that Y_train / Y_test are rebound
## here, replacing the arrays loaded from the tfidf file earlier
train_padded = np.array(RNN_data_dict['train_padded'])
test_padded = np.array(RNN_data_dict['test_padded'])
Y_train = np.array(RNN_data_dict['Y_train'])
Y_test = np.array(RNN_data_dict['Y_test'])

In [20]:
## Define the LSTM RNN architecture
## ('num_unique_words', 'max_length' and 'metric' are not set in this cell; they come
##  from the bidirectional LSTM and cross-entropy network cells earlier in the notebook)
tf.random.set_seed(123)    ## Fix the TensorFlow seed before the layers are created
num_labels = 13            ## Number of units in the sigmoid output layer

## Architecture: 32-dim embedding -> LSTM (16 units, final output only) -> sigmoid output layer
model_LSTM_full = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = max_length),
    tf.keras.layers.LSTM(16, return_sequences = False, return_state = False),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.001)

## NOTE(review): 'categorical_crossentropy' is normally paired with a single-label softmax
## output; with independent sigmoid outputs 'binary_crossentropy' is the usual choice.
## Left unchanged because saved results depend on it -- confirm the loss is intentional.
model_LSTM_full.compile(loss = 'categorical_crossentropy', optimizer = optim_func, metrics = metric)

In [21]:
## Re-seed TensorFlow immediately before training, then fit for 100 epochs on the padded
## sequences; the test split is supplied as validation data and verbose = 2 prints one line per epoch
tf.random.set_seed(123)
history_ce_RNN_lr001_full = model_LSTM_full.fit(
    x = train_padded,
    y = Y_train,
    epochs = 100,
    verbose = 2,
    validation_data = (test_padded, Y_test),
)

Epoch 1/100
7/7 - 6s - loss: 7.6843 - hamming_loss: 0.5867 - val_loss: 7.9091 - val_hamming_loss: 0.4639
Epoch 2/100
7/7 - 0s - loss: 7.6235 - hamming_loss: 0.5052 - val_loss: 7.8395 - val_hamming_loss: 0.5393
Epoch 3/100
7/7 - 0s - loss: 7.5496 - hamming_loss: 0.5551 - val_loss: 7.7410 - val_hamming_loss: 0.5393
Epoch 4/100
7/7 - 0s - loss: 7.4440 - hamming_loss: 0.5551 - val_loss: 7.5849 - val_hamming_loss: 0.5393
Epoch 5/100
7/7 - 0s - loss: 7.2851 - hamming_loss: 0.5551 - val_loss: 7.3702 - val_hamming_loss: 0.5777
Epoch 6/100
7/7 - 0s - loss: 7.0946 - hamming_loss: 0.5440 - val_loss: 7.1429 - val_hamming_loss: 0.5024
Epoch 7/100
7/7 - 0s - loss: 6.9162 - hamming_loss: 0.5186 - val_loss: 6.9666 - val_hamming_loss: 0.5024
Epoch 8/100
7/7 - 0s - loss: 6.7968 - hamming_loss: 0.5186 - val_loss: 6.8954 - val_hamming_loss: 0.5024
Epoch 9/100
7/7 - 0s - loss: 6.7634 - hamming_loss: 0.5186 - val_loss: 6.8984 - val_hamming_loss: 0.5024
Epoch 10/100
7/7 - 0s - loss: 6.7741 - hamming_loss: 0.

In [35]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Tabulate the recorded training history as a dataframe.
##    The .json export below is kept commented out (presumably to protect files already on disk).
history_ce_RNN_lr001_full_df = pd.DataFrame(data = history_ce_RNN_lr001_full.history)
#with open("Full Data Eval Metrics/Cross Entropy RNN/history_ce_RNN_lr001_full.json", "w") as outfile: 
#    history_ce_RNN_lr001_full_df.to_json(outfile)

In [22]:
## Learn a threshold function and save the test error for use in future DF
t_range = (0, 1)    ## Range handed to the threshold learner

## Model outputs for the padded train and test sequences
Y_train_pred = model_LSTM_full.predict(train_padded)
Y_test_pred = model_LSTM_full.predict(test_padded)

## predict_test_labels_binary returns a pair: binary test labels and (presumably) the fitted threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred,
    Y_train,
    Y_test_pred,
    t_range,
)

## Hamming loss of the binarized test predictions against the true test labels
ce_RNN_full_withThreshold = metrics.hamming_loss(y_true = Y_test, y_pred = test_labels_binary)

In [13]:
## Display the test set hamming loss stored in ce_RNN_full_withThreshold
ce_RNN_full_withThreshold

0.23798076923076922

In [30]:
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- Collect the test set hamming losses for the full-data
##                                               models with learned threshold functions into a
##                                               one-row df and write it to a .json file
## NOTE: this rebinds val_hamming_loss_withThreshold_lr001_df, replacing the reduced-data df built earlier
val_hamming_loss_withThreshold_lr001_df = pd.DataFrame({'ce_FF_full_lr001' : ce_FF_full_withThreshold,
                                                        'ce_RNN_full_lr001' : ce_RNN_full_withThreshold},
                                                        index = [0])

## Mode "x" creates the file only if it does not already exist, so re-running this cell
## can never clobber previously saved results (it raises FileExistsError instead of truncating)
try:
    with open("Full Data Eval Metrics/val_hamming_loss_withThreshold_lr001.json", "x") as outfile: 
        val_hamming_loss_withThreshold_lr001_df.to_json(outfile)
except FileExistsError:
    print("Full Data Eval Metrics/val_hamming_loss_withThreshold_lr001.json already exists -- not overwritten")

In [29]:
## Display the one-row dataframe of full-data test set hamming losses built in the previous cell
val_hamming_loss_withThreshold_lr001_df

Unnamed: 0,ce_FF_full_lr001,ce_RNN_full_lr001
0,0.678686,0.237981
