In [None]:
########################################################################################################################
# Filename: Generate_ANN_Results.ipynb
#
# Purpose: Generate results from different ANN models trained on paragraph 
#          classification task
#
# Author(s): Bobby (Robert) Lumpkin
#
# Library Dependencies: numpy, pandas, tensorflow, bpmll, os, json, ast, random, 
#                       tensorflow_addons, skljson, sklearn, sys, threshold_learning
########################################################################################################################

# Generate and Save Results from Different ANN Methods

In [1]:
import numpy as np
import pandas as pd
import os
import json
import ast
import random
import tensorflow as tf
import tensorflow_addons as tfa
from bpmll import bp_mll_loss
import sklearn_json as skljson
from sklearn.model_selection import train_test_split
from sklearn import metrics
import sys
## Set the working directory to 'ANN Results'. Every relative path used below ('../...') is resolved
## against this directory. The location can be overridden per machine through the ANN_RESULTS_DIR
## environment variable; when it is unset (or empty) the original author's path is used.
ANN_RESULTS_DIR = os.environ.get('ANN_RESULTS_DIR') or \
    'C:\\Users\\rober\\OneDrive\\Documents\\STAT 6500\\Project\\NewsArticleClassification\\codes\\ANN Results'
os.chdir(ANN_RESULTS_DIR)

## Append the path to the ThresholdFunctionLearning directory to the interpreter's search path
## (only once, so re-running this cell does not keep growing sys.path)
if '../ThresholdFunctionLearning' not in sys.path:
    sys.path.append('../ThresholdFunctionLearning')

## Import the threshold-learning helpers from the threshold_learning library
from threshold_learning import predict_test_labels_binary
from threshold_learning import predict_labels_binary

## Models on Reduced Dataset (each instance has at least one label)

In [2]:
## Load the reduced tfidf dataset. The context manager closes the file as soon as it has been parsed
## (the original left the handle open for the lifetime of the kernel).
with open('../BP-MLL Text Categorization/tfidf_trainTest_data_reduced.json') as file_object:
    tfidf_data_reduced = json.load(file_object)

## Convert the stored train/test features and labels to numpy arrays
X_train_hasLabel = np.array(tfidf_data_reduced['X_train_hasLabel'])
X_test_hasLabel = np.array(tfidf_data_reduced['X_test_hasLabel'])
Y_train_hasLabel = np.array(tfidf_data_reduced['Y_train_hasLabel'])
Y_test_hasLabel = np.array(tfidf_data_reduced['Y_test_hasLabel'])

### Feed-Forward Cross-Entropy Network

In [9]:
## Start by defining and compiling the cross-entropy loss network (bpmll used later)
tf.random.set_seed(123)
num_labels = 13

## One hidden layer (32 relu units) with dropout, followed by one sigmoid output unit per label
model_ce_FF = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.0001)

## Hamming loss on the sigmoid outputs thresholded at 0.5. This same 'metric' object is also
## passed to the compile() calls of the models defined in later cells.
metric = tfa.metrics.HammingLoss(mode = 'multilabel', threshold = 0.5)

## compile() documents 'metrics' as a list, so the single metric is wrapped in one
model_ce_FF.compile(optimizer = optim_func,
              loss = 'binary_crossentropy', metrics = [metric]
              )

In [4]:
## Re-seed TensorFlow, then train the cross-entropy feed-forward network for 100 epochs.
## The test split is supplied as validation data, so its loss / hamming loss are logged each epoch.
tf.random.set_seed(123)
history_ce_FF_lr001 = model_ce_FF.fit(
    x = X_train_hasLabel,
    y = Y_train_hasLabel,
    epochs = 100,
    verbose = 2,
    validation_data = (X_test_hasLabel, Y_test_hasLabel),
)

Epoch 1/100
6/6 - 1s - loss: 0.7028 - hamming_loss: 0.4847 - val_loss: 0.6898 - val_hamming_loss: 0.4650
Epoch 2/100
6/6 - 0s - loss: 0.6970 - hamming_loss: 0.4742 - val_loss: 0.6878 - val_hamming_loss: 0.4572
Epoch 3/100
6/6 - 0s - loss: 0.6969 - hamming_loss: 0.4764 - val_loss: 0.6858 - val_hamming_loss: 0.4484
Epoch 4/100
6/6 - 0s - loss: 0.6936 - hamming_loss: 0.4642 - val_loss: 0.6838 - val_hamming_loss: 0.4414
Epoch 5/100
6/6 - 0s - loss: 0.6903 - hamming_loss: 0.4572 - val_loss: 0.6818 - val_hamming_loss: 0.4318
Epoch 6/100
6/6 - 0s - loss: 0.6836 - hamming_loss: 0.4489 - val_loss: 0.6799 - val_hamming_loss: 0.4274
Epoch 7/100
6/6 - 0s - loss: 0.6821 - hamming_loss: 0.4414 - val_loss: 0.6780 - val_hamming_loss: 0.4248
Epoch 8/100
6/6 - 0s - loss: 0.6825 - hamming_loss: 0.4427 - val_loss: 0.6761 - val_hamming_loss: 0.4205
Epoch 9/100
6/6 - 0s - loss: 0.6757 - hamming_loss: 0.4135 - val_loss: 0.6743 - val_hamming_loss: 0.4152
Epoch 10/100
6/6 - 0s - loss: 0.6670 - hamming_loss: 0.

In [5]:
## Convert the training history to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out;
## uncomment it only when the saved results should be regenerated.
history_ce_FF_lr001_df = pd.DataFrame(data = history_ce_FF_lr001.history)
#with open("Reduced Data Eval Metrics/Cross Entropy Feed Forward/history_ce_FF_lr0001.json", "w") as outfile:
#    history_ce_FF_lr001_df.to_json(outfile)

In [6]:
## Learn a threshold function and save the test error for use in future DF.
## t_range is handed straight to predict_test_labels_binary -- presumably the interval searched
## for thresholds (TODO confirm against threshold_learning).
t_range = (0, 1)

## Raw sigmoid outputs of the trained network on both splits
Y_train_pred = model_ce_FF.predict(x = X_train_hasLabel)
Y_test_pred = model_ce_FF.predict(x = X_test_hasLabel)

## The helper returns the binarized test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train_hasLabel, Y_test_pred, t_range
)
ce_FF_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

### Feed-Forward BP-MLL Network

In [7]:
## Start by defining and compiling the bp-mll loss network.
## Same layer stack as the cross-entropy feed-forward network; only the loss differs.
## 'num_labels' and 'metric' are the objects created in the cross-entropy feed-forward cell.
tf.random.set_seed(123)
model_bpmll_FF = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.0001)

## compile() documents 'metrics' as a list, so the single metric is wrapped in one
model_bpmll_FF.compile(optimizer = optim_func,
              loss = bp_mll_loss, metrics = [metric]
              )

In [8]:
## Re-seed TensorFlow, then train the BP-MLL feed-forward network for 100 epochs.
## The test split is supplied as validation data, so its loss / hamming loss are logged each epoch.
tf.random.set_seed(123)
history_bpmll_FF_lr001 = model_bpmll_FF.fit(
    x = X_train_hasLabel,
    y = Y_train_hasLabel,
    epochs = 100,
    verbose = 2,
    validation_data = (X_test_hasLabel, Y_test_hasLabel),
)

Epoch 1/100
6/6 - 1s - loss: 0.9846 - hamming_loss: 0.3820 - val_loss: 0.9769 - val_hamming_loss: 0.4685
Epoch 2/100
6/6 - 0s - loss: 0.9782 - hamming_loss: 0.4808 - val_loss: 0.9751 - val_hamming_loss: 0.4624
Epoch 3/100
6/6 - 0s - loss: 0.9793 - hamming_loss: 0.4830 - val_loss: 0.9733 - val_hamming_loss: 0.4598
Epoch 4/100
6/6 - 0s - loss: 0.9722 - hamming_loss: 0.4672 - val_loss: 0.9715 - val_hamming_loss: 0.4537
Epoch 5/100
6/6 - 0s - loss: 0.9715 - hamming_loss: 0.4668 - val_loss: 0.9698 - val_hamming_loss: 0.4519
Epoch 6/100
6/6 - 0s - loss: 0.9645 - hamming_loss: 0.4563 - val_loss: 0.9680 - val_hamming_loss: 0.4449
Epoch 7/100
6/6 - 0s - loss: 0.9601 - hamming_loss: 0.4515 - val_loss: 0.9662 - val_hamming_loss: 0.4423
Epoch 8/100
6/6 - 0s - loss: 0.9639 - hamming_loss: 0.4567 - val_loss: 0.9644 - val_hamming_loss: 0.4406
Epoch 9/100
6/6 - 0s - loss: 0.9539 - hamming_loss: 0.4388 - val_loss: 0.9626 - val_hamming_loss: 0.4379
Epoch 10/100
6/6 - 0s - loss: 0.9491 - hamming_loss: 0.

In [9]:
## Convert the training history to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out;
## uncomment it only when the saved results should be regenerated.
history_bpmll_FF_lr001_df = pd.DataFrame(data = history_bpmll_FF_lr001.history)
#with open("Reduced Data Eval Metrics/BPMLL Feed Forward/history_bpmll_FF_lr0001.json", "w") as outfile:
#    history_bpmll_FF_lr001_df.to_json(outfile)

In [10]:
## Learn a threshold function and save the test error for use in future DF.
## t_range is handed straight to predict_test_labels_binary -- presumably the interval searched
## for thresholds (TODO confirm against threshold_learning).
t_range = (0, 1)

## Raw sigmoid outputs of the trained network on both splits
Y_train_pred = model_bpmll_FF.predict(x = X_train_hasLabel)
Y_test_pred = model_bpmll_FF.predict(x = X_test_hasLabel)

## The helper returns the binarized test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train_hasLabel, Y_test_pred, t_range
)
bpmll_FF_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

### BPMLL Bidirectional LSTM Recurrent Network

In [4]:
## Load the pre-processed data. The context manager closes the file as soon as it has been parsed
## (the original left the handle open for the lifetime of the kernel).
with open('../RNN Text Categorization/RNN_data_dict_reduced.json') as file_object_reduced:
    RNN_data_dict_reduced = json.load(file_object_reduced)

## The JSON payload is a string holding a Python-literal dict, so it is parsed a second time
RNN_data_dict_reduced = ast.literal_eval(RNN_data_dict_reduced)

## Padded sequences for the recurrent models. NOTE: Y_train_hasLabel / Y_test_hasLabel are rebound
## here, replacing the arrays loaded earlier from the tfidf file.
train_padded_hasLabel = np.array(RNN_data_dict_reduced['train_padded_hasLabel'])
test_padded_hasLabel = np.array(RNN_data_dict_reduced['test_padded_hasLabel'])
Y_train_hasLabel = np.array(RNN_data_dict_reduced['Y_train_hasLabel'])
Y_test_hasLabel = np.array(RNN_data_dict_reduced['Y_test_hasLabel'])

In [12]:
## Define the bidirectional LSTM RNN architecture (trained with the BP-MLL loss)
tf.random.set_seed(123)
num_labels = 13
max_length = 100            ## passed to the Embedding layer as input_length
num_unique_words = 2711     ## passed to the Embedding layer as its input dimension

## Embedding (32-dim) -> bidirectional LSTM (16 units per direction, final output only) -> sigmoid output per label
model_bpmll_biLSTM = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences = False, return_state = False)),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.0001)

## 'metric' is the HammingLoss object created in the cross-entropy feed-forward cell;
## compile() documents 'metrics' as a list, so it is wrapped in one
model_bpmll_biLSTM.compile(loss = bp_mll_loss, optimizer = optim_func, metrics = [metric])

In [13]:
## Re-seed TensorFlow, then train the BP-MLL bidirectional LSTM for 100 epochs.
## The test split is supplied as validation data, so its loss / hamming loss are logged each epoch.
tf.random.set_seed(123)
history_bpmll_RNN_lr001 = model_bpmll_biLSTM.fit(
    x = train_padded_hasLabel,
    y = Y_train_hasLabel,
    epochs = 100,
    verbose = 2,
    validation_data = (test_padded_hasLabel, Y_test_hasLabel),
)

Epoch 1/100
6/6 - 4s - loss: 0.9990 - hamming_loss: 0.4350 - val_loss: 0.9986 - val_hamming_loss: 0.5411
Epoch 2/100
6/6 - 0s - loss: 0.9981 - hamming_loss: 0.5197 - val_loss: 0.9979 - val_hamming_loss: 0.5262
Epoch 3/100
6/6 - 0s - loss: 0.9973 - hamming_loss: 0.5017 - val_loss: 0.9971 - val_hamming_loss: 0.5026
Epoch 4/100
6/6 - 0s - loss: 0.9964 - hamming_loss: 0.4786 - val_loss: 0.9964 - val_hamming_loss: 0.4895
Epoch 5/100
6/6 - 0s - loss: 0.9956 - hamming_loss: 0.4576 - val_loss: 0.9956 - val_hamming_loss: 0.4764
Epoch 6/100
6/6 - 0s - loss: 0.9947 - hamming_loss: 0.4406 - val_loss: 0.9948 - val_hamming_loss: 0.4510
Epoch 7/100
6/6 - 0s - loss: 0.9938 - hamming_loss: 0.4156 - val_loss: 0.9940 - val_hamming_loss: 0.4266
Epoch 8/100
6/6 - 0s - loss: 0.9928 - hamming_loss: 0.3872 - val_loss: 0.9931 - val_hamming_loss: 0.4073
Epoch 9/100
6/6 - 0s - loss: 0.9919 - hamming_loss: 0.3663 - val_loss: 0.9922 - val_hamming_loss: 0.3881
Epoch 10/100
6/6 - 0s - loss: 0.9908 - hamming_loss: 0.

In [14]:
## Convert the training history to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out;
## uncomment it only when the saved results should be regenerated.
history_bpmll_RNN_lr001_df = pd.DataFrame(data = history_bpmll_RNN_lr001.history)
#with open("Reduced Data Eval Metrics/BPMLL RNN/history_bpmll_RNN_lr0001.json", "w") as outfile:
#    history_bpmll_RNN_lr001_df.to_json(outfile)

In [15]:
## Learn a threshold function and save the test error for use in future DF.
## t_range is handed straight to predict_test_labels_binary -- presumably the interval searched
## for thresholds (TODO confirm against threshold_learning).
t_range = (0, 1)

## Raw sigmoid outputs of the trained network on both splits
Y_train_pred = model_bpmll_biLSTM.predict(x = train_padded_hasLabel)
Y_test_pred = model_bpmll_biLSTM.predict(x = test_padded_hasLabel)

## The helper returns the binarized test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train_hasLabel, Y_test_pred, t_range
)
bpmll_RNN_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

### Cross-Entropy Bidirectional LSTM Recurrent Network

In [5]:
## Define the bidirectional LSTM RNN architecture (trained with binary cross-entropy)
tf.random.set_seed(123)
num_labels = 13
max_length = 100            ## passed to the Embedding layer as input_length
num_unique_words = 2711     ## passed to the Embedding layer as its input dimension

## Embedding (32-dim) -> bidirectional LSTM (16 units per direction, final output only) -> sigmoid output per label
model_ce_biLSTM = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = max_length),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(16, return_sequences = False, return_state = False)),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.0001)

## 'metric' is the HammingLoss object created in the cross-entropy feed-forward cell;
## compile() documents 'metrics' as a list, so it is wrapped in one
model_ce_biLSTM.compile(loss = 'binary_crossentropy', optimizer = optim_func, metrics = [metric])

In [6]:
## Re-seed TensorFlow, then train the cross-entropy bidirectional LSTM for 100 epochs.
## The test split is supplied as validation data, so its loss / hamming loss are logged each epoch.
tf.random.set_seed(123)
history_ce_RNN_lr001 = model_ce_biLSTM.fit(
    x = train_padded_hasLabel,
    y = Y_train_hasLabel,
    epochs = 100,
    verbose = 2,
    validation_data = (test_padded_hasLabel, Y_test_hasLabel),
)

Epoch 1/100
6/6 - 5s - loss: 0.6407 - hamming_loss: 0.2946 - val_loss: 0.4737 - val_hamming_loss: 0.1783
Epoch 2/100
6/6 - 0s - loss: 0.4329 - hamming_loss: 0.1792 - val_loss: 0.4222 - val_hamming_loss: 0.1888
Epoch 3/100
6/6 - 0s - loss: 0.4174 - hamming_loss: 0.1849 - val_loss: 0.4183 - val_hamming_loss: 0.1783
Epoch 4/100
6/6 - 0s - loss: 0.4093 - hamming_loss: 0.1796 - val_loss: 0.4088 - val_hamming_loss: 0.1783
Epoch 5/100
6/6 - 0s - loss: 0.3969 - hamming_loss: 0.1788 - val_loss: 0.4083 - val_hamming_loss: 0.1897
Epoch 6/100
6/6 - 0s - loss: 0.3801 - hamming_loss: 0.1744 - val_loss: 0.4075 - val_hamming_loss: 0.1801
Epoch 7/100
6/6 - 0s - loss: 0.3577 - hamming_loss: 0.1591 - val_loss: 0.4027 - val_hamming_loss: 0.1731
Epoch 8/100
6/6 - 0s - loss: 0.3325 - hamming_loss: 0.1342 - val_loss: 0.4029 - val_hamming_loss: 0.1809
Epoch 9/100
6/6 - 0s - loss: 0.3072 - hamming_loss: 0.1289 - val_loss: 0.4015 - val_hamming_loss: 0.1818
Epoch 10/100
6/6 - 0s - loss: 0.2821 - hamming_loss: 0.

In [7]:
## Convert the training history to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out;
## uncomment it only when the saved results should be regenerated.
history_ce_RNN_lr001_df = pd.DataFrame(data = history_ce_RNN_lr001.history)
#with open("Reduced Data Eval Metrics/Cross Entropy RNN/history_ce_RNN_lr0001.json", "w") as outfile:
#    history_ce_RNN_lr001_df.to_json(outfile)

In [19]:
## Learn a threshold function and save the test error for use in future DF.
## t_range is handed straight to predict_test_labels_binary -- presumably the interval searched
## for thresholds (TODO confirm against threshold_learning).
t_range = (0, 1)

## Raw sigmoid outputs of the trained network on both splits
Y_train_pred = model_ce_biLSTM.predict(x = train_padded_hasLabel)
Y_test_pred = model_ce_biLSTM.predict(x = test_padded_hasLabel)

## The helper returns the binarized test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train_hasLabel, Y_test_pred, t_range
)
ce_RNN_withThreshold = metrics.hamming_loss(y_true = Y_test_hasLabel, y_pred = test_labels_binary)

In [22]:
## Collect the test set hamming losses for the reduced-dataset models with learned threshold
## functions into a one-row df (one column per model).
reduced_hamming_losses = {
    'ce_FF_lr0001' : ce_FF_withThreshold,
    'bpmll_FF_lr0001' : bpmll_FF_withThreshold,
    'ce_RNN_lr0001' : ce_RNN_withThreshold,
    'bpmll_RNN_lr0001' : bpmll_RNN_withThreshold,
}
val_hamming_loss_withThreshold_lr001_df = pd.DataFrame(reduced_hamming_losses, index = [0])

## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out
#with open("Reduced Data Eval Metrics/val_hamming_loss_withThreshold_lr0001.json", "w") as outfile:
#    val_hamming_loss_withThreshold_lr001_df.to_json(outfile)

In [21]:
## Display the one-row summary of test set hamming losses (with learned thresholds) built in the previous cell
val_hamming_loss_withThreshold_lr001_df

Unnamed: 0,ce_FF_lr0001,bpmll_FF_lr0001,ce_RNN_lr0001,bpmll_RNN_lr0001
0,0.207168,0.184441,0.189685,0.213287


## Models on Full Dataset (some instances have no labels)

In [8]:
## Load the full tfidf dataset. The context manager closes the file as soon as it has been parsed
## (the original left the handle open for the lifetime of the kernel).
with open('../BP-MLL Text Categorization/tfidf_trainTest_data.json') as file_object:
    tfidf_data_full = json.load(file_object)

## Convert the stored train/test features and labels to numpy arrays
X_train = np.array(tfidf_data_full['X_train'])
X_test = np.array(tfidf_data_full['X_test'])
Y_train = np.array(tfidf_data_full['Y_train'])
Y_test = np.array(tfidf_data_full['Y_test'])

### Feed-Forward Cross-Entropy Network

In [24]:
## Use same architecture as the previous cross-entropy feed-forward network and train on full dataset
tf.random.set_seed(123)
num_labels = 13

## One hidden layer (32 relu units) with dropout, followed by one sigmoid output unit per label
model_ce_FF_full = tf.keras.models.Sequential([
    tf.keras.layers.Dense(32, activation = 'relu'),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.0001)

## 'metric' is the HammingLoss object created in the first cross-entropy feed-forward cell;
## compile() documents 'metrics' as a list, so it is wrapped in one
model_ce_FF_full.compile(optimizer = optim_func,
              loss = 'binary_crossentropy', metrics = [metric]
              )

In [25]:
## Re-seed TensorFlow, then train the cross-entropy feed-forward network on the full dataset for 100 epochs.
## The test split is supplied as validation data, so its loss / hamming loss are logged each epoch.
tf.random.set_seed(123)
history_ce_FF_lr001_full = model_ce_FF_full.fit(
    x = X_train,
    y = Y_train,
    epochs = 100,
    verbose = 2,
    validation_data = (X_test, Y_test),
)

Epoch 1/100
7/7 - 1s - loss: 0.7023 - hamming_loss: 0.3895 - val_loss: 0.6944 - val_hamming_loss: 0.4808
Epoch 2/100
7/7 - 0s - loss: 0.7009 - hamming_loss: 0.4738 - val_loss: 0.6923 - val_hamming_loss: 0.4728
Epoch 3/100
7/7 - 0s - loss: 0.7003 - hamming_loss: 0.4762 - val_loss: 0.6902 - val_hamming_loss: 0.4663
Epoch 4/100
7/7 - 0s - loss: 0.6940 - hamming_loss: 0.4592 - val_loss: 0.6881 - val_hamming_loss: 0.4607
Epoch 5/100
7/7 - 0s - loss: 0.6914 - hamming_loss: 0.4592 - val_loss: 0.6860 - val_hamming_loss: 0.4511
Epoch 6/100
7/7 - 0s - loss: 0.6892 - hamming_loss: 0.4496 - val_loss: 0.6840 - val_hamming_loss: 0.4431
Epoch 7/100
7/7 - 0s - loss: 0.6862 - hamming_loss: 0.4437 - val_loss: 0.6820 - val_hamming_loss: 0.4343
Epoch 8/100
7/7 - 0s - loss: 0.6767 - hamming_loss: 0.4211 - val_loss: 0.6800 - val_hamming_loss: 0.4271
Epoch 9/100
7/7 - 0s - loss: 0.6723 - hamming_loss: 0.4132 - val_loss: 0.6781 - val_hamming_loss: 0.4183
Epoch 10/100
7/7 - 0s - loss: 0.6712 - hamming_loss: 0.

In [26]:
## Convert the training history to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out;
## uncomment it only when the saved results should be regenerated.
history_ce_FF_lr001_full_df = pd.DataFrame(data = history_ce_FF_lr001_full.history)
#with open("Full Data Eval Metrics/Cross Entropy Feed Forward/history_ce_FF_lr0001_full.json", "w") as outfile:
#    history_ce_FF_lr001_full_df.to_json(outfile)

In [27]:
## Learn a threshold function and save the test error for use in future DF.
## t_range is handed straight to predict_test_labels_binary -- presumably the interval searched
## for thresholds (TODO confirm against threshold_learning).
t_range = (0, 1)

## Raw sigmoid outputs of the trained network on both splits
Y_train_pred = model_ce_FF_full.predict(x = X_train)
Y_test_pred = model_ce_FF_full.predict(x = X_test)

## The helper returns the binarized test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train, Y_test_pred, t_range
)
ce_FF_full_withThreshold = metrics.hamming_loss(y_true = Y_test, y_pred = test_labels_binary)

### LSTM Recurrent Network

In [10]:
## Load the pre-processed data. The context manager closes the file as soon as it has been parsed
## (the original left the handle open for the lifetime of the kernel).
with open('../RNN Text Categorization/RNN_data_dict.json') as file_object:
    RNN_data_dict = json.load(file_object)

## The JSON payload is a string holding a Python-literal dict, so it is parsed a second time
RNN_data_dict = ast.literal_eval(RNN_data_dict)

## Padded sequences for the recurrent model. NOTE: Y_train / Y_test are rebound here,
## replacing the arrays loaded earlier from the full tfidf file.
train_padded = np.array(RNN_data_dict['train_padded'])
test_padded = np.array(RNN_data_dict['test_padded'])
Y_train = np.array(RNN_data_dict['Y_train'])
Y_test = np.array(RNN_data_dict['Y_test'])

In [15]:
## Define the LSTM RNN architecture (unidirectional, trained with binary cross-entropy on the full dataset)
tf.random.set_seed(123)
num_labels = 13

## NOTE: num_unique_words and max_length are not set in this cell; they are the values assigned in the
## bidirectional LSTM cells of the reduced-dataset section, which must have been run first.
## NOTE(review): confirm that vocabulary size also covers the full dataset's token indices.
model_LSTM_full = tf.keras.models.Sequential([
    tf.keras.layers.Embedding(num_unique_words, 32, input_length = max_length),
    tf.keras.layers.LSTM(16, return_sequences = False, return_state = False),
    #tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(num_labels, activation = 'sigmoid')
])

## 'learning_rate' is the supported keyword; 'lr' is only a deprecated alias
optim_func = tf.keras.optimizers.Adam(learning_rate = 0.0001)

## 'metric' is the HammingLoss object created in the first cross-entropy feed-forward cell;
## compile() documents 'metrics' as a list, so it is wrapped in one
model_LSTM_full.compile(loss = 'binary_crossentropy', optimizer = optim_func, metrics = [metric])

In [16]:
## Re-seed TensorFlow, then train the LSTM on the full dataset for 100 epochs.
## The test split is supplied as validation data, so its loss / hamming loss are logged each epoch.
tf.random.set_seed(123)
history_ce_RNN_lr001_full = model_LSTM_full.fit(
    x = train_padded,
    y = Y_train,
    epochs = 100,
    verbose = 2,
    validation_data = (test_padded, Y_test),
)

Epoch 1/100
7/7 - 3s - loss: 0.6953 - hamming_loss: 0.5159 - val_loss: 0.6949 - val_hamming_loss: 0.5921
Epoch 2/100
7/7 - 0s - loss: 0.6944 - hamming_loss: 0.5948 - val_loss: 0.6940 - val_hamming_loss: 0.5921
Epoch 3/100
7/7 - 0s - loss: 0.6935 - hamming_loss: 0.5948 - val_loss: 0.6931 - val_hamming_loss: 0.5921
Epoch 4/100
7/7 - 0s - loss: 0.6926 - hamming_loss: 0.5956 - val_loss: 0.6922 - val_hamming_loss: 0.5585
Epoch 5/100
7/7 - 0s - loss: 0.6917 - hamming_loss: 0.5757 - val_loss: 0.6913 - val_hamming_loss: 0.5585
Epoch 6/100
7/7 - 0s - loss: 0.6908 - hamming_loss: 0.5757 - val_loss: 0.6904 - val_hamming_loss: 0.4960
Epoch 7/100
7/7 - 0s - loss: 0.6899 - hamming_loss: 0.4734 - val_loss: 0.6894 - val_hamming_loss: 0.4431
Epoch 8/100
7/7 - 0s - loss: 0.6889 - hamming_loss: 0.4187 - val_loss: 0.6885 - val_hamming_loss: 0.3421
Epoch 9/100
7/7 - 0s - loss: 0.6879 - hamming_loss: 0.3466 - val_loss: 0.6875 - val_hamming_loss: 0.3421
Epoch 10/100
7/7 - 0s - loss: 0.6870 - hamming_loss: 0.

In [31]:
## Convert the training history to a dataframe.
## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out;
## uncomment it only when the saved results should be regenerated.
history_ce_RNN_lr001_full_df = pd.DataFrame(data = history_ce_RNN_lr001_full.history)
#with open("Full Data Eval Metrics/Cross Entropy RNN/history_ce_RNN_lr0001_full.json", "w") as outfile:
#    history_ce_RNN_lr001_full_df.to_json(outfile)

In [32]:
## Learn a threshold function and save the test error for use in future DF.
## t_range is handed straight to predict_test_labels_binary -- presumably the interval searched
## for thresholds (TODO confirm against threshold_learning).
t_range = (0, 1)

## Raw sigmoid outputs of the trained network on both splits
Y_train_pred = model_LSTM_full.predict(x = train_padded)
Y_test_pred = model_LSTM_full.predict(x = test_padded)

## The helper returns the binarized test labels together with the threshold function
test_labels_binary, threshold_function = predict_test_labels_binary(
    Y_train_pred, Y_train, Y_test_pred, t_range
)
ce_RNN_full_withThreshold = metrics.hamming_loss(y_true = Y_test, y_pred = test_labels_binary)

In [27]:
## Display the test set hamming loss of the full-dataset LSTM after applying the learned threshold function
ce_RNN_full_withThreshold

0.17307692307692307

In [37]:
## Collect the test set hamming losses for the full-dataset models with learned threshold
## functions into a one-row df (one column per model).
## NOTE: this rebinds val_hamming_loss_withThreshold_lr001_df, replacing the reduced-dataset summary.
full_hamming_losses = {
    'ce_FF_full_lr0001' : ce_FF_full_withThreshold,
    'ce_RNN_full_lr0001' : ce_RNN_full_withThreshold,
}
val_hamming_loss_withThreshold_lr001_df = pd.DataFrame(full_hamming_losses, index = [0])

## (CAUTION: DO NOT OVERWRITE EXISTING FILES) -- the .json export below is intentionally commented out
#with open("Full Data Eval Metrics/val_hamming_loss_withThreshold_lr0001.json", "w") as outfile:
#    val_hamming_loss_withThreshold_lr001_df.to_json(outfile)

In [36]:
## Display the one-row summary of full-dataset test set hamming losses (with learned thresholds) built in the previous cell
val_hamming_loss_withThreshold_lr001_df

Unnamed: 0,ce_FF_full_lr0001,ce_RNN_full_lr0001
0,0.211538,0.177083
