# Model Testing
> Final testing results for best models

In [1]:
%load_ext autoreload
%autoreload 2
from importlib import reload

import numpy as np
#import time # !
import json
#from matplotlib import pyplot as plt
import time
import sys

from loadutils import retrieve_decoder_model, retrieve_model
import evaluation_helper as eh
import tensorflow as tf
import pandas as pd
from keras.utils import to_categorical

import glove_helper
from loadutils import conll2003Data, saveProcessedData, retrieve_model
from common import vocabulary, utils


Using TensorFlow backend.


In [2]:

start_time = time.time()

"""
Pick which language to test on
"""
LANGUAGE = "es"
# LANGUAGE = "ca"
# LANGUAGE = "gd"
# LANGUAGE = sys.argv[1]

"""
Pick how much of the training data to use
"""
# TRAIN_AMOUNT = "100"
# TRAIN_AMOUNT = "50"
# TRAIN_AMOUNT = "10"
TRAIN_AMOUNT = "1"
# TRAIN_AMOUNT = sys.argv[2]

# RANDOM_SEED = sys.argv[3]

DIRECTORY = "../data/pos_tagging/"+LANGUAGE+"/"

#training file depends on low-resource or not; 100%, 50%, or 10% of training data used
TRAIN_FILE = DIRECTORY+"train_"+TRAIN_AMOUNT+".txt"

# dev, test, and vectors stay the same
DEV_FILE = DIRECTORY+"dev.txt"
TEST_FILE = DIRECTORY+"test.txt"
# VECTORS = "data/"+LANGUAGE+"/wiki."+LANGUAGE+".zip"
VECTORS = "data/"+LANGUAGE+"/cc."+LANGUAGE+".300.zip"

# # out files for IPC
# HYPER_PARAM_FILE = "hyper_params.json"

# VOCAB_SIZE = 20000

## PRINT OUT HYPERPARAMETERS FOR REFERENCE
print("LANGUAGE:\t", LANGUAGE)
print("TRAIN_AMOUNT:\t", TRAIN_AMOUNT)

LANGUAGE:	 es
TRAIN_AMOUNT:	 1


## Local helper utils

In [3]:
def construct_embedding_matrix(embed_dim, vocab_size):
    """
    construct embedding matrix from GloVe 6Bn word data
    
    reuse glove_helper code from w266 
    
    Returns: an embedding matrix directly plugged into keras.layers.Embedding(weights=[embedding_matrix])
    """
    reload(glove_helper)
    hands = glove_helper.Hands(vector_zip=VECTORS, ndim=embed_dim)
    embedding_matrix = np.zeros((vocab_size, embed_dim))
    
    for i in range(vocabData.vocab.size):
        word = vocabData.vocab.ids_to_words([i])[0]
        try:
            embedding_vector = hands.get_vector(word)
        except:
            embedding_vector = hands.get_vector("<unk>")
        embedding_matrix[i] = embedding_vector

    return embedding_matrix

## Load the Data

In [4]:
# UPDATES!
global_max_features = 20000
windowLength = 11
#testNumSents = 20000

# Use training set to build vocab here
vocabData = conll2003Data(TRAIN_FILE)
vocabData.buildVocab( vocabSize=global_max_features, verbose=False)

# Format training data
trainX, trainX_pos, trainX_capitals, trainY  = vocabData.formatWindowedData( 
                                                  vocabData.train_sentences, 
                                                  windowLength=windowLength,
                                                  verbose=False)

# read in dev data
devSents = vocabData.readFile( DEV_FILE)
devX, devX_pos, devX_capitals, devY = vocabData.formatWindowedData( 
                                              devSents, 
                                              windowLength=windowLength,
                                              verbose=False)

# read in the test data
testSents = vocabData.readFile( TEST_FILE)
testX, testX_pos, testX_capitals, testY = vocabData.formatWindowedData( 
                                                testSents, 
                                                windowLength=windowLength,
                                                verbose=False)

----------------------------------------------------
reading file from path ../data/pos_tagging/es/train_1.txt
'readFile'  111.66 ms
----------------------------------------------------
building vocabulary from TRAINING data...
'buildVocab'  29.98 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  52.60 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/dev.txt
'readFile'  209.76 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  791.80 ms
----------------------------------------------------
reading file from path ../data/pos_tagging/es/test.txt
'readFile'  216.07 ms
----------------------------------------------------
formatting sentences into input windows...
'formatWindowedData'  728.81 ms


In [5]:
# Load GloVe embedding matrix

# global_embed_dim = 50
global_embed_dim = 300

embedding_matrix = construct_embedding_matrix( global_embed_dim, 
                                               global_max_features)

Loading vectors from data/es/cc.es.300.zip
Parsing file: data/es/cc.es.300.zip:cc.es.300.vec
Found 2,000,000 words.
Parsing vectors... Done! (W.shape = (2000003, 300))


In [6]:
# Get Y

# cat train/dev/test to make sure we have all labels in test set

# encoding 1-hot for ner targets
trainY_cat = to_categorical(trainY.astype('float32'))
devY_cat = to_categorical(devY.astype('float32'), num_classes=trainY_cat.shape[1])
testY_cat = to_categorical(testY.astype('float32'), num_classes=trainY_cat.shape[1])



In [7]:
trainY_cat = np.array(list(map( lambda i: np.array(i[3:], dtype=np.float), trainY_cat)), dtype=np.float)
devY_cat = np.array(list(map( lambda i: np.array(i[3:], dtype=np.float), devY_cat)), dtype=np.float)
testY_cat = np.array(list(map( lambda i: np.array(i[3:], dtype=np.float), testY_cat)), dtype=np.float)

In [8]:
# Get decoder Y -- 50 dim embedding of center word
train_decoderY = embedding_matrix[trainX[:,4]]
dev_decoderY = embedding_matrix[devX[:,4]]
test_decoderY = embedding_matrix[testX[:,4]]

In [9]:
# our take on keras's to_categorical method, but if we DON'T want a one-hot vector
# instead, the values of the vectors are 1 for ALL of the features present, so one token's vector
# could have a 1 in more than one dimension here

def to_almost_categorical(y, num_classes):
    almost_categorical_array = np.zeros((y.shape[0], y.shape[1], num_classes))
#     print(almost_categorical_array.shape)
    for i in range(len(y[0])):
#         print("window:\t",window)
        window = y[i]
        window_index = i
#         print("window_index:\t", i)
#         print("window:\t", window)
        for j in range(len(window)):
            token = window[j]
            token_index = j
#             print("token_index:\t", token_index)
#             print("token:\t",token)
            for value in token:
#                 print("value:\t",value)
#                 print("token_index:\t",token_index)
                value = int(value)
                almost_categorical_array[i,token_index,value] += 1
#                 print(almost_categorical_array[i])
#     print(almost_categorical_array)
    return almost_categorical_array

In [10]:
# Get X pos tags

# encoding 1-hot for pos tags
trainX_pos_cat = to_almost_categorical(trainX_pos, num_classes=vocabData.posTags.size)
devX_pos_cat = to_almost_categorical(devX_pos, num_classes=vocabData.posTags.size) 
testX_pos_cat = to_almost_categorical(testX_pos, num_classes=vocabData.posTags.size)

In [11]:
trainX_pos_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), trainX_pos_cat)), dtype=np.float)
devX_pos_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), devX_pos_cat)), dtype=np.float)
testX_pos_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), testX_pos_cat)), dtype=np.float)

In [12]:
# Get X capitalization 

# encoding 1-hot for capitalization info  ("allCaps", "upperInitial", "lowercase", "mixedCaps", "noinfo")
trainX_capitals_cat = to_categorical(trainX_capitals.astype('float32'))
devX_capitals_cat = to_categorical(devX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2]) 
testX_capitals_cat = to_categorical(testX_capitals.astype('float32'), num_classes=trainX_capitals_cat.shape[2])

trainX_capitals_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), trainX_capitals_cat)), dtype=np.float)
devX_capitals_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), devX_capitals_cat)), dtype=np.float)
testX_capitals_cat = np.array(list(map( lambda i: np.array(i[:,3:], dtype=np.float), testX_capitals_cat)), dtype=np.float)

## Final Models

In [13]:
## if we want to use features
devX_dict = {"x":devX, "x_pos":devX_pos_cat, "x_capital":devX_capitals_cat}
testX_dict = {"x":testX, "x_pos":testX_pos_cat, "x_capital":testX_capitals_cat}

## if we don't want to load the features ("x_pos" here)
# devX_dict = {"x":devX, "x_capital":devX_capitals_cat}
# testX_dict = {"x":testX, "x_capital":testX_capitals_cat}

modelDir = { "save_dir" : "result"}

In [14]:
preprocessing_time = time.time() - start_time
print("Preprocessing Time")
print("--- %s seconds ---" % (preprocessing_time))

Preprocessing Time
--- 182.05688619613647 seconds ---


## Capsnet model

### caps no learn

In [15]:
start_time_caps_nolearn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_nolearn_1"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_nolearn_1_P = report_obj4.get_precision( testY, y_pred)
caps_nolearn_1_R = report_obj4.get_recall( testY, y_pred)
caps_nolearn_1_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_nolearn_1 = time.time()-start_time_caps_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_nolearn_1))

# cm = report_obj4.get_confusion_matrix( testY, y_pred)
# print()
# cm



W0530 14:23:48.442375 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0530 14:23:48.445449 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0530 14:23:48.538696 140050625660736 deprecation.py:506] From /code/capsulelayers.py:136: calling softmax (from tensorflow.python.ops.nn_ops) with dim is deprecated and will be removed in a future version.
Instructions for updating:
dim is deprecated, use axis instead


Retrieving model: HPC_results/es_1_caps_nolearn_1


W0530 14:23:48.745819 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:174: The name tf.get_default_session is deprecated. Please use tf.compat.v1.get_default_session instead.

W0530 14:23:48.746675 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:181: The name tf.ConfigProto is deprecated. Please use tf.compat.v1.ConfigProto instead.

W0530 14:23:48.747340 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:186: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.

W0530 14:23:48.781519 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:190: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.

W0530 14:23:48.783399 

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)       (None, 16, 16)            458752    
__________

  gold_idx_dict[gold] = np.hstack(self.gold_pred_idx_dict[gold].values())


In [16]:
start_time_caps_nolearn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_nolearn_2"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_nolearn_2_P = report_obj4.get_precision( testY, y_pred)
caps_nolearn_2_R = report_obj4.get_recall( testY, y_pred)
caps_nolearn_2_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_nolearn_2 = time.time()-start_time_caps_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_nolearn_2))

Retrieving model: HPC_results/es_1_caps_nolearn_2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)    

In [17]:
start_time_caps_nolearn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_nolearn_3"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_nolearn_3_P = report_obj4.get_precision( testY, y_pred)
caps_nolearn_3_R = report_obj4.get_recall( testY, y_pred)
caps_nolearn_3_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_nolearn_3 = time.time()-start_time_caps_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_nolearn_3))

Retrieving model: HPC_results/es_1_caps_nolearn_3
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)    

In [18]:
start_time_caps_nolearn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_nolearn_4"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_nolearn_4_P = report_obj4.get_precision( testY, y_pred)
caps_nolearn_4_R = report_obj4.get_recall( testY, y_pred)
caps_nolearn_4_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_nolearn_4 = time.time()-start_time_caps_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_nolearn_4))

Retrieving model: HPC_results/es_1_caps_nolearn_4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)    

In [19]:
start_time_caps_nolearn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_nolearn_5"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_nolearn_5_P = report_obj4.get_precision( testY, y_pred)
caps_nolearn_5_R = report_obj4.get_recall( testY, y_pred)
caps_nolearn_5_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_nolearn_5 = time.time()-start_time_caps_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_nolearn_5))
# cm = report_obj4.get_confusion_matrix( testY, y_pred)
# print()
# cm

Retrieving model: HPC_results/es_1_caps_nolearn_5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)    

In [20]:
avg_time = (time_caps_nolearn_1+time_caps_nolearn_2+time_caps_nolearn_3+time_caps_nolearn_4+time_caps_nolearn_5)/5
print("Caps nolearn avg test time:\t",avg_time)

Caps nolearn avg test time:	 205.34342050552368


### caps learn 

In [21]:
start_time_caps_learn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_learn_1"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_learn_1_P = report_obj4.get_precision( testY, y_pred)
caps_learn_1_R = report_obj4.get_recall( testY, y_pred)
caps_learn_1_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_learn_1 = time.time()-start_time_caps_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_learn_1))

Retrieving model: HPC_results/es_1_caps_learn_1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)      

In [22]:
start_time_caps_learn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_learn_2"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_learn_2_P = report_obj4.get_precision( testY, y_pred)
caps_learn_2_R = report_obj4.get_recall( testY, y_pred)
caps_learn_2_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_learn_2 = time.time()-start_time_caps_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_learn_2))

Retrieving model: HPC_results/es_1_caps_learn_2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)      

In [23]:
start_time_caps_learn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_learn_3"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_learn_3_P = report_obj4.get_precision( testY, y_pred)
caps_learn_3_R = report_obj4.get_recall( testY, y_pred)
caps_learn_3_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_learn_3 = time.time()-start_time_caps_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_learn_3))

Retrieving model: HPC_results/es_1_caps_learn_3
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)      

In [24]:
start_time_caps_learn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_learn_4"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_learn_4_P = report_obj4.get_precision( testY, y_pred)
caps_learn_4_R = report_obj4.get_recall( testY, y_pred)
caps_learn_4_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_learn_4 = time.time()-start_time_caps_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_learn_4))

Retrieving model: HPC_results/es_1_caps_learn_4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)      

In [25]:
start_time_caps_learn = time.time()

# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
modelName =  "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_caps_learn_5"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

caps_learn_5_P = report_obj4.get_precision( testY, y_pred)
caps_learn_5_R = report_obj4.get_recall( testY, y_pred)
caps_learn_5_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_caps_learn_5 = time.time()-start_time_caps_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_caps_learn_5))

Retrieving model: HPC_results/es_1_caps_learn_5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
primarycap_conv1d (Conv1D)   (None, 7, 256)            196864    
_________________________________________________________________
primarycap_reshape (Reshape) (None, 224, 8)            0         
_________________________________________________________________
primarycap_squash (Lambda)   (None, 224, 8)            0         
_________________________________________________________________
nercaps (CapsuleLayer)      

In [26]:
avg_time = (time_caps_learn_1+time_caps_learn_2+time_caps_learn_3+time_caps_learn_4+time_caps_learn_5)/5
print("Caps learn avg test time:\t",avg_time)

Caps learn avg test time:	 204.6618718624115


### with features

In [27]:
# # model A - Window11_glove50_learn_drop50_caps3_conv5_cos_pos_caps
# modelName = "glove_nolearn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_features"
# myModel = retrieve_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# cm = report_obj4.get_confusion_matrix( testY, y_pred)
# print()
# cm

### Model B (Model A + Decoder)

In [28]:
# # model B - Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps
# # modelName = "Window11_glove50_learn_drop50_decode1000_caps3_conv5_cos_pos_caps"
# modelName = "glove_nolearn_base"
# myModel = retrieve_decoder_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred, raw_eval_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# #cm = report_obj4.get_confusion_matrix( testY, y_pred)
# #print()
# #cm

## CNN

### cnn nolearn

In [29]:
start_time_cnn_nolearn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_nolearn_1"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_nolearn_1_P = report_obj4.get_precision( testY, y_pred)
cnn_nolearn_1_R = report_obj4.get_recall( testY, y_pred)
cnn_nolearn_1_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_nolearn_1 = time.time()-start_time_cnn_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_nolearn_1))



# cm = report_obj4.get_confusion_matrix( testY, y_pred)
# print()
# cm



Retrieving model: HPC_results/es_1_cnn_nolearn_1


W0530 14:27:38.580204 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0530 14:27:38.582912 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:131: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0530 14:27:38.583555 140050625660736 module_wrapper.py:139] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:133: The name tf.placeholder_with_default is deprecated. Please use tf.compat.v1.placeholder_with_default instead.

W0530 14:27:38.589493 140050625660736 deprecation.py:506] From /home/andrew/.local/lib/python3.7/site-packages/keras/backend/tensorflow_backend.py:3445: calling dropout (from tensorflow.python.ops.nn_ops) with keep_prob is deprecated and will be rem

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 3, 128)            0         
__________

In [30]:
start_time_cnn_nolearn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_nolearn_2"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_nolearn_2_P = report_obj4.get_precision( testY, y_pred)
cnn_nolearn_2_R = report_obj4.get_recall( testY, y_pred)
cnn_nolearn_2_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_nolearn_2 = time.time()-start_time_cnn_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_nolearn_2))

Retrieving model: HPC_results/es_1_cnn_nolearn_2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)        

In [31]:
start_time_cnn_nolearn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_nolearn_3"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_nolearn_3_P = report_obj4.get_precision( testY, y_pred)
cnn_nolearn_3_R = report_obj4.get_recall( testY, y_pred)
cnn_nolearn_3_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_nolearn_3 = time.time()-start_time_cnn_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_nolearn_3))

Retrieving model: HPC_results/es_1_cnn_nolearn_3
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)        

In [32]:
start_time_cnn_nolearn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_nolearn_4"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_nolearn_4_P = report_obj4.get_precision( testY, y_pred)
cnn_nolearn_4_R = report_obj4.get_recall( testY, y_pred)
cnn_nolearn_4_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_nolearn_4 = time.time()-start_time_cnn_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_nolearn_4))

Retrieving model: HPC_results/es_1_cnn_nolearn_4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)        

In [33]:
start_time_cnn_nolearn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_nolearn_5"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_nolearn_5_P = report_obj4.get_precision( testY, y_pred)
cnn_nolearn_5_R = report_obj4.get_recall( testY, y_pred)
cnn_nolearn_5_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_nolearn_5 = time.time()-start_time_cnn_nolearn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_nolearn_5))

Retrieving model: HPC_results/es_1_cnn_nolearn_5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)        

In [34]:
avg_time = (time_cnn_nolearn_1+time_cnn_nolearn_2+time_cnn_nolearn_3+time_cnn_nolearn_4+time_cnn_nolearn_5)/5
print("CNN nolearn avg test time:\t",avg_time)

CNN nolearn avg test time:	 187.24417552947997


### cnn learn

In [35]:
start_time_cnn_learn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_learn_1"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_learn_1_P = report_obj4.get_precision( testY, y_pred)
cnn_learn_1_R = report_obj4.get_recall( testY, y_pred)
cnn_learn_1_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_learn_1 = time.time()-start_time_cnn_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_learn_1))

Retrieving model: HPC_results/es_1_cnn_learn_1
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          

In [36]:
start_time_cnn_learn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_learn_2"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_learn_2_P = report_obj4.get_precision( testY, y_pred)
cnn_learn_2_R = report_obj4.get_recall( testY, y_pred)
cnn_learn_2_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_learn_2 = time.time()-start_time_cnn_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_learn_2))

Retrieving model: HPC_results/es_1_cnn_learn_2
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          

In [37]:
start_time_cnn_learn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_learn_3"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_learn_3_P = report_obj4.get_precision( testY, y_pred)
cnn_learn_3_R = report_obj4.get_recall( testY, y_pred)
cnn_learn_3_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_learn_3 = time.time()-start_time_cnn_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_learn_3))

Retrieving model: HPC_results/es_1_cnn_learn_3
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          

In [38]:
start_time_cnn_learn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_learn_4"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_learn_4_P = report_obj4.get_precision( testY, y_pred)
cnn_learn_4_R = report_obj4.get_recall( testY, y_pred)
cnn_learn_4_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_learn_4 = time.time()-start_time_cnn_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_learn_4))

Retrieving model: HPC_results/es_1_cnn_learn_4
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          

In [39]:
start_time_cnn_learn = time.time()

# CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
modelName = "HPC_results/"+LANGUAGE + "_" + TRAIN_AMOUNT + "_cnn_learn_5"
myModel = retrieve_model( modelName, modelDir)
myModel.summary()

raw_y_pred = myModel.predict( testX_dict)
y_pred = eh.convert_raw_y_pred( raw_y_pred)

# construct report object
report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
report_obj4.connect_to_dataClass( vocabData)

cnn_learn_5_P = report_obj4.get_precision( testY, y_pred)
cnn_learn_5_R = report_obj4.get_recall( testY, y_pred)
cnn_learn_5_F1 = report_obj4.get_f1( testY, y_pred)

print("Precision", report_obj4.get_precision( testY, y_pred))
print("Recall\t", report_obj4.get_recall( testY, y_pred))
print("F1\t", report_obj4.get_f1( testY, y_pred))

time_cnn_learn_5 = time.time()-start_time_cnn_learn+preprocessing_time
print("Testing Time for "+modelName)
print("--- %s seconds ---" % (time_cnn_learn_5))

Retrieving model: HPC_results/es_1_cnn_learn_5
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
x (InputLayer)               (None, 11)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 11, 300)           60000000  
_________________________________________________________________
conv1 (Conv1D)               (None, 9, 256)            230656    
_________________________________________________________________
conv2 (Conv1D)               (None, 7, 256)            196864    
_________________________________________________________________
conv3 (Conv1D)               (None, 5, 128)            98432     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 3, 128)            0         
_________________________________________________________________
dropout_1 (Dropout)          

In [40]:
avg_time = (time_cnn_learn_1+time_cnn_learn_2+time_cnn_learn_3+time_cnn_learn_4+time_cnn_learn_5)/5
print("CNN learn avg test time:\t",avg_time)

CNN learn avg test time:	 187.86535434722902


## Results summary

In [41]:
print("MODEL NAME:\t"+modelName[:-2])

print("\ncaps no learn:")
avg_time = (time_caps_nolearn_1+time_caps_nolearn_2+time_caps_nolearn_3+time_caps_nolearn_4+time_caps_nolearn_5)/5
print("caps no learn avg test time:\t",avg_time)

Pavg = (caps_nolearn_1_P+caps_nolearn_2_P+caps_nolearn_3_P+caps_nolearn_4_P+caps_nolearn_5_P)/5
Pstd = np.std([caps_nolearn_1_P,caps_nolearn_2_P,caps_nolearn_3_P,caps_nolearn_4_P,caps_nolearn_5_P])

Ravg = (caps_nolearn_1_R+caps_nolearn_2_R+caps_nolearn_3_R+caps_nolearn_4_R+caps_nolearn_5_R)/5
Rstd = np.std([caps_nolearn_1_R,caps_nolearn_2_R,caps_nolearn_3_R,caps_nolearn_4_R,caps_nolearn_5_R])

F1avg = (caps_nolearn_1_F1+caps_nolearn_2_F1+caps_nolearn_3_F1+caps_nolearn_4_F1+caps_nolearn_5_F1)/5
F1std = np.std([caps_nolearn_1_F1,caps_nolearn_2_F1,caps_nolearn_3_F1,caps_nolearn_4_F1,caps_nolearn_5_F1])

print("avg P (std):\t", Pavg, Pstd)
print("avg R (std):\t", Ravg, Rstd)
print("avg F1 (std):\t", F1avg, F1std)




print("\ncaps learn:")
avg_time = (time_caps_learn_1+time_caps_learn_2+time_caps_learn_3+time_caps_learn_4+time_caps_learn_5)/5
print("caps learn avg test time:\t",avg_time)


Pavg = (caps_learn_1_P+caps_learn_2_P+caps_learn_3_P+caps_learn_4_P+caps_learn_5_P)/5
Pstd = np.std([caps_learn_1_P,caps_learn_2_P,caps_learn_3_P,caps_learn_4_P,caps_learn_5_P])

Ravg = (caps_learn_1_R+caps_learn_2_R+caps_learn_3_R+caps_learn_4_R+caps_learn_5_R)/5
Rstd = np.std([caps_learn_1_R,caps_learn_2_R,caps_learn_3_R,caps_learn_4_R,caps_learn_5_R])

F1avg = (caps_learn_1_F1+caps_learn_2_F1+caps_learn_3_F1+caps_learn_4_F1+caps_learn_5_F1)/5
F1std = np.std([caps_learn_1_F1,caps_learn_2_F1,caps_learn_3_F1,caps_learn_4_F1,caps_learn_5_F1])

print("avg P (std):\t", Pavg, Pstd)
print("avg R (std):\t", Ravg, Rstd)
print("avg F1 (std):\t", F1avg, F1std)



print("\ncnn no learn:")
avg_time = (time_cnn_nolearn_1+time_cnn_nolearn_2+time_cnn_nolearn_3+time_cnn_nolearn_4+time_cnn_nolearn_5)/5
print("CNN no learn avg test time:\t",avg_time)

Pavg = (cnn_nolearn_1_P+cnn_nolearn_2_P+cnn_nolearn_3_P+cnn_nolearn_4_P+cnn_nolearn_5_P)/5
Pstd = np.std([cnn_nolearn_1_P,cnn_nolearn_2_P,cnn_nolearn_3_P,cnn_nolearn_4_P,cnn_nolearn_5_P])

Ravg = (cnn_nolearn_1_R+cnn_nolearn_2_R+cnn_nolearn_3_R+cnn_nolearn_4_R+cnn_nolearn_5_R)/5
Rstd = np.std([cnn_nolearn_1_R,cnn_nolearn_2_R,cnn_nolearn_3_R,cnn_nolearn_4_R,cnn_nolearn_5_R])

F1avg = (cnn_nolearn_1_F1+cnn_nolearn_2_F1+cnn_nolearn_3_F1+cnn_nolearn_4_F1+cnn_nolearn_5_F1)/5
F1std = np.std([cnn_nolearn_1_F1,cnn_nolearn_2_F1,cnn_nolearn_3_F1,cnn_nolearn_4_F1,cnn_nolearn_5_F1])

print("avg P (std):\t", Pavg, Pstd)
print("avg R (std):\t", Ravg, Rstd)
print("avg F1 (std):\t", F1avg, F1std)



print("\ncnn learn:")
avg_time = (time_cnn_learn_1+time_cnn_learn_2+time_cnn_learn_3+time_cnn_learn_4+time_cnn_learn_5)/5

print("CNN learn avg test time:\t",avg_time)
Pavg = (cnn_learn_1_P+cnn_learn_2_P+cnn_learn_3_P+cnn_learn_4_P+cnn_learn_5_P)/5
Pstd = np.std([cnn_learn_1_P,cnn_learn_2_P,cnn_learn_3_P,cnn_learn_4_P,cnn_learn_5_P])

Ravg = (cnn_learn_1_R+cnn_learn_2_R+cnn_learn_3_R+cnn_learn_4_R+cnn_learn_5_R)/5
Rstd = np.std([cnn_learn_1_R,cnn_learn_2_R,cnn_learn_3_R,cnn_learn_4_R,cnn_learn_5_R])

F1avg = (cnn_learn_1_F1+cnn_learn_2_F1+cnn_learn_3_F1+cnn_learn_4_F1+cnn_learn_5_F1)/5
F1std = np.std([cnn_learn_1_F1,cnn_learn_2_F1,cnn_learn_3_F1,cnn_learn_4_F1,cnn_learn_5_F1])

print("avg P (std):\t", Pavg, Pstd)
print("avg R (std):\t", Ravg, Rstd)
print("avg F1 (std):\t", F1avg, F1std)

MODEL NAME:	HPC_results/es_1_cnn_learn

caps no learn:
caps no learn avg test time:	 205.34342050552368
avg P (std):	 0.7662874156598568 0.011100541936340943
avg R (std):	 0.8061893316076546 0.007373362689149126
avg F1 (std):	 0.7856271498589409 0.0029567169067408047

caps learn:
caps learn avg test time:	 204.6618718624115
avg P (std):	 0.8411550819489145 0.006581618242446676
avg R (std):	 0.8265461773134881 0.004599671630219009
avg F1 (std):	 0.8337551065512704 0.0023874731469809697

cnn no learn:
CNN no learn avg test time:	 187.24417552947997
avg P (std):	 0.7396268691148197 0.026603250159621027
avg R (std):	 0.7477812702227975 0.006758096102838481
avg F1 (std):	 0.7434400363292019 0.014356831532781692

cnn learn:
CNN learn avg test time:	 187.86535434722902
avg P (std):	 0.7971412330288754 0.009146449865964914
avg R (std):	 0.7587038920218174 0.010078242764579979
avg F1 (std):	 0.7773424476883172 0.0034973887839142696


### with features

In [42]:
# # CNN  -- Win11_cnn_glove50_learn_drop50_convtwo5_convone3_cos_pos_caps
# modelName = "glove_nolearn_cnn_" + LANGUAGE + "_" + TRAIN_AMOUNT + "_features"
# myModel = retrieve_model( modelName, modelDir)
# myModel.summary()

# raw_y_pred = myModel.predict( testX_dict)
# y_pred = eh.convert_raw_y_pred( raw_y_pred)

# # construct report object
# report_obj4 = eh.EvalDev_Report(modelName=modelName, y_true=testY, raw_y_pred=raw_y_pred, y_pred=y_pred) 
# report_obj4.connect_to_dataClass( vocabData)

# print("Precision", report_obj4.get_precision( testY, y_pred))
# print("Recall\t", report_obj4.get_recall( testY, y_pred))
# print("F1\t", report_obj4.get_f1( testY, y_pred))
# cm = report_obj4.get_confusion_matrix( testY, y_pred)
# print()
# cm