In [5]:
from importlib import reload
import utils
import models
from time import time
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the AMI corpus once; every model below reads this shared `corpus` object.
# seed = 75 is passed to the corpus constructor and embed_vec = 'glove300' selects
# the pretrained vectors (the log below shows glove.6B.300d.txt being loaded).
corpus = utils.AMI_Corpus(seed = 75, embed_vec = 'glove300')

Loading corpus from /home/jake_miller/final/nlp-final-project/data
Begin corpus post-processing ...
Splitting corpus into training and test ...
Creating vocabulary from training set ...
Found 8435 unique words.
Building initial embedding matrix ...
(8437, 300)
loading pretrained vectors from glove.6B.300d.txt


# First Up: LSTM-Softmax

In [10]:
def run_lstm_model(batch_size = 512, epochs = 25, num_layers = 3, hidden_state_size = 150,
                   dropout_rate = 0.4, bidirectional = True, stateful = False,
                   trainable_embed = True, patience = 5):
    """Train the LSTM-Softmax model on the global ``corpus`` and evaluate it on the test split.

    Every argument defaults to the value that used to be hard-coded in the
    body, so ``run_lstm_model()`` runs exactly the same experiment as before.

    Args:
        batch_size: batch size given to the model and to all three generators.
        epochs: maximum number of training epochs (early stopping may end sooner).
        num_layers: forwarded unchanged to ``models.LSTMSoftmax``.
        hidden_state_size: forwarded unchanged to ``models.LSTMSoftmax``.
        dropout_rate: forwarded unchanged to ``models.LSTMSoftmax``.
        bidirectional: forwarded unchanged to ``models.LSTMSoftmax``.
        stateful: forwarded unchanged to ``models.LSTMSoftmax``.
        trainable_embed: forwarded unchanged to ``models.LSTMSoftmax``.
        patience: epochs without ``val_loss`` improvement before training stops.

    Returns:
        list: ``[train_seconds, test_seconds] + results`` where ``results`` is
        the list returned by ``evaluate_generator`` on the test generator
        (for the compile call below: loss followed by the ``acc`` metric).
    """
    from tensorflow.keras.callbacks import EarlyStopping

    lstm = models.LSTMSoftmax(corpus,
                              batch_size = batch_size,
                              num_layers = num_layers,
                              dropout_rate = dropout_rate,
                              hidden_state_size = hidden_state_size,
                              stateful = stateful,
                              bidirectional = bidirectional,
                              trainable_embed = trainable_embed)

    lstm.model.compile(optimizer = 'adagrad', metrics = ['acc'], loss = 'categorical_crossentropy')

    # one generator per data split, all built with the same settings
    ug_train, ug_val, ug_test = [
        utils.UtteranceGenerator(corpus, split, batch_size = batch_size, algo = "LSTM_Soft")
        for split in ("train", "val", "test")
    ]

    # stop once val_loss stalls and roll back to the best weights seen
    es = EarlyStopping(monitor='val_loss', patience=patience, verbose=0, restore_best_weights = True)

    train_start = time()

    # note to self, maybe change validation_steps and validation_freq
    # (the returned History object was never used, so it is not kept)
    lstm.model.fit_generator(ug_train, epochs=epochs, verbose=1, callbacks = [es],
                             validation_data=ug_val, use_multiprocessing=False, shuffle=True)
    train_end = time()

    results = lstm.model.evaluate_generator(ug_test)

    test_end = time()

    return [(train_end - train_start), (test_end - train_end)] + results


In [12]:
# Train + evaluate the LSTM-Softmax model. The returned list is
# [train seconds, test seconds] followed by the evaluate_generator results.
lstm_results = run_lstm_model()

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25


# Next, CNN model

In [16]:
def run_cnn_model(batch_size = 512, epochs = 25, d1 = 0, d2 = 2, filters = 200,
                  kernel_size = 3, hidden_units = 140, dropout_rate = 0.5,
                  trainable_embed = False, sequence_length = 3, patience = 5):
    """Train the CNN model on the global ``corpus`` and evaluate it on the test split.

    Every argument defaults to the value that used to be hard-coded in the
    body, so ``run_cnn_model()`` runs exactly the same experiment as before.
    The old ``embed_vec`` local was never read and has been removed.

    Args:
        batch_size: batch size given to the model and to all three generators.
        epochs: maximum number of training epochs (early stopping may end sooner).
        d1: forwarded unchanged to ``models.CNN``.
        d2: forwarded unchanged to ``models.CNN``.
        filters: forwarded unchanged to ``models.CNN``.
        kernel_size: forwarded unchanged to ``models.CNN``.
        hidden_units: forwarded unchanged to ``models.CNN``.
        dropout_rate: forwarded unchanged to ``models.CNN``.
        trainable_embed: forwarded unchanged to ``models.CNN``.
        sequence_length: ``sequence_length`` given to every ``UtteranceGenerator``.
            NOTE(review): the default 3 happens to equal d1 + d2 + 1 for the
            default d1/d2 -- confirm whether these must be kept in sync.
        patience: epochs without ``val_loss`` improvement before training stops.

    Returns:
        list: ``[train_seconds, test_seconds] + results`` where ``results`` is
        the list returned by ``evaluate_generator`` on the test generator
        (for the compile call below: loss followed by the ``acc`` metric).
    """
    from tensorflow.keras.callbacks import EarlyStopping

    cnn = models.CNN(corpus = corpus, d1 = d1, d2 = d2,
                     batch_size = batch_size,
                     filters = filters,
                     kernel_size = kernel_size,
                     hidden_units = hidden_units,
                     dropout_rate = dropout_rate,
                     trainable_embed = trainable_embed)

    cnn.model.compile(optimizer = 'adagrad', metrics = ['acc'], loss = 'categorical_crossentropy')

    # one generator per data split, all built with the same settings
    ug_train, ug_val, ug_test = [
        utils.UtteranceGenerator(corpus, split, batch_size = batch_size, algo = "CNN",
                                 sequence_length = sequence_length)
        for split in ("train", "val", "test")
    ]

    # stop once val_loss stalls and roll back to the best weights seen
    es = EarlyStopping(monitor='val_loss', patience=patience, verbose=0, restore_best_weights = True)

    train_start = time()

    # note to self, maybe change validation_steps and validation_freq
    # (the returned History object was never used, so it is not kept)
    cnn.model.fit_generator(ug_train, epochs=epochs, verbose=1, callbacks = [es],
                            validation_data=ug_val, use_multiprocessing=False, shuffle=True)
    train_end = time()

    results = cnn.model.evaluate_generator(ug_test)

    test_end = time()

    return [(train_end - train_start), (test_end - train_end)] + results

In [17]:
# Train + evaluate the CNN model. The returned list is
# [train seconds, test seconds] followed by the evaluate_generator results.
cnn_results = run_cnn_model()

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25


# Model 3, the LSTM-CRF

In [31]:
def run_lstmcrf_model(batch_size = 512, epochs = 25, num_layers = 1, hidden_state_size = 300,
                      dropout_rate = 0.4, bidirectional = True, trainable_embed = True,
                      sequence_length = 7, patience = 5):
    """Train the BiLSTM-CRF model on the global ``corpus`` and evaluate it on the test split.

    Every argument defaults to the value that used to be hard-coded in the
    body, so ``run_lstmcrf_model()`` runs exactly the same experiment as
    before. The old ``embed_vec`` local was never read and has been removed.

    Args:
        batch_size: batch size given to the model and to all three generators.
        epochs: maximum number of training epochs (early stopping may end sooner).
        num_layers: forwarded unchanged to ``BiLSTMCRF``.
        hidden_state_size: forwarded unchanged to ``BiLSTMCRF``.
        dropout_rate: forwarded unchanged to ``BiLSTMCRF``.
        bidirectional: forwarded unchanged to ``BiLSTMCRF``.
        trainable_embed: forwarded unchanged to ``BiLSTMCRF``.
        sequence_length: given to both the model and every generator.
        patience: epochs without ``val_loss`` improvement before training stops.

    Returns:
        list: ``[train_seconds, test_seconds] + results`` where ``results`` is
        the list returned by ``evaluate_generator`` on the test generator.
        NOTE(review): its contents depend on ``BiLSTMCRF.compile()``, which is
        not visible here -- presumably loss followed by accuracy; confirm.
    """
    # This model uses its own generator class and standalone Keras callbacks,
    # unlike the tf.keras-based models above.
    from models_crf import BiLSTMCRF, UtteranceGenerator
    from keras.callbacks import EarlyStopping

    lstm_crf = BiLSTMCRF(corpus, batch_size = batch_size,
                         sequence_length = sequence_length,
                         num_layers = num_layers,
                         dropout_rate = dropout_rate,
                         hidden_state_size = hidden_state_size,
                         bidirectional = bidirectional,
                         trainable_embed = trainable_embed)

    lstm_crf.compile()

    # one generator per data split, all built with the same settings
    ug_train, ug_val, ug_test = [
        UtteranceGenerator(corpus, split, batch_size = batch_size, algo = "LSTM_CRF",
                           sequence_length = sequence_length)
        for split in ("train", "val", "test")
    ]

    # stop once val_loss stalls and roll back to the best weights seen
    es = EarlyStopping(monitor='val_loss', patience=patience, verbose=0, restore_best_weights = True)

    train_start = time()

    # note to self, maybe change validation_steps and validation_freq
    # (the returned History object was never used, so it is not kept)
    lstm_crf.model.fit_generator(ug_train, epochs=epochs, verbose=1, callbacks = [es],
                                 validation_data=ug_val, use_multiprocessing=False, shuffle=True)
    train_end = time()

    results = lstm_crf.model.evaluate_generator(ug_test)

    test_end = time()

    return [(train_end - train_start), (test_end - train_end)] + results

In [32]:
# Train + evaluate the LSTM-CRF model. Despite its name, `lstm_crf` holds the
# results list ([train seconds, test seconds] + evaluate_generator results),
# not a model object.
lstm_crf = run_lstmcrf_model()

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25


# Naive Bayes, just because

In [47]:
# Re-import the `models` module so that edits made to it since the kernel
# started are picked up before the Naive Bayes class is used.
reload(models)
def mnb():
    """Train and evaluate the Naive Bayes baseline on the global ``corpus``.

    Builds ``models.NaiveBayes`` with ``ngram_range = (1,2)`` and
    ``tfidf = True``, times ``train()`` and ``eval_on_test()`` separately,
    and reports the result in the same layout as the neural models.

    Returns:
        list: ``[train_seconds, test_seconds, nan, acc]``. The third slot is a
        NaN placeholder where the neural models put their loss; ``acc`` is
        whatever ``eval_on_test()`` returns.
    """
    # A distinct local name: the original rebound `mnb` inside `mnb()`,
    # shadowing the function's own name.
    classifier = models.NaiveBayes(corpus, ngram_range = (1,2), tfidf = True)

    train_start = time()
    classifier.train()
    train_end = time()

    acc = classifier.eval_on_test()
    test_end = time()

    # float("nan") rather than np.nan: numpy is only imported in a later cell,
    # so np.nan raised NameError when the notebook was run top to bottom on a
    # fresh kernel. Both are the same Python float NaN.
    return [(train_end - train_start), (test_end - train_end), float("nan"), acc]

In [48]:
# Run the Naive Bayes baseline. The returned list is
# [train seconds, test seconds, NaN placeholder, accuracy].
nb_results = mnb()

# Comparison

In [50]:
import numpy as np
import pandas as pd

# Every results list shares one layout: wall-clock training seconds, wall-clock
# test seconds, then the test-set loss and accuracy (Naive Bayes stores NaN in
# the loss slot). Naming the columns keeps the table readable on its own
# instead of showing anonymous 0..3 headers.
# NOTE(review): the loss/accuracy order for LSTM-CRF depends on
# BiLSTMCRF.compile(), which is not visible in this notebook -- confirm.
comparison = pd.DataFrame({"LSTM-Softmax":lstm_results, "CNN":cnn_results, "LSTM-CRF":lstm_crf,
                           "Naive Bayes":nb_results}).T
comparison.columns = ["train_seconds", "test_seconds", "test_loss", "test_accuracy"]
comparison

Unnamed: 0,0,1,2,3
LSTM-Softmax,223.217211,1.126713,1.031624,0.645404
CNN,70.461788,0.338728,1.081208,0.62574
LSTM-CRF,319.369356,1.696913,1.009526,0.623276
Naive Bayes,0.06576,0.015556,,0.475121
