In [None]:
import numpy as np
import pandas as pd
import time
import os

In [None]:
def loadNLPVectors(filename):
    """Load the NumPy array saved as ``nlp_data/<filename>.npy``.

    Args:
        filename: Base name of the array file, without directory or
            ``.npy`` extension.

    Returns:
        Whatever ``np.load`` returns for that path.
    """
    # str.join (like the + operator) rejects non-string names with TypeError.
    npy_path = ''.join(('nlp_data/', filename, '.npy'))
    vectors = np.load(npy_path)
    return vectors

In [None]:
def loadLabels():
    """Load the label array stored under the base name ``labels``.

    Returns:
        The result of ``loadNLPVectors("labels")``.
    """
    label_array = loadNLPVectors("labels")
    return label_array

In [None]:
from sklearn.model_selection import train_test_split
def genData(nlp, y = None, test_size = 0.2, random_state = 42):
    """Split a feature matrix and its labels into stratified train/test pairs.

    Args:
        nlp: Feature matrix, one row per sample.
        y: Target labels aligned with the rows of ``nlp``. When omitted, the
            notebook-level ``labels`` array is used, so existing
            ``genData(features)`` calls behave exactly as before.
        test_size: Fraction of samples held out for testing (default 0.2).
        random_state: Seed for the shuffle, so every feature set receives
            the same row partition (default 42).

    Returns:
        ``(train, test)`` where ``train`` is ``[X_train, y_train]`` and
        ``test`` is ``[X_test, y_test]``.
    """
    if y is None:
        # Fall back to the global defined by the data-loading cell; this is
        # looked up at call time, so that cell must have run first.
        y = labels

    X_train, X_test, y_train, y_test = train_test_split(nlp, y,
                                                        test_size = test_size,
                                                        random_state = random_state,
                                                        shuffle = True,
                                                        stratify = y)

    train = [X_train, y_train]
    test = [X_test, y_test]

    return train, test

# Load NLP Data

In [None]:
# Base names (no directory, no extension) of the saved feature matrices,
# as expected by loadNLPVectors.
unigram_array, bigram_array, tfidf_array, wordvec_array = (
    "feature_array_unigram",
    "feature_array_bigram",
    "feature_array_tfidf",
    "feature_array_word2vec",
)

# Base names of the "reduced" variants of the unigram / bigram / TF-IDF matrices.
unigram_reduced, bigram_reduced, tfidf_reduced = (
    "reduced_unigram",
    "reduced_bigram",
    "reduced_tfidf",
)

In [None]:
# Full-size feature matrices.
unigram = loadNLPVectors(filename=unigram_array)
bigram = loadNLPVectors(filename=bigram_array)
tfidf = loadNLPVectors(filename=tfidf_array)
word2vec = loadNLPVectors(filename=wordvec_array)

# Reduced feature matrices.
reduced_unigram = loadNLPVectors(filename=unigram_reduced)
reduced_bigram = loadNLPVectors(filename=bigram_reduced)
reduced_tfidf = loadNLPVectors(filename=tfidf_reduced)

# Target labels; genData reads this module-level name.
labels = loadLabels()

# Generate Training and Testing Data

In [None]:
# Train/test pairs for the unigram features.
train_uni, test_uni = genData(nlp=unigram)

In [None]:
# Train/test pairs for the bigram features.
train_big, test_big = genData(nlp=bigram)

In [None]:
# Train/test pairs for the TF-IDF features.
train_tfidf, test_tfidf = genData(nlp=tfidf)

In [None]:
# Train/test pairs for the word2vec features.
train_vec, test_vec = genData(nlp=word2vec)

In [None]:
# Train/test pairs for the reduced unigram features.
train_runi, test_runi = genData(nlp=reduced_unigram)

In [None]:
# Train/test pairs for the reduced bigram features.
train_rbig, test_rbig = genData(nlp=reduced_bigram)

In [None]:
# Train/test pairs for the reduced TF-IDF features.
train_rtfidf, test_rtfidf = genData(nlp=reduced_tfidf)

# Keras - Neural Networks

In [None]:
from tensorflow.keras import models
from tensorflow.keras import layers

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [None]:
def plotLoss(hist, epoch_range):
    """Plot per-epoch validation loss and validation accuracy.

    Args:
        hist: Training history object (as returned by ``model.fit``) whose
            ``history`` dict contains ``'val_loss'`` and ``'val_accuracy'``.
        epoch_range: Number of epochs the caller trained for. Kept for
            backward compatibility; the x-axis is derived from the number of
            epochs actually recorded, so a history shorter or longer than
            ``epoch_range`` no longer makes matplotlib fail on mismatched
            x/y lengths.

    Returns:
        None. Two figures are drawn and shown.
    """
    validation_loss = hist.history['val_loss']
    validation_acc = hist.history['val_accuracy']

    # One tick per recorded epoch, numbered from 1.
    epochs = range(1, len(validation_loss) + 1)

    plt.figure(1)
    plt.title("Loss")
    plt.plot(epochs, validation_loss, 'bo')
    plt.xlabel('Epochs')
    plt.ylabel('Validation Loss')

    plt.figure(2)
    plt.title("Accuracy")
    plt.plot(epochs, validation_acc, 'ro')
    plt.xlabel('Epochs')
    plt.ylabel('Validation Accuracy')

    plt.show()

In [None]:
def evaluate(model, history, test, epoch_range, nn_name, model_name):
    """Print test-set classification metrics for a trained model and plot its history.

    Args:
        model: Trained binary classifier whose ``predict`` yields one sigmoid
            score per sample (as the networks built by ``DNN`` do).
        history: Training history for ``model``; forwarded to ``plotLoss``.
        test: ``[X_test, y_test]`` pair.
        epoch_range: Number of training epochs; forwarded to ``plotLoss``.
        nn_name: Network family label used in the printed heading.
        model_name: Feature-set label used in the printed heading.

    Returns:
        None. Metrics are printed and the validation curves are plotted.
    """
    X_test = test[0]
    y_test = test[1]

    # Flatten the (n_samples, 1) sigmoid output to a 1-D score vector.
    pred_sigmoid = np.asarray(model.predict(X_test)).ravel()

    # Same decision rule as before: scores below 0.5 -> class 0, otherwise 1.
    y_pred = np.where(pred_sigmoid < .5, 0, 1)

    acc = accuracy_score(y_test, y_pred)
    # ROC AUC is a ranking metric and must see the continuous scores; feeding
    # it hard 0/1 predictions reduces the curve to a single operating point.
    roc_auc = roc_auc_score(y_test, pred_sigmoid)
    f = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    print(nn_name + ' ' + model_name + ' Evaluation: ')
    print('Accuracy:       ', acc)
    print('ROC AUC Score:  ', roc_auc)
    print('F1 Score:       ', f)
    print('Precision:      ', precision)
    print('Recall:         ', recall)

    plotLoss(history, epoch_range)

# Generate Sequential Neural Network

In [None]:
def DNN(train, test, iterations = 10):
    """Build, train and score a dense binary classifier sized to the input width.

    Args:
        train: ``[X_train, y_train]`` pair.
        test: ``[X_test, y_test]`` pair. It is used both as ``validation_data``
            during fitting and for the final ``model.evaluate`` call.
            NOTE(review): the per-epoch validation metrics are therefore
            test-set metrics; consider a separate validation split.
        iterations: Number of training epochs (default 10).

    Returns:
        ``(model, history)``: the fitted Keras model and the object returned
        by ``model.fit``.
    """
    X_train = train[0]
    y_train = train[1]

    X_test = test[0]
    y_test = test[1]

    dim = len(X_train[0])

    # Hidden-layer widths chosen from the input width. Per the original note
    # the expected widths are ~80000, ~13000, 500, 200 and 50; those map to
    # exactly the same architectures as before. Widths in between now fall
    # into the nearest smaller tier instead of producing a network with no
    # hidden layers and no declared input shape.
    if dim > 15000:
        hidden_units = (500, 64, 4)
    elif dim > 10000:
        hidden_units = (200, 32, 4)
    elif dim >= 500:
        hidden_units = (64, 16)
    elif dim >= 200:
        hidden_units = (32, 4)
    else:
        hidden_units = (16, 4)

    model = models.Sequential()
    for position, units in enumerate(hidden_units):
        if position == 0:
            # Only the first layer declares the input shape.
            model.add(layers.Dense(units, activation = 'relu', input_shape = (dim,)))
        else:
            # Dropout sits between consecutive hidden layers.
            model.add(layers.Dropout(0.5))
            model.add(layers.Dense(units, activation = 'relu'))
    # Single sigmoid unit: one probability per sample.
    model.add(layers.Dense(1, activation = 'sigmoid'))

    model.summary()
    model.compile(loss = 'binary_crossentropy',
                  optimizer = 'adam',
                  metrics = ['accuracy'])

    start = time.time()
    history = model.fit(X_train, y_train,
                        epochs = iterations,
                        batch_size = 128,
                        validation_data = (X_test, y_test))
    elapsed = time.time() - start

    # [loss, accuracy], matching the compiled loss and metrics list. Stored
    # under its own name so the ``test`` argument is no longer shadowed.
    test_scores = model.evaluate(X_test, y_test)

    print("Training Time:  ", elapsed)
    print("Model Loss:     ", test_scores[0])
    print("Model Accuracy: ", test_scores[1])

    return model, history

# Generate Convolutional Neural Network

In [None]:
def CNN():
    """Placeholder for the convolutional network builder; currently does nothing."""
    return None

# Generate Recurrent Neural Network

In [None]:
def RNN():
    """Placeholder for the recurrent network builder; currently does nothing."""
    return None

# Training Deep Neural Networks

### DNN Reduced Unigram

In [None]:
# Dense network on the reduced unigram features.
epoch_runi = 50
dnn_runi, dnn_runi_history = DNN(train=train_runi, test=test_runi,
                                 iterations=epoch_runi)

### DNN Reduced Bigram

In [None]:
# Dense network on the reduced bigram features.
epoch_rbig = 20
dnn_rbig, dnn_rbig_history = DNN(train=train_rbig, test=test_rbig,
                                 iterations=epoch_rbig)

### DNN Reduced TFIDF

In [None]:
# Dense network on the reduced TF-IDF features.
epoch_rtfidf = 50
dnn_rtfidf, dnn_rtfidf_history = DNN(train=train_rtfidf, test=test_rtfidf,
                                     iterations=epoch_rtfidf)

### DNN Word2Vec

In [None]:
# Dense network on the word2vec features.
epoch_vec = 50
dnn_vec, dnn_vec_history = DNN(train=train_vec, test=test_vec,
                               iterations=epoch_vec)

### DNN Unigram

In [None]:
# Dense network on the full unigram features.
epoch_uni = 50
dnn_uni, dnn_uni_history = DNN(train=train_uni, test=test_uni,
                               iterations=epoch_uni)

### DNN Bigram

In [None]:
# Dense network on the full bigram features.
epoch_big = 50
dnn_big, dnn_big_history = DNN(train=train_big, test=test_big,
                               iterations=epoch_big)

### DNN TFIDF

In [None]:
# Dense network on the full TF-IDF features.
epoch_tfidf = 50
dnn_tfidf, dnn_tfidf_history = DNN(train=train_tfidf, test=test_tfidf,
                                   iterations=epoch_tfidf)

## Evaluating Deep Neural Networks

In [None]:
# Test-set metrics and validation curves for the unigram model.
evaluate(model=dnn_uni, history=dnn_uni_history, test=test_uni,
         epoch_range=epoch_uni, nn_name='DNN', model_name='Unigram')

In [None]:
# Test-set metrics and validation curves for the bigram model.
evaluate(model=dnn_big, history=dnn_big_history, test=test_big,
         epoch_range=epoch_big, nn_name='DNN', model_name='Bigram')

In [None]:
# Test-set metrics and validation curves for the TF-IDF model.
evaluate(model=dnn_tfidf, history=dnn_tfidf_history, test=test_tfidf,
         epoch_range=epoch_tfidf, nn_name='DNN', model_name='TFIDF')

In [None]:
# Test-set metrics and validation curves for the reduced unigram model.
evaluate(model=dnn_runi, history=dnn_runi_history, test=test_runi,
         epoch_range=epoch_runi, nn_name='DNN', model_name='Reduced Unigram')

In [None]:
# Test-set metrics and validation curves for the reduced bigram model.
evaluate(model=dnn_rbig, history=dnn_rbig_history, test=test_rbig,
         epoch_range=epoch_rbig, nn_name='DNN', model_name='Reduced Bigram')

In [None]:
# Test-set metrics and validation curves for the reduced TF-IDF model.
evaluate(model=dnn_rtfidf, history=dnn_rtfidf_history, test=test_rtfidf,
         epoch_range=epoch_rtfidf, nn_name='DNN', model_name='Reduced TFIDF')

In [None]:
# Test-set metrics and validation curves for the word2vec model.
evaluate(model=dnn_vec, history=dnn_vec_history, test=test_vec,
         epoch_range=epoch_vec, nn_name='DNN', model_name='Word2Vec')

# Training and Evaluating Convolutional Neural Networks

### CNN Reduced Unigram

### CNN Reduced Bigram

### CNN Reduced TFIDF

### CNN Word2Vec

### CNN Unigram

### CNN Bigram

### CNN TFIDF

# Training and Evaluating Recurrent Neural Networks

### RNN Reduced Unigram

### RNN Reduced Bigram

### RNN Reduced TFIDF

### RNN Word2Vec

### RNN Unigram

### RNN Bigram

### RNN TFIDF