In [1]:
import numpy as np
import pandas as pd
import time
import os

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
def loadNLPVectors(filename, data_dir='nlp_data'):
    """Load a saved NumPy array from the feature-vector directory.

    Parameters
    ----------
    filename : str
        File stem, without the ``.npy`` extension.
    data_dir : str, optional
        Directory that holds the ``.npy`` files. Defaults to ``'nlp_data'``
        (relative to the current working directory), which is the location
        this notebook originally hard-coded.

    Returns
    -------
    The object returned by ``np.load`` for ``<data_dir>/<filename>.npy``.
    """
    path = os.path.join(data_dir, filename + '.npy')
    return np.load(path)

In [4]:
def loadLabels():
    """Return the array stored under the ``labels`` file stem."""
    label_stem = "labels"
    label_array = loadNLPVectors(label_stem)
    return label_array

In [5]:
def createTestData(nlp, targets=None):
    """Return the held-out test portion of a stratified 80/20 split.

    Parameters
    ----------
    nlp : array-like
        Feature matrix to split.
    targets : array-like, optional
        Targets aligned with ``nlp``; also used as the stratification key.
        When omitted, the notebook-level ``labels`` variable is used, which
        is what this function always did before the parameter existed.

    Returns
    -------
    (X_test, y_test)
        Test features and test targets. The training portion is discarded.
    """
    if targets is None:
        # Backward-compatible fallback: relies on the global ``labels``
        # having been loaded before the first call.
        targets = labels
    # The fixed random_state keeps the split reproducible between runs.
    _, X_test, _, y_test = train_test_split(
        nlp,
        targets,
        test_size=0.2,
        random_state=42,
        shuffle=True,
        stratify=targets,
    )
    return X_test, y_test

# Load NLP Data

In [6]:
# File stems (no directory, no ".npy" extension) of the saved feature arrays.
# Full-size feature matrices:
unigram_array = 'feature_array_unigram'
bigram_array = 'feature_array_bigram'
tfidf_array = 'feature_array_tfidf'
wordvec_array = 'feature_array_word2vec'
# "Reduced" variants of the first three:
unigram_reduced = 'reduced_unigram'
bigram_reduced = 'reduced_bigram'
tfidf_reduced = 'reduced_tfidf'

In [7]:
# Load each saved feature matrix by its file stem.
unigram = loadNLPVectors(unigram_array)
bigram = loadNLPVectors(bigram_array)
tfidf = loadNLPVectors(tfidf_array)
word2vec = loadNLPVectors(wordvec_array)
reduced_unigram = loadNLPVectors(unigram_reduced)
reduced_bigram = loadNLPVectors(bigram_reduced)
reduced_tfidf = loadNLPVectors(tfidf_reduced)
# createTestData reads `labels` as a global, so this must run before any split.
labels = loadLabels()

# Create Test Datasets

In [8]:
# Held-out 20% stratified test split of the unigram features.
x_uni, y_uni = createTestData(unigram)

In [9]:
# Held-out 20% stratified test split of the bigram features.
x_big, y_big = createTestData(bigram)

In [10]:
# Held-out 20% stratified test split of the TF-IDF features.
x_tfidf, y_tfidf = createTestData(tfidf)

In [11]:
# Held-out 20% stratified test split of the word2vec features.
x_vec, y_vec = createTestData(word2vec)

In [12]:
# Held-out 20% stratified test split of the reduced unigram features.
x_runi, y_runi = createTestData(reduced_unigram)

In [13]:
# Held-out 20% stratified test split of the reduced bigram features.
x_rbig, y_rbig = createTestData(reduced_bigram)

In [14]:
# Held-out 20% stratified test split of the reduced TF-IDF features.
x_rtfidf, y_rtfidf = createTestData(reduced_tfidf)

# Import Models

In [15]:
# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in
# 0.23. Prefer the standalone joblib package and fall back to the vendored
# copy only on old installations that lack it.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

In [16]:
# Sub-folder names under models/, one per feature representation.
feature_folders = [
    "unigram",
    "bigram",
    "tfidf",
    "word2vec",
    "reduced_unigram",
    "reduced_bigram",
    "reduced_tfidf",
]
# Model file stems; each is stored as <folder>/<stem>.pkl.
classifiers = [
    "rand_forest",
    "log_reg",
    "lin_reg",
    "naive_bayes",
    "svm",
]

In [17]:
def loadModel(nlp_index, clf_index):
    """Load the persisted model for a feature-set / classifier pair.

    ``nlp_index`` indexes ``feature_folders`` and ``clf_index`` indexes
    ``classifiers``; the file read is ``models/<folder>/<classifier>.pkl``.
    """
    folder = feature_folders[nlp_index]
    clf_name = classifiers[clf_index]
    # NOTE: joblib.load unpickles the file, so only load trusted model files.
    return joblib.load('models/{}/{}.pkl'.format(folder, clf_name))

# Hyperparameter Optimization