This notebook contains the neural network code. Here we build a 1D-CNN and an LSTM architecture and use them to run classification with each of our embedding models. This follows much the same logic as the '04 machine learning' notebooks. I use Keras with a TensorFlow backend, together with code from the utility, feature_engineering, preprocessing and embeddings scripts.

In [1]:
import sys
sys.path.insert(0, 'scripts/')

In [2]:
import utility
import feature_engineering as fe
import preprocessing as pp
import embeddings as em

[nltk_data] Error loading punkt: <urlopen error [Errno 8] nodename nor
[nltk_data]     servname provided, or not known>
[nltk_data] Error loading stopwords: <urlopen error [Errno 8] nodename
[nltk_data]     nor servname provided, or not known>
[nltk_data] Error loading stopwords: <urlopen error [Errno 8] nodename
[nltk_data]     nor servname provided, or not known>
[nltk_data] Error loading wordnet: <urlopen error [Errno 8] nodename
[nltk_data]     nor servname provided, or not known>


In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score
import nltk
from nltk import word_tokenize
from gensim.models.word2vec import Word2Vec
from gensim.models import FastText
import functools
from sklearn.metrics import roc_auc_score

In [4]:
from gensim.models.keyedvectors import KeyedVectors
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.optimizers import RMSprop
from keras.layers import Dense, Flatten, LSTM, Conv1D, MaxPooling1D
from keras.layers import Dropout, Activation, GlobalMaxPooling1D, GlobalAveragePooling1D, Bidirectional
from keras.layers.embeddings import Embedding
from keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer

Using TensorFlow backend.


First we set up all the necessary parameters, as well as the parameter lists to be used in the random-search parameter tuning.

In [5]:
# How many times getSchizResults repeats the full train/evaluate run.
trials = 1

# Flags for the two embedding training modes (CBOW / skip-gram).
# NOTE(review): not referenced in the visible cells — presumably consumed elsewhere.
cbowTrain, sgTrain = 0, 1

# Token-type switches handed to utility.getTokenTypes.
wordToken, charToken = False, True

# Cleaning schedules. The "m2" variants are the shared base steps; the "m1"
# variants append one extra step (emoticon handling / lowercasing).
m2EClean = ['Tokens', 'Lemma', 'Stopwords', 'Phrases']
m1EClean = m2EClean + ['Emoticons']
m2LClean = ['Tokens', 'Lemma', 'Stopwords', 'Phrases']
m1LClean = m2LClean + ['Lowercase']

# GloVe file-name pieces.
gloveInFileName = 'data/glove/glove.twitter.27B.'
gloveOutFileName = 'gensim_glove_vectors_'
gloveDim = ['{}d.txt'.format(size) for size in (25, 50, 100, 200)]

# Annotated tweet CSVs.
fileNameSchiz1 = 'data/dataOut/schiz/annFinalSchiz_1.csv'
fileNameSchiz2 = 'data/dataOut/schiz/annFinalSchiz_2.csv'
fileNameStig = 'data/dataOut/stigma/annFinalStig.csv'

In [6]:
# Read the CSV at fileNameSchiz1 and keep its 'Tweet' and 'Classification'
# columns (passed as the training text/labels in the experiment cells below).
socialDf = pd.read_csv(fileNameSchiz1, encoding='utf-8')
textSchiz1, labelsSchiz1 = socialDf['Tweet'], socialDf['Classification']

In [7]:
# Read the CSV at fileNameSchiz2 and keep its 'Tweet' and 'Classification'
# columns (passed as the test text/labels in the experiment cells below).
socialDf = pd.read_csv(fileNameSchiz2, encoding='utf-8')
textSchiz2, labelsSchiz2 = socialDf['Tweet'], socialDf['Classification']

In [8]:
# Read the CSV at fileNameStig and keep its 'Tweet' and 'Classification' columns.
socialDf = pd.read_csv(fileNameStig, encoding='utf-8')
textStigma, labelsStigma = socialDf['Tweet'], socialDf['Classification']

In [9]:
# Candidate values for the random-search hyper-parameter tuning.
dropout = [0.0, 0.1, 0.2, 0.3, 0.4, 0.6]
neurons = [5, 10, 20, 50, 100]
activation = ['softmax', 'relu', 'tanh', 'sigmoid']
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adam']

# Search space for getCNNModel; the keys match that builder's keyword names.
# The dropout / neurons / optimizer entries share the list objects defined above.
cnnParamGrid = {
    'numFilters': [32, 64, 128],
    'kernelSize': [3, 5, 10],
    'dropout': dropout,
    'neurons': neurons,
    'activation1': ['relu', 'sigmoid', 'softmax'],
    'activation2': ['relu', 'sigmoid', 'softmax'],
    'optimizer': optimizer,
}

In [10]:
'''The code in this cell is taken from
https://datascience.stackexchange.com/questions/45165/how-to-get-accuracy-f1-precision-and-recall-for-a-keras-model
it calculates metrics for the neural networks
'''

from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall metric built from Keras backend ops.

    Counts rounded, [0, 1]-clipped ``y_true * y_pred`` as true positives and
    divides by the rounded, clipped ``y_true`` total. ``K.epsilon()`` is added
    to the denominator to avoid division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actualPositives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (actualPositives + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision metric built from Keras backend ops.

    Counts rounded, [0, 1]-clipped ``y_true * y_pred`` as true positives and
    divides by the rounded, clipped ``y_pred`` total. ``K.epsilon()`` is added
    to the denominator to avoid division by zero.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    flaggedPositives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (flaggedPositives + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 metric: harmonic mean of ``precision_m`` and ``recall_m``.

    ``K.epsilon()`` in the denominator keeps the result finite when both
    precision and recall are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p*r)/(p+r+K.epsilon())
    return 2*ratio


def auroc(y_true, y_pred):
    """Wrap scikit-learn's ``roc_auc_score`` as a tensor op via ``tf.py_func``.

    The op is declared with output type ``tf.double``.
    """
    inputs = (y_true, y_pred)
    return tf.py_func(roc_auc_score, inputs, tf.double)

def auc(y_true, y_pred):
     """AUC metric based on ``tf.metrics.auc`` (TF1-style API).

     Returns element ``[1]`` of the pair produced by ``tf.metrics.auc``
     (per the TF1 docs this is the update op — confirm for the installed
     version). Note the local name ``auc`` shadows this function inside
     its own body.
     """
     auc = tf.metrics.auc(y_true, y_pred)[1]
     # tf.metrics.* create local variables; initialise them in the Keras
     # session before the op is evaluated. Must run after the line above.
     K.get_session().run(tf.local_variables_initializer())
     return auc

Below we define three models. The bidirectional LSTM model was not used in the paper, just the LSTM and the CNN. These models are then fitted and evaluated on each of our embedding models.

In [11]:
def getLSTMBidrectional(embeddingsMatrix, vocabDim, embeddingDim, sequenceDim, activation='sigmoid', train=False, dropout=0.3, 
            reDropout=0.3, optimizer='RMSprop', neurons=100, metrics=['acc',f1_m,precision_m, recall_m]):
    """Build and compile a bidirectional-LSTM binary classifier.

    Args:
        embeddingsMatrix: Initial weights for the embedding layer.
        vocabDim: Input dimension (vocabulary size) of the embedding layer.
        embeddingDim: Output dimension of the embedding layer.
        sequenceDim: Length of each (padded) input sequence.
        activation: Activation of the single output unit. Previously this
            argument was accepted but ignored ('sigmoid' was hard-coded);
            the default keeps the old behaviour.
        train: Whether the embedding weights are trainable.
        dropout: Input dropout of the LSTM.
        reDropout: Recurrent dropout of the LSTM.
        optimizer: Optimizer passed to ``compile``.
        neurons: Number of LSTM units (per direction).
        metrics: Metrics passed to ``compile``; their order determines the
            order of the values returned by ``model.evaluate``.

    Returns:
        The compiled Keras ``Sequential`` model (binary cross-entropy loss).
    """
    model = Sequential()
    model.add(Embedding(vocabDim, embeddingDim, weights=[embeddingsMatrix], input_length=sequenceDim, trainable=train))
    model.add(Bidirectional(LSTM(neurons, dropout=dropout, recurrent_dropout=reDropout, return_sequences=False)))
    # Honour the caller-supplied output activation instead of a fixed 'sigmoid'.
    model.add(Dense(1, activation=activation))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=metrics)

    return model

'''
LSTM model from here
code has been adapted from here
https://towardsdatascience.com/multi-class-text-classification-with-lstm-1590bee1bd17
'''
def getLSTMModel(embeddingsMatrix, vocabDim, embeddingDim, sequenceDim, activation='sigmoid', train=False, dropout=0.3, 
            reDropout=0.3, optimizer='RMSprop', neurons=100, metrics=['acc',f1_m,precision_m, recall_m]):
    """Build and compile a single-layer LSTM binary classifier.

    Args:
        embeddingsMatrix: Initial weights for the embedding layer.
        vocabDim: Input dimension (vocabulary size) of the embedding layer.
        embeddingDim: Output dimension of the embedding layer.
        sequenceDim: Length of each (padded) input sequence.
        activation: Activation of the single output unit. Previously this
            argument was accepted but ignored ('sigmoid' was hard-coded);
            the default keeps the old behaviour.
        train: Whether the embedding weights are trainable.
        dropout: Input dropout of the LSTM.
        reDropout: Recurrent dropout of the LSTM.
        optimizer: Optimizer passed to ``compile``.
        neurons: Number of LSTM units.
        metrics: Metrics passed to ``compile``; their order determines the
            order of the values returned by ``model.evaluate``.

    Returns:
        The compiled Keras ``Sequential`` model (binary cross-entropy loss).
    """
    model = Sequential()
    model.add(Embedding(vocabDim, embeddingDim, weights=[embeddingsMatrix], input_length=sequenceDim, trainable=train))
    model.add(LSTM(neurons, dropout=dropout, recurrent_dropout=reDropout))
    # Honour the caller-supplied output activation instead of a fixed 'sigmoid'.
    model.add(Dense(1, activation=activation))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=metrics)

    return model


'''
1DCNN models with one CNN layer - code has been adapted from 
https://realpython.com/python-keras-text-classification/#convolutional-neural-networks-cnn
'''
def getCNNModel(embeddingsMatrix, vocabDim, embeddingDim,  sequenceDim, kernelSize=3, 
                numFilters=64, activation1='relu', activation2='sigmoid', dropout=0.1, 
                neurons=20, activation3=None, optimizer='SGD', train=False, metrics=['acc',f1_m,precision_m, recall_m]):
    """Build and compile a 1D-CNN binary classifier with one convolution layer.

    Layer stack: embedding -> Conv1D -> global max pooling -> dropout ->
    dense hidden layer -> single-unit output. ``activation1`` is used for
    both the convolution and the hidden dense layer; ``activation2`` is the
    output activation. ``activation3`` is accepted but not used by the body.

    Returns:
        The compiled Keras ``Sequential`` model (binary cross-entropy loss).
    """
    layerStack = [
        Embedding(vocabDim, embeddingDim, weights=[embeddingsMatrix], input_length=sequenceDim, trainable=train),
        Conv1D(numFilters, kernelSize, activation=activation1),
        GlobalMaxPooling1D(),
        Dropout(dropout),
        Dense(neurons, activation=activation1),
        Dense(1, activation=activation2),
    ]
    model = Sequential(layerStack)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=metrics)
    return model

'''
Performs the random parameter tuning; uses KerasClassifier to wrap the model for the scikit-learn tuning module
'''
def getRandom(nn, paramGrid, xTrain, yTrain, xTest, yTest, epochs=50, batch=10, cv=5, niter=50):
    """Run a randomized hyper-parameter search over a Keras model builder.

    Args:
        nn: Model-building callable handed to ``KerasClassifier`` as ``build_fn``.
        paramGrid: Parameter distributions for ``RandomizedSearchCV``.
        xTrain, yTrain: Data the search is fitted on.
        xTest, yTest: Data the fitted search is scored on.
        epochs, batch: Training epochs and batch size for each fit.
        cv: Number of cross-validation folds.
        niter: Number of parameter settings sampled.

    Returns:
        Tuple ``(fittedSearch, testScore)``.
    """
    wrapped = KerasClassifier(build_fn=nn, epochs=epochs, batch_size=batch, verbose=False)
    search = RandomizedSearchCV(estimator=wrapped, param_distributions=paramGrid, cv=cv, verbose=1, n_iter=niter)

    fitted = search.fit(xTrain, yTrain)
    return (fitted, fitted.score(xTest, yTest))
    
'''
fit the model to train data
'''   
def fitModel(model, xTrain, yTrain, xTest, yTest, epochs=100):
    """Train ``model`` and score it on the test data.

    Fits with a 10% validation split, then evaluates on ``xTest``/``yTest``.
    ``model.evaluate`` must yield exactly five values, unpacked as
    ``(loss, accuracy, f1_score, precision, recall)`` — the order produced by
    the metric list the model builders in this notebook compile with.

    Returns:
        Tuple ``(loss, accuracy, f1_score, precision, recall)``.
    """
    model.fit(xTrain, yTrain, epochs=epochs, validation_split=0.1)
    scores = model.evaluate(xTest, yTest, verbose=False)
    loss, accuracy, f1_score, precision, recall = scores
    return (loss, accuracy, f1_score, precision, recall)


In [12]:
'''
initialize preprocessing
'''

def initializePreProcessing(text, tokenType, cleanMethods=['Tokens', 'Lemma', 'Stopwords', 'Phrases', 'Emoticons']):
    """Clean ``text`` with ``pp.SocialPreProcessing``.

    Args:
        text: Raw text passed to the ``SocialPreProcessing`` constructor.
        tokenType: Token-type flag passed to the same constructor.
        cleanMethods: Names of the cleaning steps handed to ``clean``.

    Returns:
        Whatever ``SocialPreProcessing.clean`` returns for ``cleanMethods``.
    """
    return pp.SocialPreProcessing(text, tokenType).clean(cleanMethods)

'''
get features and pad each tweet to a length of 200. Anything longer gets truncated
'''
def getFeatures(embed, wIndex, tokens, labels):
    """Convert tokens to index sequences and pad them to a fixed length.

    ``embed.getIndexData`` supplies the index sequences (item 0) and the
    matching labels (item 1). Sequences are left-padded with zeros to 200
    entries; longer sequences are truncated from the front.

    Returns:
        Tuple ``(paddedFeatures, labels)``.
    """
    indexData = embed.getIndexData(tokens, labels, wIndex)
    rawSequences, alignedLabels = indexData[0], indexData[1]

    padded = pad_sequences(
        rawSequences,
        maxlen=200,
        dtype='int32',
        padding='pre',
        truncating='pre',
        value=0.0,
    )
    return (padded, alignedLabels)

'''
initialize the tuning of parameters
'''
def getTune(**kwargs):
    """Merge ``kwargs`` into every parameter grid and build a lazy random-search map.

    NOTE(review): the body reads ``params``, ``cnn``, ``train`` and ``test``,
    none of which are parameters or locals here, so they would have to exist
    as notebook-level globals — none are defined in the visible cells. The
    only visible caller, ``getNeuralClassifiction`` (``tune=True``), also
    passes ``params`` positionally, which this ``**kwargs``-only signature
    rejects with a ``TypeError``. Treat the tuning path as unfinished.

    Returns:
        A ``map`` iterator whose items are the ``(search, testAccuracy)``
        tuples from ``getRandom``; nothing runs until it is consumed.
    """
    
    # Mutates each grid dict in place, adding the keyword arguments as entries.
    for p in params:
        p.update(kwargs)
        
    # x and y are indexed as (features, labels) pairs for train and test.
    evaluate = map(lambda m, p, x, y: getRandom(m, p, x[0], x[1], y[0], y[1]), cnn, params, train, test)
    
    return evaluate
        
    
'''
Get the data and preprocess it, load the embedding models, and convert the train and test data into embeddings. 
Perform parameter tuning if tune==True; otherwise build the NN classifiers and fit them.
'''
def getNeuralClassifiction(libraries, names, paths, textTrain, labelsTrain, textTest, labelsTest, methods, 
    tokenTypes=[False], cleanSchedule=None, models=None, retrain=False, tune=False, params=None):
    """Preprocess the text, build features per embedding model, then train and evaluate.

    All list arguments are consumed in parallel, one entry per embedding model.

    Args:
        libraries: Embedding classes used to load each model (when ``models`` is None).
        names: Embedding model names; ``len(names)`` sets how many times the
            train/test text is preprocessed.
        paths: Paths passed to each model's ``load`` (when ``models`` is None).
        textTrain, labelsTrain: Training text and labels.
        textTest, labelsTest: Test text and labels.
        methods: Model-builder callables (e.g. ``getLSTMModel``, ``getCNNModel``).
        tokenTypes: Token-type flag per model for preprocessing.
        cleanSchedule: List of cleaning-step lists, one per model. Must be
            supplied; the ``None`` default cannot be iterated.
        models: Optional pre-loaded embedding objects to wrap instead of loading.
        retrain: Whether the embedding layer is trainable.
        tune: Run the random-search path instead of a plain fit.
        params: Parameter grids for the tuning path.

    Returns:
        List with one ``fitModel`` result tuple per embedding model
        (when ``tune`` is False).
    """
    tokensTrain = list(map(initializePreProcessing, [textTrain]*len(names), tokenTypes, cleanSchedule))
    tokensTest = list(map(initializePreProcessing, [textTest]*len(names), tokenTypes, cleanSchedule))

    # Identity check: `== None` would be evaluated element-wise on array-like inputs.
    if models is None:
        models = list(map(lambda x, y: em.EmbeddingModel(library=x, name=y), libraries, names))
        for m, p in zip(models, paths):
            m.load(p)
    else:
        models = list(map(lambda x, y: em.EmbeddingModel(model=x, name=y), models, names))

    wIndicies = [embed.getWordIndex() for embed in models]
    embeddings = [embed.model.wv.vectors for embed in models]

    train = list(map(lambda x, y, z: getFeatures(x, y, z.tolist(), labelsTrain), models, wIndicies, tokensTrain))
    test =  list(map(lambda x, y, z: getFeatures(x, y, z.tolist(), labelsTest), models, wIndicies, tokensTest))

    # Dimensions the model builders need, derived from the data itself.
    seqDims = [len(x[0][0]) for x in train]
    vocabDims = [len(w) for w in wIndicies]
    embedDims = [len(m[0]) for m in embeddings]

    if tune:
        # NOTE(review): getTune is declared as getTune(**kwargs), so this positional
        # `params` raises TypeError; the tuning path looks unfinished.
        evaluate = getTune(params, embeddings=embeddings, seqDim=[seqDims[0]], vocabDim=[vocabDims[0]], embedDim=[embedDims[0]])
    else:
        # Both maps are lazy, so each network is built and fitted in turn when
        # list(evaluate) is consumed below.
        models = map(lambda m, x, y, z, k: m(x,y,z,k, train=retrain), methods, embeddings, vocabDims, embedDims, seqDims)
        evaluate = map(lambda m, x, y: fitModel(m, x[0], x[1], y[0], y[1]), models, train, test)

    return list(evaluate)



In [13]:
'''
get the average results for each metric across each classifier
'''

def getAverage(results):
    """Average every metric over trials, separately for each model.

    Args:
        results: One entry per trial; each trial is a sequence with one
            metric tuple per model.

    Returns:
        List with one entry per model, each a list of per-metric means.
    """
    averaged = []
    # zip(*results) regroups the data from "per trial" to "per model".
    for modelRuns in zip(*results):
        metricMeans = []
        # zip(*modelRuns) regroups one model's runs into per-metric columns.
        for metricValues in zip(*modelRuns):
            metricMeans.append(np.array(metricValues).mean())
        averaged.append(metricMeans)
    return averaged

'''
evaluate each model and save down in csv
'''
def getEvalTrials(results, names, path):
    """Average the trial results per model and save a summary CSV.

    Each averaged row follows the order returned by ``fitModel``:
    ``(loss, accuracy, f1, precision, recall)``. The earlier version read
    indices 2/3/4 as precision/recall/f1, which shifted the labels of those
    three columns; the mapping below matches the real order.

    Args:
        results: Per-trial results as accepted by ``getAverage``.
        names: Index labels for the summary rows (one per model).
        path: Output prefix; the file written is ``path + 'Summary.csv'``.
    """
    results = getAverage(results)

    # Positions within each averaged row (index 0 is the loss, not exported).
    accuracy = [row[1] for row in results]
    f1 = [row[2] for row in results]
    precision = [row[3] for row in results]
    recall = [row[4] for row in results]

    # Column order kept as before so existing CSV consumers are unaffected.
    evalDict = {'accuracy':accuracy, 'precision':precision, 'recall':recall, 'f1':f1}
    evalDf = pd.DataFrame(evalDict, index=names)

    evalDf.to_csv(path + 'Summary' + '.csv')

In [14]:
'''
initiate getNeuralClassification function and then call and run for number of trials.
'''
def getSchizResults(libraries, modelNames, paths, textSchiz1, labelsSchiz1, 
                                 textSchiz2, labelsSchiz2, methods, token, cleanSchedule, resultsPath, models=None, retrain=False):
    """Run ``getNeuralClassifiction`` for ``trials`` repetitions and save the averaged summary.

    Args:
        libraries, modelNames, paths: Per-model embedding class, name and load path.
        textSchiz1, labelsSchiz1: Training text and labels.
        textSchiz2, labelsSchiz2: Test text and labels.
        methods: Model-builder callables, one per embedding model.
        token: Token-type flag expanded per model by ``utility.getTokenTypes``.
        cleanSchedule: Cleaning-step lists, one per model.
        resultsPath: Prefix for the summary CSV written by ``getEvalTrials``.
        models: Optional pre-loaded embedding objects.
        retrain: Whether the embedding layers are trainable. It is now
            forwarded; previously ``False`` was hard-coded.

    Returns:
        List with one ``getNeuralClassifiction`` result per trial.
    """
    tokenTypes = utility.getTokenTypes(token, modelNames)
    runTrial = functools.partial(getNeuralClassifiction, libraries, modelNames, paths, textSchiz1, labelsSchiz1,
                                 textSchiz2, labelsSchiz2, methods, tokenTypes, cleanSchedule, models, retrain=retrain)

    # `trials` is the notebook-level repetition count from the config cell.
    resultsSchiz = [runTrial() for _ in range(trials)]

    # Label summary rows with the model names. `models` is None by default,
    # which previously left the CSV rows unlabelled.
    getEvalTrials(resultsSchiz, modelNames, resultsPath)

    return resultsSchiz

### Emoticon Models - All

Perform classification for all data and for data with emoticons removed. The logic follows the same as 04 Machine learning. We run word and character models each for LSTM and CNN, and then run the GloVe word model once for each as well. First let's look at words.

In [15]:
# Embedding model names (word- and character-level).
emWordModels = ['w2vWCBSchiz', 'w2vWCBEmSchiz', 'w2vWSGSchiz', 'w2vWSGEmSchiz', 'ftWCBSchiz', 'ftWCBEmSchiz', 'ftWSGSchiz', 'ftWSGEmSchiz']
emCharModels = ['w2vCCBSchiz', 'w2vCCBEMSchiz', 'w2vCSGSchiz', 'w2vCSGEmSchiz', 'ftCCBSchiz', 'ftCCBEMSchiz', 'ftCSGSchiz', 'ftCSGEmSchiz']

# Load directories: the first four models are Word2Vec, the last four FastText.
path1 = ['embeddings/Word2Vec/schiz/00 emoticons/all/']
path2 = ['embeddings/FastText/schiz/00 emoticons/all/']
paths = path1*4 + path2*4
libraries = [Word2Vec]*4 + [FastText]*4

# Cleaning schedule alternates m1EClean / m2EClean across the eight models.
cleanSchedule = [m1EClean, m2EClean]*4

# LSTM search space (keys match the LSTM builders' keyword names).
paramGrid = {
    'dropout': dropout,
    'reDropout': dropout,
    'neurons': neurons,
    'optimizer': optimizer,
}

In [16]:
# LSTM on the word-level embedding models
resultsPath = 'results/neural embeddings/nn/all/lstm/'
methods = [getLSTMModel for _ in emWordModels]
resultLSTMWordSchiz = getSchizResults(
    libraries=libraries,
    modelNames=emWordModels,
    paths=paths,
    textSchiz1=textSchiz1,
    labelsSchiz1=labelsSchiz1,
    textSchiz2=textSchiz2,
    labelsSchiz2=labelsSchiz2,
    methods=methods,
    token=wordToken,
    cleanSchedule=cleanSchedule,
    resultsPath=resultsPath,
    retrain=False,
)

Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases
Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases
Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases
Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases
Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases
Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases
Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases
Tokens
Lemma
Stopwords
Phrases
Emoticons
Tokens
Lemma
Stopwords
Phrases


FileNotFoundError: [Errno 2] No such file or directory: 'embeddings/Word2Vec/schiz/00 emoticons/all/w2vWCBSchiz'

In [None]:
# CNN on the word-level embedding models
resultsPath = 'results/neural embeddings/nn/all/cnn/'
methods = [getCNNModel for _ in emWordModels]
resultCNNWordSchiz = getSchizResults(
    libraries=libraries,
    modelNames=emWordModels,
    paths=paths,
    textSchiz1=textSchiz1,
    labelsSchiz1=labelsSchiz1,
    textSchiz2=textSchiz2,
    labelsSchiz2=labelsSchiz2,
    methods=methods,
    token=wordToken,
    cleanSchedule=cleanSchedule,
    resultsPath=resultsPath,
    retrain=False,
)

Now perform for Glove

In [44]:
# Load the GloVe vectors stored in word2vec text format; kept in a
# one-element list so it can be passed wherever a list of models is expected.
gloveModels = [
    KeyedVectors.load_word2vec_format('embeddings/glove/glove200', binary=False),
]

In [None]:
# CNN on the pre-loaded GloVe model. `paths` and `libraries` are placeholders
# because the vectors are passed in directly through `models`.
resultsPath = 'results/neural embeddings/nn/all/cnn/glove'
cleanSchedule = [m2EClean]
paths, libraries = [''], ['']
methods = [getCNNModel for _ in gloveModels]

resultCNNWordSchiz = getSchizResults(
    libraries=libraries,
    modelNames=gloveModels,
    paths=paths,
    textSchiz1=textSchiz1,
    labelsSchiz1=labelsSchiz1,
    textSchiz2=textSchiz2,
    labelsSchiz2=labelsSchiz2,
    methods=methods,
    token=wordToken,
    cleanSchedule=cleanSchedule,
    resultsPath=resultsPath,
    models=gloveModels,
    retrain=False,
)



In [None]:
# LSTM on the pre-loaded GloVe model.
# Fixes: (1) `getLSTM2` is not defined anywhere in this notebook, so the cell
# raised NameError — use getLSTMModel; (2) the results path pointed at the CNN
# GloVe folder and would have overwritten that summary — write under lstm/.
resultsPath = 'results/neural embeddings/nn/all/lstm/glove'
cleanSchedule = [m2EClean]

# Placeholders: the vectors are passed in directly through `models`.
paths = ['']
libraries=['']
methods = [getLSTMModel]*len(gloveModels)

# NOTE(review): the variable name is kept unchanged, although it now holds
# LSTM results and replaces the CNN GloVe results from the previous cell.
resultCNNWordSchiz = getSchizResults(libraries, gloveModels, paths, textSchiz1, labelsSchiz1, 
                textSchiz2, labelsSchiz2, methods, wordToken, cleanSchedule, resultsPath, models=gloveModels, retrain=False)

Now perform classification on character embedding models

In [57]:
# Character-level embedding model names.
emCharModels = ['w2vCCBSchiz', 'w2vCCBEMSchiz', 'w2vCSGSchiz', 'w2vCSGEmSchiz', 'ftCCBSchiz', 'ftCCBEMSchiz', 'ftCSGSchiz', 'ftCSGEmSchiz']

# Load directories: the first four models are Word2Vec, the last four FastText.
path1 = ['embeddings/Word2Vec/schiz/00 emoticons/all/']
path2 = ['embeddings/FastText/schiz/00 emoticons/all/']
paths = path1*4 + path2*4
libraries = [Word2Vec]*4 + [FastText]*4

# Cleaning schedule alternates m1EClean / m2EClean across the eight models.
cleanSchedule = [m1EClean, m2EClean]*4

# LSTM search space (keys match the LSTM builders' keyword names).
paramGrid = {
    'dropout': dropout,
    'reDropout': dropout,
    'neurons': neurons,
    'optimizer': optimizer,
}

tokenTypes = utility.getTokenTypes(charToken, emCharModels)

In [None]:
# LSTM on the character-level embedding models
resultsPath = 'results/neural embeddings/nn/all/lstm/char'
methods = [getLSTMModel for _ in emCharModels]
resultLSTMCharSchiz = getSchizResults(
    libraries=libraries,
    modelNames=emCharModels,
    paths=paths,
    textSchiz1=textSchiz1,
    labelsSchiz1=labelsSchiz1,
    textSchiz2=textSchiz2,
    labelsSchiz2=labelsSchiz2,
    methods=methods,
    token=charToken,
    cleanSchedule=cleanSchedule,
    resultsPath=resultsPath,
    retrain=False,
)

In [None]:
# CNN on the character-level embedding models
resultsPath = 'results/neural embeddings/nn/all/cnn/char'
methods = [getCNNModel for _ in emCharModels]
resultCNNWordSchiz = getSchizResults(
    libraries=libraries,
    modelNames=emCharModels,
    paths=paths,
    textSchiz1=textSchiz1,
    labelsSchiz1=labelsSchiz1,
    textSchiz2=textSchiz2,
    labelsSchiz2=labelsSchiz2,
    methods=methods,
    token=charToken,
    cleanSchedule=cleanSchedule,
    resultsPath=resultsPath,
    retrain=False,
)

### Emoticon Models - emoticon only models - Not used in final Paper

In [None]:
# Setup for models trained on emoticon-only tweets (section not used in the paper).
# NOTE(review): getFilePath, getParameters, charParameters and wordParameters are
# not defined or imported in the visible notebook — this cell fails with NameError
# on a fresh kernel unless they come from elsewhere.
emWordModels = ['w2vemWCBSchiz', 'w2vemWCBEmSchiz', 'ftemWCBSchiz', 'ftemWCBEmSchiz']
emCharModels = ['w2vemCCBSchiz', 'w2vemCCBEMSchiz', 'ftemCCBSchiz', 'ftemCCBEMSchiz']
path1 = ['embeddings/Word2Vec/schiz/00 emoticons/emoticonOnly/']
path2 = ['embeddings/FastText/schiz/00 emoticons/emoticonOnly/']
emWordModels = getFilePath(path1, path2, emWordModels)
emCharModels = getFilePath(path1, path2, emCharModels)
# NOTE(review): eight library entries for four model names; when zipped with the
# names only the first four (all Word2Vec) are used, so the two 'ft...' models
# would be paired with Word2Vec — confirm the intended list.
libraries = [Word2Vec, Word2Vec, Word2Vec, Word2Vec, FastText, FastText, FastText, FastText]
cleanSchedule = [m1EClean, m2EClean] * 2
# NOTE(review): both parameter lists are built from emWordModels — presumably the
# char list should use emCharModels; verify.
charParametersLst = getParameters(charParameters, emWordModels)
wordParametersLst = getParameters(wordParameters, emWordModels)

In [None]:
# Consider word tokens
# NOTE(review): `paths` and `methods` are not set in this section; the call relies
# on whatever earlier cells left in those names — confirm they match these models.
tokenTypes = utility.getTokenTypes(wordToken, emWordModels)
resultWordSchiz = getNeuralClassifiction(libraries, emWordModels, paths, textSchiz1, labelsSchiz1, textSchiz2, labelsSchiz2, methods, tokenTypes, cleanSchedule, retrain=False )


In [None]:
# Consider char tokens
# NOTE(review): token types are character-level, but the model list passed is
# still emWordModels — presumably emCharModels was intended; confirm.
# `paths` and `methods` again come from earlier cells.
tokenTypes = utility.getTokenTypes(charToken, emWordModels)
resultCharSchiz = getNeuralClassifiction(libraries, emWordModels, paths, textSchiz1, labelsSchiz1, textSchiz2, labelsSchiz2, methods, tokenTypes, cleanSchedule, retrain=False )

### Lowercase Models - All    
    

Here we focus on models with capitalized words. Using m1LClean (the cleaning schedule that includes the 'Lowercase' step) we convert them to lowercase. This follows the same pipeline as above.

In [61]:
# Lowercase experiment on all tweets.
# TODO: still need to get embedding models for skip-gram.
lWordModels = ['w2vWCBlSchiz', 'w2vWCBSchiz', 'w2vWSGlSchiz', 'w2vWSGSchiz']
lCharModels = []

# All four models are Word2Vec and load from the same directory.
path1 = ['embeddings/Word2Vec/schiz/01 lowercase/all/']
paths = path1*4
libraries = [Word2Vec]*4

# Cleaning schedule alternates m2LClean / m1LClean across the four models.
cleanSchedule = [m2LClean, m1LClean]*2

In [None]:
# LSTM on the lowercase word-level models. The result variable keeps its
# original name although these are word models.
resultsPath = 'results/neural embeddings/nn/lower/lstm/word'
tokenTypes = utility.getTokenTypes(wordToken, lWordModels)
methods = [getLSTMModel for _ in lWordModels]
resultLSTMCharSchiz = getSchizResults(
    libraries=libraries,
    modelNames=lWordModels,
    paths=paths,
    textSchiz1=textSchiz1,
    labelsSchiz1=labelsSchiz1,
    textSchiz2=textSchiz2,
    labelsSchiz2=labelsSchiz2,
    methods=methods,
    token=wordToken,
    cleanSchedule=cleanSchedule,
    resultsPath=resultsPath,
    retrain=False,
)

In [None]:
# CNN on the lowercase word-level models
resultsPath = 'results/neural embeddings/nn/lower/cnn/word'
methods = [getCNNModel for _ in lWordModels]
resultCNNWordSchiz = getSchizResults(
    libraries=libraries,
    modelNames=lWordModels,
    paths=paths,
    textSchiz1=textSchiz1,
    labelsSchiz1=labelsSchiz1,
    textSchiz2=textSchiz2,
    labelsSchiz2=labelsSchiz2,
    methods=methods,
    token=wordToken,
    cleanSchedule=cleanSchedule,
    resultsPath=resultsPath,
    retrain=False,
)

### Lowercase Models - lowercase tweets only - Not used in the final Paper

In [None]:
# Setup for models trained on lowercase-only tweets (section not used in the paper).
# NOTE(review): getFilePath, getParameters, charParameters and wordParameters are
# not defined or imported in the visible notebook.
lOnlyWordModels = ['w2vlWCBlSchiz', 'w2vlWCBSchiz', 'w2vlWSGlSchiz', 'w2vlWSGSchiz', 'ftlWCBlSchiz', 'ftlWCBSchiz', 'ftlWSGlSchiz', 'ftlWSGSchiz']
lOnlyCharModes = ['']

path1 = ['embeddings/Word2Vec/schiz/01 lowercase/lowerOnly/']
path2 = ['embeddings/FastText/schiz/01 lowercase/lowerOnly/']

# NOTE(review): these calls pass lWordModels / lCharModels (from the "all tweets"
# section), not the lOnly* lists defined above — presumably a copy-paste slip; confirm.
lOnlyWordModels = getFilePath(path1, path2, lWordModels)
# Fixed: the assignment operator was missing here, which made the whole cell a SyntaxError.
lOnlyCharModels = getFilePath(path1, path2, lCharModels)

cleanSchedule = [m2LClean, m1LClean, m2LClean, m1LClean]*2

# NOTE(review): both parameter lists are built from emWordModels; verify.
charParametersLst = getParameters(charParameters, emWordModels)
wordParametersLst = getParameters(wordParameters, emWordModels)

In [None]:
# Consider word tokens
# NOTE(review): this lowercase-only section passes emWordModels rather than
# lOnlyWordModels, and reuses `libraries`, `paths` and `methods` from earlier
# cells — confirm the intended inputs.
tokenTypes = utility.getTokenTypes(wordToken, emWordModels)
resultWordSchiz = getNeuralClassifiction(libraries, emWordModels, paths, textSchiz1, labelsSchiz1, textSchiz2, labelsSchiz2, methods, tokenTypes, cleanSchedule, retrain=False)


In [None]:
# Consider char tokens
# NOTE(review): token types are derived from emWordModels while the run uses
# emCharModels; neither is a lowercase-only list — confirm the intended inputs.
# `libraries`, `paths` and `methods` come from earlier cells.
tokenTypes = utility.getTokenTypes(charToken, emWordModels)
resultCharSchiz = getNeuralClassifiction(libraries, emCharModels, paths, textSchiz1, labelsSchiz1, textSchiz2, labelsSchiz2, methods, tokenTypes, cleanSchedule, retrain=False )


## Combined Datasets Not in this paper

In [None]:
# Setup for embeddings trained on the combined datasets (not part of the paper).
# NOTE(review): getFilePath, getParameters, charParameters and wordParameters are
# not defined or imported in the visible notebook; `libraries` is not set here
# and is inherited from an earlier cell.
emCombWordModels = ['w2vWCBAll', 'w2vWCBEmAll', 'w2vWSGAll', 'w2vWSGEmAll', 'ftWCBAll', 'ftWCBEmAll', 'ftWSGAll', 'ftWSGEmAll']
emCombCharModels = ['w2vCCBAll', 'w2vCCBEMAll', 'w2vCSGAll', 'w2vCSGEmAll', 'ftCCBAll', 'ftCCBEMAll', 'ftCSGAll', 'ftCSGEmAll']

path1 = ['embeddings/Word2Vec/combined/']
path2 = ['embeddings/FastText/combined/']

emCombWordModels = getFilePath(path1, path2, emCombWordModels)
emCombCharModels = getFilePath(path1, path2, emCombCharModels)

# Cleaning schedule alternates m1EClean / m2EClean across the eight models.
cleanSchedule = [m1EClean, m2EClean, m1EClean, m2EClean] * 2

charParametersLst = getParameters(charParameters, emCombCharModels)
wordParametersLst = getParameters(wordParameters, emCombWordModels)

In [None]:
# Consider word tokens
# NOTE(review): this combined-dataset section passes emWordModels rather than
# emCombWordModels, and reuses `libraries`, `paths` and `methods` from earlier
# cells — confirm the intended inputs.
tokenTypes = utility.getTokenTypes(wordToken, emWordModels)
resultWordSchiz = getNeuralClassifiction(libraries, emWordModels, paths, textSchiz1, labelsSchiz1, textSchiz2, labelsSchiz2, methods, tokenTypes, cleanSchedule, retrain=False)



In [None]:
# Consider char tokens
# NOTE(review): token types are derived from emWordModels while the run uses
# emCharModels; neither is an emComb* list — confirm the intended inputs.
# `libraries`, `paths` and `methods` come from earlier cells.
tokenTypes = utility.getTokenTypes(charToken, emWordModels)
resultCharSchiz = getNeuralClassifiction(libraries, emCharModels, paths, textSchiz1, labelsSchiz1, textSchiz2, labelsSchiz2, methods, tokenTypes, cleanSchedule, retrain=False )

