# Model based on GloVe feature extraction
## Global Vectors for Word Representation
### https://nlp.stanford.edu/projects/glove/
GloVe is an unsupervised learning algorithm for obtaining vector representations for words. Training is performed on aggregated global word-word co-occurrence statistics from a corpus, and the resulting representations showcase interesting linear substructures of the word vector space.

In [1]:
import essay
import pickle
import numpy as np
import struct
import os
import pandas as pd

# Base directory for the data files. os.path.dirname() of a bare file name is
# "", so abspath() resolves to the current working directory.
my_path = os.path.abspath(os.path.dirname('glove.6B.300d.txt'))

# Pass every path component separately so os.path.join inserts the separator
# of the running platform. A literal "data\pretrained\..." only resolves on
# Windows, and "\p" / "\g" are invalid string escape sequences.
GLOVE_BIG = os.path.join(my_path, "data", "pretrained", "glove.6B.300d.txt")
GLOVE_SMALL = os.path.join(my_path, "data", "pretrained", "glove.6B.50d.txt")

# Text encoding of the GloVe files.
encoding = "utf-8"

In [3]:
# load the preprocessed data which we saved
# choose how much data you want to load (2467, 11142 or 89364)
# NOTE: pickle can execute arbitrary code while loading -- only open files
# that were produced by this project.

essays_file = "data/essays/essays2467.p"
#essays_file = "data/essays/essays11142.p"
#essays_file = "data/essays/essays89364.p"

# The context manager closes the file handle even if unpickling fails.
# NOTE(review): presumably the otherwise unused `import essay` at the top is
# what lets pickle rebuild the stored objects -- confirm before removing it.
with open(essays_file, "rb") as f:
    essays = pickle.load(f)

print("loaded count of essays:", len(essays))

loaded count of essays: 2467


# Vectorizer for GloVe

In [4]:
# reference to http://nadbordrozd.github.io/blog/2016/05/20/text-classification-with-word2vec/
# credit to nadbor 

class MeanEmbeddingVectorizer(object):
    """Represent each document by the mean of its known word vectors.

    Parameters
    ----------
    word2vec : mapping
        Maps a word to its embedding vector. All vectors are expected to have
        the same length; that length is stored in ``self.dim``.
    """

    def __init__(self, word2vec):
        self.word2vec = word2vec
        if len(word2vec) > 0:
            # Derive the dimensionality from the mapping that was passed in,
            # not from a notebook-level global that may not exist or may hold
            # vectors of a different size.
            self.dim = len(word2vec[next(iter(word2vec))])
        else:
            self.dim = 0

    def fit(self, X, y=None):
        """Do nothing and return ``self``; there is nothing to learn.

        ``y`` is optional so the vectorizer can be fitted without labels.
        """
        return self

    def transform(self, X):
        """Return one averaged vector per document in ``X``.

        Each element of ``X`` is iterated to obtain its words (iterating a
        dict yields its keys). Words missing from ``word2vec`` are skipped;
        a document without any known word becomes a zero vector of length
        ``self.dim``.
        """
        return np.array([
            # An empty list is falsy, so `or` swaps in a single zero vector
            # and np.mean never sees an empty input.
            np.mean([self.word2vec[w] for w in words if w in self.word2vec]
                    or [np.zeros(self.dim)], axis=0)
            for words in X
        ])

# preparing the vectors - kinda manually... :S

In [5]:
# load the vectors of all words from the GloVe file downloaded from Stanford
# - quoting=3 is csv.QUOTE_NONE, so quote characters are read as ordinary text
# - keep_default_na=False keeps tokens such as "null", "nan" or "NA" as words
#   instead of turning them into missing values (NaN) in the word column
# - encoding uses the value declared next to the GloVe paths
df = pd.read_csv(GLOVE_SMALL, sep=" ", quoting=3, header=None,
                 keep_default_na=False, encoding=encoding)


In [7]:
# Flatten the words of every essay into one long list (repeats are kept) ...
corpus = []
for e in essays:
    corpus.extend(e.words)

# ... and wrap that list in a DataFrame so it can be joined with the GloVe table.
df_corpus = pd.DataFrame(corpus)
df_corpus

Unnamed: 0,0
0,Well
1,right
2,now
3,I
4,just
...,...
1599254,some
1599255,people
1599256,try
1599257,to


In [8]:
# Keep only the GloVe rows whose word (column 0) also occurs in the essays.
# The corpus lists every occurrence of a word, so the inner join repeats rows;
# the repeats are dropped straight away.
df_mywords = df.merge(df_corpus, how="inner", on=[0]).drop_duplicates()
df_mywords

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,41,42,43,44,45,46,47,48,49,50
0,the,0.418000,0.249680,-0.412420,0.121700,0.345270,-0.044457,-0.49688,-0.178620,-0.000660,...,-0.298710,-0.157490,-0.347580,-0.045637,-0.44251,0.187850,0.002785,-0.184110,-0.115140,-0.78581
38228,of,0.708530,0.570880,-0.471600,0.180480,0.544490,0.726030,0.18157,-0.523930,0.103810,...,-0.347270,0.284830,0.075693,-0.062178,-0.38988,0.229020,-0.216170,-0.225620,-0.093918,-0.80375
61163,to,0.680470,-0.039263,0.301860,-0.177920,0.429620,0.032246,-0.41376,0.132280,-0.298470,...,-0.094375,0.018324,0.210480,-0.030880,-0.19722,0.082279,-0.094340,-0.073297,-0.064699,-0.26044
117424,and,0.268180,0.143460,-0.278770,0.016257,0.113840,0.699230,-0.51332,-0.473680,-0.330750,...,-0.069043,0.368850,0.251680,-0.245170,0.25381,0.136700,-0.311780,-0.632100,-0.250280,-0.38097
154207,in,0.330420,0.249950,-0.608740,0.109230,0.036372,0.151000,-0.55083,-0.074239,-0.092307,...,-0.486090,-0.008027,0.031184,-0.365760,-0.42699,0.421640,-0.116660,-0.507030,-0.027273,-0.53285
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1390869,alreay,0.378620,-1.095900,0.230920,-0.817830,-1.061200,-0.373750,-0.29061,0.336310,0.159500,...,-0.641690,-0.654720,0.309120,1.273300,-0.13224,-0.583250,0.040426,1.090400,-0.379170,-0.31001
1390870,hafta,-0.480910,-0.713330,0.033117,-0.751560,-1.182200,-0.744370,0.38752,0.775980,0.261310,...,0.040820,-0.679650,-0.137070,0.407140,-0.18172,-0.584380,0.259140,0.605880,0.292900,0.70329
1390874,thusfar,0.194110,-0.565630,-0.240660,-0.313590,-0.585120,-0.116020,-0.15794,0.400330,-0.129930,...,-0.369470,-0.912350,-0.315940,0.571400,-0.40314,0.114760,-0.130790,0.789590,-0.555270,-0.34250
1390875,frigging,-0.002708,-0.246300,0.336800,-0.742860,-0.462800,0.113000,0.52011,0.831020,0.548580,...,-0.390020,-0.947280,0.447130,0.899360,-0.21329,-0.608260,0.131330,0.333040,-0.380220,0.33195


In [9]:
# The vectorizer expects a plain {word: vector} mapping of "our" words:
# index the table by the word column, then turn each row into an array.
df_temp = df_mywords.set_index(0)
glove_mywords = {word: row.values for word, row in df_temp.iterrows()}
glove_mywords

{'the': array([ 4.1800e-01,  2.4968e-01, -4.1242e-01,  1.2170e-01,  3.4527e-01,
        -4.4457e-02, -4.9688e-01, -1.7862e-01, -6.6023e-04, -6.5660e-01,
         2.7843e-01, -1.4767e-01, -5.5677e-01,  1.4658e-01, -9.5095e-03,
         1.1658e-02,  1.0204e-01, -1.2792e-01, -8.4430e-01, -1.2181e-01,
        -1.6801e-02, -3.3279e-01, -1.5520e-01, -2.3131e-01, -1.9181e-01,
        -1.8823e+00, -7.6746e-01,  9.9051e-02, -4.2125e-01, -1.9526e-01,
         4.0071e+00, -1.8594e-01, -5.2287e-01, -3.1681e-01,  5.9213e-04,
         7.4449e-03,  1.7778e-01, -1.5897e-01,  1.2041e-02, -5.4223e-02,
        -2.9871e-01, -1.5749e-01, -3.4758e-01, -4.5637e-02, -4.4251e-01,
         1.8785e-01,  2.7849e-03, -1.8411e-01, -1.1514e-01, -7.8581e-01]),
 'of': array([ 0.70853  ,  0.57088  , -0.4716   ,  0.18048  ,  0.54449  ,
         0.72603  ,  0.18157  , -0.52393  ,  0.10381  , -0.17566  ,
         0.078852 , -0.36216  , -0.11829  , -0.83336  ,  0.11917  ,
        -0.16605  ,  0.061555 , -0.012719 , -0.5662

In [10]:
# for every essay we save the GloVe vectors in essay.glove as a dictionary
# {word: vector}, restricted to the words that have a GloVe vector.
#
# A direct lookup in glove_mywords replaces the per-essay DataFrame merge:
# it yields the same word -> vector pairs without building a DataFrame per
# essay, and an essay without any known word simply gets an empty dict
# instead of being skipped with a bare "error" message (which left e.glove
# undefined for that essay).
# NOTE: the arrays are shared with glove_mywords, so do not modify them in place.
for e in essays:
    e.glove = {w: glove_mywords[w] for w in e.words if w in glove_mywords}


In [11]:
# Save the essays, which now carry their GloVe vectors in `glove`, with pickle.
# NOTE: the "50" in the file name is hard-coded; keep it in sync with the
# dimension of the GloVe file that was loaded (GLOVE_SMALL is the 50d file).
filename = "data/essays/essays_glove" + "50" + "d_" + str(len(essays)) + ".p"

# The context manager flushes and closes the file even if pickling fails.
with open(filename, "wb") as f:
    pickle.dump(essays, f)
print("saved", len(essays), "entries: in", filename)


saved 2467 entries: in essays/essays_glove50d_2467.p


# Split data in train & test

In [67]:
from sklearn.model_selection import train_test_split

# Hold back 20 % of the essays for testing; the fixed seed makes the split
# repeatable between runs.
training, test = train_test_split(
    essays,
    random_state=42,
    test_size=0.20,
)

In [68]:
# Features: every essay contributes its {word: vector} dict (`glove`).
# The training features are wrapped in a NumPy array, the test features
# stay a plain list.
train_x = np.array([item.glove for item in training])
test_x = [item.glove for item in test]

# Training labels, one NumPy array per trait column.
train_y_cEXT = np.array([item.cEXT for item in training])
train_y_cNEU = np.array([item.cNEU for item in training])
train_y_cAGR = np.array([item.cAGR for item in training])
train_y_cCON = np.array([item.cCON for item in training])
train_y_cOPN = np.array([item.cOPN for item in training])

# Test labels, kept as plain lists.
test_y_cEXT = [item.cEXT for item in test]
test_y_cNEU = [item.cNEU for item in test]
test_y_cAGR = [item.cAGR for item in test]
test_y_cCON = [item.cCON for item in test]
test_y_cOPN = [item.cOPN for item in test]



# Create Vectorizer for GloVe

In [69]:
# Each essay is reduced to the MEAN of the vectors of its words
# (the simple approach recommended on the Stanford GloVe page).
glove_vectorizer = MeanEmbeddingVectorizer(glove_mywords)

# Average the training split first, then the test split.
train_x_vectors, test_x_vectors = (
    glove_vectorizer.transform(split) for split in (train_x, test_x)
)


In [71]:
# Sanity check: number of rows in the averaged training features
# (the vectorizer emits one row per element of train_x).
len(train_x_vectors)

70949

In [72]:
# Bookkeeping for the evaluation: every classifier section appends
# [data, vec_name, name, trait, score] to `evaluation`.
vec_name = "GloVe"      # label of the feature extraction method
data = len(essays)      # number of loaded essays
evaluation = []

# SVM

In [73]:
from sklearn import svm
name = "svm"


def _fit_and_record(clf, trait, train_y, test_y):
    """Fit ``clf`` on the training vectors and record its test score.

    The score is computed once, appended to ``evaluation`` as
    ``[data, vec_name, name, trait, score]`` and printed.
    """
    clf.fit(train_x_vectors, train_y)
    score = clf.score(test_x_vectors, test_y)
    evaluation.append([data, vec_name, name, trait, score])
    print(trait + " score: ", score)


print("training Extraversion cEXT using SVM...")
clf_svm_cEXT = svm.SVC(kernel='linear')
_fit_and_record(clf_svm_cEXT, "cEXT", train_y_cEXT, test_y_cEXT)

print("training Neuroticism cNEU using SVM...")
clf_svm_cNEU = svm.SVC(kernel='linear')
try:
    _fit_and_record(clf_svm_cNEU, "cNEU", train_y_cNEU, test_y_cNEU)
except Exception as err:
    # Best effort: this trait may be unusable for some data sets. Unlike a
    # bare `except:`, this lets KeyboardInterrupt through and shows the cause.
    print("with this data not available (MBTI only 4 dimensions)")
    print("reason:", repr(err))

print("training Agreeableness cAGR using using SVM...")
clf_svm_cAGR = svm.SVC(kernel='linear')
_fit_and_record(clf_svm_cAGR, "cAGR", train_y_cAGR, test_y_cAGR)

print("training Conscientiousness cCON using SVM...")
clf_svm_cCON = svm.SVC(kernel='linear')
_fit_and_record(clf_svm_cCON, "cCON", train_y_cCON, test_y_cCON)

print("training Openness to Experience cOPN using SVM...")
clf_svm_cOPN = svm.SVC(kernel='linear')
_fit_and_record(clf_svm_cOPN, "cOPN", train_y_cOPN, test_y_cOPN)

training Extraversion cEXT using SVM...
cEXT score:  0.7351448866839554
training Neuroticism cNEU using SVM...
with this data not available (MBTI only 4 dimensions)
training Agreeableness cAGR using using SVM...
cAGR score:  0.6804036531739768
training Conscientiousness cCON using SVM...
cCON score:  0.5891870560378848
training Openness to Experience cOPN using SVM...
cOPN score:  0.7800202954109821


# Decision Tree

In [74]:
from sklearn import tree
name = "tree"


def _fit_and_record(clf, trait, train_y, test_y):
    """Fit ``clf`` on the training vectors and record its test score.

    The score is computed once, appended to ``evaluation`` as
    ``[data, vec_name, name, trait, score]`` and printed.
    """
    clf.fit(train_x_vectors, train_y)
    score = clf.score(test_x_vectors, test_y)
    evaluation.append([data, vec_name, name, trait, score])
    print(trait + " score: ", score)


# random_state=42 (the seed already used for the train/test split) makes the
# fitted trees, and therefore the recorded scores, repeatable between runs.
print("training Extraversion cEXT using dec...")
clf_dec_cEXT = tree.DecisionTreeClassifier(random_state=42)
_fit_and_record(clf_dec_cEXT, "cEXT", train_y_cEXT, test_y_cEXT)

print("training Neuroticism cNEU using dec...")
clf_dec_cNEU = tree.DecisionTreeClassifier(random_state=42)
try:
    _fit_and_record(clf_dec_cNEU, "cNEU", train_y_cNEU, test_y_cNEU)
except Exception as err:
    # Best effort: this trait may be unusable for some data sets. Unlike a
    # bare `except:`, this lets KeyboardInterrupt through and shows the cause.
    print("with this data not available (MBTI only 4 dimensions)")
    print("reason:", repr(err))

print("training Agreeableness cAGR using using dec...")
clf_dec_cAGR = tree.DecisionTreeClassifier(random_state=42)
_fit_and_record(clf_dec_cAGR, "cAGR", train_y_cAGR, test_y_cAGR)

print("training Conscientiousness cCON using dec...")
clf_dec_cCON = tree.DecisionTreeClassifier(random_state=42)
_fit_and_record(clf_dec_cCON, "cCON", train_y_cCON, test_y_cCON)

print("training Openness to Experience cOPN using dec...")
clf_dec_cOPN = tree.DecisionTreeClassifier(random_state=42)
_fit_and_record(clf_dec_cOPN, "cOPN", train_y_cOPN, test_y_cOPN)



training Extraversion cEXT using dec...
cEXT score:  0.6766264516856466
training Neuroticism cNEU using dec...
with this data not available (MBTI only 4 dimensions)
training Agreeableness cAGR using using dec...
cAGR score:  0.6634908106889165
training Conscientiousness cCON using dec...
cCON score:  0.5668057278159883
training Openness to Experience cOPN using dec...
cOPN score:  0.6920171383470515


# Naive Bayes

In [75]:
from sklearn.naive_bayes import GaussianNB
name = "gNB"


def _fit_and_record(clf, trait, train_y, test_y):
    """Fit ``clf`` on the training vectors and record its test score.

    The score is computed once, appended to ``evaluation`` as
    ``[data, vec_name, name, trait, score]`` and printed.
    """
    clf.fit(train_x_vectors, train_y)
    score = clf.score(test_x_vectors, test_y)
    evaluation.append([data, vec_name, name, trait, score])
    print(trait + " score: ", score)


print("training Extraversion cEXT using GaussianNaiveBayes...")
clf_gnb_cEXT = GaussianNB()
_fit_and_record(clf_gnb_cEXT, "cEXT", train_y_cEXT, test_y_cEXT)

print("training Neuroticism cNEU using GaussianNaiveBayes...")
clf_gnb_cNEU = GaussianNB()
try:
    _fit_and_record(clf_gnb_cNEU, "cNEU", train_y_cNEU, test_y_cNEU)
except Exception as err:
    # Best effort: this trait may be unusable for some data sets. Unlike a
    # bare `except:`, this lets KeyboardInterrupt through and shows the cause.
    print("with this data not available (MBTI only 4 dimensions)")
    print("reason:", repr(err))

print("training Agreeableness cAGR using using GaussianNaiveBayes...")
clf_gnb_cAGR = GaussianNB()
_fit_and_record(clf_gnb_cAGR, "cAGR", train_y_cAGR, test_y_cAGR)

print("training Conscientiousness cCON using GaussianNaiveBayes...")
clf_gnb_cCON = GaussianNB()
_fit_and_record(clf_gnb_cCON, "cCON", train_y_cCON, test_y_cCON)

print("training Openness to Experience cOPN using GaussianNaiveBayes...")
clf_gnb_cOPN = GaussianNB()
_fit_and_record(clf_gnb_cOPN, "cOPN", train_y_cOPN, test_y_cOPN)



training Extraversion cEXT using GaussianNaiveBayes...
cEXT score:  0.6850828729281768
training Neuroticism cNEU using GaussianNaiveBayes...
with this data not available (MBTI only 4 dimensions)
training Agreeableness cAGR using using GaussianNaiveBayes...
cAGR score:  0.5037208253467133
training Conscientiousness cCON using GaussianNaiveBayes...
cCON score:  0.5453264178599616
training Openness to Experience cOPN using GaussianNaiveBayes...
cOPN score:  0.7549892885330928


# Logistic Regression

In [76]:
from sklearn.linear_model import LogisticRegression
name="logR"


def _fit_and_record(clf, trait, train_y, test_y):
    """Fit ``clf`` on the training vectors and record its test score.

    The score is computed once, appended to ``evaluation`` as
    ``[data, vec_name, name, trait, score]`` and printed.
    """
    clf.fit(train_x_vectors, train_y)
    score = clf.score(test_x_vectors, test_y)
    evaluation.append([data, vec_name, name, trait, score])
    print(trait + " score: ", score)


print("training Extraversion cEXT using Logistic Regression...")
clf_log_cEXT = LogisticRegression(solver="newton-cg")
_fit_and_record(clf_log_cEXT, "cEXT", train_y_cEXT, test_y_cEXT)

print("training Neuroticism cNEU using Logistic Regression...")
clf_log_cNEU = LogisticRegression(solver="newton-cg")
try:
    _fit_and_record(clf_log_cNEU, "cNEU", train_y_cNEU, test_y_cNEU)
except Exception as err:
    # Best effort: this trait may be unusable for some data sets. Unlike a
    # bare `except:`, this lets KeyboardInterrupt through and shows the cause.
    print("with this data not available (MBTI only 4 dimensions)")
    print("reason:", repr(err))

print("training Agreeableness cAGR using using Logistic Regression...")
clf_log_cAGR = LogisticRegression(solver="newton-cg")
_fit_and_record(clf_log_cAGR, "cAGR", train_y_cAGR, test_y_cAGR)

print("training Conscientiousness cCON using Logistic Regression...")
clf_log_cCON = LogisticRegression(solver="newton-cg")
_fit_and_record(clf_log_cCON, "cCON", train_y_cCON, test_y_cCON)

print("training Openness to Experience cOPN using Logistic Regression...")
clf_log_cOPN = LogisticRegression(solver="newton-cg")
_fit_and_record(clf_log_cOPN, "cOPN", train_y_cOPN, test_y_cOPN)

training Extraversion cEXT using Logistic Regression...
cEXT score:  0.7434885556432518
training Neuroticism cNEU using Logistic Regression...
with this data not available (MBTI only 4 dimensions)
training Agreeableness cAGR using using Logistic Regression...
cAGR score:  0.7131018153117601
training Conscientiousness cCON using Logistic Regression...
cCON score:  0.5859172398241065
training Openness to Experience cOPN using Logistic Regression...
cOPN score:  0.7777088736046905


# Random Forest

In [77]:
from sklearn.ensemble import RandomForestClassifier
name="RF"


def _fit_and_record(clf, trait, train_y, test_y):
    """Fit ``clf`` on the training vectors and record its test score.

    The score is computed once, appended to ``evaluation`` as
    ``[data, vec_name, name, trait, score]`` and printed.
    """
    clf.fit(train_x_vectors, train_y)
    score = clf.score(test_x_vectors, test_y)
    evaluation.append([data, vec_name, name, trait, score])
    print(trait + " score: ", score)


# random_state=42 (the seed already used for the train/test split) makes the
# fitted forests, and therefore the recorded scores, repeatable between runs.
print("training Extraversion cEXT using Random Forest...")
clf_rf_cEXT = RandomForestClassifier(n_estimators=100, random_state=42)
_fit_and_record(clf_rf_cEXT, "cEXT", train_y_cEXT, test_y_cEXT)

print("training Neuroticism cNEU using Random Forest...")
clf_rf_cNEU = RandomForestClassifier(n_estimators=100, random_state=42)
try:
    _fit_and_record(clf_rf_cNEU, "cNEU", train_y_cNEU, test_y_cNEU)
except Exception as err:
    # Best effort: this trait may be unusable for some data sets. Unlike a
    # bare `except:`, this lets KeyboardInterrupt through and shows the cause.
    print("with this data not available (MBTI only 4 dimensions)")
    print("reason:", repr(err))

print("training Agreeableness cAGR using using Random Forest...")
clf_rf_cAGR = RandomForestClassifier(n_estimators=100, random_state=42)
_fit_and_record(clf_rf_cAGR, "cAGR", train_y_cAGR, test_y_cAGR)

print("training Conscientiousness cCON using Random Forest...")
clf_rf_cCON = RandomForestClassifier(n_estimators=100, random_state=42)
_fit_and_record(clf_rf_cCON, "cCON", train_y_cCON, test_y_cCON)

print("training Openness to Experience cOPN using Random Forest...")
clf_rf_cOPN = RandomForestClassifier(n_estimators=100, random_state=42)
_fit_and_record(clf_rf_cOPN, "cOPN", train_y_cOPN, test_y_cOPN)


training Extraversion cEXT using Random Forest...
cEXT score:  0.7718457548765363
training Neuroticism cNEU using Random Forest...
with this data not available (MBTI only 4 dimensions)
training Agreeableness cAGR using using Random Forest...
cAGR score:  0.7323260796031119
training Conscientiousness cCON using Random Forest...
cCON score:  0.6218288420340512
training Openness to Experience cOPN using Random Forest...
cOPN score:  0.7859397902807532


In [79]:
# Persist the collected scores; the file name combines the essay count
# (`data`) and the vectorizer label (`vec_name`).
filename = "data/evaluation/evaluation" + str(data) + vec_name + ".p"

# The context manager flushes and closes the file even if pickling fails.
with open(filename, "wb") as f:
    pickle.dump(evaluation, f)
print("evaluation saved as", filename)

evaluation saved as eval/evaluation88687GloVe.p


In [78]:
# Show all collected results; each entry is
# [data, vec_name, name, trait, score] as appended by the classifier cells.
print(evaluation)

[[88687, 'GloVe', 'svm', 'cEXT', 0.7351448866839554], [88687, 'GloVe', 'svm', 'cAGR', 0.6804036531739768], [88687, 'GloVe', 'svm', 'cCON', 0.5891870560378848], [88687, 'GloVe', 'svm', 'cOPN', 0.7800202954109821], [88687, 'GloVe', 'tree', 'cEXT', 0.6766264516856466], [88687, 'GloVe', 'tree', 'cAGR', 0.6634908106889165], [88687, 'GloVe', 'tree', 'cCON', 0.5668057278159883], [88687, 'GloVe', 'tree', 'cOPN', 0.6920171383470515], [88687, 'GloVe', 'gNB', 'cEXT', 0.6850828729281768], [88687, 'GloVe', 'gNB', 'cAGR', 0.5037208253467133], [88687, 'GloVe', 'gNB', 'cCON', 0.5453264178599616], [88687, 'GloVe', 'gNB', 'cOPN', 0.7549892885330928], [88687, 'GloVe', 'logR', 'cEXT', 0.7434885556432518], [88687, 'GloVe', 'logR', 'cAGR', 0.7131018153117601], [88687, 'GloVe', 'logR', 'cCON', 0.5859172398241065], [88687, 'GloVe', 'logR', 'cOPN', 0.7777088736046905], [88687, 'GloVe', 'RF', 'cEXT', 0.7718457548765363], [88687, 'GloVe', 'RF', 'cAGR', 0.7323260796031119], [88687, 'GloVe', 'RF', 'cCON', 0.621828