# Exp6: GloveVectorizer #

Trying out GloveVectorizer as part of Pipeline

NB: varying the C and gamma hyperparameters raises or lowers the score, depending on the dimensionality of the GloVe vectors.

In [1]:
import pandas as pd

from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.cross_validation import cross_val_predict, StratifiedKFold
from sklearn.metrics import fbeta_score

from glove_transformer import GloveVectorizer

# Load the SemEval-2016 Task 6 training data (tab-separated; the first column
# becomes the index). The path is handed to pandas directly so that pandas
# opens and closes the file itself, instead of leaking a handle from a bare
# open() call; the separator is passed by keyword for clarity and for
# compatibility with newer pandas releases.
all_targets = pd.read_csv('semeval2016-task6-trainingdata.txt', sep='\t',
                          index_col=0)

# Keep only the tweets for the climate-change target; the unfiltered frame
# stays available as `all_targets`.
data = all_targets[all_targets.Target == 'Climate Change is a Real Concern']
true_stances = data.Stance

# Stratified 5-fold split over the stance labels; shuffling is seeded so the
# folds are the same on every run.
cv = StratifiedKFold(true_stances, n_folds=5, shuffle=True, random_state=1)


# Cross-validate the GloVe + SVM pipeline once per embedding dimensionality.
# The file name template does not change between iterations, so it is defined
# once, outside the loop.
glove_fname = 'semeval2016-task6-trainingdata_climate_glove.twitter.27B.{}d.pkl'

for dim in 25, 50, 100, 200:
    # Single-argument print(...) calls produce the same output on Python 2
    # and Python 3, unlike the print statement.
    print(80 * '=')
    print('DIMENSIONS: {}'.format(dim))

    # NOTE(review): unpickling can execute arbitrary code -- only load pickle
    # files that were generated locally / come from a trusted source.
    glove_vecs = pd.read_pickle(glove_fname.format(dim))

    # presumably GloveVectorizer maps each tweet to a feature vector built
    # from `glove_vecs` -- confirm against glove_transformer.
    pipeline = Pipeline([('vect', GloveVectorizer(glove_vecs)),
                         ('clf', SVC(C=1, gamma=0.01))])

    # Predictions for every tweet, obtained via the shared `cv` splitter.
    pred_stances = cross_val_predict(pipeline, data.Tweet, true_stances, cv=cv)
    print(classification_report(true_stances, pred_stances, digits=4))

    # Macro-averaged F1 restricted to the AGAINST and FAVOR labels (NONE is
    # left out of the average). `beta` is passed by keyword because newer
    # scikit-learn releases no longer accept it positionally.
    macro_f = fbeta_score(true_stances, pred_stances, beta=1.0,
                          labels=['AGAINST', 'FAVOR'], average='macro')
    print('macro-average of F-score(FAVOR) and F-score(AGAINST): {:.4f}\n'.format(macro_f))

DIMENSIONS: 25
             precision    recall  f1-score   support

    AGAINST     1.0000    0.0667    0.1250        15
      FAVOR     0.6270    0.7453    0.6810       212
       NONE     0.5915    0.5000    0.5419       168

avg / total     0.6261    0.6152    0.6008       395

macro-average of F-score(FAVOR) and F-score(AGAINST): 0.4030

DIMENSIONS: 50
             precision    recall  f1-score   support

    AGAINST     1.0000    0.0667    0.1250        15
      FAVOR     0.6493    0.8208    0.7250       212
       NONE     0.6746    0.5060    0.5782       168

avg / total     0.6734    0.6582    0.6398       395

macro-average of F-score(FAVOR) and F-score(AGAINST): 0.4250

DIMENSIONS: 100
             precision    recall  f1-score   support

    AGAINST     1.0000    0.2000    0.3333        15
      FAVOR     0.6573    0.8868    0.7550       212
       NONE     0.7642    0.4821    0.5912       168

avg / total     0.7158    0.6886    0.6693       395

macro-average of F-score(F