In [1]:
from bert_serving.client import BertClient 
# Client handle used by the encoding cells below (bc.encode).
# NOTE(review): presumably needs a bert-serving server that is already running and
# reachable with the client's default connection settings — confirm.
# NOTE(review): check_length=False presumably turns off the client-side sequence-length
# check — confirm against the bert-as-service client documentation.
bc = BertClient(check_length=False)

In [478]:
import pandas as pd
import numpy as np
from setup import get_train, get_test, get_valid
# Load the training and test splits through the project's setup helpers.
# NOTE(review): the argument 6 presumably selects the six-way label scheme (the
# classification reports further down list six classes) — confirm in setup.py.
train = get_train(6)
test = get_test(6)

In [254]:
# Write both splits to CSV next to each other under data/liar/.
# The test path previously read 'data/liartest_whead.csv' (missing path separator),
# which put the file in data/ under a different name than its training counterpart.
train.to_csv('data/liar/train_whead.csv')
test.to_csv('data/liar/test_whead.csv')

In [142]:
# Split each frame into the raw statement text (model input) and its label (target).
X_str_train, y_train = train['statement'], train['label']
X_str_test, y_test = test['statement'], test['label']

10240

In [129]:
# bert-as-service recommends sending the whole input at once, because it batches
# internally. In practice the Jupyter cell kept hanging even after the server logged
# "sent back size 10240", so each statement is sent as its own single-item request,
# which lets the encoding run to completion.
train_statements = X_str_train.tolist()  # converted once, not once per iteration
n = len(train_statements)
ys = []
for statement in train_statements:
    # show_tokens=True makes encode return a pair; only the encodings are kept.
    y, _ = bc.encode([statement], show_tokens=True)
    # y holds a single item because the request held a single statement.
    ys.append(y[0])

In [130]:
# Stack the per-statement encodings into one dense (n, m, 768) array, where m is the
# longest encoding; shorter encodings keep trailing all-zero rows as padding.
m = max(enc.shape[0] for enc in ys)
X_bert_train = np.zeros((n, m, 768))
for i, y in enumerate(ys):
    # Only the leading rows are written; the remainder stays at the zeros set above.
    X_bert_train[i, :y.shape[0]] = y

In [131]:
#np.save('data/bert/X_bert_train', X_bert_train)

In [None]:
#X_bert_train = np.load('data/bert/X_bert_train.npy')

In [132]:
#average the per-row encodings of each statement into a single vector (768-d for the BERT arrays above)
def bert_reduce_mean(X, lengths=None):
    """Mean-pool a batch of encodings over axis 1.

    Parameters
    ----------
    X : numpy array
        Batch of encodings; the notebook passes arrays of shape (n, m, 768).
    lengths : sequence of n ints, optional
        Number of real (non-padding) leading rows in each X[i]. If omitted, the
        plain mean over axis 1 is returned, exactly as before; padding rows are
        then counted in the divisor, so short items are scaled towards zero.
        If given, X must be 3-D and only the first lengths[i] rows of X[i] are
        averaged. A length of 0 yields an all-zero vector.

    Returns
    -------
    numpy array with axis 1 reduced away, i.e. shape (n, d) for a 3-D input.
    """
    if lengths is None:
        return X.mean(axis=1)
    counts = np.asarray(lengths).reshape(-1)
    # mask[i, t] is 1 for the real rows of item i and 0 for its padding rows.
    mask = (np.arange(X.shape[1])[None, :] < counts[:, None]).astype(X.dtype)
    # Masked sum over axis 1 without building a second array the size of X.
    summed = np.einsum('nt,ntd->nd', mask, X)
    # Guard against division by zero for empty items.
    return summed / np.maximum(counts, 1)[:, None]

In [450]:
# Collapse the padded training tensor to one vector per statement, then cache it on disk
# so later runs can skip the encoding step.
X_bert_train_mean = bert_reduce_mean(X_bert_train)
np.save('data/bert/X_bert_train_mean', X_bert_train_mean)

In [None]:
import os
from torch_shallow_neural_classifier import TorchShallowNeuralClassifier
from torch_rnn_classifier import TorchRNNClassifier
from sklearn.metrics import classification_report

In [428]:
# Baseline classifier for the mean-pooled BERT vectors (hidden_dim=500, max_iter=110).
mod = TorchShallowNeuralClassifier(
    max_iter=110, hidden_dim=500)
# The learning rate (eta) and the L2 regularization (l2_strength) are left at
# their defaults: for this model, values other than the defaults
# gave worse results.

In [429]:
# Fit the baseline on the pooled training vectors; labels are handed over as a tuple.
# %time prints the CPU and wall-clock time of the call; the return value is discarded.
%time _ = mod.fit(X_bert_train_mean, tuple(y_train.tolist()))

Finished epoch 110 of 110; error is 14.775890350341797

CPU times: user 4min 23s, sys: 1min 14s, total: 5min 37s
Wall time: 32.4 s


In [143]:
# Encode the test set, one statement per request (see the training-set encoding cell
# for why the statements are not sent in a single call).
test_statements = X_str_test.tolist()  # converted once, not once per iteration
n2 = len(test_statements)
ys2 = []
for statement in test_statements:
    # show_tokens=True makes encode return a pair; only the encodings are kept.
    y, _ = bc.encode([statement], show_tokens=True)
    # y holds a single item because the request held a single statement.
    ys2.append(y[0])

In [144]:
# Stack the test encodings into one dense (n2, m2, 768) array, zero-padded at the end.
# The width used to come from the training encodings (ys) alone, so a test statement
# longer than every training statement produced a negative pad width and np.pad failed.
# It now covers the longest encoding in either split. Staying at least as wide as the
# training tensor also keeps the divisor of bert_reduce_mean (the padded length) equal
# for train and test whenever no test statement is longer than the training maximum.
m2 = max(max(enc.shape[0] for enc in ys),
         max(enc.shape[0] for enc in ys2))
X_bert_test = np.zeros((n2, m2, 768))
for i, y in enumerate(ys2):
    # Only the leading rows are written; the remainder stays at the zeros set above.
    X_bert_test[i, :y.shape[0]] = y

In [145]:
#np.save('data/bert/X_bert_test', X_bert_test)
#X_bert_test = np.load('data/bert/X_bert_test.npy')

In [448]:
# Collapse the padded test tensor to one vector per statement, then cache it on disk.
X_bert_test_mean = bert_reduce_mean(X_bert_test)
np.save('data/bert/X_bert_test_mean', X_bert_test_mean)

In [None]:
# Training-set predictions of the baseline model, reported per class; comparing this
# with the test report shows how much the model overfits.
bert_train_preds = mod.predict(X_bert_train_mean)
print(classification_report(y_train, bert_train_preds, digits=3))

In [453]:
# Held-out test predictions of the baseline model, reported per class.
bert_test_preds = mod.predict(X_bert_test_mean)
print(classification_report(y_test, bert_test_preds, digits=3))

              precision    recall  f1-score   support

       FALSE      0.308     0.265     0.285       249
        TRUE      0.240     0.231     0.235       208
 barely-true      0.260     0.118     0.162       212
   half-true      0.253     0.321     0.283       265
 mostly-true      0.225     0.324     0.265       241
  pants-fire      0.270     0.217     0.241        92

   micro avg      0.254     0.254     0.254      1267
   macro avg      0.259     0.246     0.245      1267
weighted avg      0.259     0.254     0.249      1267



In [462]:
# Pre-computed LIWC-2015 feature counts for every training statement:
# roughly 100 feature columns in addition to the existing metadata columns.
# NOTE(review): assumes the rows are in the same order as `train` — confirm.
extended_train = pd.read_csv('liwc_train.csv')

In [463]:
#extend the BERT feature vectors with the LIWC features
# The LIWC block is the label-based column range 'WC'..'OtherP' (both ends included).
# It is sliced once and joined column-wise in one step; the previous loop rebuilt the
# slice for every row.
liwc_train = extended_train.loc[:, 'WC':'OtherP'].values
X_bert_train_liwc = np.hstack(
    [X_bert_train_mean[:len(liwc_train)], liwc_train]).astype(np.float64, copy=False)
# The original pre-allocated 861 columns, so any other width is an error.
assert X_bert_train_liwc.shape[1] == 861, X_bert_train_liwc.shape
np.save('data/bert/X_bert_train_liwc', X_bert_train_liwc)

In [464]:
# Pre-computed LIWC feature counts for the test statements (counterpart of liwc_train.csv).
# NOTE(review): assumes the rows are in the same order as `test` — confirm.
extended_test = pd.read_csv('liwc_test.csv')

In [465]:
# Extend the test BERT vectors with the LIWC features of the *test* statements.
# The LIWC columns used to be read from extended_train, so every test vector was
# paired with the features of an unrelated training statement.
liwc_test = extended_test.loc[:, 'WC':'OtherP'].values
X_bert_test_liwc = np.hstack(
    [X_bert_test_mean[:len(liwc_test)], liwc_test]).astype(np.float64, copy=False)
# The original pre-allocated 861 columns, so any other width is an error.
assert X_bert_test_liwc.shape[1] == 861, X_bert_test_liwc.shape
np.save('data/bert/X_bert_test_liwc', X_bert_test_liwc)

In [466]:
#extend only with the three LIWC (Pennebaker) features of most interest:
#'ppron', 'negemo' and 'cogproc'
# The three columns are selected once and joined column-wise; the previous loop
# re-selected them from the DataFrame for every row.
liwc_train_rest = extended_train[['ppron', 'negemo', 'cogproc']].values
X_bert_train_liwc_rest = np.hstack(
    [X_bert_train_mean[:len(liwc_train_rest)], liwc_train_rest]).astype(np.float64, copy=False)
# The original pre-allocated 771 columns, so any other width is an error.
assert X_bert_train_liwc_rest.shape[1] == 771, X_bert_train_liwc_rest.shape
np.save('data/bert/X_bert_train_liwc_rest', X_bert_train_liwc_rest)

In [467]:
# Test-set counterpart: BERT vector plus the 'ppron', 'negemo' and 'cogproc' LIWC columns.
# The three columns are selected once and joined column-wise; the previous loop
# re-selected them from the DataFrame for every row.
liwc_test_rest = extended_test[['ppron', 'negemo', 'cogproc']].values
X_bert_test_liwc_rest = np.hstack(
    [X_bert_test_mean[:len(liwc_test_rest)], liwc_test_rest]).astype(np.float64, copy=False)
# The original pre-allocated 771 columns, so any other width is an error.
assert X_bert_test_liwc_rest.shape[1] == 771, X_bert_test_liwc_rest.shape
np.save('data/bert/X_bert_test_liwc_rest', X_bert_test_liwc_rest)

In [468]:
# Second classifier, for the BERT + three-LIWC-feature vectors. Same hidden_dim as the
# baseline `mod`, but max_iter is 100 here versus 110 there.
mod1 = TorchShallowNeuralClassifier(
    max_iter=100, hidden_dim=500)

In [469]:
# Fit on the BERT + three-LIWC-feature training vectors; labels are handed over as a tuple.
# %time prints the CPU and wall-clock time of the call; the return value is discarded.
%time _ = mod1.fit(X_bert_train_liwc_rest, tuple(y_train.tolist()))

Finished epoch 100 of 100; error is 14.09523606300354

CPU times: user 5min 29s, sys: 1min 2s, total: 6min 31s
Wall time: 37 s


In [470]:
# Predictions of mod1 on both splits.
# NOTE: these names were already used for the baseline model's predictions above and
# are overwritten here; the reports that follow therefore describe mod1.
bert_test_preds = mod1.predict(X_bert_test_liwc_rest)
bert_train_preds = mod1.predict(X_bert_train_liwc_rest)

In [472]:
# Per-class test metrics for mod1 (BERT + three LIWC features).
print(classification_report(y_test, bert_test_preds, digits=3))

              precision    recall  f1-score   support

       FALSE      0.317     0.237     0.271       249
        TRUE      0.229     0.260     0.243       208
 barely-true      0.247     0.208     0.226       212
   half-true      0.271     0.309     0.289       265
 mostly-true      0.238     0.224     0.231       241
  pants-fire      0.234     0.348     0.279        92

   micro avg      0.257     0.257     0.257      1267
   macro avg      0.256     0.264     0.257      1267
weighted avg      0.260     0.257     0.256      1267



In [473]:
# Per-class training metrics for mod1; the gap to the test report shows the overfitting.
print(classification_report(y_train, bert_train_preds, digits=3))

              precision    recall  f1-score   support

       FALSE      0.466     0.364     0.409      1995
        TRUE      0.449     0.479     0.463      1676
 barely-true      0.459     0.383     0.418      1654
   half-true      0.433     0.509     0.468      2114
 mostly-true      0.470     0.479     0.475      1962
  pants-fire      0.437     0.533     0.480       839

   micro avg      0.452     0.452     0.452     10240
   macro avg      0.452     0.458     0.452     10240
weighted avg      0.454     0.452     0.450     10240

