## Comparison test

This test compares the output of `run_classifier.py` from the Hugging Face port with the output of `bert_sklearn` on a small subset of SST-2 (200 training and 100 dev examples).

### `run_classifier.py` from the Hugging Face port

In [1]:
%%time
%%bash
# Baseline run: fine-tune and evaluate with run_classifier.py on the SST-2
# subset in ./tests/data/sst2, writing its output under ./tests.
# The paths below are relative to the parent directory, hence the `cd ..`
# (presumably the repository root, with this notebook living in ./tests).
# The hyperparameters passed here (learning rate, gradient accumulation steps,
# max sequence length, batch sizes, epochs) are the values the bert_sklearn
# cell further down sets on its model.
cd ..
python ./tests/run_classifier.py --task_name sst-2 \
                            --data_dir ./tests/data/sst2 \
                            --do_train  --do_eval \
                            --output_dir ./tests \
                            --bert_model bert-base-uncased \
                            --do_lower_case \
                            --learning_rate 5e-5 \
                            --gradient_accumulation_steps 1 \
                            --max_seq_length 64 \
                            --train_batch_size 16 \
                            --eval_batch_size 8 \
                            --num_train_epochs 2

05/17/2019 00:07:50 - INFO - __main__ -   device: cuda n_gpu: 1, distributed training: False, 16-bits training: False
05/17/2019 00:07:51 - INFO - bert_sklearn.model.pytorch_pretrained.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /root/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084
05/17/2019 00:07:51 - INFO - bert_sklearn.model.pytorch_pretrained.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /root/.pytorch_pretrained_bert/distributed_-1/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
05/17/2019 00:07:51 - INFO - bert_sklearn.model.pytorch_pretrained.modeling -   extracting archive file /root/.pytorch_pretrained_bert/distributed_-1/9

CPU times: user 12 ms, sys: 8 ms, total: 20 ms
Wall time: 17.3 s


In [2]:
%%bash
# Print the evaluation metrics file found in the notebook's working directory
# (presumably written by the run_classifier.py run above via --output_dir).
cat eval_results.txt

acc = 0.81
eval_loss = 0.42863699048757553
global_step = 26
loss = 0.3171330988407135


###  `bert_sklearn` 

In [3]:
%%time
import os
import sys
import csv

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import classification_report

sys.path.append("../") 
from bert_sklearn import BertClassifier
from bert_sklearn import load_model


def _read_sst_split(path, split_name):
    """Read one SST-2 TSV split, name its columns, and print its size."""
    # keep_default_na=False keeps literal text such as "NA" or "null" as
    # strings instead of letting pandas convert it to NaN.
    frame = pd.read_csv(path, sep='\t', encoding='utf8', keep_default_na=False)
    # The file's own header row is replaced with fixed column names; this
    # assignment requires the file to have exactly two columns.
    frame.columns = ['text', 'label']
    print("SST-2 %s data size: %d " % (split_name, len(frame)))
    return frame


def get_sst_test_data(train_file ='./data/sst2/train.tsv',
                dev_file  = './data/sst2/dev.tsv'):
    """Load the SST-2 train and dev splits used by the comparison test.

    Each file is read as a tab-separated, UTF-8 table whose first row is a
    header and whose two columns are the sentence text and its label. The
    size of each split is printed as it is loaded.

    Parameters
    ----------
    train_file : str
        Path to the training TSV file.
    dev_file : str
        Path to the dev TSV file.

    Returns
    -------
    tuple of pandas.Series
        ``(X_train, y_train, X_dev, y_dev)``: the ``text`` and ``label``
        columns of the train split followed by those of the dev split.
    """
    train = _read_sst_split(train_file, "train")
    dev = _read_sst_split(dev_file, "dev")

    return train['text'], train['label'], dev['text'], dev['label']


# Load the train/dev splits from the default ./data/sst2 paths.
X_train,y_train, X_dev, y_dev =  get_sst_test_data()

# define model
# The hyperparameters below are set to the same values passed to
# run_classifier.py in the first cell (learning rate 5e-5, gradient
# accumulation steps 1, max sequence length 64, train/eval batch sizes 16/8,
# 2 epochs) so that the two runs can be compared.
model = BertClassifier()
# No validation split: the recorded output reports "validation data size: 0",
# i.e. all training rows are used for fitting.
model.validation_fraction = 0.0
model.learning_rate = 5e-5 
model.gradient_accumulation_steps = 1
model.max_seq_length = 64
model.train_batch_size = 16
model.eval_batch_size=8
model.epochs = 2

# fit
# Fine-tune on the full training split.
model.fit(X_train,y_train)

# score
# Evaluate on the dev split; the recorded output shows this call printing the
# loss and accuracy. The return value is kept in `accy` (presumably the
# accuracy -- confirm against BertClassifier.score).
accy = model.score(X_dev,y_dev)

SST-2 train data size: 200 
SST-2 dev data size: 100 
Building sklearn text classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 200, validation data size: 0


Training: 100%|██████████| 13/13 [00:03<00:00,  3.90it/s, loss=0.673]
Training: 100%|██████████| 13/13 [00:03<00:00,  3.93it/s, loss=0.317]
                                                        


Loss: 0.4286, Accuracy: 81.00%
CPU times: user 13.2 s, sys: 3.84 s, total: 17 s
Wall time: 18.1 s




In [4]:
# Remove the log and results files left in the working directory by the runs
# above. -f makes the cleanup idempotent: re-running this cell (or running it
# after a failed run that never produced a file) does not raise an error.
!rm -f bert_sklearn.log eval_results.txt