In [1]:
import csv
import math
import os
import random
import sys

import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

from bert_sklearn import BertClassifier
from bert_sklearn import BertRegressor
from bert_sklearn import load_model

# Directory holding the downloaded GLUE data, built from the current working directory.
DATADIR = os.getcwd() + '/glue_examples/glue_data'

def read_tsv(filename,quotechar=None):
    """Read a tab-separated, UTF-8 encoded file into a list of rows.

    Args:
        filename: path of the TSV file to read.
        quotechar: quote character handed unchanged to ``csv.reader``.

    Returns:
        A list with one entry per record; each entry is the list of that
        record's string fields.
    """
    # newline='' hands line endings to the csv module untouched, as its docs
    # require; without it "\r\n" inside a quoted field is rewritten to "\n".
    with open(filename, "r", encoding='utf-8', newline='') as f:
        return list(csv.reader(f,delimiter="\t",quotechar=quotechar))

Better speed can be achieved with apex installed from https://www.github.com/nvidia/apex.



We will use the SST-2 (Stanford Sentiment Treebank) data set.

The input features are short sentences, and each label is the sentence's sentiment polarity:
*    0 for negative 
*    1 for positive.

In [2]:
%%bash
# Run the GLUE download script for the SST task, writing into ./glue_examples/glue_data
# (the doubled slash in the --data_dir path is harmless).
python3 ./glue_examples/download_glue_data.py --data_dir ./glue_examples//glue_data --tasks SST 

Downloading and extracting SST...
	Completed!


In [2]:
def _read_sst_split(path, split_name):
    """Load one SST-2 TSV split as a DataFrame with columns ``text`` and ``label``."""
    # keep_default_na=False stops pandas from parsing strings such as "NA" as NaN.
    frame = pd.read_csv(path, sep='\t', encoding='utf8', keep_default_na=False)
    frame.columns = ['text', 'label']
    print("SST-2 %s data size: %d " % (split_name, len(frame)))
    return frame


def get_sst_data(train_file = DATADIR + '/SST-2/train.tsv',
                dev_file  = DATADIR + '/SST-2/dev.tsv'):
    """Load the SST-2 train and dev splits and print their sizes.

    With the full download the sizes printed are:
        SST-2 train data size: 67349
        SST-2 dev data size: 872

    Args:
        train_file: path of the tab-separated training file.
        dev_file: path of the tab-separated dev file.

    Returns:
        A tuple ``(train, dev, label_list)``: the two DataFrames, each with
        columns ``text`` and ``label``, and the sorted array of unique labels
        found in the training split.
    """
    train = _read_sst_split(train_file, 'train')
    dev = _read_sst_split(dev_file, 'dev')

    # The label set is taken from the training split only.
    label_list = np.unique(train['label'])

    return train,dev,label_list

train, dev, label_list = get_sst_data()

# Keep the demo fast: train on a fixed random subset of 1000 rows.
train = train.sample(n=1000, random_state=42)

# Features are the raw sentences; targets are their labels.
X_train, y_train = train['text'], train['label']
X_dev, y_dev = dev['text'], dev['label']

train.head()

SST-2 train data size: 67349 
SST-2 dev data size: 872 


Unnamed: 0,text,label
66730,with outtakes in which most of the characters ...,0
29890,enigma is well-made,1
45801,is ) so stoked to make an important film about...,0
29352,the closest thing to the experience of space t...,1
19858,lose their luster,0


Suppose we want to tune over some of the hyperparameters mentioned in the BERT paper:
* **`epochs`** in  [3, 4]
* **`learning rate`** in  [2e-5, 3e-5, 5e-5]

## Option 1: Use **`tune_params`** to fit on train and tune on a holdout set

Here the variation in scores for a given setting comes from the different random seeds used during fitting.

By default, **`tune_params`** fits each parameter combination with 4 random seeds.

In [4]:
%%time

# define model
model = BertClassifier()

# tune model
tuned = model.tune_params(X_train, y_train,
                          X_dev, y_dev,
                          epochs = [3,4],                          
                          learning_rate = [2e-5, 3e-5, 5e-5])


Building sklearn classifier...
epochs: 3
learning_rate: 2e-05
max_seq_length: 128
num_mlp_layers: 0
train_batch_size: 32
Using random seed : 42
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:25<00:00,  1.54it/s, loss=0.583]
Training: 100%|██████████| 32/32 [00:23<00:00,  1.53it/s, loss=0.282]
Training: 100%|██████████| 32/32 [00:23<00:00,  1.53it/s, loss=0.15] 
                                                          


Test loss: 0.3114, Test accuracy = 87.73%
score: 87.73

Using random seed : 134
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:23<00:00,  1.53it/s, loss=0.656]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.46it/s, loss=0.34] 
Training: 100%|██████████| 32/32 [00:24<00:00,  1.47it/s, loss=0.163]
                                                          


Test loss: 0.3054, Test accuracy = 87.96%
score: 87.96

Using random seed : 6
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:23<00:00,  1.46it/s, loss=0.598]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.245]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.123]
                                                          


Test loss: 0.2898, Test accuracy = 88.76%
score: 88.76

Using random seed : 8
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:23<00:00,  1.45it/s, loss=0.537]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.221]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.096] 
                                                          


Test loss: 0.3211, Test accuracy = 87.50%
score: 87.50

epochs: 3
learning_rate: 3e-05
max_seq_length: 128
num_mlp_layers: 0
train_batch_size: 32
Using random seed : 42
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.572]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.196]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0744]
                                                          


Test loss: 0.3444, Test accuracy = 86.70%
score: 86.70

Using random seed : 134
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.608]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.232]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.092] 
                                                          


Test loss: 0.2870, Test accuracy = 88.88%
score: 88.88

Using random seed : 6
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.724]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.438]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.195]
                                                          


Test loss: 0.3039, Test accuracy = 87.39%
score: 87.39

Using random seed : 8
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.525]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.178]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0722]
                                                          


Test loss: 0.3384, Test accuracy = 88.19%
score: 88.19

epochs: 3
learning_rate: 5e-05
max_seq_length: 128
num_mlp_layers: 0
train_batch_size: 32
Using random seed : 42
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.623]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.218]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0639]
                                                          


Test loss: 0.3546, Test accuracy = 86.58%
score: 86.58

Using random seed : 134
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.46it/s, loss=0.623]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.256]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.44it/s, loss=0.083] 
                                                          


Test loss: 0.3124, Test accuracy = 88.30%
score: 88.30

Using random seed : 6
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.635]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.317]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.144]
                                                          


Test loss: 0.3467, Test accuracy = 86.01%
score: 86.01

Using random seed : 8
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.577]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.249]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0711]
                                                          


Test loss: 0.3832, Test accuracy = 86.70%
score: 86.70

epochs: 4
learning_rate: 2e-05
max_seq_length: 128
num_mlp_layers: 0
train_batch_size: 32
Using random seed : 42
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.46it/s, loss=0.576]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.227]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0709]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0429]
                                                          


Test loss: 0.3686, Test accuracy = 87.73%
score: 87.73

Using random seed : 134
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.666]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.333]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.153]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0909]
                                                          


Test loss: 0.3458, Test accuracy = 86.81%
score: 86.81

Using random seed : 6
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.598]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.253]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.113]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.074] 
                                                          


Test loss: 0.3166, Test accuracy = 88.30%
score: 88.30

Using random seed : 8
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.536]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.23] 
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0713]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0561]
                                                          


Test loss: 0.3473, Test accuracy = 87.61%
score: 87.61

epochs: 4
learning_rate: 3e-05
max_seq_length: 128
num_mlp_layers: 0
train_batch_size: 32
Using random seed : 42
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.573]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.31] 
Training: 100%|██████████| 32/32 [00:24<00:00,  1.46it/s, loss=0.109]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0814]
                                                          


Test loss: 0.3683, Test accuracy = 86.81%
score: 86.81

Using random seed : 134
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.645]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.305]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.104]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0575]
                                                          


Test loss: 0.3491, Test accuracy = 87.96%
score: 87.96

Using random seed : 6
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.641]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.268]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.099] 
Training: 100%|██████████| 32/32 [00:24<00:00,  1.46it/s, loss=0.0569]
                                                          


Test loss: 0.3020, Test accuracy = 88.76%
score: 88.76

Using random seed : 8
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:23<00:00,  1.45it/s, loss=0.538]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.223]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0988]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.057] 
                                                          


Test loss: 0.3176, Test accuracy = 88.65%
score: 88.65

epochs: 4
learning_rate: 5e-05
max_seq_length: 128
num_mlp_layers: 0
train_batch_size: 32
Using random seed : 42
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:23<00:00,  1.45it/s, loss=0.619]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.261]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0798]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0393]
                                                          


Test loss: 0.4940, Test accuracy = 85.21%
score: 85.21

Using random seed : 134
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:23<00:00,  1.45it/s, loss=0.591]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.284]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0727]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.03]  
                                                          


Test loss: 0.3830, Test accuracy = 88.07%
score: 88.07

Using random seed : 6
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.59] 
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.245]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0843]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0384]
                                                          


Test loss: 0.3685, Test accuracy = 87.61%
score: 87.61

Using random seed : 8
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.599]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.283]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.44it/s, loss=0.0701]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.0409]
                                                          


Test loss: 0.3433, Test accuracy = 88.65%
score: 88.65

87.987 (+/-1.098) for {'epochs': 3, 'learning_rate': 2e-05, 'max_seq_length': 128, 'num_mlp_layers': 0, 'train_batch_size': 32}
87.787 (+/-1.896) for {'epochs': 3, 'learning_rate': 3e-05, 'max_seq_length': 128, 'num_mlp_layers': 0, 'train_batch_size': 32}
86.898 (+/-1.967) for {'epochs': 3, 'learning_rate': 5e-05, 'max_seq_length': 128, 'num_mlp_layers': 0, 'train_batch_size': 32}
87.615 (+/-1.228) for {'epochs': 4, 'learning_rate': 2e-05, 'max_seq_length': 128, 'num_mlp_layers': 0, 'train_batch_size': 32}
88.045 (+/-1.790) for {'epochs': 4, 'learning_rate': 3e-05, 'max_seq_length': 128, 'num_mlp_layers': 0, 'train_batch_size': 32}
87.385 (+/-3.025) for {'epochs': 4, 'learning_rate': 5e-05, 'max_seq_length': 128, 'num_mlp_layers': 0, 'train_batch_size': 32}
Best mean score is 88.04, with params: {'epochs': 4, 'learning_rate': 3e-05, 'max_seq_length': 128, 'num_mlp_layers': 0, 'train_batch_size': 32}
Building sklearn classifier...

Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.641]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.268]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.099] 
Training: 100%|██████████| 32/32 [00:24<00:00,  1.46it/s, loss=0.0569]

CPU times: user 37min 26s, sys: 17min 57s, total: 55min 23s
Wall time: 44min 44s





In [5]:
# Unpack the entries of the result returned by tune_params.
all_scores, best_param, best_score, best_model = (
    tuned[key] for key in ('scores', 'best_param', 'best_score', 'best_model')
)

# Display the selected model together with its parameters and score.
best_model, best_param, best_score

(BertClassifier(bert_model='bert-base-uncased', epochs=4, eval_batch_size=8,
         fp16=False, gradient_accumulation_steps=1,
         label_list=array([0, 1]), learning_rate=3e-05, local_rank=-1,
         logfile='bert_sklearn.log', loss_scale=0, max_seq_length=128,
         num_mlp_hiddens=500, num_mlp_layers=0, random_state=6,
         restore_file=None, train_batch_size=32, use_cuda=True,
         validation_fraction=0, warmup_proportion=0.1),
 {'epochs': 4,
  'learning_rate': 3e-05,
  'max_seq_length': 128,
  'num_mlp_layers': 0,
  'train_batch_size': 32},
 88.0447247706422)

## Option 2: Use sklearn's **`GridSearchCV`** to fit on cv splits of the train data

In [5]:
%%time
from sklearn.model_selection import GridSearchCV

params = {'epochs':[3, 4], 'learning_rate':[2e-5, 3e-5, 5e-5]}

# wrap classifier/regressor in GridSearchCV
clf = GridSearchCV(BertClassifier(validation_fraction=0), 
                    params,
                    scoring='accuracy',
                    verbose=True)

# fit gridsearch 
clf.fit(X_train ,y_train)

Building sklearn classifier...
Fitting 3 folds for each of 6 candidates, totalling 18 fits
Building sklearn classifier...
Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 666, validation data size: 0


Training: 100%|██████████| 21/21 [00:17<00:00,  1.41it/s, loss=0.644]
Training: 100%|██████████| 21/21 [00:15<00:00,  1.41it/s, loss=0.262]
Training: 100%|██████████| 21/21 [00:15<00:00,  1.41it/s, loss=0.127]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.40it/s, loss=0.597]
Training: 100%|██████████| 21/21 [00:15<00:00,  1.40it/s, loss=0.273]
Training: 100%|██████████| 21/21 [00:15<00:00,  1.34it/s, loss=0.118]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.40it/s, loss=0.673]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.308]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.148]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 666, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.35it/s, loss=0.646]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.283]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.109] 
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.34it/s, loss=0.613]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.206]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.0713]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.651]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.288]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.137]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 666, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.34it/s, loss=0.708]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.7]  
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.615]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.606]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.356]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.101] 
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.699]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.703]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.584]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 666, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.636]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.241]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.0874]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.0523]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.34it/s, loss=0.64] 
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.29] 
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.0936]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.0593]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.679]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.347]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.159]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.0998]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 666, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.64] 
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.281]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.0946]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.0502]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.567]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.268]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.0566]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.036] 
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.686]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.374]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.142]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.0792]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 666, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.34it/s, loss=0.705]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.315]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.116]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.32it/s, loss=0.0529]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.34it/s, loss=0.684]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.485]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.26] 
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.127]
                                                           

Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 667, validation data size: 0


Training: 100%|██████████| 21/21 [00:15<00:00,  1.33it/s, loss=0.691]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.437]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.228]
Training: 100%|██████████| 21/21 [00:16<00:00,  1.33it/s, loss=0.146]
[Parallel(n_jobs=1)]: Done  18 out of  18 | elapsed: 24.2min finished


Building sklearn classifier...
Loading bert-base-uncased model...
Defaulting to linear classifier/regressor
train data size: 1000, validation data size: 0


Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.583]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.282]
Training: 100%|██████████| 32/32 [00:24<00:00,  1.45it/s, loss=0.15] 

CPU times: user 21min 16s, sys: 9min 59s, total: 31min 16s
Wall time: 25min 32s





In [6]:
# best scores
print(clf.best_score_, clf.best_params_)

means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']

# One line per candidate: mean CV score, twice the std across folds, and the
# candidate's parameters. The loop names deliberately avoid `params`, which
# holds the search grid, so iterating here does not overwrite it.
for mean_score, std_score, candidate in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r"
          % (mean_score, std_score * 2, candidate))

0.891 {'epochs': 3, 'learning_rate': 2e-05}
0.891 (+/-0.024) for {'epochs': 3, 'learning_rate': 2e-05}
0.882 (+/-0.010) for {'epochs': 3, 'learning_rate': 3e-05}
0.814 (+/-0.089) for {'epochs': 3, 'learning_rate': 5e-05}
0.877 (+/-0.005) for {'epochs': 4, 'learning_rate': 2e-05}
0.885 (+/-0.022) for {'epochs': 4, 'learning_rate': 3e-05}
0.883 (+/-0.010) for {'epochs': 4, 'learning_rate': 5e-05}
