In [1]:
# Reload imported modules before each cell executes, so edits to project code
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import gc
import sys

import torch

In [3]:
# Switch the working directory to the location two levels above the starting
# directory (presumably the repository root, so that project packages and
# relative paths resolve -- confirm against the repo layout).
# The target is resolved to an absolute path only once per kernel: a bare
# relative chdir would climb two more levels every time this cell is re-run.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath('../../')
os.chdir(PROJECT_ROOT)

In [20]:
import numpy as np
import pandas as pd

In [4]:
from subclass_avail import common
from subclass_avail.target_nlp import bert_utils

# Evaluation

This notebook computes the basic test-set accuracy of the fine-tuned BERT models on the IMDB dataset.

Let's start by setting up some parameters.

In [5]:
# Mini-batch size shared by every data loader built in this notebook.
batch = 16

# Checkpoints to load and evaluate, in this order.
# Edit the list to evaluate any other BERT checkpoints.
model_files = [
    'imdb_bert_FT_ADV.ckpt',
    'imdb_bert_FT_DEF.ckpt',
    'imdb_bert_LL_ADV.ckpt',
    'imdb_bert_LL_DEF.ckpt',
]

## Data

In [6]:
# Load the tokenized data: two training partitions plus one test set.
train_def, train_adv, test = bert_utils.load_split_tokenized_data()

# Both training partitions are paired with the same test set and the same
# loader settings, so those arguments are spelled out only once.
loader_kwargs = dict(test_df=test, batch_size=batch, shuffle=False)

train_def_ds, train_def_dl, test_ds, test_dl = bert_utils.get_data_loaders(
    train_df=train_def, **loader_kwargs
)
# NOTE: test_ds / test_dl are rebound by this second call; the objects kept
# are the ones returned alongside the `train_adv` loaders.
train_adv_ds, train_adv_dl, test_ds, test_dl = bert_utils.get_data_loaders(
    train_df=train_adv, **loader_kwargs
)

Splitting data sets for training.
Data shapes:
ids_train: 12500
att_train: 12500
y_train: 12500
ids_test: 25000
att_test: 25000
y_test: 25000
Tensors shapes:
ids_train: torch.Size([12500, 256])
att_train: torch.Size([12500, 256])
y_train: torch.Size([12500])
ids_test: torch.Size([25000, 256])
att_test: torch.Size([25000, 256])
y_test: torch.Size([25000])
Data shapes:
ids_train: 12500
att_train: 12500
y_train: 12500
ids_test: 25000
att_test: 25000
y_test: 25000
Tensors shapes:
ids_train: torch.Size([12500, 256])
att_train: torch.Size([12500, 256])
y_train: torch.Size([12500])
ids_test: torch.Size([25000, 256])
att_test: torch.Size([25000, 256])
y_test: torch.Size([25000])


In [7]:
# Pick the compute device and load only the first checkpoint listed in
# `model_files`; the cells that follow inspect this one model's predictions.
device = bert_utils.get_device()
    
model = bert_utils.load_bert(model_file=model_files[0])

Available device:  cuda
Loading model: imdb_bert_FT_ADV.ckpt


In [8]:
# Run the loaded model over the whole test loader.
# NOTE(review): `raw=True` presumably makes predict_bert return un-reduced
# model outputs rather than a single score -- confirm in bert_utils.
predres = bert_utils.predict_bert(model, device, test_dl, raw=True)

100%|██████████| 1563/1563 [03:45<00:00,  6.95it/s]


In [23]:
# Convert every entry of predres[1] into its own NumPy array.
predresarr = list(map(np.array, predres[1]))

In [27]:
# Stack the entries of predres[1] into a single array along the first axis.
# This is rebuilt from `predres` rather than from `predresarr` itself:
# calling np.concatenate on the already-stacked 2-D array would flatten it
# into a 1-D vector, so the self-referential form gave the right result only
# the first time the cell was run.
predresarr = np.concatenate([np.array(p) for p in predres[1]])

In [29]:
# Sanity check: one row per test example is expected (the test split holds
# 25000 examples).
predresarr.shape

(25000, 2)

## Model evaluation

In [7]:
# Evaluate every checkpoint in `model_files` on the shared test loader.
# Scores are collected in `accuracies` (checkpoint name -> accuracy) so they
# remain available after the loop instead of existing only in the printed log.
accuracies = {}

# Drop the reference to any model loaded by an earlier cell; otherwise it
# stays alive while the first checkpoint below is being loaded and the
# housekeeping calls cannot reclaim it.
model = None

for model_file in model_files:
    # Housekeeping: reclaim whatever the previous model left behind before
    # loading the next one.
    gc.collect()
    torch.cuda.empty_cache()

    device = bert_utils.get_device()

    model = bert_utils.load_bert(model_file=model_file)
    try:
        accuracy = bert_utils.predict_bert(model, device, test_dl, acc=True)
    finally:
        # Release the model even when prediction fails, so an interrupted or
        # failed run does not leave a checkpoint pinned in memory.
        del model

    accuracies[model_file] = accuracy
    print('Model {} obtains accuracy {}\n'.format(model_file, accuracy))

# Final housekeeping: the last model was deleted above, but nothing has
# collected it or emptied the CUDA cache yet.
gc.collect()
torch.cuda.empty_cache()


Available device:  cuda
Loading model: imdb_bert_FT_ADV.ckpt


100%|██████████| 1563/1563 [03:47<00:00,  6.87it/s]


Model imdb_bert_FT_ADV.ckpt obtains accuracy 0.916

Available device:  cuda
Loading model: imdb_bert_FT_DEF.ckpt


100%|██████████| 1563/1563 [03:49<00:00,  6.81it/s]


Model imdb_bert_FT_DEF.ckpt obtains accuracy 0.9122

Available device:  cuda
Loading model: imdb_bert_LL_ADV.ckpt


100%|██████████| 1563/1563 [03:49<00:00,  6.81it/s]


Model imdb_bert_LL_ADV.ckpt obtains accuracy 0.88972

Available device:  cuda
Loading model: imdb_bert_LL_DEF.ckpt


100%|██████████| 1563/1563 [03:49<00:00,  6.81it/s]

Model imdb_bert_LL_DEF.ckpt obtains accuracy 0.88756




