In [4]:
%reload_ext autoreload
%autoreload 2

In [None]:
from utils import *

from datasets import load_dataset
import pandas as pd
import random
import time
import pickle
import os
from tqdm.notebook import tqdm

# Data

In [None]:
# Shuffle with a fixed seed so the example order (and anything sampled from
# `dataset` afterwards) is identical on every run of the notebook.
dataset = load_dataset("ag_news").shuffle(seed=42)

In [3]:
## generate 100 test suites with n tests
# One-off generation recipe, kept commented out for reference: each of the 100
# suites is a contiguous window of n training examples starting at a random
# offset j, stored as {'data': texts, 'target': labels} under an integer key.
# n = 100
# test_suites = {}
# for i in range(100):
#     j = random.randint(0, len(dataset['train']['text']) - 1 - n)
#     X, y = dataset['train']['text'][j:j+n], dataset['train']['label'][j:j+n]
#     test_suite = {'data': X, 'target': y}
#     test_suites[i] = test_suite
# pkl_save(test_suites, 'assets/AG_NEWS/test_suites.pkl')

# Load the previously saved suites instead of regenerating them.
# NOTE(review): pkl_load / pkl_save are not defined in this notebook; presumably
# they come from `from utils import *` — confirm.
test_suites = pkl_load('assets/AG_NEWS/test_suites.pkl')

In [4]:
# Derive the INV-transformed counterpart of the baseline suites
# (two transformations per suite, topic task) and persist it to disk.
INV_test_suites = transform_test_suites(
    test_suites,
    tran='INV',
    task='topic',
    num_transforms=2,
)
pkl_save(INV_test_suites, 'assets/AG_NEWS/INV_test_suites.pkl')

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [04:25<00:00,  2.65s/it]


In [None]:
# Derive the SIB-mix-transformed counterpart of the baseline suites
# (one transformation per suite, topic task) and persist it to disk.
SIB_test_suites = transform_test_suites(
    test_suites,
    tran='SIB-mix',
    task='topic',
    num_transforms=1,
)
pkl_save(SIB_test_suites, 'assets/AG_NEWS/SIB_test_suites.pkl')

In [None]:
# Combining both transformation types is not straightforward: SIB transformations return soft labels, whereas INV ones return hard labels.
# Supporting this needs more thought: the hard labels could be one-hot encoded, but it is still unclear how to measure performance (a BCEWithLogits-style loss?).

# both_test_suites = transform_test_suites(test_suites, num_transforms=2, task='sentiment', tran=None)
# pkl_save(both_test_suites, 'assets/AG_NEWS/both_test_suites.pkl')

# Testing

In [5]:
# Load the baseline, INV and SIB suite families from their pickles
# (same order as before: baseline first, then INV, then SIB).
test_suites, INV_test_suites, SIB_test_suites = (
    pkl_load('assets/AG_NEWS/test_suites.pkl'),
    pkl_load('assets/AG_NEWS/INV_test_suites.pkl'),
    pkl_load('assets/AG_NEWS/SIB_test_suites.pkl'),
)
# NOTE(review): the disabled load below reads from assets/IMDB, while the
# (also disabled) save of both_test_suites writes to assets/AG_NEWS — confirm
# the path before re-enabling.
# both_test_suites = pkl_load('assets/IMDB/both_test_suites.pkl')

In [6]:
def get_acc(y_pred, y_true):
    """Return the fraction of entries in `y_pred` that equal `y_true`.

    The two tensors are compared element-wise; the number of matches is
    divided by the length of `y_true` along its first dimension, giving a
    plain Python number.
    """
    matches = y_pred.eq(y_true)
    n_examples = y_true.shape[0]
    return matches.sum().item() / n_examples

def get_topk_acc(logits, y_true, k):
    """Weighted top-k accuracy against soft (per-class weight) targets.

    For every row, the k largest entries of `y_true` and of `logits` are
    taken with `torch.topk` along dim 1. A rank position counts as a hit when
    the class index predicted at that rank equals the target's class index at
    the same rank; each hit contributes the target weight at that rank. The
    summed contributions are divided by the number of rows.

    Args:
        logits: 2-D tensor of per-class scores, one row per example.
        y_true: 2-D tensor of per-class target weights, same layout.
        k: number of top entries compared per row.

    Returns:
        The score as a plain Python float, matching what `get_acc` returns
        (rather than a 0-dim tensor).
    """
    total = y_true.size(0)
    y_weights, y_idx = torch.topk(y_true, k=k, dim=1)
    # Only the predicted class indices matter; the predicted scores are unused.
    _, out_idx = torch.topk(logits, k=k, dim=1)
    correct = torch.sum(torch.eq(y_idx, out_idx) * y_weights)
    return correct.item() / total

In [7]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Inference is pinned to the CPU. The commented-out line is the alternative that
# picks CUDA when it is available.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

def load_huggingface_model(model_name, device, max_length=500):
    """Load a sequence-classification model and wrap it as a callable.

    Args:
        model_name: identifier passed to `AutoTokenizer.from_pretrained` and
            `AutoModelForSequenceClassification.from_pretrained`.
        device: torch device the model and the encoded inputs are moved to.
        max_length: `max_length` handed to the tokenizer (with truncation on).

    Returns:
        A function `pipeline(sentence)` that tokenizes its input (padded,
        truncated, as PyTorch tensors), runs the model and returns the softmax
        over dim 1 of the first element of the model output.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)

    def pipeline(sentence):
        encode = tokenizer(sentence,
                           padding=True,
                           truncation=True,
                           max_length=max_length,
                           return_tensors="pt").to(device)
        # Pure inference: skip autograd bookkeeping so no graph is built and the
        # returned probabilities do not require grad.
        with torch.no_grad():
            logits = model(**encode)[0]
            soft_m = torch.softmax(logits, dim=1)
        return soft_m

    return pipeline

# Model identifier under evaluation; the trailing comment keeps the DistilBERT
# variant at hand for a quick swap.
MODEL_NAME = "textattack/bert-base-uncased-ag-news" #"textattack/distilbert-base-uncased-ag-news"
# The third positional argument is max_length, i.e. the tokenizer truncates
# inputs to max_length=100 here instead of the default 500.
model = load_huggingface_model(MODEL_NAME, device, 100)

HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=706.0), HTML(value='')))




HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=231508.0), HTML(value='')))




HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=112.0), HTML(value='')))




HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=48.0), HTML(value='')))




HBox(children=(HTML(value='Downloading'), FloatProgress(value=0.0, max=437991539.0), HTML(value='')))




In [8]:
# Suite families to evaluate, keyed by the name that is later used in the
# progress messages and in the output file names.
tss = dict(
    test_suites=test_suites,
    INV_test_suites=INV_test_suites,
    SIB_test_suites=SIB_test_suites,
    # both_test_suites=both_test_suites,
)

In [9]:
# Score every suite family with the loaded model, attach the result to each
# suite under 'performance', and save the annotated family below
# assets/AG_NEWS/BERT/.
# The loop variable is named `suites` (not `test_suites`) so the notebook-level
# `test_suites` mapping is not overwritten by the last family iterated.
for t_name, suites in tss.items():
    print('starting {}...'.format(t_name))
    # Families with 'SIB' in their name are scored with top-2 accuracy via
    # get_topk_acc; all others with plain argmax accuracy via get_acc.
    is_sib = 'SIB' in t_name
    for idx, t in suites.items():
        # SIB data is converted element by element to str (via .tolist());
        # the other families are passed to the model exactly as stored.
        batch = [str(x) for x in t['data'].tolist()] if is_sib else t['data']
        with torch.no_grad():  # evaluation only: no autograd graph needed
            probs = model(batch)
        y_true = torch.tensor(t['target'])
        if is_sib:
            acc = get_topk_acc(probs, y_true, k=2)
        else:
            acc = get_acc(torch.argmax(probs, dim=1), y_true)
        # Store a plain Python float so the pickled results have the same type
        # whichever metric produced them.
        acc = float(acc)
        t['performance'] = {
            'MODEL_NAME' : MODEL_NAME,
            'acc' : acc
        }
        print('test suite {} acc: {}'.format(idx, acc))
    file_path = 'assets/AG_NEWS/BERT/' + t_name + '_w_acc.pkl'
    pkl_save(suites, file_path)
    print('saving {}'.format(file_path))

starting test_suites...
test suite 0 acc: 0.99
test suite 1 acc: 1.0
test suite 2 acc: 0.99
test suite 3 acc: 1.0
test suite 4 acc: 1.0
test suite 5 acc: 1.0
test suite 6 acc: 1.0
test suite 7 acc: 1.0
test suite 8 acc: 1.0
test suite 9 acc: 1.0
test suite 10 acc: 0.97
test suite 11 acc: 1.0
test suite 12 acc: 0.99
test suite 13 acc: 1.0
test suite 14 acc: 1.0
test suite 15 acc: 1.0
test suite 16 acc: 0.99
test suite 17 acc: 1.0
test suite 18 acc: 1.0
test suite 19 acc: 1.0
test suite 20 acc: 1.0
test suite 21 acc: 0.98
test suite 22 acc: 0.98
test suite 23 acc: 1.0
test suite 24 acc: 1.0
test suite 25 acc: 1.0
test suite 26 acc: 1.0
test suite 27 acc: 1.0
test suite 28 acc: 1.0
test suite 29 acc: 1.0
test suite 30 acc: 1.0
test suite 31 acc: 1.0
test suite 32 acc: 1.0
test suite 33 acc: 0.99
test suite 34 acc: 1.0
test suite 35 acc: 1.0
test suite 36 acc: 1.0
test suite 37 acc: 1.0
test suite 38 acc: 1.0
test suite 39 acc: 1.0
test suite 40 acc: 1.0
test suite 41 acc: 0.99
test suite 

test suite 88 acc: 0.6794212747758607
test suite 89 acc: 0.5529635369087186
test suite 90 acc: 0.6670173753813041
test suite 91 acc: 0.6623795592934744
test suite 92 acc: 0.6330509141218206
test suite 93 acc: 0.6676707118365327
test suite 94 acc: 0.513041733944327
test suite 95 acc: 0.5592081739299514
test suite 96 acc: 0.6417770350545341
test suite 97 acc: 0.6039034802356585
test suite 98 acc: 0.6518913263140864
test suite 99 acc: 0.539455539201459
saving assets/AG_NEWS/BERT/SIB_test_suites_w_acc.pkl


In [10]:
# BERT: mean accuracy per suite family.
# The loop variable is named `suites` (not `test_suites`) so that iterating here
# does not overwrite the notebook-level `test_suites` mapping.
for t_name, suites in tss.items():
    # float() accepts both plain numbers and 0-dim tensors stored as 'acc'.
    accs = [float(t['performance']['acc']) for t in suites.values()]
    avg_acc = sum(accs) / len(accs)
    print('average acc: {0:1.2f} | {1}'.format(avg_acc, t_name))

average acc: 1.00 | test_suites
average acc: 0.45 | INV_test_suites
average acc: 0.62 | SIB_test_suites


# Sample some transforms to see if they're reasonable

In [11]:
# Reload the baseline, INV and SIB suite families from their pickles
# (same order as before: baseline first, then INV, then SIB).
test_suites, INV_test_suites, SIB_test_suites = (
    pkl_load('assets/AG_NEWS/test_suites.pkl'),
    pkl_load('assets/AG_NEWS/INV_test_suites.pkl'),
    pkl_load('assets/AG_NEWS/SIB_test_suites.pkl'),
)

In [68]:
# Number of rows sampled from suite 0 of each family.
n = 50

# One frame per family, holding the first n rows of suite 0. The 'data' column
# is renamed after its family so the three text columns stay distinguishable.
df_orig = (
    pd.DataFrame.from_dict(test_suites[0])
    .head(n)
    .rename(columns={'data': 'orig'})
)
df_INV = (
    pd.DataFrame.from_dict(INV_test_suites[0])
    .head(n)
    .rename(columns={'data': 'INV'})
)
# NOTE(review): 'ts' is multiplied by 100 — presumably list repetition so its
# length matches the other columns; confirm against transform_test_suites.
df_SIB = (
    pd.DataFrame.from_dict({
        'data': SIB_test_suites[0]['data'],
        'target': SIB_test_suites[0]['target'],
        'ts': SIB_test_suites[0]['ts'] * 100,
    })
    .head(n)
    .rename(columns={'data': 'SIB'})
)

In [69]:
# Put the three sample frames side by side (column-wise), aligned on row index.
df = pd.concat(
    [df_orig, df_INV, df_SIB],
    axis='columns',
)

In [71]:
# Export the side-by-side samples so the transformations can be checked by eye.
df.to_csv('assets/AG_NEWS/transformed_samples.csv')