In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
from utils import *

from datasets import load_dataset
import pandas as pd
import random
import time
import pickle
import os
from tqdm.notebook import tqdm

# Data

In [None]:
# Download the AG News dataset and shuffle it.
# NOTE(review): .shuffle() is called without a seed, so the ordering presumably
# differs between runs — confirm, and pass a fixed seed if anything sampled from
# `dataset` has to be reproducible.
dataset = load_dataset("ag_news").shuffle()

In [3]:
## Generate 100 test suites with n tests each.
# The generation code is kept commented out; this cell loads the saved result instead.
# Each suite is a contiguous slice of n training examples starting at a random offset j,
# stored as {'data': texts, 'target': labels}.
# n = 100
# test_suites = {}
# for i in range(100):
#     j = random.randint(0, len(dataset['train']['text']) - 1 - n)
#     X, y = dataset['train']['text'][j:j+n], dataset['train']['label'][j:j+n]
#     test_suite = {'data': X, 'target': y}
#     test_suites[i] = test_suite
# pkl_save(test_suites, 'assets/AG_NEWS/test_suites.pkl')

# Load the saved suites from disk.
# NOTE(review): pkl_load comes from utils and presumably unpickles the file —
# only load files from a trusted source.
test_suites = pkl_load('assets/AG_NEWS/test_suites.pkl')

In [4]:
# Build the 'INV' variant of every test suite (2 transforms per example, topic task)
# and save it to disk. The recorded run of this cell took about 4.5 minutes.
# NOTE(review): the exact semantics of tran='INV' live in utils.transform_test_suites —
# a later comment in this notebook says INV transformations return hard labels.
INV_test_suites = transform_test_suites(test_suites, num_transforms=2, task='topic', tran='INV')
pkl_save(INV_test_suites, 'assets/AG_NEWS/INV_test_suites.pkl')

100%|████████████████████████████████████████████████████████████████████████████████| 100/100 [04:25<00:00,  2.65s/it]


In [5]:
# Build the 'SIB-mix' variant of every test suite (1 transform per example, topic task)
# and save it to disk.
# NOTE(review): the exact semantics of tran='SIB-mix' live in utils.transform_test_suites —
# a later comment in this notebook says SIB transformations return soft labels.
SIB_test_suites = transform_test_suites(test_suites, num_transforms=1, task='topic', tran='SIB-mix')
pkl_save(SIB_test_suites, 'assets/AG_NEWS/SIB_test_suites.pkl')

100%|███████████████████████████████████████████████████████████████████████████████| 100/100 [00:00<00:00, 176.67it/s]


In [None]:
# Not applied yet: SIB transformations return soft labels, whereas INV transformations return hard labels.
# Combining them needs a common label format (e.g. one-hot encoding the hard labels), and it is still
# unclear how the result should be scored (BCEWithLogitsLoss?).

# both_test_suites = transform_test_suites(test_suites, num_transforms=2, task='sentiment', tran=None)
# pkl_save(both_test_suites, 'assets/AG_NEWS/both_test_suites.pkl')

# Testing

In [6]:
# Reload the original and transformed suites from disk so the testing section
# can run without re-executing the transformation cells above.
test_suites = pkl_load('assets/AG_NEWS/test_suites.pkl')
INV_test_suites = pkl_load('assets/AG_NEWS/INV_test_suites.pkl')
SIB_test_suites = pkl_load('assets/AG_NEWS/SIB_test_suites.pkl')
# NOTE(review): the disabled line below points at assets/IMDB, not assets/AG_NEWS —
# fix the path before re-enabling it.
# both_test_suites = pkl_load('assets/IMDB/both_test_suites.pkl')

In [7]:
def get_acc(y_pred, y_true):
    """Return the fraction of predictions that equal their target.

    Args:
        y_pred: Tensor of predicted class ids.
        y_true: Tensor of reference class ids, compared element-wise with ``y_pred``.

    Returns:
        Number of matching positions divided by the size of ``y_true``'s
        first dimension, as a Python number.
    """
    n_examples = y_true.size(0)
    matches = y_pred == y_true
    n_correct = matches.sum().item()
    return n_correct / n_examples

def get_topk_acc(logits, y_true, k):
    """Soft-label top-k accuracy, weighted by the target weights.

    For every row the k largest entries of ``y_true`` and of ``logits`` are
    taken (both sorted by ``torch.topk``). A target class only scores when the
    model ranks it at the *same position* within the top k; it then contributes
    its target weight. The weighted hits are summed over the batch and divided
    by the number of rows.

    Args:
        logits: 2-D tensor of model scores, one row per example.
        y_true: 2-D tensor of target weights with the same layout as ``logits``.
        k: Number of top entries to compare per row.

    Returns:
        A 0-dim tensor holding the weighted accuracy.
    """
    n_examples = y_true.size(0)
    target_top = torch.topk(y_true, k=k, dim=1)
    pred_top = torch.topk(logits, k=k, dim=1)
    same_rank = target_top.indices == pred_top.indices
    weighted_hits = (same_rank * target_top.values).sum()
    return weighted_hits / n_examples

In [8]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Inference is pinned to the CPU; the CUDA auto-detection line is kept
# commented out so it can be re-enabled.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

def load_huggingface_model(model_name, device, max_length=500):
    """Load a Hugging Face sequence classifier and wrap it as a callable.

    Args:
        model_name: Model identifier or path passed to ``from_pretrained`` for
            both the tokenizer and the model.
        device: ``torch.device`` the model and the encoded inputs are moved to.
        max_length: Maximum number of tokens per input; longer inputs are
            truncated.

    Returns:
        A function ``pipeline(sentence)`` that tokenizes ``sentence`` (padded,
        truncated to ``max_length``) and returns the softmax of the model's
        first output along dim 1.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name).to(device)
    # The wrapper is inference-only: make sure dropout and similar layers are off.
    model.eval()

    def pipeline(sentence):
        encode = tokenizer(sentence,
                           padding=True,
                           truncation=True,
                           max_length=max_length,
                           return_tensors="pt").to(device)
        # Without no_grad every call records an autograd graph, which wastes
        # memory on whole-suite batches and returns tensors with grad history.
        with torch.no_grad():
            logits = model(**encode)[0]
            soft_m = torch.softmax(logits, dim=1)
        return soft_m

    return pipeline

# Checkpoint under test; the distilbert checkpoint is kept as a commented-out alternative.
MODEL_NAME = "textattack/bert-base-uncased-ag-news" #"textattack/distilbert-base-uncased-ag-news" 
# The third argument is max_length: inputs are truncated to 100 tokens instead of the default 500.
model = load_huggingface_model(MODEL_NAME, device, 100)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=706.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=231508.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=112.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=48.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=437991539.0, style=ProgressStyle(descri…




In [9]:
# Registry of the suite collections to evaluate, keyed by the name that is used
# in log messages and in the output file name.
# The values are the same objects loaded above (not copies), so the evaluation
# loop's t['performance'] assignments are visible through those variables too.
tss = {
    'test_suites' : test_suites,
    'INV_test_suites' : INV_test_suites,
    'SIB_test_suites' : SIB_test_suites,
    # 'both_test_suites' : both_test_suites
}

In [None]:
# Evaluate the model on every suite collection in `tss`, store the accuracy of
# each suite under t['performance'], and pickle the annotated collection.
#
# The loop variable is called `suites` rather than `test_suites` so the
# notebook-level `test_suites` (the untransformed suites) is not rebound to
# whichever collection happens to be iterated last.
out_dir = 'assets/AG_NEWS/BERT/'
os.makedirs(out_dir, exist_ok=True)  # make sure the output directory exists

for t_name, suites in tss.items():
    print('starting {}...'.format(t_name))
    uses_soft_labels = 'SIB' in t_name
    for idx, t in suites.items():
        if uses_soft_labels:
            # SIB suites: t['data'] is converted with .tolist() and every item is
            # cast to str (presumably it is an array of strings — confirm against
            # transform_test_suites). Scored with the top-2 weighted accuracy.
            probs = model([str(x) for x in t['data'].tolist()])
            y_true = torch.tensor(t['target'])
            # get_topk_acc returns a 0-dim tensor; store a plain float so the
            # pickled result has the same type as in the hard-label branch.
            acc = float(get_topk_acc(probs, y_true, k=2))
        else:
            # Hard-label suites: plain argmax accuracy.
            probs = model(t['data'])
            y_pred = torch.argmax(probs, dim=1)
            y_true = torch.tensor(t['target'])
            acc = get_acc(y_pred, y_true)
        t['performance'] = {
            'MODEL_NAME' : MODEL_NAME,
            'acc' : acc
        }
        print('test suite {} acc: {}'.format(idx, acc))
    file_path = out_dir + t_name + '_w_acc.pkl'
    pkl_save(suites, file_path)
    print('saving {}'.format(file_path))

starting test_suites...
test suite 0 acc: 0.99
test suite 1 acc: 1.0
test suite 2 acc: 0.99
test suite 3 acc: 1.0
test suite 4 acc: 1.0
test suite 5 acc: 1.0
test suite 6 acc: 1.0
test suite 7 acc: 1.0
test suite 8 acc: 1.0
test suite 9 acc: 1.0
test suite 10 acc: 0.97
test suite 11 acc: 1.0
test suite 12 acc: 0.99
test suite 13 acc: 1.0


In [None]:
# BERT: average accuracy per suite collection.
# NOTE(review): this cell has no recorded execution and repeats the executed
# cell that follows it — consider removing one of the two.
# The loop variable is `suites` (not `test_suites`) so the notebook-level
# `test_suites` is not rebound by running this cell.
for t_name, suites in tss.items():
    accs = [t['performance']['acc'] for t in suites.values()]
    avg_acc = sum(accs) / len(accs)
    print('average acc: {0:1.2f} | {1}'.format(avg_acc, t_name))

In [12]:
# BERT: average accuracy per suite collection.
# Reads the 'acc' value stored under t['performance'] for every suite.
# The loop variable is `suites` (not `test_suites`) so the notebook-level
# `test_suites` is not rebound by running this cell.
for t_name, suites in tss.items():
    accs = [t['performance']['acc'] for t in suites.values()]
    avg_acc = sum(accs) / len(accs)
    print('average acc: {0:1.2f} | {1}'.format(avg_acc, t_name))

average acc: 0.97 | test_suites
average acc: 0.44 | INV_test_suites
average acc: 0.60 | SIB_test_suites


# Sample some transforms to see if they're reasonable

In [None]:
import csv

In [7]:
# Export 50 INV-transformed examples to a CSV for manual review.
filename = "./assets/csv_examples/agnews_inv_examples.csv"

# newline='' is what the csv module requires of the file object (otherwise
# extra blank lines appear on platforms that translate '\n'); an explicit
# UTF-8 encoding keeps non-ASCII text from failing under a locale default.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Header; the 'sound good?' column is pre-filled with True for the reviewer to edit.
    csvwriter.writerow(['text', 'label', 'sound good?'])
    # Row i is the i-th example of the i-th suite, so each row comes from a different suite.
    for i in range(50):
        suite = INV_test_suites[i]
        csvwriter.writerow([suite['data'][i], suite['target'][i], True])

In [8]:
# Export 50 SIB-transformed examples to a CSV for manual review.
filename = "./assets/csv_examples/agnews_sib_examples.csv"

# newline='' is what the csv module requires of the file object (otherwise
# extra blank lines appear on platforms that translate '\n'); an explicit
# UTF-8 encoding keeps non-ASCII text from failing under a locale default.
with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
    csvwriter = csv.writer(csvfile)
    # Header; the 'sound good?' column is pre-filled with True for the reviewer to edit.
    csvwriter.writerow(['text', 'label', 'sound good?'])
    # Row i is the i-th example of the i-th suite, so each row comes from a different suite.
    for i in range(50):
        suite = SIB_test_suites[i]
        csvwriter.writerow([suite['data'][i], suite['target'][i], True])