In [1]:
import sys
sys.path.append('../client')
from benchmark import format_f1_scores, f1_scores

In [2]:
def confusion(model, on_data : tuple[list[str], list[str]]):
    cm = defaultdict(lambda: defaultdict(int))
    for x, y in zip(*on_data):
        cm[y][model(x)] += 1
    return cm


In [3]:
import csv
from collections import defaultdict

import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split

SEED = 1337

with open('../data/atis/train.tsv', 'rt') as f:
    reader = csv.reader(f, delimiter='\t')
    data = list(reader)
    data_x = [row[0] for row in data]
    data_y = [row[1] for row in data]

data_x_train, data_x_val, data_y_train, data_y_val = map(np.array, train_test_split(data_x, data_y, test_size=0.2, random_state=SEED))

def val_accuracy(f):
    print(metrics.accuracy_score(data_y_val, f(data_x_val)))

data_x_words = sorted({w for x in data_x_train for w in x.split(' ')})

by_label = defaultdict(list)
for x, y in zip(data_x_train, data_y_train):
    by_label[y].append(x)
labels = set(by_label)

SCORE_DATA = { "train": [data_x_train, data_y_train], "val": [data_x_val, data_y_val]}

def print_f1_scores(model, data_key="train"):
    print(format_f1_scores(f1_scores(confusion(model, SCORE_DATA[data_key]))))

airfare_preditor = lambda s: (' price' in s) + (' cost ' in s) + (' fare' in s) + (' rate' in s) + (' ticket' in s) + ('how much' in s)

print_f1_scores(lambda s: 'airfare' if airfare_preditor(s) else 'flight')

  abbreviation ([36m0[0m, [36m0[0m, [36m93[0m): [32m0.0[0m
  aircraft ([36m0[0m, [36m0[0m, [36m58[0m): [32m0.0[0m
  airfare ([36m299[0m, [36m116[0m, [36m20[0m): [32m0.81[0m
  airfare+flight_time ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airline ([36m0[0m, [36m0[0m, [36m118[0m): [32m0.0[0m
  airline+flight_no ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airport ([36m0[0m, [36m0[0m, [36m13[0m): [32m0.0[0m
  capacity ([36m0[0m, [36m0[0m, [36m11[0m): [32m0.0[0m
  cheapest ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  city ([36m0[0m, [36m0[0m, [36m15[0m): [32m0.0[0m
  distance ([36m0[0m, [36m0[0m, [36m13[0m): [32m0.0[0m
  flight ([36m2703[0m, [36m589[0m, [36m52[0m): [32m0.89[0m
  flight+airfare ([36m0[0m, [36m0[0m, [36m14[0m): [32m0.0[0m
  flight_no ([36m0[0m, [36m0[0m, [36m9[0m): [32m0.0[0m
  flight_time ([36m0[0m, [36m0[0m, [36m44[0m): [32m0.0[0m
  ground_fare ([36m0

In [8]:
from transformers import pipeline

transformers_classifier_bart_large = pipeline("zero-shot-classification", model="facebook/bart-large-mnli") # 400m parameters
transformers_classifier_deberta_base = pipeline("zero-shot-classification", model="MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli") # 200m parameters
transformers_classifier_minilm = pipeline("zero-shot-classification", model="MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli") # 100m parameters

config.json:   0%|          | 0.00/1.03k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/428M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

In [11]:
BATCH_SIZE = 128

def confusion_batch(model, on_data : tuple[list[str], list[str]]):
    results = model(sequences=on_data[0], candidate_labels=sorted(labels), batch_size=BATCH_SIZE)
    cm = defaultdict(lambda: defaultdict(int))
    for y, y_ in zip(on_data[1], results):
        cm[y][y_] += 1
    return cm

tr_models = [transformers_classifier_minilm, transformers_classifier_deberta_base, transformers_classifier_bart_large]

for model in tr_models:
    print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATA["train"]))))

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  flight ([36m0[0m, [36m0[0m, [36m2[0m): [32m0.0[0m
  ground_service ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  labels ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  scores ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  sequence ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  flight ([36m0[0m, [36m0[0m, [36m2[0m): [32m0.0[0m
  ground_service ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  labels ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  scores ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  sequence ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  flight ([36m0[0m, [36m0[0m, [36m2[0m): [32m0.0[0m
  ground_service ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  labels ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  scores ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  sequence ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m


In [12]:
transformers_classifier_bart_large("i want to fly to nyc", candidate_labels=sorted(labels))

{'sequence': 'i want to fly to nyc',
 'labels': ['flight',
  'airfare',
  'distance',
  'city',
  'flight+airfare',
  'abbreviation',
  'airfare+flight_time',
  'flight_time',
  'aircraft',
  'airline+flight_no',
  'flight_no',
  'capacity',
  'airport',
  'quantity',
  'airline',
  'restriction',
  'ground_fare',
  'ground_service',
  'meal',
  'cheapest'],
 'scores': [0.22391395270824432,
  0.13718608021736145,
  0.12011700868606567,
  0.11480510979890823,
  0.10111062228679657,
  0.0568232461810112,
  0.04757894575595856,
  0.03555750846862793,
  0.031680475920438766,
  0.022245541214942932,
  0.018322663381695747,
  0.017090201377868652,
  0.016434896737337112,
  0.013044649735093117,
  0.012528651393949986,
  0.011125268414616585,
  0.00860058143734932,
  0.005981918424367905,
  0.003734766272827983,
  0.0021179229952394962]}

In [13]:
def into_my_model(transformer_model):
    def as_model(xs):
        results = transformer_model(xs, batch_size=BATCH_SIZE, candidate_labels=sorted(labels))
        return [result['labels'][0] for result in results]
    return as_model

my_models = [into_my_model(model) for model in tr_models] 

In [16]:
my_models[0](['fly to nyc', 'take a taxi to penn station'])

['flight', 'capacity']

In [17]:
def confusion_batch(model, on_data : tuple[list[str], list[str]]):
    results = model(on_data[0])
    cm = defaultdict(lambda: defaultdict(int))
    for y, y_ in zip(on_data[1], results):
        cm[y][y_] += 1
    return cm
    
SCORE_DATA["2"] = ['fly to nyc', 'take a taxi to penn station'], ['flight', 'ground_service']

confusion_batch(my_models[0], SCORE_DATA["2"])

defaultdict(<function __main__.confusion_batch.<locals>.<lambda>()>,
            {'flight': defaultdict(int, {'flight': 1}),
             'ground_service': defaultdict(int, {'capacity': 1})})

In [18]:
for model in my_models:
    print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATA["train"]))))

TypeError: string indices must be integers, not 'str'

Let's not make the data into an `np.array`

In [21]:
data_x_train, data_x_val, data_y_train, data_y_val = train_test_split(data_x, data_y, test_size=0.2, random_state=SEED)

SCORE_DATA["train"] = data_x_train, data_y_train
SCORE_DATA["val"] = data_x_val, data_y_val


In [22]:
for model in my_models:
    print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATA["train"]))))

  abbreviation ([36m56[0m, [36m115[0m, [36m37[0m): [32m0.42[0m
  aircraft ([36m30[0m, [36m7[0m, [36m28[0m): [32m0.63[0m
  airfare ([36m14[0m, [36m3[0m, [36m305[0m): [32m0.083[0m
  airfare+flight_time ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airline ([36m70[0m, [36m59[0m, [36m48[0m): [32m0.57[0m
  airline+flight_no ([36m0[0m, [36m1[0m, [36m1[0m): [32m0.0[0m
  airport ([36m12[0m, [36m65[0m, [36m1[0m): [32m0.27[0m
  capacity ([36m6[0m, [36m66[0m, [36m5[0m): [32m0.14[0m
  cheapest ([36m1[0m, [36m149[0m, [36m0[0m): [32m0.013[0m
  city ([36m5[0m, [36m3[0m, [36m10[0m): [32m0.43[0m
  distance ([36m7[0m, [36m153[0m, [36m6[0m): [32m0.081[0m
  flight ([36m2259[0m, [36m135[0m, [36m496[0m): [32m0.88[0m
  flight+airfare ([36m9[0m, [36m122[0m, [36m5[0m): [32m0.12[0m
  flight_no ([36m0[0m, [36m31[0m, [36m9[0m): [32m0.0[0m
  flight_time ([36m17[0m, [36m15[0m, [36m27[0m): [32m0.45[

In [23]:
len(SCORE_DATA["train"][0])

3707

In [24]:
import torch 
#  Check if a GPU is available and if not, use a CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [25]:
tr_models[0].device

device(type='cpu')

In [26]:
tr_model_names = [
    "MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli", # 100m parameters
    "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli", # 200m parameters
    "facebook/bart-large-mnli", # 400m parameters
]
models = [pipeline("zero-shot-classification", model=name, device=device) for name in tr_model_names]

In [27]:
my_models = [into_my_model(model) for model in models]
my_models[0](SCORE_DATA["2"][0])

['flight', 'capacity']

In [28]:
from time import time
for model in my_models:
    start_time = time()
    print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATA["train"]))))
    print(time() - start_time, "seconds elapsed")

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  abbreviation ([36m56[0m, [36m115[0m, [36m37[0m): [32m0.42[0m
  aircraft ([36m30[0m, [36m7[0m, [36m28[0m): [32m0.63[0m
  airfare ([36m14[0m, [36m3[0m, [36m305[0m): [32m0.083[0m
  airfare+flight_time ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airline ([36m70[0m, [36m59[0m, [36m48[0m): [32m0.57[0m
  airline+flight_no ([36m0[0m, [36m1[0m, [36m1[0m): [32m0.0[0m
  airport ([36m12[0m, [36m65[0m, [36m1[0m): [32m0.27[0m
  capacity ([36m6[0m, [36m66[0m, [36m5[0m): [32m0.14[0m
  cheapest ([36m1[0m, [36m149[0m, [36m0[0m): [32m0.013[0m
  city ([36m5[0m, [36m3[0m, [36m10[0m): [32m0.43[0m
  distance ([36m7[0m, [36m153[0m, [36m6[0m): [32m0.081[0m
  flight ([36m2259[0m, [36m135[0m, [36m496[0m): [32m0.88[0m
  flight+airfare ([36m9[0m, [36m122[0m, [36m5[0m): [32m0.12[0m
  flight_no ([36m0[0m, [36m31[0m, [36m9[0m): [32m0.0[0m
  flight_time ([36m17[0m, [36m15[0m, [36m27[0m): [32m0.45[

In [29]:
from time import time
for BATCH_SIZE in [16, 64, 256, 1024]:
    start_time = time()
    print(format_f1_scores(f1_scores(confusion_batch(my_models[0], SCORE_DATA["train"]))))
    print(time() - start_time, "seconds elapsed")

  abbreviation ([36m56[0m, [36m115[0m, [36m37[0m): [32m0.42[0m
  aircraft ([36m30[0m, [36m7[0m, [36m28[0m): [32m0.63[0m
  airfare ([36m14[0m, [36m3[0m, [36m305[0m): [32m0.083[0m
  airfare+flight_time ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airline ([36m70[0m, [36m59[0m, [36m48[0m): [32m0.57[0m
  airline+flight_no ([36m0[0m, [36m1[0m, [36m1[0m): [32m0.0[0m
  airport ([36m12[0m, [36m65[0m, [36m1[0m): [32m0.27[0m
  capacity ([36m6[0m, [36m66[0m, [36m5[0m): [32m0.14[0m
  cheapest ([36m1[0m, [36m149[0m, [36m0[0m): [32m0.013[0m
  city ([36m5[0m, [36m3[0m, [36m10[0m): [32m0.43[0m
  distance ([36m7[0m, [36m153[0m, [36m6[0m): [32m0.081[0m
  flight ([36m2259[0m, [36m135[0m, [36m496[0m): [32m0.88[0m
  flight+airfare ([36m9[0m, [36m122[0m, [36m5[0m): [32m0.12[0m
  flight_no ([36m0[0m, [36m31[0m, [36m9[0m): [32m0.0[0m
  flight_time ([36m17[0m, [36m15[0m, [36m27[0m): [32m0.45[

In [30]:
from importlib import reload
reload("benchmarks")

TypeError: reload() argument must be a module

In [43]:
import benchmark
reload(benchmark)

<module 'benchmark' from '/home/we/Code/intent-classifier/docs/../client/benchmark.py'>

In [44]:
format_f1_scores = benchmark.format_f1_scores
f1_scores = benchmark.f1_scores

In [45]:
for model in my_models:
    print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATA["val"]))))




  abbreviation ([36m8[0m, [36m24[0m, [36m7[0m): [32m0.34[0m
  aircraft ([36m12[0m, [36m2[0m, [36m8[0m): [32m0.71[0m
  aircraft+flight+flight_no ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airfare ([36m6[0m, [36m0[0m, [36m78[0m): [32m0.13[0m
  airline ([36m18[0m, [36m15[0m, [36m12[0m): [32m0.57[0m
  airline+flight_no ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airport ([36m5[0m, [36m21[0m, [36m0[0m): [32m0.32[0m
  capacity ([36m3[0m, [36m15[0m, [36m2[0m): [32m0.26[0m
  cheapest ([36m0[0m, [36m45[0m, [36m0[0m): [32m0.0[0m
  city ([36m2[0m, [36m3[0m, [36m1[0m): [32m0.5[0m
  distance ([36m1[0m, [36m50[0m, [36m6[0m): [32m0.034[0m
  flight ([36m548[0m, [36m38[0m, [36m123[0m): [32m0.87[0m
  flight+airfare ([36m2[0m, [36m31[0m, [36m1[0m): [32m0.11[0m
  flight_no ([36m0[0m, [36m8[0m, [36m3[0m): [32m0.0[0m
  flight_time ([36m2[0m, [36m1[0m, [36m6[0m): [32m0.36[0m
  ground_far