Import from our main project and also import the `reload` method if we decide to hot-reload a module

In [2]:
import sys
sys.path.extend(['../server', '../client'])

import benchmark 
format_f1_scores = benchmark.format_f1_scores
f1_scores = benchmark.f1_scores

from importlib import reload


Perform training on GPU

In [3]:
import torch 
device = torch.device("cuda")

Import data (do not use NumPy arrays this time)

In [4]:
import csv
from collections import defaultdict

from sklearn import metrics
from sklearn.model_selection import train_test_split

SEED = 1337

with open('../data/atis/train.tsv', 'rt') as f:
    reader = csv.reader(f, delimiter='\t')
    data = list(reader)
    data_x = [row[0] for row in data]
    data_y = [row[1] for row in data]

data_x_train, data_x_val, data_y_train, data_y_val = train_test_split(data_x, data_y, test_size=0.2, random_state=SEED)

def val_accuracy(f):
    print(metrics.accuracy_score(data_y_val, f(data_x_val)))

data_x_words = sorted({w for x in data_x_train for w in x.split(' ')})

by_label = defaultdict(list)
for x, y in zip(data_x_train, data_y_train):
    by_label[y].append(x)
labels = set(by_label)

SCORE_DATA = { "train": [data_x_train, data_y_train], "val": [data_x_val, data_y_val]}

def confusion(model, on_data : tuple[list[str], list[str]]):
    cm = defaultdict(lambda: defaultdict(int))
    for x, y in zip(*on_data):
        cm[y][model(x)] += 1
    return cm

def print_f1_scores(model, data_key="train"):
    print(format_f1_scores(f1_scores(confusion(model, SCORE_DATA[data_key]))))


Import the same transformers classifiers as the last time and use the GPU:

In [17]:
from transformers import pipeline

tr_model_names = [
    "MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli", # 100m parameters
    "MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli", # 200m parameters
    "facebook/bart-large-mnli", # 400m parameters
]

tr_models = [pipeline("zero-shot-classification", model=name, device=device) 
             for name in tr_model_names]

BATCH_SIZE = 128

def into_my_model(transformer_model):
    def as_model(xs):
        results = transformer_model(xs, batch_size=BATCH_SIZE, candidate_labels=sorted(labels))
        return [result['labels'][0] for result in results]
    return as_model

my_models = [into_my_model(model) for model in tr_models] 

Perform a quick test:

In [18]:
SCORE_DATA["2"] = ['fly to nyc', 'take a taxi to penn station'], ['flight', 'ground_service']
my_models[0](SCORE_DATA["2"][0])

['flight', 'capacity']

Let's use a more professional-looking context manager when measuring time:



In [23]:
import time

class Timer:
    def __enter__(self):
        self.start = time.time()
        self.laps = []
        return self

    def __exit__(self, *args):
        pass
    
    @property 
    def elapsed(self):
        return time.time() - self.start
    
    def lap(self):
        self.laps.append(self.elapsed)
        self.start += self.laps[-1]    

    @property 
    def average_lap(self):
        return sum(self.laps) / len(self.laps)

# Usage example:
with Timer() as t:
    assert t.elapsed < 0.1
    time.sleep(0.5)
    assert 0.4 < t.elapsed < 0.6
    t.lap()
    assert t.elapsed < 0.1
    assert len(t.laps) == 1
    t.lap()
    assert 0.2 < t.average_lap < 0.3


Convert our naive arrays to GPU tensors

In [12]:
from datasets import Dataset

def convert_to_gpu_dataset(x):
    d = Dataset.from_dict({'data': x})
    return d.with_format("torch", device=device)

SCORE_DATASET = {
    key: [convert_to_gpu_dataset(d) for d in values] 
    for key, values in SCORE_DATA.items()
}    

In [20]:
def confusion_batch(model, on_data : tuple[Dataset, Dataset]):
    results = model(on_data[0]['data'])
    cm = defaultdict(lambda: defaultdict(int))
    for y, y_ in zip(on_data[1]['data'], results):
        cm[y][y_] += 1
    return cm
        

Now that we use the GPU for inference and the datasets are preloaded,
we can check the inference time and the validation results.

In [24]:
with Timer() as t:
    for model in my_models:
        print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATASET["train"]))))
        t.lap()
  
    print("this took {} seconds".format(t.laps))


  abbreviation ([36m56[0m, [36m115[0m, [36m37[0m): [32m0.42[0m
  aircraft ([36m30[0m, [36m7[0m, [36m28[0m): [32m0.63[0m
  airfare ([36m14[0m, [36m3[0m, [36m305[0m): [32m0.083[0m
  airfare+flight_time ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airline ([36m70[0m, [36m59[0m, [36m48[0m): [32m0.57[0m
  airline+flight_no ([36m0[0m, [36m1[0m, [36m1[0m): [32m0.0[0m
  airport ([36m12[0m, [36m65[0m, [36m1[0m): [32m0.27[0m
  capacity ([36m6[0m, [36m66[0m, [36m5[0m): [32m0.14[0m
  cheapest ([36m1[0m, [36m149[0m, [36m0[0m): [32m0.013[0m
  city ([36m5[0m, [36m3[0m, [36m10[0m): [32m0.43[0m
  distance ([36m7[0m, [36m153[0m, [36m6[0m): [32m0.081[0m
  flight ([36m2259[0m, [36m135[0m, [36m496[0m): [32m0.88[0m
  flight+airfare ([36m9[0m, [36m122[0m, [36m5[0m): [32m0.12[0m
  flight_no ([36m0[0m, [36m31[0m, [36m9[0m): [32m0.0[0m
  flight_time ([36m17[0m, [36m15[0m, [36m27[0m): [32m0.45[

In [22]:
with Timer() as t:
    for model in my_models:
        print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATASET["val"]))))
        t.lap()
  
    print("this took {} seconds".format(t.laps))


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


  abbreviation ([36m8[0m, [36m24[0m, [36m7[0m): [32m0.34[0m
  aircraft ([36m12[0m, [36m2[0m, [36m8[0m): [32m0.71[0m
  aircraft+flight+flight_no ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airfare ([36m6[0m, [36m0[0m, [36m78[0m): [32m0.13[0m
  airline ([36m18[0m, [36m15[0m, [36m12[0m): [32m0.57[0m
  airline+flight_no ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airport ([36m5[0m, [36m21[0m, [36m0[0m): [32m0.32[0m
  capacity ([36m3[0m, [36m15[0m, [36m2[0m): [32m0.26[0m
  cheapest ([36m0[0m, [36m45[0m, [36m0[0m): [32m0.0[0m
  city ([36m2[0m, [36m3[0m, [36m1[0m): [32m0.5[0m
  distance ([36m1[0m, [36m50[0m, [36m6[0m): [32m0.034[0m
  flight ([36m548[0m, [36m38[0m, [36m123[0m): [32m0.87[0m
  flight+airfare ([36m2[0m, [36m31[0m, [36m1[0m): [32m0.11[0m
  flight_no ([36m0[0m, [36m8[0m, [36m3[0m): [32m0.0[0m
  flight_time ([36m2[0m, [36m1[0m, [36m6[0m): [32m0.36[0m
  ground_far

Let's also try this 33m parameter model:

In [25]:
small_model_name = "pszemraj/xtremedistil-l12-h384-uncased-zeroshot-v1.1"
small_model = into_my_model(pipeline("zero-shot-classification", device=device, model = small_model_name))

config.json:   0%|          | 0.00/956 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/66.7M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.24k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/695 [00:00<?, ?B/s]

In [28]:
def time_my_models(model_list, key:str="val"):
    with Timer() as t:
        for model in model_list:
            print(format_f1_scores(f1_scores(confusion_batch(model, SCORE_DATASET[key]))))
            t.lap()
      
        print("this took {} seconds".format(t.laps))

time_my_models([small_model, my_models[0]])

  abbreviation ([36m11[0m, [36m4[0m, [36m4[0m): [32m0.73[0m
  aircraft ([36m17[0m, [36m46[0m, [36m3[0m): [32m0.41[0m
  aircraft+flight+flight_no ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airfare ([36m22[0m, [36m176[0m, [36m62[0m): [32m0.16[0m
  airfare+flight_time ([36m0[0m, [36m2[0m, [36m0[0m): [32m0.0[0m
  airline ([36m18[0m, [36m159[0m, [36m12[0m): [32m0.17[0m
  airline+flight_no ([36m0[0m, [36m6[0m, [36m1[0m): [32m0.0[0m
  airport ([36m5[0m, [36m8[0m, [36m0[0m): [32m0.56[0m
  capacity ([36m0[0m, [36m0[0m, [36m5[0m): [32m0.0[0m
  cheapest ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  city ([36m0[0m, [36m18[0m, [36m3[0m): [32m0.0[0m
  distance ([36m4[0m, [36m23[0m, [36m3[0m): [32m0.24[0m
  flight ([36m326[0m, [36m14[0m, [36m345[0m): [32m0.64[0m
  flight+airfare ([36m0[0m, [36m12[0m, [36m3[0m): [32m0.0[0m
  flight_no ([36m0[0m, [36m0[0m, [36m3[0m): [32m0.0[0m
  fli

In [30]:
tr_model_names = [small_model_name, *tr_model_names]
my_models = [small_model, *tr_model_names]

Let's examine the models more closely:

In [40]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
auto_models = [AutoModelForSequenceClassification.from_pretrained(n) for n in tr_model_names]
auto_tokenizers = [AutoTokenizer.from_pretrained(n) for n in tr_model_names]

In [41]:
for m in auto_models:
    print(m)

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 384, padding_idx=0)
      (position_embeddings): Embedding(512, 384)
      (token_type_embeddings): Embedding(2, 384)
      (LayerNorm): LayerNorm((384,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=384, out_features=384, bias=True)
              (key): Linear(in_features=384, out_features=384, bias=True)
              (value): Linear(in_features=384, out_features=384, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=384, out_features=384, bias=True)
              (LayerNorm): LayerNorm((384,), eps=1e-12,

In [43]:
very_small_model_name = "MoritzLaurer/xtremedistil-l6-h256-zeroshot-v1.1-all-33"
very_small_model = into_my_model(pipeline("zero-shot-classification", device=device, model = small_model_name))

time_my_models([very_small_model])

  abbreviation ([36m11[0m, [36m4[0m, [36m4[0m): [32m0.73[0m
  aircraft ([36m17[0m, [36m46[0m, [36m3[0m): [32m0.41[0m
  aircraft+flight+flight_no ([36m0[0m, [36m0[0m, [36m1[0m): [32m0.0[0m
  airfare ([36m22[0m, [36m176[0m, [36m62[0m): [32m0.16[0m
  airfare+flight_time ([36m0[0m, [36m2[0m, [36m0[0m): [32m0.0[0m
  airline ([36m18[0m, [36m159[0m, [36m12[0m): [32m0.17[0m
  airline+flight_no ([36m0[0m, [36m6[0m, [36m1[0m): [32m0.0[0m
  airport ([36m5[0m, [36m8[0m, [36m0[0m): [32m0.56[0m
  capacity ([36m0[0m, [36m0[0m, [36m5[0m): [32m0.0[0m
  cheapest ([36m0[0m, [36m1[0m, [36m0[0m): [32m0.0[0m
  city ([36m0[0m, [36m18[0m, [36m3[0m): [32m0.0[0m
  distance ([36m4[0m, [36m23[0m, [36m3[0m): [32m0.24[0m
  flight ([36m326[0m, [36m14[0m, [36m345[0m): [32m0.64[0m
  flight+airfare ([36m0[0m, [36m12[0m, [36m3[0m): [32m0.0[0m
  flight_no ([36m0[0m, [36m0[0m, [36m3[0m): [32m0.0[0m
  fli

In [45]:
tr_model_names

['pszemraj/xtremedistil-l12-h384-uncased-zeroshot-v1.1',
 'MoritzLaurer/multilingual-MiniLMv2-L6-mnli-xnli',
 'MoritzLaurer/DeBERTa-v3-base-mnli-fever-anli',
 'facebook/bart-large-mnli']