In [1]:
from Todd import MahalanobisFilter, extract_embeddings
import torch
from torch.utils.data import Dataset, DataLoader

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from toddbenchmark.generation_data import prep_dataset, prep_model, GenerationDataset
from datasets import load_dataset


In [2]:
# Enable IPython's autoreload extension in mode 2 so that imported modules are
# reloaded before each cell executes (edits to library source are picked up
# without restarting the kernel).
%load_ext autoreload
%autoreload 2


In [3]:
# Load the pretrained German-to-English translation checkpoint together with
# its tokenizer through the ToddBenchmark helper.
model, tokenizer = prep_model("Helsinki-NLP/opus-mt-de-en")




## Load and prep dataset using ToddBenchmark

In [4]:

# Prepare the de-en pair (the pair the model was loaded for) as in-distribution
# data and the ro-en pair as out-of-distribution data, using the same tokenizer.
in_dataset = prep_dataset("wmt16", "de-en", tokenizer=tokenizer)
out_dataset = prep_dataset("wmt16", "ro-en", tokenizer=tokenizer)

# For the sake of this example we only keep a small subset to keep things quick:
# 1000 samples from split index 1 (reference/validation data) and 100 samples
# from split index 2 of each dataset (test data).
# NOTE(review): assumes prep_dataset returns (train, validation, test) in that order - confirm.
in_val = in_dataset[1][:1000]
in_test = in_dataset[2][:100]
out_test = out_dataset[2][:100]

# Drop the full datasets; only the slices above are used from here on.
del in_dataset
del out_dataset


Downloading readme:   0%|          | 0.00/9.21k [00:00<?, ?B/s]

Found cached dataset wmt16 (/home/mdarrin/.cache/huggingface/datasets/wmt16/de-en/1.0.0/746749a11d25c02058042da7502d973ff410e73457f3d305fc1177dc0e8c4227)


  0%|          | 0/3 [00:00<?, ?it/s]

Found cached dataset wmt16 (/home/mdarrin/.cache/huggingface/datasets/wmt16/ro-en/1.0.0/746749a11d25c02058042da7502d973ff410e73457f3d305fc1177dc0e8c4227)


  0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
# Make dataloaders: one per split, in fixed order (shuffle=False), with
# batches of 4 samples.
in_val_loader = DataLoader(in_val, shuffle=False, batch_size=4)
in_test_loader = DataLoader(in_test, shuffle=False, batch_size=4)
out_test_loader = DataLoader(out_test, shuffle=False, batch_size=4)


## Feature based filters

### Extracting reference data to fit the detectors

In [6]:
# Class labels do not matter in this setting, so we skip retrieving them and
# the only class key will be 0. In a classification problem with enough data,
# we would instead keep one reference set per class.
# Only layer 6 is extracted from the in-distribution validation loader; the
# second return value is discarded.
ref_embeddings, _ = extract_embeddings(model, tokenizer, in_val_loader, layers=[6])

In [15]:
# Build the Mahalanobis filter on layer 6 (the same layer index used for the
# reference embeddings) and fit it on those reference embeddings.
# NOTE(review): threshold=3200 is a magic number; its derivation is not shown
# in this notebook - confirm how it was chosen.
maha_detector = MahalanobisFilter(threshold=3200, layers=[6])
maha_detector.fit(ref_embeddings)

torch.Size([512, 512])


### Evaluation loop

In [18]:

@torch.no_grad()
def eval_loader(loader):
    """Run the Mahalanobis detector on the first batch of ``loader`` and print it.

    The batch's ``"source"`` texts are tokenized, passed through
    ``model.generate`` with hidden states and scores returned, and the
    generation output is handed to ``maha_detector``. Gradient tracking is
    disabled for the whole call.

    Args:
        loader: Iterable of batches; each batch must expose a ``"source"``
            entry that the tokenizer accepts.
    """
    for sample_batch in loader:
        encoded = tokenizer(
            sample_batch["source"],
            return_tensors="pt",
            padding=True,
            truncation=True,
        )
        generation_options = dict(
            return_dict_in_generate=True,
            output_hidden_states=True,
            output_scores=True,
        )
        generated = model.generate(**encoded, **generation_options)

        print(maha_detector(generated))
        # Demo only: stop after the first batch.
        break


eval_loader(in_test_loader)
eval_loader(out_test_loader)

tensor([True, True, True, True, True, True, True, True, True, True, True, True,
        True, True, True, True])
tensor([False, False, False, False, False, False, False, False, False, False,
        False, False,  True, False, False, False])


## Decoder based filters

In [99]:
from Todd.itfilters import SequenceRenyiNegFilter

### Output mode

In output mode, the filter returns a score and a boolean filter decision for every sequence generated for each sample in the batch.

In [109]:

renyi_entropy_filter = SequenceRenyiNegFilter(-10.35, pad_token_id=tokenizer.pad_token_id, mode="output")


def eval_loader(loader, num_return_sequences=2, num_beams=2):
    """Print the Renyi filter decisions and raw scores for the first batch of ``loader``.

    With the filter in ``mode="output"``, one decision and one score are
    printed per generated sequence.

    Args:
        loader: Iterable of batches; each batch must expose a ``"source"``
            entry that the tokenizer accepts.
        num_return_sequences: Number of sequences returned per input sample.
            Used both for generation and for the filter so they cannot drift.
        num_beams: Beam width. Used both for generation and for the filter.
    """
    with torch.no_grad():
        for batch in loader:
            inputs = tokenizer(
                batch["source"], padding=True, truncation=True, return_tensors="pt"
            )
            output = model.generate(
                **inputs,
                return_dict_in_generate=True,
                output_hidden_states=True,
                output_scores=True,
                num_return_sequences=num_return_sequences,
                num_beams=num_beams,
                do_sample=False,
            )

            # Derive the batch size from the actual batch rather than
            # hard-coding it: the loaders are rebound with a different
            # batch_size elsewhere in this notebook, and a final partial batch
            # would be smaller as well.
            # NOTE(review): the keyword is spelled `num_beam` here but
            # `num_beams` in the beam-ranking cell - confirm which spelling the
            # Todd API expects.
            filter_kwargs = dict(
                batch_size=inputs["input_ids"].shape[0],
                num_return_sequences=num_return_sequences,
                num_beam=num_beams,
            )

            print(renyi_entropy_filter(output, **filter_kwargs))
            print(renyi_entropy_filter.compute_scores(output, **filter_kwargs))
            # Demo only: stop after the first batch.
            break


print("IN DATA")
eval_loader(in_test_loader)
print("OUT DATA")
eval_loader(out_test_loader)


IN DATA
tensor([[True, True],
        [True, True],
        [True, True],
        [True, True]])
tensor([[-10.3531, -10.4222],
        [-10.4674, -10.4793],
        [-10.4999, -10.5295],
        [-10.5211, -10.5511]])
OUT DATA
tensor([[False, False],
        [False, False],
        [False, False],
        [False, False]])
tensor([[-10.2332, -10.2277],
        [-10.3162, -10.3185],
        [-10.2982, -10.2804],
        [-10.2965, -10.2874]])


### Input Mode
In input mode, the filter returns a single score for each sample in the batch by aggregating the scores of its generated sequences.

In [110]:
renyi_entropy_filter = SequenceRenyiNegFilter(-10.35, pad_token_id=tokenizer.pad_token_id, mode="input")


def eval_loader(loader, num_return_sequences=2, num_beams=2):
    """Print the Renyi filter decisions and raw scores for the first batch of ``loader``.

    With the filter in ``mode="input"``, one decision and one score are
    printed per input sample.

    Args:
        loader: Iterable of batches; each batch must expose a ``"source"``
            entry that the tokenizer accepts.
        num_return_sequences: Number of sequences returned per input sample.
            Used both for generation and for the filter so they cannot drift.
        num_beams: Beam width. Used both for generation and for the filter.
    """
    with torch.no_grad():
        for batch in loader:
            inputs = tokenizer(
                batch["source"], padding=True, truncation=True, return_tensors="pt"
            )
            output = model.generate(
                **inputs,
                return_dict_in_generate=True,
                output_hidden_states=True,
                output_scores=True,
                num_return_sequences=num_return_sequences,
                num_beams=num_beams,
                do_sample=False,
            )

            # Derive the batch size from the actual batch rather than
            # hard-coding it: the loaders are rebound with a different
            # batch_size elsewhere in this notebook, and a final partial batch
            # would be smaller as well.
            # NOTE(review): the keyword is spelled `num_beam` here but
            # `num_beams` in the beam-ranking cell - confirm which spelling the
            # Todd API expects.
            filter_kwargs = dict(
                batch_size=inputs["input_ids"].shape[0],
                num_return_sequences=num_return_sequences,
                num_beam=num_beams,
            )

            print(renyi_entropy_filter(output, **filter_kwargs))
            print(renyi_entropy_filter.compute_scores(output, **filter_kwargs))
            # Demo only: stop after the first batch.
            break


print("IN DATA")
eval_loader(in_test_loader)

print("OUT DATA")
eval_loader(out_test_loader)


IN DATA
tensor([True, True, True, True])
tensor([-10.3876, -10.4734, -10.5147, -10.5361])
OUT DATA
tensor([False, False, False, False])
tensor([-10.2304, -10.3173, -10.2893, -10.2919])


### Beam ranking using Info-projection

In [9]:
from Todd.itfilters import BeamRenyiInformationProjection

In [10]:
# Make dataloader
# Smaller batch so it runs on CPU and fits in memory
# NOTE: these assignments rebind the loader names that were created earlier
# with batch_size=4; any earlier cell re-run after this one will receive
# batches of 2 samples.
in_val_loader = DataLoader(in_val, shuffle=False, batch_size=2)
in_test_loader = DataLoader(in_test, shuffle=False, batch_size=2)
out_test_loader = DataLoader(out_test, shuffle=False, batch_size=2)


In [17]:
batch_self_projector = BeamRenyiInformationProjection(-10.35, pad_token_id=tokenizer.pad_token_id, mode="input")


def eval_loader(loader, num_return_sequences=4, num_beams=4):
    """Rank the generated candidates of the first batch by information-projection score.

    For the first batch of ``loader``, generate ``num_return_sequences``
    candidates per sample with beam search, score every candidate with
    ``batch_self_projector``, sort the candidates of each sample by ascending
    score, and print the ranking indices, the sorted scores, and the shape of
    the re-ordered candidate sequences.

    Args:
        loader: Iterable of batches; each batch must expose a ``"source"``
            entry that the tokenizer accepts.
        num_return_sequences: Number of candidates returned per input sample.
        num_beams: Beam width. The same value is passed to generation and to
            the scorer (the two were previously inconsistent: 4 vs 2).
    """
    with torch.no_grad():
        for batch in loader:
            inputs = tokenizer(
                batch["source"], padding=True, truncation=True, return_tensors="pt"
            )
            output = model.generate(
                **inputs,
                return_dict_in_generate=True,
                output_hidden_states=True,
                output_scores=True,
                num_return_sequences=num_return_sequences,
                num_beams=num_beams,
                do_sample=False,
            )

            # Use the real batch size so a partial last batch or a loader with
            # a different batch_size still works.
            batch_size = inputs["input_ids"].shape[0]

            candidate_scores = batch_self_projector.per_output_scores(
                output,
                batch_size=batch_size,
                num_return_sequences=num_return_sequences,
                num_beams=num_beams,
            )

            # Sort each sample's candidates by ascending score; `indices` are
            # positions *within* each sample (0 .. num_return_sequences - 1).
            candidate_scores, indices = torch.sort(candidate_scores, dim=-1, descending=False)

            print(indices)
            print(candidate_scores)

            # `output.sequences` is flat: (batch_size * num_return_sequences, seq_len).
            # Indexing it directly with the per-sample `indices` would pick rows
            # 0 .. num_return_sequences - 1 for every sample, i.e. always the
            # first sample's candidates. Group the candidates per sample first,
            # then gather along the candidate axis.
            sequences = output.sequences.reshape(batch_size, num_return_sequences, -1)
            gather_index = indices.unsqueeze(-1).expand(-1, -1, sequences.shape[-1])
            ranked_sequences = torch.gather(sequences, 1, gather_index)

            print(ranked_sequences.shape)

            # Demo only: stop after the first batch.
            break


print("IN DATA")
eval_loader(in_test_loader)

print("OUT DATA")
eval_loader(out_test_loader)


IN DATA
tensor([[2, 3, 0, 1],
        [1, 2, 0, 3]])
tensor([[0.0487, 0.0518, 0.0553, 0.0959],
        [0.0300, 0.0366, 0.0404, 0.0804]])
torch.Size([2, 4, 17])
OUT DATA
tensor([[1, 0, 2, 3],
        [2, 0, 3, 1]])
tensor([[0.0025, 0.0025, 0.0029, 0.0516],
        [0.0101, 0.0117, 0.0136, 0.0141]])
torch.Size([2, 4, 122])
