In [None]:
# For usage on google colab:
!git clone https://github.com/ahans30/Binoculars.git
%cd Binoculars
!pip install -e .

In [None]:
from binoculars import Binoculars

In [None]:
import numpy as np
import torch
import transformers
import pickle as pkl
import pandas as pd
from google.colab import files

In [None]:
# Build the Binoculars object with its constructor defaults. Later cells call
# its `_tokenize` and `_get_logits` methods and read `tokenizer.pad_token_id`.
bino = Binoculars()

In [None]:
# Load the sample CSV. The path is relative, so it is resolved against the
# current working directory (the setup cell runs `%cd Binoculars`).
# NOTE(review): the file presumably has to be uploaded/copied there first — confirm.
df = pd.read_csv("raid_test_100.csv")

In [11]:
# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0.1,Unnamed: 0,generation,attack,domain,repetition_penalty,model
0,0,Segmentations-Leak: Membership Inference Attac...,perplexity_misspelling,abstracts,no,mistral-chat
1,1,"Farshid Jamshidian is a finance researcher, ac...",insert_paragraphs,wiki,,human
2,2,Transient spine enlargement (3-5 min timesca...,alternative_spelling,abstracts,,human
3,3,The viola da Terceira (also viola Terceiren...,whitespace,wiki,,human
4,4,Yes this review is 2 years after the film's re...,insert_paragraphs,reviews,,human


In [13]:
# Keep only the text column. The list-style selector yields a one-column
# DataFrame (not a Series), which is then printed as plain text.
texts = df.loc[:, ["generation"]]
print(texts)

                                           generation
0   Segmentations-Leak: Membership Inference Attac...
1   Farshid Jamshidian is a finance researcher, ac...
2     Transient spine enlargement (3-5 min timesca...
3   The   viola da Terceira  (also viola Terceiren...
4   Yes this review is 2 years after the film's re...
..                                                ...
95    We consider the orbit type filtration on a m...
96  George Michael is to perform live at London's ...
97  Nicole Kelly is an Australian contemporary Chr...
98  In this paper, we propose a novel approach to ...
99  If   life is still bad for me even  when I com...

[100 rows x 1 columns]


In [None]:
# Normalise `texts` into a plain list of strings before tokenizing.
# The previous version tested an undefined name (`sample_string`) and would have
# passed the DataFrame itself on to the tokenizer.
if isinstance(texts, str):
    # A single string becomes a batch of one.
    batch = [texts]
elif isinstance(texts, pd.DataFrame):
    # `texts` is the one-column frame selected from `df`; unwrap that column.
    batch = texts["generation"].tolist()
else:
    # Any other iterable (list, Series, ...) is taken element by element.
    batch = list(texts)

# NOTE(review): every text is tokenized and scored in one batch — presumably
# this needs chunking for large inputs; confirm against available memory.
encodings = bino._tokenize(batch)
observer_logits, performer_logits = bino._get_logits(encodings)
# Needed later by `entropy` to mask out padded positions.
pad_token_id = bino.tokenizer.pad_token_id

In [None]:
# Module-level helpers used by the metric functions below:
# a softmax over the last axis and an unreduced (per-element) cross-entropy.
softmax_fn = torch.nn.Softmax(dim=-1)
ce_loss_fn = torch.nn.CrossEntropyLoss(reduction="none")

# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(formatter={"float_kind": "{:f}".format})

In [None]:
def perplexity(encoding: "transformers.BatchEncoding",
               logits: torch.Tensor,
               median: bool = False,
               temperature: float = 1.0):
    """Aggregate next-token cross-entropy per sequence and expose the per-token values.

    Logits at position ``i`` are scored against the token at position ``i + 1``,
    so the last logit and the first label are dropped. The result is a
    cross-entropy (no exponentiation is applied).

    Args:
        encoding: object exposing ``input_ids`` and ``attention_mask`` tensors.
        logits: model outputs; the ``transpose(1, 2)`` below implies a
            (batch, position, vocab) layout.
        median: if True aggregate with a NaN-ignoring median over positions,
            otherwise with an attention-mask-weighted mean.
        temperature: divisor applied to the logits before scoring.

    Returns:
        ``(ppl, walk)`` as NumPy arrays: ``ppl`` holds one aggregated value per
        row, ``walk`` holds the per-token cross-entropy with NaN at positions
        whose shifted attention mask is 0. ``walk`` is returned for both
        aggregation modes (it used to be undefined when ``median`` was False).
    """
    shifted_logits = logits[..., :-1, :].contiguous() / temperature
    shifted_labels = encoding.input_ids[..., 1:].contiguous()
    shifted_attention_mask = encoding.attention_mask[..., 1:].contiguous()

    # CrossEntropyLoss wants the class axis at dim 1, hence the transpose.
    token_ce = ce_loss_fn(shifted_logits.transpose(1, 2), shifted_labels)

    # Per-token trace; masked positions are marked NaN so they can be skipped
    # by NaN-aware reductions. Converted to NumPy once and reused below.
    walk = (token_ce.masked_fill(~shifted_attention_mask.bool(), float("nan"))
            .to("cpu").float().numpy())

    if median:
        ppl = np.nanmedian(walk, 1)
    else:
        ppl = (token_ce * shifted_attention_mask).sum(1) / shifted_attention_mask.sum(1)
        ppl = ppl.to("cpu").float().numpy()

    return ppl, walk
# Score the performer logits. With median=True the first result is the
# per-sequence NaN-ignoring median and the second the per-token cross-entropy.
pf_ppl, pf_walk = perplexity(encodings, performer_logits, median=True)

In [None]:
def entropy(p_logits: torch.Tensor,
            q_logits: torch.Tensor,
            encoding: "transformers.BatchEncoding",
            pad_token_id: int,
            median: bool = False,
            sample_p: bool = False,
            temperature: float = 1.0):
    """Cross-entropy of the q distribution against the p distribution, per sequence.

    For every position the softmax of ``p_logits`` is used as a soft target
    (or, with ``sample_p``, a single token sampled from it) for the
    cross-entropy computed from ``q_logits``.

    Args:
        p_logits: logits providing the target distribution; last axis is the vocabulary.
        q_logits: logits being scored; second-to-last axis is the token position.
        encoding: object exposing an ``input_ids`` tensor.
        pad_token_id: positions whose input id equals this value are ignored.
        median: if True aggregate with a NaN-ignoring median, otherwise with a
            mean over non-padding positions.
        sample_p: if True draw one token per position from p instead of using
            the full distribution as the target.
        temperature: divisor applied to both sets of logits.

    Returns:
        With ``median=True``: ``(agg_ce, en_walk)`` where ``en_walk`` is the
        per-token cross-entropy as a NumPy array with NaN at padding positions.
        With ``median=False``: only ``agg_ce``. The asymmetry is kept on
        purpose so existing callers of either mode keep working.
    """
    vocab_size = p_logits.shape[-1]
    total_tokens_available = q_logits.shape[-2]

    # Bring p next to q before combining them; a no-op when both already share
    # a device.
    p_scores = p_logits.to(q_logits.device) / temperature
    q_scores = (q_logits / temperature).reshape(-1, vocab_size)

    # One row per (sequence, position) pair.
    p_proba = softmax_fn(p_scores).reshape(-1, vocab_size)

    if sample_p:
        # Replace the soft targets by one sampled class index per row.
        p_proba = torch.multinomial(p_proba, replacement=True, num_samples=1).view(-1)

    ce = ce_loss_fn(input=q_scores, target=p_proba).view(-1, total_tokens_available)
    # True where the position holds a real (non-padding) token.
    padding_mask = (encoding.input_ids != pad_token_id).to(ce.device)

    if median:
        # Convert to NumPy once and reuse it for both the trace and the median.
        en_walk = ce.masked_fill(~padding_mask, float("nan")).to("cpu").float().numpy()
        agg_ce = np.nanmedian(en_walk, 1)
        return agg_ce, en_walk

    agg_ce = ((ce * padding_mask).sum(1) / padding_mask.sum(1)).to("cpu").float().numpy()
    return agg_ce
# Cross-entropy of the performer logits (q) against the observer logits (p).
# The padding id is the `pad_token_id` read from the tokenizer earlier; the
# previous `pad_token` name was never defined.
x_ppl, en_walk = entropy(observer_logits, performer_logits, encodings, pad_token_id, median=True)

In [None]:
# Per-token ratio of the two traces. `perplexity` drops one position (logits
# are shifted against the labels) while `entropy` scores every position, so
# `en_walk` is trimmed to the length of `pf_walk` instead of a fixed 130.
bino_walk = pf_walk / en_walk[..., :pf_walk.shape[-1]]