In [13]:
import torch
from tqdm import tqdm
from transformers import GPT2LMHeadModel, GPT2TokenizerFast
# --- Configuration -----------------------------------------------------------
device = 'cpu'  # switch to 'cuda' to run on a GPU
model_id = 'gpt2'

# --- Tokenizer and language model --------------------------------------------
tokenizer = GPT2TokenizerFast.from_pretrained(model_id)
model = GPT2LMHeadModel.from_pretrained(model_id)
model = model.to(device)

# --- Text to score -----------------------------------------------------------
# Alternative input (disabled): the WikiText-2 test split joined into one text.
# from nlp import load_dataset
# test = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test')
# encodings = tokenizer('\n\n'.join(test['text']), return_tensors='pt')

# A single short sentence keeps the notebook fast to run.
encodings = tokenizer('An orange ruled the world.', return_tensors='pt')

def compute_perplexity(model, encodings, stride=512):
    """Compute sliding-window perplexity of a causal LM on a tokenized text.

    The token sequence is scored in chunks of ``stride`` tokens. Each chunk is
    fed to the model together with as many preceding tokens as fit into the
    model's context (``model.config.n_positions``); the preceding tokens only
    serve as context and are masked out of the loss with the label ``-100``.

    Args:
        model: Causal language model that is called as
            ``model(input_ids, labels=...)`` and returns the mean loss over the
            non-masked labels as element ``0`` of its output. It must expose
            ``config.n_positions`` and ``device``.
        encodings: Tokenizer output exposing ``input_ids`` as a 2-D tensor
            (the second dimension is the token dimension).
        stride: Number of new tokens scored per window. Must satisfy
            ``0 < stride <= model.config.n_positions``. Defaults to 512.

    Returns:
        A tuple ``(ppl, outputs)`` where ``ppl`` is a 0-dim tensor holding
        ``exp(mean negative log-likelihood per scored token)`` and ``outputs``
        is the raw model output of the last window.

    Raises:
        ValueError: If the input has fewer than two tokens (there is nothing to
            predict) or if ``stride`` is outside the valid range.
    """
    max_length = model.config.n_positions
    all_ids = encodings.input_ids
    seq_len = all_ids.size(1)

    if seq_len < 2:
        raise ValueError(
            "perplexity needs at least 2 tokens, got %d" % seq_len
        )
    if not 0 < stride <= max_length:
        # A stride larger than the context would silently skip tokens.
        raise ValueError(
            "stride must be in (0, %d], got %r" % (max_length, stride)
        )

    nlls = []          # per-window summed negative log-likelihoods
    n_scored = 0       # number of tokens that actually contributed to the loss
    for i in tqdm(range(0, seq_len, stride)):
        begin_loc = max(i + stride - max_length, 0)
        end_loc = min(i + stride, seq_len)
        trg_len = end_loc - i    # may be different from stride on last loop
        # Use the model's own device instead of relying on a notebook global.
        input_ids = all_ids[:, begin_loc:end_loc].to(model.device)
        target_ids = input_ids.clone()
        target_ids[:, :-trg_len] = -100   # context-only tokens are not scored

        # The model shifts labels by one position internally, so the label at
        # position 0 never contributes. Count the labels that really do, rather
        # than assuming it is always trg_len (it is trg_len - 1 whenever the
        # window has no preceding context, e.g. the first window).
        n_valid = int((target_ids[:, 1:] != -100).sum())

        with torch.no_grad():
            outputs = model(input_ids, labels=target_ids)

        if n_valid > 0:
            # outputs[0] is a mean over n_valid tokens; turn it back into a sum.
            nlls.append(outputs[0] * n_valid)
            n_scored += n_valid

    ppl = torch.exp(torch.stack(nlls).sum() / n_scored)
    return ppl, outputs

In [14]:
# Score the sample sentence. `outputs` is the raw model output of the last
# window processed (the progress bar below shows a single window for this input).
ppl, outputs = compute_perplexity(model, encodings)

100%|██████████| 1/1 [00:00<00:00,  8.15it/s]


In [15]:
# Second element of the output tuple (the logits, see the comparison with
# `outputs2.logits` further down). The last dimension equals the tokenizer's
# vocabulary size.
outputs[1].shape

torch.Size([1, 6, 50257])

In [17]:
# First element of the output tuple: the scalar loss that compute_perplexity
# reads as outputs[0].
outputs[0]

tensor(6.1425)

In [21]:
# Raw (unnormalized) logits tensor; one row of vocabulary scores per input token.
outputs[1]

tensor([[[ -30.1623,  -28.9555,  -32.0044,  ...,  -35.6717,  -35.8829,
           -29.2568],
         [ -74.5398,  -74.5179,  -78.7193,  ...,  -85.7082,  -80.6271,
           -75.4816],
         [ -83.7110,  -85.1910,  -92.2418,  ...,  -93.3988,  -92.9523,
           -87.2709],
         [-102.3252, -100.4652, -104.4386,  ..., -105.3670, -106.3231,
          -101.8432],
         [ -95.8678,  -96.9816, -102.3193,  ..., -110.5038, -106.3977,
           -98.8357],
         [-123.4892, -122.2919, -123.8654,  ..., -134.8279, -132.7908,
          -115.4254]]])

In [22]:
# List the attributes of `outputs`: only tuple methods show up, i.e. the model
# returned a plain tuple without named fields.
dir(outputs)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'count',
 'index']

In [23]:
# In this run `outputs` is a plain tuple, so named-field access is expected to
# fail. Catch the error and display it instead of letting the cell raise, so
# that "Restart Kernel -> Run All" is not interrupted at this point.
try:
    outputs.last_hidden_state
except AttributeError as exc:
    print(f"{type(exc).__name__}: {exc}")

AttributeError: 'tuple' object has no attribute 'last_hidden_state'

In [26]:

# Load the same checkpoint again, this time with return_dict=True, and repeat
# the perplexity computation to compare the two output formats.
model2 = GPT2LMHeadModel.from_pretrained(model_id, return_dict=True)
model2 = model2.to(device)
ppl2, outputs2 = compute_perplexity(model2, encodings)

100%|██████████| 1/1 [00:00<00:00, 10.11it/s]


In [27]:
# With return_dict=True the loss is reachable as a named attribute.
outputs2.loss

tensor(6.1425)

In [28]:
# Positional access on the tuple output, shown for comparison with
# `outputs2.loss` above.
outputs[0]

tensor(6.1425)

In [31]:
# Element-wise check that both output formats carry the same logits.
(outputs2.logits == outputs[1]).all()

tensor(True)

In [32]:
# Tokenizer vocabulary size, for comparison with the last dimension of the
# logits shape shown earlier.
tokenizer.vocab_size

50257