In [5]:
import torch
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Load the pretrained GPT-2 tokenizer and language-model head (weights are
# downloaded on first use).
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

# Tokenize a sample sentence into PyTorch tensors.
inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")

# This forward pass is only inspected, never back-propagated, so disable
# autograd to avoid building an unused computation graph.
with torch.no_grad():
    # Passing the input ids as labels makes the model return its
    # language-modeling loss alongside the logits.
    outputs = model(**inputs, labels=inputs["input_ids"])
loss = outputs.loss
logits = outputs.logits

Downloading:   0%|          | 0.00/523M [00:00<?, ?B/s]

In [28]:
import numpy as np

def score(tokens_tensor, perplexity=False):
    """Score a token sequence with the global GPT-2 `model`.

    The sequence is used as both input and labels, so the model reports its
    language-modeling loss on the sequence itself. A lower value means the
    model finds the text more probable, and vice versa.

    Args:
        tokens_tensor: Token ids as returned by
            `tokenizer.encode(..., return_tensors="pt")`.
            NOTE(review): presumably needs at least two tokens for the loss
            to be defined -- confirm before scoring single-token inputs.
        perplexity: If False (default), return the raw loss. If True, return
            exp(loss), i.e. the perplexity.

    Returns:
        A Python float: the loss, or its exponential when `perplexity` is True.
    """
    # Scoring is inference-only; skip autograd bookkeeping.
    with torch.no_grad():
        loss = model(tokens_tensor, labels=tokens_tensor).loss
    if perplexity:
        return torch.exp(loss).item()
    return loss.item()

# Two sentence pairs that differ only in the sentiment-bearing word, so their
# scores can be compared side by side.
texts = [
    'james bond is a very bad movie',
    'james bond is a very good movie',
    'i hated james bond',
    'i loved james bond',
]
for text in texts:
    # Encode the sentence as a PyTorch tensor without adding special tokens.
    tokens_tensor = tokenizer.encode(text, return_tensors="pt", add_special_tokens=False)
    print(f"{text} {score(tokens_tensor)}")

james bond is a very bad movie 6.712692737579346
james bond is a very good movie 6.395544528961182
i hated james bond 9.133502960205078
i loved james bond 8.39603328704834


In [42]:
# Sampling is stochastic: seed torch so the continuation (and anything later
# computed from `output`) can be reproduced on a fresh run.
torch.manual_seed(42)

# Encode the prompt as a PyTorch tensor without adding special tokens.
input_id = tokenizer.encode("james bond", add_special_tokens=False, return_tensors="pt")

# Top-k sampling: each step samples from the 10 most likely next tokens.
# `max_length` bounds the total sequence length, prompt included.
sample_output = model.generate(
    input_id,
    do_sample=True,
    max_length=50,
    top_k=10,
    # GPT-2 defines no pad token; name the EOS token explicitly instead of
    # relying on generate()'s implicit fallback (which logs a warning).
    pad_token_id=tokenizer.eos_token_id,
)

print("Output:\n" + 100 * '-')
# Decode the first (only) generated sequence back into text.
output = tokenizer.decode(sample_output[0], skip_special_tokens=True)
print(output)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Output:
----------------------------------------------------------------------------------------------------
james bond.

A spokesperson for the department did not immediately respond to a request for comment on Monday morning.

The department, which is not affiliated with the U.S. Attorney's Office in New York, is a federal civil


In [43]:
# Load an off-the-shelf sentiment-analysis pipeline.
# The checkpoint is named explicitly so results do not silently change if the
# library's default model for this task changes.

from transformers import pipeline
sentiment_pipeline = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


Downloading:   0%|          | 0.00/255M [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

In [44]:
# Classify the sentiment of the GPT-2 continuation stored in `output`
# (the decoded text produced by the generation cell).
sentiment_pipeline(output)

[{'label': 'NEGATIVE', 'score': 0.9967401623725891}]

In [51]:
# Probe the classifier with a sentence that carries no obvious sentiment.
# NOTE(review): the recorded result is NEGATIVE with ~0.998 confidence; the
# SST-2 checkpoint presumably has no neutral class, so a confident label on
# neutral text should not be over-interpreted -- confirm.
sentiment_pipeline("This is an unclear situation")

[{'label': 'NEGATIVE', 'score': 0.9983226656913757}]