In [17]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

In [None]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Set the model to evaluation mode
model.eval()

In [19]:
# Function to calculate perplexity as a proxy for a GLUE-like score
def calculate_perplexity(text, model, tokenizer):
    tokens = tokenizer.encode(text, return_tensors='pt')
    with torch.no_grad():
        outputs = model(tokens, labels=tokens)
        loss = outputs.loss
        perplexity = torch.exp(loss).item()
    return perplexity

In [20]:
# Text generation function
def generate_text(prompt, model, tokenizer, max_length=54):
    generated = tokenizer.encode(prompt, return_tensors='pt')

    model.eval()
    with torch.no_grad():
        for _ in range(max_length):
            outputs = model(generated)
            next_token_logits = outputs.logits[:, -1, :]
            next_token = torch.argmax(next_token_logits, dim=-1)
            generated = torch.cat((generated, next_token.unsqueeze(0)), dim=1)
            if next_token == tokenizer.eos_token_id:
                break

    generated_text = tokenizer.decode(generated[0], skip_special_tokens=True)
    
    # Calculate a "GLUE-like" score based on perplexity
    glue_like_score = calculate_perplexity(generated_text, model, tokenizer)
    
    return generated_text, glue_like_score

In [21]:
prompt = "The quick brown fox was about six months of"
generated_text = generate_text(prompt, model, tokenizer)
print(f"Generated text: {generated_text}")
print(f"GLUE-like score (perplexity): {glue_like_score}")

Generated text: ('The quick brown fox was about six months of age when he was attacked by a group of wolves. He was taken to the hospital where he was treated for a broken jaw and a broken leg.\n\nThe fox was taken to the hospital where he was treated for a broken jaw and a broken leg.\n\n', 5.307563304901123)
GLUE-like score (perplexity): 5.307563304901123


Perplexity is a measure of how well a probability distribution or probability model predicts a sample.Perplexity is always a positive number, and in practice, it often ranges from 1 (perfect model) to infinity. A perplexity score of 1 would mean the model predicted the text perfectly (100% confidence in every prediction).So, here the score is 5.3 which is a good performance score for a model.