# Evaluate

In [1]:
import torch
from transformers import GPT2LMHeadModel, GPT2TokenizerFast, Trainer, TrainingArguments
from datasets import load_from_disk
import time
from torch.utils.data import DataLoader, Subset
import math
import random
from tqdm import tqdm


# Location passed to `from_pretrained` for both the tokenizer and the model
# below, so the two are always loaded from the same place.
MODEL_PATH = "CooperLM-354M"

## Load Model and Tokenizer

In [2]:
# Tokenizer and weights both come from MODEL_PATH.
tokenizer = GPT2TokenizerFast.from_pretrained(MODEL_PATH)
model = GPT2LMHeadModel.from_pretrained(MODEL_PATH)

# Run on the GPU when one is available, otherwise stay on the CPU.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

# Put the model in eval mode; as the cell's last expression this also
# displays the module tree.
model.eval()


GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 1024)
    (wpe): Embedding(256, 1024)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-23): 24 x GPT2Block(
        (ln_1): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D(nf=3072, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=1024)
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D(nf=4096, nx=1024)
          (c_proj): Conv1D(nf=1024, nx=4096)
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=1024, out_features=50257, bias=False)
)

## Evaluating Perplexity on Eval Set

In [6]:
# Seed for the held-out split. Without a seed `train_test_split` shuffles
# differently on every run, so the "test" rows (and therefore the reported
# perplexity) would not be reproducible.
# NOTE(review): if the training notebook created its split with a specific
# seed, use that same seed here so these rows are genuinely held out.
EVAL_SPLIT_SEED = 8232010

try:
    full_eval_dataset = load_from_disk("tokenized_data/cooper_subset_100k").train_test_split(
        test_size=0.1, seed=EVAL_SPLIT_SEED
    )["test"]
except Exception as err:
    # Best effort: the rest of the notebook (generation) still works without
    # the eval data. Catch `Exception` rather than using a bare `except` so
    # Ctrl-C / kernel interrupts are not swallowed, and report why loading failed.
    print(f"Could not load eval dataset: {err!r}")
    full_eval_dataset = None

def compute_perplexity(model, dataset, tokenizer, block_size=256, sample_size=1000, seed=8232010):
    """Estimate the perplexity of ``model`` on a random subset of ``dataset``.

    Args:
        model: Causal language model. It is called as
            ``model(input_ids=..., labels=...)`` and must return an object with
            a ``.loss`` attribute; it must also expose ``.device`` and ``.eval()``.
        dataset: Indexable collection whose items are mappings containing an
            ``"input_ids"`` entry, or ``None`` when no eval data is available.
        tokenizer: Unused; kept so existing callers keep working.
        block_size: Unused; kept so existing callers keep working.
        sample_size: Maximum number of examples to score. Clamped to
            ``len(dataset)`` so small datasets no longer raise ``ValueError``.
        seed: Seed of the private RNG used to pick the subset. The default
            selects the same indices as the previous global ``random.seed`` call.

    Returns:
        The perplexity ``exp(mean token loss)`` as a float, or ``None`` when
        there is nothing to evaluate (no dataset, empty dataset, or no sequence
        with at least two tokens).
    """
    if dataset is None:
        print("No eval dataset found.")
        return None

    # Never ask for more examples than exist; random.sample would raise.
    n_examples = min(sample_size, len(dataset))
    if n_examples <= 0:
        print("Eval dataset is empty; nothing to evaluate.")
        return None

    # A private RNG gives a reproducible subset without resetting the global
    # `random` state that other code may rely on.
    rng = random.Random(seed)
    indices = rng.sample(range(len(dataset)), n_examples)
    subset = Subset(dataset, indices)

    def collate_input_ids(examples):
        # Build an explicit (batch, seq_len) LongTensor. The default collate
        # function transposes list-valued columns into a list of per-position
        # tensors, which only converts back cleanly when batch_size == 1.
        return torch.stack(
            [torch.as_tensor(example["input_ids"], dtype=torch.long) for example in examples]
        )

    dataloader = DataLoader(subset, batch_size=1, collate_fn=collate_input_ids)

    total_loss = 0.0
    total_targets = 0

    model.eval()
    with torch.no_grad():
        for input_ids in tqdm(dataloader, desc="Evaluating"):
            input_ids = input_ids.to(model.device)

            # A causal LM predicts token t+1 from token t, so a sequence of
            # length L contributes L - 1 targets; shorter ones have none.
            n_targets = input_ids.size(-1) - 1
            if n_targets < 1:
                continue

            outputs = model(input_ids=input_ids, labels=input_ids)
            # Weight each sequence by its target count so the result is a true
            # per-token average (identical to the plain mean when all sequences
            # have equal length). Assumes `.loss` is the mean over the shifted
            # targets, as for Hugging Face causal LMs -- TODO confirm for other models.
            total_loss += outputs.loss.item() * n_targets
            total_targets += n_targets

    if total_targets == 0:
        print("No sequences with at least two tokens; nothing to evaluate.")
        return None

    return math.exp(total_loss / total_targets)

# Number of eval examples to score. Defined once so the value passed to
# compute_perplexity and the value shown in the printed label cannot drift apart.
EVAL_SAMPLE_SIZE = 1000

# Skip evaluation entirely when the eval split is missing or empty.
if full_eval_dataset:
    perp = compute_perplexity(model, full_eval_dataset, tokenizer, sample_size=EVAL_SAMPLE_SIZE)
    # compute_perplexity can return None; formatting None with `:.2f` would raise.
    if perp is not None:
        print(f"\nPerplexity ({EVAL_SAMPLE_SIZE} sample subset): {perp:.2f}")

Evaluating:   0%|                                                                             | 0/1000 [00:00<?, ?it/s]`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Evaluating: 100%|██████████████████████████████████████████████████████████████████| 1000/1000 [00:28<00:00, 34.89it/s]


Perplexity (1000 sample subset): 262.85





## Interactive Single Prompt Generator

In [9]:
def generate(prompt, max_length=100, temperature=0.9, top_p=0.95):
    """Sample a continuation of ``prompt`` from the notebook-level ``model``.

    Args:
        prompt: Text to continue. An empty prompt is replaced by the
            tokenizer's EOS token so generation has a token to start from.
        max_length: Upper bound on the total sequence length passed to
            ``model.generate``. Clamped to the model's ``n_positions`` when
            the config exposes it, since longer sequences would index past
            the positional embedding table.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Returns:
        The decoded text of the first returned sequence, with special tokens
        removed.
    """
    # An empty string tokenizes to zero tokens, which generate() cannot extend.
    if not prompt:
        prompt = tokenizer.eos_token

    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.to(model.device)
    # Pass the mask explicitly: pad and EOS share an id here, so it cannot be
    # inferred reliably from the input ids alone.
    attention_mask = encoded.attention_mask.to(model.device)

    # Keep the requested length inside the model's positional range.
    context_limit = getattr(model.config, "n_positions", None)
    if context_limit is not None:
        max_length = min(max_length, context_limit)

    output = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_length=max_length,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Read one prompt from the user and show a single sampled completion.
# Note: input() blocks until text is entered, so this cell pauses a "Run All".
prompt = input("Enter Prompt: ")
print(generate(prompt=prompt))

Enter Prompt:  Hello World


Hello World Series in 1969 – The original song and the world's home, who did not run to a great deal with other of the band. The following the first stage to go in the club to a young team to the band, and the same to have been a young player, and the band, who did not become the most influential in a part of the United States and his house. It was "The home" during the film. He also been the band was played on a short-


## Batch Prompt Evaluation

In [11]:
# Prompts used to eyeball completions across a few different openings.
prompts = [
    "Hello, my name is",
    "In a distant galaxy,",
    "The secret to happiness is",
    "The history of Canada begins with",
    "Artificial intelligence will change the world by",
    "The country Malta is",
]

print("=== Batch Completion Test ===")
for p in prompts:
    # Echo the prompt before its completion so the two can be matched up.
    print(f"\nPrompt: {p}")
    print(generate(p))
    # Visual separator between consecutive completions.
    print("=" * 60)


=== Batch Completion Test ===

Prompt: Hello, my name is
Hello, my name is a long to the main system, and are the world's most famous, the first-known, the city's first known as of the only to the most popular and city. The country was considered to a large use of the use of the use of the city. The population of the population of the town was originally, and more than the city's city's population of the most common buildings. Aed town has a separate city. It is a result for local cultural

Prompt: In a distant galaxy,
In a distant galaxy, the Sun were a high-andated-known by an iron, and the other materials were used to the use of the tail, and the other species. The last of the water was used to its the first century. At the river-based and in the early 1990s were the largest water and was discovered for the late 20th century. The modern era, the area of the population grew the 20th century. It was developed in the southern and the 17th

Prompt: The secret to happiness is
The secret 

## Generation Tuner

In [12]:
prompt = "A wise man once said"

# generate() samples (do_sample=True), so without a fixed seed every run draws
# different random numbers and the outputs differ for reasons other than the
# temperature. Re-seeding before each call makes temperature the only variable
# and makes the cell reproducible.
TUNER_SEED = 8232010

for t in [0.7, 0.9, 1.1]:
    torch.manual_seed(TUNER_SEED)
    print(f"\n[temperature = {t}]")
    print(generate(prompt, temperature=t, top_p=0.95))


[temperature = 0.7]
A wise man once said the first to be used by the old in his life to his own own book. The "Babaptized on the time" of the same period, "The life in the world has been an alternative by the city and the earliest, which is the only one of the other of the "the world. In the first century, a period, the word of the first syllor and the name of the names of the "the "the names" and "a

[temperature = 0.9]
A wise man once said about the death of Christ, the king, they had no, to marry as the bishops and the new authority. The last other scholars were the church of the same time of the first two-day Church. As-day Christianity and religious leaders. It was also been a bishop and the church in the bishops to the church as to the church of England to the time of the church by the church of the Second Orthodox Church's political rights of the church of the Book of

[temperature = 1.1]
A wise man once said there." A few of a few different times that the two centuries of a "T