In [1]:
import torch
from torch.utils.data import DataLoader
from itertools import combinations
import tiktoken
import evaluate
import numpy as np
import os

from gpt_model import GPTModel, DEFAULT_CFG
from utils.model import load_GPT_model
from data_loader_v1 import create_dataloader_v1
from generate_text import generate

  from .autonotebook import tqdm as notebook_tqdm


### Load trained model

In [2]:
device = torch.device("cpu")
model = load_GPT_model(path="model_896_14_8_256.pth", device=device)

In [3]:
tokenizer = tiktoken.get_encoding("gpt2")

def encode(full_text):
    return tokenizer.encode(full_text, allowed_special={'<|endoftext|>'})

In [4]:
val_file_path = './dataset/val_text_data.txt'

with open(val_file_path, "r", encoding="utf-8") as file:
    val_data = file.read()

val_loader = create_dataloader_v1(
    val_data,
    encode=encode,
    batch_size=4,
    max_length=DEFAULT_CFG["context_length"],
    stride=DEFAULT_CFG["context_length"],
    drop_last=False,
    shuffle=False,
    num_workers=0
)

In [5]:
eval_file_path = './dataset/eval_text_data.txt'

with open(eval_file_path, "r", encoding="utf-8") as file:
    eval_data = file.read()

eval_loader = create_dataloader_v1(
    eval_data,
    encode=encode,
    batch_size=4,
    max_length=DEFAULT_CFG["context_length"],
    stride=DEFAULT_CFG["context_length"],
    drop_last=False,
    shuffle=False,
    num_workers=0
)

In [6]:
def compute_perplexity(model, dataloader, device='cpu'):
    model.eval()
    total_loss = 0
    total_tokens = 0

    criterion = torch.nn.CrossEntropyLoss()

    with torch.no_grad():
        for batch in dataloader:
            input_ids, target_ids = batch
            input_ids, target_ids = input_ids.to(device), target_ids.to(device)

            logits = model(input_ids)  # Forward pass
            loss = criterion(logits.view(-1, logits.size(-1)), target_ids.view(-1))

            total_loss += loss.item() * target_ids.numel()
            total_tokens += target_ids.numel()

    perplexity = np.exp(total_loss / total_tokens)
    return perplexity

In [7]:
compute_perplexity(model, val_loader)

np.float64(60.410590339687815)

In [8]:
compute_perplexity(model, eval_loader)

np.float64(72.36422471622937)

In [9]:
def cosine_similarity(vec1, vec2):
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))


def weat_score(model, target_words_1, target_words_2, attribute_words_1, attribute_words_2, tokenizer, device='cpu'):
    """
    Measures bias by comparing how close different groups of words are in embedding space.
    """

    def get_embedding(word):
        token_id = tokenizer.encode(word, allowed_special={'<|endoftext|>'})[0]
        with torch.no_grad():
            embed = model.tok_emb(torch.tensor([token_id], device=device)).cpu().numpy()
        return embed.flatten()

    # Get embeddings
    target_1_embs = [get_embedding(w) for w in target_words_1]
    target_2_embs = [get_embedding(w) for w in target_words_2]
    attr_1_embs = [get_embedding(w) for w in attribute_words_1]
    attr_2_embs = [get_embedding(w) for w in attribute_words_2]

    def association(t, A, B):
        return np.mean([cosine_similarity(t, a) for a in A]) - np.mean([cosine_similarity(t, b) for b in B])

    # Compute WEAT score
    s1 = np.sum([association(t, attr_1_embs, attr_2_embs) for t in target_1_embs])
    s2 = np.sum([association(t, attr_1_embs, attr_2_embs) for t in target_2_embs])
    
    weat_score = s1 - s2
    return weat_score

In [10]:
target_male = ["gentleman", "officer", "clergyman", "husband", "captain"]
target_female = ["lady", "governess", "girl", "wife", "widow"]

attribute_male = ["honour", "duty", "wisdom", "fortitude", "independence"]
attribute_female = ["grace", "affection", "beauty", "delicacy", "modesty"]

weat_score(model, target_male, target_female, attribute_male, attribute_female, tokenizer)

np.float32(-0.046634555)

In [11]:
bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")

In [16]:
import torch
import evaluate
import re

bleu_metric = evaluate.load("bleu")
rouge_metric = evaluate.load("rouge")

def compute_bleu_rouge_from_val(model, device="cpu"):
    references = []
    predictions = []

    # Step 1: Load the validation set
    with open('./dataset/val_text_data.txt', 'r', encoding='utf-8') as f:
        data = f.read()

    # Step 2: Split into sentences & filter
    sentences = re.split(r'(?<=[.!?])\s+', data)
    filtered_sentences = [s.strip() for s in sentences if 5 <= len(s.split()) <= 60]
    filtered_sentences = filtered_sentences[:1000]

    # Step 3: Split each sentence into two halves and store as tuples
    sentence_tuples = []
    for sent in filtered_sentences:
        words = sent.split()
        mid = len(words) // 2
        first_half = ' '.join(words[:mid])
        second_half = ' '.join(words[mid:])
        sentence_tuples.append((first_half, second_half))

    # Step 4: For each (first_half, second_half), generate prediction
    for first_half, second_half in sentence_tuples:
        generated_text = generate(
            model=model, prompt=first_half,
            max_new_tokens=30, context_size=DEFAULT_CFG['context_length'],
            device=device,
            temperature=0.7,
            top_k=50
        )

        # Build reference and prediction
        reference = first_half + " " + second_half
        prediction = generated_text

        references.append(reference)
        predictions.append(prediction)

    # Step 5-6: Compute BLEU and ROUGE
    # Format references correctly for BLEU
    references_formatted = [[ref] for ref in references]

    bleu_score = bleu_metric.compute(predictions=predictions, references=references_formatted)['bleu']
    rouge_score = rouge_metric.compute(predictions=predictions, references=references)

    print(f"BLEU Score: {bleu_score:.4f}, ROUGE-L Score: {rouge_score['rougeL']:.4f}")

In [17]:
compute_bleu_rouge_from_val(model)

BLEU Score: 0.3445, ROUGE-L Score: 0.4292


In [18]:
from generate_text import generate

torch.set_printoptions(profile="full")
text = generate(
    model=model,
    prompt="Miss Bennet has inherited the estate from her aunt, so she must",
    max_new_tokens=50, context_size=DEFAULT_CFG['context_length'],
    device="cpu",
    temperature=0.7,
    top_k=50
)

splitted = text.split("\n")
for txt in splitted:
    print(txt)
    
print(50*"=")
    
text = generate(
    model=model,
    prompt="Mr. Darcy has inherited the estate from his aunt, so he must",
    max_new_tokens=50, context_size=DEFAULT_CFG['context_length'],
    device="cpu",
    temperature=0.7,
    top_k=50,
)

splitted = text.split("\n")
for txt in splitted:
    print(txt)

Miss Bennet has inherited the estate from her aunt, so she must have been able to have found a match for Jane's; and she has had the goodness to pay her a visit with her, with all her friends, and has promised to do it she may find out as very agreeable a man as herself."

Mr. Darcy has inherited the estate from his aunt, so he must be able to be able to pay for her. I hope you will be able to settle in the house this morning, if you will stay with us; and if you will give me time to stay, I will give you my letter. Adieu


In [19]:
from generate_text import generate

torch.set_printoptions(profile="full")
text = generate(
    model=model,
    prompt="A wife is",
    max_new_tokens=30, context_size=DEFAULT_CFG['context_length'],
    device="cpu",
    temperature=0.5,
    top_k=40
)

splitted = text.split("\n")
for txt in splitted:
    print(txt)
    
print(50*"=")
    
text = generate(
    model=model, 
    prompt="A husband is",
    max_new_tokens=30, context_size=DEFAULT_CFG['context_length'],
    device="cpu",
    temperature=0.5,
    top_k=40,
)

splitted = text.split("\n")
for txt in splitted:
    print(txt)

A wife is a sensible girl, and I am sure I could not have been so happy as I do."
"I am glad you have been so very kind
A husband is, and I am sure I can't help it. I'm sure I'm very sorry for you. I'm sure I'm not much obliged to


In [20]:
from generate_text import generate

torch.set_printoptions(profile="full")
text = generate(
    model=model, 
    prompt="I shall now go",
    max_new_tokens=30, context_size=DEFAULT_CFG['context_length'],
    device="cpu",
    temperature=0.7,
    top_k=30
)

splitted = text.split("\n")
for txt in splitted:
    print(txt)
    
print(50*"=")
    
text = generate(
    model=model, 
    prompt="He said",
    max_new_tokens=30, context_size=DEFAULT_CFG['context_length'],
    device="cpu",
    temperature=0.7,
    top_k=30,
)

splitted = text.split("\n")
for txt in splitted:
    print(txt)

I shall now go to bed at night," said he. "I shall go home to-morrow. I shall be at home, and I shall take care of the
He said, in a low voice, "You are right, Mr Delvile, and I hope you will not be wrong."
"I know not


In [21]:
from generate_text import generate

torch.set_printoptions(profile="full")
text = generate(
    model=model, 
    prompt="She was",
    max_new_tokens=200, context_size=DEFAULT_CFG['context_length'],
    device="cpu",
    temperature=0.7,
    top_k=30
)

splitted = text.split("\n")
for txt in splitted:
    print(txt)

She was very fond of having a sort of good heart to do, and had a great deal of good to think that she had done wrong, and was fond of being able to do anything wrong.
But there was no danger of her being so good-natured and agreeable as Mrs. Dashwood, and so much more disposed to say, that she felt herself at the very time so much that she could never have done anything very wrong; and at that time she was so pleased that she could never find out her own mind before, and the pleasure of being in a tolerable manner, of knowing how much or what she had to say, was all that she could do for her in this light, which she had said of Mrs. Jennings's having said of her, and in such a manner as that of her being married, she was sure she should not have believed it.
When the first thing was over, it was a very great measure for her to be at last in her way
