In [None]:
import torch
import torch.nn as nn
import json
import torch.nn.functional as F
import copy

In [2]:
from capas_gpt import TransformerBlock, LayerNorm

In [3]:
class GPTModel(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        self.tok_emb = nn.Embedding(cfg["vocab_size"], cfg["emb_dim"])
        self.pos_emb = nn.Embedding(cfg["context_length"], cfg["emb_dim"])
        self.drop_emb = nn.Dropout(cfg["drop_rate"])
        self.trf_blocks = nn.Sequential(*[TransformerBlock(cfg) for _ in range(cfg["n_layers"])])
        self.final_norm = LayerNorm(cfg["emb_dim"])
        self.out_head = nn.Linear(cfg["emb_dim"], cfg["vocab_size"], bias=False)

    def forward(self, input_ids, attention_mask=None, labels=None):
        batch_size, seq_len = input_ids.shape
        tok_embeds = self.tok_emb(input_ids)
        pos_embeds = self.pos_emb(torch.arange(seq_len, device=input_ids.device))
        x = tok_embeds + pos_embeds
        x = self.drop_emb(x)
        x = self.trf_blocks(x)
        x = self.final_norm(x)
        logits = self.out_head(x)

        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits.view(-1, logits.size(-1)), labels.view(-1))

        return {"loss": loss, "logits": logits}

In [4]:
with open("config_gpt.json", "r") as f:
    cfg = json.load(f)

In [None]:
model_path = "modelo_gpt_custom.pth"

modelo_base = GPTModel(cfg)
modelo_base.load_state_dict(torch.load(model_path, map_location="cpu"))
modelo_base.eval()

GPTModel(
  (tok_emb): Embedding(50257, 1280)
  (pos_emb): Embedding(1024, 1280)
  (drop_emb): Dropout(p=0.0, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=1280, out_features=1280, bias=True)
        (W_key): Linear(in_features=1280, out_features=1280, bias=True)
        (W_value): Linear(in_features=1280, out_features=1280, bias=True)
        (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1280, out_features=5120, bias=True)
          (1): GELU()
          (2): Linear(in_features=5120, out_features=1280, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.0, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(i

In [None]:
import random

def generate_text(model,tokenizer,prompt,seed=42,max_new_tokens=50,temperature=0.9,top_k=50,top_p=0.95,repetition_penalty=1.1):
    device="cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()

    if seed is not None:
        torch.manual_seed(seed)
        random.seed(seed)

    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    generated_ids = input_ids.clone()

    for _ in range(max_new_tokens):
        input_ids_cropped = generated_ids[:, -cfg["context_length"]:]

        with torch.no_grad():
            outputs = model(input_ids=input_ids_cropped)
            logits = outputs["logits"][:, -1, :]

        for token_id in set(generated_ids[0].tolist()):
            logits[0, token_id] /= repetition_penalty

        logits = logits / temperature

        if top_k > 0:
            values, _ = torch.topk(logits, top_k)
            threshold = values[:, -1].unsqueeze(-1)
            logits[logits < threshold] = -float("Inf")

        if top_p < 1.0:
            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
            probs = F.softmax(sorted_logits, dim=-1)
            cumulative_probs = torch.cumsum(probs, dim=-1)

            sorted_indices_to_remove = cumulative_probs > top_p
            sorted_indices_to_remove[:, 1:] = sorted_indices_to_remove[:, :-1].clone()
            sorted_indices_to_remove[:, 0] = False

            indices_to_remove = sorted_indices[sorted_indices_to_remove]
            logits[0, indices_to_remove] = -float("Inf")

        probs = F.softmax(logits, dim=-1)
        next_token = torch.multinomial(probs, num_samples=1)
        generated_ids = torch.cat((generated_ids, next_token), dim=1)

        if next_token.item() == tokenizer.eos_token_id:
            break

    output_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    return output_text

In [None]:
from transformers import GPT2Tokenizer
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
modelo_base.to(device);

In [None]:
evaluation_texts = [
    "The night was quiet, and the leaves rustled softly as I walked through the forest. I didn't know exactly where I was going, only that I felt drawn forward.",
    "She had always found thunderstorms calming, but that evening the lightning flashes felt different. There was something electric in the air, something she couldn't name.",
    "I found an old book in the attic, filled with unfamiliar symbols. As I flipped through the pages, a faint hum seemed to rise from the paper itself.",
    "He woke up in a hospital room with no memory of how he got there. The hallway outside was dark, and the machines next to him were silent.",
    "At first, the apartment seemed perfectly ordinary. But small things started changing—doors left open, lights turned on, whispers in the night.",
    "The plane had landed safely, but we were rerouted through a strange airport with no signs. None of the staff would speak to us.",
    "Someone kept knocking at the front door at exactly the same time every night. When I opened it, there was never anyone there.",
    "We found an old camera while hiking. The photos inside showed places we hadn't been to yet—and people we hadn't met.",
    "He didn't like going into the basement, but the strange noise forced him to check. Everything looked normal, but the air felt heavy.",
    "I caught a glimpse of a shadow behind me in the mirror. When I turned around, the room was empty and still.",
    "They said not to enter the old building on the edge of town. It was abandoned, but I saw lights moving inside late at night.",
    "My daughter started drawing strange shapes in her notebook. When I asked about them, she said she learned them in a dream.",
    "The fog was dense as we drove through the mountain road. Figures seemed to appear and disappear along the roadside.",
    "Each morning, I woke up to find objects moved slightly out of place. No one else in the house noticed anything.",
    "We built the cabin to get away from the city. At first it was peaceful, but then we started hearing footsteps on the porch at night.",
    "A small box arrived in the mail with no sender listed. Inside was a folded piece of paper with only a date written on it.",
    "The town seemed friendly and calm, but I noticed something strange: the same people walked the same routes every single day.",
    "There's a door in the basement that doesn't match the rest of the house. It's locked, and we don't have the key.",
    "My dog refuses to enter the guest room. He just stares at the doorway and growls, even when no one is there.",
    "To understand why I was waking up so tired, I recorded myself sleeping. The recording picked up soft sounds I couldn't explain."
]

In [None]:
prompts = [
    "It was a quiet night when I heard a knock on the door.",
    "Once upon a time in a forgotten land,",
    "She opened the old journal and began to read.",
    "The lights started flickering, and then everything went dark.",
    "I was walking through the forest when I saw something move.",
    "He never liked elevators, but that day he had no choice.",
    "The last thing I remember was the scream.",
    "She whispered something before vanishing into the mist.",
    "They told us not to go into the basement.",
    "No one believed him until the blood appeared.",
    "I love Sundays. The calm, the coffee, the silence.",
    "We were late to the airport, but something told me to stop.",
    "I turned the corner and found a puppy in a box.",
    "The stars were brighter than ever that night.",
    "She received a letter with no return address.",
    "This was supposed to be a normal camping trip.",
    "He had always feared the ocean, but today was different.",
    "The dream felt more real than usual.",
    "I heard laughter from the attic.",
    "Suddenly, the phone rang—at 3 a.m.",
    "The child looked up and asked, 'Who is that man?'",
    "Rain tapped against the windows all night long.",
    "I picked up the mirror and saw something behind me.",
    "Everyone said the house was haunted, but we didn't believe them.",
    "They disappeared without leaving a trace.",
    "The diary ended mid-sentence.",
    "He found the key inside a hollow book.",
    "The train stopped in the middle of nowhere.",
    "She always counted the stairs. Today there was one extra.",
    "I've never been afraid of dolls, until now.",
    "The floorboards creaked, but no one was there.",
    "I saw a shadow crawl across the ceiling.",
    "My dreams have started to bleed into reality.",
    "I don't remember how I got this scar.",
    "The silence was deafening.",
    "I opened my eyes and saw her standing there.",
    "My reflection moved before I did.",
    "There was something alive inside the walls.",
    "I saw myself standing across the street.",
    "The wind carried a voice I recognized.",
    "He was staring at me from the other side of the glass.",
    "They buried her last week, but she was on the porch today.",
    "It started as a game, but we can't stop playing.",
    "The voices stopped when I turned off the radio.",
    "We made a deal. Now it's time to pay.",
    "Nobody remembers what happened that night.",
    "He smiled, but his eyes were empty.",
    "The news said it was just a storm.",
    "There was a name written in blood on the mirror.",
    "Something is wrong with the moon tonight.",
    "I feel watched. Even when I'm alone."
]

In [11]:
def calcular_perplexity(model, tokenizer, texto, device="cpu"):
    model.eval()
    model.to(device)

    inputs = tokenizer(texto, return_tensors="pt", truncation=True, padding="max_length", max_length=512).to(device)
    input_ids = inputs["input_ids"]

    with torch.no_grad():
        outputs = model(input_ids=input_ids)
        logits = outputs["logits"]

    loss_fn = torch.nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
    loss = loss_fn(logits.view(-1, logits.size(-1)), input_ids.view(-1))

    return torch.exp(loss).item()

In [None]:
from collections import Counter

def distribucion_lexica(textos, tokenizer=None, nivel="palabra"):
    counter = Counter()

    for texto in textos:
        if nivel == "token" and tokenizer:
            tokens = tokenizer.tokenize(texto)
        else:
            tokens = texto.lower().split()
        counter.update(tokens)

    return counter

In [13]:
def densidad_lexica(textos):
    total_palabras = 0
    total_unicas = set()

    for texto in textos:
        palabras = texto.lower().split()
        total_palabras += len(palabras)
        total_unicas.update(palabras)

    if total_palabras == 0:
        return 0.0

    return len(total_unicas) / total_palabras

In [None]:
def repeticion_media(textos):
    repes = []

    for texto in textos:
        palabras = texto.lower().split()
        total = len(palabras)
        unicas = len(set(palabras))

        if total == 0:
            continue

        repetidas = total - unicas
        repes.append(repetidas / total)

    if not repes:
        return 0.0

    return sum(repes) / len(repes)

In [None]:
def calcular_entropia_media(model, tokenizer, textos):
    model.eval()
    entropias = []

    for texto in textos:
        inputs = tokenizer(texto, return_tensors="pt", truncation=True, padding=True).to(device)
        input_ids = inputs["input_ids"]

        with torch.no_grad():
            outputs = model(input_ids=input_ids)
            logits = outputs["logits"]
            probs = torch.softmax(logits, dim=-1)
            log_probs = torch.log(probs + 1e-12)

            entropia = -torch.sum(probs * log_probs, dim=-1)
            media = entropia.mean().item()
            entropias.append(media)

    return sum(entropias) / len(entropias)

In [16]:
import numpy as np

def normalizar_counter(counter, vocab_total):
    total = sum(counter.values())
    return np.array([counter.get(tok, 0) / total for tok in vocab_total])

In [17]:
def jaccard_sim(c1, c2):
    set1 = set(c1)
    set2 = set(c2)
    return len(set1 & set2) / len(set1 | set2)

In [None]:
from scipy.special import rel_entr

def kl_divergence(p, q):
    return np.sum(rel_entr(p, q))

In [19]:
from numpy import dot
from numpy.linalg import norm

def cosine_similarity(p, q):
    return dot(p, q) / (norm(p) * norm(q) + 1e-8)

In [None]:
textos_base = [generate_text(modelo_base, tokenizer, prompt, max_new_tokens=80) for prompt in prompts]

In [21]:
vocab_base = distribucion_lexica(textos_base, tokenizer, nivel="token")

In [22]:
densidad_lexica_base = densidad_lexica(textos_base)

In [23]:
repeticion_media_base = repeticion_media(textos_base)

In [None]:
entropia_media_base = calcular_entropia_media(modelo_base, tokenizer, evaluation_texts)

In [None]:
modelo_lora = copy.deepcopy(modelo_base)

In [None]:
import math

class LoRALayer(torch.nn.Module):
    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        self.A = torch.nn.Parameter(torch.empty(in_dim, rank))
        torch.nn.init.kaiming_uniform_(self.A, a=math.sqrt(5))
        self.B = torch.nn.Parameter(torch.zeros(rank, out_dim))
        self.alpha = alpha

    def forward(self, x):
        x = self.alpha * (x @ self.A @ self.B)
        return x

In [26]:
class LinearWithLoRA(torch.nn.Module):
    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            linear.in_features, linear.out_features, rank, alpha
        )

    def forward(self, x):
        return self.linear(x) + self.lora(x)

In [None]:
def replace_linear_with_lora(model, rank, alpha):
    for name, module in model.named_children():
        if isinstance(module, torch.nn.Linear) and any(x in name.lower() for x in ["q", "k", "v", "proj", "fc"]):
            setattr(model, name, LinearWithLoRA(module, rank, alpha))
        else:
            replace_linear_with_lora(module, rank, alpha)

In [None]:
total_params = sum(p.numel() for p in modelo_lora.parameters() if p.requires_grad)
print(f"Total trainable parameters before: {total_params:,}")

for param in modelo_lora.parameters():
    param.requires_grad = False

total_params = sum(p.numel() for p in modelo_lora.parameters() if p.requires_grad)
print(f"Total trainable parameters after: {total_params:,}")

Total trainable parameters before: 838,359,040
Total trainable parameters after: 0


In [None]:
replace_linear_with_lora(modelo_lora, rank=16, alpha=16)

total_params = sum(p.numel() for p in modelo_lora.parameters() if p.requires_grad)
print(f"Total trainable LoRA parameters: {total_params:,}")

Total trainable LoRA parameters: 5,898,240


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
modelo_lora.to(device)
torch.manual_seed(123)

<torch._C.Generator at 0x18c39071c30>

In [None]:
modelo_lora.load_state_dict(torch.load("modelo_lora.pth", map_location="cpu"))

<All keys matched successfully>

In [None]:
textos_lora = [generate_text(modelo_lora, tokenizer, prompt, max_new_tokens=80) for prompt in prompts]

In [33]:
vocab_lora = distribucion_lexica(textos_lora, tokenizer, nivel="token")

In [34]:
densidad_lexica_lora = densidad_lexica(textos_lora)

In [35]:
repeticion_media_lora = repeticion_media(textos_lora)

In [None]:
perplexitys_base = [calcular_perplexity(modelo_lora, tokenizer, texto) for texto in evaluation_texts]
perplexity_base = sum(perplexitys_base) / len(prompts)

print(f"Perplexity base: {perplexity_base:.2f}")

Perplexity base: 23.18


In [None]:
perplexitys_lora = [calcular_perplexity(modelo_lora, tokenizer, texto) for texto in evaluation_texts]
perplexity_lora = sum(perplexitys_lora) / len(prompts)

print(f"Perplexity LoRA: {perplexity_lora:.2f}")

Perplexity LoRA: 24.01


In [38]:
print(f"Palabras más comunes base: {vocab_base.most_common(30)}")
print(f"Palabras más comunes LoRA: {vocab_lora.most_common(30)}")

Palabras más comunes base: [('.', 184), ('Ġthe', 177), ('Ċ', 160), (',', 158), ('Ġwas', 105), ('Ġto', 90), ('Ġa', 87), ('Ġand', 86), ('Ġin', 69), ('Ġhe', 68), ('Ġof', 59), ('Ġhis', 52), ('Ġhad', 51), ('Ġit', 48), ('ĠHe', 46), ('Ġbut', 40), ('Ġthat', 38), ('Ġat', 36), ('The', 34), ('ĠI', 33), ('Ġon', 31), ("'t", 30), ('Ġno', 29), ('Ġhim', 28), ('."', 28), ('Ġwith', 28), ('Ġsaid', 27), ('Ġ"', 27), ("'s", 26), ('Ġbe', 24)]
Palabras más comunes LoRA: [('.', 247), (',', 173), ('Ġthe', 157), ('Ġwas', 116), ('Ġa', 97), ('Ġto', 91), ('Ġand', 89), ('ĠI', 84), ('âĢ', 74), ('Ġit', 65), ('Ļ', 64), ('Ġof', 61), ('Ġin', 56), ('Ġbut', 49), ('Ġhis', 48), ('ĠIt', 46), ('Ġhad', 44), ('Ġhe', 41), ('Ġfor', 38), ('ĠHe', 38), ('Ġhim', 37), ('Ġonly', 35), ('Ġmy', 34), ('Ġas', 31), ('Ġon', 30), ('t', 30), ('Ġno', 30), ('ĠThe', 30), ('Ġat', 29), ('Ġthere', 28)]


In [None]:
print(f"Densidad léxica base: {densidad_lexica_base:.3f}")
print(f"Densidad léxica lora: {densidad_lexica_lora:.3f}")

print(f"Repetición media base: {repeticion_media_base:.2f}")
print(f"Repetición media lora: {repeticion_media_lora:.2f}")

Densidad léxica base: 0.126
Densidad léxica LoRA: 0.149
Repetición media base: 2.43
Repetición media LoRA: 1.78


In [None]:
vocab_total = list(set(vocab_base) | set(vocab_lora))

vec_base = normalizar_counter(vocab_base, vocab_total)
vec_lora = normalizar_counter(vocab_lora, vocab_total)

print("KL Divergence:", kl_divergence(vec_base, vec_lora))
print("Cosine Similarity:", cosine_similarity(vec_base, vec_lora))
print("Jaccard Similarity:", jaccard_sim(vocab_base, vocab_lora))

KL Divergence: 0.213
Cosine Similarity: 0.761
Jaccard Similarity: 0.597
