In [18]:
import torch
import torch.nn as nn
import json
from IPython.display import clear_output
import torch.nn.functional as F

In [19]:
from capas_gpt import TransformerBlock, LayerNorm

In [20]:
class GPTModel(nn.Module):
    """GPT-style language model assembled from a configuration mapping.

    The configuration must provide the keys ``vocab_size``, ``emb_dim``,
    ``context_length``, ``drop_rate`` and ``n_layers``. The whole mapping is
    also handed to every ``TransformerBlock``.
    """

    def __init__(self, cfg):
        super().__init__()
        vocab_size = cfg["vocab_size"]
        emb_dim = cfg["emb_dim"]

        # Token and learned positional embeddings share the same width.
        self.tok_emb = nn.Embedding(vocab_size, emb_dim)
        self.pos_emb = nn.Embedding(cfg["context_length"], emb_dim)
        self.drop_emb = nn.Dropout(cfg["drop_rate"])

        # Stack of n_layers transformer blocks applied one after another.
        blocks = [TransformerBlock(cfg) for _ in range(cfg["n_layers"])]
        self.trf_blocks = nn.Sequential(*blocks)

        self.final_norm = LayerNorm(emb_dim)
        # Projects hidden states back to vocabulary logits (no bias term).
        self.out_head = nn.Linear(emb_dim, vocab_size, bias=False)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Compute next-token logits and, when labels are given, a loss.

        Args:
            input_ids: 2-D tensor of token ids, unpacked as (batch, sequence).
            attention_mask: Accepted in the signature but not used by this
                method.
            labels: Optional tensor of target ids. It is flattened and compared
                position by position with the flattened logits; no shifting is
                applied here.

        Returns:
            dict with ``"loss"`` (``None`` when ``labels`` is ``None``) and
            ``"logits"``.
        """
        _, seq_len = input_ids.shape
        positions = torch.arange(seq_len, device=input_ids.device)

        # Positional embeddings broadcast over the batch dimension.
        hidden = self.tok_emb(input_ids) + self.pos_emb(positions)
        hidden = self.drop_emb(hidden)
        hidden = self.trf_blocks(hidden)
        hidden = self.final_norm(hidden)
        logits = self.out_head(hidden)

        if labels is None:
            return {"loss": None, "logits": logits}

        criterion = nn.CrossEntropyLoss()
        flat_logits = logits.view(-1, logits.size(-1))
        loss = criterion(flat_logits, labels.view(-1))
        return {"loss": loss, "logits": logits}

In [21]:
# Load the model hyperparameters used to build GPTModel.
# The encoding is stated explicitly: JSON is UTF-8, while open() would
# otherwise fall back to the platform's locale encoding.
with open("config_gpt.json", "r", encoding="utf-8") as f:
    cfg = json.load(f)

In [22]:
model_path = "modelo_gpt_custom.pth"

# Build the architecture from the config, then restore the saved weights on CPU.
model = GPTModel(cfg)
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state dict needs, so a tampered checkpoint cannot execute arbitrary code.
model.load_state_dict(torch.load(model_path, map_location="cpu", weights_only=True))
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

GPTModel(
  (tok_emb): Embedding(50257, 1280)
  (pos_emb): Embedding(1024, 1280)
  (drop_emb): Dropout(p=0.0, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=1280, out_features=1280, bias=True)
        (W_key): Linear(in_features=1280, out_features=1280, bias=True)
        (W_value): Linear(in_features=1280, out_features=1280, bias=True)
        (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1280, out_features=5120, bias=True)
          (1): GELU()
          (2): Linear(in_features=5120, out_features=1280, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.0, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(i

In [23]:
import random

def generate_text(model, tokenizer, prompt, seed=42, max_new_tokens=50, temperature=0.9, top_k=50, top_p=0.95, repetition_penalty=1.1):
    """Sample a continuation of ``prompt`` token by token and return the decoded text.

    The model is moved to CUDA when available (otherwise CPU) and put in eval
    mode; both changes persist after the call.

    Args:
        model: Callable as ``model(input_ids=...)`` returning a mapping with a
            ``"logits"`` entry indexed as ``[:, -1, :]``. If it exposes
            ``pos_emb.num_embeddings`` that value is used as the context
            window; otherwise the notebook-level ``cfg["context_length"]`` is
            used.
        tokenizer: Object providing ``encode(prompt, return_tensors="pt")``,
            ``decode(ids, skip_special_tokens=True)`` and ``eos_token_id``.
        prompt: Text to continue.
        seed: When not ``None``, seeds both ``torch`` and ``random`` globally.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Softmax temperature. ``0`` selects greedy decoding
            (argmax); negative values are rejected.
        top_k: Keep only the ``top_k`` highest logits (``None`` or ``<= 0``
            disables the filter). Values above the vocabulary size are clamped.
        top_p: Nucleus threshold (``None`` or ``>= 1.0`` disables the filter).
        repetition_penalty: Penalty applied to every token id already present
            in the sequence (``1.0`` disables it).

    Returns:
        The decoded text of the prompt plus the generated tokens.

    Raises:
        ValueError: If ``temperature`` is negative, or if tokens are requested
            for a prompt that encodes to no tokens.
    """
    if temperature < 0:
        raise ValueError("temperature must be >= 0")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()

    if seed is not None:
        torch.manual_seed(seed)
        random.seed(seed)

    # Prefer the context window stored in the model itself so the function does
    # not silently depend on whichever `cfg` is in the notebook namespace.
    pos_emb = getattr(model, "pos_emb", None)
    if hasattr(pos_emb, "num_embeddings"):
        context_length = pos_emb.num_embeddings
    else:
        context_length = cfg["context_length"]

    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)
    if max_new_tokens > 0 and input_ids.numel() == 0:
        raise ValueError("prompt must encode to at least one token")
    generated_ids = input_ids.clone()

    neg_inf = float("-inf")
    greedy = temperature == 0

    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Only the most recent `context_length` tokens are fed to the model.
            window = generated_ids[:, -context_length:]
            # Clone so the in-place penalty below never writes into model output.
            logits = model(input_ids=window)["logits"][:, -1, :].clone()

            if repetition_penalty != 1.0:
                # Dividing a negative logit would make the token MORE likely,
                # so negative logits are multiplied and positive ones divided.
                seen = torch.unique(generated_ids[0])
                seen_logits = logits[0, seen]
                logits[0, seen] = torch.where(
                    seen_logits < 0,
                    seen_logits * repetition_penalty,
                    seen_logits / repetition_penalty,
                )

            if greedy:
                next_token = torch.argmax(logits, dim=-1, keepdim=True)
            else:
                logits = logits / temperature

                if top_k is not None and top_k > 0:
                    # Clamp so a top_k above the vocabulary size cannot crash topk.
                    k = min(top_k, logits.size(-1))
                    kth_value = torch.topk(logits, k).values[:, -1, None]
                    logits = logits.masked_fill(logits < kth_value, neg_inf)

                if top_p is not None and top_p < 1.0:
                    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                    cumulative = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)

                    # Shift right so the first token crossing the threshold is
                    # kept and the best token is never removed.
                    remove_sorted = cumulative > top_p
                    remove_sorted[:, 1:] = remove_sorted[:, :-1].clone()
                    remove_sorted[:, 0] = False

                    # Map the mask from sorted order back to vocabulary order.
                    remove = torch.zeros_like(remove_sorted).scatter(
                        1, sorted_indices, remove_sorted
                    )
                    logits = logits.masked_fill(remove, neg_inf)

                probs = F.softmax(logits, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)

            generated_ids = torch.cat((generated_ids, next_token), dim=1)

            if next_token.item() == tokenizer.eos_token_id:
                break

    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [24]:
from transformers import GPT2Tokenizer
# Load the pretrained "gpt2" tokenizer used to encode prompts and decode output.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

In [25]:
# Pick CUDA when available, otherwise fall back to CPU, and move the model there.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# The trailing semicolon suppresses the model repr in the notebook output.
model.to(device);

In [26]:
import math

class LoRALayer(torch.nn.Module):
    """Low-rank update ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(in_dim, rank)`` and is initialised with Kaiming-uniform
    noise; ``B`` has shape ``(rank, out_dim)`` and starts at zero, so a freshly
    created layer outputs zeros.
    """

    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        down_proj = torch.empty(in_dim, rank)
        torch.nn.init.kaiming_uniform_(down_proj, a=math.sqrt(5))
        self.A = torch.nn.Parameter(down_proj)
        # Zero-initialised so the layer contributes nothing until trained.
        self.B = torch.nn.Parameter(torch.zeros(rank, out_dim))
        # Scaling factor applied to the low-rank product as-is.
        self.alpha = alpha

    def forward(self, x):
        """Return the scaled low-rank projection of ``x``."""
        low_rank = torch.matmul(torch.matmul(x, self.A), self.B)
        return self.alpha * low_rank

In [27]:
class LinearWithLoRA(torch.nn.Module):
    """Wrap a linear layer and add a LoRA update to its output.

    The wrapped layer is stored as ``linear`` and the adapter as ``lora``; the
    adapter's dimensions are taken from the wrapped layer's ``in_features`` and
    ``out_features``.
    """

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        self.lora = LoRALayer(
            in_dim=linear.in_features,
            out_dim=linear.out_features,
            rank=rank,
            alpha=alpha,
        )

    def forward(self, x):
        """Return the wrapped layer's output plus the LoRA correction."""
        base_output = self.linear(x)
        lora_delta = self.lora(x)
        return base_output + lora_delta

In [28]:
def replace_linear_with_lora(model, rank, alpha):
    """Recursively wrap selected ``torch.nn.Linear`` children in ``LinearWithLoRA``.

    A child is wrapped when it is a ``torch.nn.Linear`` and its lower-cased
    attribute name contains any of the fragments ``"q"``, ``"k"``, ``"v"``,
    ``"proj"`` or ``"fc"`` (plain substring match). Every other child is
    descended into. The model is modified in place and nothing is returned.
    """
    name_fragments = ("q", "k", "v", "proj", "fc")

    for child_name, child in model.named_children():
        lowered = child_name.lower()
        is_target = isinstance(child, torch.nn.Linear) and any(
            fragment in lowered for fragment in name_fragments
        )
        if not is_target:
            # Not a targeted Linear: keep searching inside it.
            replace_linear_with_lora(child, rank, alpha)
            continue
        setattr(model, child_name, LinearWithLoRA(child, rank, alpha))

In [29]:
# Count the parameters that are trainable before anything is frozen.
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters before: {total_params:,}")

# Freeze every existing parameter of the model.
for param in model.parameters():
    param.requires_grad = False

# Recount to confirm that nothing is trainable any more.
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable parameters after: {total_params:,}")

Total trainable parameters before: 838,359,040
Total trainable parameters after: 0


In [30]:
# Wrap the targeted linear layers with LoRA adapters (modifies `model` in place).
replace_linear_with_lora(model, rank=16, alpha=16)

# Only the newly created adapter parameters have requires_grad=True at this point.
total_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Total trainable LoRA parameters: {total_params:,}")

Total trainable LoRA parameters: 5,898,240


In [31]:
# Move the model again: the LoRA parameters were created after the earlier
# model.to(device) call, without an explicit device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
# Sets the global torch seed; generate_text reseeds on each call unless seed=None.
torch.manual_seed(123)

# Show the architecture with the LinearWithLoRA wrappers in place.
print(model)

GPTModel(
  (tok_emb): Embedding(50257, 1280)
  (pos_emb): Embedding(1024, 1280)
  (drop_emb): Dropout(p=0.0, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): LinearWithLoRA(
          (linear): Linear(in_features=1280, out_features=1280, bias=True)
          (lora): LoRALayer()
        )
        (W_key): LinearWithLoRA(
          (linear): Linear(in_features=1280, out_features=1280, bias=True)
          (lora): LoRALayer()
        )
        (W_value): LinearWithLoRA(
          (linear): Linear(in_features=1280, out_features=1280, bias=True)
          (lora): LoRALayer()
        )
        (out_proj): LinearWithLoRA(
          (linear): Linear(in_features=1280, out_features=1280, bias=True)
          (lora): LoRALayer()
        )
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1280, out_features=5120, bias=True)
      

In [32]:
# Restore the fine-tuned weights into the LoRA-wrapped model.
# weights_only=True limits unpickling to tensors and plain containers, which is
# all a state dict needs, so a tampered checkpoint cannot execute arbitrary code.
model.load_state_dict(torch.load("modelo_lora.pth", map_location="cpu", weights_only=True))

<All keys matched successfully>

In [None]:
import textwrap

def historia_interactiva(model, tokenizer, prompt_inicial, pasos=10, tokens_por_paso=40, device="cpu"):
    """Run an interactive story loop where the user picks between two continuations.

    At every step two candidates are generated from the story so far (the
    first with temperature 0.9, the second with 1.1, both with ``top_k=40`` and
    step-dependent seeds). The user answers "2"/"dos" to keep the second,
    "fin" to stop early, and anything else keeps the first. The final story is
    printed at the end.

    Args:
        model: Model passed through to ``generate_text``.
        tokenizer: Tokenizer passed through to ``generate_text``.
        prompt_inicial: Opening text of the story; surrounding whitespace is stripped.
        pasos: Maximum number of steps.
        tokens_por_paso: ``max_new_tokens`` used for each candidate.
        device: Accepted in the signature but not used by this function.
    """
    historia = prompt_inicial.strip()
    # One temperature per option; its position also offsets the seed.
    temperaturas = (0.9, 1.1)

    for indice in range(pasos):
        print(f"\n---------- Paso {indice + 1} ----------\n")
        print("Historia hasta ahora:\n" + historia + "...\n")

        print("Generando opciones...\n")

        primera, segunda = [
            generate_text(
                model,
                tokenizer,
                historia,
                max_new_tokens=tokens_por_paso,
                temperature=temperatura,
                top_k=40,
                seed=indice * 2 + desplazamiento,
            )
            for desplazamiento, temperatura in enumerate(temperaturas)
        ]

        # Show only what follows the current story, sliced by its character length.
        inicio = len(historia)
        print("Opción 1:\n...", textwrap.fill(primera[inicio:].strip(), width=80))
        print("\nOpción 2:\n...", textwrap.fill(segunda[inicio:].strip(), width=80))

        respuesta = input("\nElige 1 o 2: ").strip().lower()

        if respuesta == "fin":
            break
        elegida = segunda if respuesta in ("2", "dos") else primera
        historia = elegida.strip()
        clear_output(wait=True)

    clear_output(wait=True)
    print("\nHistoria completa:\n")
    print(textwrap.fill(historia, width=80))

In [34]:
# Ask the user for the opening of the story; an empty answer triggers a random one.
prompt_inicial = input("Escribe el inicio de tu historia (o déjalo vacío para que empiece la historia la IA automáticamente):\n> ").strip()
if prompt_inicial == "":
    # Built-in openings used when the user provides nothing.
    prompts = [
    "It was a quiet night until the phone rang unexpectedly.",
    "Deep in the forest, something ancient had awakened.",
    "She never expected the letter to arrive after all these years.",
    "The sky turned red as the city fell silent.",
    "I was walking home when I saw the shadow move.",
    "The mirror in the attic began to whisper again.",
    "No one believed him when he said he saw a ghost at school.",
    "Every night, the same dream. Every night, a little closer.",
    "They thought it was just a power outage... until the screams began.",
    "He opened the door and there it was — not human, not anymore."
    ]
    # `random` is imported in the cell that defines generate_text.
    prompt_inicial = random.choice(prompts)

# Start the interactive loop: up to 10 steps with 40 new tokens per candidate.
historia_interactiva(model, tokenizer, prompt_inicial, pasos=10, tokens_por_paso=40)


Historia completa:

He opened the door and there it was — not human, not anymore. “Hello?” The voice spoke to her with a gentle yet menacing tone. She could feel its presence but she couldn’t figure out what made it’s presence so peculiar. It wasn’t like it bothered the others. It seemed to just exist as a thing that kept repeating the same sound each time through the day. For some reason, no matter how loud
