In [1]:
import torch
import torch.nn as nn
import json
import torch.nn.functional as F
import random
from IPython.display import clear_output

In [2]:
from capas_gpt import TransformerBlock, LayerNorm

In [3]:
class GPTModel(nn.Module):
    """Decoder-style language model assembled from the pieces named in ``cfg``.

    ``cfg`` is a mapping that must provide ``vocab_size``, ``emb_dim``,
    ``context_length``, ``drop_rate`` and ``n_layers``; it is also handed
    unchanged to every ``TransformerBlock``.
    """

    def __init__(self, cfg):
        super().__init__()
        vocab_size = cfg["vocab_size"]
        emb_dim = cfg["emb_dim"]

        # Token and learned position lookups share the same embedding width.
        self.tok_emb = nn.Embedding(vocab_size, emb_dim)
        self.pos_emb = nn.Embedding(cfg["context_length"], emb_dim)
        self.drop_emb = nn.Dropout(cfg["drop_rate"])

        # Stack of n_layers blocks applied one after another.
        blocks = []
        for _ in range(cfg["n_layers"]):
            blocks.append(TransformerBlock(cfg))
        self.trf_blocks = nn.Sequential(*blocks)

        self.final_norm = LayerNorm(emb_dim)
        # Bias-free projection from the embedding width back to vocabulary size.
        self.out_head = nn.Linear(emb_dim, vocab_size, bias=False)

    def forward(self, in_idx):
        """Map a 2-D tensor of token ids to per-position vocabulary logits."""
        # Unpacking into two names requires in_idx to be exactly 2-D.
        _, n_tokens = in_idx.shape
        positions = torch.arange(n_tokens, device=in_idx.device)

        # Position embeddings broadcast over the leading (batch) dimension.
        hidden = self.tok_emb(in_idx) + self.pos_emb(positions)
        hidden = self.drop_emb(hidden)
        hidden = self.trf_blocks(hidden)
        return self.out_head(self.final_norm(hidden))

In [4]:
# Read the model configuration from disk. `cfg` stays a notebook-level global:
# besides building the model, the generation functions below read
# cfg["context_length"] from it.
with open("config_gpt.json", "r") as f:
    cfg = json.load(f)

# Build the architecture, then restore the saved weights, mapping tensors to CPU.
# NOTE(review): torch.load unpickles the checkpoint file, so only load files from
# a trusted source; consider weights_only=True if the installed torch supports it.
model = GPTModel(cfg)
model.load_state_dict(torch.load("modelo_gpt_custom.pth", map_location="cpu"))
# Switch to evaluation mode; as the cell's last expression this also displays the model.
model.eval()

GPTModel(
  (tok_emb): Embedding(50257, 1280)
  (pos_emb): Embedding(1024, 1280)
  (drop_emb): Dropout(p=0.0, inplace=False)
  (trf_blocks): Sequential(
    (0): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(in_features=1280, out_features=1280, bias=True)
        (W_key): Linear(in_features=1280, out_features=1280, bias=True)
        (W_value): Linear(in_features=1280, out_features=1280, bias=True)
        (out_proj): Linear(in_features=1280, out_features=1280, bias=True)
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (ff): FeedForward(
        (layers): Sequential(
          (0): Linear(in_features=1280, out_features=5120, bias=True)
          (1): GELU()
          (2): Linear(in_features=5120, out_features=1280, bias=True)
        )
      )
      (norm1): LayerNorm()
      (norm2): LayerNorm()
      (drop_shortcut): Dropout(p=0.0, inplace=False)
    )
    (1): TransformerBlock(
      (att): MultiHeadAttention(
        (W_query): Linear(i

In [5]:
from transformers import GPT2Tokenizer
# Load the tokenizer registered under the name "gpt2".
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
def generate_simple(model, tokenizer, prompt, max_new_tokens=50, context_length=None):
    """Greedily extend ``prompt`` and return the decoded text.

    At every step the model is run on the most recent ``context_length`` tokens
    and the highest-scoring next token is appended. Exactly ``max_new_tokens``
    tokens are generated; there is no early stop.

    Args:
        model: Callable module mapping a 2-D tensor of token ids to logits whose
            last dimension indexes the vocabulary. It is moved to the selected
            device and put in eval mode as a side effect.
        tokenizer: Object providing ``encode(prompt, return_tensors="pt")`` and
            ``decode(ids, skip_special_tokens=True)``.
        prompt: Text to continue.
        max_new_tokens: Number of tokens to append.
        context_length: Maximum number of trailing tokens fed to the model.
            Defaults to the notebook-level ``cfg["context_length"]``.

    Returns:
        The decoded text of the first sequence (prompt plus generated tokens).
    """
    if context_length is None:
        # Keep the original behavior: fall back to the global notebook config.
        context_length = cfg["context_length"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Inference only: no autograd bookkeeping is needed anywhere in the loop.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Only the most recent tokens fit in the model's context window.
            input_ids_cropped = input_ids[:, -context_length:]
            logits = model(input_ids_cropped)

            # keepdim=True yields shape (batch, 1), so the concatenation along
            # dim=1 is correct for any batch size, not only for a single row.
            next_token = torch.argmax(logits[:, -1, :], dim=-1, keepdim=True)
            input_ids = torch.cat([input_ids, next_token], dim=1)

    return tokenizer.decode(input_ids[0], skip_special_tokens=True)

In [7]:
# Quick check of generate_simple: continue a short prompt by 80 tokens and print the text.
prompt = "Every effort moves you"
output = generate_simple(model, tokenizer, prompt, max_new_tokens=80)
print(output)

Every effort moves you forward.

"I'm not going to be a guy who's going to sit back and say, 'I'm not going to do anything.' I'm going to do everything I can to help the team win."

The Panthers have been a team that has been on the rise for a while now. They have won five of their last six games, including a 4-1 win


In [8]:
def generate_custom(model, tokenizer, prompt, max_new_tokens=50, temperature=1.0, top_k=50, seed=None,
                    context_length=None):
    """Continue ``prompt`` by sampling with temperature and optional top-k filtering.

    Generation stops after ``max_new_tokens`` tokens or right after the
    tokenizer's end-of-sequence token has been appended, whichever comes first.
    Only a single sequence (batch size 1) is supported.

    Args:
        model: Callable module mapping a 2-D tensor of token ids to logits whose
            last dimension indexes the vocabulary. It is put in eval mode and
            moved to the selected device as a side effect.
        tokenizer: Object providing ``encode(prompt, return_tensors="pt")``,
            ``decode(ids, skip_special_tokens=True)`` and ``eos_token_id``.
        prompt: Text to continue.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Divisor applied to the logits before sampling. ``0`` selects
            the most likely token deterministically (greedy decoding).
        top_k: If not ``None``, sample only among the ``top_k`` highest-scoring
            tokens; values above the vocabulary size are clamped to it.
        seed: If not ``None``, seeds both ``torch`` and ``random`` for
            reproducible sampling.
        context_length: Maximum number of trailing tokens fed to the model.
            Defaults to the notebook-level ``cfg["context_length"]``.

    Returns:
        The decoded text (prompt plus generated tokens).

    Raises:
        ValueError: If ``temperature`` is negative.
    """
    if temperature < 0:
        raise ValueError("temperature must be non-negative")
    if context_length is None:
        # Keep the original behavior: fall back to the global notebook config.
        context_length = cfg["context_length"]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.eval()
    model.to(device)

    if seed is not None:
        torch.manual_seed(seed)
        random.seed(seed)

    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    # Inference only: no autograd bookkeeping is needed anywhere in the loop.
    with torch.no_grad():
        for _ in range(max_new_tokens):
            # Only the most recent tokens fit in the model's context window.
            input_ids_cropped = input_ids[:, -context_length:]
            next_token_logits = model(input_ids_cropped)[:, -1, :]

            if temperature == 0:
                # Dividing by zero would produce inf/NaN logits; the limit of
                # temperature -> 0 is simply the arg-max token.
                next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)
            else:
                next_token_logits = next_token_logits / temperature

                if top_k is not None:
                    # torch.topk raises when k exceeds the vocabulary size.
                    k = min(top_k, next_token_logits.size(-1))
                    values, indices = torch.topk(next_token_logits, k)
                    probs = F.softmax(values, dim=-1)
                    sampled = torch.multinomial(probs, num_samples=1)
                    # Translate the position within the top-k list back to a vocabulary id.
                    next_token = indices.gather(-1, sampled)
                else:
                    probs = F.softmax(next_token_logits, dim=-1)
                    next_token = torch.multinomial(probs, num_samples=1)

            next_token = next_token.view(1, 1)
            input_ids = torch.cat([input_ids, next_token], dim=1)

            # The end-of-sequence token is appended before stopping.
            if next_token.item() == tokenizer.eos_token_id:
                break

    return tokenizer.decode(input_ids[0], skip_special_tokens=True)

In [None]:
import textwrap

def _texto_nuevo(texto_generado, contexto):
    """Return the part of ``texto_generado`` that follows ``contexto``, stripped.

    The generated text is produced by encoding and decoding the context, which
    is not guaranteed to reproduce it character for character. Slicing by
    ``len(contexto)`` is therefore only valid when the text really starts with
    the context; otherwise the whole generated text is returned instead of a
    fragment cut at an arbitrary position.
    """
    if texto_generado.startswith(contexto):
        return texto_generado[len(contexto):].strip()
    return texto_generado.strip()


def historia_interactiva(model, tokenizer, prompt_inicial, pasos=10, tokens_por_paso=40, device="cpu"):
    """Run an interactive story loop where the user picks between two continuations.

    At each step two continuations of the current story are generated with
    ``generate_custom`` (different temperature and seed), shown to the user,
    and the chosen one becomes the new story. Entering ``2`` or ``dos`` picks
    the second option, ``fin`` ends the loop early, and any other input picks
    the first option. The finished story is printed at the end.

    Args:
        model: Model passed through to ``generate_custom``.
        tokenizer: Tokenizer passed through to ``generate_custom``.
        prompt_inicial: Opening text of the story.
        pasos: Maximum number of choice rounds.
        tokens_por_paso: Maximum number of new tokens generated per option.
        device: Unused; kept so existing callers that pass it keep working.

    Returns:
        None. All results are printed.
    """
    contexto = prompt_inicial.strip()

    for paso in range(pasos):
        print(f"\n----------- Paso {paso + 1} -----------")
        print("Historia hasta ahora:\n" + contexto + "...\n")

        print("Generando opciones...\n")

        # Distinct seeds per step and per option keep the two options different
        # from each other while remaining reproducible between runs.
        opcion1 = generate_custom(model, tokenizer, contexto, max_new_tokens=tokens_por_paso,
                                  temperature=0.9, top_k=40, seed=paso * 2)
        opcion2 = generate_custom(model, tokenizer, contexto, max_new_tokens=tokens_por_paso,
                                  temperature=1.1, top_k=40, seed=paso * 2 + 1)

        # Show only the newly generated part of each option.
        print("Opción 1:\n...", textwrap.fill(_texto_nuevo(opcion1, contexto), width=80))
        print("\nOpción 2:\n...", textwrap.fill(_texto_nuevo(opcion2, contexto), width=80))

        eleccion = input("\nElige 1 o 2: ").strip().lower()

        if eleccion == "2" or eleccion == "dos":
            contexto = opcion2.strip()
        elif eleccion == "fin":
            break
        else:
            # Any unrecognized answer falls back to the first option.
            contexto = opcion1.strip()
        clear_output(wait=True)

    clear_output(wait=True)
    print("\nHistoria completa:\n")
    print(textwrap.fill(contexto, width=80))

In [10]:
import random

In [11]:
# Ask the user for the story opening; surrounding whitespace is dropped.
prompt_inicial = input("Escribe el inicio de tu historia (o déjalo vacío para que empiece la IA una automáticamente):\n> ").strip()
# An empty answer means the notebook picks one of the built-in openings at random.
if prompt_inicial == "":
    prompts = [
    "Today was the first day of summer, and everything felt possible.",
    "He found an old map tucked inside a library book.",
    "She woke up with the ability to speak every language in the world.",
    "The dog was not ordinary — it could solve puzzles.",
    "They built a treehouse that reached above the clouds.",
    "A letter from a stranger arrived, offering a surprising challenge.",
    "The lemonade stand made more money than anyone expected.",
    "He tripped over something... and discovered a tiny hidden door.",
    "Grandma's recipe book had secret instructions in invisible ink.",
    "Their science project accidentally sent an email to NASA."
    ]

    # No seed is set here, so the chosen opening can differ between runs.
    prompt_inicial = random.choice(prompts)

# Start the interactive loop: up to 10 rounds, at most 40 new tokens per option.
historia_interactiva(model, tokenizer, prompt_inicial, pasos=10, tokens_por_paso=40)


Historia completa:

The dog was not ordinary — it could solve puzzles. But it had a problem: What to
do with a pet who wanted nothing more than the chance to play.  He had no
interest in being around strangers, so he was living inside a house
