In [2]:
!pip install torch transformers datasets


Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting 

In [12]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, DataCollatorForLanguageModeling
from datasets import Dataset
import os

In [13]:
# Desactivar Weights & Biases de forma segura
os.environ["WANDB_MODE"] = "offline"

In [14]:
# Cargar modelo pre-entrenado (GPT-2 como base)
MODEL_NAME = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer.pad_token = tokenizer.eos_token  # Solución al error de padding
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)


In [15]:
# Definir datos de entrenamiento
dialogues = [
    {"input": "Hola, ¿cómo estás?", "output": "¡Hola! Estoy bien, gracias. ¿Y tú?"},
    {"input": "¿Cuál es tu nombre?", "output": "Soy un chatbot basado en Transformers."},
    {"input": "Cuéntame un chiste", "output": "¿Por qué el libro de matemáticas estaba triste? Porque tenía demasiados problemas."}
]

def tokenize_function(example):
    return tokenizer(f"<s>{example['input']} {example['output']}</s>", truncation=True, padding='max_length', max_length=128)


In [16]:
# Crear dataset
dataset = Dataset.from_list(dialogues)
tokenized_datasets = dataset.map(tokenize_function, batched=False)

Map:   0%|          | 0/3 [00:00<?, ? examples/s]

In [17]:
# Configurar entrenamiento
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    num_train_epochs=3,
    logging_dir="./logs",
    logging_steps=10,
    save_steps=500,
    save_total_limit=2,
    report_to=[]  # Desactivar completamente el logging en W&B
)

data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets,
    tokenizer=tokenizer,
    data_collator=data_collator,
)

  trainer = Trainer(


In [18]:
# Entrenar modelo
trainer.train()


Step,Training Loss


TrainOutput(global_step=6, training_loss=4.32051436106364, metrics={'train_runtime': 70.7318, 'train_samples_per_second': 0.127, 'train_steps_per_second': 0.085, 'total_flos': 587907072000.0, 'train_loss': 4.32051436106364, 'epoch': 3.0})

In [19]:
# Guardar modelo
model.save_pretrained("./chatbot_model")
tokenizer.save_pretrained("./chatbot_model")

('./chatbot_model/tokenizer_config.json',
 './chatbot_model/special_tokens_map.json',
 './chatbot_model/vocab.json',
 './chatbot_model/merges.txt',
 './chatbot_model/added_tokens.json',
 './chatbot_model/tokenizer.json')

In [20]:
# Función de interacción con el chatbot
def chat():
    print("Chatbot listo. Escribe 'salir' para terminar.")
    while True:
        user_input = input("Tú: ")
        if user_input.lower() == "salir":
            break
        inputs = tokenizer(user_input, return_tensors="pt", padding=True)
        outputs = model.generate(**inputs, max_length=50)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print("Chatbot:", response)

if __name__ == "__main__":
    chat()


Chatbot listo. Escribe 'salir' para terminar.
Tú: salir
