In [1]:
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import bitsandbytes as bnb
from torchvision import models, transforms, datasets
import matplotlib.pyplot as plt

from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

import torch
import bitsandbytes as bnb

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model_name = "PY007/TinyLlama-1.1B-Chat-v0.1"

# Tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Modelo en 4-bit con bitsandbytes
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [4]:
# Configuración LoRA
lora_config = LoraConfig(
    r=8,                 # rango bajo para adaptadores
    lora_alpha=32,       # escala
    target_modules=["q_proj","v_proj"],  # capas donde aplicar LoRA
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Aplicar LoRA al modelo
model = get_peft_model(model, lora_config)

In [5]:
from datasets import load_dataset

# Dataset de prueba
dataset = load_dataset("bertin-project/alpaca-spanish")
train_dataset = dataset["train"]

def preprocess(examples):
    # Formatear prompt + respuesta para cada ejemplo en el batch
    prompts = [f"### Human: {inst}\n### Assistant: {out}" 
               for inst, out in zip(examples['instruction'], examples['output'])]
    tokenized = tokenizer(prompts, truncation=True, padding="max_length", max_length=128)
    tokenized["labels"] = tokenized["input_ids"].copy()
    return tokenized

train_dataset = train_dataset.map(preprocess, batched=True)
train_dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

Map: 100%|██████████| 51942/51942 [00:03<00:00, 13286.85 examples/s]


In [6]:
# Mostrar un ejemplo
print(train_dataset[0])

{'input_ids': tensor([    1,   835, 12968, 29901, 18613,  2182, 29948, 28711, 25348, 29973,
           13,  2277, 29937,  4007, 22137, 29901, 25348, 28711,  3976, 13321,
          553,  2251,   381,  1091,   265,  1682,   280,  1417, 29889, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000, 32000,
        32000, 32000, 32000, 32000, 32000, 32000, 

In [7]:
from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./tinyllama-lora",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    save_steps=200,
    save_total_limit=2,
    max_steps=1000,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset
)

trainer.train()

Step,Training Loss
10,5.7647
20,5.8363
30,5.1249
40,5.0113
50,4.614
60,4.2999
70,4.4468
80,4.5288
90,4.303
100,4.3067


TrainOutput(global_step=1000, training_loss=3.9183070449829103, metrics={'train_runtime': 1084.4133, 'train_samples_per_second': 14.755, 'train_steps_per_second': 0.922, 'total_flos': 1.2725954543616e+16, 'train_loss': 3.9183070449829103, 'epoch': 0.30803588618074007})

In [8]:
# Guardar adaptadores LoRA
model.save_pretrained("./tinyllama-lora")

In [9]:
# Cargar
from peft import PeftModel

# Modelo 4-bit usando bitsandbytes
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

model = PeftModel.from_pretrained(model, "./tinyllama-lora")

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


In [12]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(model_name)

prompt = "### Human: Que es la IA.\n### Assistant:"
inputs = tokenizer(prompt, return_tensors="pt").to("cuda")


In [13]:
# Modo evaluación
model.eval()

# Generación
with torch.no_grad():
    outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,  # importante para modelos pequeños
    eos_token_id=tokenizer.eos_token_id,
)


# Decodificar tokens
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("💡 Resultado:")
print(generated_text)


💡 Resultado:
### Human: Que es la IA.
### Assistant: La IA es un tipo de entidad que se utiliza para procesar datos y realizar tareas con base en datos. En otras palabras, es una entidad que se puede programar para realizar tareas con base en datos. La IA se utiliza para procesar datos y realizar tareas con base en datos, como buscar información de recursos, procesamiento de datos, análisis de datos y creación de modelos. Mientras que las tareas tradicionales utilizan una serie de herramientas y programas de software, la IA utiliza tecnologías de inteligencia artificial para realizar tareas de forma automática. The Pioneer 100th Anniversary Limited Edition Edition is a limited edition of 300 copies, each of which will be signed by the entire band. This special edition includes a 180g vinyl, a 1
