# Fine Tuning DeepSeek R1 8b

### Importar librerías

In [1]:
import torch
torch.cuda.is_available()

True

In [2]:
import torch
from datasets import load_dataset
from unsloth.chat_templates import standardize_sharegpt, get_chat_template, train_on_responses_only
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported, FastLanguageModel

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
Unsloth: Failed to patch Gemma3ForConditionalGeneration.


    PyTorch 2.6.0+cu124 with CUDA 1204 (you have 2.6.0+cu118)
    Python  3.11.9 (you have 3.11.9)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
max_seq_length = 2048 
dtype = None   
load_in_4bit = True 

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/tinyllama-chat", 
    max_seq_length=max_seq_length,  
    dtype=dtype,  
    load_in_4bit=load_in_4bit 
)

  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.51.0.
   \\   /|    NVIDIA GeForce GTX 1650. Num GPUs = 1. Max memory: 4.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu118. CUDA: 7.5. CUDA Toolkit: 11.8. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


### Aplicar LoRA

In [4]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank (controls low-rank approximation quality)
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],  # Layers to apply LoRA
    lora_alpha=16, # Scaling factor for LoRA weights
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407, 
    use_rslora=False, 
    loftq_config=None
)

Unsloth 2025.3.19 patched 22 layers with 22 QKV layers, 22 O layers and 22 MLP layers.


### Cargar data

In [5]:
dataset = load_dataset("json", data_files="data\dataset.jsonl", split="train")

### Estandarizar GPT

In [6]:
dataset = standardize_sharegpt(dataset)

In [9]:
# Apply the Llama-3.1 chat template to the tokenizer
tokenizer = get_chat_template(
    tokenizer,  # Tokenizer being used
    chat_template="llama-3.1",  # The chat template format
)

# Function to format the conversation data into tokenized text
def convert_to_conversation(example):
    return {
        "conversations": [
            {"role": "user", "content": example["from"]},
            {"role": "assistant", "content": example["value"]}
        ]
    }

dataset = dataset.map(convert_to_conversation)



Map: 100%|██████████| 15/15 [00:00<00:00, 322.37 examples/s]


### Entrenar

In [14]:
def formatting_func(example):
    return [f"<|start_header_id|>user<|end_header_id|>\n\n{example['input']}<|eot_id|>\n<|start_header_id|>assistant<|end_header_id|>\n\n{example['output']}<|eot_id|>"]



trainer = SFTTrainer(
    model=model,  
    tokenizer=tokenizer, 
    train_dataset=dataset, 
    formatting_func=formatting_func,
    max_seq_length=max_seq_length,  
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),  
    dataset_num_proc=2, 
    packing=False,  

    args=TrainingArguments(
        per_device_train_batch_size=2,  # Number of examples per GPU batch
        gradient_accumulation_steps=4,  # Accumulate gradients over 4 batches before updating model
        warmup_steps=5,  # Number of warmup steps for learning rate schedule
        max_steps=60,  # Limit training steps to 60 (for quick testing)
        # num_train_epochs=1 
        learning_rate=2e-4,  
        fp16=not is_bfloat16_supported(),  
        bf16=is_bfloat16_supported(),  
        logging_steps=1,  # Log training metrics after every step
        optim="adamw_8bit",  
        weight_decay=0.01, 
        lr_scheduler_type="linear",  # Linear decay of learning rate
        seed=3407, 
        output_dir="outputs",  # Directory to save model checkpoints
        report_to="none",  # Use this for WandB etc

    ),
)
# Start training the model
trainer_stats = trainer.train()

KeyError: 'input'

### Guardar modelo

In [None]:
model="Project-7403"
model.save_pretrained(model)  
tokenizer.save_pretrained(model)