# Fine-tuning de DeepSeek-R1-Distill-Llama-8B

### Importar librerías

In [None]:
import torch
from datasets import load_dataset
from unsloth.chat_templates import standardize_sharegpt, get_chat_template, train_on_responses_only
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported, FastLanguageModel

In [None]:
# Loader settings. max_seq_length is reused when the trainer is configured;
# dtype is left as None, i.e. no explicit dtype is requested from the loader.
max_seq_length, dtype, load_in_4bit = 2048, None, True

# Load the DeepSeek-R1 Distill-Llama-8B checkpoint and its tokenizer,
# requesting 4-bit loading through load_in_4bit.
model, tokenizer = FastLanguageModel.from_pretrained(
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
)

### Aplicar LoRA

In [None]:
# Attach LoRA adapters; `model` is rebound to the adapter-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    # Adapter shape: rank and scaling factor are both 16, with no dropout
    # and no bias terms.
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    # Module names that receive adapters: the q/k/v/o projections and the
    # gate/up/down projections.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    use_gradient_checkpointing="unsloth",
    # Rank-stabilized LoRA and LoftQ initialization are both switched off.
    use_rslora=False,
    loftq_config=None,
    # Fixed seed, matching the seed passed to the training arguments.
    random_state=3407,
)

### Cargar data

In [None]:
# Read the local JSON Lines file with the generic "json" loader and select
# its "train" split directly.
dataset = load_dataset(
    "json",
    data_files="data/dataset.jsonl",
    split="train",
)

### Estandarizar conversaciones ShareGPT

In [None]:
# Normalize the conversation records with Unsloth's ShareGPT helper before
# the chat template is applied.
# NOTE(review): presumably this rewrites ShareGPT-style "from"/"value" turns
# into "role"/"content" messages — confirm against the Unsloth docs.
dataset = standardize_sharegpt(dataset)

Map: 100%|██████████| 15/15 [00:00<00:00, 347.29 examples/s]


In [None]:
# Rebind the tokenizer to one that uses the "llama-3.1" chat template.
# NOTE(review): the DeepSeek-R1 distill checkpoint presumably ships its own
# chat template — confirm that overriding it with llama-3.1 is intended.
tokenizer = get_chat_template(tokenizer, chat_template="llama-3.1")

def formatting_prompts_func(examples):
    """Render every conversation in a batch to one chat-template string.

    Args:
        examples: Batch mapping whose "conversations" entry is an iterable
            of conversations, as supplied by a batched ``dataset.map``.

    Returns:
        ``{"text": [...]}`` with one rendered, untokenized string per
        conversation, in input order and without a trailing generation
        prompt.
    """
    rendered = []
    for conversation in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

# Add a "text" column holding the rendered conversations. batched=True makes
# map() call the formatter with a batch of rows rather than one row at a time.
dataset = dataset.map(formatting_prompts_func, batched=True)

### Entrenar

In [None]:
# Supervised fine-tuning setup: the trainer consumes the rendered "text" column.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    # Data handling: pre-rendered chat strings, packing disabled, two
    # dataset preprocessing workers.
    train_dataset=dataset,
    dataset_text_field="text",
    dataset_num_proc=2,
    max_seq_length=max_seq_length,
    packing=False,
    data_collator=DataCollatorForSeq2Seq(tokenizer=tokenizer),
    args=TrainingArguments(
        # Batching: 2 samples per device, gradients accumulated over 4 batches.
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Short run capped by step count; use num_train_epochs=1 instead of
        # max_steps for a single full pass over the data.
        max_steps=60,
        warmup_steps=5,
        # Optimizer and learning-rate schedule.
        optim="adamw_8bit",
        learning_rate=2e-4,
        weight_decay=0.01,
        lr_scheduler_type="linear",
        # Mixed precision: bf16 when the hardware supports it, fp16 otherwise.
        bf16=is_bfloat16_supported(),
        fp16=not is_bfloat16_supported(),
        # Reproducibility, logging and outputs; "none" disables external
        # experiment trackers (e.g. WandB).
        seed=3407,
        logging_steps=1,
        output_dir="outputs",
        report_to="none",
    ),
)

# Wrap the trainer with Unsloth's train_on_responses_only helper. The two
# marker strings are the Llama-3.1 header sequences that open a user turn and
# an assistant turn, so they must match the chat template set on the tokenizer.
# NOTE(review): presumably tokens outside assistant responses are masked out
# of the loss — confirm against the Unsloth documentation.
trainer = train_on_responses_only(
    trainer,
    instruction_part="<|start_header_id|>user<|end_header_id|>\n\n",  # Header that opens a user turn
    response_part="<|start_header_id|>assistant<|end_header_id|>\n\n",  # Header that opens an assistant turn
)
# Run fine-tuning; the value returned by train() is kept for later inspection.
trainer_stats = trainer.train()

### Guardar modelo

In [None]:
# Persist the fine-tuned model and the tokenizer side by side in one directory.
# The path lives in its own variable: rebinding `model` to the path string
# would leave no model object to call save_pretrained() on (AttributeError on
# str) and nothing would be written.
# NOTE(review): on a LoRA/PEFT-wrapped model save_pretrained() presumably
# writes only the adapter weights, not a merged model — confirm that this is
# the artifact you want.
save_directory = "Project-7403"
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)