<a href="https://colab.research.google.com/github/frank-morales2020/MLxDL/blob/main/FineTune_DeepSeek_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install -U transformers
!pip install -U datasets
!pip install -U accelerate
!pip install -U peft
!pip install -U trl # For SFTTrainer
!pip install -U bitsandbytes
!pip install unsloth # Recommended for speed and efficiency
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git # For latest Unsloth

In [None]:
from unsloth import FastLanguageModel
import torch

# --- Model loading settings -------------------------------------------------
# Sequence-length limit handed to the model loader (and reused by the trainer).
max_seq_length = 2048
# None lets the loader choose the data type automatically (float16, bfloat16, ...).
dtype = None
# Load the weights 4-bit quantized to reduce memory usage.
load_in_4bit = True

# Load the base model together with its tokenizer. Other DeepSeek-R1 variants
# can be substituted for `model_name`.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)

In [None]:
# Projection layers that receive LoRA adapters (attention + MLP projections).
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the loaded model with LoRA adapters; `model` is rebound to the wrapped
# model, so re-running this cell without reloading wraps it again.
model = FastLanguageModel.get_peft_model(
    model,
    target_modules=lora_target_modules,
    r=16,                             # LoRA rank (common values: 8, 16, 32, 64)
    lora_alpha=16,                    # scaling factor applied to the LoRA update
    lora_dropout=0,                   # no dropout on the adapter layers
    bias="none",                      # alternatives: "all", "lora_only"
    use_gradient_checkpointing=True,  # trades compute for lower memory use
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)

In [None]:
from datasets import load_dataset
from unsloth.chat_templates import standardize_sharegpt

# Load the training split (example: a mental-health counseling dataset) and
# standardize it to ShareGPT format in one step.
dataset = standardize_sharegpt(
    load_dataset("Sulav/mental_health_counseling_conversations_sharegpt", split="train")
)


def format_dataset(example):
    """Add a ``text`` field holding the chat-templated conversation.

    Reads ``example["conversations"]``, renders it with the tokenizer's chat
    template (``tokenize=False``), stores the result under ``example["text"]``
    and returns the same example object.
    """
    # NOTE(review): `add_special_tokens` is passed through as an extra keyword;
    # confirm it has the intended effect when tokenize=False.
    rendered = tokenizer.apply_chat_template(
        example["conversations"],
        tokenize=False,
        add_special_tokens=False,
    )
    example["text"] = rendered
    return example


# Apply the formatter one example at a time.
dataset = dataset.map(format_dataset, batched=False)

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Decide the mixed-precision mode once: bfloat16 when the GPU supports it,
# float16 otherwise.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="outputs",
    # Batch of 2 per device with 4 accumulation steps; adjust to GPU memory.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # One pass over the data (max_steps could be set instead).
    num_train_epochs=1,
    warmup_steps=5,
    learning_rate=2e-4,
    optim="adamw_8bit",
    bf16=use_bf16,
    fp16=not use_bf16,
    logging_steps=1,
    seed=3407,
    report_to="none",
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # dataset column holding the formatted text
    max_seq_length=max_seq_length,
    args=training_args,
)

# Run training; as the cell's last expression, the returned result is displayed.
trainer.train()

TrainOutput(global_step=439, training_loss=2.243786193243735, metrics={'train_runtime': 1866.875, 'train_samples_per_second': 1.881, 'train_steps_per_second': 0.235, 'total_flos': 5.815900021093171e+16, 'train_loss': 2.243786193243735})

In [10]:
# Save only the LoRA adapters, plus the tokenizer, to a local directory.
# The tokenizer must be saved with its own call: the second positional
# parameter of `model.save_pretrained` is not a tokenizer slot, so passing the
# tokenizer there does not write any tokenizer files.
model.save_pretrained("DeepSeek-R1-FineTuned")
tokenizer.save_pretrained("DeepSeek-R1-FineTuned")

# Or push to Hugging Face Hub. Replace `ft_model` with
# "your_username/your_fine_tuned_deepseek_r1_model" to publish under your own
# account. As above, the tokenizer is uploaded with its own call.
ft_model = "frankmorales2020/unsloth-DeepSeek-R1-Distill-Llama-8B-mental_health_counseling"
model.push_to_hub(ft_model)
tokenizer.push_to_hub(ft_model)

# To save the full model in GGUF format (for local inference, e.g., with llama.cpp)
# model.push_to_hub_gguf(ft_model, tokenizer, quantization_method = "q4_k_m")

No files have been modified since last commit. Skipping to prevent empty commit.


Saved model to https://huggingface.co/frankmorales2020/unsloth-DeepSeek-R1-Distill-Llama-8B-mental_health_counseling
