## Install Packages

In [1]:
!pip install bitsandbytes
!pip install -U bitsandbytes
!pip install -q unsloth transformers datasets accelerate bitsandbytes
!pip install --upgrade unsloth
!pip install --upgrade transformers
!pip install datasets



## Base Model DownLoad

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from huggingface_hub import notebook_login
notebook_login()
# HuggingFace Token

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
'''
from huggingface_hub import snapshot_download

# Route setting
model_dir = "/content/drive/MyDrive/285_project/DeepSeek-R1-Distill-Llama-8B"

# Download and save models
snapshot_download(repo_id="unsloth/DeepSeek-R1-Distill-Llama-8B", local_dir=model_dir, local_dir_use_symlinks=False)

print(f"Model saved to: {model_dir}")
'''

'\nfrom huggingface_hub import snapshot_download\n\n# Route setting\nmodel_dir = "/content/drive/MyDrive/285_project/DeepSeek-R1-Distill-Llama-8B"\n\n# Download and save models\nsnapshot_download(repo_id="unsloth/DeepSeek-R1-Distill-Llama-8B", local_dir=model_dir, local_dir_use_symlinks=False)\n\nprint(f"Model saved to: {model_dir}")\n'

## Unsloth Framework

In [22]:
import torch
from unsloth import FastLanguageModel
from transformers import TrainingArguments

MODEL_NAME = "/content/drive/MyDrive/285_project/DeepSeek-R1-Distill-Llama-8B"
quantization_dtype = torch.float16  # FP16 setting

# Load model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = MODEL_NAME,
    max_seq_length = 4096,
    dtype=quantization_dtype,
    load_in_4bit= False # 4-bit quantization
)

==((====))==  Unsloth 2025.2.15: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    GPU: NVIDIA A100-SXM4-40GB. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [25]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
    

In [26]:
tokenizer

LlamaTokenizerFast(name_or_path='/content/drive/MyDrive/285_project/DeepSeek-R1-Distill-Llama-8B', vocab_size=128000, model_max_length=131072, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<｜begin▁of▁sentence｜>', 'eos_token': '<｜end▁of▁sentence｜>', 'pad_token': '<|finetune_right_pad_id|>'}, clean_up_tokenization_spaces=False, added_tokens_decoder={
	128000: AddedToken("<｜begin▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128001: AddedToken("<｜end▁of▁sentence｜>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128002: AddedToken("<|reserved_special_token_0|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128003: AddedToken("<|reserved_special_token_1|>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	128004: AddedToken("<|finetune_right_pad_id|>", rstrip=False, lstrip=False, single_word=Fals

In [27]:
print(model.dtype)

torch.float16


## SFT using QLora

In [8]:
import os
from datasets import load_dataset

In [9]:
train_prompt_style = """Below is an instruction that describes a task, paired with relevant background information.
Your task is to generate a well-reasoned and structured response.

Before providing an answer, carefully analyze the question and construct a logical, step-by-step reasoning process.
Make sure to justify your conclusions based on medical knowledge and clinical evidence.

### Instruction:
You are an advanced medical AI assistant with expertise in clinical reasoning, diagnostics, and treatment planning.
Your goal is to analyze the given medical question and provide a well-structured, evidence-based response. Treat every
question as happenning to your childrens or parents and you really want to try your best to help them.

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [10]:
EOS_TOKEN = tokenizer.eos_token
tokenizer.eos_token

'<｜end▁of▁sentence｜>'

In [11]:
# Data formating
def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]

    texts = []
    for input_text, cot, output in zip(inputs, cots, outputs):
        cot = cot if cot else "No detailed reasoning provided."
        output = output if output else "No answer available."

        formatted_text = train_prompt_style.format(input_text, cot, output) + tokenizer.eos_token
        texts.append(formatted_text)

    return {"text": texts}

In [12]:
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT", "en", split = "train", trust_remote_code=True)
dataset = dataset.map(formatting_prompts_func, batched=True)

README.md:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

medical_o1_sft.json:   0%|          | 0.00/74.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25371 [00:00<?, ? examples/s]

Map:   0%|          | 0/25371 [00:00<?, ? examples/s]

In [13]:
dataset['text'][0]

"Below is an instruction that describes a task, paired with relevant background information.\nYour task is to generate a well-reasoned and structured response.\n\nBefore providing an answer, carefully analyze the question and construct a logical, step-by-step reasoning process.\nMake sure to justify your conclusions based on medical knowledge and clinical evidence.\n\n### Instruction:\nYou are an advanced medical AI assistant with expertise in clinical reasoning, diagnostics, and treatment planning.\nYour goal is to analyze the given medical question and provide a well-structured, evidence-based response. Treat every\nquestion as happenning to your childrens or parents and you really want to try your best to help them.\n\n### Question:\nA 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about 

In [28]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank，控制适配层参数规模
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # 关键投影层（自注意力层）
        "gate_proj", "up_proj", "down_proj",  # MLP 层
    ],
    lora_alpha=16,  # LoRA scaling factor
    lora_dropout=0,  # 设为 0 表示不进行 dropout，保持最大信息保留
    bias="none",  # 不训练偏置项
    use_gradient_checkpointing="unsloth",  # 适用于大模型，减少显存消耗
    random_state=3407,  # 设定随机种子，保证实验可复现
    use_rslora=False,  # 不使用 RsLoRA
    loftq_config=None  # 不进行 LoftQ 量化
)

In [15]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

In [30]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,  # 训练数据
    dataset_text_field="text",  # 训练数据中的文本字段
    max_seq_length=4096,  # 设定最大序列长度
    dataset_num_proc=2,  # 处理数据时使用的 CPU 线程数

    args=TrainingArguments(
        per_device_train_batch_size=6,  # 每个 GPU 上的 batch size
        gradient_accumulation_steps=4,  # 梯度累积步数，等效于更大的 batch size
        num_train_epochs=3,  # 训练轮数
        warmup_steps=5,  # 预热步数
        # max_steps=60,  # 训练步数
        learning_rate=2e-4,  # 学习率
        fp16=True,  # Explicitly set fp16 to True
        bf16=False,  # Explicitly set bf16 to False
        logging_steps=10,  # 每 10 步记录日志
        optim="adamw_8bit",  # 使用 8-bit AdamW 优化器
        weight_decay=0.01,  # 权重衰减
        lr_scheduler_type="linear",  # 线性学习率衰减
        seed=3407,  # 设定随机种子，保证实验可复现
        output_dir="/content/drive/MyDrive/285_project/outputs",  # 训练结果输出目录
    ),
)


In [31]:
import wandb
# 用自己的key
wandb.login(key = "3ac3b3b7b290b70c837757c7d76565341a57e614")



True

In [32]:
# Model Training
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 25,371 | Num Epochs = 3
O^O/ \_/ \    Batch size per device = 6 | Gradient Accumulation steps = 4
\        /    Total batch size = 24 | Total steps = 3,171
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
10,1.8869
20,1.3379
30,1.2513
40,1.2053
50,1.2161
60,1.186
70,1.1868
80,1.203
90,1.1831
100,1.1634




In [33]:
trainer_stats

TrainOutput(global_step=3171, training_loss=1.0492317159068356, metrics={'train_runtime': 23023.1122, 'train_samples_per_second': 3.306, 'train_steps_per_second': 0.138, 'total_flos': 3.5123863283119964e+18, 'train_loss': 1.0492317159068356})