In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(folder, file_name)
    for folder, _subdirs, file_names in os.walk('/kaggle/input')
    for file_name in file_names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Installing Packages 

In [2]:
#Installing libraries
#%%capture
!pip install unsloth
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

Collecting unsloth
  Downloading unsloth-2025.1.8-py3-none-any.whl.metadata (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.9/53.9 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting unsloth_zoo>=2025.1.4 (from unsloth)
  Downloading unsloth_zoo-2025.1.5-py3-none-any.whl.metadata (16 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post2-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.1-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting triton>=3.0.0 (from unsloth)
  Downloading triton-3.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.13-py3-none-any.whl.metadata (9.4 kB)
Collecting transformers!=4.47.0,>=4.46.1 (from unsloth)
  Downloading transformers-4.48.2-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

### Importing libraries

In [3]:
#modules for fine-tuning
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from unsloth import is_bfloat16_supported

#Hugging Face libraries
from huggingface_hub import login
from transformers import TrainingArguments
from datasets import load_dataset

#importing weights and biases
import wandb

#importing kaggle secrets
from kaggle_secrets import UserSecretsClient


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


### Logging in to Hugging Face and Weights & Biases

In [4]:
# Read the Hugging Face and Weights & Biases API tokens from Kaggle secrets
user_secrets = UserSecretsClient()
hf_token = user_secrets.get_secret("HF_Token")
wnb_token = user_secrets.get_secret("wnb")

# Log in to Hugging Face and W&B, then start a W&B run for this training job
login(hf_token)
wandb.login(key = wnb_token)
wandb.init(
    project = 'Fine-tune-Deepseek-R1-Distill-Llama-8B',
    job_type = 'training',
    anonymous = 'allow'
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33myvvonjemmymajala[0m ([33myvvonjemmymajala-i-kuku[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


### Loading The Model And Tokenizer

In [5]:
# Parameters shared by the model-loading and trainer cells
max_seq_length = 2048  # passed to both from_pretrained and SFTTrainer
dtype = None  # presumably lets Unsloth choose the dtype for the current GPU — confirm in the Unsloth docs
load_in_4bit = True  # forwarded to from_pretrained as its load_in_4bit flag

In [6]:
# Load the DeepSeek-R1-Distill-Llama-8B checkpoint and its tokenizer through Unsloth,
# using the parameters defined in the previous cell and the Hugging Face token for access.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hf_token
)

==((====))==  Unsloth 2025.1.8: Fast Llama patching. Transformers: 4.48.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu121. CUDA: 7.5. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/52.9k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

### Testing DeepSeek Before Fine-Tuning

In [7]:
# Inference prompt template with two slots: the question and the (initially empty)
# reasoning that follows the opening <think> tag.
# The template starts directly at "Below" (no leading blank line) so that the text
# seen at inference time matches the wording and layout of train_prompt_style.
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>{}
"""

In [8]:
# A sample medical question used to probe the model before fine-tuning
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

# Switch the Unsloth model into its optimized inference mode
FastLanguageModel.for_inference(model)

# Fill the prompt template with the question (empty reasoning slot) and tokenize it
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

# Generate a response using the model
output = model.generate(input_ids = inputs.input_ids,
                       attention_mask = inputs.attention_mask,
                       max_new_tokens = 1200,
                       use_cache = True)

# Decode the generated token ids back into human-readable text
response= tokenizer.batch_decode(output)
# Split on the full marker including its colon (otherwise a stray ":" is printed first),
# and only at the first occurrence so that text generated after it is never truncated.
print(response[0].split("### Response:", 1)[1])

:
<think>
Okay, so I'm trying to figure out what cystometry would show for this 61-year-old woman. She's been dealing with involuntary urine loss when she coughs or sneezes but doesn't leak at night. She's had a gynecological exam and a Q-tip test. I need to determine what the cystometry results would likely reveal about her residual volume and detrusor contractions.

First, let me break down the information. Involuntary urine loss during activities like coughing suggests a possible issue with the lower urinary tract, maybe the bladder. The fact that she doesn't leak at night points away from something like nocturia, which is more common in conditions like overactive bladder or perhaps neurogenic bladder. So, it's more likely a daytime issue, perhaps stress urinary incontinence.

She underwent a gynecological exam and a Q-tip test. I'm not entirely sure about the specifics of the Q-tip test, but from what I recall, it's a diagnostic tool used to assess urethral function. It involves in

### Fine-Tuning Step By Step

In [9]:
# Training prompt template. It has three slots, filled in this order:
# the question, the chain of thought (wrapped in <think>...</think>), and the final response.
train_prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>
{}
</think>
{}"""

In [10]:
# Download the English ("en") configuration of the medical reasoning dataset and
# keep only the first 650 examples of the train split.
# trust_remote_code is deliberately not passed: the dataset is served as a plain JSON
# data file, so no repository-provided loading script needs to be executed.
dataset = load_dataset("FreedomIntelligence/medical-o1-reasoning-SFT",
                       "en",
                       split = "train[0:650]")

dataset

README.md:   0%|          | 0.00/1.25k [00:00<?, ?B/s]

medical_o1_sft.json:   0%|          | 0.00/74.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25371 [00:00<?, ? examples/s]

Dataset({
    features: ['Question', 'Complex_CoT', 'Response'],
    num_rows: 650
})

In [11]:
# Keep the tokenizer's end-of-sequence token; it is appended to every training example
# by formatting_prompts_func. The bare name on the last line displays its value.
EOS_TOKEN = tokenizer.eos_token
EOS_TOKEN

'<｜end▁of▁sentence｜>'

In [12]:
#Formatting the prompt function
def formatting_prompts_func(examples):
    """Build the training text for a batch of dataset rows.

    Args:
        examples: Batch mapping with parallel sequences stored under the keys
            "Question", "Complex_CoT" and "Response".

    Returns:
        A dict with a single "text" key holding one string per row: the
        training prompt filled with the question, chain of thought and
        response, followed by the EOS token.
    """
    rows = zip(examples["Question"], examples["Complex_CoT"], examples["Response"])
    return {
        "text": [
            train_prompt_style.format(question_text, reasoning, answer) + EOS_TOKEN
            for question_text, reasoning, answer in rows
        ],
    }

In [13]:
# Apply the formatting function in batches; its "text" output is added as a dataset column.
# The result is assigned back to `dataset`, replacing the unformatted version.
dataset = dataset.map(formatting_prompts_func, batched = True,)
# Display the first formatted training example as a sanity check
dataset["text"][0]

Map:   0%|          | 0/650 [00:00<?, ? examples/s]

"Below is an instruction that describes a task, paired with an input that provides further context. \nWrite a response that appropriately completes the request. \nBefore answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.\n\n### Instruction:\nYou are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. \nPlease answer the following medical question. \n\n### Question:\nA 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?\n\n### Response:\n<think>\nOkay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her ab

### Setting up the model using LoRA

In [14]:
# Wrap the loaded base model with LoRA adapters via Unsloth
model_lora = FastLanguageModel.get_peft_model(
    model, 
    r = 16,  # LoRA rank
    # Modules that receive adapters: the attention projections (q/k/v/o)
    # and the MLP projections (gate/up/down)
    target_modules= [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj"
    ],
    lora_alpha = 16,
    lora_dropout = 0,
    bias= 'none',
    use_gradient_checkpointing = "unsloth",
    # NOTE(review): 3402 differs from seed=3407 used in the TrainingArguments cell —
    # confirm whether the two seeds are meant to be different.
    random_state = 3402,
    use_rslora = False,
    loftq_config = None
)

Unsloth 2025.1.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [15]:
# Training hyperparameters for this short run
use_bf16 = is_bfloat16_supported()
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = effective batch size of 8 per device
    # Use num_train_epochs = 1, warmup_ratio for full training runs!
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    # Mixed precision: bf16 when the GPU supports it, fp16 otherwise
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
)

# Supervised fine-tuning trainer over the formatted "text" column of the dataset
trainer = SFTTrainer(
    model=model_lora,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

Map (num_proc=2):   0%|          | 0/650 [00:00<?, ? examples/s]

### Training the model

In [16]:
# Run fine-tuning; the object returned by trainer.train() is kept in trainer_stats
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 650 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
10,1.8463
20,1.4591
30,1.4165
40,1.3939
50,1.32
60,1.3286


In [17]:
# Close the Weights & Biases run.
# Note: this cell does not save the fine-tuned model; it only calls wandb.finish().
wandb.finish()

0,1
train/epoch,▁▂▄▅▇██
train/global_step,▁▂▄▅▇██
train/grad_norm,█▅▃▁▂▁
train/learning_rate,█▇▅▄▂▁
train/loss,█▃▂▂▁▁

0,1
total_flos,1.7911162265714688e+16
train/epoch,0.73846
train/global_step,60.0
train/grad_norm,0.23088
train/learning_rate,0.0
train/loss,1.3286
train_loss,1.46075
train_runtime,1179.8737
train_samples_per_second,0.407
train_steps_per_second,0.051
