In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [3]:
%%capture
!pip install unsloth
!pip install git+https://github.com/unslothai/unsloth.git

In [4]:
import unsloth
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from unsloth import is_bfloat16_supported
from huggingface_hub import login
from transformers import TrainingArguments
from datasets import load_dataset
import wandb
from kaggle_secrets import UserSecretsClient

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [5]:
user_secrets = UserSecretsClient()
hugging_face_token = user_secrets.get_secret("HF_TOKEN")
wnb_token = user_secrets.get_secret("wandb")

login(hugging_face_token)

wandb.login(key=wnb_token)
run = wandb.init(
    project='Fine-tune-DeepSeek-R1-Distill-Llama-8B', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mbunnyyss09[0m ([33mmanvanth_reddy[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [6]:
max_seq_length = 2048 
dtype = None 
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = hugging_face_token, 
)

==((====))==  Unsloth 2025.3.9: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    Tesla T4. Num GPUs = 2. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [7]:
prompt_style = """Below is an instruction that describes a task, paired with an input that provides further context. 
Write a response that appropriately completes the request. 
Before answering, think carefully about the question and create a step-by-step chain of thoughts to ensure a logical and accurate response.

### Instruction:
You are a medical expert with advanced knowledge in clinical reasoning, diagnostics, and treatment planning. 
Please answer the following medical question. 

### Question:
{}

### Response:
<think>{}"""

In [8]:
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"


FastLanguageModel.for_inference(model) 
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])


<think>
Okay, so I've got this medical question to tackle. Let me try to break it down step by step. The patient is a 61-year-old woman with a history of involuntary urine loss during activities like coughing or sneezing, but she doesn't leak at night. She's had a gynecological exam and a Q-tip test. The question is asking what cystometry would most likely reveal about her residual volume and detrusor contractions.

First, I need to understand the key points here. The patient is experiencing urinary incontinence, specifically during activities that involve coughing or sneezing. These are typically activities that can cause sudden increases in intra-abdominal pressure, leading to urinary leakage. However, she doesn't leak at night, which is interesting because that suggests that her nighttime bladder capacity is normal or that she doesn't have significant nighttime bladder pressure.

She's had a gynecological exam. I'm not entirely sure what the Q-tip test entails, but from what I reca

In [9]:
import random
from datasets import load_dataset 

# Function for dynamic prompt styling
def dynamic_prompt_style(question, cot, response):
    styles = [
        "You are a top-tier medical AI. Answer the question concisely.",
        "Act as an experienced doctor. Explain step-by-step reasoning.",
        "Provide a well-researched medical response with citations.",
        "Offer a brief summary and then an in-depth analysis."
    ]
    prompt = f"""
    {random.choice(styles)}
    
    ### Question:
    {question}

    ### Thought Process:
    {cot}

    ### Response:
    {response}
    """
    return prompt

In [10]:
EOS_TOKEN = tokenizer.eos_token  # Ensure EOS token is added

def formatting_prompts_func(examples):
    inputs = examples["Question"]
    cots = examples["Complex_CoT"]
    outputs = examples["Response"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = dynamic_prompt_style(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {"text": texts}

In [11]:
# Load the full training dataset
dataset = load_dataset(
    "FreedomIntelligence/medical-o1-reasoning-SFT", 
    "en",  
    split="train", 
    trust_remote_code=True
)

# Apply formatting to dataset
dataset = dataset.map(formatting_prompts_func, batched=True)

print(dataset["text"][0])

README.md:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

medical_o1_sft.json:   0%|          | 0.00/74.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25371 [00:00<?, ? examples/s]

Map:   0%|          | 0/25371 [00:00<?, ? examples/s]


    Act as an experienced doctor. Explain step-by-step reasoning.
    
    ### Question:
    A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?

    ### Thought Process:
    Okay, let's think about this step by step. There's a 61-year-old woman here who's been dealing with involuntary urine leakages whenever she's doing something that ups her abdominal pressure like coughing or sneezing. This sounds a lot like stress urinary incontinence to me. Now, it's interesting that she doesn't have any issues at night; she isn't experiencing leakage while sleeping. This likely means her bladder's ability to hold urine is fine when she isn't under physical stress. Hmm, that's a clue that we're dealing with something related to pressure rather than a bl

In [12]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj", 
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  
    bias="none",  
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  
    loftq_config=None,
)

Unsloth 2025.3.9 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [13]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,   
        gradient_accumulation_steps=4,
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        warmup_steps=5,
        max_steps=100,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",   
    ),
)  

Tokenizing to ["text"] (num_proc=2):   0%|          | 0/25371 [00:00<?, ? examples/s]

In [14]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 25,371 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 41,943,040/4,670,623,744 (0.90% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,1.8972
20,1.535
30,1.497
40,1.4239
50,1.4194
60,1.4006
70,1.4067
80,1.3827
90,1.4277
100,1.3821


In [18]:
question = "A 61-year-old woman with a long history of involuntary urine loss during activities like coughing or sneezing but no leakage at night undergoes a gynecological exam and Q-tip test. Based on these findings, what would cystometry most likely reveal about her residual volume and detrusor contractions?"

FastLanguageModel.for_inference(model)  # Unsloth has 2x faster inference!
inputs = tokenizer([prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)
response = tokenizer.batch_decode(outputs)
print(response[0].split("### Response:")[1])



<think>
Okay, let's think this through. We have a 61-year-old woman who's been dealing with involuntary urine loss whenever she coughs or sneezes. That's pretty common, and it's usually related to the bladder not being able to hold its volume. So, this kind of involuntary leakage is often seen in people with an overactive bladder or maybe some nerve issues. 

Now, she underwent a gynecological exam and a Q-tip test. From what I understand, the Q-tip test is a way to see if there's any obstruction in the bladder outlet. If it's blocked, it can cause the bladder to fill up more quickly, leading to incontinence. But if the test shows it's not blocked, then we're more likely looking at something else, like the bladder itself being too sensitive or having some nerve issues.

Given her symptoms and the results of the Q-tip test, it seems like the obstruction isn't the main issue here. So, it's more likely a problem with how the bladder behaves. This could mean that her bladder is too sensit

In [23]:
!pip install -U langchain-community



In [22]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp310-cp310-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m59.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0


In [24]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Load an embedding model (Sentence Transformers for retrieval)
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Sample medical knowledge base (convert this into vector embeddings)
documents = [
    "Urinary incontinence can be stress-related, urge-related, or mixed.",
    "Cystometry measures detrusor pressure, bladder capacity, and compliance.",
    "ST-elevation in leads II, III, and aVF suggests an inferior myocardial infarction.",
]

# Store documents in FAISS for fast retrieval
db = FAISS.from_texts(documents, embedding_model)

In [25]:
query = "What does cystometry reveal in stress urinary incontinence?"
retrieved_docs = db.similarity_search(query, k=2)  # Retrieve top 2 relevant docs

context = " ".join([doc.page_content for doc in retrieved_docs])  # Combine texts
print(context)  # Debug: Check retrieved docs

Cystometry measures detrusor pressure, bladder capacity, and compliance. Urinary incontinence can be stress-related, urge-related, or mixed.


In [26]:
question = "What does cystometry reveal in stress urinary incontinence?"
retrieved_context = " ".join([doc.page_content for doc in retrieved_docs])

# Modify the prompt style to include retrieved information
formatted_prompt = f"""
You are a medical expert. Answer the question based on the retrieved documents below.

### Retrieved Context:
{retrieved_context}

### Question:
{question}

### Response:
"""

# Tokenize and generate response
inputs = tokenizer(formatted_prompt, return_tensors="pt").to("cuda")

outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=500,
)

response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(response.split("### Response:")[1].strip())  # Extract final output

Cystometry, a diagnostic procedure used to assess bladder function, reveals several important findings in stress urinary incontinence. This procedure helps evaluate the pressure changes in the bladder, how well the bladder can store urine, and the bladder's ability to empty. In the context of stress urinary incontinence, cystometry can show an elevated detrusor pressure, which indicates an increased pressure in the bladder during activities that may trigger an urge to urinate, such as coughing, sneezing, or laughing. Additionally, it may reveal a reduced bladder capacity, meaning the bladder cannot hold as much urine as it should. The compliance of the bladder, which refers to how well it stretches and returns to its original shape after being stretched, is also a key aspect that can be assessed through cystometry. In stress urinary incontinence, the bladder's compliance is often normal, as it is not typically stretched to its full capacity. Therefore, cystometry provides a comprehensi

In [27]:
new_model_local = "DeepSeek-R1-Medical-COT"
model.save_pretrained(new_model_local) 
tokenizer.save_pretrained(new_model_local)

model.save_pretrained_merged(new_model_local, tokenizer, save_method = "merged_16bit",)

Unsloth: You have 2 CPUs. Using `safe_serialization` is 10x slower.
We shall switch to Pytorch saving, which might take 3 minutes and not 30 minutes.
To force `safe_serialization`, set it to `None` instead.
Unsloth: Kaggle/Colab has limited disk space. We need to delete the downloaded
model which will save 4-16GB of disk space, allowing you to save on Kaggle/Colab.
Unsloth: Will remove a cached repo with size 6.0G


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 15.63 out of 31.35 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


 34%|███▍      | 11/32 [00:00<00:01, 14.30it/s]
We will save to Disk and not RAM now.
100%|██████████| 32/32 [00:25<00:00,  1.23it/s]


Unsloth: Saving tokenizer... Done.
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00001-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00002-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00003-of-00004.bin...
Unsloth: Saving DeepSeek-R1-Medical-COT/pytorch_model-00004-of-00004.bin...
Done.
