In [23]:
from google.colab import drive

print("Mounting Google Drive...")
drive.mount('/content/drive')

Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install Unsloth (Optimized for T4 GPU)
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes

import torch
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-lak17w8k/unsloth_78a00219f28346ac895d88cc00a79786
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-lak17w8k/unsloth_78a00219f28346ac895d88cc00a79786
  Resolved https://github.com/unslothai/unsloth.git to commit 07a7ff47b1b2b37c088b8e0d7ed7bf8710d9aa22
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2026.1.4 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2026.1.4-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.git-

In [None]:
# base model : Mistral-7B
max_seq_length = 2048
dtype = None
load_in_4bit = True

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/mistral-7b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# LoRA Adapter for Efficient Fine-Tuning
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,  # (quick)
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
)

==((====))==  Unsloth 2026.1.4: Fast Mistral patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/155 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/438 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth 2026.1.4 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


# Dataset2 ( Hugging Face.)
**We** use 'medalpaca/medical_meadow_medical_flashcards'

https://huggingface.co/datasets/medalpaca/medical_meadow_medical_flashcards


In [None]:
# load & Format MEDICAL Dataset
from datasets import load_dataset
import pandas as pd

dataset = load_dataset("medalpaca/medical_meadow_medical_flashcards", split = "train")

df = pd.DataFrame(dataset[:7])
df


Unnamed: 0,input,output,instruction
0,What is the relationship between very low Mg2+...,Very low Mg2+ levels correspond to low PTH lev...,Answer this question truthfully
1,What leads to genitourinary syndrome of menopa...,Low estradiol production leads to genitourinar...,Answer this question truthfully
2,What does low REM sleep latency and experienci...,Low REM sleep latency and experiencing halluci...,Answer this question truthfully
3,What are some possible causes of low PTH and h...,"PTH-independent hypercalcemia, which can be ca...",Answer this question truthfully
4,How does the level of anti-müllerian hormone r...,The level of anti-müllerian hormone is directl...,Answer this question truthfully
5,What does low Mobility and bulging of TM suggest?,Low Mobility and bulging of TM is suggestive o...,Answer this question truthfully
6,What are the possible causes of low glucose an...,Low glucose and high C-peptide levels can be c...,Answer this question truthfully


In [None]:
print("✅ Number of examples (rows) in dataset =", len(dataset))
print("\n✅ Dataset Columns =", dataset.column_names)
print("\n✅ Dataset Features:")
print(dataset.features)

✅ Number of examples (rows) in dataset = 33955

✅ Dataset Columns = ['input', 'output', 'instruction']

✅ Dataset Features:
{'input': Value('string'), 'output': Value('string'), 'instruction': Value('string')}


In [None]:
# Define how to talk to the model (Alpaca Format) - prompt template
medical_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are an AI Medical Assistant. Answer the following medical question truthfully and clearly.

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    # This dataset uses 'input' for the question and 'output' for the answer
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for input_text, output_text in zip(inputs, outputs):
        # We hardcode the instruction to ensure it acts like a doctor
        text = medical_prompt.format(input_text, output_text) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }

dataset = dataset.map(formatting_prompts_func, batched = True)

Map:   0%|          | 0/33955 [00:00<?, ? examples/s]

In [None]:
# Start Training
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,

    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        max_steps=60,
        learning_rate=2e-4,

        fp16=not torch.cuda.is_bf16_supported(),
        logging_steps=5,

        output_dir="outputs",
    ),
)

print("Starting Medical Fine-Tuning...")
trainer_stats = trainer.train()
print("Training Complete!")

Map:   0%|          | 0/33955 [00:00<?, ? examples/s]

Starting Medical Fine-Tuning...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 33,955 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040 of 7,283,675,136 (0.58% trained)
wandb: (1) Create a W&B account
wandb: (2) Use an existing W&B account
wandb: (3) Don't visualize my results
wandb: Enter your choice:

 3


wandb: You chose "Don't visualize my results"
wandb: Using W&B in offline mode.
wandb: W&B API key is configured. Use `wandb login --relogin` to force relogin


wandb: Detected [openai] in use.
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,1.0809
10,0.7279
15,0.6556
20,0.6336
25,0.6177
30,0.5773
35,0.5881
40,0.5748
45,0.5772
50,0.5262


Training Complete!


# Save the model and test it

In [24]:
# Save Model
from google.colab import drive
import os

save_path = "/content/drive/MyDrive/M2-Sir/IA/Project/mistral7bModel_medical"

os.makedirs(save_path, exist_ok=True)

# Save the model and tokenizer
print("Saving model to Google Drive...")
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)
print(f"Model and tokenizer saved at: {save_path}")

Saving model to Google Drive...
Model and tokenizer saved at: /content/drive/MyDrive/M2-Sir/IA/Project/mistral7bModel_medical


# GUI - Interface pour tester le Chatboat using Gradio

In [28]:
!pip install modelscope
import os

import gradio as gr
from unsloth import FastLanguageModel

os.environ["UNSLOTH_USE_MODELSCOPE"] = "1"


save_path = "/content/drive/MyDrive/M2-Sir/IA/Project/mistral7bModel_medical"

print("Loading model... please wait.")
model, tokenizer = FastLanguageModel.from_pretrained(
    save_path,
    max_seq_length=2048,
    load_in_4bit=True,
    device_map="auto"
)

# fast inference
FastLanguageModel.for_inference(model)

# Medical Prompt
medical_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
You are an AI Medical Assistant. Answer the following medical question truthfully and clearly.

### Input:
{}

### Response:
"""

# Chat Function
def chat_response(message, history):
    # Prepare the input for the model
    inputs = tokenizer(
        [medical_prompt.format(message)],
        return_tensors="pt"
    ).to("cuda")

    # Generate the answer
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        use_cache=True,
        temperature=0.7
    )

    # Decode the answer and clean up the prompt
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    cleaned_response = response.split("### Response:\n")[-1].strip()
    return cleaned_response

# Launch the GUI
gui = gr.ChatInterface(
    fn=chat_response,
    title="🩺 Medical AI Chatbot",
    description="Ask me a medical question. (Note: Not professional medical advice).",
    examples=["What are the symptoms of flu?", "How do I treat a burn?", "What is hypertension?"]
)

gui.launch(share=True)

Loading model... please wait.
==((====))==  Unsloth 2026.1.4: Fast Mistral patching. Transformers: 4.57.6.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


  self.chatbot = Chatbot(


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://52b997b90aff4f4f8f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


