In [1]:
%%capture
!pip install -U bitsandbytes
!pip install -U transformers
!pip install -U peft
!pip install -U accelerate
!pip install -U datasets
!pip install -U trl

In [2]:
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer, SFTConfig

In [3]:
from datasets import load_dataset

# 1. Fetch the train split of the MedAlpaca medical-flashcards dataset
dataset = load_dataset(
    "medalpaca/medical_meadow_medical_flashcards",
    split="train",
)

# 2. Define the formatting function
def format_medical_prompts(example):
    """Render one dataset row as a single Mistral-style instruction string.

    Args:
        example: Mapping that provides an 'input' entry (used as the
            question) and an 'output' entry (used as the answer).

    Returns:
        dict: A one-key mapping ``{"text": ...}`` whose value has the shape
        ``<s>[INST] question [/INST] answer </s>``.
    """
    question = example['input']
    answer = example['output']
    # NOTE(review): the template hard-codes the BOS marker "<s>"; if the
    # tokenizer also prepends BOS when this text is tokenized, sequences
    # would start with two BOS tokens — confirm against the tokenizer setup.
    return {"text": f"<s>[INST] {question} [/INST] {answer} </s>"}

# 3. Run the formatter over every row, adding the rendered 'text' field
dataset = dataset.map(format_medical_prompts)

# 4. Print the first formatted example as a visual sanity check
print("Sample Input to Model:", dataset[0]["text"], sep="\n")








The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md: 0.00B [00:00, ?B/s]

medical_meadow_wikidoc_medical_flashcard(…):   0%|          | 0.00/17.7M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/33955 [00:00<?, ? examples/s]

Map:   0%|          | 0/33955 [00:00<?, ? examples/s]

Sample Input to Model:
<s>[INST] What is the relationship between very low Mg2+ levels, PTH levels, and Ca2+ levels? [/INST] Very low Mg2+ levels correspond to low PTH levels which in turn results in low Ca2+ levels. </s>


In [4]:


# Hub id of the base checkpoint that gets fine-tuned
model_name = "mistralai/Mistral-7B-v0.1"

# Quantization recipe: 4-bit NF4 weights, float16 as the compute dtype
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

# Load the checkpoint with that quantization config; device_map="auto"
# delegates device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Tokenizer for the same checkpoint. `trust_remote_code` is intentionally NOT
# enabled: it would let Python code shipped in the Hub repo run locally, and
# this tokenizer class is part of transformers itself, so it is not needed.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse EOS as the padding token so batches can be padded.
tokenizer.pad_token = tokenizer.eos_token
# Pad on the right, i.e. after the real tokens of each sequence.
tokenizer.padding_side = "right"

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [5]:
# --- 3. Setup LoRA ---
# Ready the quantized model for k-bit training before adapters are attached.
# NOTE(review): this cell rebinds `model`; re-running it without reloading
# the base model would wrap an already wrapped model.
model = prepare_model_for_kbit_training(model)

# Rank-16 adapters on the attention projections plus the MLP gate projection.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj"],
)

# Attach the LoRA adapters; `model` is a PEFT-wrapped model from here on.
model = get_peft_model(model, peft_config)

# --- 4. Training Arguments ---
training_arguments = SFTConfig(
    # Output location and checkpoint / logging cadence (in optimizer steps)
    output_dir="./medalpaca_results",
    save_steps=25,
    logging_steps=10,

    # Run length: both an epoch count and a step cap are given; the recorded
    # run stopped at step 60, a small fraction of one epoch.
    max_steps=60,
    num_train_epochs=1,

    # Batch shape
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,

    # Optimizer and learning-rate schedule
    # NOTE(review): warmup_ratio is presumably ignored by the plain "constant"
    # schedule ("constant_with_warmup" is the warm-up variant) — confirm.
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    max_grad_norm=0.3,

    # Mixed-precision training flags, both switched off
    fp16=False,
    bf16=False,

    # SFT-specific options (these live on SFTConfig rather than the trainer)
    dataset_text_field="text",
    max_length=512,
    packing=False,

    # NOTE(review): no `report_to` is set; the recorded run stopped at an
    # interactive W&B login prompt. Consider setting it explicitly.
)

# --- 5. Train ---
# `model` is already a LoRA-wrapped PeftModel (get_peft_model was applied when
# the adapters were set up), so `peft_config` is NOT passed again here.
# Passing both is redundant and, depending on the TRL version, can make the
# trainer merge the existing adapter and wrap the model a second time.
trainer = SFTTrainer(
    model=model,                 # PEFT model carrying the trainable LoRA adapters
    train_dataset=dataset,       # rows expose the 'text' field named in the config
    processing_class=tokenizer,  # TRL's current name for the former `tokenizer` argument
    args=training_arguments,
)

print("Starting training...")
# Left as the last expression so the notebook displays the returned TrainOutput.
trainer.train()



Adding EOS to train dataset:   0%|          | 0/33955 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/33955 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/33955 [00:00<?, ? examples/s]

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'pad_token_id': 2}.


Starting training...


  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: (1) Create a W&B account
[34m[1mwandb[0m: (2) Use an existing W&B account
[34m[1mwandb[0m: (3) Don't visualize my results
[34m[1mwandb[0m: Enter your choice:

 1


[34m[1mwandb[0m: You chose 'Create a W&B account'
[34m[1mwandb[0m: Create an account here: https://wandb.ai/authorize?signup=true&ref=models
[34m[1mwandb[0m: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mae22b031[0m ([33mae22b031-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,0.9386
20,0.9461
30,1.2781
40,1.3933
50,1.9476
60,0.8949


TrainOutput(global_step=60, training_loss=1.2331038157145182, metrics={'train_runtime': 480.9577, 'train_samples_per_second': 0.499, 'train_steps_per_second': 0.125, 'total_flos': 1469319846887424.0, 'train_loss': 1.2331038157145182, 'epoch': 0.007067970314524679})

In [7]:
# 1. Leave training mode: turn the generation cache on and switch to eval mode
model.config.use_cache = True
model.eval()

# 2. Build a prompt in the same [INST] template used for the training text
prompt = "What is the treatment for acute bronchitis?"
# Move the encoded inputs to the device the model actually lives on instead of
# a hard-coded "cuda", so the cell follows whatever placement
# device_map="auto" chose.
# NOTE(review): the prompt hard-codes "<s>"; if the tokenizer also prepends
# BOS this yields two BOS tokens — confirm, and keep it consistent with the
# training template.
inputs = tokenizer(f"<s>[INST] {prompt} [/INST]", return_tensors="pt").to(model.device)

output = model.generate(
    **inputs,
    max_new_tokens=200,
    do_sample=True,
    temperature=0.1,        # low temperature: close to greedy, less varied text
    top_p=0.9,
    repetition_penalty=1.2  # discourages repeating the same tokens ("A A A A")
)

# 3. Decode the full sequence (prompt + completion) without special tokens
print(tokenizer.decode(output[0], skip_special_tokens=True))

[INST] What is the treatment for acute bronchitis? [/INST] Acute bronchitis can be treated with antibiotics, cough suppressants, and over-the-counter medications to relieve symptoms such as fever, sore throat, and body aches. In some cases, patients may also benefit from inhaled corticosteroids or other anti-inflammatory drugs that help reduce inflammation in the airways. It's important to note that most cases of acute bronchitis resolve on their own without any specific treatment within two weeks; however, if symptoms persist beyond this timeframe or worsen significantly, it may indicate an underlying condition requiring further evaluation by a healthcare provider. 


In [8]:
from google.colab import drive

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

# Destination folder inside MyDrive for the fine-tuned artifacts
new_model_name = "Mistral-7B-Medical-Finetune"
save_path = f"/content/drive/MyDrive/{new_model_name}"

# Write the trained (adapter) model first, then the tokenizer, to that folder
for artifact in (trainer.model, tokenizer):
    artifact.save_pretrained(save_path)

print(f"✅ Model saved to Google Drive at: {save_path}")

Mounted at /content/drive
✅ Model saved to Google Drive at: /content/drive/MyDrive/Mistral-7B-Medical-Finetune
