In [1]:
import os
import torch
import wandb
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM, 
    AutoTokenizer, 
    BitsAndBytesConfig, 
    TrainingArguments, 
    logging
)
from peft import LoraConfig, get_peft_model
#from kaggle_secrets import UserSecretsClient
from huggingface_hub import login
from trl import SFTTrainer, setup_chat_format
import bitsandbytes as bnb

  warn(


In [2]:
# Load your merged model
model_name = "Cshavi/gemma2-2b-chatdoctor-medqa_merged"  # Your HF repo
# Or local path: model_name = "./gemma2-2b-chatdoctor-merged"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Load HealthCareMagic dataset
hc_dataset = load_dataset("wangrongsheng/HealthCareMagic-100k-en", split="train")
hc_dataset = hc_dataset.shuffle(seed=42).select(range(3000))  # 3k samples

def format_healthcare_chat(row):
    # Combine system instruction with user content (no separate system role)
    system_instruction = "You are an experienced medical doctor. Provide helpful, accurate medical guidance based on the patient's description."
    user_content = f"{system_instruction}\n\n{row['instruction']}\n\nPatient: {row['input']}"
    
    row_json = [
        {"role": "user", "content": user_content},
        {"role": "assistant", "content": row["output"]}
    ]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

hc_dataset = hc_dataset.map(format_healthcare_chat, num_proc=4)
hc_dataset = hc_dataset.train_test_split(test_size=0.1)

In [4]:
print(f"Training samples: {len(hc_dataset['train'])}")
print(f"Eval samples: {len(hc_dataset['test'])}")

Training samples: 2700
Eval samples: 300


In [5]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
)

print("Adding LoRA adapters...")
model = get_peft_model(model, peft_config)

Adding LoRA adapters...


In [6]:
training_args = TrainingArguments(
    output_dir="Gemma-2-2b-HealthCareMagic-v2",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    num_train_epochs=2,
    eval_strategy="steps",
    eval_steps=150,
    save_steps=300,
    logging_steps=10,
    warmup_steps=50,
    learning_rate=8e-5,  # Lower learning rate for continued training
    fp16=False,
    bf16=True,
    group_by_length=True,
    report_to="wandb",
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    save_total_limit=3,
)

In [7]:
wandb.init(
    project="Gemma-2-HealthCareMagic-v2",
    name="gemma2-2b-hcmagic-continued-training",
    config={
        "base_model": model_name,
        "dataset": "HealthCareMagic-100k",
        "samples": 3000,
        "method": "LoRA continued training"
    }
)

[34m[1mwandb[0m: Currently logged in as: [33mchhavin6v[0m ([33moutlier89[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [8]:
trainer = SFTTrainer(
    model=model,
    train_dataset=hc_dataset["train"],
    eval_dataset=hc_dataset["test"],
    args=training_args,
)

Converting train dataset to ChatML:   0%|          | 0/2700 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/2700 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2700 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2700 [00:00<?, ? examples/s]

Converting eval dataset to ChatML:   0%|          | 0/300 [00:00<?, ? examples/s]

Adding EOS to eval dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/300 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [9]:
model.config.use_cache = False
print("🚀 Starting training...")
trainer.train()

🚀 Starting training...


It is strongly recommended to train Gemma2 models with the `eager` attention implementation instead of `sdpa`. Use `eager` with `AutoModelForCausalLM.from_pretrained('<path-to-checkpoint>', attn_implementation='eager')`.


Step,Training Loss,Validation Loss
150,2.079,2.191686
300,2.0688,2.1408
450,2.0427,2.111355
600,2.0139,2.093284
750,1.9672,2.0889
900,1.9,2.087786
1050,1.7828,2.081408
1200,1.9088,2.076171
1350,1.8174,2.072997


TrainOutput(global_step=1350, training_loss=2.0613223238344545, metrics={'train_runtime': 1210.0074, 'train_samples_per_second': 4.463, 'train_steps_per_second': 1.116, 'total_flos': 1.9423879755992064e+16, 'train_loss': 2.0613223238344545})

In [10]:
trainer.save_model()
print("✅ Training completed!")

✅ Training completed!


In [11]:
repo_name = "Cshavi/Gemma-2-2b-HealthCareMagic-v2"  # Choose your repo name

print("🚀 Pushing model to Hugging Face Hub...")
trainer.model.push_to_hub(repo_name, use_temp_dir=False)
trainer.tokenizer.push_to_hub(repo_name, use_temp_dir=False)

print(f"✅ Model successfully pushed to: https://huggingface.co/{repo_name}")

🚀 Pushing model to Hugging Face Hub...


adapter_model.safetensors:   0%|          | 0.00/83.1M [00:00<?, ?B/s]

Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.


README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]

✅ Model successfully pushed to: https://huggingface.co/Cshavi/Gemma-2-2b-HealthCareMagic-v2
