In [None]:
!pip install -q transformers datasets trl torch bitsandbytes peft tiktoken wandb

In [None]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer, SFTConfig
from trl import setup_chat_format
from huggingface_hub import login

import tiktoken

In [None]:
# Authenticate against the Hugging Face Hub; the merged model and tokenizer
# are pushed to the Hub further down in this notebook.
login()

In [None]:
# Hub identifier of the instruction-tuning dataset used for fine-tuning.
dataset_name = "lavita/AlpaCare-MedInstruct-52k"

In [None]:
# Only the "train" split is loaded; a held-out set is carved out of it below.
dataset = load_dataset(path=dataset_name, split="train")

In [None]:
# Optional: uncomment to keep only the first 15,000 examples for a quicker run.
# dataset = dataset.select(range(15000))

In [None]:
# Hold out 10% of the examples for evaluation; the fixed seed keeps the
# partition identical across runs.
split = dataset.train_test_split(seed=42, test_size=0.1)
train_dataset, eval_dataset = split["train"], split["test"]

In [None]:
# Peek at a single raw record to see which fields are available.
dataset[0]

In [None]:
def formatting_func(example):
    """Render one dataset record as a single training string.

    Args:
        example: Mapping with the keys ``"instruction"``, ``"input"`` and
            ``"output"``.

    Returns:
        ``"### Instruction: ...\\n ### Input: ...\\n ### Response: ..."``.
        The ``### Input`` section is omitted when the record carries no
        usable input: the ``<noinput>`` placeholder, ``None``, or an empty or
        whitespace-only string.
    """
    instruction = example["instruction"]
    response = example["output"]
    context = example["input"]

    # An empty "### Input:" section carries no information for the model, so
    # blank/None inputs are treated exactly like the "<noinput>" placeholder.
    has_context = context is not None and str(context).strip() not in ("", "<noinput>")

    if has_context:
        return f"### Instruction: {instruction}\n ### Input: {context}\n ### Response: {response}"
    return f"### Instruction: {instruction}\n ### Response: {response}"

In [None]:
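# NOTE: `token_counts` is not defined in this notebook; compute the per-example
# token counts first before re-enabling this inspection.
# token_counts.describe()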

In [None]:
# Hub identifier of the base checkpoint that gets fine-tuned.
model_name = "unsloth/Llama-3.2-1B-Instruct"

In [None]:
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Batching needs a padding token; reuse EOS when the checkpoint defines none.
pad_is_missing = tokenizer.pad_token is None
if pad_is_missing:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# 4-bit quantisation settings for the base model: NF4 quant type, double
# quantisation enabled, bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

In [None]:
# Load the base model with the 4-bit quantisation config defined above;
# device placement is delegated to `device_map="auto"`.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

In [None]:
# LoRA adapter settings: rank-16 adapters on every linear layer, no bias terms.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules="all-linear",
    r=16,
    lora_alpha=8,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
import torch

# Release cached, currently unused GPU memory before the trainer is built.
torch.cuda.empty_cache()

In [None]:
# The model is loaded with bfloat16 weights and a bfloat16 4-bit compute dtype,
# so mixed precision should be bf16 as well. fp16 is kept only as a fallback
# for GPUs that do not support bf16.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

args = SFTConfig(
    output_dir="llama-med-1B",
    num_train_epochs=3,
    max_length=1024,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,         # effective batch of 4 * 16 sequences per device
    optim="paged_adamw_32bit",
    logging_steps=25,
    learning_rate=2e-5,
    save_strategy="steps",
    eval_strategy="epoch",                  # otherwise the eval_dataset passed to the trainer is never evaluated
    bf16=use_bf16,
    fp16=not use_bf16,
    max_grad_norm=0.3,                      # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,                      # warmup ratio based on QLoRA paper
    report_to="wandb",
    packing=True,
    eval_packing=False
)

In [None]:
# Wire everything together: quantised base model, LoRA config, the prompt
# formatter, and the train / held-out splits.
trainer = SFTTrainer(
    model=model,
    args=args,
    processing_class=tokenizer,
    peft_config=peft_config,
    formatting_func=formatting_func,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
)

In [None]:
# Count through the PEFT wrapper instead of summing `p.numel()` by hand:
# 4-bit quantised weights are stored packed, so a raw numel() sum under-counts
# the base model and overstates the trainable percentage.
# NOTE(review): assumes `trainer.model` is the PeftModel created from `peft_config`.
num_trainable, total = trainer.model.get_nb_trainable_parameters()
percentage = num_trainable / total * 100
print(f"% of trainable params: {percentage:.2f}%")

In [None]:
# Run fine-tuning; checkpoints are written under the configured output_dir.
trainer.train()

In [None]:
from peft import AutoPeftModelForCausalLM
from transformers.trainer_utils import get_last_checkpoint

# Resolve the newest checkpoint instead of hard-coding a step number: the final
# step count changes with dataset size, batch size and number of epochs.
checkpoint = get_last_checkpoint("llama-med-1B")
if checkpoint is None:
    raise FileNotFoundError("No checkpoint-* directory found in 'llama-med-1B'; run training first.")

# Load the adapter together with its base model. No device_map is passed, so
# nothing is explicitly placed on the GPU here.
model = AutoPeftModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)

# Fold the LoRA weights into the base model and save the standalone result.
merged_model = model.merge_and_unload()
merged_model.save_pretrained("med-llama-1B-it", safe_serialization=True, max_shard_size="2GB")

In [None]:
# Store the tokenizer next to the merged weights so the folder is self-contained.
tokenizer.save_pretrained("med-llama-1B-it")

In [None]:
# Target repository on the Hugging Face Hub.
model_dir = "ajaz10/med-llama-1B-it"

# Upload the merged weights first, then the matching tokenizer.
merged_model.push_to_hub(model_dir)
tokenizer.push_to_hub(model_dir)

## Inference

In [None]:
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM

# Hub repository that holds the merged, fine-tuned model.
model_id = "ajaz10/med-llama-1B-it"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Wrap model and tokenizer in a text-generation pipeline.
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

In [None]:
# Build the prompt with exactly the template the model was trained on
# ("### Instruction: ...\n ### Input: ...\n ### Response: ..."): same spacing,
# no leading newline, and nothing after "### Response:" so generation starts
# where the training targets started.
instruction = "Act as a doctor, please answer the medical question based on the patient's description"
patient_input = (
    "I woke up this morning feeling the whole room is spinning when i was sitting down. "
    "I went to the bathroom walking unsteadily, as i tried to focus i feel nauseous. "
    "I try to vomit but it wont come out.. "
    "After taking panadol and sleep for few hours, i still feel the same.. "
    "By the way, if i lay down or sit down, my head do not spin, only when i want to move around "
    "then i feel the whole world is spinning.. "
    "And it is normal stomach discomfort at the same time? "
    "Earlier after i relieved myself, the spinning lessen so i am not sure whether its connected "
    "or coincidences.. Thank you doc!"
)
prompt = f"### Instruction: {instruction}\n ### Input: {patient_input}\n ### Response:"

outputs = pipe(
    prompt,
    max_new_tokens=1024,
    do_sample=True,      # temperature / top_p only take effect when sampling is enabled
    temperature=0.5,
    top_p=0.9,
)

In [None]:
# Show the text of the first (and only) returned sequence.
generated = outputs[0]["generated_text"]
print(generated)