In [1]:
!pip install transformers==4.35 datasets peft accelerate bitsandbytes trl safetensors torch --no-cache



In [2]:
from datasets import load_dataset
from random import randrange

# Fetch the "train" split of the MedQA medical-meadow dataset from the Hugging Face Hub.
dataset = load_dataset("medalpaca/medical_meadow_medqa", split="train")

# Report how many records were loaded, then print one chosen at random as a sanity check.
num_records = len(dataset)
print(f"Dataset Size: {num_records}")
print(dataset[randrange(num_records)])
# Size observed when this notebook was run: 10178

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset Size: 10178
{'input': "Q:A 72-year-old male is brought from his nursing home to the emergency department for fever, chills, dyspnea, productive cough, and oliguria over the past 72 hours. He was in his normal state of health and slowly developed breathing problems and fever. His past medical history is significant for hepatitis C, hypertension, and hypercholesterolemia. His medications include bisoprolol, hydrochlorothiazide, and atorvastatin. Upon arrival to the ED, his blood pressure is 80/48 mm Hg, pulse is 120/min, a respiratory rate of 28/min, and body temperature of 39.0°C (102.2°F). Physical examination reveals decreased breathing sounds in the base of the left lung, along with increased vocal resonance, and pan-inspiratory crackles. The abdomen is mildly distended with a positive fluid wave. The patient’s level of consciousness ranges from disoriented to drowsiness. He is transferred immediately to the ICU where vasoactive support is initiated. Laboratory tests show leu

In [3]:
def format_prompt(sample):
    """Render one dataset record as an instruction/input/response prompt.

    Args:
        sample: Mapping with the keys "instruction", "input" and "output".

    Returns:
        A string that starts and ends with a newline and contains a fixed
        preamble followed by "### Instruction:", "### Input:" and
        "### Response:" sections filled from ``sample``.

    Raises:
        KeyError: If one of the three keys is missing from ``sample``.
    """
    preamble = (
        "Below is an instruction that describes a task, paired with an input "
        "that provides further context. Write a response that appropriately "
        "completes the request."
    )
    # Empty entries become blank lines (and the leading/trailing newline)
    # once everything is joined with "\n".
    lines = [
        "",
        preamble,
        "",
        "### Instruction:",
        f"{sample['instruction']}",
        "",
        "### Input:",
        f"{sample['input']}",
        "",
        "### Response:",
        f"{sample['output']}",
        "",
    ]
    return "\n".join(lines)

In [4]:
from random import randrange

# Pick a random record and show how format_prompt renders it.
random_sample = dataset[randrange(len(dataset))]
print(format_prompt(random_sample))


Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Please answer with one of the option in the bracket

### Input:
Q:A 20-year-old woman is brought to the emergency department with a puncture wound on the right side of her chest. She was walking to her apartment when she was assaulted. As she resisted to give up her purse, the assailant stabbed her in the chest with a knife and ran away. She is in severe respiratory distress. Her heart rate is 140/min, respiratory rate is 28/min, and blood pressure is 145/65 mm Hg. The pulse oximetry shows an oxygen saturation of 84%. An oval puncture wound is seen on the right lateral aspect of her chest and she is stuporous. The heart sounds are normal and no jugular venous distension is seen. Distant breath sounds are present on the right. Which of the following changes during inspiration explains her breathing difficulty?? 


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Hugging Face model name
model_name = "llSourcell/medllama2_7b"
use_flash_attention = False

# One half-precision dtype shared by the 4-bit compute path and the
# non-quantized weights. float16 is used because training runs with
# fp16=True; mixing a bfloat16 compute dtype with float16 weights forces
# extra casts and bfloat16 is not supported on every GPU.
compute_dtype = torch.float16

# BitsAndBytesConfig int-4 config: NF4 quantization with double quantization.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
)

# Load the quantized model. The KV cache is disabled because it is only
# useful for generation and conflicts with gradient checkpointing in training.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    use_cache=False,
    use_flash_attention_2=use_flash_attention,
    device_map="auto",
    torch_dtype=compute_dtype,
)

# NOTE(review): presumably selects the regular (non tensor-parallel) linear
# computation path of the Llama implementation; confirm against the docs.
model.config.pretraining_tp = 1

# Load tokenizer. The EOS token is reused for padding, and padding is
# applied on the right-hand side of each sequence.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model

# LoRA hyperparameters (the original author notes they follow the QLoRA paper):
# rank 16, alpha 32, dropout 0.1, bias="none", for a causal language model.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=32,
    lora_dropout=0.1,
    bias="none",
)

# Prepare the quantized model for k-bit training, then wrap it with the LoRA adapters.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)

In [None]:
from transformers import TrainingArguments

args = TrainingArguments(
    # Output location and checkpoint cadence (one checkpoint per epoch).
    output_dir="finetuned-llama-7b-chat-hf-med",
    save_strategy="epoch",
    # Training length and batch layout: 4 samples per device per step,
    # gradients accumulated over 2 steps.
    num_train_epochs=3,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    # Memory/precision settings.
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    fp16=True,
    # Optimisation hyperparameters.
    learning_rate=2e-4,
    max_grad_norm=0.3,
    lr_scheduler_type="constant",
    # NOTE(review): with the plain "constant" schedule the warmup setting is
    # likely not applied; confirm, or use "constant_with_warmup" if a warmup
    # phase is intended.
    warmup_ratio=0.03,
    # Progress reporting: log every 10 steps and keep the progress bar.
    logging_steps=10,
    disable_tqdm=False,
)

In [None]:
from trl import SFTTrainer

# Maximum sequence length for the model and for packing of the dataset.
max_seq_length = 1024

# Supervised fine-tuning trainer: records are rendered with format_prompt and
# packed into sequences of at most max_seq_length tokens.
trainer = SFTTrainer(
    model=model,
    args=args,
    tokenizer=tokenizer,
    peft_config=peft_config,
    train_dataset=dataset,
    formatting_func=format_prompt,
    packing=True,
    max_seq_length=max_seq_length,
)


In [None]:
# Run the fine-tuning loop with the trainer configured in the previous cell.
trainer.train()

# Save the trained model; no path is given here, so the trainer's own
# output location is used.
trainer.save_model()


In [None]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Reload the fine-tuned model and its tokenizer from the training output
# directory, again in 4-bit with float16 for the remaining weights.
finetuned_dir = args.output_dir
model = AutoPeftModelForCausalLM.from_pretrained(
    finetuned_dir,
    load_in_4bit=True,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(finetuned_dir)

In [None]:
from datasets import load_dataset
from random import randrange

# Load dataset from the hub and draw one random record to try the model on.
# NOTE(review): this is the same "train" split that was used for fine-tuning,
# so the record may have been seen during training. Treat the comparison
# below as a smoke test, not as an evaluation.
dataset = load_dataset("medalpaca/medical_meadow_medqa", split="train")
sample = dataset[randrange(len(dataset))]

# Same template as the training prompts, but the response section is left
# empty so the model has to complete it.
prompt = f"""
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{sample["instruction"]}

### Input:
{sample["input"]}

### Response:
"""

input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()
outputs = model.generate(input_ids=input_ids, max_new_tokens=512, do_sample=True, top_p=0.6, temperature=0.9)

# The generated sequence starts with the prompt tokens. Drop them by token
# count before decoding: slicing the decoded text by len(prompt) is only
# correct if decoding reproduces the prompt character-for-character, which
# the tokenizer does not guarantee (whitespace, special characters).
prompt_token_count = input_ids.shape[1]
generated_response = tokenizer.decode(outputs[0][prompt_token_count:], skip_special_tokens=True)

print(f"Instruction:\n{sample['instruction']}\n")
print(f"Input:\n{sample['input']}\n")
print(f"Generated Response:\n{generated_response}\n")
print(f"Ground Truth:\n{sample['output']}")

In [None]:
Instruction:
Please answer with one of the option in the bracket

Input:
Q:Collagen is a very critical structural protein in many of our connective tissues. Defects in collagen produce diseases such as Ehlers-Danlos syndrome, where there is a defective lysyl hydroxylase gene, or osteogenesis imperfecta, where there is a defect in the production of type I collagen. Which of the following represents the basic repeating tripeptide of collagen??
{'A': 'Ser-X-Y', 'B': 'Met-X-Y', 'C': 'Gly-X-Y', 'D': 'Glu-X-Y', 'E': 'Asp-X-Y'},

Generated Response:
C: Gly-X-Y

Ground truth:
C: Gly-X-Y