In [None]:
!pip install -q bitsandbytes transformers datasets peft accelerate trl

In [None]:
from huggingface_hub import notebook_login
# Interactive Hugging Face Hub login; the token is entered at runtime rather
# than being hardcoded in the notebook.
notebook_login()

In [None]:
from datasets import load_dataset
# Load the "en" configuration of the "meddialog" dataset.
dataset = load_dataset("meddialog", "en")

# Print the first training record to inspect its fields.
# NOTE(review): format_healthcare_qa below reads 'question' and 'answer' —
# confirm that this dataset configuration actually exposes those columns.
print(dataset["train"][0])

def format_healthcare_qa(example):
    """Render one record as a single Patient/Doctor prompt string.

    Args:
        example: Mapping that provides ``question`` and ``answer`` entries.

    Returns:
        A one-key dict whose ``text`` value is the question under a
        ``### Patient:`` header, a blank line, then the answer under a
        ``### Doctor:`` header.
    """
    prompt = f"### Patient:\n{example['question']}\n\n### Doctor:\n{example['answer']}"
    return dict(text=prompt)

# Add the formatted "text" column, then drop the raw source columns so only
# the prompt text remains. Because `dataset` is rebound here, re-running this
# cell without re-running the load above fails: the formatter can no longer
# find the columns it reads.
dataset = dataset.map(format_healthcare_qa).remove_columns(["question", "answer"])

In [None]:

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hub identifier of the base checkpoint that gets fine-tuned.
model_id = "meta-llama/Llama-2-7b-hf"

# bitsandbytes quantization settings, passed to from_pretrained below as
# `quantization_config`.
bnb_config = BitsAndBytesConfig(
    # Load the weights in 4-bit form.
    load_in_4bit=True,
    # Dtype name used for computation on the quantized weights.
    bnb_4bit_compute_dtype="float16",
    # 4-bit quantization scheme, with double quantization switched on.
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

# Load the base causal-LM checkpoint with the 4-bit quantization settings.
#
# `trust_remote_code` is deliberately left at its default (off): the Llama
# architecture is implemented inside transformers itself, so there is no
# reason to allow Python code shipped in the Hub repository to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    # Let the library choose where to place the model's weights.
    device_map="auto",
)

# Load the tokenizer that belongs to the same checkpoint as the model.
tokenizer = AutoTokenizer.from_pretrained(model_id)
# Reuse the end-of-sequence token as the padding token; the tokenize step
# later pads every example to a fixed length and therefore needs one.
# NOTE(review): presumably this checkpoint ships without a pad token — confirm.
tokenizer.pad_token = tokenizer.eos_token

In [None]:
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model

# Prepare the quantized base model for k-bit training before adapters are
# attached.
model = prepare_model_for_kbit_training(model)

# LoRA adapter settings for causal language modelling.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Only the modules named q_proj and v_proj receive adapters.
    target_modules=["q_proj", "v_proj"],
    # Adapter size, scaling and dropout.
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    # No bias parameters are trained.
    bias="none",
)

# Wrap the prepared model with the adapters; `model` now refers to the
# PEFT-wrapped model used by the rest of the notebook.
model = get_peft_model(model, lora_config)


In [None]:
def tokenize(example):
    """Tokenize the ``text`` field to a fixed length of 512 tokens.

    Uses the notebook-level ``tokenizer``. Inputs longer than 512 tokens are
    truncated and shorter ones are padded up to 512.

    Args:
        example: Mapping with a ``text`` entry (a single string, or a list of
            strings when called through a batched ``map``).

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    encode_options = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 512,
    }
    return tokenizer(example["text"], **encode_options)

# Apply `tokenize` across the dataset in batched mode.
tokenized_dataset = dataset.map(tokenize, batched=True)
# In place: expose only input_ids and attention_mask, as torch tensors.
tokenized_dataset.set_format(type="torch", columns=["input_ids", "attention_mask"])


In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# Training hyperparameters, grouped by concern.
training_args = TrainingArguments(
    # Where checkpoints go: one per epoch, keeping only the most recent.
    output_dir="./llama-healthcare",
    save_strategy="epoch",
    save_total_limit=1,
    # Optimization schedule: 4 examples per device per step, accumulated
    # over 4 steps before each update.
    num_train_epochs=3,
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    # Mixed precision, matching the float16 compute dtype chosen for the
    # quantized model.
    fp16=True,
    # Log every 20 steps; no external experiment tracker.
    logging_steps=20,
    report_to="none",
)

# Collator for causal language modelling (masked-LM mode switched off).
data_collator = DataCollatorForLanguageModeling(mlm=False, tokenizer=tokenizer)

# Bind model, arguments, training split and collator together. No evaluation
# dataset is supplied.
trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset["train"],
)

In [None]:
# Run the fine-tuning loop configured on the trainer.
trainer.train()

# Save the trained model and the tokenizer files side by side in one
# directory. Note this is a different path from the trainer's output_dir
# ("./llama-healthcare"), which receives the per-epoch checkpoints.
trainer.save_model("llama2-healthcare-lora")
tokenizer.save_pretrained("llama2-healthcare-lora")

In [None]:

from transformers import pipeline

# The model has just been trained and is still in training mode, so LoRA
# dropout (lora_dropout=0.1) would stay active and perturb generation.
# Switch to evaluation mode before building the pipeline.
model.eval()

# Generate a doctor reply for a sample prompt, using the same
# "### Patient: / ### Doctor:" layout as the training text.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
response = pipe("### Patient:\nI have a sore throat and fever. What could be the cause?\n\n### Doctor:\n", max_new_tokens=100)
# The pipeline returns a list of results; show the text of the first one.
print(response[0]["generated_text"])
