<a href="https://colab.research.google.com/github/ayabg/chatbot-medical/blob/main/Fine_Tuning_Llama2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Step 1: Install All the Required Packages

In [None]:
# Mount the user's Google Drive at /content/mydrive (requires a Colab runtime).
from google.colab import drive
drive.mount("/content/mydrive")

Drive already mounted at /content/mydrive; to attempt to forcibly remount, call drive.mount("/content/mydrive", force_remount=True).


In [None]:
# Work from the project folder on the mounted Drive so that the relative paths used
# by later cells (final_data.csv, train.jsonl, test.jsonl, ./results) resolve there.
%cd /content/mydrive/MyDrive/healthcare


/content/mydrive/MyDrive/healthcare


In [None]:
# Show the attached NVIDIA GPU, if any. The training cell further down loads the
# model onto GPU 0 and queries CUDA, so it cannot run on a CPU-only runtime.
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


# Installing the Required Packages


In [None]:
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

# Step 2: Import All the Required Libraries

In [None]:
import os
import random
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
from transformers import pipeline

  warn("The installed version of bitsandbytes was compiled without GPU support. "


/usr/local/lib/python3.10/dist-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


# Step 5. Generate a System Message

In [None]:
# System prompt placed inside the <<SYS>> ... <</SYS>> section of every training
# example and every inference prompt built later in this notebook.
system_message = (
    "Hello! I'm here to provide concise information about general health problem, including their causes, symptoms, treatments, and recommended medications. How can I assist you today?"
)
print(system_message)

Hello! I'm here to provide concise information about general health problem, including their causes, symptoms, treatments, and recommended medications. How can I assist you today?


# Step 7. Split the Data into Training and Test Sets

In [None]:
import pandas as pd

# Read the prompt/response table from the current working directory.
csv_file_path = 'final_data.csv'
df = pd.read_csv(csv_file_path)

# Announce, then let the notebook render the first ten rows as the cell output.
print('Here are the first few rows of the DataFrame:')
df.head(n=10)

Here are the first few rows of the DataFrame:


Unnamed: 0,prompt,response,prompt_word_count,response_word_count
0,Who is at risk for Lymphocytic Choriomeningiti...,LCMV infections can occur after exposure to fr...,9,68
1,What are the symptoms of Lymphocytic Choriomen...,LCMV is most commonly recognized as causing ne...,9,367
2,How to diagnose Lymphocytic Choriomeningitis (...,"During the first phase of the disease, the mos...",7,110
3,What are the treatments for Lymphocytic Chorio...,"Aseptic meningitis, encephalitis, or meningoen...",9,61
4,How to prevent Lymphocytic Choriomeningitis (L...,LCMV infection can be prevented by avoiding co...,7,397
5,What is (are) Parasites - Cysticercosis ?,Cysticercosis is an infection caused by the la...,7,52
6,Who is at risk for Parasites - Cysticercosis? ?,Cysticercosis is an infection caused by the la...,9,325
7,How to diagnose Parasites - Cysticercosis ?,"If you think that you may have cysticercosis, ...",7,106
8,What are the treatments for Parasites - Cystic...,Some people with cysticercosis do not need to ...,9,50
9,How to prevent Parasites - Cysticercosis ?,"To prevent cysticercosis, the following precau...",7,124


In [None]:
# Only `prompt` and `response` are used to build training text; drop both
# word-count helper columns in a single call.
df = df.drop(columns=["prompt_word_count", "response_word_count"])

In [None]:
# Confirm that only the prompt and response columns remain.
df.head()

Unnamed: 0,prompt,response
0,Who is at risk for Lymphocytic Choriomeningiti...,LCMV infections can occur after exposure to fr...
1,What are the symptoms of Lymphocytic Choriomen...,LCMV is most commonly recognized as causing ne...
2,How to diagnose Lymphocytic Choriomeningitis (...,"During the first phase of the disease, the mos..."
3,What are the treatments for Lymphocytic Chorio...,"Aseptic meningitis, encephalitis, or meningoen..."
4,How to prevent Lymphocytic Choriomeningitis (L...,LCMV infection can be prevented by avoiding co...


In [None]:
# Randomly sample a working subset of up to 1,500 rows (the whole frame if it has
# fewer). The fixed random_state keeps the subset, and therefore the train/test
# files written from it, identical across kernel restarts.
sampled_df = df.sample(n=min(1500, len(df)), random_state=42)

# Index labels of the sampled rows, kept under the name the original cell exposed.
sampled_indices = sampled_df.index

# Cell output: (rows, columns) of the sampled frame.
sampled_df.shape

(1500, 2)

In [None]:
# 90 % of the sampled rows go to training (seeded shuffle); the rows whose index
# labels were not picked form the held-out test set.
train_df = sampled_df.sample(frac=0.9, random_state=42)
test_df = sampled_df.drop(index=train_df.index)

In [None]:
# Write each split as JSON Lines (one record per line) next to the notebook.
for split_frame, split_path in ((train_df, 'train.jsonl'), (test_df, 'test.jsonl')):
    split_frame.to_json(split_path, orient='records', lines=True)

# Step 8. Load the Llama 2 Model

In [None]:
# ---------------------------------------------------------------------------
# Model and data
# ---------------------------------------------------------------------------
model_name = "NousResearch/Llama-2-7b-chat-hf"  # base checkpoint to fine-tune
dataset_name = "train.jsonl"                    # training split file (JSON Lines)
new_model = "Medi-llama-2-7b-custom1000"        # where the trained model is saved

# ---------------------------------------------------------------------------
# LoRA (passed to LoraConfig)
# ---------------------------------------------------------------------------
lora_r = 64
lora_alpha = 16
lora_dropout = 0.1

# ---------------------------------------------------------------------------
# 4-bit quantization (passed to BitsAndBytesConfig)
# ---------------------------------------------------------------------------
use_4bit = True
bnb_4bit_compute_dtype = "float16"  # looked up on the torch module by name
bnb_4bit_quant_type = "nf4"
use_nested_quant = False

# ---------------------------------------------------------------------------
# Training loop (passed to TrainingArguments)
# ---------------------------------------------------------------------------
output_dir = "./results"
num_train_epochs = 5
max_steps = -1

fp16 = False
bf16 = False

per_device_train_batch_size = 4
per_device_eval_batch_size = 4
gradient_accumulation_steps = 1
gradient_checkpointing = True

max_grad_norm = 0.3
learning_rate = 2e-4
weight_decay = 0.001

optim = "paged_adamw_32bit"
lr_scheduler_type = "cosine"
warmup_ratio = 0.03

group_by_length = True

save_steps = 0
logging_steps = 25

# ---------------------------------------------------------------------------
# Supervised fine-tuning (passed to SFTTrainer)
# ---------------------------------------------------------------------------
max_seq_length = None
packing = False

# ---------------------------------------------------------------------------
# Device placement
# ---------------------------------------------------------------------------
device_map = {"": 0}  # load the entire model on GPU 0

# Step 9. Load the Dataset and Train

In [None]:
# @title
def build_text_column(examples):
    """Format a batch of prompt/response pairs into Llama 2 chat training text.

    Args:
        examples: a batch as passed by ``Dataset.map(..., batched=True)``; the
            ``'prompt'`` and ``'response'`` entries are read as parallel lists.

    Returns:
        ``{'text': [...]}`` with one string per pair, laid out as
        ``[INST] <<SYS>>\\n<system message>\\n<</SYS>>\\n\\n<prompt> [/INST] <response>``.
    """
    # The system header is the same for every row, so build it once per batch.
    header = f'[INST] <<SYS>>\n{system_message.strip()}\n<</SYS>>\n\n'
    return {
        'text': [
            header + prompt + ' [/INST] ' + response
            for prompt, response in zip(examples['prompt'], examples['response'])
        ]
    }


# Load datasets
# Each JSON Lines file is exposed by the loader under the single split name "train".
train_dataset = load_dataset('json', data_files=dataset_name, split="train")
valid_dataset = load_dataset('json', data_files='test.jsonl', split="train")

# Preprocess datasets: add the `text` column that SFTTrainer is pointed at.
train_dataset_mapped = train_dataset.map(build_text_column, batched=True)
valid_dataset_mapped = valid_dataset.map(build_text_column, batched=True)

# Load tokenizer and model with QLoRA configuration
# Turn the configured dtype name (e.g. "float16") into the torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Fail fast with an actionable message: the model is placed on GPU 0 and the
# capability check below queries CUDA, so without a GPU this cell would otherwise
# stop with an opaque driver error.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. QLoRA fine-tuning in this notebook needs an NVIDIA GPU: "
        "in Colab choose Runtime > Change runtime type > GPU, then run all cells again."
    )

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    # Index instead of unpacking into `_`, which IPython uses for the last output.
    major = torch.cuda.get_device_capability()[0]
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model, quantized per bnb_config and placed according to device_map
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map=device_map,
    quantization_config=bnb_config,
)
# Model config overrides applied before training.
model.config.pretraining_tp = 1
model.config.use_cache = False

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Pad on the right: fixes a weird overflow issue with fp16 training
tokenizer.padding_side = "right"
# Reuse the end-of-sequence token as the padding token
tokenizer.pad_token = tokenizer.eos_token

# Load LoRA configuration (values come from the config cell)
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters (values come from the config cell)
training_arguments = TrainingArguments(
    # where checkpoints and logs go, and how often
    output_dir=output_dir,
    save_steps=save_steps,
    logging_steps=logging_steps,
    report_to="tensorboard",
    # run length and batching
    num_train_epochs=num_train_epochs,
    max_steps=max_steps,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    group_by_length=group_by_length,
    # optimizer and schedule
    optim=optim,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    max_grad_norm=max_grad_norm,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    # mixed precision
    fp16=fp16,
    bf16=bf16,
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    args=training_arguments,
    peft_config=peft_config,
    train_dataset=train_dataset_mapped,
    eval_dataset=valid_dataset_mapped,  # validation split
    dataset_text_field="text",          # column produced by the preprocessing step
    max_seq_length=max_seq_length,
    packing=packing,
)

# Train model
trainer.train()

# Save trained model under the name configured as `new_model`
trainer.model.save_pretrained(new_model)



RuntimeError: Found no NVIDIA driver on your system. Please check that you have an NVIDIA GPU and installed a driver from http://www.nvidia.com/Download/index.aspx

## Testing the Fine-Tuned Llama 2 Medical LLM


In [None]:
# @title
# `logging` is the transformers logging module imported above; only CRITICAL
# messages are shown from here on.
logging.set_verbosity(logging.CRITICAL)

# Same [INST] <<SYS>> layout as the training text, with the answer left open.
# Replace the question here with something relevant to your task.
prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\nGive Me Medication for diaper Rash? [/INST]"

# Text-generation pipeline over the in-memory model, capped at max_length=500.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=500,
)
result = pipe(prompt)
print(result[0]['generated_text'])

In [None]:
# Print the module tree instead of ending the cell with a bare `model`: a bare
# expression is stored in IPython's output history (Out / _), which would keep the
# model referenced after the later `del model`.
print(model)

# Step 10: Run Inference on Original Llama 2 Model

In [None]:
# @title
# NOTE(review): `model` is the same object used in the previous test cell (it is
# not reloaded in between), so this is not a comparison against an untouched base
# checkpoint. Confirm that this is the intent of this step.
prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\nWhat is siroline syrup used for? [/INST]"
num_new_tokens = 200  # generation budget, counted on top of the prompt

# max_new_tokens bounds only the generated continuation, so the prompt length does
# not have to be measured by hand. return_full_text=False makes the pipeline return
# the continuation alone, instead of stripping the prompt with str.replace (which
# removes every occurrence and silently does nothing if the decoded text differs).
# `gen` holds a reference to `model`; delete it along with `pipe` before freeing VRAM.
gen = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=num_new_tokens,
    return_full_text=False,
)
result = gen(prompt)
print(result[0]['generated_text'])

# Empty VRAM

In [None]:
# Drop the notebook's name for the model. Its memory can only be reclaimed once no
# other object still refers to it.
del model

In [None]:
# Both text-generation pipelines keep a reference to the model, so both have to go
# before its GPU memory can be released.
del pipe
# `gen` only exists if the Step 10 inference cell was run.
if "gen" in globals():
    del gen

In [None]:
# The trainer was constructed with the model, so it is deleted as well.
del trainer

In [None]:
import gc
# Run the garbage collector twice. The second call's return value is what the cell
# displays.
# NOTE(review): presumably the second pass is meant to catch objects released by
# the first; confirm whether it is needed.
gc.collect()
gc.collect()

In [None]:
import torch

# Release GPU memory
# Only has an effect after the objects above have actually been deleted and collected.
torch.cuda.empty_cache()

# Step 11: Merge Our Model

In [None]:
# Merge and save the fine-tuned model
# Step 1: reload the base checkpoint in FP16 (no quantization config this time).
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map=device_map,
    low_cpu_mem_usage=True,
    return_dict=True,
)

# Step 2: attach the weights saved under `new_model`, then merge them into the
# base model so that `model` ends up as the merged result.
model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token


# Save the Fine-Tuned LLM to Drive


In [None]:

# Relative path, so it resolves against the current working directory (the Drive
# project folder selected with %cd at the top of the notebook).
model_path = "llama-2-7b-custom100-FineTuned"  # change to your preferred path

# Save the merged model
# The tokenizer is written to the same directory as the merged model.
model.save_pretrained(model_path)
tokenizer.save_pretrained(model_path)