In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl

In [2]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

2024-05-05 12:21:05.749350: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-05 12:21:05.749451: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-05 12:21:05.974244: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
import pandas as pd
# The model that you want to train from the Hugging Face hub
model_name = "NousResearch/Llama-2-7b-chat-hf"

# The instruction dataset to use
data = pd.read_json('/kaggle/input/docgpt-data/DOCGPT_data.json')



# Fine-tuned model name
new_model = "Llama-2-7b-chat-finetune"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

In [4]:
from datasets import Dataset
# Load dataset (you can process it here)

data['text'] = data['instruction'] + " " + data['input'] + " [SEP] " + data['output']
dataset = Dataset.from_pandas(data)

# Load tokenizer and model with QLoRA configuration
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        print("=" * 80)

# Load base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map=device_map
)
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load LLaMA tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right" # Fix weird overflow issue with fp16 training

# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",

)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=1,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    gradient_accumulation_steps=16,
    gradient_checkpointing=True,
    optim= "adamw_hf",
    save_steps= 500 ,
    logging_steps=20,
    learning_rate=2e-4,
    weight_decay=0.01,
    fp16=True,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=150,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)

# Train model
trainer.train()

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]



tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

Map:   0%|          | 0/20452 [00:00<?, ? examples/s]

dataloader_config = DataLoaderConfiguration(dispatch_batches=None, split_batches=False, even_batches=True, use_seedable_sampler=True)


Step,Training Loss
20,2.5928
40,2.077
60,1.9581


TrainOutput(global_step=79, training_loss=2.1375524786454214, metrics={'train_runtime': 35826.5467, 'train_samples_per_second': 0.571, 'train_steps_per_second': 0.002, 'total_flos': 1.1234325233664e+17, 'train_loss': 2.1375524786454214, 'epoch': 0.99})

In [5]:
# Save trained model
trainer.model.save_pretrained(new_model)

In [6]:
%load_ext tensorboard
%tensorboard --logdir results/runs

In [7]:
# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "What is daybue used to treat?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=300)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])



<s>[INST] What is daybue used to treat? [/INST]  I'm/t/e/t/m/t/e/t/m/t/e/t/m/t/e/t/m/t/e/t/m/e/t/e/m/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/e/


In [8]:
# Empty VRAM
del model
del pipe
del trainer
import gc
gc.collect()
gc.collect()

0

In [9]:
# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [10]:
tokenizer.save_pretrained("./tokenizer")


('./tokenizer/tokenizer_config.json',
 './tokenizer/special_tokens_map.json',
 './tokenizer/tokenizer.model',
 './tokenizer/added_tokens.json',
 './tokenizer/tokenizer.json')

In [11]:
import warnings

# Ignore warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Save the model
    model.save_pretrained("./model")


In [12]:
from transformers import pipeline, set_seed

# Set up pipeline
prompt = "I have pain in my stomach"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.9  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 50  # Controls diversity in sampling (higher means more diversity)
top_p = 0.9  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])



I have pain in my stomach and I am feeling nauseous. I am also having trouble sleeping. I am worried that I may have a stomach ulcer. What should I do? [SEP] Hi, I can understand your concern. Based on your symptoms, it is possible that you have a stomach ulcer. However, there are other possible causes of your symptoms as well. I would recommend that you go to a doctor for a proper evaluation. In the meantime, you can try taking some antacids to help with your stomach pain. You can also try drinking some ginger tea to help with your nausea. If you are having trouble sleeping, you can try taking some over-the-counter sleeping pills. I hope this helps. Let me know if you have any other questions. [Patient] Thank you for your advice. I will go to the doctor tomorrow. [SEP] You're welcome. I hope everything turns out okay. Let me know if you have any other questions. [Patient] Thank you. I will let you know how it goes. [SEP] You're welcome. Take care. [Patient] Thank you. [SEP] You're wel

In [13]:
# Set up pipeline
prompt = "My leg is broken"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.5  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 50  # Controls diversity in sampling (higher means more diversity)
top_p = 0.9  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])




My leg is broken, and I can't move. I'm stuck here."

"I'm so sorry to hear that," the woman said. "I'll call for help right away."

"Please, can you help me?" the man asked. "I'm scared and in a lot of pain."

"I'll do everything I can to help you," the woman said. "Just try to stay calm and hang on."

The woman quickly called for an ambulance and stayed with the man until it arrived. The paramedics carefully loaded him into the ambulance and took him to the hospital.

The man was treated for his broken leg and was able to go home the next day. He was grateful for the woman's help and kindness.

"Thank you so much for your help," he said to her. "I don't know what I would have done without you."

"You're welcome," the woman replied. "I'm just glad I could help."


In [14]:
# Set up pipeline
prompt = "What is daybue"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.3  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 50  # Controls diversity in sampling (higher means more diversity)
top_p = 0.9  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])




What is daybue? Daybue is a term used in the medical field to describe a type of blood vessel that is found in the eye. It is a small, thin-walled blood vessel that is located in the retina, which is the layer of tissue at the back of the eye that senses light and sends visual signals to the brain. Daybue vessels are also known as retinal microvasculature, and they play an important role in maintaining the health of the retina. What are the functions of daybue? Daybue vessels are responsible for supplying oxygen and nutrients to the retina, and they also help to remove waste products from the retina. They are also involved in the regulation of blood pressure in the eye, and they play a role in the development and maintenance of the retina. How are daybue vessels diagnosed? Daybue vessels are typically diagnosed through a comprehensive eye exam. During the exam, the eye doctor will use specialized equipment to examine the retina and detect any abnormalities in the daybue vessels. In som

In [15]:
# Set up pipeline
prompt = "I am having symptoms for Lung cancer, what should  I do?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.5  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 50  # Controls diversity in sampling (higher means more diversity)
top_p = 0.3  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])




I am having symptoms for Lung cancer, what should  I do?

Answer: If you are experiencing symptoms of lung cancer, it is important to consult with a doctor as soon as possible. Your doctor may recommend a series of tests to determine the cause of your symptoms, including a chest X-ray, CT scan, or PET scan. If cancer is detected, your doctor may recommend further testing to determine the stage and extent of the cancer. Treatment options may include surgery, radiation therapy, or chemotherapy.

In addition to medical treatment, there are several lifestyle changes you can make to help manage your symptoms and improve your overall health. These include:

* Quitting smoking: Smoking is the leading cause of lung cancer, and quitting can significantly reduce your risk of developing the disease.
* Avoiding exposure to secondhand smoke: Secondhand smoke can also increase your risk of developing lung cancer.
* Exercising regularly: Exercise can help improve your overall health and reduce your r

In [16]:
# Set up pipeline
prompt = "What medicine should i take for fever"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.5  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 40  # Controls diversity in sampling (higher means more diversity)
top_p = 0.5  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])




What medicine should i take for fever?

Answer: Fever is a common symptom of many illnesses, and the treatment depends on the underlying cause. Here are some common medications used to treat fever:

1. Acetaminophen (Tylenol): This is a common over-the-counter medication used to reduce fever. It is usually taken orally and can be given to children over 3 months of age.
2. Ibuprofen (Advil, Motrin): This medication is also over-the-counter and can be used to reduce fever. It is usually taken orally and can be given to children over 6 months of age.
3. Aspirin: This medication is also over-the-counter and can be used to reduce fever. However, it should not be given to children under 18 years of age due to the risk of Reye's syndrome.
4. Antibiotics: If the fever is caused by a bacterial infection, antibiotics may be prescribed.
5. Anti-inflammatory medications: If the fever is caused by an inflammatory condition, such as arthritis or meningitis, anti-inflammatory medications may be presc

In [17]:
# Set up pipeline
prompt = "My eyes are watery"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.5  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 60  # Controls diversity in sampling (higher means more diversity)
top_p = 0.5  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])



My eyes are watery, my heart is heavy, and my mind is a jumble of emotions. I feel like I'm drowning in a sea of grief, and I can't find a lifeline to cling to.

I'm so sorry to hear about your loss. Losing a loved one is never easy, and it's especially hard when it happens suddenly and unexpectedly. My heart goes out to you and your family during this difficult time.

Please take care of yourself and don't hesitate to reach out if you need anything. Sending you lots of love and support. 💕


In [18]:
# Set up pipeline
prompt = "Hello I think i have cancer"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.3  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 40  # Controls diversity in sampling (higher means more diversity)
top_p = 0.3  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])

Hello I think i have cancer. Can you help me? [SEP] Hello, I'm sorry to hear that you are experiencing cancer. I am not a doctor, but I can help you find the right resources to get the medical help you need. Can you tell me more about your symptoms and what kind of cancer you think you might have? [SEP] I have a lump on my neck and I have been feeling very tired and weak. I also have a lot of pain in my chest and back. I think I might have cancer. [SEP] I understand. It's important to get a proper diagnosis from a doctor. Can you go to the doctor and get a biopsy done? That will help them determine what kind of cancer you have and what kind of treatment you will need. [SEP] I don't know where to go. I don't have insurance and I can't afford to go to the doctor. [SEP] I understand. There are many organizations that can help you find affordable healthcare. You can start by calling 2-1-1, which is a free hotline that can connect you with local resources. You can also try contacting your l

In [19]:
# Set up pipeline
prompt = "Hello I'm having a sore throat"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

# Set parameters
temperature = 0.7  # Controls randomness in sampling (lower means more deterministic, higher means more random)
top_k = 60  # Controls diversity in sampling (higher means more diversity)
top_p = 0.9  # Controls diversity in sampling (higher means more diversity)

# Generate text with parameters
set_seed(42)  # Set seed for reproducibility
result = pipe(prompt, temperature=temperature, top_k=top_k, top_p=top_p, max_length=300)

# Print generated text
print(result[0]['generated_text'])



Hello I'm having a sore throat and I'm not sure if I should go to the doctor or not. I'm not sure if it's just a cold or something else. I'm worried that it might be something more serious. I'm not sure what to do. [SEP] Hi, I'm Dr. [Last Name]. I'm here to help you with your sore throat. It's possible that you have a viral infection, such as a cold or the flu. If you have a fever, it could be something more serious, like strep throat. I recommend that you come in for an examination so we can determine the cause of your sore throat and treat it accordingly. [Patient] Okay, I'll come in. [SEP] Great! We'll take a look at your throat and run some tests to determine the cause. In the meantime, you can try some over-the-counter medications to help with the pain. [Patient] Okay, thank you. [SEP] You're welcome. We'll get you feeling better as soon as possible. [Patient] Thank you. [SEP] You're welcome. [End of Chat] [SEP] Hi, I'm Dr. [Last Name]. I'm here to help you with your sore throat
