In [1]:
%pip install \
accelerate==0.23.0 \
bitsandbytes==0.41.1 \
datasets==2.13.0 \
openai==0.28.1 \
peft==0.4.0 \
safetensors==0.4.0 \
transformers==4.34.0 \
trl==0.4.7


Collecting accelerate==0.23.0
  Using cached accelerate-0.23.0-py3-none-any.whl.metadata (18 kB)
Collecting bitsandbytes==0.41.1
  Using cached bitsandbytes-0.41.1-py3-none-any.whl.metadata (9.8 kB)
Collecting safetensors==0.4.0
  Using cached safetensors-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting transformers==4.34.0
  Using cached transformers-4.34.0-py3-none-any.whl.metadata (121 kB)
Collecting tokenizers<0.15,>=0.14 (from transformers==4.34.0)
  Using cached tokenizers-0.14.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting huggingface-hub (from accelerate==0.23.0)
  Using cached huggingface_hub-0.17.3-py3-none-any.whl.metadata (13 kB)
Using cached accelerate-0.23.0-py3-none-any.whl (258 kB)
Using cached bitsandbytes-0.41.1-py3-none-any.whl (92.6 MB)
Using cached safetensors-0.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
Using cached transformers-4.34.0-py3-none-any.whl (7

In [2]:
%pip install py7zr

Note: you may need to restart the kernel to use updated packages.


In [3]:
import pandas as pd
from datasets import Dataset

# Load the CSV file into a pandas DataFrame
df = pd.read_csv("MeQSum.csv")

# Convert the DataFrame to a datasets.Dataset object
dataset = Dataset.from_pandas(df)

# Split the dataset into training and test datasets
train_dataset = dataset.select(range(800))
test_dataset = dataset.select(range(800, len(dataset)))

# Print information about the datasets
print("Train Dataset:")
print(train_dataset)

print("\nTest Dataset:")
print(test_dataset)


  from .autonotebook import tqdm as notebook_tqdm


Train Dataset:
Dataset({
    features: ['CHQ', 'Summary'],
    num_rows: 800
})

Test Dataset:
Dataset({
    features: ['CHQ', 'Summary'],
    num_rows: 200
})


In [4]:
print(train_dataset)

Dataset({
    features: ['CHQ', 'Summary'],
    num_rows: 800
})


In [5]:
train_dataset[0]

{'CHQ': 'SUBJECT: who and where to get cetirizine - D\nMESSAGE: I need/want to know who manufscturs Cetirizine. My Walmart is looking for a new supply and are not getting the recent',
 'Summary': 'Who manufactures cetirizine?'}

In [7]:
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
import torch
from peft import prepare_model_for_kbit_training

model_id = "meta-llama/Llama-2-7b-chat-hf"
access_token = "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"  # Replace with your actual token

# Set up the configuration for quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Authenticate and load the model
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    use_cache=False,
    device_map="auto",
    use_auth_token=access_token  # Use the token for authentication
)

model = prepare_model_for_kbit_training(model)

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.36s/it]


In [8]:
def prompt_formatter(sample):
	return f"""<s>### Instruction:
You are a helpful, respectful and honest assistant. \
Your task is to summarize the following consumer health query. \
Your answer should be based on the provided consumer health query only.

### Consumer Health Query:
{sample['CHQ']}

### Summary:
{sample['Summary']} </s>"""

n = 0
print(prompt_formatter(train_dataset[n]))

<s>### Instruction:
You are a helpful, respectful and honest assistant. Your task is to summarize the following consumer health query. Your answer should be based on the provided consumer health query only.

### Consumer Health Query:
SUBJECT: who and where to get cetirizine - D
MESSAGE: I need/want to know who manufscturs Cetirizine. My Walmart is looking for a new supply and are not getting the recent

### Summary:
Who manufactures cetirizine? </s>


In [9]:
!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/bio/.cache/huggingface/token
Login successful


In [10]:
%load_ext autoreload
%autoreload 2

In [11]:
from transformers import TrainingArguments, AutoTokenizer
from peft import LoraConfig, get_peft_model
from trl import SFTTrainer

#
# construct a Peft model.
# the QLoRA paper recommends LoRA dropout = 0.05 for small models (7B, 13B)
#
peft_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, peft_config)

#
# set up the trainer
#
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

args = TrainingArguments(
    output_dir="llama2-7b-chat-meqsum",
    num_train_epochs=2,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    logging_steps=4,
    save_strategy="epoch",
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    bf16=True,  # Disable bf16 precision
    fp16=False,   # Enable fp16 precision
    tf32=True,  # Disable tf32 precision (optional, depends on your setup)
    max_grad_norm=0.3,
    warmup_ratio=0.03,
    lr_scheduler_type="constant",
    disable_tqdm=False,
    report_to="none",
)


In [12]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    peft_config=peft_config,
    max_seq_length=1024,
    tokenizer=tokenizer,
    packing=True,
    formatting_func=prompt_formatter,
    args=args,
)

In [14]:
import gc

gc.collect()

torch.cuda.empty_cache()

In [13]:
trainer.train()

  0%|          | 0/200 [00:00<?, ?it/s]You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
  2%|▏         | 4/200 [01:09<56:03, 17.16s/it]  

{'loss': 2.0949, 'learning_rate': 0.0002, 'epoch': 0.04}


  4%|▍         | 8/200 [02:17<54:15, 16.95s/it]

{'loss': 1.9086, 'learning_rate': 0.0002, 'epoch': 0.08}


  6%|▌         | 12/200 [03:24<52:59, 16.91s/it]

{'loss': 1.8385, 'learning_rate': 0.0002, 'epoch': 0.12}


  8%|▊         | 16/200 [04:32<51:51, 16.91s/it]

{'loss': 1.683, 'learning_rate': 0.0002, 'epoch': 0.16}


 10%|█         | 20/200 [05:36<49:05, 16.37s/it]

{'loss': 1.6814, 'learning_rate': 0.0002, 'epoch': 1.02}


 12%|█▏        | 24/200 [06:43<49:03, 16.73s/it]

{'loss': 1.5921, 'learning_rate': 0.0002, 'epoch': 1.06}


 14%|█▍        | 28/200 [07:51<48:18, 16.85s/it]

{'loss': 1.5701, 'learning_rate': 0.0002, 'epoch': 1.1}


 16%|█▌        | 32/200 [08:58<47:18, 16.90s/it]

{'loss': 1.5774, 'learning_rate': 0.0002, 'epoch': 1.15}


 18%|█▊        | 35/200 [09:45<45:58, 16.72s/it]

{'train_runtime': 585.1593, 'train_samples_per_second': 2.734, 'train_steps_per_second': 0.342, 'train_loss': 1.7266004834856306, 'epoch': 1.18}





TrainOutput(global_step=35, training_loss=1.7266004834856306, metrics={'train_runtime': 585.1593, 'train_samples_per_second': 2.734, 'train_steps_per_second': 0.342, 'train_loss': 1.7266004834856306, 'epoch': 1.18})

# 3. Run inference using the fine-tuned model

In [14]:
trainer.save_model()

In [15]:
import torch
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

model_folder = "llama2-7b-chat-meqsum"

# load both the adapter and the base model
model = AutoPeftModelForCausalLM.from_pretrained(
    model_folder,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    load_in_4bit=True,
    device_map='auto'
)
# tokenizer = AutoTokenizer.from_pretrained(model_folder)


Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.19s/it]


In [16]:
print(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): Linear4bit(
                in_features=4096, out_features=4096, bias=False
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.05, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
              (v_proj): Linear4

In [19]:
sample = test_dataset[1]

prompt = f"""### Instruction:
You are a helpful, respectful and honest assistant. \
Your task is to summarize the following consumer  health query. \
Your answer should be based on the provided text only.

### Consumer Health Query:
{sample['CHQ']}

### Summary:
"""

print(prompt)

### Instruction:
You are a helpful, respectful and honest assistant. Your task is to summarize the following consumer  health query. Your answer should be based on the provided text only.

### Consumer Health Query:
SUBJECT: Questions (see in comment box) on "Vistaril"
MESSAGE: I have a few Qs related to "Vistaril":
1. Is "Vistaril" a gluten-free product?
2. If not, is it because some of the raw materials in its ingredients that is gluten related?
3. If yes to Q1, is it a "Certified" gluten-free product and what kind of certification it has?
4. For capsule form, does it contain any kind of starch? If it does, what starch is it?
5.  Can you email to me a list of the ingredients in this drug?
Thank you very much!

### Summary:



In [20]:
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.cuda()
outputs = model.generate(input_ids=input_ids, max_new_tokens=50, temperature=0.7)

print('Output:\n',
      tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0][len(prompt):])
print('\nGround truth:\n', sample['Summary'])


Output:
 What is the ingredients in Vistaril and is it gluten-free? 

Ground truth:
 What are the ingredients of vistaril and is it gluten and starch free?
