<a href="https://colab.research.google.com/github/ayyucedemirbas/llama2_instruction_tuning/blob/main/instruction_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!huggingface-cli login

In [None]:
!pip install datasets bitsandbytes trl

In [2]:
import pandas as pd
import numpy as np
import torch
from datasets import Dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
)
import warnings

In [3]:
warnings.filterwarnings("ignore")

In [4]:
df = pd.read_csv("amazon_reviews.csv")

df["instruction"] = (
    "Analyze the following product review and determine its sentiment: "
    "Review: " + df["reviewText"] +
    " Please reply with one of the following options only: Positive, Negative, or Neutral. "
    "The correct sentiment is: " + df["overall"].apply(lambda x: "Positive" if x >= 4 else ("Negative" if x <= 2 else "Neutral"))
)

custom_ds = pd.DataFrame()
custom_ds["text"] = df["instruction"]


if custom_ds.isnull().values.any():
    print("DataFrame contains NaN values. Dropping rows with NaN values...")
    df_cleaned = custom_ds.dropna()
else:
    print("DataFrame does not contain any NaN values.")
    df_cleaned = custom_ds

dataset = Dataset.from_pandas(df_cleaned)

DataFrame contains NaN values. Dropping rows with NaN values...


In [9]:
df_cleaned

Unnamed: 0,text
0,Analyze the following product review and deter...
1,Analyze the following product review and deter...
2,Analyze the following product review and deter...
3,Analyze the following product review and deter...
4,Analyze the following product review and deter...
...,...
994,Analyze the following product review and deter...
995,Analyze the following product review and deter...
996,Analyze the following product review and deter...
997,Analyze the following product review and deter...


In [7]:
dataset

Dataset({
    features: ['text', '__index_level_0__'],
    num_rows: 997
})

In [8]:
dataset[0]

{'text': 'Analyze the following product review and determine its sentiment: Review: We got this GPS for my husband who is an (OTR) over the road trucker.  Very Impressed with the shipping time, it arrived a few days earlier than expected...  within a week of use however it started freezing up... could of just been a glitch in that unit.  Worked great when it worked!  Will work great for the normal person as well but does have the "trucker" option. (the big truck routes - tells you when a scale is coming up ect...)  Love the bigger screen, the ease of use, the ease of putting addresses into memory.  Nothing really bad to say about the unit with the exception of it freezing which is probably one in a million and that\'s just my luck.  I contacted the seller and within minutes of my email I received a email back with instructions for an exchange! VERY impressed all the way around! Please reply with one of the following options only: Positive, Negative, or Neutral. The correct sentiment is

In [5]:
model_name = "meta-llama/Llama-2-7b-hf"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name, quantization_config=bnb_config, trust_remote_code=True
)
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
from peft import LoraConfig, get_peft_model

lora_alpha = 16
lora_dropout = 0.1
lora_r = 64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

In [7]:
from transformers import TrainingArguments

output_dir = "./results"
per_device_train_batch_size = 1
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 200
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 300
warmup_ratio = 0.03
lr_scheduler_type = "constant"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    report_to="none",
)

In [8]:
from trl import SFTTrainer


trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=training_arguments,
)

Applying chat template to train dataset:   0%|          | 0/997 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/997 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/997 [00:00<?, ? examples/s]

In [9]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

In [10]:
trainer.train()

Step,Training Loss
10,2.3782
20,2.2333
30,1.8216
40,1.5172
50,1.2117
60,2.1025
70,1.8663
80,1.6776
90,1.4127
100,1.1049


TrainOutput(global_step=300, training_loss=1.673521629969279, metrics={'train_runtime': 1320.0066, 'train_samples_per_second': 0.909, 'train_steps_per_second': 0.227, 'total_flos': 1.0996091960451072e+16, 'train_loss': 1.673521629969279})

In [11]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model
model_to_save.save_pretrained("outputs")

In [12]:
lora_config = LoraConfig.from_pretrained('outputs')
model = get_peft_model(model, lora_config)

In [14]:
text = dataset["text"][0]
device = "cuda:0"

In [18]:
inputs = tokenizer(text, return_tensors="pt").to(device)
outputs = model.generate(
    input_ids=inputs["input_ids"],
    attention_mask=inputs["attention_mask"],
    max_length=250,
    return_dict_in_generate=True,
    output_scores=True,
)

From v4.47 onwards, when a model cache is to be returned, `generate` will return a `Cache` instance instead by default (as opposed to the legacy tuple of tuples format). If you want to keep returning the legacy format, please set `return_legacy_cache=True`.


In [19]:
generated_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
print("Generated sentiment prediction:", generated_text)

Generated sentiment prediction: Analyze the following product review and determine its sentiment: Review: We got this GPS for my husband who is an (OTR) over the road trucker.  Very Impressed with the shipping time, it arrived a few days earlier than expected...  within a week of use however it started freezing up... could of just been a glitch in that unit.  Worked great when it worked!  Will work great for the normal person as well but does have the "trucker" option. (the big truck routes - tells you when a scale is coming up ect...)  Love the bigger screen, the ease of use, the ease of putting addresses into memory.  Nothing really bad to say about the unit with the exception of it freezing which is probably one in a million and that's just my luck.  I contacted the seller and within minutes of my email I received a email back with instructions for an exchange! VERY impressed all the way around! Please reply with one of the following options only: Positive, Negative, or Neutral. The

In [None]:
model.push_to_hub("ayyuce/llama2-amazon-product-review-sentiment", create_pr=1)