# Using LLaMA for sentiment analysis

1. First , Use to token from huggingface to login and use hugging face objects.

In [1]:
!pip install huggingface_hub
from huggingface_hub import notebook_login
notebook_login()

Collecting huggingface_hub
  Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface_hub
Successfully installed huggingface_hub-0.19.0


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

### Install the following libraries:
  1. **Accelerate** : is a library that enables the same PyTorch code to be run across any distributed configuration. It helps im faster
  training in case we have multiple GPU/CPU.

  2.**PEFT**(Parameter-Efficient-Fine-Tuning) is a library that adapts pre-trained Language models to various downstream tasks without finetuning all the models paramrters. It only fine tunes a small number of parameters thereby decreasing computational and storage costs because fine-tuning large scale PLMs is quite costly.

  3. **bitsandbytes** : is a lightweight wrapper around CUDA custom functions, in particular 8-bit optimizers, matrix multiplication (LLM.int8()), and quantization functions. It allows to run models stored in 4-bit precision: while 4-bit bitsandbytes stores weights in 4-bits, the computation still happens in 16 or 32-bit and here any combination can be chosen (float16, bfloat16, float32, and so on).

  4. **transformers:** Python library for NLP tasks like speech translation , text classification etc.
  
  5.  **trl** : full stack library by hugging face which provide tools to train transformer language models with reinforcement learning from from the Supervised Fine-tuning step (SFT), Reward Modeling step (RM) to the Proximal Policy Optimization (PPO) step.

In [2]:
pip install -q -U accelerate==0.23.0 peft==0.5.0 bitsandbytes==0.41.1 transformers==4.31 trl==0.7.2

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/258.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━[0m [32m143.4/258.1 kB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.6/85.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m78.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.0/124.0 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m53.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━

### Environment variables:

1. Cuda_visible_device means which GPU to use. Here 0 means first GPU to be used.

2. Tokenizers_parallelism false means dont parallelise the tokenization.

In [3]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Ignore warnings

In [4]:
import warnings
warnings.filterwarnings("ignore")

## Import everything else needed

In [5]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split

### Read data file with two columns: sentiment and text.

In [6]:
filename = "all-data.csv"
df = pd.read_csv(filename,
                 names=["sentiment", "text"],
                 encoding="utf-8", encoding_errors="replace")

#### Split the dataset in training and test set with 300 samples each stratified by sentiment such that all three sentiments( positive, negative and neutral) are there in training and test set.

In [7]:
X_train = list()
X_test = list()
for sentiment in ["positive", "neutral", "negative"]:
    train, test  = train_test_split(df[df.sentiment==sentiment],
                                    train_size=300,
                                    test_size=300,
                                    random_state=42)
    X_train.append(train)
    X_test.append(test)

### Shuffle the training set.

### Data not in train and test indexes should will be in eval dataset.

### For eval dataset, filter 50 samples with replacement for each sentiment .

In [8]:
X_train = pd.concat(X_train).sample(frac=1, random_state=10)
X_test = pd.concat(X_test)

eval_idx = [idx for idx in df.index if idx not in list(train.index) + list(test.index)]
X_eval = df[df.index.isin(eval_idx)]
X_eval = (X_eval
          .groupby('sentiment', group_keys=False)
          .apply(lambda x: x.sample(n=50, random_state=10, replace=True)))
X_train = X_train.reset_index(drop=True)

### Generate prompts for training and eval set to be used by llama, these prompts contains the expected answer we want our model to be fine-tuned with.

In [9]:
def generate_prompt(data_point):
    return f"""
            Analyze the sentiment of the news headline enclosed in square brackets,
            determine if it is positive, neutral, or negative, and return the answer as
            the corresponding sentiment label "positive" or "neutral" or "negative".

            [{data_point["text"]}] = {data_point["sentiment"]}
            """.strip()

def generate_test_prompt(data_point):
    return f"""
            Analyze the sentiment of the news headline enclosed in square brackets,
            determine if it is positive, neutral, or negative, and return the answer as
            the corresponding sentiment label "positive" or "neutral" or "negative".

            [{data_point["text"]}] = """.strip()

X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1),
                       columns=["text"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1),
                      columns=["text"])

In [10]:
X_train.head()

Unnamed: 0,text
0,Analyze the sentiment of the news headline enc...
1,Analyze the sentiment of the news headline enc...
2,Analyze the sentiment of the news headline enc...
3,Analyze the sentiment of the news headline enc...
4,Analyze the sentiment of the news headline enc...


### Transform prompts for test set to be used by llama. In this we will keep label as blank.

In [11]:
y_true = X_test.sentiment
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])

train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [12]:
X_test.head()

Unnamed: 0,text
567,Analyze the sentiment of the news headline enc...
1752,Analyze the sentiment of the news headline enc...
995,Analyze the sentiment of the news headline enc...
601,Analyze the sentiment of the news headline enc...
568,Analyze the sentiment of the news headline enc...


### Next we have an evaluation function to evaluate the model. This function firstly maps the values of differnet sentiments to integers . It finds accuracy of model, accuracy for each sentiment, classification report and confusion matrix.

In [13]:
def evaluate(y_true, y_pred):
    labels = ['positive', 'neutral', 'negative']
    mapping = {'positive': 2, 'neutral': 1, 'none':1, 'negative': 0}
    def map_func(x):
        return mapping.get(x, 1)

    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels

    for label in unique_labels:
        label_indices = [i for i in range(len(y_true))
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])
    print('\nConfusion Matrix:')
    print(conf_matrix)

### This is for using pretrained language model from hugging face .

In [14]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: write).
Your token has been saved to /root/.c

 ### Use the model name from hugging face from meta. Then instantiate a bit and byte config to be used when we instantiate pretrained model from AutoModelForCausalLLM  from hugging face.

 1. 7b-hf - It means 7 billion parameters and no RLHF.
 2. First the code loads the Llama-2 language model from the Hugging Face Hub.
 3. Then the code gets the float16 data type from the torch library. This is the data type that will be used for the computations.
 4. Next, it creates a BitsAndBytesConfig object with the following settings:

*   load_in_4bit: Load the model weights in 4-bit format.
*   bnb_4bit_quant_type: Use the "nf4" quantization type. 4-bit NormalFloat (NF4), is a new data type that is information theoretically optimal for normally distributed weights.
*   bnb_4bit_compute_dtype: Use the float16 data type for computations.
*   bnb_4bit_use_double_quant: Do not use double quantization (reduces the average memory footprint by quantizing also the quantization constants and saves an additional 0.4 bits per parameter.).

Then the code creates a AutoModelForCausalLM object from the pre-trained Llama-2 language model, using the BitsAndBytesConfig object for quantization.

In [15]:
model_name = "meta-llama/Llama-2-7b-hf"

compute_dtype = getattr(torch, "float16")

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False,
    use_auth_token=True
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
    use_auth_token=True
)

(…)a/Llama-2-7b-hf/resolve/main/config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

(…)esolve/main/model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

(…)b-hf/resolve/main/generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

### Disable cache for model.

### Sets pre-training token prob to 1.

### Loads the tokenizer for llama.

### Set the padded token to be EOS toen and padding side to be right for llama


In [16]:
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name,
                                          trust_remote_code=True,
                                         )
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

(…)7b-hf/resolve/main/tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

(…)lama-2-7b-hf/resolve/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

(…)-hf/resolve/main/special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

### Next , there is predict function which used pretrained model on our tasks and predict the sentiment.
1. First take test data and for each datapoint retrieve the text column.
2. Then , use pipeline from hugging face to generate text from model using above prompt and gather that in result.
3. Extract the predicted sentiment label from text and append it to ypred

In [17]:
def predict(test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]
        pipe = pipeline(task="text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_new_tokens = 1,
                        temperature = 0.0,
                       )
        result = pipe(prompt)
        answer = result[0]['generated_text'].split("=")[-1]
        if "positive" in answer:
            y_pred.append("positive")
        elif "negative" in answer:
            y_pred.append("negative")
        elif "neutral" in answer:
            y_pred.append("neutral")
        else:
            y_pred.append("none")
    return y_pred

In [18]:
y_pred = predict(test, model, tokenizer)

  0%|          | 0/900 [00:00<?, ?it/s]Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.
100%|██████████| 900/900 [03:50<00:00,  3.90it/s]


#### Call the evaluate function and we can see results are terrible because we have not done any fine-tuning

In [19]:
evaluate(y_true, y_pred)

Accuracy: 0.376
Accuracy for label 0: 0.023
Accuracy for label 1: 0.940
Accuracy for label 2: 0.163

Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.02      0.05       300
           1       0.34      0.94      0.50       300
           2       0.67      0.16      0.26       300

    accuracy                           0.38       900
   macro avg       0.63      0.38      0.27       900
weighted avg       0.63      0.38      0.27       900


Confusion Matrix:
[[  7 286   7]
 [  1 282  17]
 [  0 251  49]]


## Fine-Tuning
Next we initialize a simple fine-tuning Trainer for training LLM using PEFT(parameter efficient fine tuning) method which operates on reduced number of parameters as compared to overall model size.
It refines less paramaters of given model while keeping majority fixed.
This reduces computational and storage expenses.

1. We are using LoRA(Low Rank Adaptation) method for PEFT which represent the weight updates with two smaller matrices (called update matrices) through low-rank decomposition. To produce the final results, both the original and the adapted weights are combined.
*   lora_alpha: learning rate for lora matrices weights
*   lora dropout: dropout for lora weights
*   r: Rank of matrices
*   bias: If bias also needs to be learned for matrices.

2. Initialize Training arguments.

3. Initialize a SFTtrainer using above training arguments,PEFT config, Language model and tokenizer.




In [20]:
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

training_arguments = TrainingArguments(
    output_dir="logs",
    num_train_epochs=3,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8, # 4
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    evaluation_strategy="epoch"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
    max_seq_length=1024,
)

Map:   0%|          | 0/900 [00:00<?, ? examples/s]

Map:   0%|          | 0/150 [00:00<?, ? examples/s]

### Now, train the model and save it.

In [21]:
# Train model
trainer.train()

# Save trained model
trainer.model.save_pretrained("trained-model")

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Epoch,Training Loss,Validation Loss
0,0.8304,0.734711
2,0.7308,0.711401
2,0.7003,0.708836


### Next,we use this finetuned model to predict the sentiment again and evaluate the results.

### We can clearly see , performance has been greatly improved with net accuracy 0.82

In [22]:
y_pred = predict(test, model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████| 900/900 [05:32<00:00,  2.71it/s]

Accuracy: 0.828
Accuracy for label 0: 0.930
Accuracy for label 1: 0.813
Accuracy for label 2: 0.740

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93       300
           1       0.72      0.81      0.77       300
           2       0.84      0.74      0.79       300

    accuracy                           0.83       900
   macro avg       0.83      0.83      0.83       900
weighted avg       0.83      0.83      0.83       900


Confusion Matrix:
[[279  20   1]
 [ 16 244  40]
 [  5  73 222]]





### Next, create a dataframe with text and predicted and actual label to analyze the results

In [23]:
evaluation = pd.DataFrame({'text': X_test["text"],
                           'y_true':y_true,
                           'y_pred': y_pred},
                         )
evaluation.to_csv("test_predictions.csv", index=False)

In [24]:
evaluation

Unnamed: 0,text,y_true,y_pred
567,Analyze the sentiment of the news headline enc...,positive,positive
1752,Analyze the sentiment of the news headline enc...,positive,positive
995,Analyze the sentiment of the news headline enc...,positive,neutral
601,Analyze the sentiment of the news headline enc...,positive,positive
568,Analyze the sentiment of the news headline enc...,positive,neutral
...,...,...,...
4219,Analyze the sentiment of the news headline enc...,negative,negative
4814,Analyze the sentiment of the news headline enc...,negative,negative
4059,Analyze the sentiment of the news headline enc...,negative,negative
4720,Analyze the sentiment of the news headline enc...,negative,negative
