In [1]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import load_dataset, Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, pipeline
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [2]:
import pandas as pd

# Location of the labelled CSV. Set the SENTIMENT_DATA_PATH environment variable
# to run the notebook on another machine; the original local path is the default.
file_path = os.environ.get(
    "SENTIMENT_DATA_PATH",
    'C:/Users/20301162/Downloads/Sentiment/balanced_dataset_v1.csv',
)
df = pd.read_csv(file_path)

# Fail fast if the columns read by the prompt builders are missing.
missing_columns = {"conversation_text", "sentiment"} - set(df.columns)
assert not missing_columns, f"{file_path} is missing columns: {sorted(missing_columns)}"

print(df.head())


   Unnamed: 0                                  conversation_text sentiment
0        87.0  আমি একজন companyx গ্রাহক আমি অনেক দিন যাবত com...  negative
1        46.0  নিজ আইডি কার্ড দিয়ে ভেরিফাইড কর সিম কার্ডে com...   neutral
2         NaN  আমি যখন প্রয়োজন তখন কোম্পানীএক্স থেকে সহজেই ল...  positive
3        57.0  companyx এর লোন নেয় ব্যবস্থা জন্য আলহামদুলিল্ল...  positive
4        70.0  আমি companyxে লোন নিয়ে এক কিস্তি প্রধান করে বা...   neutral


In [3]:
# Authenticate against the Hugging Face Hub through the interactive notebook widget.
# NOTE(review): presumably required because the base model repository is gated — confirm.
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:


# Shuffle into a NEW frame so `df` keeps the rows exactly as loaded. Re-running
# this cell then reproduces the same split instead of reshuffling an
# already-shuffled frame (which would silently move rows between splits).
df_shuffled = df.sample(frac=1, random_state=85).reset_index(drop=True)

# Fractions of rows used for training and evaluation; the remainder is the test split.
train_size = 0.8
eval_size = 0.1

# Positional boundaries of the three consecutive slices.
n_rows = len(df_shuffled)
train_end = int(train_size * n_rows)
eval_end = train_end + int(eval_size * n_rows)

# Copies, so that columns added later do not write into `df_shuffled`.
X_train = df_shuffled.iloc[:train_end].copy()
X_eval = df_shuffled.iloc[train_end:eval_end].copy()
X_test = df_shuffled.iloc[eval_end:].copy()

# Every row must land in exactly one split.
assert len(X_train) + len(X_eval) + len(X_test) == n_rows

# Define the prompt generation functions
def generate_prompt(data_point):
    """Build the labelled prompt used for training and evaluation rows.

    The prompt has three lines: the classification instruction, a ``text:``
    line with ``data_point["conversation_text"]`` and a ``label:`` line with
    ``data_point["sentiment"]``. Leading and trailing whitespace of the whole
    prompt is stripped.
    """
    prompt_lines = (
        "Classify the text into neutral, positive, negative and return the answer as the corresponding text classification label.",
        f'text: {data_point["conversation_text"]}',
        f'label: {data_point["sentiment"]}',
    )
    return "\n".join(prompt_lines).strip()

def generate_test_prompt(data_point):
    """Build the unlabelled prompt used at prediction time.

    Same layout as the training prompt, but the ``label:`` line is left empty
    for the model to complete. Because the whole prompt is stripped, it ends
    with ``label:`` and no trailing space.
    """
    prompt_lines = (
        "Classify the text into neutral, positive, negative and return the answer as the corresponding text classification label.",
        f'text: {data_point["conversation_text"]}',
        "label: ",
    )
    return "\n".join(prompt_lines).strip()

# Generate prompts for training and evaluation data
X_train['text'] = X_train.apply(generate_prompt, axis=1)
X_eval['text'] = X_eval.apply(generate_prompt, axis=1)

# Generate test prompts and extract true labels
y_true = X_test['sentiment']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])


In [5]:
# Class balance of the training split.
X_train["sentiment"].value_counts()

sentiment
neutral     39
negative    35
positive    34
Name: count, dtype: int64

In [6]:
# Wrap the prompt column of each labelled split in a `datasets.Dataset`;
# only the 'text' column is carried over.
train_data, eval_data = (
    Dataset.from_pandas(split_frame[["text"]]) for split_frame in (X_train, X_eval)
)

In [7]:
# Hub identifier of the base checkpoint that is quantized here and fine-tuned further down.
base_model_name = "meta-llama/Meta-Llama-3.1-8B"


# bitsandbytes settings: 4-bit weights, "nf4" quantization type, no double
# quantization, float16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

# Load the causal LM with the quantization config above; device_map="auto"
# leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config, 
)

# Turn the generation cache off (gradient checkpointing is enabled in the
# TrainingArguments later in the notebook).
model.config.use_cache = False
# NOTE(review): presumably disables the tensor-parallel emulation path in the Llama code — confirm.
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Reuse the end-of-sequence token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
def predict(test, model, tokenizer):
    """Generate a sentiment label for every prompt in ``test``.

    Args:
        test: DataFrame with a ``text`` column holding prompts that end in ``label:``.
        model: Causal language model handed to the text-generation pipeline.
        tokenizer: Tokenizer handed to the same pipeline.

    Returns:
        A list with one entry per row of ``test``: ``"negative"``, ``"neutral"``
        or ``"positive"`` (first match in that order), or ``"none"`` when none
        of them occurs in the text after the last ``label:``.
    """
    categories = ["negative", "neutral", "positive"]

    # Build the pipeline once: it does not depend on the prompt, so constructing
    # it inside the loop repeated the same setup for every row.
    # NOTE(review): temperature only has an effect when sampling is enabled —
    # confirm the effective do_sample setting for this model.
    pipe = pipeline(task="text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=2,
                    temperature=0.1)

    y_pred = []
    for prompt in tqdm(test["text"]):
        result = pipe(prompt)
        # Keep whatever follows the last "label:" marker in the returned text.
        answer = result[0]['generated_text'].split("label:")[-1].strip().lower()

        # First category mentioned in the answer wins; "none" flags an unusable answer.
        y_pred.append(next((category for category in categories if category in answer), "none"))

    return y_pred

# Baseline: score the test prompts with the model as loaded, before trainer.train() runs.
y_pred = predict(X_test, model, tokenizer)

  attn_output = torch.nn.functional.scaled_dot_product_attention(
100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00, 11.08it/s]


In [9]:
def evaluate(y_true, y_pred):
    """Print overall accuracy, per-label accuracy, a classification report and a confusion matrix.

    Args:
        y_true: Iterable of ground-truth label strings.
        y_pred: Iterable of predicted label strings, aligned with ``y_true``.

    Any value other than "negative", "neutral" or "positive" (for example the
    "none" placeholder) is encoded as -1. The classification report and the
    confusion matrix are restricted to the three known labels.

    Returns:
        None. All results are printed.
    """
    labels = ["negative", "neutral", "positive"]
    label_ids = list(range(len(labels)))
    mapping = {label: idx for idx, label in enumerate(labels)}

    def encode(values):
        # An explicit int dtype keeps this working for empty input as well.
        return np.array([mapping.get(value, -1) for value in values], dtype=int)

    y_true_mapped = encode(y_true)
    y_pred_mapped = encode(y_pred)

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')

    # Accuracy restricted to the rows of each true label
    for label in sorted(set(y_true_mapped.tolist())):
        rows = y_true_mapped == label
        label_accuracy = accuracy_score(y_true_mapped[rows], y_pred_mapped[rows])
        # -1 marks an unrecognized true label; indexing labels[-1] would
        # mis-report it as "positive".
        label_name = labels[label] if label >= 0 else "<unrecognized>"
        print(f'Accuracy for label {label_name}: {label_accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=label_ids)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=label_ids)
    print('\nConfusion Matrix:')
    print(conf_matrix)

# Report the baseline metrics for the predictions made before fine-tuning.
evaluate(y_true, y_pred)

Accuracy: 0.071
Accuracy for label negative: 0.000
Accuracy for label neutral: 0.333
Accuracy for label positive: 0.000

Classification Report:
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         4
     neutral       0.20      0.33      0.25         3
    positive       0.00      0.00      0.00         7

   micro avg       0.20      0.07      0.11        14
   macro avg       0.07      0.11      0.08        14
weighted avg       0.04      0.07      0.05        14


Confusion Matrix:
[[0 2 0]
 [0 1 0]
 [0 2 0]]


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    """Collect the leaf names of every ``bnb.nn.Linear4bit`` module in ``model``.

    The leaf name is the last dot-separated component of the module's
    qualified name. ``lm_head`` is excluded. The result is an unordered list
    of unique names.
    """
    linear_4bit = bnb.nn.Linear4bit
    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, linear_4bit)
    }
    leaf_names.discard('lm_head')  # needed for 16 bit
    return list(leaf_names)
# Names of the 4-bit linear layers in the loaded model; passed to LoraConfig as target_modules.
modules = find_all_linear_names(model)
modules

['v_proj', 'q_proj', 'gate_proj', 'k_proj', 'up_proj', 'down_proj', 'o_proj']

In [11]:
# Directory given to TrainingArguments; the save cell at the end of the notebook writes here too.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
output_dir="C:/Users/20301162/Downloads/Sentiment/llama-3.1-preprocess_aug_finetuned"

# LoRA adapter settings for a causal LM: rank 64, alpha 16, no dropout, no bias
# terms trained, applied to the layer names collected in `modules`.
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules,
)

# Optimisation settings: 5 epochs, batch size 1 per device with 8 gradient
# accumulation steps, fp16, cosine schedule, evaluation once per epoch.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=5,                     
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    optim="adamw_torch",
    logging_steps=10,                       
    learning_rate=1e-4,                     
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=False,
    lr_scheduler_type="cosine",
    eval_strategy="epoch",                 
    eval_steps=None
)


# Supervised fine-tuning on the 'text' column of both datasets, without packing,
# with max_seq_length=512.
# NOTE(review): the cell output warns that these SFTTrainer arguments are
# deprecated in favour of SFTConfig — revisit when upgrading trl.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=512,
    packing=False,
    dataset_kwargs={
    "add_special_tokens": False,
    "append_concat_token": False,
    }
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/108 [00:00<?, ? examples/s]

Map:   0%|          | 0/13 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [12]:
# Run fine-tuning with the trainer configured above; the returned TrainOutput is shown as the cell result.
trainer.train()

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Epoch,Training Loss,Validation Loss
0,1.6368,1.085742
2,0.7267,0.910657
4,0.6086,0.903244


TrainOutput(global_step=65, training_loss=0.8812093514662522, metrics={'train_runtime': 132.7243, 'train_samples_per_second': 4.069, 'train_steps_per_second': 0.49, 'total_flos': 5721054320418816.0, 'train_loss': 0.8812093514662522, 'epoch': 4.814814814814815})

In [13]:
# Repeat the earlier test-set evaluation now that training has run, for comparison with the baseline.
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

100%|██████████████████████████████████████████████████████████████████████████████████| 14/14 [00:01<00:00,  8.73it/s]

Accuracy: 0.929
Accuracy for label negative: 1.000
Accuracy for label neutral: 1.000
Accuracy for label positive: 0.857

Classification Report:
              precision    recall  f1-score   support

    negative       1.00      1.00      1.00         4
     neutral       0.75      1.00      0.86         3
    positive       1.00      0.86      0.92         7

    accuracy                           0.93        14
   macro avg       0.92      0.95      0.93        14
weighted avg       0.95      0.93      0.93        14


Confusion Matrix:
[[4 0 0]
 [0 3 0]
 [0 1 6]]





In [14]:
# Save the trainer's model and the tokenizer files into output_dir.
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('C:/Users/20301162/Downloads/Sentiment/llama-3.1-preprocess_aug_finetuned\\tokenizer_config.json',
 'C:/Users/20301162/Downloads/Sentiment/llama-3.1-preprocess_aug_finetuned\\special_tokens_map.json',
 'C:/Users/20301162/Downloads/Sentiment/llama-3.1-preprocess_aug_finetuned\\tokenizer.json')