!pip install -q -U -i https://pypi.org/simple/ bitsandbytes
!pip install -q -U transformers=="4.46.0"
!pip install -q -U accelerate
!pip install -q -U datasets
!pip install -q -U trl
!pip install -q -U peft
!pip install -q -U tensorboard
!pip install -q -U einops

In [39]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [40]:
import warnings
warnings.filterwarnings("ignore")

In [41]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer, 
                          BitsAndBytesConfig, 
                          TrainingArguments, 
                          pipeline, 
                          logging)
from sklearn.metrics import (accuracy_score, 
                             classification_report, 
                             confusion_matrix)
from sklearn.model_selection import train_test_split

In [42]:
from transformers import BitsAndBytesConfig

In [43]:
from datasets import load_dataset
import pandas as pd

# Load dataset
ds = load_dataset("NebulaByte/E-Commerce_Customer_Support_Conversations")

# Convert to pandas DataFrame
df = pd.DataFrame(ds['train'])

KeyboardInterrupt: 

In [None]:
df.head()


Unnamed: 0,issue_area,issue_category,issue_sub_category,issue_category_sub_category,customer_sentiment,product_category,product_sub_category,issue_complexity,agent_experience_level,agent_experience_level_desc,conversation
0,Login and Account,Mobile Number and Email Verification,Verification requirement for mobile number or ...,Mobile Number and Email Verification -> Verifi...,neutral,Appliances,Oven Toaster Grills (OTG),medium,junior,"handles customer inquiries independently, poss...",Agent: Thank you for calling BrownBox Customer...
1,Cancellations and returns,Pickup and Shipping,Reasons for being asked to ship the item,Pickup and Shipping -> Reasons for being asked...,neutral,Electronics,Computer Monitor,less,junior,"handles customer inquiries independently, poss...",Agent: Thank you for calling BrownBox customer...
2,Cancellations and returns,Replacement and Return Process,Inability to click the 'Cancel' button,Replacement and Return Process -> Inability to...,neutral,Appliances,Juicer/Mixer/Grinder,medium,experienced,"confidently handles complex customer issues, e...",Agent: Thank you for calling BrownBox Customer...
3,Login and Account,Login Issues and Error Messages,Error message regarding exceeded attempts to e...,Login Issues and Error Messages -> Error messa...,neutral,Appliances,Water Purifier,less,inexperienced,"may struggle with ambiguous queries, rely on c...","Customer: Hi, I am facing an issue while loggi..."
4,Order,Order Delivery Issues,Delivery not attempted again,Order Delivery Issues -> Delivery not attempte...,negative,Electronics,Bp Monitor,medium,experienced,"confidently handles complex customer issues, e...",Agent: Thank you for contacting BrownBox custo...


In [7]:
df = df[["customer_sentiment", "conversation"]]

In [8]:
df.head()

Unnamed: 0,customer_sentiment,conversation
0,neutral,Agent: Thank you for calling BrownBox Customer...
1,neutral,Agent: Thank you for calling BrownBox customer...
2,neutral,Agent: Thank you for calling BrownBox Customer...
3,neutral,"Customer: Hi, I am facing an issue while loggi..."
4,negative,Agent: Thank you for contacting BrownBox custo...


In [9]:
print(df['customer_sentiment'].value_counts())


customer_sentiment
neutral       552
negative      316
frustrated    105
positive       27
Name: count, dtype: int64


In [10]:
print(df.isnull().sum())

customer_sentiment    0
conversation          0
dtype: int64


In [11]:
print(df.duplicated().sum())

1


In [12]:
df.customer_sentiment.unique()

array(['neutral', 'negative', 'frustrated', 'positive'], dtype=object)

In [37]:
df.rename(columns={'customer_sentiment': 'text'}, inplace=True)

In [None]:
X_train = list()
X_test = list()
for sentiment in ['neutral', 'negative', 'frustrated', 'positive']:
    train, test  = train_test_split(df[df.customer_sentiment==sentiment], 
                                    train_size=0.6,
                                    test_size=0.4,
                                    shuffle=True, 
                                    random_state=42)
    X_train.append(train)
    X_test.append(test)
# Combine splits into DataFrames

X_train = pd.concat(X_train).sample(frac=1, random_state=10)
X_test = pd.concat(X_test)
print("Training set size:", len(X_train))
print("Test set size:", len(X_test))


Training set size: 599
Test set size: 401


In [14]:
# Create evaluation set indices and sample 50 examples per sentiment

X_train.shape

(599, 2)

In [15]:
eval_idx = [idx for idx in df.index if idx not in list(train.index) + list(test.index)]
X_eval = df[df.index.isin(eval_idx)]
X_eval = (X_eval
          .groupby('customer_sentiment', group_keys=False)
          .apply(lambda x: x.sample(n=50, random_state=10, replace=True)))
X_train = X_train.reset_index(drop=True)


In [16]:
def generate_prompt(data_point):
    return f"""
    Based on the following conversation:

    Conversation:  "{data_point['text']}"

    Determine the overall sentiment of the conversation. Choose from the following options:
    - Neutral
    - Negative
    - Frustrated
    - Positive

    Solution: The correct sentiment is "{data_point['text']}"
    """.strip()

def generate_test_prompt(data_point):
    return f"""
    Based on the following conversation:

    Conversation:"{data_point['text']}"

    Determine the overall sentiment of the conversation. Choose from the following options:
    - Neutral
    - Negative
    - Frustrated
    - Positive

    Solution: The correct sentiment is 
    """.strip()
X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1), 
                       columns=["text"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1), 
                      columns=["text"])

y_true = X_test.customer_sentiment
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])

train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [17]:
print(train_data[0])  # Prints the first entry in the training set
print(eval_data[0])   # Prints the first entry in the evaluation set

{'text': 'Based on the following conversation:\n\n    Conversation:  "Agent: Thank you for calling BrownBox Customer Support. My name is John. How can I assist you today?\n\nCustomer: Hi John, my name is Alex, and I have a question about the loyalty program.\n\nAgent: Hi Alex, I\'m here to help. Could you please provide me with your account number or email address so I can look into it?\n\nCustomer: Sure, my email address is alex@gmail.com.\n\nAgent: Thank you for providing that information, Alex. How can I assist you with the loyalty program?\n\nCustomer: I recently purchased a tablet from BrownBox, and I wanted to know how many loyalty points I can earn from it.\n\nAgent: I understand your concern, Alex. May I know the model of your tablet?\n\nCustomer: It\'s an iPad Pro.\n\nAgent: Thank you for providing that information, Alex. You can earn 10,000 loyalty points for purchasing an iPad Pro. These points can be redeemed for discounts or free products.\n\nCustomer: That\'s great to kno

In [18]:
# Ensure that 'conversation' column exists in X_train and X_eval before applying the function


# Apply the generate_test_prompt function to X_test, creating 'conversation' column
if 'conversation' not in X_test.columns:
    print("Column 'conversation' not found in X_test.")
else:
    X_test = pd.DataFrame(X_test, columns=["text"])

# Ensure you have the correct structure after applying the function
print(X_train.head())
print(X_eval.head())
print(X_test.head())

Column 'conversation' not found in X_test.
                                                text
0  Based on the following conversation:\n\n    Co...
1  Based on the following conversation:\n\n    Co...
2  Based on the following conversation:\n\n    Co...
3  Based on the following conversation:\n\n    Co...
4  Based on the following conversation:\n\n    Co...
                                                  text
146  Based on the following conversation:\n\n    Co...
937  Based on the following conversation:\n\n    Co...
196  Based on the following conversation:\n\n    Co...
693  Based on the following conversation:\n\n    Co...
298  Based on the following conversation:\n\n    Co...
                                                  text
989  Based on the following conversation:\n\n    Co...
141  Based on the following conversation:\n\n    Co...
243  Based on the following conversation:\n\n    Co...
139  Based on the following conversation:\n\n    Co...
493  Based on the following conver

In [19]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import numpy as np

def evaluate(y_true, y_pred):
    # Define label mapping
    mapping = {'positive': 2, 'neutral': 1, 'negative': 0, 'frustrated':3}
    
    # Map the string labels to numeric labels
    y_true = np.array([mapping.get(x, 1) for x in y_true])
    y_pred = np.array([mapping.get(x, 1) for x in y_pred])
    
    # Calculate overall accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Classification report (includes precision, recall, f1-score for each label)
    print('\nClassification Report:')
    print(classification_report(y_true=y_true, y_pred=y_pred, target_names=['negative', 'neutral', 'positive','frustrated']))
    
    # Confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2])
    print('\nConfusion Matrix:')
    print(conf_matrix)


In [20]:
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

model_name = "microsoft/Phi-3-mini-4k-instruct"
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(llm_int8_enable_fp32_cpu_offload=True)

In [21]:
import torch
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"CUDA version: {torch.version.cuda}")

PyTorch version: 2.5.1+cu121
CUDA available: True
CUDA version: 12.1


In [22]:
device = torch.device("cuda") 

In [None]:
# Load model to GPU
import flash_attention
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
    attn_implementation="eager",  # Use eager fallback
).to(device)

`flash-attention` package not found, consider installing for better performance: No module named 'flash_attn'.
Current `flash-attention` does not support `window_size`. Either upgrade or use `attn_implementation='eager'`.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [24]:
from transformers import (AutoModelForCausalLM, 
                          AutoTokenizer)
max_seq_length = 512
tokenizer = AutoTokenizer.from_pretrained(model_name, 
                                          trust_remote_code=True,
                                          max_seq_length=max_seq_length,
                                         )
tokenizer.pad_token = tokenizer.eos_token

In [25]:
def predict(X_test, model, tokenizer):
    y_pred = []
    for i in tqdm(range(len(X_test))):
        prompt = X_test.iloc[i]["text"]
        pipe = pipeline(task="text-generation", 
                        model=model, 
                        tokenizer=tokenizer,
                        max_new_tokens = 3, 
                        temperature = 0.2,
                       )
        result = pipe(prompt, pad_token_id=pipe.tokenizer.eos_token_id)
        answer = result[0]['generated_text'].split("The correct option is")[-1].lower()
        if "Positive" in answer:
            y_pred.append("positive")
        elif "Negative" in answer:
            y_pred.append("negative")
        elif "Neutral" in answer:
            y_pred.append("neutral")
        elif "Frustrated" in answer:
            y_pred.append("frustrated")
        else:
            y_pred.append("none")
    return y_pred

In [26]:
y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/401 [00:00<?, ?it/s]Device set to use cuda:0
The `seen_tokens` attribute is deprecated and will be removed in v4.41. Use the `cache_position` model input instead.
`get_max_cache()` is deprecated for all Cache classes. Use `get_max_cache_shape()` instead. Calling `get_max_cache()` will raise error from v4.48
You are not running the flash-attention implementation, expect numerical differences.
  0%|          | 1/401 [00:02<18:23,  2.76s/it]Device set to use cuda:0
  0%|          | 2/401 [00:04<13:06,  1.97s/it]Device set to use cuda:0
  1%|          | 3/401 [00:05<11:23,  1.72s/it]Device set to use cuda:0
  1%|          | 4/401 [00:06<10:12,  1.54s/it]Device set to use cuda:0
  1%|          | 5/401 [00:08<09:33,  1.45s/it]Device set to use cuda:0
  1%|▏         | 6/401 [00:10<10:51,  1.65s/it]Device set to use cuda:0
  2%|▏         | 7/401 [00:11<10:17,  1.57s/it]Device set to use cuda:0
  2%|▏         | 8/401 [00:13<10:18,  1.57s/it]Device set to use cuda:0
  2%|▏    

In [27]:
def evaluate(y_true, y_pred):
    labels = ['positive', 'neutral', 'negative','frustrated']
    mapping = {'positive': 2, 'neutral': 1, 'none':1, 'negative': 0,'frustrated':3}
    def map_func(x):
        return mapping.get(x, 1)
    
    y_true = np.vectorize(map_func)(y_true)
    y_pred = np.vectorize(map_func)(y_pred)
    
    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true, y_pred=y_pred)
    print(f'Accuracy: {accuracy:.3f}')
    
    # Generate accuracy report
    unique_labels = set(y_true)  # Get unique labels
    
    for label in unique_labels:
        label_indices = [i for i in range(len(y_true)) 
                         if y_true[i] == label]
        label_y_true = [y_true[i] for i in label_indices]
        label_y_pred = [y_pred[i] for i in label_indices]
        accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {label}: {accuracy:.3f}')
        
    # Generate classification report
    class_report = classification_report(y_true=y_true, y_pred=y_pred)
    print('\nClassification Report:')
    print(class_report)
    
    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true, y_pred=y_pred, labels=[0, 1, 2, 3])
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [28]:
X_train['customer_sentiment'] = df.loc[X_train.index, 'customer_sentiment']
X_eval['customer_sentiment'] = df.loc[X_eval.index, 'customer_sentiment']
X_test['customer_sentiment'] = df.loc[X_test.index, 'customer_sentiment']

In [29]:
# Apply the prompt generation function to X_train and X_eval
X_train = pd.DataFrame(X_train.apply(generate_prompt, axis=1), columns=["customer_sentiment"])
X_eval = pd.DataFrame(X_eval.apply(generate_prompt, axis=1), columns=["customer_sentiment"])

# For test prompts, generate without the solution
y_true = X_test['customer_sentiment']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["customer_sentiment"])

In [30]:
evaluate(y_true, y_pred)

Accuracy: 0.551
Accuracy for label 0: 0.000
Accuracy for label 1: 1.000
Accuracy for label 2: 0.000
Accuracy for label 3: 0.000

Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00       127
           1       0.55      1.00      0.71       221
           2       0.00      0.00      0.00        11
           3       0.00      0.00      0.00        42

    accuracy                           0.55       401
   macro avg       0.14      0.25      0.18       401
weighted avg       0.30      0.55      0.39       401


Confusion Matrix:
[[  0 127   0   0]
 [  0 221   0   0]
 [  0  11   0   0]
 [  0  42   0   0]]


In [31]:
import re

def get_num_layers(model):
    numbers = set()
    for name, _ in model.named_parameters():
        for number in re.findall(r'\d+', name):
            numbers.add(int(number))
    return max(numbers)

def get_last_layer_linears(model):
    names = []
    
    num_layers = get_num_layers(model)
    for name, module in model.named_modules():
        if str(num_layers) in name and not "encoder" in name:
            if isinstance(module, torch.nn.Linear):
                names.append(name)
    return names

In [32]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules="all-linear",
    lora_dropout=0.00,
    bias="none",
    task_type="CAUSAL_LM",
)



In [33]:

train_data = Dataset.from_pandas(X_train)
eval_data = Dataset.from_pandas(X_eval)

In [34]:
print(model)

Phi3ForCausalLM(
  (model): Phi3Model(
    (embed_tokens): Embedding(32064, 3072, padding_idx=32000)
    (embed_dropout): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0-31): 32 x Phi3DecoderLayer(
        (self_attn): Phi3Attention(
          (o_proj): Linear4bit(in_features=3072, out_features=3072, bias=False)
          (qkv_proj): Linear4bit(in_features=3072, out_features=9216, bias=False)
          (rotary_emb): Phi3RotaryEmbedding()
        )
        (mlp): Phi3MLP(
          (gate_up_proj): Linear4bit(in_features=3072, out_features=16384, bias=False)
          (down_proj): Linear4bit(in_features=8192, out_features=3072, bias=False)
          (activation_fn): SiLU()
        )
        (input_layernorm): Phi3RMSNorm()
        (resid_attn_dropout): Dropout(p=0.0, inplace=False)
        (resid_mlp_dropout): Dropout(p=0.0, inplace=False)
        (post_attention_layernorm): Phi3RMSNorm()
      )
    )
    (norm): Phi3RMSNorm()
  )
  (lm_head): Linear(in_features=3072, o

In [None]:
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    target_modules=["o_proj", "lm_head"],  # Targeting output projection and language model head
    lora_dropout=0.00,
    bias="none",
    task_type="CAUSAL_LM",
)



training_arguments = TrainingArguments(
    output_dir="logs",
    num_train_epochs=4,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8, # 4
    optim="paged_adamw_32bit",
    save_steps=0,
    logging_steps=25,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    report_to="tensorboard",
    evaluation_strategy="epoch"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    tokenizer=tokenizer,
    args=training_arguments,
)

In [None]:
# Train model
trainer.train()

# Save trained model


In [None]:
trainer.model.save_pretrained("trained-model")