In [1]:
# Loading Libraries
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
!pip install bitsandbytes trl peft
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig
from trl import SFTTrainer, SFTConfig
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging,DefaultDataCollator)
from sklearn.metrics import f1_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting trl
  Downloading trl-0.15.0-py3-none-any.whl.metadata (11 kB)
Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl (69.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m69.7/69.7 MB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading trl-0.15.0-py3-none-any.whl (318 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.3/318.3 kB[0m [31m26.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl, bitsandbytes
Successfully installed bitsandbytes-0.45.2 trl-0.15.0


In [2]:
logging.set_verbosity_error()

# Loading and processing the dataset

In [3]:
# Load the dataset
train_df = pd.read_csv('/kaggle/input/multi-lingual-sentiment-analysis/train.csv')
test_df = pd.read_csv('/kaggle/input/multi-lingual-sentiment-analysis/test.csv')

# Check for class distribution
print(train_df['label'].value_counts())

label
Positive    507
Negative    493
Name: count, dtype: int64


In [4]:
# Shuffle and reset index
train_df = train_df.sample(frac=1, random_state=42).reset_index(drop=True)

# Split the DataFrame into Train (80%), Validation (10%), Test (10%)
train_data, temp_data = train_test_split(train_df, test_size=0.2, random_state=42)
val_data, test_data = train_test_split(temp_data, test_size=0.5, random_state=42)

# Reset indices
train_data = train_data.reset_index(drop=True)
val_data = val_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)

In [5]:
# Define the prompt generation functions
def generate_prompt(data_point):
    return f"""
            Classify the sentiment of the text into Positive or Negative.
text: {data_point["sentence"]}
label: {data_point["label"]}""".strip()

def generate_test_prompt(data_point):
    return f"""
            Classify the sentiment of the text into Positive or Negative.
text: {data_point["sentence"]}
label: """.strip()

In [6]:
# Generate prompts for training, validation, and testing
train_data.loc[:, 'text'] = train_data.apply(generate_prompt, axis=1)
val_data.loc[:, 'text'] = val_data.apply(generate_prompt, axis=1)
test_data.loc[:, 'text'] = test_data.apply(generate_prompt, axis=1)

In [7]:
# Extract true labels for the test set
y_true = test_data['label']

# Convert to Hugging Face datasets
train_dataset = Dataset.from_pandas(train_data[["text"]])
val_dataset = Dataset.from_pandas(val_data[["text"]])
test_dataset = Dataset.from_pandas(test_data[["text"]])

# Loading the model and tokenizer

In [8]:
base_model_name = "/kaggle/input/llama-3.1/transformers/8b-instruct/1"

# Configure 4-bit quantization
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the model
model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=bnb_config,
    trust_remote_code=True
)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

# Model Evaluation Before Fine-Tuning

In [9]:
def predict(test, model, tokenizer):
    y_pred = []
    categories = ["Positive", "Negative"]

    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["text"]
        pipe = pipeline(task="text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_new_tokens=2,
                        temperature=0.1)

        result = pipe(prompt)
        answer = result[0]['generated_text'].split("label:")[-1].strip()

        # Determine the predicted category
        for category in categories:
            if category.lower() in answer.lower():
                y_pred.append(category)
                break
        else:
            y_pred.append("none")

    return y_pred

y_pred = predict(test_data, model, tokenizer)

100%|██████████| 100/100 [00:37<00:00,  2.64it/s]


In [10]:
def evaluate(y_true, y_pred):
    labels = ["Positive", "Negative"]
    mapping = {label: idx for idx, label in enumerate(labels)}

    y_true_mapped = np.vectorize(lambda x: mapping[x])(y_true)
    y_pred_mapped = np.vectorize(lambda x: mapping.get(x, -1))(y_pred)

    # Calculate F1 Score
    f1 = f1_score(y_true=y_true_mapped, y_pred=y_pred_mapped, average='weighted')
    print(f'F1 Score: {f1:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels)
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print('\nConfusion Matrix:')
    print(conf_matrix)

evaluate(y_true, y_pred)

F1 Score: 1.000

Classification Report:
              precision    recall  f1-score   support

    Positive       1.00      1.00      1.00        53
    Negative       1.00      1.00      1.00        47

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100


Confusion Matrix:
[[53  0]
 [ 0 47]]


# Setting up the model

In [11]:
# Define LoRA configuration
peft_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM"
)

# Corrected Training Configuration
training_config = TrainingArguments(  # Changed to TrainingArguments
    output_dir="llama-3.1-fine-tuned-model",
    num_train_epochs=10,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    logging_steps=5,
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=True,
    max_grad_norm=0.4,
    warmup_ratio=0.01,
    lr_scheduler_type="cosine",
    evaluation_strategy="no",
    save_strategy="no",
    save_total_limit=1,
    report_to="none"
)

# Initialize the trainer
trainer = SFTTrainer(
    model=model,
    args=training_config,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    peft_config=peft_config,
    tokenizer=tokenizer
)

  trainer = SFTTrainer(


Applying chat template to train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

Applying chat template to eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

Tokenizing eval dataset:   0%|          | 0/100 [00:00<?, ? examples/s]

# Training the Model

In [12]:
import os
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

trainer.train()

{'loss': 8.0873, 'grad_norm': 2.973546028137207, 'learning_rate': 3e-05, 'mean_token_accuracy': 0.5821081388634585, 'epoch': 0.025}
{'loss': 5.9495, 'grad_norm': 2.793299674987793, 'learning_rate': 8e-05, 'mean_token_accuracy': 0.6437049977969507, 'epoch': 0.05}
{'loss': 5.7384, 'grad_norm': 4.504316806793213, 'learning_rate': 0.00012, 'mean_token_accuracy': 0.6478084581787782, 'epoch': 0.075}
{'loss': 6.5355, 'grad_norm': 7.0431365966796875, 'learning_rate': 0.00017, 'mean_token_accuracy': 0.6277959145038488, 'epoch': 0.1}
{'loss': 5.7468, 'grad_norm': 6.781351089477539, 'learning_rate': 0.00019999949650055513, 'mean_token_accuracy': 0.6614156543552532, 'epoch': 0.125}
{'loss': 4.724, 'grad_norm': 5.11175012588501, 'learning_rate': 0.00019999383219002835, 'mean_token_accuracy': 0.6960263160748787, 'epoch': 0.15}
{'loss': 5.4183, 'grad_norm': 4.4121413230896, 'learning_rate': 0.0001999818745523526, 'mean_token_accuracy': 0.6888526386200565, 'epoch': 0.175}
{'loss': 5.3752, 'grad_norm':

TrainOutput(global_step=2000, training_loss=2.8345662355422974, metrics={'train_runtime': 10043.6163, 'train_samples_per_second': 0.797, 'train_steps_per_second': 0.199, 'total_flos': 6.956485118681088e+16, 'train_loss': 2.8345662355422974})

In [13]:
trainer.save_model("/kaggle/working/model")

In [14]:
# Enable caching for faster inference
model.config.use_cache = True

# Model Evaluation After Fine-Tuning

In [15]:
# Predict on the test set
y_pred = predict(test_data, model, tokenizer)

# Evaluate the model performance
evaluate(y_true, y_pred)

100%|██████████| 100/100 [01:21<00:00,  1.23it/s]

F1 Score: 1.000

Classification Report:
              precision    recall  f1-score   support

    Positive       1.00      1.00      1.00        53
    Negative       1.00      1.00      1.00        47

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100


Confusion Matrix:
[[53  0]
 [ 0 47]]





# Generate Submission File

In [16]:
# Load the test dataset from the provided source
test_df = pd.read_csv('/kaggle/input/multi-lingual-sentiment-analysis/test.csv')

# Check the first few rows to understand the structure
print(test_df.head())

# Ensure the test DataFrame has the necessary columns
if 'sentence' not in test_df.columns:
    raise ValueError("Test DataFrame must contain a 'sentence' column.")

# Generate prompts for the unseen test data
test_df.loc[:, 'text'] = test_df.apply(generate_test_prompt, axis=1)

# Convert to Hugging Face dataset format
unseen_test_dataset = Dataset.from_pandas(test_df[["text"]])

"""## Predicting on Unseen Test Data"""

# Using the predict function defined earlier
y_pred_unseen = predict(test_df, model, tokenizer)

"""## Creating Submission File"""

# Ensure the test DataFrame has an identifier column
if 'id' not in test_df.columns:
    # If no 'id' column, use index as identifier
    test_df['id'] = test_df.index

test_df['id'] = range(1, len(test_df) + 1)

# Prepare the submission DataFrame
submission_df = pd.DataFrame({
    'ID': test_df['id'],
    'label': y_pred_unseen
})

# Save the submission file in the required format
submission_df.to_csv('submission.csv', index=False)

print("Submission file saved as 'submission.csv'")

   ID                                           sentence language
0   1                    1120 mAh, ਓਵਰਚਾਰਜਿੰਗ ਦੀ ਸੁਰੱਖਿਆ       pa
1   2  તે સઘન મોઇશ્ચરાઇઝિંગ પ્રદાન કરે છે અને સરસ સ્વ...       gu
2   3                      1120 ಎಂಎಎಚ್, ಮಿತಿಮೀರಿದ ರಕ್ಷಣೆ       kn
3   4  ভাৰতত নিৰ্মিত সৰ্বশ্ৰেষ্ঠ পাৰফিউম ব্ৰেণ্ডবোৰৰ ...       as
4   5  میں نے حال ہی میں "انفولڈ" سے ایک ٹیمپلیٹ خرید...       ur


100%|██████████| 100/100 [01:19<00:00,  1.25it/s]

Submission file saved as 'submission.csv'



