In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        (os.path.join(dirname, filename))

print("Ready to start......")
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
!pip install -U transformers accelerate bitsandbytes peft datasets evaluate


Collecting transformers
  Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate
  Downloading accelerate-1.3.0-py3-none-any.whl.metadata (19 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting datasets
  Downloading datasets-3.3.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading transformers-4.48.3-py3-none-any.whl (9.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.7/9.7 MB[0m [31m88.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-1.3.0-py3-none-any.whl (336 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m336.6/336.6 kB[0m [31m22.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.45.2-py3-none-many

**Import Necessary Libraries**

In [3]:
import pandas as pd
import numpy as np
import torch
import evaluate

from datasets import Dataset, DatasetDict
from transformers import (AutoTokenizer, AutoModelForSequenceClassification, 
                          TrainingArguments, Trainer, TextClassificationPipeline, 
                          BitsAndBytesConfig, DataCollatorWithPadding)
from peft import LoraConfig, get_peft_model, TaskType

In [4]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    # Use float16 to match GPU compute capabilities on T4s (adjust if needed)
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",             # nf4 quantization for better precision
    bnb_4bit_use_double_quant=True         # enables nested double quantization
)

**Load the Dataset**

In [None]:
# Load CSV files
train_df = pd.read_csv("/kaggle/input/multi-lingual-sentiment-analysis/train.csv") 
test_df = pd.read_csv("/kaggle/input/multi-lingual-sentiment-analysis/test.csv")    

# Map sentiment labels to integers
label_map = {"Negative": 0, "Positive": 1}
train_df["label"] = train_df["label"].map(label_map)

# Convert to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

# Create a DatasetDict for ease of use
data_dict = DatasetDict({
    "train": train_dataset,
    "test": test_dataset
})

In [None]:
model_id = "/kaggle/input/llama-3.1/transformers/8b-instruct/2"  
tokenizer = AutoTokenizer.from_pretrained(model_id, model_max_length=1024)
tokenizer.pad_token = tokenizer.eos_token  # Set the pad token

def tokenize_function(example):
    # Tokenize each sentence with truncation and pad to a fixed length of 512
    return tokenizer(example["sentence"], truncation=True, padding="max_length", max_length=512)

# Apply the tokenization; remove columns not needed by the model (sentence, language, ID)
tokenized_datasets = data_dict.map(tokenize_function, batched=True, 
                                   remove_columns=["sentence", "language", "ID"])

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [7]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",        # Distribute over available GPUs (e.g., T4x2 on Kaggle)
    num_labels=2,             # For binary sentiment classification (Negative / Positive)
    pad_token_id=tokenizer.eos_token_id
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at /kaggle/input/llama-3.1/transformers/8b-instruct/2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Freeze all parameters except the classification head (score.weight)
for name, param in model.named_parameters():
    if name != "score.weight":
        param.requires_grad = False
        # print(name, param.requires_grad)

In [9]:
lora_config = LoraConfig(
    r=16,
    target_modules=["q_proj", "v_proj"],  # Target the attention projection layers
    task_type=TaskType.SEQ_CLS,  # Use SEQ_CLS (not SEQUENCE_CLASSIFICATION)
    lora_alpha=32,
    lora_dropout=0.05
)

# Wrap the model with LoRA adapters
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 6,823,936 || all params: 7,511,756,800 || trainable%: 0.0908


In [10]:
for param in model.parameters():
    if param.requires_grad:
        param.data = param.data.float()

**Define the Model and Training Configuration**

In [11]:
training_args = TrainingArguments(
    output_dir="./llama_finetuned",
    evaluation_strategy="steps",
    eval_steps=100,
    save_steps=500,
    save_total_limit=2,
    learning_rate=2e-5,
    num_train_epochs=5,
    per_device_train_batch_size=4,    # Adjust for available GPU memory
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=2,
    fp16=True,                        # Enable AMP mixed precision training
    logging_dir="./logs",
    logging_steps=50,
    report_to="none"
)

metric = evaluate.load("accuracy")
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

data_collator = DataCollatorWithPadding(tokenizer, padding=True)

split_datasets = tokenized_datasets["train"].train_test_split(test_size=0.1, seed=42)



Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [12]:


trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=split_datasets["train"],
    eval_dataset=split_datasets["test"],
    compute_metrics=compute_metrics,
    data_collator=data_collator,
    tokenizer=tokenizer
)

  trainer = Trainer(


In [13]:
trainer.train()

Step,Training Loss,Validation Loss,Accuracy
100,0.8208,0.722283,0.56
200,0.427,0.377766,0.82
300,0.1682,0.259225,0.91
400,0.1255,0.246573,0.93
500,0.068,0.262908,0.93


TrainOutput(global_step=560, training_loss=0.34642221725412775, metrics={'train_runtime': 4115.2702, 'train_samples_per_second': 1.093, 'train_steps_per_second': 0.136, 'total_flos': 9.58076309911634e+16, 'train_loss': 0.34642221725412775, 'epoch': 4.96})

In [14]:
model.config.id2label = {0: "Negative", 1: "Positive"}

classifier = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    framework="pt",
    task="sentiment-analysis"
)

predictions = []
for text in test_df["sentence"]:
    pred = classifier(text)[0]["label"]
    predictions.append(pred)

submission_df = pd.DataFrame({
    "ID": test_df["ID"],
    "label": predictions
})

submission_df.to_csv("submission.csv", index=False)
print("Submission file created.")

Device set to use cuda:0
The model 'PeftModelForSequenceClassification' is not supported for sentiment-analysis. Supported models are ['AlbertForSequenceClassification', 'BartForSequenceClassification', 'BertForSequenceClassification', 'BigBirdForSequenceClassification', 'BigBirdPegasusForSequenceClassification', 'BioGptForSequenceClassification', 'BloomForSequenceClassification', 'CamembertForSequenceClassification', 'CanineForSequenceClassification', 'LlamaForSequenceClassification', 'ConvBertForSequenceClassification', 'CTRLForSequenceClassification', 'Data2VecTextForSequenceClassification', 'DebertaForSequenceClassification', 'DebertaV2ForSequenceClassification', 'DiffLlamaForSequenceClassification', 'DistilBertForSequenceClassification', 'ElectraForSequenceClassification', 'ErnieForSequenceClassification', 'ErnieMForSequenceClassification', 'EsmForSequenceClassification', 'FalconForSequenceClassification', 'FlaubertForSequenceClassification', 'FNetForSequenceClassification', 'Funn

Submission file created.
