In [None]:
!pip install transformers
!pip install torch
!pip install datasets
!pip install kagglehub

In [None]:
import warnings
# Suppress all warnings for the rest of the session so library deprecation
# notices do not clutter the cell outputs.
warnings.simplefilter("ignore")

In [None]:
import re
import pandas as pd
import numpy as np
from datasets import Dataset
from sklearn.model_selection import train_test_split
import torch
from sklearn.metrics import accuracy_score, f1_score, classification_report
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    logging
)
import matplotlib.pyplot as plt
from tqdm import tqdm
import kagglehub

In [None]:
# Download the crowdflower/twitter-airline-sentiment dataset through kagglehub
# and read Tweets.csv from the directory it reports.
path = kagglehub.dataset_download("crowdflower/twitter-airline-sentiment")
print("Path to dataset files:", path)

tweets_csv = f"{path}/Tweets.csv"
df = pd.read_csv(tweets_csv)
df.head()

In [None]:
def clean_text(text):
    """Normalise a tweet for the classifier.

    The value is coerced to ``str`` and lower-cased, then every token that
    starts with ``http`` (links) and every token that starts with ``@``
    (mentions) is removed. Punctuation and all other characters are left
    untouched, and the whitespace around removed tokens is not collapsed.
    """
    lowered = str(text).lower()
    without_links = re.sub(r"http\S+", "", lowered)
    return re.sub(r"@\S+", "", without_links)

# Replace missing tweets with empty strings first, so clean_text never turns a
# NaN into the literal string "nan".
df['text'] = df['text'].fillna('')
df['clean_text'] = df['text'].apply(clean_text)

# Side-by-side preview of raw vs. cleaned text for the first ten tweets.
preview = df[['text', 'clean_text']].head(10)
print(preview)

In [None]:
# Checkpoint used for both the tokenizer and the classifier.
MODEL_NAME = "cardiffnlp/twitter-roberta-base-sentiment"

# Only the tokenizer is needed at this point (for dataset tokenization).
# The classification model is instantiated in the dedicated model cell right
# before training, so loading it here as well would only repeat that work.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

In [None]:
# Encode the three sentiment classes as integer ids.
label_map = dict(negative=0, neutral=1, positive=2)
df['label'] = df['airline_sentiment'].map(label_map)

# Hold out 30 % of the rows, then halve that hold-out into validation and
# test. Both splits are stratified on the sentiment column and seeded.
train_df, temp_df = train_test_split(
    df, test_size=0.3, random_state=42, stratify=df['airline_sentiment']
)
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, random_state=42, stratify=temp_df['airline_sentiment']
)

# Only the cleaned text and the integer label are passed on to the datasets;
# the pandas index is dropped.
model_columns = ["clean_text", "label"]
train_dataset = Dataset.from_pandas(train_df[model_columns], preserve_index=False)
val_dataset = Dataset.from_pandas(val_df[model_columns], preserve_index=False)


def tokenize_fn(example):
    """Tokenize the ``clean_text`` field of a (batched) dataset example.

    Sequences are truncated and padded to a fixed length of 128 tokens.
    """
    tokenizer_kwargs = dict(truncation=True, padding='max_length', max_length=128)
    return tokenizer(example['clean_text'], **tokenizer_kwargs)
# Tokenize in batches, drop the raw text column and rename the target column
# to "labels"; finally expose the remaining columns as torch tensors.
train_dataset = (
    train_dataset
    .map(tokenize_fn, batched=True)
    .remove_columns(['clean_text'])
    .rename_column("label", "labels")
)
val_dataset = (
    val_dataset
    .map(tokenize_fn, batched=True)
    .remove_columns(['clean_text'])
    .rename_column("label", "labels")
)

for split_dataset in (train_dataset, val_dataset):
    split_dataset.set_format('torch')

In [None]:
# Total number of rows, followed by the size of each split.
print(len(df))
for split_name, split_frame in (("Train", train_df), ("Val", val_df), ("Test", test_df)):
    print(f"{split_name}:", len(split_frame))

In [None]:
# Instantiate the classifier that will be fine-tuned, with a 3-way output head
# (negative / neutral / positive).
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)

In [None]:
# Fine-tuning configuration: evaluate, log and checkpoint every 50 steps, and
# reload the checkpoint with the best "f1" metric once training finishes.
training_args = TrainingArguments(
    output_dir="./results",
    report_to="none",

    # Optimisation
    num_train_epochs=3,
    per_device_train_batch_size=16,
    learning_rate=5e-6,
    weight_decay=0.01,

    # Evaluation / logging / checkpoint cadence (kept in lock-step)
    eval_strategy="steps",
    eval_steps=50,
    logging_strategy="steps",
    logging_steps=50,
    save_strategy="steps",
    save_steps=50,
    save_total_limit=1,

    # Best-model selection
    load_best_model_at_end=True,
    metric_for_best_model="f1",
    greater_is_better=True,
)

In [None]:
def compute_metrics(eval_pred):
    """Return accuracy and weighted F1 for a ``(logits, labels)`` pair.

    The predicted class is the arg-max over the last axis of the logits.
    The result is a dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    logits, labels = eval_pred
    predicted_classes = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, predicted_classes),
        "f1": f1_score(labels, predicted_classes, average='weighted'),
    }

In [None]:
# Wire model, arguments, datasets and metric function together, fine-tune,
# then report the metrics on the validation set.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)
trainer = Trainer(**trainer_kwargs)

trainer.train()
trainer.evaluate()

In [None]:
# Turn the list of log entries recorded by the Trainer into a DataFrame
# (one row per logged entry) and peek at the first rows.
log_history = trainer.state.log_history
df_logs = pd.DataFrame(data=log_history)

df_logs.head()

In [None]:
# Keep only the rows that actually carry a training loss / an evaluation loss.
train_loss = df_logs.loc[df_logs["loss"].notna()]
eval_loss = df_logs.loc[df_logs["eval_loss"].notna()]

In [None]:
def plot_logged_curve(ax, frame, column, title, ylabel, color):
    """Draw one logged metric against the training step on ``ax``.

    Parameters
    ----------
    ax : matplotlib Axes
        Panel to draw on.
    frame : pandas.DataFrame
        Log rows containing a ``"step"`` column and ``column``.
    column : str
        Name of the metric column to plot on the y-axis.
    title, ylabel, color : str
        Panel title, y-axis label and line colour.

    A dashed gray arrow from the first to the last logged point is overlaid
    to show the overall trend. If ``frame`` is empty only the axes
    decorations are drawn (there are no end points to connect).
    """
    ax.plot(frame["step"], frame[column], color=color, linewidth=2)
    ax.set_title(title)
    ax.set_xlabel("Step")
    ax.set_ylabel(ylabel)
    ax.grid(True)

    if frame.empty:
        return

    first_point = (frame["step"].iloc[0], frame[column].iloc[0])
    last_point = (frame["step"].iloc[-1], frame[column].iloc[-1])
    ax.annotate(
        "",
        xy=last_point,        # arrow head
        xytext=first_point,   # arrow tail
        arrowprops=dict(
            arrowstyle="->",
            linestyle="--",
            color="gray",
            linewidth=1,
            mutation_scale=30,
        ),
    )


# Rebuild the log frame here so this cell can be re-run on its own.
log_history = trainer.state.log_history
df_logs = pd.DataFrame(log_history)

train_loss = df_logs[df_logs["loss"].notna()]
eval_loss = df_logs[df_logs["eval_loss"].notna()]
eval_acc = df_logs[df_logs["eval_accuracy"].notna()]

# One panel per curve: (rows, metric column, title, y-label, colour).
panels = [
    (train_loss, "loss", "Training Loss", "Loss", "red"),
    (eval_loss, "eval_loss", "Validation Loss", "Loss", "orange"),
    (eval_acc, "eval_accuracy", "Validation Accuracy", "Accuracy", "green"),
]

fig, axes = plt.subplots(1, 3, figsize=(18, 5))
for ax, (frame, column, title, ylabel, color) in zip(axes, panels):
    plot_logged_curve(ax, frame, column, title, ylabel, color)

fig.tight_layout()
plt.show()



In [None]:
# Run inference on the GPU when one is available, otherwise on the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model.to(device)

In [None]:
# Class names indexed by the integer label id (0, 1, 2).
labels = ['negative', 'neutral', 'positive']

true_labels = test_df['label'].tolist()

batch_size = 100
# Truncate to the same length tokenize_fn uses for the training data. Without
# an explicit max_length, truncation falls back to whatever limit the
# tokenizer itself declares, which may differ from the training setup.
max_length = 128
all_predictions = []

model.eval()

for start in tqdm(range(0, len(test_df), batch_size)):
    # .iloc makes the positional slice explicit: test_df keeps the shuffled
    # integer index of the original frame, so labels and positions differ.
    batch_texts = test_df['clean_text'].iloc[start:start + batch_size].tolist()

    tokens = tokenizer(
        batch_texts,
        padding=True,
        truncation=True,
        max_length=max_length,
        return_tensors='pt'
    )
    tokens = {k: v.to(device) for k, v in tokens.items()}

    with torch.no_grad():
        logits = model(**tokens).logits

    # arg-max over the logits gives the same class as arg-max over softmax.
    all_predictions.extend(logits.argmax(dim=-1).cpu().tolist())

# Every test row must have received exactly one prediction.
assert len(all_predictions) == len(test_df), "prediction count does not match test set size"

# assign() builds a new frame instead of writing into the split result.
test_df = test_df.assign(predicted_label=[labels[class_id] for class_id in all_predictions])

In [None]:
# Map the integer label ids back to class names so that truth and prediction
# are compared in the same (string) label space.
test_df['true_label'] = [labels[class_id] for class_id in test_df['label']]
y_true, y_pred = test_df['true_label'], test_df['predicted_label']

acc = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')

print("Accuracy:", acc)
print("F1-score:", f1)

In [None]:
# Per-class precision / recall / F1 on the test split.
report = classification_report(y_true, y_pred)
print(report)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Pin the class order explicitly so the matrix rows/columns always line up
# with the tick labels, even if a class is missing from y_true and y_pred
# (without `labels=` the matrix would shrink and the ticks would be wrong).
cm = confusion_matrix(y_true, y_pred, labels=labels)

fig, ax = plt.subplots(figsize=(6, 5))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    xticklabels=labels,
    yticklabels=labels,
    ax=ax,
)

ax.set_xlabel("Predicted")
ax.set_ylabel("True")
ax.set_title("Confusion Matrix")
plt.show()