# Importing File and Kappa Score


In [None]:
import pandas as pd
from sklearn.metrics import cohen_kappa_score

# Inter-annotator agreement: Cohen's kappa between two annotators' sentiment labels.
# NOTE(review): the second column is spelled "Annotator_11_Sentiment" - confirm this
# matches the spreadsheet header and is not a typo for another annotator column.
annotator_columns = ["Annotator_1_Sentiment", "Annotator_11_Sentiment"]

# sheet_name=0 selects the first worksheet of the workbook.
dfw = pd.read_excel("/content/Final_Annotation_Updated.xlsx", sheet_name=0)

# Show the first two rows as a quick sanity check of what was loaded.
print(dfw.head(2))

# Only rows where both annotator columns are filled in take part in the score.
dfw = dfw.dropna(subset=annotator_columns)

first_labels, second_labels = (dfw[column] for column in annotator_columns)
kappa = cohen_kappa_score(first_labels, second_labels)

print(f"Cohen's Kappa Score: {kappa:.4f}")

# First Run

In [None]:
!pip install evaluate
%pip install evaluate -qqq
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
import evaluate
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# -----------------------
# 1. Data Preparation
# -----------------------
df = pd.read_excel("/content/Final_Annotation.xlsx")
# Drop rows with a missing sentiment label or a missing sentence: neither can be
# used for training, and an empty sentence cell cannot be tokenized.
df = df.dropna(subset=["Final", "Sentence"])

# Map the textual sentiment to class ids. Surrounding whitespace is stripped first
# so that cells such as "Positive " are not silently turned into NaN by the lookup.
label_to_id = {"Negative": 0, "Neutral": 1, "Positive": 2}
label_ids = df["Final"].astype(str).str.strip().map(label_to_id)

# Any value outside the mapping becomes NaN, which would turn the label column into
# floats containing NaN; report such rows and drop them explicitly instead.
unmapped = label_ids.isna()
if unmapped.any():
    unknown_values = sorted(df.loc[unmapped, "Final"].astype(str).unique())
    print(f"Dropping {int(unmapped.sum())} rows with unrecognised labels: {unknown_values}")

# Keep only the model inputs: the sentence text and its integer class id.
df = df.loc[~unmapped, ["Sentence"]].assign(label=label_ids[~unmapped].astype("int64"))

# Print class distribution for debugging purposes.
print("Full dataset label distribution:")
print(df["label"].value_counts())

# NOTE(review): the split is not stratified, so class proportions in train and test
# can drift apart - compare the two distributions printed below.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
print("\nTraining set label distribution:")
print(train_df["label"].value_counts())
print("\nTest set label distribution:")
print(test_df["label"].value_counts())

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

# -----------------------
# 2. Tokenization
# -----------------------
model_name = "xlm-roberta-base"  # or your chosen model
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_function(examples):
    """Tokenize the "Sentence" field of a batch, truncated/padded to 256 tokens."""
    return tokenizer(
        examples["Sentence"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )


def _encode_split(split):
    """Tokenize one split, rename "label" to "labels" and expose torch tensors."""
    encoded = split.map(tokenize_function, batched=True)
    encoded = encoded.rename_column("label", "labels")
    encoded.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return encoded


# Both splits go through exactly the same preprocessing steps.
train_dataset = _encode_split(train_dataset)
test_dataset = _encode_split(test_dataset)

# -----------------------
# 3. Model & Metric Setup
# -----------------------
def model_init():
    """Load the pretrained `model_name` checkpoint with a 3-label classification head."""
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=3,
    )
    return classifier

accuracy_metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Return the accuracy of the arg-max predictions for one evaluation pass.

    `eval_pred` unpacks into `(logits, labels)`; the predicted class of each
    example is the index of its largest logit along the last axis.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=-1)
    return accuracy_metric.compute(predictions=predicted_ids, references=references)

# -----------------------
# 4. Training Arguments (Modified)
# -----------------------
# Full-length training: early stopping and best-model reloading are deliberately
# left out so that every epoch runs, and the output directory is overwritten.
# NOTE(review): a checkpoint is written every epoch and no save_total_limit is set -
# check available disk space. warmup_steps is a fixed 500 - confirm it is smaller
# than the total number of optimisation steps for this dataset size.
training_args = TrainingArguments(
    # Output / checkpointing
    output_dir="./best_model_xlmroberta",
    overwrite_output_dir=True,
    save_strategy="epoch",
    # Evaluation and logging cadence
    eval_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    report_to="none",
    # Epochs and batch sizes
    num_train_epochs=11,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Optimisation and regularisation
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    warmup_steps=500,
    weight_decay=0.1,
    label_smoothing_factor=0.1,
)

# -----------------------
# 5. Trainer Setup and Full Training
# -----------------------
# The trainer builds its model through `model_init`, trains on the training split
# and scores the test split with `compute_metrics`.
trainer_kwargs = {
    "model_init": model_init,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": test_dataset,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_kwargs)

# Run the training loop configured in `training_args`.
trainer.train()

# -----------------------
# 6. Evaluation and Error Analysis
# -----------------------
eval_results = trainer.evaluate()
print("\nFinal evaluation results:", eval_results)

# Get predictions on the test set.
predictions = trainer.predict(test_dataset)
y_pred = np.argmax(predictions.predictions, axis=-1)
y_true = predictions.label_ids

# Class ids and their display names, in matching order. Passing the ids explicitly
# keeps the matrix 3x3 and the report at three rows even when a class is absent
# from both the true and the predicted labels; otherwise the tick labels would be
# misaligned and classification_report would reject the three target names.
label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
class_ids = list(label_map)
class_names = list(label_map.values())

# Confusion Matrix.
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
print("\nConfusion Matrix:")
print(conf_matrix)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

# Error Analysis: Print misclassified examples.
test_df_predictions = test_dataset.to_pandas()
test_df_predictions["predicted"] = y_pred
test_df_predictions["true_label_str"] = test_df_predictions["labels"].map(label_map)
test_df_predictions["predicted_str"] = test_df_predictions["predicted"].map(label_map)

# Keep only the rows where the predicted class differs from the annotated one.
misclassified = test_df_predictions[test_df_predictions["labels"] != test_df_predictions["predicted"]]
print("\nSome misclassified examples:")
print(misclassified[["Sentence", "true_label_str", "predicted_str"]].head(10))
misclassified.to_csv('misclassified.csv',encoding='utf-8')

# -----------------------
# 7. Interactive Testing
# -----------------------
import torch  # local import: only this inference step uses torch directly

new_sentence = input("\nEnter a new sentence to test: ")
inputs = tokenizer(new_sentence, truncation=True, padding="max_length", max_length=256, return_tensors="pt")
# Ensure the inputs are on the same device as the model (e.g., cuda:0)
inputs = {key: value.to(trainer.model.device) for key, value in inputs.items()}

# Inference only: make sure dropout is off and skip autograd bookkeeping, so the
# prediction is deterministic and no computation graph is kept in memory.
trainer.model.eval()
with torch.no_grad():
    outputs = trainer.model(**inputs)
# Apply softmax to get probabilities.
probs = outputs.logits.softmax(dim=-1)
pred_class = int(probs.argmax(dim=-1).item())
print(f"\nPredicted sentiment: {label_map[pred_class]}")
print(f"Probabilities: {probs.detach().cpu().numpy()}")

# Second Run + Stopwords

In [None]:
!pip install evaluate
%pip install evaluate -qqq
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from datasets import Dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer
)
import evaluate
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report

# -----------------------
# 1. Data Preparation
# -----------------------
df = pd.read_excel("/content/Final_Annotation_Updated.xlsx")
# Drop rows with a missing sentiment label or a missing sentence: neither can be
# used for training, and an empty sentence cell cannot be tokenized.
df = df.dropna(subset=["Final", "Sentence"])

# Map the textual sentiment to class ids. Surrounding whitespace is stripped first
# so that cells such as "Positive " are not silently turned into NaN by the lookup.
label_to_id = {"Negative": 0, "Neutral": 1, "Positive": 2}
label_ids = df["Final"].astype(str).str.strip().map(label_to_id)

# Any value outside the mapping becomes NaN, which would turn the label column into
# floats containing NaN; report such rows and drop them explicitly instead.
unmapped = label_ids.isna()
if unmapped.any():
    unknown_values = sorted(df.loc[unmapped, "Final"].astype(str).unique())
    print(f"Dropping {int(unmapped.sum())} rows with unrecognised labels: {unknown_values}")

# Keep only the model inputs: the sentence text and its integer class id.
df = df.loc[~unmapped, ["Sentence"]].assign(label=label_ids[~unmapped].astype("int64"))

# Print class distribution for debugging purposes.
print("Full dataset label distribution:")
print(df["label"].value_counts())

# NOTE(review): the split is not stratified, so class proportions in train and test
# can drift apart - compare the two distributions printed below.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
print("\nTraining set label distribution:")
print(train_df["label"].value_counts())
print("\nTest set label distribution:")
print(test_df["label"].value_counts())

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

# -----------------------
# 2. Tokenization
# -----------------------
model_name = "xlm-roberta-base"  # or your chosen model
tokenizer = AutoTokenizer.from_pretrained(model_name)


def tokenize_function(examples):
    """Tokenize the "Sentence" field of a batch, truncated/padded to 256 tokens."""
    return tokenizer(
        examples["Sentence"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )


def _encode_split(split):
    """Tokenize one split, rename "label" to "labels" and expose torch tensors."""
    encoded = split.map(tokenize_function, batched=True)
    encoded = encoded.rename_column("label", "labels")
    encoded.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return encoded


# Both splits go through exactly the same preprocessing steps.
train_dataset = _encode_split(train_dataset)
test_dataset = _encode_split(test_dataset)

# -----------------------
# 3. Model & Metric Setup
# -----------------------
def model_init():
    """Load the pretrained `model_name` checkpoint with a 3-label classification head."""
    classifier = AutoModelForSequenceClassification.from_pretrained(
        model_name,
        num_labels=3,
    )
    return classifier

accuracy_metric = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Return the accuracy of the arg-max predictions for one evaluation pass.

    `eval_pred` unpacks into `(logits, labels)`; the predicted class of each
    example is the index of its largest logit along the last axis.
    """
    scores, references = eval_pred
    predicted_ids = np.argmax(scores, axis=-1)
    return accuracy_metric.compute(predictions=predicted_ids, references=references)

# -----------------------
# 4. Training Arguments (Modified)
# -----------------------
# Full-length training: early stopping and best-model reloading are deliberately
# left out so that every epoch runs, and the output directory is overwritten.
# NOTE(review): a checkpoint is written every epoch and no save_total_limit is set -
# check available disk space. warmup_steps is a fixed 500 - confirm it is smaller
# than the total number of optimisation steps for this dataset size.
training_args = TrainingArguments(
    # Output / checkpointing
    output_dir="./best_model_xlmroberta",
    overwrite_output_dir=True,
    save_strategy="epoch",
    # Evaluation and logging cadence
    eval_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    report_to="none",
    # Epochs and batch sizes
    num_train_epochs=11,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    # Optimisation and regularisation
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    warmup_steps=500,
    weight_decay=0.1,
    label_smoothing_factor=0.1,
)

# -----------------------
# 5. Trainer Setup and Full Training
# -----------------------
# The trainer builds its model through `model_init`, trains on the training split
# and scores the test split with `compute_metrics`.
trainer_kwargs = {
    "model_init": model_init,
    "args": training_args,
    "train_dataset": train_dataset,
    "eval_dataset": test_dataset,
    "compute_metrics": compute_metrics,
}
trainer = Trainer(**trainer_kwargs)

# Run the training loop configured in `training_args`.
trainer.train()

# -----------------------
# 6. Evaluation and Error Analysis
# -----------------------
eval_results = trainer.evaluate()
print("\nFinal evaluation results:", eval_results)

# Get predictions on the test set.
predictions = trainer.predict(test_dataset)
y_pred = np.argmax(predictions.predictions, axis=-1)
y_true = predictions.label_ids

# Class ids and their display names, in matching order. Passing the ids explicitly
# keeps the matrix 3x3 and the report at three rows even when a class is absent
# from both the true and the predicted labels; otherwise the tick labels would be
# misaligned and classification_report would reject the three target names.
label_map = {0: "Negative", 1: "Neutral", 2: "Positive"}
class_ids = list(label_map)
class_names = list(label_map.values())

# Confusion Matrix.
conf_matrix = confusion_matrix(y_true, y_pred, labels=class_ids)
print("\nConfusion Matrix:")
print(conf_matrix)

plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues",
            xticklabels=class_names,
            yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

print("\nClassification Report:")
print(classification_report(y_true, y_pred, labels=class_ids, target_names=class_names))

# Error Analysis: Print misclassified examples.
test_df_predictions = test_dataset.to_pandas()
test_df_predictions["predicted"] = y_pred
test_df_predictions["true_label_str"] = test_df_predictions["labels"].map(label_map)
test_df_predictions["predicted_str"] = test_df_predictions["predicted"].map(label_map)

# Keep only the rows where the predicted class differs from the annotated one.
misclassified = test_df_predictions[test_df_predictions["labels"] != test_df_predictions["predicted"]]
print("\nSome misclassified examples:")
print(misclassified[["Sentence", "true_label_str", "predicted_str"]].head(10))
misclassified.to_csv('misclassified.csv',encoding='utf-8')

# -----------------------
# 7. Interactive Testing
# -----------------------
import torch  # local import: only this inference step uses torch directly

new_sentence = input("\nEnter a new sentence to test: ")
inputs = tokenizer(new_sentence, truncation=True, padding="max_length", max_length=256, return_tensors="pt")
# Ensure the inputs are on the same device as the model (e.g., cuda:0)
inputs = {key: value.to(trainer.model.device) for key, value in inputs.items()}

# Inference only: make sure dropout is off and skip autograd bookkeeping, so the
# prediction is deterministic and no computation graph is kept in memory.
trainer.model.eval()
with torch.no_grad():
    outputs = trainer.model(**inputs)
# Apply softmax to get probabilities.
probs = outputs.logits.softmax(dim=-1)
pred_class = int(probs.argmax(dim=-1).item())
print(f"\nPredicted sentiment: {label_map[pred_class]}")
print(f"Probabilities: {probs.detach().cpu().numpy()}")