In [None]:
import json

import pandas as pd
import torch

def process_absa_dataset(input_path: str, output_path: str):
    """
    Converts an ABSA dataset from raw JSONL format to a flat format with text, aspect, and sentiment columns.

    Every non-blank input line is parsed as a JSON object. Its "text" value is
    repeated once per entry of its "labels" list, where each entry is a
    3-item sequence ``(start, end, "<aspect>_<sentiment>")``. The part after
    the last underscore is taken as the sentiment and everything before it as
    the aspect; a label without an underscore is kept whole as the aspect with
    sentiment "neutral". The span offsets are not written to the output.

    Args:
        input_path (str): Path to the input .jsonl file
        output_path (str): Path to the output .csv file

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        ValueError: If a label entry does not contain exactly three items.
    """
    columns = ["text", "aspect", "sentiment"]
    processed_data = []

    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing newline at EOF) are not records.
                continue

            entry = json.loads(line)
            text = entry.get("text", "")
            # `or []` also covers an explicit `"labels": null` in the JSON.
            labels = entry.get("labels") or []

            for _start, _end, aspect_sentiment in labels:
                # Split on the LAST underscore so multi-word aspects such as
                # "food_quality_positive" keep their inner underscores.
                aspect, separator, sentiment = aspect_sentiment.rpartition("_")
                if not separator:
                    aspect, sentiment = aspect_sentiment, "neutral"

                processed_data.append({
                    "text": text,
                    "aspect": aspect,
                    "sentiment": sentiment
                })

    # Save to CSV. Passing the columns explicitly guarantees a header row even
    # when no labels were found, so the file can always be read back.
    df = pd.DataFrame(processed_data, columns=columns)
    df.to_csv(output_path, index=False)
    print(f"Processed dataset saved to: {output_path}")



In [None]:

# Flatten the raw annotated reviews into the CSV that the later cells load.
input_file, output_file = "review_with_aspect.jsonl", "aspect_based_sentiment.csv"
process_absa_dataset(input_path=input_file, output_path=output_file)

In [None]:
from transformers import AutoTokenizer, AutoModel

# Checkpoint identifier shared by the tokenizer and the encoder loaded below.
model_name = "answerdotai/ModernBERT-base"

# Tokenizer reused later by encode_pair, ABSADataset and lime_predict.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Stand-alone encoder instance.
# NOTE(review): ModernBertClassifier loads its own encoder with
# AutoModel.from_pretrained, and no visible cell reads `model` afterwards --
# confirm whether this extra load is still needed.
model = AutoModel.from_pretrained(model_name)


In [None]:
import pandas as pd

# Load the flattened (text, aspect, sentiment) rows written by process_absa_dataset.
df = pd.read_csv("aspect_based_sentiment.csv")

# Encode the sentiment strings as the integer class ids used for training.
label_map = {"negative": 0, "neutral": 1, "positive": 2}
df["label"] = df["sentiment"].map(label_map)

# Series.map leaves NaN for every sentiment that is not a key of label_map,
# which would turn the column into floats and later break the stratified split
# and the loss. Report and drop such rows, then store the labels as integers.
unknown_sentiment = df["label"].isna()
if unknown_sentiment.any():
    unknown_values = sorted(df.loc[unknown_sentiment, "sentiment"].astype(str).unique())
    print(
        f"Dropping {int(unknown_sentiment.sum())} rows with unrecognized sentiment values: "
        f"{unknown_values}"
    )
    df = df.loc[~unknown_sentiment].reset_index(drop=True)
df["label"] = df["label"].astype(int)



In [None]:
from transformers import AutoTokenizer


# Encode text + aspect as sentence pairs
def encode_pair(row):
    """Tokenize one dataset row as a (text, aspect) sentence pair.

    Args:
        row: Mapping-like object exposing "text" and "aspect" entries
            (e.g. one row of the preprocessed DataFrame).

    Returns:
        The output of the module-level ``tokenizer``, requested as PyTorch
        tensors and padded/truncated to 128 tokens.
    """
    pair_encoding_options = dict(
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt",
    )
    review_text, aspect_term = row["text"], row["aspect"]
    return tokenizer(review_text, aspect_term, **pair_encoding_options)

# Smoke test: encode only the first row of df as a (text, aspect) pair.
# Batched encoding is done later through ABSADataset and DataLoader.
sample = df.iloc[0]
encoded = encode_pair(sample)

print(encoded.keys())  # Lists the fields the tokenizer returned (input_ids, attention_mask, etc.)


In [None]:
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

class ABSADataset(Dataset):
    """Dataset that tokenizes (text, aspect) pairs on access.

    Each item is built from one row of ``df`` and is returned as a dict with
    "input_ids", "attention_mask" and an integer class "label".

    Args:
        df: DataFrame with "text", "aspect" and "label" columns.
        tokenizer: Callable invoked as ``tokenizer(text, aspect, ...)`` that
            returns a mapping with "input_ids" and "attention_mask" tensors
            carrying a leading batch dimension of size 1.
        max_len: Length every encoded pair is padded or truncated to.
    """

    def __init__(self, df, tokenizer, max_len=128):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        """Return the number of rows (examples) in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Tokenize the row at position ``idx`` and return its tensors.

        Raises:
            ValueError: If the row's label is NaN (``int()`` cannot convert it).
        """
        row = self.df.iloc[idx]
        inputs = self.tokenizer(
            row["text"],
            row["aspect"],
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            # Drop the batch dimension added by return_tensors="pt" so the
            # DataLoader can stack items into (batch, max_len) tensors.
            "input_ids": inputs["input_ids"].squeeze(0),
            "attention_mask": inputs["attention_mask"].squeeze(0),
            # Force an integer class index: if the label column is stored as
            # float, an inferred dtype would produce float targets, which
            # CrossEntropyLoss treats as class probabilities, not indices.
            "label": torch.tensor(int(row["label"]), dtype=torch.long)
        }


In [None]:
import torch.nn as nn
import torch.optim as optim

# Model
class ModernBertClassifier(nn.Module):
    """Pretrained encoder followed by a small classification head.

    Args:
        model_name: Checkpoint identifier passed to ``AutoModel.from_pretrained``.
        num_labels: Number of output classes (size of the logits).
        dropout: Dropout probability applied to the pooled vector and, for the
            two-layer head, between its two linear layers.
        classifier_layers: 1 for a single linear layer, 2 for
            linear -> ReLU -> dropout -> linear.
        hidden_dim: Width of the hidden layer of the two-layer head.

    Raises:
        ValueError: If ``classifier_layers`` is neither 1 nor 2.
    """

    def __init__(self, model_name, num_labels=3, dropout=0.3, classifier_layers=1, hidden_dim=256):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        encoder_width = self.bert.config.hidden_size

        if classifier_layers not in (1, 2):
            raise ValueError("classifier_layers must be 1 or 2")

        if classifier_layers == 1:
            head = nn.Linear(encoder_width, num_labels)
        else:
            head = nn.Sequential(
                nn.Linear(encoder_width, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
                nn.Linear(hidden_dim, num_labels),
            )
        self.classifier = head

    def forward(self, input_ids, attention_mask):
        """Return class logits computed from the first token's hidden state."""
        encoded = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # Use the hidden state at sequence position 0 as the pooled representation.
        first_token_state = encoded.last_hidden_state[:, 0]
        logits = self.classifier(self.dropout(first_token_state))
        return logits


    

In [None]:
from sklearn.model_selection import train_test_split

# 70 / 15 / 15 split, stratified on the class label at both stages and seeded
# so the partitions are reproducible.
train_df, temp_df = train_test_split(
    df, test_size=0.3, stratify=df["label"], random_state=42
)
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, stratify=temp_df["label"], random_state=42
)

# One tokenizing dataset per split, all sharing the same tokenizer.
train_dataset, val_dataset, test_dataset = (
    ABSADataset(split_df, tokenizer) for split_df in (train_df, val_df, test_df)
)

# Only the training loader shuffles; the held-out loaders keep DataLoader's default order.
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=8)
val_loader, test_loader = (
    DataLoader(held_out, batch_size=128, num_workers=8)
    for held_out in (val_dataset, test_dataset)
)


In [None]:
# Sanity check: number of (text, aspect) examples in the training split.
print(f"Train size: {len(train_dataset)}")


In [None]:
from sklearn.metrics import accuracy_score, f1_score

def evaluate(model, dataloader, compute_loss=False, criterion=None, device=None):
    """Run ``model`` over ``dataloader`` and compute accuracy, macro-F1 and mean loss.

    Args:
        model: Module called as ``model(input_ids=..., attention_mask=...)``
            that returns per-class logits.
        dataloader: Iterable of batches with "input_ids", "attention_mask"
            and "label" tensors.
        compute_loss: When True, also average the per-batch loss.
        criterion: Loss used when ``compute_loss`` is True. Defaults to
            ``torch.nn.CrossEntropyLoss()`` so the function does not depend on
            a module-level ``criterion`` having been defined by another cell.
        device: Device the batches are moved to. Defaults to the device of the
            model's first parameter (CPU if the model has no parameters).

    Returns:
        Tuple ``(accuracy, macro_f1, avg_loss)``; ``avg_loss`` is the mean of
        the per-batch losses, or None when ``compute_loss`` is False.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")
    if compute_loss and criterion is None:
        criterion = torch.nn.CrossEntropyLoss()

    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            # Predicted class = index of the largest logit.
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

            if compute_loss:
                loss = criterion(outputs, labels)
                total_loss += loss.item()

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="macro")
    avg_loss = total_loss / len(dataloader) if compute_loss else None
    return acc, f1, avg_loss



In [None]:
#Baseline 1 - Majority class

from sklearn.dummy import DummyClassifier

# The majority-class baseline only looks at the label distribution.
y_train, y_test = train_df["label"], test_df["label"]

# Fit on a constant placeholder feature column: with strategy="most_frequent"
# only the labels influence the prediction.
dummy = DummyClassifier(strategy="most_frequent").fit([[0]] * len(y_train), y_train)

# Score the constant prediction on the test split.
dummy_preds = dummy.predict([[0]] * len(y_test))
acc_dummy, f1_dummy = (
    accuracy_score(y_test, dummy_preds),
    f1_score(y_test, dummy_preds, average="macro"),
)

print(f"Majority Class Baseline - Accuracy: {acc_dummy:.4f} | F1 Score: {f1_dummy:.4f}")


In [None]:
# Baseline 2 - Logistic regression with TF-IDF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

# Serialize each (text, aspect) pair into a single string so the bag-of-words
# baseline sees the aspect as well (mirrors the pair input of the ABSA model).
train_texts, test_texts = (
    (split_df["text"] + " [ASPECT] " + split_df["aspect"]).tolist()
    for split_df in (train_df, test_df)
)

# TF-IDF features: vocabulary is fitted on the training texts only and capped
# at 5000 terms; the test texts are transformed with that same vocabulary.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

y_train, y_test = train_df["label"], test_df["label"]

# Fit the linear classifier and score it on the held-out test split.
clf = LogisticRegression(max_iter=200).fit(X_train, y_train)
baseline_preds = clf.predict(X_test)
acc = accuracy_score(y_test, baseline_preds)
f1 = f1_score(y_test, baseline_preds, average="macro")

print(f"Logistic Regression Baseline - Accuracy: {acc:.4f} | F1 Score: {f1:.4f}")


In [None]:
import wandb

# Log in to Weights & Biases before any sweep or run is created in this kernel.
wandb.login()


In [None]:
# Sweep definition: random search over learning rate, dropout, head depth and
# batch size, optimizing the validation macro-F1 logged as "val_f1".
sweep_config = {
    "method": "random",  # grid search is too expensive
    "metric": {"name": "val_f1", "goal": "maximize"},
    "parameters": {
        "learning_rate": {"values": [1e-5, 2e-5, 3e-5]},
        "dropout": {"values": [0.1, 0.3, 0.5]},
        # 1 = single linear layer, 2 = hidden layer + output layer
        "classifier_layers": {"values": [1, 2]},
        # 256 does not fit in memory
        "batch_size": {"values": [32, 64, 128]},
    },
}


In [None]:
import wandb
import torch
from torch.utils.data import DataLoader
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm  # <--- import tqdm

# Use the GPU when PyTorch reports CUDA as available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train_sweep(config=None):
    """Train and validate one hyperparameter configuration inside a W&B run.

    Builds a fresh ModernBertClassifier from the run's config (dropout,
    classifier_layers, batch_size, learning_rate), trains it for 5 epochs on
    the module-level ``train_dataset`` and evaluates it on ``val_dataset``
    after every epoch. Per-epoch loss, accuracy and macro-F1 for both splits
    are printed and logged to W&B; the best validation macro-F1 is logged once
    at the end. No model weights are saved.

    Args:
        config: Optional configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config, name="runs-with-5-epoch"):
        # Read the hyperparameters back from the active run's config.
        config = wandb.config

        # ----- Model -----
        model = ModernBertClassifier(
            model_name="answerdotai/ModernBERT-base",
            num_labels=3,
            dropout=config.dropout,
            classifier_layers=config.classifier_layers,
        ).to(device)

        gpu_count = torch.cuda.device_count()
        if gpu_count > 1:
            print("Using", gpu_count, "GPUs!")
            model = torch.nn.DataParallel(model)

        # ----- Data Loaders -----
        loader_kwargs = dict(batch_size=config.batch_size, num_workers=8)
        train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
        val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

        # ----- Optimizer -----
        optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)
        criterion = torch.nn.CrossEntropyLoss()

        # Kept short because each run is expensive; earlier experiments showed
        # that 5 epochs are enough for convergence.
        num_epochs = 5

        best_macro_f1 = 0.0
        for epoch in range(num_epochs):
            # ----- TRAINING -----
            model.train()
            train_losses, train_labels, train_preds = [], [], []
            for batch in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]", leave=False):
                optimizer.zero_grad()

                input_ids = batch['input_ids'].to(device)
                attention_mask = batch['attention_mask'].to(device)
                labels = batch['label'].to(device)

                logits = model(input_ids=input_ids, attention_mask=attention_mask)
                loss = criterion(logits, labels)
                loss.backward()
                optimizer.step()

                train_losses.append(loss.item())
                train_labels.extend(labels.cpu().numpy())
                train_preds.extend(torch.argmax(logits, dim=1).cpu().numpy())

            train_acc = accuracy_score(train_labels, train_preds)
            train_f1 = f1_score(train_labels, train_preds, average='macro')

            # ----- VALIDATION -----
            model.eval()
            val_losses, val_labels, val_preds = [], [], []
            with torch.no_grad():
                for batch in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Val]", leave=False):
                    input_ids = batch['input_ids'].to(device)
                    attention_mask = batch['attention_mask'].to(device)
                    labels = batch['label'].to(device)

                    logits = model(input_ids=input_ids, attention_mask=attention_mask)
                    val_losses.append(criterion(logits, labels).item())
                    val_labels.extend(labels.cpu().numpy())
                    val_preds.extend(torch.argmax(logits, dim=1).cpu().numpy())

            val_acc = accuracy_score(val_labels, val_preds)
            val_f1 = f1_score(val_labels, val_preds, average='macro')

            # Epoch losses are the plain mean of the per-batch losses.
            mean_train_loss = sum(train_losses) / len(train_losses)
            mean_val_loss = sum(val_losses) / len(val_losses)

            # Console summary for monitoring
            print(f"Epoch {epoch+1}/{num_epochs} - "
                  f"Train F1: {train_f1:.4f} | Val F1: {val_f1:.4f} | "
                  f"Train Loss: {mean_train_loss:.4f} | "
                  f"Val Loss: {mean_val_loss:.4f}")

            # Log to WANDB
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": mean_train_loss,
                "train_acc": train_acc,
                "train_f1": train_f1,
                "val_loss": mean_val_loss,
                "val_acc": val_acc,
                "val_f1": val_f1
            })

            # Track the best validation macro-F1 seen so far (metric only).
            best_macro_f1 = max(best_macro_f1, val_f1)

        # Log best macro-F1 at the end
        wandb.log({"best_val_macro_f1": best_macro_f1})


In [None]:
# Create the sweep from sweep_config in the "aspect-sentiment-modernbert" project
# and keep its id for the agent below.
sweep_id = wandb.sweep(sweep_config, project="aspect-sentiment-modernbert")
print(f"Sweep ID: {sweep_id}")

# Launch an agent in this process that calls train_sweep for up to `count` runs.
wandb.agent(sweep_id, function=train_sweep, count=5)  # Change count as needed


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm

# Hyperparameters for the final training run.
# NOTE(review): presumably picked from the sweep results -- confirm.
best_config = dict(
    learning_rate=3e-5,
    dropout=0.3,
    classifier_layers=1,
    batch_size=64,
    num_epochs=3,
)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

best_model = ModernBertClassifier(
    model_name="answerdotai/ModernBERT-base",
    num_labels=3,
    dropout=best_config["dropout"],
    classifier_layers=best_config["classifier_layers"],
)

# Wrap for multi-GPU execution first, then move the (possibly wrapped) model to the device.
if torch.cuda.device_count() > 1:
    print("Using", torch.cuda.device_count(), "GPUs via DataParallel!")
    best_model = torch.nn.DataParallel(best_model)
best_model = best_model.to(device)

# ---- Prepare DataLoaders ----
# Same settings for every split; only the training loader shuffles.
train_loader, val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=best_config["batch_size"], shuffle=reshuffle, num_workers=8)
    for split_dataset, reshuffle in (
        (train_dataset, True),
        (val_dataset, False),
        (test_dataset, False),
    )
)

# ---- Training Loop ----
# Trains best_model for best_config["num_epochs"] epochs, validates after every
# epoch, keeps a snapshot of the weights with the best validation macro-F1 and
# restores that snapshot at the end.

optimizer = optim.AdamW(best_model.parameters(), lr=best_config["learning_rate"])
criterion = nn.CrossEntropyLoss()

best_val_f1 = 0
best_model_state = None

for epoch in range(best_config["num_epochs"]):
    best_model.train()
    train_labels, train_preds = [], []
    for batch in tqdm(train_loader, desc=f"Epoch {epoch+1} [Train]"):
        optimizer.zero_grad()
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        outputs = best_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        preds = torch.argmax(outputs, dim=1)
        train_labels.extend(labels.cpu().numpy())
        train_preds.extend(preds.cpu().numpy())
    train_f1 = f1_score(train_labels, train_preds, average='macro')
    train_acc = accuracy_score(train_labels, train_preds)
    print(f"Epoch {epoch+1} Train F1: {train_f1:.4f} Train Acc: {train_acc:.4f}")

    # Validation step
    best_model.eval()
    val_labels, val_preds = [], []
    with torch.no_grad():
        for batch in tqdm(val_loader, desc=f"Epoch {epoch+1} [Val]"):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['label'].to(device)
            outputs = best_model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs, dim=1)
            val_labels.extend(labels.cpu().numpy())
            val_preds.extend(preds.cpu().numpy())
    val_f1 = f1_score(val_labels, val_preds, average='macro')
    val_acc = accuracy_score(val_labels, val_preds)
    print(f"Epoch {epoch+1} Val F1: {val_f1:.4f} Val Acc: {val_acc:.4f}")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        # state_dict() hands back references to the live parameter tensors, so
        # keeping it as-is would let later epochs overwrite the "best" weights
        # in place. Store independent CPU copies instead (copy=True forces a
        # copy even when the tensor already lives on the CPU).
        best_model_state = {
            name: tensor.detach().to("cpu", copy=True)
            for name, tensor in best_model.state_dict().items()
        }

# ---- Load the best model weights ----
# load_state_dict copies the snapshot back into the model's own tensors.
if best_model_state is not None:
    best_model.load_state_dict(best_model_state)


In [None]:
# ---- Evaluation on Test Set ----
# Scores best_model on the held-out test split and records accuracy, macro-F1
# and mean per-batch loss in a dedicated W&B run.
best_model.eval()
test_labels, test_preds = [], []
test_loss_values = []
with torch.no_grad():
    for batch in tqdm(test_loader, desc="Testing"):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['label'].to(device)
        outputs = best_model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        preds = torch.argmax(outputs, dim=1)
        test_labels.extend(labels.cpu().numpy())
        test_preds.extend(preds.cpu().numpy())
        test_loss_values.append(loss.item())

test_f1 = f1_score(test_labels, test_preds, average='macro')
test_acc = accuracy_score(test_labels, test_preds)
test_loss = sum(test_loss_values) / len(test_loss_values)

# Use the run as a context manager (as train_sweep does) so it is finished as
# soon as the metrics are logged instead of staying open for the rest of the
# notebook session.
# NOTE(review): the wandb pre/post-run-cell hook BrokenPipeError messages in the
# saved output of the LIME cell are likely caused by this run having been left
# open -- confirm after re-running.
with wandb.init(
    project="aspect-sentiment-modernbert",
    name="final_test_eval"
):
    wandb.log({
        "test_accuracy": test_acc,
        "test_macro_f1": test_f1,
        "test_loss": test_loss
    })

print(f"Test Accuracy: {test_acc:.4f} | Test F1 Score: {test_f1:.4f} | Test Loss: {test_loss:.4f}")


In [None]:
#lime

import numpy as np
import torch

# Display names indexed by class id; the order matches label_map
# (negative=0, neutral=1, positive=2).
class_names = ["negative", "neutral", "positive"]

# If training wrapped the model in DataParallel, unwrap it so the underlying
# module is called directly with the CPU tensors built in lime_predict.
if isinstance(best_model, torch.nn.DataParallel):
    best_model = best_model.module
best_model.cpu()  # Moves the unwrapped module's parameters to the CPU


def lime_predict(texts, batch_size=32):
    """Return class probabilities for a list of "aspect [SEP] text" strings.

    This is the classifier function handed to LIME. Each string is split at
    the first "[SEP]" into an aspect and a sentence; a string without "[SEP]"
    is treated as a sentence with an empty aspect. The pairs are tokenized as
    (sentence, aspect) and scored by the module-level ``best_model`` on the CPU.

    Args:
        texts: list of "aspect [SEP] text" strings
        batch_size: Number of strings scored per forward pass. LIME passes all
            perturbed samples of an explanation in one call, so scoring them in
            chunks bounds peak memory. Each chunk is padded to its own longest
            sequence.

    Returns:
        NumPy array of shape (len(texts), num_classes) with softmax
        probabilities; shape (0, len(class_names)) for empty input.
    """
    if len(texts) == 0:
        return torch.empty((0, len(class_names))).numpy()

    aspects = []
    sentences = []
    for s in texts:
        # Split on the FIRST [SEP] only, so any further occurrence stays part
        # of the sentence instead of being silently dropped.
        aspect, separator, text = s.partition("[SEP]")
        if not separator:
            # fallback if [SEP] missing
            aspect, text = "", s
        aspects.append(aspect.strip())
        sentences.append(text.strip())

    best_model.eval()
    prob_chunks = []
    with torch.no_grad():
        for start in range(0, len(sentences), batch_size):
            stop = start + batch_size
            # Tokenize as pairs
            encoded = tokenizer(
                sentences[start:stop],
                aspects[start:stop],
                padding=True,
                truncation=True,
                max_length=128,
                return_tensors="pt"
            )
            input_ids = encoded['input_ids'].to('cpu')
            attention_mask = encoded['attention_mask'].to('cpu')
            logits = best_model(input_ids=input_ids, attention_mask=attention_mask)
            prob_chunks.append(torch.softmax(logits, dim=1).cpu())

    return torch.cat(prob_chunks, dim=0).numpy()


from lime.lime_text import LimeTextExplainer
import numpy as np

# LIME text explainer labelled with the three sentiment class names.
explainer = LimeTextExplainer(class_names=class_names)

num_examples = 5
import random
# Draw two reproducible sets of test-row positions, each under its own seed.
random.seed(333)
indices = random.sample(range(len(test_df)), num_examples)

random.seed(15)
indices2 = random.sample(range(len(test_df)), num_examples)

# Note: some of the examples in the report come from indices2, which is the set
# explained below; `indices` is computed above but not used by this loop.
for idx in indices2:  
    row = test_df.iloc[idx]
    # Same "aspect [SEP] text" layout that lime_predict splits apart again.
    input_text = f"{row['aspect']} [SEP] {row['text']}"
    true_label = row['label']
    print(f"Sample {idx} - True: {class_names[true_label]}")
    # Explain all three classes using 500 perturbed samples and at most 10 features.
    exp = explainer.explain_instance(
        input_text,
        lime_predict,
        num_features=10,
        labels=[0, 1, 2],
        num_samples=500
    )
    # Model prediction on the unperturbed input, printed next to the true label.
    pred_class = np.argmax(lime_predict([input_text])[0])
    print("Predicted class:", class_names[pred_class])
    #exp.show_in_notebook(text=input_text)
    # Write the explanation to an HTML file in the working directory.
    exp.save_to_file(f'lime_explanation_{idx}.html')



Error in callback <bound method _WandbInit._pre_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7fadcc9901d0>> (for pre_run_cell), with arguments args (<ExecutionInfo object at 7faf26f1b4d0, raw_cell="#lime

import numpy as np
import torch

class_name.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2B34.77.197.199/home/ubuntu/ERDEM/nlp/new/new.ipynb#X24sdnNjb2RlLXJlbW90ZQ%3D%3D>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe

Sample 18180 - True: positive
Predicted class: negative
Sample 11495 - True: positive
Predicted class: positive
Sample 11571 - True: positive
Predicted class: positive
Sample 7069 - True: negative
Predicted class: negative
Sample 13251 - True: neutral
Predicted class: neutral
Error in callback <bound method _WandbInit._post_run_cell_hook of <wandb.sdk.wandb_init._WandbInit object at 0x7fadcc9901d0>> (for post_run_cell), with arguments args (<ExecutionResult object at 7faf26f1a490, execution_count=46 error_before_exec=None error_in_exec=None info=<ExecutionInfo object at 7faf26f1b4d0, raw_cell="#lime

import numpy as np
import torch

class_name.." store_history=True silent=False shell_futures=True cell_id=vscode-notebook-cell://ssh-remote%2B34.77.197.199/home/ubuntu/ERDEM/nlp/new/new.ipynb#X24sdnNjb2RlLXJlbW90ZQ%3D%3D> result=None>,),kwargs {}:


BrokenPipeError: [Errno 32] Broken pipe