In [1]:
import json
import pandas as pd

def process_absa_dataset(input_path: str, output_path: str):
    """
    Converts an ABSA dataset from raw JSONL format to a flat format with text, aspect, and sentiment columns.

    Args:
        input_path (str): Path to the input .jsonl file
        output_path (str): Path to the output .csv file
    """
    processed_data = []

    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            entry = json.loads(line.strip())
            text = entry.get("text", "")
            labels = entry.get("labels", [])

            for label in labels:
                start, end, aspect_sentiment = label
                if "_" in aspect_sentiment:
                    *aspect_parts, sentiment = aspect_sentiment.split("_")
                    aspect = "_".join(aspect_parts)
                else:
                    aspect, sentiment = aspect_sentiment, "neutral"


                processed_data.append({
                    "text": text,
                    "aspect": aspect,
                    "sentiment": sentiment
                })

    # Save to CSV
    df = pd.DataFrame(processed_data)
    df.to_csv(output_path, index=False)
    print(f"Processed dataset saved to: {output_path}")



In [2]:

input_file = "review_with_aspect.jsonl"
output_file = "aspect_based_sentiment.csv"
process_absa_dataset(input_file, output_file)

Processed dataset saved to: aspect_based_sentiment.csv


In [3]:
from transformers import AutoTokenizer, AutoModel

model_name = "answerdotai/ModernBERT-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)


  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import pandas as pd

# Load your preprocessed dataset
df = pd.read_csv("aspect_based_sentiment.csv")

# Optional: label encoding
label_map = {"negative": 0, "neutral": 1, "positive": 2}
df["label"] = df["sentiment"].map(label_map)



In [5]:
from transformers import AutoTokenizer


# Encode text + aspect as sentence pairs
def encode_pair(row):
    return tokenizer(
        row["text"],
        row["aspect"],
        padding="max_length",
        truncation=True,
        max_length=128,
        return_tensors="pt"
    )

# Example encoding (batching will come later)
sample = df.iloc[0]
encoded = encode_pair(sample)

print(encoded.keys())  # Shows input_ids, attention_mask, etc.


dict_keys(['input_ids', 'attention_mask'])


In [6]:
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

class ABSADataset(Dataset):
    def __init__(self, df, tokenizer, max_len=128):
        self.df = df
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        inputs = self.tokenizer(
            row["text"],
            row["aspect"],
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt"
        )
        return {
            "input_ids": inputs["input_ids"].squeeze(0),
            "attention_mask": inputs["attention_mask"].squeeze(0),
            "label": torch.tensor(row["label"])
        }


In [7]:
import torch.nn as nn
import torch.optim as optim

# Model
class ModernBertClassifier(nn.Module):
    def __init__(self, model_name, num_labels=3):
        super().__init__()
        self.bert = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(0.3)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask):
        output = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        cls_output = output.last_hidden_state[:, 0, :]
        return self.classifier(self.dropout(cls_output))

In [18]:
from sklearn.model_selection import train_test_split

train_df, temp_df = train_test_split(df, test_size=0.3, stratify=df['label'], random_state=42)
val_df, test_df = train_test_split(temp_df, test_size=0.5, stratify=temp_df['label'], random_state=42)

train_dataset = ABSADataset(train_df, tokenizer)
val_dataset = ABSADataset(val_df, tokenizer)
test_dataset = ABSADataset(test_df, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=8)
val_loader = DataLoader(val_dataset, batch_size=128, num_workers=8)
test_loader = DataLoader(test_dataset, batch_size=128, num_workers=8)


In [9]:
print(f"Train size: {len(train_dataset)}")


Train size: 103660


In [19]:
from sklearn.metrics import accuracy_score, f1_score

def evaluate(model, dataloader, compute_loss=False):
    model.eval()
    all_preds = []
    all_labels = []
    total_loss = 0
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            preds = torch.argmax(outputs, dim=1)
            all_preds.extend(preds.cpu().tolist())
            all_labels.extend(labels.cpu().tolist())

            if compute_loss:
                loss = criterion(outputs, labels)
                total_loss += loss.item()

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average="macro")
    avg_loss = total_loss / len(dataloader) if compute_loss else None
    return acc, f1, avg_loss



In [14]:
#Baseline 1 - Majority class

from sklearn.dummy import DummyClassifier

# Prepare labels
y_train = train_df["label"]
y_test = test_df["label"]

# Create and train dummy classifier
dummy = DummyClassifier(strategy="most_frequent")
dummy.fit([[0]] * len(y_train), y_train)  # Fake features, labels only matter

# Predict and evaluate
dummy_preds = dummy.predict([[0]] * len(y_test))
acc_dummy = accuracy_score(y_test, dummy_preds)
f1_dummy = f1_score(y_test, dummy_preds, average="macro")

print(f"Majority Class Baseline - Accuracy: {acc_dummy:.4f} | F1 Score: {f1_dummy:.4f}")


Majority Class Baseline - Accuracy: 0.4667 | F1 Score: 0.2121


In [15]:
#Baseline 2 - Logictic regression with TF-IDF
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score

# Combine text + aspect for TF-IDF baseline (mimicking ABSA input)
train_texts = (train_df["text"] + " [ASPECT] " + train_df["aspect"]).tolist()
test_texts = (test_df["text"] + " [ASPECT] " + test_df["aspect"]).tolist()

# Vectorize
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(train_texts)
X_test = vectorizer.transform(test_texts)

y_train = train_df["label"]
y_test = test_df["label"]

# Train logistic regression
clf = LogisticRegression(max_iter=200)
clf.fit(X_train, y_train)

# Predict and evaluate
baseline_preds = clf.predict(X_test)
acc = accuracy_score(y_test, baseline_preds)
f1 = f1_score(y_test, baseline_preds, average="macro")

print(f"Logistic Regression Baseline - Accuracy: {acc:.4f} | F1 Score: {f1:.4f}")


Logistic Regression Baseline - Accuracy: 0.7165 | F1 Score: 0.7387


In [22]:
import torch
model = ModernBertClassifier(model_name, num_labels=3)
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=2e-5)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = nn.DataParallel(model)  # Wrap model
model = model.to(device)

history = {
    "train_loss": [],
    "train_acc": [],
    "train_f1": [],
    "val_loss": [],
    "val_acc": [],
    "val_f1": []
}

best_f1 = 0.0

from tqdm import tqdm

for epoch in range(10):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    print(f"\nEpoch {epoch+1}")
    for batch in tqdm(train_loader, desc="Training"):
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["label"].to(device)

        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        preds = torch.argmax(outputs, dim=1)
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.cpu().tolist())

    train_acc = accuracy_score(all_labels, all_preds)
    train_f1 = f1_score(all_labels, all_preds, average="macro")
    avg_train_loss = total_loss / len(train_loader)

    val_acc, val_f1, val_loss = evaluate(model, val_loader, compute_loss=True)

    history["train_loss"].append(avg_train_loss)
    history["train_acc"].append(train_acc)
    history["train_f1"].append(train_f1)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)
    history["val_f1"].append(val_f1)

    # Save every n epoch
    n = 1
    if (epoch + 1) % n == 0:
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'history': history
        }, f'checkpoint_epoch_{epoch+1}.pt')

    # Save best model based on validation F1
    if val_f1 > best_f1:
        best_f1 = val_f1
        torch.save(model.state_dict(), "best_model.pt")
        print(f"New best model saved with Val F1: {val_f1:.4f}")

    print(f"Epoch {epoch+1} | Train Loss: {avg_train_loss:.4f} | Train Acc: {train_acc:.4f} | Train F1: {train_f1:.4f}")
    print(f"             | Val   Loss: {val_loss:.4f}   | Val   Acc: {val_acc:.4f}   | Val   F1: {val_f1:.4f}")





Epoch 1


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

New best model saved with Val F1: 0.8164
Epoch 1 | Train Loss: 0.5233 | Train Acc: 0.7175 | Train F1: 0.7681
             | Val   Loss: 0.4442   | Val   Acc: 0.7733   | Val   F1: 0.8164

Epoch 2


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

New best model saved with Val F1: 0.8216
Epoch 2 | Train Loss: 0.4379 | Train Acc: 0.7751 | Train F1: 0.8185
             | Val   Loss: 0.4263   | Val   Acc: 0.7814   | Val   F1: 0.8216

Epoch 3


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

New best model saved with Val F1: 0.8244
Epoch 3 | Train Loss: 0.4087 | Train Acc: 0.7953 | Train F1: 0.8350
             | Val   Loss: 0.4257   | Val   Acc: 0.7819   | Val   F1: 0.8244

Epoch 4


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

New best model saved with Val F1: 0.8245
Epoch 4 | Train Loss: 0.3769 | Train Acc: 0.8151 | Train F1: 0.8512
             | Val   Loss: 0.4385   | Val   Acc: 0.7819   | Val   F1: 0.8245

Epoch 5


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

Epoch 5 | Train Loss: 0.3371 | Train Acc: 0.8389 | Train F1: 0.8705
             | Val   Loss: 0.4643   | Val   Acc: 0.7719   | Val   F1: 0.8183

Epoch 6


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

Epoch 6 | Train Loss: 0.2985 | Train Acc: 0.8587 | Train F1: 0.8865
             | Val   Loss: 0.5111   | Val   Acc: 0.7693   | Val   F1: 0.8151

Epoch 7


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

Epoch 7 | Train Loss: 0.2652 | Train Acc: 0.8745 | Train F1: 0.8992
             | Val   Loss: 0.5184   | Val   Acc: 0.7643   | Val   F1: 0.8118

Epoch 8


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

Epoch 8 | Train Loss: 0.2334 | Train Acc: 0.8873 | Train F1: 0.9095
             | Val   Loss: 0.5844   | Val   Acc: 0.7586   | Val   F1: 0.8052

Epoch 9


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

Epoch 9 | Train Loss: 0.2133 | Train Acc: 0.8932 | Train F1: 0.9141
             | Val   Loss: 0.6682   | Val   Acc: 0.7618   | Val   F1: 0.8082

Epoch 10


Training:   0%|          | 0/810 [00:00<?, ?it/s]huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 

Epoch 10 | Train Loss: 0.1965 | Train Acc: 0.8975 | Train F1: 0.9175
             | Val   Loss: 0.7157   | Val   Acc: 0.7558   | Val   F1: 0.8042


In [23]:
import matplotlib.pyplot as plt

# Ensure history contains all six tracked variables
assert all(k in history for k in ["train_loss", "val_loss", "train_acc", "val_acc", "train_f1", "val_f1"]), "Missing keys in history"

epochs = range(1, len(history["train_loss"]) + 1)

# Loss Curve
plt.figure()
plt.plot(epochs, history["train_loss"], label="Train Loss")
plt.plot(epochs, history["val_loss"], label="Validation Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Training and Validation Loss")
plt.legend()
plt.grid(True)
plt.savefig("loss_curve.png", dpi=300)
plt.close()

# Accuracy Curve
plt.figure()
plt.plot(epochs, history["train_acc"], label="Train Accuracy")
plt.plot(epochs, history["val_acc"], label="Validation Accuracy")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Training and Validation Accuracy")
plt.legend()
plt.grid(True)
plt.savefig("accuracy_curve.png", dpi=300)
plt.close()

# F1 Score Curve
plt.figure()
plt.plot(epochs, history["train_f1"], label="Train F1 Score")
plt.plot(epochs, history["val_f1"], label="Validation F1 Score")
plt.xlabel("Epoch")
plt.ylabel("F1 Score (Macro)")
plt.title("Training and Validation F1 Score")
plt.legend()
plt.grid(True)
plt.savefig("f1_curve.png", dpi=300)
plt.close()


In [24]:
test_acc, test_f1, test_loss = evaluate(model, test_loader, compute_loss=True)
print(f"Test Accuracy: {test_acc:.4f} | Test F1 Score: {test_f1:.4f} | Test Loss: {test_loss:.4f}")


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

Test Accuracy: 0.7614 | Test F1 Score: 0.8086 | Test Loss: 0.7135


In [28]:
import wandb

# Initialize project
wandb.init(project="aspect-sentiment-modernbert", name="modernbert-log")

# Optionally log config
wandb.config.update({
    "epochs": len(history["train_loss"]),
    "batch_size": 128,
    "learning_rate": 2e-5,
    "model": "ModernBERT"
})

# Log each epoch from training history
for epoch in range(len(history["train_loss"])):
    wandb.log({
        "epoch": epoch + 1,
        "train_loss": history["train_loss"][epoch],
        "val_loss": history["val_loss"][epoch],
        "train_acc": history["train_acc"][epoch],
        "val_acc": history["val_acc"][epoch],
        "train_f1": history["train_f1"][epoch],
        "val_f1": history["val_f1"][epoch]
    })


[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ubuntu/.netrc
[34m[1mwandb[0m: Currently logged in as: [33merdemerturk[0m ([33merdemerturk-middle-east-technical-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
