In [None]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.6-py3-none-any.whl.metadata (9.5 kB)
Downloading evaluate-0.4.6-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.1/84.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.6


In [None]:
#import data
import pandas as pd
from datasets import Dataset

# Task configuration: these values are interpolated into the dataset URL below.
subtask = "subtask_2"# do not change: path segment of the dataset URL
task = "task2"# do not change (not referenced by the visible cells)
langs = ["eng","zho"] # change to the language codes you want to load
domains = ["restaurant","laptop"] # change to the domains you want to load
train_dfs = []  # one DataFrame per (language, domain) combination

# Download every language/domain train file and keep the frames for concatenation.
for lang in langs:
    for domain in domains:
        train_url = f"https://raw.githubusercontent.com/DimABSA/DimABSA2026/refs/heads/main/task-dataset/track_a/{subtask}/{lang}/{lang}_{domain}_train_alltasks.jsonl"

        # JSON Lines file: one JSON record per line, read straight from the URL
        train_df = pd.read_json(train_url, lines=True)

        # Store for later concatenation
        train_dfs.append(train_df)

        print(f"Loaded {lang}_{domain} data: {len(train_df)} samples")

# Combine all files into a single DataFrame with a fresh 0..n-1 index
df = pd.concat(train_dfs, ignore_index=True)
print(f"total samples: {len(df)}")

# For Task 2, we need to extract triplets (aspect-opinion-VA)
def _get_annotations(row):
    """Return the annotation list stored on ``row``, or ``[]`` when there is none.

    Both the "Quadruplet" and "Triplet" columns are accepted, matching the BIO
    conversion cell. After ``pd.concat`` a column can exist for every row while
    holding NaN for some of them, so the value itself is checked rather than
    only the presence of the column.
    """
    for key in ("Quadruplet", "Triplet"):
        value = row.get(key)
        if isinstance(value, list):
            return value
    return []


# One output record per annotated (aspect, opinion, VA) item; a review with several
# annotations therefore yields several records sharing the same ID and Text.
rows = []
for _, row in df.iterrows():
    text = row["Text"]
    id_ = row["ID"]

    for triplet in _get_annotations(row):
        va = triplet["VA"]
        # "VA" is a "<valence>#<arousal>" string, e.g. "6.50#6.17"
        valence, arousal = map(float, va.split("#"))

        rows.append({
            "ID": id_,
            "Text": text,
            "Aspect": triplet["Aspect"],
            "Opinion": triplet["Opinion"],
            "VA": va,
            "Valence": valence,
            "Arousal": arousal
        })

# Create dataset
raw_datasets = Dataset.from_pandas(pd.DataFrame(rows))
print(f"Processed {len(raw_datasets)} triplets")

Loaded eng_restaurant data: 2284 samples
Loaded eng_laptop data: 4076 samples
Loaded zho_restaurant data: 6050 samples
Loaded zho_laptop data: 3490 samples
total samples: 15900
Processed 24457 triplets


In [None]:
from transformers import AutoTokenizer, DataCollatorForTokenClassification
from datasets import Dataset
import pandas as pd
import re

# --- Multilingual tokenizer, extended with a dedicated [NULL] special token ---
checkpoint = "bert-base-multilingual-uncased"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

special_tokens_dict = {'additional_special_tokens': ['[NULL]']}
tokenizer.add_special_tokens(special_tokens_dict)

# --- BIO tag inventory and the two lookup tables derived from it ---
label_list = ["O", "B-ASP", "I-ASP", "B-OPI", "I-OPI"]
id2label = dict(enumerate(label_list))
label2id = {name: idx for idx, name in id2label.items()}

print("Label mapping:", label2id, id2label)

# --- Function to detect Chinese text ---
def is_chinese(text):
    """Return True when ``text`` contains at least one character in U+4E00..U+9FFF."""
    cjk_pattern = re.compile('[\u4e00-\u9fff]')
    return cjk_pattern.search(text) is not None

# --- Simplified function to create BIO labels for both languages ---
def _is_taggable(phrase):
    """True for a non-empty string annotation other than the "NULL" placeholder."""
    return isinstance(phrase, str) and bool(phrase) and phrase != "NULL"


def _tag_char_span(text, labels, phrase, tag):
    """Tag the first occurrence of ``phrase`` in ``text``, one label per character."""
    start = text.find(phrase)
    if start < 0:
        return
    labels[start] = f"B-{tag}"
    # find() succeeded, so the whole phrase lies inside the text
    for offset in range(1, len(phrase)):
        labels[start + offset] = f"I-{tag}"


def _tag_word_span(lowered_words, labels, phrase, tag):
    """Tag the first case-insensitive match of ``phrase`` over whitespace-split words."""
    phrase_tokens = phrase.lower().split()
    if not phrase_tokens:
        # A whitespace-only phrase has no tokens; an empty list would otherwise
        # "match" at index 0 and mislabel the first word.
        return
    width = len(phrase_tokens)
    for i in range(len(lowered_words) - width + 1):
        if lowered_words[i:i + width] == phrase_tokens:
            labels[i] = f"B-{tag}"
            for j in range(1, width):
                labels[i + j] = f"I-{tag}"
            break


def create_bio_labels(text, spans):
    """Build a token sequence and BIO label ids for one review.

    Text containing Chinese characters (per ``is_chinese``) is split into single
    characters; any other text is split on whitespace. For each annotation in
    ``spans`` the first occurrence of its "Aspect" and then its "Opinion" is
    tagged B-/I-ASP and B-/I-OPI; later annotations overwrite earlier labels where
    they overlap. Empty, whitespace-only, or "NULL" phrases are skipped.

    Args:
        text: the review text.
        spans: list of dicts with "Aspect" / "Opinion" keys; a value that is not
            a list or tuple (for example NaN from a missing cell) is treated as
            no annotations.

    Returns:
        ``(words, label_ids)`` where ``words`` starts with the literal "[NULL]"
        token (labelled "O") and ``label_ids`` are ``label2id`` values, one per word.
    """
    if not isinstance(spans, (list, tuple)):
        spans = []

    char_level = is_chinese(text)
    words = list(text) if char_level else text.split()
    # Lower-case once instead of once per candidate window
    lowered_words = None if char_level else [w.lower() for w in words]
    labels = ["O"] * len(words)

    for span in spans:
        # Aspect is tagged before opinion, so an overlapping opinion wins
        for key, tag in (("Aspect", "ASP"), ("Opinion", "OPI")):
            phrase = span.get(key, "")
            if not _is_taggable(phrase):
                continue
            if char_level:
                _tag_char_span(text, labels, phrase, tag)
            else:
                _tag_word_span(lowered_words, labels, phrase, tag)

    # Prepend [NULL] token
    words = ["[NULL]"] + words
    labels = ["O"] + labels

    return words, [label2id[l] for l in labels]

# --- Convert dataset to word-level BIO format ---
# Builds one record per review (not per triplet): its token list, BIO label ids,
# and a flag recording whether the text was split per character.
# NOTE: this rebinds the name `rows` used by the triplet-extraction cell.
rows = []
for _, row in df.iterrows():
    # Prefer a "Triplet" entry, fall back to "Quadruplet", else no annotations
    spans = row.get("Triplet", row.get("Quadruplet", []))

    words, labels = create_bio_labels(row["Text"], spans)
    rows.append({
        "tokens": words,
        "labels": labels,
        "is_chinese": is_chinese(row["Text"])  # read by tokenize_and_align_labels
    })

bio_dataset = Dataset.from_pandas(pd.DataFrame(rows))

# --- Tokenize & align labels ---
def tokenize_and_align_labels(batch):
    """Tokenize pre-split words and project word-level BIO labels onto sub-tokens.

    Args:
        batch: a batched mapping with "tokens" (list of word lists), "labels"
            (list of word-level label-id lists) and "is_chinese" (list of bools).

    Returns:
        The tokenizer output with two extra keys: "labels", aligned to the
        sub-token sequence, and "is_chinese", copied from the input batch.

    Alignment rules:
        * special tokens (no word id) get -100 so the loss ignores them;
        * the first sub-token of a word gets the word's label;
        * continuation sub-tokens get -100 for non-Chinese rows; for Chinese rows
          they are labelled too, with a B- label turned into the matching I- label
          so that one word never produces two span starts.
    """
    tokenized = tokenizer(batch["tokens"], is_split_into_words=True, truncation=True, padding=False)

    # Continuation pieces of a "begin" word are inside the span, not a new span
    begin_to_inside = {
        label2id["B-ASP"]: label2id["I-ASP"],
        label2id["B-OPI"]: label2id["I-OPI"],
    }

    aligned_labels = []
    for i, (labels, is_zh) in enumerate(zip(batch["labels"], batch["is_chinese"])):
        word_ids = tokenized.word_ids(batch_index=i)
        new_labels = []
        prev_word = None
        for word_id in word_ids:
            if word_id is None:
                new_labels.append(-100)  # ignore special tokens
            elif word_id != prev_word:
                new_labels.append(labels[word_id])
            elif is_zh:
                word_label = labels[word_id]
                new_labels.append(begin_to_inside.get(word_label, word_label))
            else:
                new_labels.append(-100)  # only the first sub-token is scored
            prev_word = word_id
        aligned_labels.append(new_labels)

    tokenized["labels"] = aligned_labels
    tokenized["is_chinese"] = batch["is_chinese"]  # Keep language info
    return tokenized

# Apply tokenization/label alignment to the whole dataset in batches.
tokenized_datasets = bio_dataset.map(tokenize_and_align_labels, batched=True)

# --- Data collator ---
# Examples are tokenized with padding=False, so batching is left to the collator.
data_collator = DataCollatorForTokenClassification(tokenizer)

# Quick sanity check: resulting columns and the first processed example
print(tokenized_datasets.column_names)
print(tokenized_datasets[0])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.72M [00:00<?, ?B/s]

Label mapping: {'O': 0, 'B-ASP': 1, 'I-ASP': 2, 'B-OPI': 3, 'I-OPI': 4} {0: 'O', 1: 'B-ASP', 2: 'I-ASP', 3: 'B-OPI', 4: 'I-OPI'}


Map:   0%|          | 0/15900 [00:00<?, ? examples/s]

['tokens', 'labels', 'is_chinese', 'input_ids', 'token_type_ids', 'attention_mask']
{'tokens': ['[NULL]', 'ca', 'n', "'", 't', 'wait', 'wait', 'for', 'my', 'next', 'visit', '.'], 'labels': [-100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -100], 'is_chinese': False, 'input_ids': [101, 105879, 10678, 156, 112, 162, 41550, 41550, 10139, 11153, 12878, 25332, 119, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}


In [None]:
# Preview the raw annotations of the first few reviews only. Printing every row
# produces thousands of lines that the notebook front end truncates.
PREVIEW_ROWS = 5

for _, row in df.head(PREVIEW_ROWS).iterrows():
    # Handle both Triplet and Quadruplet keys
    spans = row.get("Triplet", row.get("Quadruplet", []))

    print(spans)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[{'Aspect': '鮮奶茶', 'Category': 'DRINKS#QUALITY', 'Opinion': '不會太甜', 'VA': '5.50#5.00'}, {'Aspect': '鮮奶茶', 'Category': 'DRINKS#QUALITY', 'Opinion': '很不錯', 'VA': '6.00#5.50'}]
[{'Aspect': '義式香料手扒雞', 'Category': 'FOOD#QUALITY', 'Opinion': '驚艷', 'VA': '6.50#6.17'}]
[{'Aspect': '台東土雞', 'Category': 'FOOD#QUALITY', 'Opinion': '很Q彈多汁', 'VA': '6.38#6.12'}]
[{'Aspect': '香料', 'Category': 'FOOD#QUALITY', 'Opinion': '入味', 'VA': '6.50#6.00'}, {'Aspect': '皮', 'Category': 'FOOD#QUALITY', 'Opinion': '很脆', 'VA': '6.17#5.83'}]
[{'Aspect': '手扒雞', 'Category': 'FOOD#QUALITY', 'Opinion': '好吃', 'VA': '6.75#6.38'}]
[{'Aspect': '服務態度', 'Category': 'SERVICE#GENERAL', 'Opinion': '很好', 'VA': '6.00#5.75'}]
[{'Aspect': '食材', 'Category': 'FOOD#QUALITY', 'Opinion': '新鮮', 'VA': '5.88#5.25'}]
[{'Aspect': '食材份量', 'Category': 'FOOD#STYLE_OPTIONS', 'Opinion': '足', 'VA': '6.00#5.33'}]
[{'Aspect': '紅柑獅魚', 'Category': 'FOOD#QUALITY', 'Opinion': '好吃', 'VA': '6.00

In [None]:
# Spot-check one processed example: compare `tokens` with the aligned `labels`
# (-100 marks positions excluded from the loss).
tokenized_datasets[10]

{'tokens': ['[NULL]', 'it', 'was', 'horrible', '.'],
 'labels': [-100, 0, 0, 0, 3, -100, -100, 0, -100],
 'is_chinese': False,
 'input_ids': [101, 105879, 10197, 10140, 36129, 45795, 10301, 119, 102],
 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0],
 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1]}

In [None]:
# Remove the helper columns the model does not need ("tokens", "is_chinese").
# Only columns that are still present are dropped, so re-running this cell does
# not fail after they have already been removed.
helper_columns = [
    name for name in ("tokens", "is_chinese")
    if name in tokenized_datasets.column_names
]
if helper_columns:
    tokenized_datasets = tokenized_datasets.remove_columns(helper_columns)

# Convert dataset to PyTorch tensors
tokenized_datasets.set_format("torch")

# Check final columns
print(tokenized_datasets.column_names)

['labels', 'input_ids', 'token_type_ids', 'attention_mask']


In [None]:
# Three-way split with fixed seeds: 80% train, then the held-out 20% is halved
# into validation and test (10% each of the full dataset).
dataset_splits = tokenized_datasets.train_test_split(test_size=0.2, seed=42)
train_dataset, temp_dataset = dataset_splits["train"], dataset_splits["test"]

temp_splits = temp_dataset.train_test_split(test_size=0.5, seed=42)
eval_dataset, test_dataset = temp_splits["train"], temp_splits["test"]

# Make DataLoaders
from torch.utils.data import DataLoader

# Settings shared by all three loaders; only the training loader shuffles.
loader_kwargs = dict(batch_size=8, collate_fn=data_collator)

train_dataloader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
eval_dataloader = DataLoader(eval_dataset, **loader_kwargs)
test_dataloader = DataLoader(test_dataset, **loader_kwargs)


In [None]:
# Pull a single collated batch and show the shape of every tensor in it.
batch = next(iter(train_dataloader))
{name: tensor.shape for name, tensor in batch.items()}

{'input_ids': torch.Size([8, 24]),
 'token_type_ids': torch.Size([8, 24]),
 'attention_mask': torch.Size([8, 24]),
 'labels': torch.Size([8, 24])}

In [None]:
from transformers import AutoConfig, AutoModelForTokenClassification

# Same checkpoint name as the tokenizer cell, so vocabulary and weights match
checkpoint = "bert-base-multilingual-uncased"

# Attach the BIO label inventory to the config so the classification head has
# one output per tag and saved checkpoints carry readable label names.
config = AutoConfig.from_pretrained(
    checkpoint,
    num_labels=len(label_list),   # e.g., 5: O, B-ASP, I-ASP, B-OPI, I-OPI
    id2label=id2label,
    label2id=label2id
)

model = AutoModelForTokenClassification.from_pretrained(checkpoint, config=config)

# The tokenizer gained the extra [NULL] special token, so the embedding matrix
# must be resized to len(tokenizer); the returned Embedding is the cell's output.
model.resize_token_embeddings(len(tokenizer))

model.safetensors:   0%|          | 0.00/672M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-multilingual-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
The new embeddings will be initialized from a multivariate normal distribution that has old embeddings' mean and covariance. As described in this article: https://nlp.stanford.edu/~johnhew/vocab-expansion.html. To disable this, use `mean_resizing=False`


Embedding(105880, 768, padding_idx=0)

In [None]:
# Smoke test: run the batch taken in the inspection cell through the untrained
# model. The batch includes "labels", so the output also carries a loss.
outputs = model(**batch)

# Cross-entropy loss for token classification
print("Loss:", outputs.loss)

# Logits shape: (batch_size, seq_len, num_labels)
print("Logits shape:", outputs.logits.shape)


Loss: tensor(1.5840, grad_fn=<NllLossBackward0>)
Logits shape: torch.Size([8, 24, 5])


In [None]:
from torch.optim import AdamW

# AdamW over all model parameters with a 5e-5 learning rate.
optimizer = AdamW(model.parameters(), lr=5e-5)
# Tuning notes:
# - Learning rate (lr): try 5e-5, 3e-5, or 1e-5 and compare validation results.
# - Weight decay: if the model overfits, add e.g. weight_decay=0.01.

In [None]:
from transformers import get_scheduler

# Number of epochs the learning-rate schedule is sized for.
# NOTE(review): the training loop must run exactly this many epochs. The linear
# schedule reaches a learning rate of 0 at `num_training_steps`, so any extra
# epoch would train with lr == 0.
num_epochs = 5   # candidate values: 5, 8, or 10

# Total number of optimizer steps (one per training batch per epoch)
num_training_steps = num_epochs * len(train_dataloader)

# Warmup = 10% of training steps
num_warmup_steps = int(0.1 * num_training_steps)

# Define learning rate scheduler
lr_scheduler = get_scheduler(
    "linear",                # linear warmup followed by linear decay
    optimizer=optimizer,
    num_warmup_steps=num_warmup_steps,   # steps spent ramping the lr up
    num_training_steps=num_training_steps,
)

print(f"Total steps: {num_training_steps}, Warmup steps: {num_warmup_steps}")


Total steps: 7950, Warmup steps: 795


In [None]:
import torch

# Prefer the GPU when PyTorch can see one; otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model's parameters to that device, then display the choice.
model.to(device)
device


device(type='cuda')

In [None]:
from tqdm.auto import tqdm
import torch
import numpy as np

# Same device selection as the dedicated device cell, repeated here; presumably so
# this cell can run on its own. Batches are moved to `device` inside the loops.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Function to convert BIO tags to aspect and opinion spans
def convert_bio_to_spans(tokens, bio_labels):
    """Convert a BIO tag sequence into aspect spans and opinion spans.

    Aspects ("ASP") and opinions ("OPI") are decoded independently with the same
    rules, so a tag of one type always ends an open span of the other type:
      * ``B-X`` closes any open X span and starts a new one;
      * ``I-X`` extends the open X span (a stray ``I-X`` with no open span is ignored);
      * any other tag closes the open X span.

    Args:
        tokens: token strings, used only to build each span's text.
        bio_labels: tag strings ("O", "B-ASP", "I-ASP", "B-OPI", "I-OPI"),
            paired with ``tokens`` position by position.

    Returns:
        ``(aspects, opinions)``; each is a list of ``(start, end, text)`` tuples
        with inclusive indices and ``text`` the space-joined tokens of the span.
    """
    def collect(tag):
        begin_label, inside_label = "B-" + tag, "I-" + tag
        spans = []
        start = None  # index of the currently open span, if any

        for i, (_, label) in enumerate(zip(tokens, bio_labels)):
            if label == begin_label:
                if start is not None:
                    spans.append((start, i - 1, ' '.join(tokens[start:i])))
                start = i
            elif label != inside_label and start is not None:
                spans.append((start, i - 1, ' '.join(tokens[start:i])))
                start = None

        # A span still open at the end runs to the last token
        if start is not None:
            spans.append((start, len(tokens) - 1, ' '.join(tokens[start:])))
        return spans

    return collect("ASP"), collect("OPI")

# Form aspect-opinion pairs based on proximity
def form_pairs(aspects, opinions):
    """Pair every aspect span with its nearest opinion span.

    Spans are ``(start, end, text)`` tuples. The distance between an aspect and an
    opinion is the smaller of the two boundary gaps (opinion start to aspect end,
    aspect start to opinion end); on a tie the earlier opinion in ``opinions``
    wins. With no opinions, no pairs are produced.

    Returns:
        A list of ``(aspect, opinion)`` tuples, one per aspect, in aspect order.
    """
    def gap(asp, opi):
        return min(abs(opi[0] - asp[1]), abs(asp[0] - opi[1]))

    if not opinions:
        return []

    # min() returns the first minimal element, which keeps the tie-break rule
    return [
        (asp, min(opinions, key=lambda opi: gap(asp, opi)))
        for asp in aspects
    ]

# Training and evaluation loop
#
# The epoch count is derived from the LR schedule instead of being hard-coded here.
# The linear schedule reaches a learning rate of 0 after `num_training_steps`
# optimizer steps, so running more epochs than the schedule was sized for performs
# no weight updates (validation metrics simply repeat). To train longer, raise
# `num_epochs` in the scheduler cell and rebuild the scheduler.
num_epochs = num_training_steps // len(train_dataloader)
best_f1 = 0.0


def _count_span_matches(predicted, target):
    """Count predicted spans whose (start, end) equals that of some target span."""
    target_bounds = {(span[0], span[1]) for span in target}
    return sum((span[0], span[1]) in target_bounds for span in predicted)


def _count_pair_matches(predicted, target):
    """Count predicted (aspect, opinion) pairs whose four boundaries match a target pair."""
    target_bounds = {(asp[0], asp[1], opi[0], opi[1]) for asp, opi in target}
    return sum((asp[0], asp[1], opi[0], opi[1]) in target_bounds for asp, opi in predicted)


def _precision_recall_f1(match_num, predict_num, target_num):
    """Precision, recall and F1 with a 1e-6 term guarding against division by zero."""
    precision = float(match_num) / float(predict_num + 1e-6)
    recall = float(match_num) / float(target_num + 1e-6)
    f1 = 2 * precision * recall / (precision + recall + 1e-6)
    return precision, recall, f1


for epoch in range(num_epochs):
    # ------------------- TRAINING -------------------
    model.train()
    total_train_loss = 0
    train_progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch+1} Training")

    for batch in train_progress_bar:
        batch = {k: v.to(device) for k, v in batch.items() if k != 'ID' and k != 'text'}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()

        total_train_loss += loss.item()
        train_progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})

    avg_train_loss = total_train_loss / len(train_dataloader)
    print(f"\nEpoch {epoch+1}: Avg Train Loss = {avg_train_loss:.4f}")

    # ------------------- VALIDATION -------------------
    model.eval()

    # Span-level counters accumulated over the whole validation set
    asp_target_num = 0  # aspect spans in ground truth
    opi_target_num = 0  # opinion spans in ground truth
    asp_opi_target_num = 0  # aspect-opinion pairs in ground truth

    asp_predict_num = 0  # aspect spans predicted
    opi_predict_num = 0  # opinion spans predicted
    asp_opi_predict_num = 0  # aspect-opinion pairs predicted

    asp_match_num = 0  # predicted aspect spans with exact boundary match
    opi_match_num = 0  # predicted opinion spans with exact boundary match
    asp_opi_match_num = 0  # predicted pairs with both spans matching

    val_progress_bar = tqdm(eval_dataloader, desc=f"Epoch {epoch+1} Validation", leave=False)

    with torch.no_grad():
        for batch in val_progress_bar:
            # Move batch to device
            batch_on_device = {k: v.to(device) for k, v in batch.items() if k != 'ID' and k != 'text'}

            # Get model predictions
            outputs = model(**batch_on_device)

            # Convert to plain Python lists once per batch rather than calling
            # .item() on every element
            predictions = torch.argmax(outputs.logits, dim=-1).cpu().tolist()
            gold_labels = batch["labels"].tolist()
            attention_masks = batch["attention_mask"].tolist()

            # Process each item in batch
            for pred_seq, label_seq, attn_mask in zip(predictions, gold_labels, attention_masks):
                # Keep only positions that carry a real label (not -100) and are not padding
                kept = [
                    (p, l)
                    for p, l, m in zip(pred_seq, label_seq, attn_mask)
                    if l != -100 and m == 1
                ]
                pred_bio = [id2label[p] for p, _ in kept]
                true_bio = [id2label[l] for _, l in kept]
                # Only span boundaries are compared, so token text is a placeholder
                tokens = [""] * len(kept)

                # Convert BIO tags to spans
                pred_aspects, pred_opinions = convert_bio_to_spans(tokens, pred_bio)
                true_aspects, true_opinions = convert_bio_to_spans(tokens, true_bio)

                # Form aspect-opinion pairs (same proximity heuristic on both sides)
                pred_pairs = form_pairs(pred_aspects, pred_opinions)
                true_pairs = form_pairs(true_aspects, true_opinions)

                # Update counts for aspects
                asp_target_num += len(true_aspects)
                asp_predict_num += len(pred_aspects)
                asp_match_num += _count_span_matches(pred_aspects, true_aspects)

                # Update counts for opinions
                opi_target_num += len(true_opinions)
                opi_predict_num += len(pred_opinions)
                opi_match_num += _count_span_matches(pred_opinions, true_opinions)

                # Update counts for aspect-opinion pairs
                asp_opi_target_num += len(true_pairs)
                asp_opi_predict_num += len(pred_pairs)
                asp_opi_match_num += _count_pair_matches(pred_pairs, true_pairs)

    # Calculate metrics
    asp_precision, asp_recall, asp_f1 = _precision_recall_f1(
        asp_match_num, asp_predict_num, asp_target_num)
    opi_precision, opi_recall, opi_f1 = _precision_recall_f1(
        opi_match_num, opi_predict_num, opi_target_num)
    pair_precision, pair_recall, pair_f1 = _precision_recall_f1(
        asp_opi_match_num, asp_opi_predict_num, asp_opi_target_num)

    print("\nEvaluation Results:")
    print(f"Aspect - Precision: {asp_precision:.4f}, Recall: {asp_recall:.4f}, F1: {asp_f1:.4f}")
    print(f"Opinion - Precision: {opi_precision:.4f}, Recall: {opi_recall:.4f}, F1: {opi_f1:.4f}")
    print(f"Aspect-Opinion Pair - Precision: {pair_precision:.4f}, Recall: {pair_recall:.4f}, F1: {pair_f1:.4f}")

    # Use pair F1 as the main metric for model selection
    if pair_f1 > best_f1:
        best_f1 = pair_f1
        save_path = "./best_model"
        model.save_pretrained(save_path)
        tokenizer.save_pretrained(save_path)
        print(f"New best model saved at Pair F1 = {pair_f1:.4f}\n")

Epoch 1 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 1: Avg Train Loss = 0.4727


Epoch 1 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7444, Recall: 0.7536, F1: 0.7490
Opinion - Precision: 0.6513, Recall: 0.7204, F1: 0.6841
Aspect-Opinion Pair - Precision: 0.5552, Recall: 0.5638, F1: 0.5595
New best model saved at Pair F1 = 0.5595



Epoch 2 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 2: Avg Train Loss = 0.2926


Epoch 2 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7994, Recall: 0.7402, F1: 0.7687
Opinion - Precision: 0.7428, Recall: 0.7261, F1: 0.7343
Aspect-Opinion Pair - Precision: 0.6380, Recall: 0.5820, F1: 0.6087
New best model saved at Pair F1 = 0.6087



Epoch 3 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 3: Avg Train Loss = 0.2104


Epoch 3 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7976, Recall: 0.7582, F1: 0.7774
Opinion - Precision: 0.7809, Recall: 0.7409, F1: 0.7604
Aspect-Opinion Pair - Precision: 0.6526, Recall: 0.6080, F1: 0.6295
New best model saved at Pair F1 = 0.6295



Epoch 4 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 4: Avg Train Loss = 0.1373


Epoch 4 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7872, Recall: 0.7953, F1: 0.7912
Opinion - Precision: 0.7460, Recall: 0.7667, F1: 0.7562
Aspect-Opinion Pair - Precision: 0.6331, Recall: 0.6394, F1: 0.6363
New best model saved at Pair F1 = 0.6363



Epoch 5 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 5: Avg Train Loss = 0.0861


Epoch 5 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7899, Recall: 0.7968, F1: 0.7933
Opinion - Precision: 0.7526, Recall: 0.7576, F1: 0.7551
Aspect-Opinion Pair - Precision: 0.6390, Recall: 0.6383, F1: 0.6387
New best model saved at Pair F1 = 0.6387



Epoch 6 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 6: Avg Train Loss = 0.0672


Epoch 6 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7899, Recall: 0.7968, F1: 0.7933
Opinion - Precision: 0.7526, Recall: 0.7576, F1: 0.7551
Aspect-Opinion Pair - Precision: 0.6390, Recall: 0.6383, F1: 0.6387


Epoch 7 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 7: Avg Train Loss = 0.0674


Epoch 7 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7899, Recall: 0.7968, F1: 0.7933
Opinion - Precision: 0.7526, Recall: 0.7576, F1: 0.7551
Aspect-Opinion Pair - Precision: 0.6390, Recall: 0.6383, F1: 0.6387


Epoch 8 Training:   0%|          | 0/1590 [00:00<?, ?it/s]


Epoch 8: Avg Train Loss = 0.0675


Epoch 8 Validation:   0%|          | 0/199 [00:00<?, ?it/s]


Evaluation Results:
Aspect - Precision: 0.7899, Recall: 0.7968, F1: 0.7933
Opinion - Precision: 0.7526, Recall: 0.7576, F1: 0.7551
Aspect-Opinion Pair - Precision: 0.6390, Recall: 0.6383, F1: 0.6387


In [None]:
from huggingface_hub import login

# Interactive Hugging Face Hub login (renders a widget asking for an access token).
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
# Name of the Hub repository that the model and tokenizer are pushed to.
repo_name = "aspect-opinion-bio-multilingual-faulty"

In [None]:
# Publish the checkpoint selected on validation pair-F1, not whatever weights the
# in-memory model holds after the final epoch. `best_f1 > 0` means the training
# cell saved "./best_model" in this session; otherwise fall back to the in-memory
# model and tokenizer.
if best_f1 > 0:
    model_to_push = AutoModelForTokenClassification.from_pretrained("./best_model")
    tokenizer_to_push = AutoTokenizer.from_pretrained("./best_model")
else:
    model_to_push, tokenizer_to_push = model, tokenizer

model_to_push.push_to_hub(repo_name)
tokenizer_to_push.push_to_hub(repo_name)

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...crnv8as/model.safetensors:   0%|          |  555kB /  667MB            

README.md: 0.00B [00:00, ?B/s]

CommitInfo(commit_url='https://huggingface.co/affan002/aspect-opinion-bio-multilingual-faulty/commit/0dc2d1f9a192000c11ba82bffac01a5164dacf40', commit_message='Upload tokenizer', commit_description='', oid='0dc2d1f9a192000c11ba82bffac01a5164dacf40', pr_url=None, repo_url=RepoUrl('https://huggingface.co/affan002/aspect-opinion-bio-multilingual-faulty', endpoint='https://huggingface.co', repo_type='model', repo_id='affan002/aspect-opinion-bio-multilingual-faulty'), pr_revision=None, pr_num=None)