In [None]:
!pip install transformers torch scikit-learn --quiet

import os
os.environ["WANDB_DISABLED"] = "true"  # disable W&B logging

import pandas as pd
import numpy as np
import re
import torch
import torch.nn as nn
from torch.utils.data import Dataset

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, classification_report, f1_score

from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    Trainer,
    TrainingArguments
)

from transformers.modeling_outputs import SequenceClassifierOutput
from torch.nn import CrossEntropyLoss

# Load the tweet corpus and keep only rows that have both a text and a label.
df_tweets = pd.read_csv("tweets_ready_bert.csv", sep=",").dropna(
    subset=["tweets", "polarity"]
)
df_tweets.head()

# Map the raw polarity values onto consecutive integer class ids.
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(df_tweets["polarity"])

# 70/30 split, stratified on the encoded label, with a fixed seed.
tweet_texts = df_tweets["tweets"].values
X_train, X_test, y_train, y_test = train_test_split(
    tweet_texts, y, test_size=0.3, random_state=42, stratify=y
)

# "balanced" class weights are derived from the training labels only and
# stored as a float tensor on the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
balanced_weights = compute_class_weight(
    class_weight="balanced",
    classes=np.unique(y_train),
    y=y_train,
)
class_weights = torch.tensor(balanced_weights, dtype=torch.float).to(device)

weight_by_class = dict(zip(label_encoder.classes_, class_weights.tolist()))
print("Class Weights:", weight_by_class)

class TweetDataset(Dataset):
    """Map-style dataset that tokenizes one text per item on access.

    Args:
        texts: Indexable collection of texts; each item is passed through
            ``str()`` before tokenization.
        labels: Indexable collection of integer class ids, aligned with
            ``texts``.
        tokenizer: Callable invoked with the text plus the keyword arguments
            ``add_special_tokens``, ``max_length``, ``padding``,
            ``truncation`` and ``return_tensors``; its result must expose
            ``"input_ids"`` and ``"attention_mask"`` tensors.
        max_len: Length every sequence is padded/truncated to.
    """

    def __init__(self, texts, labels, tokenizer, max_len=128):
        self.texts, self.labels = texts, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        # The dataset size is defined by the number of texts.
        return len(self.texts)

    def __getitem__(self, idx):
        """Return a dict with flattened ``input_ids``/``attention_mask`` and a
        ``labels`` tensor of dtype ``torch.long`` for the item at ``idx``."""
        raw_text = str(self.texts[idx])
        target = self.labels[idx]

        tokenized = self.tokenizer(
            raw_text,
            add_special_tokens=True,
            max_length=self.max_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )

        # The tokenizer output is flattened to 1-D per field.
        sample = {
            field: tokenized[field].flatten()
            for field in ("input_ids", "attention_mask")
        }
        sample["labels"] = torch.tensor(target, dtype=torch.long)
        return sample

# Checkpoint used for both the tokenizer and the classifier backbone.
model_name = "indolem/indobertweet-base-uncased"

# The classification head gets one output per encoded polarity class.
num_polarity_classes = len(label_encoder.classes_)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name, num_labels=num_polarity_classes
)

class WeightedTrainer(Trainer):
    """``Trainer`` whose loss is class-weighted cross-entropy.

    Args:
        class_weights: One weight per class, indexed by label id. Anything
            ``torch.as_tensor`` accepts (tensor, NumPy array, list) works;
            an existing float tensor is used as-is.
        *args: Forwarded unchanged to ``Trainer.__init__``.
        **kwargs: Forwarded unchanged to ``Trainer.__init__``.
    """

    def __init__(self, class_weights, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Normalise once so compute_loss can rely on a float tensor.
        self.class_weights = torch.as_tensor(class_weights, dtype=torch.float)

    def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None):
        """Compute weighted cross-entropy for one batch.

        Args:
            model: The model to run the forward pass on.
            inputs: Batch mapping; must contain a ``"labels"`` entry.
            return_outputs: When true, return ``(loss, outputs)``.
            num_items_in_batch: Accepted for signature compatibility with
                ``Trainer``; not used here.

        Returns:
            The scalar loss, or ``(loss, outputs)`` if ``return_outputs``.

        Raises:
            ValueError: If the batch has no ``"labels"`` entry.
        """
        labels = inputs.get("labels")
        if labels is None:
            raise ValueError(
                "WeightedTrainer.compute_loss requires 'labels' in the batch."
            )

        # Run the forward pass without labels so the model does not also
        # compute its own (unweighted) loss that would just be discarded.
        # A filtered copy is used so the caller's batch is left untouched.
        model_inputs = {key: value for key, value in inputs.items() if key != "labels"}
        outputs = model(**model_inputs)
        logits = outputs.get("logits")

        # The weights were placed on a device chosen outside the Trainer;
        # align them with the logits to avoid a device-mismatch error.
        weights = self.class_weights.to(logits.device)
        loss_fct = CrossEntropyLoss(weight=weights)
        loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1))
        return (loss, outputs) if return_outputs else loss
# Wrap both splits with the same tokenizer and sequence length.
train_dataset, test_dataset = (
    TweetDataset(split_texts, split_labels, tokenizer, max_len=128)
    for split_texts, split_labels in ((X_train, y_train), (X_test, y_test))
)

training_args = TrainingArguments(
    # Output locations.
    output_dir="./indobertweet_results",
    logging_dir="./indobertweet_logs",
    # Optimisation schedule.
    num_train_epochs=9,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    warmup_steps=500,
    weight_decay=0.01,
    # Logging; no external reporting integration.
    logging_steps=10,
    report_to="none",
    # Evaluate and checkpoint once per epoch, then reload the checkpoint
    # with the highest "accuracy" when training finishes.
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
)


def compute_metrics(pred):
    """Compute accuracy, macro-F1 and a per-class report for an evaluation pass.

    Args:
        pred: Object exposing ``label_ids`` (true class ids) and
            ``predictions`` (scores whose last axis is arg-maxed to obtain
            the predicted class id).

    Returns:
        A dict with ``"accuracy"``, ``"f1_macro"`` and
        ``"classification_report"``; the report is the nested dict produced
        by ``classification_report(..., output_dict=True)`` and is keyed by
        ``str(class)`` for every class known to ``label_encoder``.
    """
    labels = pred.label_ids
    preds = pred.predictions.argmax(-1)

    class_names = [str(cls) for cls in label_encoder.classes_]
    # Pin the label ids explicitly: without `labels=`, sklearn infers the
    # classes from the data and raises ValueError when their count differs
    # from len(target_names), e.g. if a class is absent from both the
    # references and the predictions.
    class_ids = list(range(len(class_names)))

    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='macro')
    report = classification_report(
        labels,
        preds,
        labels=class_ids,
        target_names=class_names,
        output_dict=True,
    )
    return {
        "accuracy": acc,
        "f1_macro": f1,
        "classification_report": report
    }
# NOTE(review): test_dataset is used both for the per-epoch evaluation and
# for the final scores printed below. If the training arguments select the
# best checkpoint on that same data (load_best_model_at_end), the reported
# numbers are optimistically biased -- consider a separate validation split.
trainer = WeightedTrainer(
    class_weights=class_weights,
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics
)

trainer.train()

print("\nIndoBERTweet Model Evaluation (Weighted):")
eval_results = trainer.evaluate()
print(f"Accuracy: {eval_results['eval_accuracy']:.4f}")
print(f"F1 Macro: {eval_results['eval_f1_macro']:.4f}\n")

print("Classification Report:")
report = eval_results["eval_classification_report"]

# compute_metrics builds the report with target_names=str(class), so the
# per-class entries are keyed by the class name itself. Looking them up by
# positional index only works when the classes happen to be 0..n-1.
for class_name in label_encoder.classes_:
    metrics = report.get(str(class_name))
    if metrics is None:
        continue
    print(f"Class: {class_name}")
    print(f"  Precision: {metrics['precision']:.4f}")
    print(f"  Recall: {metrics['recall']:.4f}")
    print(f"  F1-Score: {metrics['f1-score']:.4f}")

if 'macro avg' in report:
    metrics = report['macro avg']
    print("Macro Avg:")
    print(f"  Precision: {metrics['precision']:.4f}")
    print(f"  Recall: {metrics['recall']:.4f}")
    print(f"  F1-Score: {metrics['f1-score']:.4f}")

Class Weights: {np.int64(0): 1.031874656677246, np.int64(1): 1.3810557126998901, np.int64(2): 0.7652243375778198}


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/445M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/445M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indolem/indobertweet-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy,F1 Macro,Classification Report
1,0.9644,0.938628,0.60928,0.569075,"{'0': {'precision': 0.5714285714285714, 'recall': 0.3622641509433962, 'f1-score': 0.44341801385681295, 'support': 265.0}, '1': {'precision': 0.46551724137931033, 'recall': 0.5482233502538071, 'f1-score': 0.5034965034965035, 'support': 197.0}, '2': {'precision': 0.7040572792362768, 'recall': 0.8263305322128851, 'f1-score': 0.7603092783505154, 'support': 357.0}, 'accuracy': 0.6092796092796092, 'macro avg': {'precision': 0.5803343640147195, 'recall': 0.5789393444700295, 'f1-score': 0.569074598567944, 'support': 819.0}, 'weighted avg': {'precision': 0.6037654660166623, 'recall': 0.6092796092796092, 'f1-score': 0.5960012176214904, 'support': 819.0}}"
2,0.72,0.707291,0.721612,0.698986,"{'0': {'precision': 0.7074235807860262, 'recall': 0.6113207547169811, 'f1-score': 0.6558704453441295, 'support': 265.0}, '1': {'precision': 0.6593406593406593, 'recall': 0.6091370558375635, 'f1-score': 0.633245382585752, 'support': 197.0}, '2': {'precision': 0.7573529411764706, 'recall': 0.865546218487395, 'f1-score': 0.807843137254902, 'support': 357.0}, 'accuracy': 0.7216117216117216, 'macro avg': {'precision': 0.7080390604343854, 'recall': 0.6953346763473132, 'f1-score': 0.6989863217282611, 'support': 819.0}, 'weighted avg': {'precision': 0.7176219277147825, 'recall': 0.7216117216117216, 'f1-score': 0.7166727819115842, 'support': 819.0}}"
3,0.5849,0.727107,0.676435,0.650639,"{'0': {'precision': 0.7394366197183099, 'recall': 0.39622641509433965, 'f1-score': 0.515970515970516, 'support': 265.0}, '1': {'precision': 0.5106382978723404, 'recall': 0.8527918781725888, 'f1-score': 0.6387832699619772, 'support': 197.0}, '2': {'precision': 0.8074712643678161, 'recall': 0.7871148459383753, 'f1-score': 0.7971631205673759, 'support': 357.0}, 'accuracy': 0.6764346764346765, 'macro avg': {'precision': 0.6858487273194888, 'recall': 0.6787110464017679, 'f1-score': 0.6506389688332898, 'support': 819.0}, 'weighted avg': {'precision': 0.7140582298968419, 'recall': 0.6764346764346765, 'f1-score': 0.6680826922554938, 'support': 819.0}}"
4,0.5481,0.888724,0.713065,0.695644,"{'0': {'precision': 0.6496062992125984, 'recall': 0.6226415094339622, 'f1-score': 0.6358381502890174, 'support': 265.0}, '1': {'precision': 0.6971428571428572, 'recall': 0.6192893401015228, 'f1-score': 0.6559139784946236, 'support': 197.0}, '2': {'precision': 0.7615384615384615, 'recall': 0.8319327731092437, 'f1-score': 0.7951807228915663, 'support': 357.0}, 'accuracy': 0.7130647130647131, 'macro avg': {'precision': 0.7027625392979724, 'recall': 0.6912878742149097, 'f1-score': 0.6956442838917357, 'support': 819.0}, 'weighted avg': {'precision': 0.7098315542340808, 'recall': 0.7130647130647131, 'f1-score': 0.7101241534338457, 'support': 819.0}}"
5,0.0833,1.282401,0.673993,0.655834,"{'0': {'precision': 0.643979057591623, 'recall': 0.4641509433962264, 'f1-score': 0.5394736842105263, 'support': 265.0}, '1': {'precision': 0.5282392026578073, 'recall': 0.8071065989847716, 'f1-score': 0.6385542168674698, 'support': 197.0}, '2': {'precision': 0.8256880733944955, 'recall': 0.7563025210084033, 'f1-score': 0.7894736842105263, 'support': 357.0}, 'accuracy': 0.673992673992674, 'macro avg': {'precision': 0.6659687778813086, 'recall': 0.6758533544631339, 'f1-score': 0.6558338617628409, 'support': 819.0}, 'weighted avg': {'precision': 0.6953458063335812, 'recall': 0.673992673992674, 'f1-score': 0.6722806011011465, 'support': 819.0}}"
6,0.0587,1.453025,0.720391,0.710602,"{'0': {'precision': 0.6474820143884892, 'recall': 0.6792452830188679, 'f1-score': 0.6629834254143646, 'support': 265.0}, '1': {'precision': 0.6190476190476191, 'recall': 0.7258883248730964, 'f1-score': 0.6682242990654206, 'support': 197.0}, '2': {'precision': 0.8612903225806452, 'recall': 0.7478991596638656, 'f1-score': 0.800599700149925, 'support': 357.0}, 'accuracy': 0.7203907203907204, 'macro avg': {'precision': 0.7092733186722512, 'recall': 0.7176775891852767, 'f1-score': 0.7106024748765701, 'support': 819.0}, 'weighted avg': {'precision': 0.7338409767113809, 'recall': 0.7203907203907204, 'f1-score': 0.7242306319953818, 'support': 819.0}}"
7,0.0007,1.558943,0.724054,0.716754,"{'0': {'precision': 0.6141479099678456, 'recall': 0.720754716981132, 'f1-score': 0.6631944444444444, 'support': 265.0}, '1': {'precision': 0.6764705882352942, 'recall': 0.700507614213198, 'f1-score': 0.6882793017456359, 'support': 197.0}, '2': {'precision': 0.868421052631579, 'recall': 0.7394957983193278, 'f1-score': 0.7987897125567323, 'support': 357.0}, 'accuracy': 0.724053724053724, 'macro avg': {'precision': 0.7196798502782397, 'recall': 0.720252709837886, 'f1-score': 0.7167544862489376, 'support': 819.0}, 'weighted avg': {'precision': 0.7399758459258924, 'recall': 0.724053724053724, 'f1-score': 0.7283339164889151, 'support': 819.0}}"
8,0.0107,1.596287,0.732601,0.720462,"{'0': {'precision': 0.6388888888888888, 'recall': 0.6943396226415094, 'f1-score': 0.6654611211573237, 'support': 265.0}, '1': {'precision': 0.6947368421052632, 'recall': 0.6700507614213198, 'f1-score': 0.6821705426356589, 'support': 197.0}, '2': {'precision': 0.8328445747800587, 'recall': 0.7955182072829131, 'f1-score': 0.8137535816618912, 'support': 357.0}, 'accuracy': 0.7326007326007326, 'macro avg': {'precision': 0.7221567685914035, 'recall': 0.7199695304485808, 'f1-score': 0.7204617484849579, 'support': 819.0}, 'weighted avg': {'precision': 0.7368671876028978, 'recall': 0.7326007326007326, 'f1-score': 0.7341206625875589, 'support': 819.0}}"
9,0.0005,1.629657,0.736264,0.719348,"{'0': {'precision': 0.6570397111913358, 'recall': 0.6867924528301886, 'f1-score': 0.6715867158671587, 'support': 265.0}, '1': {'precision': 0.7045454545454546, 'recall': 0.6294416243654822, 'f1-score': 0.6648793565683646, 'support': 197.0}, '2': {'precision': 0.8114754098360656, 'recall': 0.8319327731092437, 'f1-score': 0.8215767634854771, 'support': 357.0}, 'accuracy': 0.7362637362637363, 'macro avg': {'precision': 0.724353525190952, 'recall': 0.7160556167683049, 'f1-score': 0.7193476119736668, 'support': 819.0}, 'weighted avg': {'precision': 0.7357847366576727, 'recall': 0.7362637362637363, 'f1-score': 0.7353536233370943, 'support': 819.0}}"





IndoBERTweet Model Evaluation (Weighted):




Accuracy: 0.7363
F1 Macro: 0.7193

Classification Report:
Class: 0
  Precision: 0.6570
  Recall: 0.6868
  F1-Score: 0.6716
Class: 1
  Precision: 0.7045
  Recall: 0.6294
  F1-Score: 0.6649
Class: 2
  Precision: 0.8115
  Recall: 0.8319
  F1-Score: 0.8216
Macro Avg:
  Precision: 0.7244
  Recall: 0.7161
  F1-Score: 0.7193
