In [1]:
!pip install datasets evaluate scikit-learn transformers==4.26.1

Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting transformers==4.26.1
  Downloading transformers-4.26.1-py3-none-any.whl.metadata (100 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m100.3/100.3 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.26.1)
  Downloading tokenizers-0.13.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12

In [2]:
!git clone https://github.com/amark-23/Sentiment-Analysis-Models.git
%cd slp-labs-NLP/NLPlab_main

Cloning into 'slp-labs-NLP'...
remote: Enumerating objects: 376, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (8/8), done.[K
remote: Total 376 (delta 9), reused 4 (delta 4), pack-reused 364 (from 2)[K
Receiving objects: 100% (376/376), 75.05 MiB | 12.19 MiB/s, done.
Resolving deltas: 100% (156/156), done.
Updating files: 100% (82/82), done.
/content/slp-labs-NLP/NLPlab_main


In [7]:
import numpy as np
import os
os.environ["WANDB_DISABLED"] = "true"
import evaluate
from datasets import Dataset
from transformers import TrainingArguments, Trainer, AutoTokenizer, AutoModelForSequenceClassification
from sklearn.preprocessing import LabelEncoder
from utils.load_datasets import load_MR, load_Semeval2017A

# === SELECT DATASET ===
DATASET = 'MR'  # or 'Semeval2017A'

# === Define models per dataset ===
MODEL_LIST = {
    "MR": [
        'siebert/sentiment-roberta-large-english',
        #"bert-base-multilingual-cased"
        'distilbert-base-uncased-finetuned-sst-2-english',
        'AnkitAI/reviews-roberta-base-sentiment-analysis'
    ],
    "Semeval2017A": [
        'cardiffnlp/twitter-roberta-base-sentiment',
        'finiteautomata/bertweet-base-sentiment-analysis',
        #'j-hartmann/sentiment-roberta-large-english-3-classes'
    ]
}

# === Evaluation metric ===
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=128)

def prepare_dataset(X, y):
    return Dataset.from_dict({'text': X, 'label': y})

# === Load dataset ===
if DATASET == "Semeval2017A":
    X_train, y_train, X_test, y_test = load_Semeval2017A()
elif DATASET == "MR":
    X_train, y_train, X_test, y_test = load_MR()
else:
    raise ValueError("Invalid dataset")

# === Encode labels ===
le = LabelEncoder()
le.fit(list(set(y_train)))
y_train_enc = le.transform(y_train)
y_test_enc = le.transform(y_test)
n_classes = len(le.classes_)

# === Prepare HuggingFace datasets ===
train_set = prepare_dataset(X_train, y_train_enc)
test_set = prepare_dataset(X_test, y_test_enc)

# === Loop over models ===
for PRETRAINED_MODEL in MODEL_LIST[DATASET]:
    print(f"\n Fine-tuning: {PRETRAINED_MODEL}")

    # Tokenizer & model
    tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL)
    model = AutoModelForSequenceClassification.from_pretrained(
        PRETRAINED_MODEL, num_labels=n_classes)

    # Tokenize
    tokenized_train = train_set.map(tokenize_function)
    tokenized_test = test_set.map(tokenize_function)


    # Small subset for quick training (just for testing in Colab)
    train_subset_size = min(2000, len(tokenized_train))
    eval_subset_size = min(2000, len(tokenized_test))
    small_train = tokenized_train.shuffle(seed=42).select(range(train_subset_size))
    small_eval = tokenized_test.shuffle(seed=42).select(range(eval_subset_size))


    # Training setup
    args = TrainingArguments(
        output_dir=f"{PRETRAINED_MODEL.replace('/', '_')}_output",
        evaluation_strategy="epoch",
        num_train_epochs=3,
        per_device_train_batch_size=2,
        logging_steps=5,
        save_strategy="no"
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=small_train, #tokenized_train
        eval_dataset=small_eval,   #tokenized_test
        compute_metrics=compute_metrics,
    )

    # Fine-tune
    trainer.train()

    # Evaluate
    print(" Evaluation:")
    trainer.evaluate()



 Fine-tuning: siebert/sentiment-roberta-large-english


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--siebert--sentiment-roberta-large-english/snapshots/74cea614e245b0832c770ec9aa51bd58df965b9c/config.json
Model config RobertaConfig {
  "_name_or_path": "siebert/sentiment-roberta-large-english",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.26.1",
  "type

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/662 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 2000
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 2
  Gradient Accumulation steps = 1
  Total optimization steps =

Epoch,Training Loss,Validation Loss,Accuracy
1,0.6711,0.695748,0.5
2,0.6578,0.698105,0.5
3,0.6914,0.693422,0.5


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8



 Evaluation:



 Fine-tuning: distilbert-base-uncased-finetuned-sst-2-english


loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--distilbert-base-uncased-finetuned-sst-2-english/snapshots/714eb0fa89d2f80546fda750413ed43d93601a13/config.json
Model config DistilBertConfig {
  "_name_or_path": "distilbert-base-uncased-finetuned-sst-2-english",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "finetuning_task": "sst-2",
  "hidden_dim": 3072,
  "id2label": {
    "0": "NEGATIVE",
    "1": "POSITIVE"
  },
  "initializer_range": 0.02,
  "label2id": {
    "NEGATIVE": 0,
    "POSITIVE": 1
  },
  "max_position_embeddings": 512,
  "model_type": "distilbert",
  "n_heads": 12,
  "n_layers": 6,
  "output_past": true,
  "pad_token_id": 0,
  "qa_dropout": 0.1,
  "seq_classif_dropout": 0.2,
  "sinusoidal_pos_embds": false,
  "tie_weights_": true,
  "transformers_version": "4.26.1",
  "vocab_size": 30522
}

loading file vocab.txt from cache

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/662 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
The following columns in the training set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 2000
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 2
  Gradient Accumulation steps = 1
  Total optimization s

Epoch,Training Loss,Validation Loss,Accuracy
1,0.0056,0.525897,0.879154
2,0.0002,0.759763,0.886707
3,0.0002,0.896738,0.885196


The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662


 Evaluation:



 Fine-tuning: AnkitAI/reviews-roberta-base-sentiment-analysis


loading file vocab.json from cache at /root/.cache/huggingface/hub/models--AnkitAI--reviews-roberta-base-sentiment-analysis/snapshots/9656e5136c9ae0adefb50800b1df46f0860e0428/vocab.json
loading file merges.txt from cache at /root/.cache/huggingface/hub/models--AnkitAI--reviews-roberta-base-sentiment-analysis/snapshots/9656e5136c9ae0adefb50800b1df46f0860e0428/merges.txt
loading file tokenizer.json from cache at None
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /root/.cache/huggingface/hub/models--AnkitAI--reviews-roberta-base-sentiment-analysis/snapshots/9656e5136c9ae0adefb50800b1df46f0860e0428/special_tokens_map.json
loading file tokenizer_config.json from cache at /root/.cache/huggingface/hub/models--AnkitAI--reviews-roberta-base-sentiment-analysis/snapshots/9656e5136c9ae0adefb50800b1df46f0860e0428/tokenizer_config.json
loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--AnkitAI--reviews-robe

Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

Map:   0%|          | 0/662 [00:00<?, ? examples/s]

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
The following columns in the training set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running training *****
  Num examples = 2000
  Num Epochs = 3
  Instantaneous batch size per device = 2
  Total train batch size (w. parallel, distributed & accumulation) = 2
  Gradient Accumulation steps = 1
  Total optimization steps =

Epoch,Training Loss,Validation Loss,Accuracy
1,0.3224,0.696157,0.844411
2,0.6082,0.791288,0.871601
3,0.0015,0.790411,0.873112


The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8
The following columns in the evaluation set don't have a corresponding argument in `RobertaForSequenceClassification.forward` and have been ignored: text. If text are not expected by `RobertaForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 662
  Batch size = 8



 Evaluation:
