In [None]:
!pip install transformers datasets accelerate ray[tune] optuna -U
!pip install transformers tensorflow openpyxl scikit-learn -q

Collecting datasets
  Downloading datasets-4.4.1-py3-none-any.whl.metadata (19 kB)
Collecting optuna
  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)
Collecting ray[tune]
  Downloading ray-2.51.1-cp312-cp312-manylinux2014_x86_64.whl.metadata (21 kB)
Collecting pyarrow>=21.0.0 (from datasets)
  Downloading pyarrow-22.0.0-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (3.2 kB)
Collecting click!=8.3.0,>=7.0 (from ray[tune])
  Downloading click-8.3.1-py3-none-any.whl.metadata (2.6 kB)
Collecting tensorboardX>=1.9 (from ray[tune])
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Collecting colorlog (from optuna)
  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)
Downloading datasets-4.4.1-py3-none-any.whl (511 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.6/511.6 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading optuna-4.6.0-py3-none-any.whl (404 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [

In [None]:
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, set_seed
from google.colab import files
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import ParameterGrid
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
import time
import random
from datetime import datetime
import itertools

set_seed(42)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# --- 2. DATA PREPARATION (LIMITED SUBSET) ---

print("\n--- Loading and Preprocessing Data ---")
uploaded = files.upload()

df = pd.read_csv('Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv')

nltk.download('vader_lexicon', quiet=True)
sia = SentimentIntensityAnalyzer()

def vader_label(score):
    if score >= 0.05:
        return 2
    elif score <= -0.05:
        return 0
    else:
        return 1

df['sentiment_score'] = df['Headlines'].apply(lambda x: sia.polarity_scores(str(x))['compound'])
df['label'] = df['sentiment_score'].apply(vader_label)

texts = df['Headlines'].tolist()
labels = df['label'].tolist()
dataset = Dataset.from_dict({"text": texts, "label": labels})

train_data = dataset.select(range(2000))
eval_data = dataset.select(range(500))

print(f"Loaded dataset with {len(train_data)} training and {len(eval_data)} evaluation samples.")

# Initialize Tokenizer
MODEL_NAME = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding=True)

tokenized_train = train_data.map(tokenize_function, batched=True)
tokenized_eval = eval_data.map(tokenize_function, batched=True)

tokenized_train = tokenized_train.rename_column("label", "labels")
tokenized_eval = tokenized_eval.rename_column("label", "labels")

tokenized_train.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])
tokenized_eval.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])

print("Tokenization complete!")


--- Loading and Preprocessing Data ---


Saving Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv to Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv
Loaded dataset with 2000 training and 500 evaluation samples.


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenization complete!


In [None]:
# --- 3. MODEL, METRICS, AND HYPERPARAMETER DEFINITION ---

def model_init():
    # Model must be re-initialized for every run to ensure independence
    return AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3).to(device)

def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    acc = accuracy_score(p.label_ids, preds)
    f1 = f1_score(p.label_ids, preds, average="weighted")
    return {"accuracy": acc, "f1": f1}

# --- HYPERPARAMETER RANDOM DEFINITION ---
def tune_hp(trial):
    """
    This function defines the hyperparameter space to be explored.
    The `trial` object allows us to suggest different values.
    """
    # 1. Learning Rate (Critical for performance)
    learning_rate = trial.suggest_categorical("learning_rate", [5e-5, 3e-5, 1e-5])

    # 2. Batch Size (Affects VRAM and stability)
    per_device_train_batch_size = trial.suggest_categorical("per_device_train_batch_size", [8, 16, 32])

    # 3. Weight Decay (Regularization against overfitting)
    weight_decay = trial.suggest_float("weight_decay", 0.0, 0.1, step=0.05)

    # --- EXPANSION SUPPORT: Add more parameters here if needed ---
    num_train_epochs = trial.suggest_categorical("num_train_epochs", [3, 4, 5])

    return {
        "learning_rate": learning_rate,
        "per_device_train_batch_size": per_device_train_batch_size,
        "weight_decay": weight_decay,
        "num_train_epochs": num_train_epochs
    }

In [None]:
# --- 4. TRAINING ARGUMENTS (Fixed for all runs) ---
training_args = TrainingArguments(
    output_dir="./random_search_results",
    # Evaluation settings (fixed)
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="f1", # Optimize for F1-Score
    fp16=torch.cuda.is_available(),
    report_to="none",
    # Fixed parameters
    num_train_epochs=5, # Will be overridden if specified in tune_hp
    warmup_steps=500,
)

# Initialize the Trainer
trainer = Trainer(
    model_init=model_init, # We pass the function, not the object, for fresh initialization
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

  trainer = Trainer(


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [None]:
# --- 5. EXECUTION OF RANDOM SEARCH AND EXPERIMENT LOGGING---
import pandas as pd
from google.colab import files

print("\n--- Starting Random Search (Total Runs: 5) ---")
print("Optimizing for 'f1' score...")

best_trial = trainer.hyperparameter_search(
    # We use 'Optuna' as the backend for the hyperparameter search
    backend="optuna",
    # Pass the function that defines the search space
    hp_space=tune_hp,
    # Maximize the F1 score (higher is better)
    direction="maximize",
    # Set the total number of experiments to run (3*2*3 = 18 total combinations)
    n_trials=5,
)

[I 2025-11-18 17:18:54,803] A new study created in memory with name: no-name-1200cbf3-fe5f-496a-a4c8-51ab7c131690



--- Starting Random Search (Total Runs: 5) ---
Optimizing for 'f1' score...


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.944645,0.572,0.547217
2,No log,0.656807,0.758,0.723684
3,No log,0.347227,0.89,0.885449


[I 2025-11-18 17:19:52,990] Trial 0 finished with value: 1.7754494456644514 and parameters: {'learning_rate': 5e-05, 'per_device_train_batch_size': 32, 'weight_decay': 0.0, 'num_train_epochs': 3}. Best is trial 0 with value: 1.7754494456644514.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.810098,0.65,0.580779
2,1.107600,0.51606,0.81,0.800841
3,1.107600,0.240392,0.914,0.908697
4,0.402600,0.160195,0.948,0.947487


[I 2025-11-18 17:22:48,636] Trial 1 finished with value: 1.8954865256447406 and parameters: {'learning_rate': 1e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.0, 'num_train_epochs': 4}. Best is trial 1 with value: 1.8954865256447406.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.49693,0.814,0.792317
2,0.815800,0.181257,0.946,0.946813
3,0.815800,0.067221,0.982,0.982086
4,0.217900,0.026789,0.996,0.996006


[I 2025-11-18 17:26:00,467] Trial 2 finished with value: 1.99200620184968 and parameters: {'learning_rate': 5e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.1, 'num_train_epochs': 4}. Best is trial 2 with value: 1.99200620184968.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,1.032871,0.566,0.512048
2,No log,0.751178,0.698,0.648953
3,No log,0.505276,0.796,0.75521
4,1.004600,0.306704,0.9,0.89687
5,1.004600,0.189667,0.94,0.939526


[I 2025-11-18 17:29:21,206] Trial 3 finished with value: 1.8795261680831272 and parameters: {'learning_rate': 1e-05, 'per_device_train_batch_size': 16, 'weight_decay': 0.0, 'num_train_epochs': 5}. Best is trial 2 with value: 1.99200620184968.


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.809325,0.654,0.585088
2,1.107700,0.516567,0.81,0.800946
3,1.107700,0.272958,0.904,0.901306


[I 2025-11-18 17:31:49,233] Trial 4 finished with value: 1.805305756146301 and parameters: {'learning_rate': 1e-05, 'per_device_train_batch_size': 8, 'weight_decay': 0.1, 'num_train_epochs': 3}. Best is trial 2 with value: 1.99200620184968.
