In [1]:
# Step 1: Install required libraries
!pip install transformers tensorflow openpyxl scikit-learn -q

In [2]:
# Step 2: Import libraries
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from google.colab import files
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import ParameterGrid
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
import time
import random
from datetime import datetime
import itertools

In [3]:
# Step 3: Check GPU
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("GPU not available, using CPU.")

Using GPU: Tesla T4


In [4]:
# Step 4: Load and preprocess data
print("\n--- Loading and Preprocessing Data ---")
uploaded = files.upload()

df = pd.read_csv('Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv')

nltk.download('vader_lexicon', quiet=True)
sia = SentimentIntensityAnalyzer()

def vader_label(score):
    if score >= 0.05:
        return 2
    elif score <= -0.05:
        return 0
    else:
        return 1

df['sentiment_score'] = df['Headlines'].apply(lambda x: sia.polarity_scores(str(x))['compound'])
df['label'] = df['sentiment_score'].apply(vader_label)

texts = df['Headlines'].tolist()
labels = df['label'].tolist()
dataset = Dataset.from_dict({"text": texts, "label": labels})

train_data = dataset.select(range(2000))
eval_data = dataset.select(range(500))

print(f"Loaded dataset with {len(train_data)} training and {len(eval_data)} evaluation samples.")


--- Loading and Preprocessing Data ---


Saving Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv to Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv
Loaded dataset with 2000 training and 500 evaluation samples.


In [5]:
# Step 5: Tokenization
MODEL_NAME = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    return tokenizer(examples["text"], truncation=True, padding=True)

tokenized_train = train_data.map(tokenize_function, batched=True)
tokenized_eval = eval_data.map(tokenize_function, batched=True)

tokenized_train = tokenized_train.rename_column("label", "labels")
tokenized_eval = tokenized_eval.rename_column("label", "labels")

tokenized_train.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])
tokenized_eval.set_format("torch", columns=['input_ids', 'attention_mask', 'labels'])

print("Tokenization complete!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenization complete!


In [6]:
# Step 6: Define metrics
def compute_metrics(p):
    preds = np.argmax(p.predictions, axis=1)
    acc = accuracy_score(p.label_ids, preds)
    f1 = f1_score(p.label_ids, preds, average="weighted")
    return {"accuracy": acc, "f1": f1}

In [7]:
# Step 7: Define hyperparameter search space
hyperparameter_space = {
    "num_train_epochs": [2, 3, 4],
    "per_device_train_batch_size": [8, 16, 32],
    "warmup_steps": [200, 500, 800],
    "weight_decay": [0.01, 0.05, 0.1],
    "learning_rate": [2e-5, 3e-5, 5e-5]
}

print("\n" + "="*80)
print("HYPERPARAMETER SEARCH SPACE")
print("="*80)
for param, values in hyperparameter_space.items():
    print(f"{param}: {values}")

total_combinations = np.prod([len(v) for v in hyperparameter_space.values()])
print(f"\nTotal possible combinations: {total_combinations}")



HYPERPARAMETER SEARCH SPACE
num_train_epochs: [2, 3, 4]
per_device_train_batch_size: [8, 16, 32]
warmup_steps: [200, 500, 800]
weight_decay: [0.01, 0.05, 0.1]
learning_rate: [2e-05, 3e-05, 5e-05]

Total possible combinations: 243


In [8]:
# Step 8: Helper function to train and evaluate
def train_and_evaluate(config, experiment_name, trial_num):
    """Train model with given hyperparameters and return results"""

    print(f"\n{'='*80}")
    print(f"Running: {experiment_name} - Trial {trial_num}")
    print(f"{'='*80}")
    print("Configuration:")
    for key, value in config.items():
        print(f"  {key}: {value}")

    # Create fresh model
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=3
    ).to(device)

    # Setup training arguments
    training_args = TrainingArguments(
        output_dir=f"./results_{experiment_name}_trial_{trial_num}",
        num_train_epochs=config["num_train_epochs"],
        per_device_train_batch_size=config["per_device_train_batch_size"],
        per_device_eval_batch_size=config["per_device_train_batch_size"],
        warmup_steps=config["warmup_steps"],
        weight_decay=config["weight_decay"],
        learning_rate=config["learning_rate"],
        logging_dir=f"./logs_{experiment_name}_trial_{trial_num}",
        logging_steps=50,
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        fp16=torch.cuda.is_available(),
        report_to=[]
    )

    # Create trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_eval,
        compute_metrics=compute_metrics,
        processing_class=tokenizer,
    )

    # Train and time it
    start_time = time.time()
    trainer.train()
    training_time = time.time() - start_time

    # Evaluate
    eval_results = trainer.evaluate()

    # Prepare results
    result = {
        "experiment_type": experiment_name,
        "trial_number": trial_num,
        "num_train_epochs": config["num_train_epochs"],
        "per_device_train_batch_size": config["per_device_train_batch_size"],
        "warmup_steps": config["warmup_steps"],
        "weight_decay": config["weight_decay"],
        "learning_rate": config["learning_rate"],
        "eval_accuracy": eval_results["eval_accuracy"],
        "eval_f1": eval_results["eval_f1"],
        "eval_loss": eval_results["eval_loss"],
        "training_time_seconds": round(training_time, 2),
        "training_time_minutes": round(training_time / 60, 2),
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }

    print(f"\nResults:")
    print(f"  Accuracy: {eval_results['eval_accuracy']:.4f}")
    print(f"  F1 Score: {eval_results['eval_f1']:.4f}")
    print(f"  Loss: {eval_results['eval_loss']:.4f}")
    print(f"  Training Time: {training_time/60:.2f} minutes")

    # Clean up
    del model
    del trainer
    torch.cuda.empty_cache()

    return result

In [11]:
# Step 9: GRID SEARCH Implementation
print("\n" + "="*80)
print("STARTING GRID SEARCH")
print("="*80)

grid_results = []
param_grid = list(ParameterGrid(hyperparameter_space))

print(f"Grid Search will test {len(param_grid)} combinations")
print("Note: This is comprehensive but can be time-consuming!")

# Limit grid search for demonstration (you can remove this limit)
MAX_GRID_TRIALS = 5  # Change this to len(param_grid) for full grid search
grid_start_time = time.time()

for i, params in enumerate(param_grid[:MAX_GRID_TRIALS], 1):
    try:
        result = train_and_evaluate(params, "GridSearch", i)
        grid_results.append(result)
    except Exception as e:
        print(f"Error in Grid Search trial {i}: {str(e)}")
        continue

grid_total_time = time.time() - grid_start_time

print("\n" + "="*80)
print(f"GRID SEARCH COMPLETED - Total Time: {grid_total_time/60:.2f} minutes")
print("="*80)


STARTING GRID SEARCH
Grid Search will test 243 combinations
Note: This is comprehensive but can be time-consuming!

Running: GridSearch - Trial 1
Configuration:
  learning_rate: 2e-05
  num_train_epochs: 2
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.01


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7452,0.495396,0.812,0.787331
2,0.4164,0.240314,0.92,0.920725



Results:
  Accuracy: 0.9200
  F1 Score: 0.9207
  Loss: 0.2403
  Training Time: 1.90 minutes

Running: GridSearch - Trial 2
Configuration:
  learning_rate: 2e-05
  num_train_epochs: 2
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.05


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7461,0.496061,0.818,0.796161
2,0.4223,0.241262,0.916,0.916973



Results:
  Accuracy: 0.9160
  F1 Score: 0.9170
  Loss: 0.2413
  Training Time: 2.02 minutes

Running: GridSearch - Trial 3
Configuration:
  learning_rate: 2e-05
  num_train_epochs: 2
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.1


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.7448,0.498196,0.812,0.789922
2,0.4158,0.240831,0.918,0.919326



Results:
  Accuracy: 0.9180
  F1 Score: 0.9193
  Loss: 0.2408
  Training Time: 1.93 minutes

Running: GridSearch - Trial 4
Configuration:
  learning_rate: 2e-05
  num_train_epochs: 2
  per_device_train_batch_size: 8
  warmup_steps: 500
  weight_decay: 0.01


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.8868,0.700395,0.73,0.676285
2,0.5983,0.312956,0.902,0.901627



Results:
  Accuracy: 0.9020
  F1 Score: 0.9016
  Loss: 0.3130
  Training Time: 1.57 minutes

Running: GridSearch - Trial 5
Configuration:
  learning_rate: 2e-05
  num_train_epochs: 2
  per_device_train_batch_size: 8
  warmup_steps: 500
  weight_decay: 0.05


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.8867,0.699221,0.73,0.677521
2,0.6044,0.321925,0.9,0.898239



Results:
  Accuracy: 0.9000
  F1 Score: 0.8982
  Loss: 0.3219
  Training Time: 1.78 minutes

GRID SEARCH COMPLETED - Total Time: 9.35 minutes


In [12]:
# Step 10: RANDOM SEARCH Implementation
print("\n" + "="*80)
print("STARTING RANDOM SEARCH")
print("="*80)

random_results = []
NUM_RANDOM_TRIALS = 5  # Number of random combinations to try

print(f"Random Search will test {NUM_RANDOM_TRIALS} random combinations")
print("Note: This samples the search space more efficiently!")

random_start_time = time.time()

for i in range(1, NUM_RANDOM_TRIALS + 1):
    # Randomly sample hyperparameters
    random_config = {
        "num_train_epochs": random.choice(hyperparameter_space["num_train_epochs"]),
        "per_device_train_batch_size": random.choice(hyperparameter_space["per_device_train_batch_size"]),
        "warmup_steps": random.choice(hyperparameter_space["warmup_steps"]),
        "weight_decay": random.choice(hyperparameter_space["weight_decay"]),
        "learning_rate": random.choice(hyperparameter_space["learning_rate"])
    }

    try:
        result = train_and_evaluate(random_config, "RandomSearch", i)
        random_results.append(result)
    except Exception as e:
        print(f"Error in Random Search trial {i}: {str(e)}")
        continue

random_total_time = time.time() - random_start_time

print("\n" + "="*80)
print(f"RANDOM SEARCH COMPLETED - Total Time: {random_total_time/60:.2f} minutes")
print("="*80)


STARTING RANDOM SEARCH
Random Search will test 5 random combinations
Note: This samples the search space more efficiently!

Running: RandomSearch - Trial 1
Configuration:
  num_train_epochs: 4
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.1
  learning_rate: 3e-05


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.706,0.406242,0.86,0.850631
2,0.4199,0.124044,0.97,0.970412
3,0.2086,0.036864,0.994,0.994
4,0.0525,0.02896,0.994,0.994



Results:
  Accuracy: 0.9940
  F1 Score: 0.9940
  Loss: 0.0290
  Training Time: 4.30 minutes

Running: RandomSearch - Trial 2
Configuration:
  num_train_epochs: 4
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.1
  learning_rate: 3e-05


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.706,0.406242,0.86,0.850631
2,0.4199,0.124044,0.97,0.970412
3,0.2086,0.036864,0.994,0.994
4,0.0525,0.02896,0.994,0.994



Results:
  Accuracy: 0.9940
  F1 Score: 0.9940
  Loss: 0.0290
  Training Time: 3.52 minutes

Running: RandomSearch - Trial 3
Configuration:
  num_train_epochs: 4
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.1
  learning_rate: 3e-05


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.706,0.406242,0.86,0.850631
2,0.4199,0.124044,0.97,0.970412
3,0.2086,0.036864,0.994,0.994
4,0.0525,0.02896,0.994,0.994



Results:
  Accuracy: 0.9940
  F1 Score: 0.9940
  Loss: 0.0290
  Training Time: 4.95 minutes

Running: RandomSearch - Trial 4
Configuration:
  num_train_epochs: 4
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.1
  learning_rate: 3e-05


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.706,0.406242,0.86,0.850631
2,0.4199,0.124044,0.97,0.970412
3,0.2086,0.036864,0.994,0.994
4,0.0525,0.02896,0.994,0.994



Results:
  Accuracy: 0.9940
  F1 Score: 0.9940
  Loss: 0.0290
  Training Time: 4.39 minutes

Running: RandomSearch - Trial 5
Configuration:
  num_train_epochs: 4
  per_device_train_batch_size: 8
  warmup_steps: 200
  weight_decay: 0.1
  learning_rate: 3e-05


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,0.706,0.406242,0.86,0.850631
2,0.4199,0.124044,0.97,0.970412
3,0.2086,0.036864,0.994,0.994
4,0.0525,0.02896,0.994,0.994



Results:
  Accuracy: 0.9940
  F1 Score: 0.9940
  Loss: 0.0290
  Training Time: 5.38 minutes

RANDOM SEARCH COMPLETED - Total Time: 22.74 minutes


In [13]:
# Step 11: Compare and Analyze Results
print("\n" + "="*80)
print("COMPARATIVE ANALYSIS")
print("="*80)

# Combine all results
all_results = grid_results + random_results
results_df = pd.DataFrame(all_results)

# Analysis by method
if grid_results:
    grid_df = pd.DataFrame(grid_results)
    best_grid = grid_df.loc[grid_df['eval_accuracy'].idxmax()]
    avg_grid_time = grid_df['training_time_minutes'].mean()

    print("\nGRID SEARCH SUMMARY:")
    print(f"  Trials Completed: {len(grid_results)}")
    print(f"  Best Accuracy: {best_grid['eval_accuracy']:.4f}")
    print(f"  Best F1 Score: {best_grid['eval_f1']:.4f}")
    print(f"  Average Time per Trial: {avg_grid_time:.2f} minutes")
    print(f"  Total Time: {grid_total_time/60:.2f} minutes")

if random_results:
    random_df = pd.DataFrame(random_results)
    best_random = random_df.loc[random_df['eval_accuracy'].idxmax()]
    avg_random_time = random_df['training_time_minutes'].mean()

    print("\nRANDOM SEARCH SUMMARY:")
    print(f"  Trials Completed: {len(random_results)}")
    print(f"  Best Accuracy: {best_random['eval_accuracy']:.4f}")
    print(f"  Best F1 Score: {best_random['eval_f1']:.4f}")
    print(f"  Average Time per Trial: {avg_random_time:.2f} minutes")
    print(f"  Total Time: {random_total_time/60:.2f} minutes")

# Overall best
if all_results:
    best_overall = results_df.loc[results_df['eval_accuracy'].idxmax()]
    print("\nOVERALL BEST CONFIGURATION:")
    print(f"  Method: {best_overall['experiment_type']}")
    print(f"  Accuracy: {best_overall['eval_accuracy']:.4f}")
    print(f"  F1 Score: {best_overall['eval_f1']:.4f}")
    print(f"  Configuration:")
    print(f"    - Epochs: {best_overall['num_train_epochs']}")
    print(f"    - Batch Size: {best_overall['per_device_train_batch_size']}")
    print(f"    - Learning Rate: {best_overall['learning_rate']}")
    print(f"    - Warmup Steps: {best_overall['warmup_steps']}")
    print(f"    - Weight Decay: {best_overall['weight_decay']}")


COMPARATIVE ANALYSIS

GRID SEARCH SUMMARY:
  Trials Completed: 5
  Best Accuracy: 0.9200
  Best F1 Score: 0.9207
  Average Time per Trial: 1.84 minutes
  Total Time: 9.35 minutes

RANDOM SEARCH SUMMARY:
  Trials Completed: 5
  Best Accuracy: 0.9940
  Best F1 Score: 0.9940
  Average Time per Trial: 4.51 minutes
  Total Time: 22.74 minutes

OVERALL BEST CONFIGURATION:
  Method: RandomSearch
  Accuracy: 0.9940
  F1 Score: 0.9940
  Configuration:
    - Epochs: 4
    - Batch Size: 8
    - Learning Rate: 3e-05
    - Warmup Steps: 200
    - Weight Decay: 0.1


In [17]:
# Step 12: Export to Excel
excel_filename = 'Results-Inglesa.xlsx'

with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
    # All results
    results_df_sorted = results_df.sort_values('eval_accuracy', ascending=False).reset_index(drop=True)
    results_df_sorted.insert(0, 'Rank', range(1, len(results_df_sorted) + 1))
    results_df_sorted.to_excel(writer, sheet_name='All Results', index=False)

    # Grid Search results
    if grid_results:
        grid_df_sorted = grid_df.sort_values('eval_accuracy', ascending=False).reset_index(drop=True)
        grid_df_sorted.insert(0, 'Rank', range(1, len(grid_df_sorted) + 1))
        grid_df_sorted.to_excel(writer, sheet_name='Grid Search', index=False)

    # Random Search results
    if random_results:
        random_df_sorted = random_df.sort_values('eval_accuracy', ascending=False).reset_index(drop=True)
        random_df_sorted.insert(0, 'Rank', range(1, len(random_df_sorted) + 1))
        random_df_sorted.to_excel(writer, sheet_name='Random Search', index=False)

    # Comparison summary
    comparison_data = []
    if grid_results:
        comparison_data.append({
            'Method': 'Grid Search',
            'Trials': len(grid_results),
            'Best Accuracy': best_grid['eval_accuracy'],
            'Best F1': best_grid['eval_f1'],
            'Avg Time per Trial (min)': avg_grid_time,
            'Total Time (min)': grid_total_time/60,
            'Efficiency Score': best_grid['eval_accuracy'] / (grid_total_time/60)
        })

    if random_results:
        comparison_data.append({
            'Method': 'Random Search',
            'Trials': len(random_results),
            'Best Accuracy': best_random['eval_accuracy'],
            'Best F1': best_random['eval_f1'],
            'Avg Time per Trial (min)': avg_random_time,
            'Total Time (min)': random_total_time/60,
            'Efficiency Score': best_random['eval_accuracy'] / (random_total_time/60)
        })

    comparison_df = pd.DataFrame(comparison_data)
    comparison_df.to_excel(writer, sheet_name='Method Comparison', index=False)

    # Best configurations
    top_10 = results_df_sorted.head(10)
    top_10.to_excel(writer, sheet_name='Top 10 Configurations', index=False)

print("\nDownloading file...")

files.download(excel_filename)

print("\n" + "="*80)
print("ANALYSIS COMPLETE!")
print("="*80)



Downloading file...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


ANALYSIS COMPLETE!
