In [None]:
# Step 1: Install required libraries
# Quietly (-q) installs transformers, tensorflow and openpyxl into the runtime.
# NOTE(review): the import cell also needs torch, datasets, scikit-learn and nltk,
# none of which are installed here - presumably they ship with the Colab runtime;
# confirm. tensorflow is not imported by any cell visible in this notebook.
!pip install transformers tensorflow openpyxl -q

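This code installs three tools: one to use AI language models (transformers), one for creating and training AI programs (tensorflow), and one to work with Excel files (openpyxl). The other libraries imported in the next cell (torch, datasets, scikit-learn, nltk) are not installed here, so they are assumed to be already available in the Colab runtime.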

In [None]:
# Step 2: Import all libraries
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import TrainingArguments, Trainer
from google.colab import files
import numpy as np
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd
from nltk.sentiment import SentimentIntensityAnalyzer
import nltk
import time

This code imports various libraries and modules needed for tasks like using AI models, handling datasets, working with data, calculating metrics, performing sentiment analysis, timing operations, and managing files in Google Colab, so you can use their functions in your program without rewriting them.

In [None]:
# Check GPU
# Choose the compute device once: CUDA when PyTorch can see a GPU, CPU otherwise.
if not torch.cuda.is_available():
    device = torch.device("cpu")
    print("GPU not available, using CPU.")
else:
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")

GPU not available, using CPU.


This code checks whether a GPU is available for PyTorch to use. If it is, it sets the device to the GPU and prints its name; otherwise, it falls back to the CPU and informs you that no GPU is available.

In [None]:
# Step 3: Load and preprocess data
print("\n--- Loading and Preprocessing Data ---")
# Opens Colab's upload dialog; the result maps each uploaded file name to its contents.
uploaded = files.upload()

# Read the file that was actually uploaded instead of assuming its name: if the
# expected name is absent from the upload, use the first uploaded .csv. When
# nothing was uploaded (or no .csv is found) fall back to the expected name so a
# file already present in the working directory still loads.
DEFAULT_CSV = 'Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv'
csv_path = DEFAULT_CSV
if uploaded and DEFAULT_CSV not in uploaded:
    csv_path = next(
        (name for name in uploaded if name.lower().endswith('.csv')),
        DEFAULT_CSV,
    )

df = pd.read_csv(csv_path)

# VADER needs its lexicon downloaded once per runtime before the analyzer is built.
nltk.download('vader_lexicon', quiet=True)
sia = SentimentIntensityAnalyzer()

def vader_label(score):
    """Map a VADER compound score to a class id.

    Returns 0 (negative) when ``score <= -0.05``, 2 (positive) when
    ``score >= 0.05``, and 1 (neutral) for everything in between.
    """
    if score <= -0.05:
        return 0
    return 2 if score >= 0.05 else 1

# Drop rows without a headline so every text passed on below is a real string
# (a missing value would otherwise be scored as the literal text "nan").
df = df.dropna(subset=['Headlines']).reset_index(drop=True)

# Weak labels: VADER's compound score per headline, bucketed by vader_label.
df['sentiment_score'] = df['Headlines'].apply(lambda x: sia.polarity_scores(str(x))['compound'])
df['label'] = df['sentiment_score'].apply(vader_label)

print("Sample of headlines with generated labels:")
print(df[['Headlines', 'sentiment_score', 'label']].head())

texts = df['Headlines'].astype(str).tolist()
labels = df['label'].tolist()
dataset = Dataset.from_dict({"text": texts, "label": labels})

# Train and evaluation rows must not overlap, otherwise the evaluation metrics
# are measured on data the model was trained on. Take the first TRAIN_SIZE rows
# for training and the *following* EVAL_SIZE rows for evaluation, shrinking the
# slices if the file is smaller than TRAIN_SIZE + EVAL_SIZE.
TRAIN_SIZE = 2000
EVAL_SIZE = 500
n_train = min(TRAIN_SIZE, len(dataset))
n_eval = min(EVAL_SIZE, len(dataset) - n_train)
if n_eval <= 0:
    raise ValueError(
        f"Dataset has only {len(dataset)} rows; need more than {TRAIN_SIZE} "
        "to keep a separate evaluation split."
    )

train_data = dataset.select(range(n_train))
eval_data = dataset.select(range(n_train, n_train + n_eval))

print(f"Loaded dataset with {len(train_data)} training and {len(eval_data)} evaluation samples.")


--- Loading and Preprocessing Data ---


Saving Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv to Philippines-News-Headlines-Dataset-for-Sentiment-Analysis.csv
Sample of headlines with generated labels:
                                           Headlines  sentiment_score  label
0                                Miranda's doctrine.              0.0      1
1    US equity firm looks to more investments in PH.              0.0      1
2  Nickel Asia Corp Announces its Notice of Annua...              0.0      1
3                    DoF reconvenes the Green Force.              0.0      1
4                  Cebu MSMEs get training from DTI.              0.0      1
Loaded dataset with 2000 training and 500 evaluation samples.


This code loads a CSV file of news headlines, uses the VADER sentiment analysis tool to compute a compound sentiment score for each headline, labels each headline as positive (2, score ≥ 0.05), negative (0, score ≤ -0.05), or neutral (1, anything in between), and then creates training and evaluation datasets for machine learning models.

In [None]:
# Step 4: Tokenization
# Checkpoint name on the Hugging Face Hub; Step 7 loads the model from the same name.
MODEL_NAME = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def tokenize_function(examples):
    """Tokenize the "text" entries of a batch with truncation and padding enabled.

    Intended for ``Dataset.map(..., batched=True)``; returns whatever the
    tokenizer produces for the batch.
    """
    batch_texts = examples["text"]
    return tokenizer(batch_texts, truncation=True, padding=True)

# Tokenize each split, then rename the target column from "label" to "labels".
tokenized_train = train_data.map(tokenize_function, batched=True).rename_column("label", "labels")
tokenized_eval = eval_data.map(tokenize_function, batched=True).rename_column("label", "labels")

# Expose only these columns, as torch tensors, when the datasets are indexed.
TENSOR_COLUMNS = ('input_ids', 'attention_mask', 'labels')
for split in (tokenized_train, tokenized_eval):
    split.set_format("torch", columns=list(TENSOR_COLUMNS))

print("Tokenization complete!")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

Map:   0%|          | 0/500 [00:00<?, ? examples/s]

Tokenization complete!


This code loads a pretrained tokenizer for the "ProsusAI/finbert" model, converts text data into token IDs with padding and truncation, renames columns for compatibility, formats the tokenized data for PyTorch, and prepares the training and evaluation sets for use in model training.



In [None]:
# Step 5: Define hyperparameter configurations
# Settings of the baseline run; every other experiment overrides a few of them.
# NOTE(review): with the 2000 training rows selected in Step 3, several runs seem
# to have fewer optimizer steps than warmup_steps (e.g. batch 16 -> 125 steps per
# epoch, 3 epochs = 375 < 500), so the learning rate may never leave warmup.
# Assumes a single device and no gradient accumulation - confirm this is intended.
_BASELINE_SETTINGS = {
    "num_train_epochs": 3,
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 16,
    "warmup_steps": 500,
    "weight_decay": 0.01,
    "logging_steps": 100,
}


def _experiment(name, **overrides):
    """Return one config dict: the experiment name, then the baseline settings with overrides applied."""
    return {"experiment_name": name, **_BASELINE_SETTINGS, **overrides}


hyperparameter_configs = [
    _experiment("Experiment 1 - Baseline"),
    _experiment(
        "Experiment 2 - More Epochs",
        num_train_epochs=5,
    ),
    _experiment(
        "Experiment 3 - Larger Batch Size",
        per_device_train_batch_size=32,
        per_device_eval_batch_size=32,
        logging_steps=50,
    ),
    _experiment(
        "Experiment 4 - More Warmup & Weight Decay",
        num_train_epochs=4,
        warmup_steps=800,
        weight_decay=0.1,
    ),
    _experiment(
        "Experiment 5 - Small Batch, High Regularization",
        num_train_epochs=4,
        per_device_train_batch_size=8,
        per_device_eval_batch_size=8,
        warmup_steps=1000,
        weight_decay=0.2,
        logging_steps=150,
    ),
]

This code defines different sets of training hyperparameters for multiple experiments, specifying values like the number of training epochs, batch sizes, warmup steps, weight decay for regularization, and how often to log training progress, to compare their effects on model performance.

In [None]:
# Step 6: Define metrics
def compute_metrics(p):
    """Score one evaluation pass.

    ``p`` must expose ``predictions`` and ``label_ids``. The predicted class is
    the argmax of ``p.predictions`` along axis 1.

    Returns a dict with "accuracy" and the weighted-average "f1".
    """
    true_labels = p.label_ids
    predicted = np.argmax(p.predictions, axis=1)
    return {
        "accuracy": accuracy_score(true_labels, predicted),
        "f1": f1_score(true_labels, predicted, average="weighted"),
    }

This code defines a function that calculates evaluation metrics—accuracy and weighted F1 score—based on the model's predictions and true labels, helping assess model performance during training and evaluation.

In [None]:
# Step 7: Run all experiments
results_list = []

BANNER = "=" * 80
# Config keys that are passed to TrainingArguments and copied into each results
# row, in the column order used by the results table.
TRACKED_HYPERPARAMETERS = (
    "num_train_epochs",
    "per_device_train_batch_size",
    "per_device_eval_batch_size",
    "warmup_steps",
    "weight_decay",
    "logging_steps",
)

print("\n" + BANNER)
print("STARTING HYPERPARAMETER EXPERIMENTS")
print(BANNER)

for idx, config in enumerate(hyperparameter_configs, start=1):
    print(f"\n{BANNER}")
    print(f"RUNNING {config['experiment_name']}")
    print(BANNER)
    print("Configuration:")
    for key, value in config.items():
        if key == "experiment_name":
            continue
        print(f"  {key}: {value}")

    # Reload the checkpoint for every experiment so no run starts from weights
    # already fine-tuned by a previous one.
    # NOTE(review): the checkpoint's own class order may differ from the
    # 0/1/2 encoding produced by vader_label - confirm.
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=3)
    model = model.to(device)

    run_settings = {name: config[name] for name in TRACKED_HYPERPARAMETERS}
    training_args = TrainingArguments(
        output_dir=f"./results_exp_{idx}",
        logging_dir=f"./logs_exp_{idx}",
        eval_strategy="epoch",
        save_strategy="epoch",
        load_best_model_at_end=True,
        fp16=torch.cuda.is_available(),  # mixed precision only when a GPU is present
        report_to=[],
        **run_settings,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_eval,
        compute_metrics=compute_metrics,
        processing_class=tokenizer,
    )

    # Wall-clock time of the training call only; the final evaluation is excluded.
    start_time = time.time()
    trainer.train()
    training_time = time.time() - start_time

    eval_results = trainer.evaluate()

    result_row = {
        "Experiment": config["experiment_name"],
        **run_settings,
        "eval_accuracy": eval_results["eval_accuracy"],
        "eval_f1": eval_results["eval_f1"],
        "eval_loss": eval_results["eval_loss"],
        "training_time_seconds": round(training_time, 2),
        "training_time_minutes": round(training_time / 60, 2),
    }
    results_list.append(result_row)

    print(f"\n{config['experiment_name']} Results:")
    print(f"  Accuracy: {eval_results['eval_accuracy']:.4f}")
    print(f"  F1 Score: {eval_results['eval_f1']:.4f}")
    print(f"  Loss: {eval_results['eval_loss']:.4f}")
    print(f"  Training Time: {training_time/60:.2f} minutes")

    # Drop references to this run's model and trainer before the next run starts.
    del model, trainer
    torch.cuda.empty_cache()

print("\n" + BANNER)
print("ALL EXPERIMENTS COMPLETED!")
print(BANNER)


STARTING HYPERPARAMETER EXPERIMENTS

RUNNING Experiment 1 - Baseline
Configuration:
  num_train_epochs: 3
  per_device_train_batch_size: 16
  per_device_eval_batch_size: 16
  warmup_steps: 500
  weight_decay: 0.01
  logging_steps: 100


pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]



Epoch,Training Loss,Validation Loss


KeyboardInterrupt: 

This code runs multiple training experiments with different hyperparameter settings, evaluating each model's performance on validation data, recording metrics like accuracy, F1 score, loss, and training time, then outputs the results and cleans up resources after each run.

In [None]:
# Step 8: Create and save results
# Step 7 appends one row per finished experiment. If it was interrupted before the
# first run completed, the list is empty and sorting by 'eval_accuracy' below
# would fail with an unhelpful KeyError, so stop early with a clear message.
if not results_list:
    raise RuntimeError(
        "No experiment results to summarise - run Step 7 until at least one "
        "experiment has finished."
    )

results_df = pd.DataFrame(results_list)
# Best accuracy first; Rank is the 1-based position in that ordering.
results_df_sorted = results_df.sort_values('eval_accuracy', ascending=False).reset_index(drop=True)
results_df_sorted.insert(0, 'Rank', range(1, len(results_df_sorted) + 1))

print("\n" + "="*80)
print("EXPERIMENT RESULTS SUMMARY (Ranked by Accuracy)")
print("="*80)
print(results_df_sorted.to_string(index=False))

excel_filename = 'bert_hyperparameter_experiments_results.xlsx'

# One workbook, three sheets: full table, metrics only, hyperparameters only.
with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
    results_df_sorted.to_excel(writer, sheet_name='Results', index=False)

    comparison_df = results_df_sorted[['Rank', 'Experiment', 'eval_accuracy', 'eval_f1',
                                        'eval_loss', 'training_time_minutes']]
    comparison_df.to_excel(writer, sheet_name='Quick Comparison', index=False)

    hyperparam_df = results_df_sorted[['Rank', 'Experiment', 'num_train_epochs',
                                        'per_device_train_batch_size', 'per_device_eval_batch_size',
                                        'warmup_steps', 'weight_decay', 'logging_steps']]
    hyperparam_df.to_excel(writer, sheet_name='Hyperparameters', index=False)

print(f"\n✓ Results saved to: {excel_filename}")
print("\nDownloading file...")

# Hands the saved workbook to Colab's download helper.
files.download(excel_filename)

print("\n" + "="*80)
print("ANALYSIS COMPLETE - File downloaded successfully!")
print("="*80)

This code takes the collected experiment results, organizes and ranks them by accuracy, prints a summary table, saves detailed results into an Excel file with multiple sheets for full results, quick comparison, and hyperparameter settings, then downloads the Excel file for further analysis.