# Master Profile Notebook
This notebook combines all of the profiling experiments we have run so far: it uses the optimal hyperparameters, applies knowledge distillation, trains with AMP (automatic mixed precision), and evaluates with FP16 weights.






## Imports
We import the necessary libraries and modules, including the custom `finbert` modules we have defined that allow for profiling.

In [1]:
from __future__ import annotations

from pathlib import Path
import shutil
import time
import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import classification_report
from torch.nn import CrossEntropyLoss
from transformers import AutoModelForSequenceClassification

from finbert.finbert import *
from finbert.finbert_profile import *
from finbert.profile_utils import get_model_size_mb, print_device_info, setup_nltk_data
import finbert.utils as tools
from finbert.distillFinBert import *

import wandb

%load_ext autoreload
%autoreload 2

# Project root, taken as the parent of the current working directory
# (assumes the notebook is launched from a direct subdirectory of the repo — TODO confirm).
project_dir = Path.cwd().parent
# Remove pandas' column-width limit so long text values are shown in full.
pd.set_option('max_colwidth', None)

✓ Helper utilities loaded


In [2]:
# Paths
# Every artifact of this notebook lives under models/master-profile; each
# variant gets its own subdirectory so their checkpoints never collide.
cl_path = project_dir / 'models' / 'master-profile'
cl_path_baseline = cl_path / 'baseline'
cl_path_amp = cl_path / 'amp'
cl_data_path = project_dir / 'data' / 'sentiment_data'

# Clean up previous run.
# A missing directory just means this is the first run. Any other filesystem
# error is reported instead of being silently swallowed, because stale
# checkpoints left behind would later be reloaded from `cl_path_amp`.
# The cleanup stays best-effort: the notebook keeps running either way.
try:
    shutil.rmtree(cl_path)
except FileNotFoundError:
    pass
except OSError as err:
    print(f"Warning: could not fully remove {cl_path}: {err}")


In [3]:
# Settings shared by both training runs. They are defined once so the baseline
# and the AMP run cannot drift apart and silently invalidate the comparison.
SHARED_HPARAMS = dict(
    data_dir=cl_data_path,
    num_train_epochs=6,
    max_seq_length=64,
    train_batch_size=32,
    learning_rate=0.00001420326287435756,
    output_mode='classification',
    warm_up_proportion=0.14386028719686458,
    local_rank=-1,
    discriminate=True,
    gradual_unfreeze=False,
)
SENTIMENT_LABELS = ['positive', 'negative', 'neutral']
PROFILE_TRAIN_STEPS = 20

# ---------------------------------------------------------------------------
# Baseline: bert-base-uncased, AMP disabled, trained through ProfiledFinBert
# ---------------------------------------------------------------------------
bertmodel_baseline = AutoModelForSequenceClassification.from_pretrained(
    'bert-base-uncased', cache_dir=None, num_labels=3
)

config_baseline = Config(
    bert_model=bertmodel_baseline,
    model_dir=cl_path_baseline,  # baseline checkpoints go to their own directory
    use_amp=False,  # Baseline: AMP is off
    **SHARED_HPARAMS,
)
config_baseline.profile_train_steps = PROFILE_TRAIN_STEPS

finbert_baseline = ProfiledFinBert(config_baseline)
finbert_baseline.base_model = 'bert-base-uncased'
finbert_baseline.prepare_model(label_list=SENTIMENT_LABELS)

train_data_baseline = finbert_baseline.get_data('train')
test_data_baseline = finbert_baseline.get_data('test')

model_baseline = finbert_baseline.create_the_model()

# Train baseline (wall-clock time covers the whole train() call).
# NOTE(review): this run goes through ProfiledFinBert while the AMP run below
# uses DistillFinBert, so the two wall times may not be directly comparable
# if only one of them pays profiler overhead — confirm.
start = time.perf_counter()
trained_model_baseline = finbert_baseline.train(train_examples=train_data_baseline, model=model_baseline)
baseline_train_wall_s = time.perf_counter() - start

# ---------------------------------------------------------------------------
# AMP run: distilbert-base-uncased, AMP enabled, trained through DistillFinBert
# ---------------------------------------------------------------------------
bertmodel_amp = AutoModelForSequenceClassification.from_pretrained(
    'distilbert-base-uncased', cache_dir=None, num_labels=3
)

config_amp = Config(
    bert_model=bertmodel_amp,
    model_dir=cl_path_amp,  # AMP checkpoints go to their own directory
    use_amp=True,  # Enable AMP
    **SHARED_HPARAMS,
)
config_amp.profile_train_steps = PROFILE_TRAIN_STEPS

# W&B
# The run is opened only now, i.e. after baseline training has finished, and
# its config is taken from the AMP configuration.
wandb.init(
    entity="si2449-columbia-university",
    project="finbert-experiments",
    name="master-profile",
    group="master-profile",
    config=vars(config_amp)
)

finbert_amp = DistillFinBert(config_amp)
finbert_amp.base_model = 'distilbert-base-uncased'
finbert_amp.prepare_model(label_list=SENTIMENT_LABELS)

# The data is loaded again through the AMP wrapper instead of reusing the
# baseline's copies.
train_data_amp = finbert_amp.get_data('train')
test_data_amp = finbert_amp.get_data('test')

model_amp = finbert_amp.create_the_model()

# Train AMP
# NOTE(review): the captured training log for this run reports
# "16-bits training: False" even though use_amp=True — confirm that
# DistillFinBert actually honours `use_amp`.
start = time.perf_counter()
trained_model_amp = finbert_amp.train(train_examples=train_data_amp, model=model_amp)
amp_train_wall_s = time.perf_counter() - start


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
12/17/2025 03:26:09 - INFO - finbert.finbert -   device: cuda n_gpu: 1, distributed training: False, 16-bits training: False
12/17/2025 03:26:11 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:26:11 - INFO - finbert.utils -   guid: train-1
12/17/2025 03:26:11 - INFO - finbert.utils -   tokens: [CLS] after the reporting period , bio ##tie north american licensing partner so ##max ##on pharmaceuticals announced positive results with na ##lm ##efe ##ne in a pilot phase 2 clinical trial for smoking ce ##ssa ##tion [SEP]
12/17/2025 03:26:11 - INFO - finbert.utils -   input_ids: 101 2044 1996 7316 2558 1010 16012 9515 2167 2137 13202 4256 2061 17848 2239 24797 2623 3893 3463 2007 6583 13728 2723


Starting Profiled Training
Device: cuda
Profiling activities: [<ProfilerActivity.CPU: 0>, <ProfilerActivity.CUDA: 2>]



Iteration:  17%|█▋        | 19/109 [00:14<01:10,  1.28it/s]
Epoch:   0%|          | 0/6 [00:14<?, ?it/s]



Profiling complete for first epoch (20 steps)
Continuing full training without profiling...


PROFILING RESULTS - Training


By CPU Time:
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                                   Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg     Self CUDA   Self CUDA %    CUDA total  CUDA time avg       CPU Mem  Self CPU Mem      CUDA Mem  Self CUDA Mem    # of Calls  
-------------------------------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                                  cudaStreamSynchronize        76.15%       12.163s        76.15%

Iteration: 100%|██████████| 109/109 [01:17<00:00,  1.41it/s]
12/17/2025 03:28:24 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:28:24 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:28:24 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:28:24 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:28:24 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:28:24 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.7724753526540903]


Iteration: 100%|██████████| 109/109 [01:17<00:00,  1.41it/s]
12/17/2025 03:29:45 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:29:45 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:29:45 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:29:45 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:29:45 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:29:45 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.7724753526540903, 0.6911069934184735]


Iteration: 100%|██████████| 109/109 [01:17<00:00,  1.41it/s]
12/17/2025 03:31:06 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:31:06 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:31:06 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:31:06 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:31:06 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:31:06 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.7724753526540903, 0.6911069934184735, 0.6911069934184735]


Iteration: 100%|██████████| 109/109 [01:17<00:00,  1.41it/s]
12/17/2025 03:32:27 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:32:27 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:32:27 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:32:27 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:32:27 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:32:27 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.7724753526540903, 0.6911069934184735, 0.6911069934184735, 0.6911069934184735]


Iteration: 100%|██████████| 109/109 [01:04<00:00,  1.69it/s]
12/17/2025 03:33:35 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:33:35 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:33:35 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:33:35 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:33:35 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:33:35 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.7724753526540903, 0.6911069934184735, 0.6911069934184735, 0.6911069934184735, 0.6911069934184735]


Iteration: 100%|██████████| 109/109 [00:36<00:00,  2.97it/s]
12/17/2025 03:34:14 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:34:14 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:34:14 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:34:14 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:34:14 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:34:14 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.7724753526540903, 0.6911069934184735, 0.6911069934184735, 0.6911069934184735, 0.6911069934184735, 0.6911069934184735]


Epoch: 100%|██████████| 6/6 [07:09<00:00, 71.59s/it]
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Currently logged in as: [33mtfs2123[0m ([33msi2449-columbia-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


12/17/2025 03:34:19 - INFO - finbert.distillFinBert -   device: cuda n_gpu: 1, distributed training: False, 16-bits training: False
12/17/2025 03:34:20 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:34:20 - INFO - finbert.utils -   guid: train-1
12/17/2025 03:34:20 - INFO - finbert.utils -   tokens: [CLS] after the reporting period , bio ##tie north american licensing partner so ##max ##on pharmaceuticals announced positive results with na ##lm ##efe ##ne in a pilot phase 2 clinical trial for smoking ce ##ssa ##tion [SEP]
12/17/2025 03:34:20 - INFO - finbert.utils -   input_ids: 101 2044 1996 7316 2558 1010 16012 9515 2167 2137 13202 4256 2061 17848 2239 24797 2623 3893 3463 2007 6583 13728 27235 2638 1999 1037 4405 4403 1016 6612 3979 2005 9422 8292 11488 3508 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:34:20 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.699349004488725]


Iteration: 100%|██████████| 109/109 [00:16<00:00,  6.72it/s]
12/17/2025 03:35:18 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:35:18 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:35:18 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:35:18 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:35:18 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:35:18 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.699349004488725, 0.6039920311707717]


Iteration: 100%|██████████| 109/109 [00:15<00:00,  7.08it/s]
12/17/2025 03:35:35 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:35:35 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:35:35 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:35:35 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:35:35 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:35:35 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.699349004488725, 0.6039920311707717, 0.6039920311707717]


Iteration: 100%|██████████| 109/109 [00:16<00:00,  6.71it/s]
12/17/2025 03:35:53 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:35:53 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:35:53 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:35:53 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:35:53 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:35:53 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.699349004488725, 0.6039920311707717, 0.6039920311707717, 0.6039920311707717]


Iteration: 100%|██████████| 109/109 [00:15<00:00,  7.08it/s]
12/17/2025 03:36:11 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:36:11 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:36:11 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:36:11 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:11 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:11 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.699349004488725, 0.6039920311707717, 0.6039920311707717, 0.6039920311707717, 0.6039920311707717]


Iteration: 100%|██████████| 109/109 [00:16<00:00,  6.80it/s]
12/17/2025 03:36:29 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:36:29 - INFO - finbert.utils -   guid: validation-1
12/17/2025 03:36:29 - INFO - finbert.utils -   tokens: [CLS] our in - depth expertise extends to the fields of energy , industry , urban & mobility and water & environment [SEP]
12/17/2025 03:36:29 - INFO - finbert.utils -   input_ids: 101 2256 1999 1011 5995 11532 8908 2000 1996 4249 1997 2943 1010 3068 1010 3923 1004 12969 1998 2300 1004 4044 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:29 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:29 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0

Validation losses: [0.699349004488725, 0.6039920311707717, 0.6039920311707717, 0.6039920311707717, 0.6039920311707717, 0.6039920311707717]


Epoch: 100%|██████████| 6/6 [01:45<00:00, 17.62s/it]


In [4]:
import time
import numpy as np
import pandas as pd
import torch

def timed_eval(*, finbert, model, examples, use_amp=False):
    """Run `model` over `examples` and time the inference loop.

    Args:
        finbert: Object exposing ``get_loader(examples, phase="eval")`` and a
            ``device`` attribute.
        model: Callable accepting ``input_ids`` and ``attention_mask`` keywords.
            Its result either has a ``.logits`` attribute or carries the logits
            at index 0.
        examples: Forwarded unchanged to ``finbert.get_loader``.
        use_amp: If true and the device is CUDA, the forward pass runs inside
            ``torch.amp.autocast``.

    Returns:
        A ``(frame, timing)`` pair. ``frame`` is a DataFrame with one row per
        sample and the columns ``"predictions"`` (per-sample logits) and
        ``"labels"``. ``timing`` is a dict with ``"eval_wall_s"`` and
        ``"eval_samples_per_s"``.
    """
    batches = finbert.get_loader(examples, phase="eval")
    device = finbert.device
    model.eval()

    on_cuda = device.type == "cuda"
    autocast_on = use_amp and on_cuda

    def _wait_for_gpu():
        # Drain pending CUDA work so the timer only sees this loop's kernels.
        if on_cuda:
            torch.cuda.synchronize(device)

    all_logits = []
    all_labels = []

    _wait_for_gpu()
    t0 = time.perf_counter()
    with torch.no_grad():
        for batch in batches:
            # Each batch holds five tensors; token_type_ids is unused for
            # DistilBERT and the last entry is ignored here.
            input_ids, attention_mask, _token_type_ids, label_ids, _ = tuple(
                t.to(device) for t in batch
            )
            model_inputs = {"input_ids": input_ids, "attention_mask": attention_mask}

            if autocast_on:
                with torch.amp.autocast(device_type="cuda"):
                    out = model(**model_inputs)
            else:
                out = model(**model_inputs)

            if hasattr(out, "logits"):
                logits = out.logits
            else:
                logits = out[0]

            all_logits.extend(logits.detach().cpu().numpy())
            all_labels.extend(label_ids.detach().cpu().numpy().tolist())
    _wait_for_gpu()
    elapsed = time.perf_counter() - t0

    n_samples = len(all_labels)
    if elapsed > 0:
        throughput = n_samples / elapsed
    else:
        throughput = float("inf")

    frame = pd.DataFrame({"predictions": all_logits, "labels": all_labels})
    timing = {
        "eval_wall_s": elapsed,
        "eval_samples_per_s": throughput,
    }
    return frame, timing

def get_metrics(df):
    """Compute accuracy and macro-averaged F1 from an evaluation frame.

    Args:
        df: Frame with a ``'predictions'`` column (one score vector per row)
            and a ``'labels'`` column (the true class index per row).

    Returns:
        Dict with the keys ``"accuracy"`` and ``"f1_macro"``.
    """
    from sklearn.metrics import f1_score

    # The predicted class is the index of the highest score in each row.
    y_pred = np.array(list(map(np.argmax, df['predictions'])))
    y_true = np.array(df['labels'])

    return {
        "accuracy": (y_pred == y_true).mean(),
        "f1_macro": f1_score(y_true, y_pred, average='macro'),
    }


# --- Baseline: evaluate the trained baseline model with autocast disabled ---
baseline_eval_df, baseline_timing = timed_eval(
    finbert=finbert_baseline,
    model=trained_model_baseline,
    examples=test_data_baseline,
    use_amp=False,
)
baseline_metrics = get_metrics(baseline_eval_df)
print(
    f"Baseline Model - Accuracy: {baseline_metrics['accuracy']:.4f}, F1: {baseline_metrics['f1_macro']:.4f}",
    f"Baseline Model - Throughput: {baseline_timing['eval_samples_per_s']:.1f} samples/sec",
    sep="\n",
)

# --- AMP-trained model: evaluate with autocast enabled ---
amp_trained_eval_df, amp_trained_timing = timed_eval(
    finbert=finbert_amp,
    model=trained_model_amp,
    examples=test_data_amp,
    use_amp=True,
)
amp_trained_metrics = get_metrics(amp_trained_eval_df)
print(
    f"Trained AMP Model - Accuracy: {amp_trained_metrics['accuracy']:.4f}, F1: {amp_trained_metrics['f1_macro']:.4f}",
    f"Trained AMP Model - Throughput: {amp_trained_timing['eval_samples_per_s']:.1f} samples/sec",
    sep="\n",
)

12/17/2025 03:36:31 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:36:31 - INFO - finbert.utils -   guid: test-1
12/17/2025 03:36:31 - INFO - finbert.utils -   tokens: [CLS] the bristol port company has sealed a one million pound contract with cooper specialised handling to supply it with four 45 - ton ##ne , custom ##ised reach stack ##ers from ko ##ne ##cr ##ane ##s [SEP]
12/17/2025 03:36:31 - INFO - finbert.utils -   input_ids: 101 1996 7067 3417 2194 2038 10203 1037 2028 2454 9044 3206 2007 6201 17009 8304 2000 4425 2009 2007 2176 3429 1011 10228 2638 1010 7661 5084 3362 9991 2545 2013 12849 2638 26775 7231 2015 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:31 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:31 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

Baseline Model - Accuracy: 0.7237, F1: 0.6852
Baseline Model - Throughput: 154.6 samples/sec
Trained AMP Model - Accuracy: 0.7722, F1: 0.7555
Trained AMP Model - Throughput: 830.8 samples/sec


In [5]:
device = finbert_baseline.device

# --- Reference point: baseline model, no AMP/FP16 ---
baseline_eval_df, baseline_timing = timed_eval(
    finbert=finbert_baseline,
    model=model_baseline,
    examples=test_data_baseline,
    use_amp=False,
)
baseline_metrics = get_metrics(baseline_eval_df)

# One record per evaluated variant; turned into a DataFrame further down.
results_records = []
results_records.append({
    "variant": "baseline",
    "model_size_mb": get_model_size_mb(model_baseline),
    **baseline_timing,
    **baseline_metrics,
})
print(
    f"Baseline - Accuracy: {baseline_metrics['accuracy']:.4f}, F1: {baseline_metrics['f1_macro']:.4f}",
    f"Baseline - Throughput: {baseline_timing['eval_samples_per_s']:.1f} samples/sec",
    sep="\n",
)

# --- AMP variant: checkpoint reloaded with FP16 weights, autocast on ---
device = finbert_amp.device
if device.type != "cuda":
    print("AMP+FP16 eval requires CUDA")
else:
    # Reload the saved model from the AMP run directory, casting weights to half precision.
    fp16_amp_model = AutoModelForSequenceClassification.from_pretrained(
        cl_path_amp, num_labels=3, dtype=torch.float16
    ).to(device)

    amp_fp16_eval_df, amp_fp16_timing = timed_eval(
        finbert=finbert_amp,
        model=fp16_amp_model,
        examples=test_data_amp,
        use_amp=True,
    )
    amp_fp16_metrics = get_metrics(amp_fp16_eval_df)

    results_records.append({
        "variant": "amp_fp16_weights",
        "model_size_mb": get_model_size_mb(fp16_amp_model),
        **amp_fp16_timing,
        **amp_fp16_metrics,
    })
    print(
        f"AMP+FP16 - Accuracy: {amp_fp16_metrics['accuracy']:.4f}, F1: {amp_fp16_metrics['f1_macro']:.4f}",
        f"AMP+FP16 - Throughput: {amp_fp16_timing['eval_samples_per_s']:.1f} samples/sec",
        sep="\n",
    )

results_df = pd.DataFrame(results_records)

# --- Derived statistics (only when both variants were evaluated) ---
if len(results_df) > 1:
    base, amp = results_df.iloc[0], results_df.iloc[1]

    # Throughput ratio of AMP+FP16 over the baseline.
    if base['eval_samples_per_s'] > 0:
        speedup = amp['eval_samples_per_s'] / base['eval_samples_per_s']
    else:
        speedup = float('nan')

    # Size ratio (AMP+FP16 / baseline); its inverse is reported as "N x smaller".
    if base['model_size_mb'] > 0:
        compression = amp['model_size_mb'] / base['model_size_mb']
    else:
        compression = float('nan')
    compression_str = "n/a"
    if compression > 0:
        compression_str = f"{(1/compression):.2f}x"

    # Accuracy difference: AMP+FP16 minus baseline.
    acc_delta = amp['accuracy'] - base['accuracy']

    print(f"\n[STATS]")
    print(f"Relative speedup (AMP+FP16 vs. baseline): {speedup:.2f}x")
    print(f"Model size compression (AMP+FP16 vs. baseline): {compression_str} smaller "
          f"({amp['model_size_mb']:.1f}MB vs {base['model_size_mb']:.1f}MB)")
    print(f"Accuracy delta (AMP+FP16 minus baseline): {acc_delta:+.3f}")

    # The comparison is stored as an extra row; it uses its own columns, so
    # the per-variant metric columns are empty on this row.
    if compression > 0:
        compression_x = 1/compression
    else:
        compression_x = float('nan')
    stats_row = {
        "variant": "amp_fp16_vs_baseline",
        "speedup_x": speedup,
        "compression_x": compression_x,
        "accuracy_delta": acc_delta,
        "base_accuracy": base['accuracy'],
        "amp_fp16_accuracy": amp['accuracy']
    }
    results_df = pd.concat([results_df, pd.DataFrame([stats_row])], ignore_index=True)

results_df

12/17/2025 03:36:39 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:36:39 - INFO - finbert.utils -   guid: test-1
12/17/2025 03:36:39 - INFO - finbert.utils -   tokens: [CLS] the bristol port company has sealed a one million pound contract with cooper specialised handling to supply it with four 45 - ton ##ne , custom ##ised reach stack ##ers from ko ##ne ##cr ##ane ##s [SEP]
12/17/2025 03:36:39 - INFO - finbert.utils -   input_ids: 101 1996 7067 3417 2194 2038 10203 1037 2028 2454 9044 3206 2007 6201 17009 8304 2000 4425 2009 2007 2176 3429 1011 10228 2638 1010 7661 5084 3362 9991 2545 2013 12849 2638 26775 7231 2015 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:39 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:39 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

Baseline - Accuracy: 0.7237, F1: 0.6852
Baseline - Throughput: 154.9 samples/sec


12/17/2025 03:36:46 - INFO - finbert.utils -   *** Example ***
12/17/2025 03:36:46 - INFO - finbert.utils -   guid: test-1
12/17/2025 03:36:46 - INFO - finbert.utils -   tokens: [CLS] the bristol port company has sealed a one million pound contract with cooper specialised handling to supply it with four 45 - ton ##ne , custom ##ised reach stack ##ers from ko ##ne ##cr ##ane ##s [SEP]
12/17/2025 03:36:46 - INFO - finbert.utils -   input_ids: 101 1996 7067 3417 2194 2038 10203 1037 2028 2454 9044 3206 2007 6201 17009 8304 2000 4425 2009 2007 2176 3429 1011 10228 2638 1010 7661 5084 3362 9991 2545 2013 12849 2638 26775 7231 2015 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:46 - INFO - finbert.utils -   attention_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
12/17/2025 03:36:46 - INFO - finbert.utils -   token_type_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

AMP+FP16 - Accuracy: 0.7722, F1: 0.7555
AMP+FP16 - Throughput: 872.4 samples/sec

[STATS]
Relative speedup (AMP+FP16 vs. baseline): 5.63x
Model size compression (AMP+FP16 vs. baseline): 3.27x smaller (127.7MB vs 417.7MB)
Accuracy delta (AMP+FP16 minus baseline): +0.048


Unnamed: 0,variant,model_size_mb,eval_wall_s,eval_samples_per_s,accuracy,f1_macro,speedup_x,compression_x,accuracy_delta,base_accuracy,amp_fp16_accuracy
0,baseline,417.658215,6.262848,154.881624,0.723711,0.685212,,,,,
1,amp_fp16_weights,127.71192,1.111841,872.42683,0.772165,0.755461,,,,,
2,amp_fp16_vs_baseline,,,,,,5.632862,3.270315,0.048454,0.723711,0.772165


In [6]:
# Push the headline numbers to W&B: baseline metrics always, AMP/FP16 metrics
# only when that evaluation ran (it requires CUDA).
summary = {
    "baseline_accuracy": baseline_metrics["accuracy"],
    "baseline_f1": baseline_metrics["f1_macro"],
    "baseline_throughput": baseline_timing["eval_samples_per_s"],
    "baseline_model_size": get_model_size_mb(model_baseline),
}

if device.type == "cuda":
    # These names are only defined when the AMP+FP16 evaluation was executed.
    summary["amp_fp16_trained_accuracy"] = amp_fp16_metrics["accuracy"]
    summary["amp_fp16_trained_f1"] = amp_fp16_metrics["f1_macro"]
    summary["amp_fp16_trained_throughput"] = amp_fp16_timing["eval_samples_per_s"]
    summary["amp_fp16_trained_model_size"] = get_model_size_mb(fp16_amp_model)

wandb.log(summary)

# Also upload the per-variant results frame as a W&B Table so the variants can
# be compared interactively in the dashboard.
table = wandb.Table(dataframe=results_df)
wandb.log({"variant_summary_table": table})

# Close the run that was opened in the training cell.
wandb.finish()

0,1
amp_fp16_trained_accuracy,▁
amp_fp16_trained_f1,▁
amp_fp16_trained_model_size,▁
amp_fp16_trained_throughput,▁
baseline_accuracy,▁
baseline_f1,▁
baseline_model_size,▁
baseline_throughput,▁

0,1
amp_fp16_trained_accuracy,0.77216
amp_fp16_trained_f1,0.75546
amp_fp16_trained_model_size,127.71192
amp_fp16_trained_throughput,872.42683
baseline_accuracy,0.72371
baseline_f1,0.68521
baseline_model_size,417.65821
baseline_throughput,154.88162
