# FinBERT Profiling: Mini FinBERT + Knowledge Distillation

This notebook is intentionally **thin**: it reuses the profiling utilities in `pipelines/finBERT/finbert/` (especially `finbert/finbert_profile.py` and `finbert/profile_utils.py`) instead of copying large code blocks.

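The purpose of this notebook is to profile two stages on the financial sentiment analysis task: fine-tuning a 'mini-BERT' model (`prajjwal1/bert-mini`), and then running knowledge distillation from a teacher model into that fine-tuned student.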


In [None]:
from __future__ import annotations

from pathlib import Path
import shutil
import os
import logging
import sys
sys.path.append('..')

from textblob import TextBlob
from pprint import pprint
from sklearn.metrics import classification_report

from transformers import AutoModelForSequenceClassification

from finbert.finbert import *
from finbert.finbert_profile import *
from finbert.profile_utils import get_model_size_mb, print_device_info, setup_nltk_data, timed_eval
import finbert.utils as tools

from finbert.finbert_kd_trainer import KDFinBert



%load_ext autoreload
%autoreload 2

# The project root is taken to be the parent of the current working directory.
project_dir = Path.cwd().parent
# Lift the pandas column-width limit so long cell values are not truncated.
pd.set_option("max_colwidth", None)

import wandb

In [None]:
# Open the Weights & Biases run that receives the metrics logged at the end of
# this notebook.
wandb.init(
    project="finbert-experiments",
    entity="si2449-columbia-university",
    group="knowledge-distillation",
    name="finetuning/distilling-mini_bert",
)

In [None]:
# Passed to Config below as model_dir and data_dir respectively.
cl_path = project_dir / "models" / "student"
cl_data_path = project_dir / "data" / "sentiment_data"

In [None]:
# Remove any previous contents of the student model directory before training.
try:
    shutil.rmtree(cl_path)
except FileNotFoundError:
    # Nothing to clean up: the directory does not exist yet.
    pass
except OSError as exc:
    # Stay best-effort, but report the failure instead of hiding it silently.
    logging.warning("Could not remove %s: %s", cl_path, exc)

# Student backbone: the 'prajjwal1/bert-mini' checkpoint loaded with three labels.
bertmodel = AutoModelForSequenceClassification.from_pretrained(
    "prajjwal1/bert-mini",
    cache_dir=None,
    num_labels=3,
)

# Fine-tuning configuration for the student.
config = Config(
    data_dir=cl_data_path,
    model_dir=cl_path,
    bert_model=bertmodel,
    output_mode="classification",
    num_train_epochs=4,
    train_batch_size=32,
    max_seq_length=48,
    learning_rate=2e-5,
    warm_up_proportion=0.2,
    local_rank=-1,
    discriminate=True,
    gradual_unfreeze=True,
    encoder_no=4,
)

# These two attributes are set after construction rather than via keyword arguments.
config.profile_train_steps = 20
config.num_hidden_layers = 4

In [None]:
# Profiling wrapper built from the fine-tuning configuration.
finbert = ProfiledFinBert(config)
finbert.config.base_model = "prajjwal1/bert-mini"
# Same values as were passed to Config(...) when it was constructed.
finbert.config.discriminate = True
finbert.config.gradual_unfreeze = True

In [None]:
# Register the three sentiment labels with the fine-tuning wrapper.
finbert.prepare_model(label_list=["positive", "negative", "neutral"])

In [None]:
# Training examples that are handed to finbert.train in the training cell.
train_data = finbert.get_data("train")

In [None]:
# Model object that is passed to finbert.train in the training cell.
model = finbert.create_the_model()

In [None]:
# Measure the wall-clock duration of the fine-tuning call.
start = time.perf_counter()
trained_model = finbert.train(train_examples=train_data, model=model)
train_wall_s = time.perf_counter() - start

In [None]:
test_data = finbert.get_data("test")

# Evaluation output used for the metric report in the following cells.
results = finbert.evaluate(examples=test_data, model=trained_model)

# Separate timed evaluation pass; `eval_timing` is merged into the logged summary.
eval_df, eval_timing = timed_eval(
    finbert=finbert,
    model=trained_model,
    examples=test_data,
    use_amp=False,
)

In [None]:
def report(df, cols=('label', 'prediction', 'logits')):
    """Print and return evaluation metrics for the fine-tuned model.

    The cross-entropy loss is weighted with ``finbert.class_weights``, read
    from the notebook-level ``finbert`` object.

    Args:
        df: Evaluation results containing the three columns named in ``cols``.
        cols: Column names in the order (true labels, predicted labels,
            per-class scores fed to the loss).

    Returns:
        dict: every entry of sklearn's ``classification_report`` (dict form)
        with its key prefixed by ``"Finetuning "``, plus
        ``"Finetuning Evaluation Loss"`` and
        ``"Finetuning Evaluation Accuracy"`` as plain Python floats.
    """
    label_col, pred_col, score_col = cols[0], cols[1], cols[2]

    criterion = CrossEntropyLoss(weight=finbert.class_weights)
    loss = criterion(
        torch.tensor(list(df[score_col])),
        torch.tensor(list(df[label_col])),
    )
    # Store plain floats so the returned dict holds no tensor / numpy scalars
    # when it is later merged into the logged summary.
    loss_value = loss.item()
    accuracy = float((df[label_col] == df[pred_col]).sum() / df.shape[0])

    print("Evaluation Loss:{0:.2f}".format(loss_value))
    print("Evaluation Accuracy:{0:.2f}".format(accuracy))
    print("\nClassification Report:")

    per_class = classification_report(df[label_col], df[pred_col], output_dict=True)

    # Prefix every key so these metrics stay distinguishable from other
    # reports merged into the same summary dict.
    new_report = {"Finetuning " + key: value for key, value in per_class.items()}
    new_report["Finetuning Evaluation Loss"] = loss_value
    new_report["Finetuning Evaluation Accuracy"] = accuracy

    print(new_report)

    return new_report

In [None]:
# Predicted class = index of the largest entry in each row's `predictions` value.
results["prediction"] = results.predictions.apply(
    lambda scores: np.argmax(scores, axis=0)
)

In [None]:
# Fine-tuning metrics; merged into the summary that is logged to wandb at the end.
wandb_report = report(results, cols=["labels", "prediction", "predictions"])


In [None]:
# The teacher is loaded from the local models/teacher directory.
teacher_path = project_dir / "models" / "teacher"
teacher = AutoModelForSequenceClassification.from_pretrained(
    teacher_path,
    cache_dir=None,
    num_labels=3,
)
# The student is the model returned by the fine-tuning run.
student = trained_model

# Passed to the distillation Config as model_dir.
new_path = project_dir / "models" / "distilled_student"

In [None]:
# Distillation configuration; rebinds `config`, replacing the fine-tuning one.
config = Config(
    data_dir=cl_data_path,
    model_dir=new_path,
    bert_model=None,
    base_model="prajjwal1/bert-mini",
    output_mode="classification",
    num_train_epochs=2,
    train_batch_size=32,
    max_seq_length=48,
    learning_rate=5e-6,
    warm_up_proportion=0.1,
    local_rank=-1,
    discriminate=True,
    gradual_unfreeze=True,
    encoder_no=4,
)


In [None]:
# Distillation wrapper built from the teacher, the fine-tuned student and the
# distillation Config.
kd = KDFinBert(teacher=teacher, student=student, config=config)

In [None]:
# Same three-label list that was given to finbert.prepare_model.
kd.prepare_model(label_list=["positive", "negative", "neutral"])

In [None]:
# Rebinds `train_data` and `model`, which previously held the fine-tuning objects.
train_data = kd.get_data("train")
# NOTE(review): `model` is not passed to kd.train below - confirm whether this
# call is needed for a side effect.
model = kd.create_the_model()

In [None]:
# Measure the wall-clock duration of the distillation call. This rebinds
# `trained_model` and `train_wall_s` from the fine-tuning run.
start = time.perf_counter()
trained_model = kd.train(train_examples=train_data, teacher=teacher, student=student)
train_wall_s = time.perf_counter() - start

In [None]:
test_data = kd.get_data("test")

# Rebinds `results` with the evaluation output of the distilled model.
results = kd.evaluate(model=trained_model, examples=test_data)



In [None]:
# Predicted class = index of the largest entry in each row's `predictions` value.
results["prediction"] = results.predictions.apply(
    lambda scores: np.argmax(scores, axis=0)
)

In [None]:
def kd_report(df, cols=('label', 'prediction', 'logits')):
    """Print and return evaluation metrics for the distilled model.

    The cross-entropy loss is weighted with ``finbert.class_weights``, read
    from the notebook-level ``finbert`` object.
    NOTE(review): these weights come from the fine-tuning wrapper, not from
    ``kd`` - confirm that this is intended.

    Args:
        df: Evaluation results containing the three columns named in ``cols``.
        cols: Column names in the order (true labels, predicted labels,
            per-class scores fed to the loss).

    Returns:
        dict: sklearn's ``classification_report`` in dict form, extended with
        ``"Evaluation Loss"`` and ``"Evaluation Accuracy"`` as plain Python
        floats.
    """
    label_col, pred_col, score_col = cols[0], cols[1], cols[2]

    criterion = CrossEntropyLoss(weight=finbert.class_weights)
    loss = criterion(
        torch.tensor(list(df[score_col])),
        torch.tensor(list(df[label_col])),
    )
    # Store plain floats so the returned dict holds no tensor / numpy scalars
    # when it is later merged into the logged summary.
    loss_value = loss.item()
    accuracy = float((df[label_col] == df[pred_col]).sum() / df.shape[0])

    print("Loss:{0:.2f}".format(loss_value))
    print("Accuracy:{0:.2f}".format(accuracy))
    print("\nClassification Report:")

    return_val = classification_report(df[label_col], df[pred_col], output_dict=True)
    return_val["Evaluation Loss"] = loss_value
    return_val["Evaluation Accuracy"] = accuracy

    # Show the report that the header above announces.
    print(return_val)

    return return_val

In [None]:
wandb_kd_report = kd_report(results, cols=['labels', 'prediction', 'predictions'])

# By this point `train_wall_s`, `train_data` and `trained_model` have been
# rebound by the distillation cells, so the throughput must use the epoch count
# of the distillation `config`, not the fine-tuning `finbert.config`.
# NOTE(review): "model_dir" still reports the fine-tuning directory while
# "model_size_mb" measures the distilled model - confirm which stage each
# field is meant to describe.
summary = {
    "device": str(finbert.device),
    "model_dir": str(cl_path),
    "train_wall_s": float(train_wall_s),
    "train_examples": int(len(train_data)),
    "train_examples_per_s": (
        float((len(train_data) * config.num_train_epochs) / train_wall_s)
        if train_wall_s > 0
        else float("inf")
    ),
    "model_size_mb": float(get_model_size_mb(trained_model)),
    "profile_train_steps": finbert.config.profile_train_steps,
    # `or {}` guards against a stored "training_summary" of None.
    **(finbert.profile_results.get("training_summary", {}) or {}),
    **eval_timing,
    **wandb_report,
    **wandb_kd_report,
}

wandb.log(summary)
wandb.finish()