# Bitcoin Sentiment Analysis with FinBERT

This notebook implements sentiment analysis on Bitcoin-related text using the FinBERT model.
We fine-tune the pre-trained FinBERT model on a Bitcoin sentiment dataset and evaluate
its performance using balanced accuracy and accuracy metrics.

Import libraries for sentiment analysis using FinBERT with PyTorch, HuggingFace transformers, and evaluation metrics

In [21]:
from loguru import logger
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset
from transformers import (
    pipeline,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    AutoModelForSequenceClassification,
)
from sklearn.metrics import balanced_accuracy_score, accuracy_score

Set the pre-trained FinBERT model for financial sentiment analysis

In [22]:
model_name = "yiyanghkust/finbert-tone"

Check CUDA availability and set device for GPU acceleration or fallback to CPU

In [23]:
if torch.cuda.is_available():
    logger.info("CUDA available. GPU will be used for computation.")
    device = 0
else:
    logger.info("CUDA not available. Using CPU for computation.")
    device = -1

[32m2025-07-16 14:36:03.035[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mCUDA available. GPU will be used for computation.[0m


Initialize sentiment analysis pipeline with FinBERT model and test with sample text

In [24]:
sentiment_pipeline = pipeline(
    task="sentiment-analysis", model=model_name, batch_size=128, device=device
)

result = sentiment_pipeline("I love you")

logger.info(result)

Device set to use cuda:0
[32m2025-07-16 14:36:04.494[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1m[{'label': 'Positive', 'score': 0.9885214567184448}][0m


Load Bitcoin sentiment dataset, split into train/val/test sets, preprocess text data, and prepare for model training

In [25]:
ds = load_dataset("juanka0357/bitcoin-sentiment-analysis")

full_dataset = ds["train"]

total_samples = len(full_dataset)
train_size = int(0.6 * total_samples)
val_size = int(0.2 * total_samples)
test_size = total_samples - train_size - val_size

ds_train = full_dataset.select(range(train_size))
ds_val = full_dataset.select(range(train_size, train_size + val_size))
ds_test = full_dataset.select(
    range(train_size + val_size, train_size + val_size + test_size)
)

tokenizer = AutoTokenizer.from_pretrained(model_name)

ds_train = ds_train.rename_column("output", "labels")
ds_val = ds_val.rename_column("output", "labels")
ds_test = ds_test.rename_column("output", "labels")

ds_train = ds_train.rename_column("input", "text")
ds_val = ds_val.rename_column("input", "text")
ds_test = ds_test.rename_column("input", "text")

all_labels = set()
for split in [ds_train, ds_val, ds_test]:
    for example in split:
        all_labels.add(example["labels"])

label_to_id = {label: idx for idx, label in enumerate(sorted(all_labels))}
id_to_label = {idx: label for label, idx in label_to_id.items()}

def convert_labels_to_ids(examples):
    examples["labels"] = [label_to_id[label] for label in examples["labels"]]
    return examples

ds_train = ds_train.map(convert_labels_to_ids, batched=True)
ds_val = ds_val.map(convert_labels_to_ids, batched=True)
ds_test = ds_test.map(convert_labels_to_ids, batched=True)

def tokenize_function(examples):
    tokenized = tokenizer(
        examples["text"], truncation=True, padding="max_length", max_length=128
    )
    return tokenized

ds_train = ds_train.map(tokenize_function, batched=True)
ds_val = ds_val.map(tokenize_function, batched=True)
ds_test = ds_test.map(tokenize_function, batched=True)

ds_train.set_format(
    type="torch",
    columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
)
ds_val.set_format(
    type="torch",
    columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
)
ds_test.set_format(
    type="torch",
    columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
)

logger.info(f"ds_train: {ds_train[0]}")

ds_train_shuffle = ds_train.shuffle(seed=42)

[32m2025-07-16 14:36:08.349[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m65[0m - [1mds_train: {'labels': tensor(2), 'input_ids': tensor([    3, 17039,  4154,   487,  4121,   641,  6615,   574,  1898,  1419,
           35,   445,  5674,   599,  2464,     9,  3209,    19,  4269,    48,
            4,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
            0,     0,     0,     0,     0,     0,     0,     0,    

Define evaluation metrics function to compute balanced accuracy and accuracy scores for model predictions

In [26]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {
        "balanced_accuracy": balanced_accuracy_score(predictions, labels),
        "accuracy": accuracy_score(predictions, labels),
    }

Configure training arguments with hyperparameters for fine-tuning FinBERT model on sentiment analysis task

In [27]:
args = TrainingArguments(
    output_dir="temp/",
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    learning_rate=2e-6,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.1,
    load_best_model_at_end=True,
    metric_for_best_model="balanced_accuracy",
)

Load pre-trained FinBERT model and configure it for sequence classification with custom label mappings

In [28]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_to_id),
    id2label=id_to_label,
    label2id=label_to_id,
)

Initialize trainer with model and datasets, then fine-tune FinBERT and generate predictions on test set

In [29]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=ds_train_shuffle,
    eval_dataset=ds_val,
    compute_metrics=compute_metrics,
)

trainer.train()

predictions = trainer.predict(ds_test)

Epoch,Training Loss,Validation Loss,Balanced Accuracy,Accuracy
1,No log,1.981895,0.455102,0.546174
2,3.741800,1.242855,0.621209,0.651715
3,1.314500,1.1233,0.641171,0.656992


Log model predictions and ground truth labels for analysis and debugging purposes

In [30]:
logger.info(f"Raw logits/predictions from the model: {predictions[0]}")
logger.info(f"Labels from the dataset: {predictions[1]}")

[32m2025-07-16 14:36:43.520[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mRaw logits/predictions from the model: [[-1.8044204   0.20923893 -1.3121202 ]
 [ 0.04278786  0.7093077  -0.63570625]
 [-1.8451368   3.171665   -0.84985834]
 ...
 [-2.0087445  -0.15749407  3.3543968 ]
 [-2.0087445  -0.15749407  3.3543968 ]
 [-2.0087445  -0.15749407  3.3543968 ]][0m
[32m2025-07-16 14:36:43.523[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLabels from the dataset: [1 1 0 2 1 0 2 1 2 1 2 0 0 1 1 2 1 2 1 1 1 1 2 1 2 1 1 2 1 2 1 2 2 2 1 2 2
 2 1 2 1 2 2 2 1 2 2 2 0 2 1 2 2 2 1 1 2 0 0 0 0 0 1 2 1 1 1 2 1 2 1 1 2 0
 2 2 2 0 0 2 2 0 2 2 1 0 1 1 2 2 2 1 0 1 2 1 1 2 2 1 2 2 2 2 1 1 2 2 0 2 2
 0 1 2 1 1 1 0 2 2 2 1 2 2 1 1 2 1 2 2 2 2 0 1 2 2 2 1 2 0 2 2 0 2 0 2 2 2
 2 2 2 2 2 2 2 2 1 2 2 2 2 1 2 1 1 1 2 1 1 1 1 1 0 1 1 1 2 1 1 1 1 0 0 0 0
 1 0 1 2 1 2 1 2 2 1 2 1 1 1 2 2 1 1 1 1 1 1 1 2 2 2 2 1 1 2 2 1 1 1 2 2 1
 1 1 1 0 1 1 2 1 2 2 2 2 2 1