# Bitcoin Sentiment Analysis with FinBERT

This notebook implements sentiment analysis on Bitcoin-related text using the FinBERT model.
We fine-tune the pre-trained FinBERT model on a Bitcoin sentiment dataset and evaluate
its performance using balanced accuracy and accuracy metrics.

Import libraries for sentiment analysis using FinBERT with PyTorch, HuggingFace transformers, and evaluation metrics

In [1]:
from loguru import logger
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset
from transformers import (
    pipeline,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
    AutoModelForSequenceClassification,
)
from sklearn.metrics import balanced_accuracy_score, accuracy_score

  from .autonotebook import tqdm as notebook_tqdm


Set the pre-trained FinBERT model for financial sentiment analysis

In [2]:
model_name = "yiyanghkust/finbert-tone"

Check CUDA availability and set device for GPU acceleration or fallback to CPU

In [3]:
if torch.cuda.is_available():
    logger.info("CUDA available. GPU will be used for computation.")
    device = 0
else:
    logger.info("CUDA not available. Using CPU for computation.")
    device = -1

[32m2025-07-17 11:04:54.094[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mCUDA available. GPU will be used for computation.[0m


Initialize sentiment analysis pipeline with FinBERT model and test with sample text

In [4]:
sentiment_pipeline = pipeline(
    task="sentiment-analysis", model=model_name, batch_size=128, device=device
)

result = sentiment_pipeline("I love you")

logger.info(result)

Device set to use cuda:0
[32m2025-07-17 11:04:58.019[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m7[0m - [1m[{'label': 'Positive', 'score': 0.9885214567184448}][0m


Load Bitcoin sentiment dataset, split into train/val/test sets, preprocess text data, and prepare for model training

In [5]:
# Load local dataset from parquet file
import os
from datasets import Dataset

# Load the local parquet file
data_path = os.path.join('..', 'backend', 'src', 'data', 'clean', 'cryptopanic_news_clean_with_labels.parquet')
df = pd.read_parquet(data_path)

# First, let's check what columns we have
logger.info(f"Available columns: {list(df.columns)}")
logger.info(f"Dataset shape: {df.shape}")

# Identify text and label columns
text_column = "description"
label_column = "sentiment" 

logger.info(f"Using text column: '{text_column}' and label column: '{label_column}'")
logger.info(f"Unique labels: {df[label_column].value_counts().to_dict()}")

# Prepare the dataset with required columns
df_prepared = df[[text_column, label_column]].copy()
df_prepared.columns = ['text', 'labels']  # Rename to standard names

# Remove any rows with missing values
df_prepared = df_prepared.dropna()

# Convert to Hugging Face Dataset
full_dataset = Dataset.from_pandas(df_prepared, preserve_index=False)

# Split into train/val/test sets (60%/20%/20%)
total_samples = len(full_dataset)
train_size = int(0.6 * total_samples)
val_size = int(0.2 * total_samples)
test_size = total_samples - train_size - val_size

# Shuffle before splitting for better distribution
full_dataset = full_dataset.shuffle(seed=42)

ds_train = full_dataset.select(range(train_size))
ds_val = full_dataset.select(range(train_size, train_size + val_size))
ds_test = full_dataset.select(
    range(train_size + val_size, train_size + val_size + test_size)
)

logger.info(f"Train size: {len(ds_train)}, Val size: {len(ds_val)}, Test size: {len(ds_test)}")

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Create label mappings
all_labels = set()
for split in [ds_train, ds_val, ds_test]:
    for example in split:
        all_labels.add(example["labels"])

label_to_id = {label: idx for idx, label in enumerate(sorted(all_labels))}
id_to_label = {idx: label for label, idx in label_to_id.items()}

logger.info(f"Label mappings: {label_to_id}")

def convert_labels_to_ids(examples):
    examples["labels"] = [label_to_id[label] for label in examples["labels"]]
    return examples


# Convert labels to IDs
ds_train = ds_train.map(convert_labels_to_ids, batched=True)
ds_val = ds_val.map(convert_labels_to_ids, batched=True)
ds_test = ds_test.map(convert_labels_to_ids, batched=True)


# Tokenize the text
def tokenize_function(examples):
    tokenized = tokenizer(
        examples["text"], truncation=True, padding="max_length", max_length=128
    )
    return tokenized

ds_train = ds_train.map(tokenize_function, batched=True)
ds_val = ds_val.map(tokenize_function, batched=True)
ds_test = ds_test.map(tokenize_function, batched=True)

# Set format for PyTorch
ds_train.set_format(
    type="torch",
    columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
)
ds_val.set_format(
    type="torch",
    columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
)
ds_test.set_format(
    type="torch",
    columns=["input_ids", "token_type_ids", "attention_mask", "labels"],
)

logger.info(f"ds_train example: {ds_train[0]}")

# Shuffle training data
ds_train_shuffle = ds_train.shuffle(seed=42)


[32m2025-07-17 11:04:58.098[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m10[0m - [1mAvailable columns: ['cryptopanic_id', 'title', 'description', 'source_domain', 'published_at', 'cryptopanic_url', 'currencies', 'sentiment'][0m
[32m2025-07-17 11:04:58.099[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m11[0m - [1mDataset shape: (19468, 8)[0m
[32m2025-07-17 11:04:58.100[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m17[0m - [1mUsing text column: 'description' and label column: 'sentiment'[0m
[32m2025-07-17 11:04:58.105[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1mUnique labels: {'Positive': 12461, 'Negative': 4977, 'Neutral': 2030}[0m
[32m2025-07-17 11:04:58.144[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m45[0m - [1mTrain size: 11680, Val size: 3893, Test size: 3895[0m
[32m2025-07-17 11:05:00.957[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m

Define evaluation metrics function to compute balanced accuracy and accuracy scores for model predictions

In [6]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {
        "balanced_accuracy": balanced_accuracy_score(predictions, labels),
        "accuracy": accuracy_score(predictions, labels),
    }

Configure training arguments with hyperparameters for fine-tuning FinBERT model on sentiment analysis task

In [7]:
args = TrainingArguments(
    output_dir="temp/",
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=50,
    learning_rate=2e-6,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=3,
    weight_decay=0.1,
    load_best_model_at_end=True,
    metric_for_best_model="balanced_accuracy",
)

Load pre-trained FinBERT model and configure it for sequence classification with custom label mappings

In [8]:
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=len(label_to_id),
    id2label=id_to_label,
    label2id=label_to_id,
)

Initialize trainer with model and datasets, then fine-tune FinBERT and generate predictions on test set

In [9]:
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=ds_train_shuffle,
    eval_dataset=ds_val,
    compute_metrics=compute_metrics,
)

trainer.train()

predictions = trainer.predict(ds_test)

Epoch,Training Loss,Validation Loss,Balanced Accuracy,Accuracy
1,0.9822,0.940989,0.317979,0.625482
2,0.9362,0.91113,0.2929,0.627023
3,0.9125,0.909911,0.281448,0.629848


Log model predictions and ground truth labels for analysis and debugging purposes

In [10]:
logger.info(f"Raw logits/predictions from the model: {predictions[0]}")
logger.info(f"Labels from the dataset: {predictions[1]}")

[32m2025-07-17 11:12:41.704[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m1[0m - [1mRaw logits/predictions from the model: [[-0.55833644 -0.18570296  1.1872989 ]
 [-0.56787324 -0.7137708   2.1606197 ]
 [-0.13270241 -0.444199    1.428188  ]
 ...
 [-0.0583357  -1.1820269   0.31701052]
 [-0.01474534 -0.6527146   0.16227673]
 [-0.31564218 -1.3497086   0.7305849 ]][0m
[32m2025-07-17 11:12:41.705[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m2[0m - [1mLabels from the dataset: [1 2 2 ... 2 0 2][0m


In [12]:
# Create directory for saving the model components
import os
import json

output_dir = "../models/finbert_bitcoin_sentiment"
os.makedirs(output_dir, exist_ok=True)

# Save the fine-tuned model
trainer.model.save_pretrained(output_dir)

# Save the tokenizer
tokenizer.save_pretrained(output_dir)

# Save label mappings for later reference
with open(f"{output_dir}/label_mappings.json", "w") as f:
    json.dump({"id_to_label": id_to_label, "label_to_id": label_to_id}, f)

logger.info(f"Model and tokenizer saved to {output_dir}")

[32m2025-07-17 11:21:18.599[0m | [1mINFO    [0m | [36m__main__[0m:[36m<module>[0m:[36m18[0m - [1mModel and tokenizer saved to ../models/finbert_bitcoin_sentiment[0m
