In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load FinBERT (Hugging Face model)
model_name = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

labels = ["negative", "neutral", "positive"]

def predict_finbert(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.softmax(outputs.logits, dim=1).squeeze()
    pred_class = torch.argmax(probs).item()
    return {
        "text": text,
        "prediction": labels[pred_class],
        "confidence": float(probs[pred_class])
    }

# Example
print(predict_finbert("The bank resolved my issue quickly and politely."))
print(predict_finbert("Terrible experience, my loan was delayed for weeks."))


{'text': 'The bank resolved my issue quickly and politely.', 'prediction': 'negative', 'confidence': 0.8156906962394714}
{'text': 'Terrible experience, my loan was delayed for weeks.', 'prediction': 'positive', 'confidence': 0.9621630311012268}


In [None]:
# ============================================================
# Sentiment Analysis for Banking Texts using FinBERT
# Dataset: Financial PhraseBank (100% Agreement version)
# Optimized for CPU with metrics & plots (incl. ROC/AUC)
# ============================================================

import warnings
warnings.filterwarnings('ignore')

# Import libraries
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification, 
    Trainer, 
    TrainingArguments
)
import evaluate
from sklearn.metrics import (
    classification_report, 
    confusion_matrix, 
    roc_curve, 
    auc
)
from sklearn.preprocessing import label_binarize
import seaborn as sns
import matplotlib.pyplot as plt

# Use all available CPU threads
torch.set_num_threads(torch.get_num_threads())

# ============================================================
# Load dataset
# ============================================================
df = pd.read_csv(
    "../datasets/FinancialPhraseBank-v1.0/Sentences_AllAgree.txt",
    sep="@", header=None, encoding="latin-1",
    names=["sentence", "sentiment"]
)

# Encode labels
label_map = {"positive": 0, "negative": 1, "neutral": 2}
df["label"] = df["sentiment"].map(label_map)

# Train/test split
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df["sentence"].tolist(),
    df["label"].tolist(),
    test_size=0.2,
    random_state=42
)

# ============================================================
# Tokenization (limit sequence length to 128 for speed)
# ============================================================
model_name = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(model_name)

train_encodings = tokenizer(train_texts, padding=True, truncation=True, max_length=128, return_tensors="pt")
test_encodings = tokenizer(test_texts, padding=True, truncation=True, max_length=128, return_tensors="pt")

# ============================================================
# Dataset class for Trainer
# ============================================================
class SentimentDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

train_ds = SentimentDataset(train_encodings, train_labels)
test_ds = SentimentDataset(test_encodings, test_labels)

# ============================================================
# Load pre-trained FinBERT model
# ============================================================
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

# ============================================================
# Define metrics
# ============================================================
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    return metric.compute(predictions=preds, references=labels)

# ============================================================
# Training arguments (CPU-optimized)
# ============================================================
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    num_train_epochs=1,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=20
)

# ============================================================
# Trainer
# ============================================================
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    eval_dataset=test_ds,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

# ============================================================
# Train model
# ============================================================
trainer.train()

# ============================================================
# Save final model with clear name
# ============================================================
save_path = "./models/finbert-financial-sentiment"
trainer.save_model(save_path)
tokenizer.save_pretrained(save_path)

print(f"\nModel and tokenizer saved to: {save_path}")

# ============================================================
# Evaluate with detailed metrics
# ============================================================
predictions = trainer.predict(test_ds)
y_pred = np.argmax(predictions.predictions, axis=-1)
y_true = predictions.label_ids
y_probs = torch.softmax(torch.tensor(predictions.predictions), dim=1).numpy()

# Classification report
print("\nClassification Report:\n")
print(classification_report(y_true, y_pred, target_names=["positive", "negative", "neutral"]))

# Confusion Matrix
cm = confusion_matrix(y_true, y_pred)
plt.figure(figsize=(6,5))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues",
            xticklabels=["positive", "negative", "neutral"],
            yticklabels=["positive", "negative", "neutral"])
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("True")
plt.show()

# Bar chart for F1-scores
report = classification_report(y_true, y_pred, target_names=["positive", "negative", "neutral"], output_dict=True)
f1_scores = [report[label]["f1-score"] for label in ["positive", "negative", "neutral"]]

plt.figure(figsize=(6,4))
sns.barplot(x=["positive", "negative", "neutral"], y=f1_scores, palette="viridis")
plt.title("F1-scores per class")
plt.ylabel("F1-score")
plt.ylim(0,1)
plt.show()

# ============================================================
# ROC / AUC curves for each class
# ============================================================
n_classes = 3
y_true_bin = label_binarize(y_true, classes=[0,1,2])

plt.figure(figsize=(7,6))
for i, label in enumerate(["positive", "negative", "neutral"]):
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_probs[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f"{label} (AUC = {roc_auc:.2f})")

plt.plot([0,1], [0,1], "k--")
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curves per Class")
plt.legend()
plt.show()

# ============================================================
# Inference on new text
# ============================================================
text = "profit up"
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs)
pred = torch.argmax(outputs.logits).item()

labels = ["positive", "negative", "neutral"]
print("\nExample Inference →", text)
print("Predicted Sentiment:", labels[pred])
