In [7]:
import torch
from transformers import pipeline

# Force PyTorch backend manually
torch.backends.quantized.engine = 'qnnpack'

# Load FinBERT
analyzer = pipeline("text-classification", model="ProsusAI/finbert", framework="pt")

text = "Regulatory concerns led to a decline in investor confidence."
result = analyzer(text)
print(result)


Device set to use mps:0


[{'label': 'negative', 'score': 0.9731943011283875}]


In [1]:
!pip uninstall pandas -y
!pip install --no-cache-dir pandas


Found existing installation: pandas 2.2.3
Uninstalling pandas-2.2.3:
  Successfully uninstalled pandas-2.2.3
Collecting pandas
  Downloading pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl.metadata (89 kB)
Downloading pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl (11.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: pandas
Successfully installed pandas-2.2.3


In [None]:
from transformers import pipeline

# Load stock emotions model
stock_emotion_pipeline = pipeline("text-classification", model="finance-ml/stock-emotions-bert")

# Test with a sample from your dataset
test_text = "Tesla stock is crashing! I'm really worried about my investments. 😟"
result = stock_emotion_pipeline(test_text)

print(result)


# Code to Load & Preprocess:

In [8]:
import pandas as pd

# Load your dataset
df = pd.read_csv("train_stockemo.csv")

# Keep only the needed columns
df = df[['processed', 'emo_label']]  

# Remove NaN values (if any)
df = df.dropna()

# Check dataset
print(df.head())


                                           processed   emo_label
0  Amazon Dow futures up by 100 points already  [...  excitement
1  Tesla Daddy's drinkin' eArly tonight! Here's t...  excitement
2  Apple We’ll been riding since last December fr...   confusion
3  Tesla happy new year, 2020, everyone [wine gla...  excitement
4  Tesla haha just a collection of greats..."Mars...  excitement


# Convert Labels to Numerical Format

In [10]:
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder

# Convert dataframe to dataset format
dataset = Dataset.from_pandas(df)

# Encode emotion labels
label_encoder = LabelEncoder()
dataset = dataset.add_column("labels", label_encoder.fit_transform(dataset["emo_label"]))

# Split into training and validation sets
train_test_split = dataset.train_test_split(test_size=0.1)
train_dataset = train_test_split["train"]
valid_dataset = train_test_split["test"]


# Split Data for Training & Testing

In [11]:
from sklearn.model_selection import train_test_split

# Split data
train_texts, test_texts, train_labels, test_labels = train_test_split(
    df['processed'].tolist(), df['emo_label'].tolist(), test_size=0.2, random_state=42
)

print(f"Training size: {len(train_texts)}, Testing size: {len(test_texts)}")


Training size: 6400, Testing size: 1600


# Load FinBERT Model for Fine-Tuning

In [None]:
import torch.nn as nn
from transformers import AutoModelForSequenceClassification, Trainer, TrainingArguments

# Set num_labels dynamically based on your dataset
num_labels = len(label_encoder.classes_)

# Load FinBERT with correct num_labels
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert")

hidden_size = model.config.hidden_size  # Usually 768 for BERT models

# Replace the classification head
model.classifier = nn.Linear(hidden_size, num_labels)

# Update model configuration
model.config.num_labels = num_labels

NameError: name 'nn' is not defined

# Convert Data into PyTorch Format

In [None]:
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer

# Load FinBERT tokenizer
tokenizer = AutoTokenizer.from_pretrained("ProsusAI/finbert")

class FinancialDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        encoding = self.tokenizer(
            self.texts[idx], truncation=True, padding="max_length", 
            max_length=128, return_tensors="pt"
        )
        return {
            "input_ids": encoding["input_ids"].squeeze(0),
            "attention_mask": encoding["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }

# Create datasets
train_dataset = FinancialDataset(train_texts, train_labels, tokenizer)
test_dataset = FinancialDataset(test_texts, test_labels, tokenizer)


# Train FinBERT on the Dataset


In [None]:
from transformers import TrainingArguments, Trainer, DataCollatorWithPadding

# Ensure num_labels is set correctly
num_labels = len(set(train_labels))  # Get unique labels count

# Load FinBERT with correct number of labels
model = AutoModelForSequenceClassification.from_pretrained("ProsusAI/finbert", num_labels=num_labels)

# Use Data Collator for automatic padding
from transformers import DataCollatorWithPadding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Set up training arguments
training_args = TrainingArguments(
    output_dir="./finbert-emotion",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,  # Keeps last 2 checkpoints
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    tokenizer=tokenizer,  # Important for padding
    data_collator=data_collator
)

# Train FinBERT
trainer.train()


# Save and Test the Model

In [None]:
from transformers import pipeline

# Save fine-tuned model and tokenizer
model.save_pretrained("./finbert-emotion-model")
tokenizer.save_pretrained("./finbert-emotion-model")

# Load fine-tuned model for inference
emotion_analyzer = pipeline(
    "text-classification", 
    model="./finbert-emotion-model", 
    tokenizer="./finbert-emotion-model",
    device=0 if torch.cuda.is_available() else "mps"  # Use GPU if available
)

# Test a financial statement
text = "The stock market crash is making investors panic."
result = emotion_analyzer(text)

print(result)
