In [2]:
!pip install torch transformers deep-translator numpy

Collecting transformers
  Downloading transformers-4.57.3-py3-none-any.whl (12.0 MB)
     ---------------------------------------- 12.0/12.0 MB 5.7 MB/s eta 0:00:00
Collecting deep-translator
  Downloading deep_translator-1.11.4-py3-none-any.whl (42 kB)
     ---------------------------------------- 42.3/42.3 kB 2.1 MB/s eta 0:00:00
Collecting huggingface-hub<1.0,>=0.34.0
  Downloading huggingface_hub-0.36.0-py3-none-any.whl (566 kB)
     -------------------------------------- 566.1/566.1 kB 4.4 MB/s eta 0:00:00
Collecting regex!=2019.12.17
  Downloading regex-2025.11.3-cp311-cp311-win_amd64.whl (277 kB)
     -------------------------------------- 277.7/277.7 kB 5.7 MB/s eta 0:00:00
Collecting tokenizers<=0.23.0,>=0.22.0
  Downloading tokenizers-0.22.1-cp39-abi3-win_amd64.whl (2.7 MB)
     ---------------------------------------- 2.7/2.7 MB 4.9 MB/s eta 0:00:00
Collecting safetensors>=0.4.3
  Downloading safetensors-0.7.0-cp38-abi3-win_amd64.whl (341 kB)
     ---------------------------


[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F
import numpy as np

MODEL_NAME = "yiyanghkust/finbert-tone"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
model.eval()
LABELS = ["positive", "negative", "neutral"]


def analyze_financial_sentiment(news_texts):
    results = []
    scores = []
    
    for text in news_texts:
        
        context_text = "Consider this news from a depositor's perspective: " + text
        
        inputs = tokenizer(context_text, return_tensors="pt", truncation=True, padding=True, max_length=512)
        with torch.no_grad():
            outputs = model(**inputs)
            probs = F.softmax(outputs.logits, dim=-1)[0].numpy()
        
        
        finbert_score = float(probs[0] - probs[1]) 
        
        
        negative_keywords = [
    "reduced", "decline", "cut", "downgraded", "loss",
    "profit decline", "net loss", "earnings miss", "revenue drop",
    "lower EPS", "asset write-down", "loan default",
    "rights issue", "share dilution", "regulatory penalty",
    "management resignation", "fraud", "scandal",
    "interest rate hike", "tightened liquidity", "inflation rises",
    "currency depreciation", "budget cuts", "debt concerns",
    "capital flight", "bearish", "sell-off", "slump", "correction",
    "downtrend", "negative circuit", "record low", "weak turnover",
    "political instability", "economic slowdown", "crisis"
]
        positive_keywords = [
    "increase", "upgraded", "rise", "boost", "profit", "gain",
    "profit growth", "net income jump", "earnings beat forecasts", "revenue surge",
    "record EPS", "dividend declared", "bonus share issue", "stock split",
    "share buyback", "AGM announced", "rights offering oversubscribed",
    "major contract win", "new project launch", "merger", "acquisition",
    "strategic partnership", "interest rate cut", "liquidity injection",
    "fiscal stimulus", "tax relief", "credit rating upgrade",
    "increased foreign investment", "bullish", "rally", "surge",
    "uptrend", "record high", "positive circuit", "heavy buying",
    "high turnover", "net foreign inflow"
]
        
        adjusted_score = finbert_score
        if any(word in text.lower() for word in negative_keywords):
            adjusted_score = min(finbert_score, 0)
        elif any(word in text.lower() for word in positive_keywords):
            adjusted_score = max(finbert_score, 0)
        
        
        results.append({
            "text": text,
            "positive_prob": float(probs[0]),
            "negative_prob": float(probs[1]),
            "neutral_prob": float(probs[2]),
            "finbert_score": finbert_score,
        })
        scores.append(adjusted_score)

        print(f"News: {text[:50]}..., Positive: {probs[0]:.3f}, Negative: {probs[1]:.3f}, Neutral: {probs[2]:.3f}")
        print(f"FinBERT score: {finbert_score:.3f}")
    
    return results, scores


df = pd.read_csv("news_data.csv")

news_texts = df['content'].tolist()

results, final_scores = analyze_financial_sentiment(news_texts)

df['positive_prob'] = [r['positive_prob'] for r in results]
df['negative_prob'] = [r['negative_prob'] for r in results]
df['neutral_prob'] = [r['neutral_prob'] for r in results]
df['finbert_score'] = [r['finbert_score'] for r in results]

# df.to_csv("news_with_sentiment.csv", index=False)

News: Commercial banks have published new interest rates..., Positive: 0.689, Negative: 0.122, Neutral: 0.189
FinBERT score: 0.567
