## Importing Required Dependencies

In [1]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch
import pandas as pd
from collections import Counter

### FinBERT model and tokenizer

In [3]:
# Name of the pretrained checkpoint. The same value is passed to both
# `from_pretrained` calls below so the tokenizer and the model always match.
model_name = "ProsusAI/finbert"
# Tokenizer that converts raw post text into tensors for the model.
tokenizer = BertTokenizer.from_pretrained(model_name)
# BERT sequence-classification model; its logits are converted to sentiment
# labels in `analyze_sentiment_batch`.
model = BertForSequenceClassification.from_pretrained(model_name)

## Finance-related X posts and their topics

In [4]:
# Sample dataset: each entry is a (post_text, topic) tuple. Entries are grouped
# by topic under the section comments below; the topic string is the key the
# later cells use to aggregate sentiment.
posts_data = [
    # Stock Market & Investments
    ("S&P 500 hits 6,000! Is this the peak or just the start of 2025’s bull run? #investing", "Stock Market & Investments"),
    ("Tech stocks are bleeding today—overvalued or buying opportunity? #markets", "Stock Market & Investments"),
    ("Small caps finally outperforming—told you they’d shine in 2025! #finance", "Stock Market & Investments"),
    ("Dividends > growth stocks in this rate environment. Thoughts? #invest", "Stock Market & Investments"),
    ("Market volatility spiking again. Fed needs to chill with these rate hints. #stocks", "Stock Market & Investments"),
    ("Bought more $TECH at $150. Earnings next week—let’s see! #stockmarket", "Stock Market & Investments"),
    ("REITs are back, baby! 6% yields and climbing. #realestateinvesting", "Stock Market & Investments"),
    ("Anyone else shorting $MEGA after that weak guidance? #trading", "Stock Market & Investments"),
    ("Value stocks making a comeback this quarter—growth is so 2024. #wallstreet", "Stock Market & Investments"),
    ("IPO market heating up again—3 new listings this week. #investing2025", "Stock Market & Investments"),
    # Cryptocurrency
    ("Bitcoin at $85k—new ATH or another fakeout? #crypto", "Cryptocurrency"),
    ("ETH staking yields dropping below 3%. Time to diversify? #blockchain", "Cryptocurrency"),
    ("XRP pumping after SEC rumors—anyone buying the dip? #cryptotrading", "Cryptocurrency"),
    ("Crypto regulations coming in Q2 2025—bullish or bearish? #bitcoin", "Cryptocurrency"),
    ("Solana outperforming BTC YTD. Still early innings. #altcoins", "Cryptocurrency"),
    ("DeFi is dead, long live CeFi—agree? #financefuture", "Cryptocurrency"),
    ("Just cashed out 10% of my BTC holdings at $84k. Feels good. #cryptowins", "Cryptocurrency"),
    ("Stablecoins under scrutiny again—USDT safe? #crypto2025", "Cryptocurrency"),
    ("NFTs making a quiet comeback—new projects dropping this month. #web3", "Cryptocurrency"),
    ("Mining stocks > pure crypto plays right now. Change my mind. #investing", "Cryptocurrency"),
    # Interest Rates & Economy
    ("Fed hints at pausing rate cuts—markets not happy. #economy", "Interest Rates & Economy"),
    ("10-year Treasury at 3.8%—where’s it heading by June? #bonds", "Interest Rates & Economy"),
    ("Inflation ticked up to 2.7% in Feb. Rate cuts off the table? #finance", "Interest Rates & Economy"),
    ("Mortgage rates steady at 6.3%—housing market still frozen. #realestate", "Interest Rates & Economy"),
    ("Recession fears fading—soft landing confirmed? #macroeconomics", "Interest Rates & Economy"),
    ("Central banks are out of sync globally—chaos incoming? #markets2025", "Interest Rates & Economy"),
    ("Rate cuts saved 2024, but 2025 feels shaky. #federalreserve", "Interest Rates & Economy"),
    ("Yield curve flattening again—bond traders, what’s your take? #fixedincome", "Interest Rates & Economy"),
    ("Dollar strength killing emerging markets. #currencywars", "Interest Rates & Economy"),
    ("ECB cuts rates, Fed holds—currency traders rejoice! #forex", "Interest Rates & Economy"),
    # Personal Finance & Budgeting
    ("Tax season reminder: File by April 15 or extend! #personalfinance", "Personal Finance & Budgeting"),
    ("Just maxed out my Roth IRA for 2025—feels amazing. #retirement", "Personal Finance & Budgeting"),
    ("Credit card debt hit a new high—y’all okay out there? #money", "Personal Finance & Budgeting"),
    ("Side hustle income up 20% this year—hustle harder! #financialfreedom", "Personal Finance & Budgeting"),
    ("Budget tip: Cut subscriptions you don’t use. Saved $50/month. #frugal", "Personal Finance & Budgeting"),
    ("Emergency fund at 6 months—finally secure! #moneytips", "Personal Finance & Budgeting"),
    ("Student loan payments kicking my ass—forgiveness when? #debt", "Personal Finance & Budgeting"),
    ("Invested my tax refund in $SPY—better than spending it. #wealth", "Personal Finance & Budgeting"),
    ("Savings rate at 3% APY—best I’ve seen in years. #banking", "Personal Finance & Budgeting"),
    ("Financial goal for 2025: Pay off $10k in debt. Who’s with me? #goals", "Personal Finance & Budgeting"),
    # Trading & Speculation
    ("Options volume on $TECH is insane today—earnings play? #trading", "Trading & Speculation"),
    ("Short squeeze on $GME again—meme stocks never die! #wallstreetbets", "Trading & Speculation"),
    ("Day trading $NVDA calls—up 300% in 2 hours. #yolo", "Trading & Speculation"),
    ("Futures pointing down—gonna be a red Monday. #markets", "Trading & Speculation"),
    ("Algo traders ruining the game—can’t compete anymore. #finance", "Trading & Speculation"),
    ("Leveraged 3x on $TSLA—praying for a bounce. #risk", "Trading & Speculation"),
    ("Volatility index at 22—time to buy puts? #options", "Trading & Speculation"),
    ("Swing trading $AAPL—perfect setup this week. #stocks", "Trading & Speculation"),
    ("Caught the $BTC dip at $80k—scalping profits now. #crypto", "Trading & Speculation"),
    ("Hedge funds dumping bonds—big move coming? #trading2025", "Trading & Speculation"),
    # Financial News & Commentary
    ("TechCorp beats earnings—stock up 8% AH. #earnings", "Financial News & Commentary"),
    ("Oil prices spike after Middle East tensions—$90/barrel? #commodities", "Financial News & Commentary"),
    ("Gold hitting $2,700—safe haven vibes. #preciousmetals", "Financial News & Commentary"),
    ("Bank of America upgrades $JPM—bullish on banks! #finance", "Financial News & Commentary"),
    ("China’s stimulus package—too little, too late? #globaleconomy", "Financial News & Commentary"),
    ("AI stocks overvalued—bubble popping soon? #tech", "Financial News & Commentary"),
    ("Unemployment steady at 4.1%—labor market holding. #jobs", "Financial News & Commentary"),
    ("Retail sales up 1.2%—consumers still spending! #economy", "Financial News & Commentary"),
    ("Fed’s Powell speaking tomorrow—markets on edge. #fomc", "Financial News & Commentary"),
    ("EU green bonds issuance hits €50B—sustainability push. #esg", "Financial News & Commentary"),
    # Crypto & Tech Hype
    ("Web3 adoption accelerating—2025 is the year! #blockchain", "Crypto & Tech Hype"),
    ("AI tokens pumping—$FET up 15% today. #cryptotrends", "Crypto & Tech Hype"),
    ("Metaverse stocks quietly rallying—watch $META. #techinvesting", "Crypto & Tech Hype"),
    ("Quantum computing ETFs—next big thing? #futurefinance", "Crypto & Tech Hype"),
    ("Tesla’s robotaxi news tomorrow—$TSLA to $400? #elonmusk", "Crypto & Tech Hype"),
    # Wealth & Inequality
    ("Top 1% owns 50% of stocks—system’s rigged. #wealthgap", "Wealth & Inequality"),
    ("Millionaires fleeing high-tax states—where to next? #finance", "Wealth & Inequality"),
    ("Inheritance tax debate heating up—fair or theft? #policy", "Wealth & Inequality"),
    ("Middle class squeezed again—rents up 5% YOY. #economy", "Wealth & Inequality"),
    ("Billionaires betting on private equity—should we? #investing", "Wealth & Inequality"),
    # Random Finance Takes
    ("Cash is trash—inflation’s eating it alive. #money", "Random Finance Takes"),
    ("Divorce just cost me half my portfolio—FML. #finance", "Random Finance Takes"),
    ("Financial advisors are overrated—DIY investing FTW! #wealth", "Random Finance Takes"),
    ("Gambling on stocks > casinos—higher ROI. #yolo", "Random Finance Takes"),
    ("Retirement at 40—my plan’s on track! #firemovement", "Random Finance Takes"),
    # Sector-Specific
    ("Energy stocks surging—$XOM to $150? #oil", "Sector-Specific"),
    ("Healthcare ETFs outperforming—$XLV my pick. #invest", "Sector-Specific"),
    ("Bank stocks cheap—$WFC at 10x P/E. #finance", "Sector-Specific"),
    ("Retail sector tanking—$WMT an exception? #stocks", "Sector-Specific"),
    ("Semiconductors cooling off—$AMD still a buy? #tech", "Sector-Specific"),
    # Predictions & Hot Takes
    ("2025 crash coming—too much euphoria now. #markets", "Predictions & Hot Takes"),
    ("Bitcoin to $100k by June—mark my words! #crypto", "Predictions & Hot Takes"),
    ("Fed cuts rates 2 more times this year—bullish! #economy", "Predictions & Hot Takes"),
    ("Small caps dominate 2025—large caps over. #investing", "Predictions & Hot Takes"),
    ("Housing bubble pops by fall—rates won’t save it. #realestate", "Predictions & Hot Takes"),
    # Humor & Memes
    ("My portfolio’s redder than my ex’s lipstick. #finance", "Humor & Memes"),
    ("Bought the dip—now I’m the dip. #trading", "Humor & Memes"),
    ("Crypto bros vs. stock bros—same delusion, different assets. #meme", "Humor & Memes"),
    ("When your 401(k) becomes a 201(k)—thanks, Fed! #retirement", "Humor & Memes"),
    ("Invested in $DOGE—woof woof, I’m broke! #yolo", "Humor & Memes"),
    # Questions & Polls
    ("Best ETF for 2025: $VOO or $VTI? #investing", "Questions & Polls"),
    ("Crypto or stocks—where’s your money going? #financepoll", "Questions & Polls"),
    ("Rate cuts: Good or bad for savers? #economy", "Questions & Polls"),
    ("Bullish or bearish on $TSLA this quarter? #poll", "Questions & Polls"),
    ("Should I sell $BTC at $85k or HODL? #cryptohelp", "Questions & Polls"),
    # Miscellaneous
    ("FinBERT says $TECH is a buy—AI knows best? #nlp", "Miscellaneous"),
    ("Tax loopholes for 2025—anyone got tips? #moneytips", "Miscellaneous"),
    ("401(k) match increased—thank you, employer! #benefits", "Miscellaneous"),
    ("Crowdfunding a startup—worth the risk? #finance", "Miscellaneous"),
    ("Financial literacy should be mandatory—agree? #education", "Miscellaneous"),
]

## Function to analyze sentiment in batches

In [5]:
def analyze_sentiment_batch(texts, batch_size=16):
    """Classify the sentiment of each text with the notebook's FinBERT model.

    Texts are tokenized and scored in chunks of ``batch_size`` so that a long
    list is never pushed through the model in a single forward pass. The
    function uses the module-level ``tokenizer`` and ``model`` objects and
    switches ``model`` to evaluation mode.

    Args:
        texts: Sliceable sequence of strings to classify (e.g. a list).
        batch_size: Number of texts per forward pass. Must be at least 1.

    Returns:
        A ``(sentiments, confidences)`` tuple of two lists aligned with
        ``texts``: the predicted label name for each text and the softmax
        probability of that label.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1. A negative step would
            otherwise make ``range`` yield nothing and silently return empty
            results.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    model.eval()
    # Read the index -> label mapping from the loaded checkpoint instead of
    # hard-coding its order, so the names always match the model's outputs.
    id2label = model.config.id2label

    sentiments = []
    confidences = []

    # Inference only: no gradients are needed for any batch.
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch = texts[start:start + batch_size]
            inputs = tokenizer(batch, return_tensors="pt", padding=True, truncation=True, max_length=128)
            # Keep the inputs on the same device as the model (no-op on CPU).
            inputs = inputs.to(model.device)

            probs = torch.softmax(model(**inputs).logits, dim=1)
            # max over the class dimension yields both the winning probability
            # and the index of the winning class.
            batch_conf, batch_preds = probs.max(dim=1)

            sentiments.extend(id2label[idx] for idx in batch_preds.tolist())
            confidences.extend(batch_conf.tolist())

    return sentiments, confidences

In [6]:
# Split the (text, topic) pairs into two parallel tuples
posts, topics = zip(*posts_data)

# Score every post; the helper is given a plain list of the post texts
sentiments, confidences = analyze_sentiment_batch(list(posts))

# One row per post, with the prediction and its confidence alongside the input
df = pd.DataFrame(
    dict(
        Post=posts,
        Topic=topics,
        Sentiment=sentiments,
        Confidence=confidences,
    )
)

## Grouping by topic and determining the final sentiment

In [7]:
def _summarize_sentiments(labels):
    """Build the per-topic summary dict from an iterable of sentiment labels."""
    tally = Counter(labels)
    n_posts = sum(tally.values())

    # Majority sentiment: the label with the highest count
    top_label = max(tally, key=tally.get)
    # Share of each label, formatted as a percentage with one decimal place
    shares = {label: f"{(hits/n_posts)*100:.1f}%" for label, hits in tally.items()}

    return {
        "Sentiment Distribution": shares,
        "Final Sentiment": top_label,
        "Posts Analyzed": n_posts,
    }


# Topics are visited in order of first appearance in the DataFrame
topic_sentiments = {
    name: _summarize_sentiments(df.loc[df["Topic"] == name, "Sentiment"])
    for name in df["Topic"].unique()
}

## Output

In [8]:
# Plain-text report: one header line, then a four-line section per topic
print("Sentiment Analysis Results by Topic:")
for name, summary in topic_sentiments.items():
    report_lines = (
        f"\nTopic: {name}",
        f"Posts Analyzed: {summary['Posts Analyzed']}",
        f"Sentiment Distribution: {summary['Sentiment Distribution']}",
        f"Final Sentiment: {summary['Final Sentiment']}",
    )
    print(*report_lines, sep="\n")

Sentiment Analysis Results by Topic:

Topic: Stock Market & Investments
Posts Analyzed: 10
Sentiment Distribution: {'neutral': '70.0%', 'positive': '20.0%', 'negative': '10.0%'}
Final Sentiment: neutral

Topic: Cryptocurrency
Posts Analyzed: 10
Sentiment Distribution: {'neutral': '80.0%', 'negative': '20.0%'}
Final Sentiment: neutral

Topic: Interest Rates & Economy
Posts Analyzed: 10
Sentiment Distribution: {'negative': '40.0%', 'neutral': '60.0%'}
Final Sentiment: neutral

Topic: Personal Finance & Budgeting
Posts Analyzed: 10
Sentiment Distribution: {'neutral': '60.0%', 'positive': '30.0%', 'negative': '10.0%'}
Final Sentiment: neutral

Topic: Trading & Speculation
Posts Analyzed: 10
Sentiment Distribution: {'neutral': '90.0%', 'positive': '10.0%'}
Final Sentiment: neutral

Topic: Financial News & Commentary
Posts Analyzed: 10
Sentiment Distribution: {'positive': '50.0%', 'negative': '20.0%', 'neutral': '30.0%'}
Final Sentiment: positive

Topic: Crypto & Tech Hype
Posts Analyzed: 5
