# Sentiment Analysis Demo Notebook

This notebook demonstrates how to:
1. Load stock news data (a small mock dataset stands in here for scraped headlines).
2. Apply VADER sentiment analysis on headlines.
3. Generate a treemap visualization for positive or negative sentiment.

---

In [None]:
# In[1]: Imports and Setup

import pandas as pd
import numpy as np
import plotly.express as px
import requests
import nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer
# Fetch the VADER lexicon only when it is not already installed locally, so
# that Restart & Run All does not hit the network (or print downloader
# chatter) on every run. `nltk.download` is the documented entry point.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon', quiet=True)

# If you need local imports from your codebase, e.g.:
# from backend.marketviews_sentiment_panel_finalized import (
#     get_data_to_draw, draw_sentiment_panel, ...
# )

print("Imports done. Environment ready.")


In [None]:
# In[2]: (Optional) Data Scraping or Mock Data

# A real pipeline would scrape Finviz or Wikipedia at this point. To keep the
# demo self-contained, a handful of headline records are written out by hand,
# one (ticker, date, time, headline) tuple per row.
_HEADLINE_FIELDS = ('ticker', 'date', 'time', 'headline')
_mock_records = [
    ('AAPL', '2023-12-01', '09:00',
     "Apple announces new product line with positive reception"),
    ('AAPL', '2023-12-01', '10:00',
     "Apple's earnings exceed expectations"),
    ('GOOG', '2023-12-02', '11:00',
     "Google faces minor lawsuit, negative sentiment around search monopoly"),
    ('AMZN', '2023-12-02', '12:00',
     "Amazon to open new distribution centers, positive expansion news"),
]

# Transpose the row-wise records into a column-name -> list-of-values mapping.
headlines_data = {
    field: list(column)
    for field, column in zip(_HEADLINE_FIELDS, zip(*_mock_records))
}

# Build the frame and convert the date strings to datetimes in one chain.
df_headlines = pd.DataFrame(headlines_data).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)
df_headlines


In [None]:
# In[3]: Sentiment Analysis with VADER

vader = SentimentIntensityAnalyzer()

# Score columns produced from the `polarity_scores` dictionaries.
VADER_SCORE_COLUMNS = ['neg', 'neu', 'pos', 'compound']

# Apply VADER to each headline (one dict of scores per row).
scores = df_headlines['headline'].apply(vader.polarity_scores).tolist()

# Reuse the headline frame's index so the join below aligns row-for-row even
# when `df_headlines` does not carry a default 0..n-1 index. Passing the
# column list also keeps the expected columns present when there are no rows.
scores_df = pd.DataFrame(
    scores,
    index=df_headlines.index,
    columns=VADER_SCORE_COLUMNS,
)

# Merge the scores back to the original DataFrame. Any score columns left by
# a previous run of this cell are dropped first; without that, re-running the
# cell makes `join` raise ValueError because the column names overlap.
df_headlines = (
    df_headlines
    .drop(columns=VADER_SCORE_COLUMNS, errors='ignore')
    .join(scores_df)
)

print("DataFrame with VADER scores:")
df_headlines


In [None]:
# In[4]: Aggregate Sentiment Scores (Compound)

# Average each VADER score per ticker in a single chain. The averaged
# 'compound' column is exposed under the name 'Sentiment Score'.
mean_scores = (
    df_headlines
    .groupby('ticker', as_index=False)[['neg', 'neu', 'pos', 'compound']]
    .mean()
    .rename(columns={'compound': 'Sentiment Score'})
)

print("Mean scores per ticker:")
mean_scores


In [None]:
# In[5]: Treemap Visualization (Plotly)

# Let's pretend these are our "top 5" or "low 5" for demonstration
# In a real scenario, you might filter or combine with Wikipedia sector data.
# For a quick demo, we'll just add a "Sector" column manually:
sector_map = {
    'AAPL': 'Technology',
    'GOOG': 'Technology',
    'AMZN': 'Consumer Discretionary'
}
mean_scores['Sector'] = mean_scores['ticker'].map(sector_map).fillna('Misc')

# Treemap tile areas must be non-negative, but the mean compound score is
# signed (it can be negative or exactly zero). Size tiles by the magnitude of
# the score, with a small floor so neutral tickers still get a visible tile,
# and keep the signed score for the colour scale only.
MIN_TILE_SIZE = 0.01
treemap_data = mean_scores.assign(
    **{'Tile Size': mean_scores['Sentiment Score'].abs().clip(lower=MIN_TILE_SIZE)}
).round({'neg': 3, 'neu': 3, 'pos': 3})

# `custom_data` lets Plotly Express attach the score columns to every node in
# the trace. Assigning `fig.data[0].customdata` by hand is positional: the
# trace also holds sector and root nodes and does not follow the row order of
# `mean_scores`, so hand-assigned rows end up on the wrong tiles.
fig = px.treemap(
    treemap_data,
    path=[px.Constant("Sectors"), 'Sector', 'ticker'],
    values='Tile Size',
    color='Sentiment Score',
    custom_data=['neg', 'neu', 'pos', 'Sentiment Score'],
    color_continuous_scale=['#FF0000', '#000000', '#00FF00'],
    color_continuous_midpoint=0
)

# Customize text and hover. customdata indices follow the `custom_data` order
# above. The score is formatted in the template because sector/root nodes
# carry an aggregated (unrounded) colour value.
# NOTE(review): for sector/root nodes Plotly Express is expected to show "(?)"
# for neg/neu/pos when the children disagree - confirm this is acceptable.
fig.update_traces(
    texttemplate="%{label}<br>Score: %{customdata[3]:.3f}",
    hovertemplate=(
        "Ticker: %{label}<br>"
        "Neg: %{customdata[0]}<br>"
        "Neu: %{customdata[1]}<br>"
        "Pos: %{customdata[2]}<br>"
        "Score: %{customdata[3]:.3f}<extra></extra>"
    )
)
fig.update_layout(margin=dict(t=50, l=25, r=25, b=25))

fig.show()
