<a href="https://colab.research.google.com/github/athens-21/Huawai-cloud/blob/main/%E0%B9%87only_data_for_ONE_stock_efficiently.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ================================================================
# FNSPID Single Stock Deep Analysis - Optimized Version
# Load only data for ONE stock efficiently
# ================================================================

# ============================================================
# SECTION 1: Setup
# ============================================================
print("🔧 Installing dependencies...")
!pip install -q pandas numpy plotly yfinance scipy scikit-learn textblob spacy networkx

print("📥 Downloading spaCy model...")
!python -m spacy download en_core_web_sm -q

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import yfinance as yf
from datetime import datetime, timedelta
from collections import Counter
import gc
from textblob import TextBlob
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import spacy
import networkx as nx
import json

print("✅ Setup complete!")

🔧 Installing dependencies...
📥 Downloading spaCy model...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
✅ Setup complete!


In [None]:
# ================================================================
# FNSPID Single Stock Deep Analysis - Optimized Version
# Load only data for ONE stock efficiently
# ================================================================

# ============================================================
# SECTION 1: Setup
# ============================================================
print("🔧 Installing dependencies...")
!pip install -q pandas numpy plotly yfinance scipy scikit-learn textblob spacy networkx

print("📥 Downloading spaCy model...")
!python -m spacy download en_core_web_sm -q

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
import yfinance as yf
from datetime import datetime, timedelta
from collections import Counter
import gc
from textblob import TextBlob
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
import spacy
import networkx as nx
import json

print("✅ Setup complete!")

🔧 Installing dependencies...
📥 Downloading spaCy model...
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m68.2 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
✅ Setup complete!


In [None]:
#============================================================
# SECTION 2: Configuration
# ============================================================
# 🎯 CONFIGURE YOUR ANALYSIS HERE
TICKER = 'AAPL'  # Change to any S&P 500 ticker
YEARS = 5        # Number of years to analyze (1-4)
MIN_NEWS_COUNT = 50  # Minimum news articles needed

print(f"\n🎯 Target: {TICKER}")
print(f"📅 Period: {YEARS} years")
print(f"📰 Minimum news: {MIN_NEWS_COUNT} articles\n")



🎯 Target: AAPL
📅 Period: 5 years
📰 Minimum news: 50 articles



In [None]:
# ============================================================
# SECTION 3: Download Dataset
# ============================================================
print("📥 Downloading FNSPID dataset (~500 MB)...")
print("⏳ This will take 2-3 minutes...\n")

!wget -q --show-progress https://huggingface.co/datasets/Zihan1004/FNSPID/resolve/main/Stock_news/nasdaq_exteral_data.csv

import os
if os.path.exists('nasdaq_exteral_data.csv'):
    file_size = os.path.getsize('nasdaq_exteral_data.csv') / (1024**2)
    print(f"✅ Download complete! Size: {file_size:.1f} MB\n")
else:
    raise FileNotFoundError("Download failed!")


📥 Downloading FNSPID dataset (~500 MB)...
⏳ This will take 2-3 minutes...

✅ Download complete! Size: 22156.7 MB



In [None]:
# ============================================================
# SECTION 4: Detect Columns First
# ============================================================
print("🔍 Checking dataset structure...")

# Read first few rows to detect columns
sample = pd.read_csv('nasdaq_exteral_data.csv', nrows=5)
print(f"\n📋 Available columns: {list(sample.columns)}")

# Detect column names (case-insensitive)
col_mapping = {}
for col in sample.columns:
    col_lower = col.lower()
    if 'date' in col_lower or 'time' in col_lower:
        col_mapping['date'] = col
    elif 'tick' in col_lower or 'symbol' in col_lower:
        col_mapping['ticker'] = col
    elif 'headline' in col_lower or 'title' in col_lower:
        col_mapping['headline'] = col

print(f"\n🎯 Detected columns:")
for key, val in col_mapping.items():
    print(f"  {key} → {val}")

# Check if we have all required columns
if len(col_mapping) < 3:
    print("\n❌ Missing required columns!")
    print("Available columns:", list(sample.columns))
    raise ValueError("Dataset must have: date, ticker, and headline columns")

🔍 Checking dataset structure...

📋 Available columns: ['Unnamed: 0', 'Date', 'Article_title', 'Stock_symbol', 'Url', 'Publisher', 'Author', 'Article', 'Lsa_summary', 'Luhn_summary', 'Textrank_summary', 'Lexrank_summary']

🎯 Detected columns:
  date → Date
  headline → Article_title
  ticker → Stock_symbol


In [None]:
# ============================================================
# SECTION 5: Smart Load - Filter While Reading
# ============================================================
print(f"\n🔍 Loading data for {TICKER}...")
print("⏳ Filtering while reading (2-3 minutes)...\n")

# Calculate date range - force timezone naive
cutoff_date = pd.Timestamp.now().tz_localize(None) - pd.Timedelta(days=365 * YEARS)
print(f"📅 Filtering news after: {cutoff_date.strftime('%Y-%m-%d')}")

# Load and filter in chunks
chunks = []
total_rows = 0
matched_rows = 0

# Sentiment keywords for analysis
POSITIVE_WORDS = [
    'gain', 'profit', 'surge', 'rally', 'growth', 'rise', 'jump',
    'soar', 'advance', 'upgrade', 'beat', 'outperform', 'strong',
    'record', 'high', 'boost', 'success', 'win', 'positive', 'bullish'
]

NEGATIVE_WORDS = [
    'loss', 'decline', 'fall', 'drop', 'plunge', 'tumble', 'crash',
    'downgrade', 'miss', 'weak', 'concern', 'risk', 'lawsuit', 'probe',
    'investigation', 'fraud', 'scandal', 'bearish', 'negative', 'warning'
]

# Use detected column names
actual_cols = [col_mapping['date'], col_mapping['ticker'], col_mapping['headline']]

for chunk in pd.read_csv(
    'nasdaq_exteral_data.csv',
    usecols=actual_cols,
    chunksize=100000
):
    # Rename to standard names
    chunk = chunk.rename(columns={
        col_mapping['date']: 'date',
        col_mapping['ticker']: 'ticker',
        col_mapping['headline']: 'headline'
    })

    # Parse dates and remove timezone info
    chunk['date'] = pd.to_datetime(chunk['date'], errors='coerce', utc=True)
    chunk['date'] = chunk['date'].dt.tz_localize(None)  # Remove timezone
    chunk = chunk.dropna(subset=['date'])

    total_rows += len(chunk)

    # Filter for our ticker and date range (now both are tz-naive)
    filtered_chunk = chunk[
        (chunk['ticker'] == TICKER) &
        (chunk['date'] >= cutoff_date)
    ]

    if len(filtered_chunk) > 0:
        chunks.append(filtered_chunk)
        matched_rows += len(filtered_chunk)

    print(f"  Processed {total_rows:,} rows | Found {matched_rows:,} matches for {TICKER}...", end='\r')

print(f"\n\n✅ Loading complete!")

# Combine chunks
if len(chunks) > 0:
    news_df = pd.concat(chunks, ignore_index=True)
    print(f"📊 Loaded {len(news_df):,} articles for {TICKER}")
else:
    raise ValueError(f"❌ No data found for {TICKER}. Try different ticker or increase YEARS.")

# Check if we have enough data
if len(news_df) < MIN_NEWS_COUNT:
    print(f"\n⚠️  Warning: Only {len(news_df)} articles found (minimum: {MIN_NEWS_COUNT})")
    print("Consider: (1) Increasing YEARS, or (2) Choosing different ticker")

# Clean up memory
del chunks
gc.collect()

# Show sample
print(f"\n👀 Sample headlines:")
for _, row in news_df.head(3).iterrows():
    date_str = row['date'].strftime('%Y-%m-%d')
    headline = str(row['headline'])[:70]
    print(f"  • {date_str}: {headline}...")



🔍 Loading data for AAPL...
⏳ Filtering while reading (2-3 minutes)...

📅 Filtering news after: 2020-10-30
  Processed 15,549,299 rows | Found 8,865 matches for AAPL...

✅ Loading complete!
📊 Loaded 8,865 articles for AAPL

👀 Sample headlines:
  • 2023-12-16: My 6 Largest Portfolio Holdings Heading Into 2024 -- and the Important...
  • 2023-12-16: Brokers Suggest Investing in Apple (AAPL): Read This Before Placing a ...
  • 2023-12-16: Company News for Dec 19, 2023...


In [None]:
# ============================================================
# SECTION 5: Sentiment Analysis
# ============================================================
print("\n🧠 Analyzing sentiment...")

def calculate_sentiment(headline):
    """Calculate sentiment score from headline"""
    if pd.isna(headline):
        return 0.5, 0.5, 'neutral'

    headline = str(headline).lower()

    # Count positive/negative words
    pos_count = sum(1 for word in POSITIVE_WORDS if word in headline)
    neg_count = sum(1 for word in NEGATIVE_WORDS if word in headline)

    total = pos_count + neg_count
    if total == 0:
        # Use TextBlob for neutral cases
        try:
            blob = TextBlob(headline)
            polarity = float(blob.sentiment.polarity)
            subjectivity = float(blob.sentiment.subjectivity)
        except:
            polarity, subjectivity = 0.0, 0.5
    else:
        polarity = (pos_count - neg_count) / total
        subjectivity = total / 10.0  # Rough estimate

    # Classify
    if polarity > 0.1:
        label = 'positive'
    elif polarity < -0.1:
        label = 'negative'
    else:
        label = 'neutral'

    return polarity, min(subjectivity, 1.0), label

# Apply sentiment analysis
sentiments = news_df['headline'].apply(calculate_sentiment)
news_df[['polarity', 'subjectivity', 'sentiment']] = pd.DataFrame(
    sentiments.tolist(),
    index=news_df.index
)

# Statistics
sentiment_dist = news_df['sentiment'].value_counts()
print("\n✅ Sentiment Analysis Complete!")
print(f"\n📊 Distribution:")
for sentiment, count in sentiment_dist.items():
    pct = count / len(news_df) * 100
    print(f"  {sentiment.upper()}: {count:,} ({pct:.1f}%)")

print(f"\n✅ Top Positive:")
positive_news = news_df[news_df['sentiment'] == 'positive'].nlargest(2, 'polarity')
for _, row in positive_news.iterrows():
    print(f"  • {row['headline'][:80]}...")

print(f"\n❌ Top Negative:")
negative_news = news_df[news_df['sentiment'] == 'negative'].nsmallest(2, 'polarity')
for _, row in negative_news.iterrows():
    print(f"  • {row['headline'][:80]}...")



🧠 Analyzing sentiment...

✅ Sentiment Analysis Complete!

📊 Distribution:
  NEUTRAL: 4,073 (45.9%)
  POSITIVE: 3,942 (44.5%)
  NEGATIVE: 850 (9.6%)

✅ Top Positive:
  • Is FlexShares Quality Dividend ETF (QDF) a Strong ETF Right Now?...
  • Is FlexShares STOXX US ESG Select Index Fund (ESG) a Strong ETF Right Now?...

❌ Top Negative:
  • EXCLUSIVE-US lawmakers warn Biden to probe EU targeting of tech firms -letter...
  • EXCLUSIVE-US lawmakers urge Biden to probe EU targeting of tech firms -letter...


In [None]:
# ============================================================
# SECTION 6: Get Stock Price Data
# ============================================================
print(f"\n💰 Fetching {TICKER} stock prices from Yahoo Finance...")

try:
    stock_data = yf.download(
        TICKER,
        start=cutoff_date,
        end=datetime.now(),
        progress=False
    )

    if len(stock_data) > 0:
        # Calculate metrics
        stock_data['Returns'] = stock_data['Close'].pct_change() * 100
        stock_data['Volatility'] = stock_data['Returns'].rolling(20).std()

        # Get values safely
        start_price = float(stock_data['Close'].iloc[0])
        end_price = float(stock_data['Close'].iloc[-1])
        total_return = ((end_price / start_price) - 1) * 100
        avg_return = float(stock_data['Returns'].mean())
        volatility = float(stock_data['Returns'].std())
        best_day = float(stock_data['Returns'].max())
        worst_day = float(stock_data['Returns'].min())

        print(f"✅ Downloaded {len(stock_data)} trading days\n")
        print(f"📊 Stock Statistics:")
        print(f"  Start Price: ${start_price:.2f}")
        print(f"  End Price: ${end_price:.2f}")
        print(f"  Total Return: {total_return:+.2f}%")
        print(f"  Avg Daily Return: {avg_return:.3f}%")
        print(f"  Volatility (Std): {volatility:.2f}%")
        print(f"  Best Day: +{best_day:.2f}%")
        print(f"  Worst Day: {worst_day:.2f}%")

        has_price_data = True
    else:
        print("❌ No stock data available")
        has_price_data = False
        stock_data = None

except Exception as e:
    print(f"❌ Error fetching stock data: {e}")
    has_price_data = False
    stock_data = None


💰 Fetching AAPL stock prices from Yahoo Finance...
✅ Downloaded 1255 trading days

📊 Stock Statistics:
  Start Price: $105.89
  End Price: $269.28
  Total Return: +154.30%
  Avg Daily Return: 0.090%
  Volatility (Std): 1.78%
  Best Day: +15.33%
  Worst Day: -9.25%


In [None]:
# ============================================================
# SECTION 7: Feature Engineering
# ============================================================
print("\n🔧 Engineering features...")

# Create daily aggregations
news_df['date_only'] = news_df['date'].dt.date
daily_features = news_df.groupby('date_only').agg({
    'headline': 'count',
    'polarity': 'mean',
    'subjectivity': 'mean'
}).reset_index()

daily_features.columns = ['date', 'news_count', 'avg_polarity', 'avg_subjectivity']
daily_features['date'] = pd.to_datetime(daily_features['date'])
daily_features = daily_features.sort_values('date')

# Rolling features
daily_features['sentiment_7d_mean'] = daily_features['avg_polarity'].rolling(7, min_periods=1).mean()
daily_features['sentiment_7d_std'] = daily_features['avg_polarity'].rolling(7, min_periods=1).std().fillna(0)
daily_features['news_volume_7d'] = daily_features['news_count'].rolling(7, min_periods=1).sum()
daily_features['sentiment_momentum'] = daily_features['avg_polarity'].diff().fillna(0)

# Add temporal features
daily_features['day_of_week'] = daily_features['date'].dt.dayofweek
daily_features['month'] = daily_features['date'].dt.month
daily_features['quarter'] = daily_features['date'].dt.quarter
daily_features['is_weekend'] = daily_features['day_of_week'].isin([5, 6]).astype(int)

print(f"✅ Features created: {len(daily_features)} days")


🔧 Engineering features...
✅ Features created: 558 days


In [None]:
# ============================================================
# SECTION 8: Merge News with Stock Prices
# ============================================================
if has_price_data:
    print("\n🔗 Merging news with stock prices...")

    # Prepare stock data - handle MultiIndex columns from yfinance
    stock_data_reset = stock_data.reset_index()

    # Flatten MultiIndex columns if present
    if isinstance(stock_data_reset.columns, pd.MultiIndex):
        stock_data_reset.columns = ['_'.join(col).strip('_') if isinstance(col, tuple) else col
                                     for col in stock_data_reset.columns]

    # Find the correct column names
    date_col = None
    open_col = None
    close_col = None
    returns_col = None

    for col in stock_data_reset.columns:
        col_lower = str(col).lower()
        if 'date' in col_lower:
            date_col = col
        elif 'open' in col_lower:
            open_col = col
        elif 'close' in col_lower and 'adj' not in col_lower:
            close_col = col
        elif 'return' in col_lower:
            returns_col = col

    # Normalize date column
    if date_col:
        stock_data_reset['date'] = pd.to_datetime(stock_data_reset[date_col]).dt.normalize()
    else:
        # If no date column found, use index
        stock_data_reset['date'] = pd.to_datetime(stock_data_reset.index).normalize()

    # Rename columns to standard names
    rename_map = {}
    if open_col:
        rename_map[open_col] = 'Open'
    if close_col:
        rename_map[close_col] = 'Close'
    if returns_col:
        rename_map[returns_col] = 'Returns'

    stock_data_reset = stock_data_reset.rename(columns=rename_map)

    # Select only needed columns
    merge_cols = ['date']
    if 'Open' in stock_data_reset.columns:
        merge_cols.append('Open')
    if 'Close' in stock_data_reset.columns:
        merge_cols.append('Close')
    if 'Returns' in stock_data_reset.columns:
        merge_cols.append('Returns')

    stock_for_merge = stock_data_reset[merge_cols].copy()

    # Merge
    merged = daily_features.merge(
        stock_for_merge,
        on='date',
        how='left'
    )

    # Create target labels if we have price data
    if 'Open' in merged.columns and 'Close' in merged.columns:
        merged['price_change'] = (merged['Close'] - merged['Open']) / merged['Open']
        merged['price_movement'] = pd.cut(
            merged['price_change'],
            bins=[-np.inf, -0.02, 0.02, np.inf],
            labels=['DOWN', 'NEUTRAL', 'UP']
        )
    else:
        merged['price_movement'] = 'NEUTRAL'

    print(f"✅ Merged {len(merged)} days")
    print(f"📰 Days with news: {(merged['news_count'] > 0).sum()}")
    if 'Close' in merged.columns:
        print(f"💰 Days with prices: {merged['Close'].notna().sum()}")

    # Correlation analysis
    if 'Returns' in merged.columns:
        corr_data = merged[['avg_polarity', 'Returns']].dropna()
        if len(corr_data) > 10:
            correlation = corr_data.corr().iloc[0, 1]
            print(f"\n📈 Correlation (Sentiment vs Returns): {correlation:.3f}")

else:
    print("\n⚠️  Skipping price merge (no stock data)")
    merged = daily_features.copy()
    merged['price_movement'] = 'NEUTRAL'


🔗 Merging news with stock prices...
✅ Merged 558 days
📰 Days with news: 558
💰 Days with prices: 387

📈 Correlation (Sentiment vs Returns): 0.412


In [None]:
# ============================================================
# SECTION 9: Named Entity Recognition
# ============================================================
print("\n🔍 Extracting entities from news (this may take 1-2 minutes)...")

nlp = spacy.load('en_core_web_sm')

# Sample headlines for entity extraction
sample_size = min(100, len(news_df))
sample_headlines = news_df['headline'].dropna().sample(sample_size, random_state=42).tolist()

entities = {
    'ORG': Counter(),
    'GPE': Counter(),
    'PERSON': Counter(),
    'MONEY': Counter(),
    'PRODUCT': Counter()
}

for headline in sample_headlines:
    try:
        doc = nlp(str(headline)[:500])
        for ent in doc.ents:
            if ent.label_ in entities:
                entities[ent.label_][ent.text.strip()] += 1
    except:
        continue

print("✅ Entity extraction complete!\n")
print(f"🏢 Top Organizations:")
for org, count in entities['ORG'].most_common(5):
    print(f"  • {org}: {count}")

print(f"\n🌍 Top Countries/Locations:")
for loc, count in entities['GPE'].most_common(5):
    print(f"  • {loc}: {count}")

print(f"\n👤 Top People:")
for person, count in entities['PERSON'].most_common(5):
    print(f"  • {person}: {count}")


🔍 Extracting entities from news (this may take 1-2 minutes)...
✅ Entity extraction complete!

🏢 Top Organizations:
  • Apple: 9
  • BAC: 3
  • Nasdaq: 2
  • Buy Now and Hold Forever: 2
  • META: 2

🌍 Top Countries/Locations:
  • US: 15
  • China: 3
  • India: 2
  • Belgium: 1
  • France: 1

👤 Top People:
  • Warren Buffett: 5
  • Warren Buffett's: 3
  • Buy: 1
  • Powell: 1
  • Warren Buffett-Led: 1


In [None]:
# ============================================================
# SECTION 10: Visualizations
# ============================================================
print("\n🎨 Creating interactive visualizations...")

# Prepare data
merged['date'] = pd.to_datetime(merged['date'])
news_df['date'] = pd.to_datetime(news_df['date'])

color_map = {
    'positive': 'green',
    'negative': 'red',
    'neutral': 'orange'
}

# Create 4-panel dashboard
fig = make_subplots(
    rows=4, cols=1,
    subplot_titles=(
        f'{TICKER} Stock Price with News Sentiment',
        'Weekly Sentiment Distribution',
        'Sentiment Score Trends',
        'News Volume vs Daily Returns'
    ),
    specs=[
        [{"secondary_y": False}],
        [{"secondary_y": False}],
        [{"secondary_y": False}],
        [{"secondary_y": True}]
    ],
    vertical_spacing=0.08,
    row_heights=[0.3, 0.2, 0.2, 0.3]
)

# Panel 1: Stock Price + News Markers
if has_price_data:
    fig.add_trace(
        go.Scatter(
            x=merged['date'],
            y=merged['Close'],
            mode='lines',
            name='Stock Price',
            line=dict(color='blue', width=2)
        ),
        row=1, col=1
    )

    # Add news markers
    for sentiment in ['positive', 'negative', 'neutral']:
        sent_news = news_df[news_df['sentiment'] == sentiment]
        news_with_prices = sent_news.merge(
            merged[['date', 'Close']],
            left_on=pd.to_datetime(sent_news['date']).dt.normalize(),
            right_on='date',
            how='inner'
        )

        if len(news_with_prices) > 0:
            sizes = (news_with_prices['polarity'].abs() * 20 + 5).fillna(10)

            fig.add_trace(
                go.Scatter(
                    x=news_with_prices['date_y'],
                    y=news_with_prices['Close'],
                    mode='markers',
                    name=f'{sentiment.title()} News',
                    marker=dict(
                        size=sizes,
                        color=color_map[sentiment],
                        opacity=0.6
                    ),
                    text=news_with_prices['headline'],
                    hoverinfo='text'
                ),
                row=1, col=1
            )

# Panel 2: Weekly Sentiment Distribution
news_df['week'] = news_df['date'].dt.to_period('W').dt.to_timestamp()
weekly_sentiment = news_df.groupby(['week', 'sentiment']).size().unstack(fill_value=0)

for sentiment in ['positive', 'neutral', 'negative']:
    if sentiment in weekly_sentiment.columns:
        fig.add_trace(
            go.Bar(
                x=weekly_sentiment.index,
                y=weekly_sentiment[sentiment],
                name=sentiment.title(),
                marker_color=color_map[sentiment]
            ),
            row=2, col=1
        )

# Panel 3: Sentiment Trends
fig.add_trace(
    go.Scatter(
        x=merged['date'],
        y=merged['avg_polarity'],
        mode='lines+markers',
        name='Avg Sentiment',
        line=dict(color='purple', width=2),
        marker=dict(size=4)
    ),
    row=3, col=1
)
fig.add_hline(y=0, line_dash="dash", line_color="gray", row=3, col=1)

# Panel 4: News Volume vs Returns
fig.add_trace(
    go.Bar(
        x=merged['date'],
        y=merged['news_count'],
        name='News Volume',
        marker_color='lightblue',
        opacity=0.6
    ),
    row=4, col=1,
    secondary_y=False
)

if has_price_data:
    fig.add_trace(
        go.Scatter(
            x=merged['date'],
            y=merged['Returns'],
            mode='lines',
            name='Daily Returns (%)',
            line=dict(color='darkgreen', width=2)
        ),
        row=4, col=1,
        secondary_y=True
    )

# Layout
fig.update_xaxes(title_text="Date", row=4, col=1)
fig.update_yaxes(title_text="Price ($)", row=1, col=1)
fig.update_yaxes(title_text="Number of Articles", row=2, col=1)
fig.update_yaxes(title_text="Sentiment Score", row=3, col=1)
fig.update_yaxes(title_text="News Count", row=4, col=1, secondary_y=False)
if has_price_data:
    fig.update_yaxes(title_text="Returns (%)", row=4, col=1, secondary_y=True)

fig.update_layout(
    height=1600,
    showlegend=True,
    title_text=f"{TICKER} News Impact Analysis ({YEARS} Years)",
    title_font_size=20,
    hovermode='x unified',
    barmode='stack'
)

fig.show()

print("\n✅ Dashboard created!")



🎨 Creating interactive visualizations...



✅ Dashboard created!


In [None]:
# ============================================================
# SECTION 11: Save Results
# ============================================================
print("\n💾 Saving results...")

# Save processed data
output_file = f"{TICKER}_analysis_{YEARS}y.csv"
merged.to_csv(output_file, index=False)
print(f"✅ Saved: {output_file}")

# Save dashboard
html_file = f"{TICKER}_dashboard_{YEARS}y.html"
fig.write_html(html_file)
print(f"✅ Saved: {html_file}")

# Save summary report
summary = {
    'ticker': TICKER,
    'analysis_period_years': YEARS,
    'total_news_articles': int(len(news_df)),
    'date_range': {
        'start': str(news_df['date'].min()),
        'end': str(news_df['date'].max())
    },
    'sentiment_distribution': {
        'positive': int(sentiment_dist.get('positive', 0)),
        'neutral': int(sentiment_dist.get('neutral', 0)),
        'negative': int(sentiment_dist.get('negative', 0))
    },
    'avg_sentiment': float(news_df['polarity'].mean()),
    'top_organizations': [
        {'name': org, 'mentions': count}
        for org, count in entities['ORG'].most_common(10)
    ],
    'top_locations': [
        {'name': loc, 'mentions': count}
        for loc, count in entities['GPE'].most_common(10)
    ]
}

if has_price_data:
    summary['stock_performance'] = {
        'total_return_pct': float(total_return),
        'avg_daily_return_pct': float(avg_return),
        'volatility_pct': float(volatility),
        'correlation_sentiment_returns': float(correlation) if 'correlation' in locals() else None
    }

json_file = f"{TICKER}_summary_{YEARS}y.json"
with open(json_file, 'w') as f:
    json.dump(summary, f, indent=2)
print(f"✅ Saved: {json_file}")


💾 Saving results...
✅ Saved: AAPL_analysis_5y.csv
✅ Saved: AAPL_dashboard_5y.html
✅ Saved: AAPL_summary_5y.json


In [None]:
# ============================================================
# SECTION 12: Final Summary
# ============================================================
print("\n" + "="*60)
print(f"🎉 ANALYSIS COMPLETE: {TICKER}")
print("="*60)
print(f"\n📊 Summary:")
print(f"  • News Articles: {len(news_df):,}")
print(f"  • Date Range: {news_df['date'].min().strftime('%Y-%m-%d')} to {news_df['date'].max().strftime('%Y-%m-%d')}")
print(f"  • Avg Sentiment: {news_df['polarity'].mean():.3f}")
print(f"  • Positive News: {sentiment_dist.get('positive', 0):,} ({sentiment_dist.get('positive', 0)/len(news_df)*100:.1f}%)")
print(f"  • Negative News: {sentiment_dist.get('negative', 0):,} ({sentiment_dist.get('negative', 0)/len(news_df)*100:.1f}%)")

if has_price_data:
    print(f"\n💰 Stock Performance:")
    print(f"  • Total Return: {total_return:+.2f}%")
    print(f"  • Volatility: {volatility:.2f}%")
    if 'correlation' in locals():
        print(f"  • Sentiment-Return Correlation: {correlation:.3f}")

print(f"\n📁 Output Files:")
print(f"  • {output_file}")
print(f"  • {html_file}")
print(f"  • {json_file}")

print("\n✨ Next Steps:")
print("  1. Open the HTML file to explore interactive dashboard")
print("  2. Review the CSV for detailed daily data")
print("  3. Check JSON for structured summary")
print("  4. Change TICKER and YEARS at the top to analyze other stocks")

print("\n" + "="*60)


🎉 ANALYSIS COMPLETE: AAPL

📊 Summary:
  • News Articles: 8,865
  • Date Range: 2022-06-03 to 2023-12-16
  • Avg Sentiment: 0.245
  • Positive News: 3,942 (44.5%)
  • Negative News: 850 (9.6%)

💰 Stock Performance:
  • Total Return: +154.30%
  • Volatility: 1.78%
  • Sentiment-Return Correlation: 0.412

📁 Output Files:
  • AAPL_analysis_5y.csv
  • AAPL_dashboard_5y.html
  • AAPL_summary_5y.json

✨ Next Steps:
  1. Open the HTML file to explore interactive dashboard
  2. Review the CSV for detailed daily data
  3. Check JSON for structured summary
  4. Change TICKER and YEARS at the top to analyze other stocks



In [None]:
# ============================================================
# SECTION 13: Relationship Network Map
# ============================================================
print("\n🕸️ Building relationship network map...")

# Initialize relationship data structure
relationships = {
    'Indices': [],
    'Peers': [],
    'Holders': [],
    'Analysts': [],
    'Executives': [],
    'News_Topics': [],
    'Locations': [],
    'Events': []
}

# 1. Extract from NER entities
if entities['ORG']:
    # Filter out the main ticker and common terms
    exclude_terms = {TICKER, 'Apple', 'Inc', 'Corp', 'Ltd', 'LLC', 'Co'}
    relationships['Peers'] = [
        org for org, count in entities['ORG'].most_common(15)
        if org not in exclude_terms and count > 1
    ][:10]

if entities['GPE']:
    relationships['Locations'] = [
        loc for loc, count in entities['GPE'].most_common(10)
    ][:8]

if entities['PERSON']:
    relationships['Executives'] = [
        person for person, count in entities['PERSON'].most_common(8)
        if 'Buffett' not in person  # Filter investment personalities
    ][:6]

# 2. Extract common topics from headlines
print("  • Analyzing headline topics...")
all_headlines = ' '.join(news_df['headline'].dropna().astype(str).str.lower())
common_words = [
    'iphone', 'ipad', 'mac', 'services', 'revenue', 'earnings',
    'profit', 'growth', 'market', 'sales', 'china', 'ai', 'chip',
    'supply', 'demand', 'innovation', 'launch', 'product'
]
relationships['News_Topics'] = [
    word.upper() for word in common_words
    if all_headlines.count(word) > 20
][:8]

# 3. Add known indices for AAPL
relationships['Indices'] = ['S&P 500', 'NASDAQ-100', 'DOW 30', 'NASDAQ Composite']

# 4. Add major analyst firms (common in tech sector)
relationships['Analysts'] = [
    'Goldman Sachs', 'Morgan Stanley', 'JP Morgan',
    'Bank of America', 'Wedbush', 'Piper Sandler'
]

# 5. Add major institutional holders (known for AAPL)
relationships['Holders'] = [
    'Vanguard', 'BlackRock', 'Berkshire Hathaway',
    'State Street', 'Fidelity', 'Geode Capital'
]

# 6. Extract event types from sentiment analysis
high_impact_dates = merged.nlargest(5, 'news_count')['date'].dt.strftime('%Y-%m-%d').tolist()
relationships['Events'] = [f"High Volume: {d}" for d in high_impact_dates]

print(f"✅ Extracted relationships:")
for category, items in relationships.items():
    if items:
        print(f"  • {category}: {len(items)} items")

# Create network graph using NetworkX
G = nx.Graph()

# Add center node
G.add_node(TICKER, node_type='center', size=100, color='#FF6B6B')

# Add relationship categories
category_colors = {
    'Indices': '#4ECDC4',
    'Peers': '#45B7D1',
    'Holders': '#96CEB4',
    'Analysts': '#FFEAA7',
    'Executives': '#DFE6E9',
    'News_Topics': '#74B9FF',
    'Locations': '#FD79A8',
    'Events': '#A29BFE'
}

# Build the network
edges = []
node_data = {}

for category, items in relationships.items():
    if not items:
        continue

    # Add category hub
    cat_node = category
    G.add_node(
        cat_node,
        node_type='category',
        size=60,
        color=category_colors.get(category, '#95A5A6')
    )
    G.add_edge(TICKER, cat_node, weight=3)
    edges.append((TICKER, cat_node))

    # Add items
    for item in items[:8]:  # Limit to 8 items per category
        item_node = f"{category}:{item}"
        G.add_node(
            item_node,
            node_type='item',
            size=30,
            color=category_colors.get(category, '#95A5A6'),
            label=item
        )
        G.add_edge(cat_node, item_node, weight=1)
        edges.append((cat_node, item_node))

print(f"\n📊 Network stats:")
print(f"  • Total nodes: {G.number_of_nodes()}")
print(f"  • Total edges: {G.number_of_edges()}")

# Create interactive visualization with Plotly
print("\n🎨 Creating interactive network visualization...")

# Calculate layout
pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

# Prepare edge traces
edge_trace = go.Scatter(
    x=[],
    y=[],
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines'
)

for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_trace['x'] += tuple([x0, x1, None])
    edge_trace['y'] += tuple([y0, y1, None])

# Prepare node traces by type
node_traces = {}

for node in G.nodes():
    node_type = G.nodes[node].get('node_type', 'item')

    if node_type not in node_traces:
        node_traces[node_type] = {
            'x': [],
            'y': [],
            'text': [],
            'size': [],
            'color': []
        }

    x, y = pos[node]
    node_traces[node_type]['x'].append(x)
    node_traces[node_type]['y'].append(y)

    # Label
    if node_type == 'center':
        label = f"<b>{node}</b>"
    elif node_type == 'category':
        label = f"<b>{node}</b>"
    else:
        label = G.nodes[node].get('label', node.split(':')[-1])

    node_traces[node_type]['text'].append(label)
    node_traces[node_type]['size'].append(G.nodes[node].get('size', 20))
    node_traces[node_type]['color'].append(G.nodes[node].get('color', '#95A5A6'))

# Create figure
fig_network = go.Figure()

# Add edges
fig_network.add_trace(edge_trace)

# Add nodes by type
trace_config = {
    'center': {'name': 'Company', 'symbol': 'circle'},
    'category': {'name': 'Category', 'symbol': 'square'},
    'item': {'name': 'Related Entity', 'symbol': 'circle'}
}

for node_type, data in node_traces.items():
    config = trace_config.get(node_type, {'name': 'Other', 'symbol': 'circle'})

    fig_network.add_trace(go.Scatter(
        x=data['x'],
        y=data['y'],
        mode='markers+text',
        name=config['name'],
        text=data['text'],
        textposition='top center',
        textfont=dict(size=10),
        marker=dict(
            size=data['size'],
            color=data['color'],
            symbol=config['symbol'],
            line=dict(width=2, color='white')
        ),
        hoverinfo='text',
        hovertext=data['text']
    ))

# Update layout
fig_network.update_layout(
    title=dict(
        text=f"{TICKER} Relationship Network Map",
        font=dict(size=24)
    ),
    showlegend=True,
    hovermode='closest',
    margin=dict(b=20, l=5, r=5, t=80),
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    plot_bgcolor='white',
    height=900,
    width=1200
)

# Show the network
fig_network.show()

print("✅ Network visualization created!")

# Save network visualization
network_html = f"{TICKER}_relationship_network_{YEARS}y.html"
fig_network.write_html(network_html)
print(f"✅ Saved: {network_html}")

# Save network data
network_summary = {
    'ticker': TICKER,
    'relationships': {
        category: items for category, items in relationships.items() if items
    },
    'network_stats': {
        'total_nodes': G.number_of_nodes(),
        'total_edges': G.number_of_edges(),
        'categories': list(relationships.keys())
    }
}

network_json = f"{TICKER}_network_data_{YEARS}y.json"
with open(network_json, 'w') as f:
    json.dump(network_summary, f, indent=2)
print(f"✅ Saved: {network_json}")

print("\n📊 Relationship Summary:")
for category, items in relationships.items():
    if items:
        print(f"\n{category}:")
        for item in items[:5]:
            print(f"  • {item}")
        if len(items) > 5:
            print(f"  ... and {len(items)-5} more")


🕸️ Building relationship network map...
  • Analyzing headline topics...
✅ Extracted relationships:
  • Indices: 4 items
  • Peers: 5 items
  • Holders: 6 items
  • Analysts: 6 items
  • Executives: 5 items
  • News_Topics: 8 items
  • Locations: 8 items
  • Events: 5 items

📊 Network stats:
  • Total nodes: 56
  • Total edges: 55

🎨 Creating interactive network visualization...


✅ Network visualization created!
✅ Saved: AAPL_relationship_network_5y.html
✅ Saved: AAPL_network_data_5y.json

📊 Relationship Summary:

Indices:
  • S&P 500
  • NASDAQ-100
  • DOW 30
  • NASDAQ Composite

Peers:
  • BAC
  • Nasdaq
  • Buy Now and Hold Forever
  • META
  • Fed

Holders:
  • Vanguard
  • BlackRock
  • Berkshire Hathaway
  • State Street
  • Fidelity
  ... and 1 more

Analysts:
  • Goldman Sachs
  • Morgan Stanley
  • JP Morgan
  • Bank of America
  • Wedbush
  ... and 1 more

Executives:
  • Buy
  • Powell
  • KGC
  • JBLU
  • NVDA

News_Topics:
  • IPHONE
  • MAC
  • SERVICES
  • REVENUE
  • EARNINGS
  ... and 3 more

Locations:
  • US
  • China
  • India
  • Belgium
  • France
  ... and 3 more

Events:
  • High Volume: 2023-09-13
  • High Volume: 2023-08-04
  • High Volume: 2023-09-07
  • High Volume: 2023-09-12
  • High Volume: 2023-05-05


In [None]:
# ============================================================
# SECTION 14: Supply Chain Analysis
# ============================================================
print("\n🔗 Building supply chain network...")

# Initialize supply chain data
supply_chain = {
    'Suppliers': [],
    'Customers': [],
    'Partners': [],
    'Logistics': [],
    'Raw_Materials': []
}

# 1. Known AAPL suppliers (major ones)
known_suppliers = [
    'TSMC', 'Foxconn', 'Samsung', 'SK Hynix', 'Qualcomm',
    'Broadcom', 'Intel', 'LG Display', 'Sharp', 'Sony',
    'Corning', 'Murata', 'TDK', 'AMS', 'Dialog Semiconductor'
]

# 2. Known customer segments/channels
known_customers = [
    'Direct Retail', 'Apple Store', 'Online Store',
    'Telecom Carriers', 'Enterprise Customers',
    'Education Sector', 'Government', 'Resellers'
]

# 3. Known partners
known_partners = [
    'IBM', 'Cisco', 'SAP', 'Salesforce', 'Adobe',
    'Microsoft', 'Google', 'Amazon AWS'
]

# 4. Logistics providers
known_logistics = [
    'UPS', 'FedEx', 'DHL', 'Flexport'
]

# 5. Raw materials/components
known_materials = [
    'Rare Earth Elements', 'Aluminum', 'Glass',
    'OLED Displays', 'Chips/Processors', 'Batteries',
    'Cameras', 'Memory (DRAM/NAND)'
]

# Extract from news entities to find mentions
print("  • Analyzing news for supply chain entities...")

suppliers_found = []
partners_found = []

for org, count in entities['ORG'].most_common(30):
    org_lower = org.lower()
    # Check if mentioned supplier
    if any(supplier.lower() in org_lower for supplier in known_suppliers):
        suppliers_found.append(org)
    # Check if mentioned partner
    elif any(partner.lower() in org_lower for partner in known_partners):
        partners_found.append(org)

# Combine with known entities
supply_chain['Suppliers'] = list(set(known_suppliers[:12] + suppliers_found[:3]))[:12]
supply_chain['Customers'] = known_customers[:8]
supply_chain['Partners'] = list(set(known_partners[:8] + partners_found[:2]))[:8]
supply_chain['Logistics'] = known_logistics[:4]
supply_chain['Raw_Materials'] = known_materials[:8]

print(f"✅ Supply chain entities identified:")
for category, items in supply_chain.items():
    print(f"  • {category}: {len(items)} entities")

# Create directed supply chain network
G_supply = nx.DiGraph()

# Add center node (AAPL)
G_supply.add_node(
    TICKER,
    node_type='center',
    size=120,
    color='#FF6B6B',
    layer='center'
)

# Define positions and colors for each category
category_positions = {
    'Suppliers': {'layer': 'left', 'color': '#4ECDC4'},
    'Customers': {'layer': 'right', 'color': '#45B7D1'},
    'Partners': {'layer': 'bottom', 'color': '#96CEB4'},
    'Logistics': {'layer': 'bottom_left', 'color': '#FFEAA7'},
    'Raw_Materials': {'layer': 'top', 'color': '#A29BFE'}
}

# Build supply chain network
for category, items in supply_chain.items():
    if not items:
        continue

    config = category_positions.get(category, {})

    for i, item in enumerate(items):
        node_id = f"{category}:{item}"

        G_supply.add_node(
            node_id,
            node_type='entity',
            size=40,
            color=config.get('color', '#95A5A6'),
            label=item,
            layer=config.get('layer', 'other'),
            category=category
        )

        # Add directed edges based on flow
        if category in ['Suppliers', 'Raw_Materials', 'Logistics']:
            # Flow TO AAPL (suppliers, materials, logistics)
            G_supply.add_edge(node_id, TICKER, weight=2)
        elif category == 'Customers':
            # Flow FROM AAPL (customers)
            G_supply.add_edge(TICKER, node_id, weight=2)
        elif category == 'Partners':
            # Bidirectional for partners
            G_supply.add_edge(TICKER, node_id, weight=1)
            G_supply.add_edge(node_id, TICKER, weight=1)

print(f"\n📊 Supply chain network stats:")
print(f"  • Total nodes: {G_supply.number_of_nodes()}")
print(f"  • Total edges: {G_supply.number_of_edges()}")

# Create hierarchical layout
print("\n🎨 Creating supply chain visualization...")

# Manual positioning for better supply chain layout
pos_supply = {}

# Center: AAPL
pos_supply[TICKER] = (0, 0)

# Calculate positions for each category
def distribute_nodes(nodes, center_x, center_y, radius, start_angle, end_angle):
    positions = {}
    n = len(nodes)
    if n == 0:
        return positions

    angle_step = (end_angle - start_angle) / max(n - 1, 1) if n > 1 else 0

    for i, node in enumerate(nodes):
        angle = start_angle + i * angle_step
        x = center_x + radius * np.cos(np.radians(angle))
        y = center_y + radius * np.sin(np.radians(angle))
        positions[node] = (x, y)

    return positions

# Position suppliers (left side)
supplier_nodes = [n for n in G_supply.nodes() if G_supply.nodes[n].get('category') == 'Suppliers']
pos_supply.update(distribute_nodes(supplier_nodes, -3, 0, 2.5, 60, 120))

# Position customers (right side)
customer_nodes = [n for n in G_supply.nodes() if G_supply.nodes[n].get('category') == 'Customers']
pos_supply.update(distribute_nodes(customer_nodes, 3, 0, 2.5, -120, -60))

# Position partners (bottom)
partner_nodes = [n for n in G_supply.nodes() if G_supply.nodes[n].get('category') == 'Partners']
pos_supply.update(distribute_nodes(partner_nodes, 0, -2.5, 2, -30, -150))

# Position logistics (bottom left)
logistics_nodes = [n for n in G_supply.nodes() if G_supply.nodes[n].get('category') == 'Logistics']
pos_supply.update(distribute_nodes(logistics_nodes, -2, -2, 1.5, 200, 240))

# Position raw materials (top)
material_nodes = [n for n in G_supply.nodes() if G_supply.nodes[n].get('category') == 'Raw_Materials']
pos_supply.update(distribute_nodes(material_nodes, 0, 2.5, 2.5, 30, 150))

# Create edge traces with arrows
edge_traces = []

for edge in G_supply.edges():
    x0, y0 = pos_supply[edge[0]]
    x1, y1 = pos_supply[edge[1]]

    # Calculate arrow direction
    dx = x1 - x0
    dy = y1 - y0

    edge_trace = go.Scatter(
        x=[x0, x1, None],
        y=[y0, y1, None],
        line=dict(width=1.5, color='#888'),
        hoverinfo='none',
        mode='lines',
        showlegend=False
    )
    edge_traces.append(edge_trace)

    # Add arrow annotation
    # Calculate arrow position (70% along the edge)
    arrow_x = x0 + 0.7 * dx
    arrow_y = y0 + 0.7 * dy

    edge_traces.append(go.Scatter(
        x=[arrow_x],
        y=[arrow_y],
        mode='markers',
        marker=dict(
            size=8,
            color='#888',
            symbol='arrow',
            angle=np.degrees(np.arctan2(dy, dx)),
        ),
        hoverinfo='none',
        showlegend=False
    ))

# Create node traces by category
fig_supply = go.Figure()

# Add edges first
for trace in edge_traces:
    fig_supply.add_trace(trace)

# Add nodes by category
categories_config = {
    'Suppliers': {'name': '📦 Suppliers', 'color': '#4ECDC4', 'symbol': 'square'},
    'Customers': {'name': '🛒 Customers', 'color': '#45B7D1', 'symbol': 'diamond'},
    'Partners': {'name': '🤝 Partners', 'color': '#96CEB4', 'symbol': 'hexagon'},
    'Logistics': {'name': '🚚 Logistics', 'color': '#FFEAA7', 'symbol': 'triangle-up'},
    'Raw_Materials': {'name': '⚙️ Raw Materials', 'color': '#A29BFE', 'symbol': 'circle'}
}

for category, config in categories_config.items():
    category_nodes = [n for n in G_supply.nodes() if G_supply.nodes[n].get('category') == category]

    if not category_nodes:
        continue

    x_coords = [pos_supply[n][0] for n in category_nodes]
    y_coords = [pos_supply[n][1] for n in category_nodes]
    labels = [G_supply.nodes[n].get('label', n.split(':')[-1]) for n in category_nodes]

    fig_supply.add_trace(go.Scatter(
        x=x_coords,
        y=y_coords,
        mode='markers+text',
        name=config['name'],
        text=labels,
        textposition='top center',
        textfont=dict(size=9),
        marker=dict(
            size=40,
            color=config['color'],
            symbol=config['symbol'],
            line=dict(width=2, color='white')
        ),
        hoverinfo='text',
        hovertext=labels
    ))

# Add center node (AAPL)
fig_supply.add_trace(go.Scatter(
    x=[0],
    y=[0],
    mode='markers+text',
    name='🏢 Company',
    text=[f'<b>{TICKER}</b>'],
    textposition='middle center',
    textfont=dict(size=16, color='white'),
    marker=dict(
        size=120,
        color='#FF6B6B',
        symbol='circle',
        line=dict(width=3, color='white')
    ),
    hoverinfo='text',
    hovertext=[f'{TICKER} - Center of Supply Chain']
))

# Add annotations for flow directions
annotations = [
    dict(
        x=-3, y=2.8,
        text="<b>INBOUND FLOW</b><br>Materials & Components",
        showarrow=False,
        font=dict(size=12, color='#666'),
        align='center'
    ),
    dict(
        x=3, y=2.8,
        text="<b>OUTBOUND FLOW</b><br>Products to Market",
        showarrow=False,
        font=dict(size=12, color='#666'),
        align='center'
    ),
    dict(
        x=0, y=-3.5,
        text="<b>PARTNERSHIPS</b><br>Strategic Collaborations",
        showarrow=False,
        font=dict(size=12, color='#666'),
        align='center'
    )
]

# Update layout
fig_supply.update_layout(
    title=dict(
        text=f"{TICKER} Supply Chain Network Analysis",
        font=dict(size=24),
        x=0.5,
        xanchor='center'
    ),
    showlegend=True,
    legend=dict(
        x=1.02,
        y=1,
        bgcolor='rgba(255,255,255,0.8)',
        bordercolor='#ddd',
        borderwidth=1
    ),
    hovermode='closest',
    margin=dict(b=50, l=50, r=150, t=100),
    xaxis=dict(
        showgrid=False,
        zeroline=False,
        showticklabels=False,
        range=[-4.5, 4.5]
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showticklabels=False,
        range=[-4, 3.5]
    ),
    plot_bgcolor='#f8f9fa',
    height=900,
    width=1400,
    annotations=annotations
)

# Show the supply chain
fig_supply.show()

print("✅ Supply chain visualization created!")

# Save supply chain visualization
supply_html = f"{TICKER}_supply_chain_{YEARS}y.html"
fig_supply.write_html(supply_html)
print(f"✅ Saved: {supply_html}")

# Calculate supply chain risk metrics
print("\n⚠️ Supply Chain Risk Analysis:")

# Concentration risk (if too dependent on few suppliers)
supplier_concentration = len(supply_chain['Suppliers'])
if supplier_concentration < 5:
    risk_level = "HIGH"
elif supplier_concentration < 10:
    risk_level = "MEDIUM"
else:
    risk_level = "LOW"

print(f"  • Supplier Concentration Risk: {risk_level}")
print(f"    - Number of identified suppliers: {supplier_concentration}")

# Geographic risk (based on locations in NER)
high_risk_locations = ['China', 'Taiwan', 'Russia']
risky_locations = [loc for loc in relationships.get('Locations', [])
                   if any(risk in loc for risk in high_risk_locations)]

if risky_locations:
    print(f"  • Geographic Risk: HIGH")
    print(f"    - High-risk regions mentioned: {', '.join(risky_locations)}")
else:
    print(f"  • Geographic Risk: MODERATE")

# News sentiment impact on supply chain
supply_chain_keywords = ['supply', 'chip', 'shortage', 'supplier', 'component', 'logistics']
supply_news = news_df[
    news_df['headline'].str.lower().str.contains('|'.join(supply_chain_keywords), na=False)
]

if len(supply_news) > 0:
    avg_supply_sentiment = supply_news['polarity'].mean()
    print(f"  • Supply Chain Sentiment: {avg_supply_sentiment:.3f}")
    print(f"    - Related news articles: {len(supply_news)}")

    if avg_supply_sentiment < -0.1:
        print(f"    - Status: ⚠️ NEGATIVE sentiment detected")
    elif avg_supply_sentiment > 0.1:
        print(f"    - Status: ✅ POSITIVE sentiment")
    else:
        print(f"    - Status: ⚖️ NEUTRAL sentiment")

# Save supply chain data
supply_chain_data = {
    'ticker': TICKER,
    'supply_chain': supply_chain,
    'network_stats': {
        'total_entities': G_supply.number_of_nodes() - 1,  # Exclude center
        'total_connections': G_supply.number_of_edges()
    },
    'risk_metrics': {
        'supplier_concentration_risk': risk_level,
        'number_of_suppliers': supplier_concentration,
        'geographic_risk_locations': risky_locations,
        'supply_chain_news_count': len(supply_news),
        'supply_chain_sentiment': float(avg_supply_sentiment) if len(supply_news) > 0 else None
    }
}

supply_json = f"{TICKER}_supply_chain_data_{YEARS}y.json"
with open(supply_json, 'w') as f:
    json.dump(supply_chain_data, f, indent=2)
print(f"\n✅ Saved: {supply_json}")

print("\n📊 Supply Chain Summary:")
for category, items in supply_chain.items():
    if items:
        print(f"\n{category.replace('_', ' ')}:")
        for item in items[:5]:
            print(f"  • {item}")
        if len(items) > 5:
            print(f"  ... and {len(items)-5} more")


🔗 Building supply chain network...
  • Analyzing news for supply chain entities...
✅ Supply chain entities identified:
  • Suppliers: 12 entities
  • Customers: 8 entities
  • Partners: 8 entities
  • Logistics: 4 entities
  • Raw_Materials: 8 entities

📊 Supply chain network stats:
  • Total nodes: 41
  • Total edges: 48

🎨 Creating supply chain visualization...


✅ Supply chain visualization created!
✅ Saved: AAPL_supply_chain_5y.html

⚠️ Supply Chain Risk Analysis:
  • Supplier Concentration Risk: LOW
    - Number of identified suppliers: 12
  • Geographic Risk: HIGH
    - High-risk regions mentioned: China
  • Supply Chain Sentiment: 0.171
    - Related news articles: 283
    - Status: ✅ POSITIVE sentiment

✅ Saved: AAPL_supply_chain_data_5y.json

📊 Supply Chain Summary:

Suppliers:
  • Corning
  • SK Hynix
  • Sharp
  • Murata
  • LG Display
  ... and 7 more

Customers:
  • Direct Retail
  • Apple Store
  • Online Store
  • Telecom Carriers
  • Enterprise Customers
  ... and 3 more

Partners:
  • Adobe
  • Salesforce
  • Cisco
  • IBM
  • SAP
  ... and 3 more

Logistics:
  • UPS
  • FedEx
  • DHL
  • Flexport

Raw Materials:
  • Rare Earth Elements
  • Aluminum
  • Glass
  • OLED Displays
  • Chips/Processors
  ... and 3 more


In [None]:
# ============================================================
# SECTION 13: Relationship Network Map (Data-Driven)
# ============================================================
print("\n🕸️ Building relationship network map from actual data...")

# Initialize relationship data structure
relationships = {
    'Indices': [],
    'Peers': [],
    'Holders': [],
    'Analysts': [],
    'Executives': [],
    'News_Topics': [],
    'Locations': [],
    'Events': []
}

# 1. Extract Organizations from NER (Peers & Analysts)
print("  • Extracting organizations from news...")
if entities['ORG']:
    exclude_terms = {TICKER, 'Apple', 'Inc', 'Corp', 'Ltd', 'LLC', 'Co', 'Company'}

    # Known analyst/financial institutions
    analyst_keywords = ['bank', 'securities', 'capital', 'asset', 'management', 'partners',
                       'goldman', 'morgan', 'jpmorgan', 'jp morgan', 'bofa', 'ubs', 'credit suisse',
                       'barclays', 'citi', 'deutsche', 'wells fargo', 'jefferies', 'piper', 'wedbush']

    # Known tech companies (peers)
    tech_keywords = ['microsoft', 'google', 'alphabet', 'amazon', 'meta', 'facebook',
                    'tesla', 'nvidia', 'intel', 'amd', 'qualcomm', 'samsung', 'tsmc']

    # Known holders
    holder_keywords = ['vanguard', 'blackrock', 'berkshire', 'state street', 'fidelity',
                      'geode', 'invesco', 'franklin', 'jpmorgan chase']

    for org, count in entities['ORG'].most_common(50):
        if org in exclude_terms or count < 2:
            continue

        org_lower = org.lower()

        # Categorize
        if any(keyword in org_lower for keyword in analyst_keywords):
            if len(relationships['Analysts']) < 10:
                relationships['Analysts'].append({'name': org, 'mentions': count})
        elif any(keyword in org_lower for keyword in holder_keywords):
            if len(relationships['Holders']) < 10:
                relationships['Holders'].append({'name': org, 'mentions': count})
        elif any(keyword in org_lower for keyword in tech_keywords):
            if len(relationships['Peers']) < 10:
                relationships['Peers'].append({'name': org, 'mentions': count})
        else:
            # Default to peers if tech-related
            if len(relationships['Peers']) < 15:
                relationships['Peers'].append({'name': org, 'mentions': count})

# 2. Extract Locations from NER
print("  • Extracting locations...")
if entities['GPE']:
    for loc, count in entities['GPE'].most_common(15):
        if count >= 2:
            relationships['Locations'].append({'name': loc, 'mentions': count})

# 3. Extract People (Executives)
print("  • Extracting executives and key people...")
if entities['PERSON']:
    # Filter out investors and keep likely executives
    exclude_people = ['buffett', 'munger', 'cathie wood', 'elon musk']
    for person, count in entities['PERSON'].most_common(15):
        person_lower = person.lower()
        if not any(excl in person_lower for excl in exclude_people) and count >= 2:
            relationships['Executives'].append({'name': person, 'mentions': count})

# 4. Extract News Topics from headlines (using topic modeling)
print("  • Analyzing headline topics...")
all_headlines_text = ' '.join(news_df['headline'].dropna().astype(str).str.lower())

# Common tech/finance terms to track
topic_keywords = {
    'iPhone': ['iphone'],
    'Services': ['service', 'services', 'subscription'],
    'Revenue': ['revenue', 'sales', 'earning'],
    'AI/ML': ['ai', 'artificial intelligence', 'machine learning'],
    'China': ['china', 'chinese'],
    'Supply Chain': ['supply', 'chip', 'semiconductor'],
    'Privacy': ['privacy', 'security', 'data'],
    'Lawsuit': ['lawsuit', 'legal', 'court'],
    'Competition': ['competition', 'antitrust', 'monopoly'],
    'Innovation': ['innovation', 'launch', 'new product']
}

for topic, keywords in topic_keywords.items():
    count = sum(all_headlines_text.count(kw) for kw in keywords)
    if count > 20:  # Significant mentions
        relationships['News_Topics'].append({'name': topic, 'mentions': count})

# Sort by mentions
for key in relationships:
    if relationships[key] and isinstance(relationships[key][0], dict):
        relationships[key] = sorted(relationships[key], key=lambda x: x['mentions'], reverse=True)

# 5. Extract Events from high-volume days
print("  • Identifying key events...")
high_volume_days = merged.nlargest(10, 'news_count')[['date', 'news_count', 'avg_polarity']].copy()
for _, row in high_volume_days.iterrows():
    date_str = row['date'].strftime('%Y-%m-%d')
    sentiment = 'Positive' if row['avg_polarity'] > 0.1 else 'Negative' if row['avg_polarity'] < -0.1 else 'Neutral'
    relationships['Events'].append({
        'name': f"{date_str} ({sentiment})",
        'mentions': int(row['news_count'])
    })

# 6. Add known indices (using Yahoo Finance API)
print("  • Identifying market indices...")
try:
    # Check which indices AAPL belongs to
    ticker_info = yf.Ticker(TICKER)
    sector = ticker_info.info.get('sector', 'Technology')
    industry = ticker_info.info.get('industry', 'Consumer Electronics')

    # Major indices for large tech stocks
    relationships['Indices'] = [
        {'name': 'S&P 500', 'mentions': 1},
        {'name': 'NASDAQ-100', 'mentions': 1},
        {'name': 'DJIA', 'mentions': 1},
        {'name': 'NASDAQ Composite', 'mentions': 1}
    ]
except:
    relationships['Indices'] = [
        {'name': 'S&P 500', 'mentions': 1},
        {'name': 'NASDAQ', 'mentions': 1}
    ]

print(f"\n✅ Extracted relationships from data:")
for category, items in relationships.items():
    if items:
        print(f"  • {category}: {len(items)} items")

# Create network graph using NetworkX
G = nx.Graph()

# Add center node
G.add_node(TICKER, node_type='center', size=100, color='#FF6B6B', mentions=len(news_df))

# Define category colors
category_colors = {
    'Indices': '#4ECDC4',
    'Peers': '#45B7D1',
    'Holders': '#96CEB4',
    'Analysts': '#FFEAA7',
    'Executives': '#DFE6E9',
    'News_Topics': '#74B9FF',
    'Locations': '#FD79A8',
    'Events': '#A29BFE'
}

# Build the network
edges = []
node_data = {}

for category, items in relationships.items():
    if not items:
        continue

    # Add category hub
    cat_node = category
    cat_mentions = sum(item.get('mentions', 0) for item in items)
    G.add_node(
        cat_node,
        node_type='category',
        size=min(80, 40 + len(items) * 5),  # Scale by number of items
        color=category_colors.get(category, '#95A5A6'),
        mentions=cat_mentions
    )
    G.add_edge(TICKER, cat_node, weight=min(10, len(items)))

    # Add items (limit to top items per category)
    max_items = 8
    for item in items[:max_items]:
        item_name = item['name'] if isinstance(item, dict) else item
        item_mentions = item.get('mentions', 1) if isinstance(item, dict) else 1

        item_node = f"{category}:{item_name}"
        G.add_node(
            item_node,
            node_type='item',
            size=max(20, min(50, 20 + item_mentions // 5)),  # Scale by mentions
            color=category_colors.get(category, '#95A5A6'),
            label=item_name,
            mentions=item_mentions
        )
        G.add_edge(cat_node, item_node, weight=max(1, item_mentions // 10))

print(f"\n📊 Network stats:")
print(f"  • Total nodes: {G.number_of_nodes()}")
print(f"  • Total edges: {G.number_of_edges()}")

# Create interactive visualization with Plotly
print("\n🎨 Creating interactive network visualization...")

# Calculate layout with better spacing
pos = nx.spring_layout(G, k=3, iterations=100, seed=42, scale=2)

# Prepare edge traces with varying thickness
edge_x = []
edge_y = []
edge_weights = []

for edge in G.edges(data=True):
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.extend([x0, x1, None])
    edge_y.extend([y0, y1, None])
    edge_weights.append(edge[2].get('weight', 1))

edge_trace = go.Scatter(
    x=edge_x,
    y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines',
    showlegend=False
)

# Prepare node traces by type
node_traces_dict = {}

for node in G.nodes():
    node_data = G.nodes[node]
    node_type = node_data.get('node_type', 'item')

    if node_type not in node_traces_dict:
        node_traces_dict[node_type] = {
            'x': [], 'y': [], 'text': [], 'size': [], 'color': [], 'hovertext': []
        }

    x, y = pos[node]
    node_traces_dict[node_type]['x'].append(x)
    node_traces_dict[node_type]['y'].append(y)

    # Label
    if node_type == 'center':
        label = f"<b>{node}</b>"
        hover = f"<b>{node}</b><br>Total Articles: {node_data.get('mentions', 0):,}"
    elif node_type == 'category':
        label = f"<b>{node}</b>"
        hover = f"<b>{node}</b><br>Total Mentions: {node_data.get('mentions', 0):,}"
    else:
        label = node_data.get('label', node.split(':')[-1])
        hover = f"{label}<br>Mentions: {node_data.get('mentions', 0):,}"

    node_traces_dict[node_type]['text'].append(label)
    node_traces_dict[node_type]['size'].append(node_data.get('size', 20))
    node_traces_dict[node_type]['color'].append(node_data.get('color', '#95A5A6'))
    node_traces_dict[node_type]['hovertext'].append(hover)

# Create figure
fig_network = go.Figure()

# Add edges
fig_network.add_trace(edge_trace)

# Add nodes by type
trace_config = {
    'center': {'name': '🏢 Company', 'symbol': 'star'},
    'category': {'name': '📁 Category', 'symbol': 'square'},
    'item': {'name': '🔗 Entity', 'symbol': 'circle'}
}

for node_type, data in node_traces_dict.items():
    config = trace_config.get(node_type, {'name': 'Other', 'symbol': 'circle'})

    fig_network.add_trace(go.Scatter(
        x=data['x'],
        y=data['y'],
        mode='markers+text',
        name=config['name'],
        text=data['text'],
        textposition='top center',
        textfont=dict(size=9),
        marker=dict(
            size=data['size'],
            color=data['color'],
            symbol=config['symbol'],
            line=dict(width=2, color='white'),
            opacity=0.8
        ),
        hoverinfo='text',
        hovertext=data['hovertext']
    ))

# Update layout
fig_network.update_layout(
    title=dict(
        text=f"{TICKER} Relationship Network Map<br><sub>Based on {len(news_df):,} news articles</sub>",
        font=dict(size=24),
        x=0.5,
        xanchor='center'
    ),
    showlegend=True,
    legend=dict(
        x=1.02, y=1,
        bgcolor='rgba(255,255,255,0.9)',
        bordercolor='#ddd',
        borderwidth=1
    ),
    hovermode='closest',
    margin=dict(b=20, l=20, r=180, t=100),
    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
    plot_bgcolor='#f8f9fa',
    height=1000,
    width=1400,
    dragmode='pan'  # Enable panning
)

# Add zoom/pan controls
fig_network.update_xaxes(fixedrange=False)
fig_network.update_yaxes(fixedrange=False)

# Show the network
fig_network.show()

print("✅ Network visualization created!")

# Save network visualization
network_html = f"{TICKER}_relationship_network_{YEARS}y.html"
fig_network.write_html(network_html)
print(f"✅ Saved: {network_html}")

# Save network data
network_summary = {
    'ticker': TICKER,
    'analysis_period': f"{YEARS} years",
    'total_news_analyzed': len(news_df),
    'relationships': {
        category: [
            {'name': item['name'], 'mentions': item['mentions']}
            if isinstance(item, dict) else {'name': item, 'mentions': 1}
            for item in items
        ]
        for category, items in relationships.items() if items
    },
    'network_stats': {
        'total_nodes': G.number_of_nodes(),
        'total_edges': G.number_of_edges(),
        'categories': list(relationships.keys())
    }
}

network_json = f"{TICKER}_network_data_{YEARS}y.json"
with open(network_json, 'w') as f:
    json.dump(network_summary, f, indent=2)
print(f"✅ Saved: {network_json}")

print("\n📊 Top Relationships by Category:")
for category, items in relationships.items():
    if items:
        print(f"\n{category}:")
        display_items = items[:5] if len(items) > 5 else items
        for item in display_items:
            if isinstance(item, dict):
                print(f"  • {item['name']} ({item['mentions']} mentions)")
            else:
                print(f"  • {item}")
        if len(items) > 5:
            print(f"  ... and {len(items)-5} more")


🕸️ Building relationship network map from actual data...
  • Extracting organizations from news...
  • Extracting locations...
  • Extracting executives and key people...
  • Analyzing headline topics...
  • Identifying key events...
  • Identifying market indices...

✅ Extracted relationships from data:
  • Indices: 4 items
  • Peers: 5 items
  • News_Topics: 10 items
  • Locations: 3 items
  • Events: 10 items

📊 Network stats:
  • Total nodes: 34
  • Total edges: 33

🎨 Creating interactive network visualization...


✅ Network visualization created!
✅ Saved: AAPL_relationship_network_5y.html
✅ Saved: AAPL_network_data_5y.json

📊 Top Relationships by Category:

Indices:
  • S&P 500 (1 mentions)
  • NASDAQ-100 (1 mentions)
  • DJIA (1 mentions)
  • NASDAQ Composite (1 mentions)

Peers:
  • BAC (3 mentions)
  • Nasdaq (2 mentions)
  • Buy Now and Hold Forever (2 mentions)
  • META (2 mentions)
  • Fed (2 mentions)

News_Topics:
  • AI/ML (1379 mentions)
  • Revenue (683 mentions)
  • Privacy (328 mentions)
  • Supply Chain (266 mentions)
  • China (261 mentions)
  ... and 5 more

Locations:
  • US (15 mentions)
  • China (3 mentions)
  • India (2 mentions)

Events:
  • 2023-09-13 (Positive) (56 mentions)
  • 2023-08-04 (Positive) (54 mentions)
  • 2023-09-07 (Negative) (54 mentions)
  • 2023-09-12 (Neutral) (54 mentions)
  • 2023-05-05 (Positive) (50 mentions)
  ... and 5 more


In [None]:
# ============================================================
# SECTION 13: Enhanced Relationship Network Map (Bloomberg-Style)
# ============================================================
print("\n🕸 Building relationship network map (Bloomberg-style)...")

# Initialize relationship data structure
relationships = {
    'Indices': [],
    'Peers': [],
    'Holders': [],
    'Analysts': [],
    'Executives': [],
    'News_Topics': [],
    'Locations': [],
    'Events': []
}

# 1. Extract from NER entities
if entities['ORG']:
    exclude_terms = {TICKER, 'Apple', 'Inc', 'Corp', 'Ltd', 'LLC', 'Co'}
    relationships['Peers'] = [
        org for org, count in entities['ORG'].most_common(15)
        if org not in exclude_terms and count > 1
    ][:10]

if entities['GPE']:
    relationships['Locations'] = [
        loc for loc, count in entities['GPE'].most_common(10)
    ][:8]

if entities['PERSON']:
    relationships['Executives'] = [
        person for person, count in entities['PERSON'].most_common(8)
        if 'Buffett' not in person
    ][:6]

# 2. Extract topics from headlines
print(" • Analyzing headline topics...")
all_headlines = ' '.join(news_df['headline'].dropna().astype(str).str.lower())
common_words = [
    'iphone', 'ipad', 'mac', 'services', 'revenue', 'earnings',
    'profit', 'growth', 'market', 'sales', 'china', 'ai', 'chip',
    'supply', 'demand', 'innovation', 'launch', 'product'
]
relationships['News_Topics'] = [
    word.upper() for word in common_words
    if all_headlines.count(word) > 20
][:8]

# 3. Add known indices
relationships['Indices'] = ['S&P 500', 'NASDAQ-100', 'DOW 30', 'NASDAQ Composite']

# 4. Add major analysts
relationships['Analysts'] = [
    'Goldman Sachs', 'Morgan Stanley', 'JP Morgan',
    'Bank of America', 'Wedbush', 'Piper Sandler'
]

# 5. Add major holders
relationships['Holders'] = [
    'Vanguard', 'BlackRock', 'Berkshire Hathaway',
    'State Street', 'Fidelity', 'Geode Capital'
]

# 6. Extract events
high_impact_dates = merged.nlargest(5, 'news_count')['date'].dt.strftime('%Y-%m-%d').tolist()
relationships['Events'] = [f"High Volume: {d}" for d in high_impact_dates]

print(f"✅ Extracted relationships:")
for category, items in relationships.items():
    if items:
        print(f"  • {category}: {len(items)} items")

# Create network graph
G = nx.Graph()
G.add_node(TICKER, node_type='center', size=120, color='#FF6B6B')

# Define colors matching Bloomberg style
category_config = {
    'Indices': {'color': '#00C9A7', 'emoji': '📊', 'angle_range': (0, 60)},
    'Peers': {'color': '#4ECDC4', 'emoji': '🏢', 'angle_range': (60, 120)},
    'Holders': {'color': '#FFD93D', 'emoji': '💼', 'angle_range': (120, 180)},
    'Analysts': {'color': '#FFA07A', 'emoji': '📈', 'angle_range': (180, 240)},
    'Executives': {'color': '#B19CD9', 'emoji': '👤', 'angle_range': (240, 270)},
    'News_Topics': {'color': '#FF6B9D', 'emoji': '📰', 'angle_range': (270, 300)},
    'Locations': {'color': '#74B9FF', 'emoji': '🌍', 'angle_range': (300, 330)},
    'Events': {'color': '#A29BFE', 'emoji': '📅', 'angle_range': (330, 360)}
}

# Build network with weighted edges
for category, items in relationships.items():
    if not items:
        continue

    config = category_config.get(category, {'color': '#95A5A6', 'emoji': '•', 'angle_range': (0, 360)})

    for item in items[:10]:
        node_id = f"{category}:{item}"
        G.add_node(
            node_id,
            node_type='entity',
            size=35,
            color=config['color'],
            label=item,
            category=category
        )
        # Weighted edge (thicker = more important)
        weight = 3 if category in ['Indices', 'Holders'] else 1.5
        G.add_edge(TICKER, node_id, weight=weight)

print(f"\n📊 Network stats:")
print(f"  • Total nodes: {G.number_of_nodes()}")
print(f"  • Total edges: {G.number_of_edges()}")

# Create circular layout with angle distribution
print("\n🎨 Creating Bloomberg-style network visualization...")

def create_circular_layout(G, category_config):
    """Create circular layout with angle-based positioning"""
    pos = {}
    pos[TICKER] = (0, 0)  # Center

    for node in G.nodes():
        if node == TICKER:
            continue

        node_data = G.nodes[node]
        category = node_data.get('category', 'Other')
        config = category_config.get(category, {'angle_range': (0, 360)})

        # Get category nodes
        category_nodes = [n for n in G.nodes()
                         if G.nodes[n].get('category') == category and n != TICKER]

        # Calculate angle
        angle_start, angle_end = config['angle_range']
        angle_span = angle_end - angle_start

        if len(category_nodes) > 1:
            idx = category_nodes.index(node)
            angle = angle_start + (angle_span * idx / (len(category_nodes) - 1))
        else:
            angle = (angle_start + angle_end) / 2

        # Calculate position
        radius = 3.5  # Fixed radius
        x = radius * np.cos(np.radians(angle))
        y = radius * np.sin(np.radians(angle))
        pos[node] = (x, y)

    return pos

pos = create_circular_layout(G, category_config)

# Create Plotly figure
fig_network = go.Figure()

# Add edges with varying thickness
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    weight = G.edges[edge].get('weight', 1)

    fig_network.add_trace(go.Scatter(
        x=[x0, x1, None],
        y=[y0, y1, None],
        mode='lines',
        line=dict(width=weight, color='rgba(150,150,150,0.3)'),
        hoverinfo='none',
        showlegend=False
    ))

# Add nodes by category
for category, config in category_config.items():
    category_nodes = [n for n in G.nodes()
                     if G.nodes[n].get('category') == category]

    if not category_nodes:
        continue

    x_coords = [pos[n][0] for n in category_nodes]
    y_coords = [pos[n][1] for n in category_nodes]
    labels = [G.nodes[n].get('label', n.split(':')[-1]) for n in category_nodes]

    fig_network.add_trace(go.Scatter(
        x=x_coords,
        y=y_coords,
        mode='markers+text',
        name=f"{config['emoji']} {category}",
        text=labels,
        textposition='top center',
        textfont=dict(size=9, color='#2C3E50'),
        marker=dict(
            size=35,
            color=config['color'],
            line=dict(width=2, color='white'),
            opacity=0.9
        ),
        hovertemplate='<b>%{text}</b><br>Category: ' + category + '<extra></extra>'
    ))

# Add center node with special styling
fig_network.add_trace(go.Scatter(
    x=[0],
    y=[0],
    mode='markers+text',
    name='🎯 Company',
    text=[f'<b>{TICKER}</b>'],
    textposition='middle center',
    textfont=dict(size=18, color='white', family='Arial Black'),
    marker=dict(
        size=120,
        color='#FF6B6B',
        line=dict(width=4, color='white'),
        opacity=1
    ),
    hovertemplate=f'<b>{TICKER}</b><br>Central Entity<extra></extra>',
    showlegend=True
))

# Add sector labels (category names)
for category, config in category_config.items():
    angle_start, angle_end = config['angle_range']
    angle_mid = (angle_start + angle_end) / 2
    label_radius = 4.3

    x = label_radius * np.cos(np.radians(angle_mid))
    y = label_radius * np.sin(np.radians(angle_mid))

    fig_network.add_annotation(
        x=x, y=y,
        text=f"<b>{config['emoji']} {category}</b>",
        showarrow=False,
        font=dict(size=11, color=config['color'], family='Arial'),
        bgcolor='rgba(255,255,255,0.8)',
        bordercolor=config['color'],
        borderwidth=2,
        borderpad=4
    )

# Update layout
fig_network.update_layout(
    title=dict(
        text=f"{TICKER} Relationship Network Map<br><sub>Bloomberg-Style Visualization</sub>",
        font=dict(size=24, color='#2C3E50'),
        x=0.5,
        xanchor='center'
    ),
    showlegend=True,
    legend=dict(
        x=1.02,
        y=1,
        bgcolor='rgba(255,255,255,0.95)',
        bordercolor='#BDC3C7',
        borderwidth=2,
        font=dict(size=10)
    ),
    hovermode='closest',
    plot_bgcolor='#F8F9FA',
    paper_bgcolor='white',
    xaxis=dict(
        showgrid=False,
        zeroline=False,
        showticklabels=False,
        range=[-5, 5]
    ),
    yaxis=dict(
        showgrid=False,
        zeroline=False,
        showticklabels=False,
        range=[-5, 5],
        scaleanchor='x',
        scaleratio=1
    ),
    height=900,
    width=1200,
    margin=dict(l=50, r=200, t=100, b=50)
)

fig_network.show()
print("✅ Relationship network visualization created!")

# Save files
network_html = f"{TICKER}_relationship_network_{YEARS}y.html"
fig_network.write_html(network_html)
print(f"✅ Saved: {network_html}")


🕸 Building relationship network map (Bloomberg-style)...
 • Analyzing headline topics...
✅ Extracted relationships:
  • Indices: 4 items
  • Peers: 5 items
  • Holders: 6 items
  • Analysts: 6 items
  • Executives: 5 items
  • News_Topics: 8 items
  • Locations: 8 items
  • Events: 5 items

📊 Network stats:
  • Total nodes: 48
  • Total edges: 47

🎨 Creating Bloomberg-style network visualization...


✅ Relationship network visualization created!
✅ Saved: AAPL_relationship_network_5y.html


In [None]:
# ============================================================
# SECTION 14: Enhanced Supply Chain Analysis
# ============================================================
print("\n🔗 Building enhanced supply chain network...")

# Supply chain data
supply_chain = {
    'Raw_Materials': ['Rare Earth Elements', 'Aluminum', 'Glass', 'OLED Displays',
                      'Chips/Processors', 'Batteries', 'Cameras', 'Memory'],
    'Tier_2_Suppliers': ['TSMC', 'Samsung', 'SK Hynix', 'LG Display', 'Sharp'],
    'Tier_1_Suppliers': ['Foxconn', 'Broadcom', 'Qualcomm', 'Intel', 'Sony'],
    'Manufacturing': ['Assembly Plants', 'Quality Control', 'Testing Facilities'],
    'Logistics': ['UPS', 'FedEx', 'DHL', 'Flexport'],
    'Distribution': ['Apple Stores', 'Online Store', 'Authorized Resellers'],
    'Customers': ['Direct Consumers', 'Enterprise', 'Education', 'Government'],
    'Partners': ['IBM', 'Cisco', 'SAP', 'Microsoft', 'Google']
}

print(f"✅ Supply chain entities identified:")
for category, items in supply_chain.items():
    print(f"  • {category}: {len(items)} entities")

# Create directed graph for supply chain
G_supply = nx.DiGraph()
G_supply.add_node(TICKER, node_type='center', layer=3, size=100)

# Define layers and colors (left to right flow)
layer_config = {
    'Raw_Materials': {'layer': 0, 'color': '#A29BFE', 'symbol': 'circle'},
    'Tier_2_Suppliers': {'layer': 1, 'color': '#74B9FF', 'symbol': 'square'},
    'Tier_1_Suppliers': {'layer': 2, 'color': '#4ECDC4', 'symbol': 'square'},
    'Manufacturing': {'layer': 3, 'color': '#00D2FF', 'symbol': 'diamond'},
    'Logistics': {'layer': 4, 'color': '#FFEAA7', 'symbol': 'triangle-up'},
    'Distribution': {'layer': 5, 'color': '#55EFC4', 'symbol': 'hexagon'},
    'Customers': {'layer': 6, 'color': '#81C784', 'symbol': 'star'},
    'Partners': {'layer': 3.5, 'color': '#FFD93D', 'symbol': 'cross'}
}

# Build supply chain network
for category, items in supply_chain.items():
    config = layer_config.get(category, {'layer': 3, 'color': '#95A5A6'})

    for item in items[:8]:
        node_id = f"{category}:{item}"
        G_supply.add_node(
            node_id,
            node_type='entity',
            layer=config['layer'],
            size=30,
            color=config['color'],
            label=item,
            category=category
        )

        # Add directed edges based on supply chain flow
        if config['layer'] < 3:
            # Upstream (suppliers → AAPL)
            G_supply.add_edge(node_id, TICKER, weight=2)
        elif config['layer'] > 3:
            # Downstream (AAPL → customers)
            G_supply.add_edge(TICKER, node_id, weight=2)
        else:
            # Partners (bidirectional)
            G_supply.add_edge(TICKER, node_id, weight=1)
            G_supply.add_edge(node_id, TICKER, weight=1)

print(f"\n📊 Supply chain network stats:")
print(f"  • Total nodes: {G_supply.number_of_nodes()}")
print(f"  • Total edges: {G_supply.number_of_edges()}")

# Create hierarchical layout
print("\n🎨 Creating supply chain flow visualization...")

def create_supply_chain_layout(G, layer_config):
    """Create left-to-right hierarchical layout"""
    pos = {}

    # Group nodes by layer
    layers = {}
    for node in G.nodes():
        layer = G.nodes[node].get('layer', 3)
        if layer not in layers:
            layers[layer] = []
        layers[layer].append(node)

    # Position nodes
    x_spacing = 2.5
    for layer_num, nodes in sorted(layers.items()):
        n_nodes = len(nodes)
        y_spacing = 1.2
        total_height = (n_nodes - 1) * y_spacing

        for i, node in enumerate(nodes):
            x = (layer_num - 3) * x_spacing  # Center at layer 3 (AAPL)
            y = -total_height/2 + i * y_spacing
            pos[node] = (x, y)

    return pos

pos_supply = create_supply_chain_layout(G_supply, layer_config)

# Create figure
fig_supply = go.Figure()

# Add edges with arrows
for edge in G_supply.edges():
    x0, y0 = pos_supply[edge[0]]
    x1, y1 = pos_supply[edge[1]]

    # Calculate arrow
    dx = x1 - x0
    dy = y1 - y0

    # Edge line
    fig_supply.add_trace(go.Scatter(
        x=[x0, x1],
        y=[y0, y1],
        mode='lines',
        line=dict(width=1.5, color='rgba(100,100,100,0.3)'),
        hoverinfo='none',
        showlegend=False
    ))

    # Arrow head
    arrow_x = x0 + 0.85 * dx
    arrow_y = y0 + 0.85 * dy
    angle = np.degrees(np.arctan2(dy, dx))

    fig_supply.add_annotation(
        x=arrow_x, y=arrow_y,
        ax=x0 + 0.75 * dx, ay=y0 + 0.75 * dy,
        xref='x', yref='y',
        axref='x', ayref='y',
        showarrow=True,
        arrowhead=2,
        arrowsize=1,
        arrowwidth=2,
        arrowcolor='rgba(100,100,100,0.5)'
    )

# Add nodes by category
for category, config in layer_config.items():
    category_nodes = [n for n in G_supply.nodes()
                     if G_supply.nodes[n].get('category') == category]

    if not category_nodes:
        continue

    x_coords = [pos_supply[n][0] for n in category_nodes]
    y_coords = [pos_supply[n][1] for n in category_nodes]
    labels = [G_supply.nodes[n].get('label', n.split(':')[-1]) for n in category_nodes]

    fig_supply.add_trace(go.Scatter(
        x=x_coords,
        y=y_coords,
        mode='markers+text',
        name=category.replace('_', ' '),
        text=labels,
        textposition='middle center',
        textfont=dict(size=8, color='white'),
        marker=dict(
            size=40,
            color=config['color'],
            symbol=config.get('symbol', 'circle'),
            line=dict(width=2, color='white'),
            opacity=0.9
        ),
        hovertemplate='<b>%{text}</b><br>' + category + '<extra></extra>'
    ))

# Add center node (AAPL)
fig_supply.add_trace(go.Scatter(
    x=[0],
    y=[0],
    mode='markers+text',
    name='Company',
    text=[f'<b>{TICKER}</b>'],
    textposition='middle center',
    textfont=dict(size=16, color='white', family='Arial Black'),
    marker=dict(
        size=100,
        color='#FF6B6B',
        symbol='circle',
        line=dict(width=4, color='white')
    ),
    hovertemplate=f'<b>{TICKER}</b><br>Supply Chain Hub<extra></extra>'
))

# Add flow labels
flow_annotations = [
    {'x': -6, 'y': 5, 'text': '⬅ <b>UPSTREAM</b><br>Suppliers & Materials', 'color': '#74B9FF'},
    {'x': 6, 'y': 5, 'text': '<b>DOWNSTREAM</b> ➡<br>Distribution & Customers', 'color': '#81C784'},
    {'x': 0, 'y': -6, 'text': '🤝 <b>PARTNERSHIPS</b><br>Strategic Collaborations', 'color': '#FFD93D'}
]

for annotation in flow_annotations:
    fig_supply.add_annotation(
        x=annotation['x'],
        y=annotation['y'],
        text=annotation['text'],
        showarrow=False,
        font=dict(size=12, color=annotation['color'], family='Arial'),
        bgcolor='rgba(255,255,255,0.9)',
        bordercolor=annotation['color'],
        borderwidth=2,
        borderpad=8
    )

# Update layout
fig_supply.update_layout(
    title=dict(
        text=f"{TICKER} Supply Chain Flow Analysis<br><sub>Hierarchical Value Chain Visualization</sub>",
        font=dict(size=24, color='#2C3E50'),
        x=0.5,
        xanchor='center'
    ),
    showlegend=True,
    legend=dict(
        x=1.02,
        y=1,
        bgcolor='rgba(255,255,255,0.95)',
        bordercolor='#BDC3C7',
        borderwidth=2,
        font=dict(size=9)
    ),
    hovermode='closest',
    plot_bgcolor='#F8F9FA',
    paper_bgcolor='white',
    xaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(200,200,200,0.2)',
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='rgba(255,107,107,0.3)',
        showticklabels=False,
        range=[-8, 8]
    ),
    yaxis=dict(
        showgrid=True,
        gridwidth=1,
        gridcolor='rgba(200,200,200,0.2)',
        zeroline=True,
        zerolinewidth=2,
        zerolinecolor='rgba(255,107,107,0.3)',
        showticklabels=False,
        range=[-7, 6],
        scaleanchor='x',
        scaleratio=1
    ),
    height=1000,
    width=1600,
    margin=dict(l=50, r=200, t=100, b=50)
)

fig_supply.show()
print("✅ Supply chain visualization created!")

# Save files
supply_html = f"{TICKER}_supply_chain_{YEARS}y.html"
fig_supply.write_html(supply_html)
print(f"✅ Saved: {supply_html}")

# Supply chain risk analysis
print("\n⚠ Supply Chain Risk Analysis:")

supplier_concentration = len(supply_chain.get('Tier_1_Suppliers', []))
risk_level = 'HIGH' if supplier_concentration < 5 else 'MEDIUM' if supplier_concentration < 10 else 'LOW'

print(f"  • Supplier Concentration Risk: {risk_level}")
print(f"    - Number of Tier-1 suppliers: {supplier_concentration}")

# Check for geographic risk
high_risk_locations = ['China', 'Taiwan']
risky_locations = [loc for loc in relationships.get('Locations', [])
                   if any(risk in loc for risk in high_risk_locations)]

if risky_locations:
    print(f"  • Geographic Risk: HIGH")
    print(f"    - High-risk regions: {', '.join(risky_locations)}")
else:
    print(f"  • Geographic Risk: MODERATE")

# Sentiment analysis on supply chain
supply_keywords = ['supply', 'chip', 'shortage', 'supplier', 'component']
supply_news = news_df[
    news_df['headline'].str.lower().str.contains('|'.join(supply_keywords), na=False)
]

if len(supply_news) > 0:
    avg_sentiment = supply_news['polarity'].mean()
    print(f"  • Supply Chain Sentiment: {avg_sentiment:.3f}")
    print(f"    - Related articles: {len(supply_news)}")

    if avg_sentiment < -0.1:
        print(f"    - Status: ⚠ NEGATIVE")
    elif avg_sentiment > 0.1:
        print(f"    - Status: ✅ POSITIVE")
    else:
        print(f"    - Status: ⚖ NEUTRAL")

print("\n✅ Enhanced visualizations complete!")
print(f"\n📁 Generated files:")
print(f"  • {network_html}")
print(f"  • {supply_html}")


🔗 Building enhanced supply chain network...
✅ Supply chain entities identified:
  • Raw_Materials: 8 entities
  • Tier_2_Suppliers: 5 entities
  • Tier_1_Suppliers: 5 entities
  • Manufacturing: 3 entities
  • Logistics: 4 entities
  • Distribution: 3 entities
  • Customers: 4 entities
  • Partners: 5 entities

📊 Supply chain network stats:
  • Total nodes: 38
  • Total edges: 40

🎨 Creating supply chain flow visualization...


✅ Supply chain visualization created!
✅ Saved: AAPL_supply_chain_5y.html

⚠ Supply Chain Risk Analysis:
  • Supplier Concentration Risk: MEDIUM
    - Number of Tier-1 suppliers: 5
  • Geographic Risk: HIGH
    - High-risk regions: China
  • Supply Chain Sentiment: 0.171
    - Related articles: 283
    - Status: ✅ POSITIVE

✅ Enhanced visualizations complete!

📁 Generated files:
  • AAPL_relationship_network_5y.html
  • AAPL_supply_chain_5y.html


In [None]:
# ============================================================
# SECTION 11: Yearly Price Movement Causes Analysis
# ============================================================
print("\n📈 Analyzing Yearly Price Movements and Causes...")

# Add year to merged and news_df
merged['year'] = merged['date'].dt.year
merged['month'] = merged['date'].dt.month
merged['quarter'] = merged['date'].dt.quarter
merged['day_of_year'] = merged['date'].dt.dayofyear
news_df['year'] = news_df['date'].dt.year

# Define price movement thresholds
UP_THRESHOLD = 0.02  # >2% up
DOWN_THRESHOLD = -0.02  # <-2% down

# Function to categorize news types (simple keyword-based for grouping similar news)
def categorize_news(headline):
    """Categorize news into types based on keywords"""
    headline_lower = str(headline).lower()
    if any(word in headline_lower for word in ['earnings', 'profit', 'revenue', 'quarterly', 'financial']):
        return 'Earnings/Financials'
    elif any(word in headline_lower for word in ['product', 'launch', 'iphone', 'device', 'software', 'update', 'feature']):
        return 'Product Launches'
    elif any(word in headline_lower for word in ['lawsuit', 'regulation', 'antitrust', 'probe', 'investigation', 'legal']):
        return 'Legal/Regulatory'
    elif any(word in headline_lower for word in ['acquisition', 'merger', 'deal', 'partnership', 'investment']):
        return 'M&A/Partnerships'
    elif any(word in headline_lower for word in ['supply', 'chain', 'chip', 'shortage', 'production', 'manufacturing']):
        return 'Supply Chain'
    elif any(word in headline_lower for word in ['market', 'economy', 'recession', 'inflation', 'interest']):
        return 'Macro/Economic'
    else:
        return 'Other/Misc'

# Apply news categorization
news_df['news_type'] = news_df['headline'].apply(categorize_news)

# Group news by date and type for aggregation (count similar types)
daily_news_types = news_df.groupby(['date', 'news_type']).size().unstack(fill_value=0)
daily_news_types['total_news'] = daily_news_types.sum(axis=1)
daily_news_types['date'] = daily_news_types.index
daily_news_types = daily_news_types.reset_index(drop=True)

# Merge news types counts with merged df
for col in daily_news_types.columns:
    if col not in ['date', 'total_news']:
        merged[col] = daily_news_types.set_index('date')[col].reindex(merged.set_index('date').index).fillna(0).values
merged['total_news'] = daily_news_types.set_index('date')['total_news'].reindex(merged.set_index('date').index).fillna(0).values

# Function to find significant movements and linked causes
def find_movement_causes(df, news_df):
    """Find news causes for significant up/down days, grouped by type"""
    causes = {'UP': [], 'DOWN': []}
    for idx, row in df.iterrows():
        date = row['date']
        year = row['year']
        ret = row['Returns']
        # Look for news in the past 3 days (including the day)
        window_start = date - pd.Timedelta(days=3)
        window_news = news_df[(news_df['date'] >= window_start) & (news_df['date'] <= date)]
        if len(window_news) == 0:
            continue
        if ret > UP_THRESHOLD:
            # Top positive polarity news in window
            top_news = window_news.nlargest(3, 'polarity')
            headlines = '; '.join([str(h)[:80] for h in top_news['headline']])
            dominant_type = top_news['news_type'].mode().iloc[0] if len(top_news['news_type'].mode()) > 0 else 'Other/Misc'
            causes['UP'].append({
                'date': date.strftime('%Y-%m-%d'),
                'year': year,
                'headlines': headlines,
                'cause_type': dominant_type,
                'return_pct': f"+{ret:.2f}%",
                'news_count': len(window_news)
            })
        elif ret < DOWN_THRESHOLD:
            # Top negative polarity news in window
            top_news = window_news.nsmallest(3, 'polarity')
            headlines = '; '.join([str(h)[:80] for h in top_news['headline']])
            dominant_type = top_news['news_type'].mode().iloc[0] if len(top_news['news_type'].mode()) > 0 else 'Other/Misc'
            causes['DOWN'].append({
                'date': date.strftime('%Y-%m-%d'),
                'year': year,
                'headlines': headlines,
                'cause_type': dominant_type,
                'return_pct': f"{ret:.2f}%",
                'news_count': len(window_news)
            })
    return causes

# Find causes
movement_causes = find_movement_causes(merged, news_df)

# Print detailed causes by year and type (grouping similar news types)
print("\n📋 Detailed Causes of Significant Price Movements:")
print("   (Showing top 3 examples per type/year for brevity)")
for direction in ['UP', 'DOWN']:
    print(f"\n{direction} Movements (Stock {'Up' if direction=='UP' else 'Down'}):")
    yearly_types = {}
    for event in movement_causes[direction]:
        year = event['year']
        typ = event['cause_type']
        if year not in yearly_types:
            yearly_types[year] = {}
        if typ not in yearly_types[year]:
            yearly_types[year][typ] = []
        desc = f"{event['date']} ({event['return_pct']}): {event['headlines']}... [Type: {typ}, News: {event['news_count']}]"
        yearly_types[year][typ].append(desc)

    for year in sorted(yearly_types.keys()):
        print(f"\n  📅 Year {year}:")
        for typ in sorted(yearly_types[year].keys()):
            print(f"    📰 {typ}:")
            for item in yearly_types[year][typ][:3]:  # Top 3 per type
                print(f"      • {item}")

# Seasonal pattern analysis (do patterns repeat? e.g., Q1 up, Q2 sideways)
print("\n🔄 Seasonal Patterns Across Years:")
print("   (Avg Daily Return % by Quarter - Positive=Up Trend, Negative=Down, Near 0=Sideways)")

seasonal_patterns = merged.groupby(['year', 'quarter']).agg({
    'Returns': ['mean', 'sum', 'count', 'std']
}).round(3)
seasonal_patterns.columns = ['Avg_Return_%', 'Total_Return_%', 'Trading_Days', 'Volatility_%']
print(seasonal_patterns)

# Interpretation
print("\n📊 Pattern Insights:")
q_means = seasonal_patterns.groupby('quarter')['Avg_Return_%'].mean().round(3)
for q in range(1,5):
    trend = "Up Trend" if q_means[q] > 0.1 else ("Down Trend" if q_means[q] < -0.1 else "Sideways")
    print(f"  • Quarter {q}: Avg {q_means[q]:.3f}% → {trend} (consistent across years?)")


📈 Analyzing Yearly Price Movements and Causes...

📋 Detailed Causes of Significant Price Movements:
   (Showing top 3 examples per type/year for brevity)

UP Movements (Stock Up):

  📅 Year 2022:
    📰 Earnings/Financials:
      • 2022-07-05 (+1.89%): 5-Star Analyst Lays Out the Bullish Case for Apple Stock; Apple (AAPL) Outpaces Stock Market Gains: What You Should Know; Will Apple TV+ Ever Become Profitable?... [Type: Earnings/Financials, News: 33]
      • 2022-07-12 (+0.68%): The Trade Desk's Deal With Disney May Boost Both Stocks; TSM Q2 Preview: Double-Digit Earnings Growth in Store?; 'Succession,' 'Ted Lasso' lead nominations for TV's Emmy awards... [Type: Earnings/Financials, News: 38]
      • 2022-07-14 (+2.05%): Chipmaker TSMC's shares jump after quarterly profit beats estimates; Chipmaker TSMC's shares jump after quarterly profit beats estimates; Buy Microsoft Stock Before the Price Rises... [Type: Earnings/Financials, News: 75]
    📰 Legal/Regulatory:
      • 2022-09-14 (+0.

In [None]:
# ============================================================
# SECTION 12: BEAUTIFUL & PROFESSIONAL VISUALIZATIONS (Enhanced)
# ============================================================
print("\nCreating Beautiful, Professional-Grade Interactive Dashboards...")

# Set a professional theme
import plotly.io as pio
pio.templates.default = "plotly_white"

# Color palette
PALETTE = {
    'up': '#00C853',      # Vibrant Green
    'down': '#D50000',    # Deep Red
    'neutral': '#FFB300', # Amber
    'background': '#FAFAFA',
    'text': '#212121',
    'grid': '#E0E0E0',
    'accent': '#2962FF'
}

# Font settings
FONT = dict(family="Segoe UI, Arial, sans-serif", size=12, color=PALETTE['text'])

# === GRAPH 1: Quarterly Returns Heatmap - Pattern Repetition ===
print("   [Graph 1] Quarterly Returns Pattern Heatmap")

# Prepare data
quarterly = merged.groupby(['year', 'quarter']).agg({
    'Returns': 'mean'
}).reset_index()
quarterly_pivot = quarterly.pivot(index='year', columns='quarter', values='Returns')

fig1 = go.Figure(data=go.Heatmap(
    z=quarterly_pivot.values,
    x=[f'Q{q}' for q in quarterly_pivot.columns],
    y=quarterly_pivot.index.astype(str),
    colorscale=[
        [0.0, PALETTE['down']],
        [0.4, '#FF8A80'],
        [0.5, '#FFF3E0'],
        [0.6, '#C8E6C9'],
        [1.0, PALETTE['up']]
    ],
    zmid=0,
    text=np.round(quarterly_pivot.values * 100, 2),
    texttemplate="%{text}%",
    textfont=dict(size=11, color='white'),
    hoverongaps=False,
    colorbar=dict(title="Avg Return %", titleside="right")
))

fig1.update_layout(
    title={
        'text': f"<b>{TICKER} Quarterly Return Patterns</b><br><sub>Do trends repeat every year? (Green=Up, Red=Down)</sub>",
        'x': 0.5, 'xanchor': 'center', 'font': dict(size=16, family="Segoe UI")
    },
    xaxis_title="<b>Quarter</b>",
    yaxis_title="<b>Year</b>",
    plot_bgcolor=PALETTE['background'],
    paper_bgcolor=PALETTE['background'],
    font=FONT,
    height=500,
    margin=dict(l=60, r=60, t=100, b=60)
)

# Add annotations for strong patterns
q_means = quarterly.groupby('quarter')['Returns'].mean()
for q in range(1,5):
    avg = q_means[q] * 100
    trend = "Strong Up" if avg > 0.3 else ("Up" if avg > 0.1 else ("Down" if avg < -0.1 else "Sideways"))
    color = PALETTE['up'] if avg > 0 else (PALETTE['down'] if avg < 0 else PALETTE['neutral'])
    fig1.add_annotation(
        x=f'Q{q}', y=quarterly_pivot.index[-1],
        text=f"{trend}",
        showarrow=False,
        font=dict(color=color, size=10, family="Arial Black"),
        xanchor='center', yanchor='top', yshift=-5
    )

fig1.show()

# === GRAPH 2: Yearly Price & News Volume with Cause Highlights ===
print("   [Graph 2] Yearly Price Movement with News Causes")

fig2 = make_subplots(
    rows=3, cols=1,
    subplot_titles=(
        f"<b>{TICKER} Yearly Total Return</b>",
        "<b>News Volume by Type</b>",
        "<b>Significant Price Events (Bubble = News Impact)</b>"
    ),
    vertical_spacing=0.08,
    row_heights=[0.35, 0.35, 0.3],
    shared_xaxes=True
)

# 1. Yearly Returns
yearly_ret = merged.groupby('year')['Returns'].sum().round(2)
fig2.add_trace(go.Bar(
    x=yearly_ret.index,
    y=yearly_ret.values,
    name="Total Return %",
    marker_color=[PALETTE['up'] if x > 0 else PALETTE['down'] for x in yearly_ret.values],
    text=yearly_ret.values,
    textposition='outside',
    texttemplate="%{text}%"
), row=1, col=1)

# 2. News Volume by Type
news_by_type_year = news_df.groupby(['year', 'news_type']).size().unstack(fill_value=0)
colors = px.colors.qualitative.Set3[:len(news_by_type_year.columns)]
for i, typ in enumerate(news_by_type_year.columns):
    fig2.add_trace(go.Bar(
        x=news_by_type_year.index,
        y=news_by_type_year[typ],
        name=typ,
        marker_color=colors[i]
    ), row=2, col=1)

# 3. Bubble Chart: Up/Down Events
up_df = pd.DataFrame(movement_causes['UP'])
down_df = pd.DataFrame(movement_causes['DOWN'])

if not up_df.empty:
    fig2.add_trace(go.Scatter(
        x=up_df['year'] + np.random.normal(0, 0.08, len(up_df)),
        y=up_df['return_pct'].str.extract(r'([+-]?\d+\.?\d*)').astype(float),
        mode='markers+text',
        name='Price Up',
        marker=dict(
            size=up_df['news_count'],
            sizemode='area',
            sizeref=2.*max(up_df['news_count'])/(40.**2),
            color=PALETTE['up'],
            opacity=0.8,
            line=dict(width=2, color='white')
        ),
        text=up_df['cause_type'],
        textposition="middle center",
        textfont=dict(size=9, color='white', family="Arial Bold"),
        hovertemplate="<b>%{text}</b><br>Return: %{y}%<br>News: %{marker.size}<extra></extra>"
    ), row=3, col=1)

if not down_df.empty:
    fig2.add_trace(go.Scatter(
        x=down_df['year'] + np.random.normal(0, 0.08, len(down_df)),
        y=down_df['return_pct'].str.extract(r'([+-]?\d+\.?\d*)').astype(float),
        mode='markers+text',
        name='Price Down',
        marker=dict(
            size=down_df['news_count'],
            sizemode='area',
            sizeref=2.*max(down_df['news_count'])/(40.**2),
            color=PALETTE['down'],
            opacity=0.8,
            line=dict(width=2, color='white')
        ),
        text=down_df['cause_type'],
        textposition="middle center",
        textfont=dict(size=9, color='white', family="Arial Bold"),
        hovertemplate="<b>%{text}</b><br>Return: %{y}%<br>News: %{marker.size}<extra></extra>"
    ), row=3, col=1)

fig2.update_layout(
    title={
        'text': f"<b>{TICKER} Yearly Analysis Dashboard</b><br><sub>Price, News, and Causal Events</sub>",
        'x': 0.5, 'xanchor': 'center', 'font': dict(size=18)
    },
    barmode='stack',
    plot_bgcolor=PALETTE['background'],
    paper_bgcolor=PALETTE['background'],
    font=FONT,
    height=900,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=60, r=60, t=120, b=60)
)

fig2.update_xaxes(showgrid=True, gridcolor=PALETTE['grid'])
fig2.update_yaxes(showgrid=True, gridcolor=PALETTE['grid'])

fig2.show()

# === GRAPH 3: Seasonal Trend Calendar (Intra-Year Pattern) ===
print("   [Graph 3] Seasonal Trend Calendar")

monthly_avg = merged.groupby('month')['Returns'].mean().reset_index()
monthly_avg['Return %'] = (monthly_avg['Returns'] * 100).round(2)

fig3 = go.Figure(data=go.Bar(
    x=['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'],
    y=monthly_avg['Return %'],
    marker_color=[PALETTE['up'] if x > 0 else PALETTE['down'] for x in monthly_avg['Return %']],
    text=monthly_avg['Return %'],
    textposition='outside',
    texttemplate="%{text}%",
    hovertemplate="<b>%{x}</b>: %{y}%<extra></extra>"
))

fig3.add_hline(y=0, line_color=PALETTE['grid'], line_width=1)

fig3.update_layout(
    title={
        'text': f"<b>{TICKER} Average Monthly Return Pattern</b><br><sub>Consistent seasonal behavior across 5 years?</sub>",
        'x': 0.5, 'xanchor': 'center', 'font': dict(size=16)
    },
    xaxis_title="<b>Month</b>",
    yaxis_title="<b>Avg Daily Return %</b>",
    plot_bgcolor=PALETTE['background'],
    paper_bgcolor=PALETTE['background'],
    font=FONT,
    height=500,
    margin=dict(l=60, r=60, t=100, b=60),
    yaxis=dict(range=[monthly_avg['Return %'].min()*1.2, monthly_avg['Return %'].max()*1.2])
)

fig3.show()

# === GRAPH 4: News Sentiment vs Price (Interactive Scatter) ===
print("   [Graph 4] Sentiment vs Price Movement")

scatter_data = merged.dropna(subset=['avg_polarity', 'Returns']).copy()
scatter_data['size'] = scatter_data['news_count']
scatter_data['color'] = scatter_data['Returns'].apply(
    lambda x: PALETTE['up'] if x > 0.02 else (PALETTE['down'] if x < -0.02 else PALETTE['neutral'])
)

fig4 = go.Figure()

fig4.add_trace(go.Scatter(
    x=scatter_data['avg_polarity'],
    y=scatter_data['Returns'] * 100,
    mode='markers',
    marker=dict(
        size=scatter_data['size'],
        sizemode='area',
        sizeref=2.*scatter_data['size'].max()/(50.**2),
        color=scatter_data['color'],
        opacity=0.7,
        line=dict(width=1, color='white')
    ),
    text=scatter_data['date'].dt.strftime('%Y-%m-%d'),
    hovertemplate="<b>Date:</b> %{text}<br><b>Sentiment:</b> %{x:.2f}<br><b>Return:</b> %{y:.2f}%<br><b>News Count:</b> %{marker.size}<extra></extra>"
))

# Add trendline
z = np.polyfit(scatter_data['avg_polarity'], scatter_data['Returns']*100, 1)
p = np.poly1d(z)
fig4.add_trace(go.Scatter(
    x=scatter_data['avg_polarity'],
    y=p(scatter_data['avg_polarity']),
    mode='lines',
    line=dict(color=PALETTE['accent'], width=3, dash='dot'),
    name=f"Trend (r={correlation:.2f})"
))

fig4.update_layout(
    title={
        'text': f"<b>{TICKER} News Sentiment vs Daily Return</b><br><sub>Correlation: {correlation:.3f} | Bubble Size = News Volume</sub>",
        'x': 0.5, 'xanchor': 'center', 'font': dict(size=16)
    },
    xaxis_title="<b>Average Sentiment Polarity</b>",
    yaxis_title="<b>Daily Return %</b>",
    plot_bgcolor=PALETTE['background'],
    paper_bgcolor=PALETTE['background'],
    font=FONT,
    height=600,
    showlegend=False,
    margin=dict(l=60, r=60, t=100, b=60),
    xaxis=dict(range=[-1, 1]),
    yaxis=dict(range=[scatter_data['Returns'].min()*120, scatter_data['Returns'].max()*120])
)

fig4.show()

# Final Summary
print(f"\nCOMPLETED: {TICKER} Deep Analysis Dashboard")
print(f"   • {len(movement_causes['UP'])} Up events | {len(movement_causes['DOWN'])} Down events")
print(f"   • Sentiment-Return Correlation: {correlation:+.3f}")
print(f"   • Check graphs for repeating patterns (e.g., Q1 rally, Sep dip)")
print(f"   • All graphs are interactive: hover, zoom, click legend")

# Optional: Save to HTML
# fig1.write_html(f"{TICKER}_Quarterly_Pattern.html")
# fig2.write_html(f"{TICKER}_Yearly_Dashboard.html")
# etc.


Creating Beautiful, Professional-Grade Interactive Dashboards...
   [Graph 1] Quarterly Returns Pattern Heatmap


   [Graph 2] Yearly Price Movement with News Causes


   [Graph 3] Seasonal Trend Calendar


   [Graph 4] Sentiment vs Price Movement



COMPLETED: AAPL Deep Analysis Dashboard
   • 205 Up events | 177 Down events
   • Sentiment-Return Correlation: +0.412
   • Check graphs for repeating patterns (e.g., Q1 rally, Sep dip)
   • All graphs are interactive: hover, zoom, click legend
