In [5]:
# Sentiment vs Stock Price Analysis with Mock Data
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

class SentimentStockVisualizer:
    def __init__(self):
        pass
        
    def generate_mock_sentiment_data(self, ticker, start_date, end_date):
        """
        Generate mock sentiment data for visualization purposes
        """
        # Create date range
        date_range = pd.date_range(start=start_date, end=end_date, freq='1H')
        
        # Generate synthetic sentiment data with some correlation to stock movements
        np.random.seed(42)  # For reproducibility
        
        # Base sentiment - random walk with mean reversion
        base_sentiment = np.random.normal(0.5, 0.1, len(date_range))
        # Apply some smoothing to simulate trends
        sentiment_smooth = pd.Series(base_sentiment).rolling(window=24, min_periods=1).mean().values
        
        # Generate other metrics
        mentions_count = np.random.poisson(20, len(date_range))
        social_impact = np.random.gamma(2, 10, len(date_range))
        
        # Create DataFrame
        sentiment_df = pd.DataFrame({
            'sentiment_score': sentiment_smooth,
            'mentions_count': mentions_count,
            'social_impact': social_impact
        }, index=date_range)
        
        return sentiment_df
        
    def fetch_data_for_visualization(self, ticker, days=30):
        """
        Fetch stock data and generate mock sentiment data
        """
        try:
            end_date = datetime.now()
            start_date = end_date - timedelta(days=days)
            
            print(f"Downloading market data for {ticker} from {start_date} to {end_date}")
            stock_data = yf.download(ticker, start=start_date, end=end_date, interval="1h")
            
            if stock_data.empty:
                print(f"No market data found for {ticker}")
                return None, None
                
            # Generate mock sentiment data
            sentiment_df = self.generate_mock_sentiment_data(ticker, start_date, end_date)
            
            # Align indices
            common_dates = sentiment_df.index.intersection(stock_data.index)
            if len(common_dates) < 10:  # Need at least 10 data points
                # Recreate sentiment data using stock data dates to ensure alignment
                sentiment_df = self.generate_mock_sentiment_data(ticker, 
                                                              stock_data.index[0], 
                                                              stock_data.index[-1])
                # Now match up the indices again
                sentiment_df = sentiment_df.reindex(stock_data.index, method='nearest')
            else:
                sentiment_df = sentiment_df.loc[common_dates]
                stock_data = stock_data.loc[common_dates]
                
            # Add some correlation with the stock data
            # This makes the mock data more realistic by slightly correlating sentiment with price changes
            price_changes = stock_data['Close'].pct_change().fillna(0)
            noise_factor = 0.3  # How much the price affects sentiment
            sentiment_df['sentiment_score'] += price_changes.shift(3) * noise_factor  # Lagged effect
            sentiment_df['sentiment_score'] = sentiment_df['sentiment_score'].clip(0, 1)  # Keep in 0-1 range
            
            print(f"Generated {len(sentiment_df)} records of mock sentiment data for {ticker}")
            return sentiment_df, stock_data
            
        except Exception as e:
            print(f"Failed to download {ticker} data: {str(e)}")
            return None, None
        
    def create_correlation_plot(self, ticker, days=30):
        """
        Create correlation plot between sentiment and stock price
        """
        sentiment_df, stock_data = self.fetch_data_for_visualization(ticker, days)
        
        if sentiment_df is None or stock_data is None:
            print(f"Insufficient data for {ticker}")
            return
            
        # Merge datasets
        try:
            # Resample both datasets to hourly to ensure alignment
            stock_data_resampled = stock_data.resample('1H').last().dropna()
            sentiment_resampled = sentiment_df.resample('1H').mean().dropna()
            
            # Merge data
            merged = pd.merge(
                sentiment_resampled,
                stock_data_resampled,
                left_index=True,
                right_index=True,
                how='inner'
            )
            
            if merged.empty:
                print(f"No overlapping data found for {ticker}")
                return
                
            # Calculate price changes
            merged['price_change'] = merged['Close'].pct_change()
            merged['price_change_next'] = merged['Close'].pct_change().shift(-1)
            merged = merged.dropna()
            
            # Calculate lagged sentiment correlations
            lags = range(1, 13)  # Check correlations up to 12 hours
            correlations = {}
            
            for lag in lags:
                merged[f'sentiment_lag_{lag}'] = merged['sentiment_score'].shift(lag)
                corr = merged['price_change'].corr(merged[f'sentiment_lag_{lag}'])
                correlations[lag] = corr
                
            # Plot correlation data
            fig = make_subplots(
                rows=2, cols=1,
                subplot_titles=(
                    f"{ticker} Sentiment vs. Stock Price", 
                    "Lagged Sentiment Correlation with Price Changes"
                ),
                vertical_spacing=0.15,
                specs=[[{"secondary_y": True}], [{"secondary_y": False}]]
            )
            
            # Add sentiment and price to first subplot
            fig.add_trace(
                go.Scatter(
                    x=merged.index,
                    y=merged['sentiment_score'],
                    name='Sentiment Score',
                    line=dict(color='blue')
                ),
                row=1, col=1
            )
            
            fig.add_trace(
                go.Scatter(
                    x=merged.index,
                    y=merged['Close'],
                    name='Stock Price',
                    line=dict(color='green')
                ),
                row=1, col=1, secondary_y=True
            )
            
            # Add correlation by lag to second subplot
            fig.add_trace(
                go.Bar(
                    x=list(correlations.keys()),
                    y=list(correlations.values()),
                    name='Correlation',
                    marker_color='purple'
                ),
                row=2, col=1
            )
            
            # Update layout
            fig.update_layout(
                height=800,
                title_text=f"Sentiment Analysis Impact on {ticker} Stock Price",
                hovermode="x unified"
            )
            
            fig.update_yaxes(title_text="Sentiment Score", row=1, col=1)
            fig.update_yaxes(title_text="Stock Price ($)", row=1, col=1, secondary_y=True)
            fig.update_xaxes(title_text="Date", row=1, col=1)
            fig.update_xaxes(title_text="Lag (Hours)", row=2, col=1)
            fig.update_yaxes(title_text="Correlation", row=2, col=1)
            
            return fig
            
        except Exception as e:
            print(f"Error creating correlation plot: {str(e)}")
            return None
    
    def create_heatmap(self, tickers, days=30):
        """
        Create a heatmap showing sentiment-price correlations across multiple tickers
        """
        correlation_data = {}
        
        for ticker in tickers:
            sentiment_df, stock_data = self.fetch_data_for_visualization(ticker, days)
            
            if sentiment_df is None or stock_data is None:
                print(f"Skipping {ticker} due to insufficient data")
                continue
                
            try:
                # Resample and merge
                stock_data_resampled = stock_data.resample('1H').last().dropna()
                sentiment_resampled = sentiment_df.resample('1H').mean().dropna()
                
                merged = pd.merge(
                    sentiment_resampled,
                    stock_data_resampled['Close'],
                    left_index=True,
                    right_index=True,
                    how='inner'
                )
                
                if merged.empty:
                    continue
                    
                # Calculate correlations
                correlation = merged['sentiment_score'].corr(merged['Close'])
                correlation_data[ticker] = correlation
                
            except Exception as e:
                print(f"Error processing {ticker}: {str(e)}")
                continue
        
        if not correlation_data:
            print("No correlation data generated")
            return None
            
        # Create heatmap
        correlations = pd.Series(correlation_data)
        correlations = correlations.sort_values(ascending=False)
        
        fig = px.bar(
            x=correlations.index,
            y=correlations.values,
            title="Sentiment-Price Correlation by Tech Company",
            labels={'x': 'Ticker', 'y': 'Correlation'},
            color=correlations.values,
            color_continuous_scale="RdBu",
            text=correlations.values.round(2)
        )
        
        fig.update_layout(height=600)
        fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
        
        return fig
    
    def create_sentiment_impact_dashboard(self, tickers, days=30):
        """
        Create a comprehensive dashboard showing sentiment impact
        """
        sentiment_trends = {}
        price_trends = {}
        impact_scores = {}
        
        for ticker in tickers:
            sentiment_df, stock_data = self.fetch_data_for_visualization(ticker, days)
            
            if sentiment_df is None or stock_data is None:
                continue
                
            try:
                # Daily aggregation for dashboard
                daily_sentiment = sentiment_df.resample('D').mean()
                daily_stock = stock_data.resample('D').last()
                
                # Store trends
                sentiment_trends[ticker] = daily_sentiment['sentiment_score'].tolist()
                price_trends[ticker] = daily_stock['Close'].tolist()
                
                # Calculate impact score
                merged = pd.merge(
                    daily_sentiment,
                    daily_stock['Close'],
                    left_index=True,
                    right_index=True,
                    how='inner'
                )
                
                # Create impact score based on correlation and volatility
                correlation = merged['sentiment_score'].corr(merged['Close'])
                price_volatility = merged['Close'].pct_change().std()
                sentiment_volatility = merged['sentiment_score'].pct_change().std()
                
                impact_score = abs(correlation) * (price_volatility * sentiment_volatility * 100)
                impact_scores[ticker] = impact_score
                
            except Exception as e:
                print(f"Error processing {ticker} for dashboard: {str(e)}")
                continue
        
        # Create dashboard visualization
        dates = pd.date_range(end=datetime.now(), periods=days).tolist()
        
        # Create figure
        fig = make_subplots(
            rows=2, cols=2,
            subplot_titles=(
                "Sentiment Impact Score by Company",
                "Average Sentiment Trend",
                "Stock Price Trends (Normalized)",
                "Sentiment vs Price Movement"
            ),
            specs=[
                [{"type": "bar"}, {"type": "scatter"}],
                [{"type": "scatter"}, {"type": "scatter"}]
            ],
            vertical_spacing=0.12,
            horizontal_spacing=0.08
        )
        
        # Plot 1: Impact score by company
        impact_df = pd.Series(impact_scores).sort_values(ascending=False)
        
        fig.add_trace(
            go.Bar(
                x=impact_df.index,
                y=impact_df.values,
                marker_color='rgb(55, 83, 109)'
            ),
            row=1, col=1
        )
        
        # Plot 2: Sentiment trends
        for ticker, values in sentiment_trends.items():
            if len(values) < len(dates):
                # Pad with NaN if needed
                values = [np.nan] * (len(dates) - len(values)) + values
            else:
                # Trim if too long
                values = values[-len(dates):]
                
            fig.add_trace(
                go.Scatter(
                    x=dates[-len(values):],
                    y=values,
                    name=ticker,
                    mode='lines'
                ),
                row=1, col=2
            )
        
        # Plot 3: Normalized price trends
        for ticker, values in price_trends.items():
            if len(values) < len(dates):
                continue
                
            # Normalize prices to percentage change from start
            normalized = [(v/values[0]-1)*100 for v in values]
            
            fig.add_trace(
                go.Scatter(
                    x=dates[-len(normalized):],
                    y=normalized,
                    name=ticker,
                    mode='lines'
                ),
                row=2, col=1
            )
        
        # Plot 4: Sample detail view for first ticker
        if tickers and tickers[0] in sentiment_trends and tickers[0] in price_trends:
            ticker = tickers[0]
            
            sentiment_df, stock_data = self.fetch_data_for_visualization(ticker, days)
            if sentiment_df is not None and stock_data is not None:
                # Daily data
                daily_sent = sentiment_df.resample('D').mean()
                daily_price = stock_data.resample('D').last()
                
                # Create dual axis plot
                fig.add_trace(
                    go.Scatter(
                        x=daily_sent.index,
                        y=daily_sent['sentiment_score'],
                        name='Sentiment',
                        line=dict(color='blue')
                    ),
                    row=2, col=2
                )
                
                fig.add_trace(
                    go.Scatter(
                        x=daily_price.index,
                        y=daily_price['Close'],
                        name='Price',
                        yaxis="y2",
                        line=dict(color='green')
                    ),
                    row=2, col=2
                )
                
                # Add second y-axis
                fig.update_layout(
                    yaxis4=dict(
                        title="Sentiment",
                        side="left"
                    ),
                    yaxis5=dict(
                        title=f"{ticker} Price ($)",
                        side="right",
                        overlaying="y4"
                    )
                )
        
        # Update layout
        fig.update_layout(
            height=900,
            width=1100,
            title_text="Tech Company Sentiment Impact Dashboard",
            showlegend=False,
            hovermode="closest"
        )
        
        # Update axes titles
        fig.update_xaxes(title_text="Company", row=1, col=1)
        fig.update_yaxes(title_text="Impact Score", row=1, col=1)
        
        fig.update_xaxes(title_text="Date", row=1, col=2)
        fig.update_yaxes(title_text="Sentiment Score", row=1, col=2)
        
        fig.update_xaxes(title_text="Date", row=2, col=1)
        fig.update_yaxes(title_text="Price Change (%)", row=2, col=1)
        
        fig.update_xaxes(title_text="Date", row=2, col=2)
        
        return fig

    def analyze_sentiment_patterns(self, ticker, days=30):
        """
        Analyze and visualize patterns in sentiment and their relationship to key market events
        """
        sentiment_df, stock_data = self.fetch_data_for_visualization(ticker, days)
        
        if sentiment_df is None or stock_data is None:
            print(f"Insufficient data for {ticker}")
            return None
            
        # Detect key events (significant price movements)
        stock_daily = stock_data.resample('D').last()
        stock_daily['returns'] = stock_daily['Close'].pct_change()
        
        # Define significant events as days with returns > 1.5 std dev
        std_dev = stock_daily['returns'].std()
        threshold = 1.5 * std_dev
        
        significant_events = stock_daily[abs(stock_daily['returns']) > threshold].copy()
        significant_events['event_type'] = significant_events['returns'].apply(
            lambda x: 'Positive' if x > 0 else 'Negative'
        )
        
        # Resample to daily for cleaner visualization
        sentiment_daily = sentiment_df.resample('D').mean()
        
        # Calculate some metrics
        sentiment_daily['smoothed'] = sentiment_daily['sentiment_score'].rolling(3).mean()
        sentiment_daily['momentum'] = sentiment_daily['sentiment_score'].diff(3)
        
        # Create the visualization
        fig = make_subplots(
            rows=2, cols=1,
            subplot_titles=(
                f"{ticker} Stock Price with Key Events",
                "Sentiment Patterns"
            ),
            vertical_spacing=0.15,
            specs=[[{"secondary_y": False}], [{"secondary_y": True}]]
        )
        
        # Plot stock price with key events
        fig.add_trace(
            go.Scatter(
                x=stock_daily.index,
                y=stock_daily['Close'],
                name='Stock Price',
                line=dict(color='black', width=1)
            ),
            row=1, col=1
        )
        
        # Add positive events
        positive_events = significant_events[significant_events['event_type'] == 'Positive']
        if not positive_events.empty:
            fig.add_trace(
                go.Scatter(
                    x=positive_events.index,
                    y=positive_events['Close'],
                    mode='markers',
                    marker=dict(color='green', size=12, symbol='triangle-up'),
                    name='Positive Events'
                ),
                row=1, col=1
            )
        
        # Add negative events
        negative_events = significant_events[significant_events['event_type'] == 'Negative']
        if not negative_events.empty:
            fig.add_trace(
                go.Scatter(
                    x=negative_events.index,
                    y=negative_events['Close'],
                    mode='markers',
                    marker=dict(color='red', size=12, symbol='triangle-down'),
                    name='Negative Events'
                ),
                row=1, col=1
            )
        
        # Plot sentiment patterns
        fig.add_trace(
            go.Scatter(
                x=sentiment_daily.index,
                y=sentiment_daily['sentiment_score'],
                name='Daily Sentiment',
                line=dict(color='blue', width=1)
            ),
            row=2, col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=sentiment_daily.index,
                y=sentiment_daily['smoothed'],
                name='Smoothed (3-day)',
                line=dict(color='purple', width=2)
            ),
            row=2, col=1
        )
        
        fig.add_trace(
            go.Scatter(
                x=sentiment_daily.index,
                y=sentiment_daily['momentum'],
                name='Momentum',
                line=dict(color='orange', width=1, dash='dot'),
                yaxis="y2"
            ),
            row=2, col=1
        )
        
        # Add event markers to sentiment chart too
        for event_date in significant_events.index:
            fig.add_vline(
                x=event_date, 
                line_width=1, 
                line_dash="dash", 
                line_color="gray",
                row=2, col=1
            )
        
        # Update layout
        fig.update_layout(
            height=800,
            title_text=f"Sentiment Patterns and Key Market Events for {ticker}",
            legend=dict(
                orientation="h",
                yanchor="bottom",
                y=1.02,
                xanchor="right",
                x=1
            )
        )
        
        fig.update_yaxes(title_text="Stock Price ($)", row=1, col=1)
        fig.update_xaxes(title_text="Date", row=1, col=1)
        
        fig.update_yaxes(title_text="Sentiment Score", row=2, col=1)
        fig.update_yaxes(title_text="Momentum", row=2, col=1, secondary_y=True)
        fig.update_xaxes(title_text="Date", row=2, col=1)
        
        return fig

# Example usage
if __name__ == "__main__":
    # Define tech tickers to analyze
    tech_tickers = ["AAPL", "MSFT", "GOOGL", "AMZN", "META", "NVDA", "TSLA"]
    
    # Initialize visualizer
    visualizer = SentimentStockVisualizer()
    
    # Display correlation for a specific ticker
    apple_fig = visualizer.create_correlation_plot("AAPL", days=30)
    if apple_fig:
        apple_fig.show()
    
    # Create heatmap of correlations
    heatmap = visualizer.create_heatmap(tech_tickers, days=30)
    if heatmap:
        heatmap.show()
    
    # Create comprehensive dashboard
    dashboard = visualizer.create_sentiment_impact_dashboard(tech_tickers, days=30)
    if dashboard:
        dashboard.show()
        
    # Analyze sentiment patterns for Apple
    patterns = visualizer.analyze_sentiment_patterns("AAPL", days=60)
    if patterns:
        patterns.show()

Downloading market data for AAPL from 2025-03-08 11:44:26.205933 to 2025-04-07 11:44:26.205933
YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Failed to download AAPL data: 'NotImplementedType' object has no attribute '_indexed_same'
Insufficient data for AAPL
Downloading market data for AAPL from 2025-03-08 11:44:26.590919 to 2025-04-07 11:44:26.590919
Failed to download AAPL data: 'NotImplementedType' object has no attribute '_indexed_same'
Skipping AAPL due to insufficient data
Downloading market data for MSFT from 2025-03-08 11:44:26.635093 to 2025-04-07 11:44:26.635093
Failed to download MSFT data: 'NotImplementedType' object has no attribute '_indexed_same'
Skipping MSFT due to insufficient data
Downloading market data for GOOGL from 2025-03-08 11:44:26.769491 to 2025-04-07 11:44:26.769491


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Failed to download GOOGL data: 'NotImplementedType' object has no attribute '_indexed_same'
Skipping GOOGL due to insufficient data
Downloading market data for AMZN from 2025-03-08 11:44:26.837179 to 2025-04-07 11:44:26.837179
Failed to download AMZN data: 'NotImplementedType' object has no attribute '_indexed_same'
Skipping AMZN due to insufficient data
Downloading market data for META from 2025-03-08 11:44:26.916729 to 2025-04-07 11:44:26.916729


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

Failed to download META data: 'NotImplementedType' object has no attribute '_indexed_same'
Skipping META due to insufficient data
Downloading market data for NVDA from 2025-03-08 11:44:27.079229 to 2025-04-07 11:44:27.079229
Failed to download NVDA data: 'NotImplementedType' object has no attribute '_indexed_same'
Skipping NVDA due to insufficient data
Downloading market data for TSLA from 2025-03-08 11:44:27.203972 to 2025-04-07 11:44:27.203972



[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Failed to download TSLA data: 'NotImplementedType' object has no attribute '_indexed_same'
Skipping TSLA due to insufficient data
No correlation data generated
Downloading market data for AAPL from 2025-03-08 11:44:27.285392 to 2025-04-07 11:44:27.285392
Failed to download AAPL data: 'NotImplementedType' object has no attribute '_indexed_same'
Downloading market data for MSFT from 2025-03-08 11:44:27.362923 to 2025-04-07 11:44:27.362923
Failed to download MSFT data: 'NotImplementedType' object has no attribute '_indexed_same'
Downloading market data for GOOGL from 2025-03-08 11:44:27.431922 to 2025-04-07 11:44:27.431922


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Failed to download GOOGL data: 'NotImplementedType' object has no attribute '_indexed_same'
Downloading market data for AMZN from 2025-03-08 11:44:27.519107 to 2025-04-07 11:44:27.519107
Failed to download AMZN data: 'NotImplementedType' object has no attribute '_indexed_same'
Downloading market data for META from 2025-03-08 11:44:27.622221 to 2025-04-07 11:44:27.622221


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


Failed to download META data: 'NotImplementedType' object has no attribute '_indexed_same'
Downloading market data for NVDA from 2025-03-08 11:44:27.741264 to 2025-04-07 11:44:27.741264
Failed to download NVDA data: 'NotImplementedType' object has no attribute '_indexed_same'
Downloading market data for TSLA from 2025-03-08 11:44:27.786432 to 2025-04-07 11:44:27.786432
Failed to download TSLA data: 'NotImplementedType' object has no attribute '_indexed_same'


ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed