# Task 3: Correlation between News Sentiment and Stock Movement

**Financial News Sentiment Analysis - Week 1 Challenge**

This notebook performs correlation analysis between news sentiment and stock price movements:
- **Date Alignment**: Align news and stock datasets by dates
- **Sentiment Analysis**: Analyze sentiment of news headlines using NLP tools
- **Stock Returns**: Calculate daily stock returns
- **Correlation Analysis**: Measure correlation between sentiment scores and stock returns


## 1. Setup and Imports


In [None]:
# Standard library imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
from datetime import datetime, timedelta
from scipy import stats
from scipy.stats import pearsonr, spearmanr

# Sentiment Analysis
from textblob import TextBlob
import nltk

# Stock data
import yfinance as yf

# Plot styling shared by every figure in this notebook
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (14, 8)
# NOTE: this silences *every* warning (including pandas/scipy ones about
# constant inputs or deprecations); narrow the filter when debugging.
warnings.filterwarnings('ignore')

# Project layout, resolved relative to the notebook's working directory
PROJECT_ROOT = Path('..')
DATA_DIR = PROJECT_ROOT / 'data'
FIGURES_DIR = PROJECT_ROOT / 'figures'

# Create the figures directory (and any missing parents) if it doesn't exist
FIGURES_DIR.mkdir(parents=True, exist_ok=True)

# Download the VADER lexicon only when it is not installed yet.
# NLTK looks resources up by their category path, so the lexicon has to be
# addressed as 'sentiment/vader_lexicon.zip'; the bare name never resolves
# and would trigger a download attempt on every run.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon', quiet=True)

print("‚úÖ Setup complete!")


## 2. Load News Data


In [None]:
# Load the financial news dataset.
# Candidates are sorted so the chosen file does not depend on filesystem
# listing order; CSV files are still preferred over JSON files.
news_data_files = sorted(DATA_DIR.glob('*.csv')) + sorted(DATA_DIR.glob('*.json'))

if not news_data_files:
    print("‚ö†Ô∏è  No news data files found in data/ directory.")
    print("Please ensure your dataset is placed in the data/ directory.")
    print("Expected columns: headline, url, publisher, date, stock")
    news_df = None
else:
    news_file = news_data_files[0]
    print(f"üì∞ Loading news data from: {news_file.name}")

    # Compare the suffix case-insensitively so a '.CSV' file matched by the
    # glob on a case-insensitive filesystem cannot leave news_df unbound.
    if news_file.suffix.lower() == '.csv':
        news_df = pd.read_csv(news_file, parse_dates=['date'], low_memory=False)
    else:
        news_df = pd.read_json(news_file, convert_dates=['date'])

    # Clean data.
    # Coerce the dates first: unparseable values become NaT, and the dropna
    # below then removes them together with rows missing a headline or stock.
    # (Dropping before coercing would let NaT rows through.)
    news_df['date'] = pd.to_datetime(news_df['date'], errors='coerce', utc=True)
    news_df = news_df.dropna(subset=['headline', 'date', 'stock'])

    print(f"‚úÖ News data loaded successfully!")
    print(f"Shape: {news_df.shape[0]:,} rows √ó {news_df.shape[1]} columns")
    print(f"Date range: {news_df['date'].min().date()} to {news_df['date'].max().date()}")
    print(f"Unique stocks: {news_df['stock'].nunique()}")
    print(f"\nFirst few rows:")
    display(news_df.head())


## 3. Sentiment Analysis on Headlines


In [None]:
def analyze_sentiment_textblob(text):
    """
    Score a piece of text with TextBlob.

    Missing values and empty strings short-circuit to ``(0.0, 0.0)``; any
    other input is converted with ``str()`` before being scored.

    Returns a ``(polarity, subjectivity)`` tuple: polarity runs from -1
    (negative) to 1 (positive), subjectivity from 0 (objective) to
    1 (subjective).
    """
    is_blank = pd.isna(text) or text == ''
    if is_blank:
        return 0.0, 0.0

    sentiment = TextBlob(str(text)).sentiment
    return sentiment.polarity, sentiment.subjectivity

print("üîç Performing sentiment analysis on headlines...")
print("This may take a few moments...\n")

# Score every headline once, then split the (polarity, subjectivity) pairs
# into their own columns.
headline_scores = news_df['headline'].apply(analyze_sentiment_textblob)
news_df['sentiment_polarity'] = [polarity for polarity, _ in headline_scores]
news_df['sentiment_subjectivity'] = [subjectivity for _, subjectivity in headline_scores]


def _polarity_to_label(polarity):
    """Bucket a polarity score: above 0.1 is positive, below -0.1 negative."""
    if polarity > 0.1:
        return 'Positive'
    if polarity < -0.1:
        return 'Negative'
    return 'Neutral'


# Classify each headline into a coarse sentiment label
news_df['sentiment_label'] = news_df['sentiment_polarity'].apply(_polarity_to_label)

print("‚úÖ Sentiment analysis complete!")
print("\nSentiment distribution:")
print(news_df['sentiment_label'].value_counts())
print("\nSentiment statistics:")
print(news_df['sentiment_polarity'].describe())


In [None]:
# Visualize sentiment distribution: label counts (left) and the raw polarity
# histogram (right).
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Sentiment label distribution.
# value_counts() orders labels by frequency, so a positional color list
# would paint whichever label is most common green. Fix the label order and
# look the color up by label instead.
label_order = ['Positive', 'Neutral', 'Negative']
label_colors = {'Positive': 'green', 'Neutral': 'gray', 'Negative': 'red'}
sentiment_counts = (
    news_df['sentiment_label'].value_counts().reindex(label_order, fill_value=0)
)
axes[0].bar(sentiment_counts.index, sentiment_counts.values,
            color=[label_colors[label] for label in sentiment_counts.index], alpha=0.7)
axes[0].set_ylabel('Number of Articles')
axes[0].set_title('Sentiment Label Distribution')
axes[0].grid(True, alpha=0.3, axis='y')
# Place each count slightly above its bar (1% of the tallest bar)
label_offset = max(sentiment_counts.values) * 0.01
for i, v in enumerate(sentiment_counts.values):
    axes[0].text(i, v + label_offset, f'{v:,}',
                ha='center', fontsize=10)

# Sentiment polarity distribution
mean_polarity = news_df['sentiment_polarity'].mean()
axes[1].hist(news_df['sentiment_polarity'], bins=50, edgecolor='black', alpha=0.7, color='steelblue')
axes[1].axvline(mean_polarity, color='red', linestyle='--',
               label=f'Mean: {mean_polarity:.3f}')
axes[1].axvline(0, color='black', linestyle='-', linewidth=0.5, label='Neutral')
axes[1].set_xlabel('Sentiment Polarity')
axes[1].set_ylabel('Frequency')
axes[1].set_title('Sentiment Polarity Distribution')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(FIGURES_DIR / 'sentiment_distribution.png', dpi=300, bbox_inches='tight')
plt.show()

print("‚úÖ Sentiment visualization created!")


## 4. Date Alignment and Aggregation

Normalize dates in both news and stock datasets to ensure alignment.


In [None]:
# Strip the time-of-day so each article is keyed by calendar date only.
# NOTE(review): 'date' was parsed with utc=True, so this is the UTC calendar
# date; the price cell takes the date from yfinance's index — confirm the two
# agree for articles published late in the day.
news_df['date_only'] = news_df['date'].dt.date

# Tickers mentioned anywhere in the news data
stocks_in_news = news_df['stock'].unique().tolist()
print(f"üìä Stocks found in news data: {len(stocks_in_news)}")
print(f"Stocks: {stocks_in_news[:10]}...")  # Show first 10

# One row per (stock, day): mean polarity, number of articles, and mean
# subjectivity across all headlines published for that stock on that day.
per_stock_day = news_df.groupby(['stock', 'date_only'])
daily_sentiment = (
    per_stock_day
    .agg(
        avg_sentiment=('sentiment_polarity', 'mean'),
        article_count=('sentiment_polarity', 'count'),
        avg_subjectivity=('sentiment_subjectivity', 'mean'),
    )
    .reset_index()
    .rename(columns={'date_only': 'date'})
)
# Convert the date objects back to pandas timestamps
daily_sentiment['date'] = pd.to_datetime(daily_sentiment['date'])

print("\n‚úÖ Daily sentiment aggregated!")
print(f"Total stock-date combinations: {len(daily_sentiment):,}")
print("\nSample aggregated data:")
display(daily_sentiment.head(10))


## 5. Load and Prepare Stock Price Data


In [None]:
# Download price history for the tickers found in the news data, covering
# the news date range padded by five days on each side.
if news_df is None or len(news_df) == 0:
    print("‚ö†Ô∏è  No news data available. Cannot proceed with correlation analysis.")
    stock_data = {}
else:
    min_date = news_df['date'].min()
    max_date = news_df['date'].max()
    # Pad the window slightly so prices exist around the first/last article
    start_date = min_date - timedelta(days=5)
    end_date = max_date + timedelta(days=5)

    print(f"üì• Downloading stock data from {start_date.date()} to {end_date.date()}...")
    print(f"Analyzing {len(stocks_in_news)} stocks...\n")

    required_cols = ['date', 'open', 'high', 'low', 'close', 'volume']
    stock_data = {}
    for ticker in stocks_in_news[:20]:  # Limit to first 20 stocks for performance
        # Best-effort per ticker: any failure is reported and the loop moves on
        try:
            print(f"Downloading {ticker}...", end=" ")
            history = yf.Ticker(ticker).history(start=start_date, end=end_date)

            if history.empty:
                print("‚ùå No data")
                continue

            # Lower-case the column names and turn the index into a 'date' column
            history = history.rename(columns=str.lower).rename_axis('date').reset_index()
            # Drop the time component, then restore a pandas datetime dtype
            history['date'] = pd.to_datetime(pd.to_datetime(history['date']).dt.date)

            if not set(required_cols).issubset(history.columns):
                print("‚ùå Missing columns")
                continue

            stock_data[ticker] = history
            print(f"‚úÖ {len(history)} records")
        except Exception as e:
            print(f"‚ùå Error: {str(e)[:50]}")

    print(f"\n‚úÖ Successfully downloaded data for {len(stock_data)} stocks")


## 6. Calculate Daily Stock Returns


In [None]:
# Build a per-ticker table of simple and log daily returns, both in percent.
stock_returns = {}

for ticker, prices in stock_data.items():
    # sort_values returns a new frame, so the downloaded data is left untouched
    ordered = prices.sort_values('date')
    close = ordered['close']

    ordered = ordered.assign(
        # Simple day-over-day percentage change
        daily_return=close.pct_change() * 100,
        # Log return relative to the previous row's close
        log_return=np.log(close / close.shift(1)) * 100,
    )

    # Keep only the columns the later cells need
    stock_returns[ticker] = ordered[['date', 'daily_return', 'log_return', 'close']].copy()

    print(f"‚úÖ Calculated returns for {ticker}: {len(ordered)} days")

print(f"\nüìä Daily returns calculated for {len(stock_returns)} stocks")

# Show the first rows for whichever ticker was processed first
if stock_returns:
    sample_ticker = next(iter(stock_returns))
    print(f"\nSample returns for {sample_ticker}:")
    display(stock_returns[sample_ticker].head(10))


## 7. Merge Sentiment and Stock Returns Data


In [None]:
# Merge daily sentiment with daily returns, ticker by ticker.
# The inner join keeps only dates that have BOTH an aggregated sentiment row
# and a price row for the ticker.
sentiment_cols = ['date', 'avg_sentiment', 'article_count', 'avg_subjectivity']
return_cols = ['date', 'daily_return', 'log_return', 'close']
merged_data = []

for ticker, stock_ret in stock_returns.items():
    # Sentiment rows for this ticker; skip tickers with no news at all
    stock_sentiment = daily_sentiment[daily_sentiment['stock'] == ticker]
    if stock_sentiment.empty:
        continue

    # Both 'date' columns are already normalized pandas timestamps, so they
    # can be joined directly.
    merged = pd.merge(
        stock_sentiment[sentiment_cols],
        stock_ret[return_cols],
        on='date',
        how='inner'
    )

    merged['stock'] = ticker
    merged_data.append(merged)

if merged_data:
    correlation_df = pd.concat(merged_data, ignore_index=True)
    correlation_df = correlation_df.sort_values(['stock', 'date'])

    print(f"‚úÖ Merged data created!")
    print(f"Total merged records: {len(correlation_df):,}")
    print(f"Stocks with merged data: {correlation_df['stock'].nunique()}")
    print(f"\nSample merged data:")
    display(correlation_df.head(10))

    print(f"\nData quality:")
    print(f"  Records with sentiment and returns: {len(correlation_df):,}")
    print(f"  Missing sentiment: {correlation_df['avg_sentiment'].isna().sum()}")
    print(f"  Missing returns: {correlation_df['daily_return'].isna().sum()}")
else:
    print("‚ö†Ô∏è  No merged data available. Check stock ticker matching.")
    # Keep the expected columns on the empty frame so later column-based
    # operations (e.g. dropna(subset=...)) do not raise KeyError.
    correlation_df = pd.DataFrame(columns=sentiment_cols + return_cols[1:] + ['stock'])


## 8. Correlation Analysis

Calculate correlation between sentiment scores and stock returns.


In [None]:
def _significance_stars(p_value):
    """Return the star code for a p-value: ***, **, * or 'ns' (p >= 0.05)."""
    if p_value < 0.001:
        return '***'
    if p_value < 0.01:
        return '**'
    if p_value < 0.05:
        return '*'
    return 'ns'


def _correlation_strength(coefficient):
    """Return a verbal label for the absolute size of a correlation coefficient."""
    magnitude = abs(coefficient)
    if magnitude < 0.1:
        return "negligible"
    if magnitude < 0.3:
        return "weak"
    if magnitude < 0.5:
        return "moderate"
    if magnitude < 0.7:
        return "strong"
    return "very strong"


# Remove rows with missing data.
# An empty merge result may carry no columns at all; dropna(subset=...) would
# raise KeyError on it, so fall back to an empty slice in that case.
_required_columns = ['avg_sentiment', 'daily_return']
if set(_required_columns).issubset(correlation_df.columns):
    correlation_clean = correlation_df.dropna(subset=_required_columns)
else:
    correlation_clean = correlation_df.iloc[0:0]

# pearsonr/spearmanr need at least two observations
if len(correlation_clean) >= 2:
    # Overall correlation (all stocks combined)
    overall_corr_pearson, overall_p_pearson = pearsonr(
        correlation_clean['avg_sentiment'],
        correlation_clean['daily_return']
    )
    overall_corr_spearman, overall_p_spearman = spearmanr(
        correlation_clean['avg_sentiment'],
        correlation_clean['daily_return']
    )

    print("=" * 70)
    print("OVERALL CORRELATION ANALYSIS (All Stocks Combined)")
    print("=" * 70)
    print(f"\nPearson Correlation: {overall_corr_pearson:.4f}")
    print(f"  P-value: {overall_p_pearson:.4f}")
    print(f"  Significance: {_significance_stars(overall_p_pearson)}")

    print(f"\nSpearman Correlation: {overall_corr_spearman:.4f}")
    print(f"  P-value: {overall_p_spearman:.4f}")
    print(f"  Significance: {_significance_stars(overall_p_spearman)}")

    print(f"\nInterpretation:")
    if np.isnan(overall_corr_pearson):
        # A constant input series yields NaN; every magnitude comparison is
        # then False, which would otherwise be reported as "very strong".
        print("  Correlation is undefined (sentiment or returns are constant)")
    else:
        strength = _correlation_strength(overall_corr_pearson)
        direction = "positive" if overall_corr_pearson > 0 else "negative"
        print(f"  {strength.capitalize()} {direction} correlation between news sentiment and stock returns")
else:
    print("‚ö†Ô∏è  No clean data available for correlation analysis")


In [None]:
# Per-stock correlation analysis: Pearson and Spearman correlation between
# daily average sentiment and daily return, computed separately per ticker.
if len(correlation_clean) > 0:
    stock_correlations = []

    # NOTE: the per-ticker slice is deliberately NOT named `stock_data`; that
    # name holds the dict of downloaded price frames, and overwriting it here
    # would break a re-run of the returns cell.
    # sort=False keeps tickers in order of first appearance.
    for ticker, ticker_rows in correlation_clean.groupby('stock', sort=False):
        if len(ticker_rows) > 10:  # Need sufficient data points
            corr_pearson, p_pearson = pearsonr(
                ticker_rows['avg_sentiment'],
                ticker_rows['daily_return']
            )
            corr_spearman, p_spearman = spearmanr(
                ticker_rows['avg_sentiment'],
                ticker_rows['daily_return']
            )

            stock_correlations.append({
                'Stock': ticker,
                'Pearson_Correlation': corr_pearson,
                'Pearson_P_Value': p_pearson,
                'Spearman_Correlation': corr_spearman,
                'Spearman_P_Value': p_spearman,
                'Data_Points': len(ticker_rows),
                # Significance is judged on the Pearson p-value only
                'Significant': 'Yes' if p_pearson < 0.05 else 'No'
            })

    if stock_correlations:
        corr_summary = pd.DataFrame(stock_correlations)
        # Highest Pearson correlation first
        corr_summary = corr_summary.sort_values('Pearson_Correlation', ascending=False)

        print("\n" + "=" * 70)
        print("PER-STOCK CORRELATION ANALYSIS")
        print("=" * 70)
        display(corr_summary)

        print(f"\nüìä Summary Statistics:")
        print(f"  Mean Pearson Correlation: {corr_summary['Pearson_Correlation'].mean():.4f}")
        print(f"  Median Pearson Correlation: {corr_summary['Pearson_Correlation'].median():.4f}")
        print(f"  Stocks with significant correlation (p<0.05): {corr_summary['Significant'].value_counts().get('Yes', 0)}")
        print(f"  Stocks with positive correlation: {(corr_summary['Pearson_Correlation'] > 0).sum()}")
        print(f"  Stocks with negative correlation: {(corr_summary['Pearson_Correlation'] < 0).sum()}")


## 9. Visualizations


In [None]:
# Four-panel figure: pooled scatter with a linear fit, per-stock correlation
# bars, sentiment vs. returns over time for the top-ranked stock, and the
# histogram of per-stock correlations.
if len(correlation_clean) > 0:
    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    (scatter_ax, bars_ax), (series_ax, hist_ax) = axes

    sentiment_values = correlation_clean['avg_sentiment']
    return_values = correlation_clean['daily_return']

    # Panel 1: every stock-day as one point, plus a least-squares line
    scatter_ax.scatter(sentiment_values, return_values,
                       alpha=0.5, s=20, color='steelblue')

    trend = np.poly1d(np.polyfit(sentiment_values, return_values, 1))
    scatter_ax.plot(sentiment_values, trend(sentiment_values),
                    "r--", alpha=0.8, linewidth=2, label=f'Linear fit (r={overall_corr_pearson:.3f})')

    scatter_ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5, alpha=0.3)
    scatter_ax.axvline(x=0, color='black', linestyle='-', linewidth=0.5, alpha=0.3)
    scatter_ax.set_xlabel('Average Sentiment Polarity', fontsize=12)
    scatter_ax.set_ylabel('Daily Return (%)', fontsize=12)
    scatter_ax.set_title('Sentiment vs Stock Returns (All Stocks)', fontsize=14, fontweight='bold')
    scatter_ax.legend()
    scatter_ax.grid(True, alpha=0.3)

    # The remaining panels need the per-stock table from the earlier cell;
    # without it they are left blank.
    has_stock_summary = 'corr_summary' in locals() and len(corr_summary) > 0

    if has_stock_summary:
        # Panel 2: first 15 rows of the per-stock table as horizontal bars
        top_stocks = corr_summary.head(15)
        bar_positions = range(len(top_stocks))
        bar_colors = ['green' if value > 0 else 'red' for value in top_stocks['Pearson_Correlation']]
        bars_ax.barh(bar_positions, top_stocks['Pearson_Correlation'], color=bar_colors, alpha=0.7)
        bars_ax.set_yticks(bar_positions)
        bars_ax.set_yticklabels(top_stocks['Stock'])
        bars_ax.axvline(x=0, color='black', linestyle='-', linewidth=0.5)
        bars_ax.set_xlabel('Pearson Correlation Coefficient', fontsize=12)
        bars_ax.set_title('Correlation by Stock (Top 15)', fontsize=14, fontweight='bold')
        bars_ax.invert_yaxis()
        bars_ax.grid(True, alpha=0.3, axis='x')

        # Panel 3: sentiment (left axis) and returns (right axis) over time
        # for the first stock in the per-stock table
        top_stock = corr_summary.iloc[0]['Stock']
        top_stock_data = correlation_clean[correlation_clean['stock'] == top_stock].sort_values('date')

        returns_ax = series_ax.twinx()

        sentiment_line = series_ax.plot(top_stock_data['date'], top_stock_data['avg_sentiment'],
                                        color='blue', label='Sentiment', linewidth=2)
        returns_line = returns_ax.plot(top_stock_data['date'], top_stock_data['daily_return'],
                                       color='red', label='Daily Return (%)', linewidth=1.5, alpha=0.7)

        for zero_axis in (series_ax, returns_ax):
            zero_axis.axhline(y=0, color='gray', linestyle='--', linewidth=0.5, alpha=0.5)

        series_ax.set_xlabel('Date', fontsize=12)
        series_ax.set_ylabel('Sentiment Polarity', fontsize=12, color='blue')
        returns_ax.set_ylabel('Daily Return (%)', fontsize=12, color='red')
        series_ax.set_title(f'{top_stock} - Sentiment and Returns Over Time', fontsize=14, fontweight='bold')

        # One legend covering the lines from both y-axes
        legend_lines = sentiment_line + returns_line
        series_ax.legend(legend_lines, [line.get_label() for line in legend_lines], loc='best')
        series_ax.grid(True, alpha=0.3)
        series_ax.tick_params(axis='x', rotation=45)

        # Panel 4: how the per-stock Pearson coefficients are distributed
        mean_stock_corr = corr_summary['Pearson_Correlation'].mean()
        hist_ax.hist(corr_summary['Pearson_Correlation'], bins=20, edgecolor='black', alpha=0.7, color='steelblue')
        hist_ax.axvline(mean_stock_corr, color='red', linestyle='--',
                        label=f'Mean: {mean_stock_corr:.3f}')
        hist_ax.axvline(0, color='black', linestyle='-', linewidth=0.5)
        hist_ax.set_xlabel('Pearson Correlation Coefficient', fontsize=12)
        hist_ax.set_ylabel('Frequency', fontsize=12)
        hist_ax.set_title('Distribution of Stock Correlations', fontsize=14, fontweight='bold')
        hist_ax.legend()
        hist_ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(FIGURES_DIR / 'correlation_analysis.png', dpi=300, bbox_inches='tight')
    plt.show()

    print("‚úÖ Correlation visualizations created!")


In [None]:
# Analyze correlation with different lags.
# Lag k pairs sentiment on day t with the return k trading days later
# (k > 0) or earlier (k < 0).
if len(correlation_clean) > 0:
    # The shift must run on the FULL trading-day return series. The merged
    # frame only contains days that had news, so shifting it by one row would
    # jump to the previous/next news day, which can be many trading days away.
    returns_panel = (
        pd.concat(
            [
                returns[['date', 'daily_return']].assign(stock=ticker)
                for ticker, returns in stock_returns.items()
            ],
            ignore_index=True,
        )
        .sort_values(['stock', 'date'])
        .reset_index(drop=True)
    )
    sentiment_days = correlation_clean[['stock', 'date', 'avg_sentiment']]

    lag_correlations = []

    for lag in range(-2, 3):  # -2 to +2 trading days
        if lag == 0:
            # Already calculated (lag 0)
            lag_correlations.append({
                'Lag': 0,
                'Correlation': overall_corr_pearson,
                'P_Value': overall_p_pearson,
                'Data_Points': len(correlation_clean)
            })
            continue

        # shift(-lag) puts the return from trading day t+lag on row t
        shifted = returns_panel[['stock', 'date']].copy()
        shifted['daily_return_lag'] = returns_panel.groupby('stock')['daily_return'].shift(-lag)

        # Attach the shifted return to each stock-day that has sentiment
        lag_data_clean = (
            sentiment_days
            .merge(shifted, on=['stock', 'date'], how='inner')
            .dropna(subset=['avg_sentiment', 'daily_return_lag'])
        )

        if len(lag_data_clean) > 10:
            corr, p_val = pearsonr(lag_data_clean['avg_sentiment'], lag_data_clean['daily_return_lag'])
            lag_correlations.append({
                'Lag': lag,
                'Correlation': corr,
                'P_Value': p_val,
                'Data_Points': len(lag_data_clean)
            })

    if lag_correlations:
        lag_df = pd.DataFrame(lag_correlations)
        lag_df = lag_df.sort_values('Lag')

        print("=" * 70)
        print("LAG ANALYSIS: Sentiment vs Returns at Different Time Lags")
        print("=" * 70)
        # With shift(-lag), a positive lag compares sentiment with a LATER return
        print("Lag: Positive = sentiment leads returns, Negative = returns lead sentiment")
        print()
        display(lag_df)

        # Visualization
        fig, ax = plt.subplots(figsize=(10, 6))
        colors = ['green' if x > 0 else 'red' for x in lag_df['Correlation']]
        ax.bar(lag_df['Lag'], lag_df['Correlation'], color=colors, alpha=0.7, edgecolor='black')
        ax.axhline(y=0, color='black', linestyle='-', linewidth=0.5)
        ax.set_xlabel('Lag (trading days)', fontsize=12)
        ax.set_ylabel('Correlation Coefficient', fontsize=12)
        ax.set_title('Correlation at Different Time Lags', fontsize=14, fontweight='bold')
        ax.grid(True, alpha=0.3, axis='y')

        # Add value labels: above positive bars, below negative ones
        for lag_value, corr_value in zip(lag_df['Lag'], lag_df['Correlation']):
            is_positive = corr_value > 0
            ax.text(lag_value, corr_value + (0.01 if is_positive else -0.01),
                   f"{corr_value:.3f}", ha='center',
                   va='bottom' if is_positive else 'top', fontsize=9)

        plt.tight_layout()
        plt.savefig(FIGURES_DIR / 'lag_analysis.png', dpi=300, bbox_inches='tight')
        plt.show()

        print("\n‚úÖ Lag analysis complete!")

In [None]:
# Final text report pulling together the results of the earlier cells.
print("=" * 70)
print("CORRELATION ANALYSIS SUMMARY")
print("=" * 70)

if len(correlation_clean) > 0:
    print(f"\nüìä Dataset Overview:")
    print(f"  ‚Ä¢ Total news articles analyzed: {len(news_df):,}")
    print(f"  ‚Ä¢ Unique stocks: {correlation_df['stock'].nunique() if 'correlation_df' in locals() else 0}")
    print(f"  ‚Ä¢ Stock-date pairs with both sentiment and returns: {len(correlation_clean):,}")

    print(f"\nüìà Overall Correlation:")
    print(f"  ‚Ä¢ Pearson Correlation: {overall_corr_pearson:.4f} (p={overall_p_pearson:.4f})")
    print(f"  ‚Ä¢ Spearman Correlation: {overall_corr_spearman:.4f} (p={overall_p_spearman:.4f})")

    if 'corr_summary' in locals() and len(corr_summary) > 0:
        print(f"\nüìä Per-Stock Analysis:")
        print(f"  ‚Ä¢ Mean correlation: {corr_summary['Pearson_Correlation'].mean():.4f}")
        print(f"  ‚Ä¢ Stocks with significant correlation: {corr_summary['Significant'].value_counts().get('Yes', 0)}")

        # Pick the extremes by value (ignoring undefined coefficients) and
        # only call them "positive"/"negative" when the sign really matches.
        defined_corrs = corr_summary['Pearson_Correlation'].dropna()
        if not defined_corrs.empty:
            highest = corr_summary.loc[defined_corrs.idxmax()]
            lowest = corr_summary.loc[defined_corrs.idxmin()]
            if highest['Pearson_Correlation'] > 0:
                print(f"  ‚Ä¢ Strongest positive correlation: {highest['Stock']} ({highest['Pearson_Correlation']:.4f})")
            if lowest['Pearson_Correlation'] < 0:
                print(f"  ‚Ä¢ Strongest negative correlation: {lowest['Stock']} ({lowest['Pearson_Correlation']:.4f})")

    if 'lag_df' in locals() and len(lag_df) > 0:
        best_idx = lag_df['Correlation'].abs().idxmax()
        # Read the lag from its own column so it stays an integer; pulling the
        # whole row would upcast it to float and print e.g. "-1.0 days".
        best_lag_days = int(lag_df.loc[best_idx, 'Lag'])
        best_lag_corr = lag_df.loc[best_idx, 'Correlation']
        print(f"\n‚è∞ Lag Analysis:")
        print(f"  ‚Ä¢ Best correlation at lag {best_lag_days} days: {best_lag_corr:.4f}")

    print(f"\nüí° Key Insights:")
    if np.isnan(overall_corr_pearson):
        # NaN fails every threshold test and would otherwise be reported as
        # a strong correlation.
        print(f"  ‚Ä¢ Overall correlation is undefined (sentiment or returns are constant)")
    else:
        if abs(overall_corr_pearson) < 0.1:
            print(f"  ‚Ä¢ Very weak correlation suggests news sentiment may not be a strong predictor")
        elif abs(overall_corr_pearson) < 0.3:
            print(f"  ‚Ä¢ Weak correlation - sentiment has limited predictive power")
        elif abs(overall_corr_pearson) < 0.5:
            print(f"  ‚Ä¢ Moderate correlation - sentiment can be a useful indicator")
        else:
            print(f"  ‚Ä¢ Strong correlation - sentiment is a significant predictor of returns")

        if overall_corr_pearson > 0:
            print(f"  ‚Ä¢ Positive correlation: Positive news sentiment associated with positive returns")
        else:
            print(f"  ‚Ä¢ Negative correlation: Positive news sentiment associated with negative returns (contrarian effect)")

print("\n" + "=" * 70)
print("‚úÖ Correlation Analysis Complete!")
print("=" * 70)
