In [11]:
import pandas as pd
from textblob import TextBlob
import os

# ---------- Load and Clean News ----------
def load_news(news_path):
    """Load a news CSV and return the mean headline sentiment per calendar date.

    Parameters
    ----------
    news_path : str or path-like
        CSV file with a ``headline`` column and a ``date`` (or ``Date``) column.

    Returns
    -------
    pandas.DataFrame
        One row per date with columns ``date`` (``datetime.date`` objects) and
        ``sentiment`` (mean TextBlob polarity of that date's headlines).

    Raises
    ------
    ValueError
        If the file has no ``date``/``Date`` column or no ``headline`` column.
    """
    df_news = pd.read_csv(news_path)
    print("📰 News Columns:", df_news.columns.tolist())

    # Accept the capitalised 'Date' header as an alias for 'date'.
    if 'Date' in df_news.columns:
        df_news = df_news.rename(columns={'Date': 'date'})

    if 'date' not in df_news.columns or 'headline' not in df_news.columns:
        raise ValueError("❌ News file must contain 'date' and 'headline' columns.")

    # Unparsable date strings become NaT and are dropped on the next line.
    # NOTE(review): rows are dropped silently; if the file mixes timestamp
    # formats, confirm how many rows survive this step.
    df_news['date'] = pd.to_datetime(df_news['date'], errors='coerce')

    # .copy() gives an independent frame, so the column assignments below do
    # not raise SettingWithCopyWarning when dropna() actually removed rows.
    df_news = df_news.dropna(subset=['date', 'headline']).copy()

    # TextBlob comes from the import at the top of this cell.
    df_news['sentiment'] = df_news['headline'].apply(
        lambda x: TextBlob(str(x)).sentiment.polarity
    )
    # Reduce timestamps to plain dates so all headlines of a day share a key.
    df_news['date'] = df_news['date'].dt.date

    sentiment_daily = df_news.groupby('date')['sentiment'].mean().reset_index()
    return sentiment_daily


def process_stock(stock_path, sentiment_df):
    """Correlate one stock's daily returns with daily news sentiment.

    Parameters
    ----------
    stock_path : str
        CSV file with a ``date`` (or ``Date``) column and a ``close``
        (or ``Close``) column.
    sentiment_df : pandas.DataFrame
        Frame with a ``date`` column and a ``sentiment`` column; it is
        inner-joined to the stock data on ``date``.

    Returns
    -------
    tuple
        ``(company_name, correlation)`` where ``company_name`` is the file
        name without its extension and ``correlation`` is the correlation
        between ``daily_return`` and ``sentiment`` over the merged rows.

    Raises
    ------
    ValueError
        If the file lacks a date column or a close column.
    """
    stock = pd.read_csv(stock_path)
    print(f"📈 Processing {stock_path}")
    print("📊 Stock Columns:", stock.columns.tolist())

    # Normalise the common capitalised headers; keys that are absent are ignored.
    stock = stock.rename(columns={'Date': 'date', 'Close': 'close'})

    if 'date' not in stock.columns or 'close' not in stock.columns:
        raise ValueError(f"❌ {stock_path} must have 'date' and 'close' columns.")

    stock['date'] = pd.to_datetime(stock['date'], errors='coerce')
    # .copy() avoids SettingWithCopyWarning on the assignments below when
    # dropna() actually removed rows.
    stock = stock.dropna(subset=['date', 'close']).copy()
    stock['date'] = stock['date'].dt.date
    # Returns are computed between consecutive rows, so order by date first.
    stock = stock.sort_values('date')
    stock['daily_return'] = stock['close'].pct_change()

    merged = pd.merge(stock, sentiment_df, on='date', how='inner')
    correlation = merged[['daily_return', 'sentiment']].corr().iloc[0, 1]

    # splitext strips only the final extension, so a dot inside the stem
    # (e.g. "BRK.B_historical_data.csv") is kept instead of truncating the name.
    company_name = os.path.splitext(os.path.basename(stock_path))[0]
    return company_name, correlation


# ---------- Main Runner ----------
def analyze_multiple_stocks(news_path, stock_folder):
    """Print the sentiment/return correlation for every stock CSV in a folder.

    Parameters
    ----------
    news_path : str
        News CSV passed to ``load_news`` to build the daily sentiment table.
    stock_folder : str
        Directory whose ``*.csv`` files are each passed to ``process_stock``.

    Returns
    -------
    None
        Results are printed, one ``company: correlation`` line per file.
    """
    sentiment_df = load_news(news_path)

    # os.listdir returns entries in arbitrary order; sort the names so the
    # report is printed in the same order on every run and every machine.
    csv_files = sorted(
        entry for entry in os.listdir(stock_folder) if entry.endswith('.csv')
    )

    results = []
    for file in csv_files:
        path = os.path.join(stock_folder, file)
        company, corr = process_stock(path, sentiment_df)
        results.append((company, corr))

    print("\n📊 Correlation between sentiment and stock returns:")
    for company, corr in results:
        print(f"{company}: {corr:.3f}")


# ---------- Run ----------
# Input locations for the analysis. These are absolute paths on the original
# author's Windows machine; replace them with your own before running.
# news_file: CSV of news headlines (needs 'date'/'Date' and 'headline' columns).
news_file = r"C:\Users\HP\Downloads\raw_analyst_ratings.csv\raw_analyst_ratings.csv"
# stock_dir: folder scanned for per-stock '*.csv' price files.
stock_dir = r"C:\Users\HP\Downloads\yfinance_data\yfinance_data"

# Build the daily sentiment table, then print one correlation per stock file.
analyze_multiple_stocks(news_file, stock_dir)


📰 News Columns: ['Unnamed: 0', 'headline', 'url', 'publisher', 'date', 'stock']
📈 Processing C:\Users\HP\Downloads\yfinance_data\yfinance_data\AAPL_historical_data.csv
📊 Stock Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
📈 Processing C:\Users\HP\Downloads\yfinance_data\yfinance_data\AMZN_historical_data.csv
📊 Stock Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
📈 Processing C:\Users\HP\Downloads\yfinance_data\yfinance_data\GOOG_historical_data.csv
📊 Stock Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
📈 Processing C:\Users\HP\Downloads\yfinance_data\yfinance_data\META_historical_data.csv
📊 Stock Columns: ['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'Dividends', 'Stock Splits']
📈 Processing C:\Users\HP\Downloads\yfinance_data\yfinance_data\MSFT_historical_data.csv
📊 Stock Columns: ['Date', 'Open', 'High'

In [2]:
# Use the %pip magic so the package is installed into the running kernel's
# environment, and pin the version this notebook was executed with.
%pip install textblob==0.19.0


Collecting textblob
  Downloading textblob-0.19.0-py3-none-any.whl.metadata (4.4 kB)
Collecting nltk>=3.9 (from textblob)
  Downloading nltk-3.9.1-py3-none-any.whl.metadata (2.9 kB)
Collecting regex>=2021.8.3 (from nltk>=3.9->textblob)
  Downloading regex-2024.11.6-cp312-cp312-win_amd64.whl.metadata (41 kB)
Downloading textblob-0.19.0-py3-none-any.whl (624 kB)
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   ---------------------------------------- 0.0/624.3 kB ? eta -:--:--
   --------------------------------- ------ 524.3/624.3 kB 2.1 MB/s eta 0:00:01
   ---------------------------------------- 624.3/624.3 kB 1.8 MB/s eta 0:00:00
Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ------ --------------------------------- 0.3/1.5 MB ? eta -:--:--
   -------------------- ------------------- 0.8/1.5 MB 2.4 MB/s eta 0:00:01
   ---------------------------------------- 1.5/1.5 MB 2.4 MB/s eta 0:00

In [5]:
# Run the downloader with the kernel's own interpreter: a bare `python` is
# resolved through PATH and may be a different installation without textblob.
import sys
!"{sys.executable}" -m textblob.download_corpora


Finished.


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\brown.zip.
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping taggers\averaged_perceptron_tagger_eng.zip.
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\conll2000.zip.
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping corpora\movie_reviews.zip.
