In [1]:
# Cell 1: Import libraries and custom module
import pandas as pd
import sys
import os
sys.path.insert(0, os.path.abspath("../scripts"))
import correlation as cr
import importlib
importlib.reload(cr)

<module 'correlation' from 'c:\\Users\\HP\\Desktop\\TenX\\financial-news-dataset\\scripts\\correlation.py'>

In [2]:
stock_symbols = ['AAPL', 'AMZN', 'GOOG', 'META', 'MSFT', 'NVDA', 'TSLA']
stock_data = {}
for symbol in stock_symbols:
    stock_data[symbol] = pd.read_csv(f'../data/yfinance_data/{symbol}_historical_data.csv')

news_data = pd.read_csv('../data/raw_analyst_ratings.csv')

In [3]:
# Cell 3: Ensure the date columns are in datetime format
for symbol in stock_symbols:
    stock_data[symbol]['Date'] = pd.to_datetime(stock_data[symbol]['Date'], format='ISO8601').dt.tz_localize(None)

news_data['date'] = pd.to_datetime(news_data['date'], format='ISO8601').dt.tz_localize(None)


In [4]:
# Cell 4: Filter the stock data to match the date range of the news data
start_date = news_data['date'].min()

filtered_stock_data = {}
for symbol in stock_symbols:
    if stock_data[symbol]['Date'].dt.tz is not None:
        stock_data[symbol]['Date'] = stock_data[symbol]['Date'].dt.tz_convert(None)
    else:
        stock_data[symbol]['Date'] = stock_data[symbol]['Date'].dt.tz_localize(None)

    filtered_stock_data[symbol] = cr.filter_stock_data(stock_data[symbol], start_date)


In [9]:
# Cell 5: Calculate daily stock returns for each stock
stock_with_returns = {}
for symbol in stock_symbols:
    stock_with_returns[symbol] = cr.calculate_daily_return(filtered_stock_data[symbol])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  stock_data['daily_return'] = stock_data['Close'].pct_change()


In [6]:
# Cell 6: Perform sentiment analysis on the news dataset
news_with_sentiment = cr.analyze_news_sentiment(news_data)

In [7]:
# Cell 7: Calculate the correlation between each stock's returns and news sentiment
correlations = {}
for symbol in stock_symbols:
    correlations[symbol] = cr.calculate_correlation(stock_with_returns[symbol], news_with_sentiment)


In [8]:
# Cell 8: Output the results
for symbol, corr in correlations.items():
    print(f'Correlation between {symbol} stock returns and news sentiment: {corr}')

Correlation between AAPL stock returns and news sentiment: 0.10636486235108424
Correlation between AMZN stock returns and news sentiment: 0.042304677458744194
Correlation between GOOG stock returns and news sentiment: 0.07333158759301969
Correlation between META stock returns and news sentiment: 0.09394822795576822
Correlation between MSFT stock returns and news sentiment: 0.08792859342152523
Correlation between NVDA stock returns and news sentiment: 0.03404484640567281
Correlation between TSLA stock returns and news sentiment: 0.060215230136827516
