In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from textblob import TextBlob

# Define the ticker and the time period
ticker = 'AAPL'
start_date = '2020-01-01'
end_date = '2023-01-01'

# Fetch the stock data.
# auto_adjust is pinned explicitly: later cells expect both 'Close' and
# 'Adj Close' columns, and the library default for this flag is not the same
# across yfinance releases.
aapl_data = yf.download(ticker, start=start_date, end=end_date, auto_adjust=False)

# Fail fast if nothing came back, rather than letting every later cell run
# on an empty frame and produce meaningless NaN correlations.
if aapl_data.empty:
    raise RuntimeError(
        f"No price data returned for {ticker} between {start_date} and {end_date}"
    )


[*********************100%***********************]  1 of 1 completed


In [None]:
# Preview the first five rows of the downloaded price table
aapl_data.head(5)

Unnamed: 0_level_0,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-02,72.796013,75.087502,75.150002,73.797501,74.059998,135480400
2020-01-03,72.088295,74.357498,75.144997,74.125,74.287498,146322800
2020-01-06,72.66272,74.949997,74.989998,73.1875,73.447502,118387200
2020-01-07,72.320984,74.597504,75.224998,74.370003,74.959999,108872000
2020-01-08,73.484352,75.797501,76.110001,74.290001,74.290001,132079200


In [None]:
# Flatten multi-level column labels down to the plain field names.
# The labels are taken from the frame itself (top level of the column index)
# instead of a hard-coded list, so a different column order or column count
# cannot silently mislabel the data or raise a length-mismatch error.
# The isinstance guard makes the cell safe to re-run once columns are flat.
if isinstance(aapl_data.columns, pd.MultiIndex):
    aapl_data.columns = list(aapl_data.columns.get_level_values(0))

aapl_data.columns

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object')

In [None]:
# Start the headline table from the trading dates present in the AAPL data:
# one row per date, held in a 'Date' column with a default integer index.
dates = aapl_data.index
text_data = pd.DataFrame({'Date': dates})


In [None]:
# Pool of sample headlines to draw from
headline_samples = [
    "AAPL hits record high",
    "Concerns over AAPL's future growth",
    "AAPL to unveil new product next month",
    "AAPL reports earnings that exceed forecasts",
    "Market downturn affects AAPL",
    "AAPL invests in renewable energy",
    "New AAPL CEO announced",
    "AAPL faces regulatory scrutiny",
    "AAPL rumored to acquire a tech startup",
    "AAPL's market share grows"
]

# Use a seeded generator so that Restart Kernel -> Run All reproduces the
# same headline assignment (and therefore the same downstream sentiment and
# correlation figures). The generator is rebuilt on every run of this cell,
# which keeps the cell idempotent.
HEADLINE_SEED = 42
headline_rng = np.random.default_rng(HEADLINE_SEED)

# Assign a random headline to each date
text_data['Headline'] = headline_rng.choice(headline_samples, size=len(text_data))

# Display the first few rows of the updated DataFrame
text_data.head()

Unnamed: 0,Date,Headline
0,2020-01-02,AAPL to unveil new product next month
1,2020-01-03,AAPL invests in renewable energy
2,2020-01-06,AAPL faces regulatory scrutiny
3,2020-01-07,Market downturn affects AAPL
4,2020-01-08,AAPL's market share grows


In [None]:
# Show up to the first 30 rows of the headline table.
# NOTE(review): the saved output of this cell already contains a 'Sentiment'
# column, but that column is only assigned in a later cell -- this looks like
# out-of-order execution; confirm and re-run or move this cell.
text_data.head(30)

Unnamed: 0,Date,Headline,Sentiment
0,2020-01-02,AAPL to unveil new product next month,0.068182
1,2020-01-03,AAPL invests in renewable energy,0.0
2,2020-01-06,AAPL faces regulatory scrutiny,0.0
3,2020-01-07,Market downturn affects AAPL,0.0
4,2020-01-08,AAPL's market share grows,0.0
5,2020-01-09,New AAPL CEO announced,0.136364
6,2020-01-10,New AAPL CEO announced,0.136364
7,2020-01-13,AAPL to unveil new product next month,0.068182
8,2020-01-14,AAPL reports earnings that exceed forecasts,0.0
9,2020-01-15,Market downturn affects AAPL,0.0


In [None]:
def calculate_sentiment(text):
    """Return the TextBlob sentiment polarity of ``text``."""
    blob = TextBlob(text)
    polarity = blob.sentiment.polarity
    return polarity

# Score every headline and store the result in a new 'Sentiment' column
text_data['Sentiment'] = text_data['Headline'].map(calculate_sentiment)

In [None]:
# Preview the first five rows, now including the sentiment scores
text_data.head(5)

Unnamed: 0,Date,Headline,Sentiment
0,2020-01-02,AAPL to unveil new product next month,0.068182
1,2020-01-03,AAPL invests in renewable energy,0.0
2,2020-01-06,AAPL faces regulatory scrutiny,0.0
3,2020-01-07,Market downturn affects AAPL,0.0
4,2020-01-08,AAPL's market share grows,0.0


In [None]:
# Line up each day's sentiment score with that day's closing price:
# index the headline table by date, then left-join the 'Close' series on it.
combined_data = (
    text_data
    .set_index('Date')
    .join(aapl_data['Close'], how='left')
)
combined_data.head()

Unnamed: 0_level_0,Headline,Sentiment,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2020-01-02,AAPL to unveil new product next month,0.068182,75.087502
2020-01-03,AAPL invests in renewable energy,0.0,74.357498
2020-01-06,AAPL faces regulatory scrutiny,0.0,74.949997
2020-01-07,Market downturn affects AAPL,0.0,74.597504
2020-01-08,AAPL's market share grows,0.0,75.797501


In [None]:
# Day-over-day fractional change in the closing price; the first row has no
# previous close and is therefore NaN.
combined_data['Daily Returns'] = combined_data['Close'].pct_change(periods=1)


In [None]:
# Preview the first five rows with the new 'Daily Returns' column
combined_data.head(5)

Unnamed: 0_level_0,Headline,Sentiment,Close,Daily Returns
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-01-02,AAPL to unveil new product next month,0.068182,75.087502,
2020-01-03,AAPL invests in renewable energy,0.0,74.357498,-0.009722
2020-01-06,AAPL faces regulatory scrutiny,0.0,74.949997,0.007968
2020-01-07,Market downturn affects AAPL,0.0,74.597504,-0.004703
2020-01-08,AAPL's market share grows,0.0,75.797501,0.016086


In [None]:
# Pearson correlation between headline sentiment and same-day returns
correlation = combined_data['Sentiment'].corr(
    combined_data['Daily Returns'],
    method='pearson',
)
print("Correlation between sentiment and daily stock returns:", correlation)


Correlation between sentiment and daily stock returns: 0.0024608839113656527


In [None]:
# Pearson correlation between headline sentiment and the closing price level
correlation = combined_data['Sentiment'].corr(
    combined_data['Close'],
    method='pearson',
)
print("Correlation between sentiment and closing:", correlation)

Correlation between sentiment and closing: 0.03911582497448469
