In [None]:
import nltk
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
import datetime as dt
import requests
from bs4 import BeautifulSoup
import matplotlib.dates as mdates

from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

# Switch matplotlib to its built-in 'ggplot' style sheet.
plt.style.use('ggplot')

## Data loading

In [None]:
ticker = 'AAPL'

# Seconds to wait for each article page. Without a timeout, requests waits
# indefinitely and a single stalled server would hang the whole notebook.
REQUEST_TIMEOUT = 10

news = yf.Ticker(ticker).news

# Maps publish time -> [title, link, text]. Keying by publish time keeps the
# resulting index unique; articles sharing the exact same timestamp overwrite
# each other (last one wins).
news_dict = {}

for article in tqdm(news):
    # Time information: naive datetime in this machine's local timezone.
    published_at = dt.datetime.fromtimestamp(article['providerPublishTime'])

    # News text: best effort. A page that cannot be fetched (timeout,
    # connection error, HTTP error status) yields empty text instead of
    # aborting the loop or feeding an error page into the sentiment model.
    try:
        response = requests.get(article['link'], timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Could not fetch {article['link']}: {exc}")
        article_text = ""
    else:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Concatenate every <p> element, one paragraph per line.
        article_text = "".join(
            paragraph.get_text() + "\n" for paragraph in soup.find_all('p')
        )

    news_dict[published_at] = [
        article['title'],
        article['link'],
        article_text,
    ]

# One row per article, indexed by publish time.
df = pd.DataFrame.from_dict(
    news_dict, orient='index', columns=['Title', 'Link', 'Text']
)

display(df)

## Sentiment score

In [None]:
# VADER needs the 'vader_lexicon' resource, which is not bundled with nltk.
# Download it only when it is missing so re-runs do not hit the network.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon', quiet=True)

sia = SentimentIntensityAnalyzer()

# polarity_scores returns a dict with the keys below for each text. Fixing the
# columns keeps 'compound' present even when there are no articles to score.
SCORE_COLUMNS = ['neg', 'neu', 'pos', 'compound']

# One row of scores per article, built in a single pass and aligned on df's
# index so the column-wise concat below lines up row for row.
vaders = pd.DataFrame(
    [sia.polarity_scores(text) for text in tqdm(df['Text'], total=len(df))],
    index=df.index,
    columns=SCORE_COLUMNS,
)

result_df = pd.concat([df, vaders], axis=1)

display(result_df)
    

## Compare with stock price

In [None]:

# Price window spanning the published articles. yfinance treats `end` as
# exclusive, so pad it by one bar; this also keeps the window non-empty when
# there is only a single article (start == end).
start = result_df.index.min()
end = result_df.index.max() + dt.timedelta(hours=1)

# auto_adjust=False is passed explicitly: the 'Adj Close' column is only
# returned when prices are not auto-adjusted, and the library default for this
# flag differs between yfinance releases.
stock_data = yf.download(
    ticker,
    start=start,
    end=end,
    interval='1h',
    auto_adjust=False,
    progress=False
)[['Adj Close']]

# Map compound sentiment onto a reversed 'jet' colormap, scaled to the observed
# range of scores.
cmap = plt.get_cmap('jet_r')
norm = plt.Normalize(result_df['compound'].min(), result_df['compound'].max())

fig, ax = plt.subplots(figsize=(10, 5))

stock_data.plot(xlabel='Time', ax=ax)

# One dashed vertical line per article, coloured by its compound score.
# NOTE(review): article timestamps are naive local times while the price index
# may be timezone-aware - confirm the markers line up with the intended bars.
for published_at, compound in result_df['compound'].items():
    ax.axvline(x=published_at, color=cmap(norm(compound)), linestyle='--')

ax.set_title(f'{ticker} Stock Price with News Sentiment')

sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
sm.set_array([])  # Create an empty array for the colorbar
# The mappable is not attached to any Axes, so tell the colorbar explicitly
# which Axes to take space from; recent Matplotlib refuses to guess.
cbar = fig.colorbar(sm, ax=ax)
cbar.set_label('Sentiment Score')