#### Install all required libraries

In [18]:
!pip install flask
!pip install nltk
!pip install urllib3
!pip install bs4
!pip install plotly



## Import Libraries

In [19]:
# Import libraries
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
# NLTK VADER for sentiment analysis
import nltk
nltk.downloader.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# for extracting data from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\CLOUDSEALS\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


## Get HTML News Headlines Table from FinViz

In [20]:
tickers = ['AMZN', 'TSLA', 'GOOG']
news_tables = {}
for ticker in tickers:
    url = finviz_url + ticker
    req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}) 
    response = urlopen(req)    
    # Read the contents of the file into 'html'
    html = BeautifulSoup(response)
    # Find 'news-table' in the Soup and load it into 'news_table'
    news_table = html.find(id='news-table')
    # Add the table to our dictionary
    news_tables[ticker] = news_table

In [21]:
def get_news(ticker):
    url = finviz_url + ticker
    req = Request(url=url,headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:20.0) Gecko/20100101 Firefox/20.0'}) 
    response = urlopen(req)    
    # Read the contents of the file into 'html'
    html = BeautifulSoup(response)
    # Find 'news-table' in the Soup and load it into 'news_table'
    news_table = html.find(id='news-table')
    return news_table

# Read one single day of headlines for 'AMZN' 
amzn = news_tables['AMZN']
# Get all the table rows tagged in HTML with <tr> into 'amzn_tr'
amzn_tr = amzn.findAll('tr')

for i, table_row in enumerate(amzn_tr):
    # Read the text of the element 'a' into 'link_text'
    a_text = table_row.a.text
    # Read the text of the element 'td' into 'data_text'
    td_text = table_row.td.text
    # Print the contents of 'link_text' and 'data_text' 
    print(a_text)
    print(td_text)
    # Exit after printing 4 rows of data
    

Is Amazon Stock a Buy Now?
Feb-09-23 06:07AM
First came fast-fashion giant Shein. Now other Chinese brands are gunning for the same success
05:00AM
Amazon's first HQ2 incentive payment delayed to at least 2026
05:00AM
Supply Chain Management Definition and 11 Biggest Companies in the World
Feb-08-23 05:33PM
4 Reasons You Should Cancel Amazon Prime
05:01PM
Why Apple, Amazon, and Meta Platforms All Fell on Microsoft's AI News
04:51PM
S&P Global and AWS to Pioneer Next Generation Technology for Global Markets
04:23PM
Earnings Roundup: Apple, Amazon, Starbucks, and More
03:12PM
As Big Techs Growth and Innovation Slow, Its Market Dominance Endures
02:27PM
Amazon Takes On Mark Cuban's Cost Plus Drugs Co. With Subscription Plan As Giants Look To Disrupt $365 Billion Prescription Market
01:57PM
2 Key Things From Amazon's Earnings Call Investors Should Know
11:29AM
Here's How Rivian Stock Dealt Amazon Its First Annual Loss Since 2014
11:00AM
Zoom and eBay join Dell, Okta, Spotify, Google, Intel

## Parse News into DataFrame

In [22]:
def parse_news(news_table):
    parsed_news = []
    
    for x in news_table.findAll('tr'):
        text = x.a.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
        else:
            date = date_scrape[0]
            time = date_scrape[1]
            
        parsed_news.append([date, time, text])
        columns = ['date', 'time', 'headline']
        parsed_news_df = pd.DataFrame(parsed_news, columns=columns)
        parsed_news_df['datetime'] = pd.to_datetime(parsed_news_df['date'] + ' ' + parsed_news_df['time'])
    return parsed_news_df

## Score News Sentiment and Save Results into DataFrame

In [23]:
parsed_news_df=parse_news(news_table)

In [24]:
def score_news(parsed_news_df):
    # Instantiate the sentiment intensity analyzer
    vader = SentimentIntensityAnalyzer()
    
    # Iterate through the headlines and get the polarity scores using vader
    scores = parsed_news_df['headline'].apply(vader.polarity_scores).tolist()

    # Convert the 'scores' list of dicts into a DataFrame
    scores_df = pd.DataFrame(scores)

    # Join the DataFrames of the news and the list of dicts
    parsed_and_scored_news = parsed_news_df.join(scores_df, rsuffix='_right')        
    parsed_and_scored_news = parsed_and_scored_news.set_index('datetime')    
    parsed_and_scored_news = parsed_and_scored_news.drop(['date', 'time'], 1)          
    parsed_and_scored_news = parsed_and_scored_news.rename(columns={"compound": "sentiment_score"})

    return parsed_and_scored_news

parsed_and_scored_news = score_news(parsed_news_df)
parsed_and_scored_news.head()


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



Unnamed: 0_level_0,headline,neg,neu,pos,sentiment_score
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-02-08 18:26:00,"Bret Taylor, Former Co-CEO of Salesforce, to C...",0.0,0.851,0.149,0.2732
2023-02-08 16:26:00,Google runs into technical complications durin...,0.0,1.0,0.0,0.0
2023-02-08 16:09:00,Microsofts Bing is the first threat to Googles...,0.224,0.658,0.118,-0.3818
2023-02-08 14:38:00,Why the AI stock rally isnt another meme craze...,0.0,0.772,0.228,0.3058
2023-02-08 14:27:00,"As Big Techs Growth and Innovation Slow, Its M...",0.0,0.533,0.467,0.7184


## Resample Sentiment by Hour and Date and Use Plotly to Plot It

In [25]:
def plot_hourly_sentiment(parsed_and_scored_news, ticker):
   
    # Group by date and ticker columns from scored_news and calculate the mean
    mean_scores = parsed_and_scored_news.resample('H').mean()

    # Plot a bar chart with plotly 
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Hourly Sentiment Scores')
    fig.show()
    
plot_hourly_sentiment(parsed_and_scored_news, ticker)

In [26]:
def plot_daily_sentiment(parsed_and_scored_news, ticker):
   
    # Group by date and ticker columns from scored_news and calculate the mean
    mean_scores = parsed_and_scored_news.resample('D').mean()

    # Plot a bar chart with plotly
    fig = px.bar(mean_scores, x=mean_scores.index, y='sentiment_score', title = ticker + ' Daily Sentiment Scores')
    fig.show()
    
plot_daily_sentiment(parsed_and_scored_news, ticker)

## Putting It All Together

In [29]:
ticker = 'AMZN'
news_table = get_news(ticker)
parsed_news_df = parse_news(news_table)
parsed_and_scored_news = score_news(parsed_news_df)
plot_hourly_sentiment(parsed_and_scored_news, ticker)
plot_daily_sentiment(parsed_and_scored_news, ticker)
#parsed_and_scored_news.head()


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.



In [30]:
ticker = 'TSLA'
news_table = get_news(ticker)
parsed_news_df = parse_news(news_table)
parsed_and_scored_news = score_news(parsed_news_df)
plot_hourly_sentiment(parsed_and_scored_news, ticker)
plot_daily_sentiment(parsed_and_scored_news, ticker)
#parsed_and_scored_news.head()


In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.

