In [12]:
# Import libraries
import pandas as pd
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

In [13]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']

In [26]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL
current url is: https://finviz.com/quote.ashx?t=TSLA
current url is: https://finviz.com/quote.ashx?t=AMZN


Recent News Headlines for AAPL: 
Printing a_text: Dow Jones Jumps Again On Fed Hopes; Twitter Stock Skyrockets As Tesla's Elon Musk Gives In
Dow Jones Jumps Again On Fed Hopes; Twitter Stock Skyrockets As Tesla's Elon Musk Gives In ( Oct-05-22 12:59AM )
Printing a_text: Apple suppliers moved operations closer to Cupertino amid pandemic
Apple suppliers moved operations closer to Cupertino amid pandemic ( Oct-04-22 06:22PM )
Printing a_text: Stocks moving after hours: Twitter, Amylyx Pharmaceuticals, AutoZone, Airbnb
Stocks moving after hours: Twitter, Amylyx Pharmaceuticals, AutoZone, Airbnb ( 05:03PM )


Recent News Headlines for TSLA: 
Printing a_text: Dow Jones Jumps Again On Fed Hopes; Twitter Stock Skyrockets As Tesla's Elon Musk Gives In
Dow Jones Jumps Again On Fed Hopes; Twitter Stock Skyrockets As Tesla's Elon Musk Give

In [35]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Oct-05-22 12:59AMDow Jones Jumps Again On Fed Hopes; Twitter Stock Skyrockets As Tesla's Elon Musk Gives In Investor's Business Daily
Oct-04-22 06:22PMApple suppliers moved operations closer to Cupertino amid pandemic American City Business Journals
05:03PMStocks moving after hours: Twitter, Amylyx Pharmaceuticals, AutoZone, Airbnb Yahoo Finance Video
04:59PMWhat EU's USB-C mandate may mean for Apple Yahoo Finance Video
04:29PMApple Discriminated Against Pro-Union Staff, NLRB Alleges Bloomberg

04:21PM
Loading…

04:21PMDow Jones Rallies; These New Cathie Wood Buys Soar; Twitter Stock Flies As Elon Musk Bites Takeover Bullet Investor's Business Daily
02:25PMEU passes law forcing Apple to change iPhone charger The Independent
01:58PMApple Loses Major Court Case TheStreet.com
11:57AMEU leaves Apple in a tangle amid fight for USB-C standardization Yahoo Finance Video
11:46AMApple Told to Ditch Unique iPhone Charger in EU The Wall Street Journal
11:22AMApple Suppliers Add Manufacturing Site

In [38]:
import nltk
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...


True

In [39]:
# Sentiment Analysis
analyzer = SentimentIntensityAnalyzer()

columns = ['Ticker', 'Date', 'Time', 'Headline']
news = pd.DataFrame(parsed_news, columns=columns)
scores = news['Headline'].apply(analyzer.polarity_scores).tolist()

df_scores = pd.DataFrame(scores)
news = news.join(df_scores, rsuffix='_right')

In [40]:
# View Data 
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers: 
    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())
    
    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)
    
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)



              Date     Time  neg    neu    pos  compound
Ticker                                                  
AAPL    2022-10-05  12:59AM  0.0  0.872  0.128    0.4215
AAPL    2022-10-04  06:22PM  0.0  1.000  0.000    0.0000
AAPL    2022-10-04  05:03PM  0.0  1.000  0.000    0.0000
AAPL    2022-10-04  04:59PM  0.0  1.000  0.000    0.0000
AAPL    2022-10-04  04:29PM  0.0  1.000  0.000    0.0000


              Date     Time  neg    neu    pos  compound
Ticker                                                  
TSLA    2022-10-05  12:59AM  0.0  0.872  0.128    0.4215
TSLA    2022-10-04  11:39PM  0.0  0.783  0.217    0.3612
TSLA    2022-10-04  11:13PM  0.0  0.749  0.251    0.5719
TSLA    2022-10-04  11:12PM  0.0  0.761  0.239    0.5719
TSLA    2022-10-04  07:32PM  0.0  1.000  0.000    0.0000


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AMZN    2022-10-04  06:39PM  0.000  0.822  0.178    0.3818
AMZN    2022-10-04 