In [None]:
# IMPORTING LIBRARIES
from urllib import response
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd 
import matplotlib.pyplot as plt

In [None]:
# GETTING HTML CODE FROM WEBSITE
# For every ticker, download its finviz quote page and keep the element with
# id="news-table" (the table holding that ticker's news headlines).
finVizUrl = 'https://finviz.com/quote.ashx?t=' # general url format
tickers = ['AMZN', 'GOOG', 'AAPL'] # list of tickers

newsTables = {} # maps ticker -> parsed 'news-table' element for that ticker

for ticker in tickers:
    url = finVizUrl + ticker # complete URL for each ticker

    # send an explicit user-agent header instead of urllib's default one
    req = Request(url = url, headers = {'user-agent': 'my-app'})

    # the context manager closes the HTTP connection as soon as parsing is done;
    # 'html.parser' pins the parser so results do not depend on which optional
    # parsers (lxml, html5lib) happen to be installed
    with urlopen(req) as page:
        html = BeautifulSoup(page, 'html.parser')

    newsTable = html.find(id = 'news-table') # whole table that stores the news articles for this ticker
    if newsTable is None:
        # fail here with a clear message rather than later with an AttributeError on None
        raise ValueError("no element with id 'news-table' found at " + url)

    newsTables[ticker] = newsTable # store table of news articles under its ticker

In [None]:
# GETTING NEWSPAPER TITLES
# Walk every <tr> of every ticker's news table and collect one
# [ticker, date, time, title] record per headline row.
parsedData = [] # list of [ticker, date, time, title] records

for ticker, newsTable in newsTables.items():
    # reset per ticker: a time-only row reuses the date of the previous row in
    # the SAME table, and must never inherit one from another ticker's table
    date = None

    for row in newsTable.find_all('tr'): # every table row
        # skip rows that carry no headline link or no timestamp cell
        if row.a is None or row.td is None:
            continue

        title = row.a.get_text() # text of the link is the headline

        # split() (no argument) ignores leading/trailing/repeated whitespace,
        # so padded cell text still yields just the date/time tokens
        dateData = row.td.get_text().split()
        if not dateData: # empty timestamp cell, nothing to record
            continue

        if len(dateData) == 1:
            time = dateData[0] # only a time is given; keep the last seen date
        else:
            date = dateData[0] # row starts a new date
            time = dateData[1]

        parsedData.append([ticker, date, time, title]) # store record


df = pd.DataFrame(parsedData, columns = ['ticker', 'date', 'time', 'title']) # one row per headline

In [None]:
# GETTING SENTIMENT
# Score every headline with VADER, average the compound score per ticker and
# date, and draw the averages as a grouped bar chart.
vader = SentimentIntensityAnalyzer() # initialize vader as the sentiment intensity analyzer


def f(title):
    """Return the VADER 'compound' sentiment score for one headline string."""
    return vader.polarity_scores(title)['compound']


df['compound'] = df['title'].apply(f) # compound score for every title
# NOTE(review): assumes every value in 'date' is parseable by pd.to_datetime — confirm against the scraped data
df['date'] = pd.to_datetime(df.date).dt.date # convert date strings to datetime.date objects

# average only the numeric 'compound' column: calling mean() on the whole frame
# would also try to average the string columns 'time' and 'title'
meanDf = df.groupby(['ticker', 'date'])['compound'].mean() # mean compound score per (ticker, date)
meanDf = meanDf.unstack(level = 'ticker') # rows: dates, columns: tickers

# create the axes first and hand them to pandas so the figure size is applied
# to the figure that is actually drawn
fig, ax = plt.subplots(figsize = (10, 8))
meanDf.plot(kind = "bar", ax = ax) # one group of bars per date, one bar per ticker
ax.set(title = 'Mean headline sentiment per ticker', xlabel = 'date', ylabel = 'mean compound score')
plt.show() # show the bar graph