In [1]:
#This project requires you to download yfinance, BeautifulSoup, Matplotlib & NLTK packages. 
#Incase you don't have them installed, type the following commands in your python terminal - (python version 3.7).
#pip install yfinance
#pip install beautifulsoup4
#pip install nltk
#pip install matplotlib

In [2]:
#importing libraries
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
import nltk
#Uncomment the line below if your system does not have 'vader-lexicon' installed
#nltk.download('vader_lexicon')

In [3]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

In [4]:
#Extracting financial news from finviz
finviz_url = 'https://finviz.com/quote.ashx?t='

In [5]:
# List of ticker - AMZN - Amazon, FB - Facebook, GOOG - Google, AAPL - Apple, MSFT - Microsoft, TSLA - Tesla
tickers = ['AMZN','FB', 'GOOG' 'AAPL', 'MSFT', 'TSLA']

In [6]:
#Empty Dictionary
news_tables = {}

In [7]:
#Requesting data 
for ticker in tickers:
    url = finviz_url + ticker
    #Faking a user agent so that python requests are not blocked   
    req = Request(url = url, headers = {'user-agent':'my-app/0.0.1'})
    response = urlopen(req)

    #Parsing HTML documents using Beautiful Soup
    html = BeautifulSoup(response,'html')
    
    #Saving the News Table in a dictionary
    news_table = html.find(id = 'news-table')
    news_tables[ticker] = news_table

HTTPError: HTTP Error 404: Not Found

In [None]:
#Test Run
#fb_data = news_table['FB']
#fb_rows = fb_data.findAll('tr')

In [None]:
#for index, row in enumerate(fb_rows):
    #title = row.a.text()
    #timestamp = row.td.text
    #print(timestamp + ' ' + title)

In [None]:
#Empty List
parsed_data=[]

In [None]:
#Decomposing the dictionary 4 into relevant items - Ticker, Date,Time, News Title
for ticker, news_table in news_tables.items():
    for row in news_table.findAll('tr'):
        title = row.a.get_text()
        #Splitting date after every space
        date_data = row.td.text.split(' ')
        
        #Check if date_data has two elements (date + time) or just 1 element (time)
        if len(date_data)==1:
            time=date_data[0]
        else:
            date = date_data[0]
            time = date_data[1]
        #Appending 
        parsed_data.append([ticker, date, time, title])

In [None]:
#Converting list into pandas dataframe
df = pd.DataFrame(parsed_data,columns=['Ticker', 'Date', 'Time', 'Title'])

In [None]:
df.head()

In [None]:
#Calling Sentiment Intensity Analyzer Function & assigning it to variable 'vader'
vader = SentimentIntensityAnalyzer()

In [None]:
#Function 'f' to calculate polarity scores of News Title
#Interested only in compound Scores
f = lambda title: vader.polarity_scores(title)['compound']
df['Compound'] = df['Title'].apply(f)

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
#Converting Date column (string) data into Date format
df['Date'] = pd.to_datetime(df.Date).dt.date

In [None]:
#Creating a new dataframe using Groupby function & storing mean of compound scores
mean_df = df.groupby(['Ticker','Date']).mean()

In [None]:
mean_df.head()

In [None]:
mean_df.tail()

In [None]:
#convert the dataframe into unstacked format.
mean_df = mean_df.unstack()

#Transposing the matrix:
#1. Ticker Row is converted to Columns
#2. The xs() function is used to get cross-section from the Series/DataFrame.
#This method takes a key argument to select data at a particular level of a MultiIndex.

mean_df = mean_df.xs('Compound',axis='columns').transpose()
mean_df.head()

In [None]:
#Plot Compound Scores of each stock into a Bar chart
mean_df.plot(kind='bar',figsize=(18,7), grid=True)
plt.show()

In [None]:
#Downloading historical data using yfinance. We are interested only in 'Adjusted Closing Price'
df2 = pd.DataFrame()
for ticker in tickers:
    df2[ticker] = yf.download(ticker,start = '2021-02-25')['Adj Close']

In [None]:
df2.head()

In [None]:
df2_returns = df2.pct_change()
df2_returns.tail()