<a href="https://colab.research.google.com/github/ishaanpaul98/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports #

In [97]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from wordcloud import WordCloud
import re

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import yfinance as yf
import time
import requests
import io

In [98]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [99]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [100]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,classification_report
from sklearn.metrics import plot_confusion_matrix

# Stock Data Helper Functions #

In [101]:
def getStockDataDaily(symbol, day):
    print("Getting stock data for stock $"+symbol)
    df = yf.download(symbol, start=day, period = "1d")
    return df

getStockDataDaily('AAPL', "2022-12-21")

Getting stock data for stock $AAPL
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-21,132.979996,136.809998,132.75,135.449997,135.449997,85928000
2022-12-22,134.350006,134.559998,130.300003,132.229996,132.229996,77852100
2022-12-23,130.919998,132.419998,129.639999,131.860001,131.860001,63771000


In [102]:
def arrayToString(arr):
    print("Starting array to list")
    listToStr = ' '.join([str(elem) for elem in arr])
    return listToStr


In [103]:
def getMultiStockDataDaily(symbols, day):
    if len(symbols) > 1:
        print("Length of symbols array is more than 1. STARTING ARRAYTOSTRING")
        symbols = arrayToString(symbols)
    print("Getting stock data for stock $"+symbols)
    df = yf.download(symbols, start=day, period = "1d", group_by='ticker')
    return df

getMultiStockDataDaily(['AAPL', 'TSLA'], "2022-12-21")

Length of symbols array is more than 1. STARTING ARRAYTOSTRING
Starting array to list
Getting stock data for stock $AAPL TSLA
[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-12-21,139.339996,141.259995,135.889999,137.570007,137.570007,145417400,132.979996,136.809998,132.75,135.449997,135.449997,85928000
2022-12-22,136.0,136.630005,122.260002,125.349998,125.349998,210090300,134.350006,134.559998,130.300003,132.229996,132.229996,77852100
2022-12-23,126.370003,128.619995,121.019997,123.150002,123.150002,166396100,130.919998,132.419998,129.639999,131.860001,131.860001,63771000


In [104]:
def getStockClose(symbol):
    print("Getting stock close for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Close'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockClose("AAPL")

Getting stock close for stock $AAPL
AAPL 131.86000061035156


131.86000061035156

In [105]:
def getStockOpen(symbol):
    print("Getting stock open for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Open'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 130.9199981689453


130.9199981689453

In [106]:
def getStockHigh(symbol):
    print("Getting stock high for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['High'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockHigh("AAPL")

Getting stock high for stock $AAPL
AAPL 132.4199981689453


132.4199981689453

In [107]:
def getStockLow(symbol):
    print("Getting stock low for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Low'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockLow("AAPL")

Getting stock low for stock $AAPL
AAPL 129.63999938964844


129.63999938964844

# Gathering FinViz Data #

In [108]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']

In [109]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL
current url is: https://finviz.com/quote.ashx?t=TSLA
current url is: https://finviz.com/quote.ashx?t=AMZN


Recent News Headlines for AAPL: 
Apple Japan hit with $98 million in back taxes- Nikkei ( Dec-26-22 01:06PM )
Apple Japan hit with $98 mln in back taxes- Nikkei ( 01:04PM )
Apple Stock: Bear vs. Bull ( 07:19AM )


Recent News Headlines for TSLA: 
Dow Jones Futures Rise: Market Rally Not Finished Yet; Tesla Shanghai Production Halted ( Dec-26-22 06:22PM )
Dow Jones Futures Loom: Market Rally Not Finished Yet; Tesla Shanghai Production Halted ( 02:42PM )
Dow Jones Futures: Market Rally Not Finished Yet; Tesla Shanghai Production Halted ( 12:48PM )


Recent News Headlines for AMZN: 
Amazon's Advertising Business Still Has Lots of Room for Growth ( Dec-26-22 09:40AM )
Better Buy: Netflix vs. Amazon ( 08:30AM )
2 Investing Lessons I Learned From the Stock Market in 2022 ( 06:14AM )


In [110]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Dec-26-22 01:06PMApple Japan hit with $98 million in back taxes- Nikkei Reuters
01:04PMApple Japan hit with $98 mln in back taxes- Nikkei Reuters
07:19AMApple Stock: Bear vs. Bull Motley Fool
06:01AMWarren Buffett vs. the Market: What It Means for Investors Motley Fool
05:07AMThe Media Industry Is Handing Apple and Amazon a Big Opportunity Motley Fool

05:00AM
Loading…

05:00AMCEOs from Elon Musk to Jamie Dimon fought to bring workers back to the office in 2022. Heres who wonand who lostthe great return-to-office war Fortune
Dec-25-22 11:17AM12 Most Advanced Countries in Electronics Insider Monkey
11:14AMWant Unstoppable Stocks for 2023 and Beyond? Focus on Competitive Advantages Motley Fool
09:01AMTarget, Amazon and 4 More Retailers That Will Reward You for Turning in Your Old Stuff GOBankingRates
08:10AMBetter Buy: Apple vs. Nvidia Motley Fool
06:00AMInvestors in Apple (NASDAQ:AAPL) have made a solid return of 228% over the past five years Simply Wall St.
05:30AM3 Investment Moves to

# Sentiment Analysis of FinViz data #

In [111]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [112]:
# Sentiment Analysis
analyzer = SentimentIntensityAnalyzer()

columns = ['Ticker', 'Date', 'Time', 'Headline']
news = pd.DataFrame(parsed_news, columns=columns)
scores = news['Headline'].apply(analyzer.polarity_scores).tolist()

df_scores = pd.DataFrame(scores)
news = news.join(df_scores, rsuffix='_right')

In [113]:
# View Data 
#news = pd.DataFrame()
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers: 
    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())
    
    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)
    
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)



              Date     Time    neg    neu    pos  compound
Ticker                                                    
AAPL    2022-12-26  01:06PM  0.000  1.000  0.000    0.0000
AAPL    2022-12-26  01:04PM  0.000  1.000  0.000    0.0000
AAPL    2022-12-26  07:19AM  0.326  0.674  0.000   -0.4404
AAPL    2022-12-26  06:01AM  0.209  0.791  0.000   -0.4404
AAPL    2022-12-26  05:07AM  0.177  0.549  0.274    0.1531


              Date     Time    neg    neu    pos  compound
Ticker                                                    
TSLA    2022-12-26  06:22PM  0.000  1.000  0.000    0.0000
TSLA    2022-12-26  02:42PM  0.112  0.888  0.000   -0.2263
TSLA    2022-12-26  12:48PM  0.000  1.000  0.000    0.0000
TSLA    2022-12-26  11:16AM  0.379  0.621  0.000   -0.6705
TSLA    2022-12-26  09:19AM  0.000  0.897  0.103    0.1280


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AMZN    2022-12-26  09:40AM  0.176  0.667  0.158  

In [114]:
#Testing helper functions from stock_helper_functions.ipynb
aapl_open = getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 130.9199981689453


# Creating Dataset #

In [115]:
#Defining a new dataframe called 'dataset'
dataset = pd.DataFrame()

In [116]:
#Pulling stock data
for i in tickers:
    dataset = getStockDataDaily(i, day='2022-12-23')
    print(dataset)
    

The stock name is: $AAPL
Getting stock data for stock $AAPL
[*********************100%***********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2022-12-23  130.919998  132.419998  129.639999  131.860001  131.860001   

              Volume  
Date                  
2022-12-23  63771000  
The stock name is: $TSLA
Getting stock data for stock $TSLA
[*********************100%***********************]  1 of 1 completed
                  Open        High         Low       Close   Adj Close  \
Date                                                                     
2022-12-23  126.370003  128.619995  121.019997  123.150002  123.150002   

               Volume  
Date                   
2022-12-23  166396100  
The stock name is: $AMZN
Getting stock data for stock $AMZN
[*********************100%***********************]  1 of 1 completed
             Open       Hi