<a href="https://colab.research.google.com/github/ishaanpaul98/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports #

In [407]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from wordcloud import WordCloud
import re

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import yfinance as yf
import time
import requests
import io

In [408]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [409]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [410]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,classification_report
from sklearn.metrics import plot_confusion_matrix

# Stock Data Helper Functions #

In [411]:
def arrayToString(arr):
    print("Starting array to list")
    listToStr = ' '.join([str(elem) for elem in arr])
    return listToStr


In [412]:
def getStockDataDaily(symbols, day):
    symbols = arrayToString(symbols)
    print("Getting stock data for stock $"+symbols)
    df = yf.download(symbols, start=day, period = "1d", group_by='ticker')
    return df

getStockDataDaily(['AAPL', 'TSLA'], "2022-12-21")

Starting array to list
Getting stock data for stock $AAPL TSLA
[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-12-21,139.339996,141.259995,135.889999,137.570007,137.570007,145051100,132.979996,136.809998,132.75,135.449997,135.449997,85859600


In [413]:
def getStockClose(symbol):
    print("Getting stock close for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Close'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockClose("AAPL")

Getting stock close for stock $AAPL
AAPL 135.4499969482422


135.4499969482422

In [414]:
def getStockOpen(symbol):
    print("Getting stock open for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Open'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 132.97999572753906


132.97999572753906

In [415]:
def getStockHigh(symbol):
    print("Getting stock high for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['High'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockHigh("AAPL")

Getting stock high for stock $AAPL
AAPL 136.80999755859375


136.80999755859375

In [416]:
def getStockLow(symbol):
    print("Getting stock low for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Low'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockLow("AAPL")

Getting stock low for stock $AAPL
AAPL 132.75


132.75

# Gathering FinViz Data #

In [417]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']

In [418]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL
current url is: https://finviz.com/quote.ashx?t=TSLA
current url is: https://finviz.com/quote.ashx?t=AMZN


Recent News Headlines for AAPL: 
Why Amazon, Apple, and Chewy Stocks All Rallied on Wednesday ( Dec-21-22 06:37PM )
Apple to launch first AR headset in 2023 ( 04:56PM )
Tech stocks: The 'best' and worst performers of 2022 ( 04:18PM )


Recent News Headlines for TSLA: 
Analysis-Short sellers gain nearly $304 billion after tumble in U.S. stocks ( Dec-22-22 01:04AM )
Tesla offers discount on some car models in U.S., Canada ( 12:11AM )
Dow Jones Futures: S&P 500 Regains Key Level, But Stay Cautious; Tesla Doubles U.S. Discount ( Dec-21-22 10:04PM )


Recent News Headlines for AMZN: 
Why Amazon, Apple, and Chewy Stocks All Rallied on Wednesday ( Dec-21-22 06:37PM )
How the EU Took On Big Tech in 2022 ( 03:52PM )
Justin Bieber the latest major act to sell music rights in reported $200 million deal ( 03:39PM )


In [419]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Dec-21-22 06:37PMWhy Amazon, Apple, and Chewy Stocks All Rallied on Wednesday Motley Fool
04:56PMApple to launch first AR headset in 2023 Yahoo Finance Video
04:18PMTech stocks: The 'best' and worst performers of 2022 Yahoo Finance
03:52PMHow the EU Took On Big Tech in 2022 WSJ
03:39PMJustin Bieber the latest major act to sell music rights in reported $200 million deal Yahoo Finance

03:16PM
Loading…

03:16PMApple's Regulatory Risks Led Pullback Likely To Render A Buying Opportunity, Analyst Says Benzinga
02:51PMTeslas stock drop has been bad. But this company has wiped out more investor wealth in 2022. MarketWatch
02:25PM5 Things You Might Not Know About Apple CEO Tim Cook Benzinga
12:47PMApple Stock Has Struggled This Year. The Case for a Better Next Year. Barrons.com
12:00PM2 Growth Stocks to Buy in December Motley Fool
11:07AMApple Stock Has Struggled in 2022. But Analyst Sees Resilient Demand for Tech Giants Products Next Year. Barrons.com
10:44AMTech stocks had a rough year. Appl

# Sentiment Analysis of FinViz data #

In [420]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [421]:
# View Data 
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers: 
    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())
    
    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)
    
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)



              Date     Time    neg    neu    pos  compound
Ticker                                                    
AAPL    2022-12-18  10:21AM  0.331  0.669  0.000   -0.7125
AAPL    2022-12-18  10:00AM  0.153  0.847  0.000   -0.4404
AAPL    2022-12-18  08:30AM  0.000  1.000  0.000    0.0000
AAPL    2022-12-18  07:50AM  0.157  0.703  0.141   -0.0772
AAPL    2022-12-18  07:30AM  0.195  0.805  0.000   -0.4404


              Date     Time    neg    neu    pos  compound
Ticker                                                    
TSLA    2022-12-18  06:15PM  0.163  0.837  0.000   -0.5106
TSLA    2022-12-18  05:01PM  0.000  0.737  0.263    0.3612
TSLA    2022-12-18  02:09PM  0.299  0.701  0.000   -0.7506
TSLA    2022-12-18  02:05PM  0.369  0.631  0.000   -0.8779
TSLA    2022-12-18  12:21PM  0.206  0.794  0.000   -0.3818


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AMZN    2022-12-18  06:11PM  0.091  0.909  0.000  

In [422]:
#Testing helper functions from stock_helper_functions.ipynb
aapl_open = getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 132.97999572753906


# Creating Dataset #

In [423]:
#Defining a new dataframe called 'dataset'
dataset = pd.DataFrame()

In [424]:
#Pulling stock data
daily_stock = getStockDataDaily(tickers, '2022-12-21')
print(daily_stock)

Starting array to list
Getting stock data for stock $AAPL TSLA AMZN
[*********************100%***********************]  3 of 3 completed
                  TSLA                                                  \
                  Open        High         Low       Close   Adj Close   
Date                                                                     
2022-12-21  139.339996  141.259995  135.889999  137.570007  137.570007   

                        AMZN                                              \
               Volume   Open       High        Low      Close  Adj Close   
Date                                                                       
2022-12-21  145051100  86.18  87.230003  85.209999  86.769997  86.769997   

                            AAPL                                              \
              Volume        Open        High     Low       Close   Adj Close   
Date                                                                           
2022-12-21  59185400 

In [448]:
#for i in tickers:
    #df_individual = daily_stock[i]
    #print(df_individual)
daily_stock.index[0]

('AAPL', 'Adj Close', Timestamp('2022-12-21 00:00:00'))