<a href="https://colab.research.google.com/github/ishaanpaul98/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports #

In [188]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import re

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import yfinance as yf
import time
import requests
import io

In [189]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [190]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [191]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,classification_report
from sklearn.metrics import plot_confusion_matrix

# Stock Data Helper Functions #

In [192]:
def arrayToString(arr):
    print("Starting array to list")
    listToStr = ' '.join([str(elem) for elem in arr])
    return listToStr


In [193]:
def getStockDataDaily(symbols, day):
    symbols = arrayToString(symbols)
    print("Getting stock data for stock $"+symbols)
    df = yf.download(symbols, start=day, period = "1d")
    return df

getStockDataDaily(['AAPL', 'TSLA'], "2022-12-20")

Starting array to list
Getting stock data for stock $AAPL TSLA
[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,Adj Close,Adj Close,Close,Close,High,High,Low,Low,Open,Open,Volume,Volume
Unnamed: 0_level_1,AAPL,TSLA,AAPL,TSLA,AAPL,TSLA,AAPL,TSLA,AAPL,TSLA,AAPL,TSLA
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-12-20,132.300003,137.800003,132.300003,137.800003,133.25,148.470001,129.889999,137.660004,131.389999,146.050003,77256200,158881700


In [194]:
def getStockClose(symbol):
    print("Getting stock close for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Close'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockClose("AAPL")

Getting stock close for stock $AAPL
AAPL 132.3000030517578


132.3000030517578

In [195]:
def getStockOpen(symbol):
    print("Getting stock open for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Open'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 131.38999938964844


131.38999938964844

In [196]:
def getStockHigh(symbol):
    print("Getting stock high for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['High'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockHigh("AAPL")

Getting stock high for stock $AAPL
AAPL 133.25


133.25

In [197]:
def getStockLow(symbol):
    print("Getting stock low for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Low'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockLow("AAPL")

Getting stock low for stock $AAPL
AAPL 129.88999938964844


129.88999938964844

# Gathering FinViz Data #

In [198]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']

In [199]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL
current url is: https://finviz.com/quote.ashx?t=TSLA
current url is: https://finviz.com/quote.ashx?t=AMZN


Recent News Headlines for AAPL: 
Apple (AAPL) Stock Sinks As Market Gains: What You Should Know ( Dec-20-22 05:45PM )
Apple Partially Shifts Macbook Production to Vietnam ( 04:33PM )
How big tech defeated the biggest antitrust push in decades on Capitol Hill ( 04:11PM )


Recent News Headlines for TSLA: 
Dow Jones Futures Rise On Nike As Market Rally Snaps Losing Streak; Tesla Meltdown Continues ( Dec-20-22 09:52PM )
Musk Will Resign as Twitter CEO and Focus on Engineering Teams ( 09:24PM )
Elon Musk says hell step down as Twitter CEO when he finds someone foolish enough to take the job ( 09:15PM )


Recent News Headlines for AMZN: 
Wells Fargo, Amazon Look to Move On ( Dec-20-22 07:04PM )
USPS will electrify entire mail-delivery fleet within just a few years ( 04:21PM )
Pharmacies limit the purchase of kids' meds. How bad are 

In [200]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Dec-20-22 05:45PMApple (AAPL) Stock Sinks As Market Gains: What You Should Know Zacks
04:33PMApple Partially Shifts Macbook Production to Vietnam Investopedia
04:11PMHow big tech defeated the biggest antitrust push in decades on Capitol Hill LA Times
12:03PMAmazon settles with the EU over antitrust charges Yahoo Finance Video
09:49AM12 Best Stocks To Invest In Right Now Insider Monkey

09:43AM
Loading…

09:43AMNo love for meme stocks  stock-trading kids are loading up on Apple and index funds MarketWatch
09:41AMStocks mixed at open, Apple and Tesla move lower Yahoo Finance Video
09:31AMTarget, Amazon and 4 More Retailers That Will Reward You for Turning in Your Old Stuff GOBankingRates
09:09AMApple files for more construction at NW Austin campus American City Business Journals
09:08AMTwitter Rolls Out 'Blue For Business,' Visa Pitches Auto-Payments On Ethereum Blockchain, Justin Bieber Slams H&M: Today's Top Stories Benzinga
09:01AM1 Top Stock to Buy for 2023 and Beyond Motley Fool
08:

# Sentiment Analysis of FinViz data #

In [201]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [202]:
# View Data 
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers: 
    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())
    
    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)
    
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)



              Date     Time    neg    neu    pos  compound
Ticker                                                    
AAPL    2022-12-18  10:21AM  0.331  0.669  0.000   -0.7125
AAPL    2022-12-18  10:00AM  0.153  0.847  0.000   -0.4404
AAPL    2022-12-18  08:30AM  0.000  1.000  0.000    0.0000
AAPL    2022-12-18  07:50AM  0.157  0.703  0.141   -0.0772
AAPL    2022-12-18  07:30AM  0.195  0.805  0.000   -0.4404


              Date     Time    neg    neu    pos  compound
Ticker                                                    
TSLA    2022-12-18  06:15PM  0.163  0.837  0.000   -0.5106
TSLA    2022-12-18  05:01PM  0.000  0.737  0.263    0.3612
TSLA    2022-12-18  02:09PM  0.299  0.701  0.000   -0.7506
TSLA    2022-12-18  02:05PM  0.369  0.631  0.000   -0.8779
TSLA    2022-12-18  12:21PM  0.206  0.794  0.000   -0.3818


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AMZN    2022-12-18  06:11PM  0.091  0.909  0.000  

In [203]:
#Testing helper functions from stock_helper_functions.ipynb
aapl_open = getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 131.38999938964844


# Creating Dataset #

In [204]:
#Defining a new dataframe called 'dataset'
dataset = pd.DataFrame()

In [216]:
#Pulling stock data and adding stock data to dataframe
daily_stock = getStockDataDaily(tickers, '2022-12-20')
#dataset = pd.concat([dataset, daily_stock])
print(daily_stock.Close)

Starting array to list
Getting stock data for stock $AAPL TSLA AMZN
[*********************100%***********************]  3 of 3 completed
                  AAPL       AMZN        TSLA
Date                                         
2022-12-20  132.300003  85.190002  137.800003


In [206]:
#Get sentimenent from previous analysis and adding to dataset
dataset['sentiment'] = df
print(dataset)

        sentiment
Ticker           
AAPL        -0.05
TSLA        -0.10
AMZN        -0.11
