<a href="https://colab.research.google.com/github/ishaanpaul98/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports #

In [1]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from wordcloud import WordCloud
import re

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import yfinance as yf
import time
import requests
import io

In [2]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [3]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [4]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,classification_report
from sklearn.metrics import plot_confusion_matrix

# Stock Data Helper Functions #

In [5]:
def arrayToString(arr):
    print("Starting array to list")
    listToStr = ' '.join([str(elem) for elem in arr])
    return listToStr


In [6]:
def getStockDataDaily(symbols, day):
    if len(symbols) > 1:
        symbols = arrayToString(symbols)
    print("Getting stock data for stock $"+symbols)
    df = yf.download(symbols, start=day, period = "1d", group_by='ticker')
    return df

getStockDataDaily(['AAPL', 'TSLA'], "2022-12-21")

Starting array to list
Getting stock data for stock $AAPL TSLA
[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-12-21,139.339996,141.259995,135.889999,137.570007,137.570007,145051100,132.979996,136.809998,132.75,135.449997,135.449997,85859600
2022-12-22,136.0,136.600006,123.910004,124.18,124.18,115160694,134.352005,134.550003,130.699997,130.729996,130.729996,35043191


In [7]:
def getStockClose(symbol):
    print("Getting stock close for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Close'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockClose("AAPL")

Getting stock close for stock $AAPL
AAPL 130.7310028076172


130.7310028076172

In [8]:
def getStockOpen(symbol):
    print("Getting stock open for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Open'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 134.3520050048828


134.3520050048828

In [9]:
def getStockHigh(symbol):
    print("Getting stock high for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['High'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockHigh("AAPL")

Getting stock high for stock $AAPL
AAPL 134.5500030517578


134.5500030517578

In [10]:
def getStockLow(symbol):
    print("Getting stock low for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Low'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockLow("AAPL")

Getting stock low for stock $AAPL
AAPL 130.6999969482422


130.6999969482422

# Gathering FinViz Data #

In [11]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']

In [12]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL
current url is: https://finviz.com/quote.ashx?t=TSLA
current url is: https://finviz.com/quote.ashx?t=AMZN


Recent News Headlines for AAPL: 
Is The Trade Desk Stock a Buy Now? ( Dec-22-22 10:15AM )
6 reasons why Apple stock will rebound in 2023: Analyst ( 10:12AM )
Alphabet (GOOGL) Boosts YouTube Streaming Efforts With NFL Deal ( 09:36AM )


Recent News Headlines for TSLA: 
U.S. Senate Finance Committee asks GM, Tesla, Toyota about Chinese supply chain ( Dec-22-22 12:01PM )
UPDATE 1-U.S. Senate Finance Committee asks GM, Tesla, Toyota about Chinese supply chain ( 11:52AM )
Why Tesla Investors Have New Concerns Today ( 11:51AM )


Recent News Headlines for AMZN: 
Why Alphabet, Amazon, and Microsoft Stocks All Fell Thursday ( Dec-22-22 11:59AM )
Will Shopify Stock Recover in 2023? ( 11:00AM )
Alphabet (GOOGL) Boosts YouTube Streaming Efforts With NFL Deal ( 09:36AM )


In [13]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Dec-22-22 10:15AMIs The Trade Desk Stock a Buy Now? Motley Fool
10:12AM6 reasons why Apple stock will rebound in 2023: Analyst Yahoo Finance Video
09:36AMAlphabet (GOOGL) Boosts YouTube Streaming Efforts With NFL Deal Zacks
09:25AMGoldman Is Banking on Apple Partnership in Consumer Push The Wall Street Journal
09:21AMThe hottest tech trends to watch in 2023 Yahoo Finance

09:15AM
Loading…

09:15AMCould Apple Still Be the World's Largest Company in 2023? Motley Fool
08:00AMApple Stock In 2023: Will Goggles Finally Become A Reality? Investor's Business Daily
06:21AMAs U.S. fails to crack down on Big Tech, Europe shows what progress looks like MarketWatch
06:16AM6 big reasons Apple stock is a must buy for 2023: analyst Yahoo Finance
06:02AM2 Magnificent Semiconductor Stocks Down 46% and 51% to Buy Before the Next Bull Market Motley Fool
06:00AMApple's Steady Diet of Resistance and My Contrarian Conundrum TheStreet.com
05:31AMBitcoin interest surged in 2022 as 'crypto winter' arrived: Morn

# Sentiment Analysis of FinViz data #

In [14]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [15]:
# View Data 
news = pd.DataFrame()
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers: 
    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())
    
    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)
    
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)

AttributeError: 'DataFrame' object has no attribute 'Date'

In [None]:
#Testing helper functions from stock_helper_functions.ipynb
aapl_open = getStockOpen("AAPL")

# Creating Dataset #

In [None]:
#Defining a new dataframe called 'dataset'
dataset = pd.DataFrame()

In [None]:
#Pulling stock data
for i in tickers:
    dataset = getStockDataDaily(i, day='2022-22-12')
    print(dataset)
    