<a href="https://colab.research.google.com/github/ishaanpaul98/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports #

In [225]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from wordcloud import WordCloud
import re

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import yfinance as yf
import time
import requests
import io

In [226]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [227]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [228]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,classification_report
from sklearn.metrics import plot_confusion_matrix

# Stock Data Helper Functions #

In [229]:
def getStockDataDaily(symbol, day):
    print("Getting stock data for stock $"+symbol)
    df = yf.download(symbol, start=day, period = "1d")
    return df

getStockDataDaily('AAPL', "2022-12-21")

Getting stock data for stock $AAPL
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-21,132.979996,136.809998,132.75,135.449997,135.449997,85928000
2022-12-22,134.350006,134.559998,130.300003,132.229996,132.229996,77852100
2022-12-23,130.919998,132.419998,129.639999,131.860001,131.860001,63814900
2022-12-27,131.380005,131.410004,128.720001,130.029999,130.029999,69007800
2022-12-28,129.669998,131.029999,125.870003,126.040001,126.040001,85438400
2022-12-29,127.989998,130.479996,127.730003,129.610001,129.610001,75703700
2022-12-30,128.410004,129.949997,127.43,129.929993,129.929993,76960600
2023-01-03,130.279999,130.899994,124.169998,125.07,125.07,112117500
2023-01-04,126.889999,128.660004,125.080002,126.360001,126.360001,89113600
2023-01-05,127.129997,127.769997,124.760002,125.019997,125.019997,80962700


In [230]:
def arrayToString(arr):
    print("Starting array to list")
    listToStr = ' '.join([str(elem) for elem in arr])
    return listToStr


In [231]:
def getMultiStockDataDaily(symbols, day):
    if len(symbols) > 1:
        print("Length of symbols array is more than 1. STARTING ARRAYTOSTRING")
        symbols = arrayToString(symbols)
    print("Getting stock data for stock $"+symbols)
    df = yf.download(symbols, start=day, period = "1d", group_by='ticker')
    return df

getMultiStockDataDaily(['AAPL', 'TSLA'], "2022-12-21")

Length of symbols array is more than 1. STARTING ARRAYTOSTRING
Starting array to list
Getting stock data for stock $AAPL TSLA
[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-12-21,139.339996,141.259995,135.889999,137.570007,137.570007,145417400,132.979996,136.809998,132.75,135.449997,135.449997,85928000
2022-12-22,136.0,136.630005,122.260002,125.349998,125.349998,210090300,134.350006,134.559998,130.300003,132.229996,132.229996,77852100
2022-12-23,126.370003,128.619995,121.019997,123.150002,123.150002,166989700,130.919998,132.419998,129.639999,131.860001,131.860001,63814900
2022-12-27,117.5,119.669998,108.760002,109.099998,109.099998,208643400,131.380005,131.410004,128.720001,130.029999,130.029999,69007800
2022-12-28,110.349998,116.269997,108.239998,112.709999,112.709999,221070500,129.669998,131.029999,125.870003,126.040001,126.040001,85438400
2022-12-29,120.389999,123.57,117.5,121.82,121.82,221923300,127.989998,130.479996,127.730003,129.610001,129.610001,75703700
2022-12-30,119.949997,124.480003,119.75,123.18,123.18,157304500,128.410004,129.949997,127.43,129.929993,129.929993,76960600
2023-01-03,118.470001,118.800003,104.639999,108.099998,108.099998,231402800,130.279999,130.899994,124.169998,125.07,125.07,112117500
2023-01-04,109.110001,114.589996,107.519997,113.639999,113.639999,180389000,126.889999,128.660004,125.080002,126.360001,126.360001,89113600
2023-01-05,110.510002,111.75,107.160004,110.339996,110.339996,157986300,127.129997,127.769997,124.760002,125.019997,125.019997,80962700


In [232]:
def getStockClose(symbol):
    print("Getting stock close for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Close'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockClose("AAPL")

Getting stock close for stock $AAPL
AAPL 135.2100067138672


135.2100067138672

In [233]:
def getStockOpen(symbol):
    print("Getting stock open for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Open'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 136.81500244140625


136.81500244140625

In [234]:
def getStockHigh(symbol):
    print("Getting stock high for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['High'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockHigh("AAPL")

Getting stock high for stock $AAPL
AAPL 138.61000061035156


138.61000061035156

In [235]:
def getStockLow(symbol):
    print("Getting stock low for stock $"+symbol)
    ticker = yf.Ticker(symbol)
    data = ticker.history()
    #print(data)
    last_price = data['Low'].iloc[-1]
    print(symbol, last_price)
    return last_price

getStockLow("AAPL")

Getting stock low for stock $AAPL
AAPL 135.02999877929688


135.02999877929688

# Gathering FinViz Data (Today's News) #

In [236]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL']

In [237]:
from finvizfinance.news import News
fnews = News()
all_news = fnews.get_news()
all_news

{'news':        Date                                              Title  \
 0   07:14PM        Stocks Fall After Retail and Inflation Data   
 1   07:07PM  Asian Stocks to Drop on Deepening Growth Conce...   
 2   07:05PM  HMRC trials answer by text system to cut call ...   
 3   07:04PM  Why inflation is falling but prices are still ...   
 4   06:43PM  Filing reveals eye-popping amount former Disne...   
 ..      ...                                                ...   
 85  08:15AM  Irish House Price Growth Slows Adding to Signs...   
 86  08:02AM  Stocks making the biggest moves premarket: Uni...   
 87  08:00AM  Foreign Investors Pulled $91 Billion From Chin...   
 88  07:58AM          Trump slams evangelical Christian leaders   
 89  07:41AM  Taylor Swift Gave Universal Music a $230 Milli...   
 
                Source                                               Link  
 0         www.wsj.com  https://www.wsj.com/articles/global-stocks-mar...  
 1   www.bloomberg.com  https://ww

In [238]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL


Recent News Headlines for AAPL: 
Dow Jones Reverses Lower On Fed, Recession Fears; Tesla, Megacaps Hit Resistance ( Jan-18-23 07:28PM )
Stocks trending in after hours: Alcoa, Discover Financial, Vroom, Apple ( 05:04PM )
Apple delays development of AR glasses indefinitely: report ( 05:02PM )


In [239]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Jan-18-23 07:28PMDow Jones Reverses Lower On Fed, Recession Fears; Tesla, Megacaps Hit Resistance Investor's Business Daily
05:04PMStocks trending in after hours: Alcoa, Discover Financial, Vroom, Apple Yahoo Finance Video
05:02PMApple delays development of AR glasses indefinitely: report Fox Business
04:32PMApple to Expand Smart-Home Lineup, Taking On Amazon and Google Bloomberg
04:19PMBrazil antitrust agency to investigate MercadoLibre complaint against Apple Reuters

02:35PM
Loading…

02:35PMBig Tech braces for dismal profits, more job cuts Reuters
02:30PMApple wants to control everything from its chips to screens Yahoo Finance
02:20PMApple doing audit related to its human rights policy and labor practices this year Fox Business
02:11PMApple Could Boost Profits by Designing Its Own iPhone Parts, Analyst Says Barrons.com
02:10PMSpotify launches new attack on Apple for abusive behaviors MarketWatch
01:08PMApple Resurrects the Full-Size HomePod With a $299 Price Tag The Wall Street Jou

# Gathering Data From AlphaAdvantage for Historical News #

In [240]:
from decouple import config
import requests
import urllib.parse
import json
AAapikey = config('AAKey')

In [241]:
#Function to convert user provided date to date required by AlphaAdvantage
def toAADate(oldDate):
    newDate = str(oldDate) + 'T0001'
    return newDate

In [242]:
# Get data from AlphaAdvantage for one ticker for a particular day
def getHistoricNewsData(ticker, startDate = None, endDate = None):
    url = 'https://www.alphavantage.co/query?'
    if startDate and endDate:
        startDate = toAADate(startDate)
        endDate = toAADate(endDate)
        Myparams = {'function': 'NEWS_SENTIMENT', 'tickers': ticker, 'time_from': startDate, 'time_to': endDate, 'sort': 'LATEST','limit': 100, 'apikey': AAapikey}
    else:
        Myparams = {'function': 'NEWS_SENTIMENT', 'tickers': ticker, 'sort': 'LATEST','limit': 100, 'apikey': AAapikey}
    r = requests.get(url, params = Myparams)
    data = r.json()
    #return data
    historic_news = pd.DataFrame(columns=['Date', 'Headline', 'Ticker'])
    for i in data.get("feed"):
        test_date = i.get("time_published")
        test_date = test_date[:8]
        row = [test_date, i.get("title"), ticker]
        new_df = pd.DataFrame([row],columns=['Date', 'Headline', 'Ticker'])
        historic_news = pd.concat([historic_news, new_df], axis=0, ignore_index=True)
    return historic_news

historic_news = getHistoricNewsData('AAPL', '2023012', '20230117')
print(historic_news)

        Date                                           Headline Ticker
0   20230118   Apple HomePod Second-Gen: First Listen, Hands-On   AAPL
1   20230118            Why Qualcomm Stock Defied Gravity Today   AAPL
2   20230118  Dow Reverses Lower On Hawkish Fed, Weak Econom...   AAPL
3   20230118  Alphabet  ( GOOGL )  to Expand Portfolio With ...   AAPL
4   20230118  Apple Could Boost Profits by Designing Its Own...   AAPL
5   20230118  Dow Jones Falls Despite Cooling Inflation Sign...   AAPL
6   20230118  Microsoft Is Laying Off Thousands. Here's What...   AAPL
7   20230118  Sharply Lower PPI, Retail Sales Data Further S...   AAPL
8   20230118  Google Reportedly Working on Apple AirTag Alte...   AAPL
9   20230118                Here Come the FAANG Earnings Charts   AAPL
10  20230118  Apple HomePod  ( 2nd Gen ) : Pricing, Features...   AAPL
11  20230118  Roblox's  ( RBLX )  December Metrics Indicate ...   AAPL
12  20230118  GE Has Been One of the Best Stocks of 2023. Ca...   AAPL
13  20

# Sentiment Analysis of News data #

In [243]:
def SentimentAnalysisNewsData(parsedNews):
    nltk.download('vader_lexicon')
    # Sentiment Analysis
    analyzer = SentimentIntensityAnalyzer()

    columns = ['Ticker', 'Date', 'Time', 'Headline']
    news = pd.DataFrame(parsedNews, columns=columns)
    scores = news['Headline'].apply(analyzer.polarity_scores).tolist()

    df_scores = pd.DataFrame(scores)
    news = news.join(df_scores, rsuffix='_right')
    news['Date'] = pd.to_datetime(news.Date).dt.date

    unique_ticker = news['Ticker'].unique().tolist()
    news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

    values = []
    for ticker in tickers: 
        dataframe = news_dict[ticker]
        dataframe = dataframe.set_index('Ticker')
        dataframe = dataframe.drop(columns = ['Headline'])
        print ('\n')
        print (dataframe.head())
        
        mean = round(dataframe['compound'].mean(), 2)
        values.append(mean)
        
    df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
    df = df.set_index('Ticker')
    df = df.sort_values('Mean Sentiment', ascending=False)
    print ('\n')
    print (df)
    return df

In [244]:
HistoricSentiment = SentimentAnalysisNewsData(historic_news)
TodaysSentiment = SentimentAnalysisNewsData(parsed_news)



              Date  Time    neg    neu    pos  compound
Ticker                                                 
AAPL    2023-01-18   NaN  0.000  1.000  0.000    0.0000
AAPL    2023-01-18   NaN  0.000  1.000  0.000    0.0000
AAPL    2023-01-18   NaN  0.421  0.579  0.000   -0.6249
AAPL    2023-01-18   NaN  0.000  0.753  0.247    0.3182
AAPL    2023-01-18   NaN  0.000  0.641  0.359    0.6808


        Mean Sentiment
Ticker                
AAPL              0.13


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AAPL    2023-01-18  07:28PM  0.375  0.625  0.000   -0.7783
AAPL    2023-01-18  05:04PM  0.000  1.000  0.000    0.0000
AAPL    2023-01-18  05:02PM  0.000  1.000  0.000    0.0000
AAPL    2023-01-18  04:32PM  0.000  0.692  0.308    0.4588
AAPL    2023-01-18  04:19PM  0.196  0.804  0.000   -0.2960


        Mean Sentiment
Ticker                
AAPL             -0.05


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# Creating Dataset #