<a href="https://colab.research.google.com/github/ishaanpaul98/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports #

In [246]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from wordcloud import WordCloud
import re

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import yfinance as yf
import time
import requests
import io

In [247]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [248]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [249]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,classification_report
from sklearn.metrics import plot_confusion_matrix

# Stock Data Helper Functions #

In [250]:
def getStockDataDaily(symbol, day):
    print("Getting stock data for stock $"+symbol)
    df = yf.download(symbol, start=day, period = "1d")
    return df

getStockDataDaily('AAPL', "2022-12-21")

Getting stock data for stock $AAPL
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-12-21,132.979996,136.809998,132.75,135.449997,135.449997,85928000
2022-12-22,134.350006,134.559998,130.300003,132.229996,132.229996,77852100
2022-12-23,130.919998,132.419998,129.639999,131.860001,131.860001,63814900
2022-12-27,131.380005,131.410004,128.720001,130.029999,130.029999,69007800
2022-12-28,129.669998,131.029999,125.870003,126.040001,126.040001,85438400
2022-12-29,127.989998,130.479996,127.730003,129.610001,129.610001,75703700
2022-12-30,128.410004,129.949997,127.43,129.929993,129.929993,76960600
2023-01-03,130.279999,130.899994,124.169998,125.07,125.07,112117500
2023-01-04,126.889999,128.660004,125.080002,126.360001,126.360001,89113600
2023-01-05,127.129997,127.769997,124.760002,125.019997,125.019997,80962700


In [251]:
def arrayToString(arr):
    print("Starting array to list")
    listToStr = ' '.join([str(elem) for elem in arr])
    return listToStr


In [252]:
def getMultiStockDataDaily(symbols, day):
    if len(symbols) > 1:
        print("Length of symbols array is more than 1. STARTING ARRAYTOSTRING")
        symbols = arrayToString(symbols)
    print("Getting stock data for stock $"+symbols)
    df = yf.download(symbols, start=day, period = "1d", group_by='ticker')
    return df

getMultiStockDataDaily(['AAPL', 'TSLA'], "2022-12-21")

Length of symbols array is more than 1. STARTING ARRAYTOSTRING
Starting array to list
Getting stock data for stock $AAPL TSLA
[*********************100%***********************]  2 of 2 completed


Unnamed: 0_level_0,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-12-21,139.339996,141.259995,135.889999,137.570007,137.570007,145417400.0,132.979996,136.809998,132.75,135.449997,135.449997,85928000
2022-12-22,136.0,136.630005,122.260002,125.349998,125.349998,210090300.0,134.350006,134.559998,130.300003,132.229996,132.229996,77852100
2022-12-23,126.370003,128.619995,121.019997,123.150002,123.150002,166989700.0,130.919998,132.419998,129.639999,131.860001,131.860001,63814900
2022-12-27,117.5,119.669998,108.760002,109.099998,109.099998,208643400.0,131.380005,131.410004,128.720001,130.029999,130.029999,69007800
2022-12-28,110.349998,116.269997,108.239998,112.709999,112.709999,221070500.0,129.669998,131.029999,125.870003,126.040001,126.040001,85438400
2022-12-29,120.389999,123.57,117.5,121.82,121.82,221923300.0,127.989998,130.479996,127.730003,129.610001,129.610001,75703700
2022-12-30,119.949997,124.480003,119.75,123.18,123.18,157304500.0,128.410004,129.949997,127.43,129.929993,129.929993,76960600
2023-01-03,118.470001,118.800003,104.639999,108.099998,108.099998,231402800.0,130.279999,130.899994,124.169998,125.07,125.07,112117500
2023-01-04,109.110001,114.589996,107.519997,113.639999,113.639999,180389000.0,126.889999,128.660004,125.080002,126.360001,126.360001,89113600
2023-01-05,110.510002,111.75,107.160004,110.339996,110.339996,157986300.0,127.129997,127.769997,124.760002,125.019997,125.019997,80962700


In [266]:
def getMonthlyStockData(symbol, day, interval):
    print("Getting stock data for stock $"+symbol)
    df = yf.download(symbol, start=day, period = interval, group_by='ticker')
    return df

getMonthlyStockData('AAPL', "2022-11-21", '1mo')

Getting stock data for stock $AAPL
[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-11-21,150.160004,150.369995,147.720001,148.009995,148.009995,58724100
2022-11-22,148.130005,150.419998,146.929993,150.179993,150.179993,51804100
2022-11-23,149.449997,151.830002,149.339996,151.070007,151.070007,58301400
2022-11-25,148.309998,148.880005,147.119995,148.110001,148.110001,35195900
2022-11-28,145.139999,146.639999,143.380005,144.220001,144.220001,69246000
2022-11-29,144.289993,144.809998,140.350006,141.169998,141.169998,83763800
2022-11-30,141.399994,148.720001,140.550003,148.029999,148.029999,111380900
2022-12-01,148.210007,149.130005,146.610001,148.309998,148.309998,71250400
2022-12-02,145.960007,148.0,145.649994,147.809998,147.809998,65447400
2022-12-05,147.770004,150.919998,145.770004,146.630005,146.630005,68826400


# Gathering FinViz Data (Today's News) #

In [254]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL']

In [255]:
from finvizfinance.news import News
fnews = News()
all_news = fnews.get_news()
all_news

{'news':        Date                                              Title  \
 0   01:53PM  Stocks Take Leg Higher on Fed Waller’s Comment...   
 1   01:51PM  MarketWatch Live: U.S. stocks move higher but ...   
 2   01:22PM   Federal Reserve Probes Goldman Consumer Business   
 3   01:10PM  Yellen warns of 'global financial crisis' if U...   
 4   01:07PM  : Baker Hughes data show a decline in active U...   
 ..      ...                                                ...   
 85  04:51AM  IMF's Georgieva and ECB's Lagarde discuss the ...   
 86  04:25AM  Swiss Minister Signals Support Using Russian A...   
 87  04:22AM  Global equity funds post second weekly inflows...   
 88  04:00AM    Why the January Stock Market Bounce Is Illusory   
 89  03:55AM  European shares gain on China's reopening opti...   
 
                  Source                                               Link  
 0     www.bloomberg.com  https://www.bloomberg.com/news/articles/2023-0...  
 1   www.marketwatch.com  http

In [256]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL


Recent News Headlines for AAPL: 
Analyst Report: Apple Inc. ( Jan-20-23 12:41PM )
Heres Why You Should Consider Investing in Apple (AAPL) ( 12:32PM )
Apple wins appeal to keep $308 million U.S. patent verdict at bay ( 12:08PM )


In [257]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Jan-20-23 12:41PMAnalyst Report: Apple Inc. Morningstar Research
12:32PMHeres Why You Should Consider Investing in Apple (AAPL) Insider Monkey
12:08PMApple wins appeal to keep $308 million U.S. patent verdict at bay Reuters
10:51AMThis Startup Uses AI To Leave Apple's AirTag In The Dust Benzinga
10:00AM1 Huge Reason Stocks Could Rally in 2023 Motley Fool

08:03AM
Loading…

08:03AM2 Top Warren Buffett Stocks to Buy in 2023 and Hold Forever Motley Fool
07:41AMApple appeals investigation by UK competition watchdog Reuters
07:38AMApple appeals investigation by UK competition watchdog Reuters
06:27AMApple Stock: Bull vs. Bear Motley Fool
06:03AMThe Global Smartphone Market Just Fell Off a Cliff. Apple Beat the Odds. Motley Fool
05:55AM3 Stocks Warren Buffett Is Almost Certainly Buying in 2023 Motley Fool
05:06AM89% of Warren Buffett's Secret Portfolio Is Invested in Just 5 Stocks Motley Fool
Jan-19-23 09:45AMApple Stock Is Down 25% From Its High. Time to Buy? Motley Fool
08:45AMNew to Inves

# Gathering Data From AlphaAdvantage for Historical News #

In [258]:
from decouple import config
import requests
import urllib.parse
import json
AAapikey = config('AAKey')

In [259]:
#Function to convert user provided date to date required by AlphaAdvantage
def toAADate(oldDate):
    newDate = str(oldDate) + 'T0001'
    return newDate

In [260]:
# Get data from AlphaAdvantage for one ticker for a particular day
def getHistoricNewsData(ticker, startDate = None, endDate = None):
    url = 'https://www.alphavantage.co/query?'
    if startDate and endDate:
        startDate = toAADate(startDate)
        endDate = toAADate(endDate)
        Myparams = {'function': 'NEWS_SENTIMENT', 'tickers': ticker, 'time_from': startDate, 'time_to': endDate, 'sort': 'LATEST','limit': 100, 'apikey': AAapikey}
    else:
        Myparams = {'function': 'NEWS_SENTIMENT', 'tickers': ticker, 'sort': 'LATEST','limit': 100, 'apikey': AAapikey}
    r = requests.get(url, params = Myparams)
    data = r.json()
    #return data
    historic_news = pd.DataFrame(columns=['Date', 'Headline', 'Ticker'])
    for i in data.get("feed"):
        test_date = i.get("time_published")
        test_date = test_date[:8]
        row = [test_date, i.get("title"), ticker]
        new_df = pd.DataFrame([row],columns=['Date', 'Headline', 'Ticker'])
        historic_news = pd.concat([historic_news, new_df], axis=0, ignore_index=True)
    return historic_news

historic_news = getHistoricNewsData('AAPL', '2023012', '20230117')
print(historic_news)

        Date                                           Headline Ticker
0   20230120  Microsoft and Alphabet Deepen the White-Collar...   AAPL
1   20230120  Amazon's Alexa Smart Speaker Blazed A Trail, T...   AAPL
2   20230120  Five Things We Don't Still Don't Know About Ne...   AAPL
3   20230120  Netflix  ( NFLX )  Q4 Earnings Miss, Revenues ...   AAPL
4   20230120  Major tech layoffs from Alphabet and Microsoft...   AAPL
5   20230120  This Startup Uses AI To Leave Apple's AirTag I...   AAPL
6   20230120  Could Alphabet Cuts Spell a Potentially Tough ...   AAPL
7   20230120           1 Huge Reason Stocks Could Rally in 2023   AAPL
8   20230120  This Solar Company's 5-Year Return Outshines T...   AAPL
9   20230120  EXCLUSIVE: Poll Reveals Top Picks For A $25,00...   AAPL
10  20230120  LG ELECTRONICS USA DEBUTS ITS NEW CINEBEAM SMA...   AAPL
11  20230120                 Futures Fall Ahead Of Housing Data   AAPL
12  20230120  DraftKings, FanDuel And These Streaming Apps C...   AAPL
13  20

# Sentiment Analysis of News data #

In [261]:
def SentimentAnalysisNewsData(parsedNews):
    nltk.download('vader_lexicon')
    # Sentiment Analysis
    analyzer = SentimentIntensityAnalyzer()

    columns = ['Ticker', 'Date', 'Time', 'Headline']
    news = pd.DataFrame(parsedNews, columns=columns)
    scores = news['Headline'].apply(analyzer.polarity_scores).tolist()

    df_scores = pd.DataFrame(scores)
    news = news.join(df_scores, rsuffix='_right')
    news['Date'] = pd.to_datetime(news.Date).dt.date

    unique_ticker = news['Ticker'].unique().tolist()
    news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

    values = []
    for ticker in tickers: 
        dataframe = news_dict[ticker]
        dataframe = dataframe.set_index('Ticker')
        dataframe = dataframe.drop(columns = ['Headline'])
        print ('\n')
        print (dataframe.head())
        
        mean = round(dataframe['compound'].mean(), 2)
        values.append(mean)
        
    df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
    df = df.set_index('Ticker')
    df = df.sort_values('Mean Sentiment', ascending=False)
    print ('\n')
    print (df)
    return df

In [262]:
HistoricSentiment = SentimentAnalysisNewsData(historic_news)
TodaysSentiment = SentimentAnalysisNewsData(parsed_news)



              Date  Time    neg    neu    pos  compound
Ticker                                                 
AAPL    2023-01-20   NaN  0.318  0.682  0.000   -0.4215
AAPL    2023-01-20   NaN  0.000  0.748  0.252    0.4019
AAPL    2023-01-20   NaN  0.000  1.000  0.000    0.0000
AAPL    2023-01-20   NaN  0.114  0.643  0.243    0.4215
AAPL    2023-01-20   NaN  0.000  0.870  0.130    0.3400


        Mean Sentiment
Ticker                
AAPL              0.09


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AAPL    2023-01-20  12:41PM  0.000  1.000  0.000    0.0000
AAPL    2023-01-20  12:32PM  0.000  1.000  0.000    0.0000
AAPL    2023-01-20  12:08PM  0.000  0.675  0.325    0.6486
AAPL    2023-01-20  10:51AM  0.098  0.902  0.000   -0.0516
AAPL    2023-01-20  10:00AM  0.220  0.606  0.174   -0.1531


        Mean Sentiment
Ticker                
AAPL             -0.04


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


# Creating Dataset #

In [263]:
for i in tickers:
    

IndentationError: expected an indented block after 'for' statement on line 1 (1525691964.py, line 2)