<a href="https://colab.research.google.com/github/ishaanpaul98/Sentiment-Analysis/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
import re

from bs4 import BeautifulSoup
from urllib.request import urlopen, Request
from nltk.sentiment.vader import SentimentIntensityAnalyzer

import datetime
import yfinance as yf
import time
import requests
import io

In [26]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [27]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

In [28]:
from sklearn.metrics import accuracy_score,precision_score,recall_score,confusion_matrix,roc_curve,classification_report
from sklearn.metrics import plot_confusion_matrix

In [45]:
#Importing files
import ipynb
from ipynb.fs.full.stock_helper_functions import *

In [29]:
# Parameters 
n = 3 #the # of article headlines displayed per ticker
tickers = ['AAPL', 'TSLA', 'AMZN']

In [30]:
# Get Data
finwiz_url = 'https://finviz.com/quote.ashx?t='
news_tables = {}

for ticker in tickers:
    url = finwiz_url + ticker
    print("current url is: " +url)
    header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36."}
    req = Request(url=url,headers=header) 
    resp = urlopen(req)    
    html = BeautifulSoup(resp, features="lxml")
    news_table = html.find(id='news-table')
    news_tables[ticker] = news_table

try:
    for ticker in tickers:
        df = news_tables[ticker]
        df_tr = df.findAll('tr')
    
        print ('\n')
        print ('Recent News Headlines for {}: '.format(ticker))
        
        for i, table_row in enumerate(df_tr):
            a_text = table_row.a.text
            td_text = table_row.td.text
            td_text = td_text.strip()
            print(a_text,'(',td_text,')')
            if i == n-1:
                break
except KeyError:
    pass

current url is: https://finviz.com/quote.ashx?t=AAPL
current url is: https://finviz.com/quote.ashx?t=TSLA
current url is: https://finviz.com/quote.ashx?t=AMZN


Recent News Headlines for AAPL: 
Never Mind the FAANG Stocks, Buy the GHOST Stocks Before 2023 ( Dec-18-22 10:21AM )
Its Time to Boot the Moochers Off Your Spotify, Netflix and Other Accounts ( 10:00AM )
EXCLUSIVE: New American Accessories Brand Aims to Redefine Luxury NFT Experience ( 08:30AM )


Recent News Headlines for TSLA: 
Dow Jones Futures: After Stock Market Rally's Ugly Outside Week, Here's What To Do ( Dec-18-22 06:15PM )
Twitter Outlaws Posts Promoting Rival Social Media ( 05:01PM )
Dow Jones Futures: Stock Market Rally Suffers Ugly Outside Week; Here's What To Do ( 02:09PM )


Recent News Headlines for AMZN: 
Heres everything coming to Amazons Prime Video in January 2023 ( Dec-18-22 06:11PM )
Is Amazon.com (AMZN) a Worthy Investment Choice? ( 05:35PM )
4 Reasons You Should Cancel Amazon Prime ( 12:00PM )


In [31]:
# Iterate through the news
parsed_news = []
for file_name, news_table in news_tables.items():
    for x in news_table.findAll('tr'):
        print(x.get_text())
        text = x.get_text() 
        date_scrape = x.td.text.split()

        if len(date_scrape) == 1:
            time = date_scrape[0]
            
        else:
            date = date_scrape[0]
            time = date_scrape[1]

        ticker = file_name.split('_')[0]
        
        parsed_news.append([ticker, date, time, text ])

Dec-18-22 10:21AMNever Mind the FAANG Stocks, Buy the GHOST Stocks Before 2023 Motley Fool
10:00AMIts Time to Boot the Moochers Off Your Spotify, Netflix and Other Accounts The Wall Street Journal
08:30AMEXCLUSIVE: New American Accessories Brand Aims to Redefine Luxury NFT Experience WWD
07:50AMA Bull Market Is Coming: 2 Trillion-Dollar Growth Stocks to Buy Before They Soar Motley Fool
07:30AMSemiconductor Stocks Are On the Rise: 3 Companies to Watch Right Now Motley Fool

07:20AM
Loading…

07:20AM3 Metaverse Stocks to Buy Right Now Motley Fool
Dec-17-22 09:51AM15 Most Boycotted Companies and Brands in History Insider Monkey
09:30AMTarget, Amazon and 4 More Retailers That Will Reward You for Turning in Your Old Stuff GOBankingRates
08:15AMSteve Jobs 'wouldnt have succeeded' without Tim Cook, former Apple exec explains Yahoo Finance
06:40AM3 Stocks to Invest in Virtual Reality Motley Fool
06:32AMDoes ExxonMobil's Massive $50 Billion Stock Buyback Make It a Buy? Motley Fool
06:00AMInside

In [32]:
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Ishaan\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [33]:
# View Data 
news['Date'] = pd.to_datetime(news.Date).dt.date

unique_ticker = news['Ticker'].unique().tolist()
news_dict = {name: news.loc[news['Ticker'] == name] for name in unique_ticker}

values = []
for ticker in tickers: 
    dataframe = news_dict[ticker]
    dataframe = dataframe.set_index('Ticker')
    dataframe = dataframe.drop(columns = ['Headline'])
    print ('\n')
    print (dataframe.head())
    
    mean = round(dataframe['compound'].mean(), 2)
    values.append(mean)
    
df = pd.DataFrame(list(zip(tickers, values)), columns =['Ticker', 'Mean Sentiment']) 
df = df.set_index('Ticker')
df = df.sort_values('Mean Sentiment', ascending=False)
print ('\n')
print (df)



              Date     Time    neg    neu    pos  compound
Ticker                                                    
AAPL    2022-12-18  10:21AM  0.331  0.669  0.000   -0.7125
AAPL    2022-12-18  10:00AM  0.153  0.847  0.000   -0.4404
AAPL    2022-12-18  08:30AM  0.000  1.000  0.000    0.0000
AAPL    2022-12-18  07:50AM  0.157  0.703  0.141   -0.0772
AAPL    2022-12-18  07:30AM  0.195  0.805  0.000   -0.4404


              Date     Time    neg    neu    pos  compound
Ticker                                                    
TSLA    2022-12-18  06:15PM  0.163  0.837  0.000   -0.5106
TSLA    2022-12-18  05:01PM  0.000  0.737  0.263    0.3612
TSLA    2022-12-18  02:09PM  0.299  0.701  0.000   -0.7506
TSLA    2022-12-18  02:05PM  0.369  0.631  0.000   -0.8779
TSLA    2022-12-18  12:21PM  0.206  0.794  0.000   -0.3818


              Date     Time    neg    neu    pos  compound
Ticker                                                    
AMZN    2022-12-18  06:11PM  0.091  0.909  0.000  

In [48]:
#Testing helper functions from stock_helper_functions.ipynb
aapl_open = getStockOpen("AAPL")

Getting stock open for stock $AAPL
AAPL 136.69000244140625
