In [6]:
# Import libraries
import os
from urllib.request import urlopen, Request

import pandas as pd
import plotly.graph_objects as go
import yfinance as yf
from bs4 import BeautifulSoup
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Base URL of the Finviz quote page; a ticker symbol is appended to it to
# build the URL that is requested for each ticker
finwiz_url = 'https://finviz.com/quote.ashx?t='

In [7]:
# Maps each ticker symbol to the 'news-table' element scraped from its quote page
news_tables = {}
# tickers = ['AMZN', 'TSLA', 'GOOG', 'NFLX', 'NIO']
tickers = ['NIO']

for ticker in tickers:
    url = finwiz_url + ticker
    # Send the request with an explicit user-agent header
    req = Request(url=url, headers={'user-agent': 'my-app/0.0.1'})
    # The context manager closes the HTTP response once the page is parsed
    with urlopen(req) as response:
        # Name the parser explicitly so the parse result does not depend on
        # which optional parsers happen to be installed
        html = BeautifulSoup(response, 'html.parser')
    # Find 'news-table' in the Soup and load it into 'news_table'
    news_table = html.find(id='news-table')
    if news_table is None:
        # Nothing to parse for this ticker; skip it instead of storing None,
        # which would only fail later when the table rows are iterated
        print(f"No 'news-table' element found for {ticker}; skipping")
        continue
    # Add the table to our dictionary
    news_tables[ticker] = news_table

In [8]:
# One [ticker, date, time, headline] record per news row
parsed_news = []

# Iterate through the news tables (one per dictionary key)
for file_name, news_table in news_tables.items():
    # Extract the ticker from the key: the string up to the 1st '_'
    ticker = file_name.split('_')[0]

    # A row that shows only a time reuses the date of the closest dated row
    # above it. Reset the carried date for every table so a date from the
    # previous ticker is never reused (and the name is always defined).
    date = None

    # A ticker whose page had no news table has nothing to parse
    if news_table is None:
        continue

    # Iterate through all tr tags in 'news_table'
    for row in news_table.find_all('tr'):
        # Skip rows that have no headline link or no timestamp cell
        if row.a is None or row.td is None:
            continue

        # Headline text comes from the row's first <a> tag
        text = row.a.get_text()
        # Split the text of the first <td> tag into whitespace-separated parts
        date_scrape = row.td.text.split()
        if not date_scrape:
            continue

        if len(date_scrape) == 1:
            # Only a time is present; keep the date carried from above
            time = date_scrape[0]
        else:
            # The 1st element is the date and the 2nd is the time
            date = date_scrape[0]
            time = date_scrape[1]

        # Append ticker, date, time and headline as a list to 'parsed_news'
        parsed_news.append([ticker, date, time, text])

parsed_news

[['NIO',
  'Jul-16-20',
  '04:25PM',
  'Tesla Inc. stock underperforms Thursday when compared to competitors'],
 ['NIO',
  'Jul-16-20',
  '10:48AM',
  "Nikola's stock falls after Deutsche Bank analyst talks bullishly but doesn't recommend buying"],
 ['NIO', 'Jul-15-20', '02:34PM', 'Why NIO Stock Is Slipping Today'],
 ['NIO',
  'Jul-14-20',
  '10:25AM',
  "NIO Soars More Than 350% in 3 Months: What's Driving it?"],
 ['NIO',
  'Jul-13-20',
  '11:14AM',
  'Chinese EV Manufacturer Li Auto Files For Nasdaq Listing'],
 ['NIO',
  'Jul-13-20',
  '08:45AM',
  'EV Stocks are Shooting the Lights Out: Is it Just a Bubble?'],
 ['NIO',
  'Jul-11-20',
  '04:48AM',
  'Chinese EV maker Li Auto files for U.S. listing'],
 ['NIO',
  'Jul-10-20',
  '03:11PM',
  'Get in the Drivers Seat Now and Trade These 3 EV Stocks'],
 ['NIO',
  'Jul-10-20',
  '02:59PM',
  'Electric Vehicle Stocks Remain Hot. 2 That Are Making News.'],
 ['NIO',
  'Jul-10-20',
  '07:06AM',
  '3 Things NIO Stock Bulls Need to Happen Soon']

In [9]:
# Create the VADER analyzer that scores each headline
vader = SentimentIntensityAnalyzer()

# Labels for the fields of every parsed news record
columns = ['ticker', 'date', 'time', 'headline']

# Load the parsed records into a frame that holds the unscored news
headline_frame = pd.DataFrame(parsed_news, columns=columns)

# Compute the polarity scores of every headline (one dict per headline)
scores = [
    vader.polarity_scores(headline)
    for headline in headline_frame['headline']
]

# Turn the list of score dicts into its own DataFrame
scores_df = pd.DataFrame(scores)

# Attach the scores to the news rows; both frames share the same row index
parsed_and_scored_news = headline_frame.join(scores_df, rsuffix='_right')

# Replace the date strings with date objects
parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news['date']).dt.date

parsed_and_scored_news.head()

Unnamed: 0,ticker,date,time,headline,neg,neu,pos,compound
0,NIO,2020-07-16,04:25PM,Tesla Inc. stock underperforms Thursday when c...,0.0,1.0,0.0,0.0
1,NIO,2020-07-16,10:48AM,Nikola's stock falls after Deutsche Bank analy...,0.182,0.818,0.0,-0.395
2,NIO,2020-07-15,02:34PM,Why NIO Stock Is Slipping Today,0.0,1.0,0.0,0.0
3,NIO,2020-07-14,10:25AM,NIO Soars More Than 350% in 3 Months: What's D...,0.0,1.0,0.0,0.0
4,NIO,2020-07-13,11:14AM,Chinese EV Manufacturer Li Auto Files For Nasd...,0.0,1.0,0.0,0.0


In [12]:
# Group by the ticker and date columns and average the sentiment scores.
# numeric_only=True leaves out the text columns ('time', 'headline'), which
# cannot be averaged and make a plain .mean() raise on recent pandas versions.
mean_scores = parsed_and_scored_news.groupby(['ticker','date']).mean(numeric_only=True)
print(mean_scores)
# Unstack the innermost index level ('date') into the columns
mean_scores = mean_scores.unstack()
print(mean_scores)
# Get the cross-section of compound in the 'columns' axis, then transpose so
# that rows are dates and columns are tickers
mean_scores = mean_scores.xs('compound', axis="columns").transpose()

                        neg       neu       pos  compound
ticker date                                              
NIO    2020-05-27  0.000000  1.000000  0.000000  0.000000
       2020-05-28  0.039125  0.905875  0.055000  0.069900
       2020-06-01  0.000000  0.926000  0.074000  0.190900
       2020-06-03  0.000000  1.000000  0.000000  0.000000
       2020-06-04  0.000000  0.959857  0.040143  0.062914
       2020-06-05  0.000000  0.741000  0.259000  0.336867
       2020-06-08  0.000000  1.000000  0.000000  0.000000
       2020-06-09  0.000000  0.934667  0.065333  0.098667
       2020-06-10  0.000000  0.824667  0.175333  0.314867
       2020-06-11  0.000000  0.791000  0.209000  0.327850
       2020-06-12  0.000000  0.783000  0.217000  0.432200
       2020-06-15  0.000000  0.804000  0.196000  0.296000
       2020-06-16  0.000000  1.000000  0.000000  0.000000
       2020-06-17  0.000000  1.000000  0.000000  0.000000
       2020-06-18  0.000000  1.000000  0.000000  0.000000
       2020-06

In [14]:
# Overlay the daily mean news sentiment (bars, left axis) with the closing
# price (line, right axis) of one ticker.
# The ticker is named once here instead of being repeated in every call.
plot_ticker = 'NIO'

# Daily price history for the last 30 days
prices = yf.Ticker(plot_ticker).history(period='30d', interval='1d')

fig0 = go.Figure(
    go.Bar(
        x=mean_scores.index,
        y=mean_scores[plot_ticker],
        name='News sentiment',
        # Colour each bar by its own sentiment value
        marker_color=mean_scores[plot_ticker],
    )
)
fig0.add_trace(
    go.Scatter(
        x=prices.index,
        y=prices['Close'],
        mode='lines',
        # Draw the price against the secondary axis defined in the layout
        yaxis='y2',
        name=plot_ticker,
        line=go.scatter.Line(color='Green'),
        hovertemplate='%{y:.2f}',
    )
)

fig0.update_layout(
    # A title and axis labels let the figure stand on its own
    title=f'{plot_ticker}: news sentiment vs. closing price',
    xaxis_title='Date',
    yaxis_title='Mean compound sentiment',
    hovermode='x',
    hoverlabel_align='right',
    # Secondary y axis for the price, laid over the sentiment axis
    yaxis2=dict(
        title=plot_ticker,
        overlaying='y',
        side='right',
        showgrid=False,
    ),
)

fig0.show()