In [1]:
# Kevin's original file.
import os
import requests
from datetime import datetime, timedelta
import time
import warnings
import pandas as pd
import numpy as np
from pycorenlp import StanfordCoreNLP

warnings.filterwarnings('ignore')

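# Prerequisite: start the Stanford CoreNLP server in a separate shell before running this notebook:
#   cd stanford-corenlp-4.2.2
#   java -mx6g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer -timeout 5000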


In [3]:


# Connect to the already-running Stanford CoreNLP server on localhost:9000.
# This module-level client is what get_sentiment() calls.
nlp = StanfordCoreNLP('http://localhost:9000')

def get_news(ticker, date, api_key=None, timeout=30):
    """Fetch Finnhub company news for one ticker on one day.

    Parameters
    ----------
    ticker : str
        Stock symbol, e.g. ``'TSLA'``.
    date : str
        Day to fetch in ``YYYY-MM-DD`` form; used as both ``from`` and ``to``.
    api_key : str, optional
        Finnhub API token. Defaults to the ``FINNHUB_API_KEY`` environment
        variable so the credential is never stored in the notebook.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        Columns ``id, category, date, headline, related, source, summary, url``.
        ``date`` is the article's UTC publication day (time of day dropped).

    Raises
    ------
    RuntimeError
        If no API key is supplied and ``FINNHUB_API_KEY`` is unset.
    requests.HTTPError
        If Finnhub answers with a 4xx/5xx status (e.g. rate limiting).
    ValueError
        If the response body is not the expected JSON list of articles.
    """
    if api_key is None:
        api_key = os.environ.get('FINNHUB_API_KEY')
    if not api_key:
        raise RuntimeError(
            'No Finnhub API key: set the FINNHUB_API_KEY environment variable '
            'or pass api_key=...'
        )

    # Let requests build and escape the query string instead of formatting it by hand.
    r = requests.get(
        'https://finnhub.io/api/v1/company-news',
        params={'symbol': ticker, 'from': date, 'to': date, 'token': api_key},
        timeout=timeout,
    )
    r.raise_for_status()
    data = r.json()
    if not isinstance(data, list):
        # A non-list body (such as an error object) cannot be iterated as articles.
        raise ValueError(f'Unexpected Finnhub response for {ticker} on {date}: {data!r}')

    columns = ['id', 'category', 'date', 'headline', 'related', 'source', 'summary', 'url']
    rows = [
        [d['id'], d['category'], d['datetime'], d['headline'],
         d['related'], d['source'], d['summary'], d['url']]
        for d in data
    ]
    df = pd.DataFrame(rows, columns=columns)
    # 'datetime' is a Unix timestamp in seconds; keep only the UTC calendar day.
    df['date'] = pd.to_datetime(df['date'], unit='s').dt.normalize()
    return df

def increment_one_day(str_date):
    """Return the ``YYYY-MM-DD`` string for the day after ``str_date``.

    ``str_date`` must itself be in ``YYYY-MM-DD`` form; month and year
    boundaries roll over as on the calendar.
    """
    fmt = '%Y-%m-%d'
    next_day = datetime.strptime(str_date, fmt) + timedelta(days=1)
    return next_day.strftime(fmt)

def get_sentiment(text):
    """Return the mean CoreNLP sentiment score over the sentences of ``text``.

    Each sentence's ``sentimentValue`` is read as an integer on the Stanford
    scale (0 = very negative ... 4 = very positive) and the values are averaged.

    Parameters
    ----------
    text : str
        Text to score (a headline in this notebook).

    Returns
    -------
    float
        Mean sentence score, or NaN when the server reports no sentences.

    Raises
    ------
    ValueError
        If the server reply is not JSON (for example a plain-text error message).
    """
    # Every headline that failed in this notebook's recorded runs contains '%',
    # while a '%'-free prefix of one of them scores fine.
    # NOTE(review): presumably the server URL-decodes the request body, so a bare
    # '%' is read as a broken escape; sending '%25' instead should decode back to
    # a literal '%'. Confirm against the CoreNLP server in use.
    payload = text.replace('%', '%25')
    result = nlp.annotate(payload, properties={
        'annotators': 'sentiment',
        'outputFormat': 'json',
        'timeout': 5000,
    })

    if isinstance(result, str):
        # The client handed back raw text instead of a parsed dict: either it did
        # not decode the JSON itself or the server sent a plain-text error.
        import json
        try:
            result = json.loads(result)
        except ValueError:
            raise ValueError(
                f'CoreNLP did not return JSON for {text!r}: {result[:200]!r}'
            ) from None

    scores = [int(sentence['sentimentValue']) for sentence in result['sentences']]
    if not scores:
        # Same value np.mean([]) would give, without the empty-slice warning.
        return float('nan')
    return np.mean(scores)


tickers = ['TSLA', 'GE', 'NVDA', 'AMD']

# First day to fetch (inclusive). Fetching runs up to, but not including, today.
START_DATE = '2021-06-25'

# One aggregated frame per ticker, concatenated once after the loop.
# (DataFrame.append was removed in pandas 2.0 and re-copies the frame on every call.)
ticker_frames = []

for ticker in tickers:
    _date = START_DATE
    daily_frames = []

    # Loop over dates and collect news articles: limit 60 api calls per minute.
    # ISO dates sort lexicographically, so '<' also terminates when START_DATE is
    # today or later, where '!=' would loop forever.
    while _date < datetime.today().strftime('%Y-%m-%d'):
        daily = get_news(ticker, _date)
        if not daily.empty:
            daily_frames.append(daily)
        time.sleep(1.1)
        _date = increment_one_day(_date)

    if not daily_frames:
        # No articles at all for this ticker in the date range.
        continue

    df = pd.concat(daily_frames).drop_duplicates()

    # There are some repeat headlines on the same day, so get a daily count per headline.
    # Maybe duplicates of the same headline indicate more important news??
    dh = (df[['date', 'headline', 'id']]
          .groupby(['date', 'headline'], as_index=False)
          .count()
          .rename(columns={'id': 'headline_count'}))

    # Unique headlines by date, with the headline counts merged in
    no_dups = (df.drop_duplicates(subset=['date', 'headline'])
                 .merge(dh, how='left', on=['date', 'headline']))

    # Insert ticker as the first column
    no_dups.insert(0, 'ticker', ticker)

    ticker_frames.append(no_dups)

if ticker_frames:
    all_news = pd.concat(ticker_frames)
else:
    # Keep the expected schema so later cells can still select columns.
    all_news = pd.DataFrame(columns=['ticker', 'id', 'category', 'date', 'headline', 'related',
                                     'source', 'summary', 'url', 'headline_count'])
    
"""
Loop over each headline, get the sentiment, then create sentiment column in dataframe

Stanford NLP Sentiment Scale:
0: Very Negative
1: Negative
2: Neutral
3: Positive
4: Very Positive
"""

# Score every headline. A headline that cannot be scored gets the sentinel -1,
# which lies outside the 0-4 sentiment scale, and is counted in error_count.
sent = []
error_count = 0
for headline in all_news['headline'].tolist():
    try:
        sent.append(get_sentiment(str(headline)))
    except Exception as exc:
        # Catch Exception rather than using a bare except so that interrupting the
        # kernel still stops the loop, and show why the headline failed.
        print(f'{headline}  [{type(exc).__name__}: {exc}]')
        sent.append(-1)
        error_count += 1
all_news['sentiment'] = sent

Dow Jones Rallies As Nike Surges 15% On Earnings; Tesla Stock Reverses Lower
Nikola Stock Is 80% Off Its High Price: Time to Buy?
Wall Street in Fives - Must Read Lists at Midday TSLA;PCRFY;ROK;SPCE;NFLX;BB;AMZN;GOOGL;GOOG;IKNX;ADM;TWLO;ASAN;HOOD;CS;QCOM;TNXP;TIGR;NOK;OMP;RAD;NKE;FDX;KMX;JKS;PAYX;T;VZ;TMUS;CMCSA;DISH;NTAP;LAZR;BTC;$BTC
Wall Street in Fives - Must Read Lists for Friday PCRFY;TSLA;ROK;SPCE;AMZN;GOOG;GOOGL;IKNX;WULF;NFLX;BB;VZ;CMCSA;CMCSK;T;NTAP;LAZR;QCOM;CS;HOOD;TWLO;ASAN;LUV;TNXP;WFC;BAC;NOK;OMP;RAD;NKE;FDX;KMX;JKS;PAYX
Space company Astra begins trading on the Nasdaq, stock jumps 20%
My $374k 38 Stock Retirement Portfolio Outperforms The S&P 500 By 4.2% In June
Tesla rival Nio shares jump 3% premarket after it reports record deliveries for June month and quarter
Nio Posts Record Monthly-Deliveries In June As High Demand Overshadows Chip Shortage, Sees 116% Surge
Ford’s June Sales Fell 27% in U.S. Due to Chip Shortage
Ford’s June Sales Fell 27% in U.S. Due to Chip Short

In [4]:
# Per-ticker frames were stacked with their own indices; renumber rows 0..n-1.
# Reassign instead of inplace=True so the cell reads as an explicit new value.
all_news = all_news.reset_index(drop=True)

In [5]:
# Display the combined frame (one row per unique ticker/date/headline, with sentiment).
all_news

Unnamed: 0,ticker,id,category,date,headline,related,source,summary,url,headline_count,sentiment
0,TSLA,68493345,company,2021-06-25,UPDATE 1-Biden's EV charging push could boost ...,TSLA,Yahoo,President Joe Biden's plan to spend billions o...,https://finnhub.io/api/news?id=0bb0d30b5b802b3...,1,3.0
1,TSLA,68492553,company,2021-06-25,Top Stocks for July 2021,TSLA,Yahoo,The Russell 1000 Index is a market-capitalizat...,https://finnhub.io/api/news?id=45b2934694bcc2c...,1,2.0
2,TSLA,68490865,company,2021-06-25,Top Robinhood Stocks Popular on Reddit,TSLA,Yahoo,In this article we discuss the top Robinhood s...,https://finnhub.io/api/news?id=c85260490dcd982...,1,2.0
3,TSLA,68493348,company,2021-06-25,Biden's EV charging push could boost automaker...,TSLA,Yahoo,(Reuters) -President Joe Biden's plan to spend...,https://finnhub.io/api/news?id=11b7bbb03ff7ded...,1,2.0
4,TSLA,68493349,company,2021-06-25,"Why Churchill Capital IV, Fisker, and Nikola S...",TSLA,Yahoo,Shares of several companies in the electric-ve...,https://finnhub.io/api/news?id=32506301b3f4cf1...,1,2.0
...,...,...,...,...,...,...,...,...,...,...,...
787,AMD,68654704,company,2021-07-09,Dow Jones Futures Fall: Market Rally Closes Of...,AMD,Yahoo,The major indexes finished well off lows Thurs...,https://finnhub.io/api/news?id=f4f79331a2b0036...,1,2.0
788,AMD,68677789,company,2021-07-10,"Is NVIDIA Heading to $1,000?",AMD,Yahoo,Most gamers are still using NVIDIA's older pro...,https://finnhub.io/api/news?id=8fe78169a88fa0f...,1,2.0
789,AMD,68677790,company,2021-07-10,Dow Jones Futures: Market Rally Has This Risk ...,AMD,Yahoo,"The market rally is at highs, with Roku and AM...",https://finnhub.io/api/news?id=da6eaa6570c2319...,1,2.0
790,AMD,68677381,company,2021-07-10,"If You Invested $5,000 in AMD in 2015, This Is...",AMD,Yahoo,Advanced Micro Devices has delivered massive r...,https://finnhub.io/api/news?id=6915cd3cf1a7c69...,1,2.0


In [43]:
# Re-score every headline in the current all_news. Headlines that cannot be scored
# get the sentinel -1 (outside the 0-4 sentiment scale) and bump error_count.
sent = []
error_count = 0
for headline in all_news['headline'].tolist():
    try:
        sent.append(get_sentiment(str(headline)))
    except Exception as exc:
        # Catch Exception rather than using a bare except so that interrupting the
        # kernel still stops the loop, and show why the headline failed.
        print(f'{headline}  [{type(exc).__name__}: {exc}]')
        sent.append(-1)
        error_count += 1
all_news['sentiment'] = sent

Is Li Auto Stock A Buy Now? Shares Jump 45% In June As EV Stocks Roar Back
General Electric (GE) Stock Moves -0.85%: What You Should Know


In [38]:
# Number of rows per ticker.
# NOTE(review): the recorded counts total 104 rows, while the frame displayed earlier
# ran to index 790 — presumably this cell ran against a different all_news.
# Re-run from a fresh kernel to confirm.
all_news['ticker'].value_counts()


TSLA    70
NVDA    16
AMD     13
GE       5
Name: ticker, dtype: int64

In [36]:
# Distinct sentiment values present; -1 is the sentinel the scoring loop stores on failure.
all_news['sentiment'].unique()

array([ 2. ,  1. , -1. ,  3. ,  1.5,  2.5])

In [39]:
# Number of headlines the most recent scoring loop failed to score.
error_count

2

In [59]:
# Inspect the first headline.
# NOTE(review): the recorded value differs from row 0 of the frame displayed earlier,
# so all_news was presumably rebuilt or reordered between runs — confirm on a fresh kernel.
all_news['headline'].iloc[0]

'Dow Jones Futures: Market Rally Closes Off Lows; Roku, AMD, Tesla Among 10 Stocks With Bullish Reversals'

In [56]:
# Probe: score only the '%'-free first sentence of a headline that failed in the loop.
# It scores successfully, which points at the later part of that headline (containing '%').
get_sentiment('Is Li Auto Stock A Buy Now?')

2.0