In [56]:
import eikon as ek # the Eikon Python wrapper package
import cufflinks as cf
import pandas as pd
import pickle
import datetime
import nltk, bs4  # NLP toolkit & BeautyfulSoup
from bs4 import BeautifulSoup  # HTML parsing
from nltk import word_tokenize  # tokenizing
import configparser as cp
import pandas as pd
import numpy as np
from IPython.display import HTML
from time import sleep
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pytz import timezone # set timezone
# Raise the DataFrame row-display limit for the whole session.
# `pd.option_context(...)` only takes effect inside a `with` block, so calling
# it as a bare statement changed nothing; `pd.set_option` applies the setting.
pd.set_option('display.max_rows', 1000)

In [2]:
import sys
# Record the Python interpreter version this notebook was run with.
print(sys.version)

3.6.2 |Continuum Analytics, Inc.| (default, Jul 20 2017, 12:30:02) [MSC v.1900 64 bit (AMD64)]


In [32]:
import os  # imported here so this cell is self-contained; only used to read the key

# Read the Eikon API app key from the EIKON_APP_KEY environment variable rather
# than pasting it into the notebook, so the key is never saved with the file.
# A missing variable raises KeyError naming EIKON_APP_KEY.
ek.set_app_key(os.environ['EIKON_APP_KEY'])
ek.get_port_number()

'9000'

In [95]:
def get_filename_datetime():
    '''
    Build the pickle file name for today's query, e.g. "Query-2020-02-04.pkl".
    '''
    today = datetime.date.today()
    return "Query-{}.pkl".format(today)

def get_news(query, hours, minutes, count = 100):
    '''
    Fetch recent Eikon headlines for `query` and attach the text of each story.

    Parameters
    ----------
    query : news query, passed unchanged to `ek.get_news_headlines`.
    hours, minutes : how far back from now the look-back window starts.
    count : maximum number of headlines requested (default 100).

    Returns
    -------
    (news, time) where `news` is the headlines frame returned by Eikon with an
    added 'story' column (plain text extracted from the story HTML, or '' when
    the story could not be fetched or parsed), and `time` is the start of the
    look-back window as a US/Eastern-aware datetime.

    Side effects
    ------------
    Pickles `news` to the file named by `get_filename_datetime()`, overwriting
    any file of the same name.
    '''
    # Read the clock once, in US/Eastern, and derive both ends of the window
    # from it so the window length is exactly `hours`/`minutes` whatever the
    # machine's local timezone is. `normalize` keeps the wall-clock time right
    # when the subtraction crosses a daylight-saving change.
    eastern = timezone('US/Eastern')
    now = datetime.datetime.now(eastern)
    time = eastern.normalize(now - datetime.timedelta(hours=hours, minutes=minutes))

    news = ek.get_news_headlines(query,
                                 count=count,
                                 date_from=time.strftime('%Y-%m-%dT%H:%M:%S'),
                                 date_to=now)

    stories = []
    for story_id in news['storyId']:
        try:
            html = ek.get_news_story(story_id)
            stories.append(BeautifulSoup(html, 'html.parser').get_text())
        except Exception:
            # Best effort: keep one entry per headline so the column lines up.
            # `Exception` (not a bare except) lets Ctrl-C interrupt a long fetch.
            stories.append('')
    news['story'] = stories

    # `with` closes the file even if pickling fails.
    with open(get_filename_datetime(), 'wb') as pickle_file:
        pickle.dump(news, pickle_file)
    return news, time

def get_sentiment(query, hours, minutes, symbol, threshold = 0.5):
    '''
    Join minute closes of `symbol` with a per-minute news sentiment signal.

    State how many hours and minutes to go back in time from current datetime.

    Parameters
    ----------
    query : news query forwarded to `get_news`.
    hours, minutes : length of the look-back window.
    symbol : instrument code requested from `ek.get_timeseries`.
    threshold : compound-score cut-off (default 0.5). Scores above `threshold`
        map to 1, scores below `-threshold` map to -1, everything else to 0.

    Returns
    -------
    A DataFrame holding the 'CLOSE' series outer-joined with a
    'Transformed Sentiment' column, or a message string when no news (or no
    'versionCreated'/'story' column) is available for the window.
    '''
    news, time = get_news(query, hours, minutes)

    try:
        timestamps = list(news['versionCreated'])
        stories = list(news['story'])
    except KeyError:
        timestamps, stories = [], []
    if not timestamps:
        return("No news found between {} EST and {} EST for the passed `{}`!".format(time.strftime('%H:%M %p'),
                                                                                     datetime.datetime.now(timezone('US/Eastern')).strftime('%H:%M %p'),
                                                                                     query))

    sid = SentimentIntensityAnalyzer() #instantiated sentiment object.
    # Score every story first and build the frame once; growing a DataFrame
    # row by row is quadratic and `DataFrame.append` no longer exists in
    # recent pandas releases.
    scores = [sid.polarity_scores(story) for story in stories]
    sentiment = pd.DataFrame(scores, index=pd.DatetimeIndex(timestamps)).sort_index()
    # NOTE(review): this labels the timestamps as fixed-offset 'EST' and assumes
    # 'versionCreated' is timezone-naive - confirm what Eikon actually returns.
    sentiment = sentiment.tz_localize('EST')
    sentiment['Transformed Sentiment'] = sentiment['compound'].apply(
        lambda score: 1 if score > threshold else (-1 if score < -threshold else 0))

    # One value per minute: the last signal seen in that minute, carried
    # forward until the next story arrives.
    resampled_sentiment = sentiment['Transformed Sentiment'].resample('min', label='right').last().ffill()

    time_series = ek.get_timeseries([symbol],
                                    start_date=time.strftime('%Y-%m-%dT%H:%M:%S'),
                                    end_date=datetime.datetime.now(timezone('US/Eastern')),
                                    interval='minute',
                                    fields='CLOSE')
    time_series = time_series.tz_localize('EST')

    return time_series.join(resampled_sentiment, how = 'outer')

In [96]:
%%time
# Get the sentiment scores of all articles that mention EUR/USD over the last
# 5 hours (hours=5, minutes=0), joined to the minute closes of 'EUR='.
# The result is either a DataFrame or a "No news found ..." message string.
df = get_sentiment('R:EUR= AND Language:LEN',5,0,'EUR=')
df

Wall time: 1min 13s


Unnamed: 0,CLOSE,Transformed Sentiment
2020-02-04 07:17:00-05:00,1.1051,
2020-02-04 07:18:00-05:00,1.1051,
2020-02-04 07:19:00-05:00,1.1052,
2020-02-04 07:20:00-05:00,1.1052,
2020-02-04 07:21:00-05:00,1.1054,
2020-02-04 07:22:00-05:00,1.1053,
2020-02-04 07:23:00-05:00,1.1053,
2020-02-04 07:24:00-05:00,1.1054,
2020-02-04 07:25:00-05:00,1.1053,
2020-02-04 07:26:00-05:00,1.1053,


In [87]:
# Show the rows for each sentiment value in turn: -1, then 0, then 1.
for sentiment_value in (-1, 0, 1):
    display(df.loc[df['Transformed Sentiment'] == sentiment_value])

Unnamed: 0,CLOSE,Transformed Sentiment
2020-02-04 10:30:00-05:00,1.1051,-1.0
2020-02-04 10:31:00-05:00,1.1052,-1.0
2020-02-04 10:32:00-05:00,1.1051,-1.0
2020-02-04 10:33:00-05:00,1.1050,-1.0
2020-02-04 10:34:00-05:00,1.1049,-1.0
2020-02-04 10:35:00-05:00,1.1050,-1.0
2020-02-04 10:57:00-05:00,1.1055,-1.0
2020-02-04 10:58:00-05:00,1.1054,-1.0
2020-02-04 10:59:00-05:00,1.1054,-1.0
2020-02-04 11:00:00-05:00,1.1055,-1.0


Unnamed: 0,CLOSE,Transformed Sentiment
2020-02-04 10:08:00-05:00,1.1054,0.0
2020-02-04 10:09:00-05:00,1.1052,0.0
2020-02-04 10:10:00-05:00,1.1055,0.0
2020-02-04 10:11:00-05:00,1.1054,0.0
2020-02-04 10:12:00-05:00,1.1056,0.0
2020-02-04 10:13:00-05:00,1.1054,0.0
2020-02-04 10:14:00-05:00,1.1054,0.0
2020-02-04 10:15:00-05:00,1.1055,0.0
2020-02-04 10:16:00-05:00,1.1053,0.0
2020-02-04 10:17:00-05:00,1.1051,0.0


Unnamed: 0,CLOSE,Transformed Sentiment
2020-02-04 10:20:00-05:00,1.1050,1.0
2020-02-04 10:21:00-05:00,1.1050,1.0
2020-02-04 10:22:00-05:00,1.1050,1.0
2020-02-04 10:23:00-05:00,1.1049,1.0
2020-02-04 10:24:00-05:00,1.1051,1.0
2020-02-04 10:25:00-05:00,1.1050,1.0
2020-02-04 10:26:00-05:00,1.1051,1.0
2020-02-04 10:27:00-05:00,1.1050,1.0
2020-02-04 10:28:00-05:00,1.1051,1.0
2020-02-04 10:29:00-05:00,1.1049,1.0


In [90]:
# Put cufflinks in offline mode before any plotting cell runs
# (presumably so charts render locally in the notebook - confirm).
cf.set_config_file(offline=True)

In [97]:
# Plot the joined frame, with the sentiment signal on its own secondary y-axis.
df.iplot(
    secondary_y='Transformed Sentiment',
    width=1.5,
    title='Minute Data vs Resampled Minute Sentiment Scores',
)

In [89]:
from statsmodels.tsa.stattools import grangercausalitytests
# Column order is [y, x]: CLOSE first, Transformed Sentiment second. Per the
# statsmodels documentation the test asks whether the second column helps
# predict the first - confirm against the installed version.
# Rows with a missing value in any column are dropped first, and every lag
# from 1 to 7 is tested (maxlag=7). The trailing `;` hides the returned dict.
# NOTE(review): CLOSE is passed in price levels and the sentiment column is
# forward-filled; Granger tests assume stationary inputs - consider testing on
# price changes instead.
grangercausalitytests(df.dropna()[['CLOSE','Transformed Sentiment']].values,maxlag=7);


Granger Causality
number of lags (no zero) 1
ssr based F test:         F=1.6873  , p=0.1946  , df_denom=438, df_num=1
ssr based chi2 test:   chi2=1.6989  , p=0.1924  , df=1
likelihood ratio test: chi2=1.6956  , p=0.1929  , df=1
parameter F test:         F=1.6873  , p=0.1946  , df_denom=438, df_num=1

Granger Causality
number of lags (no zero) 2
ssr based F test:         F=1.3342  , p=0.2644  , df_denom=435, df_num=2
ssr based chi2 test:   chi2=2.6991  , p=0.2594  , df=2
likelihood ratio test: chi2=2.6908  , p=0.2604  , df=2
parameter F test:         F=1.3342  , p=0.2644  , df_denom=435, df_num=2

Granger Causality
number of lags (no zero) 3
ssr based F test:         F=1.8369  , p=0.1397  , df_denom=432, df_num=3
ssr based chi2 test:   chi2=5.6001  , p=0.1328  , df=3
likelihood ratio test: chi2=5.5647  , p=0.1348  , df=3
parameter F test:         F=1.8369  , p=0.1397  , df_denom=432, df_num=3

Granger Causality
number of lags (no zero) 4
ssr based F test:         F=1.4522  , p=0.2159  