In [3]:
import eikon as ek  # the Eikon Python wrapper package
import numpy as np  # NumPy
import pandas as pd  # pandas
import cufflinks as cf  # Cufflinks
import configparser as cp
from IPython.display import HTML

In [4]:
cfg = cp.ConfigParser()
# ConfigParser.read() does not raise when the file is missing; it returns the
# list of files it managed to parse. An empty result therefore means the config
# was not loaded, and the lookup below would fail with an opaque KeyError.
if not cfg.read('eikon.cfg'):  # adjust for different file location
    raise FileNotFoundError("Eikon config file 'eikon.cfg' not found or unreadable")
# set_app_key is used because set_app_id is being deprecated;
# the key is read from the [eikon] section, option app_id.
ek.set_app_key(cfg['eikon']['app_id'])
cf.set_config_file(offline=True)  # set the plotting mode to offline

In [5]:
# Instrument codes (RICs) of the stocks to analyse, in order:
# General Electric, Apple, Microsoft and Amazon.
rics = ['GE', 'AAPL.O', 'MSFT.O', 'AMZN.O']

In [6]:
# Display the list of RICs defined above.
rics

['GE', 'AAPL.O', 'MSFT.O', 'AMZN.O']

In [7]:
# Fetch the CLOSE field for every RIC between 2017-01-01 and 2017-12-31
# from Eikon, then preview the first rows of the resulting frame.
tech2017 = ek.get_timeseries(
    rics,
    fields='CLOSE',
    start_date='2017-01-01',
    end_date='2017-12-31',
)
tech2017.head()

CLOSE,GE,AAPL.O,MSFT.O,AMZN.O
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-01-03,30.462044,116.15,62.58,753.67
2017-01-04,30.471657,116.02,62.3,757.18
2017-01-05,30.298632,116.61,62.3,780.45
2017-01-06,30.385144,117.91,62.84,795.99
2017-01-09,30.240956,118.99,62.64,796.92


In [8]:
# Summarise the index range, column dtypes and non-null counts of the price frame.
tech2017.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 251 entries, 2017-01-03 to 2017-12-29
Data columns (total 4 columns):
GE        251 non-null float64
AAPL.O    251 non-null float64
MSFT.O    251 non-null float64
AMZN.O    251 non-null float64
dtypes: float64(4)
memory usage: 9.8 KB


In [9]:
# Count missing values per column (all zeros means no gaps in the closing prices).
tech2017.isna().sum()

CLOSE
GE        0
AAPL.O    0
MSFT.O    0
AMZN.O    0
dtype: int64

In [10]:
# Pairwise correlation matrix of the closing prices.
# NOTE(review): this correlates price levels, not returns — confirm that is intended.
tech2017.corr()

CLOSE,GE,AAPL.O,MSFT.O,AMZN.O
CLOSE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
GE,1.0,-0.888599,-0.975964,-0.91579
AAPL.O,-0.888599,1.0,0.909939,0.909549
MSFT.O,-0.975964,0.909939,1.0,0.94567
AMZN.O,-0.91579,0.909549,0.94567,1.0


In [11]:
# Plot the correlation matrix as a heatmap (red colour scale) via Cufflinks' iplot.
tech2017.corr().iplot(kind='heatmap', colorscale='reds')

In [12]:
# Line chart of the price series after normalize(); normalize() is not a core
# pandas method (presumably added by Cufflinks to rebase every series to a
# common starting value — TODO confirm).
tech2017.normalize().iplot(kind='lines')

In [16]:
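# Random Walk Hypothesis
# If a stock price follows a (simple) random walk (no drift and normally distributed returns),
# then it rises or falls with the same probability of 50% (“toss of a coin”).
# In that case today's price is the best predictor of tomorrow's price, so in a regression
# of the price on its own lags the lag_1 coefficient should be close to 1 and the others close to 0.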

In [13]:
def add_lags(data, ric, lags):
    """Build a frame holding one column of ``data`` together with its lagged copies.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a column named ``ric``.
    ric : str
        Name of the column to lag.
    lags : int
        Number of lags; columns ``lag_1`` ... ``lag_<lags>`` are added.

    Returns
    -------
    (pandas.DataFrame, list of str)
        A new frame with the original column followed by the lag columns,
        with every row that contains a missing value removed, and the list
        of lag column names in ascending lag order. ``data`` is not modified.
    """
    frame = pd.DataFrame(data[ric])
    lag_names = ['lag_{}'.format(k) for k in range(1, lags + 1)]
    for k, name in enumerate(lag_names, start=1):
        frame[name] = frame[ric].shift(k)  # value observed k rows earlier
    # The first `lags` rows have no complete history, so they are dropped here.
    return frame.dropna(), lag_names

In [14]:
# Build one lagged frame per RIC (5 lags each) and store it under its RIC.
# NOTE: `df` and `cols` stay bound after the loop; `cols` holds the lag column
# names from the last iteration (the same for every RIC) and is read by later cells.
dfs = {}
for ric in rics:
    df, cols = add_lags(tech2017, ric, 5)
    dfs[ric] = df

In [15]:
# Preview the lagged frame for GE; the earliest dates are absent because rows
# with incomplete lags were dropped by add_lags.
dfs['GE'].head()

Unnamed: 0_level_0,GE,lag_1,lag_2,lag_3,lag_4,lag_5
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-10,30.154444,30.240956,30.385144,30.298632,30.471657,30.462044
2017-01-11,30.250569,30.154444,30.240956,30.385144,30.298632,30.471657
2017-01-12,30.173669,30.250569,30.154444,30.240956,30.385144,30.298632
2017-01-13,30.144831,30.173669,30.250569,30.154444,30.240956,30.385144
2017-01-17,30.058319,30.144831,30.173669,30.250569,30.154444,30.240956


In [17]:
# Lag column names returned by the last add_lags call in the loop above.
cols

['lag_1', 'lag_2', 'lag_3', 'lag_4', 'lag_5']

In [21]:
regs = {}
for ric in rics:
    df = dfs[ric]  # lagged frame for this RIC
    # Take the regressors from the frame itself (every column except the price
    # column) instead of relying on a `cols` variable left over from an earlier
    # loop, so this cell does not depend on hidden kernel state.
    lag_cols = [c for c in df.columns if c != ric]
    # Least-squares fit of the price on its lags; no intercept column is included.
    # lstsq returns (solution, residuals, rank, singular values); keep the solution.
    reg = np.linalg.lstsq(df[lag_cols], df[ric], rcond=None)[0]
    regs[ric] = reg  # coefficients in the column order of lag_cols

In [23]:
# Print one row per RIC: the code padded to 10 characters, then its coefficients.
row_format = '{:10} | {}'
for ric in rics:
    print(row_format.format(ric, regs[ric]))

GE         | [ 1.11034491 -0.24194065  0.12033898 -0.06115058  0.070129  ]
AAPL.O     | [ 1.04320349 -0.08044811 -0.0171149   0.16231508 -0.10683212]
MSFT.O     | [ 0.90732726  0.01880445  0.09222626  0.00618147 -0.0230931 ]
AMZN.O     | [ 1.06244456 -0.14829249  0.07329284 -0.05828381  0.07251993]


In [31]:
# Coefficients as a table: one row per RIC, one column per coefficient position
# (column 0 is the first regressor, i.e. lag_1).
pd.DataFrame(regs).T

Unnamed: 0,0,1,2,3,4
GE,1.110345,-0.241941,0.120339,-0.061151,0.070129
AAPL.O,1.043203,-0.080448,-0.017115,0.162315,-0.106832
MSFT.O,0.907327,0.018804,0.092226,0.006181,-0.023093
AMZN.O,1.062445,-0.148292,0.073293,-0.058284,0.07252


In [32]:
# Mean of each coefficient position across the RICs (axis=0 averages down the rows).
pd.DataFrame(regs).T.mean(axis=0)

0    1.030830
1   -0.112969
2    0.067186
3    0.012266
4    0.003181
dtype: float64