In [1]:
import numpy as np
import pandas as pd
import scipy
from datetime import datetime
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

%matplotlib inline
# Silence every warning category for the rest of the session. This also hides
# deprecation notices, so comment it out when debugging.
warnings.filterwarnings('ignore')
# Apply seaborn's 'whitegrid' style to subsequent plots.
sns.set_style('whitegrid')

In [2]:
# Load the raw Ethereum price history from a CSV in the working directory.
eth_price_raw = pd.read_csv('ethereum_price.csv')

In [3]:
# Working copy with only the date and closing price: dates parsed from text
# such as 'Aug 07, 2015', rows ordered oldest-first, index renumbered from 0.
eth_price = (
    eth_price_raw.loc[:, ['Date', 'Close']]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%b %d, %Y'))
    .sort_values('Date')
    .reset_index(drop=True)
)
print(eth_price.shape)
eth_price.head()

(929, 2)


Unnamed: 0,Date,Close
0,2015-08-07,2.77
1,2015-08-08,0.753325
2,2015-08-09,0.701897
3,2015-08-10,0.708448
4,2015-08-11,1.07


In [4]:
# Report the first and last dates covered by the price series, then its length.
first_date = eth_price['Date'].head(1)
last_date = eth_price['Date'].tail(1)
print('{}\n{}'.format(first_date, last_date))
print(len(eth_price))

0   2015-08-07
Name: Date, dtype: datetime64[ns]
928   2018-02-20
Name: Date, dtype: datetime64[ns]
929


### Exogenous variables:
- **[CBOE Volatility Index (VIX)](https://en.wikipedia.org/wiki/VIX)**: a measure of the stock market's expectation of volatility, as implied by S&P 500 index options; also known as the 'fear index'
     - [Data Source](http://www.cboe.com/products/vix-index-volatility/vix-options-and-futures/vix-index/vix-historical-data)
- **[US Dollar Index (DXY)](https://en.wikipedia.org/wiki/U.S._Dollar_Index)**: a measure of the value of the USD relative to a basket of foreign currencies
    - [Data Source](https://quotes.wsj.com/index/DXY/historical-prices)
- **[Federal Funds Rate (EFFR)](https://en.wikipedia.org/wiki/Federal_funds_rate)**: the interest rate at which depository institutions (banks and credit unions) lend reserve balances to other depository institutions overnight, on an uncollateralized basis 
    - [Data Source](https://fred.stlouisfed.org/series/EFFR)

In [5]:
# Load the CBOE VIX history. header=1 takes the column names from the second
# line of the file, skipping the line above it.
vix_raw = pd.read_csv('vixcurrent.csv', header=1)
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
vix_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3693 entries, 0 to 3692
Data columns (total 5 columns):
Date         3693 non-null object
VIX Open     3693 non-null float64
VIX High     3693 non-null float64
VIX Low      3693 non-null float64
VIX Close    3693 non-null float64
dtypes: float64(4), object(1)
memory usage: 144.3+ KB
None


In [6]:
# Show the first raw row; the date is still text at this point.
vix_raw.head(1)

Unnamed: 0,Date,VIX Open,VIX High,VIX Low,VIX Close
0,1/2/2004,17.96,18.68,17.54,18.22


In [7]:
# Keep only the date and closing value, parse the M/D/YYYY date strings,
# order the rows chronologically and renumber the index from 0.
vix_close = (
    vix_raw.loc[:, ['Date', 'VIX Close']]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%m/%d/%Y'))
    .sort_values('Date')
    .reset_index(drop=True)
)
vix_close.head()

Unnamed: 0,Date,VIX Close
0,2004-01-02,18.22
1,2004-01-05,17.49
2,2004-01-06,16.73
3,2004-01-07,15.5
4,2004-01-08,15.61


In [8]:
# Restrict VIX to the span covered by the Ethereum prices; both end dates
# are included.
vix_close = vix_close.loc[
    vix_close['Date'].between('2015-08-07', '2018-02-20')
]

In [9]:
# Renumber the filtered rows from 0 and discard the old index labels.
vix_close = vix_close.reset_index(drop=True)

In [10]:
# Show the last rows to confirm where the filtered series ends.
vix_close.tail()

Unnamed: 0,Date,VIX Close
634,2018-02-13,24.97
635,2018-02-14,19.26
636,2018-02-15,19.13
637,2018-02-16,19.46
638,2018-02-20,20.6


In [11]:
# (rows, columns) of the filtered VIX frame.
print(vix_close.shape)

(639, 2)


In [12]:
# Outer-join VIX onto the Ethereum prices by date so that no date from either
# frame is dropped; dates without a VIX value show up as NaN.
eth_exog = pd.merge(eth_price, vix_close, how='outer', on='Date')
# Relabel positionally: Date -> date, Close -> eth_close, VIX Close -> vix_close.
eth_exog.columns = ['date', 'eth_close', 'vix_close']
eth_exog.head(n=10)

Unnamed: 0,date,eth_close,vix_close
0,2015-08-07,2.77,13.39
1,2015-08-08,0.753325,
2,2015-08-09,0.701897,
3,2015-08-10,0.708448,12.23
4,2015-08-11,1.07,13.71
5,2015-08-12,1.22,13.61
6,2015-08-13,1.83,13.49
7,2015-08-14,1.83,12.83
8,2015-08-15,1.69,
9,2015-08-16,1.57,


In [13]:
# Load the US Dollar Index history and show its first raw row.
dxy_raw = pd.read_csv('dxy_historical.csv')
dxy_raw.head(1)

Unnamed: 0,Date,Open,High,Low,Close
0,02/20/18,89.24,89.8,89.22,89.72


In [14]:
# Summarise column names, non-null counts and dtypes of the raw DXY frame.
dxy_raw.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 658 entries, 0 to 657
Data columns (total 5 columns):
Date      658 non-null object
 Open     658 non-null float64
 High     658 non-null float64
 Low      658 non-null float64
 Close    658 non-null float64
dtypes: float64(4), object(1)
memory usage: 25.8+ KB


In [15]:
# Keep the date and closing value. The price column is selected as ' Close'
# because its name in dxy_raw carries a leading space.
dxy_close = dxy_raw[['Date', ' Close']].copy()
# Dates look like '02/20/18'. Spell the layout out instead of using '%x',
# whose meaning depends on the active locale and is not MM/DD/YY everywhere.
dxy_close['Date'] = pd.to_datetime(dxy_close['Date'], format='%m/%d/%y')
dxy_close.sort_values('Date', inplace=True)
dxy_close.reset_index(drop=True, inplace=True)
# Normalise the names to the lower-case convention used by eth_exog.
dxy_close.columns = ['date', 'close']
dxy_close.head()

Unnamed: 0,date,close
0,2015-08-07,97.56
1,2015-08-10,97.19
2,2015-08-11,97.2
3,2015-08-12,96.29
4,2015-08-13,96.36


In [16]:
# (rows, columns) of the prepared DXY frame.
print(dxy_close.shape)

(658, 2)


In [17]:
# Outer-join the dollar index onto the combined frame by date.
# NOTE: eth_exog is rebound to its own merge result, so this cell is meant to
# run exactly once after the VIX merge.
eth_exog = pd.merge(eth_exog, dxy_close, how='outer', on='date')
# Relabel positionally so the generic 'close' column becomes 'dxy_close'.
eth_exog.columns = ['date', 'eth_close', 'vix_close', 'dxy_close']
eth_exog.head(n=10)

Unnamed: 0,date,eth_close,vix_close,dxy_close
0,2015-08-07,2.77,13.39,97.56
1,2015-08-08,0.753325,,
2,2015-08-09,0.701897,,
3,2015-08-10,0.708448,12.23,97.19
4,2015-08-11,1.07,13.71,97.2
5,2015-08-12,1.22,13.61,96.29
6,2015-08-13,1.83,13.49,96.36
7,2015-08-14,1.83,12.83,96.59
8,2015-08-15,1.69,,
9,2015-08-16,1.57,,


In [18]:
# Load the federal funds rate series from a CSV in the working directory.
# NOTE: no na_values are passed, so any non-numeric placeholder in the file
# is read in as text rather than NaN.
effr_raw = pd.read_csv('EFFR.csv')
effr_raw.head()

Unnamed: 0,DATE,EFFR
0,2013-09-03,0.09
1,2013-09-04,0.08
2,2013-09-05,0.08
3,2013-09-06,0.08
4,2013-09-09,0.08


In [20]:
# Working copy with lower-case column names.
effr = effr_raw.copy()
effr.columns = ['date', 'effr']
# Parse the ISO dates, order chronologically, keep only the span covered by
# the Ethereum prices (both end dates included) and renumber the index.
# The 'effr' values themselves are left exactly as read from the file.
effr = (
    effr.assign(date=pd.to_datetime(effr['date'], format='%Y-%m-%d'))
    .sort_values('date')
    .loc[lambda frame: frame['date'].between('2015-08-07', '2018-02-20')]
    .reset_index(drop=True)
)
effr.head()

Unnamed: 0,date,effr
0,2015-08-07,0.14
1,2015-08-10,0.14
2,2015-08-11,0.15
3,2015-08-12,0.15
4,2015-08-13,0.15


In [21]:
# Summarise column names, non-null counts and dtypes of the prepared EFFR frame.
effr.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 663 entries, 0 to 662
Data columns (total 2 columns):
date    663 non-null datetime64[ns]
effr    663 non-null object
dtypes: datetime64[ns](1), object(1)
memory usage: 10.4+ KB


In [22]:
# Frequency of each distinct rate value, rarest first (sort_values sorts
# ascending by default).
effr['effr'].value_counts().sort_values()

0.3       1
0.82      1
0.25      1
0.55      1
0.57      1
1.34      1
0.56      1
0.20      1
1.33      1
0.35      1
0.39      1
1.17      2
0.07      2
1.06      2
0.83      2
1.41      2
0.31      2
0.08      2
0.30      3
1.07      4
0.29      4
0.15     13
0.12     22
0.13     25
0.36     25
0.14     26
0.38     26
.        27
0.40     29
0.4      37
1.42     41
0.41     47
0.66     58
0.91     61
0.37     72
1.16    118
Name: effr, dtype: int64

In [29]:
# Rows whose rate is the literal '.' placeholder rather than a number;
# print them, then a blank line and their count.
effr_placeholder_rows = effr.loc[effr['effr'] == '.']
print(effr_placeholder_rows)
print('\n{}'.format(len(effr_placeholder_rows)))

          date effr
21  2015-09-07    .
46  2015-10-12    .
68  2015-11-11    .
79  2015-11-26    .
100 2015-12-25    .
105 2016-01-01    .
116 2016-01-18    .
136 2016-02-15    .
211 2016-05-30    .
236 2016-07-04    .
281 2016-09-05    .
306 2016-10-10    .
330 2016-11-11    .
339 2016-11-24    .
361 2016-12-26    .
366 2017-01-02    .
376 2017-01-16    .
401 2017-02-20    .
471 2017-05-29    .
497 2017-07-04    .
541 2017-09-04    .
566 2017-10-09    .
599 2017-11-23    .
621 2017-12-25    .
626 2018-01-01    .
636 2018-01-15    .
661 2018-02-19    .

27


In [24]:
# Outer-join the federal funds rate onto the combined frame by date; the
# column names already match, so no relabelling is needed.
eth_exog = pd.merge(eth_exog, effr, how='outer', on='date')

In [25]:
# Preview the combined frame after the EFFR merge.
eth_exog.head()

Unnamed: 0,date,eth_close,vix_close,dxy_close,effr
0,2015-08-07,2.77,13.39,97.56,0.14
1,2015-08-08,0.753325,,,
2,2015-08-09,0.701897,,,
3,2015-08-10,0.708448,12.23,97.19,0.14
4,2015-08-11,1.07,13.71,97.2,0.15


In [30]:
# Rows of the combined frame whose rate is the literal '.' placeholder;
# print them, then a blank line and their count.
exog_placeholder_rows = eth_exog.loc[eth_exog['effr'] == '.']
print(exog_placeholder_rows)
print('\n{}'.format(len(exog_placeholder_rows)))

          date    eth_close  vix_close  dxy_close effr
31  2015-09-07     1.250000        NaN      96.13    .
66  2015-10-12     0.626030      16.17      94.88    .
96  2015-11-11     0.791829      16.06      98.85    .
111 2015-11-26     0.884183        NaN      99.87    .
140 2015-12-25     0.870363        NaN        NaN    .
147 2016-01-01     0.948024        NaN        NaN    .
164 2016-01-18     1.430000        NaN      99.10    .
192 2016-02-15     5.290000        NaN      96.62    .
297 2016-05-30    12.730000        NaN      95.70    .
332 2016-07-04    11.470000        NaN      95.51    .
395 2016-09-05    11.720000        NaN      95.77    .
430 2016-10-10    11.760000      13.38      96.89    .
462 2016-11-11    10.290000      14.17      98.99    .
475 2016-11-24     9.230000        NaN     101.69    .
507 2016-12-26     7.270000        NaN        NaN    .
514 2017-01-02     8.380000        NaN     102.78    .
528 2017-01-16     9.640000        NaN     101.57    .
563 2017-0