In [1]:
import numpy as np
import pandas as pd
import scipy
from datetime import datetime
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

%matplotlib inline
warnings.filterwarnings('ignore')
sns.set_style('whitegrid')
sns.set_palette('muted')

## Load Data

In [16]:
eth_raw = pd.read_csv('eth.csv')
eth_raw.tail()

Unnamed: 0,date,txVolume(USD),adjustedTxVolume(USD),txCount,marketcap(USD),price(USD),exchangeVolume(USD),generatedCoins,fees,activeAddresses,medianTxValue(USD),medianFee,averageDifficulty,paymentCount,blockSize,blockCount
2018-09-05,808418000.0,808418000.0,626957.0,29107550000.0,286.05,2390390000.0,20329.5,362.38021,306686.0,0.429075,0.000175,3435482000000000.0,296219.0,151363404,5956,
2018-09-06,801284200.0,801284200.0,588959.0,23576870000.0,231.65,2097310000.0,20565.75,511.158838,293094.0,1.8532,0.000251,3399788000000000.0,300386.0,141998572,6048,
2018-09-07,651384300.0,651384300.0,582184.0,23366570000.0,229.53,1678260000.0,20218.3125,506.599508,296253.0,0.154933,0.000261,3399656000000000.0,260736.0,148491755,5913,
2018-09-08,525260700.0,525260700.0,543337.0,22187690000.0,217.91,1517200000.0,20289.75,404.495484,277064.0,0.0,0.00026,3375920000000000.0,225257.0,149634966,5936,
2018-09-09,474110000.0,474110000.0,565631.0,20203490000.0,198.38,1585980000.0,20266.875,433.078937,279792.0,0.0,0.000269,3209593000000000.0,232806.0,130475678,5899,


In [2]:
eth_price_raw = pd.read_csv('ethereum_price.csv')
print(eth_price_raw.info())
eth_price_raw.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 7 columns):
Date          929 non-null object
Open          929 non-null float64
High          929 non-null float64
Low           929 non-null float64
Close         929 non-null float64
Volume        929 non-null object
Market Cap    929 non-null object
dtypes: float64(4), object(3)
memory usage: 50.9+ KB
None


Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,"Feb 20, 2018",943.57,965.26,892.95,895.37,2545260000,92206500000
1,"Feb 19, 2018",921.67,957.78,921.55,943.87,2169020000,90047700000
2,"Feb 18, 2018",973.35,982.93,915.45,923.92,2567290000,95077100000
3,"Feb 17, 2018",944.75,976.6,940.75,974.12,2525720000,92264000000
4,"Feb 16, 2018",934.79,950.0,917.85,944.21,2369450000,91272100000


In [8]:
eth = eth_price_raw.copy()
eth['Date'] = pd.to_datetime(eth['Date'], format='%b %d, %Y')
eth.sort_values('Date', inplace=True)
eth.reset_index(drop=True, inplace=True)
print(eth.info())
eth.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 929 entries, 0 to 928
Data columns (total 7 columns):
Date          929 non-null datetime64[ns]
Open          929 non-null float64
High          929 non-null float64
Low           929 non-null float64
Close         929 non-null float64
Volume        929 non-null object
Market Cap    929 non-null object
dtypes: datetime64[ns](1), float64(4), object(2)
memory usage: 50.9+ KB
None


Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,2015-08-07,2.83,3.54,2.52,2.77,164329,-
1,2015-08-08,2.79,2.8,0.714725,0.753325,674188,167911000
2,2015-08-09,0.706136,0.87981,0.629191,0.701897,532170,42637600
3,2015-08-10,0.713989,0.729854,0.636546,0.708448,405283,43130000
4,2015-08-11,0.708087,1.13,0.663235,1.07,1463100,42796500


In [9]:
eth['Volume'] = pd.to_numeric(eth['Volume'].str.replace(',', ''), errors='coerce')
eth['Market Cap'] = pd.to_numeric(eth['Market Cap'].str.replace(',', ''), errors='coerce')
eth.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,2015-08-07,2.83,3.54,2.52,2.77,164329,
1,2015-08-08,2.79,2.8,0.714725,0.753325,674188,167911000.0
2,2015-08-09,0.706136,0.87981,0.629191,0.701897,532170,42637600.0
3,2015-08-10,0.713989,0.729854,0.636546,0.708448,405283,43130000.0
4,2015-08-11,0.708087,1.13,0.663235,1.07,1463100,42796500.0


## Exogenous Variables
- Etherem related
    - Volume
    - Market Cap
- Other Cryptocurrencies
    - BTC: Bitcoin
    - XRP: Ripple
    - EOS: EOS.IO
    - LTC: Litecoin
    - XLM: Stellar
    - XMR: Monero
- Other Indices
    - VIX
    - DXY
    - EFFR

In [10]:
df = eth_price_raw[['Date', 'Close', 'Volume', 'Market Cap']].copy()
df.columns = ['date', 'eth', 'eth_volume', 'eth_market_cap']
df['date'] = pd.to_datetime(df['date'], format='%b %d, %Y')
df.sort_values('date', inplace=True)
df.reset_index(drop=True, inplace=True)
df.head()

Unnamed: 0,date,eth,eth_volume,eth_market_cap
0,2015-08-07,2.77,164329,-
1,2015-08-08,0.753325,674188,167911000
2,2015-08-09,0.701897,532170,42637600
3,2015-08-10,0.708448,405283,43130000
4,2015-08-11,1.07,1463100,42796500


### **[VIX: CBOE Volatility Index](https://en.wikipedia.org/wiki/VIX)**
- Measure of stock market's expectation of volatility implied by S&P 500 index options, aka 'fear index'
- [Data Source](http://www.cboe.com/products/vix-index-volatility/vix-options-and-futures/vix-index/vix-historical-data)

In [None]:
vix_raw = pd.read_csv('vixcurrent.csv', header=1)
print(vix_raw.info())

In [None]:
vix_raw[:1]

In [None]:
vix_close = vix_raw[['Date', 'VIX Close']].copy()
vix_close['Date'] = pd.to_datetime(vix_close['Date'], format='%m/%d/%Y')
vix_close = vix_close.loc[(vix_close['Date'] >= '2015-08-07')
                          & (vix_close['Date'] <= '2018-02-20')]
vix_close.sort_values('Date', inplace=True)
vix_close.reset_index(drop=True, inplace=True)
vix_close.columns = ['date', 'vix']
vix_close.head()

In [None]:
df_ex = df.merge(vix_close, how='outer', on='date')
df_ex.vix.ffill(inplace=True)
#df_04ex.vix[0]= vix_raw.loc[vix_raw['Date'] == '11/24/2017', ['VIX Close']].values
print(df_ex.vix.isnull().sum())
df_ex.head()

### **[DXY: US Dollar Index](https://en.wikipedia.org/wiki/U.S._Dollar_Index)**
- Measure of value of USD relative to a basket of foreign currencies
- [Data Source](https://quotes.wsj.com/index/DXY/historical-prices)

In [None]:
dxy_raw = pd.read_csv('dxy_historical.csv')
dxy_raw[:1]

In [None]:
dxy_raw.info()

In [None]:
dxy = dxy_raw[['Date', ' Close']].copy()
dxy['Date'] = pd.to_datetime(dxy['Date'], format='%x')
dxy = dxy.loc[(dxy['Date'] >= '2015-08-07')
              & (dxy['Date'] <= '2018-02-20')]
dxy.sort_values('Date', inplace=True)
dxy.reset_index(drop=True, inplace=True)
dxy.columns = ['date', 'dxy']
dxy.head()

In [None]:
df_ex = df_ex.merge(dxy, how='outer', on='date')
df_ex.ffill(inplace=True)

# get first value
#dxy_raw['Date'] = pd.to_datetime(dxy_raw['Date'], format='%x')
#df_04ex.dxy[0] = dxy_raw.loc[dxy_raw['Date'] == '11/24/2017', [' Close']].values
print(df_ex.dxy.isnull().sum())
df_ex.head()

### **[EFFR: Federal Funds Rate](https://en.wikipedia.org/wiki/Federal_funds_rate)**
- The interest rate at which depository institutions (banks and credit unions) lend reserve balances to other depository institutions overnight, on an uncollateralized basis 
- [Data Source](https://fred.stlouisfed.org/series/EFFR)

In [None]:
effr_raw = pd.read_csv('EFFR.csv')
effr_raw.head()

In [None]:
effr = effr_raw.copy()
effr.columns = ['date', 'effr']
effr['date'] = pd.to_datetime(effr['date'], format='%Y-%m-%d')
effr.sort_values('date', inplace=True)
effr = effr.loc[(effr['date'] >= '2015-08-07') & (effr['date'] <= '2018-02-20')]
effr.reset_index(drop=True, inplace=True)
effr.head()

In [None]:
df_ex = df_ex.merge(effr, how='outer', on='date')
df_ex.ffill(inplace=True)
print(df_ex.effr.isnull().sum())
df_ex.head()