In [6]:
import numpy as np
import pandas as pd
import scipy
import datetime as dt
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Notebook-wide display configuration.
%matplotlib inline
# NOTE: this suppresses every warning for the rest of the session, so
# deprecation warnings raised by later cells will not be shown.
warnings.filterwarnings('ignore')
# Seaborn defaults applied to all subsequent plots.
sns.set_style('whitegrid')
sns.set_palette('muted')

## Exogenous Variables
- Ethereum: OHLCV
- Other Cryptocurrency prices
    - BTC: Bitcoin
    - XRP: Ripple
    - EOS: EOS.IO
    - LTC: Litecoin
    - XLM: Stellar
    - XMR: Monero
- Other Indices
    - VIX: CBOE Volatility Index
    - TWEXB: Trade Weighted USD Index, Broad
    - EFFR: Effective Federal Fund Rate

## Load Data

In [15]:
# Date stamp (YYYY-MM-DD) for the current local day; it is interpolated into
# the JSON file names when the per-symbol data is loaded.
# NOTE(review): because the stamp is always "today", the load presumably only
# succeeds when the JSON exports were produced on the same day — confirm.
date = format(dt.datetime.today(), '%Y-%m-%d')

# Symbols to load: the cryptocurrencies first, then the macro series.
symbols = [
    'eth', 'btc', 'xrp', 'eos', 'ltc', 'xlm', 'xmr',
    'vixcls', 'twexb', 'effr',
]

In [27]:
# Load one DataFrame per symbol from its dated JSON file
# ('<symbol>_data_<date>.json', read with orient='split').
# Building the dict in one pass avoids creating throwaway empty DataFrames
# and does not leak an unused loop variable named `df` into the notebook.
d = {
    symbol: pd.read_json('{}_data_{}.json'.format(symbol, date), orient='split')
    for symbol in symbols
}

# Expose every frame as a notebook-level variable named after its symbol
# (eth, btc, ...). globals() is used instead of locals(): updating the
# locals() mapping is only reliable at module scope, whereas globals() states
# the intent explicitly and keeps working if this code is moved into a function.
globals().update(d)

In [28]:
# Preview the first two rows of the ETH frame.
eth.head(2)

Unnamed: 0,date,open,high,low,close,volumefrom,volumeto
0,2015-08-06,0.6747,3.0,0.6747,3.0,123.93,371.79
1,2015-08-07,3.0,3.0,0.15,1.2,2119.43,1438.16


In [30]:
# Preview the first two rows of the VIX frame.
vixcls.head(2)

Unnamed: 0,date,vixcls
0,1990-01-02,17.24
1,1990-01-03,18.19


In [29]:
# Prefix the ETH (target currency) columns with 'eth_' so they stay
# distinguishable once other series are merged in. The first column is
# labelled 'date' and is not prefixed.
cols_orig = list(eth.columns)
cols_new = ['date'] + ['eth_{}'.format(name) for name in cols_orig[1:]]

eth.columns = cols_new
eth.head(2)

Unnamed: 0,date,eth_open,eth_high,eth_low,eth_close,eth_volumefrom,eth_volumeto
0,2015-08-06,0.6747,3.0,0.6747,3.0,123.93,371.79
1,2015-08-07,3.0,3.0,0.15,1.2,2119.43,1438.16


In [33]:
# exog cryptocurrencies: reduce each coin frame to its date and closing price.
currencies = [btc, xrp, eos, ltc, xlm, xmr]
keep = ['date', 'close']

for cur in currencies:
    # Columns to discard are all those not listed in `keep`.
    # (The earlier `cur.isin()` and `cur.drop()` calls passed no arguments and
    # therefore raised before anything was dropped.)
    drops = [col for col in cur.columns if col not in keep]
    # Drop in place so the frames referenced by btc, xrp, ... are the ones
    # modified, not temporary copies.
    cur.drop(columns=drops, inplace=True)

['btc', 'xrp', 'eos', 'ltc', 'xlm', 'xmr']

## Create Single DataFrame

### **[VIX: CBOE Volatility Index](https://en.wikipedia.org/wiki/VIX)**
- Measure of stock market's expectation of volatility implied by S&P 500 index options, aka 'fear index'
- [Data Source](http://www.cboe.com/products/vix-index-volatility/vix-options-and-futures/vix-index/vix-historical-data)

In [None]:
# Start the combined frame from a copy of the ETH data so that later merges
# do not modify `eth` itself.
df = eth.copy()

In [None]:
# Raw VIX history; header=1 takes the column names from the second line of the file.
vix_raw = pd.read_csv('vixcurrent.csv', header=1)
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" to the output.
vix_raw.info()
vix_raw[:1]

In [None]:
# Analysis window, inclusive on both ends; later cells reuse these bounds.
start_date = '2015-08-06'
end_date = '2018-09-09'

# VIX closing values inside the window, parsed to datetimes and ordered by date.
vix_close = (
    vix_raw[['Date', 'VIX Close']]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%m/%d/%Y'))
    .loc[lambda frame: (frame['Date'] >= start_date) & (frame['Date'] <= end_date)]
    .sort_values('Date')
    .reset_index(drop=True)
)
# Use the lowercase names shared with the combined frame ('date' is the merge key).
vix_close.columns = ['date', 'vix']
vix_close.head()

In [None]:
# Outer-join the VIX close onto the combined frame so that no date from
# either side is lost.
df = df.merge(vix_close, how='outer', on='date')
# Carry the last known VIX value forward over rows that have none.
# The result is assigned back to the column instead of calling
# ffill(inplace=True) on `df.vix`: an in-place call on a column obtained by
# attribute access operates on a temporary object under pandas Copy-on-Write
# and would leave `df` unchanged.
df['vix'] = df['vix'].ffill()
# Number of VIX values still missing after the fill.
print(df.vix.isnull().sum())
df.head()

### **[DXY: US Dollar Index](https://en.wikipedia.org/wiki/U.S._Dollar_Index)**
- Measure of value of USD relative to a basket of foreign currencies
- [Data Source](https://quotes.wsj.com/index/DXY/historical-prices)

In [None]:
# Raw US Dollar Index history.
dxy_raw = pd.read_csv('dxy_historical.csv')
# DataFrame.info() prints its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" to the output.
dxy_raw.info()
dxy_raw[:1]

In [None]:
# DXY closing values inside the analysis window, ordered by date.
# The source column is named ' Close' with a leading space.
# NOTE(review): '%x' is the locale's date representation, so parsing depends
# on the active locale — confirm it matches the date layout of the file.
dxy = (
    dxy_raw[['Date', ' Close']]
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%x'))
    .loc[lambda frame: (frame['Date'] >= start_date) & (frame['Date'] <= end_date)]
    .sort_values('Date')
    .reset_index(drop=True)
)
# Use the lowercase names shared with the combined frame ('date' is the merge key).
dxy.columns = ['date', 'dxy']
dxy.head()

In [None]:
# Outer-join the dollar index onto the combined frame, then forward-fill
# every column of the merged result.
df = df.merge(dxy, how='outer', on='date').ffill()

# Number of dxy values still missing after the fill (a forward fill cannot
# fill rows that precede the first observation).
print(df.dxy.isnull().sum())
df.head(2)

In [None]:
# Check the last two rows of the combined frame after the DXY merge.
df.tail(2)

### **[EFFR: Federal Funds Rate](https://en.wikipedia.org/wiki/Federal_funds_rate)**
- The interest rate at which depository institutions (banks and credit unions) lend reserve balances to other depository institutions overnight, on an uncollateralized basis 
- [Data Source](https://fred.stlouisfed.org/series/EFFR)

In [None]:
# Raw effective federal funds rate series.
# NOTE(review): if the file marks missing observations with a placeholder
# such as '.', the rate column will load as strings and a later forward fill
# will not treat those rows as missing — confirm against the file contents.
effr_raw = pd.read_csv('EFFR.csv')
effr_raw.head()

In [None]:
# Work on a copy of the raw series and give its two columns the names used
# in the combined frame.
effr = effr_raw.copy()
effr.columns = ['date', 'effr']

# Parse the dates, order chronologically, keep the analysis window
# (inclusive), and renumber the rows.
effr = (
    effr.assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
    .sort_values('date')
    .loc[lambda frame: (frame['date'] >= start_date) & (frame['date'] <= end_date)]
    .reset_index(drop=True)
)
effr.head()

In [None]:
# Outer-join the federal funds rate onto the combined frame, then
# forward-fill every column of the merged result.
df = df.merge(effr, how='outer', on='date').ffill()
# Number of effr values still missing after the fill.
print(df.effr.isnull().sum())
df.head()

## Add other coins

In [None]:
# bitcoin: daily closing price inside the analysis window, as columns
# ['date', 'btc'].
btc_raw = pd.read_csv('btc_raw.csv')
btc = (
    btc_raw[['timestamp', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']]
    .assign(
        # Parse the timestamp, truncate it to the calendar day, then convert
        # back to datetimes so it can be compared with the window bounds.
        timestamp=lambda frame: pd.to_datetime(
            pd.to_datetime(frame['timestamp']).dt.date, format='%Y-%m-%d'
        )
    )
    .rename(columns={'timestamp': 'date', 'close': 'btc'})
    .loc[lambda frame: (frame['date'] >= start_date) & (frame['date'] <= end_date)]
    .sort_values(by='date')
    .reset_index(drop=True)
    # Only the close (now named 'btc') is kept as a feature.
    .drop(['high', 'low', 'open', 'volumefrom', 'volumeto'], axis=1)
)
btc.head(2)

In [None]:
# Outer-join the BTC close onto the combined frame on 'date'.
# No forward fill is applied here, so dates without a BTC row stay NaN.
df = df.merge(btc, how='outer', on='date')
print(df.shape)
df.head(3)

In [None]:
# ripple: daily closing price inside the analysis window, as columns
# ['date', 'xrp'], outer-joined onto the combined frame.
xrp_raw = pd.read_csv('xrp_raw.csv')
xrp = (
    xrp_raw[['timestamp', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']]
    .assign(
        # Parse the timestamp, truncate it to the calendar day, then convert
        # back to datetimes so it can be compared with the window bounds.
        timestamp=lambda frame: pd.to_datetime(
            pd.to_datetime(frame['timestamp']).dt.date, format='%Y-%m-%d'
        )
    )
    .rename(columns={'timestamp': 'date', 'close': 'xrp'})
    .loc[lambda frame: (frame['date'] >= start_date) & (frame['date'] <= end_date)]
    .sort_values(by='date')
    .reset_index(drop=True)
    # Only the close (now named 'xrp') is kept as a feature.
    .drop(['high', 'low', 'open', 'volumefrom', 'volumeto'], axis=1)
)
df = df.merge(xrp, how='outer', on='date')
print(df.shape)
df.head(3)

In [None]:
# eos.io: daily closing price inside the analysis window, as columns
# ['date', 'eos'], outer-joined onto the combined frame.
eos_raw = pd.read_csv('eos_raw.csv')
eos = (
    eos_raw[['timestamp', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']]
    .assign(
        # Parse the timestamp, truncate it to the calendar day, then convert
        # back to datetimes so it can be compared with the window bounds.
        timestamp=lambda frame: pd.to_datetime(
            pd.to_datetime(frame['timestamp']).dt.date, format='%Y-%m-%d'
        )
    )
    .rename(columns={'timestamp': 'date', 'close': 'eos'})
    .loc[lambda frame: (frame['date'] >= start_date) & (frame['date'] <= end_date)]
    .sort_values(by='date')
    .reset_index(drop=True)
    # Only the close (now named 'eos') is kept as a feature.
    .drop(['high', 'low', 'open', 'volumefrom', 'volumeto'], axis=1)
)
df = df.merge(eos, how='outer', on='date')
print(df.shape)
df.head(3)

In [None]:
# litecoin: daily closing price inside the analysis window, as columns
# ['date', 'ltc'], outer-joined onto the combined frame.
ltc_raw = pd.read_csv('ltc_raw.csv')
ltc = (
    ltc_raw[['timestamp', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']]
    .assign(
        # Parse the timestamp, truncate it to the calendar day, then convert
        # back to datetimes so it can be compared with the window bounds.
        timestamp=lambda frame: pd.to_datetime(
            pd.to_datetime(frame['timestamp']).dt.date, format='%Y-%m-%d'
        )
    )
    .rename(columns={'timestamp': 'date', 'close': 'ltc'})
    .loc[lambda frame: (frame['date'] >= start_date) & (frame['date'] <= end_date)]
    .sort_values(by='date')
    .reset_index(drop=True)
    # Only the close (now named 'ltc') is kept as a feature.
    .drop(['high', 'low', 'open', 'volumefrom', 'volumeto'], axis=1)
)
df = df.merge(ltc, how='outer', on='date')
print(df.shape)
df.head(3)

In [None]:
# stellar: daily closing price inside the analysis window, as columns
# ['date', 'xlm'], outer-joined onto the combined frame.
xlm_raw = pd.read_csv('xlm_raw.csv')
xlm = (
    xlm_raw[['timestamp', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']]
    .assign(
        # Parse the timestamp, truncate it to the calendar day, then convert
        # back to datetimes so it can be compared with the window bounds.
        timestamp=lambda frame: pd.to_datetime(
            pd.to_datetime(frame['timestamp']).dt.date, format='%Y-%m-%d'
        )
    )
    .rename(columns={'timestamp': 'date', 'close': 'xlm'})
    .loc[lambda frame: (frame['date'] >= start_date) & (frame['date'] <= end_date)]
    .sort_values(by='date')
    .reset_index(drop=True)
    # Only the close (now named 'xlm') is kept as a feature.
    .drop(['high', 'low', 'open', 'volumefrom', 'volumeto'], axis=1)
)
df = df.merge(xlm, how='outer', on='date')
print(df.shape)
df.head(3)

In [None]:
# monero: daily closing price inside the analysis window, as columns
# ['date', 'xmr'], outer-joined onto the combined frame.
xmr_raw = pd.read_csv('xmr_raw.csv')
xmr = (
    xmr_raw[['timestamp', 'open', 'high', 'low', 'close', 'volumefrom', 'volumeto']]
    .assign(
        # Parse the timestamp, truncate it to the calendar day, then convert
        # back to datetimes so it can be compared with the window bounds.
        timestamp=lambda frame: pd.to_datetime(
            pd.to_datetime(frame['timestamp']).dt.date, format='%Y-%m-%d'
        )
    )
    .rename(columns={'timestamp': 'date', 'close': 'xmr'})
    .loc[lambda frame: (frame['date'] >= start_date) & (frame['date'] <= end_date)]
    .sort_values(by='date')
    .reset_index(drop=True)
    # Only the close (now named 'xmr') is kept as a feature.
    .drop(['high', 'low', 'open', 'volumefrom', 'volumeto'], axis=1)
)
df = df.merge(xmr, how='outer', on='date')
print(df.shape)
df.head(3)

In [None]:
# Persist the combined frame to 'crypto_df.pkl' in the working directory.
# Pickle files should only ever be loaded back from a trusted source.
df.to_pickle('crypto_df.pkl')

In [None]:
# Inspect the last rows of the final combined frame.
df.tail()

In [None]:
# Count the rows with no XLM value; the coin columns are merged without a
# forward fill, so dates lacking an XLM row remain NaN.
print(df.xlm.isnull().sum())