In [177]:
### IMPORT DEPENDENCIES ###
## pip install tensorflow keras depends on it
import math
from sklearn import preprocessing
from sklearn import metrics
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Activation, Dense
from keras.layers import LSTM
from matplotlib import pyplot
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup as bs
import io
from datetime import datetime, timedelta
import re
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from math import sqrt
from sklearn.metrics import make_scorer
from sklearn.model_selection import cross_val_score

### GET DATA ##################################################################
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest"
}                                                   # header illusion...

def current_GLD():
    """Returns float USD amount of SPDR GLD ETF from business insider.com"""
    url = 'http://markets.businessinsider.com/etfs/spdr-gold-shares'
    my_req = requests.get(url, headers=header)
    my_soup = bs(my_req.content, "html.parser")
    t = my_soup.find_all('span', {"class": "aktien-big-font text-nowrap"})
    val_data = re.findall("\d+\.\d+", str(t))       # extract digits
    return print(t)                      # convert to float

current_GLD()

[]


In [178]:
def get_SPDRGLD():
    """Returns pandas df with datetime index, GLD price, and LMBA prices"""
    url = 'http://www.spdrgoldshares.com/assets/dynamic/GLD/GLD_US_archive_EN.csv'
    my_req = requests.get(url, headers=header)          # pull down csv from sp
    gld_hist = pd.read_csv(io.StringIO(my_req.content.decode('utf-8')),
                           skiprows=6,                  # clean up columns
                           parse_dates=True,
                           index_col="Date")

    gld_hist.index.rename('date', inplace=True)         # match index to cccagg

    gld_hist = gld_hist[[' GLD Close', ' LBMA Gold Price',
                         ' Total Net Asset Value in the Trust']]
    gld_hist.loc[:, gld_hist.columns.values[1]] = \
        gld_hist[gld_hist.columns[1]].str.replace('$', '')


                                                        # NA replace + ffill
    gld_hist.replace(' HOLIDAY', np.NaN, inplace=True)
    gld_hist.replace(' NYSE Closed', np.NaN, inplace=True)
    gld_hist.replace(' AWAITED', np.NaN, inplace=True)
    gld_hist = gld_hist.fillna(method='ffill')

    gld_hist = gld_hist.rename(columns={            # clean column names
        gld_hist.columns.values[0]: 'GLD_close',
        gld_hist.columns.values[1]: 'LMBA_price',
        gld_hist.columns.values[2]: 'GLD_market_cap'})
                                                    # convert to float values
    gld_hist["GLD_close"] = gld_hist.GLD_close.astype(float)
    gld_hist["LMBA_price"] = gld_hist.LMBA_price.astype(float)
    gld_hist["GLD_market_cap"] = gld_hist.GLD_market_cap.astype(float)

                                                    # get current GLD prices
    new_entry = pd.DataFrame.from_dict({pd.to_datetime(datetime.now()): {
        # "GLD_close": current_GLD(),                 # reuse yestdy's LMBA price
        "LMBA_price": gld_hist.iloc[-1][1],
        "GLD_market_cap": gld_hist.iloc[-1][2]
    }}, orient='index')
    new_entry.index.rename('date', inplace=True)    # name index like gld_hist

    return gld_hist.append(new_entry)

get_SPDRGLD()

Unnamed: 0_level_0,GLD_close,LMBA_price,GLD_market_cap
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2004-11-18 00:00:00.000000,44.38,442.00,1.149200e+08
2004-11-19 00:00:00.000000,44.78,445.60,8.288069e+08
2004-11-22 00:00:00.000000,44.75,447.80,1.253785e+09
2004-11-23 00:00:00.000000,45.05,448.15,1.254751e+09
2004-11-24 00:00:00.000000,45.05,448.60,1.390569e+09
...,...,...,...
2022-08-02 00:00:00.000000,164.05,1779.75,5.737023e+10
2022-08-03 00:00:00.000000,164.45,1761.25,5.664174e+10
2022-08-04 00:00:00.000000,167.17,1783.20,5.734729e+10
2022-08-05 00:00:00.000000,167.17,1783.20,5.696056e+10


In [179]:
def get_sp500():
    """ Pull Daily Historical S&P500 from FRED https://fred.stlouisfed.org"""
    link = "https://fred.stlouisfed.org/data/SP500.txt"
    my_req = requests.get(link, headers=header)
    sp500 = pd.read_table(io.StringIO(my_req.content.decode('utf-8')),
                          parse_dates=True,
                          # index_col="DATE",
                          dtype={'VALUE': np.float64},
                          delim_whitespace=True,
                          na_values='.',
                          skiprows=44)
    sp500.rename(columns = {'VALUE':'SP500'}, inplace=True)
    sp500.rename(columns = {'DATE': 'date'}, inplace=True)
    # sp500 = sp500.reindex(sp500.index.rename('date'))
    sp500['date'] = pd.to_datetime(sp500['date'])

    return sp500

get_sp500().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2610 entries, 0 to 2609
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    2610 non-null   datetime64[ns]
 1   SP500   2517 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 40.9 KB


In [180]:
def get_crypto(symb, yrs):
    """ for pulling 'yrs' of data from Cryptocompare API"""
    c_daily = "https://min-api.cryptocompare.com/data/histoday"
    param = {'fsym': symb, 'tsym': 'USD',
             'e': 'CCCAGG', 'limit': round(yrs*365)}    # x years
    t = requests.get(c_daily, param).json()['Data']
    ck = pd.DataFrame.from_dict(t)
    date_in = pd.to_datetime(ck.time, unit='s',
                             origin='unix')
    ck['date'] = date_in
    ck.index = date_in
    ck.index.rename('date', inplace=True)
    ck = ck.query('volumefrom != 0')
    ck = ck[['close', 'volumeto']]
    ck = ck.assign(btc_close = ck.close)
    ck = ck.assign(btc_volume =ck.volumeto)
    return ck[['btc_close', 'btc_volume']]
get_crypto('btc', 3)

Unnamed: 0_level_0,btc_close,btc_volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-08-07,11974.28,8.347194e+08
2019-08-08,11982.80,5.884635e+08
2019-08-09,11865.21,4.642146e+08
2019-08-10,11293.59,4.700497e+08
2019-08-11,11549.06,2.928147e+08
...,...,...
2022-08-02,22991.49,7.876981e+08
2022-08-03,22825.25,1.039306e+09
2022-08-04,22622.99,8.350334e+08
2022-08-05,23319.22,1.166015e+09


In [181]:
def get_volume():
    """For obtaining crypto volume"""
    cols = ['date', 'timestamp', 'volume']
    limit = 1800
    url = "https://min-api.cryptocompare.com/data/exchange/histoday?tsym=USD&limit=" + \
          str(limit)
    t = requests.get(url).json()['Data']
    data = pd.DataFrame.from_dict(t)
    date_in = pd.to_datetime(data.time, unit='s',
                             origin='unix')
    data['date'] = date_in
    data.index = date_in
    data.index.rename('date', inplace=True)
    data = data.assign(crypto_volume = data.volume)
    return data[['crypto_volume']]
get_volume()

Unnamed: 0_level_0,crypto_volume
date,Unnamed: 1_level_1
2017-09-01,5.039887e+09
2017-09-02,5.697696e+09
2017-09-03,3.709253e+09
2017-09-04,5.621671e+09
2017-09-05,4.974928e+09
...,...
2022-08-02,5.964921e+10
2022-08-03,4.390974e+10
2022-08-04,2.493986e+10
2022-08-05,2.959353e+10


In [182]:
def combined_data(coin, yrs):
    """Combines features with crypto data"""
    cry = get_crypto(coin, yrs)
    gld = get_SPDRGLD()
    sp5 = get_sp500()
    vol = get_volume()
    # df = cry.join(gld,how='outer').fillna(method='ffill')
    # df = df.join(sp5, how='outer').fillna(method='ffill')
    # df = df.join(vol, how ='outer').fillna(method='ffill')
    # df = df.dropna()

    ## Combination of data with .map() function
    cry = cry.rename_axis('date').reset_index()
    vol = vol.rename_axis('date').reset_index()
    gld = gld.rename_axis('date').reset_index() #may need to strip time off from within cells in the date column
    #sp5 is not being operated on with .rename_axis() because it already comes with a 'DATE' from the webscraping exercise

    df_comb = cry
    df_comb['crypto_volume'] = df_comb.date.map(vol.set_index('date')['crypto_volume'].to_dict())
    df_comb['GLD_close'] = df_comb.date.map(gld.set_index('date')['GLD_close'].to_dict())
    df_comb['LMBA_price'] = df_comb.date.map(gld.set_index('date')['LMBA_price'].to_dict())
    df_comb['GLD_market_cap'] = df_comb.date.map(gld.set_index('date')['GLD_market_cap'].to_dict())

    ## .map() function is giving problems for sp500, I will switch to .join()
    # df_comb['SP500'] = df_comb.date.map(sp5.set_index('date')['SP500'].to_dict())
    df_comb = df_comb.join(sp5.set_index('date'), on ='date', how='outer').fillna(method='ffill')
    df_comb = df_comb.dropna()


    return df_comb


combined_data('BTC',5)

Unnamed: 0,date,btc_close,btc_volume,crypto_volume,GLD_close,LMBA_price,GLD_market_cap,SP500
25.0,2017-09-01,4921.85,4.911404e+08,5.039887e+09,126.06,1320.40,3.527526e+10,2476.55
26.0,2017-09-02,4573.80,7.477138e+08,5.697696e+09,126.06,1320.40,3.527526e+10,2476.55
27.0,2017-09-03,4612.92,4.701358e+08,3.709253e+09,126.06,1320.40,3.527526e+10,2476.55
28.0,2017-09-04,4267.45,7.372161e+08,5.621671e+09,126.06,1320.40,3.527526e+10,2476.55
29.0,2017-09-05,4409.08,6.738468e+08,4.974928e+09,127.46,1335.55,3.605935e+10,2457.85
...,...,...,...,...,...,...,...,...
,2017-07-31,23206.69,1.106710e+08,4.383047e+09,167.17,1783.20,5.696056e+10,2470.30
,2017-08-01,23206.69,1.106710e+08,4.383047e+09,167.17,1783.20,5.696056e+10,2476.35
,2017-08-02,23206.69,1.106710e+08,4.383047e+09,167.17,1783.20,5.696056e+10,2477.57
,2017-08-03,23206.69,1.106710e+08,4.383047e+09,167.17,1783.20,5.696056e+10,2472.16


In [186]:
### FEATURE ENGINEERING  ######################################################
def rate_of_change(data):
    # rate of change
    data["btc_close_roc"] = np.gradient(data['btc_close'])
    data["btc_volume_roc"] = np.gradient(data['btc_volume'])
    data["crypto_volume_roc"] = np.gradient(data['crypto_volume'])
    data["gold_close_roc"] = np.gradient(data['GLD_close'])
    data["lmba_gold_roc"] = np.gradient(data['LMBA_price'])
    data["gold_volume_roc"] = np.gradient(data['GLD_market_cap'])
    data["SP500_roc"] = np.gradient(data['SP500'])
    return data

def parse_date(data):
    # parse date into year, month, week, day, dayofweek
    # data["year"] = data.index[6].year
    # data['month'] = data.index.month
    # data['week'] = data.index.week
    # data['day'] = data.index.day
    # data['dayofweek'] = data.index.dayofweek

    ##### OLD CODES DID NOT WORK SO I AM USING THE ONE BELOW

    data['year'] = pd.DatetimeIndex(data['date']).year
    data['month'] = pd.DatetimeIndex(data['date']).month
    data['week'] = pd.DatetimeIndex(data['date']).week
    data['day'] = pd.DatetimeIndex(data['date']).day
    data['dayofweek'] = pd.DatetimeIndex(data['date']).dayofweek
    return data

def get_dummies(data):
    # day of week
    dayofweek_dummies = pd.get_dummies(data['dayofweek'], prefix='dayofweek')
    data = pd.concat([data, dayofweek_dummies], axis=1)
    # year
    year_dummies = pd.get_dummies(data['year'], prefix='year')
    data = pd.concat([data, year_dummies], axis=1)
    # month
    month_dummies = pd.get_dummies(data['month'], prefix='month')
    data = pd.concat([data, month_dummies], axis=1)
    data.drop('year', axis=1, inplace=True)
    data.drop('month', axis=1, inplace=True)
    data.drop('dayofweek', axis=1, inplace=True)
    return (data)

def add_feats(x):
    """Function to add all kinds of calculated columns"""
    # 1-day Log Returns
    x = x.assign(gld_lr=np.log(x.GLD_close / x.GLD_close.shift(1)))
    x = x.assign(gmc_lr=np.log(x.GLD_market_cap / x.GLD_market_cap.shift(1)))
    x = x.assign(sp5_lr=np.log(x.SP500 / x.SP500.shift(1)))
    x = x.assign(crv_lr=np.log(x.crypto_volume / x.crypto_volume.shift(1)))

    # BTC Stats...
    x = x.assign(btcMA3=x.rolling(3).mean().btc_close)  # rolling mean, std
    x = x.assign(btcMA5=x.rolling(5).mean().btc_close)
    x = x.assign(btcMA10=x.rolling(10).mean().btc_close)
    x = x.assign(btcMA20=x.rolling(20).mean().btc_close)
    # x = x.assign(btcSD5 = x.rolling(5).std().btc_close)

    # Gold Stats...
    x = x.assign(gldMA2=x.rolling(2).mean().GLD_close)
    x = x.assign(gldMA5=x.rolling(5).mean().GLD_close)
    x = x.assign(gldMA10=x.rolling(10).mean().GLD_close)
    x = x.assign(gldMA20=x.rolling(20).mean().GLD_close)
    # x = x.assign(gldSD5 = x.rolling(5).std().GLD_close)

    # response vars...
    x = x.assign(btc_lr=np.log(x.btc_close / x.btc_close.shift(1)))
    x = x.assign(btv_lr=np.log(x.btc_volume / x.btc_volume.shift(1)))
    x = x.assign(btc_lg1p=np.log1p(x.btc_close)).dropna()  # log of price
    return(x)

def get_combined_data():
    """Function that adds all columns for model-building"""
    data = combined_data('BTC', 5)
    # static = data.copy()
    # data = static.copy()
    data = parse_date(data)
    data = get_dummies(data)
    data = rate_of_change(data)
    data = add_feats(data)
    return data

get_combined_data()

Unnamed: 0,date,btc_close,btc_volume,crypto_volume,GLD_close,LMBA_price,GLD_market_cap,SP500,week,day,...,btcMA5,btcMA10,btcMA20,gldMA2,gldMA5,gldMA10,gldMA20,btc_lr,btv_lr,btc_lg1p
44.0,2017-09-20,3882.16,3.424381e+08,2.272666e+09,123.62,1311.3,3.566040e+10,2508.24,38,20,...,3855.786,3848.288,4171.4880,124.12,124.732,125.371,126.213,-0.006624,-0.356063,8.264405
45.0,2017-09-21,3617.27,5.435702e+08,2.630687e+09,122.68,1292.1,3.539559e+10,2500.60,38,21,...,3839.456,3788.225,4106.2590,123.15,124.162,125.020,126.044,-0.070672,0.462068,8.193751
46.0,2017-09-22,3600.83,4.146125e+08,2.061039e+09,123.24,1294.8,3.562911e+10,2502.22,38,22,...,3821.700,3732.416,4057.6105,122.96,123.704,124.688,125.903,-0.004555,-0.270814,8.189197
47.0,2017-09-23,3788.02,2.533708e+08,1.682409e+09,123.24,1294.8,3.562911e+10,2502.22,38,23,...,3759.248,3724.189,4016.3655,123.24,123.480,124.451,125.762,0.050679,-0.492490,8.239863
48.0,2017-09-24,3667.52,1.838521e+08,1.536482e+09,123.24,1294.8,3.562911e+10,2502.22,38,24,...,3711.160,3766.633,3986.3690,123.24,123.204,124.159,125.621,-0.032328,-0.320722,8.207544
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,2017-07-31,23191.86,1.116046e+08,4.490089e+09,167.17,1783.2,5.696056e+10,2470.30,31,31,...,23191.860,23191.860,23191.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.051600
,2017-08-01,23191.86,1.116046e+08,4.490089e+09,167.17,1783.2,5.696056e+10,2476.35,31,1,...,23191.860,23191.860,23191.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.051600
,2017-08-02,23191.86,1.116046e+08,4.490089e+09,167.17,1783.2,5.696056e+10,2477.57,31,2,...,23191.860,23191.860,23191.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.051600
,2017-08-03,23191.86,1.116046e+08,4.490089e+09,167.17,1783.2,5.696056e+10,2472.16,31,3,...,23191.860,23191.860,23191.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.051600


In [184]:
get_combined_data()

Unnamed: 0,date,btc_close,btc_volume,crypto_volume,GLD_close,LMBA_price,GLD_market_cap,SP500,week,day,...,btcMA5,btcMA10,btcMA20,gldMA2,gldMA5,gldMA10,gldMA20,btc_lr,btv_lr,btc_lg1p
44.0,2017-09-20,3882.16,3.424381e+08,2.272666e+09,123.62,1311.3,3.566040e+10,2508.24,38,20,...,3855.786,3848.288,4171.4880,124.12,124.732,125.371,126.213,-0.006624,-0.356063,8.264405
45.0,2017-09-21,3617.27,5.435702e+08,2.630687e+09,122.68,1292.1,3.539559e+10,2500.60,38,21,...,3839.456,3788.225,4106.2590,123.15,124.162,125.020,126.044,-0.070672,0.462068,8.193751
46.0,2017-09-22,3600.83,4.146125e+08,2.061039e+09,123.24,1294.8,3.562911e+10,2502.22,38,22,...,3821.700,3732.416,4057.6105,122.96,123.704,124.688,125.903,-0.004555,-0.270814,8.189197
47.0,2017-09-23,3788.02,2.533708e+08,1.682409e+09,123.24,1294.8,3.562911e+10,2502.22,38,23,...,3759.248,3724.189,4016.3655,123.24,123.480,124.451,125.762,0.050679,-0.492490,8.239863
48.0,2017-09-24,3667.52,1.838521e+08,1.536482e+09,123.24,1294.8,3.562911e+10,2502.22,38,24,...,3711.160,3766.633,3986.3690,123.24,123.204,124.159,125.621,-0.032328,-0.320722,8.207544
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,2017-07-31,23206.86,1.106736e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2470.30,31,31,...,23206.860,23206.860,23206.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052246
,2017-08-01,23206.86,1.106736e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2476.35,31,1,...,23206.860,23206.860,23206.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052246
,2017-08-02,23206.86,1.106736e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2477.57,31,2,...,23206.860,23206.860,23206.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052246
,2017-08-03,23206.86,1.106736e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2472.16,31,3,...,23206.860,23206.860,23206.8600,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052246


In [185]:
get_combined_data()

Unnamed: 0,date,btc_close,btc_volume,crypto_volume,GLD_close,LMBA_price,GLD_market_cap,SP500,week,day,...,btcMA5,btcMA10,btcMA20,gldMA2,gldMA5,gldMA10,gldMA20,btc_lr,btv_lr,btc_lg1p
44.0,2017-09-20,3882.16,3.424381e+08,2.272666e+09,123.62,1311.3,3.566040e+10,2508.24,38,20,...,3855.786,3848.288,4171.4880,124.12,124.732,125.371,126.213,-0.006624,-0.356063,8.264405
45.0,2017-09-21,3617.27,5.435702e+08,2.630687e+09,122.68,1292.1,3.539559e+10,2500.60,38,21,...,3839.456,3788.225,4106.2590,123.15,124.162,125.020,126.044,-0.070672,0.462068,8.193751
46.0,2017-09-22,3600.83,4.146125e+08,2.061039e+09,123.24,1294.8,3.562911e+10,2502.22,38,22,...,3821.700,3732.416,4057.6105,122.96,123.704,124.688,125.903,-0.004555,-0.270814,8.189197
47.0,2017-09-23,3788.02,2.533708e+08,1.682409e+09,123.24,1294.8,3.562911e+10,2502.22,38,23,...,3759.248,3724.189,4016.3655,123.24,123.480,124.451,125.762,0.050679,-0.492490,8.239863
48.0,2017-09-24,3667.52,1.838521e+08,1.536482e+09,123.24,1294.8,3.562911e+10,2502.22,38,24,...,3711.160,3766.633,3986.3690,123.24,123.204,124.159,125.621,-0.032328,-0.320722,8.207544
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,2017-07-31,23204.00,1.108253e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2470.30,31,31,...,23204.000,23204.000,23204.0000,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052123
,2017-08-01,23204.00,1.108253e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2476.35,31,1,...,23204.000,23204.000,23204.0000,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052123
,2017-08-02,23204.00,1.108253e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2477.57,31,2,...,23204.000,23204.000,23204.0000,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052123
,2017-08-03,23204.00,1.108253e+08,4.383047e+09,167.17,1783.2,5.696056e+10,2472.16,31,3,...,23204.000,23204.000,23204.0000,167.17,167.170,167.170,167.170,0.000000,0.000000,10.052123
