In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import requests
import matplotlib.pyplot as plt
from time import sleep

## Get data

In [3]:
def trade_bucket(startDate='2015-01-01', symbol='XBTUSD', binSize='1d'):
    """Download bucketed trade bars from the BitMEX REST API.

    The endpoint is paged: each request asks for up to 1000 rows beginning at
    offset ``start``, and paging stops at the first empty response.

    Parameters
    ----------
    startDate : str
        Value sent as the ``startTime`` query parameter.
    symbol : str
        Instrument symbol sent as the ``symbol`` query parameter.
    binSize : str
        Bucket width sent as the ``binSize`` query parameter.

    Returns
    -------
    pandas.DataFrame
        All returned rows, indexed by the parsed ``timestamp`` column. If the
        API returns no rows at all, an empty frame with an empty
        ``timestamp`` index is returned.

    Raises
    ------
    requests.HTTPError
        If a response carries a 4xx/5xx status.
    """
    url = 'https://www.bitmex.com/api/v1/trade/bucketed'
    page_size = 1000
    # Throttle: pause 60 s after every 30 requests (presumably to stay under
    # the API rate limit -- confirm against the current BitMEX limits).
    requests_per_window = 30
    pause_seconds = 60

    pages = []
    start = 0
    requests_made = 0

    while True:
        # Sleep *before* the next request so no pause is wasted after the
        # final (empty) page.
        if requests_made and requests_made % requests_per_window == 0:
            sleep(pause_seconds)

        # Let requests build and URL-encode the query string.
        r = requests.get(
            url,
            params={
                'binSize': binSize,
                'partial': 'false',
                'symbol': symbol,
                'count': page_size,
                'start': start,
                'startTime': startDate,
            },
            timeout=30,  # never hang the notebook on a stalled connection
        )
        requests_made += 1
        r.raise_for_status()

        rows = r.json()
        if not rows:
            break
        pages.append(pd.DataFrame(rows))
        start += page_size

    if not pages:
        # Nothing returned: hand back an empty, correctly-indexed frame rather
        # than failing on the missing 'timestamp' column.
        return pd.DataFrame(index=pd.DatetimeIndex([], name='timestamp'))

    # Concatenate once; growing a frame page by page is quadratic and
    # DataFrame.append no longer exists in pandas >= 2.0.
    df = pd.concat(pages)
    # No dayfirst hint: the API timestamps are assumed to be ISO-8601
    # (year first) -- TODO confirm against the API docs.
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    return df.set_index('timestamp')

In [4]:
# Pull the price bars and derive daily returns from the 'close' column.
# The first assign works on the fractional return so the log return can be
# computed from it; the second rescales the simple return to percent.
trade_df = (
    trade_bucket()
    .assign(
        percent_change=lambda bars: bars['close'].pct_change(),
        log_percent_change=lambda bars: 100*np.log(1+bars['percent_change']),
    )
    .assign(percent_change=lambda bars: 100*bars['percent_change'])
)

# Modelling series: log returns in percent, without the leading NaN that
# pct_change() leaves on the first row.
data = trade_df['log_percent_change'].dropna()

## ARCH Model

### Model Fitting

In [5]:
from arch import arch_model

# Model specification: autoregressive mean with 10 lags, EGARCH volatility
# with p=10 / o=0 / q=10, and skewed Student's t innovations.
am = arch_model(
    data,
    mean='AR',
    lags=10,
    vol='EGARCH',
    p=10,
    o=0,
    q=10,
    dist='skewt',
)

# Fit the model, reporting optimiser progress every 50 iterations and
# capping the optimiser at 200 iterations.
res = am.fit(update_freq=50, options=dict(maxiter=200))

Iteration:     50,   Func. Count: 1.89e+03,   Neg. LLF: 4124.867015968201
Optimization terminated successfully.    (Exit mode 0)
            Current function value: 4124.751579342288
            Iterations: 70
            Function evaluations: 2619
            Gradient evaluations: 70


In [6]:
# Show the fitted model's summary (fit statistics and the coefficient tables
# for the mean, volatility and distribution parameters).
res.summary()

0,1,2,3
Dep. Variable:,log_percent_change,R-squared:,0.0
Mean Model:,AR,Adj. R-squared:,-0.006
Vol Model:,EGARCH,Log-Likelihood:,-4124.75
Distribution:,Standardized Skew Student's t,AIC:,8317.5
Method:,Maximum Likelihood,BIC:,8500.41
,,No. Observations:,1603.0
Date:,"Tue, Feb 25 2020",Df Residuals:,1569.0
Time:,23:15:49,Df Model:,34.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
Const,0.1564,8.293e-03,18.860,2.412e-79,"[ 0.140, 0.173]"
log_...nge[1],-0.0820,8.752e-03,-9.374,6.997e-21,"[-9.919e-02,-6.488e-02]"
log_...nge[2],6.1731e-03,6.720e-03,0.919,0.358,"[-6.999e-03,1.934e-02]"
log_...nge[3],0.0170,1.440e-02,1.183,0.237,"[-1.119e-02,4.525e-02]"
log_...nge[4],-2.9109e-04,8.321e-03,-3.498e-02,0.972,"[-1.660e-02,1.602e-02]"
log_...nge[5],-7.5122e-03,1.278e-02,-0.588,0.557,"[-3.256e-02,1.754e-02]"
log_...nge[6],0.0113,1.916e-02,0.590,0.555,"[-2.624e-02,4.886e-02]"
log_...nge[7],0.0342,1.272e-02,2.688,7.183e-03,"[9.265e-03,5.913e-02]"
log_...nge[8],0.0159,1.390e-02,1.143,0.253,"[-1.135e-02,4.314e-02]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,0.7500,1.749,0.429,0.668,"[ -2.678, 4.178]"
alpha[1],0.4244,0.101,4.217,2.481e-05,"[ 0.227, 0.622]"
alpha[2],0.2402,0.459,0.523,0.601,"[ -0.660, 1.140]"
alpha[3],0.1972,0.602,0.327,0.743,"[ -0.983, 1.378]"
alpha[4],0.2838,0.411,0.690,0.490,"[ -0.522, 1.090]"
alpha[5],0.3154,0.479,0.658,0.511,"[ -0.624, 1.255]"
alpha[6],0.3006,0.778,0.386,0.699,"[ -1.225, 1.826]"
alpha[7],0.3147,0.622,0.506,0.613,"[ -0.904, 1.534]"
alpha[8],0.3000,0.619,0.485,0.628,"[ -0.913, 1.513]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
nu,2.5067,0.197,12.733,3.867e-37,"[ 2.121, 2.893]"
lambda,-0.0168,2.045e-02,-0.822,0.411,"[-5.689e-02,2.327e-02]"


### Forecasting

In [27]:
# Sanity check on the sample sizes. `data` is expected to be one row shorter
# than `trade_df`, presumably because the first return is NaN and was dropped.
len(data),len(trade_df)

(1613, 1614)

In [21]:
lookahead = 7   # forecast horizon, in steps of the fitted series
n_sim = 1000    # number of simulated paths

# Simulation-based forecast that starts at the last observation of `data`.
# Only the raw simulated values are kept; they are presumably indexed as
# (forecast origin, simulation, horizon step) -- confirm against the arch docs.
lookahead_forecast = (
    res.forecast(
        horizon=lookahead,
        start=len(data)-1,
        method='simulation',
        simulations=n_sim,
    )
    .simulations
    .values
)

In [9]:
# Convert the simulated log returns (in percent) of the last forecast origin
# into simulated price paths: compounding log returns is a cumulative sum in
# the exponent, anchored at the last observed close.
# .iloc makes the "last row" lookup explicitly positional; a bare [-1] on a
# datetime-indexed Series relies on a deprecated positional fallback.
price_forecast = trade_df['close'].iloc[-1] * np.exp(
    np.cumsum(lookahead_forecast[-1, :, :lookahead].T, axis=0) / 100
)  # shape: (lookahead, number of simulations)

# 5th, 10th, ..., 95th percentile of the simulated price on each day;
# row 9 is the median, rows 8..0 lie below it and rows 10..18 above it.
price_percentile = np.percentile(price_forecast, np.arange(5, 100, 5), axis=1)

# Two rows per day (lower bound, upper bound) and one column per central
# interval: column j pairs the (45 - 5j)th with the (55 + 5j)th percentile,
# i.e. the central interval covering 10*(j+1) percent of the simulations.
predicted_price_bounds = np.empty([2*lookahead, 9])
predicted_price_bounds[0::2] = price_percentile[8::-1].T
predicted_price_bounds[1::2] = price_percentile[10:].T

index = pd.MultiIndex.from_product(
    [['Day '+str(day) for day in range(1, lookahead+1)], ['Lower', 'Upper']]
)
pd.DataFrame(
    predicted_price_bounds,
    columns=['Probability '+str(level) for level in range(10, 100, 10)],
    index=index,
)

Unnamed: 0,Unnamed: 1,Probability 10,Probability 20,Probability 30,Probability 40,Probability 50,Probability 60,Probability 70,Probability 80,Probability 90
Day 1,Lower,9672.008823,9649.669166,9618.680992,9591.241512,9562.339136,9515.693766,9470.182023,9398.801289,9255.789733
Day 1,Upper,9723.920567,9743.235821,9779.032066,9808.322137,9843.508619,9882.320078,9925.716758,10004.790987,10145.166487
Day 2,Lower,9675.006574,9632.023084,9594.946037,9546.715722,9491.000801,9428.729345,9341.476521,9240.340262,8985.949701
Day 2,Upper,9741.362442,9775.446928,9810.7892,9854.062515,9902.439677,9965.807983,10031.593631,10130.833529,10365.575365
Day 3,Lower,9653.775553,9613.239528,9560.960584,9507.886358,9446.390488,9358.511273,9281.30063,9137.705275,8949.960283
Day 3,Upper,9749.451945,9799.48361,9847.383854,9893.758333,9959.833419,10044.719593,10122.5789,10241.736725,10518.930715
Day 4,Lower,9695.714829,9626.647845,9568.785308,9498.394197,9422.568911,9326.550054,9206.53559,9068.623952,8867.43494
Day 4,Upper,9794.417226,9849.990863,9910.104862,9976.323799,10053.076003,10134.608862,10238.519853,10373.139545,10679.772468
Day 5,Lower,9674.837581,9620.669712,9550.065246,9486.461211,9379.070558,9283.917125,9199.667189,9010.265697,8724.941077
Day 5,Upper,9801.992492,9857.613649,9921.197045,9997.175751,10067.659517,10171.873195,10279.352373,10434.831036,10747.174046
