# 1. Libraries and settings <a class="anchor" id="1-bullet"></a> 

In [None]:
import numpy as np
import pandas as pd
import math
import sklearn
import sklearn.preprocessing
import datetime
import os
import matplotlib.pyplot as plt

!pip install pmdarima -U
from pmdarima.arima import auto_arima,ARIMA
!pip install arch -U
from arch import arch_model
!pip install yfinance -U
import yfinance
import warnings
import arch
warnings.filterwarnings("ignore")
#sns.set()

# 2. Analyze data <a class="anchor" id="2-bullet"></a> 
- load the BTC-BUSD price history from `BTC-BUSD.parquet` (Kaggle `binance-full-history` dataset)
- analyze data

In [None]:
# Read the BTC-BUSD price history shipped with the Kaggle "binance-full-history" input.
BTC_BUSD_PATH = '/kaggle/input/binance-full-history/BTC-BUSD.parquet'
df = pd.read_parquet(BTC_BUSD_PATH)

# Optional (disabled): drop the volume/trade-count columns and keep only prices.
#df.drop(['volume', 'quote_asset_volume', 'number_of_trades', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume'],1,inplace=True)

# Print the column/dtype summary, then show the first rows as the cell output.
df.info()
df.head()


In [None]:
# Display descriptive statistics for the loaded frame as the cell output.
df.describe()

In [None]:
# Draw the open/close/low/high series on a single wide figure, one colour per column.
plt.figure(figsize=(15, 5))
price_colours = {'open': 'red', 'close': 'green', 'low': 'blue', 'high': 'black'}
for column, colour in price_colours.items():
    # Plot against the positional index (raw values), labelled with the column name.
    plt.plot(df[column].values, color=colour, label=column)
plt.title('stock price')
plt.xlabel('time [days]')
plt.ylabel('price')
plt.legend(loc='best')
plt.show()


In [None]:
# Display (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Chronological split: the final 1000 rows are held out for testing,
# and the (up to) 9000 rows immediately before them are used for training.
test_data = df.iloc[-1000:]
training_data = df.iloc[-10000:-1000]


In [None]:
# Candidate exogenous column names: every column except those at positions 2 and 3.
exo_var = [*df.columns[:2].tolist(), *df.columns[4:].tolist()]

In [None]:
# Display the list of candidate exogenous column names.
exo_var

## ARIMAX (currently fitted as a plain ARIMA on `close`; no exogenous variables are passed to the model)

In [None]:
# Initial parameter vector handed to the ARIMA optimiser via `start_params`.
# NOTE(review): ten values — presumably [intercept, 4 AR, 4 MA, sigma2] for an
# order-(4,1,4) model; confirm against the fitted model's parameter names.
start_params = np.array([
    2.17458794e-01,
    6.93456596e-01,
    2.37057342e-02,
    -9.59795607e-01,
    5.22108548e-01,
    -6.62874411e-01,
    -1.94332969e-02,
    9.41431506e-01,
    -5.01053618e-01,
    1.30233330e+03,
])

In [None]:
# Configure an order-(4,1,4) ARIMA that holds out the last 100 samples for
# out-of-sample scoring and starts the optimiser from `start_params`.
arima_settings = dict(
    order=(4, 1, 4),
    out_of_sample_size=100,
    start_params=start_params,
)
arimax_model = ARIMA(**arima_settings)

# Fit on the training closes; the call is left as the last expression so the
# notebook shows its return value.
arimax_model.fit(training_data['close'])


In [None]:
# Display the model's out-of-sample score attribute.
# NOTE(review): presumably computed on the `out_of_sample_size` held-out tail
# using pmdarima's default scoring metric — confirm against the pmdarima docs.
arimax_model.oob_

In [None]:
# Fit a GARCH model (p=4, q=4, constant mean, GED-distributed errors) to the
# residuals of the fitted ARIMA.
# NOTE(review): with d=1 the very first residual may be dominated by the
# model's initialisation — consider whether it should be dropped; confirm.
arimax_residuals = arimax_model.arima_res_.resid
garch = arch.arch_model(arimax_residuals, mean='constant', p=4, q=4, dist='ged')
garch_model = garch.fit()

# Take the first row of the 10-step variance forecast and turn it into a
# standard deviation per horizon step.
variance_forecast = garch_model.forecast(horizon=10, reindex=False).variance
std = variance_forecast.values[0] ** 0.5

In [None]:
# Ten-step-ahead mean forecast with a +/- one-standard-deviation band (from the
# GARCH forecast in `std`), compared with the first ten test closes.
#
# Depending on the pmdarima version, predict() may return a pandas Series whose
# index continues after the training sample (TODO confirm for the installed
# version). Plotting such a Series next to the plain ndarray of true values
# would draw the two at different x positions, so the forecast is coerced to an
# array and everything shares the same 0..9 x-axis.
predicted_mu = np.asarray(arimax_model.predict(n_periods=10))
plt.plot(predicted_mu, label='predict', color='b')
plt.plot(predicted_mu + std, color='b', linestyle='-')
plt.plot(predicted_mu - std, color='b', linestyle='-')
plt.plot(test_data['close'].values[:10], label='true')
plt.legend()

In [None]:
# Same comparison over the full 1000-observation test horizon.
# First row of the 1000-step GARCH variance forecast, converted to std. dev.
std = garch_model.forecast(horizon=1000, reindex=False).variance ** 0.5
std = std.values[0]

# Depending on the pmdarima version, predict() may return a pandas Series whose
# index continues after the training sample (TODO confirm). Coerce to an array
# so the forecast, its band and the true closes are all drawn on the same
# positional x-axis instead of at different x offsets.
predicted_mu = np.asarray(arimax_model.predict(n_periods=1000))
plt.plot(predicted_mu, label='predict', color='b')
plt.plot(predicted_mu + std, color='b', linestyle='-')
plt.plot(predicted_mu - std, color='b', linestyle='-')
plt.plot(test_data['close'].values[:1000], label='true')
plt.legend()

In [None]:
# Walk-forward evaluation over 9 windows: forecast 10 steps ahead, plot and
# score the forecast, then feed the next 100 true closes to the model and move
# the window start forward by 100.
fig, axes = plt.subplots(3, 3, figsize=(20, 20))
axes = axes.ravel()
mses = []  # per-window mean squared error of the 10-step mean forecast
s = 0      # position in test_data where the current forecast window starts

for i in range(9):
    # The residuals are re-read and the GARCH refitted on every pass because
    # the ARIMA model is updated at the end of each iteration.
    arimax_residuals = arimax_model.arima_res_.resid
    garch = arch.arch_model(arimax_residuals, mean='constant', p=4, q=4, dist='ged')
    garch_model = garch.fit()
    std = garch_model.forecast(horizon=10, reindex=False).variance ** 0.5
    std = std.values[0]
    true_y = test_data['close'].values[s:s + 10]

    # predict() may return a pandas Series indexed after the training sample
    # (version dependent — TODO confirm); coerce to an array so forecast and
    # true values are plotted on the same 0..9 x-axis.
    predicted_mu = np.asarray(arimax_model.predict(n_periods=10))
    axes[i].plot(predicted_mu, label='predict', color='b')
    axes[i].plot(predicted_mu + std, color='b', linestyle='-')
    axes[i].plot(predicted_mu - std, color='b', linestyle='-')
    axes[i].plot(true_y, label='true')
    axes[i].legend()

    mses.append(np.mean((predicted_mu - true_y) ** 2))
    # Explicit positional slice of the next 100 observations.
    arimax_model.update(test_data['close'].iloc[s:s + 100])
    s += 100

In [None]:
# Histogram of the per-window forecast errors collected in `mses`.
import seaborn as sns
sns.histplot(mses)

In [None]:
# Rebuild the order-(4,1,4) ARIMA with the same hold-out size and starting
# parameters, this time passing maxiter=10 to limit the optimiser.
arimax_model = ARIMA(
    order=(4, 1, 4),
    out_of_sample_size=100,
    start_params=start_params,
    maxiter=10,
)
# Fit on the training closes (kept as the cell's last expression).
arimax_model.fit(training_data['close'])

In [None]:
# Walk-forward evaluation over 30 windows: forecast 5 steps ahead, plot and
# score the forecast, record whether the predicted direction was right, then
# feed the next 30 true closes to the model and move the window start by 30.
fig, axes = plt.subplots(6, 5, figsize=(20, 20))
axes = axes.ravel()
mses = []   # per-window mean squared error of the 5-step mean forecast
s = 0       # position in test_data where the current forecast window starts
incs = []   # per-window: direction of the LAST forecast step matches reality
incs1 = []  # per-window: direction of the FIRST forecast step matches reality
for i in range(30):
    # The residuals are re-read and the GARCH refitted on every pass because
    # the ARIMA model is updated at the end of each iteration.
    arimax_residuals = arimax_model.arima_res_.resid
    garch = arch.arch_model(arimax_residuals, mean='constant', p=4, q=4, dist='ged')
    garch_model = garch.fit()
    std = garch_model.forecast(horizon=5, reindex=False).variance ** 0.5
    std = std.values[0]
    true_y = test_data['close'].values[s:s + 5]

    # predict() may return a pandas Series indexed after the training sample
    # (version dependent — TODO confirm). Coercing to an array keeps the plot
    # aligned with `true_y` and makes [0] / [-1] below positional lookups
    # rather than label lookups.
    predicted_y = np.asarray(arimax_model.predict(n_periods=5))
    axes[i].plot(predicted_y, label='predict', color='b')
    axes[i].plot(predicted_y + std, color='b', linestyle='-')
    axes[i].plot(predicted_y - std, color='b', linestyle='-')
    axes[i].plot(true_y, label='true')
    axes[i].legend()

    mses.append(np.mean((predicted_y - true_y) ** 2))

    # Last close observed before the forecast window. For the first window
    # that is the final training observation; the previous `[s-1]` lookup
    # became `[-1]` when s == 0, which selects the last test observation (or
    # raises KeyError on an integer index) instead of the preceding value.
    if s == 0:
        cur = training_data['close'].iloc[-1]
    else:
        cur = test_data['close'].iloc[s - 1]
    incs.append(np.equal(np.sign(predicted_y[-1] - cur), np.sign(true_y[-1] - cur)))
    incs1.append(np.equal(np.sign(predicted_y[0] - cur), np.sign(true_y[0] - cur)))

    # Explicit positional slice of the next 30 observations.
    arimax_model.update(test_data['close'].iloc[s:s + 30])
    s += 30

In [None]:
# Histogram of the per-window MSEs with the x-axis restricted to [0, 5000].
mse_axes = sns.histplot(mses)
mse_axes.set_xlim(0, 5000)

In [None]:
# Share of entries in `incs` that are True (direction hit rate stored per window).
np.sum(incs)/len(incs)

In [None]:
# Share of entries in `incs1` that are True (direction hit rate stored per window).
np.sum(incs1)/len(incs1)

In [None]:
# Number of residuals currently held by the fitted ARIMA results object.
len(arimax_model.arima_res_.resid)