In [60]:
import pandas as pd
import yfinance as yf
import numpy as np
import matplotlib.pyplot as plt

from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error as mse

#from data_handler import get_sp500_ticker_list
from data_handler import get_adj_close
#from data_handler import sp500_closes_to_csv

In [61]:
# Getting the data
start='2018-01-01'
end='2020-12-31'
interval='1d'
ticker_list = 'AAPL'

df = get_adj_close(ticker_list, start, end, interval)

[*********************100%***********************]  1 of 1 completed


In [62]:
# Computing returns
ret = 100 * df.pct_change()
ret.dropna(inplace=True)

In [63]:
ret

Date
2018-01-03   -0.017423
2018-01-04    0.464499
2018-01-05    1.138538
2018-01-08   -0.371453
2018-01-09   -0.011451
                ...   
2020-12-23   -0.697608
2020-12-24    0.771231
2020-12-28    3.576571
2020-12-29   -1.331501
2020-12-30   -0.852655
Name: Adj Close, Length: 754, dtype: float64

In [64]:
# Computing realized volatility
realized_vol = ret.rolling(5).std()
realized_vol = pd.DataFrame(realized_vol)
realized_vol.reset_index(drop=True, inplace=True)

In [65]:
realized_vol

Unnamed: 0,Adj Close
0,
1,
2,
3,
4,0.583339
...,...
749,1.722863
750,1.725322
751,1.698701
752,2.145503


In [66]:
returns_svm = ret ** 2
returns_svm = returns_svm.reset_index()
del returns_svm['Date']

In [67]:
X = pd.concat([realized_vol, returns_svm], axis=1, ignore_index=True)
X = X[4:].copy()
X = X.reset_index()
X.drop('index', axis=1, inplace=True)

In [68]:
realized_vol = realized_vol.dropna().reset_index()
realized_vol.drop('index', axis=1, inplace=True)

In [69]:
realized_vol.isna().sum()

Adj Close    0
dtype: int64

In [70]:
n = 252
split_date = ret.iloc[-n:].index

### Neural nets

In [71]:
NN_vol = MLPRegressor(learning_rate_init=0.001, random_state=1)

In [72]:
para_grid_NN = {'hidden_layer_sizes': [(100, 50), (50, 50), (10, 100)],
                'max_iter': [500, 1000],
                'alpha': [0.00005, 0.0005 ]}

In [79]:
X.iloc[:-n]

Unnamed: 0,0,1
0,0.583339,0.000131
1,0.583954,0.000526
2,0.595643,0.322612
3,0.557247,1.066352
4,0.596656,0.258261
...,...,...
493,0.774685,0.009040
494,1.007918,3.936374
495,1.030796,0.001440
496,0.911230,0.352270


In [81]:
realized_vol.iloc[1:-(n-1)]

Unnamed: 0,Adj Close
1,0.583954
2,0.595643
3,0.557247
4,0.596656
5,0.850688
...,...
494,1.007918
495,1.030796
496,0.911230
497,0.801228


In [73]:
clf = RandomizedSearchCV(NN_vol, para_grid_NN)
clf.fit(X.iloc[:-n].values,
        realized_vol.iloc[1:-(n-1)].values.reshape(-1, ))
NN_predictions = clf.predict(X.iloc[-n:])

In [74]:
NN_predictions = pd.DataFrame(NN_predictions)
NN_predictions.index = ret.iloc[-n:].index

In [75]:
rmse_NN = np.sqrt(mse(realized_vol.iloc[-n:] / 100, 
                      NN_predictions / 100))
print('The RMSE value of NN is {:.6f}'.format(rmse_NN))

The RMSE value of NN is 0.003435


### DL

In [76]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

KeyboardInterrupt: 

In [16]:
model = keras.Sequential(
    [layers.Dense(256, activation="relu"),
     layers.Dense(128, activation="relu"),
     layers.Dense(1, activation="linear"),])

2022-05-09 10:10:49.323323: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [17]:
model.compile(loss='mse', optimizer='rmsprop')

In [18]:
epochs_trial = np.arange(100, 400, 4)
batch_trial = np.arange(100, 400, 4)
DL_pred = []
DL_RMSE = []
for i, j, k in zip(range(4), epochs_trial, batch_trial):
    model.fit(X.iloc[:-n].values,
        realized_vol.iloc[1:-(n-1)].values.reshape(-1,),
        batch_size=k, epochs=j, verbose=False)
    DL_predict = model.predict(np.asarray(X.iloc[-n:]))
    DL_RMSE.append(np.sqrt(mse(realized_vol.iloc[-n:] / 100, DL_predict.flatten() / 100)))
    DL_pred.append(DL_predict)
    print('DL_RMSE_{}:{:.6f}'.format(i+1, DL_RMSE[i]))

DL_RMSE_1:0.007155
DL_RMSE_2:0.005030
DL_RMSE_3:0.004753
DL_RMSE_4:0.005715


In [19]:
DL_predict = pd.DataFrame(DL_pred[DL_RMSE.index(min(DL_RMSE))])
DL_predict.index = ret.iloc[-n:].index