In [None]:
# !pip install pandas scikit-learn matplotlib tensorflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras import Input

In [None]:
import io

from google.colab import files

# On-disk fallback, used only when nothing is uploaded in this run.
RETURNS_FILENAME = 'denoised_returns.csv'

# Colab never overwrites an existing file: a re-upload is stored under a
# suffixed name such as "denoised_returns (2).csv". Reading the fixed filename
# would then silently load the older copy, so parse the bytes that
# files.upload() returns for this upload instead.
uploaded = files.upload()

# Load returns
if uploaded:
    # Prefer the expected name; otherwise take the first file that was uploaded.
    upload_key = RETURNS_FILENAME if RETURNS_FILENAME in uploaded else next(iter(uploaded))
    returns = pd.read_csv(io.BytesIO(uploaded[upload_key]), index_col=0, parse_dates=True)
else:
    # Nothing uploaded (dialog cancelled): reuse the copy already on disk.
    returns = pd.read_csv(RETURNS_FILENAME, index_col=0, parse_dates=True)

Saving denoised_returns.csv to denoised_returns (2).csv


In [None]:
# Sanity-check the loaded returns: first rows, summary statistics, schema.
print(returns.head())
print(returns.describe())
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
returns.info()

                     tsla_close  qqq_close  spy_close  gld_close
timestamp                                                       
2023-06-01 04:01:00   -0.032651  -0.016834  -0.019813   0.005793
2023-06-01 04:02:00   -0.032291  -0.016739  -0.019623   0.005792
2023-06-01 04:03:00   -0.031929  -0.016644  -0.019432   0.005791
2023-06-01 04:04:00   -0.031567  -0.016549  -0.019241   0.005790
2023-06-01 04:05:00   -0.031204  -0.016453  -0.019049   0.005789
          tsla_close      qqq_close      spy_close      gld_close
count  383862.000000  383862.000000  383862.000000  383862.000000
mean       -0.000230      -0.000219      -0.000227      -0.000085
std         0.954055       0.954417       0.947634       0.913829
min      -110.836631    -177.485975    -169.905857    -144.635749
25%        -0.003506      -0.001977      -0.002086      -0.001449
50%        -0.001491       0.000271       0.000445       0.000203
75%         0.001235       0.001164       0.001462       0.001350
max       152.368

In [None]:
#Compute hourly realized volatility
def compute_hourly_rv(returns_df):
    """Aggregate intraday returns into hourly realized volatility.

    For every column, the realized volatility of an hour is the square root
    of the sum of squared returns observed inside that hour.

    Parameters
    ----------
    returns_df : pandas.DataFrame
        Returns indexed by timestamp. The index may already be a
        DatetimeIndex or anything ``pd.to_datetime`` can parse. The frame is
        not modified.

    Returns
    -------
    pandas.DataFrame
        One row per hour that has observations for every column, labelled
        by the start of the hour.
    """
    squared = returns_df ** 2
    # Convert the index on the derived frame so the caller's frame keeps its
    # original index untouched.
    squared.index = pd.to_datetime(squared.index)

    # By default resample().sum() returns 0 for an empty bin, which would turn
    # every hour without data into RV = 0 and make the dropna() below a no-op.
    # min_count=1 marks those hours as NaN so they are actually removed.
    hourly_rv = squared.resample('1h').sum(min_count=1) ** 0.5
    return hourly_rv.dropna()


#Function to create hourly lagged features
def create_hourly_lagged_features(rv_series):
    """Build the log-RV target together with its three lagged predictors.

    Columns of the returned frame:
      * ``RV``    - log of the realized volatility (1e-8 is added before the
                    log so a zero does not produce -inf),
      * ``Lag1``  - previous observation of ``RV``,
      * ``Lag5``  - mean of the 5 observations preceding the current one,
      * ``Lag22`` - mean of the 22 observations preceding the current one.

    Rows where any of these values is missing (the warm-up period of the
    longest window) are removed.
    """
    features = pd.DataFrame({'RV': np.log(rv_series + 1e-8)})
    log_rv = features['RV']

    # Every predictor is shifted by one step so that it only uses
    # information available before the observation being forecast.
    lagged_columns = {
        'Lag1': log_rv.shift(1),
        'Lag5': log_rv.rolling(5).mean().shift(1),
        'Lag22': log_rv.rolling(22).mean().shift(1),
    }
    return features.assign(**lagged_columns).dropna()

#Function to train the DNN
def train_dnn(X, y, epochs=100, batch_size=32, patience=10):
    """Train a small feed-forward network and score it out of sample.

    The data is split chronologically (no shuffling): the first 80% is used
    for training and the last 20% for testing. Features are standardised with
    statistics fitted on the training part only.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Predictor matrix, ordered in time.
    y : array-like of shape (n_samples,)
        Target values aligned with ``X``.
    epochs : int, default 100
        Maximum number of training epochs.
    batch_size : int, default 32
        Mini-batch size passed to ``model.fit``.
    patience : int or None, default 10
        Stop once the validation loss has not improved for this many epochs
        and restore the best weights. ``None`` disables early stopping and
        always trains for ``epochs`` epochs.

    Returns
    -------
    r2 : float
        Out-of-sample R^2 on the test part.
    y_test : array-like
        Actual target values of the test part.
    y_pred : numpy.ndarray
        Model predictions for the test part.

    Notes
    -----
    Weight initialisation and dropout are random, so repeated calls give
    slightly different scores unless the caller seeds TensorFlow beforehand.
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Derive the input width from the data instead of hard-coding 3, so the
    # function keeps working if the feature set changes.
    n_features = X_train_scaled.shape[1]
    model = Sequential([
        Input(shape=(n_features,)),
        Dense(64, activation='relu'),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')

    # The validation split was previously computed but never acted on; use it
    # to stop training once the model stops improving on held-out data.
    callbacks = []
    if patience is not None:
        callbacks.append(
            EarlyStopping(monitor='val_loss', patience=patience, restore_best_weights=True)
        )

    model.fit(X_train_scaled, y_train, epochs=epochs, batch_size=batch_size,
              validation_split=0.2, callbacks=callbacks, verbose=0)

    y_pred = model.predict(X_test_scaled).flatten()
    # R^2 = 1 - SS_res / SS_tot, evaluated on the test part only.
    ss_res = np.sum((y_test - y_pred) ** 2)
    ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
    r2 = 1 - ss_res / ss_tot
    return r2, y_test, y_pred


In [None]:
# == Fit the DNN on all assets and test out of sample prediction == #

# Build the hourly RV panel in this cell: no earlier cell defines `hourly_rv`,
# so the loop below would raise NameError after Restart & Run All.
hourly_rv = compute_hourly_rv(returns)

results = {}      # asset -> out-of-sample R^2
predictions = {}  # asset -> (actual log RV, predicted log RV) on the test part

for asset in hourly_rv.columns:
    df_feat = create_hourly_lagged_features(hourly_rv[asset])

    X = df_feat[['Lag1', 'Lag5', 'Lag22']].values
    y = df_feat['RV'].values

    r2, y_test, y_pred = train_dnn(X, y)

    results[asset] = r2
    predictions[asset] = (y_test, y_pred)

    print(f"{asset} - 1H Forecast R²: {r2:.4f}")

In [None]:
# === Plot actual vs predicted for all assets == #
# One figure per asset, drawn through the explicit figure/axes interface.
for asset, (y_test, y_pred) in predictions.items():
    fig, ax = plt.subplots(figsize=(12, 4))

    ax.plot(y_test, label='Actual log(RV)', alpha=0.7)
    ax.plot(y_pred, label='Predicted log(RV)', alpha=0.7)

    ax.set_title(f'{asset.upper()} - 1H Forecast')
    ax.set_xlabel('Observation')
    ax.set_ylabel('log(RV)')
    ax.legend()
    ax.grid(True)

    fig.tight_layout()
    plt.show()