In [29]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, explained_variance_score
import numpy as np


import numpy as np
import pandas as pd
import tensorflow as tf

In [30]:
df = pd.read_csv('../data/options.csv')
df['date'] = pd.to_datetime(df['date'])
df = df.sort_values(by=['symbol', 'date']).reset_index(drop=True)

In [31]:
features = df.drop(columns=['Unnamed: 0', 'symbol', 'date', 'DITM_IV'])
target = df['DITM_IV']


In [32]:
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(features)


In [33]:
def create_sequences(features, target, seq_length):
    X, y = [], []
    for i in range(len(features) - seq_length):
        X.append(features[i:i + seq_length])
        y.append(target[i + seq_length])
    return np.array(X), np.array(y)


In [34]:
sequence_length = 10

X, y = create_sequences(scaled_features, target.values, sequence_length)

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)


In [36]:
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(X_train.shape[1], X_train.shape[2])),  # Input layer
    tf.keras.layers.LSTM(64, return_sequences=True),  # First LSTM layer
    tf.keras.layers.LSTM(32),  # Second LSTM layer
    tf.keras.layers.Dense(16, activation='relu'),  # Dense layer
    tf.keras.layers.Dense(1)  # Output layer for regression
])





In [37]:
model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test), verbose=1)
model.evaluate(X_test, y_test)
model.summary(), history.history 

Epoch 1/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 3ms/step - loss: 150.2746 - val_loss: 84.0497
Epoch 2/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m249s[0m 3ms/step - loss: 91.7396 - val_loss: 85.6654
Epoch 3/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m252s[0m 3ms/step - loss: 89.8795 - val_loss: 81.6549
Epoch 4/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m258s[0m 3ms/step - loss: 89.2374 - val_loss: 80.9914
Epoch 5/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m247s[0m 3ms/step - loss: 87.8827 - val_loss: 81.8530
Epoch 6/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 3ms/step - loss: 87.4004 - val_loss: 80.8297
Epoch 7/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m256s[0m 3ms/step - loss: 86.9042 - val_loss: 80.9197
Epoch 8/10
[1m79042/79042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m255s[0m 3ms/step - loss: 86.

(None,
 {'loss': [103.5517807006836,
   91.46627807617188,
   89.9871826171875,
   88.89842987060547,
   88.0654067993164,
   87.44960021972656,
   86.93687438964844,
   86.5379867553711,
   86.15477752685547,
   85.85529327392578],
  'val_loss': [84.0497055053711,
   85.66535949707031,
   81.65486907958984,
   80.99136352539062,
   81.85302734375,
   80.82965087890625,
   80.9196548461914,
   80.72442626953125,
   79.82723236083984,
   79.93907928466797]})

In [38]:
y_pred = model.predict(X_test)
# Calculate RMSE
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

# Calculate R² (coefficient of determination)
r2 = r2_score(y_test, y_pred)

# Calculate MAE
mae = mean_absolute_error(y_test, y_pred)

# Calculate EVS (Explained Variance Score)
evs = explained_variance_score(y_test, y_pred)

# Print the results
print(f"RMSE: {rmse}")
print(f"R²: {r2}")
print(f"MAE: {mae}")
print(f"EVS: {evs}")

[1m19761/19761[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1ms/step
RMSE: 8.94087772311773
R²: 0.8540819547137205
MAE: 5.420308796496342
EVS: 0.8545902887858521
