In [4]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, models

In [5]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    x = layers.MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    x = layers.Dropout(dropout)(x)
    x = layers.LayerNormalization(epsilon=1e-6)(x + inputs)

    x_ff = layers.Dense(ff_dim, activation="relu")(x)
    x_ff = layers.Dense(inputs.shape[-1])(x_ff)
    x = layers.Dropout(dropout)(x_ff)
    x = layers.LayerNormalization(epsilon=1e-6)(x + x_ff)

    return x

In [6]:
def build_transformer_model(input_dim, num_features, head_size=64, num_heads=4, ff_dim=128, num_layers=2):
    inputs = layers.Input(shape=(num_features,))
    x = layers.Reshape((num_features, 1))(inputs)
    x = layers.Dense(head_size)(x)
    for _ in range(num_layers):
        x = transformer_encoder(x, head_size, num_heads, ff_dim)

    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(64, activation="relu")(x)
    x = layers.Dense(32, activation="relu")(x)
    outputs = layers.Dense(1)(x)

    model = models.Model(inputs, outputs)
    model.compile(optimizer="adam", loss="mse", metrics=["mae"])
    return model

In [19]:
bs_data = pd.read_csv('bs_data.csv')
heston_data = pd.read_csv('heston_data.csv')

In [20]:
bs_data

Unnamed: 0,S,K,T,r,sigma,call_price,label
0,103.290045,131.063618,4.900907,0.017751,0.243331,16.197959,BS
1,116.845571,142.017038,2.893580,0.048187,0.128066,7.417800,BS
2,115.194631,134.749222,0.618206,0.013403,0.288611,4.327009,BS
3,102.594761,114.299103,0.529129,0.041435,0.102126,0.484932,BS
4,122.087767,113.057180,3.602775,0.046209,0.265625,37.269109,BS
...,...,...,...,...,...,...,...
99995,108.563931,142.061595,0.411876,0.022435,0.254733,0.481456,BS
99996,114.069979,132.328663,2.560343,0.035611,0.136477,7.194904,BS
99997,130.560273,143.123941,4.155809,0.013016,0.128519,11.498734,BS
99998,143.854490,129.150101,4.961218,0.049675,0.237826,52.341303,BS


In [21]:
heston_data

Unnamed: 0,S,K,T,r,v0,kappa,theta,sigma_v,rho,call_price,label
0,148.500502,104.156266,2.422762,0.025632,0.368491,1.988959,0.020955,0.368984,-0.472906,30.727535,Heston
1,101.297864,111.069185,4.098943,0.048740,0.556398,2.712698,0.014379,0.367446,-0.701654,3.155828,Heston
2,100.284963,115.888022,1.836936,0.017240,0.075874,2.249124,0.143667,0.101259,-0.594497,11.604038,Heston
3,120.546894,143.081810,4.020003,0.035742,0.553918,1.947384,0.114136,0.147010,-0.410906,27.651690,Heston
4,141.838154,132.778985,1.350122,0.034980,0.364070,1.587303,0.060344,0.129199,-0.875794,29.472240,Heston
...,...,...,...,...,...,...,...,...,...,...,...
9995,135.149676,139.615122,3.737802,0.039070,0.218526,1.952818,0.146864,0.186053,-0.831234,23.652326,Heston
9996,118.044882,109.668124,1.101928,0.025664,0.341593,1.140302,0.047689,0.260040,-0.763109,19.321961,Heston
9997,131.922220,131.450426,3.998019,0.032201,0.503926,2.306932,0.020323,0.314564,-0.361633,16.983123,Heston
9998,101.884798,122.216351,4.435285,0.046327,0.528330,1.449800,0.083438,0.183676,-0.576949,17.649743,Heston


In [22]:
bs_data['v0'] = bs_data['sigma']**2
bs_data['kappa'] = 0
bs_data['sigma_v'] = 0
bs_data['theta'] = bs_data['sigma']**2
bs_data['rho'] = 0

In [23]:
bs_data = bs_data.drop('sigma',axis=1)
bs_data.describe()

Unnamed: 0,S,K,T,r,call_price,v0,kappa,sigma_v,theta,rho
count,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0,100000.0
mean,125.07177,124.964082,2.554814,0.029955,21.87218,0.043373,0.0,0.0,0.043373,0.0
std,14.433018,14.48165,1.412534,0.011559,14.24598,0.02329,0.0,0.0,0.02329,0.0
min,100.000035,100.000001,0.100084,0.010001,2.954185e-21,0.01,0.0,0.0,0.01,0.0
25%,112.57917,112.418628,1.333705,0.019916,10.43561,0.02254,0.0,0.0,0.02254,0.0
50%,125.155012,125.009313,2.558204,0.029944,20.22842,0.040056,0.0,0.0,0.040056,0.0
75%,137.495288,137.480917,3.778421,0.039964,31.64709,0.062584,0.0,0.0,0.062584,0.0
max,149.999858,149.999727,4.999951,0.05,73.58491,0.089997,0.0,0.0,0.089997,0.0


In [24]:
heston_data.describe()

Unnamed: 0,S,K,T,r,v0,kappa,theta,sigma_v,rho,call_price
count,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0,10000.0
mean,125.023931,125.332141,2.579915,0.030007,0.304912,1.996561,0.104846,0.300635,-0.600829,17.289618
std,14.366108,14.370888,1.421903,0.011568,0.171094,0.575568,0.054932,0.114474,0.172167,11.198425
min,100.004312,100.008138,0.100017,0.010007,0.010009,1.00012,0.010022,0.100074,-0.899984,0.0
25%,112.629086,113.130196,1.363125,0.019942,0.155672,1.49808,0.057987,0.201909,-0.748841,8.431136
50%,125.065789,125.421975,2.597391,0.030078,0.307759,2.001616,0.104193,0.301057,-0.604221,15.606057
75%,137.500641,137.808829,3.818971,0.039985,0.451321,2.495222,0.152509,0.399068,-0.452352,24.53398
max,149.998319,149.995052,4.999775,0.049999,0.59998,2.999935,0.199971,0.499958,-0.300093,61.269368


In [25]:
features = ['S', 'K', 'T', 'r', 'v0', 'kappa', 'sigma_v', 'theta', 'rho']

In [29]:
X_bs = bs_data[features]
y_bs = bs_data['call_price']
X_heston = heston_data[features]
y_heston = heston_data['call_price']
y_bs = y_bs.to_numpy().reshape(-1, 1)
y_heston = y_heston.to_numpy().reshape(-1, 1)

In [30]:
scaler = StandardScaler()
X_bs_scaled = scaler.fit_transform(X_bs)
X_heston_scaled = scaler.fit_transform(X_heston)
scaler_y = StandardScaler()
y_bs_scaled = scaler_y.fit_transform(y_bs).flatten()
y_heston_scaled = scaler_y.fit_transform(y_heston).flatten()

In [32]:
X_bs_train, X_bs_test, y_bs_train, y_bs_test = train_test_split(X_bs_scaled, y_bs_scaled, test_size=0.2, random_state=42)
X_heston_train, X_heston_test, y_heston_train, y_heston_test = train_test_split(X_heston_scaled, y_heston_scaled, test_size=0.2, random_state=42)

In [39]:
model = build_transformer_model(input_dim=X_bs_train.shape[1], num_features=X_bs_train.shape[1])
model.summary()

In [40]:
model.fit(
    X_bs_train, y_bs_train,
    validation_data=(X_bs_test, y_bs_test),
    epochs=50,
    batch_size=512,
)

Epoch 1/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 72ms/step - loss: 0.9316 - mae: 0.7829 - val_loss: 0.8378 - val_mae: 0.7483
Epoch 2/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 0.8105 - mae: 0.7377 - val_loss: 0.8211 - val_mae: 0.7340
Epoch 3/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 19ms/step - loss: 0.8191 - mae: 0.7413 - val_loss: 0.8148 - val_mae: 0.7447
Epoch 4/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.8168 - mae: 0.7401 - val_loss: 0.8109 - val_mae: 0.7341
Epoch 5/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - loss: 0.8134 - mae: 0.7374 - val_loss: 0.8128 - val_mae: 0.7422
Epoch 6/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 15ms/step - loss: 0.8179 - mae: 0.7411 - val_loss: 0.8150 - val_mae: 0.7294
Epoch 7/50
[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 14ms

<keras.src.callbacks.history.History at 0x78d8996dbe90>

In [41]:
model.fit(X_heston_train, y_heston_train,
          epochs=25,
          batch_size=128,
          validation_data=(X_heston_test, y_heston_test),
)

Epoch 1/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 140ms/step - loss: 0.9932 - mae: 0.8115 - val_loss: 0.9189 - val_mae: 0.7874
Epoch 2/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.9663 - mae: 0.7981 - val_loss: 0.9186 - val_mae: 0.7863
Epoch 3/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.9263 - mae: 0.7805 - val_loss: 0.9163 - val_mae: 0.7822
Epoch 4/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.9514 - mae: 0.7883 - val_loss: 0.9183 - val_mae: 0.7872
Epoch 5/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.9630 - mae: 0.8001 - val_loss: 0.9121 - val_mae: 0.7819
Epoch 6/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.9657 - mae: 0.7966 - val_loss: 0.9128 - val_mae: 0.7820
Epoch 7/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.96

<keras.src.callbacks.history.History at 0x78d89b8fb550>

In [42]:
y_pred_scaled = model.predict(X_heston_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
y_heston_test_inv = scaler_y.inverse_transform(y_heston_test.reshape(-1, 1)).flatten()
y_pred, y_heston_test_inv

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step


(array([16.349821, 19.156431, 15.486967, ..., 16.69319 , 19.145174,
        15.294189], dtype=float32),
 array([15.53726524, 34.37050267, 10.63214283, ...,  6.39319173,
        16.00349019, 38.92628718]))

(2000, 2000)