In [1]:
# These lines are used to run the code on Google Colab. 
# I used it because it's better to run big NN models there, but this one is rather small, so we can run it locally.
# from google.colab import drive
# drive.mount('/content/drive')

# import os
# path = '/content/drive/MyDrive/ENSIIE/M1/S4/PRR-Projet-Recherche/projet_recherche'
# os.chdir(path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [1]:
import pandas as pd
import numpy as np
from functions import MAPE, MSE, MAE

from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.utils import plot_model
from sklearn.model_selection import train_test_split

In [36]:
df = pd.read_csv("data/param1.csv")

In [37]:
df.head()

Unnamed: 0,m,T,r,rho,kappa,gamma,nu,nu0,K,S,price,impl_vol
0,1.257914,1.260829,0.046215,-0.307509,0.349958,0.469729,0.183863,0.079676,79.496682,100,28.830581,0.324574
1,0.834284,2.351793,0.034071,-0.562245,0.151497,0.702222,0.393769,0.081721,119.863229,100,11.736001,0.258141
2,1.170108,1.71826,0.019274,-0.600835,1.561061,0.438521,0.086567,0.19213,85.462163,100,26.591338,0.349612
3,0.720118,2.593797,0.018329,-0.553416,0.278247,0.465255,0.10999,0.317225,138.866101,100,16.740601,0.415047
4,0.909831,0.260397,0.03084,-0.127374,2.050839,0.365112,0.171817,0.244485,109.910492,100,6.163456,0.473289


In [4]:
df.columns

Index(['m', 'T', 'r', 'rho', 'kappa', 'gamma', 'nu', 'nu0'], dtype='object')

## Model predicting the price

This first model predicts the price given the parameters of the Heston models. There are still a few things to check, but as a first approach it's looking good.  

In [38]:
X = df.drop(['price', 'K', 'S', 'impl_vol'], 1)
# X_stand =(X-X.mean())/X.std()
y = df['price']
# y_norm = (y-y.mean())/y.std()

In [22]:
X.shape

(44, 8)

In [23]:
# The dataset should be split into 10-validation, 10-testing, 80-training 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=1)

In [10]:
train_ratio = 0.8
validation_ratio = 0.1
test_ratio = 0.1

# train is now 80% of the entire data set
# the _junk suffix means that we drop that variable completely
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=1 - train_ratio, random_state=1)

# test is now 10% of the initial data set
# validation is now 10% of the initial data set
x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=test_ratio/(test_ratio + validation_ratio), random_state=1)

In [24]:
# epochs is the number of iterations over the data
# verbose is a parameter to obtain additional information 
# Note: define the batch size to 
initializer = tf.keras.initializers.GlorotUniform()
model1 = tf.keras.Sequential([
    keras.layers.Dense(units=200,input_shape=(8,),  kernel_initializer=initializer, activation='relu'),
    keras.layers.Dense(units=200,activation='relu'),
    keras.layers.Dense(units=200,activation='relu'),
    keras.layers.Dense(units=200,activation='relu'),
    keras.layers.Dense(units=1),
    ])
model1.compile(optimizer='adam', loss='mean_squared_error')
model1.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 200)               1800      
_________________________________________________________________
dense_6 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_7 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_8 (Dense)              (None, 200)               40200     
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 201       
Total params: 122,601
Trainable params: 122,601
Non-trainable params: 0
_________________________________________________________________


In [26]:
# The epochs shouls be 8000, but we'll start with something lighter to test
history= model1.fit(X_train, y_train, epochs=8000, batch_size= 1024, verbose=0)

In [27]:
# y_pred = model.predict(x_test)
y_pred = model1.predict(X_test)

In [28]:
y_test

3     16.740601
2     26.591338
30    13.044868
21    30.369697
22     7.946941
41    15.072212
19    24.001303
35     0.327558
34     0.930086
Name: price, dtype: float64

In [29]:
y_pred

array([[19.414085  ],
       [23.413738  ],
       [14.200456  ],
       [30.354979  ],
       [ 5.9051895 ],
       [16.438808  ],
       [18.898617  ],
       [ 0.25640422],
       [ 3.1630292 ]], dtype=float32)

In [31]:
MAPE(y_test, y_pred)

63.07619

In [32]:
MSE(y_test, y_pred)

1705.63463

In [33]:
MAE(y_test, y_pred)

101.06878

## Model predicting the implied volatility

In [39]:
X = df.drop(['price', 'K', 'S', 'impl_vol'], 1)
# X_stand =(X-X.mean())/X.std()
y = df['impl_vol']
# y_norm = (y-y.mean())/y.std()

In [40]:
X.shape

(44, 8)

In [41]:
# The dataset should be split into 10-validation, 10-testing, 80-training 
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=1)

In [10]:
train_ratio = 0.8
validation_ratio = 0.1
test_ratio = 0.1

# train is now 80% of the entire data set
# the _junk suffix means that we drop that variable completely
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=1 - train_ratio, random_state=1)

# test is now 10% of the initial data set
# validation is now 10% of the initial data set
x_val, x_test, y_val, y_test = train_test_split(x_test, y_test, test_size=test_ratio/(test_ratio + validation_ratio), random_state=1)

In [45]:
# epochs is the number of iterations over the data
# verbose is a parameter to obtain additional information 
# Note: define the batch size to 
initializer = tf.keras.initializers.GlorotUniform()
model2 = tf.keras.Sequential([
    keras.layers.Dense(units=200,input_shape=(8,),  kernel_initializer=initializer, activation='relu'),
    keras.layers.Dense(units=200,activation='relu'),
    keras.layers.Dense(units=200,activation='relu'),
    keras.layers.Dense(units=200,activation='relu'),
    keras.layers.Dense(units=1),
    ])
model2.compile(optimizer='adam', loss='mean_squared_error')
model2.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_20 (Dense)             (None, 200)               1800      
_________________________________________________________________
dense_21 (Dense)             (None, 200)               40200     
_________________________________________________________________
dense_22 (Dense)             (None, 200)               40200     
_________________________________________________________________
dense_23 (Dense)             (None, 200)               40200     
_________________________________________________________________
dense_24 (Dense)             (None, 1)                 201       
Total params: 122,601
Trainable params: 122,601
Non-trainable params: 0
_________________________________________________________________


In [46]:
# The epochs shouls be 8000, but we'll start with something lighter to test
history= model2.fit(X_train, y_train, epochs=8000, batch_size= 1024, verbose=0)

In [47]:
# y_pred = model.predict(x_test)
y_pred = model2.predict(X_test)

In [48]:
y_test

3     0.415047
2     0.349612
30    0.408136
21    0.627606
22    0.312476
41    0.487659
19    0.461360
35    0.288328
34    0.256209
Name: impl_vol, dtype: float64

In [49]:
y_pred

array([[0.43306813],
       [0.326813  ],
       [0.40381056],
       [0.6104918 ],
       [0.29560682],
       [0.45766893],
       [0.42727855],
       [0.28337884],
       [0.40773168]], dtype=float32)

In [50]:
MAPE(y_test, y_pred)

2.67846

In [51]:
MSE(y_test, y_pred)

0.18623

In [52]:
MAE(y_test, y_pred)

1.01702