In [205]:
from math import log, sqrt, pi, exp

import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.stats import norm

In [206]:
def read_file(file):
    """Load one CSV file into a DataFrame.

    Whitespace that directly follows a delimiter is skipped, so headers and
    values written as "a, b" are parsed without a leading space.
    """
    frame = pd.read_csv(file, skipinitialspace=True)
    return frame

In [207]:
# Years embedded in the name of the processed data file
# (presumably the span of quotes the file covers — confirm against the data).
first_year = 2019
last_year = 2021
# Relative path of the processed CSV, built from the two years above.
file = f"./data/processed_data/{first_year}-{last_year}_underlying-strike_only-price.csv"
# Load the CSV into a DataFrame via read_file.
df_options = read_file(file)

In [208]:
# Parse the quote dates (given as YYYY-MM-DD) into datetimes so the year can be extracted.
df_options["Quote_date"] = pd.to_datetime(df_options["Quote_date"], format="%Y-%m-%d")
# Keep only the rows quoted in 2021.
df_options = df_options[df_options["Quote_date"].dt.year == 2021]

# For quicker test runs, restrict the data to a single quote date:
#df_options_short = df_options[df_options["Quote_date"] == "2021-01-06"]

In [209]:
# Black-Scholes formula for call options
def _norm_cdf(x):
    """Standard normal CDF of x, keeping the index when x is a pandas Series."""
    cdf = norm.cdf(x)
    if isinstance(x, pd.Series):
        # norm.cdf hands back a bare array; re-attach the index so later
        # arithmetic with other Series still aligns on labels.
        return pd.Series(cdf, index=x.index)
    return cdf


def d1(S, K, T, r, sigma):
    """Return the Black-Scholes d1 term.

    All operations are vectorised, so the arguments may be scalars, NumPy
    arrays or pandas Series (Series are combined on their index).

    Parameters
    ----------
    S : underlying price
    K : strike price
    T : time to expiry, in the same time unit as r and sigma (bs_call passes years)
    r : interest rate as a decimal fraction
    sigma : volatility as a decimal fraction

    Returns
    -------
    (ln(S) - ln(K) + (r + sigma**2 / 2) * T) / (sigma * sqrt(T))

    Notes
    -----
    T == 0 or sigma == 0 makes the denominator zero, and non-positive S or K
    or negative T are outside the domain of log/sqrt; these cases produce
    inf/NaN instead of raising ValueError.
    """
    log_moneyness = np.log(S) - np.log(K)
    drift = (r + sigma ** 2 / 2) * T
    vol_sqrt_t = sigma * np.sqrt(T)
    return (log_moneyness + drift) / vol_sqrt_t


def d2(S, K, T, r, sigma):
    """Return the Black-Scholes d2 term, i.e. d1 - sigma * sqrt(T).

    Takes the same arguments, in the same units, as d1.
    """
    return d1(S, K, T, r, sigma) - sigma * np.sqrt(T)


def bs_call(S, K, T, r, sigma):
    """Price a call option with the Black-Scholes formula.

    Accepts scalars, NumPy arrays or pandas Series; with Series input the
    result is a Series on the same index.

    Parameters
    ----------
    S : underlying price
    K : strike price
    T : time to expiry in days (divided by 365 here to get years)
    r : interest rate in percent (divided by 100 here to get a fraction)
    sigma : volatility, used as given (not rescaled)

    Returns
    -------
    S * N(d1) - K * exp(-r * T) * N(d2), with N the standard normal CDF.
    """
    T = T / 365
    r = r / 100
    # Compute d1 once and derive d2 from it rather than evaluating d1 twice.
    d_plus = d1(S, K, T, r, sigma)
    d_minus = d_plus - sigma * np.sqrt(T)
    discounted_strike = K * np.exp(-r * T)
    return S * _norm_cdf(d_plus) - discounted_strike * _norm_cdf(d_minus)

In [210]:
# Price every row with Black-Scholes and store the result next to the quoted price.
df_options["Prediction"] = bs_call(
    S=df_options["Underlying_last"],
    K=df_options["Strike"],
    T=df_options["Ttl"],
    r=df_options["R"],
    sigma=df_options["Volatility"],
)


In [211]:
# Show the first five rows (head's default) to compare "Prediction" with "Price".
print(df_options.head())

         Unnamed: 0 Quote_date Expire_date     Price  Underlying_last  Strike  \
3097811     4477733 2021-01-04  2021-01-06  2701.855          3701.38  1000.0   
3097812     4477734 2021-01-04  2021-01-06  2598.795          3701.38  1100.0   
3097813     4477735 2021-01-04  2021-01-06  2500.195          3701.38  1200.0   
3097814     4477736 2021-01-04  2021-01-06  2400.290          3701.38  1300.0   
3097815     4477737 2021-01-04  2021-01-06  2300.300          3701.38  1400.0   

         Ttl  Volatility     R   Prediction  
3097811    2    0.185353  0.09  2701.384931  
3097812    2    0.185353  0.09  2601.385425  
3097813    2    0.185353  0.09  2501.385918  
3097814    2    0.185353  0.09  2401.386411  
3097815    2    0.185353  0.09  2301.386904  


In [212]:
# Mean squared error of the Black-Scholes predictions against the "Price" column.
mse_bs = tf.keras.metrics.mean_squared_error(
    y_true=df_options["Price"],
    y_pred=df_options["Prediction"],
)

In [213]:
# mse_bs is a scalar tensor; convert it to a plain float so the bare number is
# printed instead of the "tf.Tensor(..., shape=(), dtype=float64)" repr.
print("MSE: ", float(mse_bs))
# The RMSE is the square root of the MSE (math.sqrt converts the tensor to float itself).
print("RMSE: ", sqrt(mse_bs))

MSE:  tf.Tensor(1283.3263391293256, shape=(), dtype=float64)
RMSE:  35.823544480262214
