In [62]:
from math import log, sqrt, pi, exp
from scipy.stats import norm
import pandas as pd
import tensorflow as tf

In [63]:
def read_file(file):
    """Read a single file and return a dataframe"""
    return pd.read_csv(file, skipinitialspace=True)

In [64]:
first_year = 2019
last_year = 2021
file = f"./data/processed_data/{first_year}-{last_year}_underlying-strike_only-price.csv"
df_options = read_file(file)

In [65]:
df_options['Quote_date'] = pd.to_datetime(df_options.Quote_date, format='%Y-%m-%d')
df_options = df_options[df_options.Quote_date.dt.year == 2021]

# To be used for testing on for faster computing
#df_options_short = df_options[df_options["Quote_date"] == "2021-01-06"]

In [66]:
print(df_options.head())

   Unnamed: 0  Quote_date Expire_date     Price  Underlying_last  Strike  Ttl  \
0     1354913  2019-01-02  2019-01-04  1707.050          2509.98   800.0    2   
1     1354914  2019-01-02  2019-01-04  1607.495          2509.98   900.0    2   
2     1354915  2019-01-02  2019-01-04  1507.500          2509.98  1000.0    2   
3     1354916  2019-01-02  2019-01-04  1458.295          2509.98  1050.0    2   
4     1354917  2019-01-02  2019-01-04  1408.300          2509.98  1100.0    2   

   Volatility    R  
0    0.202726  2.4  
1    0.202726  2.4  
2    0.202726  2.4  
3    0.202726  2.4  
4    0.202726  2.4  


In [54]:
#Sequenced verion
def d1(S,K,T,r,sigma):
    x1 = S.apply(lambda x : log(x)) - K.apply(lambda x : log(x))
    x2 = (r + ((sigma.apply(lambda x : x**2)) / 2)) * T
    x3 = sigma * T.apply(lambda x: sqrt(x))
    return  (x1 + x2) / x3

def d2(S,K,T,r,sigma):
    return d1(S,K,T,r,sigma) - sigma * T.apply(lambda x : sqrt(x))  

def bs_call(S,K,T,r,sigma):
    T = T / 365
    return S * d1(S,K,T,r,sigma).apply(lambda x : norm.cdf(x)) - K * (-r*T).apply(lambda x : exp(x)) * d2(S,K,T,r,sigma).apply(lambda x : norm.cdf(x))

In [89]:
import numpy as np
from scipy.stats import norm

N = norm.cdf

def BS_CALL(S, K, T, r, sigma):
    d1 = ((S/K).apply(lambda x : log(x)) + (r + sigma.apply(lambda x : x**2)/2)*T) / (sigma*T.apply(lambda x: sqrt(x)))
    d2 = d1 - sigma * T.apply(lambda x : sqrt(x)) 
    return S * d1.apply(lambda x : N(x)) - K * (-r*T).apply(lambda x : exp(x)) * d2.apply(lambda x : N(x))

In [29]:
df_options["Prediction"] = bs_call(df_options["Underlying_last"], df_options["Strike"], df_options["Ttl"], df_options["R"], df_options["Volatility"])


In [92]:
df_options_short["Prediction"] = bs_call(df_options_short["Underlying_last"], df_options_short["Strike"], df_options_short["Ttl"], df_options_short["R"], df_options_short["Volatility"])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_options_short["Prediction"] = bs_call(df_options_short["Underlying_last"], df_options_short["Strike"], df_options_short["Ttl"], df_options_short["R"], df_options_short["Volatility"])


In [93]:
print(df_options.head())

         Unnamed: 0  Quote_date Expire_date     Price  Underlying_last  \
3112154     4492076  2021-01-06  2021-01-08  2848.545          3748.21   
3112155     4492077  2021-01-06  2021-01-08  2750.200          3748.21   
3112156     4492078  2021-01-06  2021-01-08  2649.950          3748.21   
3112157     4492079  2021-01-06  2021-01-08  2549.455          3748.21   
3112158     4492080  2021-01-06  2021-01-08  2450.950          3748.21   

         Strike  Ttl  Volatility     R   Prediction  
3112154   900.0    2    0.185095  0.09  2848.653726  
3112155  1000.0    2    0.185095  0.09  2748.703029  
3112156  1100.0    2    0.185095  0.09  2648.752332  
3112157  1200.0    2    0.185095  0.09  2548.801635  
3112158  1300.0    2    0.185095  0.09  2448.850938  


In [30]:
mse_bs = tf.keras.metrics.mean_squared_error(
    df_options["Price"], df_options["Prediction"]
)

In [31]:
print("MSE: ", mse_bs)
print("RMSE: ", sqrt(mse_bs))

MSE:  tf.Tensor(69977.9777258106, shape=(), dtype=float64)
RMSD:  264.53350964634063
