#2 Run
This file takes in the file written as a .csv from naive.ipynb. The output is used by compare.ipynb

In [76]:
from math import log, sqrt, pi, exp
from scipy.stats import norm
import pandas as pd
import tensorflow as tf
import numpy as np

In [77]:
def read_file(file):
    """Read a single file and return a dataframe"""
    return pd.read_csv(file, skipinitialspace=True)

In [78]:
import pandas as pd

file = '../data/processed_data/2010-2022_filtered.csv'
df = pd.read_csv(file, skipinitialspace=True)

In [79]:
# Black-Scholes formula for call options
def d1(S,K,T,r,sigma):
    x1 = S.apply(lambda x : log(x)) - K.apply(lambda x : log(x))
    x2 = (r + ((sigma.apply(lambda x : x**2)) / 2)) * T
    x3 = sigma * T.apply(lambda x: sqrt(x))
    return  (x1 + x2) / x3

def d2(S,K,T,r,sigma):
    return d1(S,K,T,r,sigma) - sigma * T.apply(lambda x : sqrt(x))  

def bs_call(S,K,T,r,sigma):
    T = T/365
    r = r/100
    return S * d1(S,K,T,r,sigma).apply(lambda x : norm.cdf(x)) - K * (-r*T).apply(lambda x : exp(x)) * d2(S,K,T,r,sigma).apply(lambda x : norm.cdf(x))

In [80]:
# Add volatility column with 30 day rolling standard deviation of Underlying_last

# New dataframe without duplicate Quote_dates
df2 = df.drop_duplicates(subset=['Quote_date'])

# Calculate volatility
df2['Volatility'] = np.log(df2["Underlying_last"] / df2["Underlying_last"].shift()).rolling(30).std()*(252**0.5)

# Matching volatility in df2 to df
df['Volatility'] = df['Quote_date'].map(df2.set_index('Quote_date')['Volatility'])

# Drop rows with NaN values
df = df.dropna()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Volatility'] = np.log(df2["Underlying_last"] / df2["Underlying_last"].shift()).rolling(30).std()*(252**0.5)


In [81]:
display(df)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Quote_date,Expire_date,Price,Underlying_last,Strike,TTM,R,Moneyness,Volatility
21023,27567,27567,2010-03-01,2010-03-04,240.155,1115.56,875.0,3,0.09,1.274926,0.175702
21024,27568,27568,2010-03-01,2010-03-04,215.155,1115.56,900.0,3,0.09,1.239511,0.175702
21025,27569,27569,2010-03-01,2010-03-04,190.150,1115.56,925.0,3,0.09,1.206011,0.175702
21026,27570,27570,2010-03-01,2010-03-04,165.155,1115.56,950.0,3,0.09,1.174274,0.175702
21027,27571,27571,2010-03-01,2010-03-04,140.200,1115.56,975.0,3,0.09,1.144164,0.175702
...,...,...,...,...,...,...,...,...,...,...,...
11895723,13536141,13536141,2022-12-30,2024-12-20,362.600,3839.81,4300.0,721,4.41,0.892979,0.192920
11895724,13536142,13536142,2022-12-30,2024-12-20,319.150,3839.81,4400.0,721,4.41,0.872684,0.192920
11895725,13536143,13536143,2022-12-30,2024-12-20,279.000,3839.81,4500.0,721,4.41,0.853291,0.192920
11895726,13536144,13536144,2022-12-30,2024-12-20,241.950,3839.81,4600.0,721,4.41,0.834741,0.192920


In [82]:
df["BS"] = bs_call(df["Underlying_last"], df["Strike"], df["TTM"], df["R"], df["Volatility"])


In [83]:
display(df)

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Quote_date,Expire_date,Price,Underlying_last,Strike,TTM,R,Moneyness,Volatility,BS
21023,27567,27567,2010-03-01,2010-03-04,240.155,1115.56,875.0,3,0.09,1.274926,0.175702,240.566473
21024,27568,27568,2010-03-01,2010-03-04,215.155,1115.56,900.0,3,0.09,1.239511,0.175702,215.566658
21025,27569,27569,2010-03-01,2010-03-04,190.150,1115.56,925.0,3,0.09,1.206011,0.175702,190.566842
21026,27570,27570,2010-03-01,2010-03-04,165.155,1115.56,950.0,3,0.09,1.174274,0.175702,165.567027
21027,27571,27571,2010-03-01,2010-03-04,140.200,1115.56,975.0,3,0.09,1.144164,0.175702,140.567212
...,...,...,...,...,...,...,...,...,...,...,...,...
11895723,13536141,13536141,2022-12-30,2024-12-20,362.600,3839.81,4300.0,721,4.41,0.892979,0.192920,370.753695
11895724,13536142,13536142,2022-12-30,2024-12-20,319.150,3839.81,4400.0,721,4.41,0.872684,0.192920,334.828193
11895725,13536143,13536143,2022-12-30,2024-12-20,279.000,3839.81,4500.0,721,4.41,0.853291,0.192920,301.817047
11895726,13536144,13536144,2022-12-30,2024-12-20,241.950,3839.81,4600.0,721,4.41,0.834741,0.192920,271.572420


In [84]:
mse_bs = tf.keras.metrics.mean_squared_error(
    df["Price"], df["BS"]
)

In [85]:
print("MSE: ", mse_bs)
print("RMSE: ", sqrt(mse_bs))

MSE:  tf.Tensor(nan, shape=(), dtype=float64)
RMSE:  nan


In [86]:
# Write to file
df.to_csv('../data/predictions/BS.csv', encoding='utf-8', index=False)