In [1]:
from scipy.stats import ttest_rel
import pandas as pd
import numpy as np
from scipy.stats import norm, wasserstein_distance
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [2]:
# Read the three CSV exports (first column used as the index) and stack them
# row-wise into a single frame. The original index labels of each file are kept.
df, dff, dfff = (
    pd.read_csv(csv_name, index_col=0)
    for csv_name in ('sstorm_data.csv', 'storm_data.csv', 'ssstorm_data.csv')
)
df = pd.concat([df, dff, dfff])

# Column names used throughout the notebook for the three price series.
bs_col, model_col, market_col = "BS_V_prime", "V_prime", "opt_price_prime"

In [3]:
# Distinct values of the "ticker" column in the combined frame.
pd.unique(df["ticker"])

array(['MCD', 'NKE', 'PFE', 'BB', 'GME', 'JPM', 'XOM', 'NOK'],
      dtype=object)

In [4]:
# Summary statistics for a handful of raw input columns.
summary_cols = ["S", "K", "best_bid", "best_offer", "t_prime"]
df.loc[:, summary_cols].describe()

Unnamed: 0,S,K,best_bid,best_offer,t_prime
count,353932.0,353932.0,353932.0,353932.0,353932.0
mean,100.882932,1034.445995,10.937361,11.214011,13.754286
std,82.320431,848.793988,21.512486,21.776223,8.449873
min,3.18,5.0,0.0,0.01,1.0
25%,26.1,340.0,0.02,0.08,7.0
50%,101.676468,975.0,1.53,1.7,14.0
75%,125.8769,1360.0,11.05,11.5,21.0
max,288.065491,4100.0,179.55,182.15,29.0


In [5]:
# Smallest and largest value of the "date" column, shown as a (min, max) tuple.
dates = df["date"]
(dates.min(), dates.max())

('2022-08-31', '2023-08-31')

In [6]:
# 1. Pricing accuracy analysis
# Add per-row squared errors of the BS and model price columns against the
# market price column. No extra normalisation is applied here; the errors are
# in the units of the "*_prime" columns (presumably already normalised
# upstream -- TODO confirm).
market_prices = df[market_col]
df['BS_error'] = market_prices.sub(df[bs_col]).pow(2)
df['model_error'] = market_prices.sub(df[model_col]).pow(2)

# "moneyness" is stored as the plain difference S - K (not a ratio).
df["moneyness"] = df["S"].sub(df["K"])

price_cols = [bs_col, model_col, market_col]
df[price_cols].describe()



Unnamed: 0,BS_V_prime,V_prime,opt_price_prime
count,353932.0,353932.0,353932.0
mean,4.1e-05,0.008036,0.023711
std,0.001134,0.008227,0.07207
min,0.0,0.000589,1e-06
25%,0.0,0.0018,8.4e-05
50%,0.0,0.005151,0.002814
75%,0.0,0.012989,0.019062
max,0.087514,0.270026,1.045


In [19]:
# Per-ticker RMSE of the model ("qStorm") and BS ("BSM") prices against the
# market price, plus the relative RMSE reduction of the model over BS.
# corr_imp collects the per-ticker relative improvements and is extended by
# the Wasserstein cell further down.
corr_imp = {}
rmse_by_ticker = {}
for ticker in df["ticker"].unique():
    sub_df = df.loc[df["ticker"] == ticker]
    # RMSE = square root of the mean of the squared-error columns.
    model_rmse = np.sqrt(sub_df['model_error'].mean())
    bs_rmse = np.sqrt(sub_df['BS_error'].mean())
    relative_gain = (bs_rmse - model_rmse) / bs_rmse
    corr_imp[ticker] = {"RMSE": relative_gain}
    rmse_by_ticker[ticker] = {
        "qStorm": model_rmse,
        "BSM": bs_rmse,
        "% Improvement": relative_gain,
    }

rmse = pd.DataFrame(rmse_by_ticker)
# "Total" is the unweighted mean of each row across tickers (not a pooled RMSE).
rmse["Total"] = rmse.mean(axis=1)
# Round first, then scale the improvement row, so it shows as a percentage
# with two decimals.
rmse = rmse.round(4)
rmse.loc["% Improvement"] = rmse.loc["% Improvement"] * 100
rmse

Unnamed: 0,MCD,NKE,PFE,BB,GME,JPM,XOM,NOK,Total
qStorm,0.0272,0.0229,0.0169,0.1634,0.0762,0.0261,0.0315,0.206,0.0713
BSM,0.0299,0.0247,0.0176,0.1705,0.0793,0.0287,0.0336,0.2158,0.075
% Improvement,9.08,7.12,3.58,4.16,3.87,8.74,6.41,4.55,5.94


In [None]:
# Overlaid density histograms of the per-row pricing errors of the model and
# of BS. The plotted quantity is the square root of the squared-error columns,
# i.e. the absolute pricing error.
abs_model_error = np.sqrt(df['model_error']).to_numpy()
abs_bs_error = np.sqrt(df['BS_error']).to_numpy()

# Use one common set of bin edges for both series. With a bare `bins=100` each
# histogram would span its own data range, giving different bin widths and
# making the overlay hard to compare bar for bar.
bin_edges = np.histogram_bin_edges(
    np.concatenate([abs_model_error, abs_bs_error]), bins=100
)

fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(abs_model_error, bins=bin_edges, alpha=0.5, label='Model', density=True)
ax.hist(abs_bs_error, bins=bin_edges, alpha=0.5, label='BS', density=True)
ax.set_xlabel("Absolute pricing error")
ax.set_ylabel("Density")
ax.set_title("Pricing Errors")
ax.legend()
plt.show()

In [22]:
# Per-ticker Wasserstein distance between the market price distribution and
# the model ("qStorm") / BS ("BSM") price distributions, plus the relative
# reduction of the model over BS. Also extends corr_imp (created in the RMSE
# cell) with the Wasserstein improvement and the mean of the unique S values.
wass_by_ticker = {}
for ticker in df["ticker"].unique():
    sub_df = df.loc[df["ticker"] == ticker]
    market_prices = sub_df[market_col]
    model_wasserstein = wasserstein_distance(sub_df[model_col], market_prices)
    bs_wasserstein = wasserstein_distance(sub_df[bs_col], market_prices)
    relative_gain = (bs_wasserstein - model_wasserstein) / bs_wasserstein

    ticker_metrics = corr_imp[ticker]
    ticker_metrics["Wasserstein"] = relative_gain
    # Mean over the distinct S values only, not over every row.
    ticker_metrics["Mean S"] = sub_df["S"].unique().mean()

    wass_by_ticker[ticker] = {
        "qStorm": model_wasserstein,
        "BSM": bs_wasserstein,
        "% Improvement": relative_gain,
    }

wass = pd.DataFrame(wass_by_ticker)
# "Total" is the unweighted mean of each row across tickers.
wass["Total"] = wass.mean(axis=1)
# Round first, then scale the improvement row, so it shows as a percentage
# with two decimals.
wass = wass.round(4)
wass.loc["% Improvement"] = wass.loc["% Improvement"] * 100
wass

Unnamed: 0,MCD,NKE,PFE,BB,GME,JPM,XOM,NOK,Total
qStorm,0.0107,0.0067,0.0047,0.0648,0.0233,0.0087,0.009,0.0851,0.0266
BSM,0.0169,0.012,0.0087,0.0714,0.0289,0.0151,0.0147,0.0927,0.0326
% Improvement,36.33,44.53,46.12,9.14,19.41,42.43,38.94,8.13,30.63


In [None]:
# One row per ticker, one column per collected metric; show the pairwise
# correlation between those metric columns.
per_ticker_metrics = pd.DataFrame(corr_imp).transpose()
per_ticker_metrics.corr()

In [None]:
# Relative improvements per ticker (metrics as rows, tickers as columns),
# without the "Mean S" row, plus a "Total" column holding each metric's
# unweighted mean across tickers.
improvement_table = pd.DataFrame(corr_imp)
result_df = improvement_table.drop(index="Mean S")
result_df["Total"] = result_df.mean(axis=1)
result_df