In [1]:
from scipy.stats import ttest_rel
import pandas as pd
import numpy as np
from scipy.stats import norm, wasserstein_distance
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt

In [2]:
# Load the dataset; the first CSV column is used as the row index.
df = pd.read_csv("storm_data.csv", index_col=0)

# Column names shared by the cells below: the BS price, the model price and
# the market price that both are compared against.
bs_col, model_col, market_col = "BS_V_prime", "V_prime", "opt_price_prime"

In [3]:
# Distinct ticker symbols present in the dataset, in order of first appearance.
df.loc[:, "ticker"].unique()

array(['BB', 'GME', 'JPM'], dtype=object)

In [None]:
# Span of the "date" column as an (earliest, latest) pair.
date_values = df["date"]
(date_values.min(), date_values.max())

In [None]:
# 1. Pricing Accuracy Analysis
# Squared pricing error of each pricer against the market price column.
market_prices = df[market_col]
df["BS_error"] = (market_prices - df[bs_col]).pow(2)
df["model_error"] = (market_prices - df[model_col]).pow(2)

# "moneyness" here is the signed difference S - K (not the ratio S / K).
df["moneyness"] = df["S"].sub(df["K"])

# Summary statistics of the three price columns being compared.
df.loc[:, [bs_col, model_col, market_col]].describe()


In [None]:
# (total rows, rows with moneyness > 0, share of rows with moneyness > 0)
n_rows = len(df)
n_positive_moneyness = len(df.loc[df["moneyness"] > 0])
(n_rows, n_positive_moneyness, n_positive_moneyness / n_rows)

In [None]:
# RMSE of the BS and model prices against the market, reported for all rows
# and for the two subsets split on the sign of `moneyness` (S - K).
# "BOTH" is the full frame, so it also contains rows with moneyness == 0 that
# belong to neither the ITM nor the OTM subset.
# NOTE(review): S - K > 0 is in-the-money only for calls -- confirm the data
# contains no puts.
for moneyness in ["BOTH", "ITM", "OTM"]:
    if moneyness == "ITM":
        sub_df = df.loc[df["moneyness"] > 0]
    elif moneyness == "OTM":
        sub_df = df.loc[df["moneyness"] < 0]
    else:
        sub_df = df  # only read below, so no defensive copy is needed

    # The error columns hold squared errors; compute each RMSE once and reuse it.
    bs_rmse = np.sqrt(sub_df["BS_error"].mean())
    model_rmse = np.sqrt(sub_df["model_error"].mean())
    # Relative RMSE reduction of the model over BS, in percent.
    improvement_pct = 100 * (bs_rmse - model_rmse) / bs_rmse

    print(f"Pricing Performance for moneyness {moneyness}:")
    print(f"BS RMSE: {bs_rmse:.6f}")
    print(f"Model RMSE: {model_rmse:.6f}")
    print(f"% improvement: {improvement_pct:.2f}%\n")

In [None]:
# RMSE of the BS and model prices against the market, reported per ticker.
for ticker in df["ticker"].unique():
    sub_df = df.loc[df["ticker"] == ticker]

    # The error columns hold squared errors; compute each RMSE once and reuse it.
    bs_rmse = np.sqrt(sub_df["BS_error"].mean())
    model_rmse = np.sqrt(sub_df["model_error"].mean())
    # Relative RMSE reduction of the model over BS, in percent.
    improvement_pct = 100 * (bs_rmse - model_rmse) / bs_rmse

    print(f"Pricing Performance for {ticker}:")
    print(f"BS RMSE: {bs_rmse:.6f}")
    print(f"Model RMSE: {model_rmse:.6f}")
    print(f"% improvement: {improvement_pct:.2f}%\n")

In [None]:
# Overlaid density histograms of the absolute pricing errors of the model and BS.
# The error columns hold squared errors, so their square root is |market - price|.
model_abs_error = np.sqrt(df['model_error'])
bs_abs_error = np.sqrt(df['BS_error'])

# Use one set of bin edges for both series. Passing bins=100 to each call
# separately lets every histogram pick edges from its own data range, which
# gives the two overlays different bar widths and makes them hard to compare.
shared_bins = np.histogram_bin_edges(
    np.concatenate([model_abs_error.to_numpy(), bs_abs_error.to_numpy()]),
    bins=100,
)

plt.figure(figsize=(10, 5))
plt.hist(model_abs_error, bins=shared_bins, alpha=0.5, label='Model', density=True)
plt.hist(bs_abs_error, bins=shared_bins, alpha=0.5, label='BS', density=True)
plt.xlabel("Absolute pricing error")
plt.ylabel("Density")
plt.legend()
plt.title("Pricing Errors")
plt.show()

In [None]:
# Pairwise correlations (Series.corr, Pearson by default) between the market,
# model and BS price columns, for all rows and split on the sign of `moneyness`.
for moneyness in ("BOTH", "ITM", "OTM"):
    if moneyness == "BOTH":
        sub_df = df.copy()
    elif moneyness == "ITM":
        sub_df = df.loc[df["moneyness"] > 0]
    else:  # "OTM"
        sub_df = df.loc[df["moneyness"] < 0]

    market_prices = sub_df[market_col]
    model_prices = sub_df[model_col]
    bs_prices = sub_df[bs_col]

    print(f"Correlation for moneyness {moneyness}:")
    print(f"Market vs Model: {model_prices.corr(market_prices):.6f}")
    print(f"Market vs BS: {bs_prices.corr(market_prices):.6f}")
    print(f"Model vs BS: {model_prices.corr(bs_prices):.6f}\n")

In [None]:
# Wasserstein distance between each pricer's price distribution and the
# market's, for all rows and split on the sign of `moneyness`.
subset_selectors = {
    "BOTH": lambda frame: frame.copy(),
    "ITM": lambda frame: frame.loc[frame["moneyness"] > 0],
    "OTM": lambda frame: frame.loc[frame["moneyness"] < 0],
}
for moneyness, select_rows in subset_selectors.items():
    sub_df = select_rows(df)
    market_prices = sub_df[market_col]
    model_wasserstein = wasserstein_distance(sub_df[model_col], market_prices)
    bs_wasserstein = wasserstein_distance(sub_df[bs_col], market_prices)
    # Relative reduction in distance of the model over BS, in percent.
    improvement_pct = 100 * (bs_wasserstein - model_wasserstein) / bs_wasserstein
    print(f"\nWasserstein Distance for Moneyness {moneyness}:")
    print(f"Model: {model_wasserstein:.6f}")
    print(f"BS: {bs_wasserstein:.6f}")
    print(f"% improvement: {improvement_pct:.2f}%")

In [None]:
# Wasserstein distance between each pricer's price distribution and the
# market's, reported per ticker.
for ticker in df["ticker"].unique():
    is_ticker = df["ticker"] == ticker
    sub_df = df.loc[is_ticker]
    market_prices = sub_df[market_col]
    print(f"Wasserstein Distance for {ticker}:")
    model_wasserstein = wasserstein_distance(sub_df[model_col], market_prices)
    bs_wasserstein = wasserstein_distance(sub_df[bs_col], market_prices)
    # Relative reduction in distance of the model over BS, in percent.
    improvement_pct = 100 * (bs_wasserstein - model_wasserstein) / bs_wasserstein
    print(f"Model: {model_wasserstein:.6f}")
    print(f"BS: {bs_wasserstein:.6f}")
    print(f"% improvement: {improvement_pct:.2f}%\n")
    

In [None]:
# Correlation matrix of moneyness, option price and the two squared-error columns.
# NOTE(review): this uses "opt_price", whereas the error columns were computed
# from market_col ("opt_price_prime") -- confirm which column is intended.
df.loc[:, ["moneyness", "opt_price", "BS_error", "model_error"]].corr()