In [15]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import pandas as pd

In [2]:
def load_dataset(file="../data/raw/training_data.npz"):
    """Load the ``X`` and ``y`` arrays stored in an ``.npz`` archive.

    Parameters
    ----------
    file : str or path-like
        Path to an ``.npz`` archive containing arrays saved under the
        keys ``"X"`` and ``"y"``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` exactly as stored in the archive.

    Raises
    ------
    KeyError
        If either ``"X"`` or ``"y"`` is missing from the archive.
    """
    # np.load on an .npz returns a lazy NpzFile that keeps the archive open;
    # the context manager closes it once both arrays have been materialised.
    with np.load(file) as data:
        X, y = data["X"], data["y"]
    return X, y

# Load the X and y arrays from the default archive path; later cells reuse them.
X, y = load_dataset()

In [3]:
# Sanity check: report the shapes of the arrays that were just loaded.
print(f"Loaded dataset: X {X.shape}, y {y.shape}")

Loaded dataset: X (450000, 15), y (450000,)


In [4]:
def plot_payoff_distribution(y, save_dir="../src/visualization/plots"):
    """Save a 100-bin histogram of ``y`` with a KDE overlay.

    The image is written to ``payoff_distribution.png`` inside
    ``save_dir`` (created if it does not exist). The figure is closed
    after saving and nothing is returned.
    """
    os.makedirs(save_dir, exist_ok=True)
    out_path = os.path.join(save_dir, "payoff_distribution.png")

    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(y, bins=100, kde=True, ax=ax)
    ax.set_title("Distribution of Discounted Payoffs")
    ax.set_xlabel("Payoff")
    ax.set_ylabel("Frequency")

    fig.tight_layout()
    fig.savefig(out_path)
    plt.close(fig)

plot_payoff_distribution(y)

In [5]:
def plot_param_histograms(X, prefix_len=10, save_dir="../src/visualization/plots"):
    """Save histograms of the option parameters and a call/put count plot.

    The columns of ``X`` from ``prefix_len`` onward are unpacked, in
    order, as strike, maturity, rate, volatility and option flag, so
    exactly five such columns must be present.

    Two files are written into ``save_dir`` (created if needed):

    * ``parameter_histograms.png`` - 2x2 grid of 20-bin histograms for
      K, T, r and sigma.
    * ``option_type_counts.png`` - bar chart of put vs. call counts.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix.
    prefix_len : int
        Number of leading columns to skip before the parameter columns.
    save_dir : str
        Output directory for the PNG files.
    """
    os.makedirs(save_dir, exist_ok=True)
    K, T, r, sigma, opt_flag = X[:, prefix_len:].T

    panels = [
        (K, "Strike (K)"),
        (T, "Maturity (T)"),
        (r, "Risk-free Rate (r)"),
        (sigma, "Volatility (sigma)"),
    ]
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    # axes.flat walks the grid row by row, matching the panel order above.
    for ax, (values, title) in zip(axes.flat, panels):
        sns.histplot(values, bins=20, kde=False, ax=ax)
        ax.set_title(title)
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "parameter_histograms.png"))
    plt.close(fig)

    # Option type split.
    # Label each row explicitly and pin the category order, so the tick
    # labels stay correct even when only one option type is present
    # (relabelling ticks by position would call a lone "call" bar "Put").
    # Rows whose flag equals 1 are calls; all other rows are counted as puts.
    option_labels = np.where(opt_flag == 1, "Call", "Put")
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.countplot(x=option_labels, order=["Put", "Call"], ax=ax)
    ax.set_title("Option Type Counts")
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "option_type_counts.png"))
    plt.close(fig)

plot_param_histograms(X, prefix_len=10)

In [6]:
def plot_sample_paths(X, n_paths=5, prefix_len=10, save_dir="../src/visualization/plots"):
    """Plot the first ``prefix_len`` columns of the first ``n_paths`` rows of ``X``.

    Each selected row is drawn as its own line with circle markers and
    labelled ``Path 1``, ``Path 2``, ... The figure is saved as
    ``sample_prefixes.png`` in ``save_dir`` (created if needed) and then
    closed; nothing is returned.
    """
    os.makedirs(save_dir, exist_ok=True)
    fig, ax = plt.subplots(figsize=(8, 5))

    time_steps = range(prefix_len)
    for path_no in range(1, n_paths + 1):
        row_prefix = X[path_no - 1, :prefix_len]
        ax.plot(time_steps, row_prefix, marker="o", label=f"Path {path_no}")

    ax.set_title(f"Sample Prefixes (first {prefix_len} timesteps)")
    ax.set_xlabel("Time step")
    ax.set_ylabel("Stock Price")
    ax.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "sample_prefixes.png"))
    plt.close(fig)

plot_sample_paths(X, n_paths=5, prefix_len=10)

In [7]:
def plot_payoff_by_option_type(X, y, prefix_len=10, save_dir="../src/visualization/plots"):
    """Overlay payoff histograms for calls and puts on one set of axes.

    The option flag is read from column ``prefix_len + 4`` of ``X``;
    rows with flag ``1`` are drawn as "Call" (blue) and rows with flag
    ``0`` as "Put" (red), each with 80 bins and a KDE overlay. The figure
    is saved as ``payoff_by_option_type.png`` in ``save_dir`` (created if
    needed) and then closed.
    """
    os.makedirs(save_dir, exist_ok=True)
    flag_column = X[:, prefix_len + 4]  # option type column

    fig, ax = plt.subplots(figsize=(8, 5))
    # Calls are drawn first, then puts, so the puts sit on top.
    series = ((1, "Call", "blue"), (0, "Put", "red"))
    for flag_value, name, colour in series:
        payoffs = y[flag_column == flag_value]
        sns.histplot(payoffs, bins=80, kde=True, label=name, color=colour, ax=ax)

    ax.set_title("Payoff Distribution by Option Type")
    ax.set_xlabel("Payoff")
    ax.set_ylabel("Frequency")
    ax.legend()

    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "payoff_by_option_type.png"))
    plt.close(fig)

plot_payoff_by_option_type(X, y, prefix_len=10)

In [8]:
def plot_payoff_by_moneyness(X, y, prefix_len=10, save_dir="../src/visualization/plots"):
    """Overlay payoff histograms for OTM, ATM and ITM options.

    Moneyness is measured so that a value above 1 always means
    "in the money": ``S0 / K`` for calls and ``K / S0`` for puts. Rows
    are then bucketed as OTM (< 0.95), ATM (0.95 to < 1.05) or
    ITM (>= 1.05). The figure is saved as ``payoff_by_moneyness.png`` in
    ``save_dir`` (created if needed) and then closed.

    Parameters
    ----------
    X : numpy.ndarray
        2-D feature matrix. Column 0 is used as the initial spot,
        column ``prefix_len`` as the strike and column ``prefix_len + 4``
        as the option flag (1 = call).
    y : numpy.ndarray
        Payoff per row of ``X``.
    prefix_len : int
        Number of leading path columns in ``X``.
    save_dir : str
        Output directory for the PNG file.
    """
    os.makedirs(save_dir, exist_ok=True)
    S0 = X[:, 0]  # first prefix point = initial spot
    K = X[:, prefix_len]
    is_call = X[:, prefix_len + 4] == 1

    # A call is in the money when S0 > K, a put when K > S0. Using S0 / K
    # for every row would label in-the-money puts as "OTM" and vice versa,
    # so the ratio is flipped for puts.
    moneyness = np.where(is_call, S0 / K, K / S0)

    bins = [0, 0.95, 1.05, np.inf]
    labels = ["OTM", "ATM", "ITM"]
    categories = np.digitize(moneyness, bins) - 1

    fig, ax = plt.subplots(figsize=(8, 5))
    for i, label in enumerate(labels):
        bucket = y[categories == i]
        if len(bucket) == 0:
            # Nothing to draw for this bucket; skip it rather than hand
            # an empty sample to the histogram/KDE routine.
            continue
        sns.histplot(bucket, bins=80, kde=True, label=label, ax=ax)
    ax.set_title("Payoff Distribution by Moneyness")
    ax.set_xlabel("Payoff")
    ax.set_ylabel("Frequency")
    ax.legend()
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "payoff_by_moneyness.png"))
    plt.close(fig)

plot_payoff_by_moneyness(X, y, prefix_len=10)

In [16]:
def plot_feature_correlations(X, y, prefix_len=10, save_dir="figures"):
    """Build a parameter/payoff table, save its correlation heatmap, return the table.

    The columns of ``X`` from ``prefix_len`` onward are unpacked, in
    order, as strike, maturity, rate, volatility and option flag (exactly
    five columns are required) and combined with ``y`` into a DataFrame.
    The annotated correlation matrix of that frame is saved as
    ``feature_payoff_correlation.png`` in ``save_dir`` (created if
    needed).

    NOTE(review): the default ``save_dir`` here is ``"figures"``, unlike
    the other plot helpers in this notebook - confirm this is intended.

    Returns
    -------
    pandas.DataFrame
        One row per sample with the five parameter columns and "Payoff".
    """
    os.makedirs(save_dir, exist_ok=True)
    strike, maturity, rate, vol, is_call = X[:, prefix_len:].T

    table = pd.DataFrame(
        {
            "Strike (K)": strike,
            "Maturity (T)": maturity,
            "Rate (r)": rate,
            "Volatility (sigma)": vol,
            "OptionFlag (1=Call)": is_call,
            "Payoff": y,
        }
    )
    corr_matrix = table.corr()

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", cbar=True, ax=ax)
    ax.set_title("Correlation Heatmap: Features vs Payoff")
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "feature_payoff_correlation.png"))
    plt.close(fig)

    return table

df = plot_feature_correlations(X, y, prefix_len=10)
df

Unnamed: 0,Strike (K),Maturity (T),Rate (r),Volatility (sigma),OptionFlag (1=Call),Payoff
0,80.0,0.25,0.00,0.1,1.0,12.222407
1,80.0,0.25,0.00,0.1,1.0,20.504919
2,80.0,0.25,0.00,0.1,1.0,18.497378
3,80.0,0.25,0.00,0.1,1.0,22.881945
4,80.0,0.25,0.00,0.1,1.0,25.350818
...,...,...,...,...,...,...
449995,120.0,1.00,0.03,0.3,0.0,22.791851
449996,120.0,1.00,0.03,0.3,0.0,47.339841
449997,120.0,1.00,0.03,0.3,0.0,15.047626
449998,120.0,1.00,0.03,0.3,0.0,12.095583


In [17]:
def plot_pairwise_relationships(df, save_dir="../src/visualization/plots", max_rows=None):
    """Save a seaborn pair plot of the four numeric option parameters.

    The plot covers "Strike (K)", "Maturity (T)", "Rate (r)" and
    "Volatility (sigma)", coloured by "OptionFlag (1=Call)", with KDE
    plots on the diagonal. It is written to ``pairplot_features.png`` in
    ``save_dir`` (created if needed) and the figure is then closed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the five columns named above.
    save_dir : str
        Output directory for the PNG file.
    max_rows : int or None
        If given and ``df`` has more rows than this, a reproducible random
        sample of ``max_rows`` rows is plotted instead of the full frame.
        ``None`` (the default) plots every row.
    """
    os.makedirs(save_dir, exist_ok=True)

    plot_df = df
    if max_rows is not None and len(df) > max_rows:
        # Fixed seed so repeated runs draw the same subsample.
        plot_df = df.sample(n=max_rows, random_state=0)

    # pairplot builds its own figure, so no plt.figure() call is made here:
    # a pre-created figure would stay empty and never be closed.
    grid = sns.pairplot(
        plot_df,
        vars=["Strike (K)", "Maturity (T)", "Rate (r)", "Volatility (sigma)"],
        hue="OptionFlag (1=Call)",
        diag_kind="kde",
        plot_kws={"alpha": 0.5, "s": 20},
    )
    # Save and close the grid's own figure explicitly rather than relying
    # on whichever figure pyplot currently considers active.
    grid.figure.savefig(os.path.join(save_dir, "pairplot_features.png"))
    plt.close(grid.figure)

plot_pairwise_relationships(df)


<Figure size 1000x800 with 0 Axes>