In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [6]:
def load_dataset(file="../data/raw/training_data.npz"):
    """Load the feature matrix and target vector from an ``.npz`` archive.

    Parameters
    ----------
    file : str or path-like
        Path to an ``.npz`` archive containing the arrays ``"X"`` and ``"y"``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, y)`` exactly as stored in the archive.

    Raises
    ------
    KeyError
        If the archive has no ``"X"`` or ``"y"`` entry.
    """
    # np.load on an .npz returns a lazy NpzFile that keeps the underlying file
    # open; the context manager closes it once both arrays have been read into
    # memory, so the handle does not linger for the life of the kernel.
    with np.load(file) as archive:
        return archive["X"], archive["y"]

# Load the features and targets from load_dataset's default .npz path.
X, y = load_dataset()

In [7]:
# Print both array shapes as a quick sanity check on the load.
print(f"Loaded dataset: X {X.shape}, y {y.shape}")

Loaded dataset: X (450000, 15), y (450000,)


In [10]:
def plot_payoff_distribution(y, save_dir="../src/visualization/plots"):
    """Save a histogram of ``y`` (100 bins, KDE overlay) as a PNG.

    The image is written to ``payoff_distribution.png`` inside ``save_dir``,
    which is created if it does not exist. The figure is closed after saving,
    so nothing is displayed inline and nothing is returned.
    """
    os.makedirs(save_dir, exist_ok=True)
    target = os.path.join(save_dir, "payoff_distribution.png")

    fig, ax = plt.subplots(figsize=(8, 5))
    sns.histplot(y, bins=100, kde=True, ax=ax)
    ax.set_title("Distribution of Discounted Payoffs")
    ax.set_xlabel("Payoff")
    ax.set_ylabel("Frequency")

    fig.tight_layout()
    fig.savefig(target)
    plt.close(fig)

# Write the payoff histogram for the loaded targets to the default plot directory.
plot_payoff_distribution(y)

In [11]:
def plot_param_histograms(X, prefix_len=10, save_dir="../src/visualization/plots"):
    """Save histograms of the option parameters and a count plot of option types.

    The columns of ``X`` after the first ``prefix_len`` are read, in order, as
    strike ``K``, maturity ``T``, risk-free rate ``r``, volatility ``sigma``
    and an option-type flag.

    Two files are written into ``save_dir`` (created if missing):
    ``parameter_histograms.png`` (2x2 grid, 20 bins each) and
    ``option_type_counts.png``. Figures are closed after saving.

    Parameters
    ----------
    X : array-like, 2-D
        Feature matrix whose trailing five columns are the parameters above.
    prefix_len : int
        Number of leading columns to skip before the parameter columns.
    save_dir : str
        Output directory for the PNG files.

    Raises
    ------
    ValueError
        If ``X[:, prefix_len:]`` does not have exactly five columns.
    """
    X = np.asarray(X)
    params = X[:, prefix_len:]
    # Validate the layout before touching the filesystem so a wrong prefix_len
    # fails with a clear message instead of an opaque tuple-unpacking error.
    if params.ndim != 2 or params.shape[1] != 5:
        raise ValueError(
            "expected exactly 5 parameter columns (K, T, r, sigma, option flag) "
            f"after the first {prefix_len} columns, got array of shape "
            f"{params.shape} from X of shape {X.shape}"
        )
    K, T, r, sigma, opt_flag = params.T

    os.makedirs(save_dir, exist_ok=True)

    panels = (
        (K, "Strike (K)"),
        (T, "Maturity (T)"),
        (r, "Risk-free Rate (r)"),
        (sigma, "Volatility (sigma)"),
    )
    fig, axes = plt.subplots(2, 2, figsize=(10, 8))
    # axes.flat walks the grid row by row: (0,0), (0,1), (1,0), (1,1).
    for ax, (values, title) in zip(axes.flat, panels):
        sns.histplot(values, bins=20, kde=False, ax=ax)
        ax.set_title(title)
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "parameter_histograms.png"))
    plt.close(fig)

    # Option type split
    fig, ax = plt.subplots(figsize=(5, 4))
    sns.countplot(x=opt_flag, ax=ax)
    # countplot places one bar per distinct value in sorted order, so the tick
    # labels must follow the values actually present; hard-coding position 0 as
    # "Put" would mislabel a dataset that contains only calls.
    # NOTE(review): the 0 -> "Put", 1 -> "Call" encoding is taken from the
    # original tick labels; confirm against the data generator.
    flag_names = {0: "Put", 1: "Call"}
    present = np.unique(opt_flag[~np.isnan(opt_flag)])
    ax.set_xticks(list(range(len(present))))
    ax.set_xticklabels([flag_names.get(flag, str(flag)) for flag in present])
    ax.set_title("Option Type Counts")
    fig.tight_layout()
    fig.savefig(os.path.join(save_dir, "option_type_counts.png"))
    plt.close(fig)

# Plot the parameter columns that follow the 10 leading columns of X.
plot_param_histograms(X, prefix_len=10)

In [13]:
def plot_sample_paths(X, n_paths=5, prefix_len=10, save_dir="../src/visualization/plots"):
    """Save a line plot of the leading ``prefix_len`` columns of the first rows of ``X``.

    Rows ``0 .. n_paths - 1`` are each drawn as one line with circle markers
    and labelled "Path 1", "Path 2", and so on. The image is written to
    ``sample_prefixes.png`` inside ``save_dir`` (created if missing) and the
    figure is closed afterwards.
    """
    os.makedirs(save_dir, exist_ok=True)
    out_file = os.path.join(save_dir, "sample_prefixes.png")

    fig, ax = plt.subplots(figsize=(8, 5))
    steps = range(prefix_len)  # shared x-axis for every path

    for row in range(n_paths):
        ax.plot(steps, X[row, :prefix_len], marker="o", label=f"Path {row+1}")

    ax.set_title(f"Sample Prefixes (first {prefix_len} timesteps)")
    ax.set_xlabel("Time step")
    ax.set_ylabel("Stock Price")
    ax.legend()

    fig.tight_layout()
    fig.savefig(out_file)
    plt.close(fig)

# Plot the first 5 rows of X over their 10 leading columns.
plot_sample_paths(X, n_paths=5, prefix_len=10)