In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

from policies.lsm import LSMPolicy
from policies.rpo import RPOPolicy
from section51_gbm_comparison.gbm_trajectories import generate_gbm_trajectories, graph_gbm_trajectories
from section51_gbm_comparison.plot_boundaries import plot_boundaries, plot_rpo_boundaries



In [2]:
# Strike level shared by the payoff and the basis functions in this notebook.
K = 100.0


def payoff(t, x):
    """Return the amount by which ``x`` exceeds the strike ``K``, floored at zero.

    ``t`` is accepted for interface compatibility but is not used here.
    """
    excess_over_strike = x - K
    return max(excess_over_strike, 0)

def _constant_term(x):
    """Intercept feature: 1.0 for every input."""
    return 1.0


def _scaled_moneyness(x):
    """Distance of x from the strike K, divided by x (offset by 1e-5 so x == 0 does not divide by zero)."""
    return (x - K) / (x + 1e-5)


def _scaled_moneyness_squared(x):
    """Square of the scaled distance from the strike, giving the fit a curvature term."""
    return ((x - K) / (x + 1e-5))**2


def _strike_proximity(x):
    """Exponential bump equal to 1 at x == K that decays as x moves away from K."""
    return np.exp(-np.abs(x - K) / K)


# NOTE(review): presumably each weight vector is matched to these features by position,
# so keep the order stable — confirm against RPOPolicy.
basis_functions = [
    _constant_term,
    _scaled_moneyness,
    _scaled_moneyness_squared,
    _strike_proximity,
]

In [3]:
# Checkpoint with the trained RPO weights; the path is relative to the notebook's working directory.
WEIGHTS_PATH = 'test6_x4/rpo_weights.pt'

# NOTE(review): torch.load unpickles the file, so only point this at checkpoints from a trusted source.
# map_location="cpu" together with .cpu() keeps the cell working when the tensor was saved from a
# GPU session: .numpy() only accepts CPU tensors, and a CUDA-tagged checkpoint cannot be restored
# on a CPU-only machine without remapping.
weights = torch.load(WEIGHTS_PATH, map_location="cpu").detach().cpu().numpy()

# Build the policy from the notebook's basis functions and payoff, seeded with the loaded weights.
rpo = RPOPolicy(
    basis_functions=basis_functions,
    payoff_fn=payoff,
    T=100,
    steps=100,
    start_weights=weights,
)
print(rpo.get_weights())

[array([ 22.312687,  48.2712  ,  10.000335, -10.706387], dtype=float32), array([  2.4479926,  71.55663  ,  45.61465  , -44.02363  ], dtype=float32), array([  9.527155 ,  33.315228 ,   6.1506824, -46.085617 ], dtype=float32), array([ 15.128083 ,  34.45136  ,   6.8987803, -53.508873 ], dtype=float32), array([ 19.469948 ,  35.087234 ,   7.9367905, -59.39522  ], dtype=float32), array([ 23.209734,  42.065662,  13.908068, -65.82793 ], dtype=float32), array([ 28.247854,  37.39518 , -16.810335, -70.77529 ], dtype=float32), array([ 31.460745 ,  47.609604 ,   7.5747094, -78.447365 ], dtype=float32), array([ 35.057297,  48.599728,   6.284153, -83.55295 ], dtype=float32), array([ 38.016876,  49.115105,   7.203269, -89.02251 ], dtype=float32), array([ 40.493195,  50.315697,  10.462806, -92.96953 ], dtype=float32), array([ 42.65279 ,  52.015865,  13.146792, -97.14555 ], dtype=float32), array([  45.750996,   45.768913,   18.049604, -100.08391 ], dtype=float32), array([  46.777885 ,   45.89585  ,   13

In [4]:
# Integer time indices 1..99 (the upper bound of range is exclusive).
t_values = [*range(1, 100)]
# 700 evenly spaced x values covering [50, 500], both endpoints included.
x_grid = np.linspace(start=50, stop=500, num=700)

In [5]:
# (label, policy) pairs handed to the plotting helper.
labelled_policies = [("RPO", rpo)]
# Probability levels 0.1 through 0.9 passed as target_probs.
target_prob_levels = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# NOTE(review): presumably the figure is also written to `filename` — confirm in plot_rpo_boundaries.
plot_rpo_boundaries(
    policies=labelled_policies,
    t_values=t_values,
    x_grid=x_grid,
    target_probs=target_prob_levels,
    title="RPO Boundaries",
    filename="rpo_boundaries.png",
)