# Testing Quasi-Newton Optimizers
#### Author: JP Melo

A test of BFGS and the Self-Scaled Broyden optimizer against the L-BFGS method of Pytorch.

### Imports

In [11]:
from derpinns.nn import *
from derpinns.utils import *
from derpinns.trainer import *
import torch

## Parameters

In [12]:
# Fix seed for reproducibility
torch.manual_seed(0)
np.random.seed(0)

# Global parameters
assets = 10
sampler = "pseudo"               # ["pseudo", "LHS", "Halton", "Hammersley", "Sobol"]:
nn_shape = "10x3"                # n_assets input layer + 64 neurons, 3 hidden layers + 1 output layer
device = torch.device("cpu")     # cpu, cuda or mps
dtype = torch.float32

# Define option valuation params
params = OptionParameters(
    n_assets=assets,
    tau=1.0,
    sigma=np.array([0.2] * assets),
    rho=np.eye(assets) + 0.25 * (np.ones((assets, assets)) - np.eye(assets)),
    r=0.05,
    strike=100,
    payoff=payoff
)

# Build the net to be used
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=torch.float32
).apply(weights_init).to(device)
model.train()

NN(
  (hidden_layers): Sequential(
    (0): Linear(in_features=11, out_features=10, bias=True)
    (1): Tanh()
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): Tanh()
    (4): Linear(in_features=10, out_features=10, bias=True)
    (5): Tanh()
  )
  (output_layer): Linear(in_features=10, out_features=1, bias=True)
)

## Training

### BFGS Training

In [13]:
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=torch.float32
).apply(weights_init).to(device)
model.train()

boundary_samples = 100
interior_samples = boundary_samples*assets*2
initial_samples = boundary_samples*assets*2

# We create new samples
dataset = SampledDataset(
    params, interior_samples, initial_samples, boundary_samples, sampler, dtype, device, seed=0)

optimizer = BFGS(
    model.parameters(),
    max_iter=2000)

batch_size = len(dataset) # we use all samples

# # Set the training function
closure = DimlessBS()\
    .with_dataset(dataset, loader_opts={'batch_size': batch_size, "shuffle": True, "pin_memory": True})\
    .with_model(model)\
    .with_device(device)\
    .with_dtype(dtype)

trainer = PINNTrainer()\
    .with_optimizer(optimizer)\
    .with_device(device)\
    .with_dtype(dtype)\
    .with_training_step(closure)\

closure = closure.with_dataset(
    dataset, loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True})

trainer = trainer.with_optimizer(optimizer).with_training_step(closure)
trainer.train()

bfgs_state = closure.get_state()
plot_loss(bfgs_state, smooth=False)

BFGS training:   0%|          | 0/2000 [00:00<?, ?it/s]

BFGS training:  41%|████      | 821/2000 [14:58<21:30,  1.09s/it, Interior=0.005643, Boundary=0.054480, Initial=0.165083, Total=0.225206, Max Error=64.684540, L2 Error=0.109027] 


In [14]:
bfgs_l2 = compare_with_mc(model, params, n_prices=200,
                          n_simulations=10_000, dtype=dtype, device=device, seed=42)['l2_rel_error']
print("L2 Error (%): ", bfgs_l2*100)

L2 Error (%):  9.705838


## L-BFGS Training

In [15]:
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=torch.float32
).apply(weights_init).to(device)
model.train()

boundary_samples = 100
interior_samples = boundary_samples*assets*2
initial_samples = boundary_samples*assets*2

# We create new samples
dataset = SampledDataset(
    params, interior_samples, initial_samples, boundary_samples, sampler, dtype, device, seed=0)

optimizer = LBFGS(
    model.parameters(),
    max_iter=2000,
    max_eval=2000,
    line_search_fn="strong_wolfe",)

batch_size = len(dataset) # we use all samples

# # Set the training function
closure = DimlessBS()\
    .with_dataset(dataset, loader_opts={'batch_size': batch_size, "shuffle": True, "pin_memory": True})\
    .with_model(model)\
    .with_device(device)\
    .with_dtype(dtype)

trainer = PINNTrainer()\
    .with_optimizer(optimizer)\
    .with_device(device)\
    .with_dtype(dtype)\
    .with_training_step(closure)\

closure = closure.with_dataset(
    dataset, loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True})

trainer = trainer.with_optimizer(optimizer).with_training_step(closure)
trainer.train()

lbfgs_state = closure.get_state()
plot_loss(lbfgs_state, smooth=False)

LBFGS training:   0%|          | 0/2000 [00:00<?, ?it/s]

LBFGS training: 100%|█████████▉| 1999/2000 [20:47<00:00,  1.60it/s, Interior=0.001306, Boundary=0.014614, Initial=0.888273, Total=0.904193, Max Error=78.610863, L2 Error=0.094029]


In [16]:
lbfgs_l2 = compare_with_mc(model, params, n_prices=200,
                          n_simulations=10_000, dtype=dtype, device=device, seed=42)['l2_rel_error']
print("L2 Error (%): ", lbfgs_l2*100)

L2 Error (%):  25.911957


In [None]:
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=torch.float32
).apply(weights_init).to(device)
model.train()

boundary_samples = 100
interior_samples = boundary_samples*assets*2
initial_samples = boundary_samples*assets*2

# We create new samples
dataset = SampledDataset(
    params, interior_samples, initial_samples, boundary_samples, sampler, dtype, device, seed=0)

optimizer = SSBroyden(
    model.parameters(),
    max_iter=2000,)

batch_size = len(dataset) # we use all samples

# # Set the training function
closure = DimlessBS()\
    .with_dataset(dataset, loader_opts={'batch_size': batch_size, "shuffle": True, "pin_memory": True})\
    .with_model(model)\
    .with_device(device)\
    .with_dtype(dtype)

trainer = PINNTrainer()\
    .with_optimizer(optimizer)\
    .with_device(device)\
    .with_dtype(dtype)\
    .with_training_step(closure)\

closure = closure.with_dataset(
    dataset, loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True})

trainer = trainer.with_optimizer(optimizer).with_training_step(closure)
trainer.train()

ssbroyden_state = closure.get_state()
plot_loss(ssbroyden_state, smooth=False)

SSBroyden training:   0%|          | 0/2000 [00:00<?, ?it/s]

SSBroyden training:  30%|██▉       | 590/2000 [09:57<23:48,  1.01s/it, Interior=0.005699, Boundary=0.055761, Initial=0.119629, Total=0.181090, Max Error=62.768005, L2 Error=0.096544] 


In [18]:
ss_broyden_l2 = compare_with_mc(model, params, n_prices=200,
                          n_simulations=10_000, dtype=dtype, device=device, seed=42)['l2_rel_error']
print("L2 Error (%): ", ss_broyden_l2*100)

L2 Error (%):  6.6149383


In [19]:
compare_loss_histories(
    [bfgs_state,lbfgs_state, ssbroyden_state],
    ["BFGS", "L-BFGS", "Self-Scaled-Broyden"],
    smooth=False,
)