# Testing Quasi-Newton Optimizers
#### Author: JP Melo

## Imports

In [9]:
from derpinns.nn import *
from derpinns.utils import *
from derpinns.trainer import *
import torch

## Parameters

In [None]:
# Fix the torch and NumPy seeds for reproducibility.
# NOTE(review): `np` is not imported explicitly in this notebook; it is
# presumably re-exported by one of the derpinns star imports -- confirm.
torch.manual_seed(0)
np.random.seed(0)

# Global parameters
assets = 3
sampler = "pseudo"              # one of: "pseudo", "LHS", "Halton", "Hammersley", "Sobol"
nn_shape = "10x3"               # 10 neurons per hidden layer, 3 hidden layers (+ 1 output layer)
device = torch.device("cpu")    # cpu, cuda or mps
dtype = torch.float32

# Define option valuation params.
# `sigma` is 0.2 for every asset; `rho` has ones on the diagonal and 0.25
# everywhere off the diagonal.
params = OptionParameters(
    n_assets=assets,
    tau=1.0,
    sigma=np.array([0.2] * assets),
    rho=np.eye(assets) + 0.25 * (np.ones((assets, assets)) - np.eye(assets)),
    r=0.05,
    strike=100,
    payoff=payoff
)

# Build the net to be used. The global `dtype` is passed through (instead of
# a hard-coded torch.float32) so that changing it above is enough to switch
# the precision of the whole notebook.
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=dtype
).apply(weights_init).to(device)
model.train()

NN(
  (hidden_layers): Sequential(
    (0): Linear(in_features=4, out_features=10, bias=True)
    (1): Tanh()
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): Tanh()
    (4): Linear(in_features=10, out_features=10, bias=True)
    (5): Tanh()
  )
  (output_layer): Linear(in_features=10, out_features=1, bias=True)
)

## Training

### BFGS Training

In [None]:
# Rebuild the network and re-apply `weights_init` so this run does not reuse
# the weights of a model trained in another cell. The global `dtype` is used
# (rather than a hard-coded torch.float32) so the model always matches the
# dtype given to the dataset and the training step below.
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=dtype,
).apply(weights_init).to(device)
model.train()

# Interior and initial sample counts are both 2 * assets times the boundary count.
boundary_samples = 100
interior_samples = boundary_samples * assets * 2
initial_samples = boundary_samples * assets * 2

# We create new samples
dataset = SampledDataset(
    params, interior_samples, initial_samples, boundary_samples,
    sampler, dtype, device, seed=0)

optimizer = BFGS(
    model.parameters(),
    max_iter=1000)

batch_size = len(dataset)  # we use all samples (one full batch)

# Set the training function.
# Parenthesised chains replace the backslash continuations; the original
# trainer chain ended with a dangling backslash before a blank line.
closure = (
    DimlessBS()
    .with_dataset(dataset, loader_opts={'batch_size': batch_size, "shuffle": True, "pin_memory": True})
    .with_model(model)
    .with_device(device)
    .with_dtype(dtype)
)

trainer = (
    PINNTrainer()
    .with_optimizer(optimizer)
    .with_device(device)
    .with_dtype(dtype)
    .with_training_step(closure)
)

# NOTE(review): the dataset is attached a second time with shuffle=False and
# the optimizer / training step are set on the trainer again. This presumably
# just overrides the configuration above and could be collapsed into a single
# setup -- confirm against the DimlessBS / PINNTrainer builders before
# removing. The call sequence is kept exactly as it was.
closure = closure.with_dataset(
    dataset, loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True})

trainer = trainer.with_optimizer(optimizer).with_training_step(closure)
trainer.train()

# Keep the recorded training state for the comparison cell further down.
bfgs_state = closure.get_state()
plot_loss(bfgs_state, smooth=False)

BFGS training: 100%|█████████▉| 999/1000 [1:01:22<00:03,  3.69s/it, Interior=0.000274, Boundary=0.000553, Initial=0.001064, Total=0.001891, Max Error=6.779968, L2 Error=0.010827]  


In [12]:
# Compare the BFGS-trained model against Monte Carlo prices and report the
# relative L2 error as a percentage.
_bfgs_mc_report = compare_with_mc(
    model,
    params,
    n_prices=200,
    n_simulations=10_000,
    dtype=dtype,
    device=device,
    seed=42,
)
bfgs_l2 = _bfgs_mc_report['l2_rel_error']
print("L2 Error (%): ", bfgs_l2*100)

L2 Error (%):  1.2867188


### L-BFGS Training

In [13]:
# Rebuild the network and re-apply `weights_init` so L-BFGS does not start
# from the weights trained in the BFGS cell. The global `dtype` is used
# (rather than a hard-coded torch.float32) so the model always matches the
# dtype given to the dataset and the training step below.
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=dtype,
).apply(weights_init).to(device)
model.train()

# Interior and initial sample counts are both 2 * assets times the boundary count.
# NOTE(review): this run uses 500 boundary samples while the BFGS cell uses
# 100, so the two loss histories are not computed on equally sized datasets
# -- confirm this is intentional.
boundary_samples = 500
interior_samples = boundary_samples * assets * 2
initial_samples = boundary_samples * assets * 2

# We create new samples
dataset = SampledDataset(
    params, interior_samples, initial_samples, boundary_samples,
    sampler, dtype, device, seed=0)

optimizer = LBFGS(
    model.parameters(),
    max_iter=1000,
    max_eval=1000,
)

batch_size = len(dataset)  # we use all samples (one full batch)

# Set the training function.
# Parenthesised chains replace the backslash continuations; the original
# trainer chain ended with a dangling backslash before a blank line.
closure = (
    DimlessBS()
    .with_dataset(dataset, loader_opts={'batch_size': batch_size, "shuffle": True, "pin_memory": True})
    .with_model(model)
    .with_device(device)
    .with_dtype(dtype)
)

trainer = (
    PINNTrainer()
    .with_optimizer(optimizer)
    .with_device(device)
    .with_dtype(dtype)
    .with_training_step(closure)
)

# NOTE(review): the dataset is attached a second time with shuffle=False and
# the optimizer / training step are set on the trainer again. This presumably
# just overrides the configuration above and could be collapsed into a single
# setup -- confirm against the DimlessBS / PINNTrainer builders before
# removing. The call sequence is kept exactly as it was.
closure = closure.with_dataset(
    dataset, loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True})

trainer = trainer.with_optimizer(optimizer).with_training_step(closure)
trainer.train()

# Keep the recorded training state for the comparison cell further down.
lbfgs_state = closure.get_state()
plot_loss(lbfgs_state, smooth=False)

LBFGS training:   0%|          | 0/1000 [00:00<?, ?it/s]

LBFGS training: 100%|█████████▉| 999/1000 [50:44<00:03,  3.05s/it, Interior=0.000507, Boundary=0.000860, Initial=0.002224, Total=0.003590, Max Error=11.087769, L2 Error=0.013641]    


In [14]:
# Compare the (unsmoothed) loss histories recorded for the two optimizers.
_run_states = [bfgs_state, lbfgs_state]
_run_labels = ["BFGS", "L-BFGS"]
compare_loss_histories(_run_states, _run_labels, smooth=False)