# Do Nets Matter?
#### Author: JP Melo

In this notebook we explore how different network shapes affect the performance and accuracy of the model. We use the same dataset as before, but build networks with different shapes and compare their performance.

### Imports

In [14]:
from derpinns.nn import *
from derpinns.utils import *
from derpinns.trainer import *
# Explicit imports stay after the star imports so these names cannot be shadowed by them.
import kfac
import numpy as np
import torch

## Parameters

In [None]:
# Seed torch and NumPy so repeated runs draw the same random numbers.
torch.manual_seed(0)
np.random.seed(0)

# Settings shared by every experiment in this notebook.
assets = 3
sampler = "pseudo"
device = torch.device("cpu")
dtype = torch.float32

# Option valuation parameters: every asset gets sigma = 0.2, and rho has a
# unit diagonal with 0.25 in every off-diagonal entry.
params = OptionParameters(
    n_assets=assets,
    tau=1.0,
    sigma=np.full(assets, 0.2),
    rho=0.25 * np.ones((assets, assets)) + 0.75 * np.eye(assets),
    r=0.05,
    strike=100,
    payoff=payoff,
)

# Sample counts for each training stage. In both stages the interior and
# initial sets hold 2 * assets times as many samples as the boundary set.
adam_batch_size = 200
adam_total_iter = 200
adam_boundary_samples = 20_000
adam_interior_samples = adam_initial_samples = 2 * assets * adam_boundary_samples

lbfgs_boundary_samples = 200
lbfgs_interior_samples = lbfgs_initial_samples = 2 * assets * lbfgs_boundary_samples

## Training

In this case, we use the full training pipeline, as the idea is to analyze the expressiveness of the model and not the training process.

### Vanilla NN

In [None]:
# Build the plain feed-forward network for this experiment.
nn_shape = "64x2"
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    # Use the notebook-wide dtype (as the SPINN and NN+Ansatz cells do)
    # instead of a hard-coded torch.float32.
    dtype=dtype,
).apply(weights_init).to(device)
model.train()

# ---- Stage 1: Adam with a K-FAC preconditioner on mini-batches ----
dataset = SampledDataset(
    params, adam_interior_samples, adam_initial_samples, adam_boundary_samples, sampler, dtype, device, seed=0)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
preconditioner = kfac.preconditioner.KFACPreconditioner(model)

# Parenthesised chains replace the backslash continuations; the original
# trainer chain ended with a dangling "\" right before a blank line.
closure = (
    DimlessBS()
    .with_dataset(dataset, loader_opts={'batch_size': adam_batch_size, "shuffle": True, "pin_memory": True})
    .with_model(model)
    .with_device(device)
    .with_dtype(dtype)
)

trainer = (
    PINNTrainer()
    .with_optimizer(optimizer)
    .with_device(device)
    .with_dtype(dtype)
    .with_training_step(closure)
    .with_preconditioner(preconditioner)
    .with_epochs(adam_total_iter)
)

# first training stage
trainer.train()

# ---- Stage 2: SSBroyden on a freshly sampled, smaller dataset ----
# we create new samples for the second stage
dataset = SampledDataset(
    params, lbfgs_interior_samples, lbfgs_initial_samples, lbfgs_boundary_samples, sampler, dtype, device, seed=0)

optimizer = SSBroyden(
    model.parameters(),
    max_iter=2_000,
)
batch_size = len(dataset)  # we use all samples

closure = closure.with_dataset(
    dataset, loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True})

# NOTE(review): the stage-1 trainer is reused, so the K-FAC preconditioner set
# above presumably stays attached during the SSBroyden stage — confirm intended.
trainer = trainer.with_optimizer(optimizer).with_training_step(closure)

# second training stage
trainer.train()

Adam training: 100%|██████████| 200/200 [01:36<00:00,  2.07it/s, Interior=0.000533, Boundary=0.001094, Initial=0.000956, Total=0.002583, Max Error=159.1230773926, L2 Error=0.2527151704]
SSBroyden training: 100%|██████████| 2000/2000 [17:10<00:00,  1.94it/s, Interior=0.000008, Boundary=0.000005, Initial=0.000015, Total=0.000028, Max Error=8.701965, L2 Error=0.010926] 


In [17]:
# Keep the closure's state for the final comparison and plot its smoothed loss.
with_vanilla = trainer.closure.get_state()
plot_loss(with_vanilla, smooth=True, smooth_window=50)

# Run compare_with_mc and report its 'l2_rel_error' entry scaled by 100.
vanilla_mc = compare_with_mc(
    model,
    params,
    n_prices=200,
    n_simulations=10_000,
    dtype=dtype,
    device=device,
    seed=42,
)
vanilla_l2 = vanilla_mc['l2_rel_error']
print("L2 Error: ", vanilla_l2*100)

L2 Error:  2.2215905


## With SPINN

In [24]:
# Build the net to be used.
# `assets` is taken from the parameters cell and is deliberately NOT reassigned
# here: `params` and the sample counts were derived from that value, so
# overriding it locally could silently desynchronise the model from them
# (and from the cells that run afterwards).
model = SPINN(n_layers=2, input_dim=assets+1, hidden_dim=15,
              output_dim=15, dtype=dtype).apply(weights_init).to(device)

# ---- Stage 1: Adam with a K-FAC preconditioner on mini-batches ----
dataset = SampledDataset(
    params, adam_interior_samples, adam_initial_samples, adam_boundary_samples, sampler, dtype, device, seed=0)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
preconditioner = kfac.preconditioner.KFACPreconditioner(model)

# Parenthesised chains replace the backslash continuations; the original
# trainer chain ended with a dangling "\" right before a blank line.
closure = (
    DimlessBS()
    .with_dataset(dataset, loader_opts={'batch_size': adam_batch_size, "shuffle": True, "pin_memory": True})
    .with_model(model)
    .with_device(device)
    .with_dtype(dtype)
)

trainer = (
    PINNTrainer()
    .with_optimizer(optimizer)
    .with_device(device)
    .with_dtype(dtype)
    .with_training_step(closure)
    .with_preconditioner(preconditioner)
    .with_epochs(adam_total_iter)
)

# first training stage
trainer.train()

# ---- Stage 2: SSBroyden on a freshly sampled, smaller dataset ----
# we create new samples for the second stage
dataset = SampledDataset(
    params, lbfgs_interior_samples, lbfgs_initial_samples, lbfgs_boundary_samples, sampler, dtype, device, seed=0)

# NOTE(review): only this experiment passes initial_scale=True; the Vanilla and
# NN+Ansatz cells build SSBroyden without it — confirm the difference is
# intended, since it affects how comparable the three runs are.
optimizer = SSBroyden(
    model.parameters(),
    max_iter=2_000,
    initial_scale=True,
)

batch_size = len(dataset)  # we use all samples

closure = closure.with_dataset(
    dataset, loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True})

trainer = trainer.with_optimizer(optimizer).with_training_step(closure)

# second training stage
trainer.train()

Adam training: 100%|██████████| 200/200 [01:38<00:00,  2.03it/s, Interior=0.240363, Boundary=1.156865, Initial=147.057205, Total=148.454433, Max Error=1239.1752929688, L2 Error=2.4092340469]
SSBroyden training:  67%|██████▋   | 1349/2000 [18:06<08:44,  1.24it/s, Interior=0.000077, Boundary=0.000130, Initial=0.000611, Total=0.000817, Max Error=13.958313, L2 Error=0.018678]   


In [26]:
# Keep the closure's state for the final comparison and plot its smoothed loss.
with_spinn = trainer.closure.get_state()
plot_loss(with_spinn, smooth=True, smooth_window=50)

# Run compare_with_mc and report its 'l2_rel_error' entry scaled by 100.
spinn_mc = compare_with_mc(
    model,
    params,
    n_prices=200,
    n_simulations=10_000,
    dtype=dtype,
    device=device,
    seed=42,
)
spinn_l2 = spinn_mc['l2_rel_error']
print("L2 Error: ", spinn_l2*100)

L2 Error:  1.507262


## With NN+Ansatz

In [20]:
# Network whose output goes through the ansatz; built with the shared dtype/device.
model = NNWithAnsatz(
    n_layers=2,
    input_dim=assets+1,
    hidden_dim=64,
    output_dim=1,
    dtype=dtype,
).apply(weights_init).to(device)

# ---- Stage 1: Adam with a K-FAC preconditioner on mini-batches ----
dataset = SampledDataset(
    params,
    adam_interior_samples,
    adam_initial_samples,
    adam_boundary_samples,
    sampler,
    dtype,
    device,
    seed=0,
)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
preconditioner = kfac.preconditioner.KFACPreconditioner(model)

closure = (
    DimlessBS()
    .with_dataset(dataset, loader_opts={'batch_size': adam_batch_size, "shuffle": True, "pin_memory": True})
    .with_model(model)
    .with_device(device)
    .with_dtype(dtype)
)

trainer = (
    PINNTrainer()
    .with_optimizer(optimizer)
    .with_device(device)
    .with_dtype(dtype)
    .with_training_step(closure)
    .with_preconditioner(preconditioner)
    .with_epochs(adam_total_iter)
)

# Run the first (Adam) stage.
trainer.train()

# ---- Stage 2: SSBroyden on a freshly sampled, smaller dataset ----
dataset = SampledDataset(
    params,
    lbfgs_interior_samples,
    lbfgs_initial_samples,
    lbfgs_boundary_samples,
    sampler,
    dtype,
    device,
    seed=0,
)

optimizer = SSBroyden(model.parameters(), max_iter=2_000)

# One batch spanning the whole dataset, without shuffling.
batch_size = len(dataset)

closure = closure.with_dataset(
    dataset,
    loader_opts={'batch_size': batch_size, "shuffle": False, "pin_memory": True},
)

trainer = trainer.with_optimizer(optimizer).with_training_step(closure)

# Run the second (SSBroyden) stage.
trainer.train()

Adam training: 100%|██████████| 200/200 [01:31<00:00,  2.18it/s, Interior=0.000607, Boundary=0.001930, Initial=0.001917, Total=0.004454, Max Error=6.6287689209, L2 Error=0.0118496902]
SSBroyden training: 100%|██████████| 2000/2000 [17:27<00:00,  1.91it/s, Interior=0.000001, Boundary=0.000001, Initial=0.000002, Total=0.000004, Max Error=15.702744, L2 Error=0.018892]


In [21]:
# Keep the closure's state for the final comparison and plot its smoothed loss.
with_anzats = trainer.closure.get_state()
plot_loss(with_anzats, smooth=True, smooth_window=50)

# Run compare_with_mc and report its 'l2_rel_error' entry scaled by 100.
anzats_mc = compare_with_mc(
    model,
    params,
    n_prices=200,
    n_simulations=10_000,
    dtype=dtype,
    device=device,
    seed=42,
)
anzats_l2 = anzats_mc['l2_rel_error']
print("L2 Error: ", anzats_l2*100)

L2 Error:  0.64759606


### Compare runs

In [25]:
# Pair each run's saved state with its plot label; dict insertion order keeps
# the histories and labels aligned.
runs = {
    "Vanilla": with_vanilla,
    "SPINN": with_spinn,
    "NN+Ansatz": with_anzats,
}
compare_error_histories(
    list(runs.values()),
    labels=list(runs.keys()),
    smooth=False
)

The neural network with the modified output (ansatz) reaches good accuracy very quickly, but then gets stuck and is unable to improve further. The SPINN model is able to reach better accuracy in the long run, but it requires more computation.