# To Balance Or Not To Balance: Testing ReLoBRaLo
#### Author: JP Melo

In this file we assess training performance using loss balancing methods. In particular, we test the ideas shown in [this article](https://arxiv.org/abs/2110.09813) (ReLoBRaLo).

### Imports

In [1]:
from derpinns.nn import *
from derpinns.utils import *
from derpinns.trainer import *
import torch
import kfac

  from kfac.distributed import get_rank


## Parameters

In [2]:
# Seed the NumPy and torch RNGs for reproducibility
np.random.seed(0)
torch.manual_seed(0)

# Global settings shared by every cell below
assets = 1

sampler = "pseudo"
nn_shape = "64x3"
device = torch.device("cpu")
dtype = torch.float32

# Option valuation parameters.
# sigma: one volatility of 0.2 per asset.
# rho: correlation matrix with 1.0 on the diagonal and 0.25 everywhere else.
params = OptionParameters(
    n_assets=assets,
    tau=1.0,
    sigma=np.full(assets, 0.2),
    rho=np.where(np.eye(assets, dtype=bool), 1.0, 0.25),
    r=0.05,
    strike=100,
    payoff=payoff,
)

# Sizes of the dataset to train over: interior and initial sample counts are
# both tied to the boundary sample count, scaled by twice the number of assets
batch_size = 100
total_iter = 5000
boundary_samples = 20000
interior_samples = initial_samples = 2 * assets * boundary_samples

dataset = SampledDataset(
    params,
    interior_samples,
    initial_samples,
    boundary_samples,
    sampler,
    dtype,
    device,
    seed=0,
)

## Training

We train the same model architecture with and without loss balancing. This technique is implemented inside the closure `MultiBalanceDimlessBS` (used here with `mode='RELOBRALO'`).

### With Loss Balancing

In [None]:
# Build the network for the loss-balanced run. The global `dtype` is used
# (instead of a hard-coded torch.float32) so the model always matches the
# dtype handed to the dataset, closure and trainer.
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=dtype,
).apply(weights_init).to(device)

# NOTE: this run uses SSBroyden while the "Without Loss Balancing" run uses
# Adam, so the two runs are not optimizer-matched. Swap in the Adam line below
# for a like-for-like comparison.
# optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
optimizer = SSBroyden(model.parameters(), max_iter=1000)
model.train()

# Alternative balancing closure kept for reference:
# closure = LossBalancingDimlessBS(alpha=torch.tensor(0.90), tau=torch.tensor(1), rho_prob=0.99)
# The fluent chains are parenthesised rather than joined with trailing
# backslashes, so a stray blank line or comment cannot break the statement.
closure = (
    MultiBalanceDimlessBS(mode='RELOBRALO', alpha=1e-2)
    .with_dataset(dataset, loader_opts={'batch_size': batch_size, "shuffle": True, "pin_memory": True})
    .with_model(model)
    .with_device(device)
    .with_dtype(dtype)
)

trainer = (
    PINNTrainer()
    .with_optimizer(optimizer)
    .with_device(device)
    .with_dtype(dtype)
    .with_training_step(closure)
    .with_epochs(total_iter)
)

trainer.train()

SSBroyden training:   0%|          | 1/1000 [00:01<17:49,  1.07s/it, Interior=0.084294, Boundary=0.100329, Initial=5.427269, Total=5.611893, Max Error=234.569397, L2 Error=1.694567]

Loss change is below tolerance, returning loss: 5.295868396759033





In [4]:
# Fetch the state exposed by the training closure and plot its smoothed losses.
# NOTE(review): the recorded output of this cell is an IndexError ("index 0 is
# out of bounds for axis 0 with size 0"); which call raises it is not visible
# here - confirm before relying on the results.
with_lb_state = trainer.closure.get_state()
plot_loss(with_lb_state, smooth=True, smooth_window=10)

# Relative L2 error of the trained model (presumably against Monte Carlo
# prices - verify in compare_with_mc), printed as a percentage.
mc_report = compare_with_mc(
    model,
    params,
    n_prices=200,
    n_simulations=10_000,
    dtype=dtype,
    device=device,
    seed=42,
)
with_lb_results = mc_report['l2_rel_error']
print("L2 Error: ", with_lb_results*100)

IndexError: index 0 is out of bounds for axis 0 with size 0

## Without Loss Balancing

In [None]:
# Build a fresh network for the baseline run. The global `dtype` is used
# (instead of a hard-coded torch.float32) so the model always matches the
# dtype handed to the dataset, closure and trainer.
model = build_nn(
    nn_shape=nn_shape,
    input_dim=assets,
    dtype=dtype,
).apply(weights_init).to(device)

# NOTE: the "With Loss Balancing" run uses SSBroyden while this baseline uses
# Adam, so the two runs are not optimizer-matched.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

model.train()

# Plain closure without loss balancing. The fluent chains are parenthesised
# rather than joined with trailing backslashes, so a stray blank line or
# comment cannot break the statement.
closure = (
    DimlessBS()
    .with_dataset(dataset, loader_opts={'batch_size': batch_size, "shuffle": True, "pin_memory": True})
    .with_model(model)
    .with_device(device)
    .with_dtype(dtype)
)

trainer = (
    PINNTrainer()
    .with_optimizer(optimizer)
    .with_device(device)
    .with_dtype(dtype)
    .with_training_step(closure)
    .with_epochs(total_iter)
)

trainer.train()

Adam training:   0%|          | 20/5000 [00:04<19:10,  4.33it/s, Interior=0.032240, Boundary=0.007304, Initial=0.656010, Total=0.695554, Max Error=75.2324218750, L2 Error=0.5039798021]

In [None]:
# Fetch the state exposed by the baseline closure and plot its smoothed losses
without_lb_state = trainer.closure.get_state()
plot_loss(without_lb_state, smooth=True, smooth_window=10)

# Relative L2 error of the baseline model (presumably against Monte Carlo
# prices - verify in compare_with_mc), printed as a percentage.
mc_report = compare_with_mc(
    model,
    params,
    n_prices=200,
    n_simulations=10_000,
    dtype=dtype,
    device=device,
    seed=42,
)
without_lb_results = mc_report['l2_rel_error']
print("L2 Error: ", without_lb_results*100)

6.2328877/3.9504097

### Compare both runs

In [None]:
# Compare the loss histories of both runs; each label is paired by position
# with the state at the same index.
histories = [with_lb_state, without_lb_state]
labels = ["With Loss Balancing", "Without Loss Balancing"]
compare_loss_histories(histories, labels, smooth=True, smooth_window=50)

Comments: No significant improvements can be seen in this particular use case.