In [None]:
import torch
from torch import nn
from time import perf_counter_ns
import copy
import pickle
import gc
from simple_network import SimpleNetwork

Setting the `options_case` flag to `True` benchmarks input sizes that match the parameter counts of the following option pricing models:

* 8: Heston model with no dividend yield: $P_{8} = \{\kappa, \theta, \sigma, \rho, v_{0}, K, \tau, r \}$;
* 9: Heston model with continuous dividend yield: $P_{9} = P_{8} \cup \{ q \}$;
* 11: Bates model: $P_{11} = P_{9} \cup \{\lambda_{v}, \mu_{v}\}$;
* 14: Duffie model with correlated jumps only (SVJJ): $P_{14} = P_{9} \cup \{\lambda_{c}, \mu_{cv}, \mu_{cy}, s_{cy}, \rho_{j} \}$;
* 19: Full Duffie model: $P_{19} = P_{14} \cup \{\lambda_{y}, \mu_{y}, s_{y}, \lambda_{v}, \mu_{v} \}$.

If the `options_case` flag is set to `False`, a more generic performance analysis is performed for networks with 16, 32 and 128 input parameters.

In [None]:
# --- Benchmark configuration -------------------------------------------------
num_eval = 10000       # number of seeded evaluations per (input size, architecture) pair
options_case = True    # True: option-pricing input sizes, False: generic input sizes
device = torch.device('cuda') # alternatively torch.device('cpu')

# Input sizes and the tag used in the result file names
params_in, suffix = (
    ([8, 9, 11, 14, 19], 'options') if options_case else ([16, 32, 128], 'generic')
)

# Hidden-layer widths shared by the Jacobian and Hessian architectures
_hidden_widths = (32, 64, 128, 256)

# Jacobian benchmark: four hidden layers of equal width followed by the output layer.
# Output size 1 is currently disabled; prepend 1 to the tuple below to re-enable it.
layer_sizes_jac = [
    [width] * 4 + [n_out]
    for n_out in (4, 16)
    for width in _hidden_widths
]

# Hessian benchmark: two hidden layers of equal width and a scalar output.
layer_sizes_hess = [[width, width, 1] for width in _hidden_widths]

# Activation classes, one per layer; callers slice this list to the network depth.
act_funs = [nn.Sigmoid, nn.Tanh, nn.Sigmoid, nn.Tanh, nn.Sigmoid]

### Comparison of the Jacobian calculations

In [None]:
def run_jacobian_calculations(num_eval, params_in, layer_sizes, act_funs, calc_deriv, device):
    """Compute and time the Jacobian of freshly seeded ``SimpleNetwork`` instances.

    For every seed in ``range(num_eval)`` the global torch seed is set, a random
    input vector and a new network are created, and the Jacobian is evaluated
    either with ``torch.autograd.functional.jacobian`` (``calc_deriv=False``) or
    with the network's own ``get_network_jacobian`` (``calc_deriv=True``).

    On a CUDA device one untimed warm-up call is made first and the timed region
    is bracketed by ``torch.cuda.synchronize`` so that the measurement covers the
    GPU work; the stored Jacobian is the one from the warm-up call. On any other
    device a single call is timed directly.

    Args:
        num_eval: Number of seeds, i.e. independent evaluations.
        params_in: Number of network inputs.
        layer_sizes: Layer widths; the last entry is the output size. A deep copy
            is handed to the network.
        act_funs: Activation classes forwarded to ``SimpleNetwork``.
        calc_deriv: ``True`` to use ``get_network_jacobian`` (under ``no_grad``),
            ``False`` to use autograd.
        device: Device on which inputs, network and results live. Any CUDA device
            (including indexed ones such as ``cuda:0``) selects the synchronised
            timing path.

    Returns:
        Tuple ``(jacobians, times)`` where ``jacobians`` has shape
        ``(num_eval, layer_sizes[-1], params_in)`` on ``device`` and ``times`` is
        a CPU tensor of shape ``(num_eval,)`` holding nanoseconds per evaluation.
    """
    # Compare on the device *type*: torch.device('cuda:0') != torch.device('cuda'),
    # which would silently route indexed CUDA devices to the unsynchronised path.
    device_obj = device if isinstance(device, torch.device) else torch.device(device)
    on_cuda = device_obj.type == 'cuda'

    jacobians = torch.zeros(size=(num_eval, layer_sizes[-1], params_in), device=device)
    times = torch.zeros(size=(num_eval,))
    cuda_runs = 1
    for seed_idx in range(num_eval):
        # Same seed => same input and same network initialisation for both methods
        torch.manual_seed(seed_idx)
        x = torch.rand(params_in, device=device).float().requires_grad_(not calc_deriv)
        model = SimpleNetwork(params_in, copy.deepcopy(layer_sizes), act_funs, calc_deriv, device).to(device)
        if not calc_deriv:
            if on_cuda:
                jac = torch.autograd.functional.jacobian(model, x) # warmup
                torch.cuda.synchronize(device_obj)
                start = perf_counter_ns()
                for _ in range(cuda_runs):
                    _ = torch.autograd.functional.jacobian(model, x)
                torch.cuda.synchronize(device_obj)
                end = perf_counter_ns()
                times[seed_idx] = (end - start) / cuda_runs
            else:
                start = perf_counter_ns()
                jac = torch.autograd.functional.jacobian(model, x)
                end = perf_counter_ns()
                times[seed_idx] = end - start
        else:
            with torch.no_grad():
                if on_cuda:
                    jac = model.get_network_jacobian(x) # warmup
                    torch.cuda.synchronize(device_obj)
                    start = perf_counter_ns()
                    for c_idx in range(cuda_runs):
                        _ = model.get_network_jacobian(x + (c_idx + 1)*1e-10) # s.t. network derivatives are recalculated
                    torch.cuda.synchronize(device_obj)
                    end = perf_counter_ns()
                    times[seed_idx] = (end - start) / cuda_runs
                else:
                    start = perf_counter_ns()
                    jac = model.get_network_jacobian(x)
                    end = perf_counter_ns()
                    times[seed_idx] = end - start
            model.destroy_model()
        jacobians[seed_idx, :, :] = jac.detach().clone()
        del model, jac, x
        if on_cuda:
            # Make sure all work of this iteration has finished before the next one is timed
            torch.cuda.synchronize(device_obj)
    return (jacobians, times)

In [None]:
# Benchmark every (input size, architecture) pair: autograd Jacobian vs. the
# network's own Jacobian, check that both agree, report timings and store them.
for p in params_in:
    for l in layer_sizes_jac:
        # The garbage collector is switched off while timing (presumably to keep
        # collection pauses out of the measurements). try/finally guarantees it is
        # switched back on even if the accuracy assertion or the file write fails;
        # otherwise it would stay disabled for the rest of the kernel session.
        gc.disable()
        try:
            print(f'Running Jacobian calculations with {p} params in, and layer sizes {l}')
            jacobians_pt, t_jac_pt = run_jacobian_calculations(num_eval, p, l, act_funs[0:len(l)], False, device=device)
            jacobians_mat, t_jac_mat = run_jacobian_calculations(num_eval, p, l, act_funs[0:len(l)], True, device=device)

            # Both methods must produce the same Jacobians
            jac_abs_diff_max = torch.max(torch.abs(jacobians_mat - jacobians_pt))
            assert jac_abs_diff_max < 1e-7, f'Jacobians differ by up to {jac_abs_diff_max.item()}'

            print(f'Average time - Jacobian - Pytorch : {torch.mean(t_jac_pt)} nanoseconds')
            print(f'Average time - Jacobian - Matrix : {torch.mean(t_jac_mat)} nanoseconds')
            print(f'Median time - Jacobian - Pytorch : {torch.median(t_jac_pt)} nanoseconds')
            print(f'Median time - Jacobian - Matrix : {torch.median(t_jac_mat)} nanoseconds')

            # Persist the raw per-seed timings and the maximum deviation as numpy data
            times_diff_jac = {
                'times_pt': t_jac_pt.detach().cpu().numpy(),
                'times_mat': t_jac_mat.detach().cpu().numpy(),
                'max_diff': jac_abs_diff_max.detach().cpu().numpy()
            }
            with open(f'rev_times_{suffix}_{p}_{l[0]}_{l[-1]}_{device.type}.p', 'wb') as file:
                pickle.dump(times_diff_jac, file)
        finally:
            gc.enable()
            gc.collect()

### Comparison of the Hessian calculations

In [None]:
def run_hessian_calculations(num_eval, params_in, layer_sizes, act_funs, calc_deriv, device):
    """Compute and time the Hessian of freshly seeded ``SimpleNetwork`` instances.

    For every seed in ``range(num_eval)`` the global torch seed is set, a random
    input vector and a new network are created, and the Hessian is evaluated
    either with ``torch.autograd.functional.hessian`` (``calc_deriv=False``) or
    with the network's own ``get_network_hessians`` (``calc_deriv=True``).

    On a CUDA device one untimed warm-up call is made first and the timed region
    is bracketed by ``torch.cuda.synchronize`` in *both* branches, so that the
    two methods are measured the same way; the stored Hessian is the one from
    the warm-up call. On any other device a single call is timed directly.

    Args:
        num_eval: Number of seeds, i.e. independent evaluations.
        params_in: Number of network inputs.
        layer_sizes: Layer widths; the last entry is the output size. A deep copy
            is handed to the network.
        act_funs: Activation classes forwarded to ``SimpleNetwork``.
        calc_deriv: ``True`` to use ``get_network_hessians`` (under ``no_grad``),
            ``False`` to use autograd.
        device: Device on which inputs, network and results live. Any CUDA device
            (including indexed ones such as ``cuda:0``) selects the synchronised
            timing path.

    Returns:
        Tuple ``(hessians, times)`` where ``hessians`` has shape
        ``(num_eval, params_in, layer_sizes[-1], params_in)`` on ``device`` and
        ``times`` is a CPU tensor of shape ``(num_eval,)`` holding nanoseconds
        per evaluation.
    """
    # Compare on the device *type*: torch.device('cuda:0') != torch.device('cuda'),
    # which would silently route indexed CUDA devices to the unsynchronised path.
    device_obj = device if isinstance(device, torch.device) else torch.device(device)
    on_cuda = device_obj.type == 'cuda'

    hessians = torch.zeros(size=(num_eval, params_in, layer_sizes[-1], params_in), device=device)
    times = torch.zeros(size=(num_eval,))
    cuda_runs = 1
    for seed_idx in range(num_eval):
        # Same seed => same input and same network initialisation for both methods
        torch.manual_seed(seed_idx)
        x = torch.rand(params_in, device=device).float().requires_grad_(not calc_deriv)
        model = SimpleNetwork(params_in, copy.deepcopy(layer_sizes), act_funs, calc_deriv, device).to(device)
        if not calc_deriv:
            if on_cuda:
                hess = torch.autograd.functional.hessian(model, x) # warmup
                torch.cuda.synchronize(device_obj)
                start = perf_counter_ns()
                for _ in range(cuda_runs):
                    _ = torch.autograd.functional.hessian(model, x)
                # CUDA kernels run asynchronously: wait for them before stopping the
                # clock, otherwise the autograd timing excludes pending GPU work.
                torch.cuda.synchronize(device_obj)
                end = perf_counter_ns()
                times[seed_idx] = (end - start) / cuda_runs
            else:
                start = perf_counter_ns()
                hess = torch.autograd.functional.hessian(model, x)
                end = perf_counter_ns()
                times[seed_idx] = end - start
        else:
            with torch.no_grad():
                if on_cuda:
                    hess = model.get_network_hessians(x) # warmup
                    torch.cuda.synchronize(device_obj)
                    start = perf_counter_ns()
                    for c_idx in range(cuda_runs):
                        _ = model.get_network_hessians(x + (c_idx + 1)*1e-10) # s.t. network derivatives are recalculated
                    torch.cuda.synchronize(device_obj)
                    end = perf_counter_ns()
                    times[seed_idx] = (end - start) / cuda_runs
                else:
                    start = perf_counter_ns()
                    hess = model.get_network_hessians(x)
                    end = perf_counter_ns()
                    times[seed_idx] = end - start
            model.destroy_model()
        # Bring both methods' results into the common (in, out, in) layout
        hessians[seed_idx, :, :, :] = hess.view(params_in, layer_sizes[-1], params_in).detach().clone()
        del model, hess, x
        if on_cuda:
            # Make sure all work of this iteration has finished before the next one is timed
            torch.cuda.synchronize(device_obj)
    return (hessians, times)

In [None]:
# Benchmark every (input size, architecture) pair: autograd Hessian vs. the
# network's own Hessian, check that both agree, report timings and store them.
for p in params_in:
    for l in layer_sizes_hess:
        # The garbage collector is switched off while timing (presumably to keep
        # collection pauses out of the measurements). try/finally guarantees it is
        # switched back on even if the accuracy assertion or the file write fails;
        # otherwise it would stay disabled for the rest of the kernel session.
        gc.disable()
        try:
            print(f'Running Hessian calculations with {p} params in, and layer sizes {l}')
            hessians_pt, t_hess_pt = run_hessian_calculations(num_eval, p, l, act_funs[0:len(l)], False, device=device)
            hessians_mat, t_hess_mat = run_hessian_calculations(num_eval, p, l, act_funs[0:len(l)], True, device=device)

            # Both methods must produce the same Hessians
            hess_abs_diff_max = torch.max(torch.abs(hessians_mat - hessians_pt))
            assert hess_abs_diff_max < 1e-7, f'Hessians differ by up to {hess_abs_diff_max.item()}'

            print(f'Average time - Hessian - Pytorch : {torch.mean(t_hess_pt)} nanoseconds')
            print(f'Average time - Hessian - Matrix : {torch.mean(t_hess_mat)} nanoseconds')
            print(f'Median time - Hessian - Pytorch : {torch.median(t_hess_pt)} nanoseconds')
            print(f'Median time - Hessian - Matrix : {torch.median(t_hess_mat)} nanoseconds')

            # Persist the raw per-seed timings and the maximum deviation as numpy data
            times_diff_hess = {
                'times_pt': t_hess_pt.detach().cpu().numpy(),
                'times_mat': t_hess_mat.detach().cpu().numpy(),
                'max_diff': hess_abs_diff_max.detach().cpu().numpy()
            }
            with open(f'rev_times_{suffix}_hess_{p}_{l[0]}_{l[-1]}_{device.type}.p', 'wb') as file:
                pickle.dump(times_diff_hess, file)
        finally:
            gc.enable()
            gc.collect()