# Active Learning Deep Gaussian Process Regression Monte Carlo Simulation

https://docs.gpytorch.ai/en/latest/examples/01_Exact_GPs/Simple_GP_Regression.html

https://docs.gpytorch.ai/en/stable/examples/06_PyTorch_NN_Integration_DKL/KISSGP_Deep_Kernel_Regression_CUDA.html

In [1]:
import numpy as np
import torch
from scipy.special import ndtri
import random

from core.SNDGPR.train import train_model
from core.MCS import MC_sampling_plan, MC_prediction, estimate_Pf
from core.bay_opt.K_fold_train import kfold_train
from core.bay_opt.bayesian_optimization import bayesian_optimization
from core.bay_opt.optimization_variables import optimization_variables

from core.utils import load_core_modules, load_example_modules, sample_info, print_info, \
    save_bests, plot_losses, min_max_normalization, save_x_added, evaluate_g, \
    pickle_save, pickle_load, results_plot, results_print

In [2]:
# Reproducibility: a single seed shared by every random number generator used
SEED = 42

# Seed, in this order: NumPy, PyTorch, PyTorch CUDA, PyTorch CUDA on all
# devices (relevant for multi-GPU runs), and Python's built-in random module.
for _seed_rng in (
    np.random.seed,
    torch.manual_seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
    random.seed,
):
    _seed_rng(SEED)

# Reproducibility in cuDNN through PyTorch: benchmark mode off,
# deterministic mode on.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

### Set parameters

In [3]:
# --- Sample sizes ---
# Number of points for Monte Carlo (note: 1e6 is a float literal)
N_MC = 1e6
# Initial number of points
N = 48
# Number of infill points
N_INFILL = 12

# --- Surrogate training ---
# Epochs for training the DGPR
TRAINING_ITERATIONS = 1000
# Learning rate for training the model with Adam
LEARNING_RATE = 0.01
# K-Fold split (K = VALIDATION_SPLIT)
VALIDATION_SPLIT = 6
SPECTRAL_NORMALIZATION = True

# --- Reliability analysis set-up ---
# Alpha for confidence bounds
ALPHA = 0.05
# Example to run
EXAMPLE = 'collapse_simulation'
# Initial sampling plan generation strategy
SAMPLING_PLAN_STRATEGY = 'LHS'
# Learning function
LEARNING_FUNCTION = 'U'
# Stopping criteria
CONVERGENCE_FUNCTION = 'stop_Pf'

Set the parameters for Bayesian optimization

In [4]:
# Initial and infill sample counts passed to the Bayesian optimization
N_INITIAL_EGO = 25
N_INFILL_EGO = 15

# Number of hyperparameters to optimize
DIM_EGO = 3
TRAINING_ITERATIONS_EGO = 10000

# One (0, 1) interval per optimized hyperparameter: the number of (0, 1)
# tuples has to be equal to DIM_EGO.
BOUNDS_BSA = ((0, 1),) * DIM_EGO

# Adjust the BSA population size and epochs based on the size of the search
# space defined by BOUNDS_BAY_OPT: BSA_POPSIZE * BSA_EPOCH should be much
# larger than that search space.
BSA_POPSIZE = 20
BSA_EPOCH = 200

# Bounds for L, r and act_fun, in that order.
# For more activation functions, check optimization_variables.py
# and adjust the nonlinearity of the weight initialization in SNDGPR.py
BOUNDS_BAY_OPT = [[1, 5], [2, 10], [0, 4]]

Load random variables and limit state function dynamically

In [5]:
# Look up the random variables and the limit state function that belong to
# the example named by EXAMPLE.
RVs, limit_state_function = load_example_modules(EXAMPLE)

Load core functions dynamically

In [None]:
# Resolve the callables selected by the strategy names configured above:
# the initial sampling plan, the learning function (with its evaluator) and
# the convergence function.
(
    initial_sampling_plan,
    learning_function,
    evaluate_lf,
    convergence_function,
) = load_core_modules(SAMPLING_PLAN_STRATEGY, LEARNING_FUNCTION, CONVERGENCE_FUNCTION)

In [7]:
# Run configuration collected in one dictionary (it is later handed to
# pickle_save, results_print and results_plot). The strategy entries hold the
# configured names (strings), not the loaded callables.
Params = {
    'RVs': RVs,
    'N_MC': N_MC,
    'training_iterations': TRAINING_ITERATIONS,
    'N': N,
    'N_added': N_INFILL,
    'alpha': ALPHA,
    'limit_state_function': EXAMPLE,
    'initial_sampling_plan': SAMPLING_PLAN_STRATEGY,
    'learning_function': LEARNING_FUNCTION,
    'convergence_function': CONVERGENCE_FUNCTION,
    'seed': SEED,
}

### Sample points

In [None]:
# Initial design: N points for the random variables RVs, generated by the
# sampling plan selected through SAMPLING_PLAN_STRATEGY and seeded with SEED.
x = initial_sampling_plan(N, RVs, SEED)

In [19]:
# Monte Carlo candidate pool (N_MC points); infill points are later picked
# from it during active learning.
# NOTE(review): no seed is passed here — presumably this relies on the global
# seeds set earlier; confirm inside MC_sampling_plan.
x_candidate = MC_sampling_plan(N_MC, RVs)

In [None]:
# Report on both sampling plans; the labels and the samples are passed in
# matching order (initial plan first, Monte Carlo pool second).
sample_info(RVs, [SAMPLING_PLAN_STRATEGY, 'Monte Carlo'], [x, x_candidate])

## Active learning

In [None]:
# Number of input dimensions, taken from the second axis of the initial design
data_dim = x.shape[1]

# Per-iteration history: Pf estimate, Pf_plus, Pf_minus
estimate_Pf_all, estimate_Pf_allp, estimate_Pf_allm = [], [], []
# Per-iteration history: number of infill points added so far
estimate_N_samples_added = []

# Loop state: iteration counter, infill counter and convergence flag
it = 0
N_samples_added_total = 0
converged = False

In [None]:
# Active-learning loop. Each pass (re)trains the surrogate, predicts over the
# Monte Carlo candidates, picks one infill candidate with the learning
# function, estimates Pf, checks the stopping rules, and finally evaluates the
# limit state at the new point and appends it to the design.
# NOTE(review): `g` (the responses matching the design `x`) is read inside this
# loop but is not assigned in any cell visible in this notebook — confirm where
# it is created before the first pass.
while True:
    print(f'\nIteration {it}')
    
    if it == 0:
        # First pass: Bayesian optimization over the hyperparameters bounded by
        # BOUNDS_BAY_OPT; it also returns the trained model and its data splits.
        f_EGO, x_EGO, model, likelihood, train_losses, val_losses, train_x, val_x, train_g, val_g, x_max, x_min \
            = bayesian_optimization(
                x, N_INITIAL_EGO, N_INFILL_EGO, DIM_EGO, TRAINING_ITERATIONS_EGO, BOUNDS_BSA, BSA_POPSIZE, BSA_EPOCH, SEED, \
                TRAINING_ITERATIONS, BOUNDS_BAY_OPT, SPECTRAL_NORMALIZATION
                )
    else:
        # Later passes: reuse the hyperparameter set with the lowest f_EGO and
        # retrain on the enlarged design with K-fold training.
        layer_sizes, act_fun = optimization_variables(BOUNDS_BAY_OPT, x_EGO[torch.argmin(f_EGO), :], x, SPECTRAL_NORMALIZATION)
        
        # `fold` is returned by kfold_train but is not used in this loop.
        _, _, model, likelihood, train_losses, val_losses, train_x, val_x, train_g, val_g, x_max, x_min, fold = kfold_train(
            x, g, x_candidate, TRAINING_ITERATIONS, LEARNING_RATE, layer_sizes, act_fun, train_model, MC_prediction, evaluate_lf, estimate_Pf, learning_function,
                N, N_MC, ALPHA, SPECTRAL_NORMALIZATION, n_splits=VALIDATION_SPLIT, SEED=SEED
            )
    
    # Save variables and plot loss
    save_bests(model, likelihood, train_losses, val_losses, x, train_x, val_x, train_g, val_g,
               x_EGO, f_EGO, x_max, x_min, it, BOUNDS_BAY_OPT, SPECTRAL_NORMALIZATION, EXAMPLE)
    plot_losses(train_losses, val_losses, it)
    
    # Predict the responses of the Monte Carlo candidates (points already added
    # to the design have been removed from x_candidate at the end of each pass)
    x_candidate_normalized = min_max_normalization(x_max, x_min, x_candidate)
    preds = MC_prediction(model, likelihood, x_candidate_normalized)
    
    # Evaluate learning function
    g_mean, gs, ind_lf = evaluate_lf(preds, learning_function)
    
    # Select the additional sample: the candidate at the index returned by the
    # learning-function evaluation, taken from the un-normalized pool
    x_added = x_candidate[ind_lf, :]
    x_added = x_added.view(1, data_dim)
    # x_added = x_added * (x_max - x_min) + x_min  # undo normalization
    
    save_x_added(x_added, it, EXAMPLE)  # Save the NumPy array to a .mat file
    
    # Estimate Pf
    Pf, Pf_plus, Pf_minus = estimate_Pf(g, g_mean, gs, N, N_MC, ALPHA)
    
    estimate_Pf_all.append(Pf)
    estimate_Pf_allp.append(Pf_plus)
    estimate_Pf_allm.append(Pf_minus)
    estimate_N_samples_added.append(N_samples_added_total)
    
    # Print some info
    print_info(N, N_INFILL, it, Pf, Pf_plus, Pf_minus)
    
    # Check whether the maximum number of infill points has been added
    if N_samples_added_total >= N_INFILL: break
    it += 1
    
    # Convergence criterion: stop only when it is met on two consecutive
    # checks. The first hit sets `converged`; a hit on the next pass breaks,
    # a miss resets the flag.
    if converged and N_samples_added_total != 0:
        if convergence_function(g, g_mean, gs, N, N_MC): break
        converged = False
    else:
        converged = convergence_function(g, g_mean, gs, N, N_MC)
    
    # Evaluate the limit state at the selected point. `it` was already
    # incremented above, so the index passed here is one more than the index
    # that was given to save_x_added for the same point.
    g_added = evaluate_g(x_added, it, limit_state_function, EXAMPLE)
    
    # Append the new point and its response to the design, and remove the
    # point from the candidate pool.
    x = torch.cat((x, x_added), 0)
    g = torch.cat((g, g_added), 0)
    x_candidate = torch.cat((x_candidate[:ind_lf], x_candidate[ind_lf+1:]))
    N_samples_added_total = N_samples_added_total + 1

## Store results

In [38]:
# Store results
# Final failure-probability estimate: failures predicted for the Monte Carlo
# candidates (g_mean <= 0) plus failures observed at the infill points.
# The active-learning loop appends every infill response to the end of g, so,
# assuming g starts with the N initial responses, the infill entries begin at
# 0-based index N. Slicing from N+1 would leave the first infill point out of
# the count.
estimate_Pf_0 = (torch.sum(g_mean <= 0) + torch.sum(g[N:] <= 0))/N_MC

# Coefficient of variation of the Monte Carlo estimate:
# sqrt((1 - Pf) / (Pf * N_MC))
estimate_CoV = torch.sqrt((1-estimate_Pf_0) / estimate_Pf_0 / N_MC)

# Store the results
Results = {
    'Pf': estimate_Pf_0,
    # Reliability index: Beta = -ndtri(Pf)
    'Beta': -ndtri(estimate_Pf_0),
    'CoV': estimate_CoV,
    # Initial design points plus the infill points that were added
    'Model_Evaluations': N_samples_added_total + N,
    # Two-sided (1 - ALPHA) interval: Pf * (1 + z * CoV), lower bound first
    'Pf_CI': estimate_Pf_0 * np.array([
        1 + ndtri(ALPHA/2)*estimate_CoV,
        1 + ndtri(1-ALPHA/2)*estimate_CoV
        ]),
    }
# Beta decreases as Pf increases, so the transformed bounds are flipped to keep
# the lower bound first.
Results['Beta_CI'] = torch.flip(-ndtri(Results['Pf_CI']), [0])

# Per-iteration history and the final data sets
History = {
    'Pf': estimate_Pf_all,
    'Pf_Upper': estimate_Pf_allp,
    'Pf_Lower': estimate_Pf_allm,
    'N_Samples': estimate_N_samples_added,
    'N_Init': N,
    'X': x,
    'G': g,
    'MC_Sample': x_candidate,
}

## SAVE

In [40]:
# Persist the results, the iteration history and the run parameters for this
# example.
pickle_save(Results, History, Params, EXAMPLE)

## Display results and plot

In [None]:
# Print a summary based on the results, the history and the run parameters.
results_print(Results, History, Params)

In [None]:
# Plot the results for this example from the same three dictionaries.
results_plot(Results, History, Params, EXAMPLE)

## LOAD

In [44]:
# Results, History, Params = pickle_load(EXAMPLE)