# Quick start

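The following code provides a basic template for a model selection pipeline: it fits one GP model per candidate kernel and reports test metrics for each. Results can optionally be logged to *Weights & Biases* by setting `use_wandb = True`.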

In [4]:
import inferlylens as infly
import gpflow
import numpy as np
import pandas as pd

from plotly.subplots import make_subplots
from inferlycore.models.kernels import kernel_factories, base_kernels
from inferlycore.data.transform import unit_cube_rescaling, standardise_rescaling
from inferlycore.models.diagnostics import plot_slices

# --- configuration ---------------------------------------------------------
file_name = '../datasets/banana.parquet'  # parquet file read by the pipeline
inputs = ['x1', 'x2']                     # columns used as model inputs
outputs = ['y']                           # columns used as model outputs
seed = 44                                 # random_state for the train/test splits

use_wandb = False                         # set to True to log each run with wandb

# --- candidate kernels -----------------------------------------------------
# One kernel per entry of `base_kernels`, built with the 'anova' factory and
# keyed as "anova_<base kernel name>".
anova_factory = kernel_factories['anova']
kernel_library = {
    f"anova_{name_kern}": anova_factory(kern, len(inputs))
    for name_kern, kern in base_kernels.items()
}

# wandb is an optional dependency: only import it when logging is enabled.
if use_wandb:
    import wandb

## load data
# The dataset and its splits do not depend on the kernel, and the fixed
# random_state makes the splits identical on every call, so build them once
# instead of re-reading the parquet file for each kernel.
df = pd.read_parquet(file_name)

dataset = infly.data.Dataset(df, inputs, outputs)
trainset, testset = infly.data.train_test_split(dataset, n_train=1000, random_state=seed)
testset, _ = infly.data.train_test_split(testset, n_train=500, random_state=seed)

X = trainset.data[inputs]
Y = trainset.data[outputs]
Xtest, Ytest = testset.data[inputs].values, testset.data[outputs].values

# Derive the input dimension from `inputs` rather than hard-coding it, so the
# template keeps working when the list of input columns changes.
n_inputs = len(inputs)

for kern_name, kern in kernel_library.items():
    print(kern_name, kern)

    ## fit a GP model
    # create transforms
    input_transform = unit_cube_rescaling(X)
    output_transform = standardise_rescaling(Y)

    # create model (the GPR is trained on the transformed inputs/outputs)
    gpr = gpflow.models.GPR((input_transform.forward(X), output_transform.forward(Y)), kern)
    gpr.likelihood.variance.assign(1e-4)
    gpmodel = infly.models.GPmodel(gpr, input_transform, output_transform)

    # optimize
    opt = gpflow.optimizers.Scipy()
    opt_logs = opt.minimize(
        gpmodel.model.training_loss,
        gpmodel.model.trainable_variables,
        options=dict(maxiter=200),
    )
    print(opt_logs)

    # prediction slices: one subplot column per input (matching subplot_titles),
    # a reference point of zeros and a [0, 1] range for every input
    fig = make_subplots(rows=1, cols=n_inputs, subplot_titles=inputs)
    plot_slices(
        gpmodel,
        fig,
        reference_point=np.zeros((1, n_inputs)),
        xlim=np.array([[0, 1]] * n_inputs).T,
    )

    # compute metrics on the held-out test set
    rmse = infly.models.rmse(gpmodel, [Xtest, Ytest])
    nlpd = infly.models.nlpd(gpmodel, [Xtest, Ytest])

    print("rmse", rmse)
    print("nlpd", nlpd)

    if use_wandb:
        cfg = {
            "data_file": file_name,
            "inputs": inputs,
            "outputs": outputs,
            "kernel": kern_name,
            "seed": seed
        }

        # one wandb run per kernel
        wandb.init(
            #mode="disabled",
            project = "example-pipeline-" + '_'.join(outputs) + '_vs_' + '_'.join(inputs),
            name = kern_name + '_seed' + str(cfg['seed']),
            notes = "run generated as part of inferlycore's documentation",
            config = cfg
        )

        try:
            html = gpflow.utilities.tabulate_module_summary(gpmodel.model, "html")
            wandb.log({"model summary post-train": wandb.Html(html)})

            wandb.log({"plot prediction slices": fig})

            wandb.log({"rmse": rmse})
            wandb.log({"nlpd": nlpd})
        finally:
            # Always close the run, even if logging fails, so the next
            # kernel starts from a fresh run.
            wandb.finish()

anova_Matern12 <gpflow.kernels.base.Product object at 0x7f40a8570d60>
  message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
  success: True
   status: 0
      fun: 904.5282742034594
        x: [-1.640e+00 -4.000e+00 -1.896e+01 -1.431e+00  2.744e+01
            -6.746e+01 -1.276e+00]
      nit: 80
      jac: [ 6.661e-03 -1.447e-02  5.380e-06  3.697e-03 -5.320e-04
             3.296e-30 -8.512e-03]
     nfev: 96
     njev: 96
 hess_inv: <7x7 LbfgsInvHessProduct with dtype=float64>
rmse 0.26424429979369696
nlpd 0.08980575478280547
anova_Matern32 <gpflow.kernels.base.Product object at 0x7f40a85739a0>
  message: CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH
  success: True
   status: 0
      fun: 885.1905956673813
        x: [-1.730e+00 -1.862e+00 -1.562e+01 -1.706e+00  5.040e+00
            -1.903e+01 -1.118e+00]
      nit: 53
      jac: [-1.391e-03 -7.815e-03  7.872e-06  7.196e-03 -1.651e-03
             1.028e-08  2.066e-02]
     nfev: 66
     njev: 66
 hess_inv: <7x7 LbfgsInvHess

All done!

In [5]:
# `gpmodel` is left over from the last iteration of the loop above, so this is
# the model fitted with the final kernel in `kernel_library`.
m = gpmodel.model


In [7]:
# List the sub-kernels that the fitted kernel is composed of.
m.kernel.kernels

ListWrapper([<gpflow.kernels.base.Sum object at 0x7f40ec576680>, <gpflow.kernels.base.Sum object at 0x7f40ec575b40>])

In [9]:
# Name of the top-level kernel object.
m.kernel.name

'product'