# Active learning framework
Example of the active learning framework

In [1]:
import torch
import numpy as np
import pandas as pd
import metrics
import gp_utils
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition.active_learning import qNegIntegratedPosteriorVariance
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

cpu


  from .autonotebook import tqdm as notebook_tqdm


## Load and split the data
The section below is for Case 3. For Cases 1 and 2, use the data-split function in `data_utils` instead.

In [2]:
# Load the grid data, keep rows with set pressure above 5 mTorr, and split it
# into a fixed initial training design (X_train/Y_train) and the remaining
# rows (X_test/Y_test).
df = pd.read_csv('Zr_grid_interpolated_C009_S006.csv', delimiter=",")
targets_cols = ['qcm_1_mass_rate_[ng/cm2s]', 'qcm_2_mass_rate_[ng/cm2s]', 'qcm_3_mass_rate_[ng/cm2s]']
input_features = ['source_1_set_power_[W]', 'set_pressure_[mTorr]']
df = df[df['set_pressure_[mTorr]'] > 5]
X = df[input_features]
X1 = df[input_features[0]] # power
X2 = df[input_features[1]] # pressure
Y = df[targets_cols[0]]  # only the first target column is used here

# Initial design, one row per point as [pow, pres], used in Case 3.
X_init = np.array([[1, 7], [43, 7], [1, 43], [43, 43], [22, 25]], dtype=float)
# Element-wise match of the first initial point against the grid; not used
# further in this cell, kept in case a later cell inspects it.
condition = np.where(X_init[0] == X.values, True, False)

# Locate each initial point in the grid by exact match on every input feature.
# Positions are positional (row number in X.values), not DataFrame index
# labels, because the filter above leaves the index un-reset.
grid_points = X.values
init_idx = []
for point in X_init:
    matches = np.where((grid_points == point).all(axis=1))[0]
    if matches.size != 1:
        raise ValueError(
            f"Initial point {point.tolist()} matches {matches.size} grid rows; "
            "expected exactly 1."
        )
    init_idx.append(int(matches[0]))

X_train = torch.from_numpy(X_init)
# Cast explicitly so the training targets share the float64 dtype of the
# test tensors even if the CSV column is parsed as another numeric type.
Y_train = torch.from_numpy(Y.values[init_idx]).to(torch.float64).unsqueeze(-1)

# Everything that is not part of the initial design becomes the test set.
mask = np.ones(X.shape[0], dtype=bool)
mask[init_idx] = False
X_test = torch.from_numpy(X[mask].values).to(torch.float64)
Y_test = torch.from_numpy(Y[mask].values).to(torch.float64).unsqueeze(-1)

## Active learning

In [None]:
# Active-learning loop: starting from the initial design, repeatedly fit a GP,
# score it on the test set, and add the test point with the highest
# qNegIntegratedPosteriorVariance value to the training data.
acquisition_strategy = 'nipv_smart'
N = 50  # number of acquisition steps per trial
scores_df = pd.DataFrame(
    columns=['nmll', 'rmse'], dtype=object)
gp_params = pd.DataFrame(columns=['lenghtscale', 'variance'])

# The training data is moved to `device` below, so the evaluation data must
# live there too; otherwise prediction, scoring and torch.cat fail on CUDA.
# On CPU, `.to(device)` returns the same tensors.
X_pool = X_test.to(device)
Y_pool = Y_test.to(device)

for trial in range(0, 1):
    print('Trial: ', trial)
    X_pending = None
    budget = N
    X_bald = X_train.to(device)
    Y_bald = Y_train.to(device)

    while budget > 0:
        budget -= 1
        step = N - budget - 1  # 0-based index of the current acquisition step
        print('Budget: ', budget)

        # Refit a fresh GP on all data collected so far.
        model, mll = gp_utils.defineSTGP(X_bald, Y_bald, device=device)
        mll = fit_gpytorch_mll(mll)

        with torch.no_grad():
            model.eval()
            mll.eval()
            prediction = model(X_pool)

        # Record test-set scores for this step.
        scores_df.at[step, 'rmse'] = metrics.calculate_rmse(model, X_pool, Y_pool)
        nmll = -mll(prediction, Y_pool.T)
        print('nmll: ', nmll)
        scores_df.at[step, 'nmll'] = nmll.sum().item()

        # Score every test point as a single-point candidate (n x 1 x d),
        # using the test set itself as the integration points.
        acqf = qNegIntegratedPosteriorVariance(model, X_pool)
        with torch.no_grad():
            acq_vals = acqf(X_pool.unsqueeze(1))
        best_idx = torch.argmax(acq_vals)  # Index of the best point
        candidate = X_pool[best_idx]  # Coordinates of the best point
        print(candidate)

        # NOTE(review): the chosen point stays in the test set, so it keeps
        # being scored and may be selected again - confirm this is intended.
        X_bald = torch.cat([X_bald, candidate.unsqueeze(0)])
        Y_bald = torch.cat(
            [Y_bald, Y_pool[best_idx].unsqueeze(0)])

        # NOTE(review): these are the `raw_*` entries of the state dict,
        # presumably the unconstrained parameter values rather than the
        # transformed lengthscale/noise - confirm before interpreting them.
        state = model.state_dict()
        gp_params.loc[step] = {
            # .cpu() is required before .numpy() when the model is on CUDA.
            'lenghtscale': state['covar_module.raw_lengthscale'].squeeze().detach().cpu().numpy(),
            'variance': state['likelihood.noise_covar.raw_noise'].detach().item()
        }