In [1]:
import math
import torch
import numpy as np
import gpytorch
import pandas as pd
from matplotlib import pyplot as plt
import random
from scipy.stats import norm
from scipy.optimize import minimize
import botorch

  from .autonotebook import tqdm as notebook_tqdm


**Define classification model class**

In [2]:
# Exact GP used for classification; its mean and kernel carry a batch
# dimension of size num_classes.
class DirichletGPModel(gpytorch.models.ExactGP):
    """Exact GP whose mean and covariance modules are batched over classes.

    Args:
        train_x: Training inputs, forwarded unchanged to ``ExactGP``.
        train_y: Training targets, forwarded unchanged to ``ExactGP``.
        likelihood: Likelihood object, forwarded unchanged to ``ExactGP``.
        num_classes: Size of the single batch dimension given to the
            constant mean, the RBF kernel and the scale kernel.
    """

    def __init__(self, train_x, train_y, likelihood, num_classes):
        super().__init__(train_x, train_y, likelihood)
        # Every module shares the same batch shape: (num_classes,).
        class_batch = torch.Size((num_classes,))
        self.mean_module = gpytorch.means.ConstantMean(batch_shape=class_batch)
        rbf_kernel = gpytorch.kernels.RBFKernel(batch_shape=class_batch)
        self.covar_module = gpytorch.kernels.ScaleKernel(rbf_kernel, batch_shape=class_batch)

    def forward(self, x):
        """Evaluate mean and covariance at ``x`` and wrap them in a ``MultivariateNormal``."""
        return gpytorch.distributions.MultivariateNormal(
            self.mean_module(x),
            self.covar_module(x),
        )

# Fit the classification GP by minimizing the negative exact marginal log likelihood.
def train_cls_gp(model, likelihood, train_x, training_iter, lr=0.1):
    """Optimize the parameters of ``model`` with Adam.

    Args:
        model: GP model; called as ``model(train_x)`` and must expose
            ``train()`` and ``parameters()``.
        likelihood: Likelihood exposing ``train()`` and a
            ``transformed_targets`` attribute used as the regression targets.
        train_x: Training inputs passed to the model on every iteration.
        training_iter: Number of Adam steps to take (0 leaves the parameters
            untouched).
        lr: Adam learning rate. Defaults to 0.1, the value that was
            previously hard-coded.

    Returns:
        tuple: ``(model, likelihood)`` - the same objects that were passed in.
        Both are left in training mode, so switch them to eval mode before
        making predictions.
    """
    model.train()
    likelihood.train()

    # Only model.parameters() is handed to the optimizer; likelihood
    # parameters are covered only if the model registers the likelihood
    # as a submodule.
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # "Loss" for GPs - the marginal log likelihood.
    mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

    for _ in range(training_iter):
        optimizer.zero_grad()
        output = model(train_x)
        # The mll is summed before negation so a batched (per-class) result
        # collapses to one scalar loss.
        loss = -mll(output, likelihood.transformed_targets).sum()
        loss.backward()
        optimizer.step()

    return model, likelihood


**Read and normalize training data**

In [3]:
# Inputs (design matrix) and responses are two sheets of the same workbook;
# both are read with a two-row column header and the first column as index.
filename = r'../data/olhs_run1.xlsx'
x_pd, y_pd = (
    pd.read_excel(filename, sheet_name=sheet, header=[0, 1], index_col=[0])
    for sheet in ('Initial Design (OLHS)', 'bo_data')
)

# Per-column mean / standard deviation of the raw inputs. They are computed
# but NOT applied to x_pd: the z-score transform (x_pd - xmeans) / xstddv is
# disabled in favour of the min-max scaling below.
xmeans, xstddv = x_pd.mean(axis=0), x_pd.std(axis=0)

# Min-max scale the inputs column by column: each column's observed minimum
# maps to 0 and its maximum to 1 (a constant column would produce NaN).
xmins, xmaxs = x_pd.min(axis=0), x_pd.max(axis=0)
x_pd_normal = (x_pd - xmins).div(xmaxs - xmins)
x_pd = x_pd_normal


# Z-score the first three columns of y_pd (selected by position) and write
# the standardized values back into y_pd in place.
y_obj_pd = y_pd.iloc[:, [0, 1, 2]]
ymeans, ystddv = y_obj_pd.mean(axis=0), y_obj_pd.std(axis=0)
y_obj_pd_normal = y_obj_pd.sub(ymeans).div(ystddv)

y_pd.iloc[:, [0, 1, 2]] = y_obj_pd_normal

In [5]:
objective_properties = ['Polymer Solubility', 'Gelation Enthalpy', 'Shear Modulus']
# Two-objective alternative:
# objective_properties = ['Polymer Solubility', 'Gelation Enthalpy']

# Statistics needed to map normalized values back to physical units.
# The objectives were z-scored, so ymeans / ystddv invert them.
# The inputs were min-max scaled, so xmins / xmaxs are what invert x_pd;
# xmeans / xstddv describe the raw inputs and are kept under their existing
# keys for backward compatibility.
scaling = {'ymeans': ymeans[objective_properties],
           'ystddv': ystddv[objective_properties],
           'xmeans': xmeans,
           'xstddv': xstddv,
           'xmins': xmins,
           'xmaxs': xmaxs}

In [6]:
dtype = torch.double

# Row labels removed from the training set.
validation_idx = [1, 7, 15]

train_x_pd = x_pd.drop(index=validation_idx)
train_y_pd = y_pd.drop(index=validation_idx)


def _as_column(series):
    """Convert one response column into an (n, 1) tensor of the notebook dtype."""
    return torch.tensor(series.values, dtype=dtype).unsqueeze(1)


# Torch tensors: the three continuous objectives are kept as column vectors,
# while the manufacturability column is an integer (long) vector.
train_x = torch.tensor(train_x_pd.values, dtype=dtype)
train_y1 = _as_column(train_y_pd['Polymer Solubility', 'mg/mL'])
train_y2 = _as_column(train_y_pd['Gelation Enthalpy', 'J/g'])
train_y3 = _as_column(train_y_pd['Shear Modulus', 'Kpa'])
train_y4 = torch.tensor(
    train_y_pd['Manufacturability', '--'].values,
    dtype=torch.long,
).squeeze()

In [7]:
# Tensors over every row of x_pd / y_pd (no rows dropped).
# NOTE(review): these are float32 while the training tensors are built with
# torch.double - confirm the mismatch is intended before predicting.
test_x = torch.tensor(x_pd.to_numpy(), dtype=torch.float32)
test_y = torch.tensor(y_pd.to_numpy(), dtype=torch.float32).squeeze()

In [8]:
# Build the classification likelihood from the integer labels, then the GP
# that is fit to the likelihood's transformed targets.
cls_likl = gpytorch.likelihoods.DirichletClassificationLikelihood(
    train_y4,
    alpha_epsilon=1e-4,
    learn_additional_noise=False,
)
cls_model = DirichletGPModel(
    train_x,
    cls_likl.transformed_targets,
    cls_likl,
    num_classes=cls_likl.num_classes,
)