## Bayesian Optimisation for Antimicrobial Polymer Discovery

In [150]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import torch
from botorch.models import SingleTaskGP, ModelListGP
from botorch.fit import fit_gpytorch_model
from botorch.utils import standardize
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.acquisition.analytic import UpperConfidenceBound, ProbabilityOfImprovement, ExpectedImprovement
from botorch.optim import optimize_acqf
from botorch.cross_validation import gen_loo_cv_folds
import math
import GPy
import pandas as pd

## Retrieve Training dataset and assign variables
To enable mixed search space, utilise MixedSingleTaskGP which uses a special kernel to combine continuous and categorical data.

In [89]:
data = pd.read_csv('modified_data.csv',usecols=[1,2,3,4,5,6,7,23])
data

Unnamed: 0,type_A,type_B1,type_B2,type_C,composition_A,composition_B1,composition_B2,MIC_PAO1_PA
0,Boc-AEAm,PEAm,,HEAm,0.5,0.30,0.00,32
1,Boc-AEAm,PEAm,,HEAm,0.5,0.30,0.00,64
2,Boc-AEAm,PEAm,,HEAm,0.5,0.30,0.00,64
3,Boc-AEAm,PEAm,,,0.7,0.30,0.00,128
4,Boc-AEAm,PEAm,,,0.7,0.30,0.00,64
...,...,...,...,...,...,...,...,...
156,AAPTAC,PEAm,NIPAm,HEAm,0.3,0.00,0.47,128
157,AAPTAC,PEAm,NIPAm,HEAm,0.3,0.70,0.00,128
158,AAPTAC,PEAm,NIPAm,HEAm,0.3,0.47,0.23,128
159,AAPTAC,PEAm,NIPAm,HEAm,0.3,0.23,0.47,128


In [153]:
train_y_one_tensor = torch.tensor(data.iloc[:,7].values,dtype = float)
train_y = torch.reshape(train_y_one_tensor,(len(train_y_one_tensor),1))
train_x= torch.tensor(data.iloc[:,4:7].values)
best_y = min(train_y)
train_x, train_y, best_y

(tensor([[0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.7000, 0.3000, 0.0000],
         [0.7000, 0.3000, 0.0000],
         [0.7000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.7000, 0.3000, 0.0000],
         [0.7000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.3000, 0.0000],
         [0.5000, 0.

## Generate the next point (regardless of the type)

In [158]:
Surrogate = SingleTaskGP(train_X = train_x, train_Y = train_y)
mll = ExactMarginalLogLikelihood(Surrogate.likelihood, Surrogate)
EI = qExpectedImprovement(model = Surrogate, best_f = best_y)
UCB = UpperConfidenceBound(model = Surrogate, beta = 0.2)
new_point_analytic, _ = optimize_acqf(
    acq_function=EI,
    bounds=torch.tensor([[0.0] * 3, [1.0] * 3]),
    q=1,
    num_restarts=20,
    raw_samples=100,
    options={},
)
new_point_analytic

tensor([[0.5851, 0.1811, 0.0589]])