In [1]:
import numpy as np

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF

from dscribe.descriptors import SineMatrix

from autocat.surface import generate_surface_structures
from autocat.adsorption import generate_adsorbed_structures

from autocat.utils import extract_structures

from autocat.learning.predictors import Predictor

In this example, we show how to train a `Predictor` on structures of adsorbates on a Pt(100) surface and then use it to make predictions for adsorbates that were not part of the training set.

In [2]:
# Generate adsorption structures

# Step 1: generate the Pt surface for the requested "100" facet
substrates_dictionary = generate_surface_structures(
    species_list=["Pt"], facets={"Pt": ["100"]}
)

# Step 2: use the first extracted structure as the substrate
substrate = extract_structures(substrates_dictionary)[0]

# Step 3: generate one set of adsorbed structures per listed adsorbate
adsorbed_dictionary = generate_adsorbed_structures(
    surface=substrate, adsorbates=["H", "O", "N", "C", "Na"], use_all_sites=False
)

# Step 4: pull the individual structures out of the generated collection
adsorbed_structures = extract_structures(adsorbed_dictionary)

In [3]:
# Generate labels
# N.B. the labels here are random placeholders for convenience; use actual
# target values (one per structure) to train a meaningful `Predictor`.

# A fixed seed keeps the placeholder labels identical on every
# "Restart Kernel -> Run All", so the notebook stays reproducible.
rng = np.random.default_rng(0)

# Integers drawn from the half-open range [-10, 10), one per training structure
labels = rng.integers(-10, 10, size=len(adsorbed_structures))

In [4]:
# Kernel handed to the Gaussian process regressor: RBF with length scale 0.5
kernel = RBF(length_scale=0.5)

# The regressor and featurizer are passed as classes, each with its own kwargs
predictor_settings = {
    "model_class": GaussianProcessRegressor,
    "model_kwargs": {"kernel": kernel},
    "featurizer_class": SineMatrix,
    "featurization_kwargs": {"design_space_structures": adsorbed_structures},
}
predictor = Predictor(**predictor_settings)

# Show the text summary of the predictor
print(predictor)

+---------+--------------------------------------------------------+
|         |                       Predictor                        |
+---------+--------------------------------------------------------+
|  class  | sklearn.gaussian_process._gpr.GaussianProcessRegressor |
|  kwargs |           {'kernel': RBF(length_scale=0.5)}            |
| is fit? |                         False                          |
+---------+--------------------------------------------------------+
+-----------------------------------+-------------------------------------------+
|                                   |                 Featurizer                |
+-----------------------------------+-------------------------------------------+
|               class               | dscribe.descriptors.sinematrix.SineMatrix |
|               kwargs              |                    None                   |
|            species list           |      ['Na', 'Pt', 'C', 'N', 'O', 'H']     |
|       maximum structure

In [5]:
# Train the predictor on the adsorbed structures and their labels
predictor.fit(training_structures=adsorbed_structures, y=labels)

# Check the fit flag after training
print(predictor.is_fit)

True


In [6]:
# Build test structures on the same substrate, using adsorbates
# that were not included in the training set
test_dictionary = generate_adsorbed_structures(
    surface=substrate, adsorbates=["S", "Li", "P"], use_all_sites=False
)

# Pull the individual test structures out of the generated collection
test_structures = extract_structures(test_dictionary)

In [7]:
# Make predictions on unseen data
# `predict` returns two arrays here: the predictions and their uncertainties
predictions, uncertainties = predictor.predict(testing_structures=test_structures)

# Print the shape of each returned array
for result in (predictions, uncertainties):
    print(result.shape)

(3,)
(3,)
