In [1]:
import pathlib
import os
import sys

# Repository root: two directory levels above the notebook's working directory.
# It is appended to the module search path so that `bofire` can be imported.
pypath = str(pathlib.Path.cwd().parent.parent)
sys.path.append(pypath)

In [29]:
import json
import math
from typing import Optional

import numpy as np
import pandas as pd

from bofire.benchmarks.benchmark import Benchmark
from bofire.domain import Domain
from bofire.domain.features import (
    CategoricalDescriptorInput,
    CategoricalInput,
    ContinuousInput,
    ContinuousOutput,
    InputFeature,
    InputFeatures,
    OutputFeatures,
)
from bofire.domain.objectives import MaximizeObjective, MinimizeObjective
from bofire.utils.enum import CategoricalEncodingEnum
from bofire.models.feature_importance import permutation_importance_hook, combine_permutation_importances

In [3]:
# Encoding requested for both categorical inputs in `input_preprocessing_specs`.
descriptor_encoding = CategoricalEncodingEnum.DESCRIPTOR

# Search space: two descriptor-backed categorical inputs plus three continuous
# process parameters. Every row in `values` belongs to the category at the same
# position in `categories`; its columns follow `descriptors`.
# The trailing comments keep values from a wider descriptor set noted by the
# original author (M3, Macc3, Mdon3, mol_weight, sol); they are not used here.
input_features = [
    CategoricalDescriptorInput(
        key="catalyst",
        categories=["tBuXPhos", "tBuBrettPhos", "AlPhos"],
        descriptors=["area_cat", "M2_cat"],
        values=[
            [460.7543, 67.2057],  # unused: 30.8413, 2.3043, 0, 424.64, 421.25040226
            [518.8408, 89.8738],  # unused: 39.4424, 2.5548, 0, 487.7, 781.11247064
            [819.933, 129.0808],  # unused: 83.2017, 4.2959, 0, 815.06, 880.74916884
        ],
    ),
    CategoricalDescriptorInput(
        key="base",
        categories=["TEA", "TMG", "BTMG", "DBU"],
        descriptors=["area", "M2"],
        values=[
            [162.2992, 25.8165],  # unused: 40.9469, 3.0278, 0, 101.19, 642.2973283
            [165.5447, 81.4847],  # unused: 107.0287, 10.215, 0.0169, 115.18, 534.01544123
            [227.3523, 30.554],  # unused: 14.3676, 1.1196, 0.0127, 171.28, 839.81215
            [192.4693, 59.8367],  # unused: 82.0661, 7.42, 0, 152.24, 1055.82799
        ],
    ),
    # Base equivalents.
    ContinuousInput(key="base_eq", lower_bound=1.0, upper_bound=2.5),
    # Reactor temperature in degrees Celsius.
    ContinuousInput(key="temperature", lower_bound=30, upper_bound=100.0),
    # Residence time in seconds.
    ContinuousInput(key="t_res", lower_bound=60, upper_bound=1800.0),
]

# Both categorical inputs use the descriptor encoding.
input_preprocessing_specs = {
    feature_key: descriptor_encoding for feature_key in ("catalyst", "base")
}

# Two objectives, both bounded to [0, 1] and weighted equally:
# yield is maximized, cost is minimized.
output_features = [
    ContinuousOutput(
        key="yield",
        bounds=[0.0, 1.0],
        objective=MaximizeObjective(w=1.0),
    ),
    ContinuousOutput(
        key="cost",
        bounds=[0.0, 1.0],
        objective=MinimizeObjective(w=1.0),
    ),
]

# Reference point per output key: the lower bound of yield and the upper bound of cost.
ref_point = {"yield": 0.0, "cost": 1.0}

# Bundle the input and output feature lists into a single bofire Domain.
domain = Domain(
    input_features=InputFeatures(features=input_features),
    output_features=OutputFeatures(features=output_features),
)

In [12]:
# Benchmark data directory inside the repository (two levels above the notebook).
datapath = os.path.join(
    str(pathlib.Path.cwd().parent.parent), "bofire", "benchmarks", "data"
)

# Load the experiments in one chain:
#   * the first column becomes the index and file line index 1 (the line directly
#     after the header) is skipped,
#   * columns are renamed to the feature keys used in this notebook,
#   * a constant `valid_yield` column set to 1 is added.
data = (
    pd.read_csv(datapath + "/aniline_cn_crosscoupling.csv", index_col=0, skiprows=[1])
    .rename(columns={"base_equivalents": "base_eq", "yld": "yield"})
    .assign(valid_yield=1)
)
data

Unnamed: 0,catalyst,base,base_eq,temperature,t_res,yield,valid_yield
0,tBuXPhos,DBU,2.183015,30.0,328.717802,0.042833,1
1,tBuXPhos,BTMG,2.190882,100.0,73.331194,0.959690,1
2,tBuXPhos,TMG,1.093138,47.5,75.121297,0.031579,1
3,tBuXPhos,TMG,2.186276,100.0,673.259508,0.766768,1
4,tBuXPhos,TEA,1.108767,30.0,107.541151,0.072299,1
...,...,...,...,...,...,...,...
91,tBuBrettPhos,BTMG,1.100490,100.0,526.484113,1.000808,1
92,tBuBrettPhos,TEA,1.142838,30.0,75.307307,0.005499,1
93,tBuBrettPhos,BTMG,2.192176,30.0,1240.719965,1.019672,1
94,AlPhos,DBU,1.091453,100.0,321.263375,0.959021,1


In [31]:
from bofire.models.torch_models import SingleTaskGPModel

# The GP is trained on the first output feature only (key "yield").
yield_only_outputs = OutputFeatures(features=[output_features[0]])

model = SingleTaskGPModel(
    input_features=InputFeatures(features=input_features),
    output_features=yield_only_outputs,
    input_preprocessing_specs=input_preprocessing_specs,
)
model.fit(experiments=data)

In [33]:
# 5-fold cross-validation of the fitted GP with the permutation-importance hook attached.
# The hook key was misspelled ("pemutation_imprtance"); it is spelled out correctly here.
# The third return value holds the hook results (presumably keyed by the hook name
# given below; confirm against `cross_validate`).
# NOTE(review): the saved output of this cell is
# "InputDataError: Input data contains NaN values."; the cause is not visible in this
# notebook and must be resolved before the results can be used.
train_cv, test_cv, pi = model.cross_validate(
    experiments=data,
    folds=5,
    hooks={"permutation_importance": permutation_importance_hook},
)

InputDataError: Input data contains NaN values.

# Neural Network

In [15]:
import torch
from bofire.models.model import Model
from bofire.models.torch_models import TrainableModel
from bofire.utils.torch_tools import tkwargs

class NeuralNetwork(Model, TrainableModel):
    """Work-in-progress wrapper that exposes a torch module through the bofire
    `Model` / `TrainableModel` base classes.

    Training is not implemented yet; `_predict` requires `self.model` to be set.
    """

    # Wrapped torch module evaluated by `_predict`.
    model: Optional[torch.nn.Module]

    def _fit(self, X: pd.DataFrame, Y: pd.DataFrame):
        """Train the wrapped network on the given inputs and targets.

        Args:
            X: Input data.
            Y: Target data.

        Raises:
            NotImplementedError: Always; the training routine has not been written.
        """
        # The original definition had no body, which made the whole class a syntax
        # error. Fail explicitly until the training loop exists.
        raise NotImplementedError("NeuralNetwork._fit is not implemented yet.")

    def _predict(self, transformed_X: pd.DataFrame):
        """Evaluate the wrapped model on already-transformed inputs.

        Args:
            transformed_X: Input data whose values are passed to the model as a tensor.

        Returns:
            A tuple `(preds, stds)` of numpy arrays: the `mean` of the model output and
            the square root of its `variance`.
        """
        # transform to tensor
        X = torch.from_numpy(transformed_X.values).to(**tkwargs)
        with torch.no_grad():
            # Run the forward pass once and reuse the result for both statistics.
            # NOTE(review): this assumes the forward output exposes `.mean` and
            # `.variance` attributes (posterior-like object); a module returning a
            # plain tensor will not work here. Confirm once the model is defined.
            output = self.model.forward(X=X)  # type: ignore
            preds = output.mean.cpu().detach().numpy()  # type: ignore
            stds = np.sqrt(output.variance.cpu().detach().numpy())  # type: ignore
        return preds, stds

'c:\\Users\\J32616\\Repositories\\bofire'

In [16]:
import torch

# Directory of the stored predictors for the descriptor variant of the benchmark,
# addressed relative to the repository root in `pypath`.
predictor_dir = pypath + "/bofire/benchmarks/models/baumgartner_aniline_cn_crosscoupling_descriptors/"

# Load predictor 0; the saved output shows an OrderedDict of layer weights and biases.
torch.load(predictor_dir + "baumgartner_aniline_cn_crosscoupling_descriptors_predictor_0.pt")

OrderedDict([('input_layer.weight',
              tensor([[ 0.3534,  0.3164, -0.0753,  ...,  0.2180, -0.0378, -0.3686],
                      [ 0.2221,  0.0649,  0.0300,  ...,  0.2445,  0.1847,  0.1897],
                      [-0.0025, -0.2173,  0.1716,  ...,  0.1061, -0.0598, -0.0046],
                      ...,
                      [ 0.0433,  0.3204, -0.1431,  ...,  0.1345, -0.1245, -0.1898],
                      [-0.2835, -0.3151, -0.2693,  ...,  0.0462, -0.0890, -0.2814],
                      [ 0.3000,  0.2080,  0.2653,  ..., -0.3774, -0.3711, -0.1152]])),
             ('input_layer.bias',
              tensor([ 1.9363e-01,  1.4644e-01,  1.6182e-01,  3.2308e-01, -8.0292e-02,
                      -1.1248e-01,  1.8887e-01,  6.1024e-02,  2.4606e-01, -2.0398e-01,
                      -1.6658e-01,  2.5738e-01,  3.4414e-01,  1.3229e-01, -2.0442e-01,
                       3.7890e-01, -1.4301e-01, -7.3960e-02, -7.8199e-02, -4.4210e-02,
                       2.0857e-01, -3.6628e-01, 