In [1]:
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path
from dataclasses import dataclass, field
from mbeml.constants import LigandFeatures, TargetProperty, unique_cores
from mbeml.featurization import data_prep
from mbeml.metrics import (
    mean_absolute_error,
    r2_score,
    mean_negative_log_likelihood,
    max_absolute_error,
    std_absolute_error,
)

2024-06-24 17:54:07.673554: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-06-24 17:54:07.675647: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-24 17:54:07.707452: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-24 17:54:07.707996: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Root folders, relative to this notebook, for the CSV splits and the saved models.
data_dir = Path("../../data/")
model_dir = Path("../../models/")

# One DataFrame per split, keyed by the split name used throughout the notebook.
data_sets = {
    split_name: pd.read_csv(data_dir / csv_name)
    for split_name, csv_name in [
        ("train", "training_data.csv"),
        ("validation", "validation_data.csv"),
        ("composition_test", "composition_test_data.csv"),
        ("ligand_test", "ligand_test_data.csv"),
    ]
}

In [3]:
def _zeros_per_data_set():
    """Build a fresh ``{split name: array}`` dict of zero placeholders.

    Each array has one row per row of the corresponding frame in
    ``data_sets`` and four columns.
    """
    placeholders = {}
    for split_name, frame in data_sets.items():
        placeholders[split_name] = np.zeros([len(frame), 4])
    return placeholders


@dataclass
class Experiment:
    """A saved model together with the per-split results computed for it."""

    # Base name of the saved model on disk (a folder for NNs, "<name>.pkl" otherwise).
    name: str
    # Feature set handed to data_prep when building the model inputs.
    features: LigandFeatures
    # Target property; its lower-cased name is the model sub-folder.
    target: TargetProperty = TargetProperty.ORBITALS
    # True for models loaded through Keras, False for pickled models.
    is_nn: bool = False
    # Per-split predicted values; zero placeholders until the models are evaluated.
    predictions: dict = field(default_factory=_zeros_per_data_set)
    # Per-split predicted uncertainties, with the same layout as `predictions`.
    uncertainties: dict = field(default_factory=_zeros_per_data_set)

In [4]:
# (model name, feature set, is_nn) for every saved model that is evaluated.
# The order matters: later cells address entries by position (experiments[0],
# experiments[4], experiments[5]).
experiments = [
    Experiment(name=model_name, features=feature_set, is_nn=uses_keras)
    for model_name, feature_set, uses_keras in [
        ("krr_standard_racs", LigandFeatures.STANDARD_RACS, False),
        ("krr_two_body", LigandFeatures.LIGAND_RACS, False),
        ("krr_three_body", LigandFeatures.LIGAND_RACS, False),
        ("nn_standard_racs", LigandFeatures.STANDARD_RACS, True),
        ("nn_two_body", LigandFeatures.LIGAND_RACS, True),
        ("nn_three_body", LigandFeatures.LIGAND_RACS, True),
    ]
]

In [5]:
# Fill experiment.predictions / experiment.uncertainties for every model and split.
for experiment in experiments:
    # All models for one target live in a sub-folder named after the target.
    target_dir = model_dir / experiment.target.name.lower()

    # Load the model once per experiment. It does not depend on the data set,
    # so re-reading it from disk inside the inner loop was wasted work.
    if experiment.is_nn:
        model = tf.keras.models.load_model(target_dir / experiment.name)
    else:
        # NOTE(security): pickle.load can execute arbitrary code. Only point
        # model_dir at model files from a trusted source.
        with open(target_dir / f"{experiment.name}.pkl", "rb") as fin:
            model = pickle.load(fin)

    for df_name, data_set in data_sets.items():
        # Only the inputs X are needed here; the targets y are read directly
        # from the data frames when the metrics are evaluated.
        X, y = data_prep(
            data_set, experiment.features, experiment.target, experiment.is_nn
        )
        # Both model kinds return a (mean, std) pair; only the keyword differs.
        if experiment.is_nn:
            y_mean, y_std = model.predict(X, verbose=0)
        else:
            y_mean, y_std = model.predict(X, return_std=True)
        experiment.predictions[df_name] = y_mean
        experiment.uncertainties[df_name] = y_std

2024-06-24 17:54:53.180448: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,7]
	 [[{{node inputs}}]]
2024-06-24 17:54:53.182623: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder' with dtype float and shape [?,7]
	 [[{{node Placeholder}}]]
2024-06-24 17:54:53.212839: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,7]
	 [[{{node inputs}}]]
2024-06-24 1

In [6]:
def evaluate_metric(metric, requires_uncertainty=False, transformation=None):
    """Score every experiment on every data set with ``metric``.

    Args:
        metric: Callable invoked as ``metric(y_true, y_pred)`` or, when
            ``requires_uncertainty`` is true, ``metric(y_true, y_pred, y_std)``.
        requires_uncertainty: Whether to pass the stored uncertainties as a
            third argument.
        transformation: Optional callable applied to each array before it is
            handed to ``metric`` (e.g. a column selector). ``None`` leaves the
            arrays untouched.

    Returns:
        A DataFrame with one row per experiment name and one column per key of
        ``data_sets``.
    """

    def prepare(values):
        # No transformation given: pass the array through unchanged.
        return values if transformation is None else transformation(values)

    table = {}
    for exp in experiments:
        scores = {}
        for split_name, frame in data_sets.items():
            metric_args = [
                prepare(frame[exp.target.full_name()].values),
                prepare(exp.predictions[split_name]),
            ]
            if requires_uncertainty:
                metric_args.append(prepare(exp.uncertainties[split_name]))
            scores[split_name] = metric(*metric_args)
        table[exp.name] = scores
    return pd.DataFrame.from_dict(table, orient="index")

In [7]:
def metal_dependence(
    key: str,
    metric=mean_absolute_error,
    requires_uncertainty=False,
    transformation=None,
):
    """Break a metric down by metal/oxidation-state subsets of one data set.

    Args:
        key: Name of the data set in ``data_sets`` to evaluate.
        metric: Callable invoked as ``metric(y_true, y_pred)`` or, when
            ``requires_uncertainty`` is true, ``metric(y_true, y_pred, y_std)``.
        requires_uncertainty: Whether to pass the stored uncertainties as a
            third argument.
        transformation: Optional callable applied to each array before it is
            handed to ``metric``. ``None`` leaves the arrays untouched.

    Returns:
        A DataFrame with one row per experiment name. Columns are one per
        entry of ``unique_cores``, followed by ``"ox2"``, ``"ox3"`` and
        ``"all"``.
    """
    if transformation is None:

        def transformation(x):
            return x

    data_set = data_sets[key]

    # Row selections in output-column order. They depend only on the data set,
    # so they are built once instead of once per experiment.
    subsets = {}
    for core in unique_cores:
        # A core label is compared as: first two characters against the
        # "metal" column, last character (as int) against the "ox" column.
        subsets[core] = (
            (data_set[["metal", "ox"]] == [core[:2], int(core[-1])])
            .all(axis=1)
            .values
        )
    subsets["ox2"] = (data_set["ox"] == 2).values
    subsets["ox3"] = (data_set["ox"] == 3).values
    subsets["all"] = slice(None)  # every row, no filtering

    results = {}
    for experiment in experiments:
        targets = data_set[experiment.target.full_name()].values
        predictions = experiment.predictions[key]
        result_row = {}
        for label, rows in subsets.items():
            metric_args = [
                transformation(targets[rows]),
                transformation(predictions[rows]),
            ]
            if requires_uncertainty:
                metric_args.append(
                    transformation(experiment.uncertainties[key][rows])
                )
            result_row[label] = metric(*metric_args)
        results[experiment.name] = result_row
    return pd.DataFrame.from_dict(results, orient="index")

In [8]:
# Ligand identifiers used for the per-ligand breakdown. Each entry is matched
# as a plain substring (regex disabled) against the "name" column of the
# ligand test set, so the spelling here must match the data exactly.
test_ligands = [
    "4H-pyran",
    "[OH]-[CH]=[CH]-[OH]",
    "bifuran",
    "pyridine-N-oxide",
    "acrylamide",
    "dmf",
    "thiophene",
    "thiane",
    "4H-thiopyran",
    "oxazoline",
    "thioazole",  # NOTE(review): unusual spelling; presumably mirrors the data set, verify
    "[NH]=[CH]-[OH]",
    "[PH]=[CH]-[OH]",
    "[NH2]-[NH]-[NH]-[NH2]",
    "1H-tetrazole",
    "1H-triazole",
    "thioformaldehyde",
    "[NH2]-[O]-[O]-[NH2]",
    "bipyrimidine",
    "[PH2]-[CH2]-[OH]",
    "[PH2]-[NH]-[NH]-[PH2]",
]


def ligand_dependence(metric, requires_uncertainty=False, transformation=None):
    """Break a metric down by ligand on the ``"ligand_test"`` data set.

    Args:
        metric: Callable invoked as ``metric(y_true, y_pred)`` or, when
            ``requires_uncertainty`` is true, ``metric(y_true, y_pred, y_std)``.
        requires_uncertainty: Whether to pass the stored uncertainties as a
            third argument.
        transformation: Optional callable applied to each array before it is
            handed to ``metric``. ``None`` leaves the arrays untouched.

    Returns:
        A DataFrame with one row per entry of ``test_ligands``: a ``"count"``
        column with the number of matching rows, then one column per
        experiment name.
    """

    def prepare(values):
        # No transformation given: pass the array through unchanged.
        return values if transformation is None else transformation(values)

    split_name = "ligand_test"
    frame = data_sets[split_name]
    table = {}
    for ligand in test_ligands:
        # Plain substring match of the ligand identifier in the complex name.
        selected = frame["name"].str.contains(ligand, regex=False)
        row = {"count": np.count_nonzero(selected)}
        for exp in experiments:
            metric_args = [
                prepare(frame[selected][exp.target.full_name()].values),
                prepare(exp.predictions[split_name][selected]),
            ]
            if requires_uncertainty:
                metric_args.append(prepare(exp.uncertainties[split_name][selected]))
            row[exp.name] = metric(*metric_args)
        table[ligand] = row
    return pd.DataFrame.from_dict(table, orient="index")

In [9]:
# Show the data-frame columns behind the ORBITALS target. Their order fixes the
# column layout that the slicing helpers (homo_ls, homo_hs, ...) rely on.
TargetProperty.ORBITALS.full_name()

['energetic_homo_ls_eV',
 'energetic_homo_hs_eV',
 'energetic_lumo_ls_eV',
 'energetic_lumo_hs_eV']

In [10]:
def homo_ls(x):
    """Keep only column 0 of ``x`` (HOMO, low spin) as a 2-D slice."""
    first_column = slice(None, 1)
    return x[:, first_column]


def homo_hs(x):
    """Keep only column 1 of ``x`` (HOMO, high spin) as a 2-D slice."""
    second_column = slice(1, 2)
    return x[:, second_column]


def homo_both(x):
    """Keep columns 0 and 1 of ``x`` (HOMO for both spin states)."""
    homo_columns = slice(None, 2)
    return x[:, homo_columns]


def lumo_both(x):
    """Keep every column of ``x`` from index 2 onward (LUMO for both spin states)."""
    lumo_columns = slice(2, None)
    return x[:, lumo_columns]


def gap_both(x):
    """Return LUMO minus HOMO per spin state: columns 2+ minus columns 0-1."""
    lumo = x[:, 2:]
    homo = x[:, :2]
    return lumo - homo

In [11]:
# mean_absolute_error over all ORBITALS columns at once (no transformation),
# per experiment and data set.
evaluate_metric(mean_absolute_error).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.13,0.43,0.52,0.99
krr_two_body,0.33,0.4,0.3,1.23
krr_three_body,0.17,0.36,0.23,1.06
nn_standard_racs,0.29,0.38,0.52,0.83
nn_two_body,0.35,0.38,0.35,0.96
nn_three_body,0.28,0.34,0.61,0.88


In [12]:
# r2_score over all ORBITALS columns at once (no transformation),
# per experiment and data set.
evaluate_metric(r2_score).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,1.0,0.98,0.96,0.75
krr_two_body,0.99,0.98,0.99,0.63
krr_three_body,1.0,0.98,0.99,0.72
nn_standard_racs,0.99,0.98,0.96,0.82
nn_two_body,0.99,0.98,0.98,0.77
nn_three_body,0.99,0.98,0.94,0.81


# HOMO

In [13]:
# mean_absolute_error restricted to the two HOMO columns (both spin states).
evaluate_metric(mean_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.15,0.37,0.58,1.03
krr_two_body,0.41,0.44,0.28,1.27
krr_three_body,0.22,0.4,0.23,1.12
nn_standard_racs,0.29,0.34,0.56,0.96
nn_two_body,0.43,0.43,0.3,1.05
nn_three_body,0.32,0.38,0.79,0.97


In [14]:
# r2_score restricted to the two HOMO columns; shown with three decimals.
evaluate_metric(r2_score, transformation=homo_both).round(3)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.998,0.984,0.943,0.698
krr_two_body,0.984,0.974,0.986,0.569
krr_three_body,0.995,0.978,0.99,0.67
nn_standard_racs,0.993,0.987,0.952,0.756
nn_two_body,0.984,0.975,0.986,0.696
nn_three_body,0.992,0.981,0.904,0.769


In [15]:
# std_absolute_error restricted to the two HOMO columns.
evaluate_metric(std_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.16,0.34,0.47,0.86
krr_two_body,0.43,0.45,0.24,0.99
krr_three_body,0.26,0.42,0.21,0.86
nn_standard_racs,0.26,0.3,0.37,0.74
nn_two_body,0.42,0.44,0.22,0.85
nn_three_body,0.28,0.38,0.54,0.67


In [16]:
# max_absolute_error restricted to the two HOMO columns.
evaluate_metric(max_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,1.84,2.03,2.28,3.91
krr_two_body,3.18,3.02,1.39,4.71
krr_three_body,2.51,3.25,1.27,4.25
nn_standard_racs,2.63,2.28,2.01,3.22
nn_two_body,3.28,2.98,1.22,4.15
nn_three_body,1.96,4.5,2.34,3.12


In [17]:
# mean_absolute_error for the low-spin HOMO column only.
evaluate_metric(mean_absolute_error, transformation=homo_ls).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.15,0.38,0.58,1.07
krr_two_body,0.41,0.44,0.29,1.28
krr_three_body,0.22,0.4,0.23,1.1
nn_standard_racs,0.29,0.35,0.5,0.94
nn_two_body,0.43,0.42,0.28,1.08
nn_three_body,0.31,0.39,0.72,0.99


In [18]:
# mean_absolute_error for the high-spin HOMO column only.
evaluate_metric(mean_absolute_error, transformation=homo_hs).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.15,0.37,0.58,1.0
krr_two_body,0.41,0.45,0.27,1.26
krr_three_body,0.22,0.41,0.22,1.14
nn_standard_racs,0.29,0.34,0.63,0.97
nn_two_body,0.43,0.44,0.32,1.02
nn_three_body,0.33,0.37,0.86,0.96


In [19]:
# HOMO mean_absolute_error on the composition test set, split by
# metal/oxidation-state core, by oxidation state, and over all rows.
metal_dependence(
    "composition_test", mean_absolute_error, transformation=homo_both
).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,ox2,ox3,all
krr_standard_racs,0.59,0.49,0.52,0.56,0.6,0.58,0.51,0.76,0.6,0.56,0.58
krr_two_body,0.34,0.19,0.38,0.21,0.22,0.25,0.44,0.21,0.22,0.35,0.28
krr_three_body,0.26,0.22,0.29,0.17,0.17,0.16,0.38,0.19,0.18,0.27,0.23
nn_standard_racs,0.69,0.34,0.58,0.52,0.72,0.45,0.52,0.66,0.49,0.63,0.56
nn_two_body,0.31,0.33,0.35,0.25,0.27,0.23,0.42,0.22,0.26,0.34,0.3
nn_three_body,1.09,0.33,0.74,0.65,1.1,0.58,0.98,0.83,0.6,0.98,0.79


In [20]:
# HOMO r2_score on the composition test set; each column is computed only from
# the rows of that subset.
metal_dependence("composition_test", r2_score, transformation=homo_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,ox2,ox3,all
krr_standard_racs,-2.38,-17.84,-1.88,-17.39,-1.92,-34.58,0.06,-14.85,-0.34,-0.54,0.94
krr_two_body,0.23,-0.86,-0.18,-1.19,0.49,-8.17,0.22,-0.01,0.84,0.37,0.99
krr_three_body,0.53,-1.06,0.3,-0.33,0.7,-1.87,0.37,0.19,0.89,0.55,0.99
nn_standard_racs,-2.11,-5.96,-2.19,-12.75,-2.79,-14.96,0.1,-8.5,0.26,-0.71,0.95
nn_two_body,0.45,-5.12,-0.08,-2.23,0.44,-6.85,0.41,-0.12,0.77,0.47,0.99
nn_three_body,-6.58,-5.3,-3.89,-22.67,-7.56,-30.14,-1.66,-14.75,-0.18,-2.89,0.9


In [21]:
# HOMO mean_absolute_error on the ligand test set, split by core and oxidation state.
metal_dependence("ligand_test", mean_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,ox2,ox3,all
krr_standard_racs,1.27,0.76,1.35,0.81,1.22,0.84,1.67,0.95,0.85,1.34,1.03
krr_two_body,1.66,0.96,1.43,1.03,1.52,1.07,1.83,1.16,1.06,1.6,1.27
krr_three_body,1.39,0.8,1.36,0.93,1.28,0.92,1.65,1.1,0.94,1.39,1.12
nn_standard_racs,1.2,0.7,1.42,0.74,1.07,0.81,1.37,0.88,0.79,1.23,0.96
nn_two_body,1.49,0.82,1.24,0.81,1.2,0.85,1.48,0.93,0.86,1.36,1.05
nn_three_body,1.17,0.78,1.23,0.84,0.97,0.88,1.13,0.99,0.88,1.12,0.97


In [22]:
# HOMO r2_score on the ligand test set, split by core and oxidation state.
metal_dependence("ligand_test", r2_score, transformation=homo_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,ox2,ox3,all
krr_standard_racs,0.11,0.36,-1.82,-0.01,0.12,0.16,-5.36,-0.46,0.13,-0.26,0.7
krr_two_body,-0.43,0.06,-2.0,-0.35,-0.28,-0.3,-8.18,-0.97,-0.24,-0.81,0.57
krr_three_body,0.0,0.36,-1.84,-0.11,0.12,0.02,-6.2,-0.72,0.02,-0.35,0.67
nn_standard_racs,0.3,0.47,-1.81,0.27,0.32,0.33,-3.99,-0.14,0.32,-0.04,0.76
nn_two_body,-0.06,0.36,-1.55,0.09,0.18,0.18,-5.73,-0.34,0.18,-0.32,0.7
nn_three_body,0.38,0.33,-1.23,0.05,0.48,0.19,-1.49,-0.39,0.15,0.22,0.77


In [23]:
# HOMO mean_negative_log_likelihood on the ligand test set. This metric also
# receives the predicted uncertainties, hence requires_uncertainty=True.
metal_dependence(
    "ligand_test",
    mean_negative_log_likelihood,
    requires_uncertainty=True,
    transformation=homo_both,
).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,ox2,ox3,all
krr_standard_racs,3.04,2.88,2.96,2.97,3.02,2.94,3.13,2.96,2.93,3.03,2.97
krr_two_body,2.45,2.2,2.18,2.25,2.33,2.24,2.39,2.28,2.24,2.36,2.29
krr_three_body,2.69,2.52,2.48,2.58,2.62,2.57,2.63,2.59,2.57,2.62,2.59
nn_standard_racs,107.52,76.59,126.79,57.84,159.99,65.58,153.38,79.72,70.14,133.54,94.6
nn_two_body,30.46,12.73,34.69,12.6,15.61,18.83,43.19,21.62,16.73,29.07,21.49
nn_three_body,44.76,15.15,70.05,19.48,37.4,18.7,17.31,15.39,17.17,42.82,27.07


In [24]:
# HOMO mean_absolute_error per test ligand (ligand test set only); "count" is
# the number of complexes whose name contains the ligand.
ligand_dependence(mean_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,count,krr_standard_racs,krr_two_body,krr_three_body,nn_standard_racs,nn_two_body,nn_three_body
4H-pyran,3,0.32,0.69,0.39,1.12,0.56,1.49
[OH]-[CH]=[CH]-[OH],4,1.63,2.22,2.08,1.4,1.95,1.16
bifuran,4,0.33,1.57,1.38,0.95,0.64,1.15
pyridine-N-oxide,7,0.88,1.65,1.81,1.84,1.55,2.55
acrylamide,7,0.34,0.7,0.83,0.26,0.15,0.73
dmf,8,2.45,0.43,0.09,1.56,0.82,0.8
thiophene,5,0.73,0.45,0.13,0.12,0.39,0.16
thiane,5,1.59,1.26,1.1,0.76,0.9,0.47
4H-thiopyran,5,0.37,0.56,0.92,1.65,1.14,1.56
oxazoline,7,2.37,2.98,2.86,1.87,2.77,2.06


In [25]:
# HOMO std_absolute_error per test ligand (ligand test set only).
ligand_dependence(std_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,count,krr_standard_racs,krr_two_body,krr_three_body,nn_standard_racs,nn_two_body,nn_three_body
4H-pyran,3,0.23,0.26,0.31,0.23,0.18,0.3
[OH]-[CH]=[CH]-[OH],4,0.55,0.44,0.37,0.41,0.52,0.59
bifuran,4,0.2,0.2,0.32,0.21,0.17,0.31
pyridine-N-oxide,7,0.33,0.36,0.35,0.21,0.16,0.33
acrylamide,7,0.17,0.22,0.18,0.16,0.13,0.2
dmf,8,0.3,0.3,0.07,0.24,0.19,0.27
thiophene,5,0.16,0.18,0.12,0.16,0.24,0.12
thiane,5,1.0,0.88,0.63,0.52,0.85,0.25
4H-thiopyran,5,0.21,0.42,0.34,0.19,0.29,0.23
oxazoline,7,0.3,0.84,0.88,0.63,0.84,0.27


# LUMO

In [26]:
# mean_absolute_error restricted to the two LUMO columns (both spin states).
evaluate_metric(mean_absolute_error, transformation=lumo_both).round(
    2
)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.11,0.48,0.47,0.94
krr_two_body,0.25,0.36,0.31,1.19
krr_three_body,0.12,0.32,0.22,1.01
nn_standard_racs,0.29,0.42,0.49,0.71
nn_two_body,0.27,0.32,0.4,0.88
nn_three_body,0.25,0.31,0.44,0.8


In [27]:
# r2_score restricted to the two LUMO columns.
evaluate_metric(r2_score, transformation=lumo_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,1.0,0.97,0.97,0.81
krr_two_body,0.99,0.98,0.99,0.68
krr_three_body,1.0,0.99,0.99,0.78
nn_standard_racs,0.99,0.98,0.97,0.89
nn_two_body,0.99,0.99,0.98,0.84
nn_three_body,0.99,0.99,0.98,0.86


In [28]:
# LUMO mean_absolute_error on the ligand test set, split by core and oxidation state.
metal_dependence("ligand_test", mean_absolute_error, transformation=lumo_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,ox2,ox3,all
krr_standard_racs,0.99,0.76,1.33,0.86,0.9,0.88,1.44,0.85,0.84,1.1,0.94
krr_two_body,1.34,1.05,1.51,1.01,1.37,1.1,1.47,1.09,1.06,1.4,1.19
krr_three_body,1.15,0.82,1.35,0.88,1.18,0.9,1.18,0.92,0.88,1.2,1.01
nn_standard_racs,0.76,0.57,1.16,0.6,0.88,0.58,1.08,0.56,0.58,0.92,0.71
nn_two_body,0.98,0.81,1.29,0.75,1.01,0.75,1.07,0.76,0.77,1.06,0.88
nn_three_body,0.86,0.65,1.19,0.7,0.93,0.66,0.99,0.77,0.7,0.96,0.8


# GAP

In [29]:
def count_negative_gaps():
    """Count negative predicted gaps per experiment and data set.

    The gap is derived from the predictions with ``gap_both``. Every negative
    entry is counted, so a row that is negative for both spin states
    contributes two.

    Returns:
        A DataFrame with one row per experiment name and one column per key of
        ``data_sets``.
    """
    counts = {
        exp.name: {
            split_name: np.count_nonzero(gap_both(exp.predictions[split_name]) < 0.0)
            for split_name in data_sets.keys()
        }
        for exp in experiments
    }
    return pd.DataFrame.from_dict(counts, orient="index")


# Number of negative predicted gap entries per experiment and data set.
count_negative_gaps()

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0,0,0,3
krr_two_body,0,0,0,0
krr_three_body,0,0,0,0
nn_standard_racs,0,0,0,0
nn_two_body,2,0,0,1
nn_three_body,1,0,0,0


In [30]:
# Ligand-test rows where experiments[0] (krr_standard_racs) predicts a negative
# gap for at least one spin state.
data_sets["ligand_test"][
    np.any(gap_both(experiments[0].predictions["ligand_test"]) < 0, axis=1)
]

Unnamed: 0,name,metal,ox,high_spin,spin_splitting_kcal/mol,energetic_homo_ls_eV,energetic_homo_hs_eV,energetic_lumo_ls_eV,energetic_lumo_hs_eV,energetic_gap_ls_eV,...,lig6_D_T_1,lig6_D_S_1,lig6_D_Z_2,lig6_D_chi_2,lig6_D_T_2,lig6_D_S_2,lig6_D_Z_3,lig6_D_chi_3,lig6_D_T_3,lig6_D_S_3
17,co_2_thiane_thiane_thiane_thiane_thiane_thiane,co,2,4,-6.649775,-11.246466,-11.540349,-8.968873,-8.740297,2.277593,...,-4.0,0.58,80.0,1.58,0.0,3.54,70.0,1.55,2.0,3.25
61,fe_3_thiane_thiane_thiane_thiane_thiane_thiane,fe,3,6,-8.337003,-14.91184,-14.824763,-12.83289,-12.773025,2.07895,...,-4.0,0.58,80.0,1.58,0.0,3.54,70.0,1.55,2.0,3.25
123,cr_3_thiane_thiane_thiane_thiane_thiane_thiane,cr,3,4,-27.275012,-15.026127,-14.982589,-12.21247,-12.381181,2.813657,...,-4.0,0.58,80.0,1.58,0.0,3.54,70.0,1.55,2.0,3.25


In [31]:
# Ligand-test rows where experiments[4] (nn_two_body) predicts a negative gap
# for at least one spin state.
data_sets["ligand_test"][
    np.any(gap_both(experiments[4].predictions["ligand_test"]) < 0, axis=1)
]

Unnamed: 0,name,metal,ox,high_spin,spin_splitting_kcal/mol,energetic_homo_ls_eV,energetic_homo_hs_eV,energetic_lumo_ls_eV,energetic_lumo_hs_eV,energetic_gap_ls_eV,...,lig6_D_T_1,lig6_D_S_1,lig6_D_Z_2,lig6_D_chi_2,lig6_D_T_2,lig6_D_S_2,lig6_D_Z_3,lig6_D_chi_3,lig6_D_T_3,lig6_D_S_3
123,cr_3_thiane_thiane_thiane_thiane_thiane_thiane,cr,3,4,-27.275012,-15.026127,-14.982589,-12.21247,-12.381181,2.813657,...,-4.0,0.58,80.0,1.58,0.0,3.54,70.0,1.55,2.0,3.25


In [32]:
# Training rows where experiments[4] (nn_two_body) predicts a negative gap for
# at least one spin state.
data_sets["train"][np.any(gap_both(experiments[4].predictions["train"]) < 0, axis=1)]

Unnamed: 0,name,metal,ox,high_spin,spin_splitting_kcal/mol,energetic_homo_ls_eV,energetic_homo_hs_eV,energetic_lumo_ls_eV,energetic_lumo_hs_eV,energetic_gap_ls_eV,...,lig6_D_T_1,lig6_D_S_1,lig6_D_Z_2,lig6_D_chi_2,lig6_D_T_2,lig6_D_S_2,lig6_D_Z_3,lig6_D_chi_3,lig6_D_T_3,lig6_D_S_3
447,fe_3_furan_furan_furan_furan_carbonyl_furan,fe,3,6,-23.713989,-15.842469,-15.65471,-15.241097,-15.012522,0.601372,...,-2.0,-0.2,18.0,4.26,0.0,0.5,14.0,2.48,2.0,0.7
449,fe_3_furan_furan_furan_furan_misc_furan,fe,3,6,-21.943644,-15.46151,-15.230213,-14.713196,-14.615235,0.748313,...,-2.0,-0.2,18.0,4.26,0.0,0.5,14.0,2.48,2.0,0.7


In [33]:
# Training rows where experiments[5] (nn_three_body) predicts a negative gap
# for at least one spin state.
data_sets["train"][np.any(gap_both(experiments[5].predictions["train"]) < 0, axis=1)]

Unnamed: 0,name,metal,ox,high_spin,spin_splitting_kcal/mol,energetic_homo_ls_eV,energetic_homo_hs_eV,energetic_lumo_ls_eV,energetic_lumo_hs_eV,energetic_gap_ls_eV,...,lig6_D_T_1,lig6_D_S_1,lig6_D_Z_2,lig6_D_chi_2,lig6_D_T_2,lig6_D_S_2,lig6_D_Z_3,lig6_D_chi_3,lig6_D_T_3,lig6_D_S_3
447,fe_3_furan_furan_furan_furan_carbonyl_furan,fe,3,6,-23.713989,-15.842469,-15.65471,-15.241097,-15.012522,0.601372,...,-2.0,-0.2,18.0,4.26,0.0,0.5,14.0,2.48,2.0,0.7


In [34]:
# mean_absolute_error of the gap (LUMO minus HOMO, per spin state). The gap is
# derived from both the reference columns and the predictions via gap_both.
evaluate_metric(mean_absolute_error, transformation=gap_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.16,0.52,0.45,0.8
krr_two_body,0.35,0.45,0.34,0.72
krr_three_body,0.2,0.41,0.26,0.61
nn_standard_racs,0.34,0.43,0.5,0.63
nn_two_body,0.39,0.44,0.42,0.54
nn_three_body,0.34,0.39,0.94,0.53


In [35]:
# r2_score of the derived gap; shown with three decimals.
evaluate_metric(r2_score, transformation=gap_both).round(3)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.96,0.55,0.657,0.079
krr_two_body,0.808,0.681,0.808,0.338
krr_three_body,0.925,0.72,0.874,0.504
nn_standard_racs,0.848,0.731,0.639,0.454
nn_two_body,0.782,0.697,0.731,0.573
nn_three_body,0.845,0.757,-0.05,0.6


In [36]:
# Gap mean_absolute_error on the ligand test set, split by core and oxidation state.
metal_dependence("ligand_test", mean_absolute_error, transformation=gap_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,ox2,ox3,all
krr_standard_racs,1.03,0.56,0.74,0.83,0.93,0.7,0.93,0.8,0.72,0.93,0.8
krr_two_body,1.1,0.59,0.55,0.57,0.84,0.59,0.78,0.71,0.62,0.87,0.72
krr_three_body,1.02,0.4,0.45,0.36,0.8,0.51,0.84,0.6,0.48,0.82,0.61
nn_standard_racs,0.83,0.44,0.54,0.47,0.64,0.55,0.79,0.77,0.57,0.72,0.63
nn_two_body,0.97,0.28,0.28,0.38,0.76,0.42,0.88,0.51,0.4,0.77,0.54
nn_three_body,0.85,0.39,0.31,0.46,0.44,0.5,0.54,0.63,0.5,0.58,0.53


In [37]:
# Gap mean_absolute_error per test ligand (ligand test set only).
ligand_dependence(mean_absolute_error, transformation=gap_both).round(2)

Unnamed: 0,count,krr_standard_racs,krr_two_body,krr_three_body,nn_standard_racs,nn_two_body,nn_three_body
4H-pyran,3,0.55,0.37,0.44,1.34,0.33,1.11
[OH]-[CH]=[CH]-[OH],4,0.75,0.85,0.94,0.63,0.87,0.99
bifuran,4,0.56,0.44,0.37,1.11,0.31,0.86
pyridine-N-oxide,7,0.96,1.03,0.83,0.85,0.67,0.67
acrylamide,7,0.41,0.67,0.52,0.33,0.44,0.36
dmf,8,0.87,0.69,0.35,0.29,0.36,0.48
thiophene,5,0.74,0.94,0.49,0.3,0.24,0.95
thiane,5,2.84,0.91,0.7,0.38,1.2,0.38
4H-thiopyran,5,0.35,0.54,0.52,0.8,0.47,1.19
oxazoline,7,0.88,0.73,0.53,0.71,0.6,0.38
