In [1]:
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path
from dataclasses import dataclass, field
from mbeml.constants import LigandFeatures, TargetProperty
from mbeml.featurization import data_prep
from mbeml.metrics import mean_absolute_error, r2_score, mean_negative_log_likelihood

In [2]:
# Locations of the CSV data splits and of the saved models, relative to this notebook.
data_dir = Path("../../data/")
model_dir = Path("../../models/")

# Read every data split into a DataFrame, keyed by the split name used
# throughout the rest of the notebook.
data_sets = {
    split: pd.read_csv(data_dir / csv_name)
    for split, csv_name in (
        ("train", "training_data.csv"),
        ("validation", "validation_data.csv"),
        ("composition_test", "composition_test_data.csv"),
        ("ligand_test", "ligand_test_data.csv"),
    )
}

In [3]:
def _blank_results():
    """Return a fresh ``{data set name: zeros(n_rows, 4)}`` placeholder dictionary.

    The dictionary is rebuilt from the module-level ``data_sets`` on every call, so
    each field of each ``Experiment`` receives its own independent arrays.
    """
    return {split: np.zeros([len(frame), 4]) for split, frame in data_sets.items()}


@dataclass
class Experiment:
    """One trained model together with its per-data-set outputs.

    Attributes:
        name: Model identifier; also used to build the path of the saved model.
        features: Ligand featurization handed to ``data_prep``.
        target: Property the model predicts (defaults to the orbital energies).
        is_nn: True for a Keras model directory, False for a pickled model.
        predictions: Predicted means per data set, initialised to zeros.
        uncertainties: Predicted standard deviations per data set, initialised
            to zeros.
    """

    name: str
    features: LigandFeatures
    target: TargetProperty = TargetProperty.ORBITALS
    is_nn: bool = False
    # Placeholders have 4 columns — presumably one per ORBITALS target column;
    # they are overwritten once the models have been evaluated.
    predictions: dict = field(default_factory=_blank_results)
    uncertainties: dict = field(default_factory=_blank_results)

In [4]:
# All model families crossed with all descriptor variants, in the order
# krr_* first and nn_* second. The two- and three-body variants both use the
# LIGAND_RACS featurization; only the saved model differs.
experiments = [
    Experiment(
        name=f"{family}_{variant}",
        features=variant_features,
        is_nn=(family == "nn"),
    )
    for family in ("krr", "nn")
    for variant, variant_features in (
        ("standard_racs", LigandFeatures.STANDARD_RACS),
        ("two_body", LigandFeatures.LIGAND_RACS),
        ("three_body", LigandFeatures.LIGAND_RACS),
    )
]

In [5]:
# Evaluate every model on every data split and store the predicted means and
# standard deviations on the corresponding Experiment.
for experiment in experiments:
    target_dir = model_dir / experiment.target.name.lower()

    # The saved model depends only on the experiment, not on the data split, so
    # load it once here instead of once per data set.
    if experiment.is_nn:
        model = tf.keras.models.load_model(target_dir / experiment.name)
    else:
        # NOTE(review): pickle.load can execute arbitrary code while unpickling;
        # only point this at model files from a trusted source.
        with open(target_dir / f"{experiment.name}.pkl", "rb") as fin:
            model = pickle.load(fin)

    for df_name, data_set in data_sets.items():
        # `y` is not used here: the evaluation cells read the reference values
        # straight from the data frames.
        X, y = data_prep(
            data_set, experiment.features, experiment.target, experiment.is_nn
        )
        if experiment.is_nn:
            y_mean, y_std = model.predict(X, verbose=0)
        else:
            y_mean, y_std = model.predict(X, return_std=True)
        experiment.predictions[df_name] = y_mean
        experiment.uncertainties[df_name] = y_std

2024-03-05 09:43:40.861387: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [6]:
def evaluate_metric(metric, requires_uncertainty=False, transformation=None):
    """Score every experiment on every data set with ``metric``.

    Args:
        metric: Callable invoked as ``metric(y_true, y_pred)`` or, when
            ``requires_uncertainty`` is set, ``metric(y_true, y_pred, y_std)``.
        requires_uncertainty: Whether the stored uncertainties are passed to
            ``metric`` as a third argument.
        transformation: Optional callable applied to every array before it is
            handed to ``metric`` (e.g. a column slicer). Defaults to identity.

    Returns:
        DataFrame with one row per experiment name and one column per data set.
    """
    transform = (lambda x: x) if transformation is None else transformation

    table = {}
    for experiment in experiments:
        scores = {}
        for split, frame in data_sets.items():
            # Reference values first, then predictions, then (optionally) the
            # predicted standard deviations.
            arguments = [
                frame[experiment.target.full_name()].values,
                experiment.predictions[split],
            ]
            if requires_uncertainty:
                arguments.append(experiment.uncertainties[split])
            scores[split] = metric(*[transform(array) for array in arguments])
        table[experiment.name] = scores
    return pd.DataFrame.from_dict(table, orient="index")

In [7]:
# Metal/oxidation-state labels: two-letter metal symbol followed by the
# oxidation state, with the 3+ state listed before the 2+ state for each metal.
cores = [f"{metal}{ox}" for metal in ("cr", "mn", "fe", "co") for ox in (3, 2)]


def metal_dependence(
    key: str,
    metric=mean_absolute_error,
    requires_uncertainty=False,
    transformation=None,
):
    """Break a metric down by metal/oxidation-state core for one data set.

    Args:
        key: Name of the data set in ``data_sets`` to evaluate.
        metric: Callable invoked as ``metric(y_true, y_pred)`` or, when
            ``requires_uncertainty`` is set, ``metric(y_true, y_pred, y_std)``.
        requires_uncertainty: Whether the stored uncertainties are passed to
            ``metric`` as a third argument.
        transformation: Optional callable applied to every array before it is
            handed to ``metric`` (e.g. a column slicer). Defaults to identity.

    Returns:
        DataFrame with one row per experiment name, one column per entry of
        ``cores`` and a final "all" column computed on the whole data set.
    """
    if transformation is None:
        transformation = lambda x: x
    data_set = data_sets[key]

    # The row selection for a core depends only on the data set, so build each
    # boolean mask once instead of once per experiment. A core label is the
    # two-letter metal followed by a single-digit oxidation state.
    core_masks = {
        core: (data_set[["metal", "ox"]] == [core[:2], int(core[-1])])
        .all(axis=1)
        .values
        for core in cores
    }

    def score(experiment, mask=None):
        """Evaluate ``metric`` for one experiment, optionally on a row subset."""
        arrays = [
            data_set[experiment.target.full_name()].values,
            experiment.predictions[key],
        ]
        if requires_uncertainty:
            arrays.append(experiment.uncertainties[key])
        if mask is not None:
            arrays = [array[mask] for array in arrays]
        return metric(*(transformation(array) for array in arrays))

    results = {}
    for experiment in experiments:
        result_row = {
            core: score(experiment, mask) for core, mask in core_masks.items()
        }
        # The overall score honours `requires_uncertainty` just like the
        # per-core scores; previously it was always called with two arguments.
        result_row["all"] = score(experiment)
        results[experiment.name] = result_row
    return pd.DataFrame.from_dict(results, orient="index")

In [8]:
# Show the column names behind the ORBITALS target. The column slicers defined
# in the next cell index into arrays by position, so they appear to rely on
# this order (LS HOMO, HS HOMO, LS LUMO, HS LUMO).
TargetProperty.ORBITALS.full_name()

['energetic_homo_ls_eV',
 'energetic_homo_hs_eV',
 'energetic_lumo_ls_eV',
 'energetic_lumo_hs_eV']

In [9]:
def homo_ls(x):
    """Keep only the first column of ``x`` as a 2-D, single-column array.

    In the ORBITALS column order this is the low-spin HOMO energy.
    """
    first_column = slice(None, 1)
    return x[:, first_column]


def homo_hs(x):
    """Keep only the second column of ``x`` as a 2-D, single-column array.

    In the ORBITALS column order this is the high-spin HOMO energy.
    """
    second_column = slice(1, 2)
    return x[:, second_column]


def homo_both(x):
    """Keep the first two columns of ``x``.

    In the ORBITALS column order these are the low- and high-spin HOMO energies.
    """
    homo_columns = slice(None, 2)
    return x[:, homo_columns]


def lumo_both(x):
    """Keep every column of ``x`` from the third one onwards.

    In the ORBITALS column order these are the low- and high-spin LUMO energies.
    """
    lumo_columns = slice(2, None)
    return x[:, lumo_columns]


def gap_both(x):
    """Subtract the first two columns of ``x`` from the columns after them.

    In the ORBITALS column order this is LUMO minus HOMO, i.e. the gap, for the
    low-spin and the high-spin state respectively.
    """
    lumo = x[:, 2:]
    homo = x[:, :2]
    return lumo - homo

In [10]:
# Mean absolute error over all target columns (no column slicing), with one row
# per model and one column per data split, rounded to two decimals.
evaluate_metric(mean_absolute_error).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.13,0.43,0.52,0.99
krr_two_body,0.33,0.4,0.3,1.23
krr_three_body,0.17,0.36,0.23,1.06
nn_standard_racs,0.29,0.38,0.52,0.83
nn_two_body,0.35,0.38,0.35,0.96
nn_three_body,0.28,0.34,0.61,0.88


In [11]:
# R^2 score over all target columns (no column slicing), with one row per model
# and one column per data split, rounded to two decimals.
evaluate_metric(r2_score).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,1.0,0.98,0.96,0.75
krr_two_body,0.99,0.98,0.99,0.63
krr_three_body,1.0,0.98,0.99,0.72
nn_standard_racs,0.99,0.98,0.96,0.82
nn_two_body,0.99,0.98,0.98,0.77
nn_three_body,0.99,0.98,0.94,0.81


# HOMO

In [12]:
# Mean absolute error restricted to the two HOMO columns (first two columns).
evaluate_metric(mean_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.15,0.37,0.58,1.03
krr_two_body,0.41,0.44,0.28,1.27
krr_three_body,0.22,0.4,0.23,1.12
nn_standard_racs,0.29,0.34,0.56,0.96
nn_two_body,0.43,0.43,0.3,1.05
nn_three_body,0.32,0.38,0.79,0.97


In [13]:
# R^2 score restricted to the two HOMO columns (first two columns).
evaluate_metric(r2_score, transformation=homo_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,1.0,0.98,0.94,0.7
krr_two_body,0.98,0.97,0.99,0.57
krr_three_body,0.99,0.98,0.99,0.67
nn_standard_racs,0.99,0.99,0.95,0.76
nn_two_body,0.98,0.98,0.99,0.7
nn_three_body,0.99,0.98,0.9,0.77


In [14]:
# HOMO mean absolute error on the ligand test set, split by metal/oxidation
# state, plus an "all" column for the whole set.
metal_dependence("ligand_test", mean_absolute_error, transformation=homo_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,all
krr_standard_racs,1.27,0.76,1.35,0.81,1.22,0.84,1.67,0.95,1.03
krr_two_body,1.66,0.96,1.43,1.03,1.52,1.07,1.83,1.16,1.27
krr_three_body,1.39,0.8,1.36,0.93,1.28,0.92,1.65,1.1,1.12
nn_standard_racs,1.2,0.7,1.42,0.74,1.07,0.81,1.37,0.88,0.96
nn_two_body,1.49,0.82,1.24,0.81,1.2,0.85,1.48,0.93,1.05
nn_three_body,1.17,0.78,1.23,0.84,0.97,0.88,1.13,0.99,0.97


# LUMO

In [15]:
# Mean absolute error restricted to the LUMO columns (third column onwards).
evaluate_metric(mean_absolute_error, transformation=lumo_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.11,0.48,0.47,0.94
krr_two_body,0.25,0.36,0.31,1.19
krr_three_body,0.12,0.32,0.22,1.01
nn_standard_racs,0.29,0.42,0.49,0.71
nn_two_body,0.27,0.32,0.4,0.88
nn_three_body,0.25,0.31,0.44,0.8


In [16]:
# LUMO mean absolute error on the ligand test set, split by metal/oxidation
# state, plus an "all" column for the whole set.
metal_dependence("ligand_test", mean_absolute_error, transformation=lumo_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,all
krr_standard_racs,0.99,0.76,1.33,0.86,0.9,0.88,1.44,0.85,0.94
krr_two_body,1.34,1.05,1.51,1.01,1.37,1.1,1.47,1.09,1.19
krr_three_body,1.15,0.82,1.35,0.88,1.18,0.9,1.18,0.92,1.01
nn_standard_racs,0.76,0.57,1.16,0.6,0.88,0.58,1.08,0.56,0.71
nn_two_body,0.98,0.81,1.29,0.75,1.01,0.75,1.07,0.76,0.88
nn_three_body,0.86,0.65,1.19,0.7,0.93,0.66,0.99,0.77,0.8


# GAP

In [17]:
def count_negative_gaps():
    """Count predicted gap entries below zero per experiment and data set.

    The gap is derived from the stored predictions with ``gap_both``. Individual
    array entries are counted, so one sample can contribute more than once if
    several of its gap columns are negative.

    Returns:
        DataFrame with one row per experiment name and one column per data set.
    """
    negative_counts = {
        experiment.name: {
            split: np.count_nonzero(gap_both(experiment.predictions[split]) < 0.0)
            for split in data_sets
        }
        for experiment in experiments
    }
    return pd.DataFrame.from_dict(negative_counts, orient="index")


# Display how many predicted gap entries are negative for each model and split.
count_negative_gaps()

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0,0,0,3
krr_two_body,0,0,0,0
krr_three_body,0,0,0,0
nn_standard_racs,0,0,0,0
nn_two_body,2,0,0,1
nn_three_body,1,0,0,0


In [18]:
# Mean absolute error of the gap (LUMO columns minus HOMO columns), computed
# from both the reference values and the predictions.
evaluate_metric(mean_absolute_error, transformation=gap_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.16,0.52,0.45,0.8
krr_two_body,0.35,0.45,0.34,0.72
krr_three_body,0.2,0.41,0.26,0.61
nn_standard_racs,0.34,0.43,0.5,0.63
nn_two_body,0.39,0.44,0.42,0.54
nn_three_body,0.34,0.39,0.94,0.53


In [19]:
# R^2 score of the gap (LUMO columns minus HOMO columns).
evaluate_metric(r2_score, transformation=gap_both).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.96,0.55,0.66,0.08
krr_two_body,0.81,0.68,0.81,0.34
krr_three_body,0.92,0.72,0.87,0.5
nn_standard_racs,0.85,0.73,0.64,0.45
nn_two_body,0.78,0.7,0.73,0.57
nn_three_body,0.85,0.76,-0.05,0.6


In [20]:
# Gap mean absolute error on the ligand test set, split by metal/oxidation
# state, plus an "all" column for the whole set.
metal_dependence("ligand_test", mean_absolute_error, transformation=gap_both).round(2)

Unnamed: 0,cr3,cr2,mn3,mn2,fe3,fe2,co3,co2,all
krr_standard_racs,1.03,0.56,0.74,0.83,0.93,0.7,0.93,0.8,0.8
krr_two_body,1.1,0.59,0.55,0.57,0.84,0.59,0.78,0.71,0.72
krr_three_body,1.02,0.4,0.45,0.36,0.8,0.51,0.84,0.6,0.61
nn_standard_racs,0.83,0.44,0.54,0.47,0.64,0.55,0.79,0.77,0.63
nn_two_body,0.97,0.28,0.28,0.38,0.76,0.42,0.88,0.51,0.54
nn_three_body,0.85,0.39,0.31,0.46,0.44,0.5,0.54,0.63,0.53
