In [1]:
import pickle
import pandas as pd
import numpy as np
import tensorflow as tf
from pathlib import Path
from dataclasses import dataclass, field
from mbeml.constants import LigandFeatures, TargetProperty
from mbeml.featurization import data_prep
from mbeml.metrics import mean_absolute_error, r2_score, mean_negative_log_likelihood

In [2]:
data_dir = Path("../../data/")

data_sets = {
    "train": pd.read_csv(data_dir / "training_data.csv"),
    "validation": pd.read_csv(data_dir / "validation_data.csv"),
    "composition_test": pd.read_csv(data_dir / "composition_test_data.csv"),
    "ligand_test": pd.read_csv(data_dir / "ligand_test_data.csv"),
}

model_dir = Path("../../models/")

In [3]:
@dataclass
class Experiment:
    name: str
    features: LigandFeatures
    target: TargetProperty = TargetProperty.ORBITALS
    is_nn: bool = False
    predictions: dict = field(
        default_factory=lambda: {
            key: np.zeros([len(df), 4]) for key, df in data_sets.items()
        }
    )
    uncertainties: dict = field(
        default_factory=lambda: {
            key: np.zeros([len(df), 4]) for key, df in data_sets.items()
        }
    )

In [4]:
experiments = [
    Experiment(name="krr_standard_racs", features=LigandFeatures.STANDARD_RACS),
    Experiment(name="krr_two_body", features=LigandFeatures.LIGAND_RACS),
    Experiment(name="krr_three_body", features=LigandFeatures.LIGAND_RACS),
    Experiment(
        name="nn_standard_racs", features=LigandFeatures.STANDARD_RACS, is_nn=True
    ),
    Experiment(name="nn_two_body", features=LigandFeatures.LIGAND_RACS, is_nn=True),
    Experiment(name="nn_three_body", features=LigandFeatures.LIGAND_RACS, is_nn=True),
]

In [5]:
for experiment in experiments:
    for df_name, data_set in data_sets.items():
        X, y = data_prep(
            data_set, experiment.features, experiment.target, experiment.is_nn
        )
        if experiment.is_nn:
            model = tf.keras.models.load_model(
                model_dir / experiment.target.name.lower() / experiment.name
            )
            y_mean, y_std = model.predict(X, verbose=0)
        else:
            with open(
                model_dir / experiment.target.name.lower() / f"{experiment.name}.pkl",
                "rb",
            ) as fin:
                model = pickle.load(fin)
            y_mean, y_std = model.predict(X, return_std=True)
        experiment.predictions[df_name] = y_mean
        experiment.uncertainties[df_name] = y_std
        # setattr(experiment, f"MAE_{df_name}", mean_absolute_error(y, y_mean))
        # setattr(experiment, f"R2_{df_name}", r2_score(y, y_mean))
        # setattr(
        #    experiment,
        #    f"MNLL_{df_name}",
        #    mean_negative_log_likelihood(y, y_mean, y_std),
        # )

2024-01-23 13:58:17.036190: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [7]:
def evaluate_metric(metric, requires_uncertainty=False):
    results = {}
    for experiment in experiments:
        result_row = {}
        for key, data_set in data_sets.items():
            if requires_uncertainty:
                result_row[key] = metric(
                    data_set[experiment.target.full_name()],
                    experiment.predictions[key],
                    experiment.uncertainties[key],
                )
            else:
                result_row[key] = metric(
                    data_set[experiment.target.full_name()],
                    experiment.predictions[key],
                )
        results[experiment.name] = result_row
    return pd.DataFrame.from_dict(results, orient="index")

In [9]:
evaluate_metric(mean_absolute_error).round(2)

Unnamed: 0,train,validation,composition_test,ligand_test
krr_standard_racs,0.87,3.67,4.1,4.84
krr_two_body,2.63,3.96,3.12,5.05
krr_three_body,1.02,3.3,2.75,4.93
nn_standard_racs,2.33,3.51,4.16,4.73
nn_two_body,3.16,3.73,3.61,4.14
nn_three_body,2.78,3.48,3.41,4.0


In [6]:
maes = pd.DataFrame(
    experiments,
    columns=[
        "name",
        "MAE_train",
        "MAE_validation",
        "MAE_composition_test",
        "MAE_ligand_test",
    ],
)
maes.round(2)

Unnamed: 0,name,MAE_train,MAE_validation,MAE_composition_test,MAE_ligand_test
0,krr_standard_racs,0.13,0.43,0.52,0.99
1,krr_two_body,0.33,0.4,0.3,1.23
2,krr_three_body,0.17,0.36,0.23,1.06
3,nn_standard_racs,0.29,0.38,0.52,0.83
4,nn_two_body,0.35,0.38,0.35,0.96
5,nn_three_body,0.28,0.34,0.61,0.88
