In [1]:
import sys
sys.path.append('../')


import glob
import logging
import os.path as op
import pandas as pd
import numpy as np
from dataclasses import dataclass, field
from transformers import HfArgumentParser
from typing import Optional
from muben.utils.macro import (
    MODEL_NAMES,
    FINGERPRINT_FEATURE_TYPES,
    QM_DATASET,
    PC_DATASET,
    BIO_DATASET,
    PHY_DATASET,
    CLASSIFICATION_METRICS,
    REGRESSION_METRICS,
    UncertaintyMethods,
    metrics_mapping
)
from muben.utils.io import init_dir, set_logging

# Module-level logger named after the current module (`__name__`).
logger = logging.getLogger(__name__)


In [2]:
# Backbone key -> display label. Only "DNN-rdkit" is shortened (to "DNN");
# every other backbone is shown under its own name.
model_mapping = {
    backbone: "DNN" if backbone == "DNN-rdkit" else backbone
    for backbone in (
        "DNN-rdkit",
        "ChemBERTa",
        "GROVER",
        "Uni-Mol",
        "TorchMD-NET",
        "GIN",
    )
}

# Uncertainty-method key -> display label. The insertion order of this dict is
# also the order of the classification method list derived from it below.
uncertainty_mapping = dict(
    [
        ("none", "Deterministic"),
        ("TemperatureScaling", "Temperature"),
        ("FocalLoss", "Focal Loss"),
        ("MCDropout", "MC Dropout"),
        ("SWAG", "SWAG"),
        ("BBP", "BBP"),
        ("SGLD", "SGLD"),
        ("DeepEnsembles", "Ensembles"),
    ]
)

# Classification uses every method key above, in that order.
CLASSIFICATION_UNCERTAINTY = list(uncertainty_mapping)

# Regression uses the same keys minus "TemperatureScaling" and "FocalLoss".
REGRESSION_UNCERTAINTY = [
    method
    for method in CLASSIFICATION_UNCERTAINTY
    if method not in ("TemperatureScaling", "FocalLoss")
]

# Rebinds the MODEL_NAMES imported from muben.utils.macro: from here on the
# name refers to these four backbones only.
MODEL_NAMES = ["DNN-rdkit", "ChemBERTa", "GROVER", "Uni-Mol"]

In [3]:
# Per-uncertainty-method averages of the values in ranks/mean_classification.csv.
file_path = "../output/random/RESULTS/ranks/mean_classification.csv"

# Each row label is split on "_": field 0 is stored as the backbone and
# field 1 as the uncertainty method.
df = pd.read_csv(file_path, index_col=0).assign(
    backbone=lambda frame: frame.index.map(lambda label: label.split("_")[0]),
    uncertainty=lambda frame: frame.index.map(lambda label: label.split("_")[1]),
)

# Display names for the metric columns and for the uncertainty-method rows.
mapped_metrics = [metrics_mapping[metric] for metric in CLASSIFICATION_METRICS]
index_mapping = {key: uncertainty_mapping[key] for key in CLASSIFICATION_UNCERTAINTY}

# Average the numeric columns over all rows sharing an uncertainty method, then
# relabel rows/columns and put both into the order given by the constants.
# reindex() yields an all-NaN row for any listed method missing from the file.
unc_df = (
    df.groupby("uncertainty")
    .mean(numeric_only=True)
    .rename(index=index_mapping, columns=metrics_mapping)
    .reindex([index_mapping[key] for key in CLASSIFICATION_UNCERTAINTY])
    .loc[:, mapped_metrics]
)

In [4]:
# Per-uncertainty-method averages of the values in ranks/mean_regression.csv.
file_path = "../output/random/RESULTS/ranks/mean_regression.csv"

# Each row label is split on "_": field 0 is stored as the backbone and
# field 1 as the uncertainty method.
df = pd.read_csv(file_path, index_col=0).assign(
    backbone=lambda frame: frame.index.map(lambda label: label.split("_")[0]),
    uncertainty=lambda frame: frame.index.map(lambda label: label.split("_")[1]),
)

# Display names for the metric columns and for the uncertainty-method rows.
mapped_metrics = [metrics_mapping[metric] for metric in REGRESSION_METRICS]
index_mapping = {key: uncertainty_mapping[key] for key in REGRESSION_UNCERTAINTY}

# Average the numeric columns over all rows sharing an uncertainty method, then
# relabel rows/columns and put both into the order given by the constants.
# reindex() yields an all-NaN row for any listed method missing from the file.
unc_df_reg = (
    df.groupby("uncertainty")
    .mean(numeric_only=True)
    .rename(index=index_mapping, columns=metrics_mapping)
    .reindex([index_mapping[key] for key in REGRESSION_UNCERTAINTY])
    .loc[:, mapped_metrics]
)

In [5]:
# Put the classification and regression tables side by side (joined on their
# row labels), round to two decimals and write the result to CSV.
#
# Order-sensitive: `unc_df` / `unc_df_reg` must still be the tables built from
# the ranks/*.csv files. Later cells rebind both names to tables built from the
# mrrs/*.csv files, so running this cell after them would write those values
# under the "ranks" file name.
#
# how="left" keeps every row of `unc_df`; rows with no match in `unc_df_reg`
# get NaN in the regression columns.
# NOTE(review): a column name present in both frames would receive pandas'
# default "_x"/"_y" suffixes - confirm the two metric sets share no display name.
(
    pd.merge(unc_df, unc_df_reg, how="left", left_index=True, right_index=True)
    .round(2)
    .to_csv("ranks_unc_random.csv")
)

In [8]:
# Per-uncertainty-method averages of the values in mrrs/mrr_classification.csv,
# written to mrrs_unc_classification_random.csv.
file_path = "../output/random/RESULTS/mrrs/mrr_classification.csv"

# Each row label is split on "_": field 0 is stored as the backbone and
# field 1 as the uncertainty method.
df = pd.read_csv(file_path, index_col=0).assign(
    backbone=lambda frame: frame.index.map(lambda label: label.split("_")[0]),
    uncertainty=lambda frame: frame.index.map(lambda label: label.split("_")[1]),
)

# Display names for the metric columns and for the uncertainty-method rows.
mapped_metrics = [metrics_mapping[metric] for metric in CLASSIFICATION_METRICS]
index_mapping = {key: uncertainty_mapping[key] for key in CLASSIFICATION_UNCERTAINTY}

# Average the numeric columns over all rows sharing an uncertainty method, then
# relabel rows/columns and put both into the order given by the constants.
# NOTE: this rebinds `unc_df`, which an earlier cell also assigns.
unc_df = (
    df.groupby("uncertainty")
    .mean(numeric_only=True)
    .rename(index=index_mapping, columns=metrics_mapping)
    .reindex([index_mapping[key] for key in CLASSIFICATION_UNCERTAINTY])
    .loc[:, mapped_metrics]
)

unc_df.round(2).to_csv("mrrs_unc_classification_random.csv")

In [9]:

# Per-uncertainty-method averages of the values in mrrs/mrr_regression.csv,
# written to mrrs_unc_regression_random.csv.
file_path = "../output/random/RESULTS/mrrs/mrr_regression.csv"

# Each row label is split on "_": field 0 is stored as the backbone and
# field 1 as the uncertainty method.
df = pd.read_csv(file_path, index_col=0).assign(
    backbone=lambda frame: frame.index.map(lambda label: label.split("_")[0]),
    uncertainty=lambda frame: frame.index.map(lambda label: label.split("_")[1]),
)

# Display names for the metric columns and for the uncertainty-method rows.
mapped_metrics = [metrics_mapping[metric] for metric in REGRESSION_METRICS]
index_mapping = {key: uncertainty_mapping[key] for key in REGRESSION_UNCERTAINTY}

# Average the numeric columns over all rows sharing an uncertainty method, then
# relabel rows/columns and put both into the order given by the constants.
# NOTE: this rebinds `unc_df_reg`, which an earlier cell also assigns.
unc_df_reg = (
    df.groupby("uncertainty")
    .mean(numeric_only=True)
    .rename(index=index_mapping, columns=metrics_mapping)
    .reindex([index_mapping[key] for key in REGRESSION_UNCERTAINTY])
    .loc[:, mapped_metrics]
)

unc_df_reg.round(2).to_csv("mrrs_unc_regression_random.csv")

In [21]:

# Per-backbone averages of the values in mrrs/mrr_classification.csv,
# written to mrrs_backbone_classification_random.csv.
file_path = "../output/random/RESULTS/mrrs/mrr_classification.csv"
df = pd.read_csv(file_path, index_col=0)

# Each row label is split on "_"; field 0 is stored as the backbone. Only the
# backbone is needed here, so no uncertainty column is added.
df["backbone"] = df.index.map(lambda label: label.split("_")[0])

# Display names for the metric columns, in the order of CLASSIFICATION_METRICS.
mapped_metrics = [metrics_mapping[metric] for metric in CLASSIFICATION_METRICS]

# Average the numeric columns over all rows sharing a backbone. Only the columns
# are renamed: the rows are backbones, so the uncertainty-name mapping does not
# apply to this index. reindex() keeps exactly the backbones in MODEL_NAMES, in
# that order; a listed backbone missing from the file becomes an all-NaN row.
# NOTE: the table keeps the historical name `unc_df` although it is indexed by
# backbone, and it rebinds the `unc_df` assigned by earlier cells.
unc_df = (
    df.groupby("backbone")
    .mean(numeric_only=True)
    .rename(columns=metrics_mapping)
    .reindex(MODEL_NAMES)
    .loc[:, mapped_metrics]
)

unc_df.round(2).to_csv("mrrs_backbone_classification_random.csv")

In [23]:

# Per-backbone averages of the values in mrrs/mrr_regression.csv,
# written to mrrs_backbone_regression_random.csv.
file_path = "../output/random/RESULTS/mrrs/mrr_regression.csv"
df = pd.read_csv(file_path, index_col=0)

# Each row label is split on "_"; field 0 is stored as the backbone. Only the
# backbone is needed here, so no uncertainty column is added.
df["backbone"] = df.index.map(lambda label: label.split("_")[0])

# Display names for the metric columns, in the order of REGRESSION_METRICS.
mapped_metrics = [metrics_mapping[metric] for metric in REGRESSION_METRICS]

# Average the numeric columns over all rows sharing a backbone. Only the columns
# are renamed: the rows are backbones, so the uncertainty-name mapping does not
# apply to this index. reindex() keeps exactly the backbones in MODEL_NAMES, in
# that order; a listed backbone missing from the file becomes an all-NaN row.
# NOTE: the table keeps the historical name `unc_df_reg` although it is indexed
# by backbone, and it rebinds the `unc_df_reg` assigned by earlier cells.
unc_df_reg = (
    df.groupby("backbone")
    .mean(numeric_only=True)
    .rename(columns=metrics_mapping)
    .reindex(MODEL_NAMES)
    .loc[:, mapped_metrics]
)

unc_df_reg.round(2).to_csv("mrrs_backbone_regression_random.csv")