In [1]:
import os
import mlflow
import matplotlib.pyplot as plt
import altair as alt
import numpy as np
import pandas as pd

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Left disabled; uncomment to select Altair's "html" renderer.
#alt.renderers.enable("html")
# Turn off Altair's max-rows limit on chart data.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Point MLflow at the server named by the TRACKING_URI environment variable,
# falling back to a local server when the variable is not set.
tracking_uri = os.environ.get("TRACKING_URI", "http://localhost:5000")
mlflow.set_tracking_uri(tracking_uri)

## Experiment Parameter-Search MLP

- Single random seed for model training for all models
- Single random seed for random split for all models
- Per run/model calculate atomic attribution using various methods (e.g. integrated gradients and saliency)
- Single target objective (first) using BCE-loss
- Hyperparameter search using Optuna TPE [(Tree-structured Parzen Estimator)](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.samplers.TPESampler.html)

In [3]:
# Look up the hyperparameter-search experiment by name and load its runs
# into a DataFrame (one row per run).
experiment_name = "herg_mlp_opttpe1"
exp = mlflow.get_experiment_by_name(experiment_name)
if exp is None:
    # An unknown experiment name yields None; fail with a readable message
    # instead of an AttributeError on `exp.experiment_id` below.
    raise ValueError(f"MLflow experiment {experiment_name!r} was not found")

# `experiment_ids` is documented as a list of ids, so pass it as one.
runs = mlflow.search_runs(experiment_ids=[exp.experiment_id])

In [5]:
#runs.iloc[1:].describe().T

In [6]:
#runs.iloc[1:].corr()

### Search space

In [4]:
# Columns that hold the search-space definition of each tuned parameter.
search_space = [col for col in runs.columns if "search_space/" in col]

# Print the definitions stored on the first run row, one per line.
first_row_space = runs[search_space].iloc[0].head(n=99)
for definition in first_row_space:
    print(definition)

{'name': 'momentum', 'type': 'choice', 'values': [0.2, 0.1, 0.05, 0.01]}
{'name': 'warmup_steps', 'type': 'choice', 'values': [0.01, 0.05, 0.1, 0.3]}
{'name': 'weight_decay', 'type': 'choice', 'values': [0.0, 0.001, 0.0001]}
{'name': 'dropout', 'type': 'choice', 'values': [0.0, 0.1, 0.01, 0.3]}
{'name': 'nr_layer', 'type': 'choice', 'values': [1, 2, 3, 4, 5]}
{'name': 'hidden_size', 'type': 'choice', 'values': [16, 32, 64, 128]}
{'name': 'lr', 'type': 'choice', 'values': [0.02, 0.01, 0.05, 0.001]}
{'name': 'batch_size', 'type': 'choice', 'values': [32, 64, 128, 256, 512]}


### Other + default parameters

In [5]:
# Argument columns that are not part of the search-space definition.
params = [c for c in runs.columns if "params.args" in c and "search_space" not in c]

# Print "<name>: <value>" for the first run row, without the column prefix.
first_row_values = runs[params].iloc[0].head(n=99)
for column, value in zip(params, first_row_values):
    label = column.replace("params.args/", "")
    print(f"{label}: {value}")

experiment_name: herg_mlp_opttpe1
pruner_name: None
tracking_uri: http://localhost:5000
standardize: False
batch_size: 256
minimize: False
featurizer_kwargs: {'fold': 1024, 'radius': 3, 'return_count': True, 'use_chirality': True, 'use_features': True}
patience_minimize: True
trials: 30
featurizer_name: combined
run_name: mlp random
normalize_input: True
scheduler_params: {'warmup_steps': 0.1}
scheduler: linear_with_warmup
stochastic_weight_avg: False
track_metrics: ...value too long for mlflow - not inserted
lr: 0.01
gradient_clip_val: 1.0
optimizer_params: {'weight_decay': 0.001}
seed: 3549156989
split_seed: 4256484565
dropout: 0.1
checkpoint_objective: val/loss
max_steps: 1000
use_labels: ['active_g10']
patience_objective: val/loss
optimizer: adamw
cache_dir: ../../../data/herg/
featurizer_chunksize: 100
objective_name: val/AUROC
featurizer_mp_context: fork
attribution_kwargs: ...value too long for mlflow - not inserted
momentum: 0.01
split_size: (0.6, 0.2, 0.2)
batch_norm: True
num

### Best parameters
- Found within this scenario

In [6]:
# Print the best hyperparameters recorded on the first run row.
params = [c for c in runs.columns if "params.best" in c]
for p, v in zip(params, runs[params].iloc[0].head(n=99)):
    # Strip the "params.best/" prefix. The previous code stripped
    # "params.args/", which never occurs in these column names, so the
    # full column name was printed.
    print(f"{p.replace('params.best/', '')}: {v}")

params.best/lr: 0.001
params.best/dropout: 0.0
params.best/batch_size: 512
params.best/warmup_steps: 0.1
params.best/momentum: 0.1
params.best/weight_decay: 0.0
params.best/hidden_size: 128
params.best/nr_layer: 3


### Metrics
- Include metrics for atomic attribution/weights
    - Values were calculated using active hergophores on all rows for which hERG activity was predicted as negative
    - Threshold was calculated per individual model using threshold moving

In [7]:
# Test metrics to summarise across the search runs: model performance plus
# attribution scores on rows predicted as inactive.
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/loss",
    "metrics.test/mean/avg_score_pred_inactive/integrated_gradients",
    "metrics.test/mean/avg_score_pred_inactive/saliency",
    "metrics.test/mean/avg_score_pred_inactive/saliency-absolute"
]
# Select columns by exact name. The previous `any(c in m ...)` test checked
# whether the column name was a *substring* of a listed metric, so a column
# named like a fragment of one of them would have been selected as well.
metrics_columns = [c for c in runs.columns if c in metrics_names]
# The first row is skipped.
# NOTE(review): presumably row 0 is the parent/summary run of the search - confirm.
metrics = runs[metrics_columns].iloc[1:]

# Label each column with the last path segment of its metric name.
metrics.columns = [c.split("/")[-1] for c in metrics.columns]
metrics.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AUROC,30.0,0.877716,0.01225,0.838827,0.873341,0.880409,0.888573,0.891118
saliency,30.0,0.658981,0.019654,0.608037,0.651456,0.660824,0.667536,0.699535
saliency-absolute,30.0,0.422159,0.012136,0.404077,0.413293,0.422446,0.426975,0.453969
integrated_gradients,30.0,0.722171,0.007723,0.709545,0.717815,0.724016,0.724974,0.747357
loss,30.0,0.461241,0.044591,0.421659,0.428316,0.452726,0.478047,0.622914


#### Correlation

- Correlation between various metrics

In [8]:
metrics.corr(method="pearson")

Unnamed: 0,AUROC,saliency,saliency-absolute,integrated_gradients,loss
AUROC,1.0,-0.212326,-0.183777,0.022196,-0.807773
saliency,-0.212326,1.0,0.527091,0.790705,0.326532
saliency-absolute,-0.183777,0.527091,1.0,0.5858,0.061934
integrated_gradients,0.022196,0.790705,0.5858,1.0,0.077498
loss,-0.807773,0.326532,0.061934,0.077498,1.0


In [10]:
x = "AUROC"
y = "integrated_gradients"

# Scatter plot of the two metrics; neither axis is forced to include zero.
scatter = alt.Chart(metrics).mark_point().encode(
    x=alt.X(x, scale=alt.Scale(zero=False)),
    y=alt.Y(y, scale=alt.Scale(zero=False)),
)
# Regression line of y on x, drawn on top of the points.
trend_line = scatter.transform_regression(x, y).mark_line()

chart = scatter + trend_line
chart

In [12]:
x = "AUROC"
y = "saliency"

# Scatter plot of the two metrics; neither axis is forced to include zero.
scatter = alt.Chart(metrics).mark_point().encode(
    x=alt.X(x, scale=alt.Scale(zero=False)),
    y=alt.Y(y, scale=alt.Scale(zero=False)),
)
# Regression line of y on x, drawn on top of the points.
trend_line = scatter.transform_regression(x, y).mark_line()

chart = scatter + trend_line
chart

## Experiment Best MLP (kfold cross validated)

- Using best parameters found (see above)
    - Ran 5-fold cross validation, repeated 4 times
    - Results/metrics were calculated using the mean of all individual runs (4 * 5 = 20 runs)
    - For each of the 4 repetitions, the seed for model training and splitting was chosen randomly

In [8]:
# Look up the k-fold experiment by name and load its runs into a DataFrame.
experiment_name = "herg_mlp_best_kfold"
exp = mlflow.get_experiment_by_name(experiment_name)
if exp is None:
    # An unknown experiment name yields None; fail with a readable message
    # instead of an AttributeError on `exp.experiment_id` below.
    raise ValueError(f"MLflow experiment {experiment_name!r} was not found")

# Exclude runs whose run name is 'summary' so only individual runs remain.
query = "tags.mlflow.runName != 'summary'"
# `experiment_ids` is documented as a list of ids, so pass it as one.
runs = mlflow.search_runs(experiment_ids=[exp.experiment_id], filter_string=query)

### Seeds


In [20]:
# Show the training seed and split seed next to each run id.
seed_columns = ["run_id", "params.seed", "params.split_seed"]
runs[seed_columns].head(n=99)

Unnamed: 0,run_id,params.seed,params.split_seed
0,6a6b7d0b0e95467cae36382ceb6960c4,3866560668,3866560668
1,848fe153a644432ca010c0237593d7cc,3866560668,3866560668
2,1cd9ac20d8e540359cc137f77f493adc,3866560668,3866560668
3,5a5ae9ecfc6d4b4ba85ef607b3e3f125,3866560668,3866560668
4,ba98cfaf590743548b1495b8f593f985,3866560668,3866560668
5,d0adfc6707b34a858262226855645f80,350281109,350281109
6,996d66d2ec5d4597b2e2b43cdf1f20cf,350281109,350281109
7,3c1bb8b6ce034011a786b272dd2f5ef0,350281109,350281109
8,4d74b5068e8742b7849083405a624e20,350281109,350281109
9,69e529ece08141dd88ce377f3b1d94ab,350281109,350281109


### Parameters

In [21]:
# Parameter columns, leaving out anything that belongs to a search space.
params = [c for c in runs.columns if "params" in c and "search_space" not in c]

# Print "<name>: <value>" for the first run row, without the "params." prefix.
first_row_values = runs[params].iloc[0].head(n=99)
for column, value in zip(params, first_row_values):
    label = column.replace("params.", "")
    print(f"{label}: {value}")

smile1: c1ccccc1CNCC - active
attribution_kwargs/data_types: ['test']
featurizer_kwargs/use_chirality: True
auto_scale_batch_size: False
batch_norm: True
fast_dev_run: False
ignore_index: -100
momentum: 0.1
num_val_batches: []
move_metrics_to_cpu: False
batch_size: 512
attribution_kwargs/label: active_g10
split_size: (5, 4, 0)
limit_predict_batches: 1.0
gradient_clip_val: 1.0
categorical_indices: None
num_workers: 8
patience: 10
featurizer_mp_context: fork
scheduler_params/warmup_steps: 0.1
featurizer_kwargs/use_features: True
log_every_n_steps: 50
minimize: True
split_seed: 3866560668
num_gpus: 1
overfit_batches: 0.0
normalize_input: True
num_classes: 2
checkpoint_minimize: True
standardize: False
stochastic_weight_avg: False
featurizer_kwargs/return_count: True
hidden_size: [128, 128, 128]
limit_test_batches: 1.0
use_cache: True
should_stop: False
attribution_kwargs/label_idx: 0
auto_lr_find: False
max_steps: 1000
attribution_kwargs/references: [('CCOc1ccccc1', 1), ('c1ccccc1CNCC', 1

### Metrics

#### Performance on hERG dataset

In [12]:
# Test performance metrics to summarise across the k-fold runs.
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/loss",
]
# Select columns by exact name. The previous `any(c in m ...)` test checked
# whether the column name was a *substring* of a listed metric, so a column
# named like a fragment of one of them would have been selected as well.
metrics_columns = [c for c in runs.columns if c in metrics_names]
metrics = runs[metrics_columns]

# Label each column with the last path segment of its metric name.
metrics.columns = [c.split("/")[-1] for c in metrics.columns]
metrics.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
loss,20.0,0.444532,0.017877,0.409,0.434584,0.445971,0.457906,0.474792
AUROC,20.0,0.87779,0.010099,0.864952,0.870095,0.876105,0.883195,0.895859


#### Active Hergophores on Inactive Model Predictions

- Atomic attribution using various methods

In [26]:
# Attribution scores on rows predicted as inactive, one entry per method.
metrics_names = [
    "metrics.test/mean/avg_score_pred_inactive/integrated_gradients",
    "metrics.test/mean/avg_score_pred_inactive/saliency",
    "metrics.test/mean/avg_score_pred_inactive/saliency-absolute",
    "metrics.test/mean/avg_score_pred_inactive/input_x_gradient",
    "metrics.test/mean/avg_score_pred_inactive/occlusion",
    "metrics.test/mean/avg_score_pred_inactive/deeplift",
    "metrics.test/mean/avg_score_pred_inactive/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_inactive/noise_tunnel_ig",
]
# Select columns by exact name. The previous `any(c in m ...)` test checked
# whether the column name was a *substring* of a listed metric, so a column
# named like a fragment of one of them would have been selected as well.
metrics_columns = [c for c in runs.columns if c in metrics_names]
metrics = runs[metrics_columns]

# Label each column with the last path segment (the attribution method).
metrics.columns = [c.split("/")[-1] for c in metrics.columns]
metrics.describe().T


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
input_x_gradient,20.0,0.673656,0.015442,0.64342,0.666187,0.673315,0.686064,0.6945
occlusion,20.0,0.660455,0.029884,0.585352,0.646043,0.659704,0.686653,0.708293
shapley_value_sampling,20.0,0.703891,0.014118,0.671523,0.700048,0.707599,0.711563,0.721415
saliency,20.0,0.653187,0.022565,0.59437,0.639084,0.659191,0.667337,0.692389
integrated_gradients,20.0,0.685782,0.018095,0.652881,0.673012,0.689654,0.700345,0.711965
deeplift,20.0,0.686678,0.018304,0.650968,0.672346,0.690172,0.698949,0.713166
noise_tunnel_ig,20.0,0.674616,0.0216,0.618974,0.664155,0.674567,0.68535,0.712579
saliency-absolute,20.0,0.411718,0.01516,0.378439,0.404915,0.412747,0.424115,0.432723


#### Active Hergophores on Active Model Predictions

- Atomic attribution using various methods

In [27]:
# Attribution scores on rows predicted as active, one entry per method
# (plus the bare "avg_score_pred_active" name, kept from the original list).
metrics_names = [
    "metrics.test/mean/avg_score_pred_active",
    "metrics.test/mean/avg_score_pred_active/integrated_gradients",
    "metrics.test/mean/avg_score_pred_active/saliency",
    "metrics.test/mean/avg_score_pred_active/saliency-absolute",
    "metrics.test/mean/avg_score_pred_active/input_x_gradient",
    "metrics.test/mean/avg_score_pred_active/occlusion",
    "metrics.test/mean/avg_score_pred_active/deeplift",
    "metrics.test/mean/avg_score_pred_active/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_active/noise_tunnel_ig",
]
# Select columns by exact name. The previous `any(c in m ...)` test checked
# whether the column name was a *substring* of a listed metric, so a column
# named like a fragment of one of them would have been selected as well.
metrics_columns = [c for c in runs.columns if c in metrics_names]
metrics = runs[metrics_columns]

# Label each column with the last path segment (the attribution method).
metrics.columns = [c.split("/")[-1] for c in metrics.columns]
metrics.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
deeplift,20.0,0.661436,0.019808,0.604043,0.654186,0.666387,0.674754,0.682743
shapley_value_sampling,20.0,0.678407,0.012596,0.649541,0.673545,0.681229,0.688067,0.693171
saliency-absolute,20.0,0.443144,0.016747,0.407788,0.432806,0.444909,0.45174,0.473916
noise_tunnel_ig,20.0,0.656777,0.017179,0.611564,0.651692,0.659322,0.665696,0.686385
saliency,20.0,0.610513,0.02443,0.563042,0.594073,0.614535,0.623,0.659853
input_x_gradient,20.0,0.649452,0.01304,0.627294,0.642662,0.649568,0.654752,0.678476
integrated_gradients,20.0,0.66223,0.017332,0.612838,0.654511,0.668836,0.674633,0.681135
occlusion,20.0,0.661626,0.016927,0.629488,0.650606,0.668766,0.672189,0.684617


#### Correlations

In [29]:
# Correlate model performance with the predicted-inactive attribution scores.
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/loss",
]

metrics_names += [
    "metrics.test/mean/avg_score_pred_inactive/integrated_gradients",
    "metrics.test/mean/avg_score_pred_inactive/saliency",
    "metrics.test/mean/avg_score_pred_inactive/saliency-absolute",
    "metrics.test/mean/avg_score_pred_inactive/input_x_gradient",
    "metrics.test/mean/avg_score_pred_inactive/occlusion",
    "metrics.test/mean/avg_score_pred_inactive/deeplift",
    "metrics.test/mean/avg_score_pred_inactive/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_inactive/noise_tunnel_ig",
]
# Select columns by exact name. The previous `any(c in m ...)` test checked
# whether the column name was a *substring* of a listed metric, so a column
# named like a fragment of one of them would have been selected as well.
metrics_columns = [c for c in runs.columns if c in metrics_names]
metrics = runs[metrics_columns]

# Label each column with the last path segment of its metric name.
metrics.columns = [c.split("/")[-1] for c in metrics.columns]
# Pairwise Pearson correlation matrix. The trailing `.order` was removed:
# it is not a DataFrame attribute and raised AttributeError when re-run.
metrics.corr(method="pearson")

Unnamed: 0,input_x_gradient,occlusion,shapley_value_sampling,loss,saliency,integrated_gradients,AUROC,deeplift,noise_tunnel_ig,saliency-absolute
input_x_gradient,1.0,0.736982,0.684457,0.173471,0.795202,0.850723,-0.240127,0.828172,0.254193,0.360526
occlusion,0.736982,1.0,0.611553,0.03943,0.699862,0.705018,-0.087137,0.720586,0.416553,0.421077
shapley_value_sampling,0.684457,0.611553,1.0,-0.22891,0.726744,0.498122,0.182691,0.479755,0.413949,0.228049
loss,0.173471,0.03943,-0.22891,1.0,-0.165068,0.272456,-0.970184,0.269424,-0.152226,0.048935
saliency,0.795202,0.699862,0.726744,-0.165068,1.0,0.548351,0.087724,0.518943,0.152148,0.517768
integrated_gradients,0.850723,0.705018,0.498122,0.272456,0.548351,1.0,-0.318141,0.981481,0.26829,0.367018
AUROC,-0.240127,-0.087137,0.182691,-0.970184,0.087724,-0.318141,1.0,-0.337791,0.129198,-0.038397
deeplift,0.828172,0.720586,0.479755,0.269424,0.518943,0.981481,-0.337791,1.0,0.256552,0.29295
noise_tunnel_ig,0.254193,0.416553,0.413949,-0.152226,0.152148,0.26829,0.129198,0.256552,1.0,-0.302355
saliency-absolute,0.360526,0.421077,0.228049,0.048935,0.517768,0.367018,-0.038397,0.29295,-0.302355,1.0
