In [1]:
import os
import mlflow
import matplotlib.pyplot as plt
import altair as alt
import numpy as np
import pandas as pd

%matplotlib inline

# Alternative Altair renderer, left disabled.
#alt.renderers.enable("html")
# Remove Altair's cap on the number of rows a chart's dataset may contain.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Point MLflow at the tracking server: the TRACKING_URI environment variable
# takes precedence, otherwise a local server on port 5000 is used.
tracking_uri = os.environ.get("TRACKING_URI", "http://localhost:5000")
mlflow.set_tracking_uri(tracking_uri)

## Experiment Parameter-Search MLP

- Single random seed for model training for all models
- Single random seed for random split for all models
- For each run/model, atomic attributions are calculated using various methods (e.g. integrated gradients and saliency)
- Single target objective (first) using BCE-loss
- Hyperparameter search using Optuna TPE [(Tree-structured Parzen Estimator)](https://optuna.readthedocs.io/en/stable/reference/generated/optuna.samplers.TPESampler.html)

In [3]:
# Load every run logged for the hyper-parameter search experiment.
experiment_name = "herg_mlp_opttpe1"
exp = mlflow.get_experiment_by_name(experiment_name)
# get_experiment_by_name returns None for an unknown name; fail here with a
# clear message instead of an AttributeError on `exp.experiment_id` below.
if exp is None:
    raise ValueError(
        f"MLflow experiment {experiment_name!r} not found at {mlflow.get_tracking_uri()}"
    )

# search_runs documents `experiment_ids` as a list of ids, so wrap the single id.
runs = mlflow.search_runs(experiment_ids=[exp.experiment_id])

In [4]:
#runs.iloc[1:].describe().T

In [5]:
#runs.iloc[1:].corr()

### Search space

In [4]:
# Columns holding the search-space definition of each tuned hyper-parameter.
search_space = [col for col in runs.columns if "search_space/" in col]
# Print the definitions as logged on the first run row, one per line.
first_row_space = runs[search_space].iloc[0].head(n=99)
for definition in first_row_space:
    print(definition)

{'name': 'batch_size', 'type': 'choice', 'values': [32, 64, 128, 256, 512]}
{'name': 'weight_decay', 'type': 'choice', 'values': [0.0, 0.001, 0.0001]}
{'name': 'momentum', 'type': 'choice', 'values': [0.2, 0.1, 0.05, 0.01]}
{'name': 'dropout', 'type': 'choice', 'values': [0.0, 0.1, 0.01, 0.3]}
{'name': 'lr', 'type': 'choice', 'values': [0.02, 0.01, 0.05, 0.001]}
{'name': 'hidden_size', 'type': 'choice', 'values': [16, 32, 64, 128]}
{'name': 'nr_layer', 'type': 'choice', 'values': [1, 2, 3, 4, 5]}
{'name': 'warmup_steps', 'type': 'choice', 'values': [0.01, 0.05, 0.1, 0.3]}


### Other + default parameters

In [5]:
# Logged "params.args" columns, leaving out the search-space definitions.
params = [
    col
    for col in runs.columns
    if "params.args" in col and "search_space" not in col
]
# Show name/value pairs from the first run row without the column prefix.
for name, value in runs[params].iloc[0].head(n=99).items():
    print(f"{name.replace('params.args/', '')}: {value}")

checkpoint_objective: val/loss
momentum: 0.01
normalize_input: True
split_size: (0.6, 0.2, 0.2)
stochastic_weight_avg: False
dropout: 0.1
track_metrics: ...value too long for mlflow - not inserted
use_labels: ['active_g10']
batch_norm: True
scheduler_params: {'warmup_steps': 0.1}
optimizer_params: {'weight_decay': 0.001}
minimize: False
objective_name: val/AUROC
standardize: False
experiment_name: herg_mlp_opttpe1
patience: 10
lr: 0.01
num_workers: 8
tracking_uri: http://localhost:5000
hidden_size: [32, 32, 32]
featurizer_chunksize: 100
checkpoint_minimize: True
max_steps: 1000
patience_objective: val/loss
trials: 30
batch_size: 256
featurizer_kwargs: {'fold': 1024, 'radius': 3, 'return_count': True, 'use_chirality': True, 'use_features': True}
featurizer_name: combined
scheduler: linear_with_warmup
patience_minimize: True
pruner_name: None
gradient_clip_val: 1.0
attribution_kwargs: ...value too long for mlflow - not inserted
split_seed: 4256484565
seed: 3549156989
run_name: mlp random

### Best parameters
- Found within this scenario

In [6]:
# Best hyper-parameters reported by the search, read from the first run row.
params = [c for c in runs.columns if "params.best" in c]
for p, v in zip(params, runs[params].iloc[0].head(n=99)):
    # Strip the "params.best/" column prefix so only the hyper-parameter
    # name is printed.
    print(f"{p.replace('params.best/', '')}: {v}")

params.best/dropout: 0.0
params.best/lr: 0.001
params.best/hidden_size: 128
params.best/momentum: 0.1
params.best/weight_decay: 0.0
params.best/nr_layer: 3
params.best/warmup_steps: 0.1
params.best/batch_size: 512


### Metrics
- Include metrics for atomic attribution/weights
    - Values were calculated using active hergophores on all rows for which hERG activity was predicted as negative
    - Threshold was calculated per individual model using threshold moving

In [24]:
# Test metrics to summarise across the runs of the parameter search.
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/loss",
    "metrics.test/mean/avg_score_pred_inactive/integrated_gradients",
    "metrics.test/mean/avg_score_pred_inactive/saliency",
    "metrics.test/mean/avg_score_pred_inactive/saliency-absolute"
]
# Select columns by exact name. A substring test of the column name against
# the requested names would also pick up any column whose name merely happens
# to be contained in a requested one.
metrics_columns = [c for c in runs.columns if c in metrics_names]
# NOTE(review): `.iloc[1:]` skips the first row returned by search_runs --
# presumably the parent run of the search (other cells read the search space
# and best parameters from row 0); confirm the ordering assumption.
metrics = (
    runs[metrics_columns]
    .iloc[1:]
    # Keep only the last path segment as a short column label.
    .rename(columns=lambda name: name.split("/")[-1])
)

metrics.describe().T.sort_values(by="mean", ascending=False)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AUROC,19.0,0.877526,0.010305,0.864952,0.869901,0.872901,0.88259,0.895859
integrated_gradients,19.0,0.68561,0.018574,0.652881,0.671842,0.690259,0.700468,0.711965
saliency,19.0,0.652748,0.023095,0.59437,0.638932,0.65684,0.667345,0.692389
loss,19.0,0.445024,0.018228,0.409,0.43547,0.447825,0.459391,0.474792
saliency-absolute,19.0,0.411331,0.015473,0.378439,0.404284,0.409808,0.42453,0.432723


#### Correlation

- Correlation between various metrics

In [8]:
# Pairwise Pearson correlation between the columns of `metrics`.
metrics_corr = metrics.corr(method="pearson")
metrics_corr

Unnamed: 0,saliency-absolute,saliency,AUROC,integrated_gradients,loss
saliency-absolute,1.0,0.527091,-0.183777,0.5858,0.061934
saliency,0.527091,1.0,-0.212326,0.790705,0.326532
AUROC,-0.183777,-0.212326,1.0,0.022196,-0.807773
integrated_gradients,0.5858,0.790705,0.022196,1.0,0.077498
loss,0.061934,0.326532,-0.807773,0.077498,1.0


In [9]:
# Scatter plot of two metrics with a fitted regression line on top.
x, y = "AUROC", "integrated_gradients"

# Axes are not forced to include zero so the point cloud fills the plot.
points = alt.Chart(metrics).mark_point().encode(
    x=alt.X(x, scale=alt.Scale(zero=False)),
    y=alt.Y(y, scale=alt.Scale(zero=False)),
)
trend_line = points.transform_regression(x, y).mark_line()

chart = points + trend_line
# Optional export, left disabled:
#chart.save('chart.html', embed_options={'renderer':'svg'})
chart

In [10]:
# Scatter plot of two metrics with a fitted regression line on top.
x, y = "AUROC", "saliency"

# Axes are not forced to include zero so the point cloud fills the plot.
points = alt.Chart(metrics).mark_point().encode(
    x=alt.X(x, scale=alt.Scale(zero=False)),
    y=alt.Y(y, scale=alt.Scale(zero=False)),
)
trend_line = points.transform_regression(x, y).mark_line()

chart = points + trend_line
# Alternative way to display the chart, left disabled:
#chart.show()
chart

## Experiment Best MLP (kfold cross validated)

- Using best parameters found (see above)
    - 5-fold cross-validation was run 4 times
    - Results/metrics were calculated as the mean over all individual runs (4 * 5 = 20 runs)
    - For each of the 4 repetitions, the seed for model training and splitting was chosen randomly

In [11]:
# Load the individual fold runs of the k-fold experiment.
experiment_name = "herg_mlp_best_kfold"
exp = mlflow.get_experiment_by_name(experiment_name)
# get_experiment_by_name returns None for an unknown name; fail here with a
# clear message instead of an AttributeError on `exp.experiment_id` below.
if exp is None:
    raise ValueError(
        f"MLflow experiment {experiment_name!r} not found at {mlflow.get_tracking_uri()}"
    )

# Exclude the runs named 'summary' so only the per-fold runs remain.
query = "tags.mlflow.runName != 'summary'"
# search_runs documents `experiment_ids` as a list of ids, so wrap the single id.
runs = mlflow.search_runs(experiment_ids=[exp.experiment_id], filter_string=query)

### Seeds


In [12]:
# Run id together with the training seed and the split seed of each run.
seed_columns = ["run_id", "params.seed", "params.split_seed"]
runs.loc[:, seed_columns].head(n=99)

Unnamed: 0,run_id,params.seed,params.split_seed
0,6a6b7d0b0e95467cae36382ceb6960c4,3866560668,3866560668
1,848fe153a644432ca010c0237593d7cc,3866560668,3866560668
2,1cd9ac20d8e540359cc137f77f493adc,3866560668,3866560668
3,5a5ae9ecfc6d4b4ba85ef607b3e3f125,3866560668,3866560668
4,ba98cfaf590743548b1495b8f593f985,3866560668,3866560668
5,d0adfc6707b34a858262226855645f80,350281109,350281109
6,996d66d2ec5d4597b2e2b43cdf1f20cf,350281109,350281109
7,3c1bb8b6ce034011a786b272dd2f5ef0,350281109,350281109
8,4d74b5068e8742b7849083405a624e20,350281109,350281109
9,69e529ece08141dd88ce377f3b1d94ab,350281109,350281109


### Parameters

In [13]:
# All logged parameter columns except the search-space definitions.
params = [
    col
    for col in runs.columns
    if "params" in col and "search_space" not in col
]
# Show name/value pairs from the first run row without the "params." prefix.
for name, value in runs[params].iloc[0].head(n=99).items():
    print(f"{name.replace('params.', '')}: {value}")

attribution_kwargs/label: active_g10
dropout: 0.0
patience_objective: val/loss
split_type: random_kfold
gradient_clip_val: 1.0
max_steps: 1000
weights_summary: top
num_workers: 8
smile1: c1ccccc1CNCC - active
featurizer_mp_context: fork
featurizer_n_jobs: 8
reload_dataloaders_every_n_epochs: 0
num_val_batches: []
scheduler_params/warmup_steps: 0.1
smile2: c1ccccc1CCNC - active
attribution_kwargs/label_idx: 0
scheduler: linear_with_warmup
overfit_batches: 0.0
featurizer_kwargs/use_features: True
smile0: CCOc1ccccc1 - active
featurizer_kwargs/radius: 3
auto_lr_find: False
featurizer_kwargs/return_count: True
trainable_parameters: 292225
fast_dev_run: False
stochastic_weight_avg: False
checkpoint_minimize: True
featurizer_name: combined
objective_name: val/loss
optimizer: adamw
optimizer_params/weight_decay: 0.0001
num_classes: 2
split_size: (5, 4, 0)
check_val_every_n_epoch: 1
run_name: mlp
train/threshold-t0: 0.5517712235450745
featurizer_kwargs/use_chirality: True
batch_size: 512
attri

### Metrics

#### Performance on hERG dataset

In [23]:
# Predictive performance metrics on the test split of every fold run.
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/loss",
]
# Select columns by exact name. A substring test of the column name against
# the requested names would also pick up any column whose name merely happens
# to be contained in a requested one.
metrics_columns = [c for c in runs.columns if c in metrics_names]
# Keep only the last path segment as a short column label; `rename` returns
# a new frame, so `runs` itself is left untouched.
metrics = runs[metrics_columns].rename(columns=lambda name: name.split("/")[-1])

metrics.describe().T.sort_values(by="mean", ascending=False)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AUROC,20.0,0.87779,0.010099,0.864952,0.870095,0.876105,0.883195,0.895859
loss,20.0,0.444532,0.017877,0.409,0.434584,0.445971,0.457906,0.474792


#### Active Hergophores on Inactive Model Predictions

- Atomic attribution using various methods

In [22]:
# Attribution scores (one column per attribution method) for rows predicted
# as inactive.
metrics_names = [
    "metrics.test/mean/avg_score_pred_inactive/integrated_gradients",
    "metrics.test/mean/avg_score_pred_inactive/saliency",
    "metrics.test/mean/avg_score_pred_inactive/saliency-absolute",
    "metrics.test/mean/avg_score_pred_inactive/input_x_gradient",
    "metrics.test/mean/avg_score_pred_inactive/occlusion",
    "metrics.test/mean/avg_score_pred_inactive/deeplift",
    "metrics.test/mean/avg_score_pred_inactive/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_inactive/noise_tunnel_ig",
]
# Select columns by exact name. A substring test of the column name against
# the requested names would also pick up any column whose name merely happens
# to be contained in a requested one (e.g. ".../saliency" inside
# ".../saliency-absolute").
metrics_columns = [c for c in runs.columns if c in metrics_names]
# Keep only the last path segment (the method name) as column label; `rename`
# returns a new frame, so `runs` itself is left untouched.
metrics = runs[metrics_columns].rename(columns=lambda name: name.split("/")[-1])

metrics.describe().T.sort_values(by="mean", ascending=False)


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
shapley_value_sampling,20.0,0.703891,0.014118,0.671523,0.700048,0.707599,0.711563,0.721415
deeplift,20.0,0.686678,0.018304,0.650968,0.672346,0.690172,0.698949,0.713166
integrated_gradients,20.0,0.685782,0.018095,0.652881,0.673012,0.689654,0.700345,0.711965
noise_tunnel_ig,20.0,0.674616,0.0216,0.618974,0.664155,0.674567,0.68535,0.712579
input_x_gradient,20.0,0.673656,0.015442,0.64342,0.666187,0.673315,0.686064,0.6945
occlusion,20.0,0.660455,0.029884,0.585352,0.646043,0.659704,0.686653,0.708293
saliency,20.0,0.653187,0.022565,0.59437,0.639084,0.659191,0.667337,0.692389
saliency-absolute,20.0,0.411718,0.01516,0.378439,0.404915,0.412747,0.424115,0.432723


#### Active Hergophores on Active Model Predictions

- Atomic attribution using various methods

In [20]:
# Attribution scores (one column per attribution method) for rows predicted
# as active.
metrics_names = [
    # NOTE(review): bare prefix entry; it only selects a column with exactly
    # this name -- confirm whether it is still needed.
    "metrics.test/mean/avg_score_pred_active",
    "metrics.test/mean/avg_score_pred_active/integrated_gradients",
    "metrics.test/mean/avg_score_pred_active/saliency",
    "metrics.test/mean/avg_score_pred_active/saliency-absolute",
    "metrics.test/mean/avg_score_pred_active/input_x_gradient",
    "metrics.test/mean/avg_score_pred_active/occlusion",
    "metrics.test/mean/avg_score_pred_active/deeplift",
    "metrics.test/mean/avg_score_pred_active/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_active/noise_tunnel_ig",
]
# Select columns by exact name. A substring test of the column name against
# the requested names would also pick up any column whose name merely happens
# to be contained in a requested one (e.g. ".../saliency" inside
# ".../saliency-absolute").
metrics_columns = [c for c in runs.columns if c in metrics_names]
# Keep only the last path segment (the method name) as column label; `rename`
# returns a new frame, so `runs` itself is left untouched.
metrics = runs[metrics_columns].rename(columns=lambda name: name.split("/")[-1])

metrics.describe().T.sort_values(by="mean", ascending=False)

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
shapley_value_sampling,20.0,0.678407,0.012596,0.649541,0.673545,0.681229,0.688067,0.693171
integrated_gradients,20.0,0.66223,0.017332,0.612838,0.654511,0.668836,0.674633,0.681135
occlusion,20.0,0.661626,0.016927,0.629488,0.650606,0.668766,0.672189,0.684617
deeplift,20.0,0.661436,0.019808,0.604043,0.654186,0.666387,0.674754,0.682743
noise_tunnel_ig,20.0,0.656777,0.017179,0.611564,0.651692,0.659322,0.665696,0.686385
input_x_gradient,20.0,0.649452,0.01304,0.627294,0.642662,0.649568,0.654752,0.678476
saliency,20.0,0.610513,0.02443,0.563042,0.594073,0.614535,0.623,0.659853
saliency-absolute,20.0,0.443144,0.016747,0.407788,0.432806,0.444909,0.45174,0.473916


#### Correlations

In [21]:
# Performance metrics ...
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/loss",
]

# ... plus the attribution scores for rows predicted as inactive.
metrics_names += [
    "metrics.test/mean/avg_score_pred_inactive/integrated_gradients",
    "metrics.test/mean/avg_score_pred_inactive/saliency",
    "metrics.test/mean/avg_score_pred_inactive/saliency-absolute",
    "metrics.test/mean/avg_score_pred_inactive/input_x_gradient",
    "metrics.test/mean/avg_score_pred_inactive/occlusion",
    "metrics.test/mean/avg_score_pred_inactive/deeplift",
    "metrics.test/mean/avg_score_pred_inactive/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_inactive/noise_tunnel_ig",
]
# Select columns by exact name. A substring test of the column name against
# the requested names would also pick up any column whose name merely happens
# to be contained in a requested one (e.g. ".../saliency" inside
# ".../saliency-absolute").
metrics_columns = [c for c in runs.columns if c in metrics_names]
# Keep only the last path segment as a short column label; `rename` returns
# a new frame, so `runs` itself is left untouched.
metrics = runs[metrics_columns].rename(columns=lambda name: name.split("/")[-1])

# Pairwise Pearson correlation between performance and attribution metrics.
metrics.corr(method="pearson")

Unnamed: 0,occlusion,input_x_gradient,shapley_value_sampling,noise_tunnel_ig,saliency-absolute,saliency,AUROC,integrated_gradients,deeplift,loss
occlusion,1.0,0.736982,0.611553,0.416553,0.421077,0.699862,-0.087137,0.705018,0.720586,0.03943
input_x_gradient,0.736982,1.0,0.684457,0.254193,0.360526,0.795202,-0.240127,0.850723,0.828172,0.173471
shapley_value_sampling,0.611553,0.684457,1.0,0.413949,0.228049,0.726744,0.182691,0.498122,0.479755,-0.22891
noise_tunnel_ig,0.416553,0.254193,0.413949,1.0,-0.302355,0.152148,0.129198,0.26829,0.256552,-0.152226
saliency-absolute,0.421077,0.360526,0.228049,-0.302355,1.0,0.517768,-0.038397,0.367018,0.29295,0.048935
saliency,0.699862,0.795202,0.726744,0.152148,0.517768,1.0,0.087724,0.548351,0.518943,-0.165068
AUROC,-0.087137,-0.240127,0.182691,0.129198,-0.038397,0.087724,1.0,-0.318141,-0.337791,-0.970184
integrated_gradients,0.705018,0.850723,0.498122,0.26829,0.367018,0.548351,-0.318141,1.0,0.981481,0.272456
deeplift,0.720586,0.828172,0.479755,0.256552,0.29295,0.518943,-0.337791,0.981481,1.0,0.269424
loss,0.03943,0.173471,-0.22891,-0.152226,0.048935,-0.165068,-0.970184,0.272456,0.269424,1.0
