In [2]:
import os
import mlflow
import matplotlib.pyplot as plt
import altair as alt
import numpy as np
import pandas as pd

%matplotlib inline

# Alternative Altair renderer; left disabled.
#alt.renderers.enable("html")
# Disable Altair's max-rows check on chart datasets.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [3]:
# Resolve the MLflow tracking server: the TRACKING_URI environment variable
# takes precedence, otherwise a local server is used.
tracking_uri = os.environ.get("TRACKING_URI", "http://localhost:5000")
mlflow.set_tracking_uri(tracking_uri)

## Experiment Parameter-Search Random Forest

- Single random seed for model training for all models
- Single random seed for random split for all models
- For each run/model, atomic attributions are calculated using various methods (e.g. treeinterpreter, global impurity-based feature importance of the random forest)
- Single target objective (first)
- Hyperparameter search using the Optuna TPE sampler (Tree-structured Parzen Estimator, see https://optuna.readthedocs.io/en/stable/reference/generated/optuna.samplers.TPESampler.html)

In [4]:
# Load every run of the hyperparameter-search experiment into a DataFrame.
experiment_name = "herg_rf_opttpe1"
exp = mlflow.get_experiment_by_name(experiment_name)
# get_experiment_by_name returns None for an unknown name; fail here with a
# readable message instead of an AttributeError on `exp.experiment_id`.
if exp is None:
    raise ValueError(
        f"MLflow experiment {experiment_name!r} not found at {mlflow.get_tracking_uri()}"
    )

# search_runs documents `experiment_ids` as a list of IDs.
runs = mlflow.search_runs(experiment_ids=[exp.experiment_id])

In [None]:
#runs.iloc[1:].describe().T

In [None]:
#runs.iloc[1:].corr()

### Search space

In [5]:
# Columns that hold the logged search-space definitions.
search_space = [col for col in runs.columns if "search_space/" in col]

# Print each search-space entry of the first run, one per line.
first_run_space = runs[search_space].iloc[0].head(n=99)
for r in first_run_space:
    print(r)

{'name': 'bootstrap', 'type': 'choice', 'values': [True, False]}
{'name': 'max_depth', 'type': 'choice', 'values': [None, 10, 20, 30, 40, 50, 60, 70]}
{'name': 'criterion', 'type': 'choice', 'values': ['gini', 'entropy']}
{'name': 'min_samples_leaf', 'type': 'choice', 'values': [1, 2, 4]}
{'name': 'n_estimators', 'type': 'choice', 'values': [10, 50, 100, 200, 500]}
{'name': 'min_samples_split', 'type': 'choice', 'values': [2, 5, 10]}
{'name': 'max_features', 'type': 'choice', 'values': ['auto', 'sqrt']}


### Other + default parameters

In [6]:
# Logged run arguments, excluding the search-space definitions.
params = [
    col for col in runs.columns
    if "params.args" in col and "search_space" not in col
]

# Show the first run's arguments as "name: value" with the column prefix removed.
first_run_args = runs[params].iloc[0].head(n=99)
for p, v in zip(params, first_run_args):
    label = p.replace("params.args/", "")
    print(f"{label}: {v}")

sampler_name: tpe
seed: 63259467
minimize: False
pruner_name: None
featurizer_kwargs: {'fold': 1024, 'radius': 3, 'return_count': True, 'use_chirality': True, 'use_features': True}
rf_kwargs: {'n_estimators': 100}
tracking_uri: http://localhost:5000
experiment_name: herg_rf_opttpe1
num_workers: 0
featurizer_mp_context: fork
split_seed: 3131724718
objective_name: val/AUROC
trials: 30
cache_dir: ../../../data/herg/
batch_size: 9999
featurizer_name: combined
use_labels: ['active_g10']
split_size: (0.6, 0.2, 0.2)
split_type: random
track_metrics: ...value too long for mlflow - not inserted
featurizer_chunksize: 100
run_name: tpe
standardize: False
attribution_kwargs: ...value too long for mlflow - not inserted


### Best parameters
- Best hyperparameters found by the search in this experiment

In [7]:
# Columns holding the best hyperparameters logged by the search.
params = [c for c in runs.columns if "params.best" in c]

# Show the first run's best parameters as "name: value". The stripped prefix
# must be 'params.best/' (not 'params.args/') to match these columns.
for p, v in zip(params, runs[params].iloc[0].head(n=99)):
    print(f"{p.replace('params.best/', '')}: {v}")

params.best/min_samples_leaf: 2
params.best/max_depth: 40
params.best/n_estimators: 200
params.best/criterion: entropy
params.best/bootstrap: False
params.best/min_samples_split: 5
params.best/max_features: sqrt


### Metrics
- Include metrics for atomic attribution/weights
    - Values were calculated using active hergophores on all rows for which hERG activity was predicted as negative (inactive)
    - Threshold was calculated per individual model using threshold moving

In [8]:
# Metrics to summarise across the search runs. Each entry needs its own
# trailing comma: adjacent string literals are silently concatenated.
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/loss",
    "metrics.test/mean/avg_score_pred_inactive/impurity",
    "metrics.test/mean/avg_score_pred_inactive/treeinterpreter",
]
# Exact-name match; requested metrics that were never logged are skipped.
metrics_columns = [c for c in runs.columns if c in metrics_names]
# Skip the first row -- presumably the parent run of the search rather than
# an individual trial; TODO confirm.
metrics = runs[metrics_columns].iloc[1:]

# Keep only the last path component as a short column label.
metrics.columns = [c.split("/")[-1] for c in metrics.columns]
metrics.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
impurity,30.0,0.725644,0.016265,0.683424,0.71627,0.733917,0.73486,0.747149
AUROC,30.0,0.887216,0.008119,0.858842,0.887875,0.889633,0.892412,0.892761
treeinterpreter,30.0,0.398638,0.016282,0.3594,0.390116,0.396086,0.401469,0.440163


#### Correlation

- Correlation between various metrics

In [9]:
# Pairwise Pearson correlation between the columns of `metrics`.
pearson_corr = metrics.corr(method="pearson")
pearson_corr

Unnamed: 0,impurity,AUROC,treeinterpreter
impurity,1.0,0.683556,-0.151315
AUROC,0.683556,1.0,-0.223517
treeinterpreter,-0.151315,-0.223517,1.0


In [10]:
# Scatter plot of two metrics with a fitted regression line on top.
x, y = "AUROC", "impurity"

# Neither axis is forced to include zero.
scatter = (
    alt.Chart(metrics)
    .mark_point()
    .encode(
        x=alt.X(x, scale=alt.Scale(zero=False)),
        y=alt.Y(y, scale=alt.Scale(zero=False)),
    )
)
trend_line = scatter.transform_regression(x, y).mark_line()

chart = scatter + trend_line
chart.show()


Displaying chart at http://localhost:21820/


## Experiment Best RF (kfold cross validated)

- Using best parameters found (see above)
    - Run 5-fold cross validation (was run 4 times)
    - Results/metrics were calculated using the mean of all individual runs (4 * 5 = 20 runs)
    - For each of the 4 repetitions of the 5-fold cross validation, a new random seed was drawn and used for both model training and splitting

In [11]:
# Load the individual cross-validation runs of the best-RF experiment.
experiment_name = "herg_rf_best_kfold"
exp = mlflow.get_experiment_by_name(experiment_name)
# get_experiment_by_name returns None for an unknown name; fail here with a
# readable message instead of an AttributeError on `exp.experiment_id`.
if exp is None:
    raise ValueError(
        f"MLflow experiment {experiment_name!r} not found at {mlflow.get_tracking_uri()}"
    )

# Exclude runs named 'summary' so only the remaining runs are returned.
query = "tags.mlflow.runName != 'summary'"
# search_runs documents `experiment_ids` as a list of IDs.
runs = mlflow.search_runs(experiment_ids=[exp.experiment_id], filter_string=query)

### Seeds


In [12]:
# Run id together with the training seed and the split seed of each run.
seed_columns = ["run_id", "params.seed", "params.split_seed"]
runs[seed_columns].head(n=99)

Unnamed: 0,run_id,params.seed,params.split_seed
0,46d70c678c0049a084daa24f014862ec,166806334,166806334
1,c895e7c45d49417bae414cfc004a9f9c,166806334,166806334
2,6fef517d4ec443fe8f7d1adf775aca40,166806334,166806334
3,9a85521df4c34bc0a691b83f8877869d,166806334,166806334
4,b45a06c046a64c9591be78f5c498a53c,3369474970,3369474970
5,5c80aafa8c134b1f8d937a5e6041bb1b,3369474970,3369474970
6,37dccf6ad8824c5982e4c32b69b2427d,3369474970,3369474970
7,319f95721a1b40ab99d305db2e8560ec,3369474970,3369474970
8,73a8a3c35f21424d85101d02aab49cd3,3369474970,3369474970
9,89117b8a259743b99c7d60ce78524f14,1531702888,1531702888


### Parameters

In [13]:
# All logged parameters except the search-space definitions.
params = [
    col for col in runs.columns
    if "params" in col and "search_space" not in col
]

# Show the first run's parameters as "name: value" with 'params.' removed.
first_run_params = runs[params].iloc[0].head(n=99)
for p, v in zip(params, first_run_params):
    short_name = p.replace("params.", "")
    print(f"{short_name}: {v}")

rf_kwargs/n_estimators: 200
min_samples_split: 5
featurizer_kwargs/return_count: True
random_state: 166806334
featurizer_kwargs/radius: 3
featurizer_name: combined
min_samples_leaf: 2
standardize: True
max_leaf_nodes: None
oob_score: False
rf_kwargs/criterion: entropy
split_size: (5, 3, 4)
min_impurity_decrease: 0.0
rf_kwargs/min_samples_split: 5
num_workers: 0
verbose: 1
cache_dir: ../../../data/herg/
warm_start: False
n_jobs: 16
featurizer_chunksize: 100
split_type: random_kfold
rf_kwargs/min_samples_leaf: 2
criterion: entropy
max_depth: 40
num_classes: 2
rf_kwargs/max_depth: 40
seed: 166806334
featurizer_kwargs/use_features: True
num_targets: 1
ignore_index: -100
pos_label: 1
featurizer_mp_context: fork
ccp_alpha: 0.0
objective: binary
smile3: c1ccccc1CN2CCCCC2 - active
batch_size: 9999
featurizer_n_jobs: 0
train/threshold-t0: 0.5100021958351135
min_weight_fraction_leaf: 0.0
class_weight: None
bootstrap: False
input_size: 2017
featurizer_kwargs/use_chirality: True
featurizer_kwargs/

### Metrics

#### Performance on hERG dataset

In [14]:
# Summary statistics of the test AUROC over the individual runs.
metrics_names = ["metrics.test/AUROC"]
metrics_columns = [
    col for col in runs.columns
    if any(col in name for name in metrics_names)
]

# Use the last path component as the column label.
metrics = runs[metrics_columns].rename(columns=lambda col: col.split("/")[-1])
metrics.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
AUROC,19.0,0.895086,0.008642,0.879861,0.891096,0.894983,0.898915,0.915833


#### Active Hergophores on Inactive Model Predictions

- Atomic attribution using various methods

In [15]:
# Summary statistics of the attribution scores on predicted-inactive rows,
# one row per attribution method.
metrics_names = [
    "metrics.test/mean/avg_score_pred_inactive/impurity",
    "metrics.test/mean/avg_score_pred_inactive/input_x_impurity",
    "metrics.test/mean/avg_score_pred_inactive/occlusion",
    "metrics.test/mean/avg_score_pred_inactive/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_inactive/treeinterpreter",
]
metrics_columns = [
    col for col in runs.columns
    if any(col in name for name in metrics_names)
]

# Use the last path component (the method name) as the column label.
metrics = runs[metrics_columns].rename(columns=lambda col: col.split("/")[-1])
metrics.describe().T


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
impurity,19.0,0.73392,0.008957,0.717656,0.729059,0.735102,0.736916,0.75073
occlusion,19.0,0.557309,0.016893,0.514387,0.548241,0.561287,0.567281,0.580939
treeinterpreter,19.0,0.382229,0.010059,0.361666,0.377456,0.382633,0.386903,0.405098
input_x_impurity,18.0,0.396073,0.007987,0.37961,0.39305,0.396889,0.398525,0.415246
shapley_value_sampling,5.0,0.584729,0.013353,0.564401,0.577842,0.591943,0.594215,0.595243


#### Active Hergophores on Active Model Predictions

- Atomic attribution using various methods

In [16]:
# Summary statistics of the attribution scores on predicted-active rows,
# one row per attribution method.
metrics_names = [
    "metrics.test/mean/avg_score_pred_active/impurity",
    "metrics.test/mean/avg_score_pred_active/input_x_impurity",
    "metrics.test/mean/avg_score_pred_active/occlusion",
    "metrics.test/mean/avg_score_pred_active/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_active/treeinterpreter",
]
metrics_columns = [
    col for col in runs.columns
    if any(col in name for name in metrics_names)
]

# Use the last path component (the method name) as the column label.
metrics = runs[metrics_columns].rename(columns=lambda col: col.split("/")[-1])
metrics.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
impurity,19.0,0.714435,0.007176,0.699044,0.710904,0.71691,0.720044,0.723036
treeinterpreter,19.0,0.626461,0.008753,0.608478,0.622153,0.626114,0.632793,0.641714
occlusion,19.0,0.551661,0.012019,0.529969,0.543278,0.553491,0.560669,0.572107
input_x_impurity,18.0,0.456979,0.010248,0.437521,0.45089,0.459747,0.463809,0.472306
shapley_value_sampling,5.0,0.597837,0.020107,0.568877,0.588296,0.59988,0.613007,0.619126


#### Correlations

In [17]:
# Pearson correlation between test AUROC and the predicted-inactive
# attribution scores of every method.
metrics_names = [
    "metrics.test/AUROC",
    "metrics.test/mean/avg_score_pred_inactive/impurity",
    "metrics.test/mean/avg_score_pred_inactive/input_x_impurity",
    "metrics.test/mean/avg_score_pred_inactive/occlusion",
    "metrics.test/mean/avg_score_pred_inactive/shapley_value_sampling",
    "metrics.test/mean/avg_score_pred_inactive/treeinterpreter",
]
metrics_columns = [
    col for col in runs.columns
    if any(col in name for name in metrics_names)
]

# Use the last path component as the column label.
metrics = runs[metrics_columns].rename(columns=lambda col: col.split("/")[-1])
metrics.corr(method="pearson")

Unnamed: 0,impurity,AUROC,occlusion,treeinterpreter,input_x_impurity,shapley_value_sampling
impurity,1.0,0.147867,0.088665,-0.336767,-0.14742,0.128604
AUROC,0.147867,1.0,-0.289093,0.320219,-0.14437,-0.525221
occlusion,0.088665,-0.289093,1.0,-0.678268,0.050595,0.754203
treeinterpreter,-0.336767,0.320219,-0.678268,1.0,0.023114,-0.774499
input_x_impurity,-0.14742,-0.14437,0.050595,0.023114,1.0,-0.604746
shapley_value_sampling,0.128604,-0.525221,0.754203,-0.774499,-0.604746,1.0
