In [1]:
from joblib import load
import pandas as pd 
import plot

import os.path
from configuration import BaseConfiguration

# Load two persisted workflow objects: one from the working directory and one
# from the 'Temporary' folder. The next cell copies selected attributes from
# `wf_new` onto `workflow`.
# NOTE(review): joblib.load is pickle-based and can execute arbitrary code;
# only load files that come from a trusted source.
wf_new = load('workflow.joblib')
workflow = load(os.path.join('Temporary', 'workflow.joblib'))

# Plotting configuration built from a JSON file; later cells read its
# `label_for_metric` and `label_for_scoring` attributes.
config = BaseConfiguration('Input/plotting-configuration.json')

In [2]:
# Attribute names of the temporary workflow that mention a file or a directory.
path_configs = [
    name for name in workflow.__dict__
    if 'file' in name or 'dir' in name
]

# Overwrite those attributes (plus 'function_for_metric') on `workflow` with
# the values held by `wf_new`; every other attribute is left as loaded.
for k, v in wf_new.__dict__.items():
    if k == 'function_for_metric' or k in path_configs:
        setattr(workflow, k, v)

In [3]:
# Metric identifiers, taken by iterating over the configuration's
# `label_for_metric` (presumably a mapping keyed by metric name -- confirm).
metrics = list(config.label_for_metric)

performances = workflow.concatenate_history('performances')

# Nested lookup: target effect -> metric -> the matching performance columns.
performance_for_effect = {}
for effect in ['general', 'repro_dev']:
    # Select the columns of this target effect once, then slice per metric.
    effect_performances = performances.xs(effect, axis=1, level='target_effect')
    performance_for_metric = {
        metric: effect_performances.xs(metric, axis=1, level='metric')
        for metric in metrics
    }
    performance_for_effect[effect] = performance_for_metric

In [4]:
def describe(performances):
    """Summarize the distribution of every performance column.

    Parameters
    ----------
    performances : mapping or sequence of pandas objects
        Passed straight to ``pd.concat(..., axis=1)``. When a mapping is
        given, its keys become the outermost column level.

    Returns
    -------
    pandas.DataFrame
        ``DataFrame.describe`` statistics with the 5th, 50th and 95th
        percentiles, rounded to two decimals, with column levels at
        positions 1, 2, 3 and 5 removed.
    """
    combined = pd.concat(performances, axis=1)
    summary = combined.describe(percentiles=[0.05, 0.5, 0.95])
    rounded = summary.round(2)
    # Positional drop: the concatenated columns must have at least six levels.
    return rounded.droplevel([1, 2, 3, 5], axis=1)
    
# Display the summary statistics for the 'general' target effect.
describe(performance_for_effect['general'])

Unnamed: 0_level_0,root_mean_squared_error,root_mean_squared_error,median_absolute_error,median_absolute_error,r2_score,r2_score
model_build,without_selection,with_selection,without_selection,with_selection,without_selection,with_selection
count,150.0,150.0,150.0,150.0,150.0,150.0
mean,0.68,0.69,0.39,0.4,0.5,0.48
std,0.04,0.04,0.02,0.03,0.04,0.04
min,0.59,0.61,0.33,0.33,0.4,0.38
5%,0.62,0.63,0.35,0.35,0.44,0.42
50%,0.67,0.69,0.39,0.4,0.5,0.48
95%,0.74,0.75,0.42,0.45,0.56,0.54
max,0.78,0.8,0.46,0.47,0.6,0.57


In [5]:
# Display the summary statistics for the 'repro_dev' target effect.
describe(performance_for_effect['repro_dev'])

Unnamed: 0_level_0,root_mean_squared_error,root_mean_squared_error,median_absolute_error,median_absolute_error,r2_score,r2_score
model_build,without_selection,with_selection,without_selection,with_selection,without_selection,with_selection
count,150.0,150.0,150.0,150.0,150.0,150.0
mean,0.59,0.6,0.31,0.31,0.5,0.49
std,0.06,0.06,0.02,0.02,0.05,0.05
min,0.48,0.5,0.27,0.27,0.35,0.32
5%,0.51,0.52,0.29,0.28,0.41,0.39
50%,0.58,0.58,0.31,0.31,0.51,0.49
95%,0.7,0.71,0.34,0.35,0.58,0.56
max,0.77,0.78,0.37,0.36,0.6,0.6


In [6]:
importances = workflow.concatenate_history('importances')

# Scoring identifiers, taken by iterating over the configuration's
# `label_for_scoring` (presumably a mapping keyed by scoring name -- confirm).
scorings = list(config.label_for_scoring)

# Nested lookup: target effect -> scoring -> mean importance per column,
# sorted from largest to smallest.
importance_for_effect = {}
for effect in ['general', 'repro_dev']:
    # Select the columns of this target effect once, then slice per scoring.
    effect_importances = importances.xs(effect, axis=1, level='target_effect')
    importance_for_scoring = {}
    for scoring in scorings:
        scoring_importances = effect_importances.xs(scoring, axis=1, level='metric')
        mean_importances = scoring_importances.mean()
        ranked_importances = mean_importances.sort_values(ascending=False)
        # Positional drop of the first five index levels of the resulting Series.
        importance_for_scoring[scoring] = ranked_importances.droplevel([0, 1, 2, 3, 4])
    importance_for_effect[effect] = importance_for_scoring

In [7]:
# First five rows of the mean importances for the 'general' target effect,
# one column per scoring, rounded to two decimals.
pd.concat(importance_for_effect['general'], axis=1).head().round(2)

Unnamed: 0_level_0,neg_root_mean_squared_error,neg_median_absolute_error,r2
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CATMoS_LD50_pred,0.26,0.19,0.47
ReadyBiodeg_pred_discrete,0.02,0.02,0.02
MP_pred,0.02,0.01,0.02
ndHBdDon_discrete,0.01,0.01,0.01
CombDipolPolariz,0.01,0.01,0.01


In [8]:
# First five rows of the mean importances for the 'repro_dev' target effect,
# one column per scoring, rounded to two decimals.
pd.concat(importance_for_effect['repro_dev'], axis=1).head().round(2)

Unnamed: 0_level_0,neg_root_mean_squared_error,neg_median_absolute_error,r2
feature,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
CATMoS_LD50_pred,0.25,0.18,0.5
FUB_pred,0.01,0.01,0.02
ReadyBiodeg_pred_discrete,0.01,0.02,0.02
Koc_pred,0.01,0.01,0.01
Sp3Sp2HybRatio,0.01,0.02,0.01
