# Hyperparameter optimization #

## Parameters cell ##

Parameters are overridden by papermill when run inside DVC stages



In [1]:
# Papermill parameters: keep this cell to plain literal assignments so the values can be overridden at run time.
n_folds = 5 # global setting: how many folds (bootstraps) to use; first argument of PartitionParameters
n_hold_out = 1 # second argument of PartitionParameters
repeats = 5 # passed to Repeat: how many times the partition + SHAP pipeline is repeated
n_trials = 10 # passed to Tune as n_trials
importance_type = "split" # forwarded to FeatureSelection(importance_type=...)
# first round of optimization

life_history = ["lifespan", "mass_g", "mtGC", "metabolic_rate", "temperature", "gestation_days"] # not referenced in the cells visible in this review
trait = "lifespan" # selects the dataset, the study database file and the output folder

debug_local = True # when True and ../yspecies exists, use the local version of yspecies

In [2]:
from pathlib import Path
import sys
import inspect

# Prefer the local checkout of yspecies (../yspecies) when debugging locally.
local = (Path("..") / "yspecies").resolve()
if debug_local and local.exists():
  # The parent directory ("..") is put first on sys.path, presumably so that
  # `import yspecies` resolves to ../yspecies.
  # NOTE(review): ".." is a relative entry, so it depends on the kernel's working directory.
  sys.path.insert(0, Path("..").as_posix())
  #sys.path.insert(0, local.as_posix())
  print("extending pathes with local yspecies")
  print(sys.path)
  # Reload edited modules automatically before each cell is executed.
  %load_ext autoreload
  %autoreload 2

extending pathes with local yspecies
['..', '/data/sources/yspecies/notebooks', '/opt/miniconda3/envs/yspecies/lib/python38.zip', '/opt/miniconda3/envs/yspecies/lib/python3.8', '/opt/miniconda3/envs/yspecies/lib/python3.8/lib-dynload', '', '/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages', '/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions', '/home/antonkulaga/.ipython']


In [3]:
from dataclasses import dataclass, replace
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from typing import *
from yspecies.dataset import *
from yspecies.utils import *
from yspecies.workflow import TupleWith, Repeat, Collect
from yspecies.config import *
from yspecies.preprocess import FeatureSelection, DataExtractor
from yspecies.partition import DataPartitioner, PartitionParameters
from yspecies.selection import ShapSelector
from yspecies.tuning import Tune
from yspecies.models import ResultsCV, CrossValidator
from yspecies.results import FeatureSummary
import optuna
from optuna import Study, Trial

In [7]:
#settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
import pprint
pp = pprint.PrettyPrinter(indent=4)

#charts settings
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
%matplotlib auto
plt.ioff()
set_matplotlib_formats('svg')

Using matplotlib backend: TkAgg


### Loading data ###
Let's load the species/genes/expressions data selected by the select_samples.py notebook

In [8]:
from pathlib import Path
# Use the current directory as the root when it contains ./data, otherwise fall back to the parent directory.
project_root = "./" if Path("./data").exists() else "../"
locations: Locations = Locations(project_root)

In [9]:
# Default feature-selection settings; handed to the DataLoader in the next cell.
default_selection = FeatureSelection(
    to_predict="lifespan",                       # column to predict
    samples=["tissue", "species"],               # samples metadata to include
    species=[],                                  # species metadata other than the Y label to include
    categorical=["tissue"],
    exclude_from_training=["species"],           # exclude some fields from LightGBM training
    select_by="shap",
    importance_type=importance_type,             # value from the parameters cell
    feature_perturbation="tree_path_dependent",
)
default_selection

Samples metadata,Species metadata,Genes,Predict label,not_validated species
"['tissue', 'species']",[],all,lifespan,[]


In [10]:
# Build the loader from the resolved locations and the default selection,
# then load the life-history selections (indexed by trait name below).
loader = DataLoader(locations, default_selection)
selections = loader.load_life_history()

# Preview the first element stored under the trait being optimized.
trait_entry = selections[trait]
trait_entry[0]

expressions,genes,species,samples,Genes Metadata,Species Metadata
"(408, 12323)","(12323, 37)",38,408,"(12323, 2)","(38, 18)"


## Setting up ShapSelector ##

Deciding on selection parameters (which fields to include, exclude, predict)

In [11]:
# Partitioning settings shared by every trial; the pipeline cell further down replaces the `seed` field for each repeat.
# NOTE(review): the positional literals 2 and 42 are undocumented here. 42 is presumably the default seed; confirm both against PartitionParameters.
partition_params = PartitionParameters(n_folds, n_hold_out, 2,   42)

## Setting up features to select ##

In [12]:
# Selection settings for the SHAP-based feature selection.
# importance_type is taken from the parameters cell (default "split") instead of being
# hard-coded, so a papermill override applies here just as it does to default_selection.
selection = FeatureSelection(
    samples = ["tissue","species"], #samples metadata to include
    species =  [], #species metadata other than Y label to include
    exclude_from_training = ["species"],  #exclude some fields from LightGBM training
    to_predict = "lifespan", #column to predict
    categorical = ["tissue"],
    select_by = "shap",
    importance_type = importance_type
)

In [13]:
# Copy of `selection` with select_by set to "shap" (the value `selection` already carries).
select_lifespan = replace(selection, select_by = "shap")

In [36]:
def load_study(trait: str):
    """Open the multi-objective Optuna study for ``trait``, creating it when it does not exist.

    The study is stored in the SQLite file ``<trait>.sqlite`` under
    ``locations.interim.optimization`` and is named ``<trait>_r2_huber_kendall``.
    Its three objectives are, in order: maximize, minimize, maximize.

    :param trait: trait name; used for both the database file name and the study name
    :return: the study returned by ``optuna.multi_objective.study.create_study``
    """
    database_file = (locations.interim.optimization / (trait + ".sqlite")).absolute()
    url = "sqlite:///" + str(database_file)
    print("loading (if exists) study from " + url)
    storage = optuna.storages.RDBStorage(url=url)
    study_name = f"{trait}_r2_huber_kendall"
    return optuna.multi_objective.study.create_study(
        directions=["maximize", "minimize", "maximize"],
        storage=storage,
        study_name=study_name,
        load_if_exists=True,
    )

# Open (or create) the study of the selected trait.
study = load_study(trait)

# Metrics and parameters returned by best_metrics_params_r2() for the trials stored so far.
study_results = MultiObjectiveResults.from_study(study)
metrics, params = study_results.best_metrics_params_r2()
metrics, params

loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/interim/optimization/lifespan.sqlite


create_study is experimental (supported from v1.4.0). The interface can change in the future.
NSGAIIMultiObjectiveSampler is experimental (supported from v1.5.0). The interface can change in the future.
RandomMultiObjectiveSampler is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 20:41:03,873] Using an existing study with name 'lifespan_r2_huber_kendall' instead of creating a new one.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.


((0.8895391594213731, 3.9388608314443507, 0.493533956976745),
 {'bagging_fraction': 0.9223944053685549,
  'boosting_type': 'gbdt',
  'drop_rate': 0.29847846764282016,
  'feature_fraction': 0.30501714460224083,
  'lambda_l1': 0.21894440516534,
  'lambda_l2': 1.3848191603958375,
  'learning_rate': 0.08210966706622723,
  'max_depth': 3,
  'max_leaves': 17,
  'min_data_in_leaf': 8,
  'objective': 'regression'})

In [39]:
# Number of trials currently on the Pareto front of the study.
pareto_front_trials = study.get_pareto_front_trials()
len(pareto_front_trials)

8

In [30]:
def objective_parameters(trial: Trial) -> dict:
    """Draw one set of LightGBM training parameters from the search space.

    Objective, metrics and verbosity are fixed; every other entry is suggested
    by ``trial`` within the bounds listed below.

    :param trial: Optuna trial used to suggest the values
    :return: dictionary of parameters (fixed entries plus the suggested ones)
    """
    # NOTE(review): 'metric' is a set, so the iteration order of the three metrics
    # is not guaranteed. Confirm that nothing downstream depends on their order.
    parameters = {
        'objective': 'regression',
        'metric': {'mae', 'mse', 'huber'},
        'verbosity': -1,
    }
    parameters['boosting_type'] = trial.suggest_categorical('boosting_type', ['dart', 'gbdt'])

    # (parameter name, suggest method, lower bound, upper bound); values are drawn in this order
    search_space = (
        ('lambda_l1', trial.suggest_uniform, 0.01, 3.0),
        ('lambda_l2', trial.suggest_uniform, 0.01, 3.0),
        ("max_leaves", trial.suggest_int, 15, 25),
        ('max_depth', trial.suggest_int, 3, 8),
        ('feature_fraction', trial.suggest_uniform, 0.3, 1.0),
        ('bagging_fraction', trial.suggest_uniform, 0.3, 1.0),
        ('learning_rate', trial.suggest_uniform, 0.01, 0.1),
        ('min_data_in_leaf', trial.suggest_int, 3, 8),
        ('drop_rate', trial.suggest_uniform, 0.1, 0.3),
    )
    for name, suggest, low, high in search_space:
        parameters[name] = suggest(name, low, high)

    # 'verbose' is set in addition to 'verbosity' above; both carry -1
    parameters["verbose"] = -1
    return parameters

# name under which the search space is handed to Tune(parameters_space=...)
optimization_parameters = objective_parameters

In [31]:
from yspecies.workflow import SplitReduce

def side(i: int):
    """Print ``i`` and return it unchanged (logs the seed used for each repeat)."""
    print(i)
    return i

def summarize_repeats(results):
    """Fold the results of all repeats into the three objective values of one trial.

    FeatureSummary is built once and reused for all three values; the previous
    inline lambda rebuilt it for every metric.

    :param results: results collected from the repeats, as accepted by FeatureSummary
    :return: tuple (average R2, average huber, mean absolute Kendall tau), in the
        order of the study directions (maximize, minimize, maximize)
    """
    summary = FeatureSummary(results)
    averages = summary.metrics_average
    return (averages.R2, averages.huber, summary.kendall_tau_abs_mean)

# Partition step: x[2] is the index appended by the Repeat lambda below; it replaces
# the seed of partition_params so that every repeat gets a different partition.
# The partitioner output is then paired again with x[1].
prepare_partition = SplitReduce(
    outputs = DataPartitioner(),
    split = lambda x: [(x[0], replace(partition_params, seed=side(x[2])))],
    reduce = lambda x, output: (output[0], x[1])
)

# One repeat: partition the data, then run ShapSelector on the partitions.
partition_and_cv = Pipeline(
    [
        ("prepare partition", prepare_partition),
        ("shap_computation", ShapSelector())
    ]
)

# Run the repeat `repeats` times (the index i is appended as the third element)
# and fold the collected results into the three objective values.
partition_and_cv_repeat = Pipeline([
    ("repeat_cv_pipe", Repeat(partition_and_cv, repeats, lambda x, i: [x[0], x[1], i])),
    ("collect_mean", Collect(fold=summarize_repeats))
])

# Full pipeline: extract the data, then tune with n_trials trials recorded in `study`.
p = Pipeline([
     ('extractor', DataExtractor()),
     ('tune', Tune(partition_and_cv_repeat, study = study, n_trials = n_trials, parameters_space = optimization_parameters))
])

In [32]:
# Run the extractor + tuning pipeline on the entry of `selections` for the chosen trait.
# NOTE(review): this is the long-running cell; Tune is configured with n_trials trials.
results = p.fit_transform(selections[trait])
results

MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.


0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 8.70794	valid_0's l2: 303.514	valid_0's l1: 10.0568
Did not meet early stopping. Best iteration is:
[250]	valid_0's huber: 8.70794	valid_0's l2: 303.514	valid_0's l1: 10.0568
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.85972	valid_0's l2: 76.7909	valid_0's l1: 4.66369
Did not meet early stopping. Best iteration is:
[249]	valid_0's huber: 3.85943	valid_0's l2: 76.7979	valid_0's l1: 4.66338
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.42562	valid_0's l2: 55.6028	valid_0's l1: 4.16304
Did not meet early stopping. Best iteration is:
[250]	valid_0's huber:

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:24:58,795] Trial 226 finished with values: [0.8656536549026324, 4.156772369963435, 0.5298702631467377] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.11235684284872, 'lambda_l2': 2.545986499359966, 'max_leaves': 15, 'max_depth': 3, 'feature_fraction': 0.6275052118649216, 'bagging_fraction': 0.7123985676934603, 'learning_rate': 0.03871337664007671, 'min_data_in_leaf': 3, 'drop_rate': 0.1437718204212639}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported f

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[160]	valid_0's huber: 7.88485	valid_0's l2: 226.76	valid_0's l1: 9.13311
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[166]	valid_0's huber: 3.45416	valid_0's l2: 64.1343	valid_0's l1: 4.17538
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[95]	valid_0's huber: 2.81157	valid_0's l2: 35.7836	valid_0's l1: 3.48468
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_nemestrina', 'Microcebus_murinus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[129]	valid_0's huber: 2.52388	va

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:27:15,510] Trial 227 finished with values: [0.8844552149543843, 3.915045225713265, 0.47600260628857977] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.0276980196726032, 'lambda_l2': 0.544763082731981, 'max_leaves': 20, 'max_depth': 3, 'feature_fraction': 0.345741412241509, 'bagging_fraction': 0.9005959071673288, 'learning_rate': 0.09121990048692946, 'min_data_in_leaf': 3, 'drop_rate': 0.1437718204212639}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[107]	valid_0's huber: 8.91001	valid_0's l2: 308.188	valid_0's l1: 10.2812
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[137]	valid_0's huber: 3.18558	valid_0's l2: 58.1236	valid_0's l1: 3.88826
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[215]	valid_0's huber: 2.86749	valid_0's l2: 46.09	valid_0's l1: 3.49613
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_nemestrina', 'Microcebus_murinus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[137]	valid_0's huber: 2.26506	va

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:31:52,964] Trial 228 finished with values: [0.8760904515398245, 3.8900645385347596, 0.44821748225246993] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.2371082826110436, 'lambda_l2': 2.0296121960445372, 'max_leaves': 23, 'max_depth': 7, 'feature_fraction': 0.5809187061009446, 'bagging_fraction': 0.7029168888817168, 'learning_rate': 0.05298955871762984, 'min_data_in_leaf': 8, 'drop_rate': 0.29199971159327986}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (suppo

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[124]	valid_0's huber: 8.9967	valid_0's l2: 312.384	valid_0's l1: 10.3871
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[210]	valid_0's huber: 3.19965	valid_0's l2: 55.6612	valid_0's l1: 3.91259
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[211]	valid_0's huber: 3.03449	valid_0's l2: 45.9651	valid_0's l1: 3.70113
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_nemestrina', 'Microcebus_murinus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[172]	valid_0's huber: 2.27095	v

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:35:59,262] Trial 229 finished with values: [0.8768659844364015, 3.9341380852431307, 0.48353942971841446] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.2371082826110436, 'lambda_l2': 2.0296121960445372, 'max_leaves': 16, 'max_depth': 5, 'feature_fraction': 0.5280630910750994, 'bagging_fraction': 0.4542347417151458, 'learning_rate': 0.05296307049829743, 'min_data_in_leaf': 8, 'drop_rate': 0.17734603254653464}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (suppo

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[113]	valid_0's huber: 9.12319	valid_0's l2: 332.706	valid_0's l1: 10.4924
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[160]	valid_0's huber: 3.22362	valid_0's l2: 57.8674	valid_0's l1: 3.91743
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[121]	valid_0's huber: 2.87917	valid_0's l2: 66.3003	valid_0's l1: 3.51137
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_nemestrina', 'Microcebus_murinus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[119]	valid_0's huber: 2.57437	

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:41:25,915] Trial 230 finished with values: [0.8556623741776791, 4.020674628663035, 0.45315184971811967] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.8428795592857916, 'lambda_l2': 0.544763082731981, 'max_leaves': 20, 'max_depth': 6, 'feature_fraction': 0.5927592221018452, 'bagging_fraction': 0.8665388049683505, 'learning_rate': 0.06639639992502677, 'min_data_in_leaf': 5, 'drop_rate': 0.2800148288163965}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supporte

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']


Early stopping is not available in dart mode


[250]	valid_0's huber: 9.19081	valid_0's l2: 329.749	valid_0's l1: 10.5829
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
[250]	valid_0's huber: 4.25606	valid_0's l2: 150.609	valid_0's l1: 5.07675
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
[250]	valid_0's huber: 3.68291	valid_0's l2: 82.1927	valid_0's l1: 4.43551
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_nemestrina', 'Microcebus_murinus']
[250]	valid_0's huber: 2.71331	valid_0's l2: 90.2847	valid_0's l1: 3.3356
1
===== fitting models with seed 1 =====
SEED: 1 | FOLD: 0 | VALIDATION_SPECIES: ['Felis_catus', 'Ursus_americanus']


Early stopping is not available in dart mode


[250]	valid_0's huber: 3.59598	valid_0's l2: 55.2271	valid_0's l1: 4.36067
SEED: 1 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Vombatus_ursinus']
[250]	valid_0's huber: 4.56768	valid_0's l2: 128.712	valid_0's l1: 5.43811
SEED: 1 | FOLD: 2 | VALIDATION_SPECIES: ['Macaca_fascicularis', 'Rhinopithecus_bieti']
[250]	valid_0's huber: 2.87543	valid_0's l2: 41.1693	valid_0's l1: 3.57308
SEED: 1 | FOLD: 3 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Ovis_aries']
[250]	valid_0's huber: 6.53972	valid_0's l2: 233.881	valid_0's l1: 7.63452
2
===== fitting models with seed 2 =====
SEED: 2 | FOLD: 0 | VALIDATION_SPECIES: ['Rhinopithecus_bieti', 'Ursus_americanus']


Early stopping is not available in dart mode


[250]	valid_0's huber: 5.53621	valid_0's l2: 140.958	valid_0's l1: 6.54011
SEED: 2 | FOLD: 1 | VALIDATION_SPECIES: ['Aotus_nancymaae', 'Phascolarctos_cinereus']
[250]	valid_0's huber: 6.35303	valid_0's l2: 161.889	valid_0's l1: 7.46549
SEED: 2 | FOLD: 2 | VALIDATION_SPECIES: ['Vombatus_ursinus', 'Ailuropoda_melanoleuca']
[250]	valid_0's huber: 5.50833	valid_0's l2: 116.372	valid_0's l1: 6.51042
SEED: 2 | FOLD: 3 | VALIDATION_SPECIES: ['Rattus_norvegicus', 'Monodelphis_domestica']
[250]	valid_0's huber: 6.51524	valid_0's l2: 127.459	valid_0's l1: 7.6606
3
===== fitting models with seed 3 =====
SEED: 3 | FOLD: 0 | VALIDATION_SPECIES: ['Microcebus_murinus', 'Pan_paniscus']


Early stopping is not available in dart mode


[250]	valid_0's huber: 3.72369	valid_0's l2: 51.8951	valid_0's l1: 4.52993
SEED: 3 | FOLD: 1 | VALIDATION_SPECIES: ['Ursus_americanus', 'Gorilla_gorilla']
[250]	valid_0's huber: 7.55543	valid_0's l2: 209.059	valid_0's l1: 8.77499
SEED: 3 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Capra_hircus']
[250]	valid_0's huber: 4.19759	valid_0's l2: 82.4252	valid_0's l1: 5.03271
SEED: 3 | FOLD: 3 | VALIDATION_SPECIES: ['Rhinolophus_ferrumequinum', 'Mus_musculus']
[250]	valid_0's huber: 2.58839	valid_0's l2: 54.3826	valid_0's l1: 3.25999
4
===== fitting models with seed 4 =====
SEED: 4 | FOLD: 0 | VALIDATION_SPECIES: ['Macaca_fascicularis', 'Pan_troglodytes']


Early stopping is not available in dart mode


[250]	valid_0's huber: 3.18272	valid_0's l2: 42.223	valid_0's l1: 3.8998
SEED: 4 | FOLD: 1 | VALIDATION_SPECIES: ['Otolemur_garnettii', 'Ursus_americanus']
[250]	valid_0's huber: 3.65201	valid_0's l2: 116.673	valid_0's l1: 4.39653
SEED: 4 | FOLD: 2 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Cavia_aperea']
[250]	valid_0's huber: 4.14974	valid_0's l2: 63.2349	valid_0's l1: 4.9613
SEED: 4 | FOLD: 3 | VALIDATION_SPECIES: ['Pan_paniscus', 'Gorilla_gorilla']
[250]	valid_0's huber: 6.58647	valid_0's l2: 206.039	valid_0's l1: 7.67431


MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:50:07,643] Trial 231 finished with values: [0.8286028638861194, 4.848537870630264, 0.4859936792387655] with parameters: {'boosting_type': 'dart', 'lambda_l1': 2.7650012000614503, 'lambda_l2': 2.9041579926783463, 'max_leaves': 15, 'max_depth': 6, 'feature_fraction': 0.5927592221018452, 'bagging_fraction': 0.9223944053685549, 'learning_rate': 0.06639639992502677, 'min_data_in_leaf': 8, 'drop_rate': 0.11114772490334401}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (support

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[206]	valid_0's huber: 7.99098	valid_0's l2: 223.35	valid_0's l1: 9.26438
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.30954	valid_0's l2: 64.6582	valid_0's l1: 4.0679
Did not meet early stopping. Best iteration is:
[248]	valid_0's huber: 3.30922	valid_0's l2: 64.6547	valid_0's l1: 4.06778
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.01253	valid_0's l2: 44.9834	valid_0's l1: 3.69861
Did not meet early stopping. Best iteration is:
[249]	valid_0's huber: 3.01243	valid_0's l2: 44.9788	valid_0's l1: 3.69856
SEED: 0 | FOLD: 3 | VALIDATION_SPECIE

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:54:26,629] Trial 232 finished with values: [0.8843570299598273, 3.94819442809667, 0.49814562457895273] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.368954553613788, 'lambda_l2': 1.3848191603958375, 'max_leaves': 24, 'max_depth': 3, 'feature_fraction': 0.30501714460224083, 'bagging_fraction': 0.7123985676934603, 'learning_rate': 0.05298955871762984, 'min_data_in_leaf': 8, 'drop_rate': 0.26779555521427745}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (support

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[136]	valid_0's huber: 8.9145	valid_0's l2: 310.005	valid_0's l1: 10.2898
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.28542	valid_0's l2: 55.0098	valid_0's l1: 4.00778
Did not meet early stopping. Best iteration is:
[245]	valid_0's huber: 3.2848	valid_0's l2: 54.9806	valid_0's l1: 4.00726
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.00966	valid_0's l2: 43.3157	valid_0's l1: 3.70238
Did not meet early stopping. Best iteration is:
[245]	valid_0's huber: 3.00915	valid_0's l2: 43.3089	valid_0's l1: 3.70182
SEED: 0 | FOLD: 3 | VALIDATION_SPECIE

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 19:57:58,146] Trial 233 finished with values: [0.8721148778013909, 4.087096954346963, 0.4976962710695245] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 0.598678382495219, 'lambda_l2': 1.492191937924149, 'max_leaves': 17, 'max_depth': 3, 'feature_fraction': 0.678900517497091, 'bagging_fraction': 0.8025525925083434, 'learning_rate': 0.0640487899213809, 'min_data_in_leaf': 6, 'drop_rate': 0.1437718204212639}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported fr

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[147]	valid_0's huber: 8.86212	valid_0's l2: 307.009	valid_0's l1: 10.2419
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.62523	valid_0's l2: 67.5713	valid_0's l1: 4.39899
Did not meet early stopping. Best iteration is:
[250]	valid_0's huber: 3.62523	valid_0's l2: 67.5713	valid_0's l1: 4.39899
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's huber: 3.09036	valid_0's l2: 45.5494	valid_0's l1: 3.79115
Did not meet early stopping. Best iteration is:
[250]	valid_0's huber: 3.09036	valid_0's l2: 45.5494	valid_0's l1: 3.79115
SEED: 0 | FOLD: 3 | VALIDATION_SPEC

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 20:01:54,450] Trial 234 finished with values: [0.8799747707089589, 4.009104624678772, 0.49571000065604176] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 1.3372687152071137, 'lambda_l2': 2.7636630410837917, 'max_leaves': 23, 'max_depth': 3, 'feature_fraction': 0.5809187061009446, 'bagging_fraction': 0.5665557843020324, 'learning_rate': 0.05298955871762984, 'min_data_in_leaf': 8, 'drop_rate': 0.2758291874738865}.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
FrozenMultiObjectiveTrial is experimental (support

0
===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[121]	valid_0's huber: 9.47398	valid_0's l2: 355.525	valid_0's l1: 10.9245
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[150]	valid_0's huber: 3.55081	valid_0's l2: 65.9154	valid_0's l1: 4.32935
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[141]	valid_0's huber: 3.5659	valid_0's l2: 62.1976	valid_0's l1: 4.32853
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_nemestrina', 'Microcebus_murinus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[152]	valid_0's huber: 2.3601	va

MultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.
MultiObjectiveStudy is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 20:05:51,339] Trial 235 finished with values: [0.8432968935936161, 4.233353706353105, 0.5263257036244215] with parameters: {'boosting_type': 'gbdt', 'lambda_l1': 2.368954553613788, 'lambda_l2': 1.492191937924149, 'max_leaves': 23, 'max_depth': 3, 'feature_fraction': 0.9825126690946149, 'bagging_fraction': 0.42611385339632235, 'learning_rate': 0.07150591722292106, 'min_data_in_leaf': 3, 'drop_rate': 0.2758291874738865}.
FrozenMultiObjectiveTrial is experimental (supported from v1.4.0). The interface can change in the future.


MultiObjectiveResults(best_trials=[<optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f167349ea90>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f167347b2b0>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f167347b4f0>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f1673471370>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f1673471610>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f16734716d0>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f1673471730>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f1673471790>], all_trials=[<optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f1673429160>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f1670a6b040>, <optuna.multi_objective.trial.FrozenMultiObjectiveTrial object at 0x7f16734fc550>, <optuna.multi_objective.trial.FrozenMul

In [33]:
# Trials held in the `best_trials` field of the tuning results (shown next to `all_trials` in the repr above).
best = results.best_trials

In [72]:
import json  # imported here because no visible cell imports json explicitly

# Output folder for this trait: computed once, outside the loop.
trait_path = locations.metrics.optimization / trait
# Create missing parent folders as well and do not fail when the folder already exists.
trait_path.mkdir(parents=True, exist_ok=True)

# Write every best trial to its own <index>.json file: trial number, parameters
# and the three objective values (R2, huber, kendall_tau).
# NOTE(review): files left over from an earlier run with more best trials are not removed.
for i, t in enumerate(best):
    path = trait_path / f"{i}.json"
    print(f"writing parameters to {path}")
    params = t.params
    values = t.values
    to_write = {
        "number": t.number,
        "params": params,
        "metrics": {"R2": values[0], "huber": values[1], "kendall_tau": values[2]},
    }
    with open(path, 'w') as f:
        json.dump(to_write, f, sort_keys=True, indent=4)

writing parameters to ../data/metrics/optimization/lifespan/0.json
writing parameters to ../data/metrics/optimization/lifespan/1.json
writing parameters to ../data/metrics/optimization/lifespan/2.json
