# SHAP feature selection #
## Code to select feature with combination of LightGBM and SHAP ##

## Parameters cell ##

Parameters are overiddent by papermill when run inside DVC stages



In [1]:
n_folds = 5 # this sets global setting of which how many bootstraps to use
n_hold_out = 1
repeats = 5
importance_type = "split"

life_history = ["lifespan", "mass_kg", "mtGC", "metabolic_rate", "temperature", "gestation_days"]
debug_local = True #to use local version

In [4]:
from pathlib import Path
import sys
import inspect

#lgb_params["importance_type"] = importance_type

local = (Path("..") / "yspecies").resolve()
if debug_local and local.exists():
  sys.path.insert(0, Path("..").as_posix())
  #sys.path.insert(0, local.as_posix())
  print("extending pathes with local yspecies")
  print(sys.path)
  %load_ext autoreload
  %autoreload 2

extending pathes with local yspecies
['..', '/data/sources/yspecies/notebooks', '/opt/miniconda3/envs/yspecies/lib/python38.zip', '/opt/miniconda3/envs/yspecies/lib/python3.8', '/opt/miniconda3/envs/yspecies/lib/python3.8/lib-dynload', '', '/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages', '/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions', '/home/antonkulaga/.ipython']


In [5]:
from dataclasses import dataclass, replace
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
import seaborn as sns
import matplotlib.pyplot as plt

In [6]:
from typing import *
from yspecies.dataset import *
from yspecies.utils import *
from yspecies.workflow import TupleWith, Repeat, Collect
from yspecies.config import *
from yspecies.preprocess import FeatureSelection, DataExtractor
from yspecies.partition import DataPartitioner, PartitionParameters
from yspecies.models import Metrics
from yspecies.selection import ShapSelector
from yspecies.results import FeatureSummary

In [26]:
import optuna
from optuna import Study, Trial
from optuna import multi_objective
from optuna.multi_objective import trial
from optuna.multi_objective.study import MultiObjectiveStudy
from yspecies.tuning import MultiObjectiveResults

In [7]:
#settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', lambda x: '%.3f' % x)
import pprint
pp = pprint.PrettyPrinter(indent=4)

#charts settings
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
%matplotlib auto
plt.ioff()
set_matplotlib_formats('svg')

Using matplotlib backend: TkAgg


### Loading data ###
Let's load data from species/genes/expressions selected by select_samples.py notebook

In [8]:
from pathlib import Path
locations: Locations = Locations("./") if Path("./data").exists() else Locations("../")

## Setting up Features to select ##

In [9]:
default_selection = FeatureSelection(
    samples = ["tissue","species"], #samples metadata to include
    species =  [], #species metadata other then Y label to include
    exclude_from_training = ["species"],  #exclude some fields from LightGBM training
    to_predict = "lifespan", #column to predict
    categorical = ["tissue"],
    select_by = "shap",
    importance_type =  importance_type,
    feature_perturbation = "tree_path_dependent"
)
default_selection

Samples metadata,Species metadata,Genes,Predict label,not_validated species
"['tissue', 'species']",[],all,lifespan,[]


In [10]:
loader = DataLoader(locations, default_selection)
selections = loader.load_life_history()
selections["lifespan"][0]

expressions,genes,species,samples,Genes Metadata,Species Metadata
"(408, 12323)","(12323, 37)",38,408,"(12323, 2)","(38, 18)"


### Loading Hyperoptimization studies ###

In [15]:
def load_study(trait: str):
    url = f'sqlite:///' +str((locations.interim.optimization / (trait+".sqlite")).absolute())
    print('loading (if exists) study from '+url)
    storage = optuna.storages.RDBStorage(
        url=url
        #engine_kwargs={'check_same_thread': False}
    )
    return optuna.multi_objective.study.create_study(directions=['maximize','minimize','maximize'], storage = storage, study_name = f"{trait}_r2_huber_kendall", load_if_exists = True)

## Setting up SHAP selection pipeline ##

### Deciding on selection parameters (which fields to include, exclude, predict)  ###

In [17]:
partition_params = PartitionParameters(n_folds, n_hold_out, 2,   42)


In [38]:
def make_pipeline(trait: str):
    study = load_study(trait)
    if len(study.get_pareto_front_trials())>0 :
        metrics, params = MultiObjectiveResults.from_study(study).best_metrics_params_r2()
    else:
        params =  lgb_params = {"bagging_fraction": 0.9522534844058304,
                  "boosting_type": "dart",
                  "objective": "regression",
                  "feature_fraction": 0.42236910941558053,
                  "lambda_l1": 0.020847266580277746,
                  "lambda_l2": 2.8448564854773326,
                  "learning_rate": 0.11484015430016059,
                  "max_depth": 3,
                  "max_leaves": 35,
                  "min_data_in_leaf": 9,
                  "num_iterations": 250,
                  "metrics": ["l1", "l2", "huber"]
                 }
    partition_shap_pipe = Pipeline([
    ("partitioner", DataPartitioner()),
    ('prepare_for_partitioning', TupleWith(params)),
    ("shap_computation", ShapSelector())
    ]
    )
    repeated_cv =  Repeat(partition_shap_pipe, repeats, lambda x,i: (x[0], replace(x[1], seed = i)))
    return Pipeline(
        [
        ('extractor', DataExtractor()),
        ('prepare_for_partitioning', TupleWith(partition_params)), # to extract the data required for ML from the dataset
        ("partition_shap", repeated_cv),
        ("summarize", Collect(fold=lambda results: FeatureSummary(results)))
        ]
        )

In [39]:
selections["lifespan"][1]

Samples metadata,Species metadata,Genes,Predict label,not_validated species
"['tissue', 'species']",[],all,lifespan,"['Mus_caroli', 'Homo_sapiens']"


In [40]:
pipelines = {trait:make_pipeline(trait) for trait in life_history}
pipelines.keys()

loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/interim/optimization/lifespan.sqlite


create_study is experimental (supported from v1.4.0). The interface can change in the future.
[I 2020-08-24 20:45:06,869] Using an existing study with name 'lifespan_r2_huber_kendall' instead of creating a new one.


loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/interim/optimization/mass_kg.sqlite


[I 2020-08-24 20:45:07,109] Using an existing study with name 'mass_kg_r2_huber_kendall' instead of creating a new one.


loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/interim/optimization/mtGC.sqlite


[I 2020-08-24 20:45:07,149] Using an existing study with name 'mtGC_r2_huber_kendall' instead of creating a new one.


loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/interim/optimization/metabolic_rate.sqlite


[I 2020-08-24 20:45:07,384] Using an existing study with name 'metabolic_rate_r2_huber_kendall' instead of creating a new one.


loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/interim/optimization/temperature.sqlite


[I 2020-08-24 20:45:07,416] Using an existing study with name 'temperature_r2_huber_kendall' instead of creating a new one.


loading (if exists) study from sqlite:////data/sources/yspecies/notebooks/../data/interim/optimization/gestation_days.sqlite


[I 2020-08-24 20:45:07,438] Using an existing study with name 'gestation_days_r2_huber_kendall' instead of creating a new one.


dict_keys(['lifespan', 'mass_kg', 'mtGC', 'metabolic_rate', 'temperature', 'gestation_days'])

# First stage selection (shap ) #

### Lifespan ###

In [52]:
def make_run(trait: str):
    return pipelines[trait].fit_transform(selections[trait])

In [54]:
stage_one_lifespan = make_run("lifespan")
stage_one_lifespan.selected

===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Pan_paniscus']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[158]	valid_0's l1: 8.56432	valid_0's l2: 189.457	valid_0's huber: 7.35476
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Heterocephalus_glaber', 'Capra_hircus']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's l1: 3.93228	valid_0's l2: 62.8388	valid_0's huber: 3.21679
Did not meet early stopping. Best iteration is:
[250]	valid_0's l1: 3.93228	valid_0's l2: 62.8388	valid_0's huber: 3.21679
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Otolemur_garnettii']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[133]	valid_0's l1: 3.6893	valid_0's l2: 41.9248	valid_0's huber: 2.9942
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_nemestrina', 'Microcebus_murinus']
Training until validation scores don't impr

Unnamed: 0,symbol,repeats,mean_shap,mean_kendall_tau,shap_0,kendall_tau_0,shap_1,kendall_tau_1,shap_2,kendall_tau_2,shap_3,kendall_tau_3,shap_4,kendall_tau_4
ENSG00000204498,NFKBIL1,5.0,7.6,-0.106,7.0,-0.156,7.75,-0.036,8.0,-0.124,7.5,-0.072,7.75,-0.14
ENSG00000010219,DYRK4,5.0,7.55,0.277,8.0,0.306,8.75,0.283,7.5,0.262,8.5,0.272,5.0,0.261
ENSG00000185880,TRIM69,5.0,4.65,0.145,4.25,0.102,4.0,0.203,4.0,0.136,3.5,0.09,7.5,0.193
ENSG00000105672,ETV2,5.0,3.55,0.728,2.75,0.796,3.0,0.723,2.75,0.734,4.25,0.68,5.0,0.708
ENSG00000167515,TRAPPC2L,4.0,5.75,-0.21,,,4.75,-0.297,4.25,-0.18,7.25,-0.18,6.75,-0.183
ENSG00000132436,FIGNL1,4.0,3.938,0.506,4.0,0.468,,,3.25,0.521,3.75,0.525,4.75,0.512
ENSG00000165501,LRR1,4.0,2.125,0.694,2.25,0.669,,,2.0,0.742,1.5,0.652,2.75,0.712
ENSG00000188747,NOXA1,4.0,2.062,0.696,1.25,0.679,,,2.25,0.706,2.25,0.684,2.5,0.716
ENSG00000066923,STAG3,3.0,6.333,0.449,,,5.0,0.38,8.5,0.453,,,5.5,0.514
ENSG00000171121,KCNMB3,3.0,4.417,0.668,5.0,0.704,4.0,0.654,,,4.25,0.646,,


In [None]:
stage_one_lifespan.metrics

### Mass_kg ###

In [56]:
stage_one_mass = make_run("mass_kg")
stage_one_mass.selected

===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Monodelphis_domestica', 'Aotus_nancymaae']


Early stopping is not available in dart mode


[250]	valid_0's l1: 14.6233	valid_0's l2: 531.809	valid_0's huber: 12.8062
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Sus_scrofa', 'Oryctolagus_cuniculus']
[250]	valid_0's l1: 31.9305	valid_0's l2: 2492.04	valid_0's huber: 28.3848
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Mesocricetus_auratus', 'Cavia_porcellus']
[250]	valid_0's l1: 6.99638	valid_0's l2: 178.413	valid_0's huber: 5.95567
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Ictidomys_tridecemlineatus', 'Phascolarctos_cinereus']
[250]	valid_0's l1: 8.39636	valid_0's l2: 201.975	valid_0's huber: 7.23164
===== fitting models with seed 1 =====
SEED: 1 | FOLD: 0 | VALIDATION_SPECIES: ['Aotus_nancymaae', 'Mesocricetus_auratus']


Early stopping is not available in dart mode


[250]	valid_0's l1: 11.3117	valid_0's l2: 521.558	valid_0's huber: 9.85904
SEED: 1 | FOLD: 1 | VALIDATION_SPECIES: ['Monodelphis_domestica', 'Rattus_norvegicus']
[250]	valid_0's l1: 6.67361	valid_0's l2: 159.442	valid_0's huber: 5.61735
SEED: 1 | FOLD: 2 | VALIDATION_SPECIES: ['Ictidomys_tridecemlineatus', 'Sus_scrofa']
[250]	valid_0's l1: 14.1861	valid_0's l2: 479.213	valid_0's huber: 12.4246
SEED: 1 | FOLD: 3 | VALIDATION_SPECIES: ['Homo_sapiens', 'Cavia_porcellus']
[250]	valid_0's l1: 28.3574	valid_0's l2: 1465.94	valid_0's huber: 25.1512
===== fitting models with seed 2 =====
SEED: 2 | FOLD: 0 | VALIDATION_SPECIES: ['Rattus_norvegicus', 'Meriones_unguiculatus']


Early stopping is not available in dart mode


[250]	valid_0's l1: 4.2628	valid_0's l2: 147.876	valid_0's huber: 3.46674
SEED: 2 | FOLD: 1 | VALIDATION_SPECIES: ['Suricata_suricatta', 'Mesocricetus_auratus']
[250]	valid_0's l1: 7.41637	valid_0's l2: 309.584	valid_0's huber: 6.39649
SEED: 2 | FOLD: 2 | VALIDATION_SPECIES: ['Phascolarctos_cinereus', 'Cavia_porcellus']
[250]	valid_0's l1: 3.37577	valid_0's l2: 72.8074	valid_0's huber: 2.69391
SEED: 2 | FOLD: 3 | VALIDATION_SPECIES: ['Homo_sapiens', 'Aotus_nancymaae']
[250]	valid_0's l1: 27.6653	valid_0's l2: 1569.13	valid_0's huber: 24.5205
===== fitting models with seed 3 =====
SEED: 3 | FOLD: 0 | VALIDATION_SPECIES: ['Callithrix_jacchus', 'Rattus_norvegicus']


Early stopping is not available in dart mode


[250]	valid_0's l1: 14.4167	valid_0's l2: 541.186	valid_0's huber: 12.5954
SEED: 3 | FOLD: 1 | VALIDATION_SPECIES: ['Ictidomys_tridecemlineatus', 'Oryctolagus_cuniculus']
[250]	valid_0's l1: 27.7632	valid_0's l2: 2703.49	valid_0's huber: 24.6433
SEED: 3 | FOLD: 2 | VALIDATION_SPECIES: ['Sus_scrofa', 'Aotus_nancymaae']
[250]	valid_0's l1: 27.7252	valid_0's l2: 2864.96	valid_0's huber: 24.5984
SEED: 3 | FOLD: 3 | VALIDATION_SPECIES: ['Meriones_unguiculatus', 'Mesocricetus_auratus']
[250]	valid_0's l1: 11.3023	valid_0's l2: 228.898	valid_0's huber: 9.7895
===== fitting models with seed 4 =====
SEED: 4 | FOLD: 0 | VALIDATION_SPECIES: ['Callithrix_jacchus', 'Heterocephalus_glaber']


Early stopping is not available in dart mode


[250]	valid_0's l1: 19.704	valid_0's l2: 771.443	valid_0's huber: 17.3417
SEED: 4 | FOLD: 1 | VALIDATION_SPECIES: ['Cavia_porcellus', 'Aotus_nancymaae']
[250]	valid_0's l1: 20.6264	valid_0's l2: 747.225	valid_0's huber: 18.1848
SEED: 4 | FOLD: 2 | VALIDATION_SPECIES: ['Homo_sapiens', 'Rattus_norvegicus']
[250]	valid_0's l1: 17.4659	valid_0's l2: 917.915	valid_0's huber: 15.3391
SEED: 4 | FOLD: 3 | VALIDATION_SPECIES: ['Phascolarctos_cinereus', 'Ictidomys_tridecemlineatus']
[250]	valid_0's l1: 15.7312	valid_0's l2: 537.635	valid_0's huber: 13.8034


Unnamed: 0,symbol,repeats,mean_shap,mean_kendall_tau,shap_0,kendall_tau_0,shap_1,kendall_tau_1,shap_2,kendall_tau_2,shap_3,kendall_tau_3,shap_4,kendall_tau_4
ENSG00000054277,OPN3,4.0,59.0,0.077,22.75,-0.122,,,91.25,0.25,15.0,0.086,107.0,0.095
ENSG00000127952,STYXL1,3.0,24.417,0.355,34.0,0.241,,,8.5,0.474,30.75,0.351,,
ENSG00000270647,TAF15,2.0,20.375,0.054,,,,,18.75,0.006,,,22.0,0.102
ENSG00000107020,PLGRKT,2.0,17.75,0.128,17.0,0.1,18.5,0.155,,,,,,
ENSG00000010219,DYRK4,2.0,16.125,0.2,15.25,0.128,,,,,17.0,0.271,,
ENSG00000175806,MSRA,2.0,11.5,-0.039,10.75,-0.005,,,,,12.25,-0.074,,
ENSG00000168894,RNF181,2.0,1.625,0.171,1.25,0.012,,,,,2.0,0.329,,
ENSG00000164304,CAGE1,1.0,23.0,-0.2,23.0,-0.2,,,,,,,,
ENSG00000124172,ATP5F1E,1.0,18.5,-0.428,,,,,,,18.5,-0.428,,
ENSG00000066923,STAG3,1.0,15.0,0.071,15.0,0.071,,,,,,,,


### MtGC ###

In [57]:
stage_one_mtGC = make_run("mtGC")
stage_one_mtGC.selected

===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Callithrix_jacchus', 'Sus_scrofa']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's l1: 0.464279	valid_0's l2: 0.586362	valid_0's huber: 0.199885
Did not meet early stopping. Best iteration is:
[250]	valid_0's l1: 0.464279	valid_0's l2: 0.586362	valid_0's huber: 0.199885
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Rattus_norvegicus', 'Gorilla_gorilla']
Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[108]	valid_0's l1: 0.805182	valid_0's l2: 1.03374	valid_0's huber: 0.423183
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Macaca_mulatta', 'Equus_caballus']
Training until validation scores don't improve for 10 rounds
[250]	valid_0's l1: 0.595834	valid_0's l2: 1.0413	valid_0's huber: 0.337971
Did not meet early stopping. Best iteration is:
[250]	valid_0's l1: 0.595834	valid_0's l2: 1.0413	valid_0's huber: 0.337971
SEED: 0 | FOLD: 3 | VALIDAT

Unnamed: 0,symbol,repeats,mean_shap,mean_kendall_tau,shap_0,kendall_tau_0,shap_1,kendall_tau_1,shap_2,kendall_tau_2,shap_3,kendall_tau_3,shap_4,kendall_tau_4
ENSG00000171121,KCNMB3,5.0,37.4,0.646,38.25,0.639,42.75,0.621,39.5,0.633,25.25,0.669,41.25,0.668
ENSG00000164362,TERT,5.0,23.0,-0.465,27.0,-0.449,20.0,-0.434,31.5,-0.408,16.75,-0.402,19.75,-0.632
ENSG00000196419,XRCC6,5.0,19.8,0.618,15.25,0.654,20.5,0.646,22.5,0.691,18.75,0.471,22.0,0.629
ENSG00000122034,GTF3A,5.0,18.8,0.611,17.25,0.557,24.75,0.589,20.5,0.544,15.0,0.687,16.5,0.677
ENSG00000144451,SPAG16,5.0,13.25,0.413,10.25,0.445,17.75,0.389,14.0,0.45,8.5,0.411,15.75,0.368
ENSG00000122952,ZWINT,5.0,11.3,-0.676,12.25,-0.69,16.0,-0.686,4.5,-0.597,12.5,-0.684,11.25,-0.725
ENSG00000188747,NOXA1,5.0,11.15,0.641,14.5,0.668,13.75,0.57,13.0,0.646,6.25,0.636,8.25,0.684
ENSG00000023191,RNH1,5.0,7.3,-0.542,5.75,-0.62,8.75,-0.523,7.0,-0.482,6.75,-0.472,8.25,-0.611
ENSG00000089127,OAS1,5.0,5.1,-0.344,4.25,-0.412,7.0,-0.387,8.0,0.023,2.25,-0.548,4.0,-0.395
ENSG00000065548,ZC3H15,5.0,3.85,0.309,3.25,0.326,4.25,0.448,7.75,0.253,2.0,0.289,2.0,0.231


### Metabolism ###

In [None]:
#stage_one_metabolic_rate = make_run("metabolic_rate")
stage_one_metabolic_rate.selected

### Temperature ###

In [None]:
stage_one_temperature = make_run("temperature")
stage_one_temperature

[autoreload of yspecies.results failed: Traceback (most recent call last):
  File "/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 410, in superreload
    update_generic(old_obj, new_obj)
  File "/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 347, in update_generic
    update(a, b)
  File "/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 317, in update_class
    update_instances(old, new)
  File "/opt/miniconda3/envs/yspecies/lib/python3.8/site-packages/IPython/extensions/autoreload.py", line 280, in update_instances
    ref.__class__ = new
  File "<string>", line 4, in __setattr__
dataclasses.FrozenInstanceError: cannot assign to field '__class__'
]


===== fitting models with seed 0 =====
SEED: 0 | FOLD: 0 | VALIDATION_SPECIES: ['Rattus_norvegicus', 'Suricata_suricatta']


Found `num_iterations` in params. Will use it instead of argument
Early stopping is not available in dart mode


[250]	valid_0's l1: 37.5433	valid_0's l2: 1409.91	valid_0's huber: 33.384
SEED: 0 | FOLD: 1 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Cavia_porcellus']
[250]	valid_0's l1: 32.4614	valid_0's l2: 1056.27	valid_0's huber: 28.8103
SEED: 0 | FOLD: 2 | VALIDATION_SPECIES: ['Tupaia_belangeri', 'Mesocricetus_auratus']
[250]	valid_0's l1: 34.0427	valid_0's l2: 1159.96	valid_0's huber: 30.2334
SEED: 0 | FOLD: 3 | VALIDATION_SPECIES: ['Bos_taurus', 'Ictidomys_tridecemlineatus']
[250]	valid_0's l1: 34.7152	valid_0's l2: 1206.03	valid_0's huber: 30.8387
===== fitting models with seed 1 =====
SEED: 1 | FOLD: 0 | VALIDATION_SPECIES: ['Equus_caballus', 'Macaca_mulatta']


Found `num_iterations` in params. Will use it instead of argument
Early stopping is not available in dart mode


[250]	valid_0's l1: 35.0993	valid_0's l2: 1232.82	valid_0's huber: 31.1843
SEED: 1 | FOLD: 1 | VALIDATION_SPECIES: ['Rattus_norvegicus', 'Felis_catus']
[250]	valid_0's l1: 41.2284	valid_0's l2: 1701.2	valid_0's huber: 36.7005
SEED: 1 | FOLD: 2 | VALIDATION_SPECIES: ['Bos_taurus', 'Suricata_suricatta']
[250]	valid_0's l1: 32.3489	valid_0's l2: 1048.16	valid_0's huber: 28.709
SEED: 1 | FOLD: 3 | VALIDATION_SPECIES: ['Cavia_porcellus', 'Callithrix_jacchus']
[250]	valid_0's l1: 37.2623	valid_0's l2: 1390.1	valid_0's huber: 33.1311
===== fitting models with seed 2 =====
SEED: 2 | FOLD: 0 | VALIDATION_SPECIES: ['Felis_catus', 'Suricata_suricatta']


Found `num_iterations` in params. Will use it instead of argument
Early stopping is not available in dart mode


[250]	valid_0's l1: 32.8705	valid_0's l2: 1081.04	valid_0's huber: 29.1784
SEED: 2 | FOLD: 1 | VALIDATION_SPECIES: ['Homo_sapiens', 'Monodelphis_domestica']
[250]	valid_0's l1: 36.4961	valid_0's l2: 1336.21	valid_0's huber: 32.4415
SEED: 2 | FOLD: 2 | VALIDATION_SPECIES: ['Macaca_mulatta', 'Phascolarctos_cinereus']
[250]	valid_0's l1: 37.2646	valid_0's l2: 1389.56	valid_0's huber: 33.1331
SEED: 2 | FOLD: 3 | VALIDATION_SPECIES: ['Mesocricetus_auratus', 'Sus_scrofa']
[250]	valid_0's l1: 33.3301	valid_0's l2: 1111.23	valid_0's huber: 29.5921
===== fitting models with seed 3 =====
SEED: 3 | FOLD: 0 | VALIDATION_SPECIES: ['Macaca_fascicularis', 'Tupaia_belangeri']


Found `num_iterations` in params. Will use it instead of argument
Early stopping is not available in dart mode


[250]	valid_0's l1: 33.7324	valid_0's l2: 1138.52	valid_0's huber: 29.9542
SEED: 3 | FOLD: 1 | VALIDATION_SPECIES: ['Felis_catus', 'Cavia_porcellus']
[250]	valid_0's l1: 37.1325	valid_0's l2: 1382.08	valid_0's huber: 33.0142
SEED: 3 | FOLD: 2 | VALIDATION_SPECIES: ['Gorilla_gorilla', 'Ictidomys_tridecemlineatus']
[250]	valid_0's l1: 37.3667	valid_0's l2: 1397.95	valid_0's huber: 33.225
SEED: 3 | FOLD: 3 | VALIDATION_SPECIES: ['Macaca_mulatta', 'Bos_taurus']
[250]	valid_0's l1: 35.5684	valid_0's l2: 1266.11	valid_0's huber: 31.6066


### Gestation ###

In [None]:
stage_one_gestation = make_run("gestation_days"])
stage_one_gestation.selected

## Intersect first stages ##

## Second stage selection ##