In [1]:
from src.utils import Phenotypes, Models, DataType, make_features
import pandas as pd
import numpy as np

# Exposome subset used in this notebook; any value other than PREGNANCY or
# POSTNATAL selects the combined exposome file.
DATATYPE = DataType.BOTH

# Pick the exposome CSV first, then read the three tables.
if DATATYPE == DataType.PREGNANCY:
    exposome_csv = "data/preprocessed/preg_exposome.csv"
elif DATATYPE == DataType.POSTNATAL:
    exposome_csv = "data/preprocessed/postnatal_exposome.csv"
else:
    exposome_csv = "data/preprocessed/exposome.csv"

exposome = pd.read_csv(exposome_csv)
covariates = pd.read_csv("data/preprocessed/covariates.csv")
phenotype = pd.read_csv("data/preprocessed/phenotype.csv")

In [2]:
import pandas as pd

def print_grid_search_results(gsearch):
    """
    Summarise a fitted grid search as a table ordered by rank.

    Despite the name, nothing is printed: the table is returned so that it
    is displayed when the call is the last expression of a cell.

    Parameters
    ----------
    gsearch : object exposing a ``cv_results_`` mapping (e.g. a fitted
        ``GridSearchCV``) with at least the keys ``params``,
        ``rank_test_score``, ``mean_test_score`` and ``std_test_score``.

    Returns
    -------
    pandas.DataFrame
        One row per parameter combination, best rank first. The index is
        named ``kernel`` and holds the parameter values joined with ``_``.
    """
    ranked = pd.DataFrame(gsearch.cv_results_).sort_values(by=['rank_test_score'])

    # Row label = the parameter values of the combination, e.g. "6_3".
    row_labels = ranked["params"].apply(
        lambda combo: "_".join(str(value) for value in combo.values())
    )
    ranked = ranked.set_index(row_labels).rename_axis('kernel')

    summary_columns = ['params', 'rank_test_score', 'mean_test_score', 'std_test_score']
    return ranked[summary_columns]
    

# IQ

In [3]:
# Phenotype predicted in this section.
TARGET = Phenotypes.IQ

# Build the combined table, take the target column out of it, and keep
# every non-phenotype column as a model input.
merged = make_features(exposome, covariates, phenotype)
target = merged[TARGET.value]
features = merged.drop(columns=phenotype.columns)
features_columns = features.columns

print('target_name: ', target.name)

joining dataframes on same ID
dataframe of:
  * 1301 rows total
  * 264 features total
target_name:  hs_correct_raven


### max_depth && min_child_weight tuning

In [4]:
from sklearn.model_selection import cross_val_score
from xgboost.sklearn import XGBRegressor
from sklearn.model_selection import GridSearchCV

# Baseline XGBoost settings; later cells overwrite individual entries as
# each hyper-parameter is tuned.
params = dict(
    learning_rate=0.02,
    n_estimators=5000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    eval_metric='mae',
    booster='gbtree',
    use_label_encoder=False,
    seed=42,
)

# `model` holds the estimator class; a fresh instance is built per search.
model = XGBRegressor

# Coarse grid over tree depth and minimum child weight.
param_test1 = {
    'max_depth': range(2, 8, 2),
    'min_child_weight': range(1, 6, 2),
}

gsearch1 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test1,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch1.fit(features, target)

print_grid_search_results(gsearch1)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6_3,"{'max_depth': 6, 'min_child_weight': 3}",1,-3.331317,0.132865
6_5,"{'max_depth': 6, 'min_child_weight': 5}",2,-3.332615,0.112112
6_1,"{'max_depth': 6, 'min_child_weight': 1}",3,-3.347684,0.13232
4_3,"{'max_depth': 4, 'min_child_weight': 3}",4,-3.361757,0.135248
4_5,"{'max_depth': 4, 'min_child_weight': 5}",5,-3.363752,0.136027
4_1,"{'max_depth': 4, 'min_child_weight': 1}",6,-3.366762,0.114059
2_1,"{'max_depth': 2, 'min_child_weight': 1}",7,-3.46221,0.1451
2_3,"{'max_depth': 2, 'min_child_weight': 3}",8,-3.46363,0.154744
2_5,"{'max_depth': 2, 'min_child_weight': 5}",9,-3.473087,0.13567


In [5]:
# Refined search for max_depth and min_child_weight around the best
# values of the coarse grid.
param_test2 = {
    'max_depth': [5, 6, 7],
    'min_child_weight': [2, 3, 4],
}
gsearch2 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test2,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch2.fit(features, target)

print_grid_search_results(gsearch2)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6_2,"{'max_depth': 6, 'min_child_weight': 2}",1,-3.3255,0.1264
7_4,"{'max_depth': 7, 'min_child_weight': 4}",2,-3.327164,0.138783
5_4,"{'max_depth': 5, 'min_child_weight': 4}",3,-3.327466,0.131382
6_4,"{'max_depth': 6, 'min_child_weight': 4}",4,-3.329311,0.116435
5_3,"{'max_depth': 5, 'min_child_weight': 3}",5,-3.329651,0.138688
6_3,"{'max_depth': 6, 'min_child_weight': 3}",6,-3.331317,0.132865
7_3,"{'max_depth': 7, 'min_child_weight': 3}",7,-3.337929,0.124169
7_2,"{'max_depth': 7, 'min_child_weight': 2}",8,-3.345372,0.100113
5_2,"{'max_depth': 5, 'min_child_weight': 2}",9,-3.347093,0.137549


### gamma tuning

In [6]:
# Fix the tree-shape parameters before tuning gamma.
params.update(max_depth=6, min_child_weight=3)
print(params)

# gamma candidates: 0.0, 0.1, ..., 0.4
param_test3 = {
    'gamma': [step / 10.0 for step in range(0, 5)],
}
gsearch3 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test3,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch3.fit(features, target)

print_grid_search_results(gsearch3)

{'learning_rate': 0.02, 'n_estimators': 5000, 'max_depth': 6, 'min_child_weight': 3, 'gamma': 0, 'subsample': 0.8, 'colsample_bytree': 0.8, 'objective': 'reg:squarederror', 'eval_metric': 'mae', 'booster': 'gbtree', 'use_label_encoder': False, 'seed': 42}


Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,{'gamma': 0.0},1,-3.387739,0.102183
0.1,{'gamma': 0.1},2,-3.3983,0.10549
0.2,{'gamma': 0.2},3,-3.400207,0.111253
0.3,{'gamma': 0.3},4,-3.400934,0.104957
0.4,{'gamma': 0.4},5,-3.403719,0.093327


### subsample && colsample_bytree tuning

In [7]:
# Tune subsample and colsample_bytree (both over 0.6, 0.7, 0.8, 0.9).
param_test4 = {
    'subsample': [step / 10.0 for step in range(6, 10)],
    'colsample_bytree': [step / 10.0 for step in range(6, 10)],
}
gsearch4 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test4,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch4.fit(features, target)

print_grid_search_results(gsearch4)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.9_0.6,"{'colsample_bytree': 0.9, 'subsample': 0.6}",1,-3.38046,0.147909
0.8_0.8,"{'colsample_bytree': 0.8, 'subsample': 0.8}",2,-3.387739,0.102183
0.8_0.6,"{'colsample_bytree': 0.8, 'subsample': 0.6}",3,-3.39017,0.126119
0.7_0.9,"{'colsample_bytree': 0.7, 'subsample': 0.9}",4,-3.392983,0.134766
0.8_0.9,"{'colsample_bytree': 0.8, 'subsample': 0.9}",5,-3.397351,0.153661
0.7_0.7,"{'colsample_bytree': 0.7, 'subsample': 0.7}",6,-3.400963,0.114202
0.7_0.8,"{'colsample_bytree': 0.7, 'subsample': 0.8}",7,-3.400985,0.128836
0.9_0.8,"{'colsample_bytree': 0.9, 'subsample': 0.8}",8,-3.401563,0.121045
0.9_0.9,"{'colsample_bytree': 0.9, 'subsample': 0.9}",9,-3.402918,0.145713
0.6_0.9,"{'colsample_bytree': 0.6, 'subsample': 0.9}",10,-3.403454,0.114878


In [8]:
import configparser

# Load the existing XGBoost config so that other sections are preserved
# when the file is rewritten below.
config = configparser.ConfigParser()
config.read('config/config_xgb.ini')

# config['DEFAULT'] = {
#     'learning_rate' : 0.01,
#     'n_estimators' : 5000,
#     'max_depth' : 5,
#     'min_child_weight' : 1,
#     'gamma' : 0,
#     'subsample' : 0.8,
#     'colsample_bytree' : 0.8,
#     'objective' : 'reg:squarederror',
#     'eval_metric' : 'mae',
#     'booster' : 'gbtree',
#     'use_label_encoder' : False,
#     'seed' : 42
# }

# Record the chosen sampling ratios and training schedule, then store the
# full parameter set under the IQ section.
params.update(
    colsample_bytree=0.9,
    subsample=0.6,
    learning_rate=0.02,
    n_estimators=5000,
)
print(params)
config['IQ'] = params

with open('config/config_xgb.ini', 'w') as config_out:
    config.write(config_out)

{'learning_rate': 0.01, 'n_estimators': 5000, 'max_depth': 6, 'min_child_weight': 3, 'gamma': 0, 'subsample': 0.6, 'colsample_bytree': 0.9, 'objective': 'reg:squarederror', 'eval_metric': 'mae', 'booster': 'gbtree', 'use_label_encoder': False, 'seed': 42}


# BODY MASS INDEX CATEGORICAL

In [11]:
# Phenotype predicted in this section.
TARGET = Phenotypes.BODY_MASS_INDEX_CATEGORICAL

import imblearn

# Remove the child weight and height covariates from the inputs.
# A non in-place drop with errors='ignore' keeps this cell re-runnable:
# the previous in-place version raised KeyError on a second execution
# because the columns were already gone.
covariates = covariates.drop(
    columns=['hs_c_weight_None', 'hs_c_height_None'],
    errors='ignore',
)

# Make features and target dataframes
features = make_features(exposome, covariates, phenotype)
target = features[TARGET.value]
features = features.drop(phenotype.columns, axis = 1)
features_columns = features.columns

# print(target.value_counts())
# # Oversampling
# oversampler = imblearn.over_sampling.RandomOverSampler(sampling_strategy={0:100, 1:904, 2:500, 3:500})
# features, target = oversampler.fit_resample(features, target)

print('target_name: ', target.name)

joining dataframes on same ID
dataframe of:
  * 1301 rows total
  * 240 features total
target_name:  hs_bmi_c_cat


## Grid search with oversampling

In [13]:
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

def score_model(model, params, sampling_strategy, X_train, y_train):
    """
    Cross-validate a classifier while oversampling inside each training fold.

    Five folds are created manually with ``KFold(n_splits=5)``. For each
    fold the training part is resampled with ``RandomOverSampler`` and the
    model is scored on the untouched validation part, so no resampled
    rows leak into validation.

    Parameters
    ----------
    model : callable
        Estimator class (or factory); called as ``model(**params)`` and the
        result must provide ``fit`` and ``predict``.
    params : dict
        Keyword arguments used to build the estimator.
    sampling_strategy :
        Passed unchanged to ``RandomOverSampler(sampling_strategy=...)``.
    X_train : pandas.DataFrame
        Feature rows (indexed by position with ``.iloc``).
    y_train : pandas.Series or array-like
        Labels aligned row-by-row with ``X_train``.

    Returns
    -------
    float
        Mean of the per-fold weighted F1 scores.
    """
    cv = KFold(n_splits=5)
    oversampler = imblearn.over_sampling.RandomOverSampler(
        sampling_strategy=sampling_strategy, random_state=42
    )

    def _labels_at(positions):
        # KFold yields positional indices. Plain `y_train[positions]` on a
        # Series is label-based and picks wrong rows (or raises) whenever
        # the index is not the default 0..n-1, so use .iloc when available.
        if hasattr(y_train, 'iloc'):
            return y_train.iloc[positions]
        return y_train[positions]

    fold_scores = []
    for train_fold_index, val_fold_index in cv.split(X_train, y_train):
        # Training and validation slices for this fold
        X_train_fold, y_train_fold = X_train.iloc[train_fold_index], _labels_at(train_fold_index)
        X_val_fold, y_val_fold = X_train.iloc[val_fold_index], _labels_at(val_fold_index)

        # Upsample only the data in the training section
        X_train_fold_upsample, y_train_fold_upsample = oversampler.fit_resample(X_train_fold, y_train_fold)
        # Fit the model on the upsampled training data
        model_obj = model(**params).fit(X_train_fold_upsample, y_train_fold_upsample)
        # Score the model on the (non-upsampled) validation data
        fold_scores.append(
            f1_score(y_val_fold, model_obj.predict(X_val_fold), average='weighted')
        )
    return np.mean(fold_scores)

### max_depth && min_child_weight tuning (manually)

In [None]:
from xgboost.sklearn import XGBClassifier
# Coarse grid over tree depth and minimum child weight.
param_test1 = {
    'max_depth': range(2, 8, 2),
    'min_child_weight': range(1, 6, 2),
}
# Forwarded to RandomOverSampler inside score_model.
sampling_strategy = {0:60, 2:500, 3:500}

# Baseline multi-class settings; the loops below overwrite single entries.
params = dict(
    learning_rate=0.1,
    n_estimators=5000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    num_class=4,
    eval_metric='mlogloss',
    booster='gbtree',
    use_label_encoder=False,
    seed=42,
)
model = XGBClassifier

scores = []
for max_depth in param_test1['max_depth']:
    for min_child_weight in param_test1['min_child_weight']:
        # score_model reads the shared params dict, so update it in place.
        params.update(max_depth=max_depth, min_child_weight=min_child_weight)
        mean_f1 = score_model(model, params, sampling_strategy, features, target)
        scores.append({
            'max_depth': max_depth,
            'min_child_weight': min_child_weight,
            'f1_score': mean_f1,
        })

# Best combination first.
print(sorted(scores, key=lambda entry: entry['f1_score'], reverse=True))

In [20]:
# Finer grid over max_depth and min_child_weight.
param_test2 = {
    'max_depth': [5, 6, 7],
    'min_child_weight': [2, 3, 4],
}
scores = []
for max_depth in param_test2['max_depth']:
    for min_child_weight in param_test2['min_child_weight']:
        # score_model reads the shared params dict, so update it in place.
        params.update(max_depth=max_depth, min_child_weight=min_child_weight)
        mean_f1 = score_model(model, params, sampling_strategy, features, target)
        scores.append({
            'max_depth': max_depth,
            'min_child_weight': min_child_weight,
            'f1_score': mean_f1,
        })

# Best combination first.
print(sorted(scores, key=lambda entry: entry['f1_score'], reverse=True))

[{'max_depth': 5, 'min_child_weight': 2, 'f1_score': 0.6536901576462584}, {'max_depth': 6, 'min_child_weight': 3, 'f1_score': 0.6531139626022864}, {'max_depth': 7, 'min_child_weight': 2, 'f1_score': 0.6515637237638001}, {'max_depth': 5, 'min_child_weight': 3, 'f1_score': 0.651523296143764}, {'max_depth': 5, 'min_child_weight': 4, 'f1_score': 0.6505128364727325}, {'max_depth': 7, 'min_child_weight': 4, 'f1_score': 0.6483417121995029}, {'max_depth': 6, 'min_child_weight': 2, 'f1_score': 0.6478537674760437}, {'max_depth': 7, 'min_child_weight': 3, 'f1_score': 0.6408746903877061}, {'max_depth': 6, 'min_child_weight': 4, 'f1_score': 0.6394282193256993}]


### Gamma tuning (manual)

In [21]:
# Fix the tree-shape parameters before tuning gamma.
params.update(max_depth=5, min_child_weight=2)

# gamma candidates: 0.0, 0.1, ..., 0.4
param_test3 = {
    'gamma': [step / 10.0 for step in range(0, 5)],
}
scores = []
for gamma in param_test3['gamma']:
    params['gamma'] = gamma
    mean_f1 = score_model(model, params, sampling_strategy, features, target)
    scores.append({'gamma': gamma, 'f1_score': mean_f1})

# Best value first.
print(sorted(scores, key=lambda entry: entry['f1_score'], reverse=True))

[{'gamma': 0.3, 'f1_score': 0.6540855177908281}, {'gamma': 0.0, 'f1_score': 0.6536901576462584}, {'gamma': 0.1, 'f1_score': 0.6494768699862129}, {'gamma': 0.2, 'f1_score': 0.6481735406464757}, {'gamma': 0.4, 'f1_score': 0.647754238935149}]


### subsample && colsample_bytree tuning

In [23]:
# Fix gamma, then scan the row and column sampling ratios (0.6 to 0.9).
params['gamma'] = 0.3
param_test4 = {
    'subsample': [step / 10.0 for step in range(6, 10)],
    'colsample_bytree': [step / 10.0 for step in range(6, 10)],
}
scores = []
for subsample in param_test4['subsample']:
    for colsample_bytree in param_test4['colsample_bytree']:
        # score_model reads the shared params dict, so update it in place.
        params.update(subsample=subsample, colsample_bytree=colsample_bytree)
        mean_f1 = score_model(model, params, sampling_strategy, features, target)
        scores.append({
            'subsample': subsample,
            'colsample_bytree': colsample_bytree,
            'f1_score': mean_f1,
        })

# Best combination first.
print(sorted(scores, key=lambda entry: entry['f1_score'], reverse=True))

[{'subsample': 0.7, 'colsample_bytree': 0.8, 'f1_score': 0.659449863133648}, {'subsample': 0.8, 'colsample_bytree': 0.9, 'f1_score': 0.6542531812768613}, {'subsample': 0.8, 'colsample_bytree': 0.8, 'f1_score': 0.6540855177908281}, {'subsample': 0.9, 'colsample_bytree': 0.9, 'f1_score': 0.6527785688978482}, {'subsample': 0.6, 'colsample_bytree': 0.7, 'f1_score': 0.6521874183631386}, {'subsample': 0.7, 'colsample_bytree': 0.6, 'f1_score': 0.6503107251029907}, {'subsample': 0.9, 'colsample_bytree': 0.7, 'f1_score': 0.6498355055138745}, {'subsample': 0.8, 'colsample_bytree': 0.7, 'f1_score': 0.6495632675173051}, {'subsample': 0.6, 'colsample_bytree': 0.8, 'f1_score': 0.6486509037915023}, {'subsample': 0.9, 'colsample_bytree': 0.8, 'f1_score': 0.6476966497684166}, {'subsample': 0.7, 'colsample_bytree': 0.7, 'f1_score': 0.6454112979930805}, {'subsample': 0.6, 'colsample_bytree': 0.6, 'f1_score': 0.6440807901727117}, {'subsample': 0.7, 'colsample_bytree': 0.9, 'f1_score': 0.6439319248377515},

In [28]:
import configparser

# Load the existing XGBoost config so that other sections are preserved
# when the file is rewritten below.
config = configparser.ConfigParser()
config.read('config/config_xgb.ini')

# Record the selected sampling ratios and the final training schedule.
params['colsample_bytree'] = 0.8
params['subsample'] = 0.7
params['learning_rate'] = 0.01
params['n_estimators'] = 5000
print(params)
config['BODY_MASS_INDEX_CATEGORICAL'] = params

# Write back to the file that was read. This cell previously wrote to
# 'config/config.ini', so the section never reached config_xgb.ini, which
# is the file every other config cell in this notebook reads and writes.
with open('config/config_xgb.ini', 'w') as configfile:
    config.write(configfile)

{'learning_rate': 0.01, 'n_estimators': 5000, 'max_depth': 5, 'min_child_weight': 2, 'gamma': 0.3, 'subsample': 0.7, 'colsample_bytree': 0.8, 'objective': 'multi:softmax', 'num_class': 4, 'eval_metric': 'mlogloss', 'booster': 'gbtree', 'use_label_encoder': False, 'seed': 42}


In [40]:
from imblearn.pipeline import Pipeline, make_pipeline
from sklearn.model_selection import cross_val_score

# Forwarded to RandomOverSampler.
# NOTE(review): class 2 is given 1.0 while the other classes get integer
# counts; confirm this value is intended.
sampling_strategy = {0:100, 2:1.0, 3:500}
model = XGBClassifier(
            learning_rate = 0.1,
            n_estimators=140,
            max_depth=5,
            min_child_weight=1,
            gamma=0,
            subsample=0.8,
            colsample_bytree=0.8,
            objective= 'multi:softmax',
            num_class=4,
            eval_metric='mlogloss',
            booster='gbtree',
            use_label_encoder=False,
            seed=42)
sampler = imblearn.over_sampling.RandomOverSampler(
            sampling_strategy=sampling_strategy,
            random_state=42)

# imblearn pipeline: oversampling step followed by the classifier.
pipeline = make_pipeline(sampler, model)
print(pipeline)

# Grid keys are prefixed with the pipeline step name of the classifier.
param_test1 = {
 'xgbclassifier__max_depth' : range(2,8,2),
 'xgbclassifier__min_child_weight' : range(1,6,2)
}

# cross_val_score(pipeline, features, target, scoring='f1_macro', cv=KFold(n_splits=5))

# Search over the pipeline built above. The cell previously passed
# `imba_pipeline`, a name that is never defined in this notebook, which
# raises NameError on a fresh kernel.
gsearch1 = GridSearchCV(estimator = pipeline,
                        param_grid = param_test1,
                        scoring='f1_macro',
                        cv=5)
gsearch1.fit(features,target)

# Reuse the shared helper instead of repeating the formatting code.
results_df = print_grid_search_results(gsearch1)
results_df

Pipeline(steps=[('randomoversampler',
                 RandomOverSampler(random_state=42,
                                   sampling_strategy={0: 100, 2: 1.0, 3: 500})),
                ('xgbclassifier',
                 XGBClassifier(base_score=None, booster='gbtree',
                               colsample_bylevel=None, colsample_bynode=None,
                               colsample_bytree=0.8, eval_metric='mlogloss',
                               gamma=0, gpu_id=None, importance_type='gain',
                               interaction_constraints=None, learning_rate=0.1,
                               max_delta_step=None, max_depth=5,
                               min_child_weight=1, missing=nan,
                               monotone_constraints=None, n_estimators=140,
                               n_jobs=None, num_class=4, num_parallel_tree=None,
                               objective='multi:softmax', random_state=None,
                               reg_alpha=None, reg_lam

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2_1,"{'xgbclassifier__max_depth': 2, 'xgbclassifier...",1,0.364086,0.012538
2_3,"{'xgbclassifier__max_depth': 2, 'xgbclassifier...",2,0.360707,0.009574
2_5,"{'xgbclassifier__max_depth': 2, 'xgbclassifier...",3,0.354921,0.01598
4_5,"{'xgbclassifier__max_depth': 4, 'xgbclassifier...",4,0.353313,0.019679
6_5,"{'xgbclassifier__max_depth': 6, 'xgbclassifier...",5,0.345386,0.016329
4_3,"{'xgbclassifier__max_depth': 4, 'xgbclassifier...",6,0.339106,0.029578
6_3,"{'xgbclassifier__max_depth': 6, 'xgbclassifier...",7,0.338356,0.030574
4_1,"{'xgbclassifier__max_depth': 4, 'xgbclassifier...",8,0.334591,0.018251
6_1,"{'xgbclassifier__max_depth': 6, 'xgbclassifier...",9,0.304126,0.016835


In [30]:
from xgboost.sklearn import XGBClassifier
MODEL = Models.XGB

# Starting hyper-parameters for the classifier used by the grid searches
# in the following cells.
xgb_settings = dict(
    learning_rate=0.1,
    n_estimators=140,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='multi:softmax',
    num_class=4,
    eval_metric='mlogloss',
    booster='gbtree',
    use_label_encoder=False,
    seed=42,
)
model = XGBClassifier(**xgb_settings)

In [31]:
from sklearn.model_selection import GridSearchCV

# Coarse grid over tree depth and minimum child weight.
param_test1 = {
 'max_depth':range(2,8,2),
 'min_child_weight':range(1,6,2)
}

# Cast inputs to float and class labels to integers for the classifier.
features = features.astype('float64')
labels = target.astype('int64')

gsearch1 = GridSearchCV(estimator = model,
                        param_grid = param_test1,
                        scoring='f1_macro',
                        n_jobs=4,
                        cv=5)
# Fit on the integer labels. `labels` was previously computed but never
# used because the uncast `target` was passed here instead.
gsearch1.fit(features,labels)

# Reuse the shared helper instead of repeating the formatting code.
results_df = print_grid_search_results(gsearch1)
results_df

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2_5,"{'max_depth': 2, 'min_child_weight': 5}",1,0.323886,0.018515
2_3,"{'max_depth': 2, 'min_child_weight': 3}",2,0.323848,0.021614
2_1,"{'max_depth': 2, 'min_child_weight': 1}",3,0.321861,0.015985
4_1,"{'max_depth': 4, 'min_child_weight': 1}",4,0.32173,0.026458
6_3,"{'max_depth': 6, 'min_child_weight': 3}",5,0.318847,0.021348
6_5,"{'max_depth': 6, 'min_child_weight': 5}",6,0.312343,0.028071
4_3,"{'max_depth': 4, 'min_child_weight': 3}",7,0.312224,0.017032
4_5,"{'max_depth': 4, 'min_child_weight': 5}",8,0.311845,0.031798
6_1,"{'max_depth': 6, 'min_child_weight': 1}",9,0.293828,0.022942


In [37]:
# Refined grid search over max_depth and min_child_weight
from sklearn.model_selection import GridSearchCV

param_test2 = {
    'max_depth': [2, 3, 4],
    'min_child_weight': [2, 3, 4],
}
gsearch2 = GridSearchCV(
    estimator=model,
    param_grid=param_test2,
    scoring='f1_macro',
    n_jobs=4,
    cv=5,
)
gsearch2.fit(features, target)

GridSearchCV(cv=5,
             estimator=XGBClassifier(base_score=None, booster='gbtree',
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=0.8,
                                     eval_metric='mlogloss', gamma=0,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=0.1, max_delta_step=None,
                                     max_depth=5, min_child_weight=1,
                                     missing=nan, monotone_constraints=None,
                                     n_estimators=1000, n_jobs=None,
                                     num_class=4, num_parallel_tree=None,
                                     objective='multi:softmax',
                                     random_state=None, reg_alpha=None,
               

In [38]:
# Show the ranked results of the refined search. The shared helper
# replaces the copy-pasted formatting code that used to live here.
results_df = print_grid_search_results(gsearch2)
results_df

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
4_3,"{'max_depth': 4, 'min_child_weight': 3}",1,0.337916,0.026961
2_4,"{'max_depth': 2, 'min_child_weight': 4}",2,0.337199,0.030277
4_4,"{'max_depth': 4, 'min_child_weight': 4}",3,0.335101,0.023373
2_3,"{'max_depth': 2, 'min_child_weight': 3}",4,0.331326,0.031871
3_4,"{'max_depth': 3, 'min_child_weight': 4}",5,0.331025,0.031623
2_2,"{'max_depth': 2, 'min_child_weight': 2}",6,0.330188,0.028113
3_2,"{'max_depth': 3, 'min_child_weight': 2}",7,0.328367,0.035055
3_3,"{'max_depth': 3, 'min_child_weight': 3}",8,0.325762,0.024995
4_2,"{'max_depth': 4, 'min_child_weight': 2}",9,0.324106,0.034336


In [40]:
# Tune gamma
model = XGBClassifier(
    learning_rate =0.1,
    n_estimators=140,
    max_depth=4, # Set best hp
    min_child_weight=3, # Set best hp
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective= 'multi:softmax',
    num_class=4,
    eval_metric='mlogloss',
    booster='gbtree',
    use_label_encoder=False,
    seed=42)
# gamma candidates: 0.0, 0.1, ..., 0.4
param_test3 = {
 'gamma':[i/10.0 for i in range(0,5)]
}
gsearch3 = GridSearchCV(estimator = model,
                        param_grid = param_test3,
                        scoring='f1_macro',
                        n_jobs=4,
                        cv=5)
gsearch3.fit(features,target)

# Show the ranked results. The shared helper replaces the copy-pasted
# formatting code that used to live here.
results_df = print_grid_search_results(gsearch3)
results_df

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.4,{'gamma': 0.4},1,0.3338,0.025597
0.1,{'gamma': 0.1},2,0.327229,0.022278
0.2,{'gamma': 0.2},3,0.324016,0.024292
0.3,{'gamma': 0.3},4,0.319674,0.029838
0.0,{'gamma': 0.0},5,0.312224,0.017032


In [None]:
# Tune subsample and colsample_bytree (both over 0.6, 0.7, 0.8, 0.9).
# NOTE(review): `model` is not rebuilt in this cell, so it keeps the gamma
# it was constructed with rather than the best gamma from the previous
# search; confirm that this is intended.
param_test4 = {
 'subsample':[i/10.0 for i in range(6,10)],
 'colsample_bytree':[i/10.0 for i in range(6,10)]
}
gsearch4 = GridSearchCV(estimator = model,
                        param_grid = param_test4,
                        scoring='f1_macro',
                        n_jobs=4,
                        cv=5)
gsearch4.fit(features,target)

# Show the ranked results. The shared helper replaces the copy-pasted
# formatting code that used to live here.
results_df = print_grid_search_results(gsearch4)
results_df

# BODY MASS INDEX

In [3]:
# Phenotype predicted in this section.
TARGET = Phenotypes.BODY_MASS_INDEX

# Drop BODY MASS INDEX covariates.
# errors='ignore' is required for a top-to-bottom run: the categorical-BMI
# section earlier in this notebook already removes the same two columns,
# so the previous in-place drop raised KeyError here. The non in-place
# form also keeps the cell re-runnable.
covariates = covariates.drop(
    columns=['hs_c_weight_None', 'hs_c_height_None'],
    errors='ignore',
)

# Make features and target dataframes
features = make_features(exposome, covariates, phenotype)
target = features[TARGET.value]
features = features.drop(phenotype.columns, axis = 1)
features_columns = features.columns

print('target_name: ', target.name)

joining dataframes on same ID
dataframe of:
  * 1301 rows total
  * 240 features total
target_name:  hs_zbmi_who


In [6]:
from sklearn.model_selection import cross_val_score
from xgboost.sklearn import XGBRegressor
from sklearn.model_selection import GridSearchCV

# 0.78 -- NOTE(review): presumably the reference MAE for this target; confirm.
# Baseline XGBoost settings; later cells overwrite individual entries as
# each hyper-parameter is tuned.
params = dict(
    learning_rate=0.01,
    n_estimators=5000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    eval_metric='mae',
    booster='gbtree',
    use_label_encoder=False,
    seed=42,
)

# `model` holds the estimator class; a fresh instance is built per search.
model = XGBRegressor

# Coarse grid over tree depth and minimum child weight.
param_test1 = {
    'max_depth': range(2, 8, 2),
    'min_child_weight': range(1, 6, 2),
}

gsearch1 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test1,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch1.fit(features, target)

print_grid_search_results(gsearch1)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6_5,"{'max_depth': 6, 'min_child_weight': 5}",1,-0.782752,0.009873
6_3,"{'max_depth': 6, 'min_child_weight': 3}",2,-0.783251,0.00874
6_1,"{'max_depth': 6, 'min_child_weight': 1}",3,-0.784577,0.009249
4_1,"{'max_depth': 4, 'min_child_weight': 1}",4,-0.785654,0.008832
4_3,"{'max_depth': 4, 'min_child_weight': 3}",5,-0.785799,0.010633
4_5,"{'max_depth': 4, 'min_child_weight': 5}",6,-0.787598,0.009552
2_5,"{'max_depth': 2, 'min_child_weight': 5}",7,-0.802214,0.019681
2_3,"{'max_depth': 2, 'min_child_weight': 3}",8,-0.803243,0.019907
2_1,"{'max_depth': 2, 'min_child_weight': 1}",9,-0.803591,0.0183


In [7]:
# Refined search for max_depth and min_child_weight around the best
# values of the coarse grid.
param_test2 = {
    'max_depth': [5, 6, 7],
    'min_child_weight': [4, 5, 6],
}
gsearch2 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test2,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch2.fit(features, target)

print_grid_search_results(gsearch2)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7_6,"{'max_depth': 7, 'min_child_weight': 6}",1,-0.780011,0.014561
7_5,"{'max_depth': 7, 'min_child_weight': 5}",2,-0.781398,0.015524
6_6,"{'max_depth': 6, 'min_child_weight': 6}",3,-0.781663,0.011878
5_5,"{'max_depth': 5, 'min_child_weight': 5}",4,-0.782177,0.012779
5_6,"{'max_depth': 5, 'min_child_weight': 6}",5,-0.782195,0.010945
5_4,"{'max_depth': 5, 'min_child_weight': 4}",6,-0.782285,0.012882
6_5,"{'max_depth': 6, 'min_child_weight': 5}",7,-0.782752,0.009873
6_4,"{'max_depth': 6, 'min_child_weight': 4}",8,-0.782781,0.011767
7_4,"{'max_depth': 7, 'min_child_weight': 4}",9,-0.783217,0.011868


### Gamma tuning

In [8]:
# Fix the tree-shape parameters before tuning gamma.
params.update(max_depth=7, min_child_weight=6)
print(params)

# gamma candidates: 0.0, 0.1, ..., 0.4
param_test3 = {
    'gamma': [step / 10.0 for step in range(0, 5)],
}
gsearch3 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test3,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch3.fit(features, target)

print_grid_search_results(gsearch3)

{'learning_rate': 0.01, 'n_estimators': 5000, 'max_depth': 7, 'min_child_weight': 6, 'gamma': 0, 'subsample': 0.8, 'colsample_bytree': 0.8, 'objective': 'reg:squarederror', 'eval_metric': 'mae', 'booster': 'gbtree', 'use_label_encoder': False, 'seed': 42}


Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,{'gamma': 0.0},1,-0.780011,0.014561
0.2,{'gamma': 0.2},2,-0.781055,0.01469
0.1,{'gamma': 0.1},3,-0.781346,0.014774
0.4,{'gamma': 0.4},4,-0.78137,0.015571
0.3,{'gamma': 0.3},5,-0.7829,0.015462


### subsample && colsample_bytree tuning

In [9]:
# Tune subsample and colsample_bytree (both over 0.6, 0.7, 0.8, 0.9).
param_test4 = {
    'subsample': [step / 10.0 for step in range(6, 10)],
    'colsample_bytree': [step / 10.0 for step in range(6, 10)],
}
gsearch4 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test4,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch4.fit(features, target)

print_grid_search_results(gsearch4)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.8_0.6,"{'colsample_bytree': 0.8, 'subsample': 0.6}",1,-0.777978,0.011379
0.7_0.6,"{'colsample_bytree': 0.7, 'subsample': 0.6}",2,-0.778329,0.015844
0.9_0.6,"{'colsample_bytree': 0.9, 'subsample': 0.6}",3,-0.778429,0.011992
0.8_0.7,"{'colsample_bytree': 0.8, 'subsample': 0.7}",4,-0.779263,0.014395
0.8_0.8,"{'colsample_bytree': 0.8, 'subsample': 0.8}",5,-0.780011,0.014561
0.9_0.7,"{'colsample_bytree': 0.9, 'subsample': 0.7}",6,-0.781131,0.01448
0.6_0.8,"{'colsample_bytree': 0.6, 'subsample': 0.8}",7,-0.781726,0.014399
0.6_0.7,"{'colsample_bytree': 0.6, 'subsample': 0.7}",8,-0.783263,0.011554
0.7_0.8,"{'colsample_bytree': 0.7, 'subsample': 0.8}",9,-0.783493,0.016066
0.6_0.6,"{'colsample_bytree': 0.6, 'subsample': 0.6}",10,-0.784065,0.015195


### Regularization

In [4]:
from sklearn.model_selection import cross_val_score
from xgboost.sklearn import XGBRegressor
from sklearn.model_selection import GridSearchCV
# 0.78 -- NOTE(review): presumably the reference MAE for this target; confirm.
# Settings carried over from the previous tuning steps of this section.
params = dict(
    learning_rate=0.01,
    n_estimators=5000,
    max_depth=7,
    min_child_weight=6,
    gamma=0,
    subsample=0.6,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    eval_metric='mae',
    booster='gbtree',
    use_label_encoder=False,
    seed=42,
)

model = XGBRegressor

# Coarse grid over the two regularisation terms (0, 3, 6, 9 each).
param_test = {
    'alpha': range(0, 10, 3),
    'lambda': range(0, 10, 3),
}

gsearch = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch.fit(features, target)

print_grid_search_results(gsearch)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_0,"{'alpha': 0, 'lambda': 0}",1,-0.781701,0.015754
0_3,"{'alpha': 0, 'lambda': 3}",2,-0.782072,0.01254
0_6,"{'alpha': 0, 'lambda': 6}",3,-0.785452,0.016518
9_3,"{'alpha': 9, 'lambda': 3}",4,-0.785746,0.021851
9_6,"{'alpha': 9, 'lambda': 6}",5,-0.785782,0.020734
9_0,"{'alpha': 9, 'lambda': 0}",6,-0.786199,0.022847
6_3,"{'alpha': 6, 'lambda': 3}",7,-0.786262,0.023022
3_0,"{'alpha': 3, 'lambda': 0}",8,-0.78628,0.020236
9_9,"{'alpha': 9, 'lambda': 9}",9,-0.787046,0.02255
6_9,"{'alpha': 6, 'lambda': 9}",10,-0.787212,0.021218


In [None]:
model = XGBRegressor

# Finer grid over the two regularisation terms (0, 1, 2 each).
param_test = {
    'alpha': [0, 1, 2],
    'lambda': [0, 1, 2],
}

gsearch = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch.fit(features, target)

print_grid_search_results(gsearch)

In [6]:
import configparser

# Store the tuned settings in the shared XGBoost config file. The file is read
# first so that its other sections are written back unchanged; only the
# BODY_MASS_INDEX section is replaced with the current `params`.
# NOTE(review): unlike the BIRTH WEIGHT section, no alpha/lambda values are
# added to `params` before saving here -- confirm this is intended.
xgb_config_path = 'config/config_xgb.ini'

config = configparser.ConfigParser()
config.read(xgb_config_path)
config['BODY_MASS_INDEX'] = params

with open(xgb_config_path, 'w') as ini_file:
    config.write(ini_file)

# BIRTH WEIGHT

In [3]:
from src.data import Phenotypes, Models, DataType, make_features
import pandas as pd
import numpy as np

# NOTE(review): this cell imports Phenotypes/Models/DataType/make_features from
# src.data, while the first cell of the notebook imports the same names from
# src.utils -- confirm which module is current.
DATATYPE = DataType.PREGNANCY

# Pick the exposome file for the selected exposure window; any value other
# than PREGNANCY or POSTNATAL falls back to the combined exposome.
exposome_path = (
    "data/preprocessed/preg_exposome.csv" if DATATYPE == DataType.PREGNANCY
    else "data/preprocessed/postnatal_exposome.csv" if DATATYPE == DataType.POSTNATAL
    else "data/preprocessed/exposome.csv"
)

# Load csv data into pandas dataframes
exposome = pd.read_csv(exposome_path)
covariates = pd.read_csv("data/preprocessed/covariates.csv")
phenotype = pd.read_csv("data/preprocessed/phenotype.csv")

In [4]:
TARGET = Phenotypes.BIRTH_WEIGHT

# Covariates left out of the birth-weight models, grouped as in the original
# cell. NOTE(review): presumably excluded because they are not meaningful
# predictors of weight at birth -- confirm the rationale.
BIRTH_WEIGHT_EXCLUDED_COVARIATES = [
    # Post natal
    'hs_child_age_None',
    'hs_c_height_None',
    'hs_c_weight_None',
    # pregnancy
    'e3_yearbir_None',
]

# Drop into a new frame instead of mutating `covariates` in place: with
# inplace=True a second run of this cell raised KeyError because the columns
# were already gone. `covariates` itself is left untouched, so the cell can be
# re-run safely; a missing column still raises KeyError on the first run.
covariates_bw = covariates.drop(columns=BIRTH_WEIGHT_EXCLUDED_COVARIATES)

# Make features and target dataframes
joined = make_features(exposome, covariates_bw, phenotype)
target = joined[TARGET.value]
# Remove every phenotype column so that no outcome remains among the features.
features = joined.drop(columns=phenotype.columns)
features_columns = features.columns

print('target_name: ', target.name)

joining dataframes on same ID
dataframe of:
  * 1301 rows total
  * 112 features total
target_name:  e3_bw


### max_depth && min_child_weight tuning

In [5]:
from sklearn.model_selection import cross_val_score
from xgboost.sklearn import XGBRegressor
from sklearn.model_selection import GridSearchCV

# Starting estimator settings for the birth-weight search; later cells update
# this dict as each group of hyper-parameters is tuned.
params = dict(
    learning_rate=0.01,
    n_estimators=5000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    eval_metric='mae',
    booster='gbtree',
    use_label_encoder=False,
    seed=42,
)

model = XGBRegressor

# Coarse grid: max_depth in {2, 4, 6} crossed with min_child_weight in {1, 3, 5}.
param_test1 = {
    'max_depth': range(2, 8, 2),
    'min_child_weight': range(1, 6, 2),
}

# cv=5 cross-validation, ranked by negative mean absolute error.
gsearch1 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test1,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch1.fit(features, target)

print_grid_search_results(gsearch1)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6_5,"{'max_depth': 6, 'min_child_weight': 5}",1,-323.090044,15.599355
6_3,"{'max_depth': 6, 'min_child_weight': 3}",2,-323.130776,15.683531
6_1,"{'max_depth': 6, 'min_child_weight': 1}",3,-323.205166,14.600872
4_5,"{'max_depth': 4, 'min_child_weight': 5}",4,-327.601267,13.932038
4_3,"{'max_depth': 4, 'min_child_weight': 3}",5,-327.678153,13.364518
4_1,"{'max_depth': 4, 'min_child_weight': 1}",6,-327.90827,13.602159
2_5,"{'max_depth': 2, 'min_child_weight': 5}",7,-333.045969,11.290305
2_3,"{'max_depth': 2, 'min_child_weight': 3}",8,-333.453415,10.586355
2_1,"{'max_depth': 2, 'min_child_weight': 1}",9,-333.932185,10.685666


In [6]:
# Refined search for max_depth and min_child_weight, using unit steps around
# the best coarse result (max_depth=6, min_child_weight=5).
param_test2 = {
    'max_depth': [5, 6, 7],
    'min_child_weight': [4, 5, 6],
}

gsearch2 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test2,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch2.fit(features, target)

print_grid_search_results(gsearch2)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
7_6,"{'max_depth': 7, 'min_child_weight': 6}",1,-320.965986,15.58871
7_4,"{'max_depth': 7, 'min_child_weight': 4}",2,-321.267677,15.793749
7_5,"{'max_depth': 7, 'min_child_weight': 5}",3,-321.563638,15.611595
6_4,"{'max_depth': 6, 'min_child_weight': 4}",4,-322.138034,15.54483
6_6,"{'max_depth': 6, 'min_child_weight': 6}",5,-322.394535,14.107039
6_5,"{'max_depth': 6, 'min_child_weight': 5}",6,-323.090044,15.599355
5_6,"{'max_depth': 5, 'min_child_weight': 6}",7,-323.994054,14.216304
5_5,"{'max_depth': 5, 'min_child_weight': 5}",8,-324.677148,13.644761
5_4,"{'max_depth': 5, 'min_child_weight': 4}",9,-324.701864,14.917129


### Gamma tuning

In [8]:
# Lock in the best pair from the refined search before tuning gamma.
params.update(max_depth=7, min_child_weight=6)
print(params)

# Candidate gamma values: 0.0 to 0.4 in steps of 0.1.
param_test3 = {'gamma': [0.0, 0.1, 0.2, 0.3, 0.4]}

gsearch3 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test3,
    scoring='neg_mean_absolute_error',
    cv=5,
)
gsearch3.fit(features, target)

print_grid_search_results(gsearch3)

{'learning_rate': 0.01, 'n_estimators': 5000, 'max_depth': 7, 'min_child_weight': 6, 'gamma': 0, 'subsample': 0.8, 'colsample_bytree': 0.8, 'objective': 'reg:squarederror', 'eval_metric': 'mae', 'booster': 'gbtree', 'use_label_encoder': False, 'seed': 42}


Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,{'gamma': 0.0},1,-320.965986,15.58871
0.1,{'gamma': 0.1},2,-320.979147,15.596143
0.2,{'gamma': 0.2},3,-320.979324,15.600214
0.3,{'gamma': 0.3},4,-320.982875,15.594309
0.4,{'gamma': 0.4},5,-320.987939,15.603121


### subsample && colsample_bytree tuning

In [9]:
# Tune subsample and colsample_bytree: each ranges over 0.6 to 1.0 in steps
# of 0.1, giving a 5 x 5 grid.
sampling_fractions = [0.6, 0.7, 0.8, 0.9, 1.0]
param_test4 = {
    'subsample': list(sampling_fractions),
    'colsample_bytree': list(sampling_fractions),
}

gsearch4 = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test4,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch4.fit(features, target)

print_grid_search_results(gsearch4)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.9_0.8,"{'colsample_bytree': 0.9, 'subsample': 0.8}",1,-320.288505,15.3323
0.8_0.8,"{'colsample_bytree': 0.8, 'subsample': 0.8}",2,-320.965986,15.58871
0.8_0.9,"{'colsample_bytree': 0.8, 'subsample': 0.9}",3,-321.30462,15.938939
0.8_0.7,"{'colsample_bytree': 0.8, 'subsample': 0.7}",4,-321.519862,13.893169
0.6_0.9,"{'colsample_bytree': 0.6, 'subsample': 0.9}",5,-321.883523,17.438531
0.6_0.8,"{'colsample_bytree': 0.6, 'subsample': 0.8}",6,-322.002641,17.103146
0.7_0.9,"{'colsample_bytree': 0.7, 'subsample': 0.9}",7,-322.168807,16.681372
0.9_0.9,"{'colsample_bytree': 0.9, 'subsample': 0.9}",8,-322.186417,14.929439
0.7_0.8,"{'colsample_bytree': 0.7, 'subsample': 0.8}",9,-322.537064,16.720121
0.7_0.6,"{'colsample_bytree': 0.7, 'subsample': 0.6}",10,-322.815668,14.837599


### Regularization

In [10]:
# Lock in the best sampling fractions from the previous search.
params.update(colsample_bytree=0.9, subsample=0.8)

model = XGBRegressor

# Coarse grid: alpha and lambda each take the values 0, 3, 6, 9.
param_test = {
    'alpha': range(0, 10, 3),
    'lambda': range(0, 10, 3),
}

gsearch = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch.fit(features, target)

print_grid_search_results(gsearch)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
3_0,"{'alpha': 3, 'lambda': 0}",1,-320.345597,14.979499
0_0,"{'alpha': 0, 'lambda': 0}",2,-320.894019,15.629442
6_0,"{'alpha': 6, 'lambda': 0}",3,-321.887705,14.967258
9_0,"{'alpha': 9, 'lambda': 0}",4,-322.013934,14.938198
0_3,"{'alpha': 0, 'lambda': 3}",5,-322.445332,15.340703
3_3,"{'alpha': 3, 'lambda': 3}",6,-323.115911,14.240589
6_6,"{'alpha': 6, 'lambda': 6}",7,-323.33881,15.198107
6_3,"{'alpha': 6, 'lambda': 3}",8,-323.384107,14.986635
9_6,"{'alpha': 9, 'lambda': 6}",9,-323.526392,15.974641
9_3,"{'alpha': 9, 'lambda': 3}",10,-323.781873,14.599572


In [12]:
model = XGBRegressor

# Fine grid around the coarse optimum (alpha=3, lambda=0): unit steps for
# alpha in 0..3 and lambda in 0..2.
param_test = {'alpha': [0, 1, 2, 3], 'lambda': [0, 1, 2]}

gsearch = GridSearchCV(
    estimator=model(**params),
    param_grid=param_test,
    scoring='neg_mean_absolute_error',
    n_jobs=4,
    cv=5,
)
gsearch.fit(features, target)

print_grid_search_results(gsearch)

Unnamed: 0_level_0,params,rank_test_score,mean_test_score,std_test_score
kernel,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_1,"{'alpha': 0, 'lambda': 1}",1,-320.288505,15.3323
3_0,"{'alpha': 3, 'lambda': 0}",2,-320.345597,14.979499
1_1,"{'alpha': 1, 'lambda': 1}",3,-320.527295,14.9835
3_1,"{'alpha': 3, 'lambda': 1}",4,-320.801455,14.837921
2_1,"{'alpha': 2, 'lambda': 1}",5,-320.83869,14.938163
0_0,"{'alpha': 0, 'lambda': 0}",6,-320.894019,15.629442
1_0,"{'alpha': 1, 'lambda': 0}",7,-321.369967,15.357062
2_0,"{'alpha': 2, 'lambda': 0}",8,-321.996379,15.352971
0_2,"{'alpha': 0, 'lambda': 2}",9,-322.233446,13.987306
1_2,"{'alpha': 1, 'lambda': 2}",10,-322.814436,14.359973


In [14]:
import configparser

# Record the best regularization pair from the fine search (alpha=0, lambda=1).
params.update({'alpha': 0, 'lambda': 1})
print(params)

# Store the final settings in the shared XGBoost config file. The file is read
# first so that its other sections are written back unchanged; only the
# BIRTH_WEIGHT section is replaced.
xgb_config_path = 'config/config_xgb.ini'

config = configparser.ConfigParser()
config.read(xgb_config_path)
config['BIRTH_WEIGHT'] = params

with open(xgb_config_path, 'w') as ini_file:
    config.write(ini_file)

{'learning_rate': 0.01, 'n_estimators': 5000, 'max_depth': 7, 'min_child_weight': 6, 'gamma': 0, 'subsample': 0.8, 'colsample_bytree': 0.9, 'objective': 'reg:squarederror', 'eval_metric': 'mae', 'booster': 'gbtree', 'use_label_encoder': False, 'seed': 42, 'alpha': 0, 'lambda': 1}
