In [1]:
import warnings
warnings.filterwarnings('ignore')

import matplotlib.pyplot as plt; plt.style.use('ggplot')
import seaborn as sns
import plotly.express as px
import pandas as pd
import numpy as np
import time
import random
from itertools import combinations
from tqdm.notebook import tqdm
import optuna
import math

from sklearn.preprocessing import OneHotEncoder, StandardScaler, MinMaxScaler, MaxAbsScaler, RobustScaler, QuantileTransformer
from sklearn.compose import make_column_transformer
from sklearn.metrics import make_scorer, accuracy_score
from sklearn.model_selection import StratifiedKFold, cross_validate, RandomizedSearchCV
from sklearn.inspection import permutation_importance, PartialDependenceDisplay
from sklearn.feature_selection import RFECV, mutual_info_classif

from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from concurrent.futures import ThreadPoolExecutor
from category_encoders import TargetEncoder

from lightgbm import LGBMClassifier

from pprint import pprint
import os

# Show every column and every row when a DataFrame/Series is displayed
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Prefix used when naming result files produced by this notebook
experiment_name = 'lgbm'

In [2]:
# Load the engineered training set (feature columns plus the NObeyesdad label).
# NOTE(review): the preview below shows a leading 'Unnamed: 0' column, i.e. the
# CSV's saved row index is read as a regular column and later ends up in X as a
# feature -- confirm this is intended (otherwise read with index_col=0).
train = pd.read_csv('new_combined_train_spear.csv')

# Preview the first rows
train.head()

Unnamed: 0,FAF_ceil_round,FCVC_ceil_round,TUE_floor_round,TUE_ceil_round,CH2O_floor_round,FAF_floor_round,CH2O_ceil_round,FCVC_round,NCP_ceil_round,FAF_round,FCVC_floor_round,TUE_round,CH2O_round,NCP_floor_round,Weight_2dec,Height_2dec,Automobile,Bike,Motorbike,Walking,CALC_map,family_history_with_overweight_map,CAEC_map,SCC_map,Gender_map,FAVC_map,SMOKE_map,CALC,CAEC,Weight_2dec_gt_Height_2dec,Weight_2dec_gt_Gender,Weight_2dec_div_CALC,Weight_2dec_gt_CALC,Weight_2dec_div_CAEC,Weight_2dec_gt_CAEC,Weight_2dec_gt_MTRANS,Weight_2dec_div_family_history_with_overweight,Weight_2dec_gt_family_history_with_overweight,Weight_2dec_div_FAVC,Weight_2dec_gt_FAVC,Weight_2dec_gt_SMOKE,Weight_2dec_div_SCC,Weight_2dec_gt_SCC,Height_2dec_x_Gender,Height_2dec_plus_Gender,Height_2dec_gt_Gender,Height_2dec_x_CALC,Height_2dec_minus_CALC,Height_2dec_gt_CALC,Height_2dec_x_CAEC,Height_2dec_minus_CAEC,Height_2dec_x_MTRANS,Height_2dec_minus_MTRANS,Height_2dec_x_family_history_with_overweight,Height_2dec_minus_family_history_with_overweight,Height_2dec_x_FAVC,Height_2dec_minus_FAVC,Height_2dec_gt_FAVC,Height_2dec_gt_SMOKE,Gender_x_CALC,Gender_minus_CALC,Gender_x_CAEC,Gender_minus_CAEC,Gender_minus_MTRANS,Std_Gender_MTRANS,Gender_x_family_history_with_overweight,Gender_minus_family_history_with_overweight,Gender_x_FAVC,Gender_minus_FAVC,Gender_x_SCC,CALC_x_CAEC,CALC_minus_CAEC,CALC_x_MTRANS,CALC_minus_MTRANS,CALC_x_family_history_with_overweight,CALC_minus_family_history_with_overweight,CALC_x_FAVC,CALC_minus_FAVC,CAEC_x_MTRANS,CAEC_minus_MTRANS,CAEC_x_family_history_with_overweight,CAEC_minus_family_history_with_overweight,CAEC_div_family_history_with_overweight,Std_CAEC_family_history_with_overweight,CAEC_gt_family_history_with_overweight,CAEC_x_FAVC,CAEC_minus_FAVC,MTRANS_x_family_history_with_overweight,MTRANS_minus_family_history_with_overweight,MTRANS_x_FAVC,MTRANS_minus_FAVC,Std_MTRANS_SMOKE,MTRANS_x_SCC,MTRANS_minus_SCC,Std_family_history_with_overweight_SCC,FAVC_x_SCC,FAVC_minus_SCC,SM
OKE_x_SCC,SMOKE_minus_SCC,Weight_2dec_div_prod_of_CALC_and_CAEC,Weight_2dec_div_prod_of_CALC_and_family_history_with_overweight,Weight_2dec_div_prod_of_CALC_and_FAVC,Weight_2dec_div_prod_of_CALC_and_SCC,Weight_2dec_div_sum_of_CAEC_and_family_history_with_overweight,Weight_2dec_div_prod_of_CAEC_and_family_history_with_overweight,CAEC_div_prod_of_Weight_2dec_and_family_history_with_overweight,family_history_with_overweight_div_prod_of_Weight_2dec_and_CAEC,Weight_2dec_div_prod_of_CAEC_and_FAVC,Height_2dec_x_Gender_x_CALC,CALC_div_prod_of_Height_2dec_and_Gender,Height_2dec_plus_Gender_minus_CAEC,Height_2dec_plus_Gender_plus_CAEC,Height_2dec_x_Gender_x_CAEC,CAEC_div_prod_of_Height_2dec_and_Gender,Std_of_Height_2dec_Gender_CAEC,Height_2dec_plus_Gender_minus_MTRANS,Height_2dec_plus_Gender_plus_MTRANS,Height_2dec_x_Gender_x_MTRANS,MTRANS_div_prod_of_Height_2dec_and_Gender,Height_2dec_plus_Gender_minus_family_history_with_overweight,Height_2dec_plus_Gender_plus_family_history_with_overweight,Height_2dec_x_Gender_x_family_history_with_overweight,family_history_with_overweight_div_prod_of_Height_2dec_and_Gender,Height_2dec_plus_Gender_minus_FAVC,Height_2dec_plus_Gender_plus_FAVC,Height_2dec_x_Gender_x_FAVC,FAVC_div_prod_of_Height_2dec_and_Gender,Gender_plus_SCC_minus_Height_2dec,Height_2dec_plus_CALC_minus_CAEC,Height_2dec_plus_CAEC_minus_CALC,CALC_plus_CAEC_minus_Height_2dec,Height_2dec_plus_CALC_plus_CAEC,Height_2dec_plus_MTRANS_minus_CALC,Height_2dec_plus_CALC_minus_family_history_with_overweight,Height_2dec_plus_family_history_with_overweight_minus_CALC,CALC_plus_family_history_with_overweight_minus_Height_2dec,Height_2dec_plus_CALC_plus_family_history_with_overweight,Height_2dec_plus_CALC_minus_FAVC,Height_2dec_plus_FAVC_minus_CALC,CALC_plus_FAVC_minus_Height_2dec,Height_2dec_plus_MTRANS_minus_CAEC,Height_2dec_plus_CAEC_plus_MTRANS,Height_2dec_plus_CAEC_minus_family_history_with_overweight,Height_2dec_plus_family_history_with_overweight_minus_CAEC,CAEC_plus_family_history
_with_overweight_minus_Height_2dec,Height_2dec_plus_CAEC_plus_family_history_with_overweight,CAEC_div_sum_of_Height_2dec_and_family_history_with_overweight,Std_of_Height_2dec_CAEC_family_history_with_overweight,Height_2dec_plus_CAEC_minus_FAVC,CAEC_plus_FAVC_minus_Height_2dec,Std_of_Height_2dec_CAEC_FAVC,SCC_div_sum_of_Height_2dec_and_CAEC,Height_2dec_plus_MTRANS_minus_family_history_with_overweight,Height_2dec_plus_MTRANS_plus_family_history_with_overweight,MTRANS_div_sum_of_Height_2dec_and_family_history_with_overweight,Height_2dec_plus_MTRANS_minus_FAVC,Height_2dec_plus_FAVC_minus_MTRANS,MTRANS_plus_FAVC_minus_Height_2dec,Height_2dec_plus_MTRANS_plus_FAVC,Height_2dec_plus_MTRANS_minus_SCC,MTRANS_plus_SCC_minus_Height_2dec,Std_of_Height_2dec_family_history_with_overweight_FAVC,SCC_div_sum_of_Height_2dec_and_family_history_with_overweight,Height_2dec_plus_FAVC_minus_SCC,Gender_plus_CALC_minus_FAVC,Gender_plus_FAVC_minus_CALC,CALC_plus_FAVC_minus_Gender,Gender_plus_CALC_plus_FAVC,MTRANS_div_sum_of_Gender_and_CAEC,Gender_plus_CAEC_minus_family_history_with_overweight,Gender_plus_family_history_with_overweight_minus_CAEC,CAEC_plus_family_history_with_overweight_minus_Gender,Gender_plus_CAEC_plus_family_history_with_overweight,CAEC_div_sum_of_Gender_and_family_history_with_overweight,FAVC_div_sum_of_Gender_and_CAEC,SCC_div_sum_of_Gender_and_CAEC,MTRANS_div_sum_of_Gender_and_family_history_with_overweight,Gender_plus_FAVC_minus_MTRANS,MTRANS_div_sum_of_Gender_and_FAVC,Std_of_Gender_MTRANS_SMOKE,FAVC_div_sum_of_Gender_and_family_history_with_overweight,SCC_div_sum_of_Gender_and_family_history_with_overweight,Gender_plus_SCC_minus_FAVC,CALC_plus_CAEC_minus_MTRANS,CAEC_plus_MTRANS_minus_CALC,CALC_plus_CAEC_minus_family_history_with_overweight,CALC_plus_family_history_with_overweight_minus_CAEC,CAEC_plus_family_history_with_overweight_minus_CALC,CAEC_div_sum_of_CALC_and_family_history_with_overweight,CAEC_div_prod_of_CALC_and_family_history_with_overweight,family_history_wi
th_overweight_div_prod_of_CALC_and_CAEC,CALC_plus_CAEC_minus_FAVC,CAEC_plus_FAVC_minus_CALC,SCC_div_sum_of_CALC_and_CAEC,CALC_plus_MTRANS_minus_FAVC,CALC_plus_FAVC_minus_MTRANS,MTRANS_plus_FAVC_minus_CALC,CALC_plus_MTRANS_plus_FAVC,CALC_plus_family_history_with_overweight_minus_FAVC,family_history_with_overweight_plus_FAVC_minus_CALC,SCC_div_sum_of_CALC_and_family_history_with_overweight,CALC_plus_SCC_minus_FAVC,CALC_plus_SMOKE_minus_SCC,CALC_plus_SCC_minus_SMOKE,CAEC_plus_MTRANS_minus_family_history_with_overweight,CAEC_plus_family_history_with_overweight_minus_MTRANS,MTRANS_plus_family_history_with_overweight_minus_CAEC,CAEC_plus_MTRANS_plus_family_history_with_overweight,CAEC_div_sum_of_MTRANS_and_family_history_with_overweight,family_history_with_overweight_div_prod_of_CAEC_and_MTRANS,FAVC_div_sum_of_CAEC_and_MTRANS,SCC_div_sum_of_CAEC_and_MTRANS,CAEC_plus_FAVC_minus_family_history_with_overweight,CAEC_div_sum_of_family_history_with_overweight_and_FAVC,FAVC_div_sum_of_CAEC_and_family_history_with_overweight,CAEC_div_prod_of_family_history_with_overweight_and_FAVC,SCC_div_sum_of_CAEC_and_family_history_with_overweight,FAVC_div_sum_of_CAEC_and_SMOKE,SCC_div_sum_of_CAEC_and_FAVC,SCC_div_sum_of_CAEC_and_SMOKE,FAVC_div_sum_of_MTRANS_and_family_history_with_overweight,SCC_div_sum_of_MTRANS_and_family_history_with_overweight,MTRANS_plus_FAVC_minus_SCC,MTRANS_plus_SCC_minus_FAVC,family_history_with_overweight_plus_FAVC_minus_SMOKE,family_history_with_overweight_plus_SMOKE_minus_FAVC,FAVC_div_sum_of_family_history_with_overweight_and_SMOKE,SCC_div_sum_of_family_history_with_overweight_and_FAVC,SCC_div_sum_of_family_history_with_overweight_and_SMOKE,FAVC_div_sum_of_SMOKE_and_SCC,FAF_ceil_round+FCVC_ceil_round,FAF_ceil_round+NCP_ceil_round,FAF_ceil_round+TUE_round,FAF_ceil_round+CH2O_round,FAF_ceil_round+CALC_map,FAF_ceil_round+Gender_map,FCVC_ceil_round+TUE_floor_round,FCVC_ceil_round+Automobile,FCVC_ceil_round+family_history_with_overweight_map,FCVC_ceil_round+CAEC_map,F
CVC_ceil_round+Gender_map,TUE_floor_round+CH2O_floor_round,TUE_floor_round+FAF_floor_round,TUE_floor_round+CH2O_ceil_round,TUE_floor_round+FCVC_round,TUE_floor_round+NCP_ceil_round,TUE_floor_round+FCVC_floor_round,TUE_floor_round+CH2O_round,TUE_floor_round+Automobile,TUE_floor_round+Public_Transportation,TUE_floor_round+CALC_map,TUE_floor_round+family_history_with_overweight_map,TUE_floor_round+CAEC_map,TUE_floor_round+Gender_map,TUE_ceil_round+CH2O_floor_round,TUE_ceil_round+FAF_floor_round,TUE_ceil_round+CH2O_ceil_round,TUE_ceil_round+FCVC_round,TUE_ceil_round+NCP_ceil_round,TUE_ceil_round+Automobile,TUE_ceil_round+CALC_map,TUE_ceil_round+family_history_with_overweight_map,TUE_ceil_round+CAEC_map,TUE_ceil_round+Gender_map,CH2O_floor_round+FAF_floor_round,CH2O_floor_round+FCVC_round,CH2O_floor_round+NCP_ceil_round,CH2O_floor_round+FCVC_floor_round,CH2O_floor_round+TUE_round,CH2O_floor_round+Automobile,CH2O_floor_round+Public_Transportation,CH2O_floor_round+CALC_map,CH2O_floor_round+family_history_with_overweight_map,CH2O_floor_round+CAEC_map,CH2O_floor_round+Gender_map,FAF_floor_round+CH2O_ceil_round,FAF_floor_round+FCVC_round,FAF_floor_round+NCP_ceil_round,FAF_floor_round+Automobile,FAF_floor_round+CALC_map,FAF_floor_round+family_history_with_overweight_map,FAF_floor_round+Gender_map,CH2O_ceil_round+FCVC_round,CH2O_ceil_round+NCP_ceil_round,CH2O_ceil_round+TUE_round,CH2O_ceil_round+Automobile,CH2O_ceil_round+Public_Transportation,CH2O_ceil_round+CALC_map,CH2O_ceil_round+CAEC_map,CH2O_ceil_round+Gender_map,FCVC_round+NCP_ceil_round,FCVC_round+TUE_round,FCVC_round+CH2O_round,FCVC_round+Automobile,FCVC_round+Public_Transportation,FCVC_round+CALC_map,FCVC_round+family_history_with_overweight_map,FCVC_round+CAEC_map,FCVC_round+Gender_map,NCP_ceil_round+FCVC_floor_round,NCP_ceil_round+TUE_round,NCP_ceil_round+CH2O_round,NCP_ceil_round+Automobile,NCP_ceil_round+Public_Transportation,NCP_ceil_round+CALC_map,NCP_ceil_round+family_history_with_overweight_map,NCP_ceil_round+
CAEC_map,NCP_ceil_round+Gender_map,NCP_ceil_round+FAVC_map,FCVC_floor_round+Automobile,FCVC_floor_round+Public_Transportation,FCVC_floor_round+CALC_map,FCVC_floor_round+family_history_with_overweight_map,FCVC_floor_round+CAEC_map,FCVC_floor_round+Gender_map,TUE_round+Automobile,TUE_round+Public_Transportation,TUE_round+CALC_map,TUE_round+family_history_with_overweight_map,TUE_round+CAEC_map,TUE_round+Gender_map,CH2O_round+Automobile,CH2O_round+Public_Transportation,CH2O_round+CALC_map,CH2O_round+CAEC_map,CH2O_round+Gender_map,NCP_floor_round+Automobile,NCP_floor_round+Public_Transportation,NCP_floor_round+family_history_with_overweight_map,NCP_floor_round+CAEC_map,NCP_floor_round+Gender_map,Age_ceil_round+Age_round,Automobile+CALC_map,Automobile+CAEC_map,Automobile+FAVC_map,Bike+Motorbike,Public_Transportation+CALC_map,Walking+SCC_map,Walking+FAVC_map,Walking+SMOKE_map,CALC_map+CAEC_map,CALC_map+Gender_map,CALC_map+FAVC_map,family_history_with_overweight_map+CAEC_map,CAEC_map+FAVC_map,gender_height,NObeyesdad
0,0,2,0,1,2,0,3,2,3,0,2,1,3,2,81.67,1.7,0,0,0,0,1,1,1,0,1,1,0,3.54758,3.645175,1,1,23.021327,1,22.404962,1,1,21.789209,1,24.120359,1,1,24.615686,1,5.291556,4.81268,0,6.030886,-1.84758,0,6.196797,-1.945175,5.633071,-1.613571,6.371916,-2.048186,5.756092,-1.685936,0,0,11.04248,-0.4349,11.346261,-0.532495,-0.200891,0.142052,11.666902,-0.635506,10.539335,-0.273257,10.327259,12.931549,-0.097595,11.755158,0.234009,13.296989,-0.200606,12.01188,0.161644,12.078545,0.331604,13.662792,-0.103011,0.972517,0.07284,0,12.342329,0.259238,12.419879,-0.434615,11.21954,-0.072365,0.049868,10.993776,-0.004232,0.304326,11.23387,0.068133,10.759791,-0.074756,6.315562,6.141992,6.799102,6.938726,11.046398,5.977549,0.011908,0.01259,6.617065,18.772217,0.670423,1.167505,8.457854,19.288644,0.688866,1.00523,1.499109,8.126251,17.533945,0.6262,1.064494,8.560865,19.833733,0.708333,1.426743,8.198616,17.91687,0.639875,4.730483,1.602405,1.797595,5.492755,8.892755,1.465991,1.499394,1.900606,5.595766,8.995766,1.861644,1.538356,5.233516,1.368396,8.658746,1.596989,1.803011,5.69336,9.09336,0.669062,1.153934,1.959238,5.331111,1.056195,0.62071,1.265385,8.761757,0.608197,1.627635,1.772365,4.999507,8.399507,1.695768,4.931374,1.093059,0.608974,1.768133,3.274323,2.951036,3.820837,10.046196,0.490329,3.009669,3.215691,4.280681,10.50604,0.5313,0.501037,0.490955,0.482967,3.185045,0.509889,0.10192,0.493514,0.483584,3.044547,3.879184,3.411166,3.444569,3.650591,3.84578,0.499629,0.274135,0.289848,3.806818,3.483531,0.46127,3.475215,3.619945,3.151927,10.247087,3.909829,3.586542,0.454757,3.479447,3.472824,3.622336,3.21056,4.079789,3.416582,10.706931,0.516185,0.310318,0.486573,0.476782,3.282925,0.510949,0.45797,0.287223,0.448754,0.491554,0.471875,0.481663,0.479475,0.469827,3.381704,3.245438,3.891075,3.605296,0.484312,0.465061,0.474566,0.516082,2,3,1,3,1,1,2,2,3,3,3,2,0,3,2,3,2,3,0,1,1,1,1,1,3,1,4,3,4,1,2,2,2,2,2,4,5,4,3,2,3,3,3,3,3,3,2,3,0,1,1,1,5,6,4,3,4,4,4,4,5,3,5,2,3,3,3,3,3,5,4,6,3,4,4,4,4,4,4,2,3,3,3,3,3,1,2,2,2,2,2,3,4,
4,4,4,2,3,3,3,3,49,1,1,1,0,2,0,1,0,2,2,2,2,2,1,3
1,1,2,1,1,2,1,2,2,3,1,2,1,2,3,57.0,1.56,1,0,0,0,0,1,2,0,0,1,0,2.456911,0.876197,1,1,23.19986,1,65.053827,1,1,15.207358,1,16.834339,1,1,17.180043,1,5.269983,4.938195,0,3.832782,-0.896911,0,1.366868,0.683803,4.961433,-1.620406,5.84717,-2.188186,5.282061,-1.825936,0,0,8.299925,0.921283,2.959966,2.501997,0.197789,0.139858,12.6621,-0.369991,11.438351,-0.007742,11.208184,2.15274,1.580714,7.813976,-0.723495,9.20896,-1.291274,8.318946,-0.929025,2.786664,-2.304208,3.284151,-2.871988,0.233766,2.030802,0,2.966749,-2.509739,11.920752,-0.56778,10.768652,-0.20553,0.044294,10.551961,-0.137397,0.304326,11.23387,0.068133,10.759791,-0.074756,26.477889,6.189624,6.85183,6.992537,12.325968,17.356084,0.004101,0.075049,19.212951,12.947882,0.466209,4.061997,5.814392,4.617546,0.166262,1.293149,1.757789,8.1186,16.760686,0.603494,1.190009,8.68638,19.752877,0.711233,1.552258,8.324131,17.843828,0.642495,5.135998,3.140714,-0.020714,1.773109,4.893109,2.283495,0.268726,2.851274,4.645097,7.765097,0.630975,2.489025,4.282848,3.864208,5.616603,-1.311988,4.431988,3.064383,6.184383,0.165065,1.500225,-0.949739,2.702134,1.29746,1.361878,0.99222,8.488592,0.599151,1.35447,1.76553,5.006342,8.126342,1.422603,4.938209,1.172847,0.625035,1.628133,2.44917,4.307219,2.464653,9.221042,0.747558,0.506206,6.250183,1.246189,8.002578,0.122951,0.795868,0.779854,0.446286,3.583725,0.470187,0.101085,0.475127,0.465566,3.310061,0.152703,1.599692,-0.415077,5.3289,2.167472,0.141206,0.095146,1.741124,-0.052827,1.805222,0.995408,2.251381,2.662442,4.109431,9.023254,2.819161,4.677211,0.53469,2.388778,2.382155,2.531668,0.308418,1.443977,6.052394,7.804789,0.126461,1.345044,0.834673,0.817877,0.513948,0.122818,0.732192,0.06904,0.717459,0.82198,0.778437,0.80544,0.48869,0.478857,3.248539,3.112273,3.891075,3.605296,0.484312,0.465061,0.474566,0.516082,3,4,2,3,1,1,3,3,3,4,2,3,2,3,3,4,3,3,2,1,1,2,3,1,3,2,3,3,4,2,1,2,3,1,3,4,5,4,3,3,2,2,3,4,2,3,3,4,2,1,2,1,4,5,3,3,2,2,4,2,5,3,4,3,2,2,3,4,2,5,4,5,4,3,3,4,5,3,4,3,2,2,3,4,2,2,1,1,2,3,1,3,2,2,4,2,
4,3,4,5,3,36,1,3,2,0,0,0,1,0,2,0,1,3,3,1,1
2,1,2,1,2,1,0,2,2,2,1,1,2,2,1,50.17,1.71,0,0,0,0,0,1,1,0,0,1,0,2.456911,3.645175,1,1,20.419947,1,13.763401,1,1,13.385143,1,14.817172,1,1,15.121452,1,5.776713,5.088195,0,4.201319,-0.746911,0,6.233249,-1.935175,5.666206,-1.603571,6.409398,-2.038186,5.789951,-1.675936,0,0,8.299925,0.921283,12.314109,-0.26698,0.064624,0.045696,12.6621,-0.369991,11.438351,-0.007742,11.208184,8.955871,-1.188263,8.14115,-0.85666,9.20896,-1.291274,8.318946,-0.929025,12.078545,0.331604,13.662792,-0.103011,0.972517,0.07284,0,12.342329,0.259238,12.419879,-0.434615,11.21954,-0.072365,0.049868,10.993776,-0.004232,0.304326,11.23387,0.068133,10.759791,-0.074756,5.601912,5.447955,6.030812,6.154659,6.785818,3.672017,0.019384,0.020496,4.064873,14.192871,0.425313,1.44302,8.733369,21.057126,0.631012,1.048734,1.774624,8.401765,19.141547,0.573608,1.340009,8.83638,21.652192,0.648844,1.702258,8.474131,19.559581,0.586135,4.985998,0.521737,2.898263,4.392086,7.812086,2.56666,0.418726,3.001274,4.495097,7.915097,0.780975,2.639025,4.132848,1.378396,8.668746,1.606989,1.813011,5.68336,9.10336,0.667836,1.148166,1.969238,5.321111,1.050466,0.619551,1.275385,8.771757,0.607083,1.637635,1.782365,4.989507,8.409507,1.705768,4.921374,1.087366,0.607858,1.778133,2.44917,4.307219,2.464653,9.221042,0.471792,3.275183,3.481206,4.015166,10.771555,0.511504,0.482096,0.472395,0.464973,3.45056,0.489874,0.067595,0.475127,0.465566,3.310061,2.788515,4.501834,2.3539,2.559922,4.936449,0.587448,0.395829,0.418517,2.71615,4.5742,0.543716,2.384546,2.529277,4.242596,9.156419,2.819161,4.677211,0.53469,2.388778,2.382155,2.531668,3.21056,4.079789,3.416582,10.706931,0.516185,0.310318,0.486573,0.476782,3.282925,0.510949,0.45797,0.287223,0.448754,0.491554,0.471875,0.481663,0.479475,0.469827,3.381704,3.245438,3.891075,3.605296,0.484312,0.465061,0.474566,0.516082,3,3,3,3,1,1,3,2,3,3,2,2,1,3,3,3,2,3,1,2,1,2,2,1,3,2,4,4,4,2,2,3,3,2,1,3,3,2,3,1,2,1,2,2,1,2,2,2,0,0,1,0,4,4,4,2,3,2,3,2,4,4,4,2,3,2,3,3,2,3,4,4,2,3,2,3,3,2,3,1,2,1,2,2,1,2,3,2,3,3,2,2,3,2,3,2
,1,2,2,2,1,36,0,1,1,0,1,0,1,0,1,0,1,2,2,1,0
3,2,3,0,1,1,1,2,3,3,1,3,1,2,3,131.27,1.71,0,0,0,0,1,1,1,0,0,1,0,3.54758,3.645175,1,1,37.002689,1,36.011992,1,1,35.022278,1,38.769188,1,1,39.565337,1,5.776713,5.088195,0,6.066362,-1.83758,0,6.233249,-1.935175,5.666206,-1.603571,6.409398,-2.038186,5.789951,-1.675936,0,0,11.984415,-0.169386,12.314109,-0.26698,0.064624,0.045696,12.6621,-0.369991,11.438351,-0.007742,11.208184,12.931549,-0.097595,11.755158,0.234009,13.296989,-0.200606,12.01188,0.161644,12.078545,0.331604,13.662792,-0.103011,0.972517,0.07284,0,12.342329,0.259238,12.419879,-0.434615,11.21954,-0.072365,0.049868,10.993776,-0.004232,0.304326,11.23387,0.068133,10.759791,-0.074756,10.151143,9.87216,10.928348,11.152768,17.75512,9.607846,0.007409,0.007833,10.635756,20.49335,0.614117,1.44302,8.733369,21.057126,0.631012,1.048734,1.774624,8.401765,19.141547,0.573608,1.340009,8.83638,21.652192,0.648844,1.702258,8.474131,19.559581,0.586135,4.985998,1.612405,1.807595,5.482755,8.902755,1.475991,1.509394,1.910606,5.585766,9.005766,1.871644,1.548356,5.223516,1.378396,8.668746,1.606989,1.813011,5.68336,9.10336,0.667836,1.148166,1.969238,5.321111,1.050466,0.619551,1.275385,8.771757,0.607083,1.637635,1.782365,4.989507,8.409507,1.705768,4.921374,1.087366,0.607858,1.778133,3.539838,3.216551,3.555322,10.311711,0.471792,3.275183,3.481206,4.015166,10.771555,0.511504,0.482096,0.472395,0.464973,3.45056,0.489874,0.067595,0.475127,0.465566,3.310061,3.879184,3.411166,3.444569,3.650591,3.84578,0.499629,0.274135,0.289848,3.806818,3.483531,0.46127,3.475215,3.619945,3.151927,10.247087,3.909829,3.586542,0.454757,3.479447,3.472824,3.622336,3.21056,4.079789,3.416582,10.706931,0.516185,0.310318,0.486573,0.476782,3.282925,0.510949,0.45797,0.287223,0.448754,0.491554,0.471875,0.481663,0.479475,0.469827,3.381704,3.245438,3.891075,3.605296,0.484312,0.465061,0.474566,0.516082,5,5,3,4,3,2,3,3,4,4,3,1,1,2,3,3,3,2,0,1,1,1,1,0,2,2,3,4,4,1,2,2,2,1,2,4,4,4,2,1,2,2,2,2,1,3,4,4,1,2,2,1,5,5,3,2,3,3,3,2,6,4,5,3,4,4,4,4,3,6,4,5,3,4,4,4,4,3,4,3,4,4,4,4,3,1,2,2,
2,2,1,2,3,3,3,2,3,4,4,4,3,42,1,1,1,0,2,0,1,0,2,1,2,2,2,1,6
4,2,3,0,1,1,1,2,3,2,2,2,1,2,1,93.8,1.91,0,0,0,0,1,1,1,0,1,1,0,3.54758,3.645175,1,1,26.440559,1,25.732649,1,1,25.025441,1,27.702825,1,1,28.27172,1,5.945218,5.02268,0,6.775878,-1.63758,0,6.962284,-1.735175,6.32892,-1.403571,7.159035,-1.838186,6.467138,-1.475936,0,0,11.04248,-0.4349,11.346261,-0.532495,-0.200891,0.142052,11.666902,-0.635506,10.539335,-0.273257,10.327259,12.931549,-0.097595,11.755158,0.234009,13.296989,-0.200606,12.01188,0.161644,12.078545,0.331604,13.662792,-0.103011,0.972517,0.07284,0,12.342329,0.259238,12.419879,-0.434615,11.21954,-0.072365,0.049868,10.993776,-0.004232,0.304326,11.23387,0.068133,10.759791,-0.074756,7.253578,7.054229,7.808936,7.969297,12.687059,6.865361,0.010368,0.010962,7.599862,21.091138,0.596711,1.377505,8.667854,21.671359,0.613127,0.888896,1.709109,8.336251,19.699903,0.557351,1.274494,8.770865,22.283782,0.630454,1.636743,8.408616,20.130131,0.569523,4.520483,1.812405,2.007595,5.282755,9.102755,1.675991,1.709394,2.110606,5.385766,9.205766,2.071644,1.748356,5.023516,1.578396,8.868746,1.806989,2.013011,5.48336,9.30336,0.64423,1.032825,2.169238,5.121111,0.935986,0.597246,1.475385,8.971757,0.585624,1.837635,1.982365,4.789507,8.609507,1.905768,4.721374,0.973699,0.586372,1.978133,3.274323,2.951036,3.820837,10.046196,0.490329,3.009669,3.215691,4.280681,10.50604,0.5313,0.501037,0.490955,0.482967,3.185045,0.509889,0.10192,0.493514,0.483584,3.044547,3.879184,3.411166,3.444569,3.650591,3.84578,0.499629,0.274135,0.289848,3.806818,3.483531,0.46127,3.475215,3.619945,3.151927,10.247087,3.909829,3.586542,0.454757,3.479447,3.472824,3.622336,3.21056,4.079789,3.416582,10.706931,0.516185,0.310318,0.486573,0.476782,3.282925,0.510949,0.45797,0.287223,0.448754,0.491554,0.471875,0.481663,0.479475,0.469827,3.381704,3.245438,3.891075,3.605296,0.484312,0.465061,0.474566,0.516082,5,4,3,4,3,3,3,3,4,4,4,1,1,2,3,2,2,2,0,1,1,1,1,1,2,2,3,4,3,1,2,2,2,2,2,4,3,3,2,1,2,2,2,2,2,3,4,3,1,2,2,2,5,4,3,2,3,3,3,3,5,4,5,3,4,4,4,4,4,4,3,4,2,3,3,3,3,3,3,2,3,3,3,3,3,1,2,2,2,2,2,2
,3,3,3,3,1,2,2,2,2,64,1,1,1,0,2,0,1,0,2,2,2,2,2,1,3


In [3]:
# Name of the label column; used below to split the predictors from the target
TARGET = 'NObeyesdad'

In [4]:
# Separate the label from the predictors
y = train[TARGET]
X = train.drop(columns=TARGET)

# Shared stratified 5-fold splitter (shuffled, fixed seed for repeatability)
sk = StratifiedKFold(n_splits=5, random_state=5, shuffle=True)

In [5]:
# cat_cols_11 = X.columns[X.nunique() < 11].tolist()
# cat_cols_7 = X.columns[X.nunique() < 7].tolist()
# diff = list(set(cat_cols_11) - set(cat_cols_7))

# X['CAEC_minus_FAVC'].unique()

# Distinct values per column as a percentage of the row count, highest first
(X.nunique() / X.shape[0] * 100).sort_values(ascending=False)

# cat_cols = X.columns[X.nunique() < 11]
# num_cols = X.columns[X.nunique() >= 11]
# len(cat_cols), len(num_cols)

Weight_2dec_div_prod_of_CALC_and_CAEC                                13.424286
Weight_2dec_div_prod_of_CALC_and_FAVC                                12.847086
Weight_2dec_div_prod_of_CALC_and_family_history_with_overweight      12.733395
Weight_2dec_div_prod_of_CALC_and_SCC                                 12.103721
Weight_2dec_div_CALC                                                 11.194193
Weight_2dec_div_prod_of_CAEC_and_FAVC                                10.481438
family_history_with_overweight_div_prod_of_Weight_2dec_and_CAEC      10.450829
Weight_2dec_div_sum_of_CAEC_and_family_history_with_overweight       10.450829
Weight_2dec_div_prod_of_CAEC_and_family_history_with_overweight      10.450829
CAEC_div_prod_of_Weight_2dec_and_family_history_with_overweight      10.450829
Weight_2dec_div_CAEC                                                  8.789191
Weight_2dec_div_FAVC                                                  8.588045
Weight_2dec_div_family_history_with_overweight      

In [6]:
# Disabled experiment, kept for reference: standardise `num_cols` with
# StandardScaler, pass every other column through unchanged, and rebuild a
# DataFrame with the transformed feature names. `num_cols` is only defined in
# commented-out code, so this cell cannot run as-is.
# transformer = make_column_transformer(
#     (StandardScaler(), num_cols),
#     remainder='passthrough')

# df_to_ohe = X.copy()
# # test_ohe = test.drop('id', axis=1)

# transformed = transformer.fit_transform(df_to_ohe)

# transformed = transformer.transform(X)

# # Get the transformed feature names
# transformed_feat_names = [name.split('__')[-1] for name in transformer.get_feature_names_out()]

# # Create DataFrame of the transformed features
# df_to_ohe_transformed = pd.DataFrame(transformed, columns=transformed_feat_names)
# df_to_ohe_transformed.sample()

- Mutual Information

In [7]:
# Add a pure-noise column to X: uniform draws in [-2, 2), rounded to 6 decimals.
# The global NumPy seed is fixed first so the column is the same on every run.
np.random.seed(5)
control_noise = np.random.uniform(low=-2, high=2, size=X.shape[0])
X['random_control_feature'] = control_noise.round(6)
X.shape

(22869, 347)

In [13]:
# Mutual information between every feature and the target, estimated once per
# neighbourhood size below -- this can be memory and CPU intensive.
# The plan is to drop any feature whose mutual information is not above that of
# random_control_feature: a feature that cannot beat pure noise is not informative.
MI_N_NEIGHBORS = (3, 5, 10, 15, 20)

mi_by_k = {}
for k in MI_N_NEIGHBORS:
    k_scores = mutual_info_classif(X, y, discrete_features='auto', n_neighbors=k, copy=True, random_state=5)

    # Label the scores with their feature names, most informative first
    k_scores = pd.Series(k_scores, name="MI Scores", index=X.columns).sort_values(ascending=False)

    # One file per neighbourhood size (same layout this cell always wrote)
    k_scores.to_csv(f'{experiment_name}_{k}_mi_scores.csv')
    mi_by_k[k] = k_scores

# Keep `mi_scores` bound to the n_neighbors=5 result, as before
mi_scores = mi_by_k[5]

# Combine the runs (aligned on feature name) and average them. The selection
# cell reads '<experiment_name>_mi_scores.csv' and expects the columns
# 'Feature' and 'Average', so write exactly that here instead of relying on a
# file assembled outside the notebook.
mi_summary = pd.DataFrame({f'MI_{k}': scores for k, scores in mi_by_k.items()})
mi_summary['Average'] = mi_summary.mean(axis=1)
mi_summary = mi_summary.sort_values('Average', ascending=False)
mi_summary.to_csv(f'{experiment_name}_mi_scores.csv', index_label='Feature')

In [15]:
# Load the averaged mutual-information scores
mi_df = pd.read_csv('lgbm_mi_scores.csv')

# The control feature's score is the bar every real feature has to clear
mi_threshold = mi_df.loc[mi_df['Feature'].eq('random_control_feature'), 'Average'].iloc[0]

# Keep only the features scoring strictly above the control feature
mi_feats = mi_df.loc[mi_df['Average'].gt(mi_threshold), 'Feature'].tolist()

In [16]:
# Features that passed the mutual-information filter. Take an explicit copy:
# a column is added to X_mi later on, and writing into a frame derived by
# selection from X triggers pandas' SettingWithCopyWarning.
X_mi = X[mi_feats].copy()

- Feature Importances

In [24]:
feat_import_model = LGBMClassifier(n_jobs=-1, random_state=5, objective='multiclass', num_class=y.nunique())

# Average over the number of folds `sk` actually produces. This used to be a
# hard-coded 10 while `sk` is a 5-fold splitter, which halved every average.
n_splits = sk.get_n_splits()

# Running total of the per-fold feature importances
feature_importances = np.zeros(X_mi.shape[1])

# Fit on each training fold and accumulate the model's feature importances
for train_index, test_index in sk.split(X_mi, y):
    X_train, X_test = X_mi.iloc[train_index], X_mi.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    feat_import_model.fit(X_train, y_train)

    # Add this fold's importances to the total
    feature_importances += feat_import_model.feature_importances_

feature_importances /= n_splits

feature_importances_dict = dict(zip(X_mi.columns, feature_importances))

df = pd.DataFrame.from_dict(feature_importances_dict, orient='index', columns=['Avg_Feat_Importance'])
df = df.sort_values(by='Avg_Feat_Importance', ascending=False)

# Name the index column on export: the cell that reads this file back selects
# a 'Feature' column, which an unlabeled index would not provide.
df.to_csv('feature_importances.csv', index_label='Feature')

In [25]:
# Load the averaged feature importances
fi_df = pd.read_csv('feature_importances.csv')

# Minimum average importance a feature must exceed to be kept
fi_threshold = 0

# Names of the features whose average importance is strictly above the threshold
fi_feats = fi_df.loc[fi_df['Avg_Feat_Importance'] > fi_threshold, 'Feature'].tolist()

In [26]:
# Predictors restricted to the columns that passed the feature-importance filter
X_fi = X.loc[:, fi_feats]

In [27]:
# Estimators to run through the selection steps (currently a single LightGBM model)
models = [
    LGBMClassifier(
        objective='multiclass',
        num_class=y.nunique(),
        random_state=5,
        n_jobs=-1,
    ),
]

- Permutation Importance

In [31]:
# Add the pure-noise control column to X_mi (uniform in [-2, 2), 6 decimals).
# X_mi was derived from X by column selection, so the column is attached with
# .assign(), which returns a new frame, instead of writing into X_mi in place
# (in-place assignment there raises pandas' SettingWithCopyWarning). Re-running
# the cell simply regenerates the same column because the seed is reset first.
np.random.seed(5)
X_mi = X_mi.assign(
    random_control_feature=np.round(np.random.uniform(-2, 2, X_mi.shape[0]), 6)
)
X_mi.shape

(22869, 339)

In [32]:
# 10-fold stratified splitter used only for the permutation-importance runs
perm_cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=5)

# One list of per-fold mean importances for each model, keyed by class name
perm_importances = {}
for model in models:
    perm_importances[model.__class__.__name__] = []

for i, (train_idx, test_idx) in enumerate(perm_cv.split(X_mi, y)):
    X_train, y_train = X_mi.iloc[train_idx], y.iloc[train_idx]
    X_test, y_test = X_mi.iloc[test_idx], y.iloc[test_idx]

    for model in models:
        model_name = model.__class__.__name__
        model.fit(X_train, y_train)

        # Permutation importance is measured on the held-out fold
        result = permutation_importance(
            model,
            X_test,
            y_test,
            scoring='accuracy',
            n_repeats=10,
            random_state=5,
            n_jobs=-1,
        )
        perm_importances[model_name].append(result.importances_mean)
        print(f'Done with {model_name}.')

    print(f'Done with Fold {i+1}', end='\n\n')

Done with LGBMClassifier.
Done with Fold 1

Done with LGBMClassifier.
Done with Fold 2

Done with LGBMClassifier.
Done with Fold 3

Done with LGBMClassifier.
Done with Fold 4

Done with LGBMClassifier.
Done with Fold 5

Done with LGBMClassifier.
Done with Fold 6

Done with LGBMClassifier.
Done with Fold 7

Done with LGBMClassifier.
Done with Fold 8

Done with LGBMClassifier.
Done with Fold 9

Done with LGBMClassifier.
Done with Fold 10



In [35]:
# Average importances across folds and export to CSV
for model_name, importances in perm_importances.items():
    avg_importance = np.mean(importances, axis=0)
    importance_df = pd.DataFrame({'Feature': X_mi.columns, 'Importance': avg_importance})
    importance_df.sort_values(by='Importance', ascending=False, inplace=True)
    # Export to CSV
    importance_df.to_csv(f'.\permutation_importances\{model_name}_permutation_importance.csv', index=False)

print('Done with Permuation Importances', end='\n\n')

Done with Permuation Importances



In [36]:
directory = 'permutation_importances'

# Model class name -> features whose permutation importance beats the threshold
perm_important_features = {}

for model in models:
    model_name = model.__class__.__name__
    csv_path = os.path.join(directory, f'{model_name}_permutation_importance.csv')

    # Nothing exported for this model: report it and move on
    if not os.path.exists(csv_path):
        print(f'CSV file for {model_name} not found.')
        continue

    df = pd.read_csv(csv_path)

    # Importance of the noise control feature, or 0 when it is not in the file
    control_rows = df.loc[df['Feature'] == 'random_control_feature', 'Importance']
    random_feature_importance = control_rows.iloc[0] if len(control_rows) > 0 else 0

    # The bar to clear is the control feature's importance, but never below 0
    threshold = max(0, random_feature_importance)

    # Keep the features whose importance is strictly above that threshold
    important_feats = df.loc[df['Importance'] > threshold, 'Feature'].tolist()

    # important_feats = df.head(50)['Feature'].tolist()

    perm_important_features[model_name] = important_feats

print('Done getting important features dictionary')

Done getting important features dictionary


- RFECV

In [40]:
# Dedicated splitter for recursive feature elimination
rfe_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=5)

# Model class name -> features kept by RFECV
rfecv_features = {}

for alg in models:
    # set name
    MLA_name = alg.__class__.__name__

    # A model without an entry (e.g. its permutation-importance CSV was not
    # found) is treated like a model with no important features.
    features = perm_important_features.get(MLA_name, [])

    # in case no feature had any importance, go to the next model
    if len(features) == 0:
        continue

    X_rfecv = X[features]

    try:
        print(f'Starting with {MLA_name}')
        # Create the RFECV object and rank each feature, dropping one feature
        # per step. Uses the rfe_cv splitter defined for this step (it was
        # previously created but never passed in).
        selector = RFECV(alg, cv=rfe_cv, step=1, scoring='accuracy', verbose=2)
        selector = selector.fit(X_rfecv, y)

        selected_features = list(X_rfecv.columns[selector.support_])

        rfecv_features[MLA_name] = selected_features

        print(f'Done with {MLA_name}', end='\n\n')

    except ValueError:
        # Fall back to the unfiltered feature list for this model
        rfecv_features[MLA_name] = features
        print(f'{MLA_name} does not have coef_ or feature_importances_', end='\n\n')


Starting with LGBMClassifier
Fitting estimator with 178 features.
Fitting estimator with 177 features.
Fitting estimator with 176 features.
Fitting estimator with 175 features.
Fitting estimator with 174 features.
Fitting estimator with 173 features.
Fitting estimator with 172 features.
Fitting estimator with 171 features.
Fitting estimator with 170 features.
Fitting estimator with 169 features.
Fitting estimator with 168 features.
Fitting estimator with 167 features.
Fitting estimator with 166 features.
Fitting estimator with 165 features.
Fitting estimator with 164 features.
Fitting estimator with 163 features.
Fitting estimator with 162 features.
Fitting estimator with 161 features.
Fitting estimator with 160 features.
Fitting estimator with 159 features.
Fitting estimator with 158 features.
Fitting estimator with 157 features.
Fitting estimator with 156 features.
Fitting estimator with 155 features.
Fitting estimator with 154 features.
Fitting estimator with 153 features.
Fitting e

In [41]:
# Persist the RFECV selection as a pretty-printed dict
with open('rfecv_features_lgbm.txt', 'w') as out_file:
    pprint(rfecv_features, stream=out_file)

- SFS (Sequential Feature Selection)

In [43]:
# Model class name -> features kept by backward sequential feature selection
sfs_features = {}

for alg in models:
    # set name
    MLA_name = alg.__class__.__name__

    # Only the dictionary lookup is guarded: a KeyError raised further down
    # (e.g. a column missing from X) must not be reported as a missing model.
    if MLA_name not in rfecv_features:
        print(f'{MLA_name} not in the dictionary.')
        continue

    features = rfecv_features[MLA_name]

    # in case no feature had any importance, go to the next model
    if len(features) == 0:
        continue

    X_sfs = X[features]

    print(f'Running backward feature selection with {MLA_name}')

    # Backward (forward=False), non-floating selection; k_features='best'
    # keeps the subset with the best cross-validated accuracy.
    sfs = SFS(alg,
        k_features='best',
        forward=False,
        floating=False,
        scoring='accuracy',
        verbose=2,
        n_jobs=-1,
        cv=sk)

    sfs = sfs.fit(X_sfs, y)

    # Get the selected features index
    selected_sfs_idx = list(sfs.k_feature_idx_)

    # Get the feature names
    selected_sfs_feats = X_sfs.columns[selected_sfs_idx]

    sfs_features[MLA_name] = list(selected_sfs_feats)

    print(f'Done with {MLA_name}', end='\n\n')

Running backward feature selection with LGBMClassifier


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done  78 out of  78 | elapsed: 18.1min finished

[2024-02-10 21:35:11] Features: 77/1 -- score: 0.9085660295200505[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  7.0min
[Parallel(n_jobs=-1)]: Done  77 out of  77 | elapsed: 17.7min finished

[2024-02-10 21:52:56] Features: 76/1 -- score: 0.908085040509345[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  7.2min
[Parallel(n_jobs=-1)]: Done  76 out of  76 | elapsed: 17.5min finished

[2024-02-10 22:10:28] Features: 75/1 -- score: 0.9080413342281759[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:  6.9min
[Parallel(n_jobs=-1)]: Done  75 out of

Done with LGBMClassifier



[Parallel(n_jobs=-1)]: Done   2 out of   2 | elapsed:    8.3s finished

[2024-02-11 05:45:38] Features: 1/1 -- score: 0.7483492249473656

In [44]:
# Persist the SFS selection as a pretty-printed dict
with open('sfs_features.txt', 'w') as out_file:
    pprint(sfs_features, stream=out_file)

In [28]:
def evaluate_models(models, X, y, important_features, cv_split, experiment_name):
    """Cross-validate every model on its own feature subset and tabulate accuracy.

    Parameters
    ----------
    models : iterable of estimators
        Each estimator must expose ``get_params()``. Its class name is the key
        used to look up its feature list in ``important_features``.
    X : DataFrame-like
        Feature table; it is indexed as ``X[features]`` with a list of column labels.
    y : array-like
        Target passed straight through to ``cross_validate``.
    important_features : dict
        Maps estimator class name -> list of column labels. A missing key or an
        empty list makes the model be skipped and reported with zero scores.
    cv_split : int, CV splitter or iterable
        Forwarded as ``cv`` to ``cross_validate``.
    experiment_name : str
        Prefix of the output file ``{experiment_name}_results.csv``.

    Returns
    -------
    pandas.DataFrame
        One row per model with the columns 'MLA Name', 'MLA Parameters',
        'MLA Train Accuracy', 'MLA Test Accuracy' and 'MLA Time', sorted by
        'MLA Test Accuracy' in ascending order. The same frame is written to CSV.
        An empty ``models`` yields an empty frame that still has these columns.
    """
    result_columns = ['MLA Name',
                      'MLA Parameters',
                      'MLA Train Accuracy',
                      'MLA Test Accuracy',
                      'MLA Time']

    def evaluate_model(alg):
        """Return the result row (a dict keyed by ``result_columns``) for one estimator."""
        MLA_name = alg.__class__.__name__
        features = important_features.get(MLA_name, [])

        # Nothing to train on: report a placeholder row instead of failing.
        if len(features) == 0:
            print(f'Skipping {MLA_name} due to no important features.')
            return {
                'MLA Name': MLA_name,
                'MLA Parameters': str(alg.get_params()),
                'MLA Train Accuracy': 0,
                'MLA Test Accuracy': 0,
                'MLA Time': "0 min 0.00 sec",
            }

        # make_scorer's defaults already mean "higher is better, score on predict()".
        # The former explicit `needs_proba=False` keyword is deprecated in recent
        # scikit-learn releases, so it is deliberately not passed.
        accuracy_scorer = make_scorer(accuracy_score)

        cv_results = cross_validate(alg, X[features], y,
                                    cv=cv_split,
                                    scoring={'Accuracy': accuracy_scorer},
                                    return_train_score=True,
                                    n_jobs=-1)

        # Report the mean per-fold fit time as "<m> min <s> sec".
        minutes, seconds = divmod(cv_results['fit_time'].mean(), 60)

        print(f'Done with {MLA_name}.')

        # With a scoring dict and return_train_score=True both keys are always present.
        return {
            'MLA Name': MLA_name,
            'MLA Parameters': str(alg.get_params()),
            'MLA Train Accuracy': cv_results['train_Accuracy'].mean(),
            'MLA Test Accuracy': cv_results['test_Accuracy'].mean(),
            'MLA Time': f"{int(minutes)} min {seconds:.2f} sec",
        }

    # Models are evaluated concurrently; executor.map keeps the input order and
    # re-raises any worker exception here.
    # NOTE(review): each worker also runs cross_validate with n_jobs=-1, so several
    # models at once may oversubscribe the CPU — confirm this is intended.
    with ThreadPoolExecutor(max_workers=10) as executor:
        results_list = list(executor.map(evaluate_model, models))

    # Passing the columns explicitly keeps the schema stable even with zero rows,
    # so the sort below cannot fail with a KeyError on an empty model list.
    MLA_compare = pd.DataFrame(results_list, columns=result_columns)

    # Sort (ascending, i.e. weakest model first) and save results
    MLA_compare = MLA_compare.sort_values(by=['MLA Test Accuracy'], ascending=True)
    MLA_compare.to_csv(f'{experiment_name}_results.csv', index=False)

    return MLA_compare

In [29]:
# Baseline: every model is paired with the complete column list of X_fi,
# keyed by the estimator's class name (the key evaluate_models looks up).
baseline_features = {}

for model in models:
    model_name = type(model).__name__
    baseline_features[model_name] = [*X_fi.columns]

In [30]:
# Score the models on the full feature set; results go to '<experiment_name>_results.csv'.
baseline_models = evaluate_models(
    models,
    X_fi,
    y,
    baseline_features,
    sk,
    experiment_name,
)
baseline_models

Done with LGBMClassifier.


Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Test Accuracy,MLA Time
0,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None...",0.990325,0.905286,0 min 58.25 sec


In [38]:
# Seed Python's and NumPy's global RNGs before the run.
# Author's note: 22 minutes run time.
random.seed(42)
np.random.seed(42)

# Evaluate the models on the permutation-importance feature subsets.
permutation_models = evaluate_models(
    models,
    X,
    y,
    perm_important_features,
    sk,
    experiment_name + '_perm_imp',
)
permutation_models

Done with LGBMClassifier.


Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Test Accuracy,MLA Time
0,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None...",0.98885,0.906467,0 min 30.29 sec


In [42]:
# Seed Python's and NumPy's global RNGs before the run (run time was not recorded).
random.seed(42)
np.random.seed(42)

# Evaluate the models on the RFECV-selected feature subsets.
rfecv_models = evaluate_models(
    models,
    X,
    y,
    rfecv_features,
    sk,
    experiment_name + '_rfecv',
)
rfecv_models

Done with LGBMClassifier.


Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Test Accuracy,MLA Time
0,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None...",0.98827,0.906467,0 min 15.68 sec


In [45]:
# Seed Python's and NumPy's global RNGs before the run (run time was not recorded).
random.seed(42)
np.random.seed(42)

# Evaluate the models on the features kept by sequential backward selection.
sfs_models = evaluate_models(
    models,
    X,
    y,
    sfs_features,
    sk,
    experiment_name + '_sfs',
)
sfs_models

Done with LGBMClassifier.


Unnamed: 0,MLA Name,MLA Parameters,MLA Train Accuracy,MLA Test Accuracy,MLA Time
0,LGBMClassifier,"{'boosting_type': 'gbdt', 'class_weight': None...",0.981984,0.910446,0 min 5.50 sec
