# Build model for different features
- HB Features
- Delineation Features
- HRV Features
- CNN Features

In [70]:
import sys

import pandas as pd
import numpy as np
from imblearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.feature_selection import SelectKBest, VarianceThreshold
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
import seaborn as sns
import matplotlib.pyplot as plt

sns.set('talk')
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
%matplotlib inline

params

In [2]:
train_size = 0.8

Read in features

In [3]:
path = '/home/rapwag01/eth/aml/aml-project/task2/data/'

X_train_hb = pd.read_csv(path+'first_run/features_heartbeat_train.csv', index_col=False).values
X_test_hb = pd.read_csv(path+'first_run/features_heartbeat_test.csv', index_col=False).values

X_train_cwt = pd.read_csv(path+'features_cwt_train.csv', index_col=False).values
X_test_cwt = pd.read_csv(path+'features_cwt_test.csv', index_col=False).values

X_train_delin = pd.read_csv(path+'first_run/features_delineation_train.csv', index_col=False).values
X_test_delin = pd.read_csv(path+'first_run/features_delineation_test.csv', index_col=False).values

X_train_hrv = pd.read_csv(path+'first_run/features_hrv_train.csv', index_col=False).values
X_test_hrv = pd.read_csv(path+'first_run/features_hrv_test.csv', index_col=False).values


Read in target

In [83]:
y_train = pd.read_csv('../../task2/y_train.csv').y.values.ravel()

In [84]:
y_train.shape

(5117,)

Join feature matrices
- Skip cwt for now

In [85]:
X_train_full = np.hstack([X_train_hb, X_train_delin, X_train_hrv])[:,1:] # drop first column which is index

In [86]:
X_test_full = np.hstack([X_test_hb, X_test_delin, X_test_hrv])[:,1:] # drop first column which is index

In [76]:
# don't need to apply train / test split if we use gridsearchcv
# X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, target, random_state=0, train_size=train_size, stratify=target)

remove samples with >80% NaNs

In [77]:
df_train_full = pd.DataFrame(X_train_full)

In [78]:
df_train_full.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,...,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988
0,-14.498234,-13.234579,-11.628894,-9.684386,-7.431202,-4.918017,-2.186781,0.7197,3.730826,6.763773,9.754217,12.654224,15.387219,17.858503,19.989768,21.740134,23.07122,23.946301,24.353872,24.313122,23.82637,22.872333,21.427582,19.466121,16.945076,13.789532,9.954404,5.429056,0.238137,-5.53401,-11.735787,-18.131505,-24.451479,-30.464007,-35.978735,-40.880341,-45.115909,-48.6983,-51.698581,-54.150485,-56.086609,-57.528309,-58.489907,-59.00352,-59.099691,-58.84484,-58.264232,-57.272099,-55.560516,-52.325388,-46.043368,-34.343886,-14.213665,17.30129,61.813321,118.367141,183.221865,249.319054,307.014239,346.379558,360.369182,347.130355,308.973094,252.583987,187.814469,124.78981,70.895187,28.894343,-1.227022,-21.555891,-35.419058,-46.058657,-55.581692,-64.479414,-72.558327,-79.542895,-85.247278,-89.626694,-92.800152,-95.014692,-96.535539,-97.528265,-98.079411,-98.268462,-98.182151,-97.877847,-97.381024,-96.717243,-95.922674,-95.021363,-94.015547,-92.90164,-91.685387,-90.388166,-89.011632,-87.539819,-85.974066,-84.309869,-82.534768,-80.618668,-78.546665,-76.31315,-73.882243,-71.248823,-68.420004,-65.416821,-62.280539,-59.067606,-55.863958,-52.705899,-49.611246,-46.578907,-43.560687,-40.476442,-37.218287,-33.699423,-29.858347,-25.650435,-21.043737,-16.026551,-10.591853,-4.728458,1.59287,8.417317,15.792974,23.810822,32.558446,42.093098,52.453073,63.630357,75.536966,87.945192,100.561262,113.104475,125.296269,136.883797,147.65425,157.488734,166.321037,174.048829,180.538305,185.606931,189.055862,190.660683,190.147849,187.28012,181.890234,173.972233,163.655362,151.162823,136.866472,121.242203,104.797544,87.89824,70.848631,53.972026,37.557659,21.829952,7.003641,-6.598808,-18.710091,-29.20393,-38.047099,-45.283864,-51.052402,-55.59407,-59.139776,-61.87632,-63.980914,-65.585412,-66.767905,-67.575868,-68.04553,-68.201873,-68.047691,-67.590102,-66.860754,-65.893511,-64.724872,-63.396068,-15.489147,-13.030004,-10.820615,-8.62494,-5.895799,-2.7254,-0.469552,2.266584,6.23146,10.126356,13.440836,15.515698,18.423118,20.660126,23.277018,24.091516,24.515854,25.127938,25.930316,27.018542,26.089801,24.913826,24.208905,23.20777,21.057288,18.180164,15.251634,10.320022,5.526655,-0.329275,-6.520551,-14.801481,-20.393518,-27.13713,-32.638144,-36.423865,-40.277212,-43.698567,-47.490486,-51.543123,-54.810245,-56.615152,-58.192914,-58.974315,-57.309944,-58.883707,-59.27449,-60.730837,-59.857486,-55.610578,-48.135666,-36.108145,-18.188899,10.957213,55.448484,110.667819,175.120652,235.753106,295.64809,340.506318,356.351168,341.520023,302.930429,245.382878,185.706886,122.376672,67.033858,24.924293,-4.485634,-24.920159,...,-88.547933,-89.01932,-90.127355,-92.258864,-95.856269,-101.250935,-108.46874,-117.174545,-126.86665,-136.987933,-146.759194,-155.568074,-163.024622,-168.905759,-173.044183,-175.189496,-175.147097,-172.722641,-167.666484,-159.645907,-154.55898,-159.248125,-163.096352,-166.105715,-168.306237,-169.89479,-171.068419,-172.107669,-173.237675,-174.128149,-173.698911,-169.453197,-157.755404,-134.192168,-94.738984,-38.149051,33.407889,113.623386,181.663722,235.362578,263.293168,268.110091,256.595362,222.022915,170.403166,109.778496,47.557057,-6.867491,-50.842132,-84.000763,-108.869908,-127.614806,-141.733882,-152.642114,-160.809951,-166.291106,-169.167031,-169.991375,-169.567777,-168.449881,-167.024682,-167.88344,-165.360556,-162.664542,-161.491838,-160.47514,-159.585054,-158.792256,-158.095275,-157.464944,-156.872187,-156.315797,-155.73911,-155.141124,-154.520945,-153.850016,-153.072119,-152.186716,-151.16561,-149.869621,-148.104135,-145.702434,-142.497915,-138.185193,-132.79231,-126.569616,-119.878641,-113.108747,-107.087917,-102.548624,-99.854653,-99.559947,-98.597387,-96.968555,-94.647343,-91.635502,-88.018201,-83.964022,-79.447179,-75.512136,-73.126446,-70.294428,-66.570445,-61.564506,-54.692266,-45.119472,-32.400846,-16.50786,-38.575326,-62.477736,-83.417822,-100.951659,-115.163223,-126.553296,-135.539485,-142.817345,-149.08261,-154.55898,-159.248125,-163.096352,-166.105715,-168.306237,-169.89479,-171.068419,-172.107669,-173.237675,-174.128149,-173.698911,-169.453197,-157.755404,-134.192168,-94.738984,-38.149051,33.407889,26.944514,6.63785,-13.533267,-32.927279,-50.291455,-64.81748,-76.252607,-84.538581,-89.867225,-92.736037,-94.114876,-94.640438,-94.644052,-108.869908,-127.614806,-141.733882,-152.642114,-160.809951,-166.291106,-169.167031,-169.991375,-169.567777,-168.449881,-167.024682,-165.512534,-164.022713,-162.664542,42.17822,-69.494141,-107.674375,198.932065,360.369182,815.846154,817.538462,186.931217,121.767677,117.070707,29.378788,73.708605,33.449697,-66.502132,-104.719503,196.101072,356.351168,830.0,823.333333,173.333333,126.666667,91.666667,33.5,72.289157,45.665314,23.252994,18.778421,25.495103,46.43198,65.387813,96.618515,72.145995,42.633114,75.088086,10.046506,8.510272,273.236801,-38.20758,-63.517605,298.723811,535.188836,853.333333,1050.0,413.333333,333.333333,376.666667,44.0,185.56701,-41.018732,-175.147097,-169.567777,136.528583,268.110091,323.333333,240.0,73.333333,53.333333,23.333333,11.0,70.3125,815.846154,65.896675,,,,,,,64.209942,64.22376,0.080771,0.078703,830.0,14.826,0.017863,23.333333,1.538462,4.615385,3.823529,523.4375,,,0.006599,0.004682,0.000334,1.409581,0.568152,0.403065,-5.364064
1,-1.753309,-0.242555,1.452685,3.257832,5.084793,6.856565,8.53898,10.120082,11.629965,13.121732,14.672293,16.334753,18.086824,19.897503,21.708818,23.449324,25.021414,26.338618,27.360529,28.045984,28.352265,28.204947,27.53758,26.290099,24.404062,21.841696,18.586689,14.691006,10.262195,5.426866,0.328308,-4.883041,-10.036107,-14.982046,-19.622026,-23.880251,-27.706346,-31.097585,-34.095723,-36.785412,-39.259284,-41.603668,-43.887478,-46.131266,-48.310392,-50.288354,-51.815995,-52.530699,-51.945275,-49.416204,-44.089653,-35.117344,-21.790658,-3.6565,19.260505,46.150882,75.344927,104.435743,130.190362,148.560079,155.609025,148.58163,126.583574,90.834151,44.890066,-5.244272,-52.72074,-91.638008,-118.340765,-131.603079,-132.383179,-123.702039,-109.549739,-93.40849,-77.708192,-63.88278,-52.613881,-43.970487,-37.506574,-32.83094,-29.532595,-27.187109,-25.383592,-23.74374,-22.077385,-20.285667,-18.326909,-16.199944,-13.930626,-11.563092,-9.099451,-6.510889,-3.826561,-1.134383,1.449127,3.778846,5.706445,7.089908,7.907028,8.297462,8.465114,8.689247,9.210981,10.215838,11.809138,13.955674,16.563843,19.503109,22.692091,26.074763,29.604578,33.285741,37.157341,41.257637,45.575654,50.092454,54.78749,59.633854,64.585582,69.565754,74.507773,79.344262,83.976907,88.309008,92.243897,95.723038,98.698255,101.119834,102.975412,104.231255,104.829082,104.681306,103.682146,101.734609,98.725887,94.561477,89.215975,82.731479,75.201715,66.716467,57.402053,47.455438,37.072799,26.420948,15.652397,5.007733,-5.262165,-14.96598,-23.932277,-32.010308,-39.05255,-45.039321,-50.000205,-53.988665,-57.112989,-59.480745,-61.213059,-62.364459,-62.983984,-63.112007,-62.765944,-61.983109,-60.815151,-59.347888,-57.664004,-55.851767,-53.994705,-52.162079,-50.396969,-48.718647,-47.151145,-45.705784,-44.396253,-43.188601,-42.06631,-41.028706,-40.076671,-39.219778,-38.435817,-37.74222,-1.219333,-2.887117,-3.042428,-0.180479,-0.035532,1.125449,3.783738,7.898919,11.031366,14.846134,16.350363,19.309462,23.618644,24.62235,23.882625,25.319072,27.576786,29.78653,28.309271,27.088866,25.951592,28.437887,26.275017,26.474506,29.493239,30.233601,26.932193,20.750344,14.169941,7.495982,4.068793,1.609599,-2.755134,-8.925125,-14.679182,-21.5957,-24.854458,-29.247049,-30.952465,-34.040928,-36.345614,-38.808245,-41.905718,-45.985519,-49.260584,-51.605338,-54.501382,-54.923608,-55.732643,-49.939888,-46.619433,-35.695846,-24.709805,-11.435534,11.16829,34.970849,67.112959,94.601651,117.972197,136.560382,141.058357,133.278022,115.232093,78.198366,35.353863,-13.861311,-60.850271,-99.608253,-122.481351,-135.805903,...,-148.102136,-148.43512,-148.071567,-147.175232,-145.993129,-144.716644,-143.564875,-142.784641,-142.456042,-142.659135,-143.390609,-144.647124,-146.369767,-148.416286,-150.69999,-153.106431,-155.576752,-158.024374,-160.36279,-162.533368,-164.422034,-165.748181,-165.786908,-163.480141,-157.908862,-148.182,-133.43642,-113.114693,-104.015477,-107.492194,-109.214349,-108.071708,-102.898443,-92.250895,-74.685339,-49.230191,-16.608211,21.096845,45.35337,64.450345,76.777817,79.225118,69.709449,48.398096,16.819536,-21.553241,-76.593105,-130.380987,-171.580882,-196.858438,-208.845463,-218.906818,-213.228019,-195.89129,-182.338511,-173.722071,-165.882849,-159.015529,-152.731567,-146.531412,-139.80449,-132.301418,-123.911774,-114.691859,-104.920245,-99.423865,-97.547836,-95.313473,-92.386114,-88.431242,-83.253366,-76.712689,-68.475097,-58.262159,-50.80035,-51.64381,-52.150019,-52.236058,-54.506096,-59.316539,-62.564915,-64.446109,-65.571776,-66.137005,-68.294204,-85.047368,-87.188405,-79.661667,-67.9673,-58.446898,-55.365442,-51.232145,-45.770655,-38.95467,-34.484791,-36.612438,-39.859366,-43.838695,-48.080378,-51.975631,-55.165819,-57.486888,-58.913811,-59.505018,-59.402393,-58.886826,-58.155982,-57.324295,-56.450749,-55.511086,-54.481144,-53.309075,-51.943117,-50.359373,-48.534031,-46.471138,-44.2026,-41.7604,-39.148818,-36.372204,-33.434976,-30.341614,-27.0411,-27.289439,-44.063488,-57.971507,-68.596745,-75.996953,-85.460687,-93.506936,-100.246186,-105.816716,-110.3846,-114.338159,-117.871305,-121.039092,-123.841053,-126.276759,-128.42915,-130.325643,-131.938135,-133.210772,-134.087729,-134.907924,-135.454496,-134.930457,-133.11524,-130.418238,-127.916551,-131.260199,-135.290131,-138.993104,-142.396642,-145.639307,-148.804036,-151.918142,-154.981095,-157.96453,-160.951137,-163.940223,-166.931056,45.600836,-58.337274,-135.289086,116.187169,155.098157,789.047619,811.568627,167.333333,132.190476,135.238095,32.142857,73.84689,43.067625,-58.336062,-137.64002,113.876522,140.949618,800.0,786.666667,173.333333,123.333333,110.0,35.0,71.713147,36.216761,27.299799,62.163862,54.910037,40.242121,51.551001,174.405124,31.860808,45.662272,76.562188,8.986157,4.067595,138.536977,-10.13749,159.419397,341.411772,231.47868,863.333333,1590.0,226.666667,286.666667,343.333333,51.0,102.857143,-94.812612,-165.786908,-218.906818,-7.091954,79.225118,583.333333,530.0,103.333333,56.666667,33.333333,15.0,69.498069,789.047619,52.30361,,,,,,,55.20905,56.032151,0.066287,0.069969,800.0,44.478,0.055597,63.333333,8.571429,68.571429,8.75,226.5625,,,,0.024982,0.010756,,,0.699037,-3.689603
2,-15.740629,-13.864271,-11.691737,-9.261015,-6.574892,-3.650031,-0.504923,2.85585,6.435141,10.251569,14.259899,18.406591,22.605733,26.734411,30.600296,33.957392,36.579173,38.209511,38.609889,37.569581,34.983425,30.876804,25.38898,18.786963,11.434943,3.763724,-3.796887,-10.933831,-17.445261,-23.219257,-28.240323,-32.569955,-36.326282,-39.597041,-42.451623,-44.964229,-47.192584,-49.179789,-50.943216,-52.488397,-53.810874,-54.886016,-55.676414,-56.121689,-56.150521,-55.682482,-54.570613,-52.469919,-48.629936,-41.67155,-29.430482,-8.805396,24.099558,73.359048,142.017361,230.23197,333.808784,443.287382,543.791992,617.421342,647.143129,621.881064,540.280601,411.398716,254.311266,93.549033,-47.204205,-151.228411,-212.092372,-232.400981,-222.175855,-195.052599,-163.580845,-135.964085,-115.610621,-102.803016,-95.820323,-92.112165,-89.606138,-87.341151,-85.039638,-82.624227,-80.074213,-77.401477,-74.648628,-71.838068,-68.996978,-66.151754,-63.334476,-60.571786,-57.870062,-55.242254,-52.685655,-50.179111,-47.68671,-45.184629,-42.650007,-40.070198,-37.44739,-34.789337,-32.127868,-29.467029,-26.836778,-24.259648,-21.744252,-19.302879,-16.92278,-14.63005,-12.416485,-10.267348,-8.177109,-6.138337,-4.167618,-2.254633,-0.40382,1.372103,3.071582,4.695896,6.257484,7.761428,9.211002,10.603969,11.939981,13.208542,14.41585,15.558875,16.630901,17.648377,18.622209,19.574417,20.505725,21.431668,22.377022,23.342474,24.328683,25.341832,26.395472,27.504038,28.670806,29.918447,31.252918,32.670864,34.166109,35.737994,37.391374,39.101451,40.852659,42.609047,44.335579,45.973142,47.471885,48.801412,49.936898,50.880393,51.610832,52.097922,52.292901,52.134091,51.542278,50.424426,48.694047,46.277693,43.131626,39.250156,34.676756,29.48277,23.785931,17.74295,11.493966,5.156976,-1.192533,-7.492895,-13.690718,-19.744595,-25.604752,-31.211202,-36.490062,-41.363742,-45.776887,-49.701018,-12.624124,-11.869465,-10.778435,-7.773356,-5.155552,-2.293272,-0.425352,3.554757,8.073808,12.018306,16.318128,20.160432,25.424818,29.810309,33.939505,37.288502,39.171516,40.025286,39.581442,38.437253,35.860205,31.740109,26.133421,19.179903,10.97709,1.690469,-7.581131,-13.527236,-18.859527,-24.120911,-29.184199,-33.828832,-37.510691,-40.611693,-44.171125,-47.754708,-50.296313,-52.814126,-54.693051,-56.737631,-58.009052,-58.733156,-59.667189,-60.116707,-59.545,-58.676832,-57.325058,-55.091,-51.230835,-45.776798,-31.882237,-11.90583,22.877433,69.255212,137.888729,225.356285,326.725024,439.109955,542.146524,617.828077,645.257053,621.640783,542.443148,412.808402,258.607114,99.046169,-42.777537,-152.298608,-216.774954,-237.856332,...,-49.258874,-51.86235,-55.717751,-60.458831,-65.747069,-71.188346,-76.499629,-81.453426,-85.877801,-89.600835,-92.506193,-94.588698,-95.954339,-96.709179,-96.903808,-96.588912,-95.926395,-94.939389,-93.567817,-91.835068,-89.848001,-87.769175,-85.566848,-83.181642,-82.434643,-83.447288,-83.885479,-83.390227,-81.269346,-76.330754,-66.632438,-49.260197,-33.407509,-7.343445,34.055886,93.688717,171.635383,267.982913,375.424092,481.152093,565.983967,600.110386,555.225303,446.975084,301.920451,142.840979,-5.483496,-129.118325,-216.127621,-258.825272,-264.125043,-252.047729,-225.481853,-195.123063,-168.765509,-149.308818,-137.736904,-131.929044,-131.678389,-132.410667,-132.84632,-132.540209,-131.436292,-129.645397,-127.334104,-124.752514,-121.984242,-119.113074,-116.222951,-113.39796,-110.666769,-107.974831,-105.378817,-102.852163,-100.312829,-97.678844,-94.868297,-91.827097,-88.646292,-86.27378,-83.888295,-81.463796,-78.974204,-76.337838,-73.52852,-70.52001,-67.36934,-64.105691,-60.785953,-57.466935,-54.205371,-51.05791,-48.025567,-45.13705,-42.337649,-39.628132,-36.981402,-34.342511,-31.711983,-29.241329,-27.077279,-26.860028,-26.819392,-26.95564,-27.185669,-27.481906,-27.761196,-27.912589,-27.797346,-27.332278,-26.461974,-25.131027,-23.339598,-21.11564,-18.570461,-15.787618,-12.822919,-9.759984,-6.626917,-3.479638,-0.290772,2.967015,6.265465,9.576287,10.160019,10.727593,10.919412,11.153538,10.902894,10.707506,10.714524,10.784154,10.915497,11.079883,11.276442,11.504334,11.790535,12.162067,12.562678,12.963962,13.282029,13.405298,12.888943,11.371571,8.714115,4.860948,-0.243444,-6.487791,-13.696223,-22.591176,-33.367569,-43.377934,-52.989372,-62.541103,-72.400035,-82.738539,-93.701126,-105.209999,-116.992841,-128.777253,-140.290755,39.876474,-58.153225,-233.394404,59.456172,647.143129,900.689655,900.344828,182.111111,119.444444,86.0,39.966667,68.782483,40.210325,-60.883929,-239.928115,53.003391,645.257053,906.666667,903.333333,183.333333,108.333333,63.333333,40.0,70.588235,25.344543,17.332724,32.532097,35.313818,30.164765,35.291662,95.457538,4.673276,42.138074,75.750076,1.277585,2.604269,109.443646,4.679702,-74.585329,229.882472,702.155116,993.333333,1206.666667,190.0,250.0,353.333333,43.0,71.428571,-49.105674,-96.903808,-264.125043,24.672119,600.110386,840.0,693.333333,166.666667,76.666667,33.333333,36.0,60.402685,900.689655,35.916341,,,,,,,26.5324,26.862158,0.039876,0.029458,906.666667,29.652,0.032704,46.666667,3.448276,51.724138,4.142857,78.125,,,,0.056782,0.000642,,,0.988822,-2.868542
3,93.934773,96.321661,98.295196,99.880049,101.08301,101.886574,102.264761,102.163024,101.506803,100.19511,98.126172,95.22221,91.431148,86.752678,81.228851,74.940227,68.000313,60.538035,52.681489,44.537425,36.211743,27.810769,19.44637,11.247902,3.348954,-4.11396,-11.030897,-17.332613,-22.992282,-28.037056,-32.53013,-36.557932,-40.216883,-43.578368,-46.70369,-49.628263,-52.357337,-54.874966,-57.155542,-59.199252,-61.009848,-62.630953,-64.150748,-65.699817,-67.385742,-69.230685,-71.141027,-72.70862,-72.998795,-70.435373,-62.86388,-47.622877,-21.771275,17.289063,70.894554,138.11268,215.152712,295.042245,367.299027,419.312176,438.598173,415.789745,347.052507,235.846882,93.625049,-62.150324,-211.237791,-335.073622,-420.412059,-462.040344,-462.490321,-430.414125,-378.134641,-318.092101,-260.336402,-210.957017,-172.521019,-144.813667,-125.782409,-112.850971,-103.730874,-96.72869,-90.808842,-85.418809,-80.300647,-75.360636,-70.559441,-65.876624,-61.274741,-56.705314,-52.112233,-47.446709,-42.658284,-37.682861,-32.441836,-26.848074,-20.810173,-14.248699,-7.094895,0.691373,9.116067,18.152261,27.717923,37.707966,48.007073,58.502521,69.082471,79.622299,90.016223,100.202098,110.126379,119.72661,128.92972,137.676824,145.891606,153.435455,160.164313,165.942354,170.644557,174.143031,176.324966,177.087934,176.300569,173.811111,169.493124,163.277119,155.155237,145.209874,133.635465,120.755127,106.931333,92.532554,77.910591,63.393711,49.259707,35.698263,22.857088,10.839758,-0.266097,-10.408895,-19.565847,-27.730581,-34.910577,-41.134871,-46.470723,-50.990708,-54.768018,-57.87389,-60.390427,-62.378956,-63.863361,-64.863401,-65.371189,-65.339645,-64.657263,-63.150228,-60.590096,-56.6771,-51.050391,-43.317927,-33.130806,-20.24478,-4.586891,13.648401,33.897967,55.150548,75.93097,94.344611,108.240239,115.515585,114.497684,104.338607,85.284686,58.703415,26.85566,-7.498386,-41.62701,-73.297095,-100.998084,-123.892863,107.629654,112.723087,111.981647,114.826471,114.870064,115.673503,123.09843,125.574594,130.150533,135.163778,126.935168,125.865308,119.558893,114.046664,110.947792,98.04477,91.379573,77.164003,63.735822,47.746858,29.655875,14.209798,11.856464,4.680618,-6.835062,-12.102879,-20.113824,-27.103506,-29.549029,-35.383631,-39.299388,-40.132215,-42.49838,-46.566131,-48.88962,-50.790997,-53.888348,-55.314287,-54.988403,-61.489536,-62.389415,-64.478688,-65.504133,-66.332714,-65.690576,-67.785271,-69.40601,-71.687211,-73.312781,-69.983849,-61.497786,-44.610199,-18.840683,18.212576,72.038395,140.175401,217.213119,297.894296,367.117683,417.449202,437.149473,407.640138,341.579922,230.636125,84.75713,-69.091241,-213.653265,-337.223887,-420.590955,-460.802,...,-163.775586,-168.771061,-172.394643,-174.925776,-176.560834,-177.385332,-177.346131,-176.445863,-174.715135,-172.240285,-169.13558,-165.487644,-161.438765,-157.131323,-152.624433,-152.350999,-153.520596,-153.995098,-153.693602,-152.479622,-150.02218,-146.262892,-162.282513,-180.724026,-199.408129,-218.417858,-237.280836,-255.358154,-271.399919,-284.211912,-291.683349,-291.286863,-280.35626,-255.975375,-215.95027,-158.892523,-85.691418,-0.013901,91.056566,177.186706,245.821227,283.905058,280.577788,230.118094,115.698626,-29.526101,-178.822886,-312.327657,-444.057526,-543.993562,-598.565706,-609.72911,-606.674266,-629.60041,-633.769885,-626.041381,-611.274067,-591.772063,-566.701128,-535.033109,-495.712601,-449.018041,-396.228349,-339.706229,-282.787013,-229.000846,-181.155968,-167.42734,-162.233406,-156.94555,-151.677099,-146.54154,-141.65249,-137.040335,-132.596639,-128.101888,-123.308791,-117.94224,-111.643717,-104.137927,-95.371647,-85.513697,-74.788242,-63.391446,-51.658117,-39.950605,-28.519909,-17.45014,-6.741875,3.54894,9.319677,15.056802,22.137801,30.811861,41.217028,53.130239,66.21734,80.088677,74.736841,65.604044,56.567211,47.433238,37.953482,27.962645,17.128758,5.28651,-7.534992,-21.140009,-36.671868,-53.22237,-69.121627,-84.507556,-99.351622,-113.625523,-126.967871,-138.878652,-148.997014,-157.240162,-163.775586,-168.771061,-172.394643,-174.925776,-176.560834,-177.385332,-177.346131,-176.445863,-174.715135,-172.240285,-169.13558,-165.487644,-161.438765,-157.131323,-152.624433,-147.977261,-145.401889,-145.015917,-144.483418,-143.66849,-144.0007,-162.282513,-180.724026,-199.408129,-218.417858,-237.280836,-255.358154,-271.399919,-284.211912,-291.683349,-291.286863,-311.933819,-412.791517,-476.535978,-557.466529,-606.674266,-629.60041,-633.769885,-626.041381,-611.274067,-591.772063,-566.701128,-587.411075,146.331552,-80.614658,-452.181306,79.535368,439.802266,406.615385,2004.871795,171.111111,131.666667,96.190476,33.785714,158.285005,184.039253,-77.021869,-448.663312,-36.750449,437.976693,383.333333,1656.666667,186.666667,146.666667,100.0,40.0,166.666667,111.562434,44.520546,54.550278,214.536985,51.869951,70.178021,973.407633,24.393887,44.716923,26.721882,9.274994,15.935667,340.85188,26.990479,-330.096571,488.233931,582.207504,676.666667,4160.0,190.0,183.333333,150.0,43.0,178.217822,-56.712875,-291.286863,-626.041381,-113.862059,283.905058,336.666667,450.0,136.666667,50.0,36.666667,16.0,88.669951,406.615385,70.724162,,,,,,,93.910973,94.65265,0.173934,0.230958,383.333333,34.594,0.090245,50.0,32.307692,63.076923,5.0,31.25,,,,0.120121,0.058922,,,0.670907,-2.119254
4,-26.443108,-25.117556,-23.562392,-21.706623,-19.477371,-16.804914,-13.620787,-9.884917,-5.556579,-0.601962,4.998901,11.223096,17.979587,25.132589,32.476317,39.735005,46.600785,52.767789,57.988977,62.035734,64.698511,65.846801,65.370433,63.22629,59.454722,54.206696,47.740005,40.36055,32.452011,24.387448,16.484478,9.002098,2.13563,-3.994656,-9.334721,-13.882242,-17.703673,-20.919098,-23.668153,-26.085404,-28.309194,-30.458284,-32.642585,-34.982099,-37.632168,-40.726018,-44.356453,-48.531655,-53.083636,-57.529977,-60.878109,-61.560911,-57.473317,-46.237448,-25.671039,5.252352,45.362956,90.913493,134.8437,167.760143,179.79795,163.502121,116.572458,42.160252,-50.097031,-146.043595,-230.623415,-291.688366,-323.10874,-324.666591,-301.599948,-262.806744,-217.647223,-173.732447,-135.645243,-105.166837,-82.234537,-65.518108,-53.415333,-44.563992,-37.976322,-33.013113,-29.162108,-26.038627,-23.444265,-21.270299,-19.442125,-17.864959,-16.430783,-15.04043,-13.565713,-11.912549,-10.003286,-7.776071,-5.174756,-2.148263,1.321626,5.244891,9.628951,14.486864,19.828494,25.659242,32.00846,38.933874,46.499495,54.730806,63.622977,73.150344,83.239911,93.723381,104.344112,114.831622,124.899081,134.271095,142.691929,150.000635,156.147482,161.142192,165.045739,167.909117,169.741779,170.473756,169.928512,167.822308,163.834906,157.697947,149.300998,138.687759,126.139382,112.174458,97.412638,82.457823,67.770625,53.70433,40.431653,27.969335,16.269039,5.312657,-4.839718,-14.094009,-22.370136,-29.599505,-35.735118,-40.795135,-44.86288,-48.074215,-50.537369,-52.357552,-53.635698,-54.462781,-54.901504,-54.98693,-54.762462,-54.267851,-53.54234,-52.61646,-51.536013,-50.355754,-49.116657,-47.850331,-46.57901,-45.325021,-44.096886,-42.889948,-41.698991,-40.516969,-39.335632,-38.145515,-36.934041,-35.698137,-34.439807,-33.157289,-31.844416,-30.490612,-29.089728,-27.616675,-26.033107,-24.306985,-22.413841,-20.341192,-31.555943,-31.03256,-29.101405,-26.660622,-25.314319,-23.417416,-20.711332,-16.520459,-11.35078,-6.365956,-0.551104,4.315769,12.254081,20.626238,28.361368,36.049931,42.387701,47.41952,52.584072,56.572827,59.19254,62.752809,65.322379,60.827687,58.392142,51.879037,47.053124,40.166896,30.871447,23.597805,17.11621,10.282344,3.459494,-3.701511,-8.061931,-13.602216,-17.441389,-21.265467,-24.714773,-27.96625,-31.58202,-33.212187,-34.796037,-37.213718,-40.019164,-44.83783,-48.957177,-54.180722,-59.943377,-64.427049,-67.846767,-69.773475,-67.736436,-57.363968,-40.858358,-12.671629,28.083464,71.352293,116.508137,153.190137,169.763444,151.855647,108.543867,39.917412,-55.524664,-157.083108,-239.673435,-305.298202,-338.17945,-333.613276,...,-66.723384,-68.065778,-69.814589,-71.802241,-73.916716,-76.018236,-78.105941,-80.151241,-82.042274,-83.722811,-85.136721,-86.255764,-86.996273,-88.75624,-94.815196,-98.828187,-101.21173,-102.410073,-102.922988,-103.222461,-103.419378,-103.569087,-103.671405,-103.670621,-103.455485,-102.859205,-101.826101,-100.272705,-103.268685,-108.151417,-112.027124,-115.006475,-119.000189,-117.947333,-109.992611,-111.180313,-109.47952,-103.079964,-92.174006,-78.565046,-65.306411,-56.86802,-57.275274,-70.025732,-97.672458,-143.871003,-237.889201,-327.04497,-396.488424,-436.288614,-445.749324,-428.424438,-392.701295,-347.828478,-301.88804,-260.212168,-224.938728,-197.316809,-176.178931,-159.774366,-146.408009,-134.912591,-132.774873,-130.625636,-127.741452,-124.625525,-121.892214,-119.90592,-118.975521,-119.409923,-121.406949,-124.914444,-129.685834,-135.44679,-141.645229,-147.562428,-152.674136,-156.678353,-159.24533,-160.156457,-159.470933,-157.359091,-153.907951,-149.148996,-143.224828,-136.25027,-128.229027,-128.250634,-128.07616,-127.551444,-126.650546,-125.375316,-123.727603,-121.681465,-119.266476,-116.039941,-110.9202,-102.464393,-88.729544,-67.383656,-36.261235,4.497835,52.364989,64.680431,66.821965,67.103166,65.46844,61.862411,56.313237,8.500049,-82.18501,-167.258829,-232.831355,-270.373622,-277.745567,-259.140503,-223.946256,-182.57851,-143.453039,-111.652376,-88.564698,-76.36733,-81.444609,-85.443985,-88.446869,-90.645773,-94.957192,-98.665013,-100.972119,-102.046908,-102.058014,-101.396533,-100.287119,-106.110126,-112.042856,-117.101183,-121.036711,-123.767972,-125.352643,-125.90421,-125.564187,-124.335442,-122.248856,-122.076966,-121.966282,-121.444663,-120.513386,-119.173847,-117.45533,-115.415004,-113.054584,-110.431436,-107.575237,-104.515748,-101.22725,-98.844753,-97.582131,-95.700269,-93.225825,-91.95906,-92.619381,67.107476,-66.970043,-320.795351,176.213541,178.868548,612.424242,626.589147,146.969697,120.378788,98.636364,27.840909,99.131894,66.603497,-71.033316,-327.652237,174.391177,169.697309,600.0,603.333333,168.333333,110.0,86.666667,34.5,100.0,44.155314,38.968145,47.24172,36.112023,82.853353,69.155671,117.031519,35.35404,31.319597,43.107325,10.293297,6.087403,191.35189,80.127998,-128.07616,240.696324,488.124756,950.0,1243.333333,216.666667,190.0,230.0,41.0,117.647059,-65.650306,-117.947333,-428.424438,66.821965,-56.86802,510.0,553.333333,96.666667,83.333333,43.333333,12.0,63.157895,612.424242,69.955185,,,,,,,83.534641,84.522923,0.114227,0.1364,600.0,22.239,0.037065,30.833333,9.090909,25.0,6.285714,109.375,,,,0.033902,0.00567,,,0.856713,-3.384267


In [68]:
df_train_full['NaNs'] = df_train_full.isnull().sum(axis=1)

In [72]:
df_train_full['NaN_perc'] = df_train_full.NaNs/df_train_full.shape[1]

only one sample with almost all Nans -> remove

In [74]:
df_train_full['NaN_perc'].value_counts()

0.011111    4017
0.008081     742
0.015152     172
0.012121     113
0.089899      29
0.014141      21
0.016162       9
0.021212       8
0.013131       5
0.998990       1
Name: NaN_perc, dtype: int64

In [92]:
clf1 = XGBClassifier(learning_rate=0.05, n_estimators=300, max_depth=5)

clf2 = GradientBoostingClassifier(learning_rate=0.05, n_estimators=500, max_depth=7, 
                                 min_samples_split=60, min_samples_leaf=9, subsample=1.0,
                                 max_features=50, random_state=0)

param_grid = {
    'imputer': [SimpleImputer(strategy='constant', fill_value=0, add_indicator=True)],
    'scaler': [RobustScaler()],
    'selector': [SelectKBest()],
    'selector__k': [5, 10, 50],
    'pca': [KernelPCA()],
    'pca__n_components': [5, 10, 50],
    'clf': [clf1],
}

In [None]:
#add VarianceThreshold, we got constant features (nan to zero)
#962  963  964  965  966  967  980  981 1951 1952 1953 1954 1955 1956

In [93]:
pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer()),
    ('scaler', RobustScaler()),
    ('threshold', VarianceThreshold()),
    ('selector', SelectKBest()),
    ('pca', KernelPCA()),
    ('clf', XGBClassifier())
    ])

In [94]:
grid = GridSearchCV(estimator=pipeline, param_grid=param_grid, scoring='f1_micro', cv=5, verbose=10)

In [None]:
grid.fit(X_train_full, y_train)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5


 1969 1970] are constant.
  f = msb / msw


[CV 1/5; 1/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.688 total time=  58.8s
[CV 2/5; 1/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_bylev

 1069 1070] are constant.
  f = msb / msw


[CV 2/5; 1/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.680 total time= 1.4min
[CV 3/5; 1/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_bylev

 1969 1970] are constant.
  f = msb / msw


[CV 3/5; 1/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.701 total time= 1.4min
[CV 4/5; 1/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_bylev

 1969 1970] are constant.
  f = msb / msw


[CV 4/5; 1/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.709 total time= 1.5min
[CV 5/5; 1/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_bylev

 1969 1970] are constant.
  f = msb / msw


[CV 5/5; 1/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.709 total time= 1.4min
[CV 1/5; 2/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_bylev

 1969 1970] are constant.
  f = msb / msw


[CV 1/5; 2/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.674 total time= 1.5min
[CV 2/5; 2/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1069 1070] are constant.
  f = msb / msw


[CV 2/5; 2/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.703 total time= 1.3min
[CV 3/5; 2/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 3/5; 2/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.694 total time= 1.2min
[CV 4/5; 2/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 4/5; 2/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.664 total time=  58.0s
[CV 5/5; 2/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 5/5; 2/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.663 total time= 1.2min
[CV 1/5; 3/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 1/5; 3/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=50;, score=0.652 total time= 1.4min
[CV 2/5; 3/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1069 1070] are constant.
  f = msb / msw


[CV 2/5; 3/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=50;, score=0.631 total time= 1.4min
[CV 3/5; 3/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 3/5; 3/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=50;, score=0.642 total time= 1.9min
[CV 4/5; 3/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 4/5; 3/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=50;, score=0.646 total time= 1.2min
[CV 5/5; 3/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 5/5; 3/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=5, scaler=RobustScaler(), selector=SelectKBest(), selector__k=50;, score=0.641 total time= 1.6min
[CV 1/5; 4/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 1/5; 4/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.688 total time= 2.6min
[CV 2/5; 4/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1069 1070] are constant.
  f = msb / msw


[CV 2/5; 4/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.680 total time= 3.0min
[CV 3/5; 4/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 3/5; 4/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.701 total time= 3.0min
[CV 4/5; 4/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 4/5; 4/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.709 total time= 5.8min
[CV 5/5; 4/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 5/5; 4/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=5;, score=0.709 total time= 5.7min
[CV 1/5; 5/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byle

 1969 1970] are constant.
  f = msb / msw


[CV 1/5; 5/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.704 total time= 9.1min
[CV 2/5; 5/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byl

 1069 1070] are constant.
  f = msb / msw


[CV 2/5; 5/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.732 total time=11.2min
[CV 3/5; 5/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byl

 1969 1970] are constant.
  f = msb / msw


[CV 3/5; 5/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.720 total time=11.9min
[CV 4/5; 5/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byl

 1969 1970] are constant.
  f = msb / msw


[CV 4/5; 5/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.718 total time= 9.6min
[CV 5/5; 5/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byl

 1969 1970] are constant.
  f = msb / msw


[CV 5/5; 5/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=10;, score=0.719 total time= 7.8min
[CV 1/5; 6/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byl

 1969 1970] are constant.
  f = msb / msw


[CV 1/5; 6/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=50;, score=0.678 total time=10.5min
[CV 2/5; 6/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byl

 1069 1070] are constant.
  f = msb / msw


[CV 2/5; 6/9] END clf=XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=0.05, max_delta_step=None, max_depth=5,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=300, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None), imputer=SimpleImputer(add_indicator=True, fill_value=0, strategy='constant'), pca=KernelPCA(), pca__n_components=10, scaler=RobustScaler(), selector=SelectKBest(), selector__k=50;, score=0.692 total time=10.2min
[CV 3/5; 6/9] START clf=XGBClassifier(base_score=None, booster=None, colsample_byl

In [43]:
grid.best_estimator_

Pipeline(steps=[('imputer', SimpleImputer()), ('scaler', RobustScaler()),
                ('selector', SelectKBest(k=100)),
                ('pca', KernelPCA(n_components=50)),
                ('clf',
                 XGBClassifier(base_score=0.5, booster='gbtree',
                               colsample_bylevel=1, colsample_bynode=1,
                               colsample_bytree=1, enable_categorical=False,
                               gamma=0, gpu_id=-1, importance_type=None,
                               interaction_constraints='', learning_rate=0.05,
                               max_delta_step=0, max_depth=7,
                               min_child_weight=1, missing=nan,
                               monotone_constraints='()', n_estimators=500,
                               n_jobs=4, num_parallel_tree=1,
                               objective='multi:softprob', predictor='auto',
                               random_state=0, reg_alpha=0, reg_lambda=1,
                  

In [54]:
# total fit time in min
grid.cv_results_['mean_fit_time'].sum()/60+grid.refit_time_/60

47.13453327814737

In [49]:
grid.cv_results_

{'mean_fit_time': array([355.32276225, 352.91089821, 360.78657513, 446.23621502,
        542.56032243, 492.97693124]),
 'std_fit_time': array([ 7.37814669,  2.24550966,  3.57112282,  3.5199128 ,  2.80051938,
        35.66951078]),
 'mean_score_time': array([0.36865253, 0.2251111 , 0.27229733, 0.24745607, 0.3048562 ,
        0.29407144]),
 'std_score_time': array([0.15649779, 0.02188388, 0.03999984, 0.02521694, 0.03070745,
        0.02930795]),
 'param_clf': masked_array(data=[XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
                                  colsample_bynode=None, colsample_bytree=None,
                                  enable_categorical=False, gamma=None, gpu_id=None,
                                  importance_type=None, interaction_constraints=None,
                                  learning_rate=0.05, max_delta_step=None, max_depth=7,
                                  min_child_weight=None, missing=nan, monotone_constraints=None,
              

In [64]:
grid.best_estimator_

Pipeline(steps=[('imputer', SimpleImputer()), ('scaler', RobustScaler()),
                ('selector', SelectKBest(k=100)),
                ('pca', KernelPCA(n_components=50)),
                ('clf',
                 XGBClassifier(base_score=0.5, booster='gbtree',
                               colsample_bylevel=1, colsample_bynode=1,
                               colsample_bytree=1, enable_categorical=False,
                               gamma=0, gpu_id=-1, importance_type=None,
                               interaction_constraints='', learning_rate=0.05,
                               max_delta_step=0, max_depth=7,
                               min_child_weight=1, missing=nan,
                               monotone_constraints='()', n_estimators=500,
                               n_jobs=4, num_parallel_tree=1,
                               objective='multi:softprob', predictor='auto',
                               random_state=0, reg_alpha=0, reg_lambda=1,
                  

In [50]:
grid.best_score_

0.7422394774473015

In [56]:
y_pred = grid.best_estimator_.predict(X_test_full)

In [72]:
file_name = 'results_first_run_partdata.csv'

In [73]:
df_res = pd.DataFrame({'id': np.arange(len(y_pred)), 'y': y_pred.ravel()})
df_res.to_csv(f'submissions/{file_name}', header=True, index=False)