# 모듈로드

In [520]:
import numpy as np
import pandas as pd
import warnings
import gc
from tqdm import tqdm_notebook as tqdm
import lightgbm as lgb
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from sklearn.metrics import roc_auc_score
warnings.filterwarnings("ignore")
gc.enable()

In [521]:
pd.set_option('max_rows', 500)
pd.set_option('max_colwidth', 500)
pd.set_option('max_columns', 500)

# 데이터로드

In [522]:
train_raw = pd.read_csv('./data/train.csv')
test_raw = pd.read_csv('./data/test.csv')
train_raw.shape, test_raw.shape

((200000, 202), (200000, 201))

In [523]:
bbiggu = pd.read_csv('./data_temp/bbiggu_1.csv')
bbiggu['target'] = train_raw.target

normal_idx = bbiggu[(bbiggu.target == 0) & (bbiggu.pred < 0.1)].index.values
bbiggu_idx = bbiggu[(bbiggu.target == 1) & (bbiggu.pred < 0.1)].index.values

In [524]:
train = train_raw.copy()
test = test_raw.copy()

In [525]:
col_list = train.columns[2:]

# 로직 설계

* target 0 => 110000개
* target 1 => 2600개
* 나머지 35700개에서 삐꾸 식별기 만들기

(1) 1개씩 Greedy 하게 ?

(2) 

(3)

(4)

In [589]:
train = train_raw.copy()
temp_1 = train.loc[normal_idx].copy().iloc[:100000]
temp_2 = train.loc[bbiggu_idx].copy().iloc[:2600]

dummy_1 = train.loc[normal_idx].copy().iloc[100000:]
dummy_2 = train.loc[bbiggu_idx].copy().iloc[2600:]

temp_1.target = 0
temp_2.target = 1

train = temp_1.append(temp_2)
train = train.reset_index(drop=True)

dummy = dummy_1.append(dummy_2).sort_index()
dummy.target = 1

In [592]:
target = train['target']

In [593]:
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fold_, (trn_idx, val_idx) in enumerate(folds.split(train, target.values)):
    break

## logic

In [594]:
param = {
    'bagging_freq': 5,
    'bagging_fraction': 0.335,
    'boost_from_average': False,
    'boost': 'gbdt',
    'feature_fraction_seed': 47,
    'feature_fraction': 0.041,
    'learning_rate': 0.01,
    'max_depth': -1,
    'metric':'auc',
    'min_data_in_leaf': 80,
    'min_sum_hessian_in_leaf': 10.0,
    'num_leaves': 13,
    'num_threads': 8,
    'tree_learner': 'serial',
    'objective': 'binary', 
    'silent': True,
    'num_threads': 8
}

In [634]:
~dummy.iloc[[i]].ID_code.isin(bbiggu[bbiggu.ID_code.isin(dummy.ID_code) & (bbiggu.bbiggu == 1)].ID_code).values[0]

True

In [635]:
num_round = 10000
root_dict = {"ID_codes":[], "iteration":[], "score":[], "index":[]}

for i in tqdm(range(dummy.shape[0])):
    
    i = i - 1
    
    if ~dummy.iloc[[i]].ID_code.isin(bbiggu[bbiggu.ID_code.isin(dummy.ID_code) & (bbiggu.bbiggu == 1)].ID_code).values[0]:
        continue
        
    
    if i == -1:
        train_new = train
        target_new = train_new.target
        
        train_index = trn_idx.copy().tolist()
        validation_index = val_idx.copy().tolist()
    else:
        new_data = dummy.iloc[[i]]

        train_new = train.append(new_data)
        target_new = train_new.target

        train_index = trn_idx.copy().tolist()
        validation_index = val_idx.copy().tolist()
        validation_index.extend(new_data.index.tolist())

    trn_data = lgb.Dataset(train_new.loc[train_index][col_list], label=target_new.loc[train_index])
    val_data = lgb.Dataset(train_new.loc[validation_index][col_list], label=target_new.loc[validation_index])

    clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data],verbose_eval=False, early_stopping_rounds = 200)
    
    if i == -1:
        root_dict['ID_codes'].append('base')
        root_dict['iteration'].append(clf.best_iteration)
        root_dict['score'].append(roc_auc_score(target_new.loc[validation_index], clf.predict(train_new.loc[validation_index][col_list], num_iteration=clf.best_iteration)))
        root_dict['index'].append(i)

        del train_new, target_new, train_index, validation_index, trn_data, val_data
        gc.collect()
    else:
        root_dict['ID_codes'].append(new_data.ID_code.values[0])
        root_dict['iteration'].append(clf.best_iteration)
        root_dict['score'].append(roc_auc_score(target_new.loc[validation_index], clf.predict(train_new.loc[validation_index][col_list], num_iteration=clf.best_iteration)))
        root_dict['index'].append(i)
    
        del new_data, train_new, target_new, train_index, validation_index, trn_data, val_data
        gc.collect()

HBox(children=(IntProgress(value=0, max=45333), HTML(value='')))

KeyboardInterrupt: 

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [627]:
a = bbiggu[bbiggu.ID_code.isin(dummy.ID_code) & (bbiggu.bbiggu == 1)]

In [636]:
pd.DataFrame(root_dict)

Unnamed: 0,ID_codes,iteration,score,index
0,train_152164,1999,0.654372,10128
1,train_152273,1999,0.654397,10201
2,train_152312,1999,0.654401,10232
3,train_152364,1999,0.654649,10271
4,train_152376,1999,0.654415,10278
5,train_152500,1999,0.653266,10364
6,train_152510,1999,0.653405,10371
7,train_152547,1999,0.653152,10399
8,train_152584,1541,0.653017,10428
9,train_152737,1999,0.654102,10539


In [638]:
t = pd.read_csv('./data_temp/dict.csv')

In [640]:
t

Unnamed: 0,ID_codes,iteration,score,index
0,base,1999,0.654277,-1
1,train_138117,1999,0.653124,0
2,train_138118,1999,0.653331,1
3,train_138119,1999,0.654783,2
4,train_138120,1999,0.653840,3
5,train_138121,1999,0.653701,4
6,train_138123,1541,0.652762,5
7,train_138126,1999,0.653240,6
8,train_138127,1999,0.654110,7
9,train_138129,1999,0.654354,8


In [639]:
t[t.iteration > 2000]

Unnamed: 0,ID_codes,iteration,score,index
243,train_138462,3054,0.665343,242
715,train_139131,3054,0.666401,714


In [612]:
idcodes = pd.DataFrame(root_dict).ID_codes

In [610]:
pd.DataFrame(root_dict).to_csv('./data_temp/dict.csv', index=False)

In [564]:
dummy.iloc[[i]].index.tolist()

[138138]

In [562]:
train_new.loc[trn_idx].head()

Unnamed: 0,ID_code,target,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,var_40,var_41,var_42,var_43,var_44,var_45,var_46,var_47,var_48,var_49,var_50,var_51,var_52,var_53,var_54,var_55,var_56,var_57,var_58,var_59,var_60,var_61,var_62,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_130,var_131,var_132,var_133,var_134,var_135,var_136,var_137,var_138,var_139,var_140,var_141,var_142,var_143,var_144,var_145,var_146,var_147,var_148,var_149,var_150,var_151,var_152,var_153,var_154,var_155,var_156,var_157,var_158,var_159,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
0,train_0,0,8.9255,-6.7863,11.9081,5.093,11.4607,-9.2834,5.1187,18.6266,-4.92,5.747,2.9252,3.1821,14.0137,0.5745,8.7989,14.5691,5.7487,-7.2393,4.284,30.7133,10.535,16.2191,2.5791,2.4716,14.3831,13.4325,-5.1488,-0.4073,4.9306,5.9965,-0.3085,12.9041,-3.8766,16.8911,11.192,10.5785,0.6764,7.8871,4.6667,3.8743,-5.2387,7.3746,11.5767,12.0446,11.6418,-7.017,5.9226,-14.2136,16.0283,5.3253,12.9194,29.046,-0.694,5.1736,-0.7474,14.8322,11.2668,5.3822,2.0183,10.1166,16.1828,4.959,2.0771,-0.2154,8.6748,9.5319,5.8056,22.4321,5.0109,-4.701,21.6374,0.5663,5.1999,8.86,43.1127,18.3816,-2.344,23.4104,6.5199,12.1983,13.6468,13.8372,1.3675,2.9423,-4.5213,21.4669,9.3225,16.4597,7.9984,-1.7069,-21.4494,6.7806,11.0924,9.9913,14.8421,0.1812,8.9642,16.2572,2.1743,-3.4132,9.4763,13.3102,26.5376,1.4403,14.71,6.0454,9.5426,17.1554,14.1104,24.3627,2.0323,6.7602,3.9141,-0.4851,2.524,1.5093,2.5516,15.5752,-13.4221,7.2739,16.0094,9.7268,0.8897,0.7754,4.2218,12.0039,13.8571,-0.7338,-1.9245,15.4462,12.8287,0.3587,9.6508,6.5674,5.1726,3.1345,29.4547,31.4045,2.8279,15.6599,8.3307,-5.6011,19.0614,11.2663,8.6989,8.3694,11.5659,-16.4727,4.0288,17.9244,18.5177,10.78,9.0056,16.6964,10.4838,1.6573,12.1749,-13.1324,17.6054,11.5423,15.4576,5.3133,3.6159,5.0384,6.676,12.6644,2.7004,-0.6975,9.5981,5.4879,-4.7645,-8.4254,20.8773,3.1531,18.5618,7.7423,-10.1245,13.7241,-3.5189,1.7202,-8.4051,9.0164,3.0657,14.3691,25.8398,5.8764,11.8411,-19.7159,17.5743,0.5857,4.4354,3.9642,3.1364,1.691,18.5227,-2.3978,7.8784,8.5635,12.7803,-1.0914
1,train_2,0,8.6093,-2.7457,12.0805,7.8928,10.5825,-9.0837,6.9427,14.6155,-4.9193,5.9525,-0.3249,-11.2648,14.1929,7.3124,7.5244,14.6472,7.6782,-1.7395,4.7011,20.4775,17.7559,18.1377,1.2145,3.5137,5.6777,13.2177,-7.994,-2.9029,5.8463,6.1439,-11.1025,12.4858,-2.2871,19.0422,11.0449,4.1087,4.6974,6.9346,10.8917,0.9003,-13.5174,2.2439,11.5283,12.0406,4.1006,-7.9078,11.1405,-5.7864,20.7477,6.8874,12.9143,19.5856,0.7268,6.4059,9.3124,6.2846,15.6372,5.82,1.1,9.1854,12.5963,-10.3734,0.8748,5.8042,3.7163,-1.1016,7.3667,9.8565,5.0228,-5.7828,2.3612,0.852,6.3577,12.1719,19.7312,19.4465,4.5048,23.2378,6.3191,12.8046,7.4729,15.7811,13.3529,10.1852,5.4604,19.0773,-4.4577,9.5413,11.9052,2.1447,-22.4038,7.0883,14.1613,10.508,14.2621,0.2647,20.4031,17.036,1.6981,-0.0269,-0.3939,12.6317,14.8863,1.3854,15.0284,3.9995,5.3683,8.6273,14.1963,20.3882,3.2304,5.7033,4.5255,2.1929,3.129,2.9044,1.1696,28.7632,-17.2738,2.1056,21.1613,8.9573,2.7768,-2.1746,3.6932,12.4653,14.1978,-2.5511,-0.9479,17.1092,11.5419,0.0975,8.8186,6.6231,3.9358,-11.7218,24.5437,15.5827,3.8212,8.6674,7.3834,-2.4438,10.2158,7.4844,9.1104,4.3649,11.4934,1.7624,4.0714,-1.2681,14.333,8.0088,4.4015,14.1479,-5.1747,0.5778,14.5362,-1.7624,33.882,11.6041,13.207,5.8442,4.7086,5.7141,-1.041,20.5092,3.279,-5.5952,7.3176,5.769,-7.0927,-3.9116,7.2569,-5.8234,25.682,10.9202,-0.3104,8.8438,-9.7009,2.4013,-4.2935,9.3908,-13.2648,3.1545,23.0866,-5.3,5.3745,-6.266,10.1934,-0.8417,2.9057,9.7905,1.6704,1.6858,21.6042,3.1417,-6.5213,8.2675,14.7222,0.3965
2,train_4,0,9.8369,-1.4834,12.8746,6.6375,12.2772,2.4486,5.9405,19.2514,6.2654,7.6784,-9.4458,-12.1419,13.8481,7.8895,7.7894,15.0553,8.4871,-3.068,6.5263,11.3152,21.4246,18.9608,10.1102,2.7142,14.208,13.5433,3.1736,-3.3423,5.9015,7.9352,-3.1582,9.4668,-0.0083,19.3239,12.4057,0.6329,2.7922,5.8184,19.3038,1.445,-5.5963,14.0685,11.9171,11.5111,6.9087,-65.4863,13.8657,0.0444,-0.1346,14.4268,13.3273,10.4857,-1.4367,5.7555,-8.5414,14.1482,16.984,6.1812,1.9548,9.2048,8.6591,-27.7439,-0.4952,-1.7839,5.267,-4.3205,6.986,1.6184,5.0301,-3.2431,40.1236,0.7737,-0.7264,4.5886,-4.5346,23.3521,1.0273,19.16,7.1734,14.3937,2.9598,13.3317,-9.2587,-6.7075,7.8984,14.5265,7.0799,20.167,8.0053,3.7954,-39.7997,7.0065,9.3627,10.4316,14.0553,0.0213,14.7246,35.2988,1.6844,0.6715,-22.9264,12.3562,17.341,1.694,7.1179,5.1934,8.823,10.6617,14.0837,28.2749,-0.1937,5.9654,1.0719,7.9923,2.9138,-3.6135,1.4684,25.6795,13.8224,4.7478,41.1037,12.714,5.2964,9.7289,3.937,12.1316,12.5815,7.0642,5.6518,10.9346,11.4266,0.9442,7.7532,6.6173,-6.8304,6.473,17.1728,25.8128,2.6791,13.9547,6.6289,-4.3965,11.7159,16.108,7.6874,9.157,11.567,-12.7047,3.7574,9.911,20.1461,1.2995,5.8493,19.8234,4.7022,10.6101,13.0021,-12.6068,27.0846,8.0913,33.5107,5.6953,5.4663,18.2201,6.5769,21.2607,3.2304,-1.7759,3.1283,5.5518,1.4493,-2.6627,19.8056,2.3705,18.4685,16.3309,-3.3456,13.5261,1.7189,5.1743,-7.6938,9.7685,4.891,12.2198,11.8503,-7.8931,6.4209,5.927,16.0201,-0.2829,-1.4905,9.5214,-0.1508,9.1942,13.2876,-1.5121,3.9267,9.5031,17.9974,-8.8104
3,train_5,0,11.4763,-2.3182,12.608,8.6264,10.9621,3.5609,4.5322,15.2255,3.5855,5.979,0.801,-0.6192,13.638,1.2589,8.1939,14.9894,12.0763,-1.471,6.7341,14.8241,19.7172,11.9882,1.0468,3.8663,4.7252,13.9427,-1.2796,-4.3763,5.1494,0.4124,-5.0732,4.901,1.5459,15.6423,10.7209,15.1886,1.8685,7.7223,5.5317,2.2308,2.6553,13.973,11.5015,11.6891,14.1062,-44.7257,10.647,-24.6935,29.9912,13.5894,13.2355,5.5513,-7.7762,5.2399,7.0189,15.2849,11.8258,5.9587,4.7676,8.2253,11.1254,-0.7472,-1.6063,6.0702,6.0604,5.4693,4.9098,14.116,5.011,-5.6684,35.2664,0.678,8.1653,9.9031,10.5392,14.4814,12.7784,17.4977,3.3793,16.0082,-5.4905,15.3766,1.8135,-1.9153,-8.3058,20.169,2.6433,19.9457,9.4972,-3.5982,-1.1717,6.9204,8.8093,10.5375,10.2029,1.1735,14.0648,17.91,0.9217,0.0886,-8.3794,19.8236,12.278,1.6012,15.409,4.2972,10.5131,16.4164,14.1434,23.9731,7.004,7.1879,3.9414,9.9727,3.6737,0.897,2.2988,-18.1194,8.6835,-2.8671,11.2701,9.0465,-1.5511,9.9305,4.3756,12.3697,13.0031,5.0993,-5.8702,15.0534,12.7212,0.5762,7.0965,6.6143,-8.8256,-0.9844,26.1603,11.9963,-5.9018,13.3065,4.4419,-0.0496,18.7239,14.1586,9.6607,2.6134,12.965,4.728,3.9799,1.4462,19.5171,7.5903,5.7223,15.2331,15.4401,-2.0738,12.4068,-16.8151,24.2054,4.9455,16.5552,5.3739,6.4487,11.5631,1.3847,14.9638,2.8455,-9.0953,3.8278,5.9714,-6.1449,-2.0285,18.4106,1.4457,21.8853,9.2654,-6.5247,10.7687,-7.6283,1.0208,7.1968,11.1227,2.2257,6.4056,21.055,-13.6509,4.7691,-8.9114,15.1007,2.4286,-6.3068,6.6025,5.2912,0.4403,14.9452,1.0314,-3.6241,9.767,12.5809,-4.7602
4,train_7,0,13.558,-7.9881,13.8776,7.5985,8.6543,0.831,5.689,22.3262,5.0647,7.1971,1.4532,-6.7033,14.2919,10.9699,6.919,14.2459,9.5376,-0.7226,5.1548,17.1535,13.7326,14.4195,1.2375,3.1711,9.1258,13.325,3.3883,-0.4418,5.4501,7.9894,-0.9976,14.5609,-2.0712,16.9717,11.5257,-0.499,2.8303,7.5772,9.0294,1.2659,-12.0219,14.1991,11.2721,11.2407,13.5776,-27.2543,14.0908,0.8217,13.1139,9.3627,13.7538,11.0741,-8.4732,6.6503,-14.4815,9.6728,18.6279,6.5482,3.8325,8.0006,13.5775,-3.7978,0.1582,4.2888,4.4444,-8.4758,5.2078,8.772,5.0128,-7.411,27.0016,1.0085,3.7054,16.4133,20.0,10.2128,0.7557,20.7298,6.7311,14.5571,-3.813,13.3314,-10.5909,6.7943,-0.3443,23.553,6.986,6.0584,6.3539,6.7745,-24.7167,6.8082,9.1942,11.2303,11.8245,-0.6602,18.3689,27.5106,2.0848,2.2442,-0.6013,20.9117,16.5885,1.4998,14.1148,4.1641,8.5132,16.4147,14.2331,15.1642,2.5181,7.1014,3.3046,5.7495,2.8527,4.1954,4.9895,19.4388,0.2901,0.0794,31.9043,12.7068,1.6484,-1.7767,3.0002,11.9988,13.4588,0.7693,1.7439,13.4471,11.3924,0.4158,9.8819,7.0754,-1.8253,-1.9968,21.5241,28.9405,-0.0447,17.2586,1.0003,-8.4069,18.4261,16.1918,9.745,12.1132,9.1689,-10.6657,4.1435,26.7837,14.055,14.582,7.2698,13.8387,13.2217,-1.5242,11.2587,-0.0072,8.6696,19.2246,23.7765,5.4098,5.1402,10.7013,-8.2583,26.3286,2.6085,-10.9163,8.7362,5.2273,8.9519,-2.3522,6.1335,0.0876,19.5642,13.2008,-11.1786,17.3041,-0.6535,0.0592,5.114,10.5478,6.9736,6.9724,24.0369,-4.822,8.4947,-5.9076,18.8663,1.9731,13.17,6.5491,3.9906,5.8061,23.1407,-0.3776,4.2178,9.4237,8.6624,3.4806


In [552]:
trn_idx

array([     0,      1,      2, ..., 102596, 102597, 102599])

In [551]:
train_new.tail()

Unnamed: 0,ID_code,target,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,var_40,var_41,var_42,var_43,var_44,var_45,var_46,var_47,var_48,var_49,var_50,var_51,var_52,var_53,var_54,var_55,var_56,var_57,var_58,var_59,var_60,var_61,var_62,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_130,var_131,var_132,var_133,var_134,var_135,var_136,var_137,var_138,var_139,var_140,var_141,var_142,var_143,var_144,var_145,var_146,var_147,var_148,var_149,var_150,var_151,var_152,var_153,var_154,var_155,var_156,var_157,var_158,var_159,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
102596,train_151577,1,6.5763,-9.2927,7.0587,4.0243,14.2409,4.0407,7.2237,13.7335,1.1738,9.3207,7.8616,-7.7491,14.1115,18.4393,6.8216,13.8723,8.2116,-13.8151,8.4051,26.5516,19.1389,10.1803,3.2025,3.5388,6.9551,14.3698,-5.9343,-3.5603,5.8303,1.1116,-19.2327,13.3207,-1.3064,20.1506,12.0015,-8.3937,0.0597,2.5207,12.6765,0.9255,-18.3639,23.8288,11.4946,11.6506,-4.4341,-34.1885,11.4974,-21.9398,15.2082,17.7783,12.6856,12.5614,0.1762,6.3621,-4.0127,7.2061,19.4519,4.9843,4.6779,8.3289,9.7394,-5.9096,1.3043,6.6146,5.9744,1.8271,5.935,28.8586,5.0021,-5.1251,9.4,1.1467,0.935,15.9409,32.2042,21.8529,6.1134,22.464,8.5298,13.3294,16.5035,12.3558,4.9601,-8.2005,-14.2161,24.2554,12.2943,15.3481,12.6156,5.7628,-0.2546,6.8502,16.0076,10.3339,8.7564,-0.4265,11.6693,13.9336,2.864,2.0569,-28.5694,20.8398,39.0708,1.2685,12.0281,4.7551,8.1797,19.3918,14.305,12.7187,1.3498,6.4444,2.4983,2.5,2.9398,4.0043,2.62,-7.9332,6.1303,4.6319,21.4555,13.0406,0.577,0.1601,8.5217,12.229,13.664,2.9369,-6.4586,19.1639,11.6384,0.632,4.9018,6.3338,-4.8333,-9.6063,43.3477,6.7191,7.3015,-5.8327,5.302,4.8874,10.7423,8.9327,8.9809,2.117,8.24,-2.6857,3.9165,4.2545,15.0857,4.5542,12.194,17.1749,16.2005,0.04,14.0404,-13.9321,12.9819,9.8541,29.0063,5.9409,5.9035,15.6608,-2.0947,20.1795,2.7498,-12.7955,9.0331,5.6516,6.3809,-1.8596,31.8145,-4.9734,15.2135,14.6087,-12.9447,12.795,2.5472,3.3544,-8.8933,11.4857,3.3655,6.0209,21.3709,-4.6332,10.1302,-8.9551,16.4936,-0.6171,8.5721,8.7168,0.4837,8.4766,17.6744,1.4059,-3.59,8.6509,16.8789,-13.6006
102597,train_151649,1,6.1289,-0.9143,12.7399,6.0544,11.081,-15.1958,5.8477,15.9536,-5.0501,7.2621,-1.0692,2.8731,14.2875,7.9075,6.5715,14.926,7.7745,2.3625,14.5175,17.215,6.7918,29.9404,2.7616,3.1673,6.6488,14.0225,-2.4687,-1.9853,5.545,11.8214,-0.3965,8.9355,-2.7562,12.29,10.7838,-1.0841,5.7148,6.8898,9.9758,4.0379,-10.2224,13.8139,10.8574,11.4719,12.4035,25.1443,10.5083,-28.4608,27.0596,27.5872,12.0777,19.3836,8.1772,7.0722,-2.8917,5.4129,14.0844,6.7769,3.6377,8.0524,8.348,-4.6881,0.6951,-1.4784,4.4147,2.6906,5.4844,11.7506,5.0142,-1.6742,28.8856,0.7254,6.0861,28.272,12.2195,7.9287,1.686,10.616,6.8057,14.3437,16.4832,14.261,6.0781,-4.3175,6.5706,19.9106,5.8357,16.4102,5.0448,0.2243,-19.5536,6.9334,10.6517,10.7728,10.8142,-0.3631,14.8722,30.3217,1.4813,3.429,-12.9202,17.6068,31.13,1.6314,11.3042,3.7511,5.6434,24.4379,14.4306,27.9529,4.3833,7.1574,4.6208,6.9851,4.1955,-5.7756,0.2161,17.145,8.4425,-2.9172,42.6729,11.1628,-4.1577,6.9172,3.2385,12.0891,14.7703,5.4723,-4.1918,15.5376,12.8864,1.0819,10.3535,7.6673,5.9942,0.4745,20.1532,23.81,-5.1304,27.8166,12.7814,5.2838,20.1454,17.4299,8.2753,8.8493,7.6316,-8.4744,4.2197,-5.3733,13.8616,13.0652,8.2993,19.8616,5.7277,-4.9896,12.3685,-11.5048,14.6098,13.6174,32.4721,5.3681,6.799,-2.0961,-12.1838,16.4221,2.9892,-10.8365,3.7771,6.1381,2.3724,-5.2785,22.9437,-1.0724,24.7147,10.6186,7.3569,10.8793,-9.4887,-0.734,6.7248,11.6017,2.5716,3.7516,5.0162,-3.2573,7.9806,-4.2111,21.0914,-0.1439,2.8617,7.724,3.3839,2.807,15.7526,-1.5098,7.5833,9.2552,16.835,-1.8993
102598,train_151650,1,7.7834,-8.9858,11.5836,6.6377,7.6076,-0.2626,5.973,15.5026,4.4203,9.3755,-7.8258,11.8922,14.2157,6.3762,5.4837,14.0239,10.3398,-6.6971,7.7148,27.8073,11.2756,15.9048,2.0566,2.6189,12.9405,13.8867,-4.2746,-0.5891,4.4879,6.947,-12.1086,9.9474,0.1021,19.9214,10.2511,-1.7225,10.1936,3.4983,9.1344,-6.5711,-11.224,15.9631,10.3438,11.6536,14.9744,12.7612,13.8684,3.5185,7.8028,1.4656,13.5062,8.3532,1.4424,5.4491,-9.0073,8.0687,19.062,5.2877,2.7111,9.0958,17.6132,-18.338,1.3207,0.5204,5.1339,7.6233,6.675,18.6526,5.0297,-4.8558,10.1928,0.4772,-3.2904,18.0781,25.5985,21.328,9.7654,18.3117,5.5099,14.0409,1.5353,10.0572,-17.9742,6.5269,5.1446,13.2503,9.0274,17.4386,12.1857,1.0105,-17.496,6.8411,10.1161,10.9661,9.8648,-0.3429,4.4178,23.511,2.1155,1.3879,-2.3118,11.5377,34.2252,1.421,12.5875,2.8279,6.718,24.843,14.3574,12.5016,7.7045,7.6494,1.1005,2.1456,3.7533,1.4799,5.7881,11.809,-13.0797,4.6028,27.8903,10.2919,-1.8575,14.8628,8.8603,12.3393,13.8041,-2.1152,-1.8574,16.2563,11.044,0.9458,7.4858,6.5622,-7.6325,-19.8301,5.0054,12.154,0.8889,1.4466,4.3741,2.3111,15.2478,14.4373,7.1915,9.4454,6.8892,-9.7701,4.1391,-6.5948,21.6202,13.1611,4.6318,12.4624,5.4769,-3.4827,13.9395,-14.9361,11.9579,14.0317,18.8818,5.7811,6.9998,23.9484,-1.1515,25.4671,2.5258,-11.7064,3.7792,5.6665,1.7593,-3.3208,14.151,-1.1912,26.1612,11.7938,-5.0929,12.3636,4.4822,3.572,-4.9432,8.8014,-2.5623,6.2481,27.3171,-5.6838,11.7759,6.7356,13.4732,-0.8801,4.8594,5.2367,1.5506,11.8823,16.5957,-0.8957,-6.6106,9.2358,14.3793,-16.1768
102599,train_152061,1,6.896,1.2085,15.7742,7.4885,11.3211,-6.0795,5.7517,14.9615,-2.1829,6.6585,-9.6077,9.479,14.2197,14.8391,7.2515,14.8036,7.5749,-7.708,8.7036,6.0143,21.6682,17.3945,-1.2065,2.4049,8.8194,13.6601,0.3658,-0.6568,5.9534,4.107,3.142,15.1043,-0.5427,17.1184,11.6214,5.365,4.4787,6.1819,16.2705,3.0945,-18.0108,0.3981,12.1296,11.8646,-0.1975,-21.162,9.816,-29.7288,28.2001,22.0542,13.1736,1.8402,-3.1454,5.7823,1.6112,6.8927,17.36,6.4221,0.1423,7.9208,8.2394,-14.4916,-0.4571,-2.127,7.7881,1.9514,5.2448,14.9008,5.0227,-4.0677,33.2384,0.273,1.3426,13.6464,-1.7105,25.602,12.7022,16.8365,4.9719,15.6596,2.3097,13.6533,1.2184,0.4394,-4.6052,15.6379,4.9819,20.6334,3.2007,6.4299,-9.1131,6.9128,6.9198,10.1495,12.093,-0.3116,13.0104,27.6371,1.69,2.0176,-2.6913,21.2771,13.4838,1.6217,12.0025,3.7138,6.3153,15.8718,14.1025,12.7134,2.3354,6.0569,6.4854,5.5945,2.8438,4.1298,2.2743,2.3702,-22.4602,2.8679,26.5676,10.0746,-3.9062,-8.7587,4.8517,13.2814,13.2804,0.5099,-0.2375,16.17,12.0643,1.0758,6.1702,7.187,2.1039,4.8441,35.7114,10.5052,6.3755,15.3595,4.5276,7.9486,11.0837,5.1448,9.5751,7.7905,10.8598,-5.2555,4.0418,-11.6712,14.5261,10.8627,7.0593,17.5641,9.2089,1.2175,14.0311,-7.7902,8.484,12.244,26.3117,5.5193,3.1236,15.5722,-9.6694,17.8268,3.3384,-4.3821,3.6325,6.0678,-5.2404,-5.6675,13.1882,2.7242,20.5159,10.8986,9.4302,14.3463,0.1571,6.3618,-8.4591,9.2334,11.5785,7.1021,15.4795,-9.337,8.5805,-20.2111,18.7687,0.4999,1.2374,10.1448,4.411,6.492,12.596,0.4303,0.1748,9.161,18.7987,-5.8705
138177,train_138177,0,13.9714,-1.943,8.6637,9.7935,11.0794,-21.3777,4.2713,13.108,0.9655,7.8917,5.5676,-7.8794,13.9548,16.1481,3.4394,15.0469,10.5822,2.1086,25.3637,9.3391,22.1091,19.2099,6.4005,2.3025,12.8847,13.8634,-13.5704,-2.3954,6.2668,7.1642,-2.8194,13.6002,-0.0972,12.3714,10.8064,0.2132,2.1673,5.2441,10.1075,5.2512,-6.5263,9.9046,11.4549,11.3758,9.6934,18.1162,8.8643,-17.9901,4.8244,9.1282,12.8453,22.3669,-2.7363,7.7804,2.7932,17.6836,20.0718,7.4704,8.9089,8.1252,16.8272,-16.0594,1.0498,6.855,5.7283,-1.4697,6.5847,8.2275,5.025,-5.927,16.3226,1.2079,-7.0473,24.8177,-2.2843,24.4855,0.6291,18.8757,6.687,13.3891,12.9276,15.0573,-6.9357,-2.9288,0.6711,20.3475,-5.9697,4.4113,6.6918,10.755,-20.6096,7.0343,16.1676,9.7682,8.9943,-0.7289,9.9164,36.5395,1.4658,-2.4887,-17.3262,8.828,32.6054,1.7381,7.7028,5.5114,11.2739,25.6252,14.2737,17.7549,7.8774,5.4963,0.3107,-0.5622,3.289,1.333,2.4998,26.207,6.2223,5.3259,15.532,11.094,-9.9938,9.7162,0.1389,12.5963,13.6006,6.5489,2.4392,9.6002,13.0782,0.9213,9.7176,6.7296,4.8669,3.4301,26.8287,12.4097,0.2054,7.08,1.013,12.1523,17.3149,9.2619,7.1146,-0.5391,13.9929,-3.209,3.968,-5.8937,12.4855,5.1763,4.7868,16.5409,8.0063,8.8839,12.8807,-12.698,6.0428,12.1487,21.4157,5.3444,3.9828,4.5367,-8.6112,22.4812,3.122,-2.5404,2.2202,5.774,-0.895,0.5358,26.045,7.4671,11.6859,11.1207,-1.6299,12.3262,6.115,2.1425,-6.8826,10.3087,6.4708,11.8421,5.82,-10.4948,10.2648,-4.1952,17.698,0.3311,-2.3739,6.389,2.2393,1.5349,18.6224,0.1489,13.503,9.9182,19.8614,-4.3776


In [546]:
dummy[dummy.target == 1].head()

Unnamed: 0,ID_code,target,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,var_40,var_41,var_42,var_43,var_44,var_45,var_46,var_47,var_48,var_49,var_50,var_51,var_52,var_53,var_54,var_55,var_56,var_57,var_58,var_59,var_60,var_61,var_62,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_130,var_131,var_132,var_133,var_134,var_135,var_136,var_137,var_138,var_139,var_140,var_141,var_142,var_143,var_144,var_145,var_146,var_147,var_148,var_149,var_150,var_151,var_152,var_153,var_154,var_155,var_156,var_157,var_158,var_159,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
152164,train_152164,1,8.0798,1.4421,7.7231,5.3061,12.1774,-7.2362,4.5544,20.7839,-2.3368,7.0138,5.6214,-3.6104,13.9094,4.6577,5.5277,15.4997,9.3696,-9.5902,18.5468,11.1342,25.0511,17.1605,5.822,2.4459,11.0589,13.454,-6.1849,-2.1696,6.5751,6.1677,-15.8315,8.7565,0.1912,9.7065,10.505,5.199,-1.4366,7.7711,15.1837,-3.2656,-3.0826,2.8055,12.0384,11.86,-2.8252,-12.9213,11.4665,8.2518,9.9909,28.6218,12.7814,7.3034,-7.2367,5.0884,10.8568,21.9516,19.6961,6.038,-1.3782,8.952,9.3637,-6.2561,3.1488,3.5782,6.7013,-0.1706,5.5357,13.3246,5.0231,-3.3629,11.0805,0.7406,2.0953,16.9251,-13.7506,30.2647,1.0636,23.2928,3.2001,13.5271,0.7789,14.8173,-1.8217,12.6964,1.1992,18.5791,5.2797,12.6361,6.7304,1.2357,-0.8029,7.2403,11.3176,11.2684,15.0826,-0.5832,0.1055,35.1438,2.018,-1.5791,-2.9528,11.9733,7.4244,1.7767,11.4002,5.4737,8.1628,22.078,14.0127,22.1688,10.7774,7.0761,4.3907,10.0443,1.9709,3.076,2.9918,-0.8473,-13.0662,0.8199,29.8408,14.1915,3.6146,4.7994,6.4216,12.1988,12.9733,-2.9092,1.5833,19.6921,12.7782,0.4143,7.5247,6.8147,-3.22,2.6964,30.1519,19.8925,-0.2937,8.8362,1.0609,-0.5087,8.0914,12.4674,8.8358,0.1825,7.7351,10.1425,4.0975,1.9108,18.3876,10.6844,7.7044,19.318,5.2755,1.1882,11.7885,1.0773,29.1637,6.6526,36.089,5.8176,7.3708,10.1065,3.8203,15.5952,2.9883,-14.1768,11.2927,5.75,1.6411,0.4846,20.1197,0.3392,13.0102,10.4872,-11.9574,8.4864,6.4103,9.7912,-3.121,10.8858,-7.6558,11.3112,5.0457,-0.0805,8.3788,-13.8439,18.0611,1.2774,6.7857,6.8344,1.3545,8.9154,22.2941,-0.5776,-0.8867,9.9084,18.0908,-1.1923
152273,train_152273,1,7.3838,-5.0633,18.0705,8.028,10.2105,-12.3824,6.882,15.381,-1.1611,6.0517,-8.8538,-5.4708,13.8313,5.0771,6.1174,14.3858,8.1919,-13.9765,12.369,24.4888,11.2618,14.0639,1.8877,3.1576,4.878,13.9419,-3.5424,-0.5076,5.0367,1.2326,0.1833,8.6083,-1.8112,13.5426,11.6207,3.576,2.2488,2.8734,8.0048,-2.2662,-1.8543,13.9275,10.5169,11.4657,4.512,13.9858,13.3413,-4.6283,30.8191,34.1645,11.4717,16.6695,8.3936,6.2894,-12.0487,18.9022,11.4652,6.1339,-6.1655,10.4676,16.6034,-2.1503,3.0057,4.772,7.7714,-5.665,6.74,15.1724,5.0306,0.6358,6.6776,0.0753,0.7797,17.7794,39.5983,14.6759,8.3391,19.8518,3.9454,17.2056,5.7635,15.2529,0.3206,0.7382,-10.3322,19.9459,14.3683,7.7184,11.0583,2.7529,-26.1178,7.1,19.3064,10.6678,9.9821,-1.0803,20.6625,5.0317,1.4716,-0.1211,-5.0997,15.811,21.4548,1.2735,10.235,4.5052,8.5643,7.2206,14.2005,11.2053,0.2103,4.8063,4.094,1.7148,3.0601,-0.1039,-0.4812,19.0782,0.3424,-2.5121,36.4111,9.4428,9.6774,13.3525,8.4092,12.0628,12.8106,1.8896,2.9041,17.3562,12.4455,1.3503,8.5639,6.7028,-5.8707,-12.633,8.0859,19.0072,-4.7458,7.3886,3.9876,12.0899,16.2825,7.9126,8.4545,7.354,14.2831,-1.3989,4.1438,18.8842,12.1212,11.9382,5.896,14.3751,4.0629,3.0541,13.4245,-12.3089,14.8032,15.068,29.508,5.8259,6.8509,9.6783,3.1241,21.1461,3.5217,-3.1155,9.2434,5.9423,2.7138,1.1054,11.8807,-7.8253,18.8924,8.8501,0.2262,10.3198,-12.3115,2.7054,2.3146,8.6676,-5.3299,15.0031,12.5654,-6.3027,14.1485,-19.4587,18.4906,1.6809,-3.8506,4.4127,0.5793,10.2844,21.0327,-1.4657,0.5539,10.2911,14.1753,-14.2112
152312,train_152312,1,7.4984,-0.3937,7.5544,6.2319,10.8557,-20.014,6.0404,16.0769,1.2746,6.5228,-6.2148,-8.5656,13.9113,4.2828,4.384,13.9951,9.159,-1.9977,13.1892,25.7379,8.5556,9.1163,5.7636,2.0221,17.8967,13.4457,-1.9765,2.0295,5.4651,5.8359,1.582,8.6183,-0.0264,19.997,12.0708,5.1524,-2.649,2.33,9.2257,-0.1933,-4.2983,8.8863,11.2054,11.8372,15.0011,-4.3172,13.443,-12.9343,21.3575,24.7368,13.6225,14.6152,3.3421,5.648,2.7157,20.4995,14.043,5.3636,4.9483,8.877,10.3779,-34.2347,3.1659,0.2626,5.3898,8.1669,5.211,14.1632,5.0188,-0.2648,31.1422,0.5908,3.7391,22.7272,21.9054,12.3182,9.7397,19.5841,4.3829,13.477,7.5478,13.4026,-0.9065,0.5351,4.4791,18.7978,-0.3447,9.2017,7.2054,3.448,-0.3434,6.882,8.5774,10.5051,16.4576,-0.6401,16.1219,29.5688,1.8423,0.0627,-15.6595,20.4344,25.0021,1.8183,11.0619,3.8938,7.345,20.407,14.216,16.7609,-0.5119,7.8931,5.0593,5.3293,2.9237,1.4785,1.528,14.0018,7.5746,3.6815,34.1943,9.6564,2.8773,-0.8811,6.1293,12.1627,13.6139,-0.4649,2.9148,20.0897,11.9728,0.6803,8.0055,6.6413,-10.5319,-9.5245,30.7849,24.461,-2.528,-6.0584,3.2967,7.7071,11.9253,8.8009,7.743,-1.0318,14.2207,-2.7427,3.666,29.8156,17.4413,13.9582,12.8667,14.3515,8.5582,4.12,14.0386,-6.8139,4.9901,2.3276,40.4472,5.7399,4.18,12.016,5.7536,16.188,3.2555,0.7343,7.816,5.5409,-6.2642,6.1095,13.6426,5.3821,22.2421,8.726,-7.3418,12.8153,1.8509,0.6633,-1.465,9.7588,3.6756,16.0224,17.6219,2.8046,8.6817,-5.2644,16.6146,0.9931,3.5198,10.2471,2.9604,-3.3455,20.6308,0.5755,4.4869,7.2132,16.1858,-7.1869
152364,train_152364,1,7.0455,2.6738,9.3783,5.3103,14.3828,-1.7712,6.3165,15.6599,3.6336,8.2484,-0.0609,2.4833,13.8823,13.6578,3.6643,14.491,7.6515,-20.504,18.96,19.9074,3.313,11.8632,6.669,2.2591,6.0288,13.915,1.4677,-0.4084,6.0944,2.8881,-30.085,7.4088,-0.0029,19.5216,11.7339,-1.6171,-2.3929,3.0866,17.1896,-4.7314,12.1749,9.262,11.6058,12.0293,6.9499,-27.3268,7.6594,-20.1631,19.4445,11.1752,13.0083,11.77,-7.7241,6.4998,-4.3194,11.0881,12.8938,6.9589,7.2307,9.5684,6.2805,-5.8182,3.2589,0.6982,7.682,4.0197,5.9071,11.3824,5.0216,-4.815,33.0694,0.6474,-1.5564,17.1624,10.7557,9.1426,-4.1835,20.1777,9.3506,15.7507,8.8209,16.1519,6.3653,-2.49,9.046,15.0696,8.27,9.2001,10.1893,5.9656,-2.4393,6.9534,17.6613,10.8195,14.9646,-0.0742,26.1463,34.3557,2.4545,0.5562,0.0905,20.0478,24.2357,1.3625,10.8796,4.0933,8.974,20.4528,14.226,23.1783,6.8336,5.8424,3.2571,7.5948,4.4749,-0.7175,3.0567,5.5247,5.3577,-4.8846,28.189,9.8619,-2.1287,15.0254,4.2622,12.7392,14.4387,2.1136,-5.0161,10.4729,11.1617,0.7834,5.9786,6.4133,-6.8404,-16.3513,44.2066,23.3717,0.9489,-5.373,-0.5176,5.6458,16.0739,14.0245,8.0461,-4.2906,9.3924,6.2022,3.8703,7.5435,17.0135,8.554,12.0194,13.2565,7.7794,-8.7002,12.8402,0.6082,32.7529,4.6098,35.5423,5.0419,6.4754,15.89,0.8154,19.8112,2.6865,2.5155,5.5233,5.2116,-1.7077,-10.053,13.3376,0.0607,12.2139,8.2433,5.4485,8.7321,-5.8961,4.0183,3.5091,8.6294,0.5886,10.882,-1.1941,0.2577,10.5139,-7.744,14.5241,0.8812,-5.0739,5.5364,3.8801,1.9346,19.5093,0.5073,7.6779,7.7982,14.1151,6.464
152376,train_152376,1,9.3421,-1.8169,8.0716,6.3358,11.7304,-16.3254,5.0335,13.9517,-3.1797,5.8538,-4.5407,-8.2446,14.0668,1.9786,4.4033,14.2495,7.8218,-16.9252,24.639,19.7332,7.8731,18.6592,13.539,3.362,5.9367,13.4895,0.2435,1.1195,5.6628,4.1436,-5.24,11.9083,1.0257,23.0975,11.4538,6.4587,-0.959,6.9359,6.7466,4.994,-6.6151,6.9634,10.2803,11.3167,2.5847,28.7013,10.55,-5.7828,9.3836,25.015,13.1731,6.0044,-10.8499,5.9704,11.1769,16.7212,18.4269,7.1673,-0.1727,10.349,10.9979,-31.3339,4.7765,2.4037,3.4866,2.5746,4.7439,2.1949,5.0231,-2.6567,24.3221,0.4151,1.3403,19.7335,34.6741,21.3196,7.5744,12.715,4.0262,14.7881,14.2718,9.2213,14.5525,-2.7405,6.1533,21.7693,10.7328,13.4642,10.4996,4.5809,-30.2121,7.0663,10.9548,10.3942,10.678,-1.0147,20.891,-1.0122,2.785,-2.6827,-12.2037,5.8061,9.7967,1.6252,9.0296,2.3144,5.0594,17.22,14.2935,8.5001,3.1795,7.6953,3.6186,11.5355,2.4878,0.441,-0.8473,37.1105,-2.021,-1.2715,15.4769,9.2712,6.8483,16.436,6.3123,11.9344,13.5828,-2.7254,-0.4729,12.6872,12.0858,0.4632,5.6103,6.9498,-1.1742,-7.12,7.3113,11.6755,3.058,12.7378,6.5046,13.656,18.1043,17.2393,9.4307,4.8947,13.4122,-1.9915,4.2199,4.1539,14.3253,6.9274,5.3289,16.2092,7.0421,11.0884,12.6949,-9.106,21.965,13.6364,24.1508,5.5426,7.1647,5.1834,-2.9312,25.1419,3.3018,-7.6703,-1.6652,5.4405,1.8981,-0.7333,19.7045,-0.2216,18.0375,12.5751,10.0216,13.3819,-12.4812,2.9738,-4.1729,10.8384,9.7004,1.8199,15.0006,-1.7042,11.6043,-7.6766,18.8282,0.8961,2.4268,5.9516,1.666,12.491,15.376,-2.5221,9.7669,8.887,14.2861,-6.3397


In [511]:
train = train_raw.copy()
temp_1 = train.loc[normal_idx].copy().reset_index(drop=True)
temp_2 = train.loc[bbiggu_idx].copy().reset_index(drop=True)
temp_3 = temp_1.iloc[100000:100050]
temp_4 = temp_2.iloc[2500:3100]

temp_1 = temp_1.iloc[:2500]
temp_2 = temp_2.iloc[:2500]

temp_1.target = 0
temp_2.target = 1
temp_3.target = 1

train = temp_1.append(temp_2).append(temp_3)#.append(temp_4)
train = train.reset_index(drop=True)

In [512]:
train.target.value_counts()

1    2550
0    2500
Name: target, dtype: int64

In [513]:
train.tail()

Unnamed: 0,ID_code,target,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,var_40,var_41,var_42,var_43,var_44,var_45,var_46,var_47,var_48,var_49,var_50,var_51,var_52,var_53,var_54,var_55,var_56,var_57,var_58,var_59,var_60,var_61,var_62,var_63,var_64,var_65,var_66,var_67,var_68,var_69,var_70,var_71,var_72,var_73,var_74,var_75,var_76,var_77,var_78,var_79,var_80,var_81,var_82,var_83,var_84,var_85,var_86,var_87,var_88,var_89,var_90,var_91,var_92,var_93,var_94,var_95,var_96,var_97,var_98,var_99,var_100,var_101,var_102,var_103,var_104,var_105,var_106,var_107,var_108,var_109,var_110,var_111,var_112,var_113,var_114,var_115,var_116,var_117,var_118,var_119,var_120,var_121,var_122,var_123,var_124,var_125,var_126,var_127,var_128,var_129,var_130,var_131,var_132,var_133,var_134,var_135,var_136,var_137,var_138,var_139,var_140,var_141,var_142,var_143,var_144,var_145,var_146,var_147,var_148,var_149,var_150,var_151,var_152,var_153,var_154,var_155,var_156,var_157,var_158,var_159,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
5045,train_138178,1,4.7222,-6.5607,12.7414,10.7398,9.8036,-13.4954,6.3972,17.1144,-3.417,6.8393,-11.0083,-7.2956,13.932,12.9577,4.1975,14.307,8.7752,-6.2193,27.2111,8.5936,11.199,22.483,2.2544,3.9465,11.457,13.6128,-12.8651,-2.3583,6.7071,3.7255,-4.2776,8.4849,-1.6638,9.3282,11.8092,1.9516,0.3264,7.0073,6.4881,3.7484,3.3969,23.7103,12.2952,11.1853,19.3805,17.59,13.487,1.1742,2.5975,9.1361,12.3316,9.4293,-7.4846,5.02,-0.2612,5.6703,18.802,7.0088,2.6174,9.9541,6.1927,-15.5297,-1.4898,-2.1063,7.4961,-0.0577,6.3438,31.0242,5.0236,0.4833,33.9955,0.3957,-6.1819,9.1596,-6.2562,21.8667,8.5619,21.0451,4.168,13.3981,2.9557,15.3562,2.2393,9.4649,-9.6052,11.566,15.7281,1.9839,8.6876,8.9964,-8.7663,6.8815,12.8538,10.0849,14.4006,0.2452,5.333,32.835,2.0087,-1.5448,-13.1893,18.3891,0.1731,1.272,8.7964,3.5414,9.2645,12.8583,14.4898,15.894,4.0356,6.4597,5.7263,6.2884,3.9104,4.1657,3.202,11.7367,2.8704,-1.7354,41.0909,10.0676,-2.5246,5.4644,7.4987,12.6103,12.4785,0.4783,-5.2411,7.9304,12.1119,-0.1981,9.7289,6.8875,-5.0241,-5.1362,16.2673,6.5711,5.3739,2.4883,6.7125,4.8339,11.9066,10.5039,9.3287,1.8299,11.8195,-2.6244,3.8658,-4.3155,16.1019,3.2515,6.0872,15.7237,17.1726,-11.6236,13.4329,-0.3809,25.294,7.5328,17.7056,5.4026,6.2048,19.2789,3.5933,23.9582,2.4143,-2.9056,5.5566,5.157,1.0791,-5.2347,22.6354,-0.0001,14.1219,14.3546,2.8016,10.779,-2.57,3.1252,-6.9395,13.7118,3.2319,4.8317,-2.3609,-10.7237,6.833,0.9565,17.2911,1.7357,5.1976,4.8373,1.7556,11.0084,18.7561,-0.6506,-2.0771,10.6484,20.4937,-2.4981
5046,train_138179,1,9.8485,-0.7884,12.1896,8.9858,11.9194,-14.8527,6.0571,16.2003,5.5767,8.7277,0.9658,-12.2154,13.934,11.928,4.8543,14.2752,11.1435,-7.4627,11.681,25.4261,25.7691,22.4857,4.1029,3.4745,12.3614,14.0323,11.1886,-2.9525,6.5478,0.7201,-11.9535,10.8357,-0.6388,14.6646,10.8762,8.7063,8.5519,6.1109,14.3119,8.8851,-15.2695,13.2119,11.7721,11.3798,9.8293,-12.2583,9.8715,-17.7773,22.4313,22.4494,13.1501,17.1783,0.4875,6.6579,5.7704,16.8117,19.5838,6.0221,0.3114,8.1948,9.8983,-19.7468,0.5933,-3.2023,4.9432,-1.696,5.4379,0.5261,5.0189,-1.7481,22.3265,0.5462,2.1665,18.928,37.0709,10.0538,6.5298,24.1485,6.7379,16.561,20.2177,13.1715,14.2932,10.8245,-13.7896,16.5193,4.7796,19.9865,5.8611,4.8611,-25.6038,7.1146,18.4685,10.2136,8.1837,-0.6556,10.5909,25.3144,1.2317,-4.547,-15.8986,12.1479,16.5475,1.5558,9.4734,2.7289,7.4516,23.6273,14.0218,11.2572,8.6164,7.1112,3.387,4.5346,2.9935,3.4537,4.5943,36.9857,10.9801,3.4561,33.1245,12.9772,9.8419,1.8981,2.6344,12.2307,12.6652,4.5945,-1.2439,20.3053,11.6854,1.278,6.4206,6.282,-1.6069,-5.3096,19.0005,12.6455,-1.793,14.2085,8.103,11.6197,15.5651,16.8175,9.2685,9.2192,14.9022,5.2985,4.1129,-10.5423,17.1412,3.7795,6.9364,16.5954,8.9498,1.3133,13.8111,9.0241,11.0162,10.8454,17.6249,5.5787,7.9617,13.0443,-1.2328,13.8064,3.4972,-10.4181,6.2649,5.9172,-5.9067,2.4815,11.0587,10.2923,30.7718,12.3032,6.5828,12.0505,2.3116,3.7677,2.697,8.2047,-7.5666,17.0457,7.7008,-2.2498,10.3743,-19.987,13.7452,3.2984,13.3366,6.473,1.1917,4.2972,16.3178,0.443,-4.4059,9.2034,15.2895,-12.829
5047,train_138180,1,9.9133,-8.0963,11.0011,5.1173,12.6007,3.6273,5.2786,23.885,-0.107,6.4108,-7.7269,-11.4912,14.1876,19.2388,10.041,14.0412,8.724,-1.1661,26.8116,18.686,21.4138,20.9237,5.4773,3.2242,5.5609,13.3759,-2.7238,-1.4686,6.0454,1.6012,-20.0613,10.6356,-0.3363,17.4982,12.4347,-7.9208,3.0384,5.367,11.9901,-6.2633,1.4411,5.7069,11.0877,11.4056,15.4457,-44.3787,11.7456,-8.8451,4.13,33.2985,13.7647,20.5913,-4.9946,4.9039,8.2894,17.0257,18.5807,6.563,0.7658,9.3126,20.6722,-4.4725,-2.5639,4.3784,3.96,-5.5472,4.7824,13.1602,5.0188,2.9221,37.5917,0.2279,0.8413,18.7917,23.379,22.0053,6.4766,15.6,7.0219,13.5318,13.3172,19.3246,-7.8489,9.2055,2.0208,19.0504,16.7733,10.6683,10.9479,3.2987,-30.5853,7.3185,9.631,10.163,6.2411,-0.2393,16.0363,32.9874,1.3868,-0.1277,-15.6477,18.5088,29.2052,1.7386,12.7921,2.9834,10.6701,10.5293,13.9188,14.6732,5.5146,5.0484,1.5886,10.4751,2.5515,-0.1935,2.9492,-10.7786,12.8555,5.9143,28.9368,12.1757,7.4884,-7.0672,5.7766,12.5501,12.9448,-5.8323,-2.5175,17.9459,13.051,0.1629,9.9172,6.9895,1.3866,-4.582,25.6948,-1.6134,2.4543,4.8995,1.6589,6.3819,6.6528,10.1915,7.6099,0.6882,5.7765,-2.7741,3.7437,-15.2172,13.7577,13.2335,7.3891,19.3942,6.2217,-7.1372,12.2845,-9.3024,20.7729,8.1553,23.3803,5.6287,4.9319,13.5452,2.0791,26.2421,2.7474,-7.2702,6.2677,5.66,-4.4111,8.2269,16.2037,4.9958,33.6966,12.6906,-7.8845,12.9296,-3.2124,2.5401,-2.4838,9.2454,2.9399,9.8684,-0.4418,-7.586,10.759,-20.2707,10.9809,1.3371,9.7209,2.8653,1.9149,3.2674,21.1381,1.849,9.404,8.0958,11.198,-18.0711
5048,train_138181,1,6.0738,-5.9791,9.9174,8.5375,8.7678,5.407,6.7435,18.9025,4.5527,6.2517,7.5,3.885,14.2615,13.0207,10.2158,14.1691,8.4465,-7.8941,20.7722,2.659,4.3707,12.6061,2.3733,3.7702,13.6143,13.5216,-4.5869,0.8029,5.889,1.1631,-16.1081,8.2892,1.3226,16.7937,10.4845,6.4391,9.4199,8.1173,5.4139,6.8521,-15.1994,5.987,10.7124,12.0058,5.6614,-26.439,12.8412,-8.8369,19.5871,17.5941,12.7577,5.5406,-6.6227,5.1051,3.969,9.0211,12.9142,6.4745,2.8083,8.6132,10.7941,-2.4694,0.6771,-0.2122,7.7627,-0.0367,6.3691,8.9387,5.03,0.0963,24.8548,0.5058,7.0754,9.6576,9.5294,14.5371,3.1285,17.2146,3.5491,14.6119,14.9891,13.3372,-0.9038,1.3593,-12.108,12.8435,-5.0995,15.3378,14.2459,0.8284,-12.9388,7.2855,19.6442,10.3441,7.8603,0.2731,27.1399,26.5052,2.6728,-0.1599,7.1505,15.2265,15.4508,1.5292,10.7977,5.8193,6.7847,2.2071,14.1569,18.6377,1.2953,6.3209,2.1691,2.4326,3.8534,4.1901,2.9072,21.3233,-2.8448,-3.1918,14.4434,8.8231,-6.5819,6.7204,2.5453,12.4662,12.2122,-2.2781,-3.5669,22.6007,12.4688,0.8642,8.7776,6.2514,-17.9072,4.9945,22.5896,22.6933,10.8724,6.8316,3.8007,4.4444,5.5337,18.878,7.1546,10.5227,13.403,-0.1891,3.5689,8.9269,18.9913,5.5559,7.3156,14.5919,-0.2916,5.9912,12.7827,-2.3613,14.6582,6.4776,12.6658,5.5894,6.9241,12.4401,1.6339,12.4761,2.6665,9.2086,9.391,5.8586,-0.8148,2.1528,22.0641,-1.759,23.784,12.6648,-3.1815,13.1224,-10.279,6.1751,-4.8754,9.2533,14.6193,10.0352,33.2745,-11.2374,8.6692,-9.9378,12.3799,-0.281,9.5305,7.2804,3.944,-1.1461,14.9557,-0.1506,-0.8868,8.2236,12.9823,-10.3276
5049,train_138183,1,14.0157,-0.9616,12.9761,9.0387,12.0919,-4.3812,5.2316,17.2407,-2.7353,7.9674,0.706,0.2961,13.7951,4.7297,9.3674,14.6071,8.9067,-8.3295,8.5456,7.906,17.442,12.7282,9.8328,2.5893,9.2591,13.4384,4.3145,-1.6838,3.9913,7.209,4.5736,6.9106,-1.3851,10.473,11.4216,-0.361,6.6381,7.6371,14.9984,-0.1727,-13.4456,16.778,11.2177,12.0227,7.2859,5.3933,9.3822,6.3431,8.2757,27.4582,12.8401,16.373,-7.4884,7.0339,-7.3386,19.9519,20.387,7.0045,8.2369,8.3925,11.0732,-25.029,0.4804,-0.9832,8.0327,6.313,5.2482,8.0198,5.0152,-5.8581,3.3233,0.8368,5.9963,23.3416,22.8497,9.372,0.4468,19.4473,2.4572,12.6744,8.2781,12.8098,-12.1226,4.717,-7.8477,24.769,13.1359,8.6316,9.0225,2.1701,11.9166,7.0733,17.6442,10.4254,11.4419,-0.5515,9.8066,7.6152,1.9671,3.9282,-8.5461,13.2605,27.2739,1.7225,12.5937,5.3872,6.8881,19.5185,14.3022,15.6579,9.9861,4.648,4.1815,7.7249,2.3637,1.6579,0.4661,19.2402,4.1081,-2.9176,50.4388,9.3204,11.2703,7.0547,5.025,12.4612,11.5354,1.0337,3.2611,11.2559,12.0405,0.3793,7.5487,6.9508,-14.0127,-6.1901,23.4779,20.2502,-0.7392,11.4919,9.5942,-1.6344,6.9623,12.521,7.8801,8.8106,9.0984,1.1238,3.7739,-3.2144,18.5611,8.5768,9.396,15.8774,11.6206,-3.4368,13.212,1.2693,13.4653,-0.5054,26.9062,5.9076,4.9987,11.5833,-0.1486,20.3246,2.9723,-0.3088,6.261,5.4393,3.7207,-7.4026,15.9901,-1.8579,36.8041,13.1289,0.1484,11.6926,6.4981,3.0372,-6.4274,9.9315,2.8975,17.5678,5.5285,-2.0885,6.4574,-3.2827,15.0283,-0.002,-7.9094,5.207,1.0548,6.3427,19.4271,-0.6792,-7.4778,9.1883,16.2649,-1.4593


In [400]:
target = train['target']

In [383]:
param = {
    'bagging_freq': 5,
    'bagging_fraction': 0.335,
    'boost_from_average': False,
    'boost': 'gbdt',
    'feature_fraction_seed': 47,
    'feature_fraction': 0.041,
    'learning_rate': 0.01,
    'max_depth': -1,
    'metric':'auc',
    'min_data_in_leaf': 80,
    'min_sum_hessian_in_leaf': 10.0,
    'num_leaves': 13,
    'num_threads': 8,
    'tree_learner': 'serial',
    'objective': 'binary', 
    'verbosity': -1,
    'num_threads': 8
}

In [505]:
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

oof_lgb = np.zeros(len(train))
# predictions_lgb = np.zeros(len(test))
feature_importance = pd.DataFrame()

train_columns = [c for c in train.columns if c not in ['ID_code', 'target']]

for fold_, (trn_idx, val_idx) in enumerate(folds.split(train, target.values)):
    break
    print("fold n°{}".format(fold_))
    trn_data = lgb.Dataset(train.iloc[trn_idx][train_columns], label=target.iloc[trn_idx])
    val_data = lgb.Dataset(train.iloc[val_idx][train_columns], label=target.iloc[val_idx])

    num_round = 10000
    clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=400, early_stopping_rounds = 200)
    oof_lgb[val_idx] = clf.predict(train.iloc[val_idx][train_columns], num_iteration=clf.best_iteration)
#     predictions_lgb += clf.predict(test[train_columns], num_iteration=clf.best_iteration) / folds.n_splits

    fold_importance = pd.DataFrame()
    fold_importance["Feature"] = train_columns
    fold_importance["importance"] = clf.feature_importance()
    fold_importance["fold"] = fold_ + 1
    feature_importance = pd.concat([feature_importance, fold_importance], axis=0)
    
    print("CV score: {:<8.5f}".format(roc_auc_score(target.values[val_idx], oof_lgb[val_idx])))
    
    break
    
# print("CV score: {:<8.5f}".format(roc_auc_score(target.values, oof_lgb)))

In [514]:
target = train['target']

In [515]:
train_index = trn_idx.tolist()
validation_index = val_idx.tolist()

In [516]:
validation_index.extend(train.index[-50:].tolist())

In [517]:
len(train_index)

4000

In [518]:
len(validation_index)

1050

In [519]:
trn_data = lgb.Dataset(train.iloc[train_index][train_columns], label=target.iloc[train_index])
val_data = lgb.Dataset(train.iloc[validation_index][train_columns], label=target.iloc[validation_index])

num_round = 10000
clf = lgb.train(param, trn_data, num_round, valid_sets = [trn_data, val_data], verbose_eval=400, early_stopping_rounds = 200)
oof_lgb[validation_index] = clf.predict(train.iloc[validation_index][train_columns], num_iteration=clf.best_iteration)

fold_importance = pd.DataFrame()
fold_importance["Feature"] = train_columns
fold_importance["importance"] = clf.feature_importance()
fold_importance["fold"] = fold_ + 1
feature_importance = pd.concat([feature_importance, fold_importance], axis=0)

print("CV score: {:<8.5f}".format(roc_auc_score(target.values[validation_index], oof_lgb[validation_index])))

fold n°0
Training until validation scores don't improve for 200 rounds.
[400]	training's auc: 0.952282	valid_1's auc: 0.595887
[800]	training's auc: 0.978332	valid_1's auc: 0.608262
Early stopping, best iteration is:
[608]	training's auc: 0.969965	valid_1's auc: 0.609156
CV score: 0.60916 
