In [1]:
# https://www.kaggle.com/yekenot/2-level-stacker

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import roc_auc_score

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from rgf.sklearn import RGFClassifier
from catboost import CatBoostClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from datetime import datetime
from numba import jit
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm
import time 
import warnings
import time

warnings.filterwarnings('ignore')

  return f(*args, **kwds)


In [3]:
# Load the raw training and test tables from the data directory.
train, test = pd.read_csv('../data/train.csv'), pd.read_csv('../data/test.csv')

In [4]:
# Separate the identifier and label columns from the feature columns.
# This cell only extracts `id`/`target` and drops them from the frames; the
# -1 -> NaN replacement is done in a later cell, and no ps_calc columns are
# removed or one-hot encoded here.
target = train['target']          # label as a Series (used by target_encode)
target_train = target.values      # same label as a plain numpy array
id_test = test['id'].values       # test ids, kept as a numpy array
train = train.drop(['target', 'id'], axis=1)
test = test.drop(['id'], axis=1)

In [5]:
@jit
def eval_gini(y_true, y_prob):
    """
    Normalised Gini coefficient of the scores `y_prob` against labels `y_true`.

    Original author CPMP : https://www.kaggle.com/cpmpml
    In kernel : https://www.kaggle.com/cpmpml/extremely-fast-gini-computation

    The labels are sorted by ascending score and then scanned from the highest
    score down. For every positive label, the number of negatives already seen
    (i.e. negatives scored higher) is accumulated.

    y_true : array-like of labels; the arithmetic assumes 0/1 values.
    y_prob : array-like of scores, same length as y_true.
    Returns 1 - 2 * accumulated / (n_pos * n_neg).
    The denominator is zero when only one class is present.
    """
    labels = np.asarray(y_true)
    labels = labels[np.argsort(y_prob)]
    total = len(labels)
    positives = 0        # number of positive labels seen so far
    accumulated = 0      # sum over positives of the negatives ranked above them
    negatives_seen = 0   # number of negative labels seen so far
    pos = total - 1
    while pos >= 0:
        label = labels[pos]
        positives += label
        accumulated += label * negatives_seen
        negatives_seen += 1 - label
        pos -= 1
    return 1 - 2 * accumulated / (positives * (total - positives))

def gini_xgb(preds, dtrain):
    """
    Evaluation callback that scores `preds` with `eval_gini`.

    preds  : predictions for the rows held by `dtrain`.
    dtrain : object exposing `get_label()` (presumably an xgboost DMatrix).
    Returns a one-element list holding the pair ('gini', score).
    """
    return [('gini', eval_gini(dtrain.get_label(), preds))]

def gini_lgb(preds, dtrain):
    """
    Evaluation callback that scores `preds` with the normalised Gini.

    The previous body called `gini(...)`, a name that is not defined in the
    visible part of this notebook, so invoking the callback raised NameError.
    `eval_gini` already returns the normalised Gini (the quantity that
    gini(y, preds) / gini(y, y) was meant to produce), so it is used directly.

    preds  : predictions for the rows held by `dtrain`.
    dtrain : object exposing `get_label()` (presumably a lightgbm Dataset).
    Returns the triple ('gini', score, True); the trailing True marks the
    metric as higher-is-better.
    """
    labels = np.asarray(dtrain.get_label())
    score = eval_gini(labels, preds)
    return 'gini', score, True


def add_noise(series, noise_level):
    """
    Multiply every element of `series` by (1 + noise_level * z), z ~ N(0, 1).

    One standard-normal draw is taken per element from numpy's global random
    state, so the result is only reproducible if that state is seeded.
    With noise_level == 0 the values are returned unchanged.
    """
    jitter = np.random.randn(len(series))
    return series * (1 + noise_level * jitter)


In [6]:
def target_encode(trn_series=None,
                  tst_series=None,
                  target=None,
                  min_samples_leaf=1,
                  smoothing=1,
                  noise_level=0):
    """
    Smoothed target (mean) encoding of one categorical feature.

    Smoothing is computed like in the following paper by Daniele Micci-Barreca
    https://kaggle2.blob.core.windows.net/forum-message-attachments/225952/7441/high%20cardinality%20categoricals.pdf

    trn_series : training categorical feature as a pd.Series
    tst_series : test categorical feature as a pd.Series (same name as trn_series)
    target : target data as a pd.Series, same length as trn_series
    min_samples_leaf (int) : minimum samples to take category average into account
    smoothing (int) : smoothing effect to balance categorical average vs prior
    noise_level : scale of the multiplicative noise applied by add_noise (0 = none)

    Returns a pair (encoded train series, encoded test series), both named
    "<feature>_mean" and carrying the index of their input series. Categories
    that have no statistics receive the global target mean (the prior).
    """
    assert len(trn_series) == len(target)
    assert trn_series.name == tst_series.name
    feature = trn_series.name
    prior = target.mean()

    # Per-category mean and count of the target.
    stats = (pd.concat([trn_series, target], axis=1)
             .groupby(by=feature)[target.name]
             .agg(["mean", "count"]))
    # Sigmoid weight: the bigger the count, the less the prior is taken into account.
    weight = 1 / (1 + np.exp(-(stats["count"] - min_samples_leaf) / smoothing))
    stats[target.name] = prior * (1 - weight) + stats["mean"] * weight
    stats.drop(["mean", "count"], axis=1, inplace=True)
    lookup = stats.reset_index().rename(columns={'index': target.name, target.name: 'average'})

    def _encode(series):
        # Left-join the blended averages onto the series values.
        encoded = pd.merge(
            series.to_frame(series.name),
            lookup,
            on=series.name,
            how='left')['average'].rename(feature + '_mean').fillna(prior)
        # pd.merge does not keep the index so restore it
        encoded.index = series.index
        return encoded

    encoded_trn = _encode(trn_series)
    encoded_tst = _encode(tst_series)
    return add_noise(encoded_trn, noise_level), add_noise(encoded_tst, noise_level)


In [7]:
# Columns fed to the models, in the order they will appear in the frames.
# The trailing figures are kept from the original selection run
# (presumably feature importance / shadow-feature importance -- TODO confirm).
train_features = [
    'ps_car_13',      # 1571.65 / shadow 609.23
    'ps_reg_03',      # 1408.42 / shadow 511.15
    'ps_ind_05_cat',  # 1387.87 / shadow  84.72
    'ps_ind_03',      # 1219.47 / shadow 230.55
    'ps_ind_15',      #  922.18 / shadow 242.00
    'ps_reg_02',      #  920.65 / shadow 267.50
    'ps_car_14',      #  798.48 / shadow 549.58
    'ps_car_12',      #  731.93 / shadow 293.62
    'ps_car_01_cat',  #  698.07 / shadow 178.72
    'ps_car_07_cat',  #  694.53 / shadow  36.35
    'ps_ind_17_bin',  #  620.77 / shadow  23.15
    'ps_car_03_cat',  #  611.73 / shadow  50.67
    'ps_reg_01',      #  598.60 / shadow 178.57
    'ps_car_15',      #  593.35 / shadow 226.43
    'ps_ind_01',      #  547.32 / shadow 154.58
    'ps_ind_16_bin',  #  475.37 / shadow  34.17
    'ps_ind_07_bin',  #  435.28 / shadow  28.92
    'ps_car_06_cat',  #  398.02 / shadow 212.43
    'ps_car_04_cat',  #  376.87 / shadow  76.98
    'ps_ind_06_bin',  #  370.97 / shadow  36.13
    'ps_car_09_cat',  #  214.12 / shadow  81.38
    'ps_car_02_cat',  #  203.03 / shadow  26.67
    'ps_ind_02_cat',  #  189.47 / shadow  65.68
    'ps_car_11',      #  173.28 / shadow  76.45
    'ps_car_05_cat',  #  172.75 / shadow  62.92
    'ps_calc_09',     #  169.13 / shadow 129.72
    'ps_calc_05',     #  148.83 / shadow 120.68
    'ps_ind_08_bin',  #  140.73 / shadow  27.63
    'ps_car_08_cat',  #  120.87 / shadow  28.82
    'ps_ind_09_bin',  #  113.92 / shadow  27.05
    'ps_ind_04_cat',  #  107.27 / shadow  37.43
    'ps_ind_18_bin',  #   77.42 / shadow  25.97
    'ps_ind_12_bin',  #   39.67 / shadow  15.52
    'ps_ind_14',      #   37.37 / shadow  16.65
    'ps_car_11_cat',  # Very nice spot from Tilii : https://www.kaggle.com/tilii7
]

# Feature pairs that are later concatenated into combined categorical features.
combs = [
    ('ps_reg_01', 'ps_car_02_cat'),
    ('ps_reg_01', 'ps_car_04_cat'),
]

In [8]:
# Continuous features singled out for the element-wise transforms below.
f_ind_reg = ['ps_car_13', 'ps_reg_03']

# Names understood by trans(); only 'sin' is currently switched on.
# Full menu: ['sq', 'sqrt', 'exp', 'div_sqrt', 'cbrt', 'pow_3', 'pow_5', 'sin', 'log']
transformations = ['sin']

In [9]:
def trans(t, x):
    """
    Apply the scalar transformation named `t` to the value `x`.

    t : one of 'sq', 'sqrt', 'exp', 'div_sqrt', 'cbrt', 'pow_3', 'pow_5',
        'sin', 'log'. Note that 'pow_3' / 'pow_5' compute 3 ** x / 5 ** x.
    x : a number (anything float() accepts).

    Returns -1 unchanged when x is the missing-value sentinel -1, the
    transformed value for a known `t`, and None for an unknown `t`.
    A value of exactly 0 is nudged to 0.001 first so that 'log' and
    'div_sqrt' stay defined.
    """
    # Imported locally because the notebook's import cell never imports
    # `math`; without this every math.* branch raised NameError.
    import math

    # float(x) == -1.0 also covers the integer sentinel -1.
    if float(x) == -1.0:
        return -1
    if float(x) == 0.0:
        x = float(x) + 0.001  # increment x by delta
    if t == 'sq':
        return x * x
    elif t == 'sqrt':
        return math.sqrt(x)
    elif t == 'exp':
        return math.exp(x)
    elif t == 'div_sqrt':
        return 1. / math.sqrt(x)
    elif t == 'cbrt':
        return x ** (1. / 3)
    elif t == 'pow_3':
        return 3 ** x
    elif t == 'pow_5':
        return 5 ** x
    elif t == 'sin':
        return math.sin(x)
    elif t == 'log':
        return math.log(x)
    # Unknown transformation name: keep the historical "return None" behaviour.
    return None

In [10]:
# Build one combined categorical feature per pair in `combs`: the two source
# values are joined as "<v1>_<v2>" and then label-encoded with an encoder
# fitted on train and test together, so both frames share the same codes.
start = time.time()
for n_c, (f1, f2) in enumerate(combs):
    name1 = f1 + "_plus_" + f2
    print('current feature %60s %4d in %5.1f'
          % (name1, n_c + 1, (time.time() - start) / 60), end='')
    print('\r' * 75, end='')
    train[name1] = train[f1].map(str) + "_" + train[f2].map(str)
    test[name1] = test[f1].map(str) + "_" + test[f2].map(str)
    # Label Encode
    lbl = LabelEncoder()
    lbl.fit(list(train[name1].values) + list(test[name1].values))
    train[name1] = lbl.transform(list(train[name1].values))
    test[name1] = lbl.transform(list(test[name1].values))

    # Guarded so that re-running this cell does not register the feature
    # twice (a duplicate name would later select the column twice).
    if name1 not in train_features:
        train_features.append(name1)

current feature                                 ps_reg_01_plus_ps_car_04_cat    2 in   0.0

In [11]:
# Interaction feature: element-wise product of ps_car_13 and ps_reg_03.
# NOTE(review): at this point missing values are still encoded as -1 (they
# are replaced by NaN only in a later cell), so a -1 in either input yields
# a sign-flipped product instead of NaN -- confirm this is intended.
train['ps_car_13_x_ps_reg_03'] = train['ps_car_13'] * train['ps_reg_03']
test['ps_car_13_x_ps_reg_03'] = test['ps_car_13'] * test['ps_reg_03']
# Guarded so that re-running this cell does not register the feature twice.
if 'ps_car_13_x_ps_reg_03' not in train_features:
    train_features.append('ps_car_13_x_ps_reg_03')

In [12]:
# Numeric columns whose per-category statistics are computed below.
f_calc_counts = [
    'ps_car_14',
    'ps_car_13',
    'ps_reg_03',
    'ps_ind_03',
]
# Categorical columns used as the grouping key for those statistics.
f_calc_cats = [
    'ps_car_01_cat',
    'ps_ind_05_cat',
]

In [13]:
# Pre-create the "<col>_<cat>_<stat>" columns on both frames, filled with 0.
# Rows that the statistics cell does not touch keep this 0 default.
for col in f_calc_counts:
    for f in f_calc_cats:
        for frame in (train, test):
            for stat in ('mean', 'median', 'skew', 'kurtosis'):
                frame['{}_{}_{}'.format(col, f, stat)] = 0

In [14]:
# For every (numeric column, categorical column) pair, attach the mean,
# median, skew and kurtosis of the numeric column within each category.
# Statistics are computed separately for train and test, each from its own rows.
#
# Fixes relative to the previous version of this cell:
#   * the train kurtosis column was overwritten with the kurtosis of the TEST
#     subset (a stale `data1` was reused); it now comes from the train rows;
#   * every statistic was computed and assigned twice -- now once;
#   * chained assignment (frame[col][mask] = v) is replaced by frame.loc,
#     which always writes into the frame itself;
#   * feature names are appended only once, so the cell can be re-run.
#
# Rows whose category is the -1 sentinel, and test rows whose category never
# occurs in train, keep the default value 0.
# NOTE(review): the numeric columns may still contain -1 sentinels at this
# point (they are replaced by NaN in a later cell), which would enter the
# statistics -- confirm this is intended.
for col in tqdm(f_calc_counts):
    for f in f_calc_cats:
        stat_columns = [
            ('mean', '{}_{}_mean'.format(col, f)),
            ('median', '{}_{}_median'.format(col, f)),
            ('skew', '{}_{}_skew'.format(col, f)),
            ('kurtosis', '{}_{}_kurtosis'.format(col, f)),
        ]
        # (Re)initialise as float columns so the cell is self-contained and
        # float statistics are not written into integer-typed columns.
        for frame in (train, test):
            for stat_name, new_col in stat_columns:
                frame[new_col] = 0.0

        for val in np.unique(train[f].values):
            if val == -1:
                continue
            for frame in (train, test):
                in_category = frame[f] == val
                data = frame.loc[in_category, col]
                for stat_name, new_col in stat_columns:
                    frame.loc[in_category, new_col] = getattr(data, stat_name)()

        for stat_name, new_col in stat_columns:
            if new_col not in train_features:
                train_features.append(new_col)

100%|██████████| 4/4 [00:59<00:00, 14.76s/it]


In [15]:
# Keep only the selected features, in `train_features` order, on both frames.
train, test = train[train_features], test[train_features]


In [16]:
# Every column whose name contains "_cat" gets target-encoded below.
# NOTE(review): this is a substring match, so it also picks up the combined
# "..._plus_..._cat" features and the per-category statistic columns such as
# "ps_car_14_ps_car_01_cat_mean" -- confirm the latter are meant to be encoded.
f_cats = [name for name in train.columns if "_cat" in name]

In [17]:
# Add a smoothed target-mean column "<feature>_avg" for each categorical feature.
# NOTE(review): the encoding is fitted on the full training set and applied
# back to the same rows (no out-of-fold scheme, noise_level=0).
for f in f_cats:
    encoded_trn, encoded_tst = target_encode(
        trn_series=train[f],
        tst_series=test[f],
        target=target,
        min_samples_leaf=200,
        smoothing=10,
        noise_level=0,
    )
    train[f + "_avg"] = encoded_trn
    test[f + "_avg"] = encoded_tst

In [18]:
# col_to_drop = train.columns[train.columns.str.startswith('ps_calc_')]
# train = train.drop(col_to_drop, axis=1)  
# test = test.drop(col_to_drop, axis=1)  

# Turn the -1 missing-value sentinel into NaN on both frames.
train, test = [frame.replace(-1, np.nan) for frame in (train, test)]

# cat_features = [a for a in train.columns if a.endswith('cat')]

# for column in cat_features:
#     temp = pd.get_dummies(pd.Series(train[column]))
#     train = pd.concat([train,temp],axis=1)
#     train = train.drop([column],axis=1)
    
# for column in cat_features:
#     temp = pd.get_dummies(pd.Series(test[column]))
#     test = pd.concat([test,temp],axis=1)
#     test = test.drop([column],axis=1)

# print(train.values.shape, test.values.shape)

In [19]:
# Gini of the final blend on the training rows; updated by Ensemble.fit_predict.
final_score = 0.0


class Ensemble(object):
    """
    Two-level stacking ensemble for binary classification.

    Level 0: every model in `base_models` produces out-of-fold predictions on
             the training rows and fold-averaged predictions on the test rows.
    Level 1: every model in `stacker_1` is trained on the level-0 predictions,
             either out-of-fold (mode 1) or once per "leave one base-model
             column out" subset with in-sample predictions (mode 2).
    Level 2: `stacker_2` (one model or a collection of models) is fitted on
             the level-1 predictions; with several models their predictions
             are averaged.

    All models must provide fit(X, y) and predict_proba(X).

    Parameters
    ----------
    mode : 1 or 2, selects the level-1 scheme described above.
    n_splits : number of stratified folds.
    stacker_2 : a single model, or an iterable of models.
    stacker_1 : sequence of level-1 models.
    base_models : sequence of level-0 models.

    Attributes
    ----------
    final_score_ : Gini of the final blend on the training rows, set by
        fit_predict. It is measured on the rows the level-2 models were
        fitted on, so it is optimistic.
    """

    # Seed shared by every StratifiedKFold so the folds are reproducible.
    FOLD_SEED = 2016

    def __init__(self, mode, n_splits, stacker_2, stacker_1, base_models):
        self.mode = mode
        self.n_splits = n_splits
        self.stacker_2 = stacker_2
        self.stacker_1 = stacker_1
        self.base_models = base_models
        self.final_score_ = None

    def _final_models(self):
        """Return the level-2 model(s) as a non-empty list."""
        # Anything with a `fit` attribute is a single model, even if it also
        # defines __len__ (as ensembles and pipelines do). This replaces the
        # old len()/TypeError probe and its comparison against a global.
        if hasattr(self.stacker_2, 'fit'):
            return [self.stacker_2]
        models = list(self.stacker_2)
        if not models:
            raise ValueError("stacker_2 must contain at least one model")
        return models

    @staticmethod
    def _report_correlation(predictions, columns, title):
        """Print the correlation matrix of the prediction columns under `title`."""
        frame = pd.DataFrame(predictions, columns=columns)
        print('\n')
        print(title)
        print('\n')
        print(frame.corr())
        print('\n')

    def _fit_out_of_fold(self, models, features, y, test_features, label):
        """
        Fit each model fold by fold.

        Returns (oof, test_pred): `oof[:, i]` holds model i's predictions for
        each training row made by the fit that did not see that row;
        `test_pred[:, i]` is the mean over folds of its test predictions.
        """
        splitter = StratifiedKFold(n_splits=self.n_splits, shuffle=True,
                                   random_state=self.FOLD_SEED)
        folds = list(splitter.split(features, y))

        oof = np.zeros((features.shape[0], len(models)))
        test_pred = np.zeros((test_features.shape[0], len(models)))
        columns = []

        for i, clf in enumerate(models):
            fold_test_pred = np.zeros((test_features.shape[0], self.n_splits))

            for j, (train_idx, holdout_idx) in enumerate(folds):
                print("Fit %s_%d fold %d" % (str(clf).split("(")[0], i+1, j+1))
                clf.fit(features[train_idx], y[train_idx])

                oof[holdout_idx, i] = clf.predict_proba(features[holdout_idx])[:, 1]
                fold_test_pred[:, j] = clf.predict_proba(test_features)[:, 1]
            test_pred[:, i] = fold_test_pred.mean(axis=1)

            # Scores are reported for model i's own column.
            print("  %s_%d score: %.5f\n" % (label, i+1, roc_auc_score(y, oof[:, i])))
            print("  %s_%d gini score: %.5f\n" % (label, i+1, eval_gini(y, oof[:, i])))
            columns.append('%s_%d' % (label, i+1))

        self._report_correlation(
            oof, columns,
            'Correlation between out-of-fold predictions from %ss:' % label)
        return oof, test_pred

    def _fit_without_one_column(self, S_train, y, S_test):
        """
        Mode 2: fit each level-1 model once per subset of base-model columns
        obtained by leaving one column out, and average the predictions.

        The training-row predictions are in-sample (each fit predicts the
        rows it was trained on).
        """
        n_inputs = S_train.shape[1]
        train_pred = np.zeros((S_train.shape[0], len(self.stacker_1)))
        test_pred = np.zeros((S_test.shape[0], len(self.stacker_1)))
        columns = []

        for i, clf in enumerate(self.stacker_1):
            subset_train_pred = np.zeros((S_train.shape[0], n_inputs))
            subset_test_pred = np.zeros((S_test.shape[0], n_inputs))

            for j in range(n_inputs):
                keep = np.arange(n_inputs) != j
                S_tr = S_train[:, keep]
                S_te = S_test[:, keep]

                print("Fit %s_%d subset %d" % (str(clf).split("(")[0], i+1, j+1))
                clf.fit(S_tr, y)

                subset_train_pred[:, j] = clf.predict_proba(S_tr)[:, 1]
                subset_test_pred[:, j] = clf.predict_proba(S_te)[:, 1]
            train_pred[:, i] = subset_train_pred.mean(axis=1)
            test_pred[:, i] = subset_test_pred.mean(axis=1)

            print("  1st level model_%d score: %.5f\n" % (i+1, roc_auc_score(y, train_pred[:, i])))
            print("  1st level model_%d gini score: %.5f\n" % (i+1, eval_gini(y, train_pred[:, i])))
            columns.append('1st level model_' + str(i+1))

        self._report_correlation(
            train_pred, columns,
            'Correlation between without-one-column predictions from 1st level models:')
        return train_pred, test_pred

    def fit_predict(self, X, y, T):
        """
        Train the whole stack on (X, y) and return predictions for T.

        X : training features (array-like, rows x features).
        y : binary training labels.
        T : test features with the same columns as X.

        Returns a 1-D array with the positive-class probability of every test
        row, averaged over the level-2 models when there are several.

        Side effects: prints progress and scores, stores the final training
        Gini in `self.final_score_` and in the module-level `final_score`.

        Raises ValueError if `mode` is not 1 or 2, or `stacker_2` is empty.
        """
        # The score used to be assigned to a local variable of the same name,
        # so the module-level value always stayed at 0.0.
        global final_score

        # Validate up front: previously a bad mode only failed (NameError)
        # after all base models had been trained.
        if self.mode not in (1, 2):
            raise ValueError("mode must be 1 or 2, got %r" % (self.mode,))
        final_models = self._final_models()

        X = np.array(X)
        y = np.array(y)
        T = np.array(T)

        S_train, S_test = self._fit_out_of_fold(self.base_models, X, y, T, 'Base model')

        if self.mode == 1:
            S_train_2, S_test_2 = self._fit_out_of_fold(
                self.stacker_1, S_train, y, S_test, '1st level model')
        else:
            S_train_2, S_test_2 = self._fit_without_one_column(S_train, y, S_test)

        if len(final_models) == 1:
            model = final_models[0]
            print("Fit %s for final\n" % (str(model).split("(")[0]))
            model.fit(S_train_2, y)

            stack_res = model.predict_proba(S_test_2)[:, 1]

            # 1-D vector of in-sample predictions; the old code called
            # .mean(axis=1) on it, which fails for a 1-D array.
            stack_score = model.predict_proba(S_train_2)[:, 1]
            score = eval_gini(y, stack_score)
            print("2nd level model final score: %.5f" % (roc_auc_score(y, stack_score)))
            print("2nd level model final gini score: %.5f" % (score))
        else:
            columns = []
            stack_score = np.zeros((S_train_2.shape[0], len(final_models)))
            res = np.zeros((S_test_2.shape[0], len(final_models)))

            for i, clf in enumerate(final_models):
                print("Fit %s_%d" % (str(clf).split("(")[0], i+1))
                clf.fit(S_train_2, y)

                stack_score[:, i] = clf.predict_proba(S_train_2)[:, 1]
                print("  2nd level model_%d score: %.5f\n" % (i+1, roc_auc_score(y, stack_score[:, i])))

                res[:, i] = clf.predict_proba(S_test_2)[:, 1]
                columns.append('2nd level model_' + str(i+1))

            self._report_correlation(
                stack_score, columns,
                'Correlation between final predictions from 2nd level models:')

            stack_res = res.mean(axis=1)
            blended = stack_score.mean(axis=1)
            score = eval_gini(y, blended)
            print("2nd level models final score: %.5f" % (roc_auc_score(y, blended)))
            print("2nd level models final gini score: %.5f" % (score))

        self.final_score_ = score
        final_score = score
        return stack_res

In [20]:
# LightGBM settings. Variants 4-10 share the key set of lgb_params_1 and are
# written as "lgb_params_1 with these overrides" so the differences stand out.
# dict(base, **overrides) builds a new dict each time; nothing is aliased.
lgb_params_1 = dict(
    learning_rate=0.02, n_estimators=475,
    subsample=0.4, subsample_freq=1, colsample_bytree=0.9,
    num_leaves=28, max_bin=10, min_child_samples=700,
    seed=99,
)

# No max_bin / min_child_samples here.
lgb_params_2 = dict(
    learning_rate=0.02, n_estimators=1200,
    subsample=0.7, subsample_freq=2, colsample_bytree=0.3,
    num_leaves=16,
    seed=99,
)

# No num_leaves here.
lgb_params_3 = dict(
    learning_rate=0.02, n_estimators=750,
    subsample=0.8, subsample_freq=10, colsample_bytree=0.8,
    max_bin=10, min_child_samples=500,
    seed=99,
)

lgb_params_4 = dict(lgb_params_1, learning_rate=0.025, n_estimators=550, subsample=0.7)

# 5-7: only the learning rate moves.
lgb_params_5 = dict(lgb_params_1, learning_rate=0.03, n_estimators=550)
lgb_params_6 = dict(lgb_params_1, learning_rate=0.035, n_estimators=550)
lgb_params_7 = dict(lgb_params_1, learning_rate=0.04, n_estimators=550)

# 8-10: tree-shape variations at learning_rate 0.03.
lgb_params_8 = dict(lgb_params_1, learning_rate=0.03, n_estimators=550, num_leaves=25)
lgb_params_9 = dict(lgb_params_1, learning_rate=0.03, n_estimators=550, num_leaves=25,
                    min_child_samples=800)
lgb_params_10 = dict(lgb_params_1, learning_rate=0.03, n_estimators=550, num_leaves=30,
                     max_bin=15, min_child_samples=800)


# XGBoost settings. Two families exist:
#   * "plain"       (xgb_param_1 and derivatives): gamma 1, no L1, lambda 1;
#   * "regularised" (xgb_param_2 and derivatives): min_child_weight 6,
#                   gamma 10, reg_alpha 8, reg_lambda 1.3.
# Derived variants are written as "base with these overrides";
# dict(base, **overrides) builds a new dict each time, nothing is aliased.
xgb_param_1 = dict(
    learning_rate=0.1, objective="binary:logistic", n_estimators=200, max_depth=4,
    subsample=0.8, colsample_bytree=0.8, scale_pos_weight=1.52632,
    gamma=1, reg_alpha=0, reg_lambda=1,
    seed=99,
)

xgb_param_2 = dict(
    learning_rate=0.07, objective="binary:logistic", n_estimators=200, max_depth=4,
    min_child_weight=6,
    subsample=0.8, colsample_bytree=0.8, scale_pos_weight=1.6,
    gamma=10, reg_alpha=8, reg_lambda=1.3,
    seed=99,
)

# Plain family, 200 trees.
xgb_param_3 = dict(xgb_param_1, scale_pos_weight=26.43)
xgb_param_4 = dict(xgb_param_1, learning_rate=0.07)
xgb_param_5 = dict(xgb_param_1, learning_rate=0.07, scale_pos_weight=2.0)

# Regularised family.
xgb_param_6 = dict(xgb_param_2, scale_pos_weight=2.0)
xgb_param_7 = dict(xgb_param_2, scale_pos_weight=1.53)
xgb_param_8 = dict(xgb_param_2, scale_pos_weight=26.43)
xgb_param_9 = dict(xgb_param_2, learning_rate=0.08, colsample_bytree=1.0,
                   scale_pos_weight=1.52632)
xgb_param_10 = dict(xgb_param_2, learning_rate=0.09, colsample_bytree=1.0,
                    scale_pos_weight=1.52632)

# Plain family, 250 trees.
xgb_param_11 = dict(xgb_param_1, n_estimators=250, subsample=0.9, colsample_bytree=1)
xgb_param_12 = dict(xgb_param_1, n_estimators=250, subsample=1, colsample_bytree=1)
xgb_param_13 = dict(xgb_param_1, learning_rate=0.09, n_estimators=250, max_depth=6)
xgb_param_14 = dict(xgb_param_1, learning_rate=0.07, n_estimators=250, max_depth=6,
                    subsample=0.9)

# xgb_param_15 used to be assigned three times in a row, so only the last
# assignment ever took effect. The two shadowed variants were identical to the
# surviving one except for:
#     first  : learning_rate 0.07, max_depth 6
#     second : learning_rate 0.07, max_depth 5
# NOTE(review): if those were meant to be separate models they need their own
# names; they are recorded here instead of being silently overwritten.
xgb_param_15 = dict(xgb_param_1, learning_rate=0.06, n_estimators=250, max_depth=5,
                    subsample=0.9, colsample_bytree=0.9)

# Regularized Greedy Forest settings. Variants 2-12 share the key set of
# rgf_param_1 and are written as "rgf_param_1 with these overrides";
# variants 13-15 do not set n_jobs and are built from rgf_param_13.
# dict(base, **overrides) builds a new dict each time, nothing is aliased.
rgf_param_1 = dict(
    max_leaf=1000, algorithm="RGF", loss="Log",
    l2=0.01, sl2=0.01, normalize=False,
    min_samples_leaf=10, n_iter=None, opt_interval=100,
    learning_rate=.5, calc_prob="sigmoid",
    n_jobs=-1, memory_policy="generous", verbose=0,
)

rgf_param_2 = dict(rgf_param_1, l2=0.02)
rgf_param_3 = dict(rgf_param_1, learning_rate=.1)
rgf_param_4 = dict(rgf_param_1, learning_rate=.4)

# Larger forests with smaller learning rates.
rgf_param_5 = dict(rgf_param_1, max_leaf=3000, learning_rate=0.09)
rgf_param_6 = dict(rgf_param_1, max_leaf=2000, learning_rate=0.07)
rgf_param_7 = dict(rgf_param_1, max_leaf=2000, min_samples_leaf=5, learning_rate=0.07)

rgf_param_8 = dict(rgf_param_1, min_samples_leaf=5, learning_rate=0.08)
# NOTE(review): rgf_param_9 has exactly the same values as rgf_param_8 --
# possibly a copy that was meant to be changed; verify.
rgf_param_9 = dict(rgf_param_1, min_samples_leaf=5, learning_rate=0.08)

# Leaf-size variations at learning_rate 0.5.
rgf_param_10 = dict(rgf_param_1, min_samples_leaf=15)
rgf_param_11 = dict(rgf_param_1, min_samples_leaf=20)
rgf_param_12 = dict(rgf_param_1, sl2=0.02, min_samples_leaf=20)

# From here on n_jobs is not specified.
rgf_param_13 = dict(
    max_leaf=1000, algorithm="RGF", loss="Log",
    l2=0.02, sl2=0.02, normalize=False,
    min_samples_leaf=20, n_iter=None, opt_interval=100,
    learning_rate=0.5, calc_prob="sigmoid",
    memory_policy="generous", verbose=0,
)
rgf_param_14 = dict(rgf_param_13, opt_interval=200)
rgf_param_15 = dict(rgf_param_13, min_samples_leaf=5, n_iter=4, opt_interval=200)

# Keyword arguments for CatBoost configuration 1; unpacked into
# CatBoostClassifier when cb_model_1 is built. Of the visible cb_param_*
# dicts, this is the only one that sets 'verbose'.
cb_param_1 = dict(
    iterations=800,
    depth=8,
    rsm=0.95,
    learning_rate=0.03,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
    verbose=0,
)

# CatBoost configuration 2 (consumed by cb_model_2): 800 iterations at
# learning_rate 0.03 with l2_leaf_reg raised to 4.0.
cb_param_2 = dict(
    iterations=800,
    depth=8,
    rsm=0.95,
    learning_rate=0.03,
    l2_leaf_reg=4.0,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 3 (consumed by cb_model_3): 900 iterations at
# learning_rate 0.05.
cb_param_3 = dict(
    iterations=900,
    depth=8,
    rsm=0.95,
    learning_rate=0.05,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 4 (consumed by cb_model_4): 900 iterations at
# learning_rate 0.07 with border_count 10.
cb_param_4 = dict(
    iterations=900,
    depth=8,
    rsm=0.95,
    learning_rate=0.07,
    l2_leaf_reg=3.5,
    border_count=10,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 5 (consumed by cb_model_5).
# NOTE(review): every value here matches cb_param_4, so cb_model_4 and
# cb_model_5 are configured identically — confirm the duplicate is intended.
cb_param_5 = dict(
    iterations=900,
    depth=8,
    rsm=0.95,
    learning_rate=0.07,
    l2_leaf_reg=3.5,
    border_count=10,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 6 (consumed by cb_model_6): 700 iterations at
# learning_rate 0.06.
cb_param_6 = dict(
    iterations=700,
    depth=8,
    rsm=0.95,
    learning_rate=0.06,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 7 (consumed by cb_model_7): same values as
# cb_param_6 except gradient_iterations=5.
cb_param_7 = dict(
    iterations=700,
    depth=8,
    rsm=0.95,
    learning_rate=0.06,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=5,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 8 (consumed by cb_model_8): 700 iterations at
# learning_rate 0.05.
cb_param_8 = dict(
    iterations=700,
    depth=8,
    rsm=0.95,
    learning_rate=0.05,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 9 (consumed by cb_model_9).
# NOTE(review): every value here matches cb_param_8, so cb_model_8 and
# cb_model_9 are configured identically — confirm the duplicate is intended.
cb_param_9 = dict(
    iterations=700,
    depth=8,
    rsm=0.95,
    learning_rate=0.05,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 10 (consumed by cb_model_10): 900 iterations at
# learning_rate 0.02 with rsm 0.85.
cb_param_10 = dict(
    iterations=900,
    depth=8,
    rsm=0.85,
    learning_rate=0.02,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 11 (consumed by cb_model_11): same values as
# cb_param_10 except rsm=0.75.
cb_param_11 = dict(
    iterations=900,
    depth=8,
    rsm=0.75,
    learning_rate=0.02,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 12 (consumed by cb_model_12): same values as
# cb_param_10 except rsm=0.80.
cb_param_12 = dict(
    iterations=900,
    depth=8,
    rsm=0.80,
    learning_rate=0.02,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 13 (consumed by cb_model_13): same values as
# cb_param_10 except rsm=0.90.
cb_param_13 = dict(
    iterations=900,
    depth=8,
    rsm=0.90,
    learning_rate=0.02,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 14 (consumed by cb_model_14): same values as
# cb_param_10 except depth=4.
cb_param_14 = dict(
    iterations=900,
    depth=4,
    rsm=0.85,
    learning_rate=0.02,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=20,
    random_seed=99,
)

# CatBoost configuration 15 (consumed by cb_model_15): depth=5 and
# od_wait=100 (the other visible CatBoost dicts use od_wait=20).
cb_param_15 = dict(
    iterations=900,
    depth=5,
    rsm=0.85,
    learning_rate=0.02,
    l2_leaf_reg=3.5,
    border_count=8,
    gradient_iterations=4,
    od_type='Iter',
    od_wait=100,
    random_seed=99,
)


In [21]:
# Base models
# LightGBM: one classifier per lgb_params_N dict, bound to lgb_model_N in
# the same order.
(lgb_model_1, lgb_model_2, lgb_model_3, lgb_model_4, lgb_model_5,
 lgb_model_6, lgb_model_7, lgb_model_8, lgb_model_9, lgb_model_10) = (
    LGBMClassifier(**params)
    for params in (
        lgb_params_1, lgb_params_2, lgb_params_3, lgb_params_4, lgb_params_5,
        lgb_params_6, lgb_params_7, lgb_params_8, lgb_params_9, lgb_params_10,
    )
)

# XGBoost: one classifier per xgb_param_N dict, bound to xgb_model_N in the
# same order.
(xgb_model_1, xgb_model_2, xgb_model_3, xgb_model_4, xgb_model_5,
 xgb_model_6, xgb_model_7, xgb_model_8, xgb_model_9, xgb_model_10,
 xgb_model_11, xgb_model_12, xgb_model_13, xgb_model_14, xgb_model_15) = (
    XGBClassifier(**params)
    for params in (
        xgb_param_1, xgb_param_2, xgb_param_3, xgb_param_4, xgb_param_5,
        xgb_param_6, xgb_param_7, xgb_param_8, xgb_param_9, xgb_param_10,
        xgb_param_11, xgb_param_12, xgb_param_13, xgb_param_14, xgb_param_15,
    )
)

# RGF: one classifier per rgf_param_N dict for N = 1..10, bound to
# rgf_model_N in the same order.
# NOTE(review): rgf_param_11..rgf_param_15 are defined but no model is built
# from them, and none of the rgf_model_* names appear in the base_models
# tuple of the "Mode 2 run" cell — confirm that is intended.
(rgf_model_1, rgf_model_2, rgf_model_3, rgf_model_4, rgf_model_5,
 rgf_model_6, rgf_model_7, rgf_model_8, rgf_model_9, rgf_model_10) = (
    RGFClassifier(**params)
    for params in (
        rgf_param_1, rgf_param_2, rgf_param_3, rgf_param_4, rgf_param_5,
        rgf_param_6, rgf_param_7, rgf_param_8, rgf_param_9, rgf_param_10,
    )
)

# CatBoost: one classifier per cb_param_N dict, bound to cb_model_N in the
# same order.
(cb_model_1, cb_model_2, cb_model_3, cb_model_4, cb_model_5,
 cb_model_6, cb_model_7, cb_model_8, cb_model_9, cb_model_10,
 cb_model_11, cb_model_12, cb_model_13, cb_model_14, cb_model_15) = (
    CatBoostClassifier(**params)
    for params in (
        cb_param_1, cb_param_2, cb_param_3, cb_param_4, cb_param_5,
        cb_param_6, cb_param_7, cb_param_8, cb_param_9, cb_param_10,
        cb_param_11, cb_param_12, cb_param_13, cb_param_14, cb_param_15,
    )
)

In [22]:
# Stacker models: the estimators handed to Ensemble as stacker_1 / stacker_2
# in the "Mode 2 run" cell.
log_model = LogisticRegression()
et_model = ExtraTreesClassifier(
    random_state=10,
    n_estimators=200,
    max_depth=6,
    min_samples_split=10,
)
mlp_model = MLPClassifier(random_state=42, max_iter=20)



In [23]:
# Mode 2 run: build the ensemble from the CatBoost, LightGBM and XGBoost base
# models, then fit it on the training data and predict the test set.
# NOTE(review): log_model and et_model are the same instances in both
# stacker_1 and stacker_2; whether Ensemble clones its estimators is not
# visible here — confirm they do not share fitted state.
stack = Ensemble(
    mode=2,
    n_splits=5,
    base_models=(
        # CatBoost
        cb_model_1, cb_model_2, cb_model_3, cb_model_4, cb_model_5,
        cb_model_6, cb_model_7, cb_model_8, cb_model_9, cb_model_10,
        cb_model_11, cb_model_12, cb_model_13, cb_model_14, cb_model_15,
        # LightGBM
        lgb_model_1, lgb_model_2, lgb_model_3, lgb_model_4, lgb_model_5,
        lgb_model_6, lgb_model_7, lgb_model_8, lgb_model_9, lgb_model_10,
        # XGBoost
        xgb_model_1, xgb_model_2, xgb_model_3, xgb_model_4, xgb_model_5,
        xgb_model_6, xgb_model_7, xgb_model_8, xgb_model_9, xgb_model_10,
        xgb_model_11, xgb_model_12, xgb_model_13, xgb_model_14, xgb_model_15,
    ),
    stacker_1=(log_model, et_model, mlp_model),
    stacker_2=(log_model, et_model),
)

y_pred = stack.fit_predict(train, target_train, test)

Fit <catboost.core.CatBoostClassifier object at 0x7f2ee782e5c0>_1 fold 1

Iteration with suspicious time -2.95 sec ignored in overall statistics.

Iteration with suspicious time -2.91 sec ignored in overall statistics.

Iteration with suspicious time -2.89 sec ignored in overall statistics.

Iteration with suspicious time -3.01 sec ignored in overall statistics.

Iteration with suspicious time -2.94 sec ignored in overall statistics.

Iteration with suspicious time -2.88 sec ignored in overall statistics.

Iteration with suspicious time -2.96 sec ignored in overall statistics.

Iteration with suspicious time -2.89 sec ignored in overall statistics.

Iteration with suspicious time -2.85 sec ignored in overall statistics.

Iteration with suspicious time -2.87 sec ignored in overall statistics.

Iteration with suspicious time -2.83 sec ignored in overall statistics.

Iteration with suspicious time -3.08 sec ignored in overall statistics.

Iteration with suspicious time -2.87 sec ignored i

In [24]:
# now = datetime.now().strftime('%Y_%m_%d_%H_%M_%S')
# fn = '../submissions/sub.xgb.{}.{}GMT'.format(full_oof_score, now)

In [28]:
# Score embedded in the submission filename built in the next cell.
# NOTE(review): hard-coded value; presumably copied by hand from the stacking
# run's output — confirm, and prefer deriving it from the run itself.
final_score = 0.29227

In [29]:
# Submission from mode 2: pair each test id with its stacked prediction and
# write the frame to a file whose name carries the score and a timestamp.
sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred
# The filename labels the timestamp as GMT, so take the clock reading in UTC.
# datetime.now() returned naive local time, which mislabels the file on any
# machine whose timezone is not UTC.
now = time.strftime('%Y_%m_%d_%H_%M_%S', time.gmtime())
fn = '../submissions/sub.2l.{}.{}GMT'.format(final_score, now)
sub.to_csv(fn, index=False)

In [30]:
# Echo the path passed to sub.to_csv in the previous cell.
print(fn)

../submissions/sub.2l.0.29227.2017_11_30_06_56_49GMT
