In [4]:
import scipy as sp
from functools import partial
import gc
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import lightgbm as lgb
import xgboost as xgb
import torch
from torch import nn
from torch.nn import functional as F
from sklearn.metrics import f1_score, mean_squared_error
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import AdaBoostRegressor

def MacroF1MetricClassification(preds, dtrain):
    labels = dtrain.get_label()
    preds = np.argmax(preds).astype(np.int16)
    score = f1_score(labels, preds, average='macro')
    return ('MacroF1Metric', score, True)


def MacroF1MetricRegression(preds, dtrain):
    labels = dtrain.get_label()
    preds = np.round(np.clip(preds, 0, 10)).astype(np.int16)
    score = f1_score(labels, preds, average='macro')
    return ('MacroF1Metric', score, True)


def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage(deep=True).sum() / 1024**2  # just added
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(
                        np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(
                        np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(
                        np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(
                        np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(
                        np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(
                        np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage(deep=True).sum() / 1024**2
    percent = 100 * (start_mem - end_mem) / start_mem
    print(
        'Mem. usage decreased from {:5.2f} Mb to {:5.2f} Mb ({:.1f}% reduction)'
        .format(start_mem, end_mem, percent))
    return df


class OptimizedRounder(object):
    """
    An optimizer for rounding thresholds
    to maximize F1 (Macro) score
    # https://www.kaggle.com/naveenasaithambi/optimizedrounder-improved
    """
    def __init__(self):
        self.coef_ = 0

    def _f1_loss(self, coef, X, y):
        """
        Get loss according to
        using current coefficients

        :param coef: A list of coefficients that will be used for rounding
        :param X: The raw predictions
        :param y: The ground truth labels
        """
        X_p = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf],
                     labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

        return -f1_score(y, X_p, average='macro')

    def fit(self, X, y):
        """
        Optimize rounding thresholds

        :param X: The raw predictions
        :param y: The ground truth labels
        """
        loss_partial = partial(self._f1_loss, X=X, y=y)
        initial_coef = [0.5, 1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5]
        self.coef_ = sp.optimize.minimize(loss_partial,
                                          initial_coef,
                                          method='nelder-mead')

    def predict(self, X, coef):
        """
        Make predictions with specified thresholds

        :param X: The raw predictions
        :param coef: A list of coefficients that will be used for rounding
        """
        return pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf],
                      labels=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

    def coefficients(self):
        """
        Return the optimized coefficients
        """
        return self.coef_['x']


def cross_validate(params: dict,
                   X,
                   y,
                   X_test,
                   oof_df,
                   features,
                   model_type: str = 'lgb',
                   feval: dict = {},
                   objective: str = 'regression',
                   num_boost_round: int = 1,
                   early_stopping_rounds: int = 50,
                   sklearn_model = None):
    kfold = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)
    feat_importance_df = pd.DataFrame(index=features)
    fold = 0
    if model_type == 'lgb':
        for train_id, valid_id in kfold.split(X, y):
            fold += 1
            x_train, y_train = X.iloc[train_id, :], y[train_id]
            x_val, y_val = X.iloc[valid_id, :], y[valid_id]

            train_set = lgb.Dataset(x_train, y_train)
            valid_set = lgb.Dataset(x_val, y_val)

            model = lgb.train(params=params,
                              feval=feval[objective],
                              train_set=train_set,
                              num_boost_round=num_boost_round,
                              early_stopping_rounds=early_stopping_rounds,
                              valid_sets=[train_set, valid_set],
                              verbose_eval=1000)
            if objective == 'regression':
                pred = model.predict(x_val, num_iteration=model.best_iteration)
                pred = np.round(np.clip(pred, 0, 10)).astype(np.int32)
                test_preds = model.predict(X_test,
                                           num_iteration=model.best_iteration)
                test_preds = np.round(np.clip(test_preds, 0,
                                              10)).astype(np.int32)
            elif objective == 'classification':
                pred = model.predict(x_val, num_iteration=model.best_iteration)
                pred = np.argmax(pred).astype(np.int32)
                test_preds = model.predict(X_test,
                                           num_iteration=model.best_iteration)
                test_preds = np.argmax(test_preds, 0, 10).astype(np.int32)

            oof_df.loc[oof_df.iloc[valid_id].index, 'oof'] = pred
            sub[f'{model_type}_open_channels_fold_{fold}'] = test_preds

            f1 = f1_score(
                oof_df.loc[oof_df.iloc[valid_id].index]['open_channels'],
                oof_df.loc[oof_df.iloc[valid_id].index]['oof'],
                average='macro')
            rmse = np.sqrt(
                mean_squared_error(
                    oof_df.loc[oof_df.index.isin(valid_id)]['open_channels'],
                    oof_df.loc[oof_df.index.isin(valid_id)]['oof']))
            feat_importance_df[
                f'{model_type}_importance_{fold}'] = model.feature_importance(
                )

        oof_f1 = f1_score(oof_df['open_channels'],
                          oof_df['oof'],
                          average='macro')
        oof_rmse = np.sqrt(
            mean_squared_error(oof_df['open_channels'], oof_df['oof']))
    elif model_type == 'xgb':
        test_set = xgb.DMatrix(X_test)
        for train_id, valid_id in kfold.split(X, y):
            fold += 1
            x_train, y_train = X.iloc[train_id, :], y[train_id]
            x_val, y_val = X.iloc[valid_id, :], y[valid_id]

            train_set = xgb.DMatrix(x_train, y_train)
            valid_set = xgb.DMatrix(x_val, y_val)

            model = xgb.train(params=params,
                              dtrain=train_set,
                              num_boost_round=num_boost_round,
                              early_stopping_rounds=early_stopping_rounds,
                              evals=((train_set, 'train'), (valid_set, 'val')),
                              verbose_eval=1000)
            if objective == 'regression':
                pred = model.predict(x_val, ntree_limit=model.best_ntree_limit)
                #pred = np.round(np.clip(pred, 0, 10)).astype(np.int32)
                test_preds = model.predict(test_set,
                                           ntree_limit=model.best_ntree_limit)
#                 test_preds = np.round(np.clip(test_preds, 0,
#                                               10)).astype(np.int32)
            elif objective == 'classification':
                pred = model.predict(x_val, ntree_limit=model.best_ntree_limit)
                pred = np.argmax(pred).astype(np.int16)
                test_preds = model.predict(test_set,
                                           ntree_limit=model.best_ntree_limit)
                test_preds = np.argmax(test_preds).astype(np.int32)

            oof_df.loc[oof_df.iloc[valid_id].index, 'oof'] = pred
            sub[f'{model_type}_open_channels_fold_{fold}'] = test_preds

            f1 = f1_score(
                oof_df.loc[oof_df.iloc[valid_id].index]['open_channels'],
                oof_df.loc[oof_df.iloc[valid_id].index]['oof'],
                average='macro')
            rmse = np.sqrt(
                mean_squared_error(
                    oof_df.loc[oof_df.index.isin(valid_id)]['open_channels'],
                    oof_df.loc[oof_df.index.isin(valid_id)]['oof'])).astype(
                        np.float32)

            feat_importance_df[
                f'{model_type}_importance_{fold}'] = model.feature_importances_

        oof_f1 = f1_score(oof_df['open_channels'],
                          oof_df['oof'],
                          average='macro')
        oof_rmse = np.sqrt(
            mean_squared_error(oof_df['open_channels'], oof_df['oof']))
    elif model_type == 'sklearn':
        rounder = OptimizedRounder()
        for train_id, valid_id in kfold.split(X, y):
            fold += 1
            x_train, y_train = X.iloc[train_id, :], y[train_id]
            x_val, y_val = X.iloc[valid_id, :], y[valid_id]
            
            model = sklearn_model.fit(x_train, y_train)
            pred = model.predict(x_val)
            rounder.fit(X=y_val, y=pred)
            test_preds = rounder.predict(X=model.predict(X_test),
                                         coef=rounder.coefficients())
            oof_df.loc[oof_df.iloc[valid_id].index, 'oof'] = rounder.predict(pred, 
                                                                             rounder.coefficients())
            sub[f'{model_type}_open_channels_fold_{fold}'] = test_preds
            f1 = f1_score(
                oof_df.loc[oof_df.iloc[valid_id].index]['open_channels'],
                oof_df.loc[oof_df.iloc[valid_id].index]['oof'],
                average='macro')
            rmse = np.sqrt(
                mean_squared_error(
                    oof_df.loc[oof_df.index.isin(valid_id)]['open_channels'],
                    oof_df.loc[oof_df.index.isin(valid_id)]['oof'])).astype(
                        np.float32)
        oof_f1 = f1_score(oof_df['open_channels'],
                          oof_df['oof'],
                          average='macro')
        oof_rmse = np.sqrt(
            mean_squared_error(oof_df['open_channels'], oof_df['oof']))
        
    return oof_df.copy(), feat_importance_df.copy(), sub.copy(), oof_f1, oof_rmse


feval = {
    'classification': MacroF1MetricClassification,
    'regression': MacroF1MetricRegression
}

plt.style.use('fivethirtyeight')
%matplotlib inline

In [5]:
train = pd.read_csv('liverpool-ion-switching/train.csv',
                    dtype={
                        'time': np.float32,
                        'signal': np.float32,
                        'open_channels': np.int32
                    })
test = pd.read_csv('liverpool-ion-switching/test.csv',
                    dtype={
                        'time': np.float32,
                        'signal': np.float32,
                    })
sub = pd.read_csv('liverpool-ion-switching/sample_submission.csv')

## Features

In [6]:
def feature_eng(df: pd.DataFrame, bs=500_000, bs_slice=25_000):
    df = df.sort_values(by=['time']).reset_index(drop=True)
    df.index = ((df.time * 10_000) - 1).values
    df['batch'] = df.index // bs
    df['batch_index'] = df.index - (df.batch * bs)
    df['batch_slices'] = df['batch_index'] // bs_slice
    df['batch_slices2'] = df['batch'].astype(str).str.zfill(
        3) + '_' + df['batch_slices'].astype(str).str.zfill(3)

    for c in ['batch', 'batch_slices2']:
        df[f'batch_{bs//1000}k_max_{c}'] = df.groupby(
            [f'{c}'])['signal'].transform(np.max)
        df[f'batch_{bs//1000}k_min_{c}'] = df.groupby(
            [f'{c}'])['signal'].transform(np.min)
        df[f'batch_{bs//1000}k_mean_{c}'] = df.groupby(
            [f'{c}'])['signal'].transform(np.mean)
        df[f'batch_{bs//1000}k_std_{c}'] = df.groupby(
            [f'{c}'])['signal'].transform(np.std)
        df[f'batch_{bs//1000}k_median_{c}'] = df.groupby(
            [f'{c}'])['signal'].transform(np.median)
        df[f'batch_{bs//1000}k_diff_max_{c}'] = df.groupby(
            [f'{c}'])['signal'].transform(lambda x: np.max(np.diff(x)))
        df[f'batch_{bs//1000}k_diff_min_{c}'] = df.groupby(
            [f'{c}'])['signal'].transform(lambda x: np.min(np.diff(x)))
        df[f'batch_{bs//1000}k_range_{c}'] = np.abs(
            df[f'batch_{bs//1000}k_max_{c}'] - df[f'batch_{bs//1000}k_min_{c}'])
        df[f'batch_{bs//1000}k_maxtomin_{c}'] = np.abs(
            (df[f'batch_{bs//1000}k_max_{c}'] + 1e-8) /
            (df[f'batch_{bs//1000}k_min_{c}'] + 1e-8))
        
        df[f'batch_{bs//1000}k_shift_1_{c}'] = df.groupby(
            [f'{c}']).shift(1)['signal']
        df[f'batch_{bs//1000}k_shift_-1_{c}'] = df.groupby(
            [f'{c}']).shift(-1)['signal']
        df[f'batch_{bs//1000}k_shift_2_{c}'] = df.groupby(
            [f'{c}']).shift(2)['signal']
        df[f'batch_{bs//1000}k_shift_-2_{c}'] = df.groupby(
            [f'{c}']).shift(-2)['signal']
    feats = [
        c for c in df.columns if c not in [
            'time', 'open_channels', 'batch', 'batch_index', 'batch_slices',
            'batch_slices2'
        ]
    ]

    for c in feats + ['signal']:
        df[c + '_msignal'] = df[c] - df['signal']

    return df, feats

In [7]:
train, features = feature_eng(train, bs=50_000, bs_slice=5000)
train = reduce_mem_usage(train)
test, _ = feature_eng(test, bs=50_000, bs_slice=5000)
test = reduce_mem_usage(test)

Mem. usage decreased from 1606.46 Mb to 905.51 Mb (43.6% reduction)
Mem. usage decreased from 637.05 Mb to 362.40 Mb (43.1% reduction)


In [8]:
X = train[features]
X_test = test[features]
y = train['open_channels']
oof_df = train[['time', 'open_channels']].copy()

## Signal Analysis

In [5]:
from scipy import signal as sps
from numpy import fft
import pywt
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
output_notebook()

signal = train.signal.values
time = train.time.values


def moving_average(a, n=3):
    ret = np.cumsum(a, dtype=float)
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n - 1:] / n

In [None]:
p = figure(plot_width=800, plot_height=400)

p.line(train.time.values[::10000], signal[::10000])
show(p)

## `LightGBM`

### Grid Search for Best Parameters

In [41]:
from sklearn.model_selection import train_test_split

x_train, x_val, y_train, y_val = train_test_split(X,
                                                  y,
                                                  test_size=0.2,
                                                  stratify=y)
train_set = lgb.Dataset(x_train, y_train)
val_set = lgb.Dataset(x_val, y_val)

# below are fixed parameters for which we don't really need anything
RANDOM_SEED = 42
MODEL_TYPE = 'LGBM'
EARLY_STOPPING_ROUNDS = 50
N_THREADS = -1
OBJECTIVE = 'regression'
NUM_CLASS = 1 if OBJECTIVE == 'regression' else 11
METRIC = 'rmse'
NUM_BOOST_ROUND = 250_000


#### `max_depth` and `min_child_weight`

In [42]:
MAX_DEPTH = -1
MIN_CHILD_WEIGHT = 0
LEARNING_RATE = 0.09
NUM_LEAVES = 2**8 + 1
FEATURE_FRACTION = 1
BAGGING_FRACTION = 1
BAGGING_FREQ = 0
L1 = 0
L2 = 0
params_lgb = {
    'learning_rate': LEARNING_RATE,
    'lambda_l1': L1,
    'lambda_l2': L2,
    'max_depth': MAX_DEPTH,
    'min_child_weight': MIN_CHILD_WEIGHT,
    'num_leaves': NUM_LEAVES,
    'feature_fraction': FEATURE_FRACTION,
    'bagging_fraction': BAGGING_FRACTION,
    'bagging_freq': BAGGING_FREQ,
    'n_jobs': N_THREADS,
    'seed': RANDOM_SEED,
    'metric': METRIC,
    'objective': OBJECTIVE,
    'num_class': NUM_CLASS,
}

In [43]:
gridsearch_params = [(max_depth, min_child_weight)
                    for max_depth in np.arange(-1, 12, 4)
                    for min_child_weight in np.arange(1e-3, 0.1, 0.05)]

In [44]:
min_rmse = np.float32("Inf")
best_params = None

for max_depth, min_child_weight in gridsearch_params:
    print(f"CV with max_depth={max_depth}, min_child_weight={min_child_weight}")
    # Update our parameters
    params_lgb['max_depth'] = max_depth
    params_lgb['min_child_weight'] = min_child_weight    # Run CV
    cv_results = lgb.cv(
        params_lgb,
        train_set,
        num_boost_round=NUM_BOOST_ROUND,
        seed=RANDOM_SEED,
        nfold=5,
        metrics={METRIC},
        early_stopping_rounds=EARLY_STOPPING_ROUNDS
    )
        
    mean_rmse = cv_results['rmse-mean'].min()
    boost_rounds = cv_results['rmse-mean'].argmin()
    print(f"\tRMSE {mean_rmse} for {boost_rounds} rounds")
    if mean_rmse < min_rmse:
        min_rmse = mean_rmse
        best_params = (max_depth, min_child_weight)

CV with max_depth=-1, min_child_weight=0.001


KeyError: 'test-rmse-mean'

In [None]:
params_lgb['max_depth'], params_lgb['min_child_weight'] = best_params

#### `num_leaves`

In [None]:
min_rmse = np.float32("Inf")
best_params = None
gridsearch_params = [num_leaves for num_leaves in range(64, 2**10+1, 64)]
for num_leaves in gridsearch_params:
    print(f"CV with num_leaves = {num_leaves}")
    # Update our parameters
    params_lgb['num_leaves'] = num_leaves    # Run CV
    cv_results = lgb.cv(
        params_lgb,
        train_set,
        num_boost_round=NUM_BOOST_ROUND,
        seed=RANDOM_SEED,
        nfold=5,
        metrics={METRIC},
        early_stopping_rounds=EARLY_STOPPING_ROUNDS
    )
        # Update best MAE
    mean_rmse = cv_results['test-rmse-mean'].min()
    boost_rounds = cv_results['test-rmse-mean'].argmin()
    print(f"\tRMSE {mean_rmse} for {boost_rounds} rounds")
    if mean_rmse < min_rmse:
        min_rmse = mean_rmse
        best_params = num_leaves

In [None]:
params_lgb['num_leaves'] = best_params

#### `feature_frac`, `baggin_frac`, `bagging_freq`

In [None]:
min_rmse = np.float32("Inf")
best_params = None
gridsearch_params = [(feature_frac, baggin_frac, bagging_freq) 
                     for feature_frac in np.arange(0., 1., 0.2)
                     for baggin_frac in np.arange(0., 1., 0.2)
                     for baggin_freq in np.arange(0, 150, 50)]
for feature_frac, baggin_frac, bagging_freq in gridsearch_params:
    print(f"CV with feature_frac = {feature_frac}, baggin_frac={baggin_frac}, bagging_freq={bagging_freq}")
    # Update our parameters
    params_lgb['feature_frac'] = feature_frac    # Run CV
    params_lgb['baggin_frac'] = baggin_frac    # Run CV
    params_lgb['baggin_freq'] = baggin_freq    # Run CV
    cv_results = lgb.cv(
        params_lgb,
        train_set,
        num_boost_round=NUM_BOOST_ROUND,
        seed=RANDOM_SEED,
        nfold=5,
        metrics={METRIC},
        early_stopping_rounds=EARLY_STOPPING_ROUNDS
    )
        # Update best MAE
    mean_rmse = cv_results['test-rmse-mean'].min()
    boost_rounds = cv_results['test-rmse-mean'].argmin()
    print(f"\tRMSE {mean_rmse} for {boost_rounds} rounds")
    if mean_rmse < min_rmse:
        min_rmse = mean_rmse
        best_params = (feature_frac, baggin_frac, bagging_freq)

In [None]:
params_lgb['feature_frac'], params_lgb['baggin_frac'], params_lgb['baggin_freq'] = best_params

#### `learning_rate`, `L1`, `L2`

In [None]:
min_rmse = np.float32("Inf")
best_params = None
gridsearch_params = [(learning_rate, lambda_l1, lambda_l2) 
                     for learning_rate in [0.3, 0.2, 0.1, 0.05, 0.01, 0.05]
                     for lambda_l1 in [0.3, 0.2, 0.1, 0.05, 0.01, 0.05]
                     for lambda_l2 in [0.3, 0.2, 0.1, 0.05, 0.01, 0.05]]
for learning_rate, lambda_l1, lambda_l2 in gridsearch_params:
    print(f"CV with learning_rate = {learning_rate}, lambda_l1={lambda_l1}, lambda_l2={lambda_l2}")
    # Update our parameters
    params_lgb['learning_rate'] = learning_rate    # Run CV
    params_lgb['lambda_l1'] = lambda_l1    # Run CV
    params_lgb['lambda_l2'] = lambda_l2    # Run CV
    cv_results = lgb.cv(
        params_lgb,
        train_set,
        num_boost_round=NUM_BOOST_ROUND,
        seed=RANDOM_SEED,
        nfold=5,
        metrics={METRIC},
        early_stopping_rounds=EARLY_STOPPING_ROUNDS
    )
        # Update best MAE
    mean_rmse = cv_results['test-rmse-mean'].min()
    boost_rounds = cv_results['test-rmse-mean'].argmin()
    print(f"\tRMSE {mean_rmse} for {boost_rounds} rounds")
    if mean_rmse < min_rmse:
        min_rmse = mean_rmse
        best_params = (learning_rate, lambda_l1, lambda_l2)

In [None]:
params_lgb['learning_rate'], params_lgb['lambda_l1'], params_lgb['lambda_l2'] = best_params

### Training using learned parameters

In [13]:
oof_df_lgb, feat_importance_df_lgb, sub_lgb = cross_validate(
    params = params_lgb,
    X=X,
    y=y,
    X_test=X_test,
    oof_df=oof_df,
    features=features,
    feval=feval,
    model_type='lgb',
    objective=OBJECTIVE,
)

Training until validation scores don't improve for 50 rounds
Early stopping, best iteration is:
[947]	training's rmse: 0.16443	training's MacroF1Metric: 0.929394	valid_1's rmse: 0.166628	valid_1's MacroF1Metric: 0.928888
Training until validation scores don't improve for 50 rounds
[1000]	training's rmse: 0.163996	training's MacroF1Metric: 0.929733	valid_1's rmse: 0.16699	valid_1's MacroF1Metric: 0.928086
Early stopping, best iteration is:
[1256]	training's rmse: 0.163	training's MacroF1Metric: 0.930483	valid_1's rmse: 0.166604	valid_1's MacroF1Metric: 0.928432
Training until validation scores don't improve for 50 rounds
[1000]	training's rmse: 0.164024	training's MacroF1Metric: 0.929503	valid_1's rmse: 0.16682	valid_1's MacroF1Metric: 0.928063
Early stopping, best iteration is:
[976]	training's rmse: 0.164143	training's MacroF1Metric: 0.929461	valid_1's rmse: 0.166876	valid_1's MacroF1Metric: 0.928086
Training until validation scores don't improve for 50 rounds
[1000]	training's rmse: 

In [26]:
oof_df_lgb.to_csv('oof_df_lgb.csv', index=False)
feat_importance_df_lgb.to_csv('feat_importance_df_lgb.csv', index=False)
sub_lgb.to_csv('sub_lgb_with_folds.csv', index=False)


## `XGBoost`

In [89]:
TARGET = 'open_channels'

RANDOM_SEED = 42
MODEL_TYPE = 'LGBM'
LEARNING_RATE = 0.1
NUM_BOOST_ROUND = 5000
EARLY_STOPPING_ROUNDS = 50
N_THREADS = -1
OBJECTIVE = 'reg:squarederror'
METRIC = 'rmse'
MAX_DEPTH = 10
L1 = 0
L2 = 0

params_xgb = {
    'colsample_bytree': 0.375,
    'learning_rate': LEARNING_RATE,
    'max_depth': MAX_DEPTH,
    'seed': RANDOM_SEED,
    'eval_metric': METRIC,
    'objective': OBJECTIVE,
    'subsample': 1,
    'reg_lambda': L2,
    'reg_alpha': L1
}

In [90]:
oof_df_xgb, feat_importance_df_xgb, sub_xgb = cross_validate(
    X=X.reset_index(drop=True),
    y=y.reset_index(drop=True),
    X_test=X_test.reset_index(drop=True),
    oof_df=oof_df,
    features=features,
    params = params_xgb,
    model_type='xgb',
    objective=OBJECTIVE,
    num_boost_round=NUM_BOOST_ROUND,
    early_stopping_rounds=EARLY_STOPPING_ROUNDS)

[0]	train-rmse:3.1386	val-rmse:3.13293
Multiple eval metrics have been passed: 'val-rmse' will be used for early stopping.

Will train until val-rmse hasn't improved in 250 rounds.
[1000]	train-rmse:0.136969	val-rmse:0.164947


KeyboardInterrupt: 

## `SKLearn`

## `PyTorch` ?

## Combine and Submit

In [None]:
s_cols = [s for s in sub.columns if 'open_channels' in s]

sub['open_channels'] = sub[s_cols].median(axis=1).astype(int)

sub.head()