In [1]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import matthews_corrcoef
import lightgbm as lgb
import pandas as pd
import numpy as np

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
train = pd.read_csv('../data/processed_train_2500Hz.csv')
train = train.drop('signal_id', axis=1)

y = train['target']
X = train.drop('target', axis=1)

In [36]:
folds = StratifiedKFold(n_splits=5, 
                        shuffle=True, 
                        random_state=5000)

oof_preds = np.zeros(len(X))

for fold_, (trn_, val_) in enumerate(folds.split(X, y)):
    trn_x, trn_y = X.iloc[trn_], y.iloc[trn_]
    val_x, val_y = X.iloc[val_], y.iloc[val_]
    
#     train = lgb.Dataset(trn_x, trn_y)
#     val = lgb.Dataset(val_x, val_y)
    
    params = {'objective': 'binary',
              'seed': 5000,
              'learning_rate': 0.2,
              'num_boosting_rounds': 50,
              }
#               'early_stopping_rounds': 100}
    
    num_round = 1000
    
    clf = lgb.LGBMClassifier(**params)
    clf.fit(trn_x, trn_y, eval_set=(val_x, val_y), early_stopping_rounds=100, verbose=10)
    
    oof_preds[val_] = clf.predict(val_x)
    
    print('no {}-fold MCC: {}'.format(fold_ + 1, matthews_corrcoef(val_y.values, oof_preds[val_])))
    
    del clf
    
score = matthews_corrcoef(y, oof_preds)
print('OVERALL MCC: {:.5f}'.format(score))

Training until validation scores don't improve for 100 rounds.
[10]	valid_0's binary_logloss: 0.584095
[20]	valid_0's binary_logloss: 0.598815
[30]	valid_0's binary_logloss: 0.634703
[40]	valid_0's binary_logloss: 0.668604
[50]	valid_0's binary_logloss: 0.70047
[60]	valid_0's binary_logloss: 0.721683
[70]	valid_0's binary_logloss: 0.748737
[80]	valid_0's binary_logloss: 0.778149
[90]	valid_0's binary_logloss: 0.824425
[100]	valid_0's binary_logloss: 0.85028
Did not meet early stopping. Best iteration is:
[12]	valid_0's binary_logloss: 0.582744
no 1-fold MCC: 0.3250056229832958
Training until validation scores don't improve for 100 rounds.
[10]	valid_0's binary_logloss: 0.564415
[20]	valid_0's binary_logloss: 0.575608
[30]	valid_0's binary_logloss: 0.591794
[40]	valid_0's binary_logloss: 0.619266
[50]	valid_0's binary_logloss: 0.666193
[60]	valid_0's binary_logloss: 0.689183
[70]	valid_0's binary_logloss: 0.730453
[80]	valid_0's binary_logloss: 0.740983
[90]	valid_0's binary_logloss: 0.

In [7]:
clf.feature_importances_

array([401, 160, 120, 251, 331,  74, 286, 154, 338, 365, 516,   0,   0])

In [9]:
X

Unnamed: 0,amplitude__binned_entropy__max_bins_10,amplitude__count_above_mean,amplitude__count_below_mean,"amplitude__fft_coefficient__coeff_0__attr_""abs""","amplitude__fft_coefficient__coeff_1__attr_""abs""",amplitude__longest_strike_above_mean,amplitude__longest_strike_below_mean,amplitude__mean,amplitude__mean_abs_change,amplitude__mean_change,amplitude__median,amplitude__value_count__value_10,amplitude__value_count__value_8
0,0.022124,392074.0,407926.0,1352.195189,1337.861724,62.0,96.0,0.001690,0.117584,-1.207128e-07,-0.000009,0.0,0.0
1,0.043530,396747.0,403253.0,544.628315,862.484891,62.0,96.0,0.000681,0.114221,-1.207128e-07,-0.000014,0.0,0.0
2,0.010825,397061.0,402939.0,482.595174,613.577102,62.0,96.0,0.000603,0.120238,-1.207128e-07,-0.000020,0.0,0.0
3,0.040154,399621.0,400379.0,2.536181,349.094421,62.0,210.0,-0.000003,0.084222,-1.207128e-07,-0.000084,0.0,0.0
4,0.000999,401525.0,398475.0,367.944598,155.632718,62.0,210.0,-0.000460,0.093093,-1.207128e-07,-0.000106,0.0,0.0
5,0.033510,398544.0,401456.0,189.402373,826.003871,62.0,210.0,0.000237,0.096750,-1.207128e-07,-0.000061,0.0,0.0
6,0.593824,400580.0,399420.0,175.354632,223.168574,62.0,210.0,-0.000219,0.087823,-1.207128e-07,-0.000085,0.0,0.0
7,0.019323,400481.0,399519.0,153.150066,90.969407,62.0,210.0,-0.000191,0.081270,-1.207128e-07,-0.000085,0.0,0.0
8,0.125193,414999.0,385001.0,2652.837770,3846.915703,62.0,210.0,-0.003316,0.483648,-1.207128e-07,-0.000079,0.0,0.0
9,0.018505,398951.0,401049.0,111.391245,158.322199,62.0,210.0,0.000139,0.089959,-1.207128e-07,-0.000084,0.0,0.0
