In [None]:
#!cp /kaggle/input/data-bowl-2019-external-data/*.py /kaggle/working

In [95]:
# Enable IPython's autoreload extension in mode 2 so that edited project
# modules are re-imported before each cell execution.
%reload_ext autoreload
%autoreload 2
import warnings
import jupytools.syspath
# No-op stand-in for warnings.warn: accepts any arguments and does nothing.
def ignore(*args, **kwargs): pass
# NOTE: from here on every warning raised through warnings.warn is silently
# dropped for the whole kernel session (including deprecation warnings).
warnings.warn = ignore
# presumably puts the parent directory on sys.path so that the project modules
# (bundle, utils, dataset, metric) can be imported — verify against jupytools.
jupytools.syspath.add('..')

In [96]:
import os
import re
from collections import defaultdict, Counter, OrderedDict
from functools import partial
from multiprocessing import cpu_count
from os.path import join

import feather
import joblib
import lightgbm as lgb
import numpy as np
import pandas as pd
import scipy
from joblib import Parallel, delayed
from sklearn.model_selection import GroupKFold
from tqdm.auto import tqdm

import bundle
import utils as U
from dataset import load, load_sample, Subset, to_accuracy_group
from metric import qwk

## Extending The Dataset

The original datasets are processed as a whole to extend them with additional columns.

In [3]:
# Datetime attributes that get a sin/cos encoding: the default `features` of
# add_cyclical() and the attribute list iterated by CyclicFeatures.extract().
CYCLIC_FEATURES = ('Year', 'Month', 'Week', 'Dayofweek', 'Hour', 'Minute')

In [4]:
def add_feature_combinations(data, pairs):
    """Adds one combined string column for every pair of existing columns.

    For each `(left, right)` pair a new column named `'{left}_{right}'` is
    written into `data`; its values are the string forms of both source
    columns joined with an underscore. The frame is modified in place.

    Args:
        data: data frame to extend.
        pairs: iterable of two-element column-name tuples.

    Returns:
        The same `data` object, with the new columns added.

    Raises:
        AssertionError: if any referenced column is missing from `data`.
    """
    for left, right in pairs:
        assert left in data.columns, f'Column not found: {left}'
        assert right in data.columns, f'Column not found: {right}'
        left_text = data[left].astype(str)
        right_text = data[right].astype(str)
        data[f'{left}_{right}'] = left_text.str.cat(right_text, sep='_')
    return data

def add_datetime(data, column, prefix=None, with_time=True):
    """Parses a column as datetime and expands it into calendar attributes.

    The column is converted in place with `pd.to_datetime`, then one new
    column `'{prefix}_{Attr}'` is added for each of Year, Month, Week, Day and
    Dayofweek (plus Hour and Minute when `with_time` is true).

    Args:
        data: data frame to extend (modified in place).
        column: name of the column holding the timestamps.
        prefix: prefix of the generated columns; when not given, `U.default`
            falls back to `column` with a trailing 'date'/'Date' removed.
        with_time: also extract the hour and minute.

    Returns:
        The same `data` object, with the new columns added.
    """
    data[column] = pd.to_datetime(data[column])
    prefix = U.default(prefix, re.sub('[Dd]ate$', '', column))
    attrs = ('Year', 'Month', 'Week', 'Day', 'Dayofweek')
    if with_time:
        attrs += ('Hour', 'Minute')
    accessor = data[column].dt
    for attr in attrs:
        name = attr.lower()
        try:
            values = getattr(accessor, name)
        except AttributeError:
            # `Series.dt.week` was removed in pandas 2.0; every other
            # attribute is still expected to exist, so re-raise for those.
            if name != 'week':
                raise
            week = accessor.isocalendar().week
            # isocalendar() yields a nullable UInt32 column; convert it to the
            # plain dtype the removed accessor used to produce.
            values = week.astype('float64' if week.isna().any() else 'int64')
        data[f'{prefix}_{attr}'] = values
    return data

def add_cyclical(data, prefix, features=CYCLIC_FEATURES, modulo=None):
    """Adds sine/cosine encodings of the `'{prefix}_{feature}'` columns.

    For every feature two columns are written into `data`:
    `'{prefix}_{feature}_sin'` and `'{prefix}_{feature}_cos'`, computed from
    the angle `2*pi*value / period`.

    Args:
        data: data frame to extend (modified in place).
        prefix: prefix shared by the source columns.
        features: names of the features to encode.
        modulo: optional mapping feature name -> period. Features missing
            from the mapping use a period of 23.0.
            NOTE(review): the same 23.0 fallback is applied to every feature,
            whatever its natural cycle length — confirm this is intended.

    Returns:
        The same `data` object, with the new columns added.
    """
    periods = modulo if modulo else {}
    for feature in features:
        source = f'{prefix}_{feature}'
        period = periods.get(feature, 23.0)
        angle = 2*np.pi*data[source] / period
        data[f'{source}_sin'] = np.sin(angle)
        data[f'{source}_cos'] = np.cos(angle)
    return data

## Computing Meta Information

The meta-information is computed using public train and test datasets.

In [5]:
def compute_meta_data(dataset, *datasets):
    """Collects unique categorical values and win codes across all datasets.

    Args:
        dataset: first data frame (at least one is required).
        *datasets: any further data frames to take into account.

    Returns:
        A 'Meta' named tuple (built with `U.named_tuple`) holding `win_codes`
        followed by the unique values of `title_event_code`, `title`,
        `event_code`, `event_id`, `world`, `type`, and `assessment_titles`
        (titles of rows whose type is "Assessment").
    """
    frames = [dataset, *datasets]
    plain_columns = (
        'title_event_code', 'title', 'event_code', 'event_id', 'world', 'type')
    uniq = OrderedDict(
        (name, U.unique(frames, column=name)) for name in plain_columns)
    assessments = [frame.query('type == "Assessment"') for frame in frames]
    uniq['assessment_titles'] = U.unique(assessments, column='title')
    # Every title is scored on event code 4100, except Bird Measurer (4110).
    win_codes = dict.fromkeys(uniq['title'], 4100)
    win_codes['Bird Measurer (Assessment)'] = 4110
    return U.named_tuple('Meta', win_codes=win_codes, **uniq)

## User Features

Converts the raw dataset into user-specific features.

In [6]:
class FeaturesExtractor:
    """Turns the raw events of one user into per-assessment feature rows.

    The extractor owns a list of stateful steps. Each step exposes
    `extract(session, info, meta)` and, optionally, `init(meta)`; steps are
    re-initialised at the start of every call so no state leaks between users.
    """
    def __init__(self, steps):
        self.steps = steps

    def init_steps(self, meta):
        """Resets every step that defines an `init` method."""
        for step in self.steps:
            if hasattr(step, 'init'):
                step.init(meta)

    def __call__(self, user, meta, test=False):
        """Extracts feature rows from all sessions of a single user.

        Args:
            user: data frame with the events of one installation; sessions are
                visited in order of first appearance of `game_session`.
            meta: meta information forwarded to the steps.
            test: when true, only the last included row is returned.

        Returns:
            A list of ordered feature dicts. In test mode the list has at most
            one element and is empty when no session qualified.
        """
        rows = []
        self.init_steps(meta)
        for _, session in user.groupby('game_session', sort=False):
            info = session_info(session, meta, test)
            features = OrderedDict([
                ('installation_id', info.installation_id),
                ('game_session', info.game_session),
                ('session_title', info.session_title)
            ])
            # Steps are always invoked, even for sessions that are not kept,
            # because they accumulate the user's history as a side effect.
            for step in self.steps:
                extracted = step.extract(session, info, meta)
                features.update(extracted)
            if info.should_include:
                rows.append(features)
        # Slicing instead of indexing: a user without any included session
        # yields an empty list rather than raising IndexError.
        return rows[-1:] if test else rows
    
def session_info(session, meta, test):
    """Computes information about user's session.

    Args:
        session: non-empty data frame with the events of one game session.
        meta: meta information (forwarded to `attempt_outcomes`).
        test: whether the pipeline runs in test mode.

    Returns:
        An 'Info' named tuple with the installation id, game session id,
        session title and type, the `is_assessment` flag, the
        `should_include` flag, the attempt outcomes (None for non-assessment
        sessions) and the session duration in whole seconds.

    Raises:
        AssertionError: if the session has no rows.
    """
    assert not session.empty, 'Session cannot be empty!'
    session_type = session['type'].iloc[0]
    assessment = session_type == 'Assessment'
    outcomes = attempt_outcomes(session, meta) if assessment else None
    # In test mode every assessment is kept; in train mode only assessments
    # with more than one event and at least one recorded attempt are.
    should_include = (
        (assessment and test) or
        (assessment and (len(session) > 1) and outcomes.total > 0))
    duration = session.timestamp.iloc[-1] - session.timestamp.iloc[0]
    if pd.isna(duration):
        # Missing timestamps: keep whatever the NaT difference reports.
        duration_seconds = duration.seconds
    else:
        # `.seconds` is only the seconds component and silently drops whole
        # days; `total_seconds()` covers the complete span.
        duration_seconds = int(duration.total_seconds())
    return U.named_tuple(
        name='Info',
        installation_id=session['installation_id'].iloc[0],
        game_session=session['game_session'].iloc[0],
        session_title=session['title'].iloc[0],
        session_type=session_type,
        is_assessment=assessment,
        should_include=should_include,
        outcomes=outcomes,
        duration_seconds=duration_seconds)

def attempt_outcomes(session, meta):
    """Counts the successful and unsuccessful attempts within a session.

    Attempts are the rows whose `event_code` equals the win code registered
    in `meta.win_codes` for the session title (4100 when the title is
    unknown). A row counts as positive when its `event_data` contains the
    substring 'true' and as negative when it contains 'false'.

    Returns:
        A 'Trial' named tuple with the fields `pos`, `neg` and `total`.
    """
    title = session.title.iloc[0]
    win_code = meta.win_codes.get(title, 4100)
    attempts = session.query(f'event_code == {win_code}')
    payload = attempts.event_data
    n_pos = payload.str.contains('true').sum()
    n_neg = payload.str.contains('false').sum()
    return U.named_tuple('Trial', pos=n_pos, neg=n_neg, total=(n_pos + n_neg))

In [7]:
class BaseFeatures:
    """Common base of the feature steps.

    Subclasses provide `init(meta)`, which builds the step's initial state.
    Construction simply delegates to it, so the same method serves both as
    the constructor body and as the reset hook.
    """
    def __init__(self, meta):
        # State set-up lives in `init` so that it can be re-run later.
        self.init(meta)

In [8]:
class CountingFeatures(BaseFeatures):
    """Running counters of titles, event codes, event ids and activity types."""

    def init(self, meta):
        """Creates one counter dict per category from the values in `meta`."""
        self.cnt_title_event_code = U.init_dict(meta.title_event_code)
        self.cnt_title = U.init_dict(meta.title)
        self.cnt_event_code = U.init_dict(meta.event_code)
        self.cnt_event_id = U.init_dict(meta.event_id)
        self.cnt_activities = U.init_dict(meta.type)
        self.last_activity = None

    def extract(self, session, info, meta):
        """Emits the counters seen so far, then folds in the current session.

        Returns:
            An ordered dict of `cnt_<key>` features for included sessions and
            an empty ordered dict otherwise. Counters are updated either way.
        """
        features = OrderedDict()
        if info.should_include:
            # Snapshot taken before the current session is counted.
            snapshot = OrderedDict()
            for counter in (self.cnt_title_event_code,
                            self.cnt_title,
                            self.cnt_event_code,
                            self.cnt_event_id,
                            self.cnt_activities):
                snapshot.update(counter)
            for key, value in snapshot.items():
                features[f'cnt_{key}'] = value

        per_column = (
            (self.cnt_title_event_code, 'title_event_code'),
            (self.cnt_title, 'title'),
            (self.cnt_event_code, 'event_code'),
            (self.cnt_event_id, 'event_id'),
        )
        for counter, column in per_column:
            self.update_counters(counter, session, column)

        # An activity is counted once per run of consecutive sessions that
        # share the same type.
        if self.last_activity is None or self.last_activity != info.session_type:
            self.cnt_activities[info.session_type] += 1
            self.last_activity = info.session_type
        return features

    def update_counters(self, cnt, sess, column):
        """Adds the value frequencies of `sess[column]` to the known keys of `cnt`."""
        for value, occurrences in Counter(sess[column]).items():
            if value in cnt:
                cnt[value] += occurrences

In [9]:
class PerformanceFeatures(BaseFeatures):
    """Running statistics of the user's performance in earlier assessments.

    Every feature is read from the accumulators BEFORE the current session is
    folded into them, so a row describes only the history that precedes it.
    The one exception is `accuracy_group`, which is computed from the current
    session's own outcomes.
    """
    def init(self, meta):
        """Resets all accumulators to their initial (empty history) state."""
        self.acc_accuracy = 0
        self.acc_accuracy_group = 0
        self.acc_correct_attempts = 0
        self.acc_incorrect_attempts = 0
        self.acc_actions = 0
        self.durations = []
        self.accuracy_groups = U.init_dict([0, 1, 2, 3])
        # -1 is the initial value for titles without a recorded accuracy.
        self.last_accuracy_title = U.init_dict([f'acc_{t}' for t in meta.title], -1)
        # Number of included sessions processed so far.
        self.n_rows = 0
    
    def extract(self, session, info, meta):
        """Returns the performance features of a session and updates the history.

        For sessions that are not included the result is an empty ordered
        dict; only the action counter is advanced in that case.
        """
        features = OrderedDict()
        
        if info.should_include:
            # Attempt totals of previous included sessions, then add this one.
            features['acc_attempts_pos'] = self.acc_correct_attempts
            features['acc_attempts_neg'] = self.acc_incorrect_attempts
            self.acc_correct_attempts += info.outcomes.pos
            self.acc_incorrect_attempts += info.outcomes.neg
            
            # Mean accuracy over previous included sessions.
            features['acc_accuracy'] = U.savediv(self.acc_accuracy, self.n_rows)
            accuracy = U.savediv(info.outcomes.pos, info.outcomes.total)
            self.acc_accuracy += accuracy
            
            # Last accuracy per title, emitted before this session overwrites
            # the entry of its own title.
            features.update(self.last_accuracy_title)
            self.last_accuracy_title[f'acc_{info.session_title}'] = accuracy
            
            # Derived from the CURRENT session's accuracy.
            features['accuracy_group'] = to_accuracy_group(accuracy)
            # NOTE: this histogram is maintained but never written to `features`.
            self.accuracy_groups[features['accuracy_group']] += 1
            
            # Mean accuracy group over previous included sessions.
            features['acc_accuracy_group'] = U.savediv(self.acc_accuracy_group, self.n_rows)
            self.acc_accuracy_group += features['accuracy_group']

            # Number of events in all earlier sessions (of any type).
            features['acc_actions'] = self.acc_actions
            
            # Mean duration of previous included sessions, 0 without history.
            features['duration_mean'] = np.mean(self.durations) if self.durations else 0
            self.durations.append(info.duration_seconds)
            
            self.n_rows += 1
            
        # Counted for every session, included or not.
        self.acc_actions += len(session)
        
        return features

In [10]:
class CyclicFeatures(BaseFeatures):
    """Mean and standard deviation of the cyclic timestamp encodings seen so far."""

    def init(self, meta):
        """Starts with an empty history for every encoded column."""
        self.acc = defaultdict(list)

    def extract(self, session, info, meta):
        """Emits statistics of the history, then appends the current session.

        Sessions that are not included return an empty ordered dict and do
        not contribute to the history.
        """
        features = OrderedDict()
        if info.should_include:
            keys = [f'ts_{dt}_{angle}'
                    for dt in CYCLIC_FEATURES
                    for angle in ('sin', 'cos')]
            for key in keys:
                history = self.acc[key]
                # Statistics are 0 until at least one value has been recorded.
                features[f'{key}_mean'] = np.mean(history) if history else 0
                features[f'{key}_std'] = np.std(history) if history else 0
                history += session[key].tolist()
        return features

In [11]:
class TimestampFeatures(BaseFeatures):
    """Counters of the months, weekdays, days, hours and minutes of past events."""

    def init(self, meta):
        """Creates zeroed counters for every tracked calendar value."""
        self.cnt_month = U.init_dict([7, 8, 9, 10])
        self.cnt_dayofweek = U.init_dict(range(7))
        self.cnt_dayofmonth = U.init_dict(range(1, 32))
        self.cnt_hour = U.init_dict(range(24))
        self.cnt_minute = U.init_dict(range(60))

    def extract(self, session, info, meta):
        """Emits the counters seen so far, then counts the current session.

        Sessions that are not included return an empty ordered dict and leave
        the counters untouched. All emitted keys carry the 'ts_' prefix.
        """
        features = OrderedDict()
        if not info.should_include:
            return features

        # (counter, feature prefix, source column) for every tracked attribute.
        plan = (
            (self.cnt_month, 'month_', 'ts_Month'),
            (self.cnt_dayofweek, 'dow_', 'ts_Dayofweek'),
            (self.cnt_dayofmonth, 'dom_', 'ts_Day'),
            (self.cnt_hour, 'hour_', 'ts_Hour'),
            (self.cnt_minute, 'minute_', 'ts_Minute'),
        )
        # First pass: report history. Second pass: add the current session.
        for counter, key_prefix, _ in plan:
            features.update(U.prefix_keys(counter, key_prefix))
        for counter, _, column in plan:
            self.update_counters(counter, session, column)

        return U.prefix_keys(features, 'ts_')

    def update_counters(self, cnt, sess, column):
        """Adds the value frequencies of `sess[column]` to the known keys of `cnt`."""
        for value, occurrences in Counter(sess[column]).items():
            if value in cnt:
                cnt[value] += occurrences

## Preparing Dataset In Memory
There are two possible algorithms to prepare the data before training:
1. store every user subset on disk and process it from there,
2. keep the whole dataset in memory and process without dumping on disk (local training).

The first approach is more difficult and requires extra steps between pipeline stages. Therefore, we go with the second one and hope that the data fits into the kernel's memory.

In [12]:
class InMemoryAlgorithm:
    """Runs the feature extractor over every user of an in-memory dataset."""

    def __init__(self, extractor, meta, pbar=True, num_workers=cpu_count()):
        """Stores the extractor, the meta information and the run options.

        Args:
            extractor: callable invoked as `extractor(user, meta, test)`.
            meta: meta information forwarded to the extractor.
            pbar: wrap the user iterator into a tqdm progress bar.
            num_workers: worker count passed on to `U.parallel`.
        """
        self.extractor = extractor
        self.meta = meta
        self.pbar = pbar
        self.num_workers = num_workers

    def run(self, dataset, test=False):
        """Extracts features per `installation_id` and concatenates the results.

        Returns:
            A single data frame built from the per-user frames, stacked
            row-wise in the order the users first appear in `dataset`.
        """
        mode = 'test' if test else 'train'
        U.log(f'Running algorithm in {mode} mode.')

        def extract_user(user):
            rows = self.extractor(user, self.meta, test)
            return pd.DataFrame(rows)

        by_user = dataset.groupby('installation_id', sort=False)
        users = (frame for _, frame in by_user)
        if self.pbar:
            users = tqdm(users, total=by_user.ngroups)
        per_user = U.parallel(extract_user, users, num_workers=self.num_workers)
        return pd.concat(per_user, axis=0)

In [13]:
def encode(dataset, columns, encoders=None):
    """Replaces categorical columns with integer codes.

    Columns that already have an entry in `encoders` are mapped through it,
    with values missing from the mapping encoded as -1. For all other
    columns a new mapping is derived with `pd.factorize` and recorded.

    Args:
        dataset: data frame to encode (modified in place).
        columns: names of the columns to encode.
        encoders: optional dict column -> {value: code} from an earlier call.

    Returns:
        A tuple `(dataset, encoders)`.
    """
    encoders = encoders if encoders else {}
    for column in columns:
        if column not in encoders:
            codes, labels = pd.factorize(dataset[column])
            encoders[column] = OrderedDict(
                (label, code) for code, label in enumerate(labels))
            dataset[column] = codes
            continue

        mapping = encoders[column]

        def lookup(value, mapping=mapping):
            return mapping.get(value, -1)

        dataset[column] = dataset[column].map(lookup)
    return dataset, encoders

## Post-processing Features
Some features can be added only after the user-wise features have been created and represented as a data frame. These features are created in this section.

In [14]:
def add_user_wise_features(dataset, meta, pbar=True):
    """Adds per-user aggregates, broadcast to every row of the user.

    New columns (written into `dataset` in place):
        user_session_cnt: number of non-null `cnt_Clip` values of the user.
        user_duration_mean: mean of `duration_mean` over the user's rows.
        user_title_nunique: number of distinct `session_title` values.
        user_events_sum: row-wise sum of all `cnt_<event_code>` columns.
        user_events_mean: mean of `user_events_sum` over the user's rows.

    Args:
        dataset: feature data frame with an `installation_id` column.
        meta: meta information; `meta.event_code` lists the event codes.
        pbar: unused, kept for interface compatibility.

    Returns:
        The same `dataset` object, like the other `add_*` helpers, so the
        function can be chained. Callers may still ignore the return value.
    """
    def transform(group_obj, key, agg):
        return group_obj[key].transform(agg)

    events = [f'cnt_{code}' for code in meta.event_code]
    grouped = dataset.groupby('installation_id')
    dataset['user_session_cnt'] = transform(grouped, 'cnt_Clip', 'count')
    dataset['user_duration_mean'] = transform(grouped, 'duration_mean', 'mean')
    dataset['user_title_nunique'] = transform(grouped, 'session_title', 'nunique')
    dataset['user_events_sum'] = dataset[events].sum(axis=1)
    # Group again: `user_events_sum` did not exist when `grouped` was built,
    # so do not rely on the old groupby object seeing the new column.
    regrouped = dataset.groupby('installation_id')
    dataset['user_events_mean'] = transform(regrouped, 'user_events_sum', 'mean')
    return dataset

## Getting Relevant Features Only

In [15]:
class FeatureSelection:
    """Keeps only the features that are accepted by every selection rule.

    Attributes:
        rules: sequence of `(name, rule)` pairs; a rule takes a data frame
            and returns the column names it considers relevant.
        ignore_cols: columns dropped before the rules are applied.
        selected: sorted list produced by the latest `select` call, or None
            if `select` has not run yet.
    """
    def __init__(self, rules, ignore_cols=None):
        self.rules = rules
        self.ignore_cols = ignore_cols or []
        self.selected = None

    def select(self, dataset):
        """Applies all rules and returns the sorted intersection of their picks.

        The reported totals refer to the column count of `dataset` as passed
        in, i.e. before the ignored columns are removed.
        """
        relevant = {}
        total = len(dataset.columns)
        if self.ignore_cols:
            U.log(f'Excluding from consideration: {self.ignore_cols}')
            dataset = dataset.drop(columns=self.ignore_cols)
        for name, rule in self.rules:
            U.log(f'Applying feature selection rule: {name}')
            features = rule(dataset)
            relevant[name] = set(features)
            U.log(f'Selected features: {len(features)} of {total}')
        U.log(f'Keeping only features, selected by every rule.')
        features = set.intersection(*relevant.values())
        U.log(f'Final number of features changed from {total} to {len(features)}')
        # Remember the outcome: `selected` used to stay None forever.
        self.selected = sorted(features)
        # Hand out a copy so callers cannot alter the stored selection.
        return list(self.selected)
        
def non_zero_rows_and_cols(dataset):
    """Returns the names of the columns whose sum is not close to zero.

    Row and column sums are both tested with `np.allclose(..., 0)`; the
    frame is then restricted to the non-zero rows and columns and the
    remaining column names are returned as a list.
    """
    keep_rows = [not np.allclose(total, 0) for total in dataset.sum(axis=1)]
    keep_cols = [not np.allclose(total, 0) for total in dataset.sum(axis=0)]
    reduced = dataset.loc[keep_rows, keep_cols]
    return reduced.columns.tolist()

def non_correlated_cols(dataset, threshold=0.995):
    """Greedily drops the later column of every highly correlated pair.

    Column pairs are visited in `itertools.combinations` order. A pair is
    skipped when either column was already dropped; otherwise the second
    column is dropped if the Pearson correlation exceeds `threshold`. Only
    correlations above the threshold count: strongly negative and undefined
    (NaN) correlations never cause a drop.

    Returns:
        The surviving column names, in their original order.
    """
    from itertools import combinations
    columns = dataset.columns
    n_columns = len(columns)
    dropped = set()
    candidates = combinations(columns, 2)
    for first, second in tqdm(candidates, total=n_columns*(n_columns - 1)//2):
        if first in dropped or second in dropped:
            continue
        corr = np.corrcoef(dataset[first], dataset[second])[0][1]
        if corr > threshold:
            dropped.add(second)
    return [name for name in columns if name not in dropped]

## Raw Into Prepared Pipeline
Gathering all created functions into data preparing pipeline.

In [16]:
# Switch for local runs: True loads samples via load_sample() instead of the
# full datasets. Has no effect on Kaggle.
sample = False
if U.on_kaggle():
    # On Kaggle only the test set is read; no train data is loaded there.
    U.log('Loading test set only.')
    tst_data = pd.read_csv('/kaggle/input/data-science-bowl-2019/test.csv')
else:
    if sample:
        U.log('Warning: loading train and test data sample.')
        # presumably 500_000 is the sample size in rows — verify against dataset.load_sample
        # NOTE: this branch does not define trn_spec / trn_targ.
        trn_data, _, _ = load_sample(Subset.Train, 500_000)
        [tst_data] = load_sample(Subset.Test, 500_000)
    else:
        U.log('Loading train and test.')
        trn_data, trn_spec, trn_targ = load(Subset.Train)
        [tst_data] = load(Subset.Test)

Loading train and test.
(11341042, 11) (17690, 7) (386, 3) (1156414, 11) 

In [17]:
# Row-level preprocessing: title/event_code combination column, datetime
# attributes of `timestamp` under the 'ts' prefix, and their sin/cos encodings.
# presumably U.combine chains the callables in the listed order — verify in utils.
transform = U.combine(
    partial(add_feature_combinations, pairs=[('title', 'event_code')]),
    partial(add_datetime, column='timestamp', prefix='ts'),
    partial(add_cyclical, prefix='ts'))

# The helpers modify their input in place, so each dataset is copied first to
# keep the raw frames untouched.
if U.on_kaggle():
    U.log('Transforming test data only.')
    X_tst = transform(tst_data.copy())
    U.log(X_tst.shape)
else:
    U.log('Transforming train and test data.')
    X_tst = transform(tst_data.copy())
    X_trn = transform(trn_data.copy())
    U.log(X_trn.shape, X_tst.shape)

Transforming train and test data.
(11341042, 31) (1156414, 31)


In [18]:
# The meta information is computed locally from train + test and saved through
# `bundle`; on Kaggle (where X_trn does not exist) it is read back from there.
if U.on_kaggle():
    U.log('Reading pre-computed meta from disk.')
    meta = bundle.meta()
else:
    U.log('Computing meta using train and test datasets.')
    meta = compute_meta_data(X_trn, X_tst)
    U.log('Saving computed meta on disk.')
    bundle.save_meta(meta, 'meta')

Computing meta using train and test datasets.
Saving computed meta on disk.


In [19]:
# Feature steps, applied in this order to every session of a user; the
# extractor re-initialises them at the start of each user.
extractor = FeaturesExtractor([
    CountingFeatures(meta),
    PerformanceFeatures(meta),
    CyclicFeatures(meta),
    TimestampFeatures(meta)
])
# NOTE: the worker count is hard-coded to 12 rather than taken from cpu_count().
# NOTE: `algo` is rebound to the model name string in the modelling section
# further down, so this cell must be re-run before calling algo.run() again.
algo = InMemoryAlgorithm(extractor, meta, num_workers=12)
# Columns that are integer-encoded with encode() after feature extraction.
cat_cols = ['session_title']

In [20]:
# Build the user-level feature frames. Encoders are fitted on the train set
# only; the test set reuses them, so test values unseen in train become -1.
if U.on_kaggle():
    U.log('Preparing test dataset.')
    X_tst = algo.run(X_tst, test=True)
    # Encoders saved by a previous local run.
    encoders = bundle.encoders()
    X_tst, _ = encode(X_tst, cat_cols, encoders=encoders)
else:
    U.log('Preparing train and test datasets.')
    # NOTE: X_trn / X_tst are overwritten here (event-level -> feature-level),
    # so this cell cannot be re-run without re-running the transform cell.
    X_trn = algo.run(X_trn)
    X_tst = algo.run(X_tst, test=True)
    X_trn, encoders = encode(X_trn, cat_cols)
    X_tst, _ = encode(X_tst, cat_cols, encoders=encoders)
    bundle.save(encoders, 'encoders')

Preparing train and test datasets.
Running algorithm in train mode.


HBox(children=(IntProgress(value=0, max=17000), HTML(value='')))


Running algorithm in test mode.


HBox(children=(IntProgress(value=0, max=1000), HTML(value='')))




In [21]:
# Adds the per-user aggregate columns; the frames are modified in place, which
# is why the return value is not used.
if U.on_kaggle():
    U.log('Running post-processing on test set only.')
    add_user_wise_features(X_tst, meta)
else:
    U.log('Running post-processing on train and test sets.')
    add_user_wise_features(X_trn, meta)
    add_user_wise_features(X_tst, meta)

Running post-processing on train and test sets.


In [22]:
# A feature is kept only if both rules accept it. The target and the two id
# columns are excluded before the rules run.
selector = FeatureSelection(
    rules=[
        ('nonzero', non_zero_rows_and_cols),
        ('uncorr', non_correlated_cols),
    ],
    ignore_cols=[
        'accuracy_group', 
        'installation_id', 
        'game_session'
    ]
)

# The selection is derived from the train set locally and saved through
# `bundle`; on Kaggle the saved list is loaded instead.
# NOTE: the modelling cells below rebind `features` via get_features(), so the
# list produced here is not the one used for training or inference.
if U.on_kaggle():
    U.log('Loading relevant features list from disk.')
    features = bundle.features()
else:
    U.log('Deriving relevant features from train dataset.')
    features = selector.select(X_trn)
    bundle.save(features, 'features')

Deriving relevant features from train dataset.
Excluding from consideration: ['accuracy_group', 'installation_id', 'game_session']
Applying feature selection rule: nonzero
Selected features: 1069 of 1086
Applying feature selection rule: uncorr


HBox(children=(IntProgress(value=0, max=585903), HTML(value='')))

  c /= stddev[:, None]
  c /= stddev[None, :]



Selected features: 578 of 1086
Keeping only features, selected by every rule.
Final number of features changed from 1086 to 564


## Modelling

In [118]:
class LightGBM:
    """Thin wrapper around `lgb.LGBMRegressor` driven by a config dict.

    The config may contain `model_params` (constructor keyword arguments) and
    `fit_params` (extra keyword arguments for `fit`); both default to empty.
    """
    def __init__(self, config):
        model_params = config.get('model_params', {})
        self.model = lgb.LGBMRegressor(**model_params)
        self.config = config

    def fit(self, train_data, valid_data, metric):
        """Fits the regressor, evaluating on both the train and validation sets.

        Args:
            train_data: tuple `(X, y)` used for fitting.
            valid_data: tuple `(X, y)` used for validation only.
            metric: evaluation metric handed over as `eval_metric`.
        """
        x_trn, y_trn = train_data
        x_val, y_val = valid_data
        # Work on a copy so the shared config is never modified.
        fit_kwargs = self.config.get('fit_params', {}).copy()
        fit_kwargs.update(
            eval_set=[(x_trn, y_trn), (x_val, y_val)],
            eval_names=['trn', 'val'],
            eval_metric=metric,
            X=x_trn,
            y=y_trn,
        )
        self.model.fit(**fit_kwargs)

    def predict(self, X):
        """Returns the predictions of the wrapped model for `X`."""
        return self.model.predict(X)
    
# Default configuration per model name. For 'lightgbm', `model_params` is
# unpacked into lgb.LGBMRegressor(...) and `fit_params` into its fit() call by
# the LightGBM wrapper class.
MODEL_CONFIG = dict(
    lightgbm=dict(
        model_params=dict(
            n_estimators=2000,
            max_depth=15,
            metric='rmse',
            objective='regression',
            learning_rate=1e-2,
            feature_fraction=0.7,
            bagging_fraction=0.5,
            num_leaves=40,
            lambda_l1=1.0,
            lambda_l2=1.0
        ),
        fit_params=dict(
            early_stopping_rounds=100,
            verbose=100,
            categorical_feature='auto'
        )
    )
)

def get_default_config(name):
    """Looks up the default configuration of a model in `MODEL_CONFIG`.

    The stored dict itself is returned (not a copy).

    Raises:
        KeyError: if no configuration is registered under `name`.
    """
    config = MODEL_CONFIG[name]
    return config

def get_model_class(name):
    """Maps a model name to its wrapper class.

    Raises:
        ValueError: if the name is not 'lightgbm'.
    """
    if name != 'lightgbm':
        raise ValueError(f'unknown model class: {name}')
    return LightGBM

In [119]:
def inference(data, features, bounds, model='lightgbm', version='003', chunk_size=128):
    """Predicts accuracy groups with a saved ensemble of models.

    The models are loaded through `bundle.models`, applied to `data[features]`
    chunk by chunk, averaged per row and finally converted into discrete
    groups with `round_regressor_predictions`.

    Args:
        data: feature data frame.
        features: names of the columns fed into the models.
        bounds: rounding boundaries for the averaged predictions.
        model: name of the saved model family.
        version: version tag of the saved models.
        chunk_size: chunk size handed to `U.chunks`.

    Returns:
        The rounded ensemble predictions.
    """
    U.log(f'Running inference on dataset of shape: {len(features)}')
    row_ids = np.arange(len(data))
    U.log(f'Loading external models: {model} v{version}.')
    ensemble = bundle.models(model=model, version=version)
    per_model = {idx: [] for idx, _ in enumerate(ensemble)}
    U.log('Running models on test data...')
    for batch in U.chunks(row_ids, chunk_size):
        x_batch = data[features].iloc[batch]
        for idx, estimator in enumerate(ensemble):
            batch_preds = estimator.predict(x_batch).tolist()
            per_model[idx].extend(batch_preds)
    U.log('Averaging ensemble predictions.')
    # One column per model; the row-wise mean is the ensemble prediction.
    averaged = pd.DataFrame(per_model).mean(axis=1).values
    U.log('Rounding predictions using optimal bounds.')
    return round_regressor_predictions(averaged, bounds)

In [120]:
def submit(predicted, filename='submission.csv'):
    """Writes the predictions into a submission CSV file.

    The sample submission (read from the Kaggle input folder or loaded
    locally) serves as the template; its `accuracy_group` column is replaced
    with the integer-cast predictions.

    Returns:
        The name of the written file.
    """
    U.log('Converting predictions into submission file.')
    on_kaggle = U.on_kaggle()
    U.log('Running on Kaggle.' if on_kaggle else 'Running locally.')
    if on_kaggle:
        sample = pd.read_csv('/kaggle/input/data-science-bowl-2019/sample_submission.csv')
    else:
        [sample] = load(Subset.Sample)
    sample['accuracy_group'] = predicted.astype(int)
    sample.to_csv(filename, index=False)
    return filename

In [121]:
def train(dataset, features, reg_metric, algo='lightgbm', n_folds=5, config=None):
    """Trains one model per fold of a group-wise cross-validation.

    Folds are split by `installation_id`, so the rows of one user never end
    up on both sides of a split.

    Args:
        dataset: feature data frame with `installation_id` and
            `accuracy_group` columns.
        features: names of the columns used as model inputs.
        reg_metric: callable `(y_true, y_pred) -> score`; it must also expose
            an attribute named like `algo` that is used as the training metric.
        algo: name of the model family.
        n_folds: number of folds.
        config: model configuration; the default of `algo` when omitted.

    Returns:
        A 'Result' named tuple with `models` (one fitted model per fold),
        `cv` (score per fold) and `oof` (out-of-fold predictions).
    """
    splitter = GroupKFold(n_splits=n_folds)
    user_ids = dataset['installation_id']
    X = dataset[features].copy()
    y = dataset['accuracy_group']
    model_cls = get_model_class(algo)
    fit_metric = getattr(reg_metric, algo)
    oof = np.zeros(X.shape[0], dtype=np.float32)
    fitted = []
    scores = OrderedDict()

    fold_no = 0
    for trn_idx, val_idx in splitter.split(X, y, user_ids):
        fold_no += 1
        U.log(f'Running k-fold {fold_no} of {n_folds}')
        train_part = (X.iloc[trn_idx], y.iloc[trn_idx])
        valid_part = (X.iloc[val_idx], y.iloc[val_idx])
        model = model_cls(config or get_default_config(algo))
        model.fit(train_data=train_part, valid_data=valid_part, metric=fit_metric)
        oof[val_idx] = model.predict(valid_part[0])
        # Scored on the values as stored in `oof` (i.e. after the float32 cast).
        scores[f'cv_cappa_{fold_no}'] = np.mean(reg_metric(valid_part[1], oof[val_idx]))
        fitted.append(model)

    return U.named_tuple('Result', models=fitted, cv=scores, oof=oof)

In [122]:
class RegressionCappa:
    """Kappa metric for regressors whose outputs are bucketed by fixed bounds."""

    def __init__(self, bounds):
        self.bounds = bounds

    def __call__(self, y_true, y_pred):
        """Rounds the predictions with the stored bounds and returns `qwk`."""
        rounded = round_regressor_predictions(y_pred, self.bounds)
        return qwk(y_true, rounded)

    def lightgbm(self, y_true, y_pred):
        """Metric in the form of a `(name, value, is_higher_better)` tuple."""
        score = self(y_true, y_pred)
        return 'cappa', score, True
    
def round_regressor_predictions(preds, coefs):
    """Maps continuous predictions to bucket indices.

    Bucket `i` covers the half-open interval `(coefs[i], coefs[i + 1]]`.

    Args:
        preds: array of raw predictions; it is left unmodified.
        coefs: increasing sequence of boundaries, including the outer ones
            (e.g. `[-inf, b1, b2, b3, +inf]` for four buckets).

    Returns:
        A copy of `preds` in which every value that falls into a bucket is
        replaced with the index of that bucket. Values outside all buckets
        (NaN, for instance) are kept as they are.
    """
    x = preds.copy()
    for i, (lo, hi) in enumerate(zip(coefs[:-1], coefs[1:])):
        # Masks are taken from the untouched input. Testing `x` instead would
        # re-bucket values that were already replaced by a label whenever a
        # later interval happens to contain that label.
        x[(preds > lo) & (preds <= hi)] = i
    return x

def optimize_rounding_bounds_with_scipy(X, y):
    """Searches the three inner rounding boundaries that maximise `qwk`.

    The boundaries start at `[0.5, 1.5, 2.5]` and are tuned with the
    Nelder-Mead method of `scipy.optimize.minimize`.

    Args:
        X: continuous predictions.
        y: true labels.

    Returns:
        A list `[-inf, b1, b2, b3, +inf]` with the boundaries in increasing
        order.
    """
    def _loss(coef):
        # The candidate boundaries are sorted before use, so the optimiser is
        # free to propose them in any order.
        buckets = pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels=[0, 1, 2, 3])
        return -qwk(y, buckets)

    init_coef = [0.5, 1.5, 2.5]
    opt_coef = scipy.optimize.minimize(_loss, init_coef, method='nelder-mead')
    # Sort the result as well: the loss was evaluated on sorted boundaries, so
    # returning them unsorted could hand out bounds that were never scored.
    optimized = np.sort(opt_coef['x'])
    return [-np.inf] + optimized.tolist() + [np.inf]

#def optimize_rounding_bounds_with_perc(X, y):
#    X['accuracy_group']

In [126]:
from IPython.display import display
# NOTE: `algo` shadows the InMemoryAlgorithm instance created in the data
# preparation section; from here on it is the model name.
algo = 'lightgbm'
version = '008'

def get_features(dataset):
    """Returns every column of `dataset` except the id and target columns."""
    cols = [c for c in dataset.columns 
            if c not in ('installation_id', 'game_session', 'accuracy_group')]
    return cols
    
if U.on_kaggle():
    features = get_features(X_tst)
    U.log('Inference on Kaggle.')
    # The rounding bounds come from the packaged training results; without
    # this line `bounds` is undefined on a fresh Kaggle kernel.
    bounds = bundle.bounds()
    predicted = inference(X_tst, features, bounds=bounds, model=algo, version=version)
    U.log('Saving predictions on disk.')
    filename = submit(predicted)
    submit_df = pd.read_csv(filename)
    U.log('First 20 submission rows:')
    display(submit_df.head(20))
    
else:
    features = get_features(X_trn)
    # Stage 1: train with fixed boundaries to obtain out-of-fold predictions.
    U.log('Training with sub-optimal rounding.')
    reg_metric = RegressionCappa([-np.inf, 1., 2., 3., +np.inf])
    result = train(X_trn, features, reg_metric, algo=algo)
    
    # Stage 2: tune the boundaries on the out-of-fold predictions.
    U.log('Using predictions to find optimal rounding boundaries.')
    opt_bounds = optimize_rounding_bounds_with_scipy(result.oof, X_trn['accuracy_group'])
    U.log(f'Optimal values: {opt_bounds}')
    
    # Stage 3: retrain with the tuned boundaries in the evaluation metric.
    U.log('Using optimal boundaries to train a new ensemble of models.')
    reg_metric = RegressionCappa(opt_bounds)
    result = train(X_trn, features, reg_metric, algo=algo)
    
    U.log('Saving the final results.')
    bundle.save(result.models, f'models_{algo}_{version}')
    bundle.save(opt_bounds, 'bounds')

Training with sub-optimal rounding.
Running k-fold 1 of 5
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.03936	trn's cappa: 0.357995	val's rmse: 1.06798	val's cappa: 0.324698
[200]	trn's rmse: 0.952503	trn's cappa: 0.500485	val's rmse: 1.00472	val's cappa: 0.451879
[300]	trn's rmse: 0.90973	trn's cappa: 0.531379	val's rmse: 0.986598	val's cappa: 0.469112
[400]	trn's rmse: 0.880103	trn's cappa: 0.550739	val's rmse: 0.980274	val's cappa: 0.477278
[500]	trn's rmse: 0.856114	trn's cappa: 0.565384	val's rmse: 0.976732	val's cappa: 0.478811
[600]	trn's rmse: 0.835787	trn's cappa: 0.578807	val's rmse: 0.974481	val's cappa: 0.476676
Early stopping, best iteration is:
[517]	trn's rmse: 0.852408	trn's cappa: 0.567859	val's rmse: 0.976228	val's cappa: 0.480484
Running k-fold 2 of 5
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.02992	trn's cappa: 0.369341	val's rmse: 1.06796	val's cappa: 0.316178
[200]	trn's rmse: 0.95087	trn

In [127]:
# Local end-to-end check: run inference with the freshly saved models and
# bounds, verify the submission file, then package the results.
if not U.on_kaggle():
    # features = bundle.features()
    features = get_features(X_tst)
    bounds = bundle.bounds()
    filename = submit(inference(X_tst, features, bounds, model=algo, version=version))
    assert os.path.exists(filename)
    # The submission is expected to contain exactly 1000 rows.
    assert pd.read_csv(filename).shape[0] == 1000
    # NOTE: machine-specific absolute path; adjust when running elsewhere.
    bundle.package(folder='/home/ck/data/bowl2019/external/')

Running inference on dataset of shape: 1083
Loading external models: lightgbm v008.
Running models on test data...
Averaging ensemble predictions.
Rounding predictions using optimal bounds.
Converting predictions into submission file.
Running locally.
(1000, 2) Packaging training results into dataset.
/tmp/bowl2019/meta.joblib --> /home/ck/data/bowl2019/external/meta.joblib
/tmp/bowl2019/models_lightgbm_007.joblib --> /home/ck/data/bowl2019/external/models_lightgbm_007.joblib
/tmp/bowl2019/bounds.joblib --> /home/ck/data/bowl2019/external/bounds.joblib
/tmp/bowl2019/features.joblib --> /home/ck/data/bowl2019/external/features.joblib
/tmp/bowl2019/encoders.joblib --> /home/ck/data/bowl2019/external/encoders.joblib
/tmp/bowl2019/models_lightgbm_008.joblib --> /home/ck/data/bowl2019/external/models_lightgbm_008.joblib
Packaging helper scripts into dataset.
../style.py --> /home/ck/data/bowl2019/external/style.py
../basedir.py --> /home/ck/data/bowl2019/external/basedir.py
../dataset.py --

In [None]:
# inference(X_tst, features, bounds=bounds, model=algo, version=version)

In [86]:
# common = dict(objective='regression', metric='rmse', n_estimators=10000)

# depth_config = dict(
#     learning_rate=[0.01],
#     max_depth=[5, 10, 25],
#     feature_fraction=[0.5, 0.7, 1.0],
#     bagging_fraction=[0.5, 0.7, 1.0],
#     num_leaves=[40, 60, 80, 100],
#     lambda_l1=[0.01, 0.1, 1.0],
#     lambda_l2=[0.01, 0.1, 1.0]
# )

# curr_conf = depth_config

In [87]:
# domains = []
# for key, values in curr_conf.items():
#     keys = [key] * len(values)
#     broadcasted = list(zip(keys, values))
#     domains.append(broadcasted)

In [None]:
# from itertools import product
# benchmark = []
# features = [f for f in X_trn.columns 
#             if f not in ('installation_id', 'accuracy_group', 'game_session')]
# fit_params = MODEL_CONFIG['lightgbm']['fit_params']
# fit_params['verbose'] = 100
# reg_metric = RegressionCappa(opt_bounds)
# for i, params in enumerate(product(*domains), 1):
#     U.log(f'[#{i}] Training with params:')
#     for k, v in params:
#         U.log(f'{k:20s}= {v}')
#     params = OrderedDict(params)
#     config = dict(model_params={**params, **common}, fit_params=fit_params)
#     results = train(X_trn, features, reg_metric, 
#                     algo=algo, config=config, n_folds=3)
#     params.update(results.cv)
#     benchmark.append(params)

[#1] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.5
num_leaves          = 40
lambda_l1           = 0.01
lambda_l2           = 0.01
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06356	trn's cappa: 0.465041	val's rmse: 1.10136	val's cappa: 0.418367
[200]	trn's rmse: 0.985699	trn's cappa: 0.619633	val's rmse: 1.0396	val's cappa: 0.569204
[300]	trn's rmse: 0.944986	trn's cappa: 0.658054	val's rmse: 1.01367	val's cappa: 0.592244
[400]	trn's rmse: 0.920713	trn's cappa: 0.672643	val's rmse: 1.00065	val's cappa: 0.60228
[500]	trn's rmse: 0.903412	trn's cappa: 0.681942	val's rmse: 0.994519	val's cappa: 0.605262
[600]	trn's rmse: 0.889803	trn's cappa: 0.694248	val's rmse: 0.991691	val's cappa: 0.605498
Early stopping, best iteration is:
[521]	trn's rmse: 0.900137	trn's cappa: 0.684845	val's rmse: 0.993775	val's cappa: 0.60768
Running k-fold 2 of 3
Training unti

[100]	trn's rmse: 1.07185	trn's cappa: 0.473651	val's rmse: 1.09256	val's cappa: 0.397727
[200]	trn's rmse: 0.990904	trn's cappa: 0.62308	val's rmse: 1.03102	val's cappa: 0.557386
[300]	trn's rmse: 0.947658	trn's cappa: 0.661322	val's rmse: 1.00554	val's cappa: 0.584563
[400]	trn's rmse: 0.920743	trn's cappa: 0.675916	val's rmse: 0.995451	val's cappa: 0.590922
[500]	trn's rmse: 0.902225	trn's cappa: 0.689164	val's rmse: 0.990729	val's cappa: 0.592419
[600]	trn's rmse: 0.886749	trn's cappa: 0.701944	val's rmse: 0.988927	val's cappa: 0.595963
[700]	trn's rmse: 0.873108	trn's cappa: 0.71267	val's rmse: 0.988196	val's cappa: 0.594956
Early stopping, best iteration is:
[608]	trn's rmse: 0.885708	trn's cappa: 0.702365	val's rmse: 0.988783	val's cappa: 0.596593
[#4] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.5
num_leaves          = 40
lambda_l1           = 0.1
lambda_l2           = 0.01
Running k-fold 1 of 3
Train

[200]	trn's rmse: 0.990929	trn's cappa: 0.622905	val's rmse: 1.03113	val's cappa: 0.556987
[300]	trn's rmse: 0.947718	trn's cappa: 0.661443	val's rmse: 1.00562	val's cappa: 0.583853
[400]	trn's rmse: 0.92067	trn's cappa: 0.676847	val's rmse: 0.995692	val's cappa: 0.589566
[500]	trn's rmse: 0.90205	trn's cappa: 0.68942	val's rmse: 0.991244	val's cappa: 0.589989
Early stopping, best iteration is:
[430]	trn's rmse: 0.914437	trn's cappa: 0.681244	val's rmse: 0.993633	val's cappa: 0.592192
[#7] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.5
num_leaves          = 40
lambda_l1           = 1.0
lambda_l2           = 0.01
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06419	trn's cappa: 0.464802	val's rmse: 1.10177	val's cappa: 0.417064
[200]	trn's rmse: 0.986421	trn's cappa: 0.619984	val's rmse: 1.0402	val's cappa: 0.569676
[300]	trn's rmse: 0.946014	trn's capp

[400]	trn's rmse: 0.921792	trn's cappa: 0.676311	val's rmse: 0.995467	val's cappa: 0.591342
[500]	trn's rmse: 0.903539	trn's cappa: 0.686385	val's rmse: 0.990849	val's cappa: 0.593311
Early stopping, best iteration is:
[445]	trn's rmse: 0.912974	trn's cappa: 0.681019	val's rmse: 0.992995	val's cappa: 0.594831
[#10] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.5
num_leaves          = 60
lambda_l1           = 0.01
lambda_l2           = 0.01
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06356	trn's cappa: 0.465041	val's rmse: 1.10136	val's cappa: 0.418367
[200]	trn's rmse: 0.985699	trn's cappa: 0.619633	val's rmse: 1.0396	val's cappa: 0.569204
[300]	trn's rmse: 0.944986	trn's cappa: 0.658054	val's rmse: 1.01367	val's cappa: 0.592244
[400]	trn's rmse: 0.920713	trn's cappa: 0.672643	val's rmse: 1.00065	val's cappa: 0.60228
[500]	trn's rmse: 0.903412	trn's 

[800]	trn's rmse: 0.858247	trn's cappa: 0.725149	val's rmse: 1.00547	val's cappa: 0.573852
Early stopping, best iteration is:
[743]	trn's rmse: 0.863321	trn's cappa: 0.720189	val's rmse: 1.0057	val's cappa: 0.574962
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07185	trn's cappa: 0.473651	val's rmse: 1.09256	val's cappa: 0.397727
[200]	trn's rmse: 0.990904	trn's cappa: 0.62308	val's rmse: 1.03102	val's cappa: 0.557386
[300]	trn's rmse: 0.947658	trn's cappa: 0.661322	val's rmse: 1.00554	val's cappa: 0.584563
[400]	trn's rmse: 0.920743	trn's cappa: 0.675916	val's rmse: 0.995451	val's cappa: 0.590922
[500]	trn's rmse: 0.902225	trn's cappa: 0.689164	val's rmse: 0.990729	val's cappa: 0.592419
[600]	trn's rmse: 0.886749	trn's cappa: 0.701944	val's rmse: 0.988927	val's cappa: 0.595963
[700]	trn's rmse: 0.873108	trn's cappa: 0.71267	val's rmse: 0.988196	val's cappa: 0.594956
Early stopping, best iteration is:
[608]	trn's rmse: 0.885708

Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07189	trn's cappa: 0.473378	val's rmse: 1.09253	val's cappa: 0.397993
[200]	trn's rmse: 0.990929	trn's cappa: 0.622905	val's rmse: 1.03113	val's cappa: 0.556987
[300]	trn's rmse: 0.947718	trn's cappa: 0.661443	val's rmse: 1.00562	val's cappa: 0.583853
[400]	trn's rmse: 0.92067	trn's cappa: 0.676847	val's rmse: 0.995692	val's cappa: 0.589566
[500]	trn's rmse: 0.90205	trn's cappa: 0.68942	val's rmse: 0.991244	val's cappa: 0.589989
Early stopping, best iteration is:
[430]	trn's rmse: 0.914437	trn's cappa: 0.681244	val's rmse: 0.993633	val's cappa: 0.592192
[#16] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.5
num_leaves          = 60
lambda_l1           = 1.0
lambda_l2           = 0.01
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06419	trn's cappa: 0.

[200]	trn's rmse: 0.991588	trn's cappa: 0.623919	val's rmse: 1.03131	val's cappa: 0.556632
[300]	trn's rmse: 0.94856	trn's cappa: 0.66011	val's rmse: 1.00557	val's cappa: 0.583323
[400]	trn's rmse: 0.921792	trn's cappa: 0.676311	val's rmse: 0.995467	val's cappa: 0.591342
[500]	trn's rmse: 0.903539	trn's cappa: 0.686385	val's rmse: 0.990849	val's cappa: 0.593311
Early stopping, best iteration is:
[445]	trn's rmse: 0.912974	trn's cappa: 0.681019	val's rmse: 0.992995	val's cappa: 0.594831
[#19] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.5
num_leaves          = 80
lambda_l1           = 0.01
lambda_l2           = 0.01
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06356	trn's cappa: 0.465041	val's rmse: 1.10136	val's cappa: 0.418367
[200]	trn's rmse: 0.985699	trn's cappa: 0.619633	val's rmse: 1.0396	val's cappa: 0.569204
[300]	trn's rmse: 0.944986	trn's c

[600]	trn's rmse: 0.878548	trn's cappa: 0.70877	val's rmse: 1.00711	val's cappa: 0.572947
[700]	trn's rmse: 0.867292	trn's cappa: 0.716516	val's rmse: 1.00604	val's cappa: 0.573365
[800]	trn's rmse: 0.858247	trn's cappa: 0.725149	val's rmse: 1.00547	val's cappa: 0.573852
Early stopping, best iteration is:
[743]	trn's rmse: 0.863321	trn's cappa: 0.720189	val's rmse: 1.0057	val's cappa: 0.574962
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07185	trn's cappa: 0.473651	val's rmse: 1.09256	val's cappa: 0.397727
[200]	trn's rmse: 0.990904	trn's cappa: 0.62308	val's rmse: 1.03102	val's cappa: 0.557386
[300]	trn's rmse: 0.947658	trn's cappa: 0.661322	val's rmse: 1.00554	val's cappa: 0.584563
[400]	trn's rmse: 0.920743	trn's cappa: 0.675916	val's rmse: 0.995451	val's cappa: 0.590922
[500]	trn's rmse: 0.902225	trn's cappa: 0.689164	val's rmse: 0.990729	val's cappa: 0.592419
[600]	trn's rmse: 0.886749	trn's cappa: 0.701944	val's rmse: 0.

[700]	trn's rmse: 0.866142	trn's cappa: 0.718261	val's rmse: 1.00625	val's cappa: 0.57478
[800]	trn's rmse: 0.857033	trn's cappa: 0.726532	val's rmse: 1.00573	val's cappa: 0.574122
Early stopping, best iteration is:
[707]	trn's rmse: 0.865488	trn's cappa: 0.718917	val's rmse: 1.00623	val's cappa: 0.575329
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07189	trn's cappa: 0.473378	val's rmse: 1.09253	val's cappa: 0.397993
[200]	trn's rmse: 0.990929	trn's cappa: 0.622905	val's rmse: 1.03113	val's cappa: 0.556987
[300]	trn's rmse: 0.947718	trn's cappa: 0.661443	val's rmse: 1.00562	val's cappa: 0.583853
[400]	trn's rmse: 0.92067	trn's cappa: 0.676847	val's rmse: 0.995692	val's cappa: 0.589566
[500]	trn's rmse: 0.90205	trn's cappa: 0.68942	val's rmse: 0.991244	val's cappa: 0.589989
Early stopping, best iteration is:
[430]	trn's rmse: 0.914437	trn's cappa: 0.681244	val's rmse: 0.993633	val's cappa: 0.592192
[#25] Training with params:


Early stopping, best iteration is:
[564]	trn's rmse: 0.884621	trn's cappa: 0.704435	val's rmse: 1.00732	val's cappa: 0.573188
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07242	trn's cappa: 0.473006	val's rmse: 1.09277	val's cappa: 0.396633
[200]	trn's rmse: 0.991588	trn's cappa: 0.623919	val's rmse: 1.03131	val's cappa: 0.556632
[300]	trn's rmse: 0.94856	trn's cappa: 0.66011	val's rmse: 1.00557	val's cappa: 0.583323
[400]	trn's rmse: 0.921792	trn's cappa: 0.676311	val's rmse: 0.995467	val's cappa: 0.591342
[500]	trn's rmse: 0.903539	trn's cappa: 0.686385	val's rmse: 0.990849	val's cappa: 0.593311
Early stopping, best iteration is:
[445]	trn's rmse: 0.912974	trn's cappa: 0.681019	val's rmse: 0.992995	val's cappa: 0.594831
[#28] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.5
num_leaves          = 100
lambda_l1           = 0.01
lambda_l2           = 0.

[300]	trn's rmse: 0.938729	trn's cappa: 0.666004	val's rmse: 1.02395	val's cappa: 0.555815
[400]	trn's rmse: 0.911389	trn's cappa: 0.685873	val's rmse: 1.01329	val's cappa: 0.56657
[500]	trn's rmse: 0.893065	trn's cappa: 0.698588	val's rmse: 1.00885	val's cappa: 0.571208
[600]	trn's rmse: 0.878548	trn's cappa: 0.70877	val's rmse: 1.00711	val's cappa: 0.572947
[700]	trn's rmse: 0.867292	trn's cappa: 0.716516	val's rmse: 1.00604	val's cappa: 0.573365
[800]	trn's rmse: 0.858247	trn's cappa: 0.725149	val's rmse: 1.00547	val's cappa: 0.573852
Early stopping, best iteration is:
[743]	trn's rmse: 0.863321	trn's cappa: 0.720189	val's rmse: 1.0057	val's cappa: 0.574962
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07185	trn's cappa: 0.473651	val's rmse: 1.09256	val's cappa: 0.397727
[200]	trn's rmse: 0.990904	trn's cappa: 0.62308	val's rmse: 1.03102	val's cappa: 0.557386
[300]	trn's rmse: 0.947658	trn's cappa: 0.661322	val's rmse: 1.005

[400]	trn's rmse: 0.911127	trn's cappa: 0.684782	val's rmse: 1.01342	val's cappa: 0.566689
[500]	trn's rmse: 0.892649	trn's cappa: 0.698058	val's rmse: 1.00881	val's cappa: 0.570247
[600]	trn's rmse: 0.877802	trn's cappa: 0.71008	val's rmse: 1.0071	val's cappa: 0.572086
[700]	trn's rmse: 0.866142	trn's cappa: 0.718261	val's rmse: 1.00625	val's cappa: 0.57478
[800]	trn's rmse: 0.857033	trn's cappa: 0.726532	val's rmse: 1.00573	val's cappa: 0.574122
Early stopping, best iteration is:
[707]	trn's rmse: 0.865488	trn's cappa: 0.718917	val's rmse: 1.00623	val's cappa: 0.575329
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07189	trn's cappa: 0.473378	val's rmse: 1.09253	val's cappa: 0.397993
[200]	trn's rmse: 0.990929	trn's cappa: 0.622905	val's rmse: 1.03113	val's cappa: 0.556987
[300]	trn's rmse: 0.947718	trn's cappa: 0.661443	val's rmse: 1.00562	val's cappa: 0.583853
[400]	trn's rmse: 0.92067	trn's cappa: 0.676847	val's rmse: 0.995

[400]	trn's rmse: 0.912344	trn's cappa: 0.684352	val's rmse: 1.01326	val's cappa: 0.567768
[500]	trn's rmse: 0.893988	trn's cappa: 0.697713	val's rmse: 1.00878	val's cappa: 0.572462
[600]	trn's rmse: 0.879557	trn's cappa: 0.708177	val's rmse: 1.00671	val's cappa: 0.572102
Early stopping, best iteration is:
[564]	trn's rmse: 0.884621	trn's cappa: 0.704435	val's rmse: 1.00732	val's cappa: 0.573188
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07242	trn's cappa: 0.473006	val's rmse: 1.09277	val's cappa: 0.396633
[200]	trn's rmse: 0.991588	trn's cappa: 0.623919	val's rmse: 1.03131	val's cappa: 0.556632
[300]	trn's rmse: 0.94856	trn's cappa: 0.66011	val's rmse: 1.00557	val's cappa: 0.583323
[400]	trn's rmse: 0.921792	trn's cappa: 0.676311	val's rmse: 0.995467	val's cappa: 0.591342
[500]	trn's rmse: 0.903539	trn's cappa: 0.686385	val's rmse: 0.990849	val's cappa: 0.593311
Early stopping, best iteration is:
[445]	trn's rmse: 0.912974	

Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.066	trn's cappa: 0.480616	val's rmse: 1.10449	val's cappa: 0.390382
[200]	trn's rmse: 0.985673	trn's cappa: 0.625003	val's rmse: 1.05006	val's cappa: 0.528419
[300]	trn's rmse: 0.938729	trn's cappa: 0.666004	val's rmse: 1.02395	val's cappa: 0.555815
[400]	trn's rmse: 0.911389	trn's cappa: 0.685873	val's rmse: 1.01329	val's cappa: 0.56657
[500]	trn's rmse: 0.893065	trn's cappa: 0.698588	val's rmse: 1.00885	val's cappa: 0.571208
[600]	trn's rmse: 0.878548	trn's cappa: 0.70877	val's rmse: 1.00711	val's cappa: 0.572947
[700]	trn's rmse: 0.867292	trn's cappa: 0.716516	val's rmse: 1.00604	val's cappa: 0.573365
[800]	trn's rmse: 0.858247	trn's cappa: 0.725149	val's rmse: 1.00547	val's cappa: 0.573852
Early stopping, best iteration is:
[743]	trn's rmse: 0.863321	trn's cappa: 0.720189	val's rmse: 1.0057	val's cappa: 0.574962
Running k-fold 3 of 3
Training until validation scores don't improv

[100]	trn's rmse: 1.06607	trn's cappa: 0.480717	val's rmse: 1.10462	val's cappa: 0.389191
[200]	trn's rmse: 0.985457	trn's cappa: 0.626326	val's rmse: 1.05015	val's cappa: 0.531331
[300]	trn's rmse: 0.938432	trn's cappa: 0.667487	val's rmse: 1.0241	val's cappa: 0.553163
[400]	trn's rmse: 0.911127	trn's cappa: 0.684782	val's rmse: 1.01342	val's cappa: 0.566689
[500]	trn's rmse: 0.892649	trn's cappa: 0.698058	val's rmse: 1.00881	val's cappa: 0.570247
[600]	trn's rmse: 0.877802	trn's cappa: 0.71008	val's rmse: 1.0071	val's cappa: 0.572086
[700]	trn's rmse: 0.866142	trn's cappa: 0.718261	val's rmse: 1.00625	val's cappa: 0.57478
[800]	trn's rmse: 0.857033	trn's cappa: 0.726532	val's rmse: 1.00573	val's cappa: 0.574122
Early stopping, best iteration is:
[707]	trn's rmse: 0.865488	trn's cappa: 0.718917	val's rmse: 1.00623	val's cappa: 0.575329
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07189	trn's cappa: 0.473378	val's rmse: 1.0925

[100]	trn's rmse: 1.06677	trn's cappa: 0.478859	val's rmse: 1.10474	val's cappa: 0.391367
[200]	trn's rmse: 0.98643	trn's cappa: 0.625433	val's rmse: 1.05028	val's cappa: 0.52972
[300]	trn's rmse: 0.939754	trn's cappa: 0.66551	val's rmse: 1.02387	val's cappa: 0.554843
[400]	trn's rmse: 0.912344	trn's cappa: 0.684352	val's rmse: 1.01326	val's cappa: 0.567768
[500]	trn's rmse: 0.893988	trn's cappa: 0.697713	val's rmse: 1.00878	val's cappa: 0.572462
[600]	trn's rmse: 0.879557	trn's cappa: 0.708177	val's rmse: 1.00671	val's cappa: 0.572102
Early stopping, best iteration is:
[564]	trn's rmse: 0.884621	trn's cappa: 0.704435	val's rmse: 1.00732	val's cappa: 0.573188
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07242	trn's cappa: 0.473006	val's rmse: 1.09277	val's cappa: 0.396633
[200]	trn's rmse: 0.991588	trn's cappa: 0.623919	val's rmse: 1.03131	val's cappa: 0.556632
[300]	trn's rmse: 0.94856	trn's cappa: 0.66011	val's rmse: 1.00557

[400]	trn's rmse: 0.920853	trn's cappa: 0.677287	val's rmse: 0.995396	val's cappa: 0.591241
[500]	trn's rmse: 0.90219	trn's cappa: 0.688505	val's rmse: 0.990751	val's cappa: 0.591315
Early stopping, best iteration is:
[476]	trn's rmse: 0.906248	trn's cappa: 0.6857	val's rmse: 0.991525	val's cappa: 0.592527
[#54] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.7
num_leaves          = 60
lambda_l1           = 1.0
lambda_l2           = 1.0
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06468	trn's cappa: 0.463568	val's rmse: 1.1019	val's cappa: 0.419091
[200]	trn's rmse: 0.986898	trn's cappa: 0.619374	val's rmse: 1.04046	val's cappa: 0.57044
[300]	trn's rmse: 0.946473	trn's cappa: 0.654709	val's rmse: 1.01415	val's cappa: 0.595418
[400]	trn's rmse: 0.920859	trn's cappa: 0.673238	val's rmse: 1.00221	val's cappa: 0.599552
[500]	trn's rmse: 0.903184	trn's cappa

[400]	trn's rmse: 0.919865	trn's cappa: 0.678213	val's rmse: 0.995207	val's cappa: 0.590986
[500]	trn's rmse: 0.901668	trn's cappa: 0.689603	val's rmse: 0.990749	val's cappa: 0.591098
[600]	trn's rmse: 0.886292	trn's cappa: 0.701461	val's rmse: 0.988977	val's cappa: 0.593657
[700]	trn's rmse: 0.872865	trn's cappa: 0.713279	val's rmse: 0.987906	val's cappa: 0.593713
[800]	trn's rmse: 0.860898	trn's cappa: 0.722583	val's rmse: 0.98738	val's cappa: 0.593999
Early stopping, best iteration is:
[743]	trn's rmse: 0.867258	trn's cappa: 0.716978	val's rmse: 0.987522	val's cappa: 0.595518
[#57] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.7
num_leaves          = 80
lambda_l1           = 0.01
lambda_l2           = 1.0
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.064	trn's cappa: 0.466282	val's rmse: 1.10165	val's cappa: 0.417056
[200]	trn's rmse: 0.986285	trn's

[400]	trn's rmse: 0.920149	trn's cappa: 0.678209	val's rmse: 0.995555	val's cappa: 0.593252
[500]	trn's rmse: 0.901525	trn's cappa: 0.68987	val's rmse: 0.990995	val's cappa: 0.593225
Early stopping, best iteration is:
[431]	trn's rmse: 0.913749	trn's cappa: 0.681574	val's rmse: 0.993686	val's cappa: 0.594659
[#60] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.7
num_leaves          = 80
lambda_l1           = 0.1
lambda_l2           = 1.0
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06404	trn's cappa: 0.465847	val's rmse: 1.1017	val's cappa: 0.417273
[200]	trn's rmse: 0.986171	trn's cappa: 0.619521	val's rmse: 1.0399	val's cappa: 0.570352
[300]	trn's rmse: 0.945529	trn's cappa: 0.656965	val's rmse: 1.01408	val's cappa: 0.59332
[400]	trn's rmse: 0.920919	trn's cappa: 0.67213	val's rmse: 1.00169	val's cappa: 0.599424
[500]	trn's rmse: 0.90332	trn's cappa:

[100]	trn's rmse: 1.07204	trn's cappa: 0.473879	val's rmse: 1.09246	val's cappa: 0.397849
[200]	trn's rmse: 0.991105	trn's cappa: 0.623229	val's rmse: 1.03121	val's cappa: 0.555722
[300]	trn's rmse: 0.947912	trn's cappa: 0.660466	val's rmse: 1.0053	val's cappa: 0.582694
[400]	trn's rmse: 0.920853	trn's cappa: 0.677287	val's rmse: 0.995396	val's cappa: 0.591241
[500]	trn's rmse: 0.90219	trn's cappa: 0.688505	val's rmse: 0.990751	val's cappa: 0.591315
Early stopping, best iteration is:
[476]	trn's rmse: 0.906248	trn's cappa: 0.6857	val's rmse: 0.991525	val's cappa: 0.592527
[#63] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.7
num_leaves          = 80
lambda_l1           = 1.0
lambda_l2           = 1.0
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06468	trn's cappa: 0.463568	val's rmse: 1.1019	val's cappa: 0.419091
[200]	trn's rmse: 0.986898	trn's cappa:

[100]	trn's rmse: 1.07126	trn's cappa: 0.475425	val's rmse: 1.09241	val's cappa: 0.398863
[200]	trn's rmse: 0.990198	trn's cappa: 0.623468	val's rmse: 1.03084	val's cappa: 0.555442
[300]	trn's rmse: 0.946881	trn's cappa: 0.661772	val's rmse: 1.00525	val's cappa: 0.582283
[400]	trn's rmse: 0.919865	trn's cappa: 0.678213	val's rmse: 0.995207	val's cappa: 0.590986
[500]	trn's rmse: 0.901668	trn's cappa: 0.689603	val's rmse: 0.990749	val's cappa: 0.591098
[600]	trn's rmse: 0.886292	trn's cappa: 0.701461	val's rmse: 0.988977	val's cappa: 0.593657
[700]	trn's rmse: 0.872865	trn's cappa: 0.713279	val's rmse: 0.987906	val's cappa: 0.593713
[800]	trn's rmse: 0.860898	trn's cappa: 0.722583	val's rmse: 0.98738	val's cappa: 0.593999
Early stopping, best iteration is:
[743]	trn's rmse: 0.867258	trn's cappa: 0.716978	val's rmse: 0.987522	val's cappa: 0.595518
[#66] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.7
num_leaves 

[100]	trn's rmse: 1.07135	trn's cappa: 0.474943	val's rmse: 1.09235	val's cappa: 0.39963
[200]	trn's rmse: 0.990245	trn's cappa: 0.623483	val's rmse: 1.0309	val's cappa: 0.556291
[300]	trn's rmse: 0.94707	trn's cappa: 0.660816	val's rmse: 1.00539	val's cappa: 0.581287
[400]	trn's rmse: 0.920149	trn's cappa: 0.678209	val's rmse: 0.995555	val's cappa: 0.593252
[500]	trn's rmse: 0.901525	trn's cappa: 0.68987	val's rmse: 0.990995	val's cappa: 0.593225
Early stopping, best iteration is:
[431]	trn's rmse: 0.913749	trn's cappa: 0.681574	val's rmse: 0.993686	val's cappa: 0.594659
[#69] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction    = 0.7
num_leaves          = 100
lambda_l1           = 0.1
lambda_l2           = 1.0
Running k-fold 1 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06404	trn's cappa: 0.465847	val's rmse: 1.1017	val's cappa: 0.417273
[200]	trn's rmse: 0.986171	trn's cappa

[600]	trn's rmse: 0.878512	trn's cappa: 0.7103	val's rmse: 1.00645	val's cappa: 0.572166
Early stopping, best iteration is:
[582]	trn's rmse: 0.881151	trn's cappa: 0.707843	val's rmse: 1.00675	val's cappa: 0.573584
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07204	trn's cappa: 0.473879	val's rmse: 1.09246	val's cappa: 0.397849
[200]	trn's rmse: 0.991105	trn's cappa: 0.623229	val's rmse: 1.03121	val's cappa: 0.555722
[300]	trn's rmse: 0.947912	trn's cappa: 0.660466	val's rmse: 1.0053	val's cappa: 0.582694
[400]	trn's rmse: 0.920853	trn's cappa: 0.677287	val's rmse: 0.995396	val's cappa: 0.591241
[500]	trn's rmse: 0.90219	trn's cappa: 0.688505	val's rmse: 0.990751	val's cappa: 0.591315
Early stopping, best iteration is:
[476]	trn's rmse: 0.906248	trn's cappa: 0.6857	val's rmse: 0.991525	val's cappa: 0.592527
[#72] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction

[900]	trn's rmse: 0.847159	trn's cappa: 0.733603	val's rmse: 1.00516	val's cappa: 0.573022
Early stopping, best iteration is:
[816]	trn's rmse: 0.855014	trn's cappa: 0.727276	val's rmse: 1.00508	val's cappa: 0.572623
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07126	trn's cappa: 0.475425	val's rmse: 1.09241	val's cappa: 0.398863
[200]	trn's rmse: 0.990198	trn's cappa: 0.623468	val's rmse: 1.03084	val's cappa: 0.555442
[300]	trn's rmse: 0.946881	trn's cappa: 0.661772	val's rmse: 1.00525	val's cappa: 0.582283
[400]	trn's rmse: 0.919865	trn's cappa: 0.678213	val's rmse: 0.995207	val's cappa: 0.590986
[500]	trn's rmse: 0.901668	trn's cappa: 0.689603	val's rmse: 0.990749	val's cappa: 0.591098
[600]	trn's rmse: 0.886292	trn's cappa: 0.701461	val's rmse: 0.988977	val's cappa: 0.593657
[700]	trn's rmse: 0.872865	trn's cappa: 0.713279	val's rmse: 0.987906	val's cappa: 0.593713
[800]	trn's rmse: 0.860898	trn's cappa: 0.722583	val's rms

[700]	trn's rmse: 0.866502	trn's cappa: 0.718867	val's rmse: 1.00615	val's cappa: 0.572983
Early stopping, best iteration is:
[655]	trn's rmse: 0.870948	trn's cappa: 0.7153	val's rmse: 1.00636	val's cappa: 0.57368
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07135	trn's cappa: 0.474943	val's rmse: 1.09235	val's cappa: 0.39963
[200]	trn's rmse: 0.990245	trn's cappa: 0.623483	val's rmse: 1.0309	val's cappa: 0.556291
[300]	trn's rmse: 0.94707	trn's cappa: 0.660816	val's rmse: 1.00539	val's cappa: 0.581287
[400]	trn's rmse: 0.920149	trn's cappa: 0.678209	val's rmse: 0.995555	val's cappa: 0.593252
[500]	trn's rmse: 0.901525	trn's cappa: 0.68987	val's rmse: 0.990995	val's cappa: 0.593225
Early stopping, best iteration is:
[431]	trn's rmse: 0.913749	trn's cappa: 0.681574	val's rmse: 0.993686	val's cappa: 0.594659
[#78] Training with params:
learning_rate       = 0.01
max_depth           = 5
feature_fraction    = 0.5
bagging_fraction 

[300]	trn's rmse: 0.939207	trn's cappa: 0.666613	val's rmse: 1.02415	val's cappa: 0.552734
[400]	trn's rmse: 0.911497	trn's cappa: 0.684477	val's rmse: 1.01354	val's cappa: 0.567539
[500]	trn's rmse: 0.893328	trn's cappa: 0.698579	val's rmse: 1.00894	val's cappa: 0.572322
[600]	trn's rmse: 0.878512	trn's cappa: 0.7103	val's rmse: 1.00645	val's cappa: 0.572166
Early stopping, best iteration is:
[582]	trn's rmse: 0.881151	trn's cappa: 0.707843	val's rmse: 1.00675	val's cappa: 0.573584
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07204	trn's cappa: 0.473879	val's rmse: 1.09246	val's cappa: 0.397849
[200]	trn's rmse: 0.991105	trn's cappa: 0.623229	val's rmse: 1.03121	val's cappa: 0.555722
[300]	trn's rmse: 0.947912	trn's cappa: 0.660466	val's rmse: 1.0053	val's cappa: 0.582694
[400]	trn's rmse: 0.920853	trn's cappa: 0.677287	val's rmse: 0.995396	val's cappa: 0.591241
[500]	trn's rmse: 0.90219	trn's cappa: 0.688505	val's rmse: 0.99

[600]	trn's rmse: 0.877383	trn's cappa: 0.71081	val's rmse: 1.00684	val's cappa: 0.57184
[700]	trn's rmse: 0.865666	trn's cappa: 0.7187	val's rmse: 1.00569	val's cappa: 0.572686
[800]	trn's rmse: 0.85669	trn's cappa: 0.725339	val's rmse: 1.00519	val's cappa: 0.573293
[900]	trn's rmse: 0.847159	trn's cappa: 0.733603	val's rmse: 1.00516	val's cappa: 0.573022
Early stopping, best iteration is:
[816]	trn's rmse: 0.855014	trn's cappa: 0.727276	val's rmse: 1.00508	val's cappa: 0.572623
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07126	trn's cappa: 0.475425	val's rmse: 1.09241	val's cappa: 0.398863
[200]	trn's rmse: 0.990198	trn's cappa: 0.623468	val's rmse: 1.03084	val's cappa: 0.555442
[300]	trn's rmse: 0.946881	trn's cappa: 0.661772	val's rmse: 1.00525	val's cappa: 0.582283
[400]	trn's rmse: 0.919865	trn's cappa: 0.678213	val's rmse: 0.995207	val's cappa: 0.590986
[500]	trn's rmse: 0.901668	trn's cappa: 0.689603	val's rmse: 0.990

[400]	trn's rmse: 0.91066	trn's cappa: 0.68577	val's rmse: 1.01288	val's cappa: 0.567263
[500]	trn's rmse: 0.892417	trn's cappa: 0.698779	val's rmse: 1.00866	val's cappa: 0.57072
[600]	trn's rmse: 0.877923	trn's cappa: 0.710226	val's rmse: 1.007	val's cappa: 0.571651
[700]	trn's rmse: 0.866502	trn's cappa: 0.718867	val's rmse: 1.00615	val's cappa: 0.572983
Early stopping, best iteration is:
[655]	trn's rmse: 0.870948	trn's cappa: 0.7153	val's rmse: 1.00636	val's cappa: 0.57368
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07135	trn's cappa: 0.474943	val's rmse: 1.09235	val's cappa: 0.39963
[200]	trn's rmse: 0.990245	trn's cappa: 0.623483	val's rmse: 1.0309	val's cappa: 0.556291
[300]	trn's rmse: 0.94707	trn's cappa: 0.660816	val's rmse: 1.00539	val's cappa: 0.581287
[400]	trn's rmse: 0.920149	trn's cappa: 0.678209	val's rmse: 0.995555	val's cappa: 0.593252
[500]	trn's rmse: 0.901525	trn's cappa: 0.68987	val's rmse: 0.990995	val

Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06621	trn's cappa: 0.479882	val's rmse: 1.1046	val's cappa: 0.392217
[200]	trn's rmse: 0.985839	trn's cappa: 0.624664	val's rmse: 1.05019	val's cappa: 0.52993
[300]	trn's rmse: 0.939207	trn's cappa: 0.666613	val's rmse: 1.02415	val's cappa: 0.552734
[400]	trn's rmse: 0.911497	trn's cappa: 0.684477	val's rmse: 1.01354	val's cappa: 0.567539
[500]	trn's rmse: 0.893328	trn's cappa: 0.698579	val's rmse: 1.00894	val's cappa: 0.572322
[600]	trn's rmse: 0.878512	trn's cappa: 0.7103	val's rmse: 1.00645	val's cappa: 0.572166
Early stopping, best iteration is:
[582]	trn's rmse: 0.881151	trn's cappa: 0.707843	val's rmse: 1.00675	val's cappa: 0.573584
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07204	trn's cappa: 0.473879	val's rmse: 1.09246	val's cappa: 0.397849
[200]	trn's rmse: 0.991105	trn's cappa: 0.623229	val's rmse: 1.03121	val'

[300]	trn's rmse: 0.937803	trn's cappa: 0.667259	val's rmse: 1.0236	val's cappa: 0.553296
[400]	trn's rmse: 0.910288	trn's cappa: 0.686456	val's rmse: 1.01294	val's cappa: 0.566039
[500]	trn's rmse: 0.891914	trn's cappa: 0.699501	val's rmse: 1.00888	val's cappa: 0.568789
[600]	trn's rmse: 0.877383	trn's cappa: 0.71081	val's rmse: 1.00684	val's cappa: 0.57184
[700]	trn's rmse: 0.865666	trn's cappa: 0.7187	val's rmse: 1.00569	val's cappa: 0.572686
[800]	trn's rmse: 0.85669	trn's cappa: 0.725339	val's rmse: 1.00519	val's cappa: 0.573293
[900]	trn's rmse: 0.847159	trn's cappa: 0.733603	val's rmse: 1.00516	val's cappa: 0.573022
Early stopping, best iteration is:
[816]	trn's rmse: 0.855014	trn's cappa: 0.727276	val's rmse: 1.00508	val's cappa: 0.572623
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07126	trn's cappa: 0.475425	val's rmse: 1.09241	val's cappa: 0.398863
[200]	trn's rmse: 0.990198	trn's cappa: 0.623468	val's rmse: 1.03084

[100]	trn's rmse: 1.0656	trn's cappa: 0.481769	val's rmse: 1.1042	val's cappa: 0.391778
[200]	trn's rmse: 0.985092	trn's cappa: 0.627115	val's rmse: 1.04975	val's cappa: 0.529682
[300]	trn's rmse: 0.937921	trn's cappa: 0.667051	val's rmse: 1.02372	val's cappa: 0.553039
[400]	trn's rmse: 0.91066	trn's cappa: 0.68577	val's rmse: 1.01288	val's cappa: 0.567263
[500]	trn's rmse: 0.892417	trn's cappa: 0.698779	val's rmse: 1.00866	val's cappa: 0.57072
[600]	trn's rmse: 0.877923	trn's cappa: 0.710226	val's rmse: 1.007	val's cappa: 0.571651
[700]	trn's rmse: 0.866502	trn's cappa: 0.718867	val's rmse: 1.00615	val's cappa: 0.572983
Early stopping, best iteration is:
[655]	trn's rmse: 0.870948	trn's cappa: 0.7153	val's rmse: 1.00636	val's cappa: 0.57368
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.07135	trn's cappa: 0.474943	val's rmse: 1.09235	val's cappa: 0.39963
[200]	trn's rmse: 0.990245	trn's cappa: 0.623483	val's rmse: 1.0309	val's 

[700]	trn's rmse: 0.87759	trn's cappa: 0.702672	val's rmse: 0.990784	val's cappa: 0.609467
[800]	trn's rmse: 0.867295	trn's cappa: 0.711601	val's rmse: 0.990063	val's cappa: 0.608005
Early stopping, best iteration is:
[749]	trn's rmse: 0.872433	trn's cappa: 0.706316	val's rmse: 0.99054	val's cappa: 0.609911
Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06621	trn's cappa: 0.479882	val's rmse: 1.1046	val's cappa: 0.392217
[200]	trn's rmse: 0.985839	trn's cappa: 0.624664	val's rmse: 1.05019	val's cappa: 0.52993
[300]	trn's rmse: 0.939207	trn's cappa: 0.666613	val's rmse: 1.02415	val's cappa: 0.552734
[400]	trn's rmse: 0.911497	trn's cappa: 0.684477	val's rmse: 1.01354	val's cappa: 0.567539
[500]	trn's rmse: 0.893328	trn's cappa: 0.698579	val's rmse: 1.00894	val's cappa: 0.572322
[600]	trn's rmse: 0.878512	trn's cappa: 0.7103	val's rmse: 1.00645	val's cappa: 0.572166
Early stopping, best iteration is:
[582]	trn's rmse: 0.881151	trn

Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06559	trn's cappa: 0.482047	val's rmse: 1.10431	val's cappa: 0.392718
[200]	trn's rmse: 0.984807	trn's cappa: 0.627172	val's rmse: 1.04979	val's cappa: 0.531933
[300]	trn's rmse: 0.937803	trn's cappa: 0.667259	val's rmse: 1.0236	val's cappa: 0.553296
[400]	trn's rmse: 0.910288	trn's cappa: 0.686456	val's rmse: 1.01294	val's cappa: 0.566039
[500]	trn's rmse: 0.891914	trn's cappa: 0.699501	val's rmse: 1.00888	val's cappa: 0.568789
[600]	trn's rmse: 0.877383	trn's cappa: 0.71081	val's rmse: 1.00684	val's cappa: 0.57184
[700]	trn's rmse: 0.865666	trn's cappa: 0.7187	val's rmse: 1.00569	val's cappa: 0.572686
[800]	trn's rmse: 0.85669	trn's cappa: 0.725339	val's rmse: 1.00519	val's cappa: 0.573293
[900]	trn's rmse: 0.847159	trn's cappa: 0.733603	val's rmse: 1.00516	val's cappa: 0.573022
Early stopping, best iteration is:
[816]	trn's rmse: 0.855014	trn's cappa: 0.727276	val's rmse: 1.00508

[600]	trn's rmse: 0.889141	trn's cappa: 0.693555	val's rmse: 0.991112	val's cappa: 0.607953
Early stopping, best iteration is:
[544]	trn's rmse: 0.896568	trn's cappa: 0.68827	val's rmse: 0.99258	val's cappa: 0.610036
Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.0656	trn's cappa: 0.481769	val's rmse: 1.1042	val's cappa: 0.391778
[200]	trn's rmse: 0.985092	trn's cappa: 0.627115	val's rmse: 1.04975	val's cappa: 0.529682
[300]	trn's rmse: 0.937921	trn's cappa: 0.667051	val's rmse: 1.02372	val's cappa: 0.553039
[400]	trn's rmse: 0.91066	trn's cappa: 0.68577	val's rmse: 1.01288	val's cappa: 0.567263
[500]	trn's rmse: 0.892417	trn's cappa: 0.698779	val's rmse: 1.00866	val's cappa: 0.57072
[600]	trn's rmse: 0.877923	trn's cappa: 0.710226	val's rmse: 1.007	val's cappa: 0.571651
[700]	trn's rmse: 0.866502	trn's cappa: 0.718867	val's rmse: 1.00615	val's cappa: 0.572983
Early stopping, best iteration is:
[655]	trn's rmse: 0.870948	trn's c

[400]	trn's rmse: 0.920466	trn's cappa: 0.672715	val's rmse: 1.0022	val's cappa: 0.600049
[500]	trn's rmse: 0.902661	trn's cappa: 0.683304	val's rmse: 0.996022	val's cappa: 0.605856
[600]	trn's rmse: 0.889225	trn's cappa: 0.692751	val's rmse: 0.992873	val's cappa: 0.607161
[700]	trn's rmse: 0.87759	trn's cappa: 0.702672	val's rmse: 0.990784	val's cappa: 0.609467
[800]	trn's rmse: 0.867295	trn's cappa: 0.711601	val's rmse: 0.990063	val's cappa: 0.608005
Early stopping, best iteration is:
[749]	trn's rmse: 0.872433	trn's cappa: 0.706316	val's rmse: 0.99054	val's cappa: 0.609911
Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.06621	trn's cappa: 0.479882	val's rmse: 1.1046	val's cappa: 0.392217
[200]	trn's rmse: 0.985839	trn's cappa: 0.624664	val's rmse: 1.05019	val's cappa: 0.52993
[300]	trn's rmse: 0.939207	trn's cappa: 0.666613	val's rmse: 1.02415	val's cappa: 0.552734
[400]	trn's rmse: 0.911497	trn's cappa: 0.684477	val's rmse: 1

[200]	trn's rmse: 0.974843	trn's cappa: 0.635689	val's rmse: 1.04141	val's cappa: 0.538461
[300]	trn's rmse: 0.934503	trn's cappa: 0.664541	val's rmse: 1.02245	val's cappa: 0.557541
[400]	trn's rmse: 0.911174	trn's cappa: 0.682191	val's rmse: 1.01581	val's cappa: 0.563437
[500]	trn's rmse: 0.89303	trn's cappa: 0.695167	val's rmse: 1.01192	val's cappa: 0.566355
[600]	trn's rmse: 0.879848	trn's cappa: 0.708618	val's rmse: 1.01029	val's cappa: 0.567743
[700]	trn's rmse: 0.86972	trn's cappa: 0.716348	val's rmse: 1.00961	val's cappa: 0.567391
Early stopping, best iteration is:
[635]	trn's rmse: 0.876171	trn's cappa: 0.710907	val's rmse: 1.01016	val's cappa: 0.568983
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05387	trn's cappa: 0.524616	val's rmse: 1.07055	val's cappa: 0.473103
[200]	trn's rmse: 0.980992	trn's cappa: 0.62848	val's rmse: 1.01621	val's cappa: 0.573506
[300]	trn's rmse: 0.9434	trn's cappa: 0.656235	val's rmse: 0.9981

[700]	trn's rmse: 0.877968	trn's cappa: 0.703889	val's rmse: 0.99151	val's cappa: 0.605878
[800]	trn's rmse: 0.867206	trn's cappa: 0.71239	val's rmse: 0.990702	val's cappa: 0.60768
[900]	trn's rmse: 0.85794	trn's cappa: 0.720164	val's rmse: 0.990383	val's cappa: 0.60657
Early stopping, best iteration is:
[800]	trn's rmse: 0.867206	trn's cappa: 0.71239	val's rmse: 0.990702	val's cappa: 0.60768
Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05095	trn's cappa: 0.534004	val's rmse: 1.08913	val's cappa: 0.450619
[200]	trn's rmse: 0.974825	trn's cappa: 0.63657	val's rmse: 1.04143	val's cappa: 0.537547
[300]	trn's rmse: 0.934649	trn's cappa: 0.664598	val's rmse: 1.02275	val's cappa: 0.554941
[400]	trn's rmse: 0.911037	trn's cappa: 0.682049	val's rmse: 1.01572	val's cappa: 0.563758
Early stopping, best iteration is:
[397]	trn's rmse: 0.911681	trn's cappa: 0.681394	val's rmse: 1.01578	val's cappa: 0.564345
Running k-fold 3 of 3
Training 

[200]	trn's rmse: 0.975786	trn's cappa: 0.634822	val's rmse: 1.0415	val's cappa: 0.539334
[300]	trn's rmse: 0.935655	trn's cappa: 0.664307	val's rmse: 1.02229	val's cappa: 0.556311
[400]	trn's rmse: 0.911318	trn's cappa: 0.681975	val's rmse: 1.01521	val's cappa: 0.564572
[500]	trn's rmse: 0.893143	trn's cappa: 0.694627	val's rmse: 1.01123	val's cappa: 0.568674
[600]	trn's rmse: 0.878901	trn's cappa: 0.707869	val's rmse: 1.00973	val's cappa: 0.569848
Early stopping, best iteration is:
[590]	trn's rmse: 0.880169	trn's cappa: 0.706858	val's rmse: 1.00993	val's cappa: 0.570226
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05452	trn's cappa: 0.524394	val's rmse: 1.07092	val's cappa: 0.47276
[200]	trn's rmse: 0.9819	trn's cappa: 0.627282	val's rmse: 1.01661	val's cappa: 0.569469
[300]	trn's rmse: 0.944391	trn's cappa: 0.655618	val's rmse: 0.998163	val's cappa: 0.588006
[400]	trn's rmse: 0.919463	trn's cappa: 0.673094	val's rmse: 0.99

Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05093	trn's cappa: 0.533529	val's rmse: 1.08889	val's cappa: 0.450593
[200]	trn's rmse: 0.974843	trn's cappa: 0.635689	val's rmse: 1.04141	val's cappa: 0.538461
[300]	trn's rmse: 0.934503	trn's cappa: 0.664541	val's rmse: 1.02245	val's cappa: 0.557541
[400]	trn's rmse: 0.911174	trn's cappa: 0.682191	val's rmse: 1.01581	val's cappa: 0.563437
[500]	trn's rmse: 0.89303	trn's cappa: 0.695167	val's rmse: 1.01192	val's cappa: 0.566355
[600]	trn's rmse: 0.879848	trn's cappa: 0.708618	val's rmse: 1.01029	val's cappa: 0.567743
[700]	trn's rmse: 0.86972	trn's cappa: 0.716348	val's rmse: 1.00961	val's cappa: 0.567391
Early stopping, best iteration is:
[635]	trn's rmse: 0.876171	trn's cappa: 0.710907	val's rmse: 1.01016	val's cappa: 0.568983
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05387	trn's cappa: 0.524616	val's rmse: 1.07055	va

[500]	trn's rmse: 0.903738	trn's cappa: 0.68221	val's rmse: 0.996238	val's cappa: 0.604523
[600]	trn's rmse: 0.889781	trn's cappa: 0.693364	val's rmse: 0.992849	val's cappa: 0.605966
[700]	trn's rmse: 0.877968	trn's cappa: 0.703889	val's rmse: 0.99151	val's cappa: 0.605878
[800]	trn's rmse: 0.867206	trn's cappa: 0.71239	val's rmse: 0.990702	val's cappa: 0.60768
[900]	trn's rmse: 0.85794	trn's cappa: 0.720164	val's rmse: 0.990383	val's cappa: 0.60657
Early stopping, best iteration is:
[800]	trn's rmse: 0.867206	trn's cappa: 0.71239	val's rmse: 0.990702	val's cappa: 0.60768
Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05095	trn's cappa: 0.534004	val's rmse: 1.08913	val's cappa: 0.450619
[200]	trn's rmse: 0.974825	trn's cappa: 0.63657	val's rmse: 1.04143	val's cappa: 0.537547
[300]	trn's rmse: 0.934649	trn's cappa: 0.664598	val's rmse: 1.02275	val's cappa: 0.554941
[400]	trn's rmse: 0.911037	trn's cappa: 0.682049	val's rmse: 1.01

Running k-fold 2 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05179	trn's cappa: 0.530634	val's rmse: 1.08925	val's cappa: 0.4482
[200]	trn's rmse: 0.975786	trn's cappa: 0.634822	val's rmse: 1.0415	val's cappa: 0.539334
[300]	trn's rmse: 0.935655	trn's cappa: 0.664307	val's rmse: 1.02229	val's cappa: 0.556311
[400]	trn's rmse: 0.911318	trn's cappa: 0.681975	val's rmse: 1.01521	val's cappa: 0.564572
[500]	trn's rmse: 0.893143	trn's cappa: 0.694627	val's rmse: 1.01123	val's cappa: 0.568674
[600]	trn's rmse: 0.878901	trn's cappa: 0.707869	val's rmse: 1.00973	val's cappa: 0.569848
Early stopping, best iteration is:
[590]	trn's rmse: 0.880169	trn's cappa: 0.706858	val's rmse: 1.00993	val's cappa: 0.570226
Running k-fold 3 of 3
Training until validation scores don't improve for 100 rounds.
[100]	trn's rmse: 1.05452	trn's cappa: 0.524394	val's rmse: 1.07092	val's cappa: 0.47276
[200]	trn's rmse: 0.9819	trn's cappa: 0.627282	val's rmse: 1.01661	val's 

In [None]:
# Disabled export step: when uncommented, wraps `benchmark` in a DataFrame and
# writes it to 'benchmark.csv' without the index column.
# NOTE(review): `benchmark` is defined outside this cell — presumably the
# collected parameter-search results; confirm before re-enabling.
# pd.DataFrame(benchmark).to_csv('benchmark.csv', index=False)

In [107]:
# Disabled analysis step: when uncommented, reloads 'benchmark.csv', aggregates
# every column whose name starts with 'cv_' into row-wise `cv_mean` / `cv_std`,
# sorts by `cv_mean` (highest first) and displays the top 20 rows.
# NOTE(review): the table kept below this cell appears to be output from an
# earlier run — with every line commented out the cell itself produces nothing.
# NOTE(review): in that table, rows that differ only in `bagging_fraction` have
# identical fold scores; presumably bagging was inactive (LightGBM ignores
# `bagging_fraction` unless `bagging_freq` > 0) — confirm in the search grid.
# bench = pd.read_csv('benchmark.csv')
# `cv_cols` is captured before `cv_mean` / `cv_std` are added, so the
# aggregates below only see the original 'cv_'-prefixed columns.
# cv_cols = bench.columns[bench.columns.str.startswith('cv_')]
# bench['cv_mean'] = bench[cv_cols].mean(axis=1)
# bench['cv_std'] = bench[cv_cols].std(axis=1)
# bench.sort_values(by='cv_mean', ascending=False, inplace=True)
# bench.head(20).reset_index(drop=True)

Unnamed: 0,learning_rate,max_depth,feature_fraction,bagging_fraction,num_leaves,lambda_l1,lambda_l2,cv_cappa_1,cv_cappa_2,cv_cappa_3,cv_mean,cv_std
0,0.01,25,0.7,0.5,40,1.0,0.1,0.612306,0.581767,0.605414,0.599829,0.016017
1,0.01,25,0.7,0.7,40,1.0,0.1,0.612306,0.581767,0.605414,0.599829,0.016017
2,0.01,25,0.7,1.0,40,1.0,0.1,0.612306,0.581767,0.605414,0.599829,0.016017
3,0.01,25,0.7,0.5,40,1.0,1.0,0.611266,0.58484,0.603072,0.599726,0.013527
4,0.01,25,0.7,0.7,40,1.0,1.0,0.611266,0.58484,0.603072,0.599726,0.013527
5,0.01,25,0.7,1.0,40,1.0,1.0,0.611266,0.58484,0.603072,0.599726,0.013527
6,0.01,25,0.5,0.5,40,1.0,1.0,0.61302,0.585292,0.600265,0.599526,0.013879
7,0.01,25,0.5,1.0,40,1.0,1.0,0.61302,0.585292,0.600265,0.599526,0.013879
8,0.01,25,0.5,0.7,40,1.0,1.0,0.61302,0.585292,0.600265,0.599526,0.013879
9,0.01,10,0.7,0.7,40,0.01,0.01,0.612273,0.581622,0.604435,0.599443,0.015924
