In [None]:
# !cp /kaggle/input/data-bowl-2019-external-data/*.py /kaggle/working

In [1]:
%reload_ext autoreload
%autoreload 2
import warnings
import jupytools.syspath
def ignore(*args, **kwargs):
    """Accept any positional/keyword arguments and do nothing."""
    return None


# Replace ``warnings.warn`` with the no-op above so that code calling
# ``warnings.warn`` through the module attribute emits nothing.
warnings.warn = ignore
jupytools.syspath.add('..')

In [2]:
from collections import Counter, OrderedDict
from functools import partial

import numpy as np
import pandas as pd
from IPython.display import display
from sklearn.metrics import cohen_kappa_score
from sklearn.model_selection import GroupKFold
from tqdm.auto import tqdm

import bundle
import features as F
import selection
import utils as U
from dataset import load, load_sample, Subset
from encode import encode
from training import train, inference, submit, EnsembleTrainer, get_default_config
from meta import compute_meta_data
from metric import optimize_rounding_bounds, make_cappa_metric
from normalize import normalize

In [3]:
# When True (and not running on Kaggle), `load_sample` is used instead of
# `load`, with 500_000 passed as its second argument.
sample = False

if U.on_kaggle():
    # On Kaggle only the competition test CSV is read; no train data is loaded.
    U.log('Loading test set only.')
    tst_data = pd.read_csv('/kaggle/input/data-science-bowl-2019/test.csv')
elif sample:
    U.log('Warning: loading train and test data sample.')
    # The train loader returns three objects; only the first one is kept here.
    trn_data, _, _ = load_sample(Subset.Train, 500_000)
    (tst_data,) = load_sample(Subset.Test, 500_000)
else:
    U.log('Loading train and test.')
    trn_data, trn_spec, trn_targ = load(Subset.Train)
    # The test loader returns a one-element sequence; unpack its single item.
    (tst_data,) = load(Subset.Test)

Loading train and test.
(11341042, 11) (17690, 7) (386, 3) (1156414, 11) 

In [4]:
# Build the row-level preprocessing callable from two partially-applied
# feature functions.
# NOTE(review): presumably `U.combine` applies them in the listed order —
# confirm in utils.
transform = U.combine(
    # Column pairs handed to `F.add_feature_combinations`.
    partial(F.add_feature_combinations, pairs=[('title', 'event_code'),
                                               ('title', 'world'),
                                               ('title', 'type'),
                                               ('world', 'type')]),
    # Datetime features derived from the 'timestamp' column, prefixed 'ts'.
    partial(F.add_datetime, column='timestamp', prefix='ts'),
)

# The transform is always given a copy, so `tst_data` / `trn_data` are not
# passed in directly.
if U.on_kaggle():
    # Only the test set was loaded on Kaggle, so only X_tst is produced.
    U.log('Transforming test data only.')
    X_tst = transform(tst_data.copy())
    U.log(X_tst.shape)
else:
    U.log('Transforming train and test data.')
    X_tst = transform(tst_data.copy())
    X_trn = transform(trn_data.copy())
    U.log(X_trn.shape, X_tst.shape)

Transforming train and test data.
(11341042, 22) (1156414, 22)


In [5]:
# Obtain `meta`: on Kaggle it is read from the pre-built bundle, locally it is
# computed from the transformed train and test frames and saved under the
# key 'meta'.
if U.on_kaggle():
    U.log('Reading pre-computed meta from disk.')
    meta = bundle.meta()
else:
    U.log('Computing meta using train and test datasets.')
    # Needs both X_trn and X_tst, which only exist together off Kaggle.
    meta = compute_meta_data(X_trn, X_tst)
    U.log('Saving computed meta on disk.')
    bundle.save_meta(meta, 'meta')

Computing meta using train and test datasets.
Saving computed meta on disk.


In [8]:
# Feature extractor composed of the individual feature groups below; every
# group is constructed with the shared `meta` object.
extractor = F.FeaturesExtractor([
    F.CountingFeatures(meta),
    F.PerformanceFeatures(meta),
    F.VarietyFeatures(meta),
    F.EventDataFeatures(meta),
    F.FeedbackFeatures(meta),
    F.TimestampFeatures2(meta),
    # F.ZFeatures(meta)
])

# NOTE: the name `algo` is rebound to a model-name string in a later cell of
# this notebook, so this object is only valid until that cell runs.
algo = F.InMemoryAlgorithm(extractor, meta, num_workers=12)

# Columns passed to `encode` as the ones to be encoded.
cat_cols = ['session_title','cnt_most_freq_title', 'cnt_least_freq_title', 
            'cnt_most_freq_world', 'cnt_least_freq_world', 'cnt_most_freq_type', 
            'cnt_least_freq_type', 'cnt_most_freq_title_world', 
            'cnt_least_freq_title_world', 'cnt_most_freq_title_type', 
            'cnt_least_freq_title_type', 'cnt_most_freq_world_type', 
            'cnt_least_freq_world_type']

if U.on_kaggle():
    U.log('Preparing test dataset.')
    # X_tst is replaced by the algorithm output (run with test=True).
    X_tst = algo.run(X_tst, test=True)
    # Reuse the encoders stored in the bundle instead of fitting new ones.
    encoders = bundle.encoders()
    X_tst, _ = encode(X_tst, cat_cols, encoders=encoders)
else:
    U.log('Preparing train and test datasets.')
    X_trn = algo.run(X_trn)
    X_tst = algo.run(X_tst, test=True)
    # `encode` without `encoders` returns the encoders it produced; those are
    # then passed in for the test set and saved under the key 'encoders'.
    X_trn, encoders = encode(X_trn, cat_cols)
    X_tst, _ = encode(X_tst, cat_cols, encoders=encoders)
    bundle.save(encoders, 'encoders')

Preparing train and test datasets.
Running algorithm in train mode.


HBox(children=(FloatProgress(value=0.0, max=17000.0), HTML(value='')))


Running algorithm in test mode.


HBox(children=(FloatProgress(value=0.0, max=1000.0), HTML(value='')))




In [9]:
# Post-processing step that adds user-wise features.
# NOTE(review): the return value of `F.add_user_wise_features` is discarded, so
# it presumably modifies the frame in place — confirm in features.py.
if U.on_kaggle():
    U.log('Running post-processing on test set only.')
    F.add_user_wise_features(X_tst, meta)
else:
    U.log('Running post-processing on train and test sets.')
    F.add_user_wise_features(X_trn, meta)
    F.add_user_wise_features(X_tst, meta)

Running post-processing on train and test sets.


In [13]:
# Features to target-encode: the categorical columns plus whatever
# `U.starts_with` returns for the 'cnt_' prefix on the train columns,
# de-duplicated (and sorted) by `np.unique`.
# The list is only consumed when target encoders are fitted locally, and
# X_trn is never created on Kaggle (only the test set is loaded there), so
# the computation is skipped there instead of raising NameError.
if not U.on_kaggle():
    te_features = list(np.unique(cat_cols + U.starts_with(X_trn.columns, 'cnt_')))

In [15]:
def encode_multi_target(dataset, feature, target='accuracy_group', smoothing=3.0):
    """Target-encode ``feature`` separately against target values 1, 2 and 3.

    For each value ``i`` in (1, 2, 3) a binary target is built (1 where
    ``dataset[target] == i``, otherwise 0), a ``TargetEncoder`` is fitted on
    it, and the resulting mapping is applied to ``dataset[feature]`` and
    stored in ``dataset`` as the new column ``te_{feature}_{i}``.

    Args:
        dataset: frame holding both ``feature`` and ``target``; it is modified
            in place by the added ``te_*`` columns.
        feature: name of the column to encode.
        target: name of the target column.
        smoothing: forwarded to ``TargetEncoder``.

    Returns:
        ``(feature, mappings)`` where ``mappings`` is a list of three dicts,
        one per target value, in the order 1, 2, 3.
    """
    from category_encoders import TargetEncoder

    per_value_mappings = []
    for wanted in (1, 2, 3):
        binary_target = dataset[target].map(lambda value: int(value == wanted))
        encoder = TargetEncoder(cols=[feature], smoothing=smoothing)
        encoder.fit(dataset, binary_target)
        fitted = encoder.mapping[feature].to_dict()
        # Keep only entries with a positive key and shift each key down by one.
        # NOTE(review): presumably the keys are the encoder's internal 1-based
        # codes and non-positive keys are its unknown/missing markers; the
        # shifted key matches the raw feature value only if raw values are
        # 0..n-1 in that same order — confirm against category_encoders.
        shifted = {}
        for code, encoded in fitted.items():
            if code > 0:
                shifted[code - 1] = encoded
        dataset[f'te_{feature}_{wanted}'] = dataset[feature].map(shifted)
        per_value_mappings.append(shifted)
    return feature, per_value_mappings

# Obtain the per-feature target-encoding mappings under one name in both
# environments: loaded from the bundle on Kaggle, fitted on the train frame
# (and saved under the key 'target_encoders') otherwise.
if U.on_kaggle():
    # `te` is kept as an alias because the Kaggle branch used that name before.
    te = target_encoders = bundle.target_encoders()
else:
    target_encoders = {}
    for feature in tqdm(te_features, desc='Target Encoder'):
        # Also adds the te_* columns to X_trn as a side effect of the call.
        feature, mappings = encode_multi_target(X_trn, feature=feature)
        target_encoders[feature] = mappings
    bundle.save(target_encoders, 'target_encoders')

# Apply the mappings to the test frame; identical for both environments.
# Each feature has one mapping per target value, enumerated from 1, which
# gives the column suffix.
for feature, mappings in target_encoders.items():
    for i, mapping in enumerate(mappings, 1):
        X_tst[f'te_{feature}_{i}'] = X_tst[feature].map(mapping)

HBox(children=(FloatProgress(value=0.0, description='Target Encoder', max=995.0, style=ProgressStyle(descripti…




In [18]:
# Re-apply the target-encoding mappings to the training frame, producing the
# te_{feature}_{i} columns (i is the 1-based position of the mapping).
# Guarded because X_trn is only created when running outside Kaggle; without
# the guard this cell raises NameError there.
if not U.on_kaggle():
    for feature, mappings in target_encoders.items():
        for i, mapping in enumerate(mappings, 1):
            X_trn[f'te_{feature}_{i}'] = X_trn[feature].map(mapping)

In [None]:
# if not U.on_kaggle():
#     with pd.option_context('display.max_rows', 2000):
#         display(X_trn.sample(3).T)

In [17]:
# Local runs only: snapshot the prepared train/test frames to /tmp as pickles
# so they can be read back without recomputing the cells above.
if not U.on_kaggle():
    X_trn.to_pickle('/tmp/X_trn.pickle')
    X_tst.to_pickle('/tmp/X_tst.pickle')

In [3]:
# Local runs only: restore the prepared frames from the /tmp pickle snapshot.
# The files must already exist; only load pickles you wrote yourself.
if not U.on_kaggle():
    X_trn = pd.read_pickle('/tmp/X_trn.pickle')
    X_tst = pd.read_pickle('/tmp/X_tst.pickle')

In [None]:
# group_col = 'installation_id'
# U.log(f'Normalizing dataset using column for grouping: {group_col}')
# norm_dataset = X_tst if U.on_kaggle() else X_trn
# cnt_cols = U.starts_with(norm_dataset.columns, 'cnt_')
# normalize(norm_dataset, cnt_cols, grouping_key=group_col, method='min-max')

## Feature Selection

In [20]:
# Feature selector with a single rule named 'nonzero'; the target and the two
# identifier columns are excluded from consideration.
selector = selection.FeatureSelection(
    rules=[
        ('nonzero', selection.non_zero_rows_and_cols),
    ],
    ignore_cols=[
        'accuracy_group', 
        'installation_id', 
        'game_session'
    ]
)
if U.on_kaggle():
    # No train frame on Kaggle: reuse the feature list stored in the bundle.
    U.log('Loading relevant features list from disk.')
    features = bundle.features()
else:
    U.log('Deriving relevant features from train dataset.')
    features = selector.select(X_trn)
    # Persist the list under the key 'features' for later reuse.
    bundle.save(features, 'features')

Deriving relevant features from train dataset.
Excluding from consideration: ['accuracy_group', 'installation_id', 'game_session']
Applying feature selection rule: nonzero
Selected features: 4091 of 4127
Keeping only features, selected by every rule.
Final number of features changed from 4127 to 4091


## Submission

In [None]:
# Save the rounders and the per-result model lists under the key 'lightgbm'.
# NOTE: `rounders` and `ensemble` are assigned in cells that appear later in
# this notebook; this cell fails on a fresh top-to-bottom run.
bundle.save({
    'rounders': rounders, 
    'ensemble': [result.models for result in ensemble]
}, key='lightgbm')

In [14]:
# Package saved artifacts and helper scripts into the target folder (see the
# logged "-->" lines in the cell output). The destination is a hard-coded
# absolute local path.
bundle.package(folder='/home/ck/data/bowl2019/external/')

Packaging training results into dataset.
/tmp/bowl2019/meta.joblib --> /home/ck/data/bowl2019/external/meta.joblib
/tmp/bowl2019/lightgbm.joblib --> /home/ck/data/bowl2019/external/lightgbm.joblib
/tmp/bowl2019/target_encoders.joblib --> /home/ck/data/bowl2019/external/target_encoders.joblib
/tmp/bowl2019/bounds.joblib --> /home/ck/data/bowl2019/external/bounds.joblib
/tmp/bowl2019/features.joblib --> /home/ck/data/bowl2019/external/features.joblib
/tmp/bowl2019/encoders.joblib --> /home/ck/data/bowl2019/external/encoders.joblib
/tmp/bowl2019/models_lightgbm_24.joblib --> /home/ck/data/bowl2019/external/models_lightgbm_24.joblib
/tmp/bowl2019/models_lightgbm_023.joblib --> /home/ck/data/bowl2019/external/models_lightgbm_023.joblib
Packaging helper scripts into dataset.
../selection.py --> /home/ck/data/bowl2019/external/selection.py
../encode.py --> /home/ck/data/bowl2019/external/encode.py
../features.py --> /home/ck/data/bowl2019/external/features.py
../training.py --> /home/ck/data/

'/home/ck/data/bowl2019/external/'

In [None]:
# Kaggle-only inference: predict with every stored model, round with every
# stored rounder, then take a per-row majority vote over all label vectors.
if U.on_kaggle():
    U.log('Inference on Kaggle.')
    features = bundle.features()
    # Dict with the keys 'rounders' and 'ensemble' (as saved under 'lightgbm').
    lightgbm = bundle.load('lightgbm')
    labels = []
    U.log('Running model predictions.')
    for rounder in lightgbm['rounders']:
        U.log(f'Using rounding method: {rounder.method}')
        # 'ensemble' is a list of model lists; every model is paired with
        # every rounder.
        for models in lightgbm['ensemble']:
            for model in models:
                model_pred = model.predict(X_tst[features])
                model_labels = rounder.predict(model_pred)
                labels.append(model_labels)
    # One column per (rounder, model) pair, so iterating yields one row of
    # votes per test row (assuming each label vector is 1-D).
    labels = np.column_stack(labels)
    predictions = []
    U.log('Majority vote.')
    for label in labels:
        # Most frequent label in the row; ties go to the first one encountered.
        majority = Counter(label).most_common(1)[0][0]
        predictions.append(majority)
    U.log('Saving predictions on disk.')
    filename = submit(predictions)
    # Read the written file back to display a preview.
    submit_df = pd.read_csv(filename)
    U.log('First 20 submission rows:')
    display(submit_df.head(20))
            
    #features = bundle.features()
    #bounds = bundle.bounds()
    #predicted = inference(X_tst, features, bounds=bounds, model=algo, version=version)
    #U.log('Saving predictions on disk.')
    #filename = submit(predicted)
    #submit_df = pd.read_csv(filename)
    #U.log('First 20 submission rows:')
    #display(submit_df.head(20))

In [None]:
# Fit each rounder on out-of-fold predictions and label the test set with
# every model of every ensemble result.
# NOTE: `Rounder` is imported and `ensemble` is built in cells that appear
# later in this notebook; this cell fails on a fresh top-to-bottom run.
rounders = [Rounder(method='dist'), Rounder(method='optimal')]
labels = []
for rounder in rounders:
    for i, result in enumerate(ensemble, 1):
        # The same rounder object is re-fitted for every result.
        # NOTE(review): presumably each fit replaces the previous one, so the
        # rounder ends up fitted on the last result's OOF only — confirm.
        rounder.fit(X_trn['accuracy_group'], result.oof)
        for j, model in enumerate(result.models, 1):
            U.log(f'Rounder: {rounder.method}, ensemble #{i}, model #{j}')
            model_pred = model.predict(X_tst[features])
            model_labels = rounder.predict(model_pred)
            # Collected here but not consumed within this cell.
            labels.append(model_labels)

In [6]:
from rounding import Rounder

In [21]:
def seed_all(value):
    """Seed Python's global ``random`` generator and NumPy's legacy global RNG.

    Args:
        value: seed passed unchanged to ``random.seed`` and ``np.random.seed``.
    """
    from random import seed as seed_python_rng

    seed_python_rng(value)
    np.random.seed(value)

In [4]:
# Reload the feature list from the bundle (rebinds `features`).
features = bundle.features()

In [None]:
# Train five LightGBM runs, each with its own random seed and 5-fold
# GroupKFold, collecting the training results in `ensemble`.
ensemble = []
U.log('Start training an ensemble of models with various random seeds.')
trainer = EnsembleTrainer(algo='lightgbm', eval_metric='rmse')
fold = GroupKFold(n_splits=5)
for i in range(5):
    # Seed is an integer in [0, 1000]. The first draw uses whatever state the
    # global NumPy RNG is in when the cell starts; later draws follow from the
    # previous iteration's `seed_all` call. The value is logged below.
    seed = np.random.randint(0, 1001)
    U.log('=' * 80)
    U.log(f'Training model with random seed: {seed}')
    seed_all(seed)
    # Start from the default config and override individual nested keys.
    config = get_default_config('lightgbm')
    U.set_nested(config, 'model_params.seed', seed)
    U.set_nested(config, 'model_params.feature_fraction', 0.4)
    U.set_nested(config, 'model_params.bagging_fraction', 0.6)
    U.set_nested(config, 'model_params.bagging_freq', 1)
    U.set_nested(config, 'model_params.learning_rate', 0.003)
    U.set_nested(config, 'model_params.n_estimators', 10000)
    U.set_nested(config, 'fit_params.verbose', 250)
    U.set_nested(config, 'fit_params.early_stopping_rounds', 250)
    result = trainer.train(X_trn, features=features, fold=fold, config=config)
    ensemble.append(result)

Start training an ensemble of models with various random seeds.
Training model with random seed: 91
Running k-fold 1 of 5
Training until validation scores don't improve for 250 rounds.
[250]	trn's rmse: 1.06746	val's rmse: 1.08523
[500]	trn's rmse: 0.987306	val's rmse: 1.02252
[750]	trn's rmse: 0.944411	val's rmse: 0.997515
[1000]	trn's rmse: 0.915548	val's rmse: 0.986274
[1250]	trn's rmse: 0.892956	val's rmse: 0.980646
[1500]	trn's rmse: 0.873281	val's rmse: 0.977601
[1750]	trn's rmse: 0.855888	val's rmse: 0.975981
[2000]	trn's rmse: 0.839793	val's rmse: 0.974742


In [None]:
# Fit each rounder on out-of-fold predictions and label the test set with
# every model of every result currently held in `ensemble`.
rounders = [Rounder(method='dist'), Rounder(method='optimal')]
labels = []
for rounder in rounders:
    for i, result in enumerate(ensemble, 1):
        # The same rounder object is re-fitted for every result.
        # NOTE(review): presumably each fit replaces the previous one, so the
        # rounder ends up fitted on the last result's OOF only — confirm.
        rounder.fit(X_trn['accuracy_group'], result.oof)
        for j, model in enumerate(result.models, 1):
            U.log(f'Rounder: {rounder.method}, ensemble #{i}, model #{j}')
            model_pred = model.predict(X_tst[features])
            model_labels = rounder.predict(model_pred)
            # Collected here but not consumed within this cell.
            labels.append(model_labels)

In [None]:
# Save the rounders and the per-result model lists under the key 'lightgbm'.
# Only `result.models` is stored for each ensemble entry.
bundle.save({
    'rounders': rounders, 
    'ensemble': [result.models for result in ensemble]
}, key='lightgbm')

In [11]:
from multiprocessing import Queue, Process

In [None]:
# Train five XGBoost runs, each with its own random seed and 5-fold
# GroupKFold. Rebinds `ensemble`, discarding any previously held results.
ensemble = []
U.log('Start training an ensemble of models with various random seeds.')
trainer = EnsembleTrainer(algo='xgboost', eval_metric='rmse')
fold = GroupKFold(n_splits=5)
for i in range(5):
    # Seed is an integer in [0, 1000]. The first draw uses whatever state the
    # global NumPy RNG is in when the cell starts; the value is logged below.
    seed = np.random.randint(0, 1001)
    U.log('=' * 80)
    U.log(f'Training model with random seed: {seed}')
    seed_all(seed)
    # Start from the default config and override individual nested keys.
    config = get_default_config('xgboost')
    U.set_nested(config, 'model_params.seed', seed)
    U.set_nested(config, 'model_params.max_depth', 6)
    U.set_nested(config, 'model_params.subsample', 0.5)
    U.set_nested(config, 'model_params.colsample_bytree', 0.8)
    U.set_nested(config, 'model_params.colsample_bylevel', 0.6)
    U.set_nested(config, 'model_params.colsample_bynode', 0.5)
    U.set_nested(config, 'model_params.learning_rate', 0.003)
    U.set_nested(config, 'model_params.n_estimators', 5000)
    U.set_nested(config, 'model_params.lambda', 1)
    U.set_nested(config, 'model_params.alpha', 1)
    U.set_nested(config, 'model_params.objective', 'reg:squarederror')
    # U.set_nested(config, 'model_params.gpu_id', 1)
    U.set_nested(config, 'model_params.nthread', 12)
    U.set_nested(config, 'model_params.tree_method', 'hist')
    U.set_nested(config, 'fit_params.verbose', 100)
    U.set_nested(config, 'fit_params.early_stopping_rounds', 100)
    result = trainer.train(X_trn, features=features, fold=fold, config=config)
    ensemble.append(result)

Start training an ensemble of models with various random seeds.
Training model with random seed: 875
Running k-fold 1 of 5
[0]	validation_0-rmse:1.86716	validation_1-rmse:1.86478
Multiple eval metrics have been passed: 'validation_1-rmse' will be used for early stopping.

Will train until validation_1-rmse hasn't improved in 100 rounds.
[100]	validation_0-rmse:1.53987	validation_1-rmse:1.5447
[200]	validation_0-rmse:1.3199	validation_1-rmse:1.33309
[300]	validation_0-rmse:1.17516	validation_1-rmse:1.19777
[400]	validation_0-rmse:1.08176	validation_1-rmse:1.1134
[500]	validation_0-rmse:1.0218	validation_1-rmse:1.0622
[600]	validation_0-rmse:0.982178	validation_1-rmse:1.03073
[700]	validation_0-rmse:0.954875	validation_1-rmse:1.01114
[800]	validation_0-rmse:0.935723	validation_1-rmse:0.999103
[900]	validation_0-rmse:0.921191	validation_1-rmse:0.991458
[1000]	validation_0-rmse:0.909429	validation_1-rmse:0.986045
[1100]	validation_0-rmse:0.899873	validation_1-rmse:0.982395
[1200]	validatio

[700]	validation_0-rmse:0.946975	validation_1-rmse:1.04265
[800]	validation_0-rmse:0.92722	validation_1-rmse:1.03273
[900]	validation_0-rmse:0.912319	validation_1-rmse:1.02636
[1000]	validation_0-rmse:0.900447	validation_1-rmse:1.0224
[1100]	validation_0-rmse:0.890746	validation_1-rmse:1.01983
[1200]	validation_0-rmse:0.882211	validation_1-rmse:1.01781
[1300]	validation_0-rmse:0.874735	validation_1-rmse:1.01604
[1400]	validation_0-rmse:0.867582	validation_1-rmse:1.0149
[1500]	validation_0-rmse:0.861279	validation_1-rmse:1.01391
[1600]	validation_0-rmse:0.855259	validation_1-rmse:1.01324
[1700]	validation_0-rmse:0.849783	validation_1-rmse:1.01267
[1800]	validation_0-rmse:0.844541	validation_1-rmse:1.01224
[1900]	validation_0-rmse:0.83932	validation_1-rmse:1.01183
[2000]	validation_0-rmse:0.834461	validation_1-rmse:1.01159
[2100]	validation_0-rmse:0.829706	validation_1-rmse:1.01123
[2200]	validation_0-rmse:0.825177	validation_1-rmse:1.01112
[2300]	validation_0-rmse:0.82066	validation_1-r

Stopping. Best iteration:
[2300]	validation_0-rmse:0.826902	validation_1-rmse:0.993519

Running k-fold 5 of 5
[0]	validation_0-rmse:1.8688	validation_1-rmse:1.85834
Multiple eval metrics have been passed: 'validation_1-rmse' will be used for early stopping.

Will train until validation_1-rmse hasn't improved in 100 rounds.
[100]	validation_0-rmse:1.5387	validation_1-rmse:1.5453
[200]	validation_0-rmse:1.31682	validation_1-rmse:1.34134
[300]	validation_0-rmse:1.17066	validation_1-rmse:1.21252
[400]	validation_0-rmse:1.07598	validation_1-rmse:1.13372
[500]	validation_0-rmse:1.01446	validation_1-rmse:1.08652
[600]	validation_0-rmse:0.974272	validation_1-rmse:1.05906
[700]	validation_0-rmse:0.946742	validation_1-rmse:1.04271
[800]	validation_0-rmse:0.927338	validation_1-rmse:1.03273
[900]	validation_0-rmse:0.912528	validation_1-rmse:1.02677
[1000]	validation_0-rmse:0.900676	validation_1-rmse:1.02289
[1100]	validation_0-rmse:0.890753	validation_1-rmse:1.02023
[1200]	validation_0-rmse:0.8819

[1100]	validation_0-rmse:0.89515	validation_1-rmse:1.00328
[1200]	validation_0-rmse:0.886525	validation_1-rmse:1.00076
[1300]	validation_0-rmse:0.879211	validation_1-rmse:0.999228
[1400]	validation_0-rmse:0.872431	validation_1-rmse:0.997996
[1500]	validation_0-rmse:0.866284	validation_1-rmse:0.996891
[1600]	validation_0-rmse:0.860377	validation_1-rmse:0.996053
[1700]	validation_0-rmse:0.854955	validation_1-rmse:0.995419
[1800]	validation_0-rmse:0.849975	validation_1-rmse:0.994796
[1900]	validation_0-rmse:0.844831	validation_1-rmse:0.994379
[2000]	validation_0-rmse:0.840072	validation_1-rmse:0.994109
[2100]	validation_0-rmse:0.835304	validation_1-rmse:0.993533
[2200]	validation_0-rmse:0.83098	validation_1-rmse:0.99316
[2300]	validation_0-rmse:0.826652	validation_1-rmse:0.992864
[2400]	validation_0-rmse:0.82232	validation_1-rmse:0.992755
[2500]	validation_0-rmse:0.818013	validation_1-rmse:0.992466
[2600]	validation_0-rmse:0.813778	validation_1-rmse:0.992379
[2700]	validation_0-rmse:0.809

[1100]	validation_0-rmse:0.901355	validation_1-rmse:0.986747
[1200]	validation_0-rmse:0.893271	validation_1-rmse:0.984449
[1300]	validation_0-rmse:0.886119	validation_1-rmse:0.98285
[1400]	validation_0-rmse:0.879181	validation_1-rmse:0.98145
[1500]	validation_0-rmse:0.873083	validation_1-rmse:0.980405
[1600]	validation_0-rmse:0.867079	validation_1-rmse:0.979536
[1700]	validation_0-rmse:0.861658	validation_1-rmse:0.979025
[1800]	validation_0-rmse:0.8564	validation_1-rmse:0.978632
[1900]	validation_0-rmse:0.851767	validation_1-rmse:0.978208
[2000]	validation_0-rmse:0.846917	validation_1-rmse:0.977889
[2100]	validation_0-rmse:0.842615	validation_1-rmse:0.977678
[2200]	validation_0-rmse:0.838117	validation_1-rmse:0.977463
[2300]	validation_0-rmse:0.833829	validation_1-rmse:0.977541
Stopping. Best iteration:
[2214]	validation_0-rmse:0.837582	validation_1-rmse:0.977401

Running k-fold 4 of 5
[0]	validation_0-rmse:1.87449	validation_1-rmse:1.83516
Multiple eval metrics have been passed: 'vali

In [None]:
# Fit each rounder on out-of-fold predictions and label the test set with
# every model of every result currently held in `ensemble`.
rounders = [Rounder(method='dist'), Rounder(method='optimal')]
labels = []
for rounder in rounders:
    for i, result in enumerate(ensemble, 1):
        # The same rounder object is re-fitted for every result.
        # NOTE(review): presumably each fit replaces the previous one, so the
        # rounder ends up fitted on the last result's OOF only — confirm.
        rounder.fit(X_trn['accuracy_group'], result.oof)
        for j, model in enumerate(result.models, 1):
            U.log(f'Rounder: {rounder.method}, ensemble #{i}, model #{j}')
            model_pred = model.predict(X_tst[features])
            model_labels = rounder.predict(model_pred)
            # Collected here but not consumed within this cell.
            labels.append(model_labels)

In [None]:
# Save the rounders and the per-result model lists under the key 'xgboost'.
# Only `result.models` is stored for each ensemble entry.
bundle.save({
    'rounders': rounders, 
    'ensemble': [result.models for result in ensemble]
}, key='xgboost')

In [None]:
# Single-model workflow: inference + submission on Kaggle, training +
# artifact saving elsewhere.
# NOTE: `algo` is rebound here to a model-name string; an earlier cell binds
# the same name to the feature-extraction algorithm object.
algo = 'lightgbm'
version = '024'

if U.on_kaggle():
    U.log('Inference on Kaggle.')
    # Feature list and rounding bounds come from the pre-built bundle.
    features = bundle.features()
    bounds = bundle.bounds()
    predicted = inference(X_tst, features, bounds=bounds, model=algo, version=version)
    U.log('Saving predictions on disk.')
    filename = submit(predicted)
    submit_df = pd.read_csv(filename)
    U.log('First 20 submission rows:')
    display(submit_df.head(20))
    
else:
    U.log(f'Training model: {algo}')
    # Metric built from the training target and passed to the trainer as the
    # CV metric named 'cappa'.
    cappa = make_cappa_metric(X_trn['accuracy_group'])
    trainer = EnsembleTrainer(algo=algo, cv_metrics={'cappa': cappa})
    fold = GroupKFold(n_splits=5)
    config = get_default_config(algo)
    U.set_nested(config, 'model_params.feature_fraction', 0.4)
    U.set_nested(config, 'model_params.bagging_fraction', 0.5)
    U.set_nested(config, 'model_params.bagging_freq', 1)
    U.set_nested(config, 'model_params.learning_rate', 0.003)
    result = trainer.train(X_trn, features=features, fold=fold, config=config)
    U.log('Saving the trained models')
    # Key embeds the algorithm name and version, i.e. 'models_lightgbm_024'.
    bundle.save(result.models, f'models_{algo}_{version}')
    U.log('Saving the optimal rounding bounds')
    # Bounds are derived from the out-of-fold predictions and the true targets.
    bounds = optimize_rounding_bounds(result.oof, X_trn['accuracy_group'].values)
    U.log(f'Optimal bounds: {bounds}')
    bundle.save(bounds, 'bounds')
    U.log(f'Final features number: {len(features)}')
    bundle.save(features, 'features')

In [None]:
# Local sanity check of the inference path: build a submission from the saved
# artifacts, verify it, then package the bundle.
if not U.on_kaggle():
    import os
    features = bundle.features()
    bounds = bundle.bounds()
    filename = submit(inference(X_tst, features, bounds, model=algo, version=version))
    # The submission file must exist and hold exactly 1000 rows.
    assert os.path.exists(filename)
    assert pd.read_csv(filename).shape[0] == 1000
    bundle.package(folder='/home/ck/data/bowl2019/external/')

In [None]:
%matplotlib inline

In [None]:
# Bar chart of predicted accuracy-group counts, ordered by group label.
# `sort_index()` replaces the reset_index/sort_values(by='index') round-trip,
# which raises KeyError on pandas >= 2.0 because `value_counts().reset_index()`
# no longer yields a column named 'index'. The trailing ';' hides the Axes repr.
pd.read_csv('submission.csv')['accuracy_group'].value_counts().sort_index().plot.bar();

In [None]:
# Bar chart of training accuracy-group counts, ordered by group label.
# `sort_index()` replaces the reset_index/sort_values(by='index') round-trip,
# which raises KeyError on pandas >= 2.0 because `value_counts().reset_index()`
# no longer yields a column named 'index'. The trailing ';' hides the Axes repr.
X_trn['accuracy_group'].value_counts().sort_index().plot.bar();

## Stack Many Models

In [16]:
# Package saved artifacts and helper scripts into the target folder (see the
# logged "-->" lines in the cell output). The destination is a hard-coded
# absolute local path.
bundle.package(folder='/home/ck/data/bowl2019/external/')

Packaging training results into dataset.
/tmp/bowl2019/meta.joblib --> /home/ck/data/bowl2019/external/meta.joblib
/tmp/bowl2019/lightgbm.joblib --> /home/ck/data/bowl2019/external/lightgbm.joblib
/tmp/bowl2019/target_encoders.joblib --> /home/ck/data/bowl2019/external/target_encoders.joblib
/tmp/bowl2019/bounds.joblib --> /home/ck/data/bowl2019/external/bounds.joblib
/tmp/bowl2019/xgboost.joblib --> /home/ck/data/bowl2019/external/xgboost.joblib
/tmp/bowl2019/features.joblib --> /home/ck/data/bowl2019/external/features.joblib
/tmp/bowl2019/encoders.joblib --> /home/ck/data/bowl2019/external/encoders.joblib
/tmp/bowl2019/models_lightgbm_24.joblib --> /home/ck/data/bowl2019/external/models_lightgbm_24.joblib
/tmp/bowl2019/models_lightgbm_023.joblib --> /home/ck/data/bowl2019/external/models_lightgbm_023.joblib
Packaging helper scripts into dataset.
../selection.py --> /home/ck/data/bowl2019/external/selection.py
../encode.py --> /home/ck/data/bowl2019/external/encode.py
../features.py -

'/home/ck/data/bowl2019/external/'