In [None]:
import cudf
import cupy
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from catboost import CatBoost
from catboost import Pool
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from catboost import CatBoostClassifier


In [None]:
def reduce_mem_usage(df):
    """Downcast the numeric columns of ``df`` in place to save memory.

    Columns whose dtype name is ``object``, ``category`` or
    ``datetime64[ns, UTC]`` are left untouched.  Columns whose dtype name
    starts with ``int`` are cast to the first of int8/int16/int32/int64 whose
    range strictly contains the column's min and max.  Every other column
    (floats, but also e.g. unsigned integers or booleans, whose dtype names do
    not start with ``int``) goes through the float path: float16 if the range
    fits, otherwise float32, otherwise float64.

    Memory usage before and after, and the percentage saved, are printed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink; its columns are reassigned in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    # Candidate dtypes, narrowest first; the first one that fits wins.
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    for col in df.columns:
        col_type = df[col].dtype.name
        if col_type in ['object', 'category', 'datetime64[ns, UTC]']:
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if str(col_type)[:3] == 'int':
            # Fix: the int8 range used to be cast to int16, so the narrowest
            # integer type was never actually used.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            # No candidate fits only when a value sits exactly on an int64
            # bound; the column is then left as it is.
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Also reached when min/max are NaN (comparisons are False).
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

In [None]:
def create_adversarial_data(df_train, df_test, cols, N_val=70000, random_state=None):
    """Stack train and test rows and split them into a fit set and a hold-out set.

    The ``cols`` columns of ``df_train`` and ``df_test`` are concatenated with a
    fresh 0..n-1 index.  ``N_val`` rows are sampled without replacement as the
    hold-out set; all remaining rows form the fit set.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Source frames; both must contain every column in ``cols``.
    cols : list
        Columns to keep in the combined frame.
    N_val : int, default 70000
        Number of rows in the hold-out set.  ``DataFrame.sample`` raises
        ``ValueError`` if it exceeds the number of combined rows.
    random_state : optional
        Passed to ``DataFrame.sample``; the default ``None`` keeps the
        original unseeded behaviour.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``(adversarial_train, adversarial_val)``, sharing the combined index.
    """
    # Fix: DataFrame.append was removed in pandas 2.0, and the original built
    # the same concatenation three times.  Build it once with pd.concat.
    combined = pd.concat([df_train[cols], df_test[cols]], ignore_index=True)
    adversarial_val = combined.sample(N_val, replace=False, random_state=random_state)
    adversarial_train = combined[~combined.index.isin(adversarial_val.index)]
    # The former `del df_train` / `del df_test` only unbound the local names and
    # freed nothing, because the caller still holds both frames.
    return adversarial_train, adversarial_val

This notebook was inspired by the following work: https://www.kaggle.com/code/zakopur0/adversarial-validation-private-vs-public/notebook

## This notebook is an extension of the notebooks: https://www.kaggle.com/code/mikhaildonskoy/looking-for-risky-features-in-train-data, https://www.kaggle.com/code/mikhaildonskoy/looking-for-risky-features-in-first-rows

## In this work, I want to find out which features differ the most between the training and test datasets, because relying on such features can lead to overfitting.
## Unlike my previous work, I will look for risky features in aggregated data.

# Data preparation

In [None]:
# NOTE(review): unpickling can execute arbitrary code; only load these files
# from a source you trust.
# Draw a 100,000-row sample from each aggregated frame.  The fixed seed makes
# reruns analyse the same rows (the sample was previously unseeded).
train = reduce_mem_usage(
    pd.read_pickle('../input/amex-agg-data-pickle/train_agg.pkl', compression="gzip")
    .sample(100000, random_state=42)
)
test = reduce_mem_usage(
    pd.read_pickle('../input/amex-agg-data-pickle/test_agg.pkl', compression="gzip")
    .sample(100000, random_state=42)
)

In [None]:
# Tag every row with its origin: 1 for the training sample, 0 for the test sample.
# This flag is the label the adversarial classifier learns to predict.
train["is_train"], test["is_train"] = 1, 0
target = ['is_train']
drop_cols = ['S_2', 'customer_ID', "target", "is_train"]

# Columns carried into the combined frame are taken from the test frame.
features = list(test.columns)
cat_features = [
    "B_30", "B_38", "D_114", "D_116", "D_117", "D_120",
    "D_126", "D_63", "D_64", "D_66", "D_68",
]

# The categorical model inputs are the "_last" aggregates of the raw categoricals.
cat_cols = [base + "_last" for base in cat_features]
use_cols = [name for name in train.columns if name not in drop_cols]

adversarial_train, adversarial_test = create_adversarial_data(train, test, features)
del train, test

In [None]:
# Convert the categorical columns of both splits to strings before they are
# handed to CatBoost as cat_features.
for split in (adversarial_train, adversarial_test):
    split[cat_cols] = split[cat_cols].astype('str')

# Starting Negative Feature Selection

In [None]:
# Build the fitting pool and the hold-out pool from the same column selection,
# labelled with the `target` column(s).
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

In [None]:
# Training settings for the train-vs-test classifier; AUC on the hold-out pool
# is the evaluation metric.
params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)

model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

In [None]:
# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

# Bar chart of the ten highest-scoring features.
f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")

# Drop D_121_std

In [None]:
# Leave out D_121_std, refit the train-vs-test classifier and plot the ten
# features with the highest importance among those that remain.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std",
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")

# Drop S_11_last

In [None]:
# Additionally leave out S_11_last, refit the train-vs-test classifier, then
# plot and print the ten features with the highest importance.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)

# Drop D_59_last D_118_std D_115_std S_11_min D_59_min D_59_mean S_9_std B_29_mean D_126_nunique

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
    'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]
# A dropped column may itself be categorical, so prune the categorical list too.
cat_cols = [name for name in cat_cols if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
#
# Fix: a comma was missing after 'D_55_last'.  Python's implicit string-literal
# concatenation merged it with 'D_55_mean' into the single non-existent name
# 'D_55_lastD_55_mean', so neither column was actually excluded in this round.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
    'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
    'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]
# A dropped column may itself be categorical, so prune the categorical list too.
cat_cols = [name for name in cat_cols if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'R_27_last', 'D_55_mean', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_55_last', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
#
# Fix: a comma was missing after the first 'D_55_last', which fused it with the
# following 'D_55_mean' into the bogus entry 'D_55_lastD_55_mean'.  Both names
# also appear in the last row of the list; the duplicates are harmless.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
    'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
    'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
    'R_27_last', 'D_55_mean', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_55_last', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]
# A dropped column may itself be categorical, so prune the categorical list too.
cat_cols = [name for name in cat_cols if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
#
# Fix: a comma was missing after the first 'D_55_last', which fused it with the
# following 'D_55_mean' into the bogus entry 'D_55_lastD_55_mean'.  Both names
# also appear again further down the list; the duplicates are harmless.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
    'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
    'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
    'R_27_last', 'D_55_mean', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_55_last', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
    'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]
# A dropped column may itself be categorical, so prune the categorical list too.
cat_cols = [name for name in cat_cols if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'D_114_count', 'D_69_std', 'D_123_std', 'D_64_last', 'S_13_mean', 'D_120_nunique', 'S_17_mean', 'D_120_last', 'D_61_max', 'D_64_nunique'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
#
# Fix: a comma was missing after the first 'D_55_last', which fused it with the
# following 'D_55_mean' into the bogus entry 'D_55_lastD_55_mean'.  Both names
# also appear again further down the list; the duplicates are harmless.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
    'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
    'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
    'R_27_last', 'D_55_mean', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_55_last', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
    'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last',
    'D_114_count', 'D_69_std', 'D_123_std', 'D_64_last', 'S_13_mean', 'D_120_nunique', 'S_17_mean', 'D_120_last', 'D_61_max', 'D_64_nunique',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]
# A dropped column may itself be categorical, so prune the categorical list too.
cat_cols = [name for name in cat_cols if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'D_123_mean', 'D_83_std', 'R_1_max', 'S_13_min', 'D_114_nunique', 'D_118_last', 'D_42_min', 'S_17_std', 'R_26_max', 'S_13_std'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
#
# Fix: a comma was missing after the first 'D_55_last', which fused it with the
# following 'D_55_mean' into the bogus entry 'D_55_lastD_55_mean'.  Both names
# also appear again further down the list; the duplicates are harmless.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
    'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
    'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
    'R_27_last', 'D_55_mean', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_55_last', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
    'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last',
    'D_114_count', 'D_69_std', 'D_123_std', 'D_64_last', 'S_13_mean', 'D_120_nunique', 'S_17_mean', 'D_120_last', 'D_61_max', 'D_64_nunique',
    'D_123_mean', 'D_83_std', 'R_1_max', 'S_13_min', 'D_114_nunique', 'D_118_last', 'D_42_min', 'S_17_std', 'R_26_max', 'S_13_std',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]
# A dropped column may itself be categorical, so prune the categorical list too.
cat_cols = [name for name in cat_cols if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'S_17_max', 'D_62_last', 'B_40_last', 'P_4_std', 'S_22_std', 'S_27_std', 'D_61_mean', 'S_15_min', 'R_26_mean', 'R_26_last'

In [None]:
# Extend the exclusion list with the previous round's top features, refit the
# train-vs-test classifier, then plot and print the new top ten.
#
# Fix: a comma was missing after the first 'D_55_last', which fused it with the
# following 'D_55_mean' into the bogus entry 'D_55_lastD_55_mean'.  Both names
# also appear again further down the list; the duplicates are harmless.
drop_cols = [
    'S_2', 'customer_ID', "is_train",  # identifiers and the label
    "D_121_std", "S_11_last",
    'D_59_last', 'D_118_std', 'D_115_std', 'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
    'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
    'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
    'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
    'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
    'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
    'R_27_last', 'D_55_mean', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_55_last', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
    'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last',
    'D_114_count', 'D_69_std', 'D_123_std', 'D_64_last', 'S_13_mean', 'D_120_nunique', 'S_17_mean', 'D_120_last', 'D_61_max', 'D_64_nunique',
    'D_123_mean', 'D_83_std', 'R_1_max', 'S_13_min', 'D_114_nunique', 'D_118_last', 'D_42_min', 'S_17_std', 'R_26_max', 'S_13_std',
    'S_17_max', 'D_62_last', 'B_40_last', 'P_4_std', 'S_22_std', 'S_27_std', 'D_61_mean', 'S_15_min', 'R_26_mean', 'R_26_last',
]
use_cols = [name for name in adversarial_train.columns if name not in drop_cols]
# A dropped column may itself be categorical, so prune the categorical list too.
cat_cols = [name for name in cat_cols if name not in drop_cols]

# Fitting pool and hold-out pool share the same column selection.
train_data, holdout_data = (
    Pool(data=frame[use_cols], label=frame[target], cat_features=cat_cols)
    for frame in (adversarial_train, adversarial_test)
)

params = dict(
    loss_function='Logloss',
    eval_metric='AUC',
    od_type='Iter',
    num_boost_round=100,
    early_stopping_rounds=10,
    depth=2,
    task_type='GPU',
)
model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Pair each importance score with its column name, strongest first.
importance_table = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
})
feature_importance = importance_table.sort_values(by=['feature_importance'], ascending=False)
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b")
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'R_26_min', 'D_62_mean', 'B_40_max', 'R_1_std', 'D_125_mean', 'S_8_max', 'D_68_nunique', 'S_22_min', 'B_17_last', 'D_61_std'

In [None]:
# Columns excluded from this round: identifier/helper columns plus every
# feature flagged by the earlier rounds.
# NOTE: the original list had no comma after 'D_55_last', so Python's implicit
# string-literal concatenation produced the bogus entry 'D_55_lastD_55_mean'.
# The comma is restored here; the later repeats of 'D_55_mean' / 'D_55_last'
# (added to compensate) are therefore no longer needed and were removed.
drop_cols = ['S_2', 'customer_ID', "is_train", "D_121_std", "S_11_last", 'D_59_last', 'D_118_std', 'D_115_std',
             'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
             'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
             'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
             'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
             'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
             'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
             'R_27_last', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
             'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last',
             'D_114_count', 'D_69_std', 'D_123_std', 'D_64_last', 'S_13_mean', 'D_120_nunique', 'S_17_mean', 'D_120_last', 'D_61_max', 'D_64_nunique',
             'D_123_mean', 'D_83_std', 'R_1_max', 'S_13_min', 'D_114_nunique', 'D_118_last', 'D_42_min', 'S_17_std', 'R_26_max', 'S_13_std',
             'S_17_max', 'D_62_last', 'B_40_last', 'P_4_std', 'S_22_std', 'S_27_std', 'D_61_mean', 'S_15_min', 'R_26_mean', 'R_26_last',
             'R_26_min', 'D_62_mean', 'B_40_max', 'R_1_std', 'D_125_mean', 'S_8_max', 'D_68_nunique', 'S_22_min', 'B_17_last', 'D_61_std']

# Set copy of the list so the two filters below use O(1) membership tests.
_excluded = set(drop_cols)
use_cols = [c for c in adversarial_train.columns if c not in _excluded]
cat_cols = [c for c in cat_cols if c not in _excluded]

# Train / hold-out pools restricted to the columns that are still in play.
train_data = Pool(
    data=adversarial_train[use_cols],
    label=adversarial_train[target],
    cat_features=cat_cols,
)
holdout_data = Pool(
    data=adversarial_test[use_cols],
    label=adversarial_test[target],
    cat_features=cat_cols,
)

# Small, shallow model with early stopping on the hold-out AUC.
params = {
    'loss_function': 'Logloss',
    'eval_metric': 'AUC',
    'od_type': 'Iter',
    'num_boost_round': 100,
    'early_stopping_rounds': 10,
    "depth": 2,
    'task_type': 'GPU',
}

model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Rank the remaining features by the importance the fitted model assigns them.
feature_importance = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
}).sort_values(by=['feature_importance'], ascending=False)

# Ten most important features, computed once and reused for the plot and prints.
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b", ax=ax)
print(top_features)
print(list(top_features["feature_names"]))

# Drop 'D_61_last', 'D_122_max', 'D_39_last', 'S_24_std', 'B_39_min', 'D_47_min', 'D_42_last', 'B_8_mean', 'D_68_last', 'D_39_max'

In [None]:
# Columns excluded from this round: identifier/helper columns plus every
# feature flagged by the earlier rounds.
# NOTE: the original list had no comma after 'D_55_last', so Python's implicit
# string-literal concatenation produced the bogus entry 'D_55_lastD_55_mean'.
# The comma is restored here; the later repeats of 'D_55_mean' / 'D_55_last'
# (added to compensate) are therefore no longer needed and were removed.
drop_cols = ['S_2', 'customer_ID', "is_train", "D_121_std", "S_11_last", 'D_59_last', 'D_118_std', 'D_115_std',
             'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
             'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
             'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
             'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
             'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
             'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
             'R_27_last', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
             'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last',
             'D_114_count', 'D_69_std', 'D_123_std', 'D_64_last', 'S_13_mean', 'D_120_nunique', 'S_17_mean', 'D_120_last', 'D_61_max', 'D_64_nunique',
             'D_123_mean', 'D_83_std', 'R_1_max', 'S_13_min', 'D_114_nunique', 'D_118_last', 'D_42_min', 'S_17_std', 'R_26_max', 'S_13_std',
             'S_17_max', 'D_62_last', 'B_40_last', 'P_4_std', 'S_22_std', 'S_27_std', 'D_61_mean', 'S_15_min', 'R_26_mean', 'R_26_last',
             'R_26_min', 'D_62_mean', 'B_40_max', 'R_1_std', 'D_125_mean', 'S_8_max', 'D_68_nunique', 'S_22_min', 'B_17_last', 'D_61_std',
             'D_61_last', 'D_122_max', 'D_39_last', 'S_24_std', 'B_39_min', 'D_47_min', 'D_42_last', 'B_8_mean', 'D_68_last', 'D_39_max']

# Set copy of the list so the two filters below use O(1) membership tests.
_excluded = set(drop_cols)
use_cols = [c for c in adversarial_train.columns if c not in _excluded]
cat_cols = [c for c in cat_cols if c not in _excluded]

# Train / hold-out pools restricted to the columns that are still in play.
train_data = Pool(
    data=adversarial_train[use_cols],
    label=adversarial_train[target],
    cat_features=cat_cols,
)
holdout_data = Pool(
    data=adversarial_test[use_cols],
    label=adversarial_test[target],
    cat_features=cat_cols,
)

# Small, shallow model with early stopping on the hold-out AUC.
params = {
    'loss_function': 'Logloss',
    'eval_metric': 'AUC',
    'od_type': 'Iter',
    'num_boost_round': 100,
    'early_stopping_rounds': 10,
    "depth": 2,
    'task_type': 'GPU',
}

model = CatBoostClassifier(**params)
_ = model.fit(train_data, eval_set=holdout_data, verbose_eval=50)

# Rank the remaining features by the importance the fitted model assigns them.
feature_importance = pd.DataFrame({
    'feature_importance': model.get_feature_importance(),
    'feature_names': adversarial_train[use_cols].columns,
}).sort_values(by=['feature_importance'], ascending=False)

# Ten most important features, computed once and reused for the plot and prints.
top_features = feature_importance.sort_values(by="feature_importance", ascending=False)[:10]

f, ax = plt.subplots(figsize=(10, 10))
sns.set_color_codes("pastel")
sns.barplot(x="feature_names", y="feature_importance", data=top_features, color="b", ax=ax)
print(top_features)
print(list(top_features["feature_names"]))

# Conclusion


### Risky features on the last lines: "R_1","D_59","S_11","B_29","S_9","S_15","D_121","S_24", "D_62","R_27","S_17","S_13", "S_18","D_45". 

### Risky features on the first lines: "R_1","D_59","S_11","D_121","S_27","D_118","D_119","D_120","P_4","D_39","R_27","D_126"

Risky features in aggregated data "D_121_std","S_11_last", 'D_59_last', 'D_118_std', 'D_115_std',
             'S_11_min', 'D_59_min', 'D_59_mean', 'S_9_std', 'B_29_mean', 'D_126_nunique',
            'D_119_std', 'D_59_max', 'S_11_mean', 'B_29_max', 'S_11_max', 'D_59_std', 'S_9_max', 'B_29_min', 'D_121_mean', 'S_22_mean',
            'B_29_last', 'S_11_std', 'S_9_min', 'B_29_std', 'S_24_min', 'S_9_mean', 'D_55_std', 'S_6_last', 'D_124_std', 'D_121_max',
            'S_9_last', 'S_15_last', 'D_121_last', 'D_113_std', 'D_45_std', 'S_24_mean', 'D_122_std', 'D_55_min', 'D_121_min', 'S_24_last',
            'S_24_max', 'S_15_max', 'D_55_max', 'R_27_min', 'P_4_max', 'D_66_last', 'R_27_std', 'S_13_max', 'R_9_std', 'D_55_last',
            'D_55_mean', 'S_15_std', 'R_27_max', 'D_68_count', 'R_27_mean', 'D_118_min', 'B_8_std', 'D_45_last', 'S_27_mean', 'D_119_min',
            'R_27_last', 'D_55_mean', 'S_15_mean', 'S_27_max', 'D_126_count', 'D_55_last', 'D_117_nunique', 'S_18_last', 'S_17_last', 'S_8_mean',
            'D_125_std', 'D_120_count', 'D_45_max', 'D_64_count', 'D_45_min', 'D_116_count', 'D_117_count', 'S_27_min', 'R_26_std', 'S_13_last',
            'D_114_count', 'D_69_std', 'D_123_std', 'D_64_last', 'S_13_mean', 'D_120_nunique', 'S_17_mean', 'D_120_last', 'D_61_max', 'D_64_nunique',
            'D_123_mean', 'D_83_std', 'R_1_max', 'S_13_min', 'D_114_nunique', 'D_118_last', 'D_42_min', 'S_17_std', 'R_26_max', 'S_13_std',
            'S_17_max', 'D_62_last', 'B_40_last', 'P_4_std', 'S_22_std', 'S_27_std', 'D_61_mean', 'S_15_min', 'R_26_mean', 'R_26_last',
            'R_26_min', 'D_62_mean', 'B_40_max', 'R_1_std', 'D_125_mean', 'S_8_max', 'D_68_nunique', 'S_22_min', 'B_17_last', 'D_61_std',
            'D_61_last', 'D_122_max', 'D_39_last', 'S_24_std', 'B_39_min', 'D_47_min', 'D_42_last', 'B_8_mean', 'D_68_last', 'D_39_max'

### Vote for this notebook if its content was useful or interesting to you