<a href="https://colab.research.google.com/github/human-ai2025/Elo-Merchant-Recommendation/blob/master/Feature%20Engineering/Feature_Selection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Libraries

In [1]:
#--------------- DATA HANDLING AND MANIPULATION LIBRARIES ---------------#
import random 
import pandas as pd
import numpy as np
import os
import gc
import pickle
from scipy.stats import uniform, truncnorm, randint
from scipy.stats import randint as sp_randint
import datetime
#---------------                 Warning                  ---------------#
import warnings 
warnings.filterwarnings('ignore')

#---------------        Visualization Libraries           ---------------#
import matplotlib.pyplot as plt
import seaborn as sns
color = sns.color_palette()

#---------------        Machine Learning  Libraries       ---------------#
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import RandomizedSearchCV
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import mutual_info_regression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
import xgboost as xgb

## Memory Reduction Script

In [2]:

#https://www.kaggle.com/fabiendaniel/elo-world
#Function to load data into pandas and reduce memory usage

def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

## Mount the drive 

In [4]:
#Mounting drive 
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Set Path

In [5]:
%cd /content/drive/MyDrive/data

/content/drive/MyDrive/data


## Load Train and Test 

In [137]:
#load train
train = reduce_mem_usage(pd.read_csv('FINAL_TRAIN_M.csv'))
test = reduce_mem_usage(pd.read_csv('FINAL_TEST_M.csv'))

Mem. usage decreased to 99.56 Mb (71.3% reduction)
Mem. usage decreased to 62.01 Mb (70.6% reduction)


## Feature Selection 

Remember not to perform any type of feature selection on Test.

### Check for null values 

In [138]:
train

Unnamed: 0.1,Unnamed: 0,first_active_month,card_id,feature_1,feature_2,feature_3,target,year,quarter,elapsed_time,feature_sum,feature_mean,feature_max,feature_min,feature_std,purchase_Year_mode,purchase_Week_sum,purchase_Week_mean,purchase_Week_std,purchase_Day_sum,purchase_Day_mean,purchase_Day_std,purchase_Dayofyear_sum,purchase_Dayofyear_mean,purchase_Dayofyear_std,purchase_Is_month_end_sum,purchase_Is_month_end_mean,purchase_Is_month_end_std,purchase_Is_month_start_sum,purchase_Is_month_start_mean,purchase_Is_month_start_std,purchase_Is_quarter_end_sum,purchase_Is_quarter_end_mean,purchase_Is_quarter_end_std,purchase_Is_quarter_start_sum,purchase_Is_quarter_start_mean,purchase_Is_quarter_start_std,purchase_Is_year_end_sum,purchase_Is_year_end_mean,purchase_Is_year_end_std,purchase_Is_year_start_sum,purchase_Is_year_start_mean,purchase_Is_year_start_std,purchase_Hour_sum,purchase_Hour_mean,purchase_Hour_std,purchase_Minute_sum,purchase_Minute_mean,purchase_Minute_std,purchase_Second_sum,purchase_Second_mean,purchase_Second_std,purchase_Elapsed_sum,purchase_Elapsed_mean,purchase_Elapsed_std,purchasedWeekday_0_sum,purchasedWeekday_0_mean,purchasedWeekday_0_std,purchasedWeekday_1_sum,purchasedWeekday_1_mean,purchasedWeekday_1_std,purchase_Month_1_sum,purchase_Month_1_mean,purchase_Month_1_std,purchase_Month_2_sum,purchase_Month_2_mean,purchase_Month_2_std,purchase_Month_3_sum,purchase_Month_3_mean,purchase_Month_3_std,purchase_Month_4_sum,purchase_Month_4_mean,purchase_Month_4_std,purchase_Month_5_sum,purchase_Month_5_mean,purchase_Month_5_std,purchase_Month_6_sum,purchase_Month_6_mean,purchase_Month_6_std,purchase_Month_7_sum,purchase_Month_7_mean,purchase_Month_7_std,purchase_Month_8_sum,purchase_Month_8_mean,purchase_Month_8_std,purchase_Month_9_sum,purchase_Month_9_mean,purchase_Month_9_std,purchase_Month_10_sum,purchase_Month_10_mean,purchase_Month_10_std,purchase_Month_11_sum,purchase_Month_11_mean,purchase_Month_11_std,purchase_Month_12_sum,purchase_Month_12_mean,purchase_Month_12_std,purchasedWeekend_0_sum,purchasedWeekend_0_mean,purchasedWeekend_0_std,purchasedWeekend_1_sum,purchasedWeekend_1_mean,purchasedWeekend_1_std,purchase_Dayofweek_0_sum,purchase_Dayofweek_0_mean,purchase_Dayofweek_0_std,purchase_Dayofweek_1_sum,purchase_Dayofweek_1_mean,purchase_Dayofweek_1_std,purchase_Dayofweek_2_sum,purchase_Dayofweek_2_mean,purchase_Dayofweek_2_std,purchase_Dayofweek_3_sum,purchase_Dayofweek_3_mean,purchase_Dayofweek_3_std,purchase_Dayofweek_4_sum,purchase_Dayofweek_4_mean,purchase_Dayofweek_4_std,purchase_Dayofweek_5_sum,purchase_Dayofweek_5_mean,purchase_Dayofweek_5_std,purchase_Dayofweek_6_sum,purchase_Dayofweek_6_mean,purchase_Dayofweek_6_std,purchase_date_temp_period,purchase_date_temp_mean_diff,purchase_date_temp_min,purchase_date_temp_max,monthDiff_period,monthDiff_mean_diff,authorized_flag_sum,authorized_flag_mean,authorized_flag_std,category_1_sum,category_1_mean,category_1_std,category_2_sum,category_2_mean,category_2_std,category_3_sum,category_3_mean,category_3_std,month_lag_sum,month_lag_mean,month_lag_min,month_lag_max,month_lag_std,installments_sum,installments_mean,installments_min,installments_max,installments_std,purchase_amount_sum,purchase_amount_mean,purchase_amount_min,purchase_amount_max,purchase_amount_std,mer_numerical_1_sum,mer_numerical_1_mean,mer_numerical_1_min,mer_numerical_1_max,mer_numerical_1_std,mer_numerical_2_sum,mer_numerical_2_mean,mer_numerical_2_min,mer_numerical_2_max,mer_numerical_2_std,mer_avg_sales_lag3_sum,mer_avg_sales_lag3_mean,mer_avg_sales_lag3_min,mer_avg_sales_lag3_max,mer_avg_sales_lag3_std,mer_avg_sales_lag6_sum,mer_avg_sales_lag6_mean,mer_avg_sales_lag6_std,mer_avg_sales_lag12_sum,mer_avg_sales_lag12_mean,mer_avg_sales_lag12_min,mer_avg_sales_lag12_max,mer_avg_sales_lag12_std,mer_avg_purchases_lag3_sum,mer_avg_purchases_lag3_mean,mer_avg_purchases_lag3_min,mer_avg_purchases_lag3_max,mer_avg_purchases_lag3_std,mer_avg_purchases_lag6_sum,mer_avg_purchases_lag6_mean,mer_avg_purchases_lag6_min,mer_avg_purchases_lag6_max,mer_avg_purchases_lag6_std,mer_avg_purchases_lag12_sum,mer_avg_purchases_lag12_mean,mer_avg_purchases_lag12_min,mer_avg_purchases_lag12_max,mer_avg_purchases_lag12_std,mer_active_months_lag3_sum,mer_active_months_lag3_mean,mer_active_months_lag3_min,mer_active_months_lag3_max,mer_active_months_lag3_std,mer_active_months_lag6_sum,mer_active_months_lag6_mean,mer_active_months_lag6_min,mer_active_months_lag6_max,mer_active_months_lag6_std,mer_active_months_lag12_sum,mer_active_months_lag12_mean,mer_active_months_lag12_min,mer_active_months_lag12_max,mer_active_months_lag12_std,mer_category_1_sum,mer_category_1_mean,mer_category_1_std,mer_most_recent_sales_range_sum,mer_most_recent_sales_range_mean,mer_most_recent_sales_range_std,mer_most_recent_purchases_range_sum,mer_most_recent_purchases_range_mean,mer_most_recent_purchases_range_std,mer_category_4_sum,mer_category_4_mean,mer_category_4_std,mer_category_2_sum,mer_category_2_mean,mer_category_2_std
0,0,2017-06-01,C_ID_92a2005557,5,2,1,-0.820312,2017,2,1357,8,2.666016,5,1,2.082031,2017,8905,31.468750,15.554688,4411,15.585938,8.804688,61760,218.250,109.1875,9,0.031799,0.175781,7,0.024734,0.155640,5,0.017670,0.131958,3,0.010597,0.102600,1,0.003534,0.059448,0,0.000000,0.000000,3758,13.281250,4.796875,8128,28.718750,18.765625,7417,26.203125,18.437500,427192299097,1.509513e+09,7279052.5,96.0,0.339111,0.474365,187.0,0.660645,0.474365,21.0,0.074219,0.262451,23.0,0.081299,0.273682,12.0,0.042389,0.201904,11.0,0.038879,0.193604,0.0,0.000000,0.000000,3.0,0.010597,0.102600,49.0,0.173096,0.379150,44.0,0.155518,0.363037,20.0,0.070679,0.256836,22.0,0.077759,0.268311,21.0,0.074219,0.262451,57.0,0.201416,0.401855,187.0,0.660645,0.474365,96.0,0.339111,0.474365,34.0,0.120117,0.325684,33.0,0.116638,0.321533,44.0,0.155518,0.363037,41.0,0.144897,0.352539,35.0,0.123657,0.329834,53.0,0.187256,0.390869,43.0,0.151978,0.359619,306,26.046875,1331,1025,1,0.003546,270.0,0.954102,0.209717,0.0,0.000000,0.000000,295.0,1.041992,0.410400,4.0,0.014137,0.118225,-983.0,-3.472656,-8,2,2.734375,4,0.014137,0,1,0.118225,21403.699219,75.631447,5.000000,2000.000000,138.101593,4688.00000,16.625000,-0.057465,183.750000,44.968750,4532.000000,16.062500,-0.057465,182.125000,44.500000,351.779999,1.247447,0.680176,7.730000,1.034572,1903.690063,6.750674,27.957880,2174.559814,7.711205,0.529785,194.610001,32.652981,427.50,1.516602,0.466553,12.703125,2.101562,5484.00,19.437500,0.321045,504.250000,93.312500,6044.00,21.437500,0.252930,554.500000,102.812500,846.0,3.000000,3.0,3.0,0.000000,1691.0,5.996094,5.0,6.0,0.059540,3348.0,11.875000,5.0,12.0,0.794434,258.0,0.915039,0.279541,469.0,1.663086,1.278320,410.0,1.454102,1.288086,16.0,0.056732,0.231812,294.0,1.042969,0.411133
1,1,2017-01-01,C_ID_3d0044924f,4,1,0,0.392822,2017,1,1508,5,1.666992,4,0,2.082031,2017,8881,24.953125,16.703125,5914,16.609375,8.835938,61516,172.750,117.1875,12,0.033722,0.180786,15,0.042145,0.201172,6,0.016861,0.128906,6,0.016861,0.128906,3,0.008430,0.091553,1,0.002810,0.053009,5218,14.656250,5.585938,10360,29.093750,17.562500,10272,28.859375,17.593750,535130645472,1.503176e+09,10257346.0,132.0,0.370850,0.483643,224.0,0.629395,0.483643,72.0,0.202271,0.402344,25.0,0.070251,0.255859,16.0,0.044952,0.207520,18.0,0.050568,0.219360,15.0,0.042145,0.201172,34.0,0.095520,0.294434,49.0,0.137695,0.344971,14.0,0.039337,0.194702,24.0,0.067444,0.250977,26.0,0.073059,0.260498,16.0,0.044952,0.207520,47.0,0.132080,0.339111,224.0,0.629395,0.483643,132.0,0.370850,0.483643,42.0,0.117981,0.322998,47.0,0.132080,0.339111,35.0,0.098328,0.298096,44.0,0.123596,0.329590,56.0,0.157349,0.364502,79.0,0.221924,0.416016,53.0,0.148926,0.356445,448,30.281250,1503,1055,1,0.002817,345.0,0.969238,0.173340,31.0,0.087097,0.282227,356.0,1.000000,0.000000,426.0,1.196289,0.411865,-1752.0,-4.921875,-12,2,3.865234,549,1.541992,-1,10,1.500000,34297.628906,96.341660,3.000000,3578.479980,254.249817,9960.00000,26.843750,-0.057465,172.750000,50.218750,9752.000000,26.281250,-0.057465,170.750000,49.781250,452.660004,1.220108,0.479980,10.750000,1.065189,761.210022,2.051779,10.825763,817.449951,2.203369,0.270020,166.679993,12.232841,518.00,1.396484,0.523926,12.703125,1.900391,1546.00,4.167969,0.307861,504.250000,36.937500,1679.00,4.527344,0.208984,554.500000,40.625000,1113.0,3.000000,3.0,3.0,0.000000,2226.0,6.000000,6.0,6.0,0.000000,4400.0,11.859375,7.0,12.0,0.788574,309.0,0.833008,0.373535,403.0,1.085938,1.363281,397.0,1.070312,1.364258,29.0,0.078186,0.268799,371.0,1.000000,0.000000
2,2,2016-08-01,C_ID_d639edf6cd,2,2,0,0.687988,2016,3,1661,4,1.333008,2,0,1.154297,2017,807,18.343750,13.726562,859,19.515625,7.910156,5572,126.625,96.5000,1,0.022720,0.150757,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,787,17.890625,3.478516,1147,26.062500,16.609375,1432,32.531250,16.953125,65868733852,1.497017e+09,10884969.0,12.0,0.272705,0.450439,32.0,0.727051,0.450439,8.0,0.181763,0.390137,7.0,0.159058,0.369873,5.0,0.113647,0.321045,8.0,0.181763,0.390137,4.0,0.090881,0.290771,0.0,0.000000,0.000000,2.0,0.045441,0.210693,2.0,0.045441,0.210693,1.0,0.022720,0.150757,6.0,0.136353,0.347168,0.0,0.000000,0.000000,1.0,0.022720,0.150757,32.0,0.727051,0.450439,12.0,0.272705,0.450439,3.0,0.068176,0.254883,2.0,0.045441,0.210693,9.0,0.204590,0.407959,7.0,0.159058,0.369873,11.0,0.250000,0.437988,10.0,0.227295,0.423828,2.0,0.045441,0.210693,472,263.500000,1498,1026,1,0.023254,42.0,0.954590,0.210693,0.0,0.000000,0.000000,204.0,4.636719,1.163086,0.0,0.000000,0.000000,-368.0,-8.367188,-13,2,4.121094,0,0.000000,0,0,0.000000,1994.050049,45.319317,11.156250,400.000000,57.512569,53.75000,1.194336,-0.057465,21.828125,4.484375,46.062500,1.023438,-0.057465,21.593750,4.480469,51.360001,1.141333,0.930176,6.930000,0.884468,53.169998,1.181556,1.105324,53.689999,1.193111,0.759766,8.570000,1.127176,57.97,1.288086,0.949219,12.703125,1.741211,61.97,1.376953,0.871094,15.859375,2.208984,62.84,1.396484,0.752441,16.218750,2.261719,135.0,3.000000,3.0,3.0,0.000000,270.0,6.000000,6.0,6.0,0.000000,533.0,11.843750,7.0,12.0,0.796387,43.0,0.955566,0.208374,18.0,0.399902,0.962891,18.0,0.399902,1.008789,3.0,0.066650,0.252197,205.0,4.554688,1.271484
3,3,2017-09-01,C_ID_186d6a6901,4,3,0,0.142456,2017,3,1265,7,2.333984,4,0,2.082031,2017,2562,30.500000,17.140625,1391,16.562500,8.906250,17723,211.000,120.3750,3,0.035706,0.186646,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,1203,14.320312,5.082031,2488,29.625000,18.296875,2037,24.250000,18.125000,127034254917,1.512313e+09,5463778.0,14.0,0.166626,0.375000,70.0,0.833496,0.375000,6.0,0.071411,0.259033,16.0,0.190430,0.395020,2.0,0.023804,0.153320,5.0,0.059509,0.238037,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,11.0,0.130981,0.339355,31.0,0.369141,0.485352,7.0,0.083313,0.278076,6.0,0.071411,0.259033,70.0,0.833496,0.375000,14.0,0.166626,0.375000,7.0,0.083313,0.278076,12.0,0.142822,0.352051,22.0,0.261963,0.442383,10.0,0.119019,0.325684,19.0,0.226196,0.420898,8.0,0.095215,0.295410,6.0,0.071411,0.259033,204,59.000000,1240,1036,1,0.012047,84.0,1.000000,0.000000,13.0,0.154785,0.363770,261.0,3.107422,1.379883,88.0,1.047852,0.343750,-206.0,-2.453125,-5,2,2.142578,89,1.059570,-1,3,0.608398,5719.299805,68.086906,4.000000,1459.089966,167.072281,368.00000,3.833984,-0.057465,27.578125,8.593750,360.250000,3.753906,-0.057465,27.312500,8.507812,150.750000,1.570312,0.589844,6.930000,1.739451,162.660004,1.694375,2.181952,163.869995,1.706979,0.509766,8.570000,2.230313,204.20,2.126953,0.617676,12.703125,3.421875,230.90,2.404297,0.426270,15.859375,4.355469,234.10,2.437500,0.436035,16.218750,4.460938,288.0,3.000000,3.0,3.0,0.000000,576.0,6.000000,6.0,6.0,0.000000,1094.0,11.398438,7.0,12.0,1.539062,65.0,0.677246,0.469971,161.0,1.676758,1.592773,159.0,1.656250,1.575195,2.0,0.020828,0.143555,282.0,2.937500,1.442383
4,4,2017-11-01,C_ID_cdbd2c0db2,1,3,0,-0.159790,2017,4,1204,4,1.333008,3,0,1.527344,2018,3378,19.984375,19.593750,2666,15.773438,8.953125,23233,137.500,137.6250,8,0.047333,0.213013,7,0.041412,0.199829,2,0.011833,0.108459,3,0.017746,0.132446,0,0.000000,0.000000,1,0.005917,0.076904,2265,13.406250,5.402344,4836,28.609375,17.296875,4538,26.859375,17.328125,256356557498,1.516903e+09,3909616.5,54.0,0.319580,0.467773,115.0,0.680664,0.467773,46.0,0.272217,0.446289,35.0,0.207153,0.406494,16.0,0.094666,0.293701,20.0,0.118347,0.323975,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,21.0,0.124268,0.330811,31.0,0.183472,0.388184,115.0,0.680664,0.467773,54.0,0.319580,0.467773,21.0,0.124268,0.330811,18.0,0.106506,0.309326,23.0,0.136108,0.343994,19.0,0.112427,0.316895,34.0,0.201172,0.402100,32.0,0.189331,0.393066,22.0,0.130127,0.337402,167,23.859375,1193,1026,1,0.005951,164.0,0.970215,0.169922,17.0,0.100586,0.301758,583.0,3.449219,1.138672,176.0,1.041016,0.227661,-115.0,-0.680664,-3,2,1.497070,217,1.284180,-1,12,1.698242,38341.328125,226.871780,0.500000,5283.959961,802.122375,506.00000,2.857422,-0.057465,27.578125,7.988281,497.250000,2.808594,-0.057465,27.312500,7.917969,420.059998,2.373220,0.509766,185.490005,13.899587,560.960022,3.169266,16.487820,585.419983,3.307458,0.439941,166.679993,17.468569,326.50,1.844727,0.392822,37.125000,3.654297,856.50,4.839844,0.294189,504.250000,37.968750,915.00,5.167969,0.286377,554.500000,41.718750,531.0,3.000000,3.0,3.0,0.000000,1061.0,5.996094,5.0,6.0,0.075134,2068.0,11.687500,5.0,12.0,1.210938,139.0,0.785156,0.411865,345.0,1.949219,1.418945,350.0,1.977539,1.417969,17.0,0.096069,0.295410,578.0,3.265625,1.262695
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201912,201912,2017-09-01,C_ID_963962de2c,3,2,1,-2.740234,2017,3,1265,6,2.000000,3,1,1.000000,2017,1359,28.921875,20.296875,649,13.804688,6.886719,9431,200.625,142.3750,0,0.000000,0.000000,1,0.021271,0.145874,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,594,12.640625,4.390625,1498,31.875000,16.671875,1360,28.937500,18.937500,71123944840,1.513275e+09,4350446.5,17.0,0.361816,0.485596,30.0,0.638184,0.485596,8.0,0.170166,0.379883,10.0,0.212769,0.413574,1.0,0.021271,0.145874,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,4.0,0.085083,0.281982,5.0,0.106384,0.311768,9.0,0.191528,0.397705,10.0,0.212769,0.413574,30.0,0.638184,0.485596,17.0,0.361816,0.485596,2.0,0.042542,0.203979,2.0,0.042542,0.203979,6.0,0.127686,0.337402,9.0,0.191528,0.397705,11.0,0.234009,0.427979,15.0,0.319092,0.471191,2.0,0.042542,0.203979,185,96.500000,1259,1074,1,0.021744,43.0,0.915039,0.281982,0.0,0.000000,0.000000,113.0,2.404297,0.924316,1.0,0.021271,0.145874,-94.0,-2.000000,-5,1,1.628906,1,0.021271,0,1,0.145874,1188.609985,25.289574,3.000000,116.459999,27.289751,2.53125,0.042175,-0.057465,1.618164,0.356201,1.738281,0.028961,-0.057465,1.608398,0.356689,137.339996,2.289000,0.890137,6.930000,2.462659,156.979996,2.616333,3.082739,157.589996,2.626500,0.649902,8.570000,3.163647,213.60,3.560547,0.919434,12.703125,4.847656,255.00,4.250000,0.783203,15.859375,6.160156,258.50,4.308594,0.583984,16.218750,6.320312,180.0,3.000000,3.0,3.0,0.000000,360.0,6.000000,6.0,6.0,0.000000,655.0,10.914062,7.0,12.0,2.078125,33.0,0.549805,0.501465,126.0,2.099609,1.558594,120.0,2.000000,1.518555,17.0,0.283447,0.454346,126.0,2.099609,1.002930
201913,201913,2015-10-01,C_ID_1314773c0b,3,1,1,0.312988,2015,4,1966,5,1.666992,3,1,1.154297,2017,1251,26.062500,17.796875,653,13.601562,7.410156,8629,179.750,125.4375,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,627,13.062500,5.355469,1355,28.234375,18.515625,1263,26.312500,21.250000,72411760563,1.508578e+09,8547709.0,5.0,0.104187,0.308594,43.0,0.895996,0.308594,9.0,0.187500,0.394531,6.0,0.125000,0.334229,3.0,0.062500,0.244629,0.0,0.000000,0.000000,0.0,0.000000,0.000000,1.0,0.020828,0.144287,6.0,0.125000,0.334229,4.0,0.083313,0.279297,4.0,0.083313,0.279297,4.0,0.083313,0.279297,7.0,0.145874,0.356689,4.0,0.083313,0.279297,43.0,0.895996,0.308594,5.0,0.104187,0.308594,5.0,0.104187,0.308594,11.0,0.229126,0.424805,2.0,0.041656,0.201904,13.0,0.270752,0.448975,12.0,0.250000,0.437500,5.0,0.104187,0.308594,0.0,0.000000,0.000000,439,224.125000,1504,1065,1,0.021271,41.0,0.854004,0.356689,0.0,0.000000,0.000000,48.0,1.000000,0.000000,1.0,0.020828,0.144287,-181.0,-3.771484,-13,1,3.203125,1,0.020828,0,1,0.144287,1114.369995,23.216042,1.839844,114.900002,26.457180,52.53125,1.094727,-0.057465,23.859375,3.513672,51.625000,1.075195,-0.057465,23.734375,3.498047,52.779999,1.099583,0.649902,2.240000,0.300927,62.549999,1.303125,0.935390,81.910004,1.706458,0.439941,15.170000,2.293151,51.90,1.081055,0.641113,2.037109,0.228027,59.88,1.247070,0.519531,5.507812,0.747559,73.50,1.530273,0.449707,11.796875,1.704102,142.0,2.958984,1.0,3.0,0.288574,283.0,5.894531,1.0,6.0,0.721680,565.0,11.773438,1.0,12.0,1.587891,47.0,0.979004,0.144287,103.0,2.146484,1.129883,94.0,1.958008,0.966797,47.0,0.979004,0.144287,48.0,1.000000,0.000000
201914,201914,2017-08-01,C_ID_7666735b3d,4,3,0,0.093506,2017,3,1296,7,2.333984,4,0,2.082031,2017,3447,38.312500,11.500000,1401,15.570312,9.945312,23869,265.250,80.6250,3,0.033325,0.180542,6,0.066650,0.250732,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,1281,14.234375,6.480469,2412,26.796875,19.015625,2322,25.796875,17.843750,135833680242,1.509263e+09,4346600.5,9.0,0.099976,0.301758,81.0,0.899902,0.301758,4.0,0.044434,0.207275,2.0,0.022217,0.148193,3.0,0.033325,0.180542,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,14.0,0.155518,0.364502,13.0,0.144409,0.353516,14.0,0.155518,0.364502,36.0,0.399902,0.492676,4.0,0.044434,0.207275,81.0,0.899902,0.301758,9.0,0.099976,0.301758,21.0,0.233276,0.425293,9.0,0.099976,0.301758,26.0,0.288818,0.455811,10.0,0.111084,0.315918,15.0,0.166626,0.374756,6.0,0.066650,0.250732,3.0,0.033325,0.180542,217,58.531250,1274,1057,1,0.011238,84.0,0.933105,0.250732,9.0,0.099976,0.301758,226.0,2.511719,0.864258,100.0,1.111328,0.349854,-322.0,-3.578125,-6,1,1.668945,113,1.255859,-1,9,1.066406,22167.679688,246.307556,5.980469,4500.000000,567.508423,27.78125,0.302002,-0.057465,4.195312,0.981445,24.281250,0.263916,-0.057465,4.125000,0.964844,160.770004,1.747500,0.620117,57.610001,5.953237,214.199997,2.328261,11.011539,225.850006,2.454891,0.170044,118.760002,12.312107,131.50,1.428711,0.541992,15.593750,2.275391,151.60,1.647461,0.278564,28.468750,3.572266,153.60,1.669922,0.141968,31.343750,3.847656,276.0,3.000000,3.0,3.0,0.000000,552.0,6.000000,6.0,6.0,0.000000,1091.0,11.859375,7.0,12.0,0.792480,66.0,0.717285,0.452637,153.0,1.663086,1.311523,158.0,1.717773,1.303711,23.0,0.250000,0.435303,224.0,2.435547,0.905273
201915,201915,2016-07-01,C_ID_73f5a0efd0,3,2,1,-4.675781,2016,3,1692,6,2.000000,3,1,1.000000,2017,957,30.875000,14.171875,572,18.453125,9.843750,6629,213.875,98.8125,1,0.032257,0.179565,1,0.032257,0.179565,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,448,14.453125,2.322266,673,21.703125,15.640625,928,29.937500,16.687500,46551814108,1.501671e+09,8539426.0,5.0,0.161255,0.373779,26.0,0.838867,0.373779,2.0,0.064514,0.249756,1.0,0.032257,0.179565,2.0,0.064514,0.249756,1.0,0.032257,0.179565,3.0,0.096802,0.300537,2.0,0.064514,0.249756,4.0,0.129028,0.340820,1.0,0.032257,0.179565,4.0,0.129028,0.340820,6.0,0.193604,0.401611,2.0,0.064514,0.249756,3.0,0.096802,0.300537,26.0,0.838867,0.373779,5.0,0.161255,0.373779,1.0,0.032257,0.179565,8.0,0.258057,0.444824,6.0,0.193604,0.401611,5.0,0.161255,0.373779,6.0,0.193604,0.401611,4.0,0.129028,0.340820,1.0,0.032257,0.179565,333,266.500000,1481,1148,1,0.033325,29.0,0.935547,0.249756,0.0,0.000000,0.000000,31.0,1.000000,0.000000,0.0,0.000000,0.000000,-79.0,-2.548828,-9,2,3.285156,0,0.000000,0,0,0.000000,4788.109863,154.455154,10.000000,1999.609985,454.747833,318.50000,9.648438,-0.057465,172.750000,32.281250,296.500000,8.984375,-0.057465,170.750000,32.093750,47.029999,1.425151,0.859863,6.930000,1.457700,51.260002,1.553333,1.850987,52.239998,1.583030,0.759766,8.570000,1.883847,59.38,1.798828,0.879395,12.703125,2.837891,66.70,2.019531,0.871094,15.859375,3.617188,67.90,2.056641,0.752441,16.218750,3.703125,99.0,3.000000,3.0,3.0,0.000000,198.0,6.000000,6.0,6.0,0.000000,380.0,11.515625,6.0,12.0,1.563477,29.0,0.878906,0.331543,33.0,1.000000,1.369141,31.0,0.939453,1.273438,1.0,0.030304,0.174072,33.0,1.000000,0.000000


In [139]:
train.isna().sum()

Unnamed: 0             0
first_active_month     0
card_id                0
feature_1              0
feature_2              0
                      ..
mer_category_4_mean    0
mer_category_4_std     0
mer_category_2_sum     0
mer_category_2_mean    0
mer_category_2_std     0
Length: 225, dtype: int64

In [140]:
#if there are any inf values we will first treat it a nan
train = train.replace([np.inf, -np.inf], np.nan)
test = test.replace([np.inf, -np.inf], np.nan)

In [141]:
train

Unnamed: 0.1,Unnamed: 0,first_active_month,card_id,feature_1,feature_2,feature_3,target,year,quarter,elapsed_time,feature_sum,feature_mean,feature_max,feature_min,feature_std,purchase_Year_mode,purchase_Week_sum,purchase_Week_mean,purchase_Week_std,purchase_Day_sum,purchase_Day_mean,purchase_Day_std,purchase_Dayofyear_sum,purchase_Dayofyear_mean,purchase_Dayofyear_std,purchase_Is_month_end_sum,purchase_Is_month_end_mean,purchase_Is_month_end_std,purchase_Is_month_start_sum,purchase_Is_month_start_mean,purchase_Is_month_start_std,purchase_Is_quarter_end_sum,purchase_Is_quarter_end_mean,purchase_Is_quarter_end_std,purchase_Is_quarter_start_sum,purchase_Is_quarter_start_mean,purchase_Is_quarter_start_std,purchase_Is_year_end_sum,purchase_Is_year_end_mean,purchase_Is_year_end_std,purchase_Is_year_start_sum,purchase_Is_year_start_mean,purchase_Is_year_start_std,purchase_Hour_sum,purchase_Hour_mean,purchase_Hour_std,purchase_Minute_sum,purchase_Minute_mean,purchase_Minute_std,purchase_Second_sum,purchase_Second_mean,purchase_Second_std,purchase_Elapsed_sum,purchase_Elapsed_mean,purchase_Elapsed_std,purchasedWeekday_0_sum,purchasedWeekday_0_mean,purchasedWeekday_0_std,purchasedWeekday_1_sum,purchasedWeekday_1_mean,purchasedWeekday_1_std,purchase_Month_1_sum,purchase_Month_1_mean,purchase_Month_1_std,purchase_Month_2_sum,purchase_Month_2_mean,purchase_Month_2_std,purchase_Month_3_sum,purchase_Month_3_mean,purchase_Month_3_std,purchase_Month_4_sum,purchase_Month_4_mean,purchase_Month_4_std,purchase_Month_5_sum,purchase_Month_5_mean,purchase_Month_5_std,purchase_Month_6_sum,purchase_Month_6_mean,purchase_Month_6_std,purchase_Month_7_sum,purchase_Month_7_mean,purchase_Month_7_std,purchase_Month_8_sum,purchase_Month_8_mean,purchase_Month_8_std,purchase_Month_9_sum,purchase_Month_9_mean,purchase_Month_9_std,purchase_Month_10_sum,purchase_Month_10_mean,purchase_Month_10_std,purchase_Month_11_sum,purchase_Month_11_mean,purchase_Month_11_std,purchase_Month_12_sum,purchase_Month_12_mean,purchase_Month_12_std,purchasedWeekend_0_sum,purchasedWeekend_0_mean,purchasedWeekend_0_std,purchasedWeekend_1_sum,purchasedWeekend_1_mean,purchasedWeekend_1_std,purchase_Dayofweek_0_sum,purchase_Dayofweek_0_mean,purchase_Dayofweek_0_std,purchase_Dayofweek_1_sum,purchase_Dayofweek_1_mean,purchase_Dayofweek_1_std,purchase_Dayofweek_2_sum,purchase_Dayofweek_2_mean,purchase_Dayofweek_2_std,purchase_Dayofweek_3_sum,purchase_Dayofweek_3_mean,purchase_Dayofweek_3_std,purchase_Dayofweek_4_sum,purchase_Dayofweek_4_mean,purchase_Dayofweek_4_std,purchase_Dayofweek_5_sum,purchase_Dayofweek_5_mean,purchase_Dayofweek_5_std,purchase_Dayofweek_6_sum,purchase_Dayofweek_6_mean,purchase_Dayofweek_6_std,purchase_date_temp_period,purchase_date_temp_mean_diff,purchase_date_temp_min,purchase_date_temp_max,monthDiff_period,monthDiff_mean_diff,authorized_flag_sum,authorized_flag_mean,authorized_flag_std,category_1_sum,category_1_mean,category_1_std,category_2_sum,category_2_mean,category_2_std,category_3_sum,category_3_mean,category_3_std,month_lag_sum,month_lag_mean,month_lag_min,month_lag_max,month_lag_std,installments_sum,installments_mean,installments_min,installments_max,installments_std,purchase_amount_sum,purchase_amount_mean,purchase_amount_min,purchase_amount_max,purchase_amount_std,mer_numerical_1_sum,mer_numerical_1_mean,mer_numerical_1_min,mer_numerical_1_max,mer_numerical_1_std,mer_numerical_2_sum,mer_numerical_2_mean,mer_numerical_2_min,mer_numerical_2_max,mer_numerical_2_std,mer_avg_sales_lag3_sum,mer_avg_sales_lag3_mean,mer_avg_sales_lag3_min,mer_avg_sales_lag3_max,mer_avg_sales_lag3_std,mer_avg_sales_lag6_sum,mer_avg_sales_lag6_mean,mer_avg_sales_lag6_std,mer_avg_sales_lag12_sum,mer_avg_sales_lag12_mean,mer_avg_sales_lag12_min,mer_avg_sales_lag12_max,mer_avg_sales_lag12_std,mer_avg_purchases_lag3_sum,mer_avg_purchases_lag3_mean,mer_avg_purchases_lag3_min,mer_avg_purchases_lag3_max,mer_avg_purchases_lag3_std,mer_avg_purchases_lag6_sum,mer_avg_purchases_lag6_mean,mer_avg_purchases_lag6_min,mer_avg_purchases_lag6_max,mer_avg_purchases_lag6_std,mer_avg_purchases_lag12_sum,mer_avg_purchases_lag12_mean,mer_avg_purchases_lag12_min,mer_avg_purchases_lag12_max,mer_avg_purchases_lag12_std,mer_active_months_lag3_sum,mer_active_months_lag3_mean,mer_active_months_lag3_min,mer_active_months_lag3_max,mer_active_months_lag3_std,mer_active_months_lag6_sum,mer_active_months_lag6_mean,mer_active_months_lag6_min,mer_active_months_lag6_max,mer_active_months_lag6_std,mer_active_months_lag12_sum,mer_active_months_lag12_mean,mer_active_months_lag12_min,mer_active_months_lag12_max,mer_active_months_lag12_std,mer_category_1_sum,mer_category_1_mean,mer_category_1_std,mer_most_recent_sales_range_sum,mer_most_recent_sales_range_mean,mer_most_recent_sales_range_std,mer_most_recent_purchases_range_sum,mer_most_recent_purchases_range_mean,mer_most_recent_purchases_range_std,mer_category_4_sum,mer_category_4_mean,mer_category_4_std,mer_category_2_sum,mer_category_2_mean,mer_category_2_std
0,0,2017-06-01,C_ID_92a2005557,5,2,1,-0.820312,2017,2,1357,8,2.666016,5,1,2.082031,2017,8905,31.468750,15.554688,4411,15.585938,8.804688,61760,218.250,109.1875,9,0.031799,0.175781,7,0.024734,0.155640,5,0.017670,0.131958,3,0.010597,0.102600,1,0.003534,0.059448,0,0.000000,0.000000,3758,13.281250,4.796875,8128,28.718750,18.765625,7417,26.203125,18.437500,427192299097,1.509513e+09,7279052.5,96.0,0.339111,0.474365,187.0,0.660645,0.474365,21.0,0.074219,0.262451,23.0,0.081299,0.273682,12.0,0.042389,0.201904,11.0,0.038879,0.193604,0.0,0.000000,0.000000,3.0,0.010597,0.102600,49.0,0.173096,0.379150,44.0,0.155518,0.363037,20.0,0.070679,0.256836,22.0,0.077759,0.268311,21.0,0.074219,0.262451,57.0,0.201416,0.401855,187.0,0.660645,0.474365,96.0,0.339111,0.474365,34.0,0.120117,0.325684,33.0,0.116638,0.321533,44.0,0.155518,0.363037,41.0,0.144897,0.352539,35.0,0.123657,0.329834,53.0,0.187256,0.390869,43.0,0.151978,0.359619,306,26.046875,1331,1025,1,0.003546,270.0,0.954102,0.209717,0.0,0.000000,0.000000,295.0,1.041992,0.410400,4.0,0.014137,0.118225,-983.0,-3.472656,-8,2,2.734375,4,0.014137,0,1,0.118225,21403.699219,75.631447,5.000000,2000.000000,138.101593,4688.00000,16.625000,-0.057465,183.750000,44.968750,4532.000000,16.062500,-0.057465,182.125000,44.500000,351.779999,1.247447,0.680176,7.730000,1.034572,1903.690063,6.750674,27.957880,2174.559814,7.711205,0.529785,194.610001,32.652981,427.50,1.516602,0.466553,12.703125,2.101562,5484.00,19.437500,0.321045,504.250000,93.312500,6044.00,21.437500,0.252930,554.500000,102.812500,846.0,3.000000,3.0,3.0,0.000000,1691.0,5.996094,5.0,6.0,0.059540,3348.0,11.875000,5.0,12.0,0.794434,258.0,0.915039,0.279541,469.0,1.663086,1.278320,410.0,1.454102,1.288086,16.0,0.056732,0.231812,294.0,1.042969,0.411133
1,1,2017-01-01,C_ID_3d0044924f,4,1,0,0.392822,2017,1,1508,5,1.666992,4,0,2.082031,2017,8881,24.953125,16.703125,5914,16.609375,8.835938,61516,172.750,117.1875,12,0.033722,0.180786,15,0.042145,0.201172,6,0.016861,0.128906,6,0.016861,0.128906,3,0.008430,0.091553,1,0.002810,0.053009,5218,14.656250,5.585938,10360,29.093750,17.562500,10272,28.859375,17.593750,535130645472,1.503176e+09,10257346.0,132.0,0.370850,0.483643,224.0,0.629395,0.483643,72.0,0.202271,0.402344,25.0,0.070251,0.255859,16.0,0.044952,0.207520,18.0,0.050568,0.219360,15.0,0.042145,0.201172,34.0,0.095520,0.294434,49.0,0.137695,0.344971,14.0,0.039337,0.194702,24.0,0.067444,0.250977,26.0,0.073059,0.260498,16.0,0.044952,0.207520,47.0,0.132080,0.339111,224.0,0.629395,0.483643,132.0,0.370850,0.483643,42.0,0.117981,0.322998,47.0,0.132080,0.339111,35.0,0.098328,0.298096,44.0,0.123596,0.329590,56.0,0.157349,0.364502,79.0,0.221924,0.416016,53.0,0.148926,0.356445,448,30.281250,1503,1055,1,0.002817,345.0,0.969238,0.173340,31.0,0.087097,0.282227,356.0,1.000000,0.000000,426.0,1.196289,0.411865,-1752.0,-4.921875,-12,2,3.865234,549,1.541992,-1,10,1.500000,34297.628906,96.341660,3.000000,3578.479980,254.249817,9960.00000,26.843750,-0.057465,172.750000,50.218750,9752.000000,26.281250,-0.057465,170.750000,49.781250,452.660004,1.220108,0.479980,10.750000,1.065189,761.210022,2.051779,10.825763,817.449951,2.203369,0.270020,166.679993,12.232841,518.00,1.396484,0.523926,12.703125,1.900391,1546.00,4.167969,0.307861,504.250000,36.937500,1679.00,4.527344,0.208984,554.500000,40.625000,1113.0,3.000000,3.0,3.0,0.000000,2226.0,6.000000,6.0,6.0,0.000000,4400.0,11.859375,7.0,12.0,0.788574,309.0,0.833008,0.373535,403.0,1.085938,1.363281,397.0,1.070312,1.364258,29.0,0.078186,0.268799,371.0,1.000000,0.000000
2,2,2016-08-01,C_ID_d639edf6cd,2,2,0,0.687988,2016,3,1661,4,1.333008,2,0,1.154297,2017,807,18.343750,13.726562,859,19.515625,7.910156,5572,126.625,96.5000,1,0.022720,0.150757,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,787,17.890625,3.478516,1147,26.062500,16.609375,1432,32.531250,16.953125,65868733852,1.497017e+09,10884969.0,12.0,0.272705,0.450439,32.0,0.727051,0.450439,8.0,0.181763,0.390137,7.0,0.159058,0.369873,5.0,0.113647,0.321045,8.0,0.181763,0.390137,4.0,0.090881,0.290771,0.0,0.000000,0.000000,2.0,0.045441,0.210693,2.0,0.045441,0.210693,1.0,0.022720,0.150757,6.0,0.136353,0.347168,0.0,0.000000,0.000000,1.0,0.022720,0.150757,32.0,0.727051,0.450439,12.0,0.272705,0.450439,3.0,0.068176,0.254883,2.0,0.045441,0.210693,9.0,0.204590,0.407959,7.0,0.159058,0.369873,11.0,0.250000,0.437988,10.0,0.227295,0.423828,2.0,0.045441,0.210693,472,263.500000,1498,1026,1,0.023254,42.0,0.954590,0.210693,0.0,0.000000,0.000000,204.0,4.636719,1.163086,0.0,0.000000,0.000000,-368.0,-8.367188,-13,2,4.121094,0,0.000000,0,0,0.000000,1994.050049,45.319317,11.156250,400.000000,57.512569,53.75000,1.194336,-0.057465,21.828125,4.484375,46.062500,1.023438,-0.057465,21.593750,4.480469,51.360001,1.141333,0.930176,6.930000,0.884468,53.169998,1.181556,1.105324,53.689999,1.193111,0.759766,8.570000,1.127176,57.97,1.288086,0.949219,12.703125,1.741211,61.97,1.376953,0.871094,15.859375,2.208984,62.84,1.396484,0.752441,16.218750,2.261719,135.0,3.000000,3.0,3.0,0.000000,270.0,6.000000,6.0,6.0,0.000000,533.0,11.843750,7.0,12.0,0.796387,43.0,0.955566,0.208374,18.0,0.399902,0.962891,18.0,0.399902,1.008789,3.0,0.066650,0.252197,205.0,4.554688,1.271484
3,3,2017-09-01,C_ID_186d6a6901,4,3,0,0.142456,2017,3,1265,7,2.333984,4,0,2.082031,2017,2562,30.500000,17.140625,1391,16.562500,8.906250,17723,211.000,120.3750,3,0.035706,0.186646,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,1203,14.320312,5.082031,2488,29.625000,18.296875,2037,24.250000,18.125000,127034254917,1.512313e+09,5463778.0,14.0,0.166626,0.375000,70.0,0.833496,0.375000,6.0,0.071411,0.259033,16.0,0.190430,0.395020,2.0,0.023804,0.153320,5.0,0.059509,0.238037,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,11.0,0.130981,0.339355,31.0,0.369141,0.485352,7.0,0.083313,0.278076,6.0,0.071411,0.259033,70.0,0.833496,0.375000,14.0,0.166626,0.375000,7.0,0.083313,0.278076,12.0,0.142822,0.352051,22.0,0.261963,0.442383,10.0,0.119019,0.325684,19.0,0.226196,0.420898,8.0,0.095215,0.295410,6.0,0.071411,0.259033,204,59.000000,1240,1036,1,0.012047,84.0,1.000000,0.000000,13.0,0.154785,0.363770,261.0,3.107422,1.379883,88.0,1.047852,0.343750,-206.0,-2.453125,-5,2,2.142578,89,1.059570,-1,3,0.608398,5719.299805,68.086906,4.000000,1459.089966,167.072281,368.00000,3.833984,-0.057465,27.578125,8.593750,360.250000,3.753906,-0.057465,27.312500,8.507812,150.750000,1.570312,0.589844,6.930000,1.739451,162.660004,1.694375,2.181952,163.869995,1.706979,0.509766,8.570000,2.230313,204.20,2.126953,0.617676,12.703125,3.421875,230.90,2.404297,0.426270,15.859375,4.355469,234.10,2.437500,0.436035,16.218750,4.460938,288.0,3.000000,3.0,3.0,0.000000,576.0,6.000000,6.0,6.0,0.000000,1094.0,11.398438,7.0,12.0,1.539062,65.0,0.677246,0.469971,161.0,1.676758,1.592773,159.0,1.656250,1.575195,2.0,0.020828,0.143555,282.0,2.937500,1.442383
4,4,2017-11-01,C_ID_cdbd2c0db2,1,3,0,-0.159790,2017,4,1204,4,1.333008,3,0,1.527344,2018,3378,19.984375,19.593750,2666,15.773438,8.953125,23233,137.500,137.6250,8,0.047333,0.213013,7,0.041412,0.199829,2,0.011833,0.108459,3,0.017746,0.132446,0,0.000000,0.000000,1,0.005917,0.076904,2265,13.406250,5.402344,4836,28.609375,17.296875,4538,26.859375,17.328125,256356557498,1.516903e+09,3909616.5,54.0,0.319580,0.467773,115.0,0.680664,0.467773,46.0,0.272217,0.446289,35.0,0.207153,0.406494,16.0,0.094666,0.293701,20.0,0.118347,0.323975,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,21.0,0.124268,0.330811,31.0,0.183472,0.388184,115.0,0.680664,0.467773,54.0,0.319580,0.467773,21.0,0.124268,0.330811,18.0,0.106506,0.309326,23.0,0.136108,0.343994,19.0,0.112427,0.316895,34.0,0.201172,0.402100,32.0,0.189331,0.393066,22.0,0.130127,0.337402,167,23.859375,1193,1026,1,0.005951,164.0,0.970215,0.169922,17.0,0.100586,0.301758,583.0,3.449219,1.138672,176.0,1.041016,0.227661,-115.0,-0.680664,-3,2,1.497070,217,1.284180,-1,12,1.698242,38341.328125,226.871780,0.500000,5283.959961,802.122375,506.00000,2.857422,-0.057465,27.578125,7.988281,497.250000,2.808594,-0.057465,27.312500,7.917969,420.059998,2.373220,0.509766,185.490005,13.899587,560.960022,3.169266,16.487820,585.419983,3.307458,0.439941,166.679993,17.468569,326.50,1.844727,0.392822,37.125000,3.654297,856.50,4.839844,0.294189,504.250000,37.968750,915.00,5.167969,0.286377,554.500000,41.718750,531.0,3.000000,3.0,3.0,0.000000,1061.0,5.996094,5.0,6.0,0.075134,2068.0,11.687500,5.0,12.0,1.210938,139.0,0.785156,0.411865,345.0,1.949219,1.418945,350.0,1.977539,1.417969,17.0,0.096069,0.295410,578.0,3.265625,1.262695
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201912,201912,2017-09-01,C_ID_963962de2c,3,2,1,-2.740234,2017,3,1265,6,2.000000,3,1,1.000000,2017,1359,28.921875,20.296875,649,13.804688,6.886719,9431,200.625,142.3750,0,0.000000,0.000000,1,0.021271,0.145874,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,594,12.640625,4.390625,1498,31.875000,16.671875,1360,28.937500,18.937500,71123944840,1.513275e+09,4350446.5,17.0,0.361816,0.485596,30.0,0.638184,0.485596,8.0,0.170166,0.379883,10.0,0.212769,0.413574,1.0,0.021271,0.145874,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,4.0,0.085083,0.281982,5.0,0.106384,0.311768,9.0,0.191528,0.397705,10.0,0.212769,0.413574,30.0,0.638184,0.485596,17.0,0.361816,0.485596,2.0,0.042542,0.203979,2.0,0.042542,0.203979,6.0,0.127686,0.337402,9.0,0.191528,0.397705,11.0,0.234009,0.427979,15.0,0.319092,0.471191,2.0,0.042542,0.203979,185,96.500000,1259,1074,1,0.021744,43.0,0.915039,0.281982,0.0,0.000000,0.000000,113.0,2.404297,0.924316,1.0,0.021271,0.145874,-94.0,-2.000000,-5,1,1.628906,1,0.021271,0,1,0.145874,1188.609985,25.289574,3.000000,116.459999,27.289751,2.53125,0.042175,-0.057465,1.618164,0.356201,1.738281,0.028961,-0.057465,1.608398,0.356689,137.339996,2.289000,0.890137,6.930000,2.462659,156.979996,2.616333,3.082739,157.589996,2.626500,0.649902,8.570000,3.163647,213.60,3.560547,0.919434,12.703125,4.847656,255.00,4.250000,0.783203,15.859375,6.160156,258.50,4.308594,0.583984,16.218750,6.320312,180.0,3.000000,3.0,3.0,0.000000,360.0,6.000000,6.0,6.0,0.000000,655.0,10.914062,7.0,12.0,2.078125,33.0,0.549805,0.501465,126.0,2.099609,1.558594,120.0,2.000000,1.518555,17.0,0.283447,0.454346,126.0,2.099609,1.002930
201913,201913,2015-10-01,C_ID_1314773c0b,3,1,1,0.312988,2015,4,1966,5,1.666992,3,1,1.154297,2017,1251,26.062500,17.796875,653,13.601562,7.410156,8629,179.750,125.4375,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,627,13.062500,5.355469,1355,28.234375,18.515625,1263,26.312500,21.250000,72411760563,1.508578e+09,8547709.0,5.0,0.104187,0.308594,43.0,0.895996,0.308594,9.0,0.187500,0.394531,6.0,0.125000,0.334229,3.0,0.062500,0.244629,0.0,0.000000,0.000000,0.0,0.000000,0.000000,1.0,0.020828,0.144287,6.0,0.125000,0.334229,4.0,0.083313,0.279297,4.0,0.083313,0.279297,4.0,0.083313,0.279297,7.0,0.145874,0.356689,4.0,0.083313,0.279297,43.0,0.895996,0.308594,5.0,0.104187,0.308594,5.0,0.104187,0.308594,11.0,0.229126,0.424805,2.0,0.041656,0.201904,13.0,0.270752,0.448975,12.0,0.250000,0.437500,5.0,0.104187,0.308594,0.0,0.000000,0.000000,439,224.125000,1504,1065,1,0.021271,41.0,0.854004,0.356689,0.0,0.000000,0.000000,48.0,1.000000,0.000000,1.0,0.020828,0.144287,-181.0,-3.771484,-13,1,3.203125,1,0.020828,0,1,0.144287,1114.369995,23.216042,1.839844,114.900002,26.457180,52.53125,1.094727,-0.057465,23.859375,3.513672,51.625000,1.075195,-0.057465,23.734375,3.498047,52.779999,1.099583,0.649902,2.240000,0.300927,62.549999,1.303125,0.935390,81.910004,1.706458,0.439941,15.170000,2.293151,51.90,1.081055,0.641113,2.037109,0.228027,59.88,1.247070,0.519531,5.507812,0.747559,73.50,1.530273,0.449707,11.796875,1.704102,142.0,2.958984,1.0,3.0,0.288574,283.0,5.894531,1.0,6.0,0.721680,565.0,11.773438,1.0,12.0,1.587891,47.0,0.979004,0.144287,103.0,2.146484,1.129883,94.0,1.958008,0.966797,47.0,0.979004,0.144287,48.0,1.000000,0.000000
201914,201914,2017-08-01,C_ID_7666735b3d,4,3,0,0.093506,2017,3,1296,7,2.333984,4,0,2.082031,2017,3447,38.312500,11.500000,1401,15.570312,9.945312,23869,265.250,80.6250,3,0.033325,0.180542,6,0.066650,0.250732,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,1281,14.234375,6.480469,2412,26.796875,19.015625,2322,25.796875,17.843750,135833680242,1.509263e+09,4346600.5,9.0,0.099976,0.301758,81.0,0.899902,0.301758,4.0,0.044434,0.207275,2.0,0.022217,0.148193,3.0,0.033325,0.180542,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,14.0,0.155518,0.364502,13.0,0.144409,0.353516,14.0,0.155518,0.364502,36.0,0.399902,0.492676,4.0,0.044434,0.207275,81.0,0.899902,0.301758,9.0,0.099976,0.301758,21.0,0.233276,0.425293,9.0,0.099976,0.301758,26.0,0.288818,0.455811,10.0,0.111084,0.315918,15.0,0.166626,0.374756,6.0,0.066650,0.250732,3.0,0.033325,0.180542,217,58.531250,1274,1057,1,0.011238,84.0,0.933105,0.250732,9.0,0.099976,0.301758,226.0,2.511719,0.864258,100.0,1.111328,0.349854,-322.0,-3.578125,-6,1,1.668945,113,1.255859,-1,9,1.066406,22167.679688,246.307556,5.980469,4500.000000,567.508423,27.78125,0.302002,-0.057465,4.195312,0.981445,24.281250,0.263916,-0.057465,4.125000,0.964844,160.770004,1.747500,0.620117,57.610001,5.953237,214.199997,2.328261,11.011539,225.850006,2.454891,0.170044,118.760002,12.312107,131.50,1.428711,0.541992,15.593750,2.275391,151.60,1.647461,0.278564,28.468750,3.572266,153.60,1.669922,0.141968,31.343750,3.847656,276.0,3.000000,3.0,3.0,0.000000,552.0,6.000000,6.0,6.0,0.000000,1091.0,11.859375,7.0,12.0,0.792480,66.0,0.717285,0.452637,153.0,1.663086,1.311523,158.0,1.717773,1.303711,23.0,0.250000,0.435303,224.0,2.435547,0.905273
201915,201915,2016-07-01,C_ID_73f5a0efd0,3,2,1,-4.675781,2016,3,1692,6,2.000000,3,1,1.000000,2017,957,30.875000,14.171875,572,18.453125,9.843750,6629,213.875,98.8125,1,0.032257,0.179565,1,0.032257,0.179565,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,0,0.000000,0.000000,448,14.453125,2.322266,673,21.703125,15.640625,928,29.937500,16.687500,46551814108,1.501671e+09,8539426.0,5.0,0.161255,0.373779,26.0,0.838867,0.373779,2.0,0.064514,0.249756,1.0,0.032257,0.179565,2.0,0.064514,0.249756,1.0,0.032257,0.179565,3.0,0.096802,0.300537,2.0,0.064514,0.249756,4.0,0.129028,0.340820,1.0,0.032257,0.179565,4.0,0.129028,0.340820,6.0,0.193604,0.401611,2.0,0.064514,0.249756,3.0,0.096802,0.300537,26.0,0.838867,0.373779,5.0,0.161255,0.373779,1.0,0.032257,0.179565,8.0,0.258057,0.444824,6.0,0.193604,0.401611,5.0,0.161255,0.373779,6.0,0.193604,0.401611,4.0,0.129028,0.340820,1.0,0.032257,0.179565,333,266.500000,1481,1148,1,0.033325,29.0,0.935547,0.249756,0.0,0.000000,0.000000,31.0,1.000000,0.000000,0.0,0.000000,0.000000,-79.0,-2.548828,-9,2,3.285156,0,0.000000,0,0,0.000000,4788.109863,154.455154,10.000000,1999.609985,454.747833,318.50000,9.648438,-0.057465,172.750000,32.281250,296.500000,8.984375,-0.057465,170.750000,32.093750,47.029999,1.425151,0.859863,6.930000,1.457700,51.260002,1.553333,1.850987,52.239998,1.583030,0.759766,8.570000,1.883847,59.38,1.798828,0.879395,12.703125,2.837891,66.70,2.019531,0.871094,15.859375,3.617188,67.90,2.056641,0.752441,16.218750,3.703125,99.0,3.000000,3.0,3.0,0.000000,198.0,6.000000,6.0,6.0,0.000000,380.0,11.515625,6.0,12.0,1.563477,29.0,0.878906,0.331543,33.0,1.000000,1.369141,31.0,0.939453,1.273438,1.0,0.030304,0.174072,33.0,1.000000,0.000000


In [142]:
# find the columns o which there are null vales 
train.columns[train.isna().any()]

Index(['mer_avg_purchases_lag3_sum', 'mer_avg_purchases_lag6_sum',
       'mer_avg_purchases_lag12_sum'],
      dtype='object')

In [143]:

def nullValues(df):
  """ Find the columns with null values """
  columns_with_na = [var for var in df.columns if df[var].isnull().mean()  > 0]
  #lets find out the percentage of observations missing per variable

  #calculate the percentage of missing 
  data_na = df[columns_with_na].isnull().mean()

  #transform the array to dataframe 
  data_na = pd.DataFrame(data_na.reset_index())

  #add names to the dataframe 
  data_na.columns = ['col','percentage_na']

  #oreder the dataframe acc to percentage 
  data_na.sort_values(by = 'percentage_na',ascending = False, inplace = True)

  #show
  data_na
  return data_na

In [144]:
print(nullValues(train))

                           col  percentage_na
2  mer_avg_purchases_lag12_sum       0.002135
1   mer_avg_purchases_lag6_sum       0.002125
0   mer_avg_purchases_lag3_sum       0.002035


In [145]:
# As the missing value percentage is very low we will mean impute the values 
train['mer_avg_purchases_lag12_sum'].fillna(train['mer_avg_purchases_lag12_sum'].mean(), inplace=True)
test['mer_avg_purchases_lag12_sum'].fillna(test['mer_avg_purchases_lag12_sum'].mean(), inplace=True)
train['mer_avg_purchases_lag6_sum'].fillna(train['mer_avg_purchases_lag6_sum'].mean(), inplace=True)
test['mer_avg_purchases_lag6_sum'].fillna(test['mer_avg_purchases_lag6_sum'].mean(), inplace=True)
train['mer_avg_purchases_lag3_sum'].fillna(train['mer_avg_purchases_lag3_sum'].mean(), inplace=True)
test['mer_avg_purchases_lag3_sum'].fillna(test['mer_avg_purchases_lag3_sum'].mean(), inplace=True)

In [146]:
print(nullValues(train))

Empty DataFrame
Columns: [col, percentage_na]
Index: []


## Basic Filter methods 

In [147]:
pd.set_option('display.max_columns', 225)
train.head()

Unnamed: 0.1,Unnamed: 0,first_active_month,card_id,feature_1,feature_2,feature_3,target,year,quarter,elapsed_time,feature_sum,feature_mean,feature_max,feature_min,feature_std,purchase_Year_mode,purchase_Week_sum,purchase_Week_mean,purchase_Week_std,purchase_Day_sum,purchase_Day_mean,purchase_Day_std,purchase_Dayofyear_sum,purchase_Dayofyear_mean,purchase_Dayofyear_std,purchase_Is_month_end_sum,purchase_Is_month_end_mean,purchase_Is_month_end_std,purchase_Is_month_start_sum,purchase_Is_month_start_mean,purchase_Is_month_start_std,purchase_Is_quarter_end_sum,purchase_Is_quarter_end_mean,purchase_Is_quarter_end_std,purchase_Is_quarter_start_sum,purchase_Is_quarter_start_mean,purchase_Is_quarter_start_std,purchase_Is_year_end_sum,purchase_Is_year_end_mean,purchase_Is_year_end_std,purchase_Is_year_start_sum,purchase_Is_year_start_mean,purchase_Is_year_start_std,purchase_Hour_sum,purchase_Hour_mean,purchase_Hour_std,purchase_Minute_sum,purchase_Minute_mean,purchase_Minute_std,purchase_Second_sum,purchase_Second_mean,purchase_Second_std,purchase_Elapsed_sum,purchase_Elapsed_mean,purchase_Elapsed_std,purchasedWeekday_0_sum,purchasedWeekday_0_mean,purchasedWeekday_0_std,purchasedWeekday_1_sum,purchasedWeekday_1_mean,purchasedWeekday_1_std,purchase_Month_1_sum,purchase_Month_1_mean,purchase_Month_1_std,purchase_Month_2_sum,purchase_Month_2_mean,purchase_Month_2_std,purchase_Month_3_sum,purchase_Month_3_mean,purchase_Month_3_std,purchase_Month_4_sum,purchase_Month_4_mean,purchase_Month_4_std,purchase_Month_5_sum,purchase_Month_5_mean,purchase_Month_5_std,purchase_Month_6_sum,purchase_Month_6_mean,purchase_Month_6_std,purchase_Month_7_sum,purchase_Month_7_mean,purchase_Month_7_std,purchase_Month_8_sum,purchase_Month_8_mean,purchase_Month_8_std,purchase_Month_9_sum,purchase_Month_9_mean,purchase_Month_9_std,purchase_Month_10_sum,purchase_Month_10_mean,purchase_Month_10_std,purchase_Month_11_sum,purchase_Month_11_mean,purchase_Month_11_std,purchase_Month_12_sum,purchase_Month_12_mean,purchase_Month_12_std,purchasedWeekend_0_sum,purchasedWeekend_0_mean,purchasedWeekend_0_std,purchasedWeekend_1_sum,purchasedWeekend_1_mean,purchasedWeekend_1_std,purchase_Dayofweek_0_sum,purchase_Dayofweek_0_mean,purchase_Dayofweek_0_std,purchase_Dayofweek_1_sum,purchase_Dayofweek_1_mean,purchase_Dayofweek_1_std,purchase_Dayofweek_2_sum,purchase_Dayofweek_2_mean,purchase_Dayofweek_2_std,purchase_Dayofweek_3_sum,purchase_Dayofweek_3_mean,purchase_Dayofweek_3_std,purchase_Dayofweek_4_sum,purchase_Dayofweek_4_mean,purchase_Dayofweek_4_std,purchase_Dayofweek_5_sum,purchase_Dayofweek_5_mean,purchase_Dayofweek_5_std,purchase_Dayofweek_6_sum,purchase_Dayofweek_6_mean,purchase_Dayofweek_6_std,purchase_date_temp_period,purchase_date_temp_mean_diff,purchase_date_temp_min,purchase_date_temp_max,monthDiff_period,monthDiff_mean_diff,authorized_flag_sum,authorized_flag_mean,authorized_flag_std,category_1_sum,category_1_mean,category_1_std,category_2_sum,category_2_mean,category_2_std,category_3_sum,category_3_mean,category_3_std,month_lag_sum,month_lag_mean,month_lag_min,month_lag_max,month_lag_std,installments_sum,installments_mean,installments_min,installments_max,installments_std,purchase_amount_sum,purchase_amount_mean,purchase_amount_min,purchase_amount_max,purchase_amount_std,mer_numerical_1_sum,mer_numerical_1_mean,mer_numerical_1_min,mer_numerical_1_max,mer_numerical_1_std,mer_numerical_2_sum,mer_numerical_2_mean,mer_numerical_2_min,mer_numerical_2_max,mer_numerical_2_std,mer_avg_sales_lag3_sum,mer_avg_sales_lag3_mean,mer_avg_sales_lag3_min,mer_avg_sales_lag3_max,mer_avg_sales_lag3_std,mer_avg_sales_lag6_sum,mer_avg_sales_lag6_mean,mer_avg_sales_lag6_std,mer_avg_sales_lag12_sum,mer_avg_sales_lag12_mean,mer_avg_sales_lag12_min,mer_avg_sales_lag12_max,mer_avg_sales_lag12_std,mer_avg_purchases_lag3_sum,mer_avg_purchases_lag3_mean,mer_avg_purchases_lag3_min,mer_avg_purchases_lag3_max,mer_avg_purchases_lag3_std,mer_avg_purchases_lag6_sum,mer_avg_purchases_lag6_mean,mer_avg_purchases_lag6_min,mer_avg_purchases_lag6_max,mer_avg_purchases_lag6_std,mer_avg_purchases_lag12_sum,mer_avg_purchases_lag12_mean,mer_avg_purchases_lag12_min,mer_avg_purchases_lag12_max,mer_avg_purchases_lag12_std,mer_active_months_lag3_sum,mer_active_months_lag3_mean,mer_active_months_lag3_min,mer_active_months_lag3_max,mer_active_months_lag3_std,mer_active_months_lag6_sum,mer_active_months_lag6_mean,mer_active_months_lag6_min,mer_active_months_lag6_max,mer_active_months_lag6_std,mer_active_months_lag12_sum,mer_active_months_lag12_mean,mer_active_months_lag12_min,mer_active_months_lag12_max,mer_active_months_lag12_std,mer_category_1_sum,mer_category_1_mean,mer_category_1_std,mer_most_recent_sales_range_sum,mer_most_recent_sales_range_mean,mer_most_recent_sales_range_std,mer_most_recent_purchases_range_sum,mer_most_recent_purchases_range_mean,mer_most_recent_purchases_range_std,mer_category_4_sum,mer_category_4_mean,mer_category_4_std,mer_category_2_sum,mer_category_2_mean,mer_category_2_std
0,0,2017-06-01,C_ID_92a2005557,5,2,1,-0.820312,2017,2,1357,8,2.666016,5,1,2.082031,2017,8905,31.46875,15.554688,4411,15.585938,8.804688,61760,218.25,109.1875,9,0.031799,0.175781,7,0.024734,0.15564,5,0.01767,0.131958,3,0.010597,0.1026,1,0.003534,0.059448,0,0.0,0.0,3758,13.28125,4.796875,8128,28.71875,18.765625,7417,26.203125,18.4375,427192299097,1509513000.0,7279052.5,96.0,0.339111,0.474365,187.0,0.660645,0.474365,21.0,0.074219,0.262451,23.0,0.081299,0.273682,12.0,0.042389,0.201904,11.0,0.038879,0.193604,0.0,0.0,0.0,3.0,0.010597,0.1026,49.0,0.173096,0.37915,44.0,0.155518,0.363037,20.0,0.070679,0.256836,22.0,0.077759,0.268311,21.0,0.074219,0.262451,57.0,0.201416,0.401855,187.0,0.660645,0.474365,96.0,0.339111,0.474365,34.0,0.120117,0.325684,33.0,0.116638,0.321533,44.0,0.155518,0.363037,41.0,0.144897,0.352539,35.0,0.123657,0.329834,53.0,0.187256,0.390869,43.0,0.151978,0.359619,306,26.046875,1331,1025,1,0.003546,270.0,0.954102,0.209717,0.0,0.0,0.0,295.0,1.041992,0.4104,4.0,0.014137,0.118225,-983.0,-3.472656,-8,2,2.734375,4,0.014137,0,1,0.118225,21403.699219,75.631447,5.0,2000.0,138.101593,4688.0,16.625,-0.057465,183.75,44.96875,4532.0,16.0625,-0.057465,182.125,44.5,351.779999,1.247447,0.680176,7.73,1.034572,1903.690063,6.750674,27.95788,2174.559814,7.711205,0.529785,194.610001,32.652981,427.5,1.516602,0.466553,12.703125,2.101562,5484.0,19.4375,0.321045,504.25,93.3125,6044.0,21.4375,0.25293,554.5,102.8125,846.0,3.0,3.0,3.0,0.0,1691.0,5.996094,5.0,6.0,0.05954,3348.0,11.875,5.0,12.0,0.794434,258.0,0.915039,0.279541,469.0,1.663086,1.27832,410.0,1.454102,1.288086,16.0,0.056732,0.231812,294.0,1.042969,0.411133
1,1,2017-01-01,C_ID_3d0044924f,4,1,0,0.392822,2017,1,1508,5,1.666992,4,0,2.082031,2017,8881,24.953125,16.703125,5914,16.609375,8.835938,61516,172.75,117.1875,12,0.033722,0.180786,15,0.042145,0.201172,6,0.016861,0.128906,6,0.016861,0.128906,3,0.00843,0.091553,1,0.00281,0.053009,5218,14.65625,5.585938,10360,29.09375,17.5625,10272,28.859375,17.59375,535130645472,1503176000.0,10257346.0,132.0,0.37085,0.483643,224.0,0.629395,0.483643,72.0,0.202271,0.402344,25.0,0.070251,0.255859,16.0,0.044952,0.20752,18.0,0.050568,0.21936,15.0,0.042145,0.201172,34.0,0.09552,0.294434,49.0,0.137695,0.344971,14.0,0.039337,0.194702,24.0,0.067444,0.250977,26.0,0.073059,0.260498,16.0,0.044952,0.20752,47.0,0.13208,0.339111,224.0,0.629395,0.483643,132.0,0.37085,0.483643,42.0,0.117981,0.322998,47.0,0.13208,0.339111,35.0,0.098328,0.298096,44.0,0.123596,0.32959,56.0,0.157349,0.364502,79.0,0.221924,0.416016,53.0,0.148926,0.356445,448,30.28125,1503,1055,1,0.002817,345.0,0.969238,0.17334,31.0,0.087097,0.282227,356.0,1.0,0.0,426.0,1.196289,0.411865,-1752.0,-4.921875,-12,2,3.865234,549,1.541992,-1,10,1.5,34297.628906,96.34166,3.0,3578.47998,254.249817,9960.0,26.84375,-0.057465,172.75,50.21875,9752.0,26.28125,-0.057465,170.75,49.78125,452.660004,1.220108,0.47998,10.75,1.065189,761.210022,2.051779,10.825763,817.449951,2.203369,0.27002,166.679993,12.232841,518.0,1.396484,0.523926,12.703125,1.900391,1546.0,4.167969,0.307861,504.25,36.9375,1679.0,4.527344,0.208984,554.5,40.625,1113.0,3.0,3.0,3.0,0.0,2226.0,6.0,6.0,6.0,0.0,4400.0,11.859375,7.0,12.0,0.788574,309.0,0.833008,0.373535,403.0,1.085938,1.363281,397.0,1.070312,1.364258,29.0,0.078186,0.268799,371.0,1.0,0.0
2,2,2016-08-01,C_ID_d639edf6cd,2,2,0,0.687988,2016,3,1661,4,1.333008,2,0,1.154297,2017,807,18.34375,13.726562,859,19.515625,7.910156,5572,126.625,96.5,1,0.02272,0.150757,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,787,17.890625,3.478516,1147,26.0625,16.609375,1432,32.53125,16.953125,65868733852,1497017000.0,10884969.0,12.0,0.272705,0.450439,32.0,0.727051,0.450439,8.0,0.181763,0.390137,7.0,0.159058,0.369873,5.0,0.113647,0.321045,8.0,0.181763,0.390137,4.0,0.090881,0.290771,0.0,0.0,0.0,2.0,0.045441,0.210693,2.0,0.045441,0.210693,1.0,0.02272,0.150757,6.0,0.136353,0.347168,0.0,0.0,0.0,1.0,0.02272,0.150757,32.0,0.727051,0.450439,12.0,0.272705,0.450439,3.0,0.068176,0.254883,2.0,0.045441,0.210693,9.0,0.20459,0.407959,7.0,0.159058,0.369873,11.0,0.25,0.437988,10.0,0.227295,0.423828,2.0,0.045441,0.210693,472,263.5,1498,1026,1,0.023254,42.0,0.95459,0.210693,0.0,0.0,0.0,204.0,4.636719,1.163086,0.0,0.0,0.0,-368.0,-8.367188,-13,2,4.121094,0,0.0,0,0,0.0,1994.050049,45.319317,11.15625,400.0,57.512569,53.75,1.194336,-0.057465,21.828125,4.484375,46.0625,1.023438,-0.057465,21.59375,4.480469,51.360001,1.141333,0.930176,6.93,0.884468,53.169998,1.181556,1.105324,53.689999,1.193111,0.759766,8.57,1.127176,57.97,1.288086,0.949219,12.703125,1.741211,61.97,1.376953,0.871094,15.859375,2.208984,62.84,1.396484,0.752441,16.21875,2.261719,135.0,3.0,3.0,3.0,0.0,270.0,6.0,6.0,6.0,0.0,533.0,11.84375,7.0,12.0,0.796387,43.0,0.955566,0.208374,18.0,0.399902,0.962891,18.0,0.399902,1.008789,3.0,0.06665,0.252197,205.0,4.554688,1.271484
3,3,2017-09-01,C_ID_186d6a6901,4,3,0,0.142456,2017,3,1265,7,2.333984,4,0,2.082031,2017,2562,30.5,17.140625,1391,16.5625,8.90625,17723,211.0,120.375,3,0.035706,0.186646,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,0,0.0,0.0,1203,14.320312,5.082031,2488,29.625,18.296875,2037,24.25,18.125,127034254917,1512313000.0,5463778.0,14.0,0.166626,0.375,70.0,0.833496,0.375,6.0,0.071411,0.259033,16.0,0.19043,0.39502,2.0,0.023804,0.15332,5.0,0.059509,0.238037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.0,0.130981,0.339355,31.0,0.369141,0.485352,7.0,0.083313,0.278076,6.0,0.071411,0.259033,70.0,0.833496,0.375,14.0,0.166626,0.375,7.0,0.083313,0.278076,12.0,0.142822,0.352051,22.0,0.261963,0.442383,10.0,0.119019,0.325684,19.0,0.226196,0.420898,8.0,0.095215,0.29541,6.0,0.071411,0.259033,204,59.0,1240,1036,1,0.012047,84.0,1.0,0.0,13.0,0.154785,0.36377,261.0,3.107422,1.379883,88.0,1.047852,0.34375,-206.0,-2.453125,-5,2,2.142578,89,1.05957,-1,3,0.608398,5719.299805,68.086906,4.0,1459.089966,167.072281,368.0,3.833984,-0.057465,27.578125,8.59375,360.25,3.753906,-0.057465,27.3125,8.507812,150.75,1.570312,0.589844,6.93,1.739451,162.660004,1.694375,2.181952,163.869995,1.706979,0.509766,8.57,2.230313,204.2,2.126953,0.617676,12.703125,3.421875,230.9,2.404297,0.42627,15.859375,4.355469,234.1,2.4375,0.436035,16.21875,4.460938,288.0,3.0,3.0,3.0,0.0,576.0,6.0,6.0,6.0,0.0,1094.0,11.398438,7.0,12.0,1.539062,65.0,0.677246,0.469971,161.0,1.676758,1.592773,159.0,1.65625,1.575195,2.0,0.020828,0.143555,282.0,2.9375,1.442383
4,4,2017-11-01,C_ID_cdbd2c0db2,1,3,0,-0.15979,2017,4,1204,4,1.333008,3,0,1.527344,2018,3378,19.984375,19.59375,2666,15.773438,8.953125,23233,137.5,137.625,8,0.047333,0.213013,7,0.041412,0.199829,2,0.011833,0.108459,3,0.017746,0.132446,0,0.0,0.0,1,0.005917,0.076904,2265,13.40625,5.402344,4836,28.609375,17.296875,4538,26.859375,17.328125,256356557498,1516903000.0,3909616.5,54.0,0.31958,0.467773,115.0,0.680664,0.467773,46.0,0.272217,0.446289,35.0,0.207153,0.406494,16.0,0.094666,0.293701,20.0,0.118347,0.323975,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21.0,0.124268,0.330811,31.0,0.183472,0.388184,115.0,0.680664,0.467773,54.0,0.31958,0.467773,21.0,0.124268,0.330811,18.0,0.106506,0.309326,23.0,0.136108,0.343994,19.0,0.112427,0.316895,34.0,0.201172,0.4021,32.0,0.189331,0.393066,22.0,0.130127,0.337402,167,23.859375,1193,1026,1,0.005951,164.0,0.970215,0.169922,17.0,0.100586,0.301758,583.0,3.449219,1.138672,176.0,1.041016,0.227661,-115.0,-0.680664,-3,2,1.49707,217,1.28418,-1,12,1.698242,38341.328125,226.87178,0.5,5283.959961,802.122375,506.0,2.857422,-0.057465,27.578125,7.988281,497.25,2.808594,-0.057465,27.3125,7.917969,420.059998,2.37322,0.509766,185.490005,13.899587,560.960022,3.169266,16.48782,585.419983,3.307458,0.439941,166.679993,17.468569,326.5,1.844727,0.392822,37.125,3.654297,856.5,4.839844,0.294189,504.25,37.96875,915.0,5.167969,0.286377,554.5,41.71875,531.0,3.0,3.0,3.0,0.0,1061.0,5.996094,5.0,6.0,0.075134,2068.0,11.6875,5.0,12.0,1.210938,139.0,0.785156,0.411865,345.0,1.949219,1.418945,350.0,1.977539,1.417969,17.0,0.096069,0.29541,578.0,3.265625,1.262695


In [148]:
#drop the unnamed 0
cols = 'Unnamed: 0'
train.drop(cols, axis = 1, inplace = True)

In [149]:
# ignore id based columns and date based columns 
columnsToTake = []
for col in train.columns:
    if col != 'card_id' and col != 'first_active_month' and col != 'target':
        columnsToTake.append(col)

In [150]:
len(columnsToTake)

221

In [151]:
sel = VarianceThreshold(threshold = 0)
sel.fit(train[columnsToTake])

VarianceThreshold(threshold=0)

In [152]:
# get support is a boolean vector that indicates which features are retained 
# if we sum over get_support, we get the umber of features that are not constant
sum(sel.get_support())


219

In [153]:
# Two constant columns 
temp = train[columnsToTake]
[x for x in temp.columns if x not in temp.columns[sel.get_support()]]

['mer_active_months_lag3_max', 'mer_active_months_lag6_max']

In [154]:
temp.mer_active_months_lag3_max.value_counts()/np.float(len(temp))

3.0    1.0
Name: mer_active_months_lag3_max, dtype: float64

In [155]:
temp.mer_active_months_lag6_max.value_counts()/np.float(len(temp))

6.0    1.0
Name: mer_active_months_lag6_max, dtype: float64

In [156]:
def remove_noninformative(df):
    """Remove non-informative columns (all nan, or all same value)"""
    for col in df:
        if df[col].isnull().all():
            print('Removing '+col+' (all NaN)')
            del df[col]
        elif df[col].nunique()<2:
            print('Removing '+col+' (only 1 unique value)')
            del df[col]

remove_noninformative(temp)
gc.collect()

Removing mer_active_months_lag3_max (only 1 unique value)
Removing mer_active_months_lag6_max (only 1 unique value)


202

In [157]:
for col in columnsToTake:
    if col is "mer_active_months_lag3_max" or col is "mer_active_months_lag6_max":
        columnsToTake.remove(col)

In [158]:
len(columnsToTake)

221

In [127]:
quaziConstantFeatures = []
for feat in temp.columns:
    pre = (temp[feat].value_counts()/np.float(
    len(temp))).sort_values(ascending = False).values[0]
    
    if pre > 0.99:
        quaziConstantFeatures.append(feat)
len(quaziConstantFeatures)

3

In [128]:
quaziConstantFeatures

['mer_numerical_1_min', 'mer_numerical_2_min', 'mer_active_months_lag12_max']

In [129]:
temp.mer_numerical_1_min.value_counts()/np.float(len(temp))

-0.057465     0.991967
-0.047546     0.003853
-0.037628     0.001332
-0.027725     0.000797
-0.017807     0.000287
                ...   
 0.220093     0.000005
 20.500000    0.000005
 0.755371     0.000005
 7.152344     0.000005
 1.003906     0.000005
Name: mer_numerical_1_min, Length: 106, dtype: float64

In [130]:
temp.mer_numerical_2_min.value_counts()/np.float(len(temp))

-0.057465    0.997519
-0.047546    0.001030
-0.027725    0.000426
-0.037628    0.000406
-0.017807    0.000074
               ...   
 0.160645    0.000005
 0.755371    0.000005
 0.448242    0.000005
 2.599609    0.000005
 6.894531    0.000005
Name: mer_numerical_2_min, Length: 83, dtype: float64

In [131]:
temp.mer_active_months_lag12_max.value_counts()/np.float(len(temp))

12.0    0.999802
10.0    0.000183
11.0    0.000005
8.0     0.000005
9.0     0.000005
Name: mer_active_months_lag12_max, dtype: float64

In [133]:
columnsToTake = temp.columns

In [134]:
len(columnsToTake)

219

In [135]:
for col in columnsToTake:
    if col in quaziConstantFeatures:
        columnsToTake.remove(col)

AttributeError: ignored

In [136]:
len(columnsToTake)

219

In [117]:
# for duplicate features 
duplicatedFeat = []
for i in range(0, len(columnsToTake)):
    
    if i % 10 == 0:
        print(i)
        
    col1 = temp.columns[i]
    
    for col2 in temp.columns[i + 1:]:
        if temp[col1].equals(temp[col2]):
            print(col1)
            print(col2)
            duplicatedFeat.append(col2)

0
feature_3
feature_min
10
20
30
40
50
purchasedWeekday_0_sum
purchasedWeekend_1_sum
purchasedWeekday_0_mean
purchasedWeekend_1_mean
purchasedWeekday_0_std
purchasedWeekday_1_std
purchasedWeekday_0_std
purchasedWeekend_0_std
purchasedWeekday_0_std
purchasedWeekend_1_std
purchasedWeekday_1_sum
purchasedWeekend_0_sum
purchasedWeekday_1_mean
purchasedWeekend_0_mean
purchasedWeekday_1_std
purchasedWeekend_0_std
purchasedWeekday_1_std
purchasedWeekend_1_std
60
70
80
90
purchasedWeekend_0_std
purchasedWeekend_1_std
100
110
120
130
140
150
160
170
180
190
200
210


In [118]:
duplicatedFeat

['feature_min',
 'purchasedWeekend_1_sum',
 'purchasedWeekend_1_mean',
 'purchasedWeekday_1_std',
 'purchasedWeekend_0_std',
 'purchasedWeekend_1_std',
 'purchasedWeekend_0_sum',
 'purchasedWeekend_0_mean',
 'purchasedWeekend_0_std',
 'purchasedWeekend_1_std',
 'purchasedWeekend_1_std']

In [119]:
for col in columnsToTake:
    if col in duplicatedFeat:
        columnsToTake.remove(col)

In [120]:
len(columnsToTake)

213

In [122]:
for col in columnsToTake:
    if col is "mer_active_months_lag3_max" or col is "mer_active_months_lag6_max":
        columnsToTake.remove(col)

In [123]:
len(columnsToTake)

213

## Correlation 

In [124]:
temp = temp[columnsToTake]
temp

KeyError: ignored

In [None]:
corr = temp.corr()
fig, ax = plt.subplots()
fig.set_size_inches(11,11)
sns.heatmap(corr, vmin=-1, vmax=1, cmap='BrBG')