In [1]:
data_small_flag=False # True:データ少なくする　False:データ全部
exe_env_flag=3 # 1:local環境、2:kaggle kernel、3:google colab

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
pd.set_option('display.max_columns', 1000)

In [3]:
import datetime
import calendar
from datetime import time

In [4]:
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2    
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)    
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [5]:
if exe_env_flag==1:
    if data_small_flag:
        new_transactions = reduce_mem_usage(pd.read_csv('./new_transactions_small.csv',parse_dates=['purchase_date']))
        historical_transactions = reduce_mem_usage(pd.read_csv('./historical_transactions_small.csv',parse_dates=['purchase_date']))
    else:
        new_transactions = reduce_mem_usage(pd.read_csv('../input/new_merchant_transactions.csv',parse_dates=['purchase_date']))
        historical_transactions = reduce_mem_usage(pd.read_csv('../input/historical_transactions.csv',parse_dates=['purchase_date']))
elif exe_env_flag==2:
    new_transactions = reduce_mem_usage(pd.read_csv('../input/new_merchant_transactions.csv',parse_dates=['purchase_date']))
    historical_transactions = reduce_mem_usage(pd.read_csv('../input/historical_transactions.csv',parse_dates=['purchase_date'])) 
elif exe_env_flag==3:
    from google.colab import drive
    drive.mount('/content/gdrive')
    new_transactions = reduce_mem_usage(pd.read_csv('./gdrive/My Drive/Colab Notebooks/elo/input/new_merchant_transactions.csv',parse_dates=['purchase_date']))
    historical_transactions = reduce_mem_usage(pd.read_csv('./gdrive/My Drive/Colab Notebooks/elo/input/historical_transactions.csv',parse_dates=['purchase_date']))  

Mem. usage decreased to 114.20 Mb (45.5% reduction)
Mem. usage decreased to 1749.11 Mb (43.7% reduction)


#### authorized_flagとcategory_1の値を0 or 1に変換

In [6]:
def binarize(df):
    for col in ['authorized_flag','category_1']:
        df[col]=df[col].map({'Y':1,'N':0})
    return df
new_transactions = binarize(new_transactions)
historical_transactions = binarize(historical_transactions)

In [7]:
historical_transactions.head()

Unnamed: 0,authorized_flag,card_id,city_id,category_1,installments,category_3,merchant_category_id,merchant_id,month_lag,purchase_amount,purchase_date,category_2,state_id,subsector_id
0,1,C_ID_4e6213e9bc,88,0,0,A,80,M_ID_e020e9b302,-8,-0.703331,2017-06-25 15:33:07,1.0,16,37
1,1,C_ID_4e6213e9bc,88,0,0,A,367,M_ID_86ec983688,-7,-0.733128,2017-07-15 12:10:45,1.0,16,16
2,1,C_ID_4e6213e9bc,88,0,0,A,80,M_ID_979ed661fc,-6,-0.720386,2017-08-09 22:04:29,1.0,16,37
3,1,C_ID_4e6213e9bc,88,0,0,A,560,M_ID_e6d5ae8ea6,-5,-0.735352,2017-09-02 10:06:26,1.0,16,34
4,1,C_ID_4e6213e9bc,88,0,0,A,80,M_ID_e020e9b302,-11,-0.722865,2017-03-10 01:14:19,1.0,16,37


#### month_diff

In [8]:
historical_transactions['month_diff'] = ((datetime.datetime.today() - historical_transactions['purchase_date']).dt.days)//30
historical_transactions['month_diff'] += historical_transactions['month_lag']

new_transactions['month_diff'] = ((datetime.datetime.today() - new_transactions['purchase_date']).dt.days)//30
new_transactions['month_diff'] += new_transactions['month_lag']

#### ダミー変数作成

In [9]:
historical_transactions = pd.get_dummies(historical_transactions, columns=['category_2', 'category_3'])
new_transactions = pd.get_dummies(new_transactions, columns=['category_2', 'category_3'])

In [10]:
historical_transactions = reduce_mem_usage(historical_transactions)
new_transactions = reduce_mem_usage(new_transactions)

Mem. usage decreased to 1332.66 Mb (30.4% reduction)
Mem. usage decreased to 86.12 Mb (31.3% reduction)


#### 月変数

In [11]:
def get_month(tdate):
    return tdate.month

In [12]:
def make_month(df):
    df['purchase_month']=df['purchase_date'].apply(lambda x:get_month(x))
    df=pd.get_dummies(df, columns=['purchase_month'])
    return df

In [13]:
historical_transactions=make_month(historical_transactions)
new_transactions=make_month(new_transactions)

#### 週経過

In [14]:
def make_weekofyear(df):
    df['purchase_weekofyear']=df['purchase_date'].dt.weekofyear
    return df

In [15]:
historical_transactions=make_weekofyear(historical_transactions)
new_transactions=make_weekofyear(new_transactions)

#### 日変数

In [16]:
def get_day(tdate):
    if tdate.day>=25:
        return 1
    else:
        return 0

In [17]:
def get_end_of_month(df):
    df['purchase_day']=df['purchase_date'].apply(lambda x:get_day(x))
    return df

In [18]:
historical_transactions=get_end_of_month(historical_transactions)
new_transactions=get_end_of_month(new_transactions)

#### 曜日変数

In [19]:
def get_weekday(tdate):
    return calendar.day_name[tdate.weekday()]

In [20]:
def make_weekday(df):
    df['purchase_weekday']=df['purchase_date'].apply(lambda x :get_weekday(x))
    day_labels = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    df['purchase_weekday'] = pd.Categorical(df['purchase_weekday'], categories = day_labels,ordered = True)
    df=pd.get_dummies(df, columns=['purchase_weekday'])
    return df

In [21]:
historical_transactions=make_weekday(historical_transactions)
new_transactions=make_weekday(new_transactions)

In [22]:
new_transactions.head()

Unnamed: 0,authorized_flag,card_id,city_id,category_1,installments,merchant_category_id,merchant_id,month_lag,purchase_amount,purchase_date,state_id,subsector_id,month_diff,category_2_1.0,category_2_2.0,category_2_3.0,category_2_4.0,category_2_5.0,category_3_A,category_3_B,category_3_C,purchase_month_1,purchase_month_2,purchase_month_3,purchase_month_4,purchase_month_5,purchase_month_6,purchase_month_7,purchase_month_8,purchase_month_9,purchase_month_10,purchase_month_11,purchase_month_12,purchase_weekofyear,purchase_day,purchase_weekday_Monday,purchase_weekday_Tuesday,purchase_weekday_Wednesday,purchase_weekday_Thursday,purchase_weekday_Friday,purchase_weekday_Saturday,purchase_weekday_Sunday
0,1,C_ID_415bb3a509,107,0,1,307,M_ID_b0c793002c,1,-0.557617,2018-03-11 14:57:36,9,19,12,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,1
1,1,C_ID_415bb3a509,140,0,1,307,M_ID_88920c89e8,1,-0.569336,2018-03-19 18:53:37,9,19,12,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,12,0,1,0,0,0,0,0,0
2,1,C_ID_415bb3a509,330,0,1,507,M_ID_ad5237ef6b,2,-0.55127,2018-04-26 14:08:44,9,14,12,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,17,1,0,0,0,1,0,0,0
3,1,C_ID_415bb3a509,-1,1,1,661,M_ID_9e84cda3b1,1,-0.671875,2018-03-07 09:43:21,-1,8,12,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,10,0,0,0,1,0,0,0,0
4,1,C_ID_ef55cf8d4b,-1,1,1,166,M_ID_3c86fa3831,1,-0.659668,2018-03-22 21:07:53,-1,29,12,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,12,0,0,0,0,1,0,0,0


#### 時間変数

In [23]:
def get_session(time_list):
    time_list = int(time_list)
    if time_list > 4 and time_list < 12:
        return 'Morning'
    elif time_list >= 12 and time_list < 17:
        return 'Afternoon'
    elif time_list >= 17 and time_list < 21:
        return 'Evening'
    else:
        return 'Night'

In [24]:
def make_hour(df):
    df['temp']=df['purchase_date'].dt.hour
    df['purchase_session'] = df['temp'].apply(lambda x: get_session(x))
    del df['temp']
    session_labels = ['Morning', 'Afternoon', 'Evening', 'Night']
    df['purchase_session'] = pd.Categorical(df['purchase_session'], categories = session_labels,ordered = True)
    df=pd.get_dummies(df, columns=['purchase_session'])
    return df

In [25]:
historical_transactions=make_hour(historical_transactions)
new_transactions=make_hour(new_transactions)

#### duration,amount_month_ratio,price

In [26]:
def make_duration(df):
    df['duration']=df['purchase_amount']*df['month_diff']
    df['amount_month_ratio'] = df['purchase_amount']/df['month_diff']
    df['price'] = df['purchase_amount'] / df['installments']
    return df

In [27]:
historical_transactions=make_duration(historical_transactions)
new_transactions=make_duration(new_transactions)

#### authorized_flagごとに分ける
historical_transactionsが対象、new_transactionsはすべてauthorized_flag=1なので

In [28]:
authorized_transactions = historical_transactions[historical_transactions['authorized_flag'] == 1]
non_authorized_transactions = historical_transactions[historical_transactions['authorized_flag'] == 0]

#### auth_mean作成
historical_transactionsの中でcard_idごとにauthorized_flagの平均値を算出

In [29]:
agg_fun={'authorized_flag':['mean']}
auth_mean=historical_transactions.groupby('card_id').agg(agg_fun)
auth_mean.columns=['_'.join(col).strip() for col in auth_mean.columns.values]
auth_mean.reset_index(inplace=True)

#### aggregate_transactions

In [30]:
def aggregate_transactions(df):
    #購入日を数字に変換
    
    df.loc[:,'purchase_date']=pd.DatetimeIndex(df['purchase_date']).astype(np.int64)*1e-9
    agg_func = {
    'category_1': ['sum', 'mean'],
    'category_2_1.0': ['mean'],
    'category_2_2.0': ['mean'],
    'category_2_3.0': ['mean'],
    'category_2_4.0': ['mean'],
    'category_2_5.0': ['mean'],
    'category_3_A': ['mean'],
    'category_3_B': ['mean'],
    'category_3_C': ['mean'],
    'merchant_id': ['nunique'],
    'merchant_category_id': ['nunique'],
    'state_id': ['nunique'],
    'city_id': ['nunique'],
    'card_id': ['size'],
    'subsector_id': ['nunique'],
    'purchase_amount': ['sum', 'mean', 'max', 'min', 'std'],
    'installments': ['sum', 'mean', 'max', 'min', 'std'],
    'purchase_date': [np.ptp, 'min', 'max'],
    'purchase_month_1':['mean'],
    'purchase_month_2':['mean'],
    'purchase_month_3':['mean'],
    'purchase_month_4':['mean'],
    'purchase_month_5':['mean'],
    'purchase_month_6':['mean'],
    'purchase_month_7':['mean'],
    'purchase_month_8':['mean'],
    'purchase_month_9':['mean'],
    'purchase_month_10':['mean'],
    'purchase_month_11':['mean'],
    'purchase_month_12':['mean'],
    'purchase_weekday_Monday':['mean'],
    'purchase_weekday_Tuesday':['mean'],
    'purchase_weekday_Wednesday':['mean'],
    'purchase_weekday_Thursday':['mean'],
    'purchase_weekday_Friday':['mean'],
    'purchase_weekday_Saturday':['mean'],
    'purchase_weekday_Sunday':['mean'],
    'purchase_weekofyear':['nunique','mean'],
    'purchase_session_Morning':['mean'],
    'purchase_session_Afternoon':['mean'],
    'purchase_session_Evening':['mean'],
    'purchase_session_Night':['mean'],
    'duration':['mean','min','max','std'],
    'amount_month_ratio':['mean','min','max','std'],
    'price':['mean','max','min','std'],
    'month_lag': ['mean', 'max', 'min', 'std'],
    'month_diff': ['mean', 'max', 'min', 'std'],
    }
    
    agg_df = df.groupby(['card_id']).agg(agg_func)
    agg_df.columns = ['_'.join(col).strip() for col in agg_df.columns.values]
    agg_df.reset_index(inplace=True)
    
    agg_df['purchase_date_diff']=(agg_df['purchase_date_max'] - agg_df['purchase_date_min'])
    agg_df['purchase_date_average'] = agg_df['purchase_date_diff']/agg_df['card_id_size']
    del agg_df['card_id_size']
    agg_df['purchase_date_uptonow'] = datetime.datetime.today().timestamp() - agg_df['purchase_date_max']

    tmp = (df.groupby('card_id').size().reset_index(name='transactions_count'))
    agg_df = pd.merge(tmp, agg_df, on='card_id', how='left')
    return agg_df

In [31]:
non_authorized_transactions.head()

Unnamed: 0,authorized_flag,card_id,city_id,category_1,installments,merchant_category_id,merchant_id,month_lag,purchase_amount,purchase_date,state_id,subsector_id,month_diff,category_2_1.0,category_2_2.0,category_2_3.0,category_2_4.0,category_2_5.0,category_3_A,category_3_B,category_3_C,purchase_month_1,purchase_month_2,purchase_month_3,purchase_month_4,purchase_month_5,purchase_month_6,purchase_month_7,purchase_month_8,purchase_month_9,purchase_month_10,purchase_month_11,purchase_month_12,purchase_weekofyear,purchase_day,purchase_weekday_Monday,purchase_weekday_Tuesday,purchase_weekday_Wednesday,purchase_weekday_Thursday,purchase_weekday_Friday,purchase_weekday_Saturday,purchase_weekday_Sunday,purchase_session_Morning,purchase_session_Afternoon,purchase_session_Evening,purchase_session_Night,duration,amount_month_ratio,price
115,0,C_ID_4e6213e9bc,88,0,0,842,M_ID_22c9cfa265,-10,-0.730379,2017-04-07 12:58:09,16,37,12,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,14,0,0,0,0,0,1,0,0,0,1,0,0,-8.764544,-0.060865,-inf
132,0,C_ID_4e6213e9bc,88,0,0,367,M_ID_86ec983688,-5,-0.723782,2017-09-17 22:40:27,16,16,12,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,37,0,0,0,0,0,0,0,1,0,0,0,1,-8.685384,-0.060315,-inf
148,0,C_ID_4e6213e9bc,88,0,0,367,M_ID_86ec983688,-5,-0.723782,2017-09-17 22:40:26,16,16,12,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,37,0,0,0,0,0,0,0,1,0,0,0,1,-8.685384,-0.060315,-inf
168,0,C_ID_4e6213e9bc,333,0,0,605,M_ID_c2ae34c2ef,0,-0.664262,2018-02-20 10:57:50,9,2,12,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,0,0,1,0,0,0,0,0,1,0,0,0,-7.971144,-0.055355,-inf
213,0,C_ID_4e6213e9bc,88,0,0,560,M_ID_e6d5ae8ea6,-7,-0.738132,2017-07-08 07:33:31,16,34,12,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,27,0,0,0,0,0,0,1,0,1,0,0,0,-8.857588,-0.061511,-inf


In [32]:
non_authorized=aggregate_transactions(non_authorized_transactions)
non_authorized.columns = ['non_' + c if c != 'card_id' else c for c in non_authorized.columns]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [33]:
non_authorized_transactions.head()

Unnamed: 0,authorized_flag,card_id,city_id,category_1,installments,merchant_category_id,merchant_id,month_lag,purchase_amount,purchase_date,state_id,subsector_id,month_diff,category_2_1.0,category_2_2.0,category_2_3.0,category_2_4.0,category_2_5.0,category_3_A,category_3_B,category_3_C,purchase_month_1,purchase_month_2,purchase_month_3,purchase_month_4,purchase_month_5,purchase_month_6,purchase_month_7,purchase_month_8,purchase_month_9,purchase_month_10,purchase_month_11,purchase_month_12,purchase_weekofyear,purchase_day,purchase_weekday_Monday,purchase_weekday_Tuesday,purchase_weekday_Wednesday,purchase_weekday_Thursday,purchase_weekday_Friday,purchase_weekday_Saturday,purchase_weekday_Sunday,purchase_session_Morning,purchase_session_Afternoon,purchase_session_Evening,purchase_session_Night,duration,amount_month_ratio,price
115,0,C_ID_4e6213e9bc,88,0,0,842,M_ID_22c9cfa265,-10,-0.730379,1491570000.0,16,37,12,1,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,14,0,0,0,0,0,1,0,0,0,1,0,0,-8.764544,-0.060865,-inf
132,0,C_ID_4e6213e9bc,88,0,0,367,M_ID_86ec983688,-5,-0.723782,1505688000.0,16,16,12,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,37,0,0,0,0,0,0,0,1,0,0,0,1,-8.685384,-0.060315,-inf
148,0,C_ID_4e6213e9bc,88,0,0,367,M_ID_86ec983688,-5,-0.723782,1505688000.0,16,16,12,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,37,0,0,0,0,0,0,0,1,0,0,0,1,-8.685384,-0.060315,-inf
168,0,C_ID_4e6213e9bc,333,0,0,605,M_ID_c2ae34c2ef,0,-0.664262,1519124000.0,9,2,12,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,8,0,0,1,0,0,0,0,0,1,0,0,0,-7.971144,-0.055355,-inf
213,0,C_ID_4e6213e9bc,88,0,0,560,M_ID_e6d5ae8ea6,-7,-0.738132,1499499000.0,16,34,12,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,27,0,0,0,0,0,0,1,0,1,0,0,0,-8.857588,-0.061511,-inf


In [34]:
non_authorized.head()

Unnamed: 0,card_id,non_transactions_count,non_category_1_sum,non_category_1_mean,non_category_2_1.0_mean,non_category_2_2.0_mean,non_category_2_3.0_mean,non_category_2_4.0_mean,non_category_2_5.0_mean,non_category_3_A_mean,non_category_3_B_mean,non_category_3_C_mean,non_merchant_id_nunique,non_merchant_category_id_nunique,non_state_id_nunique,non_city_id_nunique,non_subsector_id_nunique,non_purchase_amount_sum,non_purchase_amount_mean,non_purchase_amount_max,non_purchase_amount_min,non_purchase_amount_std,non_installments_sum,non_installments_mean,non_installments_max,non_installments_min,non_installments_std,non_purchase_date_ptp,non_purchase_date_min,non_purchase_date_max,non_purchase_month_1_mean,non_purchase_month_2_mean,non_purchase_month_3_mean,non_purchase_month_4_mean,non_purchase_month_5_mean,non_purchase_month_6_mean,non_purchase_month_7_mean,non_purchase_month_8_mean,non_purchase_month_9_mean,non_purchase_month_10_mean,non_purchase_month_11_mean,non_purchase_month_12_mean,non_purchase_weekday_Monday_mean,non_purchase_weekday_Tuesday_mean,non_purchase_weekday_Wednesday_mean,non_purchase_weekday_Thursday_mean,non_purchase_weekday_Friday_mean,non_purchase_weekday_Saturday_mean,non_purchase_weekday_Sunday_mean,non_purchase_weekofyear_nunique,non_purchase_weekofyear_mean,non_purchase_session_Morning_mean,non_purchase_session_Afternoon_mean,non_purchase_session_Evening_mean,non_purchase_session_Night_mean,non_duration_mean,non_duration_min,non_duration_max,non_duration_std,non_amount_month_ratio_mean,non_amount_month_ratio_min,non_amount_month_ratio_max,non_amount_month_ratio_std,non_price_mean,non_price_max,non_price_min,non_price_std,non_month_lag_mean,non_month_lag_max,non_month_lag_min,non_month_lag_std,non_month_diff_mean,non_month_diff_max,non_month_diff_min,non_month_diff_std,non_purchase_date_diff,non_purchase_date_average,non_purchase_date_uptonow
0,C_ID_00007093c1,35,4.0,0.114286,0.0,0.0,0.885714,0.0,0.0,0.0,0.828571,0.171429,11,8,2,3,7,-14.401965,-0.411485,1.507069,-0.728876,0.430059,45,1.285714,3,1,0.667367,28858113.0,1489250000.0,1518108000.0,0.057143,0.114286,0.085714,0.085714,0.114286,0.171429,0.057143,0.085714,0.028571,0.142857,0.028571,0.028571,0.228571,0.057143,0.171429,0.257143,0.085714,0.2,0.0,19,23.771429,0.228571,0.571429,0.114286,0.085714,-4.958083,-9.221439,18.084827,5.176523,-0.03416,-0.06074,0.125589,0.035754,-0.417031,0.502356,-0.728876,0.325729,-6.028571,0,-11,3.535415,12.028571,13,12,0.169031,28858113.0,824517.5,32798020.0
1,C_ID_0001238066,3,0.0,0.0,0.333333,0.0,0.0,0.0,0.666667,0.0,1.0,0.0,2,1,2,2,1,-1.789928,-0.596643,-0.596643,-0.596643,0.0,3,1.0,1,1,0.0,3609150.0,1514660000.0,1518269000.0,0.0,0.333333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2,36.666667,0.0,0.333333,0.666667,0.0,-6.76195,-7.159712,-6.563069,0.344472,-0.052734,-0.05424,-0.04972,0.00261,-0.596643,-0.596643,-0.596643,0.0,-1.333333,0,-2,1.154701,11.333333,12,11,0.57735,3609150.0,1203050.0,32636910.0
2,C_ID_0001506ef0,4,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.75,0.25,0.0,4,4,1,1,4,-2.905782,-0.726445,-0.70859,-0.740491,0.016203,1,0.25,1,0,0.5,22098875.0,1496772000.0,1518871000.0,0.0,0.5,0.0,0.0,0.0,0.25,0.0,0.0,0.0,0.0,0.0,0.25,0.0,0.25,0.0,0.25,0.0,0.25,0.25,4,21.25,0.25,0.5,0.25,0.0,-8.717345,-8.885898,-8.503082,0.19444,-0.060537,-0.061708,-0.059049,0.00135,-inf,-0.70859,-inf,,-2.5,0,-8,3.785939,12.0,12,12,0.0,22098875.0,5524719.0,32035050.0
3,C_ID_0001793786,27,2.0,0.074074,0.111111,0.296296,0.111111,0.0,0.0,0.814815,0.185185,0.0,17,14,4,8,11,-11.825338,-0.437975,1.236592,-0.745405,0.49815,5,0.185185,1,0,0.395847,16780236.0,1488636000.0,1505416000.0,0.0,0.0,0.185185,0.148148,0.0,0.074074,0.148148,0.148148,0.296296,0.0,0.0,0.0,0.037037,0.111111,0.111111,0.259259,0.111111,0.333333,0.037037,13,26.0,0.185185,0.555556,0.185185,0.074074,-7.098594,-11.926482,19.785469,8.008268,-0.027039,-0.046588,0.077287,0.031017,,inf,-inf,,-3.518519,-1,-7,2.375684,16.148148,17,16,0.362014,16780236.0,621490.2,45489820.0
4,C_ID_000183fdda,7,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.428571,0.571429,4,4,1,1,4,-0.941381,-0.134483,0.334099,-0.714541,0.452741,19,2.714286,6,1,2.288689,6701589.0,1502099000.0,1508801000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.857143,0.0,0.142857,0.0,0.0,0.571429,0.0,0.0,0.428571,0.0,0.0,0.0,4,35.142857,0.428571,0.142857,0.285714,0.142857,-1.613796,-8.574488,4.009193,5.432896,-0.011207,-0.059545,0.027842,0.037728,-0.150477,0.334099,-0.714541,0.337646,-5.714286,-4,-6,0.755929,12.0,12,12,0.0,6701589.0,957369.9,42104910.0


In [35]:
authorized=aggregate_transactions(authorized_transactions)
authorized.columns = ['auth_' + c if c != 'card_id' else c for c in authorized.columns]

In [36]:
newt=aggregate_transactions(new_transactions)
newt.columns = ['newt_' + c if c != 'card_id' else c for c in newt.columns]

#### データ種類

non_authorized: historical_transactionsのauthorized_flag==1のデータ  
authorized: historical_transactionsのauthorized_flag==0のデータ  
newt: new_merchant_transactions  

---------------------------------

In [37]:
authorized.head()

Unnamed: 0,card_id,auth_transactions_count,auth_category_1_sum,auth_category_1_mean,auth_category_2_1.0_mean,auth_category_2_2.0_mean,auth_category_2_3.0_mean,auth_category_2_4.0_mean,auth_category_2_5.0_mean,auth_category_3_A_mean,auth_category_3_B_mean,auth_category_3_C_mean,auth_merchant_id_nunique,auth_merchant_category_id_nunique,auth_state_id_nunique,auth_city_id_nunique,auth_subsector_id_nunique,auth_purchase_amount_sum,auth_purchase_amount_mean,auth_purchase_amount_max,auth_purchase_amount_min,auth_purchase_amount_std,auth_installments_sum,auth_installments_mean,auth_installments_max,auth_installments_min,auth_installments_std,auth_purchase_date_ptp,auth_purchase_date_min,auth_purchase_date_max,auth_purchase_month_1_mean,auth_purchase_month_2_mean,auth_purchase_month_3_mean,auth_purchase_month_4_mean,auth_purchase_month_5_mean,auth_purchase_month_6_mean,auth_purchase_month_7_mean,auth_purchase_month_8_mean,auth_purchase_month_9_mean,auth_purchase_month_10_mean,auth_purchase_month_11_mean,auth_purchase_month_12_mean,auth_purchase_weekday_Monday_mean,auth_purchase_weekday_Tuesday_mean,auth_purchase_weekday_Wednesday_mean,auth_purchase_weekday_Thursday_mean,auth_purchase_weekday_Friday_mean,auth_purchase_weekday_Saturday_mean,auth_purchase_weekday_Sunday_mean,auth_purchase_weekofyear_nunique,auth_purchase_weekofyear_mean,auth_purchase_session_Morning_mean,auth_purchase_session_Afternoon_mean,auth_purchase_session_Evening_mean,auth_purchase_session_Night_mean,auth_duration_mean,auth_duration_min,auth_duration_max,auth_duration_std,auth_amount_month_ratio_mean,auth_amount_month_ratio_min,auth_amount_month_ratio_max,auth_amount_month_ratio_std,auth_price_mean,auth_price_max,auth_price_min,auth_price_std,auth_month_lag_mean,auth_month_lag_max,auth_month_lag_min,auth_month_lag_std,auth_month_diff_mean,auth_month_diff_max,auth_month_diff_min,auth_month_diff_std,auth_purchase_date_diff,auth_purchase_date_average,auth_purchase_date_uptonow
0,C_ID_00007093c1,114,24.0,0.210526,0.0,0.0,0.780702,0.0,0.008772,0.0,0.842105,0.157895,28,18,3,4,13,-62.443077,-0.547746,1.206539,-0.728876,0.237426,147,1.289474,6,1,0.795159,32627654.0,1487081000.0,1519708000.0,0.070175,0.105263,0.061404,0.078947,0.052632,0.114035,0.114035,0.122807,0.035088,0.105263,0.04386,0.096491,0.298246,0.122807,0.166667,0.140351,0.114035,0.122807,0.035088,34,26.096491,0.27193,0.359649,0.298246,0.070175,-6.601698,-9.319112,14.478466,2.870677,-0.04548,-0.064486,0.100545,0.019744,-0.523169,0.40218,-0.728876,0.222259,-5.798246,0,-12,3.441495,12.04386,13,11,0.308876,32627654.0,286207.491228,31197480.0
1,C_ID_0001238066,120,2.0,0.016667,0.783333,0.0,0.0,0.0,0.15,0.0,0.708333,0.266667,65,29,6,18,17,-70.657272,-0.588811,0.768095,-0.734887,0.192614,195,1.625,10,-1,1.50105,13110825.0,1506638000.0,1519748000.0,0.216667,0.191667,0.0,0.0,0.0,0.0,0.0,0.0,0.016667,0.091667,0.191667,0.291667,0.066667,0.166667,0.091667,0.141667,0.125,0.258333,0.15,23,29.8,0.141667,0.358333,0.25,0.25,-7.054791,-8.818639,9.217142,2.308758,-0.04915,-0.065062,0.064008,0.016096,-0.502052,0.605012,-0.734887,0.2633,-1.825,0,-5,1.294218,11.983333,12,11,0.128556,13110825.0,109256.875,31157640.0
2,C_ID_0001506ef0,62,0.0,0.0,0.032258,0.0,0.967742,0.0,0.0,1.0,0.0,0.0,28,19,2,3,12,-31.696098,-0.511227,1.493545,-0.740491,0.484575,0,0.0,0,0,0.0,34460275.0,1484411000.0,1518871000.0,0.145161,0.145161,0.129032,0.0,0.016129,0.032258,0.032258,0.032258,0.032258,0.048387,0.112903,0.274194,0.064516,0.129032,0.096774,0.129032,0.096774,0.435484,0.048387,24,27.467742,0.258065,0.66129,0.048387,0.032258,-6.158332,-9.563293,17.922541,5.82835,-0.042463,-0.061708,0.124462,0.040332,,inf,-inf,,-4.983871,0,-13,4.248402,12.032258,13,11,0.254,34460275.0,555810.887097,32035140.0
3,C_ID_0001793786,189,0.0,0.0,0.042328,0.359788,0.063492,0.0,0.0,1.0,0.0,0.0,114,45,4,9,22,-24.960674,-0.132067,4.554145,-0.737892,0.867916,0,0.0,0,0,0.0,24487497.0,1484994000.0,1509481000.0,0.010582,0.031746,0.058201,0.137566,0.021164,0.185185,0.132275,0.15873,0.15873,0.10582,0.0,0.0,0.089947,0.21164,0.195767,0.185185,0.174603,0.126984,0.015873,32,27.296296,0.137566,0.380952,0.31746,0.164021,-2.119884,-11.806271,72.866318,13.932748,-0.008231,-0.046598,0.284634,0.054088,,inf,-inf,,-3.301587,0,-9,2.301491,16.084656,17,15,0.297557,24487497.0,129563.47619,41424760.0
4,C_ID_000183fdda,137,4.0,0.029197,0.051095,0.007299,0.905109,0.0,0.007299,0.0,0.729927,0.240876,71,34,7,9,20,-67.896553,-0.495595,2.764788,-0.737892,0.52376,245,1.788321,10,-1,2.108912,15148616.0,1504444000.0,1519592000.0,0.20438,0.20438,0.0,0.0,0.0,0.0,0.0,0.0,0.160584,0.175182,0.065693,0.189781,0.124088,0.065693,0.211679,0.189781,0.167883,0.153285,0.087591,25,27.510949,0.087591,0.291971,0.270073,0.350365,-5.928506,-8.854703,33.177456,6.280169,-0.041441,-0.065988,0.230399,0.043699,-0.483919,0.936062,-0.737892,0.33874,-2.284672,0,-5,1.782055,11.970803,12,11,0.168976,15148616.0,110573.839416,31313750.0


In [38]:
authorized.columns

Index(['card_id', 'auth_transactions_count', 'auth_category_1_sum',
       'auth_category_1_mean', 'auth_category_2_1.0_mean',
       'auth_category_2_2.0_mean', 'auth_category_2_3.0_mean',
       'auth_category_2_4.0_mean', 'auth_category_2_5.0_mean',
       'auth_category_3_A_mean', 'auth_category_3_B_mean',
       'auth_category_3_C_mean', 'auth_merchant_id_nunique',
       'auth_merchant_category_id_nunique', 'auth_state_id_nunique',
       'auth_city_id_nunique', 'auth_subsector_id_nunique',
       'auth_purchase_amount_sum', 'auth_purchase_amount_mean',
       'auth_purchase_amount_max', 'auth_purchase_amount_min',
       'auth_purchase_amount_std', 'auth_installments_sum',
       'auth_installments_mean', 'auth_installments_max',
       'auth_installments_min', 'auth_installments_std',
       'auth_purchase_date_ptp', 'auth_purchase_date_min',
       'auth_purchase_date_max', 'auth_purchase_month_1_mean',
       'auth_purchase_month_2_mean', 'auth_purchase_month_3_mean',
       

In [39]:
#card_id,month_lagごとに集計し、その値をcard_idごとに'mean','std'で集計する
def aggregate_per_month(history):
    grouped = history.groupby(['card_id', 'month_lag'])

    agg_func = {
            'purchase_amount': ['count', 'sum', 'mean', 'min', 'max', 'std'],
            'installments': ['count', 'sum', 'mean', 'min', 'max', 'std'],
            }

    intermediate_group = grouped.agg(agg_func)
    intermediate_group.columns = ['_'.join(col).strip() for col in intermediate_group.columns.values]
    intermediate_group.reset_index(inplace=True)

    final_group = intermediate_group.groupby('card_id').agg(['mean', 'std'])
    final_group.columns = ['_'.join(col).strip() for col in final_group.columns.values]
    final_group.reset_index(inplace=True)
    
    return final_group

In [40]:
final_group =  aggregate_per_month(authorized_transactions) 

In [41]:
final_group.head()

Unnamed: 0,card_id,month_lag_mean,month_lag_std,purchase_amount_count_mean,purchase_amount_count_std,purchase_amount_sum_mean,purchase_amount_sum_std,purchase_amount_mean_mean,purchase_amount_mean_std,purchase_amount_min_mean,purchase_amount_min_std,purchase_amount_max_mean,purchase_amount_max_std,purchase_amount_std_mean,purchase_amount_std_std,installments_count_mean,installments_count_std,installments_sum_mean,installments_sum_std,installments_mean_mean,installments_mean_std,installments_min_mean,installments_min_std,installments_max_mean,installments_max_std,installments_std_mean,installments_std_std
0,C_ID_00007093c1,-6.0,3.89444,8.769231,3.539158,-4.803313,2.175011,-0.536131,0.075418,-0.702649,0.041293,-0.126683,0.446167,0.197339,0.120199,8.769231,3.539158,11.307692,4.210792,1.369646,0.42319,1.0,0.0,2.538462,1.613246,0.625428,0.675607
1,C_ID_0001238066,-2.5,1.870829,20.0,11.696153,-11.776212,6.676914,-0.603215,0.046045,-0.730351,0.002699,-0.160977,0.485869,0.151393,0.093832,20.0,11.696153,32.5,24.50102,1.652873,0.410539,0.666667,0.816497,4.666667,2.875181,1.256525,0.521292
2,C_ID_0001506ef0,-6.230769,4.225988,4.769231,4.342692,-2.438161,2.902502,-0.44666,0.271125,-0.725724,0.011212,0.161333,0.748701,0.496308,0.415117,4.769231,4.342692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,C_ID_0001793786,-4.5,3.02765,18.9,12.160501,-2.496067,4.01881,-0.005687,0.271221,-0.646277,0.16623,2.190924,1.290775,0.880276,0.259048,18.9,12.160501,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,C_ID_000183fdda,-2.5,1.870829,22.833333,7.167054,-11.316092,4.800612,-0.489227,0.141015,-0.728405,0.009876,1.151797,1.062785,0.472149,0.315407,22.833333,7.167054,40.833333,14.048725,1.834943,0.362557,0.333333,1.032796,9.166667,2.041241,2.144389,0.665529


In [42]:
def successive_aggregates(df, field1, field2):
    t = df.groupby(['card_id', field1])[field2].mean()
    u = pd.DataFrame(t).reset_index().groupby('card_id')[field2].agg(['mean', 'min', 'max', 'std'])
    u.columns = [field1 + '_' + field2 + '_' + col for col in u.columns.values]
    u.reset_index(inplace=True)
    return u

In [43]:
new_transactions.head()

Unnamed: 0,authorized_flag,card_id,city_id,category_1,installments,merchant_category_id,merchant_id,month_lag,purchase_amount,purchase_date,state_id,subsector_id,month_diff,category_2_1.0,category_2_2.0,category_2_3.0,category_2_4.0,category_2_5.0,category_3_A,category_3_B,category_3_C,purchase_month_1,purchase_month_2,purchase_month_3,purchase_month_4,purchase_month_5,purchase_month_6,purchase_month_7,purchase_month_8,purchase_month_9,purchase_month_10,purchase_month_11,purchase_month_12,purchase_weekofyear,purchase_day,purchase_weekday_Monday,purchase_weekday_Tuesday,purchase_weekday_Wednesday,purchase_weekday_Thursday,purchase_weekday_Friday,purchase_weekday_Saturday,purchase_weekday_Sunday,purchase_session_Morning,purchase_session_Afternoon,purchase_session_Evening,purchase_session_Night,duration,amount_month_ratio,price
0,1,C_ID_415bb3a509,107,0,1,307,M_ID_b0c793002c,1,-0.557617,1520780000.0,9,19,12,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,0,1,0,1,0,0,-6.691406,-0.046478,-0.557617
1,1,C_ID_415bb3a509,140,0,1,307,M_ID_88920c89e8,1,-0.569336,1521486000.0,9,19,12,1,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,12,0,1,0,0,0,0,0,0,0,0,1,0,-6.832031,-0.047455,-0.569336
2,1,C_ID_415bb3a509,330,0,1,507,M_ID_ad5237ef6b,2,-0.55127,1524752000.0,9,14,12,1,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,17,1,0,0,0,1,0,0,0,0,1,0,0,-6.617188,-0.045929,-0.55127
3,1,C_ID_415bb3a509,-1,1,1,661,M_ID_9e84cda3b1,1,-0.671875,1520416000.0,-1,8,12,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,10,0,0,0,1,0,0,0,0,1,0,0,0,-8.0625,-0.056,-0.671875
4,1,C_ID_ef55cf8d4b,-1,1,1,166,M_ID_3c86fa3831,1,-0.659668,1521753000.0,-1,29,12,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,12,0,0,0,0,1,0,0,0,0,0,0,1,-7.914062,-0.054962,-0.659668


In [44]:
additional_fields = successive_aggregates(new_transactions, 'category_1', 'purchase_amount')
additional_fields = additional_fields.merge(successive_aggregates(new_transactions, 'installments', 'purchase_amount'),on = 'card_id', how='left')
additional_fields = additional_fields.merge(successive_aggregates(new_transactions, 'city_id', 'purchase_amount'),on = 'card_id', how='left')
additional_fields = additional_fields.merge(successive_aggregates(new_transactions, 'category_1', 'installments'),on = 'card_id', how='left')

In [45]:
additional_fields.head()

Unnamed: 0,card_id,category_1_purchase_amount_mean,category_1_purchase_amount_min,category_1_purchase_amount_max,category_1_purchase_amount_std,installments_purchase_amount_mean,installments_purchase_amount_min,installments_purchase_amount_max,installments_purchase_amount_std,city_id_purchase_amount_mean,city_id_purchase_amount_min,city_id_purchase_amount_max,city_id_purchase_amount_std,category_1_installments_mean,category_1_installments_min,category_1_installments_max,category_1_installments_std
0,C_ID_00007093c1,-0.664062,-0.664062,-0.664062,,-0.664062,-0.664062,-0.664062,,-0.664062,-0.671875,-0.656738,0.010704,1.0,1.0,1.0,
1,C_ID_0001238066,-0.607422,-0.650391,-0.564453,0.06076,-0.423828,-0.625977,-0.151978,0.222046,-0.558105,-0.666992,-0.472168,0.072632,2.479167,1.458333,3.5,1.443676
2,C_ID_0001506ef0,-0.723633,-0.723633,-0.723633,,-0.723633,-0.723633,-0.723633,,-0.723633,-0.723633,-0.723633,,0.0,0.0,0.0,
3,C_ID_0001793786,-0.007385,-0.007385,-0.007385,,-0.007385,-0.007385,-0.007385,,-0.24707,-0.572754,0.249268,0.346924,0.0,0.0,0.0,
4,C_ID_000183fdda,-0.599121,-0.599121,-0.599121,,-0.491211,-0.696289,-0.107666,0.233887,-0.572266,-0.60498,-0.539062,0.0466,1.454545,1.454545,1.454545,


-----------------------------------------

#### 合成特徴量作成まとめ

aggregate_transactions: card_idごとに各要素を集計する  
aggregate_per_month: card_id,month_lagごとに集計し、その値をcard_idごとに'mean','std'で集計する  
successive_aggregates: card_id,field1ごとに集計しfield2の平均を算出、さらにcard_idごとに'mean','min'などを算出する 

--------------------------------

#### train,test読み込み

In [46]:
def read_data(input_file):
    df = pd.read_csv(input_file)
    
    df['first_active_month'].fillna('2015-6-1',inplace=True)
    
    df['first_active_month'] = pd.to_datetime(df['first_active_month'])
    df['elapsed_time'] = (datetime.date(2018, 2, 1) - df['first_active_month'].dt.date).dt.days
    
    df['first_active_year']=df['first_active_month'].dt.year
#     year_labels = ['2011', '2012', '2013', '2014', '2015', '2016', '2017', '2018']
#     df['first_active_year'] = pd.Categorical(df['first_active_year'], categories = year_labels,ordered = True)
#     df=pd.get_dummies(df, columns=['first_active_year'])

    df['first_active_month']=df['first_active_month'].dt.month
#     month_labels = ['1', '2', '3', '4', '5', '6', '7', '8','9','10','11','12']
#     df['first_active_month'] = pd.Categorical(df['first_active_month'], categories = month_labels,ordered = True)
#     df=pd.get_dummies(df, columns=['first_active_month'])
    
    return df
#_________________________________________
if exe_env_flag==1 or exe_env_flag==2:
    train = read_data('../input/train.csv')
    test = read_data('../input/test.csv')
elif exe_env_flag==3:
    from google.colab import drive
    drive.mount('/content/gdrive')
    train = read_data('./gdrive/My Drive/Colab Notebooks/elo/input/train.csv')
    test = read_data('./gdrive/My Drive/Colab Notebooks/elo/input/test.csv')

# target = train['target']
# del train['target']

#### trainに算出した特徴量を併合する

In [47]:
train = pd.merge(train, non_authorized, on='card_id', how='left')
test = pd.merge(test, non_authorized, on='card_id', how='left')

train = pd.merge(train, authorized, on='card_id', how='left')
test = pd.merge(test, authorized, on='card_id', how='left')

train = pd.merge(train, newt, on='card_id', how='left')
test = pd.merge(test, newt, on='card_id', how='left')

train = pd.merge(train, final_group, on='card_id', how='left')
test = pd.merge(test, final_group, on='card_id', how='left')

train = pd.merge(train, auth_mean, on='card_id', how='left')
test = pd.merge(test, auth_mean, on='card_id', how='left')

train = pd.merge(train, additional_fields, on='card_id', how='left')
test = pd.merge(test, additional_fields, on='card_id', how='left')

In [48]:
[c for c in train.columns]

['first_active_month',
 'card_id',
 'feature_1',
 'feature_2',
 'feature_3',
 'target',
 'elapsed_time',
 'first_active_year',
 'non_transactions_count',
 'non_category_1_sum',
 'non_category_1_mean',
 'non_category_2_1.0_mean',
 'non_category_2_2.0_mean',
 'non_category_2_3.0_mean',
 'non_category_2_4.0_mean',
 'non_category_2_5.0_mean',
 'non_category_3_A_mean',
 'non_category_3_B_mean',
 'non_category_3_C_mean',
 'non_merchant_id_nunique',
 'non_merchant_category_id_nunique',
 'non_state_id_nunique',
 'non_city_id_nunique',
 'non_subsector_id_nunique',
 'non_purchase_amount_sum',
 'non_purchase_amount_mean',
 'non_purchase_amount_max',
 'non_purchase_amount_min',
 'non_purchase_amount_std',
 'non_installments_sum',
 'non_installments_mean',
 'non_installments_max',
 'non_installments_min',
 'non_installments_std',
 'non_purchase_date_ptp',
 'non_purchase_date_min',
 'non_purchase_date_max',
 'non_purchase_month_1_mean',
 'non_purchase_month_2_mean',
 'non_purchase_month_3_mean',
 'n

In [49]:
train.to_csv('./save/train.csv', index=False)
test.to_csv('./save/test.csv', index=False)

In [50]:
train.head()

Unnamed: 0,first_active_month,card_id,feature_1,feature_2,feature_3,target,elapsed_time,first_active_year,non_transactions_count,non_category_1_sum,non_category_1_mean,non_category_2_1.0_mean,non_category_2_2.0_mean,non_category_2_3.0_mean,non_category_2_4.0_mean,non_category_2_5.0_mean,non_category_3_A_mean,non_category_3_B_mean,non_category_3_C_mean,non_merchant_id_nunique,non_merchant_category_id_nunique,non_state_id_nunique,non_city_id_nunique,non_subsector_id_nunique,non_purchase_amount_sum,non_purchase_amount_mean,non_purchase_amount_max,non_purchase_amount_min,non_purchase_amount_std,non_installments_sum,non_installments_mean,non_installments_max,non_installments_min,non_installments_std,non_purchase_date_ptp,non_purchase_date_min,non_purchase_date_max,non_purchase_month_1_mean,non_purchase_month_2_mean,non_purchase_month_3_mean,non_purchase_month_4_mean,non_purchase_month_5_mean,non_purchase_month_6_mean,non_purchase_month_7_mean,non_purchase_month_8_mean,non_purchase_month_9_mean,non_purchase_month_10_mean,non_purchase_month_11_mean,non_purchase_month_12_mean,non_purchase_weekday_Monday_mean,non_purchase_weekday_Tuesday_mean,non_purchase_weekday_Wednesday_mean,non_purchase_weekday_Thursday_mean,non_purchase_weekday_Friday_mean,non_purchase_weekday_Saturday_mean,non_purchase_weekday_Sunday_mean,non_purchase_weekofyear_nunique,non_purchase_weekofyear_mean,non_purchase_session_Morning_mean,non_purchase_session_Afternoon_mean,non_purchase_session_Evening_mean,non_purchase_session_Night_mean,non_duration_mean,non_duration_min,non_duration_max,non_duration_std,non_amount_month_ratio_mean,non_amount_month_ratio_min,non_amount_month_ratio_max,non_amount_month_ratio_std,non_price_mean,non_price_max,non_price_min,non_price_std,non_month_lag_mean,non_month_lag_max,non_month_lag_min,non_month_lag_std,non_month_diff_mean,non_month_diff_max,non_month_diff_min,non_month_diff_std,non_purchase_date_diff,non_purchase_date_average,non_purchase_date_uptonow,auth_transactions_count,auth_category_1_sum,auth_category_1_mean,auth_category_2_1.0_mean,auth_category_2_2.0_mean,auth_category_2_3.0_mean,auth_category_2_4.0_mean,auth_category_2_5.0_mean,auth_category_3_A_mean,auth_category_3_B_mean,auth_category_3_C_mean,auth_merchant_id_nunique,auth_merchant_category_id_nunique,auth_state_id_nunique,auth_city_id_nunique,auth_subsector_id_nunique,auth_purchase_amount_sum,auth_purchase_amount_mean,auth_purchase_amount_max,auth_purchase_amount_min,auth_purchase_amount_std,auth_installments_sum,auth_installments_mean,auth_installments_max,auth_installments_min,auth_installments_std,auth_purchase_date_ptp,auth_purchase_date_min,auth_purchase_date_max,auth_purchase_month_1_mean,auth_purchase_month_2_mean,auth_purchase_month_3_mean,auth_purchase_month_4_mean,auth_purchase_month_5_mean,auth_purchase_month_6_mean,auth_purchase_month_7_mean,auth_purchase_month_8_mean,auth_purchase_month_9_mean,auth_purchase_month_10_mean,auth_purchase_month_11_mean,auth_purchase_month_12_mean,auth_purchase_weekday_Monday_mean,auth_purchase_weekday_Tuesday_mean,auth_purchase_weekday_Wednesday_mean,auth_purchase_weekday_Thursday_mean,auth_purchase_weekday_Friday_mean,auth_purchase_weekday_Saturday_mean,auth_purchase_weekday_Sunday_mean,auth_purchase_weekofyear_nunique,auth_purchase_weekofyear_mean,auth_purchase_session_Morning_mean,auth_purchase_session_Afternoon_mean,auth_purchase_session_Evening_mean,auth_purchase_session_Night_mean,auth_duration_mean,auth_duration_min,auth_duration_max,auth_duration_std,auth_amount_month_ratio_mean,auth_amount_month_ratio_min,auth_amount_month_ratio_max,auth_amount_month_ratio_std,auth_price_mean,auth_price_max,auth_price_min,auth_price_std,auth_month_lag_mean,auth_month_lag_max,auth_month_lag_min,auth_month_lag_std,auth_month_diff_mean,auth_month_diff_max,auth_month_diff_min,auth_month_diff_std,auth_purchase_date_diff,auth_purchase_date_average,auth_purchase_date_uptonow,newt_transactions_count,newt_category_1_sum,newt_category_1_mean,newt_category_2_1.0_mean,newt_category_2_2.0_mean,newt_category_2_3.0_mean,newt_category_2_4.0_mean,newt_category_2_5.0_mean,newt_category_3_A_mean,newt_category_3_B_mean,newt_category_3_C_mean,newt_merchant_id_nunique,newt_merchant_category_id_nunique,newt_state_id_nunique,newt_city_id_nunique,newt_subsector_id_nunique,newt_purchase_amount_sum,newt_purchase_amount_mean,newt_purchase_amount_max,newt_purchase_amount_min,newt_purchase_amount_std,newt_installments_sum,newt_installments_mean,newt_installments_max,newt_installments_min,newt_installments_std,newt_purchase_date_ptp,newt_purchase_date_min,newt_purchase_date_max,newt_purchase_month_1_mean,newt_purchase_month_2_mean,newt_purchase_month_3_mean,newt_purchase_month_4_mean,newt_purchase_month_5_mean,newt_purchase_month_6_mean,newt_purchase_month_7_mean,newt_purchase_month_8_mean,newt_purchase_month_9_mean,newt_purchase_month_10_mean,newt_purchase_month_11_mean,newt_purchase_month_12_mean,newt_purchase_weekday_Monday_mean,newt_purchase_weekday_Tuesday_mean,newt_purchase_weekday_Wednesday_mean,newt_purchase_weekday_Thursday_mean,newt_purchase_weekday_Friday_mean,newt_purchase_weekday_Saturday_mean,newt_purchase_weekday_Sunday_mean,newt_purchase_weekofyear_nunique,newt_purchase_weekofyear_mean,newt_purchase_session_Morning_mean,newt_purchase_session_Afternoon_mean,newt_purchase_session_Evening_mean,newt_purchase_session_Night_mean,newt_duration_mean,newt_duration_min,newt_duration_max,newt_duration_std,newt_amount_month_ratio_mean,newt_amount_month_ratio_min,newt_amount_month_ratio_max,newt_amount_month_ratio_std,newt_price_mean,newt_price_max,newt_price_min,newt_price_std,newt_month_lag_mean,newt_month_lag_max,newt_month_lag_min,newt_month_lag_std,newt_month_diff_mean,newt_month_diff_max,newt_month_diff_min,newt_month_diff_std,newt_purchase_date_diff,newt_purchase_date_average,newt_purchase_date_uptonow,month_lag_mean,month_lag_std,purchase_amount_count_mean,purchase_amount_count_std,purchase_amount_sum_mean,purchase_amount_sum_std,purchase_amount_mean_mean,purchase_amount_mean_std,purchase_amount_min_mean,purchase_amount_min_std,purchase_amount_max_mean,purchase_amount_max_std,purchase_amount_std_mean,purchase_amount_std_std,installments_count_mean,installments_count_std,installments_sum_mean,installments_sum_std,installments_mean_mean,installments_mean_std,installments_min_mean,installments_min_std,installments_max_mean,installments_max_std,installments_std_mean,installments_std_std,authorized_flag_mean,category_1_purchase_amount_mean,category_1_purchase_amount_min,category_1_purchase_amount_max,category_1_purchase_amount_std,installments_purchase_amount_mean,installments_purchase_amount_min,installments_purchase_amount_max,installments_purchase_amount_std,city_id_purchase_amount_mean,city_id_purchase_amount_min,city_id_purchase_amount_max,city_id_purchase_amount_std,category_1_installments_mean,category_1_installments_min,category_1_installments_max,category_1_installments_std
0,6,C_ID_92a2005557,5,2,1,-0.820283,245,2017,13.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.692308,0.307692,0.0,12.0,10.0,1.0,2.0,7.0,-8.571723,-0.659363,-0.431922,-0.737892,0.098851,4.0,0.307692,1.0,0.0,0.480384,14254523.0,1500131000.0,1514385000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.307692,0.076923,0.230769,0.153846,0.153846,0.0,0.153846,0.461538,0.076923,0.0,0.230769,0.076923,9.0,39.230769,0.0,0.538462,0.153846,0.307692,-7.91236,-8.854703,-5.183064,1.186206,-0.054947,-0.061491,-0.035994,0.008238,-inf,-0.616027,-inf,,-4.461538,-2.0,-7.0,1.664101,12.0,12.0,12.0,0.0,14254523.0,1096502.0,36520640.0,247,0.0,0.0,0.987854,0.0,0.0,0.0,0.012146,1.0,0.0,0.0,93,41,3,7,21,-157.397018,-0.637235,2.258394,-0.739395,0.216518,0,0.0,0,0,0.0,20977987.0,1498573000.0,1519551000.0,0.08502,0.093117,0.0,0.0,0.0,0.012146,0.194332,0.161943,0.076923,0.076923,0.076923,0.222672,0.121457,0.117409,0.145749,0.149798,0.117409,0.190283,0.157895,35,32.748988,0.214575,0.493927,0.17004,0.121457,-7.665532,-9.463276,27.100735,2.604952,-0.05299,-0.065715,0.1882,0.018051,,inf,-inf,,-3.882591,0,-8,2.429155,12.02834,13,11,0.209545,20977987.0,84931.121457,31354900.0,23.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,23.0,14.0,1.0,3.0,10.0,-13.242188,-0.575684,-0.296143,-0.724609,0.135742,0.0,0.0,0.0,0.0,0.0,4742309.0,1520259000.0,1525001000.0,0.0,0.0,0.521739,0.478261,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.173913,0.086957,0.086957,0.130435,0.26087,0.130435,0.130435,7.0,13.304348,0.304348,0.695652,0.0,0.0,-6.898438,-8.695312,-3.257812,1.658203,-0.048096,-0.060394,-0.024673,0.011116,-inf,-inf,-inf,,1.478261,2.0,1.0,0.510754,11.956522,12.0,11.0,0.208514,4742309.0,206187.347826,25905030.0,-4.0,2.738613,27.444444,16.629124,-17.488558,10.015656,-0.650482,0.052264,-0.732686,0.008797,-0.158815,0.916015,0.109584,0.119218,27.444444,16.629124,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.95,-0.575684,-0.575684,-0.575684,,-0.575684,-0.575684,-0.575684,,-0.458984,-0.606445,-0.296143,0.155762,0.0,0.0,0.0,
1,1,C_ID_3d0044924f,4,1,0,0.392913,396,2017,11.0,2.0,0.181818,0.818182,0.0,0.0,0.0,0.0,0.0,0.363636,0.636364,9.0,9.0,2.0,2.0,9.0,-1.122886,-0.102081,1.942838,-0.740897,0.785906,42.0,3.818182,10.0,1.0,3.487641,25890841.0,1488576000.0,1514467000.0,0.0,0.0,0.090909,0.0,0.090909,0.0,0.090909,0.0,0.454545,0.090909,0.0,0.181818,0.090909,0.090909,0.0,0.181818,0.272727,0.272727,0.090909,9.0,35.363636,0.090909,0.363636,0.181818,0.363636,-1.299325,-9.631663,25.256891,10.232972,-0.008005,-0.056992,0.149449,0.06037,-0.260851,0.194284,-0.740897,0.354041,-4.454545,-1.0,-10.0,2.696799,13.090909,14.0,13.0,0.301511,25890841.0,2353713.0,36438580.0,339,29.0,0.085546,0.914454,0.0,0.0,0.0,0.0,0.0,0.80236,0.19174,141,57,3,9,24,-208.883453,-0.616175,4.6303,-0.7424,0.355554,501,1.477876,10,-1,1.350634,33717687.0,1483720000.0,1517438000.0,0.212389,0.064897,0.035398,0.053097,0.041298,0.100295,0.141593,0.041298,0.056047,0.073746,0.047198,0.132743,0.115044,0.129794,0.103245,0.120944,0.153392,0.224189,0.153392,49,24.890855,0.135693,0.365782,0.289086,0.20944,-8.039538,-10.330486,60.193893,4.63103,-0.047251,-0.06136,0.356177,0.027354,-0.578548,2.31515,-0.7424,0.292072,-5.050147,0,-12,3.836969,13.050147,14,12,0.298653,33717687.0,99462.20354,33468110.0,6.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,6.0,5.0,1.0,1.0,4.0,-4.355469,-0.726074,-0.70166,-0.739258,0.014381,6.0,1.0,1.0,1.0,0.0,4887632.0,1517505000.0,1522393000.0,0.0,0.5,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.333333,0.333333,0.0,0.166667,0.166667,0.0,0.0,4.0,9.0,0.5,0.166667,0.333333,0.0,-9.4375,-9.609375,-9.125,0.185547,-0.055847,-0.056854,-0.053986,0.001092,-0.725911,-0.70166,-0.739258,0.014381,1.5,2.0,1.0,0.547723,13.0,13.0,13.0,0.0,4887632.0,814605.333333,28513510.0,-6.0,3.89444,26.076923,13.76264,-16.067957,10.153878,-0.593358,0.095125,-0.736968,0.005142,0.510664,1.327026,0.282114,0.283998,26.076923,13.76264,38.538462,13.345507,1.618392,0.453852,0.692308,0.751068,6.0,2.54951,1.365982,0.792487,0.968571,-0.726074,-0.726074,-0.726074,,-0.726074,-0.726074,-0.726074,,-0.726074,-0.726074,-0.726074,,1.0,1.0,1.0,
2,8,C_ID_d639edf6cd,2,2,0,0.688056,549,2016,2.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,-1.338967,-0.669484,-0.637515,-0.701453,0.045211,0.0,0.0,0.0,0.0,0.0,4922885.0,1487878000.0,1492801000.0,0.0,0.5,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,0.0,2.0,12.0,0.0,0.0,1.0,0.0,-8.033804,-8.417431,-7.650177,0.54253,-0.05579,-0.058454,-0.053126,0.003768,-inf,-inf,-inf,,-11.0,-10.0,-12.0,1.414214,12.0,12.0,12.0,0.0,4922885.0,2461442.0,58104640.0,41,0.0,0.0,0.097561,0.0,0.0,0.0,0.902439,1.0,0.0,0.0,13,8,2,5,7,-27.828424,-0.678742,-0.145847,-0.730138,0.08923,0,0.0,0,0,0.0,35635623.0,1484123000.0,1519759000.0,0.195122,0.146341,0.121951,0.146341,0.097561,0.0,0.04878,0.04878,0.02439,0.146341,0.0,0.02439,0.073171,0.04878,0.219512,0.146341,0.243902,0.219512,0.04878,22,18.682927,0.073171,0.243902,0.463415,0.219512,-8.162649,-9.457808,-1.750168,1.086609,-0.056448,-0.060845,-0.012154,0.007408,-inf,-inf,-inf,,-8.487805,0,-13,3.893083,12.02439,13,12,0.156174,35635623.0,869161.536585,31147470.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,-0.700195,-0.700195,-0.700195,-0.700195,,0.0,0.0,0.0,0.0,,0.0,1524937000.0,1524937000.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,17.0,0.0,0.0,1.0,0.0,-8.40625,-8.40625,-8.40625,,-0.05835,-0.05835,-0.05835,,-inf,-inf,-inf,,2.0,2.0,2.0,,12.0,12.0,12.0,,0.0,0.0,25968630.0,-6.666667,4.396969,3.416667,2.108784,-2.319035,1.4985,-0.66932,0.080664,-0.706751,0.018795,-0.62109,0.151778,0.065945,0.119028,3.416667,2.108784,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.953488,-0.700195,-0.700195,-0.700195,,-0.700195,-0.700195,-0.700195,,-0.700195,-0.700195,-0.700195,,0.0,0.0,0.0,
3,9,C_ID_186d6a6901,4,3,0,0.142495,153,2017,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,77,12.0,0.155844,0.155844,0.0,0.0,0.688312,0.0,0.0,0.883117,0.090909,50,25,5,7,13,-49.491364,-0.642745,1.445596,-0.740897,0.261624,84,1.090909,3,-1,0.588974,13375339.0,1506443000.0,1519818000.0,0.077922,0.207792,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.402597,0.090909,0.077922,0.090909,0.142857,0.246753,0.12987,0.246753,0.077922,0.064935,20,32.012987,0.090909,0.584416,0.168831,0.155844,-7.694144,-8.890766,17.347145,3.135735,-0.053704,-0.066401,0.120466,0.021864,-0.596572,1.445596,-0.740897,0.333436,-2.831169,0,-5,1.802065,11.974026,12,11,0.160101,13375339.0,173705.701299,31087700.0,7.0,1.0,0.142857,0.0,0.0,0.0,0.857143,0.0,0.0,0.857143,0.0,7.0,6.0,2.0,2.0,5.0,-4.65625,-0.665039,-0.566895,-0.734375,0.065918,5.0,0.714286,1.0,-1.0,0.755929,3625505.0,1520424000.0,1524049000.0,0.0,0.0,0.285714,0.714286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.142857,0.428571,0.0,0.0,0.285714,0.142857,5.0,13.857143,0.571429,0.142857,0.142857,0.142857,-7.980469,-8.8125,-6.804688,0.790527,-0.05542,-0.061188,-0.047241,0.005486,-0.502999,0.566895,-0.734375,0.474393,1.714286,2.0,1.0,0.48795,12.0,12.0,12.0,0.0,3625505.0,517929.285714,26856810.0,-2.5,1.870829,12.833333,9.703951,-8.248561,7.140594,-0.604986,0.167044,-0.734266,0.009074,-0.097344,0.766088,0.205505,0.277437,12.833333,9.703951,14.0,11.523888,1.044819,0.215637,0.333333,1.032796,2.0,1.095445,0.518905,0.483284,1.0,-0.664062,-0.665039,-0.663086,0.001381,-0.624023,-0.681152,-0.566895,0.080811,-0.664062,-0.665039,-0.663086,0.001381,0.833333,0.666667,1.0,0.235702
4,11,C_ID_cdbd2c0db2,1,3,0,-0.159749,92,2017,5.0,3.0,0.6,0.0,0.0,0.0,0.4,0.0,0.0,0.4,0.6,2.0,2.0,2.0,2.0,2.0,20.352808,4.070562,7.193041,-0.512945,4.18495,38.0,7.6,12.0,1.0,6.024948,3274330.0,1516485000.0,1519759000.0,0.4,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0,0.0,0.4,0.0,2.0,6.6,0.0,0.0,0.6,0.4,48.846741,-6.15534,86.31649,50.219402,0.339213,-0.042745,0.59942,0.348746,0.151134,0.59942,-0.512945,0.606256,-0.4,0.0,-1.0,0.547723,12.0,12.0,12.0,0.0,3274330.0,654866.0,31146570.0,128,12.0,0.09375,0.078125,0.0,0.0,0.820312,0.007812,0.0,0.96875,0.03125,65,26,6,6,17,-69.040466,-0.539379,6.992617,-0.746156,0.737087,144,1.125,12,1,1.003929,9405641.0,1510445000.0,1519850000.0,0.34375,0.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.164062,0.242188,0.125,0.09375,0.148438,0.109375,0.210938,0.171875,0.140625,17,22.375,0.320312,0.296875,0.203125,0.179688,-6.445041,-8.953877,83.911407,8.839849,-0.045157,-0.066944,0.582718,0.06148,-0.599434,0.728986,-0.746156,0.237865,-1.320312,0,-3,1.02668,11.960938,12,11,0.194505,9405641.0,73481.570312,31055540.0,36.0,2.0,0.055556,0.055556,0.0,0.194444,0.694444,0.0,0.0,0.944444,0.027778,36.0,17.0,5.0,5.0,10.0,-19.921875,-0.553711,0.450928,-0.739258,0.223877,35.0,0.972222,2.0,-1.0,0.376913,4949682.0,1519992000.0,1524941000.0,0.0,0.0,0.444444,0.555556,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.138889,0.083333,0.111111,0.138889,0.194444,0.222222,0.111111,8.0,13.361111,0.222222,0.444444,0.277778,0.055556,-6.605469,-8.875,5.410156,2.669922,-0.046417,-0.06543,0.037567,0.018845,-0.539157,0.450928,-0.739258,0.252781,1.555556,2.0,1.0,0.503953,11.944444,12.0,11.0,0.232311,4949682.0,137491.166667,25964590.0,-1.5,1.290994,32.0,9.416298,-17.260117,8.31636,-0.525492,0.137282,-0.732039,0.016267,2.259349,3.344281,0.568395,0.578257,32.0,9.416298,36.0,9.128709,1.141621,0.158387,1.0,0.0,4.5,5.066228,0.681434,0.879016,0.962406,-0.492676,-0.561035,-0.424316,0.09668,-0.358887,-0.571289,-0.175903,0.199341,-0.534668,-0.670898,-0.32666,0.150635,1.220588,0.941176,1.5,0.395148
