In [9]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

## Read data 

In [17]:
# [!] nrows limits each read to a sample of transactions; remove it to load the full files
new_merchant_transactions = pd.read_csv('all/new_merchant_transactions.csv', parse_dates=['purchase_date'], nrows=1000)
historical_transactions = pd.read_csv('all/historical_transactions.csv', parse_dates=['purchase_date'], nrows=1000)

# category_1 / category_2 are removed from the merchant table before the merge because,
# per the original author, the transaction data already carries those columns.
# `columns=` is used because DataFrame.drop no longer accepts `axis` positionally (pandas >= 2.0).
merchant = pd.read_csv('all/merchants.csv').drop(columns=['category_1', 'category_2'])

# Combine the two transaction tables into one.
# TODO: not yet decided whether new and historical transactions should be combined; kept together for now.
transactions = pd.concat([new_merchant_transactions, historical_transactions], axis=0)
# Optional filter on authorized_flag, currently disabled:
# transactions = transactions[transactions.authorized_flag== 'Y']

# Left-join the merchant attributes onto every transaction by merchant_id
transactions = pd.merge(transactions, merchant, on='merchant_id', how='left')




## Features 

In [18]:
# Transform features
def transfrom_features(df):
    """
    Add derived columns to a transaction frame.

    At present the only derived column is ``purchase_month``, the month
    component of ``purchase_date``. The column is written onto ``df`` itself
    and that same frame is returned.
    """
    purchase_dates = df['purchase_date']

    # Month component via the datetime accessor
    df['purchase_month'] = purchase_dates.dt.month

    return df


In [40]:

def agg_transaction_features(df):
    """
    Aggregate raw transaction rows into one row of features per card_id.

    The original author assumes merchant data has already been joined onto
    the transactions, although only the columns listed below are read here.

    Parameters
    ----------
    df : pandas.DataFrame
        Transaction data containing the columns ``card_id``,
        ``purchase_amount``, ``merchant_id``, ``installments`` and
        ``purchase_date``.

    Returns
    -------
    pandas.DataFrame
        One row per ``card_id`` with flat column names of the form
        ``<column>_<statistic>``: count/sum/mean of ``purchase_amount``,
        nunique of ``merchant_id``, sum of ``installments`` and min/max of
        ``purchase_date``.
        NOTE: the key column comes out as ``card_id_`` (trailing underscore),
        because its second column level is an empty string when the two
        levels are joined. Rename it before merging on ``card_id``.
    """

    # Aggregate features -- purchase related
    agg_purchase = df.groupby(by='card_id').agg(
        {'purchase_amount': ['count', 'sum', 'mean']
        , 'merchant_id': ['nunique']
        , 'installments': ['sum']
        , 'purchase_date': ['min', 'max']
        }).reset_index()

    # Flatten the two-level (column, statistic) header into single names.
    # Iterating the MultiIndex yields the tuples directly, so the deprecated
    # Index.ravel() round-trip is not needed.
    agg_purchase.columns = ["_".join(levels) for levels in agg_purchase.columns]

    return agg_purchase

## Aggregate by card_id 

In [42]:
# Add the derived purchase_month column, then aggregate the transactions by card_id.
# The aggregated frame is the cell's last expression, so the notebook displays it.
df = transfrom_features(transactions)
agg_transaction_features(df)

Unnamed: 0,card_id_,installments_sum,merchant_id_nunique,purchase_date_min,purchase_date_max,purchase_amount_count,purchase_amount_sum,purchase_amount_mean
0,C_ID_0682c61725,0,2,2018-03-05 09:39:03,2018-04-09 11:49:24,2,-1.293963,-0.646981
1,C_ID_086fe1da99,22,22,2018-03-01 14:25:48,2018-04-29 11:54:06,22,-13.099166,-0.595417
2,C_ID_0a6ceeffcc,0,20,2017-11-02 12:39:37,2017-12-22 13:52:37,21,-14.855736,-0.707416
3,C_ID_0cb8b21999,12,9,2018-03-13 17:11:35,2018-03-21 13:03:36,9,-5.172486,-0.574721
4,C_ID_0e171c1b48,3,72,2017-02-25 09:38:46,2018-02-27 13:48:19,301,-213.355250,-0.708821
5,C_ID_0e764b1f60,8,6,2018-03-03 10:18:46,2018-04-20 19:23:12,6,-4.029269,-0.671545
6,C_ID_18c53fe97a,23,16,2018-03-02 15:32:48,2018-04-30 20:12:32,16,-9.186728,-0.574171
7,C_ID_1c8decd066,0,3,2018-03-07 14:15:04,2018-03-20 16:14:12,3,-2.095462,-0.698487
8,C_ID_1f236c50e6,12,11,2018-03-01 18:25:39,2018-04-27 20:24:42,11,-3.269078,-0.297189
9,C_ID_2158fb5770,8,8,2018-03-18 09:30:54,2018-04-27 10:46:33,8,-3.668708,-0.458588
