### TODOs: 
- The metadata-generation step should be run separately, and its output then joined to the customer stats during the feature-engineering (FE) pipeline.

In [2]:
import pandas as pd
# Only the first 100 rows are used, so let the parser stop after 100 rows
# instead of reading the whole file and slicing it afterwards.
# NOTE(review): column dtypes are now inferred from these 100 rows only --
# confirm no column depends on later rows for its dtype.
df = pd.read_csv("../data/input_data_smp.csv", nrows=100)

In [42]:
import pandas as pd
import numpy as np 
from sklearn.pipeline import Pipeline

class DataframeFunctionTransformer():
    """Adapter that lets a plain function act as a step in a scikit-learn Pipeline.

    The step is stateless: ``fit`` learns nothing and ``transform`` returns
    ``func(input_df)``.

    ``get_params`` / ``set_params`` implement the scikit-learn estimator
    parameter protocol so that utilities which introspect estimators
    (e.g. ``sklearn.base.clone``) can handle this class.

    Parameters
    ----------
    func : callable
        Called with the object passed to ``transform``; its return value is
        the result of the transform.
    """

    def __init__(self, func):
        # Stored as-is (no validation/copy), as the estimator protocol expects.
        self.func = func

    def get_params(self, deep=True):
        """Return the constructor parameters as a ``{name: value}`` dict.

        ``deep`` is accepted for protocol compatibility; there are no nested
        estimators, so it has no effect.
        """
        return {"func": self.func}

    def set_params(self, **params):
        """Update constructor parameters and return ``self``.

        Raises
        ------
        ValueError
            If any key other than ``"func"`` is given. Nothing is modified
            in that case.
        """
        unknown = [key for key in params if key != "func"]
        if unknown:
            raise ValueError(
                "Invalid parameter(s) %s for %s; the only parameter is 'func'."
                % (sorted(unknown), type(self).__name__)
            )
        if "func" in params:
            self.func = params["func"]
        return self

    def transform(self, input_df, **transform_params):
        """Apply the wrapped function to ``input_df`` and return its result.

        ``transform_params`` are accepted but not forwarded to ``func``.
        """
        return self.func(input_df)

    def fit(self, X, y=None, **fit_params):
        """No-op fit: nothing is learned from ``X``/``y``; returns ``self``."""
        return self

In [43]:
def step_day_extractor(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``stepDay`` column equal to ``ceil(step / 24)``.

    Assumes ``step`` counts hours, so the result is a day number -- TODO confirm
    against the dataset description.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned so calls can be chained.
    The new column is float-valued because ``np.ceil`` returns floats.
    """
    hours_per_day = 24
    # Vectorized over the whole column instead of a Python lambda per row.
    input_df["stepDay"] = np.ceil(input_df["step"] / hours_per_day)
    return input_df

In [44]:
def step_hour_extractor(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``stepHour`` column: the position of ``step`` within its 24-step block.

    Computed as ``step - (ceil(step / 24) - 1) * 24``, so integer steps map to
    1..24 (step 24 -> 24, step 25 -> 1) rather than 0..23.
    Assumes ``step`` counts hours, making this an hour of day -- TODO confirm.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned. The new column is float-valued.
    """
    hours_per_day = 24
    step = input_df["step"]
    # Number of complete 24-step blocks that precede each step.
    completed_days = np.ceil(step / hours_per_day) - 1
    input_df["stepHour"] = step - completed_days * hours_per_day
    return input_df

In [45]:
def step_weekday_extractor(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add a ``stepWeekDay`` column equal to ``stepDay`` modulo 7.

    Values fall in ``[0, 7)``; days that are multiples of 7 map to 0.

    Requires the ``stepDay`` column, so ``step_day_extractor`` has to run on
    the frame first.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned.

    Raises
    ------
    KeyError
        If ``input_df`` has no ``stepDay`` column.
    """
    if "stepDay" not in input_df.columns:
        # Same exception type as the bare column lookup, but says how to fix it.
        raise KeyError(
            "stepDay column not found; run step_day_extractor before "
            "step_weekday_extractor"
        )
    # Vectorized over the whole column instead of a Python lambda per row.
    input_df["stepWeekDay"] = np.remainder(input_df["stepDay"], 7)
    return input_df

In [46]:
def is_dest_merchant_extractor(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add an integer ``isDestMerchant`` flag: 1 if ``nameDest`` starts with "M", else 0.

    Empty strings and missing values yield 0 instead of raising, which
    indexing the first character (``t[0]``) would do.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned.
    """
    # na=False keeps the mask strictly boolean so the integer cast cannot fail.
    starts_with_m = input_df["nameDest"].str.startswith("M", na=False)
    input_df["isDestMerchant"] = starts_with_m.astype("int")
    return input_df

In [47]:
def is_dest_balance_new_zero(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add an integer ``isDestBalanceNewZero`` flag: 1 where ``newbalanceDest == 0``, else 0.

    Missing values compare unequal to 0 and therefore yield 0.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned.
    """
    # One vectorized comparison instead of a Python lambda per row.
    input_df["isDestBalanceNewZero"] = input_df["newbalanceDest"].eq(0).astype("int")
    return input_df

In [48]:
def is_dest_balance_old_zero(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add an integer ``isDestBalanceOldZero`` flag: 1 where ``oldbalanceDest == 0``, else 0.

    Missing values compare unequal to 0 and therefore yield 0.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned.
    """
    # One vectorized comparison instead of a Python lambda per row.
    input_df["isDestBalanceOldZero"] = input_df["oldbalanceDest"].eq(0).astype("int")
    return input_df

In [49]:
def is_orig_balance_new_zero(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add an integer ``isOrigBalanceNewZero`` flag: 1 where ``newbalanceOrig == 0``, else 0.

    Missing values compare unequal to 0 and therefore yield 0.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned.
    """
    # One vectorized comparison instead of a Python lambda per row.
    input_df["isOrigBalanceNewZero"] = input_df["newbalanceOrig"].eq(0).astype("int")
    return input_df

In [54]:
def is_orig_balance_old_zero(input_df: pd.DataFrame) -> pd.DataFrame:
    """Add an integer ``isOrigBalanceOldZero`` flag: 1 where ``oldbalanceOrg == 0``, else 0.

    Note the source column is spelled ``oldbalanceOrg`` (not ``...Orig``).
    Missing values compare unequal to 0 and therefore yield 0.

    The column is added to ``input_df`` in place (the caller's frame is
    modified) and the same frame is returned.
    """
    # One vectorized comparison instead of a Python lambda per row.
    input_df["isOrigBalanceOldZero"] = input_df["oldbalanceOrg"].eq(0).astype("int")
    return input_df

In [55]:

# Feature extractors in execution order. step_weekday_extractor reads the
# "stepDay" column, so it has to come after step_day_extractor.
feature_extractors = [
    step_day_extractor,
    step_hour_extractor,
    step_weekday_extractor,
    is_dest_merchant_extractor,
    is_dest_balance_new_zero,
    is_dest_balance_old_zero,
    is_orig_balance_new_zero,
    is_orig_balance_old_zero,
]

# Each pipeline step is named after the function it wraps.
pipeline = Pipeline([
    (extractor.__name__, DataframeFunctionTransformer(extractor))
    for extractor in feature_extractors
])

# Apply the pipeline to the input dataframe. The extractors add their columns
# to `df` itself and return it, so `df` also carries the new columns afterwards.
df_features = pipeline.fit_transform(df)

# Show a preview rather than the whole frame.
df_features.head(10)

Unnamed: 0.1,Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,...,isFlaggedFraud,stepDay,temp,isDestMerchant,isDestBalanceNewZero,isDestBalanceOldZero,isOrigBalanceNewZero,isOrigBalanceOldZero,stepWeekDay,stepHour
0,0,1,PAYMENT,9839.64,C1231006815,170136.00,160296.36,M1979787155,0.00,0.00,...,0,1.0,1.0,1,1,1,0,0,1.0,1.0
1,1,1,PAYMENT,1864.28,C1666544295,21249.00,19384.72,M2044282225,0.00,0.00,...,0,1.0,1.0,1,1,1,0,0,1.0,1.0
2,2,1,TRANSFER,181.00,C1305486145,181.00,0.00,C553264065,0.00,0.00,...,0,1.0,1.0,0,1,1,1,0,1.0,1.0
3,3,1,CASH_OUT,181.00,C840083671,181.00,0.00,C38997010,21182.00,0.00,...,0,1.0,1.0,0,1,0,1,0,1.0,1.0
4,4,1,PAYMENT,11668.14,C2048537720,41554.00,29885.86,M1230701703,0.00,0.00,...,0,1.0,1.0,1,1,1,0,0,1.0,1.0
5,5,1,PAYMENT,7817.71,C90045638,53860.00,46042.29,M573487274,0.00,0.00,...,0,1.0,1.0,1,1,1,0,0,1.0,1.0
6,6,1,PAYMENT,7107.77,C154988899,183195.00,176087.23,M408069119,0.00,0.00,...,0,1.0,1.0,1,1,1,0,0,1.0,1.0
7,7,1,PAYMENT,7861.64,C1912850431,176087.23,168225.59,M633326333,0.00,0.00,...,0,1.0,1.0,1,1,1,0,0,1.0,1.0
8,8,1,PAYMENT,4024.36,C1265012928,2671.00,0.00,M1176932104,0.00,0.00,...,0,1.0,1.0,1,1,1,1,0,1.0,1.0
9,9,1,DEBIT,5337.77,C712410124,41720.00,36382.23,C195600860,41898.00,40348.79,...,0,1.0,1.0,0,0,0,0,0,1.0,1.0
