In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

In [2]:
# mock data
# Define columns and their datatypes based on the image
columns_dtypes = {
    # int64 columns
    "Transaction Serial No": "int64",
    "Release Number": "int64",
    "Status": "int64",
    "MTI": "int64",
    "PANNumber": "int64",
    "Processing Code": "int64",
    "Merchant Id": "int64",
    "Retrival Reference Number": "int64",
    "Account Identification1": "int64",
    "Account Identification2": "int64",
    "Confirmed": "int64",
    # float64 columns
    "Transaction Amount": "float64",
    "Card Billing Amount": "float64",
    "Expiration Date": "float64",
    "MCC": "float64",
    "POS Entry Mode": "float64",
    "POS Condition Code": "float64",
    "Auth ID Response Length": "float64",
    "Card Acceptor ID": "float64",
    "Auth ID Response": "float64",
    # datetime columns
    "Transaction Datetime": "datetime64[ns]",
    # object columns (string data)
    "Product Indicator": "object",
    "Originator Code": "object",
    "Responder Code": "object",
    "Primary Bit Map": "object",
    "Secondary Bit Map": "object",
    "Transmission Date": "object",
    "STAN": "int64",
    "Settlement Date": "object",
    "Capture Date": "object",
    "Country Code": "int64",
    "Response Code": "object",
    "Card Acceptor TerminalID": "object",
    "Terminal Owner": "object",
    "Terminal CIty": "object",
    "Terminal State": "object",
    "Terminal Country": "object",
    "Additional Data": "object",
    "Currency Code": "object",
    "Terminal Data": "object",
    "POS Data": "object",
    "Banknet Data": "object",
    "Recv Institution ID": "object",
    "Auth AgentID": "object",
    "Settlement Record": "object",
    "Batch and Shift Data": "object",
    "Settlement Data": "object",
    "Account Indicator": "object",
    "Pre authorization": "object",
    "ATM Additional Data": "object",
}


# Function to generate mock data for each column
def generate_mock_value(dtype, colname):
    if dtype == "int64":
        return np.random.randint(1000, 10000)
    elif dtype == "float64":
        return round(np.random.uniform(10.0, 1000.0), 2)
    elif dtype == "datetime64[ns]":
        return datetime.now() - timedelta(days=random.randint(0, 30))
    else:  # object
        return f"{colname[:4]}_{np.random.randint(100, 999)}"


# Create the DataFrame
num_rows = 5
data = {
    col: [generate_mock_value(dtype, col) for _ in range(num_rows)]
    for col, dtype in columns_dtypes.items()
}

df_mock = pd.DataFrame(data)

# Ensure correct dtypes
for col, dtype in columns_dtypes.items():
    if dtype.startswith("datetime"):
        df_mock[col] = pd.to_datetime(df_mock[col])
    else:
        df_mock[col] = df_mock[col].astype(dtype)

# Feature Engineering Logics

In [3]:
from src.calculation_features import generate_rolling_features

## Frequency Features

In [4]:
freq_config = [
    {
        # Transaction Count Same from Account No
        "type": "frequency",
        "groupby": "Account Identification1",
        "amount_col": "Transaction Serial No",
        "groupby_type": "No",
        "groupby_col": None,
        "windows": {
            "900S": "MFTxnCount_L15M",
            "1H": "MFTxnCount_L1H",
            "1D": "MFTxnCount_L1D",
            "7D": "MFTxnCount_L7D",
            "14D": "MFTxnCount_L14D",
            "30D": "MFTxnCount_L30D",
            "90D": "MFTxnCount_L90D",
        },
    },
    {
        # Transaction Count Same to Account No
        "type": "frequency",
        "groupby": "Account Identification1",
        "amount_col": "Transaction Serial No",
        "groupby_type": "Yes",
        "groupby_col": "Account Identification2",
        "windows": {
            "900S": "MFTxnCountSameToAcc_L15M",
            "1H": "MFTxnCountSameToAcc_L1H",
            "1D": "MFTxnCountSameToAcc_L1D",
            "7D": "MFTxnCountSameToAcc_L7D",
            "14D": "MFTxnCountSameToAcc_L14D",
            "30D": "MFTxnCountSameToAcc_L30D",
            "90D": "MFTxnCountSameToAcc_L90D",
        },
    },
]

df_freq = generate_rolling_features(
    df_mock,
    datetime_col="Transaction Datetime",
    key_col="Transaction Serial No",
    features_config=freq_config,
)

  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")


In [5]:
df_freq

Unnamed: 0,Transaction Serial No,Release Number,Status,MTI,PANNumber,Processing Code,Merchant Id,Retrival Reference Number,Account Identification1,Account Identification2,...,MFTxnCount_L14D,MFTxnCount_L30D,MFTxnCount_L90D,MFTxnCountSameToAcc_L15M,MFTxnCountSameToAcc_L1H,MFTxnCountSameToAcc_L1D,MFTxnCountSameToAcc_L7D,MFTxnCountSameToAcc_L14D,MFTxnCountSameToAcc_L30D,MFTxnCountSameToAcc_L90D
0,7361,7550,3889,2734,5184,3832,9399,5147,6282,2621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2804,2044,8789,1880,1866,4788,1469,5800,6421,7737,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1684,5993,5013,6299,7307,2445,4041,8789,6435,9059,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,7216,5542,8127,2737,1892,9584,9926,7561,9882,1351,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1603,9375,5424,1315,6794,4337,3329,1479,8378,1851,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Monetary

In [6]:
monetary_config = [
    {
        # Average Transaction Amount
        "type": "monetary",
        "groupby": "Account Identification1",
        "amount_col": "Transaction Amount",
        "groupby_type": "No",
        "groupby_col": None,
        "windows": {
            "900S": "Avg_Amt_L15M",
            "1H": "Avg_Amt_L1H",
            "1D": "Avg_Amt_L1D",
            "7D": "Avg_Amt_L7D",
            "14D": "Avg_Amt_L14D",
            "30D": "Avg_Amt_L30D",
            "90D": "Avg_Amt_L90D",
        },
    },
    {
        # Average Transaction Amount Same to Account
        "type": "monetary",
        "groupby": "Account Identification1",
        "amount_col": "Transaction Amount",
        "groupby_type": "Yes",
        "groupby_col": "Account Identification2",
        "windows": {
            "900S": "Avg_Amt_SameToAcc_L15M",
            "1H": "Avg_Amt_SameToAcc_L1H",
            "1D": "Avg_Amt_SameToAcc_L1D",
            "7D": "Avg_Amt_SameToAcc_L7D",
            "14D": "Avg_Amt_SameToAcc_L14D",
            "30D": "Avg_Amt_SameToAcc_L30D",
            "90D": "Avg_Amt_SameToAcc_L90D",
        },
    },
    {
        # Average Card Billing Amount
        "type": "monetary",
        "groupby": "Account Identification1",
        "amount_col": "Card Billing Amount",
        "groupby_type": "No",
        "groupby_col": None,
        "windows": {
            "900S": "Avg_CardBill_Amt_L15M",
            "1H": "Avg_CardBill_Amt_L1H",
            "1D": "Avg_CardBill_Amt_L1D",
            "7D": "Avg_CardBill_Amt_L7D",
            "14D": "Avg_CardBill_Amt_L14D",
            "30D": "Avg_CardBill_Amt_L30D",
            "90D": "Avg_CardBill_Amt_L90D",
        },
    },
    {
        # Average Card Billing Amount Same to Account
        "type": "monetary",
        "groupby": "Account Identification1",
        "amount_col": "Card Billing Amount",
        "groupby_type": "Yes",
        "groupby_col": "Account Identification2",
        "windows": {
            "900S": "Avg_CardBill_Amt_SameToAcc_L15M",
            "1H": "Avg_CardBill_Amt_SameToAcc_L1H",
            "1D": "Avg_CardBill_Amt_SameToAcc_L1D",
            "7D": "Avg_CardBill_Amt_SameToAcc_L7D",
            "14D": "Avg_CardBill_Amt_SameToAcc_L14D",
            "30D": "Avg_CardBill_Amt_SameToAcc_L30D",
            "90D": "Avg_CardBill_Amt_SameToAcc_L90D",
        },
    },
    {
        # Maximum Transaction Amount
        "type": "monetary_max",
        "groupby": "Account Identification1",
        "amount_col": "Transaction Amount",
        "groupby_type": "No",
        "groupby_col": None,
        "windows": {
            "900S": "Max_Amt_L15M",
            "1H": "Max_Amt_L1H",
            "1D": "Max_Amt_L1D",
            "7D": "Max_Amt_L7D",
            "14D": "Max_Amt_L14D",
            "30D": "Max_Amt_L30D",
            "90D": "Max_Amt_L90D",
        },
    },
    {
        # Maximum Card Billing Amount
        "type": "monetary_max",
        "groupby": "Account Identification1",
        "amount_col": "Card Billing Amount",
        "groupby_type": "No",
        "groupby_col": None,
        "windows": {
            "900S": "Max_CardBill_Amt_L15M",
            "1H": "Max_CardBill_Amt_L1H",
            "1D": "Max_CardBill_Amt_L1D",
            "7D": "Max_CardBill_Amt_L7D",
            "14D": "Max_CardBill_Amt_L14D",
            "30D": "Max_CardBill_Amt_L30D",
            "90D": "Max_CardBill_Amt_L90D",
        },
    },
]

df_monetary = generate_rolling_features(
    df_mock,
    datetime_col="Transaction Datetime",
    key_col="Transaction Serial No",
    features_config=monetary_config,
)

  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")
  .rolling(window, closed="left")


In [7]:
df_monetary

Unnamed: 0,Transaction Serial No,Release Number,Status,MTI,PANNumber,Processing Code,Merchant Id,Retrival Reference Number,Account Identification1,Account Identification2,...,Max_Amt_L14D,Max_Amt_L30D,Max_Amt_L90D,Max_CardBill_Amt_L15M,Max_CardBill_Amt_L1H,Max_CardBill_Amt_L1D,Max_CardBill_Amt_L7D,Max_CardBill_Amt_L14D,Max_CardBill_Amt_L30D,Max_CardBill_Amt_L90D
0,7361,7550,3889,2734,5184,3832,9399,5147,6282,2621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2804,2044,8789,1880,1866,4788,1469,5800,6421,7737,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1684,5993,5013,6299,7307,2445,4041,8789,6435,9059,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,7216,5542,8127,2737,1892,9584,9926,7561,9882,1351,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1603,9375,5424,1315,6794,4337,3329,1479,8378,1851,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Unique Count

In [8]:
df_mock["Account Identification2 Num"], uniques = df_mock[
    "Account Identification2"
].factorize()
df_mock["Account Identification1 Num"], uniques = df_mock[
    "Account Identification1"
].factorize()

unique_count_config = [
    {
        # Unique to Account No (Same from Account No)
        "type": "unique",
        "groupby": "Account Identification1",
        "count_col": "Account Identification2 Num",
        "windows": {
            "900S": "Unique_To_Account_No_L15M",
            "1H": "Unique_To_Account_No_L1H",
            "1D": "Unique_To_Account_No_L1D",
            "7D": "Unique_To_Account_No_L7D",
            "14D": "Unique_To_Account_No_L14D",
            "30D": "Unique_To_Account_No_L30D",
            "90D": "Unique_To_Account_No_L90D",
        },
    },
    {
        # Unique from Account No (Same to Account No)
        "type": "unique",
        "groupby": "Account Identification2",
        "count_col": "Account Identification1 Num",
        "windows": {
            "900S": "Unique_From_Account_No_L15M",
            "1H": "Unique_From_Account_No_L1H",
            "1D": "Unique_From_Account_No_L1D",
            "7D": "Unique_From_Account_No_L7D",
            "14D": "Unique_From_Account_No_L14D",
            "30D": "Unique_From_Account_No_L30D",
            "90D": "Unique_From_Account_No_L90D",
        },
    },
    {
        # Unique Transaction Amount Same From Account
        "type": "unique",
        "groupby": "Account Identification1",
        "count_col": "Transaction Amount",
        "windows": {
            "900S": "Unique_AmountFrom_L15M",
            "1H": "Unique_AmountFrom_L1H",
            "1D": "Unique_AmountFrom_L1D",
            "7D": "Unique_AmountFrom_L7D",
            "14D": "Unique_AmountFrom_L14D",
            "30D": "Unique_AmountFrom_L30D",
            "90D": "Unique_AmountFrom_L90D",
        },
    },
    {
        # Unique Transaction Amount Same to Account
        "type": "unique",
        "groupby": "Account Identification2",
        "count_col": "Transaction Amount",
        "windows": {
            "900S": "Unique_AmountTo_L15M",
            "1H": "Unique_AmountTo_L1H",
            "1D": "Unique_AmountTo_L1D",
            "7D": "Unique_AmountTo_L7D",
            "14D": "Unique_AmountTo_L14D",
            "30D": "Unique_AmountTo_L30D",
            "90D": "Unique_AmountTo_L90D",
        },
    },
]

df_unique_count = generate_rolling_features(
    df_mock,
    datetime_col="Transaction Datetime",
    key_col="Transaction Serial No",
    features_config=unique_count_config,
)

  .rolling(window=window, closed="left", min_periods=1)
  .rolling(window=window, closed="left", min_periods=1)
  .rolling(window=window, closed="left", min_periods=1)
  .rolling(window=window, closed="left", min_periods=1)
  .rolling(window=window, closed="left", min_periods=1)
  .rolling(window=window, closed="left", min_periods=1)
  .rolling(window=window, closed="left", min_periods=1)
  .rolling(window=window, closed="left", min_periods=1)


In [9]:
df_unique_count

Unnamed: 0,Transaction Serial No,Release Number,Status,MTI,PANNumber,Processing Code,Merchant Id,Retrival Reference Number,Account Identification1,Account Identification2,...,Unique_AmountFrom_L14D,Unique_AmountFrom_L30D,Unique_AmountFrom_L90D,Unique_AmountTo_L15M,Unique_AmountTo_L1H,Unique_AmountTo_L1D,Unique_AmountTo_L7D,Unique_AmountTo_L14D,Unique_AmountTo_L30D,Unique_AmountTo_L90D
0,7361,7550,3889,2734,5184,3832,9399,5147,6282,2621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2804,2044,8789,1880,1866,4788,1469,5800,6421,7737,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1684,5993,5013,6299,7307,2445,4041,8789,6435,9059,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,7216,5542,8127,2737,1892,9584,9926,7561,9882,1351,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1603,9375,5424,1315,6794,4337,3329,1479,8378,1851,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
