In [4]:
import pandas as pd

In [5]:
# Load the raw transaction records; the feature cells below add columns to this frame.
data = pd.read_csv('transaction_data.csv')

# Feature 1: Difference in balance for origin and destination accounts
**OrigBalanceDiff**: The difference between the original and new balance for the origin account, calculated as `oldbalanceOrg - newbalanceOrig`. This feature represents the change in the origin account's balance as a result of the transaction.

**DestBalanceDiff**: The difference between the original and new balance for the destination account, calculated as `oldbalanceDest - newbalanceDest`. This feature indicates the change in the destination account's balance due to the transaction.

In [12]:
# Balance change per account, computed as old balance minus new balance
# (positive when the account's balance decreased).
data['OrigBalanceDiff'] = data['oldbalanceOrg'].sub(data['newbalanceOrig'])
data['DestBalanceDiff'] = data['oldbalanceDest'].sub(data['newbalanceDest'])

# Feature 2: Zero balance flags

**ZeroBalanceOrigFlag**: A binary flag that indicates whether the new balance of the origin account is zero after the transaction (`1` if `newbalanceOrig` is 0, else `0`).

**ZeroBalanceDestFlag**: A binary flag that shows whether the new balance of the destination account is zero post-transaction (`1` if `newbalanceDest` is 0, else `0`).

In [7]:
# 1 when the post-transaction balance is exactly zero, otherwise 0.
data['ZeroBalanceOrigFlag'] = data['newbalanceOrig'].eq(0).astype(int)
data['ZeroBalanceDestFlag'] = data['newbalanceDest'].eq(0).astype(int)

# Feature 3: Account type (assuming 'M' stands for merchant and 'C' for customer)

**OrigAccType**: A binary identifier indicating the type of the origin account: `1` if the account name starts with 'M' (merchant), and `0` otherwise (customer accounts, whose names start with 'C').

**DestAccType**: Similar to `OrigAccType`, this binary flag identifies whether the destination account is a merchant (`1`) or a customer (`0`).

In [8]:
# Vectorized prefix check: 1 when the account name starts with 'M', else 0.
# na=False maps missing names to 0 instead of raising on a non-string value.
data['OrigAccType'] = data['nameOrig'].str.startswith('M', na=False).astype(int)
data['DestAccType'] = data['nameDest'].str.startswith('M', na=False).astype(int)

# Feature 4: Transaction type one-hot encoding

**Transaction Type One-Hot Encoding**: One-hot encoded features for the transaction type, creating individual columns like `type_PAYMENT`, `type_TRANSFER`, etc., indicating the type of transaction.

**AmountToOriginBalanceRatio**: The ratio of the transaction amount to the old balance of the origin account, providing insight into the transaction's size relative to the account's initial balance. It's calculated as `amount / oldbalanceOrg`, and is set to `0` when `oldbalanceOrg` is 0 to avoid division by zero.

In [9]:
# One indicator column per transaction type, named type_<VALUE>.
# dtype=int pins the indicators to 0/1 integers (matching the other binary
# flags) instead of relying on the pandas-version-dependent default dtype.
transaction_types = pd.get_dummies(data['type'], prefix='type', dtype=int)
# Column-wise concat keeps every column of `data`, including the original 'type'.
df = pd.concat([data, transaction_types], axis=1)

In [10]:
# Vectorized amount / oldbalanceOrg; rows whose origin balance is zero get 0
# rather than the inf/NaN that the raw division produces.
has_orig_balance = df['oldbalanceOrg'].ne(0)
df['AmountToOriginBalanceRatio'] = (df['amount'] / df['oldbalanceOrg']).where(has_orig_balance, 0)

# Feature 5: Ratio of transaction amount to the old balance of the origin account (avoid division by zero)

In [11]:
# Write the feature-augmented frame `df` to CSV; index=False leaves the row index out of the file.
df.to_csv('modified_transaction_data.csv', index=False)