### Synthetic Bank Transaction Data Creation

In [None]:
import random
import pandas as pd
from datetime import datetime, timedelta

# Define constants
NUM_TRANSACTIONS = 1000
ACCOUNT_ID = "8010-27075685-001"

MERCHANTS_NON_RECURRENT = ["Eateries", "Auto Repair", "Petrol", "Entertainment", "Hypermarket", "Groceries", "Plumber Service", "Electrician Service"]

MERCHANTS_RECURRENT_ENTERTAINMENT = ["Video Streaming", "Music Streaming", "Cloud Gaming Subscription"]

MERCHANTS_RECURRENT_FACILITIES = ["Electric Company", "Water Utility", "Gas Provider", "Trash Collection Service", "Cloud Service",
"Insurance", "Public Transport Pass", "Gym Subscription"]

BANK_RECURRENT = ["House Loan", "Vehicle Loan", "Personal Loan"]

FAMILY_RECURRENT = ["Spouse Allowance" , "Children Allowance", "Parent Allowance"]

DEPOSIT_SOURCE = [
    "Freelance Income", "ATM Deposit", "Check Deposit", "Wire Transfer", "Mobile Wallet Transfer", "Cash Deposit"]

TRANSACTION_TYPES = ["House Rental", "Salary Deposit", "Deposit", "Withdrawal", "Merchant Payment", "Bill Payment", "Bank Fee"]
BANK_FEES = ["Monthly Maintenance", "Overdraft Fee", "Transfer Fee"]
START_DATE = datetime(2024, 1, 1)
END_DATE = datetime(2025, 2, 5)

# Generate random date between start and end dates
def random_date(start, end):
    return start + timedelta(days=random.randint(0, (end - start).days), seconds=random.randint(0, 86400))

# Generate a single synthetic transaction
def generate_transaction():
    transaction_date = random_date(START_DATE, END_DATE)
    transaction_type = random.choice(TRANSACTION_TYPES)

    if transaction_type == "Salary Deposit":
        amount = round(random.uniform(4000, 4500), 2)
        description = "Salary Deposit"
    elif transaction_type == "Deposit":
        deposit_source = random.choice(DEPOSIT_SOURCE)
        amount = round(random.uniform(500, 1000), 2)
        description = f"Payment for {deposit_source}"
    elif transaction_type == "House Rental":
        amount = round(1500, 2) * -1
        description = "House Rental"
    elif transaction_type == "Withdrawal":
        amount = round(random.uniform(20, 2000), 2) * -1
        description = "ATM Withdrawal"
    elif transaction_type == "Merchant Payment":
        merchant = random.choice(MERCHANTS)
        amount = round(random.uniform(10, 500), 2) * -1
        description = f"Payment to {merchant}"
    elif transaction_type == "Bill Payment":
        payee = random.choice(PAYEES)
        amount = round(random.uniform(50, 1000), 2) * -1
        description = f"Payment to {payee}"
    else:  # Bank Fee
        fee = random.choice(BANK_FEES)
        amount = round(random.uniform(5, 50), 2) * -1
        description = f"{fee}"

    return {
        "accountID": ACCOUNT_ID,
        "transactionDATE": transaction_date,
        "transactionTYPE": transaction_type,
        "Description": description,
        "Amount": amount
    }

# Generate the dataset
transactions = [generate_transaction() for _ in range(NUM_TRANSACTIONS)]

# Convert to DataFrame
df = pd.DataFrame(transactions)
# Sort transactions by date
df.sort_values(by="transactionDATE", inplace=True)

df['accountBAL']=(df['Amount'].cumsum()).round(2)



# Save to CSV
df.to_csv("synthetic_current_account_transactions.csv", index=False)

print("Synthetic data generated and saved as 'synthetic_current_account_transactions.csv'.")


Synthetic data generated and saved as 'synthetic_current_account_transactions.csv'.


In [11]:
df

Unnamed: 0,accountID,transactionDATE,transactionTYPE,Description,Amount,accountBAL
797,8010-27075685-001,2024-01-02 07:00:11,Salary Deposit,Salary Deposit,4280.99,4280.99
818,8010-27075685-001,2024-01-02 07:32:02,Merchant Payment,Payment to GoShop,-15.56,4265.43
323,8010-27075685-001,2024-01-02 09:27:46,House Rental,House Rental,-1500.00,2765.43
223,8010-27075685-001,2024-01-02 14:19:59,Bill Payment,Payment to Gaming Subscription,-716.18,2049.25
760,8010-27075685-001,2024-01-03 10:11:22,Withdrawal,ATM Withdrawal,-1446.51,602.74
...,...,...,...,...,...,...
387,8010-27075685-001,2025-02-05 03:56:14,Merchant Payment,Payment to Courts Malaysia,-410.42,224854.35
274,8010-27075685-001,2025-02-05 07:28:50,Salary Deposit,Salary Deposit,4360.61,229214.96
646,8010-27075685-001,2025-02-05 07:48:55,Deposit,Payment for Real Estate Sale Proceeds,641.76,229856.72
637,8010-27075685-001,2025-02-05 19:01:19,Withdrawal,ATM Withdrawal,-360.28,229496.44


### Create Account ID 

In [None]:
# direct query

'''
UPDATE cif
SET accountID = 
    CASE 
        -- Savings Account: Format 8010-IC-001 (starts with 8010)
        WHEN productID = 1 THEN 
            '8010-' || substr('00000000' || ic, -8) || '-' ||
            substr('000' || id, -3)
        
        -- Current Account: Format 8020-IC-001 (starts with 8020)
        WHEN productID = 2 THEN 
            '8020-' || substr('00000000' || ic, -8) || '-' ||
            substr('000' || id, -3)
    END
WHERE productID IN (1, 2)

'''


# connect to sqlite
import sqlite3
conn = sqlite3.connect('instance/flaskr.sqlite')
print("Opened database successfully")

# pandas read from sqlite
import pandas as pd
#pd.read_sql_query("select * from 'productID(1)-transactions'", conn)

# write pandas df to sqlite table
#df = pd.read_csv('synthetic_current_account_transactions.csv')
#df.to_sql('productID(1)-transactions', conn, if_exists='append', index=False)

df=pd.read_sql_query("select * from 'productID(1)-transactions'", conn)
df['accountBAL']=(df['transactionAMT'].cumsum()).round(2)
df

#conn.close()


Opened database successfully


Unnamed: 0,accountID,transactionDATE,transactionTYPE,description,amount,accountBAL
0,8010-27075685-001,2024-01-01 05:11:51,Merchant Payment,Payment to Hermo,-178.52,-178.52
1,8010-27075685-001,2024-01-01 08:40:42,Salary Deposit,Salary Deposit,9929.20,9750.68
2,8010-27075685-001,2024-01-01 10:21:04,Deposit,Payment for Electronics Sale Proceeds,772.86,10523.54
3,8010-27075685-001,2024-01-01 14:11:09,Withdrawal,ATM Withdrawal,-98.67,10424.87
4,8010-27075685-001,2024-01-01 18:16:54,Bank Fee,Overdraft Fee,-30.79,10394.08
...,...,...,...,...,...,...
995,8010-27075685-001,2025-02-03 19:50:47,Withdrawal,ATM Withdrawal,-1996.43,1623481.60
996,8010-27075685-001,2025-02-03 22:59:57,Bank Fee,Transfer Fee,-25.11,1623456.49
997,8010-27075685-001,2025-02-04 07:31:04,Bill Payment,Payment to Sewage Service,-709.64,1622746.85
998,8010-27075685-001,2025-02-04 14:49:35,Salary Deposit,Salary Deposit,9647.90,1632394.75
