In [1]:
import pandas as pd
import numpy as np
from numpy.random import randint
import datetime
import string
import logging

In [2]:
# Empty, schema-only frames indexed by account number ("Ac"):
# df_personal carries the account holder's details, df_tran the transaction columns.
df_personal = pd.DataFrame(
    {column: [] for column in ("Ac", "Name", "Age", "Gender", "Email", "Password")}
).set_index("Ac")
df_tran = pd.DataFrame(
    {
        column: []
        for column in (
            "Ac", "Date", "Transaction", "Trnx_Amount", "Type",
            "Balance_After_Trnx", "Current_Balance",
        )
    }
).set_index("Ac")

In [3]:
# Configure the root logger: records of level DEBUG and above are written to
# 'User_Database.log' as "<LEVEL> <timestamp> -> <message>".
log_format = "%(levelname)s %(asctime)s -> %(message)s"
logging.basicConfig(
    format=log_format,
    level=logging.DEBUG,
    filename='User_Database.log',
)
# The functions below log through this root logger.
logger = logging.getLogger()

In [4]:
def random_date(startYR, endYR, amount):
    """Return `amount` random calendar dates as a sorted list of strings.

    Every date is formatted 'YYYY-MM-DD'. February is always treated as
    having 28 days, so 29 February is never produced.
    Parameter:
    --------------------------------
    startYR: first year a date may fall in
    endYR: last year a date may fall in (inclusive)
    amount: how many dates to generate"""

    # record the call
    logger.info(f"func called: 'random_date', args passed: startYR {startYR}, endYR {endYR}, amount {amount}")

    # length of each month, January first (non-leap year)
    month_lengths = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

    # draw the months first, then a valid day for each month, then the years
    months = randint(1, 13, amount)
    days = [randint(1, month_lengths[m - 1] + 1) for m in months]
    years = randint(startYR, endYR + 1, amount)

    # build real date objects so the sort is chronological
    stamps = [datetime.date(y, m, d) for y, m, d in zip(years, months, days)]
    stamps.sort()
    formatted = [stamp.isoformat() for stamp in stamps]

    # record the successful exit
    logger.debug("func: random_date -> output generated without any error")
    return formatted

def trans_data(tran, yr, bal, amt):
    """Generate a random transaction history for a single account.

    Parameter:
    ----------------------------------------------------------
    tran: [min, max] number of transactions to create (both bounds inclusive)
    yr: [first, last] year the transaction dates are drawn from (passed to random_date)
    bal: [min, max] opening balance of the account (both bounds inclusive)
    amt: [min, max] amount of a single transaction (both bounds inclusive)

    Returns:
    ----------------------------------------------------------
    dict with the keys
        "Date": the dates returned by random_date, one per transaction
        "Transaction": what the money was received from / spent on
        "Trnx_Amount": array of transaction amounts
        "Type": array of 'W' (withdrawal) / 'D' (deposit) flags
        "Balance_After_Trnx": list with the running balance after each transaction
        "Current_Balance": balance after the last transaction, or the opening
                           balance when no transaction was generated

    Withdrawals are not limited by the available balance, so the running
    balance may become negative."""

    # appending log
    logger.info(f"func called: 'trans_data', args passed: tran = {tran}, yr = {yr}, bal = {bal}, amt = {amt}")

    # defining types of Withdrawal and deposit
    money_flow = {'D': ['Salary', 'Friend', 'Incentive', 'Other'],
                  'W': ['Fuel', 'Online-Shopping', 'Credit-bill', 'Bank-transfered', 'Rent-payment', 'Other']}

    # randint excludes its upper bound; the +1 makes the documented maximum
    # reachable and lets a degenerate range such as [5, 5] work instead of raising.
    ttl_tran = randint(tran[0], tran[1] + 1)                  # random count of total transactions
    date = random_date(yr[0], yr[1], ttl_tran)                # generating random dates
    trnx_amt = randint(amt[0], amt[1] + 1, ttl_tran)          # transaction amount
    typ = np.random.choice(['W', 'D'], ttl_tran)              # transaction type; W: withdrawal, D: Deposit
    spnt_on = [np.random.choice(money_flow[x]) for x in typ]  # money received or spent on
    opening_bal = randint(bal[0], bal[1] + 1)                 # total initial money in ac

    # withdrawals count negative, deposits positive; the cumulative sum on top
    # of the opening balance is the balance after each transaction
    signed_amt = np.where(typ == 'W', -trnx_amt, trnx_amt)
    bal_aft_trxn = (opening_bal + np.cumsum(signed_amt, dtype=np.int64)).tolist()

    # with zero transactions there is no "last" balance: fall back to the opening one
    crnt_bal = bal_aft_trxn[-1] if bal_aft_trxn else opening_bal

    output = {"Date": date, "Transaction": spnt_on, "Trnx_Amount": trnx_amt, "Type": typ,
              "Balance_After_Trnx": bal_aft_trxn, "Current_Balance": crnt_bal}

    # update log
    logger.debug("func: trans_data -> output generated without any error")
    return output

def prnsl_data():
    """Create one random personal record.

    Returns a dict with the keys
        "Name": 6 random ASCII letters, capitalised
        "Age": value drawn with randint(18, 80)
        "Gender": 'M' or 'F'
        "Email": the name followed by '@gmail.com'
        "Password": 6 random characters taken from string.printable
                    without its last 15 characters"""

    # record the call
    logger.info("func called: 'prnsl_data', args passed: None")

    # character pools the name and the password are sampled from
    letter_pool = list(string.ascii_letters)
    password_pool = list(string.printable[:-15])

    # the draws happen in the order name, age, gender, password
    name = ''.join(np.random.choice(letter_pool, 6)).capitalize()
    age = randint(18, 80)
    gender = np.random.choice(['M', 'F'])
    pswrd = ''.join(np.random.choice(password_pool, 6))

    # record the successful exit
    logger.debug("func: prnsl_data -> output generated without any error")
    return {"Name": name, "Age": age, "Gender": gender, "Email": f"{name}@gmail.com", "Password": pswrd}

def _unique_account_numbers(amount, low=123456789, high=999999999):
    """Draw `amount` distinct random account numbers from the range [low, high).

    The numbers are drawn with one randint call; only entries that collide
    with an earlier entry are redrawn, so a collision-free draw is returned
    unchanged."""
    if amount > high - low:
        raise ValueError(f"cannot draw {amount} distinct account numbers from [{low}, {high})")

    numbers = randint(low, high, amount)
    # `keep` holds the position of the first occurrence of every distinct value
    _, keep = np.unique(numbers, return_index=True)
    while keep.size < numbers.size:
        clash = np.ones(numbers.size, dtype=bool)
        clash[keep] = False
        numbers[clash] = randint(low, high, int(clash.sum()))
        _, keep = np.unique(numbers, return_index=True)
    return numbers


def data_gen(amount, tran=[200, 400], yr=[2017, 2021], bal=[100000,1000000], trx_amt=[1000, 50000]):
    """This function generate specified number of personal and transactional data.
    Parameter:
    -----------------------------------------------------------------------------
    amount: total number of records (accounts) you want
    tran: total transaction count range an account holder can have
    yr: transaction year range
    bal: range of total initial balance an account holder can have
    trx_amt: range of transaction value to have

    Returns:
    -----------------------------------------------------------------------------
    (prnsl_df, trnx_df): two DataFrames sharing the same index of unique
    account numbers. prnsl_df has one prnsl_data() record per row; trnx_df
    has one trans_data() record per row.

    The list defaults are only read, never modified, so sharing them between
    calls is harmless.
    """

    # appending log
    logger.info(f"func called: 'data_gen', args passed: amount = {amount} tran = {tran} yr = {yr} \
    bal = {bal} trx_amt = {trx_amt}")

    # random account number for each user; must be unique because it is the
    # index of both frames and is used for label lookups
    ac = _unique_account_numbers(amount)
    user_data = {"Name":[], "Age":[], "Gender": [], "Email":[], "Password": []}
    trnx_data = {"Date":[], "Transaction":[], "Trnx_Amount":[], "Type":[], "Balance_After_Trnx":[], "Current_Balance":[]}

    for _ in range(amount):
        user_d = prnsl_data()                        # generating personal data
        trnx_d = trans_data(tran, yr, bal, trx_amt)  # generating transaction data

        # collecting the personal record column by column
        for key, values in user_data.items():
            values.append(user_d[key])

        # collecting the transaction record column by column
        for key, values in trnx_data.items():
            values.append(trnx_d[key])

    prnsl_df = pd.DataFrame(user_data, index=ac)
    trnx_df = pd.DataFrame(trnx_data, index=ac)

    # update log
    logger.debug("func: data_gen -> output generated without any error")
    return prnsl_df, trnx_df


In [5]:
# Generate 5 synthetic accounts with the default ranges:
# `p` is the personal-data frame, `t` the transaction frame returned by data_gen.
p, t = data_gen(5)
p.head()

Unnamed: 0,Name,Age,Gender,Email,Password
969844699,Tvgxfd,75,M,Tvgxfd@gmail.com,>4o&ho
537967156,Nxygyl,20,M,Nxygyl@gmail.com,"2YE""x1"
621115374,Olczls,69,F,Olczls@gmail.com,JYFa)T
730489089,Iazcqt,77,M,Iazcqt@gmail.com,FRZ&zr
337431292,Lrderw,77,M,Lrderw@gmail.com,)ZSGNu


In [6]:
# One row per account; each cell except Current_Balance holds that account's whole sequence of values.
t.head()

Unnamed: 0,Date,Transaction,Trnx_Amount,Type,Balance_After_Trnx,Current_Balance
969844699,"[2017-01-05, 2017-01-07, 2017-02-01, 2017-02-0...","[Online-Shopping, Online-Shopping, Other, Onli...","[14937, 1558, 43310, 14396, 2289, 10041, 11944...","[W, W, D, W, W, D, D, W, D, D, D, W, W, W, W, ...","[301412, 299854, 343164, 328768, 326479, 33652...",-232610
537967156,"[2017-01-06, 2017-01-09, 2017-01-18, 2017-01-2...","[Other, Other, Other, Fuel, Friend, Friend, On...","[15942, 7950, 10250, 49939, 23962, 22409, 9526...","[W, D, D, W, D, D, W, D, D, W, D, D, W, D, W, ...","[626727, 634677, 644927, 594988, 618950, 64135...",-34712
621115374,"[2017-01-04, 2017-01-08, 2017-01-13, 2017-01-2...","[Bank-transfered, Online-Shopping, Bank-transf...","[35702, 41636, 11555, 40381, 42990, 18884, 471...","[W, W, W, W, D, D, W, D, D, W, W, D, D, W, W, ...","[729133, 687497, 675942, 635561, 678551, 69743...",553334
730489089,"[2017-01-03, 2017-01-12, 2017-01-30, 2017-02-0...","[Other, Bank-transfered, Rent-payment, Credit-...","[12838, 44752, 43655, 19131, 4326, 23240, 7755...","[W, W, W, W, W, D, W, D, D, D, D, W, W, D, W, ...","[315671, 270919, 227264, 208133, 203807, 22704...",473440
337431292,"[2017-01-02, 2017-01-03, 2017-01-04, 2017-01-0...","[Other, Incentive, Fuel, Other, Rent-payment, ...","[4040, 11425, 49635, 33080, 17267, 38598, 6012...","[W, D, W, D, W, W, D, D, W, W, D, D, W, W, D, ...","[969477, 980902, 931267, 964347, 947080, 90848...",1063293


In [7]:
# Flatten the nested per-account records of `t` into one long table with a
# single row per transaction.
account_frames = []
for usr, record in t.iterrows():
    # columns are read by name rather than by position, so the result does not
    # depend on the column order of `t`
    account_frames.append(pd.DataFrame({'Ac': [usr] * len(record['Date']),
                                        'Date': record['Date'],
                                        'Transaction': record['Transaction'],
                                        'Transaction_amount': record['Trnx_Amount'],
                                        'Transaction_Type': record['Type'],
                                        'Balance': record['Balance_After_Trnx']}))

# DataFrame.append was removed in pandas 2.0 and copied the whole result on
# every iteration; the frames are collected and concatenated once instead.
# Each account keeps its own 0..n-1 row labels, exactly as append produced.
# pd.concat refuses an empty list, hence the explicit empty-frame fallback.
transaction = pd.concat(account_frames) if account_frames else pd.DataFrame()

In [8]:
# Display the flattened table: one row per transaction, with the owning account number in 'Ac'.
transaction

Unnamed: 0,Ac,Date,Transaction,Transaction_amount,Transaction_Type,Balance
0,969844699,2017-01-05,Online-Shopping,14937,W,301412
1,969844699,2017-01-07,Online-Shopping,1558,W,299854
2,969844699,2017-02-01,Other,43310,D,343164
3,969844699,2017-02-09,Online-Shopping,14396,W,328768
4,969844699,2017-02-16,Other,2289,W,326479
...,...,...,...,...,...,...
368,337431292,2021-11-29,Credit-bill,18413,W,1019311
369,337431292,2021-12-11,Other,29597,D,1048908
370,337431292,2021-12-16,Bank-transfered,29946,W,1018962
371,337431292,2021-12-22,Other,15301,D,1034263


In [9]:
# Write the flattened transactions to 'transaction_data.csv'; index=False leaves the row labels out of the file.
transaction.to_csv('transaction_data.csv', index=False)