In [1]:
import pandas as pd
import numpy as np

from google.cloud import bigquery

# BigQuery client used further down to run the daily metrics query.
# No project or credentials are passed here, so the google-cloud-bigquery
# library resolves both from the runtime environment.
client = bigquery.Client()

In [2]:
# Reporting window as ISO date strings; both endpoints are part of the range.
start_date, end_date = '2024-04-01', '2024-04-28'

# One timestamp per calendar day between the two endpoints
# (pandas' default daily frequency).
date_range = pd.date_range(start_date, end_date)

In [3]:
%%time
print(f'generating results from {start_date} to {end_date}, for {len(date_range)} days')
res =  pd.DataFrame()
for date in date_range:
    date = date.date()
    query = f"""WITH 
            TSA_customers --list of customers having TSA account
                AS (
                SELECT DISTINCT OFCUSTOMERID, OFDATEOPENED, OFDATECLOSED 
                FROM prj-prod-dataplatform.core_raw.customer_accounts 
                WHERE OFPRODUCTCATEGORY = 'savings' 
                AND CRINTERDESC = 'Transactional Savings Account Inv_R'
            ),
            loan_cust --list of customers having active loan
                AS (
                SELECT 
                    CAST(customerId AS STRING) customerId
                FROM 
                    `prj-prod-dataplatform.risk_credit_mis.loan_master_table`
                WHERE 
                    loanPaidStatus IN ('Normal', 'In Arrears')
            ),
            MAU --list of monthly active users
                AS (
                SELECT DISTINCT customer_id 
                FROM `prj-prod-dataplatform.risk_mart.customer_transactions`
                WHERE transaction_date BETWEEN DATE_SUB('{date}', INTERVAL 30 DAY) AND '{date}'
                AND ABS(trx_amount) > 100
                AND account_type = "Tonik Account"
                AND customer_id NOT IN (SELECT customerId FROM loan_cust)
            ),
            MAU_count --count of monthly active users
                AS (
                SELECT
                    '{date}' AS Date,
                    COUNT(DISTINCT customer_id) AS MAU
                FROM MAU
            ),
            TSA_non_loan_cust --count of TSA customers whose account is not closed and no active loan
                AS (
                SELECT 
                    '{date}' AS Date,
                    COUNT(DISTINCT OFCUSTOMERID) AS TSA_customers 
                FROM 
                    TSA_customers 
                WHERE 
                    DATE('{date}') BETWEEN DATE(OFDATEOPENED) AND 
                    (CASE WHEN DATE(OFDATECLOSED) = '1970-01-01' THEN DATE('{date}') ELSE DATE(OFDATECLOSED) END)  
                AND OFCUSTOMERID NOT IN (SELECT customerId FROM loan_cust)
            ),
            TSA_balance --total TSA balance on particular date
                AS (
                SELECT 
                    CAST(balanceDateAsOf AS STRING) AS date,
                    SUM(clearedbalance) AS TSA_balance 
                FROM `prj-prod-dataplatform.risk_mart.customer_balance` 
                WHERE 
                    balanceDateAsOf = "{date}" 
                    AND account_type  = "Tonik Account"
                    AND client_id NOT IN (SELECT customerId FROM loan_cust)
                GROUP BY 1
            ),
            TSA_balance_MAU --total TSA balance for monthly active users on particular date
                AS (
                SELECT 
                    CAST(balanceDateAsOf AS STRING) AS date,
                    SUM(clearedbalance) AS TSA_balance_MAU 
                FROM `prj-prod-dataplatform.risk_mart.customer_balance` 
                WHERE 
                    balanceDateAsOf = "{date}" 
                    AND account_type  = "Tonik Account"
                    AND client_id IN (SELECT customer_id FROM MAU)
                GROUP BY 1
            ),
            transactional_cust --Mareks criteria
                AS (
                SELECT 
                    DISTINCT customer_id
                FROM 
                    `prj-prod-dataplatform.risk_mart.customer_transactions`
                WHERE
                    transaction_date BETWEEN DATE_SUB('{date}', INTERVAL 3 MONTH) AND '{date}'
                    AND customer_id NOT IN (SELECT customerId FROM loan_cust) 
                GROUP BY
                    customer_id
                HAVING 
                    COUNT(*) >= 5
            ),
            cust_with_balance --Mareks criteria
                AS (
                SELECT 
                    DISTINCT client_id 
                FROM 
                    `prj-prod-dataplatform.risk_mart.customer_balance`
                WHERE
                    clearedbalance > 1000
                    AND account_type IN ("Tonik Account","Group Stash","Individual Stash")
                    AND balanceDateAsOf = '{date}'
                    AND client_id NOT IN (SELECT customerId FROM loan_cust)
            ),
            cust_with_transactions_and_balance --Mareks criteria
                AS (
                SELECT 
                    customer_id 
                FROM 
                    transactional_cust
                UNION DISTINCT
                SELECT 
                    client_id AS customer_id
                FROM 
                    cust_with_balance
            ),
            count_cust_with_transactions_and_balance 
                AS (
                SELECT 
                    '{date}' AS Date,
                    COUNT(customer_id) AS MMAU 
                FROM 
                    cust_with_transactions_and_balance
            ),
           b 
            AS (
                SELECT 
                    a.Date,
                    MAU,
                    TSA_customers,
                    TSA_balance,
                    TSA_balance_MAU,
                    MMAU
                FROM 
                    MAU_count a 
                LEFT JOIN 
                    TSA_non_loan_cust b ON a.Date = b.Date
                LEFT JOIN 
                    TSA_balance c ON a.Date = c.Date
                LEFT JOIN 
                    TSA_balance_MAU d ON a.Date = d.Date
                LEFT JOIN 
                    count_cust_with_transactions_and_balance e ON a.Date = e.Date
            )

        SELECT  
            Date, MAU, TSA_customers, TSA_balance, TSA_balance_MAU, 
            MAU/TSA_customers AS Share_of_active_TSA_customers,
            TSA_balance/TSA_customers AS Average_TSA_balance_per_customer,
            TSA_balance_MAU/MAU AS Average_TSA_balance_per_MAU ,
            MMAU
        FROM 
            b;
            """
    df = client.query(query).to_dataframe()
    res = pd.concat([res,df])
    res['Change in MAU'] = res['MAU'].pct_change()


generating results from 2024-04-01 to 2024-04-28, for 28 days
CPU times: user 672 ms, sys: 96.9 ms, total: 769 ms
Wall time: 1min 44s


In [6]:
# Write the collected metrics to an Excel workbook in the working directory.
# The file name carries the window's start date with dashes turned into
# underscores; the DataFrame index is left out of the sheet.
res.to_excel(
    "MAU_from_{}.xlsx".format(start_date.replace('-', '_')),
    index=False,
)