In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
# Fallback location of the Application Default Credentials file on the author's machine.
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
# Respect GOOGLE_APPLICATION_CREDENTIALS when it is already exported (other machines, CI,
# service accounts); only fall back to the local file above when nothing is configured.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', path)
# BigQuery client bound to the production data-platform project; reused by every query cell.
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

In [3]:
import datetime
from dateutil.relativedelta import relativedelta

def get_week_start_dates(start_date, end_date):
    """Return the Monday of every calendar week that overlaps the window.

    Args:
        start_date: First day of the window (``datetime.date``).
        end_date: Last day of the window, inclusive (``datetime.date``).

    Returns:
        list[datetime.date]: Week-start dates (Mondays) in ascending order.
        The first entry precedes ``start_date`` when ``start_date`` is not a
        Monday. The list is empty when ``end_date`` is earlier than
        ``start_date``.
    """
    # An inverted window contains no weeks at all.
    if start_date > end_date:
        return []

    # Anchor on the Monday of the first week (weekday() == 0 for Monday) and
    # step from there. Stepping from start_date itself would skip the final
    # week whenever start_date falls later in the week than end_date does.
    one_week = datetime.timedelta(days=7)
    week_start = start_date - datetime.timedelta(days=start_date.weekday())

    week_start_dates = []
    while week_start <= end_date:
        week_start_dates.append(week_start)
        week_start += one_week
    return week_start_dates

# Reporting window, both ends inclusive.
start_date = datetime.date(year=2024, month=8, day=26)
end_date = datetime.date(year=2024, month=9, day=8)

# Week-start dates for the window; the query cell below uses each one as a cut-off date.
week_start_dates = get_week_start_dates(start_date=start_date, end_date=end_date)
print(week_start_dates)

[datetime.date(2024, 8, 26), datetime.date(2024, 9, 2)]


In [9]:
# Run the MMAU query once per weekly cut-off date and stack the results into `res`.
#
# Each iteration interpolates `date` (one entry of `week_start_dates`) into the SQL as the
# cut-off date and returns the columns Dated, MMAU (customer id) and unit.
#
# NOTE(review): `unit` is not a single measure. Rows coming from `transactional_cust` carry
# a transaction count, rows from `cust_with_balance` carry a cleared balance, and
# `cust_with_balance` selects DISTINCT (client_id, clearedbalance) over a 30-day balance
# window, so the same customer can appear on many rows. Count customers with nunique(),
# not with len().
#
# The per-week frames are collected in a list and concatenated once after the loop instead
# of growing `res` inside the loop. Index labels are intentionally not reset, so they repeat
# across cut-off dates exactly as before.
weekly_frames = []
for date in week_start_dates:
    query = f"""with 
    all_cust   --- To get the total registered customer till the cut off date
    AS( 
        SELECT count(distinct cust_id) Total_registered_users
        from prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_mtb
        where created_dt <= "{date}" and cust_id is not null 
        ) ,
    open_TSA_cust --- TSA customer which was opened before the cut off date and closed date is either null or greater than cutoff date
    AS(
        SELECT count (distinct OFCUSTOMERID) Total_open_TSA_customers from `core_raw.customer_accounts` 
            where CRINTERDESC like 'Transactional Savings Account Inv_R'
            and OFCUSTOMERID in (select distinct cust_id 
            from prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_mtb where cust_id is not null)
            AND OFDATEOPENED <= "{date}"
            AND (OFDATECLOSED = DATE('1970-01-01') OR  OFDATECLOSED > "{date}")
            )           
            ,
    loan_cust --list of customers having loan either in status Normal or Arrears at the cut off date
    AS (
        SELECT 
            distinct CAST(lmt.customerId AS STRING) customerId
        FROM 
            `prj-prod-dataplatform.risk_credit_mis.loan_master_table` lmt
            inner join prj-prod-dataplatform.risk_credit_mis.loan_bucket_flow_report_core lbfrc on lbfrc.loanAccountNumber = lmt.loanAccountNumber
        WHERE lbfrc.bucketDate = date_sub(date("{date}"), interval 1 day)  --- need to change the date
        and lbfrc.loanStatus IN ('Normal', 'In Arrears')
            )           
            ,
    ACL_Customer ---- ACL customer are TSBL loans with sub product type FP and disbursement date <= cutoff date
        as 
            (select lmt.customerId FROM `prj-prod-dataplatform.dl_loans_db_raw.tdbk_digital_loan_application` a
                inner join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = a.digitalLoanAccountId
                where coalesce(a.loanType, 'NA') = 'TSBL'and coalesce(a.loan_sub_product_type, 'NA') = 'FP'
                and date_trunc(a.loanDigitalAppliedDateAndTime, day) <= "{date}"
                and date_trunc(lmt.disbursementDateTime, day) <= "{date}" --- need to change the date
                ),
    non_ACL_cust --list of all customers who have no ACL loans (loatype TSBL and sub-product type FP)
    AS (
        SELECT 
            distinct CAST(cust_id AS STRING) customerId
        FROM 
            `prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_mtb`
        WHERE 
            cast(cust_id as string) not in (select customerId from loan_cust)
            and created_dt <= "{date}" and cust_id is not null 
    )
    /*So, new definition of MAU is. Customer with open TSA and at least one any transaction for the amount greater than 100 PHP in the last 30 days. 
    No active loan. OR has balance of the TSA + any Stash greater than 100 PHP. Whoch means if he didnt transact but he has money on TSA - he is MAU. 
    If he doesnt transact and doesnt have money on TSA but has more than 100 PHP on Stash - he is active */
    ,        
    MAU_with_trx 
    AS (
        SELECT DISTINCT customer_id 
        FROM `prj-prod-dataplatform.risk_mart.customer_transactions`
        WHERE transaction_date BETWEEN DATE_SUB(date("{date}"), INTERVAL 30 DAY) AND date("{date}")
        AND ABS(trx_amount) > 100  --- transaction greater than 100
        AND account_type = "Tonik Account"  -- TSA account type
        AND customer_id NOT IN (SELECT customerId FROM loan_cust)  --- no active loan
        )
        ,
    MAU_with_bal 
     AS (
        SELECT 
            DISTINCT client_id 
        FROM 
            `prj-prod-dataplatform.risk_mart.customer_balance`
        WHERE
            clearedbalance > 100  --- balance in any account greater than 100
            AND account_type IN ("Tonik Account","Group Stash","Individual Stash")  ---- TSA, Group Stash and Individual Stash
            AND balanceDateAsOf between date_sub(date("{date}"), interval 30 day) and date("{date}") ---- in last 30 days
            AND client_id NOT IN (SELECT customerId FROM loan_cust) 
        )            ,
    MAU --- 
        AS (
        SELECT COUNT(distinct customer_id) AS MAU 
        FROM 
            (SELECT customer_id 
            FROM 
                MAU_with_trx
            UNION DISTINCT
            SELECT 
                client_id AS customer_id
            FROM 
                MAU_with_bal)),
    /* a) MMAU should be without active SIL, Quck and Flex loan. but when they will have ACL TSA loan - those should be included into MMAUs. 
    b)  I would like to ask you to add  
    1) the active TSA ACL loans issued (today its zero) after the MMAU.  
    2) Difference between MMAU with TSA aCL loan and TSA ACL LOans that will show us the current pool of eligible customers. 
    3) Offers extended. They should live somewhere on the datalake table. 
    For example this week we will issue 1000 offers. Next week we will issue 10 000 more. So netx week it will be 11 k offers sent.   
    And last one 4) Uptake. TSA ACL Loans active divided into Offers sent. as %.   */
    MMAU_customerbase as (select cast(customerId as string)customerId from non_ACL_cust union all (select cast(customerId as string)customerId from ACL_Customer)) , 
    transactional_cust --Mareks criteria
    AS (
        SELECT 
            DISTINCT customer_id, count(*) unit
        FROM 
            `prj-prod-dataplatform.risk_mart.customer_transactions`
        WHERE
            transaction_date BETWEEN DATE_SUB(date("{date}"), INTERVAL 3 MONTH) AND date("{date}")
            AND customer_id in (SELECT customerId FROM MMAU_customerbase)
        GROUP BY
            customer_id
        HAVING 
            COUNT(*) >= 5
    ),
    cust_with_balance --Mareks criteria
    AS (
        SELECT 
            DISTINCT client_id AS customer_id, clearedbalance unit 
        FROM 
            `prj-prod-dataplatform.risk_mart.customer_balance`
        WHERE
            clearedbalance > 1000
            AND account_type IN ("Tonik Account","Group Stash","Individual Stash")
            AND balanceDateAsOf between date_sub(date("{date}"), interval 30 day) and date("{date}")
            AND client_id in (SELECT customerId FROM MMAU_customerbase)
            ),
      MMAU
    AS (
        SELECT 
           '{date}' AS Dated,
            customer_id AS MMAU ,
            unit
        FROM 
            ( SELECT 
            customer_id , unit
        FROM 
            transactional_cust
        UNION DISTINCT
        SELECT 
            customer_id, unit
        FROM 
            cust_with_balance))
        Select * from MMAU;
        """
    df = client.query(query).to_dataframe()
    weekly_frames.append(df)

# Single concatenation; fall back to an empty frame when there were no cut-off dates,
# because pd.concat raises on an empty list.
res = pd.concat(weekly_frames) if weekly_frames else pd.DataFrame()
res.head()

Unnamed: 0,Dated,MMAU,unit
0,2024-08-26,1199463,14600.0
1,2024-08-26,1029450,155530.47
2,2024-08-26,1562504,14669.62
3,2024-08-26,1034356,102504.52
4,2024-08-26,1227778,5191.93


In [12]:
# Number of distinct MMAU customer ids for each cut-off date in `res`.
mmau_by_cutoff = res.groupby('Dated')['MMAU']
mmau_by_cutoff.nunique()

Dated
2024-08-26    55336
2024-09-02    62353
Name: MMAU, dtype: int64

In [21]:
# Restrict to the 2024-09-02 cut-off.
on_cutoff = res['Dated'] == '2024-09-02'
filterddata = res[on_cutoff]

# Rows whose MMAU id already occurred earlier in that slice
# (duplicated() leaves the first occurrence of each id unmarked).
repeated_id = filterddata['MMAU'].duplicated()
cnt = filterddata[repeated_id]

# Show the repeated rows ordered by customer id.
cnt.sort_values('MMAU')

Unnamed: 0,Dated,MMAU,unit
49961,2024-09-02,1000000,1300000.00
95495,2024-09-02,1000000,5848.54
180966,2024-09-02,1000000,5632.88
18623,2024-09-02,1000000,1110.51
114053,2024-09-02,1000000,25120.73
...,...,...,...
223378,2024-09-02,2815431,14.00
262418,2024-09-02,2815589,5.00
271288,2024-09-02,2816236,7.00
261324,2024-09-02,2816260,5.00


In [22]:
# Every row, across all cut-off dates, for the customer id '1000000' (ids are strings here).
is_customer = res['MMAU'] == '1000000'
res[is_customer]

Unnamed: 0,Dated,MMAU,unit
2342,2024-08-26,1000000,245.00
5405,2024-08-26,1000000,1878.66
8731,2024-08-26,1000000,5120.73
26014,2024-08-26,1000000,35928.65
33039,2024-08-26,1000000,1200000.00
...,...,...,...
196415,2024-09-02,1000000,2647.00
202492,2024-09-02,1000000,20185.58
211813,2024-09-02,1000000,28120.73
241052,2024-09-02,1000000,262.00
