In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
# Local fallback location of the gcloud legacy ADC credentials file.
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
# Only fall back to the local file when GOOGLE_APPLICATION_CREDENTIALS is not
# already configured, so the notebook also runs on machines/CI where the
# credentials are supplied through the environment.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', path)
# BigQuery client used by every query/load cell in this notebook.
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None) 

In [2]:
# Build one row per customer in customer_transaction_data with:
#   * MAU-activity indicators at month 4 and month 5,
#   * churn flags,
#   * loan-application / loan-approval flags,
#   * FSPD30 flags plus the raw counts they are derived from.
# The nested "case ... end end" expressions have no outer ELSE, so a flag is
# NULL for customers whose tx_first_product is outside the listed products.
sq = """
with mau4m as
(select customer_id, registration_date from prj-prod-dataplatform.worktable_data_analysis.customer_transaction_data
where
    (tx_cnt_total_transactions_bw_3m_and_4m >=1          -- any one transaction greater than 100 pesos (between month 3 and month 4)
    and
    tx_cnt_active_loans_end_4m = 0)
    or
    ((tx_tsa_balance_4m+tx_stash_balance_4m) >= 100      -- total TSA + Stash balance at the end of month 4 is >= 100 pesos
    and
    tx_cnt_active_loans_end_4m = 0)
),
mau5m as
(select customer_id, registration_date from prj-prod-dataplatform.worktable_data_analysis.customer_transaction_data
where
    (tx_cnt_total_transactions_bw_4m_and_5m >=1          -- any one transaction greater than 100 pesos (between month 4 and month 5)
    and
    tx_cnt_active_loans_end_5m = 0)
    or
    ((tx_tsa_balance_5m+tx_stash_balance_5m) >= 100      -- total TSA + Stash balance at the end of month 5 is >= 100 pesos
    and
    tx_cnt_active_loans_end_5m = 0)
)
select
ctd.customer_id,
ctd.registration_date,
ctd.tx_first_product,
case when ctd.customer_id in (select customer_id from mau4m) then 1 else 0 end tx_mau4m_active,
case when ctd.customer_id in (select customer_id from mau5m) then 1 else 0 end tx_mau5m_active,

-- Churn flag 4m (Stash, Bills Pay and Unsecured Loan first products only):
-- 0 if the user is active (as per MAU definition) as of the 120th day from the onboarding date, otherwise 1

case when ctd.tx_first_product in ('Individual Stash', 'Group Stash', 'Bills Pay', 'Unsecured Loan')
   then case when ctd.customer_id in (select customer_id from mau4m)   -- active MAU customers
    then 0
    else 1
end end tx_Churn_flag_4m,

-- Churn flag 5m: 0 if the user is active (as per MAU definition) as of the 150th day from the onboarding date, otherwise 1

case when ctd.tx_first_product in ('Individual Stash', 'Group Stash', 'Bills Pay', 'Unsecured Loan') then
   case when ctd.customer_id in (select customer_id from mau5m)
    then 0
    else 1
end end tx_Churn_flag_5m,

-- Loan application flag 4m (Stash or Bills Pay first products only):
-- 1 if the user is active (as per MAU definition) as of the 120th day from the onboarding date
-- AND did not submit any loan application within 120 days from the onboarding date, otherwise 0

case when ctd.tx_first_product in ('Individual Stash', 'Group Stash', 'Bills Pay') then
    case when ctd.customer_id in (select customer_id from mau4m)
    and (select count(customerId) from `risk_credit_mis.loan_master_table` where customerId = cast(ctd.customer_id as numeric)
                                                               and date(startApplyDateTime) between date(ctd.registration_date)
                                                               and date_add(date(ctd.registration_date), interval 120 day)) = 0
    then 1
    else 0
end end tx_loanApplicationflag_4m,

-- Number of loan applications started within 120 days from the onboarding date

(select count(customerId) from `risk_credit_mis.loan_master_table` where customerId = cast(ctd.customer_id as numeric)
                                                               and date(startApplyDateTime) between date(ctd.registration_date)
                                                               and date_add(date(ctd.registration_date), interval 120 day)) as  submittedappliction4m,

-- Loan application flag 5m: 1 if the user is active per mau5m AND submitted any loan application
-- (no loan-type filter is applied here) between day 120 and day 150 from the onboarding date, otherwise 0
-- NOTE(review): BETWEEN is inclusive, so day 120 falls in both the 4m and the 5m window - confirm this is intended

case when ctd.tx_first_product in ('Individual Stash', 'Group Stash', 'Bills Pay') then
    case when ctd.customer_id in (select customer_id from mau5m)
    and(select count(customerId) from `risk_credit_mis.loan_master_table` where customerId = cast(ctd.customer_id as numeric)
                                                               and date(startApplyDateTime) between  date_add(ctd.registration_date, interval 120 day)
                                                               and date_add(ctd.registration_date, interval 150 day)) > 0
    then 1
    else 0
end end tx_loanApplicationflag_5m,

-- Number of loan applications started between day 120 and day 150 from the onboarding date

(select count(customerId) from `risk_credit_mis.loan_master_table` where customerId = cast(ctd.customer_id as numeric)
                                                               and date(startApplyDateTime) between  date_add(ctd.registration_date, interval 120 day)
                                                               and date_add(ctd.registration_date, interval 150 day)) as submittedappliction5m,

-- Loan approved flag 5m: 1 if the user is active per mau5m AND has at least one approved (flagApproval = 1)
-- Quick/Flex loan with approvedDateTime between day 120 and day 150 from the onboarding date, otherwise 0

case when ctd.tx_first_product in ('Individual Stash', 'Group Stash', 'Bills Pay') then
    case when ctd.customer_id in (select customer_id from mau5m)
    and(select count(customerId) from `risk_credit_mis.loan_master_table` where customerId = cast(ctd.customer_id as numeric)
                                                               and date(approvedDateTime) between  date_add(ctd.registration_date, interval 120 day)
                                                               and date_add(ctd.registration_date, interval 150 day)
                                                               and flagApproval = 1
                                                               and new_loan_type in ('Quick', 'Flex')) > 0
    then 1
    else 0
end end tx_loanApprovedflag_5m,

-- FSPD30 flag 4m (Unsecured Loan first product only):
-- 0 if the user is active per mau4m AND never NPL (no Quick/Flex loan with Max_Ever_DPD >= 180)
-- AND has no Quick/Flex loan with min_inst_def30 in (1, 2) on the day-120 bucket date, otherwise 1

case when ctd.tx_first_product in ('Unsecured Loan') then
     case when ctd.customer_id in (select customer_id from mau4m)
     and (select count(customerId) from `risk_credit_mis.loan_master_table` where customerId = cast(ctd.customer_id as numeric)
                                                                            and Max_Ever_DPD >= 180
                                                                            and new_loan_type in ('Quick', 'Flex')) = 0
     and (select count(lmt.customerId) from prj-prod-dataplatform.risk_credit_mis.loan_bucket_flow_report_core lbfrc
                   inner join `risk_credit_mis.loan_master_table` lmt on lmt.loanAccountNumber = lbfrc.loanAccountNumber
                   inner join prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data ldd on ldd.loanAccountNumber = lmt.loanAccountNumber
                   where lmt.customerId = cast(ctd.customer_id as numeric)
                   and lbfrc.bucketDate = date_add(ctd.registration_date, interval 120 day)
                   and ldd.min_inst_def30 in (1, 2)
                   and lmt.new_loan_type in ('Quick', 'Flex')
                  ) = 0
     then 0
     else 1
end end tx_FSPD30_4m,

-- Number of Quick/Flex loans of the customer with Max_Ever_DPD >= 180

(select count(customerId) from `risk_credit_mis.loan_master_table` where customerId = cast(ctd.customer_id as numeric)
                                                                            and Max_Ever_DPD >= 180
                                                                            and new_loan_type in ('Quick', 'Flex')) npl,

-- FSPD30 flag 5m (Unsecured Loan first product only):
-- 0 if the user is active per mau5m AND has no Quick/Flex loan with min_inst_def30 in (1, 2)
-- on the day-150 bucket date, otherwise 1
-- NOTE(review): the original note described a 90-day window after day 120; the code checks only the day-150 snapshot - confirm

case when ctd.tx_first_product in ('Unsecured Loan') then
     case when ctd.customer_id in (select customer_id from mau5m)
     and (select count(lmt.customerId) from prj-prod-dataplatform.risk_credit_mis.loan_bucket_flow_report_core lbfrc
                   inner join `risk_credit_mis.loan_master_table` lmt on lmt.loanAccountNumber = lbfrc.loanAccountNumber
                   inner join prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data ldd on ldd.loanAccountNumber = lmt.loanAccountNumber
                   where lmt.customerId = cast(ctd.customer_id as numeric)
                   and lbfrc.bucketDate = date_add(ctd.registration_date, interval 150 day)
                   and ldd.min_inst_def30 in (1, 2)
                   and lmt.new_loan_type in ('Quick', 'Flex')
                  ) = 0
     then 0
     else 1
end end tx_FSPD30_5m,

-- Raw count behind tx_FSPD30_4m (day-120 bucket date)

(select count(lmt.customerId) from prj-prod-dataplatform.risk_credit_mis.loan_bucket_flow_report_core lbfrc
                   inner join `risk_credit_mis.loan_master_table` lmt on lmt.loanAccountNumber = lbfrc.loanAccountNumber
                   inner join prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data ldd on ldd.loanAccountNumber = lmt.loanAccountNumber
                   where lmt.customerId = cast(ctd.customer_id as numeric)
                   and lbfrc.bucketDate = date_add(ctd.registration_date, interval 120 day)
                   and ldd.min_inst_def30 in (1, 2)
                   and lmt.new_loan_type in ('Quick', 'Flex')
                  ) fspd304m,

-- Raw count behind tx_FSPD30_5m (day-150 bucket date)

(select count(lmt.customerId) from prj-prod-dataplatform.risk_credit_mis.loan_bucket_flow_report_core lbfrc
                   inner join `risk_credit_mis.loan_master_table` lmt on lmt.loanAccountNumber = lbfrc.loanAccountNumber
                   inner join prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data ldd on ldd.loanAccountNumber = lmt.loanAccountNumber
                   where lmt.customerId = cast(ctd.customer_id as numeric)
                   and lbfrc.bucketDate = date_add(ctd.registration_date, interval 150 day)
                   and ldd.min_inst_def30 in (1, 2)
                   and lmt.new_loan_type in ('Quick', 'Flex')
                  ) fspd305m
from prj-prod-dataplatform.worktable_data_analysis.customer_transaction_data ctd
"""

# Run the query and download the full result into a pandas DataFrame.
df = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')



Job ID 1e8d6e26-dc24-4cc1-93db-763304babbf6 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


In [3]:
# Show the column names of the query result held in df.
df.columns

Index(['customer_id', 'registration_date', 'tx_first_product',
       'tx_mau4m_active', 'tx_mau5m_active', 'tx_Churn_flag_4m',
       'tx_Churn_flag_5m', 'tx_loanApplicationflag_4m',
       'submittedappliction4m', 'tx_loanApplicationflag_5m',
       'submittedappliction5m', 'tx_loanApprovedflag_5m', 'tx_FSPD30_4m',
       'npl', 'tx_FSPD30_5m', 'fspd304m', 'fspd305m'],
      dtype='object')

# Churn

In [4]:
# First-product values that count as "Stash" for this section
a = ['Group Stash', 'Individual Stash']

# Boolean mask over df, then keep only the matching rows
is_stash_first = df['tx_first_product'].isin(a)
df_filtered = df.loc[is_stash_first]

In [5]:
# Unique-customer counts per product / activity / churn-flag combination,
# restricted to rows whose 4-month churn flag is 0.
churn_group_cols = [
    'tx_first_product',
    'tx_mau4m_active',
    'tx_mau5m_active',
    'tx_Churn_flag_4m',
    'tx_Churn_flag_5m',
]
(
    df_filtered.loc[df_filtered['tx_Churn_flag_4m'] == 0]
    .groupby(churn_group_cols)
    .agg(customer_unq_cnt=('customer_id', 'nunique'))
    .reset_index()
)

Unnamed: 0,tx_first_product,tx_mau4m_active,tx_mau5m_active,tx_Churn_flag_4m,tx_Churn_flag_5m,customer_unq_cnt
0,Group Stash,1,0,0,1,64
1,Group Stash,1,1,0,0,269
2,Individual Stash,1,0,0,1,523
3,Individual Stash,1,1,0,0,1779


In [6]:
# Preview the first rows whose 4-month churn flag is 0.
not_churned_4m = df_filtered['tx_Churn_flag_4m'] == 0
df_filtered.loc[not_churned_4m].head()

Unnamed: 0,customer_id,registration_date,tx_first_product,tx_mau4m_active,tx_mau5m_active,tx_Churn_flag_4m,tx_Churn_flag_5m,tx_loanApplicationflag_4m,submittedappliction4m,tx_loanApplicationflag_5m,submittedappliction5m,tx_loanApprovedflag_5m,tx_FSPD30_4m,npl,tx_FSPD30_5m,fspd304m,fspd305m
729,2439246,2024-03-24,Individual Stash,1,1,0,0,1,0,0,0,0,,0,,0,0
985,1866060,2023-01-18,Group Stash,1,1,0,0,1,0,0,0,0,,0,,0,0
1110,2294744,2023-11-05,Individual Stash,1,1,0,0,1,0,0,0,0,,0,,0,0
1146,2101068,2023-06-20,Individual Stash,1,1,0,0,1,0,0,0,0,,0,,0,0
1453,2070153,2023-05-31,Individual Stash,1,0,0,1,1,0,0,0,0,,0,,0,0


# Loan Application Flag

In [7]:
# First-product values that count as "Stash" for this section
a = ['Group Stash', 'Individual Stash']

# Boolean mask over df, then keep only the matching rows
is_stash_first = df['tx_first_product'].isin(a)
df_filtered = df.loc[is_stash_first]

In [8]:
# Unique-customer counts per product / activity / loan-flag combination,
# restricted to rows where tx_loanApplicationflag_4m is 1.
loan_group_cols = [
    'tx_first_product',
    'tx_mau4m_active',
    'tx_mau5m_active',
    'tx_loanApplicationflag_4m',
    'tx_loanApplicationflag_5m',
    'tx_loanApprovedflag_5m',
]
(
    df_filtered.loc[df_filtered['tx_loanApplicationflag_4m'] == 1]
    .groupby(loan_group_cols)
    .agg(customer_unq_cnt=('customer_id', 'nunique'))
    .reset_index()
)

Unnamed: 0,tx_first_product,tx_mau4m_active,tx_mau5m_active,tx_loanApplicationflag_4m,tx_loanApplicationflag_5m,tx_loanApprovedflag_5m,customer_unq_cnt
0,Group Stash,1,0,1,0,0,49
1,Group Stash,1,1,1,0,0,251
2,Group Stash,1,1,1,1,0,4
3,Individual Stash,1,0,1,0,0,344
4,Individual Stash,1,1,1,0,0,1510
5,Individual Stash,1,1,1,1,0,15


# Loan Approval Flag

In [9]:
# First-product values that count as "Stash" for this section
a = ['Group Stash', 'Individual Stash']

# Boolean mask over df, then keep only the matching rows
is_stash_first = df['tx_first_product'].isin(a)
df_filtered = df.loc[is_stash_first]

In [10]:
# Unique-customer counts per product / activity / loan-flag combination,
# restricted to rows where tx_loanApplicationflag_4m is 0.
loan_group_cols = [
    'tx_first_product',
    'tx_mau4m_active',
    'tx_mau5m_active',
    'tx_loanApplicationflag_4m',
    'tx_loanApplicationflag_5m',
    'tx_loanApprovedflag_5m',
]
(
    df_filtered.loc[df_filtered['tx_loanApplicationflag_4m'] == 0]
    .groupby(loan_group_cols)
    .agg(customer_unq_cnt=('customer_id', 'nunique'))
    .reset_index()
)

Unnamed: 0,tx_first_product,tx_mau4m_active,tx_mau5m_active,tx_loanApplicationflag_4m,tx_loanApplicationflag_5m,tx_loanApprovedflag_5m,customer_unq_cnt
0,Group Stash,0,0,0,0,0,1495
1,Group Stash,0,1,0,0,0,98
2,Group Stash,0,1,0,1,1,1
3,Group Stash,1,0,0,0,0,15
4,Group Stash,1,1,0,0,0,10
5,Group Stash,1,1,0,1,0,4
6,Individual Stash,0,0,0,0,0,19975
7,Individual Stash,0,1,0,0,0,491
8,Individual Stash,0,1,0,1,0,44
9,Individual Stash,0,1,0,1,1,9


In [11]:
# Drop any previous copy of the flags table before it is reloaded.
sq = """drop table if exists prj-prod-dataplatform.worktable_data_analysis.customer_360_flags;"""
drop_job = client.query(sq)
# client.query() only submits the job. Block until the DROP has finished so
# that it cannot race with the load that follows, and so that a failed DROP
# raises here instead of being silently ignored.
drop_job.result()
# Keep the job object as the cell's displayed value.
drop_job

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=0b4115e6-a2a3-406b-91a0-a91c2b3117bc>

In [12]:
# Destination dataset and table for the flag data
dataset_id = 'worktable_data_analysis'
table_id = 'customer_360_flags'

# Explicit schema for every column of df, in DataFrame column order.
# registration_date is stored as TIMESTAMP.
schema = [
    bigquery.SchemaField("customer_id", "STRING"),
    bigquery.SchemaField("registration_date", "TIMESTAMP"),
    bigquery.SchemaField("tx_first_product", "STRING"),
    bigquery.SchemaField("tx_mau4m_active", "INT64"),
    bigquery.SchemaField("tx_mau5m_active", "INT64"),
    bigquery.SchemaField("tx_Churn_flag_4m", "INT64"),
    bigquery.SchemaField("tx_Churn_flag_5m", "INT64"),
    bigquery.SchemaField("tx_loanApplicationflag_4m", "INT64"),
    bigquery.SchemaField("submittedappliction4m", "INT64"),
    bigquery.SchemaField("tx_loanApplicationflag_5m", "INT64"),
    bigquery.SchemaField("submittedappliction5m", "INT64"),
    bigquery.SchemaField("tx_loanApprovedflag_5m", "INT64"),
    bigquery.SchemaField("tx_FSPD30_4m", "INT64"),
    # npl is a column of df (a count) and was previously missing from the
    # schema, leaving its type to be inferred.
    bigquery.SchemaField("npl", "INT64"),
    bigquery.SchemaField("tx_FSPD30_5m", "INT64"),
    bigquery.SchemaField("fspd304m", "INT64"),
    bigquery.SchemaField("fspd305m", "INT64"),
]

# Build the dataset reference explicitly (client.dataset() is deprecated)
dataset_ref = bigquery.DatasetReference(client.project, dataset_id)

# Define the table reference
table_ref = dataset_ref.table(table_id)

# Overwrite the table if it already exists. Without an explicit
# write_disposition the load job appends, so re-running this cell would
# duplicate every row.
job_config = bigquery.LoadJobConfig(
    schema=schema,
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)

# Load the DataFrame into BigQuery
job = client.load_table_from_dataframe(df, table_ref, job_config=job_config)

# Wait for the job to complete (raises if the load failed)
job.result()

print(f"Table {table_id} created in dataset {dataset_id}.")

Table customer_360_flags created in dataset worktable_data_analysis.


In [13]:
# Read the freshly loaded flags table back from BigQuery into a DataFrame.
sq = """select * from worktable_data_analysis.customer_360_flags;"""

readback_job = client.query(sq)
dfd = readback_job.to_dataframe(progress_bar_type='tqdm')

Job ID 80d7b1d5-54a4-43c2-abdb-520a5474c8c3 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


In [14]:
# Preview the first rows of the table read back from BigQuery.
dfd.head()

Unnamed: 0,customer_id,registration_date,tx_first_product,tx_mau4m_active,tx_mau5m_active,tx_Churn_flag_4m,tx_Churn_flag_5m,tx_loanApplicationflag_4m,submittedappliction4m,tx_loanApplicationflag_5m,submittedappliction5m,tx_loanApprovedflag_5m,tx_FSPD30_4m,npl,tx_FSPD30_5m,fspd304m,fspd305m
0,2276842,2023-10-18 00:00:00+00:00,SIL-VAS,0,0,,,,1,,0,,,0,,0,0
1,2276972,2023-10-18 00:00:00+00:00,SIL-VAS,0,0,,,,1,,0,,,0,,0,0
2,2276918,2023-10-18 00:00:00+00:00,SIL-VAS,0,0,,,,1,,0,,,0,,0,0
3,2277352,2023-10-18 00:00:00+00:00,SIL-VAS,0,0,,,,1,,0,,,0,,0,0
4,2276681,2023-10-18 00:00:00+00:00,SIL-VAS,0,0,,,,1,,0,,,0,,0,0


In [15]:
# (rows, columns) of the read-back frame.
dfd.shape

(473102, 17)

In [16]:
# Extract for manual checking: rows flagged as loan-approved in month 5 while
# both loan-application flags are 0.
approved_5m = dfd['tx_loanApprovedflag_5m'] == 1
application_flag_4m_zero = dfd['tx_loanApplicationflag_4m'] == 0
application_flag_5m_zero = dfd['tx_loanApplicationflag_5m'] == 0

check_rows = dfd[approved_5m & application_flag_4m_zero & application_flag_5m_zero]
check_rows.to_csv(r"C:\Users\DwaipayanChakroborti\OneDrive - Tonik Financial Pte Ltd\MyStuff\Biswa\Customer_360_Data_Prep\Data\check.csv")

In [17]:
# Export the full read-back flag table to CSV, without the index column.
export_path = r"C:\Users\DwaipayanChakroborti\OneDrive - Tonik Financial Pte Ltd\MyStuff\Biswa\Customer_360_Data_Prep\Data\customer360flag20240916.csv"
dfd.to_csv(export_path, index=False)

: 