In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
# Default location of the gcloud credentials file on this workstation.
# The variable is only set when it is not already defined, so credentials
# exported before the kernel started (another user, CI, a service account)
# are respected instead of being overwritten by this machine-specific path.
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', path)
# BigQuery client used by every query cell below.
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

# Read the customer IDs from the Raj CSV file

In [2]:
# Load the CSV of onboarded customers into a DataFrame.
rajdf = pd.read_csv(
    filepath_or_buffer=r"C:\Users\DwaipayanChakroborti\OneDrive - Tonik Financial Pte Ltd\MyStuff\Biswa\Customer_360_Data_Prep\customerid_onboarded_1stJan2024.csv",
)

In [4]:
# Materialise the customer_id column as a tuple; the SQL cell interpolates it
# into its IN (...) filters.
a = tuple(customer_id for customer_id in rajdf['customer_id'])
# Preview the first five ids.
a[:5]

(2355677, 2355517, 2355698, 2355673, 2355268)

In [5]:
# Build the BigQuery SQL for the deposit-only customer base table.
#
# The customer ids in `a` are spliced into five IN (...) filters. They are
# rendered here as an explicit comma-separated list of integer literals rather
# than through the tuple's repr, because a one-element tuple prints as "(123,)"
# and an empty one as "()", neither of which is a valid SQL IN list. int() also
# ensures that only numeric literals reach the query text.
#
# NOTE(review): each branch of af_link keeps one row per customer_user_id and
# the branches are combined with UNION ALL, so a customer present in both
# AppsFlyer reports yields two rows and can duplicate output rows - confirm
# whether that is intended.
if len(a) == 0:
    raise ValueError("No customer ids supplied; refusing to build an empty IN list")
customer_id_sql = "(" + ", ".join(str(int(customer_id)) for customer_id in a) + ")"

sq = f"""    
-- CREATE OR REPLACE TABLE `prj-prod-dataplatform.worktable_datachampions.deposit_only_users` AS 
-- (
  
### This is the base table that will be used to connect to other features ###

### Deposit Customers who are defined as:
    ### those who has a deposit account (time deposit, tsa, stash) and has a min balance of 100 by the end of the observation date
    ### those who did not apply for a loan after within the observation window from account creation

WITH ac_created AS 
(
  ## Base data of onboarded users
  SELECT DISTINCT 
  product,
  CASE 
    WHEN product = 'LOAN1.0' THEN 'Borrow'
    WHEN product = 'TSA1.0' THEN 'Save'
    WHEN product = 'TSAE1.0' THEN 'Explore'
    ELSE product END type,
  cust_id,
  user_id,
  device_id,
  created_dt,
  gender,
  mobile_no,
  FROM prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_mtb
  WHERE 1=1
  AND cast(cust_id as numeric) in {customer_id_sql}
  AND created_dt >= '2023-06-01'
  ORDER BY 4
)

, f_loan AS 
(
  ## First applied loan of a customer 
  SELECT DISTINCT customerId, new_loan_type, applicationStatus, disbursementdatetime, startApplydatetime
  FROM `risk_credit_mis.loan_master_table`
  WHERE 1=1
  AND customerId in {customer_id_sql}
  AND startApplyDatetime >= '2023-06-01'
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId ORDER BY startApplyDatetime ASC) =1
)


, deposit_balance AS 
(
  ## Gets the balance of a customer on their 90th day upon account opening
    ## For the 1st condition of the balance of a customer upon the observation date
  SELECT DISTINCT a.ofdateopened,a.ofcustomerid, a.ofstandardaccountid,account_type,balanceDateAsOf, clearedbalance,
  FROM `prj-prod-dataplatform.core_raw.customer_accounts` a
  JOIN `risk_mart.customer_balance` b ON a.ofcustomerid = b.client_id AND a.ofstandardaccountid = b.accountid
  WHERE 1=1
  AND cast(a.ofcustomerid as numeric) in {customer_id_sql}
  -- AND clearedbalance >= 100
  -- AND ofcustomerid IN ('1514439','1252865','1248952')
  QUALIFY ROW_NUMBER() OVER (PARTITION BY ofcustomerid, ofstandardaccountid ORDER BY balancedateasof ASC) = 90
)

, af_link AS
(
  ## To get the AF ID and Customer ID Link (using the first install of a customer)
  SELECT DISTINCT customer_user_id, appsflyer_id, media_source, partner, campaign
  FROM `appsflyer_raw.organic_in_app_events_report`
  WHERE 1=1
  AND customer_user_id IS NOT NULL
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_user_id ORDER BY install_time ASC) = 1
  
  UNION ALL
  
  SELECT DISTINCT customer_user_id, appsflyer_id, media_source, partner, campaign
  FROM `appsflyer_raw.in_app_events_report`
  WHERE 1=1
  AND customer_user_id IS NOT NULL
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_user_id ORDER BY install_time ASC) = 1
)


, demog_details AS 
(
  ## Demographic Details
  SELECT DISTINCT
  a.user_id,
  a.cust_Id,
  a.device_id,
  b.id,
  c.description source_of_funds,
  d.description employment_status,
  monthly_income,
  hm_postalcode,
  hm_barangay,
  hm_city,
  hm_province,
  CASE
    WHEN hm_province = 'KALINGA' THEN 'CAR – Cordillera Administrative Region'
    WHEN hm_province = 'OCCIDENTAL MINDORO' THEN 'MIMAROPA Region'
    WHEN hm_province = 'ORIENTAL MINDORO' THEN  'MIMAROPA Region'
    WHEN hm_province = 'ORODNIM' THEN 'MIMAROPA Region'
    WHEN hm_province = 'SAMAR (WESTERN SAMAR)' THEN 'Region VIII – Eastern Visayas'
    WHEN hm_province = 'ZAMBOANGA SIBUGAY' THEN 'Region IX – Zamboanga Peninsula'
    ELSE Region_name END Region_Name
  FROM prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_mtb a
  LEFT JOIN prj-prod-dataplatform.dl_dynamo_db_raw.tdbk_regfinancial_profile1 b ON a.device_id = b.deviceId
  LEFT JOIN prj-prod-dataplatform.dl_dynamo_db_raw.tdbk_source_of_funds_mtb c ON b.sourceOfFundKey = c.id  
  LEFT JOIN prj-prod-dataplatform.dl_dynamo_db_raw.tdbk_employment_status_mtb d ON b.employmentStatusKey = d.id
  LEFT JOIN prj-prod-dataplatform.dl_customers_db_raw.tdbk_cust_profile_mtb e ON a.user_id = e.user_id
  LEFT JOIN prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_add_mtb f ON a.user_id = f.user_id
  LEFT JOIN prj-prod-dataplatform.dap_ds_poweruser_playground.region_mappings h ON LOWER(f.hm_province) = LOWER(h.province)
  where cast(a.cust_Id as numeric) in {customer_id_sql}
)


## Base query used to connect to other features 
SELECT DISTINCT
  a.created_dt registration_date,
  -- startApplyDateTime,
  -- ofdateopened,
  -- balancedateasof,
  a.cust_id,
  a.user_id,
  a.product,
  a.type,
  -- account_type,
  -- ofstandardaccountid,
  -- clearedbalance, 
  a.gender,
  customer_age,
  Region_Name,
  hm_province,
  hm_city,
  hm_barangay,
  hm_postalcode,
  source_of_funds,
  employment_status,
  CAST(monthly_income AS FLOAT64)*12.5 self_declared_annual_income,
  -- add the salary scale annual income
  -- double check if i can switch networks given the same number

  mobile_no,
  IFNULL(h.network_group,g.network_group) network_group,
  CASE
    WHEN g.network_group = 'Globe' AND h.network IS NULL THEN 'Prepaid'
    WHEN g.network_group = 'Globe' AND h.network IS NOT NULL THEN 'Postpaid'
    WHEN g.network_group = 'Smart' AND (LEFT(mobile_no,5) = '63920' OR LEFT(mobile_no,5) = '63918') THEN 'Postpaid'
    WHEN g.network_group NOT IN ('Globe','Smart') THEN NULL
    ELSE 'Prepaid' END network,
CASE 
  ### social media ###
  -- anything from social
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%social%' THEN 'Social'
  ### direct ###
  -- fwb / referral
  WHEN COALESCE(Media_Source,Partner,Campaign) = 'af_app_invites' THEN 'FWB'
  WHEN (Media_Source = 'invalid_media_source_name' OR Media_Source IS NULL) AND LOWER(Campaign) LIKE '%refer%' THEN 'FWB'
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%refer%' THEN 'FWB'
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%moengage%' THEN 'FWB'
  -- apple search
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%apple search ads%' THEN 'AppleSearch'
  -- facebook / meta
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%Facebook Ads%' THEN 'Ads_Meta'
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%meta%' THEN 'Ads_Meta'
  WHEN COALESCE(Media_Source,Partner,Campaign) = 'facebook' THEN 'Ads_Meta'
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) = 'restricted' THEN 'Ads_Meta'
  -- google
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%google%' THEN 'Ads_Google'
    -- tiktok
  WHEN COALESCE(Media_Source,Partner,Campaign) like '%bytedanceglobal%' THEN 'TikTok_Paid'
  ### affiliates ###
  -- pokkt source
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%pokkt%' THEN 'Aff_Pokkt'
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%Tonik_CPA%' THEN 'Aff_Pokkt'
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%campaign_name%' THEN 'Aff_Pokkt'
  -- tyr ads
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) IN ('tyrads_int','tjzymob_int','tyrads','ta_tonik_aos_ph') THEN 'Aff_TyrAds'
  -- sales doubler
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%salesdoubler%' THEN 'Aff_SalesDoubler'
  
  -- imoney
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%imoney%' THEN 'Aff_iMoney'
  -- moneymax
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%moneymax%' THEN 'Aff_MoneyMax'
  -- jeff
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%jeff%' THEN 'Aff_Jeff'
  -- shareit
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%shareit%' THEN 'Aff_ShareIt'
  
  -- appnext
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%appnext%' THEN 'Aff_AppNext'
  -- mediadonuts / entravision
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%mediadonuts%' THEN 'Aff_Mediadonuts'
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%entravision%' THEN 'Aff_Mediadonuts'
  -- shopback
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%shopback_int%' THEN 'Aff_Shopback'
  -- deepsea
  WHEN LOWER(COALESCE(Media_Source,Partner,Campaign)) LIKE '%deepsea%' THEN 'Aff_DeepSea'
  -- avow
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%avow%' THEN 'Aff_AvowTech'
  -- avow
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%vivo%' THEN 'Aff_Vivo'
  -- 711
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%SevenEleven%' THEN 'SevenEleven'
  ## organic ##
  WHEN COALESCE(Media_Source,Partner,Campaign) IN ('af_banner') THEN 'af_Banner'
  WHEN COALESCE(Media_Source,Partner,Campaign) LIKE '%sendgrid%' THEN 'Organic'
  WHEN COALESCE(Media_Source,Partner,Campaign) IN ('Website') THEN 'Website'
  
  ## owned media ##
  WHEN media_source IN ('PRFlexLoans_WebAndroidPressRelease') THEN 'Owned Media'
  
  WHEN media_source IN ('invalid_media_source_name') THEN 'Invalid Media Source'
  WHEN media_source IN ('None') THEN 'Broken OneLink'
  WHEN media_source IS NULL AND partner IS NULL AND campaign IS NULL THEN 'Organic'
  ELSE COALESCE(media_source,partner,campaign) END as Source,
  media_source,
  partner,
  campaign,
  
FROM ac_created a
LEFT JOIN f_loan b ON a.cust_id  = CAST(b.customerId AS STRING)
LEFT JOIN deposit_balance c ON a.cust_id = c.ofcustomerid
JOIN af_link d ON a.cust_id = d.customer_user_id
JOIN prj-prod-dataplatform.dl_customers_db_derived.Tdbk_customer_mtb_age_derived e ON a.cust_id = e.cust_id
LEFT JOIN demog_details f ON a.cust_id = f.cust_id
LEFT JOIN `prj-prod-dataplatform.manual_source_extracts.mobile_carrier_mapping` g
  ON LEFT(RIGHT(mobile_no,LENGTH(mobile_no)-2),3) = CAST(g.number_prefix AS STRING)
LEFT JOIN `prj-prod-dataplatform.manual_source_extracts.mobile_carrier_mapping` h 
  ON LEFT(RIGHT(mobile_no,LENGTH(mobile_no)-2),4) = CAST(h.number_prefix AS STRING)
WHERE 1=1
## The deposit account of a customer upon the 90th day is greater than 100 
AND clearedbalance >= 100

## The customer did not initiate any loan process within 90 days from account creation
AND (DATE_DIFF(DATE(startApplyDateTime),DATE(created_dt),DAY)>=90 OR b.customerid IS NULL)
AND c.ofcustomerid IS NOT NULL
AND cast( a.cust_id as numeric) in {customer_id_sql}
"""

In [6]:
# Submit the SQL to BigQuery, then download the result set into a DataFrame
# with a tqdm progress bar.
query_job = client.query(sq)
idf = query_job.to_dataframe(progress_bar_type='tqdm')

Job ID 097b133f-c6cd-4918-8d01-2dd195e8817d successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


In [7]:
# Display the column labels of the query result.
idf.columns

Index(['registration_date', 'cust_id', 'user_id', 'product', 'type', 'gender',
       'customer_age', 'Region_Name', 'hm_province', 'hm_city', 'hm_barangay',
       'hm_postalcode', 'source_of_funds', 'employment_status',
       'self_declared_annual_income', 'mobile_no', 'network_group', 'network',
       'Source', 'media_source', 'partner', 'campaign'],
      dtype='object')