# Define Library

In [1]:

# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.
# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os

import time
from datetime import datetime
import uuid
import joblib
import uuid

import gcsfs
import duckdb as dd
import time
from datetime import datetime


# Google Cloud authentication: point the client libraries at a local credentials file.
# NOTE(review): this is an absolute, user-specific path, so the notebook only runs on
# this machine/account. Consider supplying GOOGLE_APPLICATION_CREDENTIALS from the
# environment instead of hard-coding it here.
path = r'C:\Users\Dwaipayan\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path

# Single source of truth for the project id used by the BigQuery client and exported
# to the environment below.
PROJECT_ID = 'prj-prod-dataplatform'
client = bigquery.Client(project=PROJECT_ID)
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID
# %% [markdown]
## Configure Settings
# Set options or configurations as needed
pd.set_option('display.max_columns', None)
# Use the exact registered option name; the previous "Display.max_rows" spelling only
# resolved through pandas' case-insensitive pattern matching.
pd.set_option("display.max_rows", 100)


# Constant

In [2]:
# Run date as a compact YYYYMMDD string, taken from the local clock when the cell runs.
CURRENT_DATE = f"{datetime.now():%Y%m%d}"


# <div align="left" style="color:rgb(51, 250, 250);"> Functions </div>

## <div align="left" style="color:rgb(51, 250, 250);"> Save the data to Google Cloud Storage </div>

In [3]:
def save_df_to_gcs(df, bucket_name, destination_blob_name, file_format='csv'):
    """Saves a pandas DataFrame to Google Cloud Storage.

    The DataFrame is written to a uniquely named temporary file, uploaded to the
    requested blob, and the temporary file is removed afterwards even when the
    write or the upload fails.

    Args:
        df: The pandas DataFrame to save.
        bucket_name: The name of the GCS bucket.
        destination_blob_name: The name of the blob to be created.
        file_format: The file format to save the DataFrame in ('csv' or 'parquet').

    Raises:
        ValueError: If file_format is neither 'csv' nor 'parquet'.
    """
    import tempfile

    # Reject unsupported formats before touching the disk or the network.
    if file_format not in ('csv', 'parquet'):
        raise ValueError("Invalid file format. Please choose 'csv' or 'parquet'.")

    # A unique temp path avoids clobbering an existing 'temp.csv'/'temp.parquet' in the
    # working directory. The suffix keeps the original file extension.
    fd, temp_file = tempfile.mkstemp(suffix=f'.{file_format}')
    # Only the path is needed; close the descriptor so the file can be reopened by
    # name (required on Windows).
    os.close(fd)

    try:
        if file_format == 'csv':
            df.to_csv(temp_file, index=False)
        else:
            df.to_parquet(temp_file, index=False)

        # Upload the file to GCS. The original built a project-pinned client and then
        # immediately replaced it with a default one; only the default client was
        # ever used, so only that one is created here.
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(destination_blob_name)
        blob.upload_from_filename(temp_file)
    finally:
        # Remove the temporary file whether or not the upload succeeded.
        if os.path.exists(temp_file):
            os.remove(temp_file)
    


## <div align="left" style="color:rgb(51, 250, 250);"> Read the Data from Google Cloud Storage </div>

In [4]:
def read_df_from_gcs(bucket_name, source_blob_name, file_format='csv'):
    """Reads a DataFrame from Google Cloud Storage.

    The blob is downloaded to a uniquely named temporary file, parsed with pandas,
    and the temporary file is removed before returning (also on failure).

    Args:
        bucket_name: The name of the GCS bucket.
        source_blob_name: The name of the blob to read.
        file_format: The file format to read ('csv' or 'parquet').

    Returns:
        pandas.DataFrame: The data loaded from the GCS file.

    Raises:
        ValueError: If file_format is neither 'csv' nor 'parquet'. This is checked
            before any download is attempted.
    """
    import tempfile

    # Validate first so an unsupported format does not trigger a pointless download
    # and never ends up inside a file name.
    if file_format not in ('csv', 'parquet'):
        raise ValueError("Invalid file format. Please choose 'csv' or 'parquet'.")

    # A unique temp path avoids collisions with an existing 'temp.<format>' file in
    # the working directory.
    fd, temp_file = tempfile.mkstemp(suffix=f'.{file_format}')
    # Only the path is needed; close the descriptor so the file can be reopened by
    # name (required on Windows).
    os.close(fd)

    try:
        # Initialize GCS client
        storage_client = storage.Client()
        bucket = storage_client.bucket(bucket_name)
        blob = bucket.blob(source_blob_name)

        # Download the file to the temporary location
        blob.download_to_filename(temp_file)

        # Read the file into a DataFrame
        if file_format == 'csv':
            return pd.read_csv(temp_file, low_memory=False)
        return pd.read_parquet(temp_file)

    finally:
        # Clean up the temporary file
        if os.path.exists(temp_file):
            os.remove(temp_file)

## <div align = "left" style="color:rgb(51, 250, 250);"> Data Quality Report </div>

In [5]:
def data_quality_report(df, target_col='ln_fspd30_flag'):
    """Build a per-column data quality summary for a DataFrame.

    One output row is produced per column of ``df``. Every row carries the dtype,
    the missing-value count, missing and non-missing percentages, the number of
    unique values, and the mode with its share of rows. Numeric columns also get
    min, max, mean, median, standard deviation, quartiles, IQR, skewness, kurtosis,
    coefficient of variation and the correlation with ``target_col``. For
    non-numeric columns those fields are None.

    Args:
        df: The pandas DataFrame to profile.
        target_col: Column that numeric columns are correlated against. The
            correlation is None when this column is absent from ``df``, is not
            numeric, or is the column currently being profiled.

    Returns:
        pandas.DataFrame: One row per column of ``df``.
    """
    correlation_label = f'Correlation with {target_col}'

    # Output column order; every record is pre-filled with None in this order so that
    # non-numeric columns need no explicit "not applicable" assignments.
    field_order = [
        'Column', 'Data Type', 'Missing Values', 'Missing Percentage',
        'Unique Values', 'Min', 'Max', 'Mean', 'Median', 'Mode',
        'Mode Percentage', 'Std Dev', 'Non-missing Percentage',
        '25% Quantile', '50% Quantile', '75% Quantile', 'IQR',
        'Skewness', 'Kurtosis', 'CV (%)', correlation_label,
    ]

    def _mode_and_share(series):
        # First mode of the series and the percentage of all rows equal to it.
        modes = series.mode()
        top = None if modes.empty else modes.iloc[0]
        share = (series == top).sum() / len(df) * 100 if top is not None else None
        return top, share

    records = []
    for name in df.columns:
        series = df[name]
        null_count = series.isnull().sum()

        entry = dict.fromkeys(field_order)
        entry['Column'] = name
        entry['Data Type'] = series.dtype
        entry['Missing Values'] = null_count
        entry['Missing Percentage'] = (null_count / len(df)) * 100
        entry['Unique Values'] = series.nunique()
        entry['Non-missing Percentage'] = ((len(df) - null_count) / len(df)) * 100

        if not pd.api.types.is_numeric_dtype(series):
            # Only the mode statistics apply to non-numeric columns.
            entry['Mode'], entry['Mode Percentage'] = _mode_and_share(series)
            records.append(entry)
            continue

        entry['Min'] = series.min()
        entry['Max'] = series.max()
        average = series.mean()
        entry['Mean'] = average
        entry['Median'] = series.median()
        entry['Mode'], entry['Mode Percentage'] = _mode_and_share(series)
        spread = series.std()
        entry['Std Dev'] = spread

        lower_q = series.quantile(0.25)
        middle_q = series.quantile(0.50)  # Same as median
        upper_q = series.quantile(0.75)
        entry['25% Quantile'] = lower_q
        entry['50% Quantile'] = middle_q
        entry['75% Quantile'] = upper_q
        entry['IQR'] = upper_q - lower_q

        entry['Skewness'] = series.skew()
        entry['Kurtosis'] = series.kurt()

        # Coefficient of variation is undefined for a zero mean.
        entry['CV (%)'] = (spread / average) * 100 if average != 0 else None

        # Correlate against the target using only rows where both values are present.
        if (
            target_col in df.columns
            and name != target_col
            and pd.api.types.is_numeric_dtype(df[target_col])
        ):
            pairwise = df[[name, target_col]].dropna().corr()
            entry[correlation_label] = pairwise.iloc[0, 1]

        records.append(entry)

    return pd.DataFrame(records)

# <div align = "left" style="color:rgb(51,250,250);"> Upload pickle file to Google Cloud Storage Bucket </div>

In [6]:
def upload_to_gcs(bucket_name, source_file_path, destination_blob_name):
    """Upload a local file to a Google Cloud Storage bucket and print a confirmation.

    Args:
        bucket_name: Name of the GCS bucket.
        source_file_path: Path of the local file to upload.
        destination_blob_name: Name of the blob to create in the bucket.
    """
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_filename(source_file_path)

    print(f"File {source_file_path} uploaded to {bucket_name}/{destination_blob_name}")

In [7]:
import pickle
import io
from google.cloud import storage
def save_pickle_to_gcs(data, bucket_name, destination_blob_name):
    """
    Pickle a Python object in memory and upload it to Google Cloud Storage.

    No local file is created: the object is serialized into a bytes buffer which
    is streamed to the destination blob.

    Args:
        data: The Python object to pickle (DataFrame, dict, list, etc.)
        bucket_name: Name of the GCS bucket
        destination_blob_name: Path/filename in the bucket
    """
    # Serialize first; the resulting buffer is already positioned at its start.
    payload = io.BytesIO(pickle.dumps(data))

    # Resolve the destination blob and stream the pickled bytes to it.
    target_blob = storage.Client().bucket(bucket_name).blob(destination_blob_name)
    target_blob.upload_from_file(payload, content_type='application/octet-stream')

    print(f"Pickle file uploaded to gs://{bucket_name}/{destination_blob_name}")

# Table

In [None]:
# Dataset qualifiers used when building fully qualified table names in the queries.
schema1 = 'risk_mart'
schema2 = 'worktable_data_analysis'

# Reference dates as ISO (YYYY-MM-DD) strings; OBDATE drives the table-name suffixes.
OBDATE = '2025-09-30'
SDATE = '2023-01-01'
EDATE = '2025-09-30'

formatted_date = datetime.strptime(SDATE, '%Y-%m-%d').strftime('%Y%m%d')

print(formatted_date)  # Output: 20230101

# Compact (YYYYMMDD) form of OBDATE, computed once and reused below. Building it
# outside the f-strings also avoids reusing the enclosing quote character inside a
# replacement field, which is a SyntaxError on Python versions before 3.12.
obdate_compact = datetime.strptime(OBDATE, '%Y-%m-%d').strftime('%Y%m%d')

# Source tables whose names carry the OBDATE suffix.
al = f'applied_loans_20230101_{obdate_compact}'
altrans = f'applied_loans_20210701_{obdate_compact}_trans'
nal = f'tsa_onboarded_but_never_applied_loan_20230101_{obdate_compact}'

# Output tables; the first four names are fixed, the combined table is date-suffixed.
bscoresnapshotcustomerdata = 'b_score_applied_loans_customer_information_data'
bscoresnapshotcustomertransactiondata = 'b_score_applied_loans_customer_transaction_data'
bscoresnapshotcustomerevent = 'b_score_applied_loans_customer_event_data'
bscoresnapshotcontactability = 'b_score_applied_loans_contactability'
bscoresnapshotcombineddata = f'b_score_model_applied_loans_{obdate_compact}'


print(al)
print(altrans)
print(nal)
print(bscoresnapshotcustomerdata)
print(bscoresnapshotcustomertransactiondata)
print(bscoresnapshotcustomerevent)
print(bscoresnapshotcontactability)
print(bscoresnapshotcombineddata)

20230101
applied_loans_20230101_20250930
applied_loans_20210701_20250930_trans
tsa_onboarded_but_never_applied_loan_20230101_20250930
b_score_applied_loans_customer_information_data
b_score_applied_loans_customer_transaction_data
b_score_applied_loans_customer_event_data
b_score_applied_loans_contactability
b_score_model_applied_loans_20250930


# Tab1

In [9]:
sq = f"""
CREATE OR REPLACE TABLE {schema2}.{bscoresnapshotcustomerdata} as 
with 
cust_min_loan_disb_time as
(
  SELECT customerId,
  MIN(ln_appln_submit_datetime) AS min_loan_appln_time_disbursed_loan
  from {schema1}.{altrans}
  where ln_disb_flag = 1 --and ln_loan_type = 'Quick'
  group by 1
),

eligible_customers as (
  SELECT input.* except(ln_appln_submit_datetime),
ln_appln_submit_datetime as ln_loan_appln_time,
  FROM {schema1}.{altrans} input
  JOIN cust_min_loan_disb_time ON input.customerid = cust_min_loan_disb_time.customerId and input.ln_appln_submit_datetime > min_loan_appln_time_disbursed_loan
  WHERE ln_appln_submit_datetime >= '2023-01-01'
    
),
first_applied_loan_data as (
  SELECT applied_loans.customerId,
  ln_loan_appln_time,
  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) AS first_applied_loan_appln_time,
  new_loan_type AS first_applied_loan_type,
  ln_chosen_tenor as first_applied_loan_tenor,
  ln_chosen_principal as first_applied_loan_amount,
  case when applied_loans.loantype='BNPL' and store_type =1 then 'Appliance'
    when applied_loans.loantype='BNPL' and store_type =2 then 'Mobile' 
    when applied_loans.loantype='BNPL' and store_type =3 then 'Mall' 
    when applied_loans.loantype='BNPL' and store_type not in (1,2,3) then store_tagging
    else 'not applicable' end as first_applied_product_type,
  from `risk_credit_mis.loan_master_table`  applied_loans
JOIN eligible_customers
ON applied_loans.customerid = eligible_customers.customerid and  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) < ln_loan_appln_time
left join(SELECT DISTINCT mer_refferal_code, mer_name mer_name,store_type,store_tagging FROM `dl_loans_db_raw.tdbk_merchant_refferal_mtb`
  left join worktable_datachampions.TARGET_SPLIT P on P.STORE_NAME = mer_name
qualify row_number() over(partition by mer_refferal_code order by  created_dt desc)=1) sil_category on applied_loans.purpleKey=sil_category.mer_refferal_code
  QUALIFY ROW_NUMBER() OVER(PARTITION BY customerId,ln_loan_appln_time order by  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) ASC) = 1
),
Reject_flag_data_new as (SELECT digitalLoanAccountId,applicationStatus,
case when  applicationStatus IN ('EXPIRED', 'EXEMPT') and prev_applicationStatus='REJECT' THEN 1 when applicationStatus ='REJECT' THEN 1 else 0 end reject_flag,
case when  applicationStatus IN ('ACCEPT', 'CANCELLED','EXEMPT','EXPIRED','REJECT') and prev_applicationStatus='APPROVED' THEN 1 when applicationStatus ='APPROVED' THEN 1 else 0 end approved_flag

 FROM (SELECT digitalLoanAccountId, applicationStatus, created_dt, LEAD(applicationStatus) OVER(PARTITION BY digitalLoanAccountId order by created_dt desc, statusTraceId desc) AS  prev_applicationStatus,LEAD(created_dt) OVER(PARTITION BY digitalLoanAccountId order by created_dt desc, statusTraceId desc) AS prev_created_dt,  row_number() over (partition by digitalLoanAccountId order by created_dt desc, statusTraceId desc ) rn  from dl_loans_db_raw.tdbk_status_trace ) where rn =1)
 ,
last_applied_loan_data as (
  SELECT input_customers.customerId,
  input_customers.ln_loan_appln_time ln_loan_appln_time,
  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) AS last_applied_loan_appln_time,
  CASE WHEN lower(osversion_v2) like 'ios%' THEN 'iOS' ElSE 'Android' END  as last_applied_os_type,
  new_loan_type AS last_applied_loan_type,
  loanRequestTenure as last_applied_loan_tenor,
  loanRequestAmount as last_applied_loan_amount,
  lmt.digitalLoanAccountId as last_applied_crif_id,
  case when lmt.loantype='BNPL' and store_type =1 then 'Appliance'
    when lmt.loantype='BNPL' and store_type =2 then 'Mobile' 
    when lmt.loantype='BNPL' and store_type =3 then 'Mall' 
    when lmt.loantype='BNPL' and store_type not in (1,2,3) then store_tagging
    else 'not applicable' end as last_applied_product_type,
  CASE WHEN reject_flag = 1 or lmt.applicationStatus in ('REJECT','EXPIRED','RESET','EXEMPT','CANCELLED')  THEN 'REJECT'
  WHEN approved_flag = 1 or lmt.applicationStatus in ('APPROVED','COMPLETED','ACTIVATED') THEN 'APPROVED'
  ELSE 'REJECT' END AS last_applied_loan_decision,
  COALESCE(eligible_customers.cic_called_flag,0) as last_applied_cic_called_flag,
  COALESCE(eligible_customers.cic_hit_flag,0) as last_applied_cic_hit_flag,
  CASE WHEN  lmt.new_loan_type LIKE 'SIL%' THEN eligible_customers.s_credo_score
  WHEN lmt.new_loan_type in ('Quick','Flex','Flex-up','Big Loan','ACL TSA') THEN eligible_customers.c_credo_score
  END AS last_applied_credo_score,
  lmt.credolabRefNumber as last_applied_credo_ref_no,
COALESCE(eligible_customers.ln_self_dec_income) ln_self_dec_income,
COALESCE(eligible_customers.ln_marital_status,if(maritalStatus='Live-in Partner','With a Live-in Partner',maritalStatus)) ln_marital_status,
COALESCE(eligible_customers.ln_education_level) ln_education_level,
eligible_customers.ln_nature_of_work_new,
eligible_customers.onb_email_verified_flag,
eligible_customers.onb_place_of_birth,
eligible_customers.onb_doc_type,
eligible_customers.onb_country,
eligible_customers.onb_province,
eligible_customers.onb_city,
eligible_customers.onb_barangay,
eligible_customers.onb_postalcode,
eligible_customers.onb_latitude,
eligible_customers.onb_longitude,
eligible_customers.onb_osversion,
eligible_customers.onb_kyc_status,
eligible_customers.onb_kyc_status_upgrade_datetime,
COALESCE(eligible_customers.ln_osversion,lmt.osversion_v2) ln_osversion,
if(lower(eligible_customers.ln_osversion) like 'ios%','Apple', eligible_customers.ln_brand) ln_brand,
--eligible_customers.ln_brand,
eligible_customers.ln_cnt_dependents,
eligible_customers.ln_source_funds_new,
eligible_customers.ln_employment_type_new,
eligible_customers.ln_industry_new,
eligible_customers.ln_company_name,
eligible_customers.ln_salary_scaled_income,
eligible_customers.ln_vas_opted_flag,
eligible_customers.ln_age,
eligible_customers.ln_mobile_no,
eligible_customers.ln_alt_mobile_no,
eligible_customers.ln_province,
eligible_customers.ln_city,
eligible_customers.ln_barangay,
eligible_customers.ln_latitude,
eligible_customers.ln_longitude,
eligible_customers.ln_doc_type,
eligible_customers.ln_ref1_type,
eligible_customers.onb_first_name,
eligible_customers.onb_middle_name,
eligible_customers.onb_last_name,
eligible_customers.onb_age,
eligible_customers.onb_gender,
eligible_customers.onb_mobile_no,
eligible_customers.onb_email,
eligible_customers.ln_loan_applied_flag,
eligible_customers.ln_facta_flag,
eligible_customers.ln_dl_rule_reject_flag,
eligible_customers.ln_taran_rule_reject_flag,
eligible_customers.ln_taran_scorecard_reject_flag,
eligible_customers.ln_cdd_reject_flag,
eligible_customers.ln_marked_underwriter_check_flag,
eligible_customers.ln_underwriting_reject_flag,
eligible_customers.ln_approved_not_disb_flag,
eligible_customers.ln_vas_used_flag,
if(lower(eligible_customers.ln_osversion) like 'ios%' ,'iOS','Android') ln_os_type,
--eligible_customers.ln_os_type,
eligible_customers.ln_address,
eligible_customers.ln_postal_code,
eligible_customers.ln_doc_number,
eligible_customers.ln_source_funds,
eligible_customers.ln_employment_type,
eligible_customers.ln_nature_of_work,
eligible_customers.ln_industry,
eligible_customers.ln_ref2_type,
eligible_customers.onb_self_dec_income,
eligible_customers.onb_company_name,
from `risk_credit_mis.loan_master_table` lmt
JOIN eligible_customers input_customers on input_customers.customerId = lmt.customerId and coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) < input_customers.ln_loan_appln_time
LEFT JOIN `{schema1}.{altrans}` eligible_customers ON lmt.digitalLoanAccountId = eligible_customers.digitalLoanAccountId 
JOIN Reject_flag_data_new on Reject_flag_data_new.digitalLoanAccountId = lmt.digitalLoanAccountId
left join(SELECT DISTINCT mer_refferal_code, mer_name mer_name,store_type,store_tagging FROM `dl_loans_db_raw.tdbk_merchant_refferal_mtb`
left join worktable_datachampions.TARGET_SPLIT P on P.STORE_NAME = mer_name
qualify row_number() over(partition by mer_refferal_code order by  created_dt desc)=1) sil_category on lmt.purpleKey=sil_category.mer_refferal_code
  QUALIFY ROW_NUMBER() OVER(PARTITION BY customerId,input_customers.ln_loan_appln_time order by  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) DESC) = 1
),
first_disb_loan_data as (
  SELECT eligible_customers.customerId,
  ln_loan_appln_time,
  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) AS first_disb_loan_appln_time,
  disbursementDateTime as first_disb_loan_disb_time,
  new_loan_type AS first_disb_loan_type,
  loanRequestTenure as first_disb_loan_tenor,
  loanRequestAmount as first_disb_loan_amount,
 case when applied_loans.loantype='BNPL' and store_type =1 then 'Appliance'
    when applied_loans.loantype='BNPL' and store_type =2 then 'Mobile' 
    when applied_loans.loantype='BNPL' and store_type =3 then 'Mall' 
    when applied_loans.loantype='BNPL' and store_type not in (1,2,3) then store_tagging
    else 'not applicable' end first_disb_product_type
   from `risk_credit_mis.loan_master_table`  applied_loans
JOIN eligible_customers
ON applied_loans.customerid = eligible_customers.customerid and  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) < ln_loan_appln_time
left join(SELECT DISTINCT mer_refferal_code, mer_name mer_name,store_type,store_tagging FROM `dl_loans_db_raw.tdbk_merchant_refferal_mtb`
  left join worktable_datachampions.TARGET_SPLIT P on P.STORE_NAME = mer_name
qualify row_number() over(partition by mer_refferal_code order by  created_dt desc)=1) sil_category on applied_loans.purpleKey=sil_category.mer_refferal_code
WHERE applied_loans.flagDisbursement = 1
  QUALIFY ROW_NUMBER() OVER(PARTITION BY customerId,ln_loan_appln_time order by  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) ASC) = 1
),
last_disb_loan_data as (
  SELECT eligible_customers.customerId,
  ln_loan_appln_time,
   coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime))  AS last_disb_loan_appln_time,
  disbursementDateTime as last_disb_loan_disb_time,
  new_loan_type AS last_disb_loan_type,
  loanRequestTenure as last_disb_loan_tenor,
  loanRequestAmount as last_disb_loan_amount,
  applied_loans.loanaccountnumber as prev_loanAccountNumber,
  applied_loans.digitalLoanAccountId as last_disb_crif_id,
 case when applied_loans.loantype='BNPL' and store_type =1 then 'Appliance'
    when applied_loans.loantype='BNPL' and store_type =2 then 'Mobile' 
    when applied_loans.loantype='BNPL' and store_type =3 then 'Mall' 
    when applied_loans.loantype='BNPL' and store_type not in (1,2,3) then store_tagging
    else 'not applicable' end as last_disb_product_type
  from `risk_credit_mis.loan_master_table`  applied_loans
JOIN eligible_customers
ON applied_loans.customerid = eligible_customers.customerid and  coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) < ln_loan_appln_time
left join(SELECT DISTINCT mer_refferal_code, mer_name mer_name,store_type,store_tagging FROM `dl_loans_db_raw.tdbk_merchant_refferal_mtb`
  left join worktable_datachampions.TARGET_SPLIT P on P.STORE_NAME = mer_name
qualify row_number() over(partition by mer_refferal_code order by  created_dt desc)=1) sil_category on applied_loans.purpleKey=sil_category.mer_refferal_code
WHERE applied_loans.flagDisbursement = 1
  QUALIFY ROW_NUMBER() OVER(PARTITION BY customerId,ln_loan_appln_time order by coalesce(termsAndConditionsSubmitDateTime,if (new_loan_type ='Flex-up',startApplyDateTime,termsAndConditionsSubmitDateTime)) DESC) = 1
)
SELECT 
 cast(eligible_customers.customerId as string) customerid,
    onb_tsa_onboarding_datetime,
    credo_inquiry_date,
    DATE_DIFF(eligible_customers.ln_loan_appln_time,DATE(onb_tsa_onboarding_datetime),DAY) dob_observation_date,
    DATE_DIFF(DATE(onb_tsa_onboarding_datetime),credo_inquiry_date,DAY) days_since_credo_call_onb,
    DATE_DIFF(eligible_customers.ln_loan_appln_time,credo_inquiry_date,DAY) days_since_credo_call_loan_application,
    ln_final_approved_flag,
    ln_chosen_principal,
    ln_chosen_tenor,
    eligible_customers.loanaccountnumber,
    eligible_customers.ln_loan_appln_time,
    eligible_customers.digitalLoanAccountId,
    ln_loan_type,
    ln_user_type,
    ln_prod_type,
    ln_disb_dtime,
    ln_disb_flag,
    ln_approved_tenor,
    ln_approved_principal,
    ln_mature_fspd30_flag,
    ln_fspd30_flag,
    cust_status_flag,
    cust_status_close_date,
    first_applied_loan_data.* EXCEPT(customerid,ln_loan_appln_time),
    first_disb_loan_data.* EXCEPT(customerid,ln_loan_appln_time),
    last_applied_loan_data.* except(customerid,ln_loan_appln_time),
    last_disb_loan_data.* except(customerid,ln_loan_appln_time),
    /*first_applied_loan_appln_time
    first_applied_loan_appln_time,
    first_applied_loan_type,
    first_applied_loan_tenor,
    first_applied_loan_amount,
    first_applied_product_type,
    first_disb_loan_appln_time,
    first_disb_loan_disb_time,
    first_disb_loan_type,
    first_disb_loan_tenor,
    first_disb_loan_amount,
    first_disb_product_type,
    last_applied_loan_appln_time,
    last_applied_loan_type,
    last_applied_loan_tenor,
    last_applied_loan_amount,
    last_applied_product_type,
    last_applied_crif_id,
    last_disb_loan_appln_time,
    last_disb_loan_disb_time,
    last_disb_loan_type,
    last_disb_loan_tenor,
    last_disb_loan_amount,
    last_disb_crif_id,
    last_disb_product_type,*/
    ln_mature_fpd30_flag,
    ln_fpd30_flag,
    --repeat_loan_type,
    --prev_loanAccountNumber
FROM eligible_customers
LEFT JOIN first_applied_loan_data ON first_applied_loan_data.customerid = eligible_customers.customerid and first_applied_loan_data.ln_loan_appln_time = eligible_customers.ln_loan_appln_time
LEFT JOIN first_disb_loan_data ON first_disb_loan_data.customerid = eligible_customers.customerid and first_disb_loan_data.ln_loan_appln_time = eligible_customers.ln_loan_appln_time
LEFT JOIN last_applied_loan_data ON last_applied_loan_data.customerid = eligible_customers.customerid and last_applied_loan_data.ln_loan_appln_time = eligible_customers.ln_loan_appln_time
LEFT JOIN last_disb_loan_data ON last_disb_loan_data.customerid = eligible_customers.customerid and last_disb_loan_data.ln_loan_appln_time = eligible_customers.ln_loan_appln_time
--left join `risk_credit_mis.loan_level_master_table` loan_level on loan_level.digitalLoanAccountId=eligible_customers.digitalLoanAccountId
;
"""

# Submit the CREATE OR REPLACE TABLE statement held in `sq` through the module-level
# BigQuery client and block until the job has finished.
job = client.query(sq)
job.result()  # Wait for the job to complete.
time.sleep(5) # Pauses for 5 seconds after the job completes. NOTE(review): presumably a settling delay before the next cell reads the new table; confirm it is still needed.
print(f'Table {schema2}.{bscoresnapshotcustomerdata} created successfully')

Table worktable_data_analysis.b_score_applied_loans_customer_information_data created successfully


# Transaction

In [10]:
sq = f"""
CREATE OR REPLACE TABLE `{schema2}.{bscoresnapshotcustomertransactiondata}` as
with input_customers as(
select * from {schema2}.{bscoresnapshotcustomerdata}
),
cust_onboarding_acc_data as (
    SELECT
    DATE(opendate,'Asia/Manila') OFDATEOPENED,
    DATE(closuredate,'Asia/Manila') ofdateclosed,
    closed OFISCLOSED,
    DATE(c.created_dt) registration_date,
    c.created_dt as onboarding_date,
    DATETIME(reccreatedon,'Asia/Manila') reccreatedon,
    cust_id,
    accountid,
    productid,
    accountdescription as account_type,
    clearedbalance,
    ln_loan_appln_time
    FROM `dl_customers_db_raw.tdbk_customer_mtb` c    
    JOIN `finastra_raw.account` b ON c.cust_id = b.ubcustomercode
    JOIN input_customers on c.cust_id = CAST(input_customers.customerid AS STRING)
),
main_transaction_data AS 
(
     SELECT 
    transaction_date,
    OFDATEOPENED,
    OFISCLOSED,
    registration_date,
    transaction_id,
    b.cust_id customer_id,
    a.accountid,
    productid,
    b.account_type,
    transaction_code,
    a.status,
    channel,
    credit_debit_indicator,
    inter_exter_flag,
    trx_amount,
    core_narration,
    input_customers.ln_loan_appln_time,
    transaction_datetime,
    -- customer_transactions to get the transactions
    FROM (SELECT * FROM cust_onboarding_acc_data WHERE account_type = 'Tonik Account') b
    JOIN input_customers on b.cust_id = CAST(input_customers.customerid AS STRING)
    LEFT JOIN `risk_mart.customer_transactions` a ON a.accountid = b.accountid
    and a.transaction_date < input_customers.ln_loan_appln_time and a.status = 'Success'
    --and a.transaction_datetime < input_customers.ln_loan_appln_time

    WHERE 1=1
   
),


#### Net Cash In ####
  -- 1. Outside Tonik to TSA
  -- 2. Other Tonik user to Own Tonik Account


net_cash_in AS 
(
  ## 1. Outside Tonik to TSA
  SELECT
    transaction_date,
    transaction_datetime,
    OFDATEOPENED,
    OFISCLOSED,
    registration_date,
    transaction_id,
    customer_id,
    accountid,
    account_type,
    status,
    channel,
    credit_debit_indicator,
    inter_exter_flag,
    trx_amount,
    core_narration,
    'Net Cash In' main_transaction_type,
    'Outside Tonik to TSA' sub_transaction_type,
    ln_loan_appln_time
  FROM main_transaction_data
  WHERE 1=1
  -- main conditions: should be a successful transaciton and credit and all coming from Tonik Account
  AND credit_debit_indicator = 'CREDIT'
  AND account_type = 'Tonik Account' and LOWER(core_narration) NOT LIKE '%blocking%' and transaction_code not like 'A0%'
  AND transaction_code IN ('N01','IP2','XE2','00T','21C','P01')
  -- 1. Outside Tonik to TSA conditions (all cash in)
  AND inter_exter_flag = 'Outside Tonik'

  UNION ALL

  ## 2. Other Tonik user to Own Tonik Account
  SELECT
    transaction_date,
    transaction_datetime,
    OFDATEOPENED,
    OFISCLOSED,
    registration_date,
    transaction_id,
    customer_id,
    accountid,
    account_type,
    status,
    channel,
    credit_debit_indicator,
    inter_exter_flag,
    trx_amount,
    core_narration,
    'Net Cash In' main_transaction_type,
    'Other Tonik Users to Town Tonik Account' sub_transaction_type,
    ln_loan_appln_time
  FROM main_transaction_data
  WHERE 1=1
  -- main conditions: should be a successful transaciton and credit and all coming from Tonik Account
  AND credit_debit_indicator = 'CREDIT'
  AND account_type = 'Tonik Account' and LOWER(core_narration) NOT LIKE '%blocking%' and transaction_code not like 'A0%'
  AND transaction_code IN ('N01','IP2','XE2','00T','21C','P01')

  -- 2. Other Tonik user to Own Tonik Account
  AND inter_exter_flag = 'Inside Tonik'
  AND core_narration LIKE '%Receive money from other Tonik Account%'
  -- AND LEFT(core_narration,STRPOS(core_narration, ",")-1) = 'Receive money from other Tonik Account'
)

#### Net Cash Out ####
-- 1. Bills Pay
-- 2. Card Transactions
-- 3. Own TSA to other Tonik Users
-- 4. TSA to Outside Tonik

, net_cash_out AS 
(

 ## 1. Bills Pay
  SELECT 
    transaction_date,
    transaction_datetime,
    OFDATEOPENED,
    OFISCLOSED,
    registration_date,
    transaction_id,
    customer_id,
    accountid,
    account_type,
    status,
    channel,
    credit_debit_indicator,
    inter_exter_flag,
    trx_amount,
    core_narration,
    'Net Cash Out' main_transaction_type,
    'Bills Pay' sub_transaction_type,
    ln_loan_appln_time
  FROM main_transaction_data
  WHERE 1=1
  -- main conditions: should be a successful transaciton and debit
  AND credit_debit_indicator = 'DEBIT'
  AND LOWER(core_narration) NOT LIKE '%blocking%'

  -- 1. Bills Pay
  AND channel = 'Billspay'


  UNION ALL

  ## 2. Card Transactions (Cash Out)
  SELECT
    a.transaction_date,
    transaction_datetime,
    a.OFDATEOPENED,
    a.OFISCLOSED,
    a.registration_date,
    a.transaction_id,
    a.customer_id,
    a.accountid,
    a.account_type,
    a.status,
    a.channel,
    a.credit_debit_indicator,
    a.inter_exter_flag,
    a.trx_amount,
    a.core_narration,
    'Net Cash Out' main_transaction_type,
    'Card Transactions (Cash Out)' sub_transaction_type,
    ln_loan_appln_time
  FROM main_transaction_data a
  -- 2. Card Transactions (Cash Out) -- using the table made above
  WHERE 1=1
  -- main conditions: should be a successful transaciton and debit and coming from tonik account
  AND a.credit_debit_indicator = 'DEBIT'
  AND a.account_type = 'Tonik Account'
  AND transaction_code like 'A0%' and core_narration not like '%Blocking%'

  UNION ALL

  ## 3. Own TSA to other Tonik Users
  SELECT DISTINCT
    transaction_date,
    transaction_datetime,
    OFDATEOPENED,
    OFISCLOSED,
    registration_date,
    transaction_id,
    customer_id,
    accountid,
    account_type,
    status,
    channel,
    credit_debit_indicator,
    inter_exter_flag,
    trx_amount,
    core_narration,
    'Net Cash Out' main_transaction_type,
    'Own TSA to Other Tonik Users' sub_transaction_type,
    ln_loan_appln_time
  FROM main_transaction_data a
  WHERE 1=1
  -- main conditions: should be a successful transaciton and debit
  AND a.credit_debit_indicator = 'DEBIT'
  AND a.account_type = 'Tonik Account'
  AND transaction_code not like 'A0%' and core_narration not like '%Blocking%'

  -- 3. Own TSA to other Tonik Users
  AND a.channel = 'Core transactions'
  AND a.inter_exter_flag = 'Inside Tonik'
  --AND LOWER(core_narration) LIKE '%send money to other tonik account%' 
  -- AND LOWER(core_narration) NOT LIKE '%scontri%'
  -- AND LOWER(core_narration) NOT LIKE '%stash%'
  -- AND LOWER(core_narration) NOT LIKE '%time deposit%'

  UNION ALL

  ## 4. TSA to Outside Tonik (Other banks)
  SELECT DISTINCT
    transaction_date,
    transaction_datetime,
    OFDATEOPENED,
    OFISCLOSED,
    registration_date,
    transaction_id,
    customer_id,
    accountid,
    account_type,
    status,
    channel,
    credit_debit_indicator,
    inter_exter_flag,
    trx_amount,
    core_narration,
    'Net Cash Out' main_transaction_type,
    'TSA to Outside Tonik (Other Banks)' sub_transaction_type,
    ln_loan_appln_time
  FROM main_transaction_data a
  WHERE 1=1
  -- main conditions: should be a successful transaciton and debit
  AND a.credit_debit_indicator = 'DEBIT'
  AND a.account_type = 'Tonik Account'
  AND core_narration not like '%Blocking%'

  -- channels not in core transactions and billspay with the flag as outside tonik are sending to other banks
  AND a.channel NOT IN  ('Core transactions','Billspay')
  AND a.inter_exter_flag = 'Outside Tonik'
)

, transactions_sub AS 
(
  -- merging the cash ins and cash outs
  SELECT DISTINCT *
  FROM net_cash_in 
  UNION ALL
  SELECT DISTINCT *
  FROM net_cash_out
)

, date_diff_sub AS 
(
    -- to get the date difference between 2 transactions (cash in and cash out)
    SELECT customer_id,
    
    'Overall' days_diff_type,
    DATE_DIFF(LEAD(transaction_date) OVER (PARTITION BY customer_id,ln_loan_appln_time ORDER BY transaction_date,core_narration ASC),transaction_date,DAY) days_bt_trans,
    ln_loan_appln_time
    
    FROM 
    (
        SELECT DISTINCT
        transaction_date,
        customer_id,
        main_transaction_type,
        ln_loan_appln_time,
        core_narration
        FROM transactions_sub
        WHERE transaction_date < ln_loan_appln_time
        --   AND customer_id IN ('2077378','2081999','2475220','2485072')
    )

    UNION ALL

    -- to get the date difference between 2 cash ins
    SELECT customer_id,
    'Cash In' days_diff_type,
    DATE_DIFF(LEAD(transaction_date) OVER (PARTITION BY customer_id,ln_loan_appln_time ORDER BY transaction_date,core_narration ASC),transaction_date,DAY) days_bt_trans,
    ln_loan_appln_time
    
    FROM 
    (
        SELECT DISTINCT
        transaction_date,
        customer_id,
        main_transaction_type,
        ln_loan_appln_time,
        core_narration
        FROM transactions_sub
        WHERE transaction_date < ln_loan_appln_time
        --   AND customer_id IN ('2077378','2081999','2475220','2485072')
        AND main_transaction_type = 'Net Cash In'
    )

    UNION ALL

    -- to get the date difference between 2 cash outs
    SELECT customer_id,
    'Cash Out' days_diff_type,
    DATE_DIFF(LEAD(transaction_date) OVER (PARTITION BY customer_id,ln_loan_appln_time ORDER BY transaction_date,core_narration ASC),transaction_date,DAY) days_bt_trans,
    ln_loan_appln_time
    FROM 
    (
        SELECT DISTINCT
        transaction_date,
        customer_id,
        main_transaction_type,
        ln_loan_appln_time,
        core_narration
        FROM transactions_sub
        WHERE transaction_date < ln_loan_appln_time
        --   AND customer_id IN ('2077378','2081999','2475220','2485072')
        AND main_transaction_type = 'Net Cash Out'
    )
)

, days_bt_trans_avg AS 
(
-- get the average days in between 
SELECT DISTINCT
customer_id,
ln_loan_appln_time,
AVG(IF(days_diff_type='Overall',days_bt_trans,NULL)) overall_avg_days_bt_trans,
AVG(IF(days_diff_type='Cash In',days_bt_trans,NULL)) net_cash_in_avg_days_bt_trans,
AVG(IF(days_diff_type='Cash Out',days_bt_trans,NULL)) net_cash_out_avg_days_bt_trans
FROM date_diff_sub
GROUP BY 1,2
)

, days_bt_trans_med AS 
(
-- get the median days in between
SELECT DISTINCT
customer_id,
ln_loan_appln_time,
PERCENTILE_CONT(IF(days_diff_type='Overall',days_bt_trans,NULL), .50) OVER (PARTITION BY customer_id,ln_loan_appln_time) overall_med_days_bt_trans,
PERCENTILE_CONT(IF(days_diff_type='Cash In',days_bt_trans,NULL), .50) OVER (PARTITION BY customer_id,ln_loan_appln_time) cash_in_med_days_bt_trans,
PERCENTILE_CONT(IF(days_diff_type='Cash Out',days_bt_trans,NULL), .50) OVER (PARTITION BY customer_id,ln_loan_appln_time) cash_out_med_days_bt_trans,
FROM date_diff_sub
)

, transactions_final AS 
(
SELECT DISTINCT
acc.customerid as customer_id,
acc.ln_loan_appln_time,
## Number of transactions within the observation window (x days from onboarding date),
## Cash In Count Details
COUNT(DISTINCT(IF(main_transaction_type = 'Net Cash In',transaction_id,NULL))) tx_cnt_cash_in_total,
COUNT(DISTINCT(IF(sub_transaction_type='Outside Tonik to TSA' ,transaction_id,NULL))) tx_cnt_cash_in_ob2t,
COUNT(DISTINCT(IF(sub_transaction_type='Other Tonik Users to Town Tonik Account' ,transaction_id,NULL))) tx_cnt_cash_in_ot2t,

## Cash In Amount Details
SUM(IF(main_transaction_type = 'Net Cash In',trx_amount,0)) tx_amt_cash_in_total,
SUM((IF(sub_transaction_type='Outside Tonik to TSA' ,trx_amount,0))) tx_amt_cash_in_ob2t,
SUM((IF(sub_transaction_type='Other Tonik Users to Town Tonik Account',trx_amount,0))) tx_amt_cash_in_ot2t,

## Cash Out Count Details
COUNT(DISTINCT(IF(main_transaction_type = 'Net Cash Out',transaction_id,NULL))) tx_cnt_cash_out_total,
COUNT(DISTINCT(IF(sub_transaction_type= 'Bills Pay' ,transaction_id,NULL))) tx_cnt_cash_out_billpay,
COUNT(DISTINCT(IF(sub_transaction_type= 'Card Transactions (Cash Out)' ,transaction_id,NULL))) tx_cnt_cash_out_cards,
COUNT(DISTINCT(IF(sub_transaction_type= 'Own TSA to Other Tonik Users' ,transaction_id,NULL))) tx_cnt_cash_out_t2ot,
COUNT(DISTINCT(IF(sub_transaction_type= 'TSA to Outside Tonik (Other Banks)' ,transaction_id,NULL))) tx_cnt_cash_out_t2ob,

## Cash Out Amount Details
SUM(IF(main_transaction_type = 'Net Cash Out',trx_amount,0)) tx_amt_cash_out_total,
SUM(IF(sub_transaction_type= 'Bills Pay' ,trx_amount,0)) tx_amt_cash_out_billpay,
SUM(IF(sub_transaction_type= 'Card Transactions (Cash Out)' ,trx_amount,0)) tx_amt_cash_out_cards,
SUM(IF(sub_transaction_type= 'Own TSA to Other Tonik Users' ,trx_amount,0)) tx_amt_cash_out_t2ot,
SUM(IF(sub_transaction_type= 'TSA to Outside Tonik (Other Banks)' ,trx_amount,0)) tx_amt_cash_out_t2ob,
FROM input_customers acc 
LEFT JOIN transactions_sub a ON CAST(acc.customerid AS STRING) = a.customer_id and transaction_datetime < acc.ln_loan_appln_time and acc.ln_loan_appln_time = a.ln_loan_appln_time

GROUP BY 1,2
ORDER BY 2 

),
Reject_flag_data_new as (SELECT digitalLoanAccountId, 
case when  applicationStatus IN ('EXPIRED', 'EXEMPT') and prev_applicationStatus='REJECT' THEN 1 when applicationStatus ='REJECT' THEN 1 else 0 end reject_flag,
case when  applicationStatus IN ('ACCEPT', 'CANCELLED','EXEMPT','EXPIRED','REJECT') and prev_applicationStatus='APPROVED' THEN 1 when applicationStatus ='APPROVED' THEN 1 else 0 end approved_flag

 FROM (SELECT digitalLoanAccountId, applicationStatus, created_dt, LEAD(applicationStatus) OVER(PARTITION BY digitalLoanAccountId order by created_dt desc, statusTraceId desc) AS  prev_applicationStatus,LEAD(created_dt) OVER(PARTITION BY digitalLoanAccountId order by created_dt desc, statusTraceId desc) AS prev_created_dt,  row_number() over (partition by digitalLoanAccountId order by created_dt desc, statusTraceId desc ) rn  from dl_loans_db_raw.tdbk_status_trace ) where rn =1)
 ,
delinquency_data as (
select loanAccountNumber,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30  in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from risk_credit_mis.loan_deliquency_data
),
fr_dpd as (
      select
      input_customers.customerid,
      ln_loan_appln_time,
      MAX(fr.Max_ever_DPD) as max_ever_dpd,
      MAX(CASE WHEN fr.loanAccountNumber = input_customers.loanAccountNumber THEN fr.Max_ever_DPD ELSE NULL END) AS max_current_dpd,
      MAX(CASE WHEN date(sourceDataAsOf) BETWEEN DATE_SUB(DATE(input_customers.ln_loan_appln_time),INTERVAL 30 DAY) AND DATE(input_customers.ln_loan_appln_time) THEN fr.Max_ever_DPD ELSE NULL END) max_ever_dpd_30d,
      MAX(CASE WHEN date(sourceDataAsOf) BETWEEN DATE_SUB(DATE(input_customers.ln_loan_appln_time),INTERVAL 60 DAY) AND DATE(input_customers.ln_loan_appln_time) THEN fr.Max_ever_DPD ELSE NULL END) max_ever_dpd_60d,
      MAX(CASE WHEN date(sourceDataAsOf) BETWEEN DATE_SUB(DATE(input_customers.ln_loan_appln_time),INTERVAL 90 DAY) AND DATE(input_customers.ln_loan_appln_time) THEN fr.Max_ever_DPD ELSE NULL END) max_ever_dpd_90d,
      MAX(CASE WHEN date(sourceDataAsOf) BETWEEN DATE_SUB(DATE(input_customers.ln_loan_appln_time),INTERVAL 120 DAY) AND DATE(input_customers.ln_loan_appln_time) THEN fr.Max_ever_DPD ELSE NULL END) max_ever_dpd_120d,
      MAX(CASE WHEN date(sourceDataAsOf) BETWEEN DATE_SUB(DATE(input_customers.ln_loan_appln_time),INTERVAL 150 DAY) AND DATE(input_customers.ln_loan_appln_time) THEN fr.Max_ever_DPD ELSE NULL END) max_ever_dpd_150d,
      MAX(CASE WHEN date(sourceDataAsOf) BETWEEN DATE_SUB(DATE(input_customers.ln_loan_appln_time),INTERVAL 180 DAY) AND DATE(input_customers.ln_loan_appln_time) THEN fr.Max_ever_DPD ELSE NULL END) max_ever_dpd_180d,
      from input_customers
JOIN `risk_credit_mis.loan_master_table` lmt on cast(lmt.customerId as string) = input_customers.customerid and COALESCE(DATE(termsAndConditionsSubmitDateTime),DATE(startApplyDateTime))  < DATE(input_customers.ln_loan_appln_time)
      left join `prj-prod-dataplatform.risk_credit_mis.loan_bucket_flow_report_core`  fr on  fr.loanAccountNumber = lmt.loanAccountNumber and date(sourceDataAsOf) < date(ln_loan_appln_time)
      where lmt.flagDisbursement = 1
      group by 1,2
 ),
loan_metrics as (
SELECT 
input_customers.customerid,
input_customers.loanAccountNumber,
input_customers.ln_loan_appln_time,
fr_dpd.max_ever_dpd,
max_current_dpd,
max_ever_dpd_30d,
max_ever_dpd_60d,
max_ever_dpd_90d,
max_ever_dpd_120d,
max_ever_dpd_150d,
max_ever_dpd_180d,
COUNT(CASE WHEN installmentPaidAmount > 0 AND prev_loanAccountNumber = a.loanAccountNumber and lastPaymentDate < DATE(input_customers.ln_loan_appln_time) THEN 1 ELSE NULL END) AS cnt_installments_paid_last_disb,
SUM(CASE WHEN installmentPaidAmount > 0 AND prev_loanAccountNumber = a.loanAccountNumber and lastPaymentDate < DATE(input_customers.ln_loan_appln_time) THEN installmentPaidAmount ELSE 0 END) AS total_amt_installments_paid_last_disb,
COUNT(CASE WHEN installmentPaidAmount >0 AND DPDwoToleranceCustom > 0 and (lastPaymentDate < DATE(input_customers.ln_loan_appln_time) OR isDelinquent =1) THEN 1 ELSE NULL END) AS tx_cnt_installments_paid_tot_with_dpd,
SUM(CASE WHEN installmentPaidAmount >0 AND DPDwoToleranceCustom > 0 and (lastPaymentDate < DATE(input_customers.ln_loan_appln_time) OR isDelinquent =1) THEN installmentPaidAmount ELSE 0 END) AS tx_amt_installments_paid_tot_with_dpd,
COUNT(CASE WHEN installmentPaidAmount >0 AND prev_loanAccountNumber = a.loanAccountNumber AND DPDwoToleranceCustom > 0 and (lastPaymentDate < DATE(input_customers.ln_loan_appln_time) OR isDelinquent =1) THEN 1 ELSE NULL END) AS tx_cnt_installments_paid_last_disb_withdpd,
SUM(installmentAmount) as tx_total_due_amount,
CASE WHEN COUNT(CASE WHEN flagDisbursement = 1 AND new_loan_type LIKE 'SIL%' THEN 1 ELSE NULL END) > 1 THEN 1 ELSE 0 END AS ln_any_prev_disb_loan_sil_mobile_flag,
COUNT(CASE WHEN installmentPaidAmount > 0  THEN 1 ELSE NULL END) AS cnt_installments_paid,

SUM(CASE WHEN installmentPaidAmount > 0  THEN installmentPaidAmount ELSE 0 END) AS total_amt_installments_paid,
count(DISTINCT(IF(coalesce(lmt.termsAndConditionsSubmitDateTime,if (lmt.new_loan_type ='Flex-up',lmt.startApplyDateTime,lmt.termsAndConditionsSubmitDateTime)) is not null,lmt.digitalLoanAccountId,NULL))) tx_cnt_applied_loan_apps,
count(DISTINCT(IF(Reject_flag_data_new.reject_flag = 1,lmt.digitalLoanAccountId,NULL))) tx_cnt_rejected_loan_apps,
count(DISTINCT(IF( applicationStatus in ('COMPLETED','ACTIVATED','APPROVED'),lmt.digitalLoanAccountId,NULL))) tx_cnt_approved_loan_apps,
count(DISTINCT(IF(la.LOANSTATUS IN('Completed','Settled'),lmt.digitalLoanAccountId,NULL))) tx_cnt_completed_loan_apps,
count(DISTINCT(IF(disbursementDateTime is not null ,lmt.digitalLoanAccountId,NULL))) tx_cnt_disbursed_loan_apps,
count(DISTINCT(IF( la.LOANSTATUS IN('Normal','In Arrears'),lmt.digitalLoanAccountId,NULL))) tx_cnt_active_loan_apps,
count(DISTINCT(IF (lmt.digitalLoanAccountId IS NOT NULL AND coalesce(lmt.termsAndConditionsSubmitDateTime,if (lmt.new_loan_type ='Flex-up',lmt.startApplyDateTime,lmt.termsAndConditionsSubmitDateTime)) is null,lmt.digitalLoanAccountId,NULL))) tx_incomplete_loan_apps,
MIN(CASE WHEN la.LOANSTATUS IN('Completed','Settled') THEN DATE_DIFF(LOANMATURITYDATE,DATEOFDISBURSEMENT, DAY) ELSE NULL END) AS tx_min_age_completed_loans,
MAX(CASE WHEN la.LOANSTATUS IN('Completed','Settled') THEN DATE_DIFF(LOANMATURITYDATE,DATEOFDISBURSEMENT, DAY) ELSE NULL END) AS tx_max_age_completed_loans,
AVG(CASE WHEN la.LOANSTATUS IN('Completed','Settled') THEN DATE_DIFF(LOANMATURITYDATE,DATEOFDISBURSEMENT, DAY) ELSE NULL END) AS tx_avg_age_completed_loans,
count(CASE WHEN defFPD05 = 1 THEN 1 ELSE NULL END) as cnt_fpd5,
count(CASE WHEN delinquency_data.defFPD10 = 1 THEN 1 ELSE NULL END) as cnt_fpd10,
count(CASE WHEN delinquency_data.defFPD30 = 1 THEN 1 ELSE NULL END) as cnt_fpd30,
count(CASE WHEN delinquency_data.deffspd30 = 1 THEN 1 ELSE NULL END) as cnt_fspd30,
COUNT(case when DPDwoToleranceCustom_DPD > 1 THEN 1 ELSE NULL END) AS cnt_dpd_gt_1,
COUNT(case when DPDwoToleranceCustom_DPD > 5 THEN 1 ELSE NULL END) AS cnt_dpd_gt_5,
from input_customers
JOIN `risk_credit_mis.loan_master_table` lmt on cast(lmt.customerId as string) = input_customers.customerid and COALESCE(DATE(termsAndConditionsSubmitDateTime),DATE(startApplyDateTime))  < DATE(input_customers.ln_loan_appln_time)
LEFT JOIN `risk_credit_mis.loan_installments_table` a on a.loanAccountNumber = lmt.loanAccountNumber and COALESCE(installmentDueDate) < DATE(input_customers.ln_loan_appln_time)
LEFT JOIN Reject_flag_data_new ON Reject_flag_data_new.digitalLoanAccountId = lmt.digitalLoanAccountId
LEFT JOIN delinquency_data on delinquency_data.loanAccountNumber = lmt.loanAccountNumber
LEFT JOIN core_raw.loan_accounts la ON la.AccountNumber = lmt.loanAccountNumber and _PARTITIONDATE = DATE_SUB(DATE(input_customers.ln_loan_appln_time),INTERVAL 1 DAY)
LEFT JOIN fr_dpd on  fr_dpd.customerid  = input_customers.customerid and fr_dpd.ln_loan_appln_time = input_customers.ln_loan_appln_time
group by all
),

utility_transaction_data AS (
        SELECT 
        customer_id,
        ln_loan_appln_time,
        CASE 
            WHEN SUM(CASE WHEN transaction_code IN ('BP1', 'BP2', 'BP3', 'BP4') THEN 1 ELSE 0 END) > 0 
                THEN MIN(transaction_date) 
            ELSE NULL 
        END AS first_billpay_date,
        CASE 
            WHEN SUM(CASE WHEN transaction_code like 'A0%' AND core_narration NOT LIKE '%Blocking%' THEN 1 ELSE 0 END) > 0 
                THEN MIN(transaction_date) 
            ELSE NULL 
        END AS virtual_transaction_date,
        CASE 
            WHEN SUM(CASE WHEN transaction_code IN ('21C', 'N01', 'IP2', 'XE2','P01') THEN 1 ELSE 0 END) > 0 
                THEN MIN(transaction_date) 
            ELSE NULL 
        END AS first_tsa_topup_date
    FROM 
        main_transaction_data
    GROUP BY 1,2
),

combined_data AS (
    SELECT 
        COALESCE(acc.cust_id,acc_data.customer_id,utility_transaction_data.customer_id) customer_id,
        productid,
        accountdescription,
        acc_data.opendate as opendate ,
        first_billpay_date,
        virtual_transaction_date,
        first_tsa_topup_date,
        acc.ln_loan_appln_time,
        LEAST(
            DATE(acc.ln_loan_appln_time),
            IFNULL(DATE(acc_data.opendate),'9999-12-31'), 
            IFNULL(first_billpay_date, '9999-12-31'), 
            IFNULL(virtual_transaction_date, '9999-12-31'), 
            IFNULL(first_tsa_topup_date, '9999-12-31')
        ) AS first_opened_date
        FROM (SELECT DISTINCT cust_id,registration_date,ln_loan_appln_time from cust_onboarding_acc_data) acc 
        LEFT JOIN (SELECT cust_id as customer_id,ofdateopened opendate ,productid,account_type as accountdescription from cust_onboarding_acc_data 
        WHERE  account_type NOT IN ('Tonik Account','Tendo Individual Stash') AND ofdateopened >= '2023-01-01' AND reccreatedon < ln_loan_appln_time
       QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY reccreatedon asc) = 1) acc_data
       ON acc.cust_id = acc_data.customer_id
      LEFT JOIN utility_transaction_data
        ON 
      acc.cust_id = utility_transaction_data.customer_id and acc.ln_loan_appln_time = utility_transaction_data.ln_loan_appln_time
),
first_product_data as (
SELECT
    customer_id,
    ln_loan_appln_time,
    --first_opened_date,
    CASE
        WHEN first_opened_date = DATE(opendate) THEN accountdescription
        WHEN first_opened_date = first_billpay_date THEN 'Bills Pay'
        WHEN first_opened_date = first_tsa_topup_date THEN 'TSA Top-Up'
        WHEN first_opened_date = virtual_transaction_date THEN 'Virtual Transaction'
        ELSE 'Unknown'
    END AS first_product,
    CASE
        WHEN first_opened_date = DATE(ln_loan_appln_time) THEN 'Loan Attempt'
        WHEN first_opened_date = DATE(opendate) and productid in ('fixdep','savings','SaveForFuture') THEN 'Deposit Users'
        --WHEN first_opened_date = DATE(opendate) and productid NOT IN ('fixdep','savings','SaveForFuture') THEN 'Loan Users'
        WHEN first_opened_date = first_tsa_topup_date or first_opened_date = virtual_transaction_date or first_opened_date = first_billpay_date THEN 'Utility Users'
        ELSE 'Ghost Users'
    END AS first_product_user_segment
FROM
    combined_data
),

complete_deposit_metrics as (
with  deposit_acc_main AS 
(
  SELECT
  a.OFDATEOPENED as ofdateopened,
  IF(OFISCLOSED = 'Y',DATE_DIFF(date(ofdateclosed),date(OFDATEOPENED),day),NULL) as stash_duration,
  registration_date,
  duration,
  a.cust_id as customer_id,
  a.account_type,
  reccreatedon,
  a.accountid as ofstandardaccountid,
  --balancedateasof,
  b.clearedbalance,
  a.OFISCLOSED as closed,
  ff.status as td_status,
  a.ofdateclosed,
  autorollover,
  ln_loan_appln_time,
 FROM cust_onboarding_acc_data a
  LEFT JOIN risk_mart.customer_balance b on a.accountid = b.accountid and date(balanceDateAsOf) = DATE_SUB(DATE(ln_loan_appln_time),INTERVAL 1 DAY)
  --JOIN input_customers on input_customers.customerid = a.cust_id 
  LEFT JOIN `finastra_raw.fixturefeature` ff on ff.accountid = a.accountid
  WHERE 1=1 
  --and date(balancedateasof) = DATE_SUB(p.start_date,INTERVAL 1 DAY)
  and productid in ('savings','fixdep','SaveForFuture') and  reccreatedon < ln_loan_appln_time
  )
  
,deposit_days_diff_sub as (
    SELECT customer_id,
    ln_loan_appln_time,
    'Between All Deposits' days_diff_type,
    DATE_DIFF(LEAD(ofdateopened) OVER (PARTITION BY customer_id,ln_loan_appln_time ORDER BY ofdateopened ASC),ofdateopened,DAY) days_bt_trans
    FROM deposit_acc_main
    WHERE account_type <> 'Tonik Account'

    UNION ALL

    SELECT customer_id,
    ln_loan_appln_time,
    'Between TSA AND TD' days_diff_type,
    DATE_DIFF(LEAD(ofdateopened) OVER (PARTITION BY customer_id,ln_loan_appln_time ORDER BY ofdateopened ASC),ofdateopened,DAY) days_bt_trans
    FROM deposit_acc_main
    WHERE account_type not like '%Stash%'

    UNION ALL

    SELECT customer_id,
    ln_loan_appln_time,
    'Between TDs' days_diff_type,
    DATE_DIFF(LEAD(ofdateopened) OVER (PARTITION BY customer_id,ln_loan_appln_time ORDER BY ofdateopened ASC),ofdateopened,DAY) days_bt_trans
    FROM deposit_acc_main
    WHERE account_type like '%Time Deposit%'

)
,dep_days_bt_trans_med AS 
(
-- get the median days in between
SELECT DISTINCT
customer_id,
ln_loan_appln_time,
PERCENTILE_CONT(IF(days_diff_type='Between All Deposits',days_bt_trans,NULL), .50) OVER (PARTITION BY customer_id,ln_loan_appln_time) med_days_bw_new_dep_acct_open,
PERCENTILE_CONT(IF(days_diff_type='Between TDs',days_bt_trans,NULL), .50) OVER (PARTITION BY customer_id,ln_loan_appln_time) med_days_bw_td_acct_open,
PERCENTILE_CONT(IF(days_diff_type='Between TSA AND TD',days_bt_trans,NULL), .50) OVER (PARTITION BY customer_id,ln_loan_appln_time) med_days_bw_td_tsa_acct_open,
FROM deposit_days_diff_sub
)

, deposit_account_counts AS 
(
#### Number of Stash and Time Deposit accounts that are still open until the observation date with balance >= 100
SELECT DISTINCT
customer_id,
ln_loan_appln_time,
CASE WHEN SUM(autorollover) >= 1.0 THEN 1 ELSE 0 END AS tx_td_auto_roll_over_enabled,
MAX((IF(account_type LIKE '%Time Deposit%' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),duration,0))) AS td_max_duration,
AVG((IF(account_type LIKE '%Time Deposit%'and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)) ,duration,0))) AS td_avg_duration,
MIN((IF(account_type LIKE '%Time Deposit%'and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),duration,0))) AS td_min_duration,
MAX((IF(account_type LIKE '%Stash%' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),stash_duration,0))) AS stash_max_duration,
AVG((IF(account_type LIKE '%Stash%' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),stash_duration,0))) AS stash_avg_duration,
MIN((IF(account_type LIKE '%Stash%' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),stash_duration,0))) AS stash_min_duration,
SUM(DISTINCT(IF(account_type LIKE '%Time Deposit%' AND td_status = '1' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),clearedbalance,NULL))) td_balance,
SUM(DISTINCT(IF(account_type LIKE '%Stash%' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),clearedbalance,NULL))) stash_balance,
COUNT(DISTINCT(IF(account_type LIKE '%Time Deposit%' AND td_status = '4' and closed = 'Y' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),ofstandardaccountid,NULL))) td_accounts_completed_cnt,
COUNT(DISTINCT(IF(account_type LIKE '%Time Deposit%' AND td_status = '9' and closed = 'Y' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)),ofstandardaccountid,NULL))) td_accounts_broken_cnt,
COUNT(DISTINCT(IF(account_type <> 'Tonik Account', ofstandardaccountid,NULL))) deposit_accs_cnt,
COUNT(DISTINCT(IF(account_type LIKE '%Stash%',ofstandardaccountid,NULL))) stash_accounts_opened_cnt,
COUNT(DISTINCT(IF(account_type LIKE '%Time Deposit%',ofstandardaccountid,NULL))) td_accounts_opened_cnt,
COUNT(DISTINCT(IF(account_type LIKE '%Stash%' and closed = 'Y' and (DATE(ofdateclosed) = '1970-01-01' OR DATE(ofdateclosed) < DATE(ln_loan_appln_time)) ,ofstandardaccountid,NULL))) stash_accounts_closed_cnt,
FROM 
deposit_acc_main
where
DATE(reccreatedon) < DATE(ln_loan_appln_time) and account_type <> 'Tonik Account'
GROUP BY 1,2
)

SELECT DISTINCT 
deposit_acc_main.customer_id,
deposit_acc_main.ln_loan_appln_time,
td_balance,
stash_balance,
tx_td_auto_roll_over_enabled,
deposit_accs_cnt,
stash_accounts_opened_cnt,
stash_accounts_closed_cnt,
td_accounts_opened_cnt,
td_accounts_completed_cnt,
td_accounts_broken_cnt,
med_days_bw_td_tsa_acct_open,
med_days_bw_new_dep_acct_open,
med_days_bw_td_acct_open,
td_max_duration,
td_min_duration,
td_avg_duration,
stash_max_duration,
stash_avg_duration,
stash_min_duration

FROM (SELECT DISTINCT customer_id,ln_loan_appln_time FROM deposit_acc_main) deposit_acc_main  
LEFT JOIN deposit_account_counts a ON deposit_acc_main.customer_id = a.customer_id AND a.ln_loan_appln_time =deposit_acc_main.ln_loan_appln_time
LEFT JOIN dep_days_bt_trans_med b ON b.customer_id = deposit_acc_main.customer_id AND b.ln_loan_appln_time =deposit_acc_main.ln_loan_appln_time
ORDER BY 2 DESC
)
SELECT
COALESCE(first_product_data.first_product,'Unknown') as tx_first_product,
COALESCE(first_product_data.first_product_user_segment,'Ghost Users') tx_first_product_user_segment,
acc.customerid as customer_id,
acc.loanaccountnumber,
acc.digitalloanaccountid,
onboarding_date,
ln_user_type,
acc.ln_loan_appln_time,
ln_disb_dtime,
ln_loan_type,
ln_prod_type as ln_prod_type,
--repeat_loan_type as ln_repeat_loan_type,
ln_mature_fpd30_flag,
ln_fpd30_flag,
ln_mature_fspd30_flag,
ln_fspd30_flag,
ln_self_dec_income,
ln_marital_status,
ln_education_level,
ln_nature_of_work_new,
onb_email_verified_flag,
onb_place_of_birth,
onb_doc_type,
onb_country,
onb_province,
onb_city,
onb_barangay,
onb_postalcode,
onb_latitude,
onb_longitude,
onb_osversion,
onb_kyc_status,
onb_kyc_status_upgrade_datetime,
ln_osversion,
ln_brand,
ln_cnt_dependents,
ln_source_funds_new,
ln_employment_type_new,
ln_industry_new,
ln_company_name,
ln_salary_scaled_income,
ln_vas_opted_flag,
ln_age,
ln_mobile_no,
ln_alt_mobile_no,
ln_province,
ln_city,
ln_barangay,
ln_latitude,
ln_longitude,
ln_doc_type,
ln_ref1_type,
onb_first_name,
onb_middle_name,
onb_last_name,
onb_age,
onb_gender,
onb_mobile_no,
onb_email,
ln_loan_applied_flag,
ln_facta_flag,
ln_dl_rule_reject_flag,
ln_taran_rule_reject_flag,
ln_taran_scorecard_reject_flag,
ln_cdd_reject_flag,
ln_marked_underwriter_check_flag,
ln_underwriting_reject_flag,
ln_vas_used_flag,
ln_os_type,
ln_address,
ln_postal_code,
ln_doc_number,
ln_source_funds,
ln_employment_type,
ln_nature_of_work,
ln_industry,
ln_ref2_type,
onb_self_dec_income,
onb_company_name,
credo_inquiry_date,
    cust_status_flag,
    cust_status_close_date,
	dob_observation_date,
	days_since_credo_call_onb,
	days_since_credo_call_loan_application,
    first_applied_loan_appln_time,
    first_applied_loan_type,
    first_applied_loan_tenor,
    first_applied_loan_amount,
    first_applied_product_type,
    first_disb_loan_appln_time,
    first_disb_loan_type,
    first_disb_loan_tenor,
    first_disb_loan_amount,
    first_disb_product_type,
    first_disb_loan_disb_time,
    last_applied_loan_appln_time,
    last_applied_loan_decision,
    last_applied_os_type,
    last_applied_loan_type,
    last_applied_loan_tenor,
    last_applied_loan_amount,
    last_applied_product_type,
    last_applied_crif_id,
    last_applied_cic_called_flag,
    last_applied_cic_hit_flag,
    last_applied_credo_ref_no,
    last_applied_credo_score,
    last_disb_loan_appln_time,
    last_disb_loan_disb_time,
    last_disb_loan_type,
    last_disb_loan_tenor,
    last_disb_loan_amount,
    last_disb_crif_id,
    last_disb_product_type,
    
EXTRACT(YEAR FROM onboarding_date) onb_year,
EXTRACT(MONTH FROM onboarding_date) onb_month_of_year,
EXTRACT(WEEK FROM onboarding_date) - EXTRACT(WEEK FROM DATE_TRUNC(onboarding_date, MONTH)) + 1 AS onb_week_of_month,
--EXTRACT(WEEK FROM onboarding_date) onboarding_week,
EXTRACT(DAY FROM onboarding_date) onb_day_of_month,
EXTRACT(TIME FROM onboarding_date) onb_time_of_day,
a.tx_cnt_cash_in_total,
a.tx_cnt_cash_in_ob2t,
a.tx_cnt_cash_in_ot2t,
a.tx_amt_cash_in_total,
a.tx_amt_cash_in_ob2t,
a.tx_amt_cash_in_ot2t,
a.tx_cnt_cash_out_total,
a.tx_cnt_cash_out_billpay,
a.tx_cnt_cash_out_cards,
a.tx_cnt_cash_out_t2ot,
a.tx_cnt_cash_out_t2ob,
a.tx_amt_cash_out_total,
a.tx_amt_cash_out_billpay,
a.tx_amt_cash_out_cards,
a.tx_amt_cash_out_t2ot,
a.tx_amt_cash_out_t2ob,
overall_avg_days_bt_trans tx_avg_days_bt_trans,
net_cash_in_avg_days_bt_trans tx_avg_days_bt_cash_in_trans,
net_cash_out_avg_days_bt_trans tx_avg_days_bt_cash_out_trans,
overall_med_days_bt_trans tx_med_days_bt_trans,
cash_in_med_days_bt_trans tx_med_days_bt_cash_in_trans,
cash_out_med_days_bt_trans tx_med_days_bt_cash_out_trans,
deposit_accs_cnt tx_deposit_accnt_cnt,
stash_accounts_opened_cnt tx_stash_accnt_opened_cnt,
stash_accounts_closed_cnt tx_stash_accnt_closed_cnt,
stash_balance tx_stash_balance,
td_accounts_opened_cnt tx_td_accnt_opened_cnt,
td_accounts_completed_cnt tx_td_accnt_completed_cnt,
td_accounts_broken_cnt tx_td_accnt_broken_cnt,
tx_td_auto_roll_over_enabled,
td_balance tx_td_balance,
td_max_duration tx_td_max_duration,
td_min_duration tx_td_min_duration,
td_avg_duration tx_td_avg_duration,
stash_max_duration tx_stash_max_duration,
stash_avg_duration tx_stash_avg_duration,
stash_min_duration tx_stash_min_duration,
med_days_bw_td_tsa_acct_open tx_med_days_bw_td_tsa_acct_open,
med_days_bw_new_dep_acct_open tx_med_days_bw_new_dep_acct_open,
med_days_bw_td_acct_open tx_med_days_bw_td_acct_open,
tx_cnt_completed_loan_apps tx_cnt_completed_loans,
tx_cnt_rejected_loan_apps tx_cnt_rejected_loans,
tx_cnt_active_loan_apps tx_cnt_active_loans,
tx_cnt_applied_loan_apps tx_cnt_applied_loan,
tx_cnt_approved_loan_apps tx_cnt_approved_loans,
tx_cnt_disbursed_loan_apps tx_cnt_disbursed_loans,
tx_incomplete_loan_apps tx_cnt_incomplete_loan_apps,
COALESCE(tx_min_age_completed_loans,0) tx_min_age_completed_loans,
COALESCE(tx_max_age_completed_loans,0) tx_max_age_completed_loans,
COALESCE(tx_avg_age_completed_loans,0) tx_avg_age_completed_loans,
cnt_installments_paid tx_cnt_installments_paid_tot,
tx_cnt_installments_paid_tot_with_dpd,
tx_amt_installments_paid_tot_with_dpd,
tx_total_due_amount tx_amount_tot_due,
total_amt_installments_paid tx_amt_installments_paid_tot,
tx_cnt_installments_paid_last_disb_withdpd,
total_amt_installments_paid_last_disb/last_disb_loan_amount as ratio_amt_paid_last_disb_loan,
cnt_installments_paid_last_disb tx_cnt_installments_paid_last_disb,
total_amt_installments_paid_last_disb tx_amt_installments_paid_last_disb,
cnt_fpd10 tx_cnt_fpd10_ever,
cnt_fpd30 tx_cnt_fpd30_ever,
cnt_fspd30 tx_cnt_fspd30_ever,
cnt_dpd_gt_1 tx_cnt_dpd_gt_1_ever,
cnt_dpd_gt_5 tx_cnt_dpd_gt_5_ever,
loan_metrics.max_ever_dpd tx_max_ever_dpd,
max_ever_dpd_30d tx_max_dpd_30d,
max_ever_dpd_60d tx_max_dpd_60d,
max_ever_dpd_90d tx_max_dpd_90d,
max_ever_dpd_120d tx_max_dpd_120d,
max_ever_dpd_150d tx_max_dpd_150d,
max_ever_dpd_180d tx_max_dpd_180d,
max_current_dpd tx_max_current_dpd,
ln_any_prev_disb_loan_sil_mobile_flag
FROM input_customers acc
LEFT JOIN (SELECT DISTINCT cust_id,onboarding_date,ln_loan_appln_time from cust_onboarding_acc_data) cust_onboarding_acc_data on cust_onboarding_acc_data.cust_id = acc.customerid and acc.ln_loan_appln_time = cust_onboarding_acc_data.ln_loan_appln_time
LEFT JOIN transactions_final a ON acc.customerid = a.customer_id and acc.ln_loan_appln_time = a.ln_loan_appln_time
LEFT JOIN days_bt_trans_avg b ON acc.customerid = b.customer_id and b.ln_loan_appln_time = acc.ln_loan_appln_time
LEFT JOIN days_bt_trans_med c ON acc.customerid = c.customer_id and c.ln_loan_appln_time = acc.ln_loan_appln_time
LEFT JOIN complete_deposit_metrics d on d.customer_id = acc.customerid and acc.ln_loan_appln_time = d.ln_loan_appln_time
LEFT JOIN loan_metrics ON cast(loan_metrics.customerid as string) = acc.customerid and acc.ln_loan_appln_time = loan_metrics.ln_loan_appln_time
LEFT JOIN first_product_data ON first_product_data.customer_id = acc.customerid and acc.ln_loan_appln_time = first_product_data.ln_loan_appln_time
;
""" 
job = client.query(sq)
job.result()  # Wait for the job to complete.
time.sleep(5) # Delays for 30 seconds
print(f'Table {schema2}.{bscoresnapshotcustomertransactiondata} created successfully')



Table worktable_data_analysis.b_score_applied_loans_customer_transaction_data created successfully


# Event

In [11]:
# Build {schema2}.{bscoresnapshotcustomerevent}: one row per (customerId, ln_loan_appln_time)
# taken from {schema2}.{bscoresnapshotcustomerdata}, enriched with app-install and
# engagement features. CTEs in the query:
#   af_link       - earliest install_time row per customer_user_id, taken separately from the
#                   paid and the organic AppsFlyer in-app event reports (so up to two rows per
#                   customer), limited to partitions dated before the application date.
#   events        - MoEngage counts for events dated before the application date: distinct
#                   'App_Launch' event_uuid, and distinct moengagerefid / event_uuid for event
#                   names matching 'Loans_%_Calculator'.
#   campaign_data - latest install_time row per (customer, application) across the paid,
#                   organic and retarget AppsFlyer sources, mapped to source / source_group
#                   through installs_attribution_mapping.
#   final_output  - minutes between MIN(created_dt) of the customer master row and
#                   MIN(install_time) from af_link.
# NOTE(review): channel_source_group is 'InOrganic' for every application whose source_group
# is not exactly 'Organic', including applications with no campaign_data row at all - confirm
# that this is intended.
# NOTE(review): the ROW_NUMBER() ordering in campaign_data has no tie-breaker, so rows sharing
# the same install_time are picked arbitrarily.
sq = f"""
CREATE OR REPLACE TABLE {schema2}.{bscoresnapshotcustomerevent} as
with input_customers as (
SELECT * FROM {schema2}.{bscoresnapshotcustomerdata}
),
af_link AS
(
  ## To get the AF ID and Customer ID Link (using the first install of a customer)
  SELECT DISTINCT appsflyer_id, customer_user_id, install_time
  FROM `appsflyer_raw.in_app_events_report` in_apps_events
  JOIN `dl_customers_db_raw.tdbk_customer_mtb` c ON c.cust_id = in_apps_events.customer_user_id
  JOIN input_customers ON in_apps_events.customer_user_id = input_customers.customerid and DATE(in_apps_events._partitiondate) < DATE(input_customers.ln_loan_appln_time)
  WHERE 1=1
  AND customer_user_id IS NOT NULL
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_user_id ORDER BY install_time ASC) = 1

  UNION ALL

  SELECT DISTINCT appsflyer_id, customer_user_id, install_time
  FROM `appsflyer_raw.organic_in_app_events_report` organic_in_apps_events
  JOIN `dl_customers_db_raw.tdbk_customer_mtb` c ON c.cust_id = organic_in_apps_events.customer_user_id
  JOIN input_customers ON organic_in_apps_events.customer_user_id = input_customers.customerid and DATE(organic_in_apps_events._partitiondate) < DATE(input_customers.ln_loan_appln_time)
  WHERE 1=1
  AND customer_user_id IS NOT NULL
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customer_user_id ORDER BY install_time ASC) = 1
)
, events AS
(
  -- GROUP BY already yields one row per (customer, application); no DISTINCT needed.
  SELECT
  input_customers.customerid as customer_id,
  ln_loan_appln_time,
  COUNT(DISTINCT IF(event_name = 'App_Launch',event_uuid,NULL)) meng_no_of_logins,
  COUNT(DISTINCT IF(
    event_name like any ('Loans_%_Calculator'),moengagerefid,NULL)) meng_calculator_count,
  COUNT(DISTINCT IF(
    event_name like any ('Loans_%_Calculator'),event_uuid,NULL)) meng_calculator_tot_visit_cnt
  FROM input_customers
  JOIN `moengage_raw.events_hourly` a ON input_customers.customerid = cast(a.customer_id as string)
  WHERE 1=1 and DATE(event_time) < DATE(input_customers.ln_loan_appln_time)
  group by 1,2
),
campaign_data as
(
  SELECT DISTINCT a.customer_user_id, a.appsflyer_id, a.media_source, a.partner, a.campaign, a.Retargeting_Conversion_Type,ln_loan_appln_time,
  CASE
  WHEN a.media_source = 'website_channel=website_ss_ui=true_ss_gtm_ui=true_ss_qr=c' THEN 'Website'
  WHEN a.media_source IS NULL AND a.partner IS NULL AND a.campaign IS NULL THEN 'Organic'
  ELSE b.source END source,
  CASE
  WHEN a.media_source = 'website_channel=website_ss_ui=true_ss_gtm_ui=true_ss_qr=c' THEN 'Website'
  WHEN a.media_source IS NULL AND a.partner IS NULL AND a.campaign IS NULL THEN 'Organic'
  ELSE b.source_group END source_group,
  FROM
  (
    SELECT DISTINCT
    install_time,
    customer_user_id,
    AppsFlyer_ID,
    media_source,
    partner,
    campaign,
    ln_loan_appln_time,
    'Install' Retargeting_Conversion_Type,
    FROM `appsflyer_raw.in_app_events_report` in_apps_events
    JOIN input_customers ON in_apps_events.customer_user_id = input_customers.customerid and DATE(in_apps_events._partitiondate) < DATE(input_customers.ln_loan_appln_time)

    UNION ALL

    SELECT DISTINCT
    install_time,
    customer_user_id,
    AppsFlyer_ID,
    media_source,
    partner,
    campaign,
    ln_loan_appln_time,
    'Install' Retargeting_Conversion_Type
    FROM `appsflyer_raw.organic_in_app_events_report` organic_in_apps_events
    JOIN input_customers ON organic_in_apps_events.customer_user_id = input_customers.customerid and DATE(organic_in_apps_events._partitiondate) < DATE(input_customers.ln_loan_appln_time)

    UNION ALL

    SELECT DISTINCT
    install_time,
    customer_user_id,
    AppsFlyer_ID,
    media_source,
    partner,
    campaign,
    ln_loan_appln_time,
    Retargeting_Conversion_Type
    FROM `appsflyer_raw.in_app_events_retarget` retarget_events
    JOIN input_customers ON retarget_events.customer_user_id = input_customers.customerid and DATE(retarget_events._partitiondate) < DATE(input_customers.ln_loan_appln_time)
  ) a
  LEFT JOIN `prj-prod-dataplatform.worktable_datachampions.installs_attribution_mapping` b
  ON
    COALESCE(a.media_source,a.partner,a.campaign) = COALESCE(b.media_source,b.partner,b.campaign)

  WHERE 1=1

  QUALIFY ROW_NUMBER() OVER (PARTITION BY a.customer_user_id,ln_loan_appln_time ORDER BY install_time DESC) = 1
),
final_output as (
SELECT
input_customers.customerId,
ln_loan_appln_time,
TIMESTAMP_DIFF(MIN(cust_mtb.created_dt),MIN(install_time),MINUTE) appsflyer_install_to_registration_minutes,
FROM input_customers
JOIN `dl_customers_db_raw.tdbk_customer_mtb` cust_mtb ON cust_mtb.cust_id = input_customers.customerId
LEFT JOIN af_link b ON b.customer_user_id = input_customers.customerId
group by 1,2
)
SELECT
final_output.*,
CASE WHEN campaign_data.source_group = 'Organic' THEN 'Organic'
ELSE 'InOrganic' END AS channel_source_group,
source as marketing_source_name,
meng_no_of_logins,
meng_calculator_count,
meng_calculator_tot_visit_cnt,
from final_output
LEFT JOIN campaign_data ON campaign_data.customer_user_id = final_output.customerId and final_output.ln_loan_appln_time = campaign_data.ln_loan_appln_time
LEFT JOIN events c ON c.customer_id = final_output.customerId and final_output.ln_loan_appln_time = c.ln_loan_appln_time

"""


job = client.query(sq)
job.result()  # Block until the BigQuery job has finished.
time.sleep(5)  # Short 5-second pause after the job completes.
print(f'Table {schema2}.{bscoresnapshotcustomerevent} created successfully')

Table worktable_data_analysis.b_score_applied_loans_customer_event_data created successfully


# Contactability

In [12]:
# Build {schema2}.{bscoresnapshotcontactability}: call and email contact features per
# (customerid, ln_loan_appln_time) from {schema2}.{bscoresnapshotcustomerdata}.
#
# The campaign-name list below splits the call/email counts into two buckets: names in the
# list feed the `*_upsell_mkt` columns, every other name (or a NULL name) feeds `*_others`.
# It is defined once here and interpolated into the SQL instead of being pasted eight times.
# NOTE(review): the names read like reminder / collection campaigns while the output columns
# are called `upsell_mkt` - confirm the bucket naming with the feature owner.
upsell_mkt_campaign_names = (
    'SIP_Reminder',
    'Prod_Reminder',
    'Special_Reminder',
    'Tonik Agentless -Collection',
    'Prod_Reminder_IVR',
    'Agentless-Collection_UATtest_071122',
    'COLLECTION REMINDER_B1B2',
    'COLLECTION REMINDER_C1C2',
    'Reminder_Voicebot_Calling_F',
    'Reminder_Voicebot_Calling_M',
    'Enhanced_Reminder_Voicebot_Calling_F',
    'Enhanced_Reminder_Voicebot_Calling_M',
    'COLLECTION_REMINDER (B1B2)',
    'COLLECTION_REMINDER (A1A2)',
    'COLLECTION_REMINDER (C1C2)',
    'Special_Reminder Collection',
    'Prod_Soft Collections (1-30DPD)',
    'Special_Soft Collections (1-30DPD)',
    'PROD_SOFT_Collection 1-30 DPD_1',
    'PRODSPECIAL_B1_PROJECTNORM',
    'BP_Soft Collection (1-30DPD)',
    'Prod_Soft Collections (31-60DPD)',
    'Special_Soft Collections (31-60DPD)',
    'Prod_Soft Collections 31-60DPD',
    'BP_Soft Collections (31-60DPD)',
    'L3M No Payment_Mid Range Collections',
    'L3M With Payment_Mid Range Collections',
    'PTPr BPs_Mid Range Collections',
    'Prod_Mid Range Collections',
    'Special_Mid Range Collections',
    'BP_Mid Range Collections',
    'PROD_MIDRANGE',
    'PROD_SOFT_Collection 1-60DPD',
    'PROD_SOFT_Collection 1-60 DPD',
    'PROD_Ref Persons-Daily',
)
# Rendered as a comma-separated list of SQL string literals (none of the names contains a quote).
upsell_mkt_campaigns_sql = ", ".join(f"'{name}'" for name in upsell_mkt_campaign_names)

# NOTE(review): cust_mobile_genesys_data LEFT JOINs call_history and cust_emails to the same
# customer row, so every matching call row is repeated once per matching email row (and per
# mobile/email address pair). The COUNT(...) columns are therefore inflated, and the email
# counts are filtered on allcampaignName, which comes from the call table. Left unchanged here
# because fixing it changes feature values; aggregate calls and emails in separate CTEs if the
# counts are meant to be independent.
# NOTE(review): flag_contactable_upsell_last90D compares DATE(callDatetime) against a
# DATE_SUB window while count_contactable_upsell_last90D compares the raw callDatetime against
# a DATETIME_SUB window, so the two can disagree on the boundary day.
sq = f"""
CREATE OR REPLACE TABLE
  {schema2}.{bscoresnapshotcontactability} AS
with input_customers as (
select * from {schema2}.{bscoresnapshotcustomerdata}),
cust_emails as (
  with temp_output as (
select too.email as to_email, emailTranscript.from.email as from_email,

CASE WHEN SPLIT(emailTranscript.from.email,'@')[SAFE_OFFSET(1)] like '%tonik%' THEN 'outbound'
WHEN SPLIT(too.email,'@')[SAFE_OFFSET(1)] like '%tonik%' THEN 'inbound'
ELSE 'outbound'
END AS category,
creationTime
from `genesys_raw.emails`,
unnest(emailTranscript) emailTranscript,unnest(emailTranscript.to) too
)
SELECT
*,
-- category is always 'outbound' or 'inbound', so two branches cover every row.
CASE WHEN category = 'outbound' THEN to_email
WHEN category = 'inbound' THEN from_email
END AS customer_email_address,
FROM temp_output
),
cust_mobile_genesys_data as (
SELECT
input_customers.customerid,
input_customers.ln_loan_appln_time,
MAX(
    CASE
      WHEN DATE(call_history.callDatetime) between DATE_SUB(input_customers.ln_loan_appln_time, INTERVAL 90 DAY) and DATE(input_customers.ln_loan_appln_time) THEN 1
      ELSE 0
  END
    ) AS flag_contactable_last90D,
  count(
    CASE
      WHEN DATE(call_history.callDatetime) between DATE_SUB(input_customers.ln_loan_appln_time, INTERVAL 90 DAY) and DATE(input_customers.ln_loan_appln_time) THEN 1
      ELSE null
  END
    ) AS count_contactable_last90D,
  MAX(
  CASE
    WHEN DATE(call_history.callDatetime) between DATE_SUB(input_customers.ln_loan_appln_time, INTERVAL 90 DAY) and DATE(input_customers.ln_loan_appln_time)
  AND COALESCE(call_history.campaignName, 'NULL') IN ('UPSELL VERIFICATION CALLOUT',
    'UPSELL VERIFICATION_1st interval',
    'UPSELL VERIFICATION_2nd interval',
    'UPSELL VERIFICATION_3rd interval')
  AND genesysWrapupDisposition = 'UW - CLIENT ANSWERED'
  AND talkTime > 12 THEN 1
    ELSE 0
END
  ) AS flag_contactable_upsell_last90D,
  COUNT(
  CASE
    WHEN call_history.callDatetime between DATETIME_SUB(input_customers.ln_loan_appln_time, INTERVAL 90 DAY) and input_customers.ln_loan_appln_time
  AND COALESCE(call_history.campaignName, 'NULL') IN ('UPSELL VERIFICATION CALLOUT',
    'UPSELL VERIFICATION_1st interval',
    'UPSELL VERIFICATION_2nd interval',
    'UPSELL VERIFICATION_3rd interval')
  AND genesysWrapupDisposition = 'UW - CLIENT ANSWERED'
  AND talkTime > 12 THEN 1
    ELSE NULL
END
  ) AS count_contactable_upsell_last90D,
COUNT(CASE WHEN calldirection = 'outbound' and allcampaignName IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS outbound_call_count_upsell_mkt,
COUNT(CASE WHEN calldirection = 'inbound' and allcampaignName IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS inbound_call_count_upsell_mkt,
COUNT(CASE WHEN calldirection = 'outbound' and COALESCE(allcampaignName,'#####') NOT IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS outbound_call_count_others,
COUNT(CASE WHEN calldirection = 'inbound' and COALESCE(allcampaignName,'#####') NOT IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS inbound_call_count_others,

COUNT(CASE WHEN category = 'outbound' and COALESCE(allcampaignName,'#####') IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS outbound_email_count_upsell_mkt,
COUNT(CASE WHEN category = 'inbound' and COALESCE(allcampaignName,'#####') IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS inbound_email_count_upsell_mkt,

COUNT(CASE WHEN category = 'outbound' and COALESCE(allcampaignName,'#####') NOT IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS outbound_email_count_others,
COUNT(CASE WHEN category = 'inbound' and COALESCE(allcampaignName,'#####') NOT IN ({upsell_mkt_campaigns_sql}) THEN 1 ELSE null END) AS inbound_email_count_others,
FROM input_customers
LEFT JOIN (SELECT DISTINCT cust_id,mobile_no, valid_from_dt FROM `datalake_worktables.customer_mobile_mail_dtls`
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_id,mobile_no order by valid_from_dt asc) = 1
) mobile_details on input_customers.customerid = mobile_details.cust_id and DATE(mobile_details.valid_from_dt) < DATE(ln_loan_appln_time)
LEFT JOIN (SELECT DISTINCT cust_id,email, valid_from_dt FROM `datalake_worktables.customer_mobile_mail_dtls`
QUALIFY ROW_NUMBER() OVER (PARTITION BY cust_id,email order by valid_from_dt asc) = 1
) email_details on input_customers.customerid = email_details.cust_id and DATE(email_details.valid_from_dt) < DATE(ln_loan_appln_time)
LEFT JOIN `risk_credit_mis.call_attempt_history_gensys` call_history ON RIGHT(mobile_details.mobile_no,10) = right(call_history.mobileNumber,10) and DATE(call_history.callDatetime) < DATE(input_customers.ln_loan_appln_time) and connected = 1 and mediaType = 'voice'
left JOIN cust_emails ON email_details.email = cust_emails.customer_email_address and DATE(cust_emails.creationTime) < DATE(input_customers.ln_loan_appln_time)
GROUP BY 1,2
)
SELECT
  input_customers.customerid,
  input_customers.ln_loan_appln_time,
  flag_contactable_last90D,
  count_contactable_last90D,
  count_contactable_upsell_last90D,
  flag_contactable_upsell_last90D,
  inbound_call_count_upsell_mkt,
  outbound_call_count_upsell_mkt,
  inbound_call_count_others,
  outbound_call_count_others,
  outbound_email_count_upsell_mkt,
  inbound_email_count_upsell_mkt,
  outbound_email_count_others,
  inbound_email_count_others
FROM input_customers
LEFT JOIN cust_mobile_genesys_data on cust_mobile_genesys_data.customerid = CAST(input_customers.customerid AS STRING) and cust_mobile_genesys_data.ln_loan_appln_time = input_customers.ln_loan_appln_time
"""


job = client.query(sq)
job.result()  # Block until the BigQuery job has finished.
time.sleep(5)  # Short 5-second pause after the job completes.
print(f'Table {schema2}.{bscoresnapshotcontactability} created successfully')

Table worktable_data_analysis.b_score_applied_loans_contactability created successfully


# Combined

In [13]:
# Build `{schema2}.{bscoresnapshotcombineddata}`: the combined table that joins the
# transaction table (alias acc) with the event and contactability tables plus referral,
# fraud-blacklist, negative-location, document-expiry, Jira-ticket, customer-master and
# latest-email lookups. CTEs in the query:
#   cust_doc_data - document expiry per custid; 'NA' / NULL become the sentinel 9999-12-31.
#   jira_data     - count of cc_tickets rows created before each application.
#   cust_email    - most recent email (by valid_from_dt) dated before each application.
#
# Changes versus the previous version of this cell:
#   * tx_max_dpd_90d is now selected; the transaction table exposes the 30/60/90/120/150/180
#     day windows but the 90-day one was missing from this column list.
#   * COALESCE(ln_email) had a single argument (a no-op) and is now plain ln_email.
#
# NOTE(review): the `input` subquery reads a hard-coded table name while jira_data and
# cust_email read {schema2}.{bscoresnapshotcustomerdata}; confirm both point at the same
# table and switch to the variables if so.
# NOTE(review): last_applied_crif_id is selected twice, once aliased as
# last_applied_digitalloanaccountId - confirm the source column of that alias.
# NOTE(review): the referral, blacklist, negative-location, cust_doc_data and customer-master
# joins are on customer id only; more than one matching row in any of them would duplicate
# output rows - verify uniqueness in those tables.
sq = f"""
CREATE OR REPLACE TABLE `{schema2}.{bscoresnapshotcombineddata}` as
with cust_doc_data as (
SELECT
custid,
case when docExpiryDate = "NA" then '9999-12-31'
          when docExpiryDate is null then '9999-12-31'
          else PARSE_DATE('%e %b %Y', docExpiryDate)
        end as docExpiryDate
FROM dl_loans_db_raw.tdbk_loan_customer_details
),
jira_data as (
SELECT customer_id,
ln_loan_appln_time,
count(*) as cnt_jira_tickets_created,
 from
{schema2}.{bscoresnapshotcustomerdata} a
JOIN `jira_raw.cc_tickets` b on a.customerid = b.customer_id and b.created_date < a.ln_loan_appln_time
group by 1,2
),
cust_email as (
SELECT cust_id,email as ln_email, valid_from_dt,ln_loan_appln_time FROM `datalake_worktables.customer_mobile_mail_dtls` email_details
join {schema2}.{bscoresnapshotcustomerdata} input_customers on input_customers.customerid = email_details.cust_id and DATETIME(email_details.valid_from_dt) < DATETIME(ln_loan_appln_time)
QUALIFY ROW_NUMBER() OVER(PARTITION BY cust_id,ln_loan_appln_time order by valid_from_dt desc) = 1
)

SELECT
acc.customer_id,
dob as birth_date,
loanaccountnumber,
onb_tsa_onboarding_datetime,
acc.digitalloanaccountid as digitalLoanAccountId,
onboarding_date,
tx_first_product,
tx_first_product_user_segment,
credo_inquiry_date,
cust_status_flag,
cust_status_close_date,
days_since_credo_call_onb,
days_since_credo_call_loan_application,
ln_prod_type ln_product_type,
acc.ln_loan_appln_time,
ln_disb_dtime ln_loan_disb_time,
ln_user_type,
ln_loan_type,
onb_doc_type,
ln_osversion,
onb_kyc_status,
onb_email_verified_flag,
onb_place_of_birth,
onb_country,
onb_province,
onb_city,
onb_barangay,
onb_postalcode,
onb_latitude,
onb_longitude,
onb_osversion,
onb_first_name,
onb_middle_name,
onb_last_name,
onb_age,
onb_gender,
onb_mobile_no,
onb_email,
ln_email,
onb_self_dec_income,
onb_company_name,
onb_kyc_status_upgrade_datetime,
dob_observation_date,
ln_brand,
ln_cnt_dependents,
ln_source_funds_new,
ln_employment_type_new,
ln_industry_new,
ln_company_name,
ln_salary_scaled_income,
ln_self_dec_income,
ln_marital_status,
ln_education_level,
ln_nature_of_work_new,
ln_vas_opted_flag,
ln_age,
ln_mobile_no,
ln_alt_mobile_no,
ln_province,
ln_city,
ln_barangay,
ln_latitude,
ln_longitude,
ln_doc_type,
ln_ref1_type,
ln_ref2_type,
ln_loan_applied_flag,
ln_facta_flag,
ln_dl_rule_reject_flag,
ln_taran_rule_reject_flag,
ln_taran_scorecard_reject_flag,
ln_cdd_reject_flag,
ln_marked_underwriter_check_flag,
ln_underwriting_reject_flag,
ln_vas_used_flag,
ln_os_type,
ln_address,
ln_postal_code,
ln_doc_number,
ln_source_funds,
ln_employment_type,
ln_nature_of_work,
ln_industry,
ln_mature_fspd30_flag,
ln_fspd30_flag,
ln_mature_fpd30_flag,
ln_fpd30_flag,
first_applied_loan_appln_time,
first_applied_loan_type,
first_applied_product_type,
first_applied_loan_amount,
first_applied_loan_tenor,
first_disb_loan_appln_time,
first_disb_loan_disb_time,
first_disb_loan_type,
first_disb_product_type,
first_disb_loan_amount,
first_disb_loan_tenor,
last_disb_loan_appln_time,
last_disb_loan_disb_time,
last_disb_loan_type,
last_disb_product_type,
last_disb_loan_amount,
last_disb_loan_tenor,
last_disb_crif_id,
last_applied_loan_appln_time,
last_applied_loan_decision,
last_applied_loan_type,
last_applied_product_type,
last_applied_loan_amount,
last_applied_loan_tenor,
last_applied_os_type,
last_applied_crif_id last_applied_digitalloanaccountId,
last_applied_cic_called_flag,
last_applied_cic_hit_flag,
last_applied_crif_id,
last_applied_credo_ref_no,
last_applied_credo_score,
onb_year,
onb_month_of_year,
onb_week_of_month,
onb_day_of_month,
onb_time_of_day,
tx_cnt_cash_in_total,
tx_cnt_cash_in_ob2t,
tx_cnt_cash_in_ot2t,
tx_amt_cash_in_total,
tx_amt_cash_in_ob2t,
tx_amt_cash_in_ot2t,
tx_cnt_cash_out_total,
tx_cnt_cash_out_billpay,
tx_cnt_cash_out_cards,
tx_cnt_cash_out_t2ot,
tx_cnt_cash_out_t2ob,
tx_amt_cash_out_total,
tx_amt_cash_out_billpay,
tx_amt_cash_out_cards,
tx_amt_cash_out_t2ot,
tx_amt_cash_out_t2ob,
tx_deposit_accnt_cnt,
tx_stash_accnt_opened_cnt,
tx_stash_accnt_closed_cnt,
tx_stash_balance,
tx_td_accnt_opened_cnt,
tx_td_accnt_completed_cnt,
tx_td_accnt_broken_cnt,
tx_td_auto_roll_over_enabled,
tx_td_balance,
tx_td_max_duration,
tx_td_min_duration,
tx_td_avg_duration,
tx_stash_max_duration,
tx_stash_avg_duration,
tx_stash_min_duration,
tx_med_days_bw_td_tsa_acct_open,
tx_med_days_bw_new_dep_acct_open,
tx_med_days_bw_td_acct_open,
tx_avg_days_bt_trans,
tx_avg_days_bt_cash_in_trans,
tx_avg_days_bt_cash_out_trans,
tx_med_days_bt_trans,
tx_med_days_bt_cash_in_trans,
tx_med_days_bt_cash_out_trans,
tx_cnt_applied_loan,
tx_cnt_rejected_loans,
tx_cnt_approved_loans,
tx_cnt_disbursed_loans,
tx_cnt_completed_loans,
tx_cnt_active_loans,
tx_cnt_incomplete_loan_apps,
tx_cnt_installments_paid_tot,
tx_amt_installments_paid_tot,
tx_cnt_installments_paid_last_disb_withdpd,
tx_cnt_installments_paid_tot_with_dpd,
tx_amt_installments_paid_tot_with_dpd,
tx_amount_tot_due tx_loan_amount_tot_due,
tx_cnt_installments_paid_last_disb,
tx_amt_installments_paid_last_disb,
tx_min_age_completed_loans,
tx_max_age_completed_loans,
tx_avg_age_completed_loans,
tx_cnt_dpd_gt_1_ever,
tx_cnt_fpd10_ever,
tx_cnt_fpd30_ever,
tx_cnt_fspd30_ever,
tx_cnt_dpd_gt_5_ever,
tx_max_ever_dpd,
tx_max_dpd_30d,
tx_max_dpd_60d,
acc.tx_max_dpd_90d,
tx_max_dpd_120d,
tx_max_dpd_150d,
tx_max_dpd_180d,
tx_max_current_dpd,
CASE WHEN tdbk_referral_code_mtb.cust_id is not null THEN 1
ELSE 0 END AS onb_referral_flag,
in_fraud_blacklist as ln_fraud_blacklist_flag,
in_negative_location ln_negative_location_flag,
flag_contactable_last90D cs_contactable_last_90d_flag,
count_contactable_last90D cs_contactable_last_90d_cnt,
flag_contactable_upsell_last90D cs_contactable_last_90d_upsell_flag,
count_contactable_upsell_last90D cs_contactable_last_90d_upsell_cnt,
cnt_jira_tickets_created,
CASE WHEN cust_doc_data.docExpiryDate <> '9999-12-31' AND DATE_DIFF(Date(cust_doc_data.docExpiryDate), DATE(acc.ln_loan_appln_time) , DAY) >= 75 THEN 1
ELSE 0 END onb_valid_documents_flag,
ln_any_prev_disb_loan_sil_mobile_flag,
CASE WHEN appsflyer_install_to_registration_minutes < 0 THEN NULL
ELSE appsflyer_install_to_registration_minutes END appsflyer_install_to_registration_minutes,
meng_no_of_logins,
meng_calculator_count,
meng_calculator_tot_visit_cnt,
channel_source_group,
marketing_source_name,
outbound_call_count_upsell_mkt cs_cnt_outbound_calls_upsell_mkt,
inbound_call_count_upsell_mkt cs_cnt_inbound_calls_upsell_mkt,
outbound_email_count_upsell_mkt cs_cnt_outbound_emails_upsell_mkt,
inbound_email_count_upsell_mkt cs_cnt_inbound_emails_upsell_mkt,
outbound_call_count_others cs_cnt_outbound_calls_others,
inbound_call_count_others cs_cnt_inbound_calls_others,
outbound_email_count_others cs_cnt_outbound_emails_others,
inbound_email_count_others cs_cnt_inbound_emails_others,
FROM (SELECT customerid,ln_loan_appln_time,onb_tsa_onboarding_datetime from worktable_data_analysis.b_score_applied_loans_customer_information_data) input
JOIN {schema2}.{bscoresnapshotcustomertransactiondata} acc ON input.customerid = acc.customer_id and input.ln_loan_appln_time = acc.ln_loan_appln_time
LEFT JOIN {schema2}.{bscoresnapshotcustomerevent} events on events.customerid = acc.customer_id and events.ln_loan_appln_time = acc.ln_loan_appln_time
LEFT JOIN (SELECT cust_id,member_type,referral_type FROM dl_customers_db_raw.tdbk_referral_code_mtb
WHERE tdbk_referral_code_mtb.member_type='REFEREE') tdbk_referral_code_mtb on  tdbk_referral_code_mtb.cust_id = acc.customer_id
LEFT JOIN worktable_data_analysis.fraud_blacklist_clean fraud_blacklist_clean ON cast(fraud_blacklist_clean.customerid as string) = acc.customer_id
LEFT JOIN worktable_data_analysis.reloan_customers_from_negative_locations negative_location ON cast(negative_location.customerid as string)= acc.customer_id
LEFT JOIN {schema2}.{bscoresnapshotcontactability} contactability ON cast(contactability.customerid as string)= acc.customer_id and contactability.ln_loan_appln_time = acc.ln_loan_appln_time
LEFT JOIN cust_doc_data ON cust_doc_data.custid = acc.customer_id
LEFT JOIN jira_data on jira_data.customer_id = acc.customer_id and jira_data.ln_loan_appln_time = acc.ln_loan_appln_time
LEFT JOIN `dl_customers_db_raw.tdbk_customer_mtb` cust on cust.cust_id = acc.customer_id
LEFT JOIN cust_email ON cust_email.cust_id = acc.customer_id and cust_email.ln_loan_appln_time = acc.ln_loan_appln_time
;
"""

job = client.query(sq)
job.result()  # Block until the BigQuery job has finished.
time.sleep(5)  # Short 5-second pause after the job completes.
print(f'Table {schema2}.{bscoresnapshotcombineddata} created successfully')

Table worktable_data_analysis.b_score_model_applied_loans_20250930 created successfully
