In this notebook I will create a training table equivalent to ml_model_run_details

# Define Library

In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.
# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os
import tempfile
import time
from datetime import datetime
import uuid
import joblib
import uuid

import gcsfs
import duckdb as dd
import pickle
import joblib
from typing import Union
import io
path = r'C:\Users\Dwaipayan\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')
os.environ["GOOGLE_CLOUD_PROJECT"] = "prj-prod-dataplatform"

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
pd.set_option('display.max_columns', None)
pd.set_option("Display.max_rows", 100)

### Function

#### expand_calc_features

In [2]:
import pandas as pd
import json

def expand_calc_features(df):
    """
    Expand the calcFeatures JSON column into separate columns and return the complete DataFrame.

    Parameters:
    df (pd.DataFrame): Input DataFrame with calcFeatures column containing JSON data

    Returns:
    pd.DataFrame: Expanded DataFrame with all original columns plus JSON features as separate columns
    """

    # Make a copy to avoid modifying the original DataFrame
    df_expanded = df.copy()

    # Parse the calcFeatures JSON column
    calc_features_list = []

    for idx, calc_features_str in enumerate(df['calcFeatures']):
        try:
            # Parse the JSON string
            features_dict = json.loads(calc_features_str.replace("'", '"'))  # Replace single quotes with double quotes for valid JSON
            calc_features_list.append(features_dict)
        except (json.JSONDecodeError, AttributeError) as e:
            # If parsing fails, create an empty dict and print warning
            print(f"Warning: Could not parse calcFeatures at index {idx}: {e}")
            calc_features_list.append({})

    # Create DataFrame from the parsed JSON data
    calc_features_df = pd.DataFrame(calc_features_list)

    # Add prefix to JSON-derived columns to avoid conflicts
    calc_features_df = calc_features_df.add_prefix('calc_')

    # Reset index to ensure proper alignment
    df_expanded = df_expanded.reset_index(drop=True)
    calc_features_df = calc_features_df.reset_index(drop=True)

    # Combine original DataFrame with expanded calcFeatures
    result_df = pd.concat([df_expanded, calc_features_df], axis=1)

    return result_df


#### expand_calc_features_robust

In [3]:
import pandas as pd
import json

def expand_calc_features_robust(df):
    """
    Expand the calcFeatures JSON column into separate columns with better error handling.

    Parameters:
    df (pd.DataFrame): Input DataFrame with calcFeatures column containing JSON data

    Returns:
    pd.DataFrame: Expanded DataFrame with all original columns plus JSON features as separate columns
    """

    # Make a copy to avoid modifying the original DataFrame
    df_expanded = df.copy()

    # Parse the calcFeatures JSON column
    calc_features_data = []

    for idx, row in df.iterrows():
        calc_features_str = row['calcFeatures']

        if pd.isna(calc_features_str) or calc_features_str == '':
            calc_features_data.append({})
            continue

        try:
            # Clean the string and parse JSON
            cleaned_str = calc_features_str.replace("'", '"').replace('None', 'null').replace('True', 'true').replace('False', 'false')
            features_dict = json.loads(cleaned_str)
            calc_features_data.append(features_dict)
        except Exception as e:
            print(f"Warning: Could not parse calcFeatures at index {idx}: {e}")
            print(f"Problematic string: {calc_features_str[:100]}...")  # Print first 100 chars
            calc_features_data.append({})

    # Create DataFrame from the parsed JSON data
    calc_features_df = pd.DataFrame(calc_features_data)

    # Add prefix to JSON-derived columns to avoid conflicts with existing columns
    calc_features_df = calc_features_df.add_prefix('feat_')

    # Combine DataFrames
    result_df = pd.concat([df_expanded, calc_features_df], axis=1)

    print(f"Original DataFrame shape: {df.shape}")
    print(f"Expanded DataFrame shape: {result_df.shape}")
    print(f"Added {len(calc_features_df.columns)} new columns from calcFeatures")

    return result_df

#### transform_data

In [4]:
# import pandas as pd
# import json
# import uuid
# from datetime import datetime
# from typing import List

# def transform_data(d1: pd.DataFrame, feature_column: List[str], a='demo_score', modelDisplayName = 'Cash_beta_trench1_Demo_backscore', subscription_name = 'sil_march 25 models'):
#     # Read the input CSV file
#     df = d1.copy()
    
#     # Create the output DataFrame with the required structure
#     output_data = []
    
#     for _, row in df.iterrows():
#         # Create the calcFeature JSON with all the feature columns
#         feature_columns = feature_column
        
#         calc_feature = {}
#         for col in feature_columns:
#             if col in row and pd.notna(row[col]):
#                 # Convert Timestamp objects to string
#                 if isinstance(row[col], pd.Timestamp):
#                     calc_feature[col] = row[col].isoformat()
#                 else:
#                     calc_feature[col] = row[col]
        
       
#         # Get current timestamp
#         current_time = datetime.now().isoformat()
        
#         # Create the output row
#         output_row = {
#             "customerId": row['customer_id'],
#             "digitalLoanAccountId": row['digitalLoanAccountId'],
#             "crifApplicationId": str(uuid.uuid4()),  # Generate random UUID
#             "prediction": row.get(a, 0),
#             "start_time": current_time,
#             "end_time": current_time,
#             "modelDisplayName":modelDisplayName,
#             "modelVersionId":"v1",
#             "subscription_name": subscription_name,
#             "message_id": str(uuid.uuid4()),  # Generate random UUID
#             "publish_time": current_time,
#             "attributes": "{}",  # Empty JSON object
#             "calcFeature": json.dumps(calc_feature, default=str)  # Use default=str to handle non-serializable objects
            
#         }
        
#         output_data.append(output_row)
    
#     # Create DataFrame from the output data
#     output_df = pd.DataFrame(output_data)
    
#     return output_df

# # Example usage:
# # transformeddata = 'cash_beta_trench1_applied_loans_backscored_20241001_20250831'
# # transform_data(f'{LOCALPATH}/{transformeddata}.csv')

## Transform data v1

In [5]:
import pandas as pd
import json
import uuid
from datetime import datetime
from typing import List

def transform_data(
    d1: pd.DataFrame, 
    feature_column: List[str], 
    a: str = 'demo_score', 
    modelDisplayName: str = 'Cash_beta_trench1_Demo_backscore', 
    tc: str = "", 
    subscription_name: str = 'sil_march 25 models'
) -> pd.DataFrame:
    """
    Transforms input data into a structured format suitable for model scoring output.

    Parameters:
    - d1 (pd.DataFrame): Input DataFrame containing raw data.
    - feature_column (List[str]): List of column names to include in the 'calcFeature' JSON.
    - a (str): Column name containing the prediction score. Default is 'demo_score'.
    - modelDisplayName (str): Name of the model used for scoring.
    - tc (str): Trench category (optional).
    - do (str): Device operating system. Default is 'android'.
    - subscription_name (str): Name of the subscription or model group.

    Returns:
    - pd.DataFrame: Transformed DataFrame with structured output.
    """

    # Make a copy of the input DataFrame to avoid modifying the original
    df = d1.copy()
    
    # Initialize an empty list to store transformed rows
    output_data = []
    
    # Iterate over each row in the DataFrame
    for _, row in df.iterrows():
        # Initialize dictionary to hold feature values
        calc_feature = {}
        
        # Loop through each feature column and extract its value from the row
        for col in feature_column:
            if col in row and pd.notna(row[col]):
                # Convert datetime values to ISO format strings
                if isinstance(row[col], pd.Timestamp):
                    calc_feature[col] = row[col].isoformat()
                else:
                    calc_feature[col] = row[col]
        
        # Get the current timestamp for start_time, end_time, and publish_time
        current_time = datetime.now().isoformat()
        
        # Construct the output row dictionary with required fields
        output_row = {
            "customerId": row['customer_id'],  # Unique customer identifier
            "digitalLoanAccountId": row['digitalLoanAccountId'],  # Loan account ID
            "crifApplicationId": str(uuid.uuid4()),  # Random UUID for application ID
            "prediction": row.get(a, 0),  # Prediction score from specified column
            "start_time": current_time,  # Timestamp when processing starts
            "end_time": current_time,    # Timestamp when processing ends
            "modelDisplayName": modelDisplayName,  # Name of the model used
            "modelVersionId": "v1",  # Static model version
            "calcFeature": json.dumps(calc_feature, default=str),  # Features as JSON string
            "subscription_name": subscription_name,  # Subscription name
            "message_id": str(uuid.uuid4()),  # Random UUID for message ID
            "publish_time": current_time,  # Timestamp when message is published
            "attributes": "{}",  # Placeholder for additional attributes
            "trenchCategory": tc,  # Optional trench category
            "deviceOs": row['osType'],
            "Data_selection": row['Data_selection'],  # Data selection
            "Application_date": row['application_date'],
        }
        
        # Append the transformed row to the output list
        output_data.append(output_row)
    
    # Convert the list of dictionaries to a DataFrame
    output_df = pd.DataFrame(output_data)
    
    # Return the transformed DataFrame
    return output_df


# transform_datav2

In [6]:
import pandas as pd
import json
import uuid
from datetime import datetime
from typing import List

def transform_datav2(
    d1: pd.DataFrame, 
    feature_column: List[str], 
    a: str = 'demo_score', 
    modelDisplayName: str = 'Cash_beta_trench1_Demo_backscore', 
    tc: str = "", 
    subscription_name: str = 'sil_march 25 models'
) -> pd.DataFrame:
    """
    Transforms input data into a structured format suitable for model scoring output.

    Parameters:
    - d1 (pd.DataFrame): Input DataFrame containing raw data.
    - feature_column (List[str]): List of column names to include in the 'calcFeature' JSON.
    - a (str): Column name containing the prediction score. Default is 'demo_score'.
    - modelDisplayName (str): Name of the model used for scoring.
    - tc (str): Trench category (optional).
    - do (str): Device operating system. Default is 'android'.
    - subscription_name (str): Name of the subscription or model group.

    Returns:
    - pd.DataFrame: Transformed DataFrame with structured output.
    """

    # Make a copy of the input DataFrame to avoid modifying the original
    df = d1.copy()
    
    # Initialize an empty list to store transformed rows
    output_data = []
    
    # Iterate over each row in the DataFrame
    for _, row in df.iterrows():
        # Initialize dictionary to hold feature values
        calc_feature = {}
        
        # Loop through each feature column and extract its value from the row
        for col in feature_column:
            if col in row and pd.notna(row[col]):
                # Convert datetime values to ISO format strings
                if isinstance(row[col], pd.Timestamp):
                    calc_feature[col] = row[col].isoformat()
                else:
                    calc_feature[col] = row[col]
        
        # Get the current timestamp for start_time, end_time, and publish_time
        current_time = datetime.now().isoformat()
        
        # Construct the output row dictionary with required fields
        output_row = {
            "customerId": row['customer_id'],  # Unique customer identifier
            "digitalLoanAccountId": row['digitalLoanAccountId'],  # Loan account ID
            "crifApplicationId": str(uuid.uuid4()),  # Random UUID for application ID
            "prediction": row.get(a, 0),  # Prediction score from specified column
            "start_time": current_time,  # Timestamp when processing starts
            "end_time": current_time,    # Timestamp when processing ends
            "modelDisplayName": modelDisplayName,  # Name of the model used
            "modelVersionId": "v2",  # Static model version
            "calcFeature": json.dumps(calc_feature, default=str),  # Features as JSON string
            "subscription_name": subscription_name,  # Subscription name
            "message_id": str(uuid.uuid4()),  # Random UUID for message ID
            "publish_time": current_time,  # Timestamp when message is published
            "attributes": "{}",  # Placeholder for additional attributes
            "trenchCategory": tc,  # Optional trench category
            "deviceOs": row['osType'],  # Device operating system
            "Data_selection": row['Data_selection'],  # Data selection
            "Application_date": row['application_date'],
        }
        
        # Append the transformed row to the output list
        output_data.append(output_row)
    
    # Convert the list of dictionaries to a DataFrame
    output_df = pd.DataFrame(output_data)
    
    # Return the transformed DataFrame
    return output_df


#### PSI Functions new

In [7]:
## Updated on 27-10-2025 - Modified for Training Period Baseline
import pandas as pd
import numpy as np
from typing import List, Dict, Tuple
import warnings
warnings.filterwarnings('ignore')

def identify_feature_types(df: pd.DataFrame, feature_list: List[str]) -> Dict[str, List[str]]:
    """
    Identify categorical and numerical features from the feature list.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe
    feature_list : List[str]
        List of features to classify

    Returns:
    --------
    Dict with 'categorical' and 'numerical' keys containing respective feature lists
    """
    categorical_features = []
    numerical_features = []

    for feature in feature_list:
        if feature not in df.columns:
            print(f"Warning: Feature '{feature}' not found in dataframe")
            continue

        # Check if feature is numeric
        if pd.api.types.is_numeric_dtype(df[feature]):
            # If unique values are less than 15 and all integers, treat as categorical
            unique_vals = df[feature].nunique()
            if unique_vals < 15 and df[feature].dropna().apply(lambda x: x == int(x) if isinstance(x, (int, float)) else False).all():
                categorical_features.append(feature)
            else:
                numerical_features.append(feature)
        else:
            categorical_features.append(feature)

    return {
        'categorical': categorical_features,
        'numerical': numerical_features
    }


def create_bins_for_features(df: pd.DataFrame,
                             numerical_features: List[str],
                             categorical_features: List[str],
                             train_period_df: pd.DataFrame) -> Dict:
    """
    Create bins for numerical features (deciles with fallback) and categorical features (top 6 + others)
    based on the entire training period data.

    Parameters:
    -----------
    df : pd.DataFrame
        Full input dataframe
    numerical_features : List[str]
        List of numerical features
    categorical_features : List[str]
        List of categorical features
    train_period_df : pd.DataFrame
        Training period dataframe (June 2024 to March 2025)

    Returns:
    --------
    Dictionary containing binning information for each feature
    """
    binning_info = {}

    # Create bins for numerical features with fallback strategy
    for feature in numerical_features:
        valid_data = train_period_df[feature].dropna()

        if len(valid_data) == 0:
            binning_info[feature] = {'type': 'numerical', 'bins': None, 'bin_ranges': {}}
            continue

        bins = None
        bin_count = None

        # Try 10 bins (deciles)
        try:
            test_bins = np.percentile(valid_data, np.arange(0, 101, 10))
            test_bins = np.unique(test_bins)
            if len(test_bins) >= 11:  # 11 edges = 10 bins
                bins = test_bins
                bin_count = 10
        except Exception as e:
            pass

        # If 10 bins not possible, try 5 bins
        if bins is None:
            try:
                test_bins = np.percentile(valid_data, np.arange(0, 101, 20))
                test_bins = np.unique(test_bins)
                if len(test_bins) >= 6:  # 6 edges = 5 bins
                    bins = test_bins
                    bin_count = 5
            except Exception as e:
                pass

        # If 5 bins not possible, try 3 bins
        if bins is None:
            try:
                test_bins = np.percentile(valid_data, [0, 33.33, 66.67, 100])
                test_bins = np.unique(test_bins)
                if len(test_bins) >= 4:  # 4 edges = 3 bins
                    bins = test_bins
                    bin_count = 3
            except Exception as e:
                pass

        # If still no bins possible, use equal distance bins of 5
        if bins is None:
            print(f"Warning: Feature '{feature}' has insufficient variance - cannot create standard bins")
            print(f"Feature '{feature}': Using equal distance bins of 5")

            min_val = valid_data.min()
            max_val = valid_data.max()

            # Create 5 equal distance bins
            bins = np.linspace(min_val, max_val, 6)  # 6 edges = 5 bins
            bins = np.unique(bins)
            bin_count = len(bins) - 1

            # If all values are the same, add slight buffer
            if bin_count == 1:
                bins = np.array([min_val - 0.1, min_val, min_val + 0.1])
                bin_count = 2
                print(f"Feature '{feature}': Constant value ({min_val}). Created 2 equal distance bins with buffer")

        # Add infinity edges to capture all values
        bins = bins.copy()
        bins[0] = -np.inf
        bins[-1] = np.inf

        print(f"Feature '{feature}': Created {bin_count} bins")

        # Create bin ranges dictionary
        bin_ranges = {}
        for i in range(len(bins)-1):
            bin_name = f"Bin_{i+1}"
            bin_ranges[bin_name] = {
                'min': bins[i],
                'max': bins[i+1],
                'range_str': f"[{bins[i]:.2f}, {bins[i+1]:.2f}]" if not np.isinf(bins[i]) and not np.isinf(bins[i+1]) else f"({bins[i]}, {bins[i+1]})"
            }

        binning_info[feature] = {
            'type': 'numerical',
            'bins': bins,
            'bin_ranges': bin_ranges,
            'bin_count': bin_count
        }

    # Create bins for categorical features (top 6 + others) using training period
    for feature in categorical_features:
        value_counts = train_period_df[feature].value_counts()
        unique_categories = value_counts.index.tolist()
        print(f"Unique categories: {unique_categories}")

        if len(unique_categories) <= 6:
            # Treat each category as a separate bin
            top_categories = unique_categories
        else:
            # Use top 6 categories only
            top_categories = value_counts.nlargest(6).index.tolist()

        print(f"Top categories for feature '{feature}': {top_categories}")

        binning_info[feature] = {
                'type': 'categorical',
                'top_categories': top_categories,
                'bin_ranges': {}  # No ranges for categorical
            }

    return binning_info


def apply_binning(df: pd.DataFrame,
                  feature: str,
                  binning_info: Dict) -> pd.Series:
    """
    Apply binning to a feature based on binning information.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe
    feature : str
        Feature name
    binning_info : Dict
        Binning information for the feature

    Returns:
    --------
    pd.Series with binned values
    """
    if binning_info['type'] == 'numerical':
        if binning_info['bins'] is None:
            return pd.Series(['Missing'] * len(df), index=df.index)

        bins = binning_info['bins']
        labels = [f"Bin_{i+1}" for i in range(len(bins)-1)]

        binned = pd.cut(df[feature],
                       bins=bins,
                       labels=labels,
                       include_lowest=True,
                       duplicates='drop')

        # Handle nulls - convert to string and then replace
        binned = binned.astype(str)
        binned[df[feature].isna()] = 'Missing'

        return binned

    else:  # categorical
        top_cats = binning_info['top_categories']

        # Convert to string for consistent comparison
        if pd.api.types.is_categorical_dtype(df[feature]):
            feature_data = df[feature].astype(str)
        else:
            feature_data = df[feature].astype(str)

        # Replace NaN string representation with 'Missing'
        feature_data = feature_data.replace('nan', 'Missing')

        # Convert top_cats to strings for comparison
        top_cats_str = [str(cat) for cat in top_cats]

        # Apply binning logic: use category name if in top_cats, else 'Others' (except for Missing)
        binned = feature_data.apply(lambda x: x if x in top_cats_str else ('Others' if x != 'Missing' else 'Missing'))

        return binned


def calculate_psi(expected_pct: pd.Series,
                  actual_pct: pd.Series,
                  epsilon: float = 0.0001) -> float:
    """
    Calculate Population Stability Index with proper epsilon handling and renormalization.

    Parameters:
    -----------
    expected_pct : pd.Series
        Expected (baseline) percentages
    actual_pct : pd.Series
        Actual percentages
    epsilon : float
        Small value to avoid log(0)

    Returns:
    --------
    PSI value
    """
    # Align indices
    all_bins = expected_pct.index.union(actual_pct.index)
    expected_pct = expected_pct.reindex(all_bins, fill_value=0)
    actual_pct = actual_pct.reindex(all_bins, fill_value=0)

    # Only add epsilon where values are zero
    expected_pct = expected_pct.apply(lambda x: epsilon if x == 0 else x)
    actual_pct = actual_pct.apply(lambda x: epsilon if x == 0 else x)

    # Renormalize to ensure they sum to 1 after adding epsilon
    expected_pct = expected_pct / expected_pct.sum()
    actual_pct = actual_pct / actual_pct.sum()

    # Calculate PSI
    psi_value = np.sum((actual_pct - expected_pct) * np.log(actual_pct / expected_pct))

    return psi_value


def calculate_month_on_month_psi(df: pd.DataFrame,
                                 feature_list: List[str],
                                 segment_columns: List[str],
                                 month_col: str = 'Application_month',
                                 data_selection_col: str = 'Data_selection',
                                 account_id_col: str = 'digitalLoanAccountId') -> pd.DataFrame:
    """
    Calculate PSI for each feature comparing training period (June 2024 to March 2025)
    vs each month after March 2025, overall and by segments.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe
    feature_list : List[str]
        List of features to calculate PSI for
    segment_columns : List[str]
        List of segment columns
    month_col : str
        Name of month column
    data_selection_col : str
        Name of data selection column (identifies train period)
    account_id_col : str
        Name of account ID column for counting distinct accounts

    Returns:
    --------
    pd.DataFrame with PSI values with one row per feature-month-segment combination
    """
    # Create a copy to avoid modifying original
    df = df.copy()

    # Identify training and test periods
    train_df = df[df[data_selection_col] == 'Train'].copy()
    test_df = df[df[data_selection_col] != 'Train'].copy()

    if len(train_df) == 0:
        raise ValueError("No training data found. Check Data_selection column.")

    print(f"Training period: {train_df[month_col].min()} to {train_df[month_col].max()}")
    print(f"Test period: {test_df[month_col].min()} to {test_df[month_col].max()}")

    # Identify feature types
    feature_types = identify_feature_types(df, feature_list)

    # Create binning strategy based on training period
    binning_info = create_bins_for_features(
        df,
        feature_types['numerical'],
        feature_types['categorical'],
        train_df
    )

    # Get sorted test months
    test_months = sorted(test_df[month_col].unique())

    results = []

    # Calculate overall PSI
    for feature in feature_list:
        if feature not in df.columns:
            continue

        # Apply binning to entire dataset
        df[f'{feature}_binned'] = apply_binning(df, feature, binning_info[feature])
        # print(f"Feature binned {df[f'{feature}_binned']}")
        # Get training period distribution (baseline)
        train_baseline = df[df[data_selection_col] == 'Train'][f'{feature}_binned'].value_counts(normalize=True)

        # Calculate PSI for each test month
        for month in test_months:
            actual_dist = df[df[month_col] == month][f'{feature}_binned'].value_counts(normalize=True)
            psi_value = calculate_psi(train_baseline, actual_dist)

            # Calculate average percentages across all bins
            expected_avg_pct = train_baseline.mean() * 100
            actual_avg_pct = actual_dist.mean() * 100

            # # Count distinct accounts for segment
            # base_segment_count = train_segment[account_id_col].nunique()
            # actual_segment_count = actual_segment[account_id_col].nunique()


            results.append({
                'Feature': feature,
                'Feature_Type': binning_info[feature]['type'],
                'Segment_Column': 'Overall',
                'Segment_Value': 'All',
                'Month': f"{month}",
                'Base_Month': 'Train (Jun 2024 - Mar 2025)',
                'Current_Month': month,
                'Expected_Percentage': expected_avg_pct,
                'Actual_Percentage': actual_avg_pct,
                'PSI': psi_value
            })

    # Calculate PSI by segments
    for segment_col in segment_columns:
        if segment_col not in df.columns:
            continue

        segments = df[segment_col].dropna().unique()

        for segment_val in segments:
            segment_df = df[df[segment_col] == segment_val]

            for feature in feature_list:
                if feature not in df.columns:
                    continue

                # Get training period distribution for segment
                train_segment = segment_df[segment_df[data_selection_col] == 'Train']
                if len(train_segment) == 0:
                    continue

                train_baseline = train_segment[f'{feature}_binned'].value_counts(normalize=True)

                # Calculate PSI for each test month
                for month in test_months:
                    actual_segment = segment_df[segment_df[month_col] == month]
                    if len(actual_segment) == 0:
                        continue

                    actual_dist = actual_segment[f'{feature}_binned'].value_counts(normalize=True)
                    psi_value = calculate_psi(train_baseline, actual_dist)

                    # Calculate average percentages across all bins
                    expected_avg_pct = train_baseline.mean() * 100
                    actual_avg_pct = actual_dist.mean() * 100

                    # Count distinct accounts for segment
                    base_segment_count = train_segment[account_id_col].nunique()
                    actual_segment_count = actual_segment[account_id_col].nunique()

                    results.append({
                        'Feature': feature,
                        'Feature_Type': binning_info[feature]['type'],
                        'Segment_Column': segment_col,
                        'Segment_Value': segment_val,
                        'Month': f"{month}",
                        'Base_Month': 'Train (Jun 2024 - Mar 2025)',
                        'Current_Month': month,
                        'Base_Count': base_segment_count,
                        'Actual_Count': actual_segment_count,
                        'Expected_Percentage': expected_avg_pct,
                        'Actual_Percentage': actual_avg_pct,
                        'PSI': psi_value
                    })

    return pd.DataFrame(results)


def calculate_bin_level_psi(df: pd.DataFrame,
                            feature_list: List[str],
                            segment_columns: List[str],
                            month_col: str = 'Application_month',
                            data_selection_col: str = 'Data_selection',
                            account_id_col: str = 'digitalLoanAccountId') -> pd.DataFrame:
    """
    Calculate bin-level PSI for each feature comparing training period
    vs each month after March 2025, overall and by segments.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe
    feature_list : List[str]
        List of features to calculate PSI for
    segment_columns : List[str]
        List of segment columns
    month_col : str
        Name of month column
    data_selection_col : str
        Name of data selection column
    account_id_col : str
        Name of account ID column for counting distinct accounts

    Returns:
    --------
    pd.DataFrame with bin-level PSI details including bin ranges
    """
    # Create a copy to avoid modifying original
    df = df.copy()

    # Identify training and test periods
    train_df = df[df[data_selection_col] == 'Train'].copy()
    test_df = df[df[data_selection_col] != 'Train'].copy()

    if len(train_df) == 0:
        raise ValueError("No training data found. Check Data_selection column.")

    print(f"Training period: {train_df[month_col].min()} to {train_df[month_col].max()}")
    print(f"Test period: {test_df[month_col].min()} to {test_df[month_col].max()}")

    # Identify feature types
    feature_types = identify_feature_types(df, feature_list)

    # Create binning strategy based on training period
    binning_info = create_bins_for_features(
        df,
        feature_types['numerical'],
        feature_types['categorical'],
        train_df
    )

    # Get sorted test months
    test_months = sorted(test_df[month_col].unique())

    results = []
    epsilon = 0.0001

    # Calculate overall bin-level PSI
    for feature in feature_list:
        if feature not in df.columns:
            continue

        # Apply binning to entire dataset
        df[f'{feature}_binned'] = apply_binning(df, feature, binning_info[feature])
        # print(df[f'{feature}_binned'])

        # Get training period distribution (baseline)
        train_baseline = df[df[data_selection_col] == 'Train'][f'{feature}_binned'].value_counts(normalize=True)

        # Calculate bin-level PSI for each test month
        for month in test_months:
            month_data = df[df[month_col] == month]
            actual_dist = month_data[f'{feature}_binned'].value_counts(normalize=True)

            # Count distinct accounts
            base_count = df[df[data_selection_col] == 'Train'][account_id_col].nunique()
            actual_count = month_data[account_id_col].nunique()

            # Get all bins
            all_bins = train_baseline.index.union(actual_dist.index)

            for bin_name in all_bins:
                # Simplified epsilon logic - no redundancy
                expected_pct = train_baseline.get(bin_name, 0)
                actual_pct = actual_dist.get(bin_name, 0)

                # Add epsilon only if zero
                expected_pct = epsilon if expected_pct == 0 else expected_pct
                actual_pct = epsilon if actual_pct == 0 else actual_pct

                # Calculate bin-level PSI
                bin_psi = (actual_pct - expected_pct) * np.log(actual_pct / expected_pct)

                # Get bin range information
                bin_ranges = binning_info[feature]['bin_ranges']
                if bin_name in bin_ranges:
                    bin_min = bin_ranges[bin_name]['min']
                    bin_max = bin_ranges[bin_name]['max']
                    bin_range = bin_ranges[bin_name]['range_str']
                else:
                    # For categorical or special bins (Missing, Others)
                    bin_min = None
                    bin_max = None
                    bin_range = bin_name

                results.append({
                    'Feature': feature,
                    'Feature_Type': binning_info[feature]['type'],
                    'Segment_Column': 'Overall',
                    'Segment_Value': 'All',
                    'Month': f"{month}",
                    'Base_Month': 'Train (Jun 2024 - Mar 2025)',
                    'Current_Month': month,
                    'Base_Count': base_count,
                    'Actual_Count': actual_count,
                    'Bin': bin_name,
                    'Bin_Range': bin_range,
                    'Bin_Min': bin_min,
                    'Bin_Max': bin_max,
                    'Base_Percentage': (train_baseline.get(bin_name, 0) * 100),
                    'Actual_Percentage': (actual_dist.get(bin_name, 0) * 100),
                    'Bin_PSI': bin_psi
                })

    # Calculate bin-level PSI by segments
    for segment_col in segment_columns:
        if segment_col not in df.columns:
            continue

        segments = df[segment_col].dropna().unique()

        for segment_val in segments:
            segment_df = df[df[segment_col] == segment_val]

            for feature in feature_list:
                if feature not in df.columns:
                    continue

                # Get training period distribution for segment
                train_segment = segment_df[segment_df[data_selection_col] == 'Train']
                if len(train_segment) == 0:
                    continue

                train_baseline = train_segment[f'{feature}_binned'].value_counts(normalize=True)

                # Calculate bin-level PSI for each test month
                for month in test_months:
                    actual_segment = segment_df[segment_df[month_col] == month]
                    if len(actual_segment) == 0:
                        continue

                    actual_dist = actual_segment[f'{feature}_binned'].value_counts(normalize=True)

                    # Count distinct accounts for segment
                    base_segment_count = train_segment[account_id_col].nunique()
                    actual_segment_count = actual_segment[account_id_col].nunique()

                    # Get all bins
                    all_bins = train_baseline.index.union(actual_dist.index)

                    for bin_name in all_bins:
                        # Simplified epsilon logic - no redundancy
                        expected_pct = train_baseline.get(bin_name, 0)
                        actual_pct = actual_dist.get(bin_name, 0)

                        # Add epsilon only if zero
                        expected_pct = epsilon if expected_pct == 0 else expected_pct
                        actual_pct = epsilon if actual_pct == 0 else actual_pct

                        # Calculate bin-level PSI
                        bin_psi = (actual_pct - expected_pct) * np.log(actual_pct / expected_pct)

                        # Get bin range information
                        bin_ranges = binning_info[feature]['bin_ranges']
                        if bin_name in bin_ranges:
                            bin_min = bin_ranges[bin_name]['min']
                            bin_max = bin_ranges[bin_name]['max']
                            bin_range = bin_ranges[bin_name]['range_str']
                        else:
                            # For categorical or special bins (Missing, Others)
                            bin_min = None
                            bin_max = None
                            bin_range = bin_name

                        results.append({
                            'Feature': feature,
                            'Feature_Type': binning_info[feature]['type'],
                            'Segment_Column': segment_col,
                            'Segment_Value': segment_val,
                            'Month': f"{month}",
                            'Base_Month': 'Train (Jun 2024 - Mar 2025)',
                            'Current_Month': month,
                            'Base_Count': base_segment_count,
                            'Actual_Count': actual_segment_count,
                            'Bin': bin_name,
                            'Bin_Range': bin_range,
                            'Bin_Min': bin_min,
                            'Bin_Max': bin_max,
                            'Base_Percentage': (train_baseline.get(bin_name, 0) * 100),
                            'Actual_Percentage': (actual_dist.get(bin_name, 0) * 100),
                            'Bin_PSI': bin_psi
                        })

    return pd.DataFrame(results)

# SIL V1

## SIL

#### Query from risk_mart.sil_risk_ds_master_20230101_20250309_v2

##### 'Alpha - CIC-SIL-Model'

In [8]:
sq = """select distinct
    r.customerId customer_id ,
    r.digitalLoanAccountId,
    r.cic_score,
    r.cic_Personal_Loans_granted_contracts_amt_24M,
    r.cic_days_since_last_inquiry, 
    r.cic_cnt_active_contracts,
    r.cic_vel_contract_nongranted_cnt_12on24,
    r.cic_max_amt_granted_24M, 
    r.cic_zero_non_granted_ever_flag,
    r.cic_tot_active_contracts_util,
    r.cic_vel_contract_granted_amt_12on24,
    r.cic_zero_granted_ever_flag,
    case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
    date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
    case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
         between '2024-06-01' and '2024-09-30' then 'Dev_Train'
         when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-06-01' then 'Pre_Train'
                  else 'Dev_Test' end as Data_selection 
from risk_mart.sil_risk_ds_master_20230101_20250309_v2 r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where cic_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-03-24'
;"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 07f74d21-e436-4b84-bd78-6ba07bdaa89f successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (204452, 15)


In [9]:
feature_column = ['cic_Personal_Loans_granted_contracts_amt_24M',
       'cic_days_since_last_inquiry', 'cic_cnt_active_contracts',
       'cic_vel_contract_nongranted_cnt_12on24', 'cic_max_amt_granted_24M',
       'cic_zero_non_granted_ever_flag', 'cic_tot_active_contracts_util',
       'cic_vel_contract_granted_amt_12on24', 'cic_zero_granted_ever_flag']

In [10]:
dfd = transform_data(data, feature_column, a='cic_score', modelDisplayName='Alpha - CIC-SIL-Model') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3158206,c39ad1ba-a964-4e64-8243-738f06e558d9,8c74024c-b8af-4e2f-a3b7-f28d918854b9,0.111643,2026-01-16T18:58:04.150981,2026-01-16T18:58:04.150981,Alpha - CIC-SIL-Model,v1,"{""cic_Personal_Loans_granted_contracts_amt_24M...",sil_march 25 models,8e558cd3-2f4f-4ece-bcd6-b9d4bc0e96f9,2026-01-16T18:58:04.150981,{},,android,Dev_Test,2024-12-31
1,3225922,b570689e-786d-4a35-88f7-1a30e1b6198d,5920139b-10ac-46d2-946e-53a2a6531a0e,0.06254,2026-01-16T18:58:04.150981,2026-01-16T18:58:04.150981,Alpha - CIC-SIL-Model,v1,"{""cic_Personal_Loans_granted_contracts_amt_24M...",sil_march 25 models,c7abafdb-743d-4f67-a167-47a45e9b588f,2026-01-16T18:58:04.150981,{},,android,Dev_Test,2025-01-30
2,3128551,751a6d85-c838-4ea0-9f88-9c3a4bb0e59e,027f939b-394d-4fc1-b40a-69f8833cb588,0.126979,2026-01-16T18:58:04.150981,2026-01-16T18:58:04.150981,Alpha - CIC-SIL-Model,v1,"{""cic_days_since_last_inquiry"": 1788.0, ""cic_z...",sil_march 25 models,7c1b7555-8ab6-43a1-9dd0-442e61de3ab9,2026-01-16T18:58:04.150981,{},,android,Dev_Test,2024-12-22
3,3030181,97456544-c064-4ffb-ac9f-d0e766006c0b,f21b9b43-6647-44b4-b25e-14654a6398e9,0.171253,2026-01-16T18:58:04.150981,2026-01-16T18:58:04.150981,Alpha - CIC-SIL-Model,v1,"{""cic_days_since_last_inquiry"": 580.0, ""cic_cn...",sil_march 25 models,706b5699-c9cf-4163-8df4-26af06804cd4,2026-01-16T18:58:04.150981,{},,android,Dev_Test,2024-11-16
4,3134968,6d5b39aa-2be5-4307-9d62-53d2fb8e3803,652a5041-72b7-44e6-bfb7-902ac19503bc,0.123928,2026-01-16T18:58:04.150981,2026-01-16T18:58:04.150981,Alpha - CIC-SIL-Model,v1,"{""cic_days_since_last_inquiry"": 2139.0, ""cic_z...",sil_march 25 models,987f55ae-475a-42ac-957d-d82ba03d921e,2026-01-16T18:58:04.150981,{},,android,Dev_Test,2024-12-24


In [11]:

result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,105730,2024-10-01,2025-03-08
1,Dev_Train,62128,2024-06-01,2024-09-30
2,Pre_Train,36594,2023-01-10,2024-05-31


In [12]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=5ae69575-5225-426e-b3a5-e1e29dfdda46>

##### Alpha Sil Stack Model 

In [13]:
sq = """ 
select distinct 
r.customerId customer_id ,
r.digitalLoanAccountId,
r.alpha_stack_score,
r.beta_demo_score,
r.cic_score,
r.apps_score,
r.credo_gen_score,
    case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2024-06-01' and '2024-09-30' then 'Dev_Train'
        when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-06-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from `risk_mart.sil_risk_ds_master_20230101_20250309_v2` r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where alpha_stack_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-03-24'
;
"""

data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 0d121a81-367e-4a65-864d-f931045c7181 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (204452, 10)


In [14]:
data.columns
data.rename(columns={'beta_demo_score':'sb_demo_score', 'cic_score':'s_cic_score', 
                    'apps_score':'s_apps_score', 'credo_gen_score':'s_credo_score'}, inplace = True)
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'alpha_stack_score',
       'sb_demo_score', 's_cic_score', 's_apps_score', 's_credo_score',
       'osType', 'application_date', 'Data_selection'],
      dtype='object')

In [15]:
feature_column = ['sb_demo_score',
       's_cic_score', 's_apps_score',
       's_credo_score']

dfd = transform_data(data, feature_column, a='alpha_stack_score', modelDisplayName='Alpha - StackingModel') 
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3087990,8468bd95-9923-4e0b-acce-1587086bbca0,5873f52e-1973-43b1-9597-efd3dc6b374b,0.104518,2026-01-16T18:58:42.049315,2026-01-16T18:58:42.049315,Alpha - StackingModel,v1,"{""sb_demo_score"": 0.04610937573196949, ""s_cic_...",sil_march 25 models,f8c6ed09-e42c-4ad4-a828-e7a67cd7b52f,2026-01-16T18:58:42.049315,{},,ios,Dev_Test,2024-12-08
1,2992547,74caa9f6-12a6-41f3-ad18-9395c8fe8332,ab82c81b-c52d-454d-80d0-eae47408ad94,0.16949,2026-01-16T18:58:42.049315,2026-01-16T18:58:42.049315,Alpha - StackingModel,v1,"{""sb_demo_score"": 0.050932196445083365, ""s_cic...",sil_march 25 models,b29bcd42-f1ed-4acd-bcf4-ba25eabc626c,2026-01-16T18:58:42.049315,{},,android,Dev_Test,2024-11-02
2,2625018,b66871f0-93e6-4911-aea5-adc28e5bfc39,5f27eb96-874c-4f24-b0d5-f09211f4929d,0.168892,2026-01-16T18:58:42.049859,2026-01-16T18:58:42.049859,Alpha - StackingModel,v1,"{""sb_demo_score"": 0.040556951574585776, ""s_cic...",sil_march 25 models,c6d2fe5d-dad6-4fd4-b0a3-dad4ebf4e063,2026-01-16T18:58:42.049859,{},,android,Dev_Test,2024-12-18
3,3282225,112cbc02-e738-4b58-b024-3a2465d912c9,f1867ddc-7a44-4f59-8b91-146576afd606,0.064417,2026-01-16T18:58:42.049859,2026-01-16T18:58:42.049859,Alpha - StackingModel,v1,"{""sb_demo_score"": 0.06461247927584647, ""s_cic_...",sil_march 25 models,d966d84c-a972-4ffb-b5e2-d66d71a321bd,2026-01-16T18:58:42.049859,{},,android,Dev_Test,2025-02-23
4,3151085,bde4c009-cbcd-4efd-9c48-1f6110ac4885,e17833e0-4666-40a1-a71a-0d5e7d627eda,0.062758,2026-01-16T18:58:42.049859,2026-01-16T18:58:42.049859,Alpha - StackingModel,v1,"{""sb_demo_score"": 0.037545327221006786, ""s_cic...",sil_march 25 models,6c4dad92-ac8a-487f-a66b-b23fed29280a,2026-01-16T18:58:42.049859,{},,android,Dev_Test,2024-12-29


In [16]:

result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,105730,2024-10-01,2025-03-08
1,Dev_Train,62128,2024-06-01,2024-09-30
2,Pre_Train,36594,2023-01-10,2024-05-31


In [17]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=7a3fe889-039c-4262-8a10-a51c78b64996>

##### Beta Sil App Score

In [18]:
sq = """ 
select distinct
r.customerId customer_id ,
r.digitalLoanAccountId,
r.apps_score,
r.app_cnt_absence_tag_30d,
r.app_cnt_absence_tag_90d ,
r.app_cnt_business_ever ,
r.app_cnt_competitors_30d ,
r.app_cnt_competitors_90d ,
r.app_cnt_education_ever ,
r.app_cnt_finance_7d ,
r.app_cnt_finance_90d ,
r.app_cnt_music_and_audio_ever ,
r.app_cnt_payday_90d ,
r.app_cnt_rated_for_3plus_ever ,
r.app_cnt_travel_and_local_ever ,
r.app_first_competitors_install_to_apply_days ,
r.app_first_payday_install_to_apply_days ,
r.app_median_time_bw_installed_mins_30d ,
r.app_vel_finance_30_over_365 ,
    case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
    date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
    case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
         between '2023-12-01' and '2024-06-30' then 'Dev_Train'
         when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2023-12-01' then 'Pre_Train'
                  else 'Dev_Test' end as Data_selection 
from `risk_mart.sil_risk_ds_master_20230101_20250309_v2` r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where apps_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-03-20'
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID dfba6daf-fada-414c-875d-5f74247176d8 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (317384, 22)


In [19]:
feature_column = ['app_cnt_rated_for_3plus_ever',
       'app_cnt_education_ever', 'app_cnt_business_ever',
       'app_cnt_music_and_audio_ever',
       'app_cnt_travel_and_local_ever', 'app_cnt_finance_7d',
       'app_cnt_competitors_30d', 'app_cnt_absence_tag_30d',
        'app_cnt_absence_tag_90d',
       'app_cnt_finance_90d', 'app_cnt_competitors_90d',
       'app_cnt_payday_90d',
       'app_median_time_bw_installed_mins_30d',
       'app_first_competitors_install_to_apply_days',
       'app_first_payday_install_to_apply_days',
       'app_vel_finance_30_over_365']

dfd = transform_data(data, feature_column, a='apps_score', modelDisplayName='Beta - AppsScoreModel') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3103458,cc62d316-c660-44c9-8c05-ff1663e721e5,f93a6db6-79df-42f4-a75a-a1d5fc42718d,0.453011,2026-01-16T18:59:13.660184,2026-01-16T18:59:13.660184,Beta - AppsScoreModel,v1,"{""app_cnt_rated_for_3plus_ever"": 15.0, ""app_cn...",sil_march 25 models,98eb37dc-59b3-4bda-b57f-78063f6cfc08,2026-01-16T18:59:13.660184,{},,android,Dev_Test,2024-12-14
1,3070608,1414bc01-1610-481b-9138-3a5025a52e7b,2cbe2d03-1ebf-4141-bc0f-6809a2bcd0e1,0.449715,2026-01-16T18:59:13.660184,2026-01-16T18:59:13.660184,Beta - AppsScoreModel,v1,"{""app_cnt_rated_for_3plus_ever"": 17.0, ""app_cn...",sil_march 25 models,70cc6b9b-0d8d-4e9f-97a4-afd28eb3f45d,2026-01-16T18:59:13.660184,{},,android,Dev_Test,2024-12-01
2,2651145,627782b6-b173-4d80-85fe-d18b45fd6d94,51a8eac9-48f5-47c3-817b-2f74d0075875,0.344799,2026-01-16T18:59:13.660184,2026-01-16T18:59:13.660184,Beta - AppsScoreModel,v1,"{""app_cnt_rated_for_3plus_ever"": 31.0, ""app_cn...",sil_march 25 models,5d107aa1-c2e9-4f0a-90a0-5cb3067c212a,2026-01-16T18:59:13.660184,{},,android,Dev_Test,2024-07-11
3,3068948,6c6bff11-f5f3-447e-8563-cd58aa1047ca,af311097-89cd-4cd7-80ff-1af7e318ac37,0.67463,2026-01-16T18:59:13.660184,2026-01-16T18:59:13.660184,Beta - AppsScoreModel,v1,"{""app_cnt_rated_for_3plus_ever"": 6.0, ""app_cnt...",sil_march 25 models,db3301bc-7e29-4e8e-9522-fd6dc02d6723,2026-01-16T18:59:13.660184,{},,android,Dev_Test,2024-12-01
4,2597384,3dc38d31-1f65-418c-b5dd-26592a512f7d,9f9146db-2df3-4d53-bf47-b2f86cdd8530,0.547522,2026-01-16T18:59:13.661124,2026-01-16T18:59:13.661124,Beta - AppsScoreModel,v1,"{""app_cnt_rated_for_3plus_ever"": 16.0, ""app_cn...",sil_march 25 models,399b63b8-b589-4f22-8c06-796f9907db14,2026-01-16T18:59:13.661124,{},,android,Dev_Train,2024-06-21


In [20]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,200327,2024-07-01,2025-03-08
1,Dev_Train,86480,2023-12-01,2024-06-30
2,Pre_Train,30577,2023-01-02,2023-11-30


In [21]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=b6d0efbe-991d-4846-8187-f2753fe65cf8>

##### Beta SIL Demo Score

In [22]:
sq = """
select distinct
r.customerId customer_id ,
r.digitalLoanAccountId,
r.beta_demo_score,
r.beta_de_ln_vas_opted_flag ,
r.beta_de_ln_doc_type_rolled ,
r.beta_de_ln_marital_status ,
r.beta_de_ln_age_bin ,
r.beta_de_ln_province_bin ,
r.beta_de_ln_ref2_type ,
r.beta_de_ln_education_level ,
r.beta_de_ln_ref1_type ,
r.beta_de_ln_industry_new_bin ,
r.beta_de_ln_appln_day_of_week ,
r.beta_de_onb_name_email_match_score ,
r.beta_de_ln_employment_type_new_bin ,
r.beta_de_ln_telconame ,
r.beta_de_time_bw_onb_loan_appln_mins ,
r.beta_de_ln_source_of_funds_new_bin ,
r.beta_de_ln_brand_bin ,
r.beta_de_ln_email_primary_domain ,
    case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2023-07-01' and '2024-06-30' then 'Dev_Train'
        when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2023-07-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from `risk_mart.sil_risk_ds_master_20230101_20250309_v2` r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where beta_demo_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-03-20'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 9646855d-b148-4a37-a2a4-685814524afb successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (349206, 23)


In [23]:
feature_column = ['beta_de_ln_vas_opted_flag',
       'beta_de_ln_doc_type_rolled', 'beta_de_ln_marital_status',
       'beta_de_ln_age_bin', 'beta_de_ln_province_bin',
       'beta_de_ln_ref2_type', 'beta_de_ln_education_level',
       'beta_de_ln_ref1_type', 'beta_de_ln_industry_new_bin',
       'beta_de_ln_appln_day_of_week',
       'beta_de_onb_name_email_match_score',
       'beta_de_ln_employment_type_new_bin', 'beta_de_ln_telconame',
       'beta_de_time_bw_onb_loan_appln_mins',
       'beta_de_ln_source_of_funds_new_bin', 'beta_de_ln_brand_bin',
       'beta_de_ln_email_primary_domain']

dfd = transform_data(data, feature_column, a='beta_demo_score', modelDisplayName='Beta - DemoScoreModel') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2561460,eb3dd89c-76cc-4560-b166-0ea16f506462,bb8dd8c9-0453-4c55-866f-ea9161062e95,0.13724,2026-01-16T19:00:28.376357,2026-01-16T19:00:28.376357,Beta - DemoScoreModel,v1,"{""beta_de_ln_vas_opted_flag"": ""1"", ""beta_de_ln...",sil_march 25 models,8a1a28b3-6663-46e3-b9d9-1b76cbd7819e,2026-01-16T19:00:28.376357,{},,android,Dev_Train,2024-06-08
1,3158206,c39ad1ba-a964-4e64-8243-738f06e558d9,00f63b95-b135-4845-8e85-5ffe4fc7e7bb,0.024513,2026-01-16T19:00:28.377489,2026-01-16T19:00:28.377489,Beta - DemoScoreModel,v1,"{""beta_de_ln_vas_opted_flag"": ""0"", ""beta_de_ln...",sil_march 25 models,c40fbffa-ad0a-4ed7-bae5-8a772a24254e,2026-01-16T19:00:28.377489,{},,android,Dev_Test,2024-12-31
2,2455037,dd176042-215f-4a0e-9a65-3095319dd6dd,8b51fa67-fa72-46bc-9881-34e0442d9d45,0.030435,2026-01-16T19:00:28.378212,2026-01-16T19:00:28.378212,Beta - DemoScoreModel,v1,"{""beta_de_ln_vas_opted_flag"": ""0"", ""beta_de_ln...",sil_march 25 models,5a3c45a8-ad4b-4c2e-aadc-287c12ce6226,2026-01-16T19:00:28.378212,{},,android,Dev_Train,2024-04-07
3,2386268,9bcd390e-abea-48f0-b796-474665136faa,044d0b85-ff89-4e9d-a66a-d5460c1fde94,0.138983,2026-01-16T19:00:28.378682,2026-01-16T19:00:28.378682,Beta - DemoScoreModel,v1,"{""beta_de_ln_vas_opted_flag"": ""1"", ""beta_de_ln...",sil_march 25 models,0dede965-4a7b-4520-9cf6-ce8d0ab83a18,2026-01-16T19:00:28.378682,{},,android,Dev_Test,2024-09-11
4,2314459,60958308-f1c8-4302-b68e-c56e8a79a479,cfcb3ded-07ef-42de-9402-6b4def498383,0.033254,2026-01-16T19:00:28.378682,2026-01-16T19:00:28.378682,Beta - DemoScoreModel,v1,"{""beta_de_ln_vas_opted_flag"": ""0"", ""beta_de_ln...",sil_march 25 models,10ca105e-720e-434c-934a-d5aab4e8bd49,2026-01-16T19:00:28.378682,{},,android,Dev_Train,2023-11-25


In [24]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,219881,2024-07-01,2025-03-08
1,Dev_Train,121661,2023-07-01,2024-06-30
2,Pre_Train,7664,2023-01-02,2023-06-30


In [25]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=1fead81f-3ad7-4bf8-8ca5-1ebf6f6b7440>

##### Beta SIL STACK Score Model

In [26]:
sq = """ 
select  distinct
r.customerId customer_id ,
r.digitalLoanAccountId,
r.beta_stack_score,
r.apps_score,
r.credo_gen_score,
r.beta_demo_score,
    case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2023-07-01' and '2024-06-30' then 'Dev_Train'
        when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2023-07-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from `risk_mart.sil_risk_ds_master_20230101_20250309_v2` r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where beta_stack_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-03-20'
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 70e4c027-4e3a-4fb5-9a96-c467d448081c successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (349206, 9)


In [27]:
feature_column = ['apps_score', 'credo_gen_score', 'beta_demo_score']
dfd = transform_data(data, feature_column, a='beta_stack_score', modelDisplayName='Beta - StackScoreModel') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2130424,734dca99-1855-4e7d-98da-ffce2c046821,62d28ebe-f399-4a9e-964a-16b4ec13f290,0.224589,2026-01-16T19:02:30.543857,2026-01-16T19:02:30.543857,Beta - StackScoreModel,v1,"{""apps_score"": 0.6629442952561168, ""credo_gen_...",sil_march 25 models,6c1c922c-5c24-4d00-a303-e64c2b48f95e,2026-01-16T19:02:30.543857,{},,android,Dev_Test,2024-09-21
1,2952382,65e263cb-eaf1-443f-9597-050f80e62115,c6b022fd-c28a-4142-8d1b-a462e2ec1b61,0.126538,2026-01-16T19:02:30.543857,2026-01-16T19:02:30.543857,Beta - StackScoreModel,v1,"{""apps_score"": 0.6466370819141358, ""credo_gen_...",sil_march 25 models,f082ba45-b1ba-4db4-8850-c33c7645ae60,2026-01-16T19:02:30.543857,{},,android,Dev_Test,2024-10-18
2,3167088,9f603063-d9df-42a4-ada4-67afb4b7e1a0,dda174d3-ca28-4d24-9a18-d260fd4958ab,0.037583,2026-01-16T19:02:30.543857,2026-01-16T19:02:30.543857,Beta - StackScoreModel,v1,"{""apps_score"": 0.483975863828331, ""credo_gen_s...",sil_march 25 models,5615abc2-ee2e-402f-a33e-67a5cb13cd9c,2026-01-16T19:02:30.543857,{},,android,Dev_Test,2025-01-04
3,3269771,bcb5a3b1-7cf9-466a-ad02-85d759f63815,ee89b2f9-a037-4ad5-8e84-142e0bf150f8,0.140824,2026-01-16T19:02:30.543857,2026-01-16T19:02:30.543857,Beta - StackScoreModel,v1,"{""apps_score"": 0.5683297917922486, ""credo_gen_...",sil_march 25 models,d77b9e76-3117-42ae-9c25-e88c1b62372d,2026-01-16T19:02:30.543857,{},,android,Dev_Test,2025-02-18
4,2828784,ca693717-f845-454c-9849-090dbd3fa2c2,b5356837-e140-40d8-92a0-9186fe1885b2,0.219287,2026-01-16T19:02:30.543857,2026-01-16T19:02:30.543857,Beta - StackScoreModel,v1,"{""apps_score"": 0.6306743857175309, ""credo_gen_...",sil_march 25 models,d8a66e5c-b108-4206-91d2-7bfeb60e3b72,2026-01-16T19:02:30.543857,{},,android,Dev_Test,2024-09-07


In [28]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,219881,2024-07-01,2025-03-08
1,Dev_Train,121661,2023-07-01,2024-06-30
2,Pre_Train,7664,2023-01-02,2023-06-30


In [29]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=2a0fde92-56b5-4433-846a-a2b7f3528ecc>

##### Alpha  - IncomeEstimationModel

In [30]:
# sq = """  
# Select 
# distinct
# r.customerId customer_id ,
# r.digitalLoanAccountId,
# r.alpha_estimated_income,
# r.inc_alpha_cic_credit_avg_credit_limit,
# r.inc_alpha_cic_max_active_contracts_amt,
# r.inc_alpha_ln_company_name,
# r.inc_alpha_ln_age,
# r.inc_alpha_doc_type_rolled,
# r.inc_alpha_ln_brand,
# r.inc_alpha_ln_city,
# r.inc_alpha_ln_cnt_dependents,
# r.inc_alpha_ln_education_level,
# r.inc_alpha_ln_employment_type_new,
# r.inc_alpha_ln_gender,
# r.inc_alpha_ln_industry_new,
# r.inc_alpha_ln_loan_prod_type,
# r.inc_alpha_ln_marital_status_new,
# r.inc_alpha_ln_nature_of_work_new,
# r.inc_alpha_ln_osversion_bin,
# r.inc_alpha_ln_purpose,
# r.inc_alpha_ln_source_of_funds_new,
# r.inc_alpha_loan_monthly_income,
# r.inc_alpha_encoded_company_name_grouped,
#     case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
#     when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
#     when lower(loanmaster.deviceType) like '%andro%' then 'android'
#     else 'ios' end osType,
# from `risk_mart.sil_risk_ds_master_20230101_20250309_v2` r
# left join risk_credit_mis.loan_master_table loanmaster
#   ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
# where r.alpha_estimated_income is not null
# ;
# """
# data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
# print(f"The shape of the dataframe is:\t {data.shape}")


In [31]:
# feature_column = ['inc_alpha_cic_credit_avg_credit_limit',
#        'inc_alpha_cic_max_active_contracts_amt', 'inc_alpha_ln_age',
#        'inc_alpha_doc_type_rolled', 'inc_alpha_ln_brand', 'inc_alpha_ln_city',
#        'inc_alpha_ln_cnt_dependents', 'inc_alpha_ln_education_level',
#        'inc_alpha_ln_employment_type_new', 'inc_alpha_ln_gender',
#        'inc_alpha_ln_industry_new', 'inc_alpha_ln_loan_prod_type',
#        'inc_alpha_ln_marital_status_new', 'inc_alpha_ln_nature_of_work_new',
#        'inc_alpha_ln_osversion_bin', 'inc_alpha_ln_purpose',
#        'inc_alpha_ln_source_of_funds_new',
#        'inc_alpha_encoded_company_name_grouped']
# dfd = transform_data(data, feature_column, a='alpha_estimated_income', modelDisplayName='Alpha  - IncomeEstimationModel') 
# dfd.head()

In [32]:
# # Upload to BigQuery
# table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details"
# job_config = bigquery.LoadJobConfig(
#     write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
# )
# job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
# job.result() 

##### Beta - IncomeEstimationModel

In [33]:
# sq = """ 
# select
# distinct
# r.customerId customer_id ,
# r.digitalLoanAccountId,
# r.beta_estimated_income,
# r.inc_beta_ln_loan_type,
# r.inc_beta_ln_education_level,
# r.inc_beta_ln_employment_type_new,
# r.inc_beta_ln_industry_new,
# r.inc_beta_ln_age,
# r.inc_beta_ln_brand,
# r.inc_beta_ln_city,
# r.inc_beta_ln_purpose,
# r.inc_beta_ln_osversion_bin,
# r.inc_beta_ln_postal_code,
# r.inc_beta_ln_gender,
# r.inc_beta_ln_doc_type_rolled,
# r.inc_beta_ln_cnt_dependents,
# r.inc_beta_ln_source_of_funds_new,
# r.inc_beta_ln_marital_status_new,
# r.inc_beta_encoded_company_name_grouped,
#     case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
#     when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
#     when lower(loanmaster.deviceType) like '%andro%' then 'android'
#     else 'ios' end osType,
# from `risk_mart.sil_risk_ds_master_20230101_20250309_v2` r
# left join risk_credit_mis.loan_master_table loanmaster
#   ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
# where r.alpha_estimated_income is not null
# ;
# """

# # data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
# data = client.query(sq).result().to_arrow().to_pandas()
# print(f"The shape of the dataframe is:\t {data.shape}")

In [34]:
# feature_column = ['inc_beta_ln_loan_type',
#        'inc_beta_ln_education_level', 'inc_beta_ln_employment_type_new',
#        'inc_beta_ln_industry_new', 'inc_beta_ln_age', 'inc_beta_ln_brand',
#        'inc_beta_ln_city', 'inc_beta_ln_purpose', 'inc_beta_ln_osversion_bin',
#        'inc_beta_ln_postal_code', 'inc_beta_ln_gender',
#        'inc_beta_ln_doc_type_rolled', 'inc_beta_ln_cnt_dependents',
#        'inc_beta_ln_source_of_funds_new', 'inc_beta_ln_marital_status_new',
#        'inc_beta_encoded_company_name_grouped',]

# dfd = transform_data(data, feature_column, a='beta_estimated_income', modelDisplayName='Beta - IncomeEstimationModel') 
# dfd.head()

In [35]:
# # Upload to BigQuery
# table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details"
# job_config = bigquery.LoadJobConfig(
#     write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
# )
# job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
# job.result() 

# Cash V1

## Cash

##### Alpha Cash Stack Model

###### Trench 1

In [36]:
sq = """ 
select 
  r.customer_id,
  r.digitalLoanAccountId, 
  r.stack_score,
  r.demo_score,
  r.apps_score,
  r.credo_score,
  r.cic_score,
  r.stack_score_norm,
  r.ln_os_type osType,
   date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
    case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
         between '2024-10-01' and '2025-02-28' then 'Dev_Train'
         when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                  else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.cash_alpha_trench1_applied_loans_backscored_20241001_20250930 r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where r.stack_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
< '2025-09-24'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID bf2a6d7f-914b-4152-98c0-c646abbca54d successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (66557, 11)


In [37]:
feature_column = ['demo_score', 'apps_score', 'credo_score', 'cic_score', 'stack_score_norm']

dfd = transform_data(data, feature_column, a='stack_score', modelDisplayName='Alpha-Cash-Stack-Model', tc='Trench 1', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3539916,d9fc0103-3b8c-40b7-8d8f-08134a93ea40,ff6be8a5-2d10-4dda-9783-c33eb22a1eb2,0.649931,2026-01-16T19:03:36.251926,2026-01-16T19:03:36.251926,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.6710895323120418, ""apps_score...",Cash September 25 Models,a5eb8476-3733-403b-9390-9a659eedfd1a,2026-01-16T19:03:36.251926,{},Trench 1,Android,Dev_Test,2025-07-05
1,3205807,29054353-e84c-4a9c-a1e2-f1d8909c4688,7b0c5b0e-8f06-4ddf-9b38-4763dafb5bf3,0.278879,2026-01-16T19:03:36.251926,2026-01-16T19:03:36.251926,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.3578060436820136, ""apps_score...",Cash September 25 Models,61bfea04-3fd8-4af5-98cf-36db19963bae,2026-01-16T19:03:36.251926,{},Trench 1,Android,Dev_Train,2025-01-21
2,2935267,ef3ab600-5807-4ab4-a90b-d2782f3ab7e0,72bf4e46-087f-41ba-9d3d-33b2cb505013,0.660423,2026-01-16T19:03:36.251926,2026-01-16T19:03:36.251926,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.45709714420066894, ""apps_scor...",Cash September 25 Models,af7c9af8-f7e8-4c0c-9885-67d15c0dc3af,2026-01-16T19:03:36.251926,{},Trench 1,Android,Dev_Train,2024-10-13
3,2998273,59c1820a-1856-4ea0-a650-d964e39c161e,557723f2-b3b7-4e2f-b0ec-757dcc3b039e,0.42468,2026-01-16T19:03:36.251926,2026-01-16T19:03:36.251926,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.5780086653092011, ""apps_score...",Cash September 25 Models,af79b697-0624-476f-be18-37431f732111,2026-01-16T19:03:36.251926,{},Trench 1,Android,Dev_Train,2024-11-04
4,3649023,1ddeda21-9080-49b6-8463-39c270675282,f1653080-b33f-46c7-9d89-41aec3c4e79b,0.07285,2026-01-16T19:03:36.251926,2026-01-16T19:03:36.251926,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.3100500464022585, ""apps_score...",Cash September 25 Models,b1a7d73d-b5a3-4179-a4d8-da036e1cefea,2026-01-16T19:03:36.251926,{},Trench 1,Android,Dev_Test,2025-08-29


In [38]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,33061,2025-03-01,2025-09-23
1,Dev_Train,33496,2024-10-01,2025-02-28


In [39]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=57e02e01-9832-419f-a085-25ed5056c8cd>

###### Trench 2

In [40]:
sq = """ 
select 
  r.customer_id,
  r.digitalLoanAccountId, 
  r.stack_score,
  r.demo_score,
  r.apps_score,
  r.credo_score,
  r.cic_score,
  r.stack_score_norm,
  r.ln_os_type osType,
  date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
    case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
         between '2024-10-01' and '2025-02-28' then 'Dev_Train'
         when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                  else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.cash_alpha_trench2_applied_loans_backscored_20241001_20250930 r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where r.stack_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 53234f9b-c76a-440b-91c9-1b307e9c6702 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (42288, 11)


In [41]:
feature_column = ['demo_score', 'apps_score', 'credo_score', 'cic_score', 'stack_score_norm']

dfd = transform_data(data, feature_column, a='stack_score', modelDisplayName='Alpha-Cash-Stack-Model', tc='Trench 2', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2550501,c6df6a9e-22af-4465-9a41-f93293eecf16,57354796-fd4e-4607-9ca7-6166a267de5b,0.819094,2026-01-16T19:03:57.502026,2026-01-16T19:03:57.502026,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.5692332217370547, ""apps_score...",Cash September 25 Models,72afe0cb-eede-4963-aef0-66c6bae7072c,2026-01-16T19:03:57.502026,{},Trench 2,Android,Dev_Train,2024-11-30
1,3452143,eb3d4ed3-ed7e-4a90-8555-a40d5650759c,0ed7f199-1623-4ff4-be54-04b2d0240f18,0.374286,2026-01-16T19:03:57.502026,2026-01-16T19:03:57.502026,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.5797325737583162, ""apps_score...",Cash September 25 Models,6b2f2a59-2342-4c48-a41e-52a22683282e,2026-01-16T19:03:57.502026,{},Trench 2,Android,Dev_Test,2025-08-26
2,2330008,bb56be1a-177b-498c-ac47-12eb91154be5,917274fd-96b2-4e80-a536-ae8048dfc96e,0.488352,2026-01-16T19:03:57.502026,2026-01-16T19:03:57.502026,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.4540164082018209, ""apps_score...",Cash September 25 Models,18d19cf9-f065-456d-a93b-b3615806e3c6,2026-01-16T19:03:57.502026,{},Trench 2,Android,Dev_Test,2025-08-22
3,1071950,a79b513c-82ca-4eb3-a506-9b6e7e0ca9de,c0b9b03d-3fc7-4a5d-a74c-3c361f69e806,0.388656,2026-01-16T19:03:57.502026,2026-01-16T19:03:57.502026,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.6700070194575577, ""apps_score...",Cash September 25 Models,115c75d0-a4bd-4be2-a067-437cc04e3206,2026-01-16T19:03:57.502026,{},Trench 2,Android,Dev_Train,2025-02-12
4,2512975,104cb6c6-3eb3-4d50-b9d3-cbee0bdc051b,321859f9-10c1-4f4f-8e41-6489cadc696c,0.350265,2026-01-16T19:03:57.502026,2026-01-16T19:03:57.502026,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.526530330630122, ""apps_score""...",Cash September 25 Models,448ac944-3104-4f6a-8dea-5e95286ba74d,2026-01-16T19:03:57.502026,{},Trench 2,Android,Dev_Train,2025-02-18


In [42]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,24849,2025-03-01,2025-09-23
1,Dev_Train,17439,2024-10-01,2025-02-28


In [43]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=32c25936-3f92-4fcc-a9c8-2b5fa0f15559>

###### Trench 3

In [44]:
sq = """ 
select 
  r.customer_id,
  r.digitalLoanAccountId, 
  r.stack_score,
  r.demo_score,
  r.apps_score,
  r.credo_score,
  r.cic_score,
  r.stack_score_norm,
  r.ln_os_type osType,
  date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
    case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
         between '2024-10-01' and '2025-02-28' then 'Dev_Train'
         when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                  else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.cash_alpha_trench3_applied_loans_backscored_20241001_20250930 r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where r.stack_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))< '2025-09-24'
;
""" 
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID ff190199-6a17-49e7-aa40-5ae5e297bdde successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (17937, 11)


In [45]:
feature_column = ['demo_score', 'apps_score', 'credo_score', 'cic_score', 'stack_score_norm']

dfd = transform_data(data, feature_column, a='stack_score', modelDisplayName='Alpha-Cash-Stack-Model', tc='Trench 3', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2624399,d790f5f3-a08d-4486-b7af-d297e039f321,81683f61-3c0a-4747-9911-380d773a72e4,0.411537,2026-01-16T19:04:12.734208,2026-01-16T19:04:12.734208,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.5184751063484533, ""apps_score...",Cash September 25 Models,12a4cfcd-bc30-4240-bc01-377f3b74a483,2026-01-16T19:04:12.734208,{},Trench 3,Android,Dev_Test,2025-05-01
1,3227672,4f9ec89b-8763-4a6e-89f5-a72277f0a85c,7bfcea53-48f4-4b70-907b-370ddec1cc02,0.695057,2026-01-16T19:04:12.734208,2026-01-16T19:04:12.734208,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.5479647928070218, ""apps_score...",Cash September 25 Models,9ef816cd-8936-4104-b939-be3b2a3ced08,2026-01-16T19:04:12.734208,{},Trench 3,Android,Dev_Test,2025-07-20
2,1549141,c19db874-4a3d-4022-b5c0-62782fbd4b8b,6b286ab5-5358-4386-a89d-c26373700447,0.331909,2026-01-16T19:04:12.734208,2026-01-16T19:04:12.734208,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.30562466123846155, ""apps_scor...",Cash September 25 Models,d884fb11-c69b-4de3-9f66-5af25e452727,2026-01-16T19:04:12.734208,{},Trench 3,Android,Dev_Test,2025-03-16
3,1365293,8e10ab95-08ad-4b06-8029-59a8bd2e1c71,0a830de4-7820-493f-8d31-1c61f37f790a,0.63828,2026-01-16T19:04:12.734208,2026-01-16T19:04:12.734208,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.41047368948967267, ""apps_scor...",Cash September 25 Models,9e839dd7-e2dd-4999-a845-8fd3aea8d403,2026-01-16T19:04:12.734208,{},Trench 3,Android,Dev_Test,2025-06-10
4,2710488,9adfdad4-4aca-4f50-a7fb-c49060f910ce,8928125e-5758-4c46-b913-41fbc22930d0,0.793719,2026-01-16T19:04:12.734208,2026-01-16T19:04:12.734208,Alpha-Cash-Stack-Model,v1,"{""demo_score"": 0.5986404769159818, ""apps_score...",Cash September 25 Models,acdcca25-65bd-4afc-9ee5-f21c96dc4dbc,2026-01-16T19:04:12.734208,{},Trench 3,Android,Dev_Train,2024-10-27


In [46]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,11563,2025-03-01,2025-09-23
1,Dev_Train,6374,2024-10-01,2025-02-28


In [47]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ff0adfd6-ce15-488d-9759-07390bb53b6b>

##### Alpha Cash CIC Model

###### Trench 1

In [48]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,
r.ca_cic_score,
r.max_age_all_contracts_snapshot,
r.ratio_overdue_contracts_to_granted_contracts,
r.ScoreRange,
r.ln_loan_level_user_type,
r.has_ever_been_overdue,
r.latest_granted_contract_overdue_flag,
r.ratio_closed_over_new_granted_cnt_24M,
r.ratio_risky_contracts_to_granted_contracts,
r.Short_and_Term_Loans_granted_contracts_cnt_24M,
r.flg_zero_non_granted_ever,
r.Personal_Loans_granted_contracts_amt_24M,
r.CreditAvgCreditLimit,
r.flg_zero_granted_ever,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection  
from worktable_data_analysis.cash_alpha_cic_all_applied_backscored_20240901_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where trench_category = 'Trench 1'
and r.ca_cic_score is not null 
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;
"""

data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID 1085c529-1d2a-447f-9860-1eb417930f03 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (76716, 19)


In [49]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'ca_cic_score',
       'max_age_all_contracts_snapshot',
       'ratio_overdue_contracts_to_granted_contracts', 'ScoreRange',
       'ln_loan_level_user_type', 'has_ever_been_overdue',
       'latest_granted_contract_overdue_flag',
       'ratio_closed_over_new_granted_cnt_24M',
       'ratio_risky_contracts_to_granted_contracts',
       'Short_and_Term_Loans_granted_contracts_cnt_24M',
       'flg_zero_non_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'CreditAvgCreditLimit', 'flg_zero_granted_ever', 'osType',
       'application_date', 'Data_selection'],
      dtype='object')

In [50]:
feature_column = ['max_age_all_contracts_snapshot',
       'ratio_overdue_contracts_to_granted_contracts', 'ScoreRange',
       'ln_loan_level_user_type', 'has_ever_been_overdue',
       'latest_granted_contract_overdue_flag',
       'ratio_closed_over_new_granted_cnt_24M',
       'ratio_risky_contracts_to_granted_contracts',
       'Short_and_Term_Loans_granted_contracts_cnt_24M',
       'flg_zero_non_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'CreditAvgCreditLimit', 'flg_zero_granted_ever', 'ca_cic_score']

dfd = transform_data(data, feature_column, a='ca_cic_score', modelDisplayName='Alpha-Cash-CIC-Model', tc='Trench 1', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3221788,f0ac7117-9e38-4995-81b1-c206c0338e9e,d4f02995-15aa-4c4f-ba01-ce538da33a51,0.497043,2026-01-16T19:04:22.569018,2026-01-16T19:04:22.569018,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 918.0, ""rat...",Cash September 25 Models,9ea94980-d20b-4d01-87b1-166bf85264ef,2026-01-16T19:04:22.569018,{},Trench 1,ios,Dev_Test,2025-02-25
1,3082374,bc80ed7d-b723-4361-b906-9cf6fd2de0bb,5eccea69-fafa-4413-95cc-5e885886b504,0.538369,2026-01-16T19:04:22.570017,2026-01-16T19:04:22.570017,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 839.0, ""rat...",Cash September 25 Models,b2e0f295-cc48-4184-beb7-fcbb58527cb5,2026-01-16T19:04:22.570017,{},Trench 1,android,Dev_Train,2024-12-06
2,3662927,3bdd36e1-516c-463e-80bf-e901f5a5e941,e3419d44-56c0-4cd1-bc42-8246f2c01eb4,0.667763,2026-01-16T19:04:22.570017,2026-01-16T19:04:22.570017,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 304.0, ""rat...",Cash September 25 Models,55098391-7d88-430e-ab5a-837e6a434231,2026-01-16T19:04:22.570017,{},Trench 1,ios,Dev_Test,2025-09-04
3,3366539,b7ca7d59-a533-4010-a948-157e6897265f,895d1d5b-f0ef-414b-b795-c7451469abbd,0.45677,2026-01-16T19:04:22.570017,2026-01-16T19:04:22.570017,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 3820.0, ""ra...",Cash September 25 Models,59bbb51d-fc11-4223-8a3e-03fe0d311977,2026-01-16T19:04:22.570017,{},Trench 1,android,Dev_Test,2025-04-08
4,3283548,8a60cb80-62d4-47aa-b1ac-047607aa865b,12f9fa20-fbae-44d8-b9a0-5d07864ae087,0.570899,2026-01-16T19:04:22.571017,2026-01-16T19:04:22.571017,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 2081.0, ""ra...",Cash September 25 Models,62d24823-b9a3-405a-8b70-f29ea214f71b,2026-01-16T19:04:22.571017,{},Trench 1,ios,Dev_Test,2025-02-24


In [51]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,38878,2025-02-01,2025-09-23
1,Dev_Train,37838,2024-09-01,2025-01-31


In [52]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d0030be3-05c8-4368-b053-ac3b8abf086f>

###### Trench 2

In [53]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,
r.ca_cic_score,
r.max_age_all_contracts_snapshot,
r.ratio_overdue_contracts_to_granted_contracts,
r.ScoreRange,
r.ln_loan_level_user_type,
r.has_ever_been_overdue,
r.latest_granted_contract_overdue_flag,
r.ratio_closed_over_new_granted_cnt_24M,
r.ratio_risky_contracts_to_granted_contracts,
r.Short_and_Term_Loans_granted_contracts_cnt_24M,
r.flg_zero_non_granted_ever,
r.Personal_Loans_granted_contracts_amt_24M,
r.CreditAvgCreditLimit,
r.flg_zero_granted_ever,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection  
from worktable_data_analysis.cash_alpha_cic_all_applied_backscored_20240901_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where trench_category = 'Trench 2'
and r.ca_cic_score is not null 
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;
"""


data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 1368b08a-7a37-4a8a-8359-44bbac16e247 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (47776, 19)


In [54]:
feature_column = ['max_age_all_contracts_snapshot',
       'ratio_overdue_contracts_to_granted_contracts', 'ScoreRange',
       'ln_loan_level_user_type', 'has_ever_been_overdue',
       'latest_granted_contract_overdue_flag',
       'ratio_closed_over_new_granted_cnt_24M',
       'ratio_risky_contracts_to_granted_contracts',
       'Short_and_Term_Loans_granted_contracts_cnt_24M',
       'flg_zero_non_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'CreditAvgCreditLimit', 'flg_zero_granted_ever', 'ca_cic_score']

dfd = transform_data(data, feature_column, a='ca_cic_score', modelDisplayName='Alpha-Cash-CIC-Model', tc='Trench 2', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3267538,2066a84f-b4ac-4864-b1b2-646c6385e557,ae8ef945-60ed-42e0-8622-b4be20ef9f43,0.549529,2026-01-16T19:04:50.466274,2026-01-16T19:04:50.466274,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 1549.0, ""ra...",Cash September 25 Models,9b740429-0beb-49b0-aeac-0776382d5236,2026-01-16T19:04:50.466274,{},Trench 2,ios,Dev_Test,2025-05-21
1,3301965,c4b0626f-fd5a-48f3-8669-90592df208a5,26d26a7c-81a1-423b-89b8-4d9c49e28af5,0.373362,2026-01-16T19:04:50.466274,2026-01-16T19:04:50.466274,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 5185.0, ""ra...",Cash September 25 Models,cc12ebe4-e738-4bd6-93ec-d282f4922f30,2026-01-16T19:04:50.466274,{},Trench 2,ios,Dev_Test,2025-08-30
2,2608966,dc279402-5cc0-48ac-b29c-2705c602d881,e7d5ae59-bf9d-4c4b-8922-b80f70f4ce61,0.615803,2026-01-16T19:04:50.469328,2026-01-16T19:04:50.469328,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 563.0, ""rat...",Cash September 25 Models,7fa3c748-7cca-43c7-953f-019fc99400b6,2026-01-16T19:04:50.469328,{},Trench 2,ios,Dev_Train,2024-12-21
3,1812757,d85ec758-e0be-41f7-8ae5-29ec166b7111,299e1fc8-14b4-4a32-87c0-fca7d83b48f3,0.596868,2026-01-16T19:04:50.469328,2026-01-16T19:04:50.469328,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 126.0, ""rat...",Cash September 25 Models,06de1fcd-9472-4de1-bc0c-a77268be8dbd,2026-01-16T19:04:50.469328,{},Trench 2,ios,Dev_Test,2025-03-07
4,2938375,d5184a84-ee86-4b7d-baa7-556e6b5fc5ac,d51a8cf5-663d-47c9-b388-96b1ff58b79d,0.506186,2026-01-16T19:04:50.469328,2026-01-16T19:04:50.469328,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 500.0, ""rat...",Cash September 25 Models,9730ff3d-ed17-42b0-9121-a65277742dfb,2026-01-16T19:04:50.469328,{},Trench 2,ios,Dev_Train,2025-01-14


In [55]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,29036,2025-02-01,2025-09-23
1,Dev_Train,18740,2024-09-01,2025-01-31


In [56]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=3bd42505-2bd3-4dbc-98dc-eeb2fdfe84b4>

###### Trench 3

In [57]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,
r.ca_cic_score,
r.max_age_all_contracts_snapshot,
r.ratio_overdue_contracts_to_granted_contracts,
r.ScoreRange,
r.ln_loan_level_user_type,
r.has_ever_been_overdue,
r.latest_granted_contract_overdue_flag,
r.ratio_closed_over_new_granted_cnt_24M,
r.ratio_risky_contracts_to_granted_contracts,
r.Short_and_Term_Loans_granted_contracts_cnt_24M,
r.flg_zero_non_granted_ever,
r.Personal_Loans_granted_contracts_amt_24M,
r.CreditAvgCreditLimit,
r.flg_zero_granted_ever,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.cash_alpha_cic_all_applied_backscored_20240901_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where trench_category = 'Trench 3'
and r.ca_cic_score is not null 
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;
"""


data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 4093405d-3a5c-4804-851a-f3324b5c3a76 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (18967, 19)


In [58]:
feature_column = ['max_age_all_contracts_snapshot',
       'ratio_overdue_contracts_to_granted_contracts', 'ScoreRange',
       'ln_loan_level_user_type', 'has_ever_been_overdue',
       'latest_granted_contract_overdue_flag',
       'ratio_closed_over_new_granted_cnt_24M',
       'ratio_risky_contracts_to_granted_contracts',
       'Short_and_Term_Loans_granted_contracts_cnt_24M',
       'flg_zero_non_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'CreditAvgCreditLimit', 'flg_zero_granted_ever', 'ca_cic_score']

dfd = transform_data(data, feature_column, a='ca_cic_score', modelDisplayName='Alpha-Cash-CIC-Model', tc='Trench 3', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2635255,ffb0db54-7ea2-4166-895e-a860dc5c4616,55c3483e-0de3-43be-b2bf-576c28576df4,0.469076,2026-01-16T19:05:10.016126,2026-01-16T19:05:10.016126,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 737.0, ""rat...",Cash September 25 Models,c5b3bff3-d91a-49a3-b14f-a3d0c111bd6d,2026-01-16T19:05:10.016126,{},Trench 3,android,Dev_Test,2025-06-18
1,2812695,583e21f1-0eb1-4c80-a68f-c458d511d303,ffc79f35-59f8-4167-8f62-7a038f491ff0,0.365153,2026-01-16T19:05:10.016126,2026-01-16T19:05:10.016126,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 1871.0, ""ra...",Cash September 25 Models,b8a321a6-504b-439d-904c-927000c46975,2026-01-16T19:05:10.016126,{},Trench 3,android,Dev_Test,2025-09-03
2,2508417,3e1b71a6-3051-42e9-abdc-2b47c25fd5cb,4f37e07f-a3af-4a74-b3fe-440d03ee3e2d,0.458994,2026-01-16T19:05:10.016126,2026-01-16T19:05:10.016126,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 348.0, ""rat...",Cash September 25 Models,a274363f-8bd2-474e-90f1-579a1183f156,2026-01-16T19:05:10.016126,{},Trench 3,android,Dev_Test,2025-06-29
3,2047584,ccafd585-6a3f-4ab4-92af-5ceaf70e282f,72e18b0f-ac4d-452e-be6a-0e992d46e3f1,0.429049,2026-01-16T19:05:10.016126,2026-01-16T19:05:10.016126,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 537.0, ""rat...",Cash September 25 Models,c0a3510e-0d04-4a70-b4e2-47901800d596,2026-01-16T19:05:10.016126,{},Trench 3,android,Dev_Train,2024-10-29
4,2688193,40433ac6-4872-4aa2-827b-112e63ccd0a3,8a240a62-39df-4998-9a06-af4bf8659002,0.449782,2026-01-16T19:05:10.016126,2026-01-16T19:05:10.016126,Alpha-Cash-CIC-Model,v1,"{""max_age_all_contracts_snapshot"": 251.0, ""rat...",Cash September 25 Models,62fded36-863f-4ae8-a05a-e4c37bd4fd92,2026-01-16T19:05:10.016126,{},Trench 3,ios,Dev_Test,2025-04-03


In [59]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,12909,2025-02-01,2025-09-23
1,Dev_Train,6058,2024-09-01,2025-01-31


In [60]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=1255bee0-324f-4b28-af4c-8401efe37fff>

##### Beta-Cash-Demo-Model

###### Trench 1

In [61]:
sq = """ 
select 
r.customerId customer_id,
r.digitalLoanAccountId,
r.c_demo_score Beta_Cash_Demo_Score,
r.ln_vas_opted_flag, 
r.ln_self_dec_income, 
r.ln_age,
r.ln_source_funds_new_bin, 
r.ln_loan_level_user_type,
r.ln_industry_new_cat_bin, 
r.ln_marital_status,
r.ln_doc_type_rolled, 
r.ln_education_level,
r.ln_ref2_type, 
r.ln_email_primary_domain, 
r.ln_province_bin,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-10-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.cash_beta_demo_all_applied_backscored_20241001_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where trench_category = 'Trench 1'
and r.c_demo_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;

"""

data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID cd96c3b3-b2de-43f0-807d-c7d83a7a5a8d successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (318201, 18)


In [62]:
feature_column = ['ln_vas_opted_flag',
       'ln_self_dec_income', 'ln_age',
       'ln_source_funds_new_bin', 'ln_loan_level_user_type',
       'ln_industry_new_cat_bin',
       'ln_marital_status',
       'ln_doc_type_rolled',
       'ln_education_level',
       'ln_ref2_type', 'ln_email_primary_domain',
       'ln_province_bin','Beta_Cash_Demo_Score']

dfd = transform_data(data, feature_column, a='Beta_Cash_Demo_Score', modelDisplayName='Beta-Cash-Demo-Model', tc='Trench 1', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3649483,88374105-92a1-4555-b7e9-d1dadc5ce697,df816ced-1299-4f5a-b272-af97f1621aab,0.620811,2026-01-16T19:05:26.181751,2026-01-16T19:05:26.181751,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,6bd22a85-9e8c-4d8c-ac74-752ef7fdd6f3,2026-01-16T19:05:26.181751,{},Trench 1,ios,Dev_Test,2025-08-29
1,3660720,94cc0771-1100-42d6-a9da-659a72f3a13d,7c2f43ba-7b7b-4d0a-8d79-651dd6bf4308,0.600122,2026-01-16T19:05:26.181751,2026-01-16T19:05:26.181751,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,b01ecf76-9302-4d5c-b4db-9ec158d4279b,2026-01-16T19:05:26.181751,{},Trench 1,android,Dev_Test,2025-09-04
2,3270538,efbbae15-1e7b-4e0a-a4a3-0050b989710c,9e3e0de9-267d-4c81-b976-cd70ebbf9960,0.467917,2026-01-16T19:05:26.182290,2026-01-16T19:05:26.182290,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,eff8ead0-dabe-4f6e-a12d-c32b62c4372e,2026-01-16T19:05:26.182290,{},Trench 1,android,Dev_Test,2025-02-19
3,3024132,13aa699a-f346-446b-8297-29c3b2939141,93f6f122-ba21-488f-bdf8-a991f5706a49,0.622411,2026-01-16T19:05:26.182290,2026-01-16T19:05:26.182290,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,6ba3360c-ec78-4f66-9cc9-9642d5827fef,2026-01-16T19:05:26.182290,{},Trench 1,android,Dev_Train,2024-11-14
4,3268280,1acfad4d-67b6-44ed-aa18-8f661da34be4,2e3d7eda-0afe-40b1-9d0b-b45059b3330e,0.433076,2026-01-16T19:05:26.182290,2026-01-16T19:05:26.182290,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""0"", ""ln_self_dec_income...",Cash September 25 Models,7a3724a9-c758-432e-91b8-f321ab4bf15f,2026-01-16T19:05:26.182290,{},Trench 1,android,Dev_Test,2025-02-22


In [63]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,191471,2025-02-01,2025-09-23
1,Dev_Train,126730,2024-10-01,2025-01-31


In [64]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=be6aa399-1414-4e9d-9764-d74e9d8af22e>

###### Trench 2

In [65]:
sq = """ 
select 
r.customerId customer_id,
r.digitalLoanAccountId,
r.c_demo_score Beta_Cash_Demo_Score,
r.ln_vas_opted_flag, 
r.ln_self_dec_income, 
r.ln_age,
r.ln_source_funds_new_bin, 
r.ln_loan_level_user_type,
r.ln_industry_new_cat_bin, 
r.ln_marital_status,
r.ln_doc_type_rolled, 
r.ln_education_level,
r.ln_ref2_type, 
r.ln_email_primary_domain, 
r.ln_province_bin,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-10-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.cash_beta_demo_all_applied_backscored_20241001_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where trench_category = 'Trench 2'
and r.c_demo_score is not null 
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;

"""

data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 7e81c8b5-e253-47ae-89c2-5e7905b29980 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (122498, 18)


In [66]:
feature_column = ['ln_vas_opted_flag',
       'ln_self_dec_income', 'ln_age',
       'ln_source_funds_new_bin', 'ln_loan_level_user_type',
       'ln_industry_new_cat_bin',
       'ln_marital_status',
       'ln_doc_type_rolled',
       'ln_education_level',
       'ln_ref2_type', 'ln_email_primary_domain',
       'ln_province_bin','Beta_Cash_Demo_Score']

dfd = transform_data(data, feature_column, a='Beta_Cash_Demo_Score', modelDisplayName='Beta-Cash-Demo-Model', tc='Trench 2', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3399442,4588304c-462e-46ba-abf9-e3c0fb4a869d,3f71a3f9-4ff6-4f9a-ba34-8fa307ca1afc,0.611906,2026-01-16T19:07:18.577083,2026-01-16T19:07:18.577083,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,5013aeba-238a-49d2-86dd-e7c55eb00889,2026-01-16T19:07:18.577083,{},Trench 2,ios,Dev_Test,2025-07-26
1,1135712,e29cca60-8010-460a-bb48-1e6a7079dcec,ab2f3e33-501f-4bff-90e0-2f3db71b8d87,0.536509,2026-01-16T19:07:18.577083,2026-01-16T19:07:18.577083,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,3bb63e7b-36f0-4502-9340-9aa59f00c390,2026-01-16T19:07:18.577083,{},Trench 2,android,Dev_Train,2024-10-18
2,3329859,1b7a2878-9571-4436-a97e-6253d60504b2,e18db680-27b7-450d-8864-bfb3b0a93af3,0.582968,2026-01-16T19:07:18.577083,2026-01-16T19:07:18.577083,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,dbfd43e9-d655-4acd-bdf7-82845102a147,2026-01-16T19:07:18.577083,{},Trench 2,ios,Dev_Test,2025-06-28
3,3076453,315aa07d-1e6c-4298-bbf0-f85e9e0de45b,2226fd35-b87e-42d5-8d4e-065f33c0ecdb,0.446162,2026-01-16T19:07:18.577083,2026-01-16T19:07:18.577083,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""0"", ""ln_self_dec_income...",Cash September 25 Models,0674176b-3bd8-4e3c-885d-fdeafea1fa30,2026-01-16T19:07:18.577083,{},Trench 2,android,Dev_Test,2025-04-20
4,3227469,51766cd4-8281-4fb1-b2d7-c09c852bb1e2,7052390d-ccc8-454a-9460-0c4d74706d8b,0.43502,2026-01-16T19:07:18.577083,2026-01-16T19:07:18.577083,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""0"", ""ln_self_dec_income...",Cash September 25 Models,bf612912-e60b-4ade-b1c7-5e747d19eef1,2026-01-16T19:07:18.577083,{},Trench 2,android,Dev_Test,2025-05-16


In [67]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,84722,2025-02-01,2025-09-23
1,Dev_Train,37776,2024-10-01,2025-01-31


In [68]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=13c7e3a8-537a-4817-a8c4-cbd951b4a932>

###### Trench 3

In [69]:
sq = """ 
select 
r.customerId customer_id,
r.digitalLoanAccountId,
r.c_demo_score Beta_Cash_Demo_Score,
r.ln_vas_opted_flag, 
r.ln_self_dec_income, 
r.ln_age,
r.ln_source_funds_new_bin, 
r.ln_loan_level_user_type,
r.ln_industry_new_cat_bin, 
r.ln_marital_status,
r.ln_doc_type_rolled, 
r.ln_education_level,
r.ln_ref2_type, 
r.ln_email_primary_domain, 
r.ln_province_bin,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-10-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.cash_beta_demo_all_applied_backscored_20241001_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where trench_category = 'Trench 3'
and r.c_demo_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;

"""

data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 8106e83b-3870-436a-87a0-c01966a55549 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (41635, 18)


In [70]:
feature_column = ['ln_vas_opted_flag',
       'ln_self_dec_income', 'ln_age',
       'ln_source_funds_new_bin', 'ln_loan_level_user_type',
       'ln_industry_new_cat_bin',
       'ln_marital_status',
       'ln_doc_type_rolled',
       'ln_education_level',
       'ln_ref2_type', 'ln_email_primary_domain',
       'ln_province_bin','Beta_Cash_Demo_Score']

dfd = transform_data(data, feature_column, a='Beta_Cash_Demo_Score', modelDisplayName='Beta-Cash-Demo-Model', tc='Trench 3', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2546250,21310b39-34ef-4160-870b-00452ef106c3,df5ba0c6-9786-4504-984e-b1c4985d1c7c,0.343319,2026-01-16T19:08:00.661840,2026-01-16T19:08:00.661840,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""1"", ""ln_self_dec_income...",Cash September 25 Models,4073c475-16a0-4eff-bb12-38ca6195ae44,2026-01-16T19:08:00.661840,{},Trench 3,android,Dev_Test,2025-03-03
1,2888924,954adec6-1e8b-4cac-bccc-7072534729e7,9fe85813-6a40-4914-a1d7-2ee5da7d9b07,0.422414,2026-01-16T19:08:00.661840,2026-01-16T19:08:00.661840,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""0"", ""ln_self_dec_income...",Cash September 25 Models,103b54e7-7d48-479d-8c75-af27a680dc65,2026-01-16T19:08:00.661840,{},Trench 3,android,Dev_Test,2025-04-03
2,2894630,614c36aa-06aa-48c6-8c9f-270914d90916,84586dcd-4d07-4e92-b7d1-ea50a7c4262a,0.485569,2026-01-16T19:08:00.662855,2026-01-16T19:08:00.662855,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""0"", ""ln_self_dec_income...",Cash September 25 Models,78b0147f-b847-4918-ad74-21b8a3359888,2026-01-16T19:08:00.662855,{},Trench 3,android,Dev_Test,2025-03-29
3,2514498,65c464ae-5a8a-46cf-adea-e02a49e84928,5aba2f9f-1546-44f5-af06-f287c4d33938,0.324677,2026-01-16T19:08:00.662855,2026-01-16T19:08:00.662855,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""0"", ""ln_self_dec_income...",Cash September 25 Models,a3504589-86c5-46b4-8d38-d646a70a91da,2026-01-16T19:08:00.662855,{},Trench 3,android,Dev_Test,2025-07-19
4,1951771,1527ac51-8d37-43e1-a79a-b75410bfd13b,1bc55b1f-aa89-40ae-b472-83aefa27ef65,0.30188,2026-01-16T19:08:00.662855,2026-01-16T19:08:00.662855,Beta-Cash-Demo-Model,v1,"{""ln_vas_opted_flag"": ""0"", ""ln_self_dec_income...",Cash September 25 Models,b96f27f1-c33e-4271-80a3-18f63ca07d40,2026-01-16T19:08:00.662855,{},Trench 3,android,Dev_Test,2025-06-12


In [71]:

result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,30895,2025-02-01,2025-09-23
1,Dev_Train,10740,2024-10-01,2025-01-31


In [72]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=10e5d41a-ebd4-4c1f-82b0-ec90ed9efa62>

##### Beta-Cash-Stack-Model

In [73]:
sq = """ 
select r.customer_id,
r.digitalLoanAccountId,
r.demo_score,
r.apps_score,
r.credo_score,
r.stack_score Beta_cash_stack_score,
r.stack_score_norm,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-10-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
 from worktable_data_analysis.cash_beta_trench1_applied_loans_backscored_20241001_20250930 r
 left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
 where r.stack_score is not null 
 and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID ecee70c3-9df8-445f-a199-652cb2a8c1a9 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (318108, 10)


In [74]:
feature_column = ['demo_score',
       'apps_score', 'credo_score',
       'stack_score', 'stack_score_norm']

dfd = transform_data(data, feature_column, a='Beta_cash_stack_score', modelDisplayName='Beta-Cash-Stack-Model', tc='Trench 1', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3423709,e1b69f85-c362-4aa2-a33b-271fa0a9d306,4e8c2241-8bf4-44ae-a2fd-42f84c013dee,0.639406,2026-01-16T19:08:23.191683,2026-01-16T19:08:23.191683,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.41346890602134695, ""apps_scor...",Cash September 25 Models,b38f6339-e1ca-4fd2-846b-bad021f45279,2026-01-16T19:08:23.191683,{},Trench 1,android,Dev_Test,2025-05-06
1,2948227,bbf9fe06-f0e2-4c69-a0cb-e43b1ccda708,d757f592-105c-4f67-b452-90ec7ebf1b02,0.557391,2026-01-16T19:08:23.191683,2026-01-16T19:08:23.191683,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.598875329009884, ""apps_score""...",Cash September 25 Models,db39bbfc-633c-48bd-9dc9-b7f2d99bc64d,2026-01-16T19:08:23.191683,{},Trench 1,android,Dev_Train,2024-10-17
2,3652857,af2e6ad1-42b6-4fd3-a9bf-460950ac8b93,da13a4e0-c279-482e-bdc9-b31176385da6,0.534326,2026-01-16T19:08:23.191683,2026-01-16T19:08:23.191683,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.49926035391431034, ""apps_scor...",Cash September 25 Models,bb32976f-5901-4167-b7a2-84be344a673c,2026-01-16T19:08:23.191683,{},Trench 1,android,Dev_Test,2025-09-01
3,3305416,ab7fe660-2c92-4dc6-a744-be115ac0195f,5fa23dfe-82a1-4a6d-9983-0fa17de474cf,0.660523,2026-01-16T19:08:23.191683,2026-01-16T19:08:23.191683,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.6146503742020991, ""apps_score...",Cash September 25 Models,60474e93-0b89-4b19-998b-7dc378ac9a5c,2026-01-16T19:08:23.191683,{},Trench 1,android,Dev_Test,2025-03-06
4,3587370,7624766d-3d2d-41a0-80af-62a68384fdbd,c400e8b8-84ad-402a-9bb3-e7c1fc9fc6b7,0.417116,2026-01-16T19:08:23.191683,2026-01-16T19:08:23.191683,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.5464633738130923, ""apps_score...",Cash September 25 Models,6b7dcf63-13f3-4885-8eeb-ebe40cae1709,2026-01-16T19:08:23.191683,{},Trench 1,android,Dev_Test,2025-07-30


In [75]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,162675,2025-03-01,2025-09-23
1,Dev_Train,155433,2024-10-01,2025-02-28


In [76]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f394fffb-1588-4b5b-b0c4-f186f47288cd>

###### Trench 2

In [77]:
sq = """ 
select r.customer_id,
r.digitalLoanAccountId,
r.demo_score,
r.apps_score,
r.credo_score,
r.stack_score Beta_cash_stack_score,
r.stack_score_norm,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-10-01' and '2025-02-28' then 'Dev_Train'
        when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
 from worktable_data_analysis.cash_beta_trench2_applied_loans_backscored_20241001_20250930 r
 left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
 where r.stack_score is not null
 and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID b7c79a6c-3f52-46ad-91b7-5e522ee05f38 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (119665, 10)


In [78]:
feature_column = ['demo_score',
       'apps_score', 'credo_score',
       'stack_score', 'stack_score_norm']

dfd = transform_data(data, feature_column, a='Beta_cash_stack_score', modelDisplayName='Beta-Cash-Stack-Model', tc='Trench 2', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3452485,e819e260-1353-4035-8194-4ab7f5b15e9b,64e50931-99d1-4dd3-86c4-208315eedfbf,0.809323,2026-01-16T19:09:29.905752,2026-01-16T19:09:29.905752,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.5162030655521818, ""apps_score...",Cash September 25 Models,b5e927bb-ca81-4352-b7d1-bd8b33870755,2026-01-16T19:09:29.905752,{},Trench 2,android,Dev_Test,2025-06-27
1,1032618,95e6edfa-aeba-4a5b-b0fc-ddb18f36ba94,8478ef26-6d95-4ead-a97e-29abb648ac7e,0.636747,2026-01-16T19:09:29.905752,2026-01-16T19:09:29.905752,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.46543677878024386, ""apps_scor...",Cash September 25 Models,44d73065-1457-492a-b99d-adf2a67ee08c,2026-01-16T19:09:29.905752,{},Trench 2,android,Dev_Train,2024-11-12
2,2823097,2c4140e1-f715-4277-9cf8-c89092aba034,f2949d09-bede-40e6-a533-717155c7590d,0.638983,2026-01-16T19:09:29.905752,2026-01-16T19:09:29.905752,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.589520721349472, ""apps_score""...",Cash September 25 Models,4b1d9280-fd05-4c73-8905-bff6265d64b9,2026-01-16T19:09:29.905752,{},Trench 2,android,Dev_Train,2024-11-10
3,2201684,f6ae7a7d-0404-4524-b737-29ec935eb446,6135952e-d787-422e-b4ac-66fed3d374c1,0.93505,2026-01-16T19:09:29.905752,2026-01-16T19:09:29.905752,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.49498197667043714, ""apps_scor...",Cash September 25 Models,eaadf67b-9047-422c-a876-cd5aaceeb6b4,2026-01-16T19:09:29.905752,{},Trench 2,android,Dev_Train,2024-10-24
4,3125864,6f245e30-b8dd-4a8b-8c8a-742a044e0f25,a3e4cc53-3a07-4c6c-ab5d-f309eae59ef8,0.866633,2026-01-16T19:09:29.905752,2026-01-16T19:09:29.905752,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.5990094119899243, ""apps_score...",Cash September 25 Models,bc6fd0e9-d989-409d-8904-216842ce8992,2026-01-16T19:09:29.905752,{},Trench 2,android,Dev_Test,2025-04-25


In [79]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,72337,2025-03-01,2025-09-23
1,Dev_Train,47328,2024-10-01,2025-02-28


In [80]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=fb469903-b1e6-4edd-ba97-f9943f49218f>

###### Trench 3

In [81]:
sq = """ 
select r.customer_id,
r.digitalLoanAccountId,
r.demo_score,
r.apps_score,
r.credo_score,
r.stack_score Beta_cash_stack_score,
r.stack_score_norm,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-10-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-10-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
 from worktable_data_analysis.cash_beta_trench3_applied_loans_backscored_20241001_20250930 r
 left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
 where r.stack_score is not null 
 and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'

"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID 495968cb-79a2-40cf-a8f8-2bfb81da0928 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (41609, 10)


In [82]:
feature_column = ['demo_score',
       'apps_score', 'credo_score',
       'stack_score', 'stack_score_norm']

dfd = transform_data(data, feature_column, a='Beta_cash_stack_score', modelDisplayName='Beta-Cash-Stack-Model', tc='Trench 3', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2606551,a07e799a-721c-43ec-8cda-5be2a52d7815,b8b7cd0d-3e6e-4820-8d81-4645e5617d83,0.265839,2026-01-16T19:09:59.241077,2026-01-16T19:09:59.241077,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.3752460488638665, ""apps_score...",Cash September 25 Models,cf208b4f-7881-4308-ac4a-efd68f4f94e9,2026-01-16T19:09:59.241077,{},Trench 3,android,Dev_Test,2025-08-15
1,3302428,714b1d63-f7f6-4d4b-829f-d069b446b8c0,5f011dd1-63e0-4daf-892f-3318c74688ee,0.610899,2026-01-16T19:09:59.241599,2026-01-16T19:09:59.241599,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.4095834955430448, ""apps_score...",Cash September 25 Models,a7546734-a4d1-48b4-b690-7da2f37c6b18,2026-01-16T19:09:59.241599,{},Trench 3,android,Dev_Test,2025-05-19
2,2980389,d65f4d12-d433-423d-95e8-9dd6bed378c0,1958be31-c9b9-49e8-944a-cadd7c3e2721,0.496895,2026-01-16T19:09:59.241599,2026-01-16T19:09:59.241599,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.4784989646697194, ""apps_score...",Cash September 25 Models,a505ae80-63f0-4dee-a5de-febb3e20c3c0,2026-01-16T19:09:59.241599,{},Trench 3,android,Dev_Test,2025-07-24
3,3413182,220e4cad-d94e-4b47-b86c-c744ed3ddd01,4e7a4e8d-c2bc-4b0d-8d29-aee2e4a93cde,0.27525,2026-01-16T19:09:59.241599,2026-01-16T19:09:59.241599,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.2666855102891388, ""apps_score...",Cash September 25 Models,cc5a5921-b8ce-4a3f-928d-2657f485a2e3,2026-01-16T19:09:59.241599,{},Trench 3,android,Dev_Test,2025-09-07
4,2314603,0575cda4-11ef-42cc-b5ee-2665a9fa19bf,5f17796a-ea4a-4ec5-8a89-8ed7eab3f7e0,0.648997,2026-01-16T19:09:59.241599,2026-01-16T19:09:59.241599,Beta-Cash-Stack-Model,v1,"{""demo_score"": 0.4634881673242549, ""apps_score...",Cash September 25 Models,16ea47fe-37fa-43e2-bb56-bd148d5d8b2b,2026-01-16T19:09:59.241599,{},Trench 3,android,Dev_Test,2025-06-05


In [83]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,27473,2025-03-01,2025-09-23
1,Dev_Train,14136,2024-10-01,2025-02-28


In [84]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=170f129a-3eac-4b1d-8441-10999877dab7>

#####  Beta Cash AppScore Model

###### Trench 1

In [85]:
## query to add the trench category trench 1 and trench 2

# create or replace table risk_mart.applied_quick_loan_new_applicants_20230101_20250930_app_scored_copy as 
# select * from risk_mart.applied_quick_loan_new_applicants_20230101_20250930_app_scored;



# ALTER TABLE risk_mart.applied_quick_loan_new_applicants_20230101_20250930_app_scored_copy
# ADD COLUMN trench_category STRING;

# MERGE risk_mart.applied_quick_loan_new_applicants_20230101_20250930_app_scored_copy AS target
# USING (
#   SELECT 
#     a.digitalLoanAccountId,
#     CASE 
#       WHEN b.ln_loan_level_user_type = '2_New Applicant'
#            AND DATE_DIFF(DATE(b.ln_appln_submit_datetime), DATE(b.onb_tsa_onboarding_datetime), DAY) <= 30 THEN 'Trench 1'
#       WHEN b.ln_loan_level_user_type = '2_New Applicant'
#            AND DATE_DIFF(DATE(b.ln_appln_submit_datetime), DATE(b.onb_tsa_onboarding_datetime), DAY) > 30 THEN 'Trench 2'
#       WHEN b.ln_loan_level_user_type = '1_Repeat Applicant' THEN 'Trench 3'
#       ELSE NULL
#     END AS trench_category,
#     ROW_NUMBER() OVER (PARTITION BY a.digitalLoanAccountId ORDER BY b.ln_appln_submit_datetime DESC) AS row_num
#   FROM risk_mart.applied_quick_loan_new_applicants_20230101_20250930_app_scored_copy a
#   LEFT JOIN prj-prod-dataplatform.risk_mart.applied_loans_20210701_20250930_trans b
#     ON a.digitalLoanAccountId = b.digitalLoanAccountId
# ) AS source
# ON target.digitalLoanAccountId = source.digitalLoanAccountId
# WHEN MATCHED AND source.row_num = 1 THEN
#   UPDATE SET target.trench_category = source.trench_category;

In [86]:
sq = """ 
select 
  r.customerId customer_id,
  r.digitalLoanAccountId,
  r.apps_score beta_cash_app_score,  
  r.app_cnt_health_and_fitness_ever app_cnt_health_and_fitness_ever, 
  r.app_cnt_shopping_ever app_cnt_shopping_ever,
  r.app_cnt_crypto_ever app_cnt_crypto_ever, 
  r.app_cnt_driver_ever app_cnt_driver_ever,
  r.app_cnt_payday_180d app_cnt_payday_180d, 
  r.app_cnt_gambling_180d app_cnt_gambling_180d,
  r.app_avg_time_bw_installed_mins_3d app_avg_time_bw_installed_mins_3d,
  r.app_median_time_bw_installed_mins_ever app_median_time_bw_installed_mins_3d,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
 date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-08-13' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-08-13' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from risk_mart.applied_quick_loan_new_applicants_20230101_20250930_app_scored_copy r 
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 1'
and r.apps_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 2a937cfe-ec06-4356-9230-b6c73f572ac9 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (382443, 14)


In [87]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'beta_cash_app_score',
       'app_cnt_health_and_fitness_ever', 'app_cnt_shopping_ever',
       'app_cnt_crypto_ever', 'app_cnt_driver_ever', 'app_cnt_payday_180d',
       'app_cnt_gambling_180d', 'app_avg_time_bw_installed_mins_3d',
       'app_median_time_bw_installed_mins_3d', 'osType', 'application_date',
       'Data_selection'],
      dtype='object')

In [88]:
feature_column = ['beta_cash_app_score',
       'app_cnt_health_and_fitness_ever', 'app_cnt_shopping_ever',
       'app_cnt_crypto_ever', 'app_cnt_driver_ever', 'app_cnt_payday_180d',
       'app_cnt_gambling_180d', 'app_avg_time_bw_installed_mins_3d',
       'app_median_time_bw_installed_mins_3d']

dfd = transform_data(data, feature_column, a='beta_cash_app_score', modelDisplayName='Beta-Cash-AppScore-Model', tc='Trench 1', subscription_name = 'Cash September 25 Models') 
dfd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 382443 entries, 0 to 382442
Data columns (total 17 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   customerId            382443 non-null  int64  
 1   digitalLoanAccountId  382443 non-null  object 
 2   crifApplicationId     382443 non-null  object 
 3   prediction            382443 non-null  float64
 4   start_time            382443 non-null  object 
 5   end_time              382443 non-null  object 
 6   modelDisplayName      382443 non-null  object 
 7   modelVersionId        382443 non-null  object 
 8   calcFeature           382443 non-null  object 
 9   subscription_name     382443 non-null  object 
 10  message_id            382443 non-null  object 
 11  publish_time          382443 non-null  object 
 12  attributes            382443 non-null  object 
 13  trenchCategory        382443 non-null  object 
 14  deviceOs              382443 non-null  object 
 15  

In [89]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,125423,2025-02-01,2025-09-23
1,Dev_Train,141774,2024-08-13,2025-01-31
2,Pre_Train,115246,2024-01-01,2024-08-12


In [90]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=77790a92-c5bb-4645-9c2a-183b2c72c220>

###### Trench 2

In [91]:
sq = """ 
select 
  r.customerId customer_id,
  r.digitalLoanAccountId,
  r.apps_score beta_cash_app_score,  
  r.app_cnt_health_and_fitness_ever app_cnt_health_and_fitness_ever, 
  r.app_cnt_shopping_ever app_cnt_shopping_ever,
  r.app_cnt_crypto_ever app_cnt_crypto_ever, 
  r.app_cnt_driver_ever app_cnt_driver_ever,
  r.app_cnt_payday_180d app_cnt_payday_180d, 
  r.app_cnt_gambling_180d app_cnt_gambling_180d,
  r.app_avg_time_bw_installed_mins_3d app_avg_time_bw_installed_mins_3d,
  r.app_median_time_bw_installed_mins_ever app_median_time_bw_installed_mins_3d,
    case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
 date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
 case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-08-13' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-08-13' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from risk_mart.applied_quick_loan_new_applicants_20230101_20250930_app_scored_copy r 
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 2'
and r.apps_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 1f2fe125-d3b8-4f4a-ade8-aa9586d425cd successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (121596, 14)


In [92]:
feature_column = ['beta_cash_app_score',
       'app_cnt_health_and_fitness_ever', 'app_cnt_shopping_ever',
       'app_cnt_crypto_ever', 'app_cnt_driver_ever', 'app_cnt_payday_180d',
       'app_cnt_gambling_180d', 'app_avg_time_bw_installed_mins_3d',
       'app_median_time_bw_installed_mins_3d']

dfd = transform_data(data, feature_column, a='beta_cash_app_score', modelDisplayName='Beta-Cash-AppScore-Model', tc='Trench 2', subscription_name = 'Cash September 25 Models') 
dfd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121596 entries, 0 to 121595
Data columns (total 17 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   customerId            121596 non-null  int64  
 1   digitalLoanAccountId  121596 non-null  object 
 2   crifApplicationId     121596 non-null  object 
 3   prediction            121596 non-null  float64
 4   start_time            121596 non-null  object 
 5   end_time              121596 non-null  object 
 6   modelDisplayName      121596 non-null  object 
 7   modelVersionId        121596 non-null  object 
 8   calcFeature           121596 non-null  object 
 9   subscription_name     121596 non-null  object 
 10  message_id            121596 non-null  object 
 11  publish_time          121596 non-null  object 
 12  attributes            121596 non-null  object 
 13  trenchCategory        121596 non-null  object 
 14  deviceOs              121596 non-null  object 
 15  

In [93]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,54667,2025-02-01,2025-09-23
1,Dev_Train,36978,2024-08-13,2025-01-31
2,Pre_Train,29951,2024-01-01,2024-08-12


In [94]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=cf6d7eef-88eb-455a-acdf-fd5a7a046203>

###### Trench 3

In [95]:
sq = """   
 select 
  r.customerId customer_id, 
  r.digitalLoanAccountId,
  r.apps_score beta_cash_app_score,
  r.app_cnt_health_and_fitness_ever_binned app_cnt_health_and_fitness_ever,
  r.app_cnt_productivity_ever_binned app_cnt_productivity_ever, 
  r.app_cnt_rated_for_18plus_ever_binned app_cnt_rated_for_18plus_ever,
  r.app_cnt_books_and_reference_ever_binned app_cnt_books_and_reference_ever, 
  r.app_cnt_gaming_180d_binned app_cnt_gaming_180d,
  r.app_cnt_absence_tag_365d_binned app_cnt_absence_tag_365d,
  r.app_last_payday_install_to_apply_days_binned app_last_payday_install_to_apply_days,
  r.app_cnt_absence_tag_365d_binned,
  r.app_cnt_gaming_180d_binned,
  r.app_cnt_productivity_ever_binned,
  r.app_cnt_rated_for_18plus_ever_binned,
  r.app_cnt_health_and_fitness_ever_binned,
  r.app_cnt_books_and_reference_ever_binned,
  r.app_last_payday_install_to_apply_days_binned,
  r.ln_user_type,
  case when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(lmt.osversion_v2, lmt.osVersion)) like '%os%' then 'ios'
    when lower(lmt.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
  date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
 case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-08-13' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-08-13' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from risk_mart.b_score_model_applied_loans_cash_20240101_20250930_app_scored r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.apps_score is not null 
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-09-24'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID d9ab600b-1209-45ad-a31a-9210c8b9ead0 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (65874, 21)


In [96]:
feature_column = ['beta_cash_app_score',
       'app_cnt_health_and_fitness_ever', 'app_cnt_productivity_ever',
       'app_cnt_rated_for_18plus_ever', 'app_cnt_books_and_reference_ever',
       'app_cnt_gaming_180d', 'app_cnt_absence_tag_365d',
       'app_last_payday_install_to_apply_days',
       'app_cnt_absence_tag_365d_binned', 'app_cnt_gaming_180d_binned',
       'app_cnt_productivity_ever_binned',
       'app_cnt_rated_for_18plus_ever_binned',
       'app_cnt_health_and_fitness_ever_binned',
       'app_cnt_books_and_reference_ever_binned',
       'app_last_payday_install_to_apply_days_binned', 'ln_user_type']

dfd = transform_data(data, feature_column, a='beta_cash_app_score', modelDisplayName='Beta-Cash-AppScore-Model', tc='Trench 3', subscription_name = 'Cash September 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2303203,14cb13ff-3983-481d-b406-670e6b95b134,6ab8f34a-b7c0-47f5-8b56-f1f77e62e730,0.440576,2026-01-16T19:12:04.629971,2026-01-16T19:12:04.629971,Beta-Cash-AppScore-Model,v1,"{""beta_cash_app_score"": 0.4405763527299484, ""a...",Cash September 25 Models,5c2f514c-3c22-494e-b79a-b9f5050430d4,2026-01-16T19:12:04.629971,{},Trench 3,android,Pre_Train,2024-04-26
1,3110717,329d404f-d7f1-4920-8434-974d4b70ac50,0c5a69fc-1c65-4fed-8bce-f00a626c8186,0.551303,2026-01-16T19:12:04.630748,2026-01-16T19:12:04.630748,Beta-Cash-AppScore-Model,v1,"{""beta_cash_app_score"": 0.551303368963618, ""ap...",Cash September 25 Models,ecf9e9fb-3f48-4bd9-a674-8d87d4e4eb31,2026-01-16T19:12:04.630748,{},Trench 3,android,Dev_Test,2025-07-30
2,2352356,e34d1858-30ac-4402-afc4-e9e77ed62e41,8031acb4-4504-4184-8639-4ebe5795b939,0.531319,2026-01-16T19:12:04.630748,2026-01-16T19:12:04.630748,Beta-Cash-AppScore-Model,v1,"{""beta_cash_app_score"": 0.5313186532291243, ""a...",Cash September 25 Models,cd0a1ae4-4672-4b91-9411-80f89ddfe96d,2026-01-16T19:12:04.630748,{},Trench 3,android,Dev_Train,2024-12-23
3,3281846,bf03844c-687d-4a05-a67e-41a8efcfd88d,2273e0d7-b473-44b7-be61-eba816de1f8c,0.554841,2026-01-16T19:12:04.630748,2026-01-16T19:12:04.630748,Beta-Cash-AppScore-Model,v1,"{""beta_cash_app_score"": 0.5548406662124583, ""a...",Cash September 25 Models,0ea924bc-941d-4cb9-b0b5-f9f2903453cc,2026-01-16T19:12:04.630748,{},Trench 3,android,Dev_Test,2025-08-08
4,2463395,7eb6284c-a7a3-4155-81de-eea1a3c36e05,e1040a8d-dac2-449b-a118-eab9a8f8d472,0.547627,2026-01-16T19:12:04.631445,2026-01-16T19:12:04.631445,Beta-Cash-AppScore-Model,v1,"{""beta_cash_app_score"": 0.5476268203634211, ""a...",Cash September 25 Models,890cd79c-5397-478d-b9b2-30528929ad48,2026-01-16T19:12:04.631445,{},Trench 3,android,Dev_Train,2025-01-24


In [97]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,43321,2025-02-01,2025-09-23
1,Dev_Train,12362,2024-08-13,2025-01-31
2,Pre_Train,10191,2023-01-01,2024-08-12


In [98]:
dfd.info()
dfd['customerId'] = pd.to_numeric(dfd['customerId'], errors='coerce')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65874 entries, 0 to 65873
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   customerId            65874 non-null  object 
 1   digitalLoanAccountId  65874 non-null  object 
 2   crifApplicationId     65874 non-null  object 
 3   prediction            65874 non-null  float64
 4   start_time            65874 non-null  object 
 5   end_time              65874 non-null  object 
 6   modelDisplayName      65874 non-null  object 
 7   modelVersionId        65874 non-null  object 
 8   calcFeature           65874 non-null  object 
 9   subscription_name     65874 non-null  object 
 10  message_id            65874 non-null  object 
 11  publish_time          65874 non-null  object 
 12  attributes            65874 non-null  object 
 13  trenchCategory        65874 non-null  object 
 14  deviceOs              65874 non-null  object 
 15  Data_selection     

In [99]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ca8370d5-be83-49fe-93cb-16497b0c998a>

# SIL V2

*   Sil-Alpha-CIC-SIL-Model: worktable_data_analysis.sil_alpha_cic_all_applied_backscored_20240901_20250930
*   Sil-Alpha-StackingModel: worktable_data_analysis.sil_alpha_applied_loans_backscored_20240901_20251013_option3
*   Sil-Beta-AppsScoreModel:
*   New Applicants (T1, T2): risk_mart.applied_sil_trench1_trench2_loan_jan2024_30sep2025_app_scored, risk_mart.applied_sil_new_applicants_loan_oct0125_oct1325_app_scored
*   Repeat Applicants (T3): risk_mart.b_score_model_applied_loans_sil_20240101_20250930_app_scored, risk_mart.applied_sil_repeat_applicants_loan_oct0125_oct1325_app_scored
*   Sil-Beta-DemoScoreModel: worktable_data_analysis.sil_beta_demo_all_applied_backscored_20240801_20251015
*   Sil-Beta-StackScoreModel: worktable_data_analysis.sil_beta_applied_loans_backscored_20240801_20251013_option3m

##### 'Alpha - CIC-SIL-Model'

##### Trench 1

In [100]:
sq = """ 
select 
r.customerId customer_id,
r.digitalLoanAccountId, 
r.c_cic_score ,
r.ScoreRange,
       ln_loan_level_user_type, flg_zero_non_granted_ever,
       flg_zero_granted_ever,
       Personal_Loans_granted_contracts_amt_24M,
       granted_contracts_cnt_6M, total_overdue_granted_contracts,
       has_ever_been_overdue, cnt_nongranted_contracts_3M,
       cnt_active_contracts, max_amt_granted_24M,
       tot_active_contracts_util, days_since_last_closed,
       vel_contract_nongranted_cnt_6on12,
       vel_contract_granted_amt_6on12,
       vel_contract_closed_amt_3on12,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
 date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
 case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.sil_alpha_cic_all_applied_backscored_20240901_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 1'
and r.c_cic_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID ae2a6a5d-fb7f-4167-96ae-0f99a4f87647 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (224718, 22)


In [101]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'c_cic_score', 'ScoreRange',
       'ln_loan_level_user_type', 'flg_zero_non_granted_ever',
       'flg_zero_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'granted_contracts_cnt_6M', 'total_overdue_granted_contracts',
       'has_ever_been_overdue', 'cnt_nongranted_contracts_3M',
       'cnt_active_contracts', 'max_amt_granted_24M',
       'tot_active_contracts_util', 'days_since_last_closed',
       'vel_contract_nongranted_cnt_6on12', 'vel_contract_granted_amt_6on12',
       'vel_contract_closed_amt_3on12', 'osType', 'application_date',
       'Data_selection'],
      dtype='object')

In [102]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,c_cic_score,ScoreRange,ln_loan_level_user_type,flg_zero_non_granted_ever,flg_zero_granted_ever,Personal_Loans_granted_contracts_amt_24M,granted_contracts_cnt_6M,total_overdue_granted_contracts,has_ever_been_overdue,cnt_nongranted_contracts_3M,cnt_active_contracts,max_amt_granted_24M,tot_active_contracts_util,days_since_last_closed,vel_contract_nongranted_cnt_6on12,vel_contract_granted_amt_6on12,vel_contract_closed_amt_3on12,osType,application_date,Data_selection
0,3195894,799a84c4-863b-47c0-81da-f3f3241f5008,0.482718,Ai,2_New Applicant,0,0,3999.0,1.0,5.0,1.0,,1.0,3999.0,1.272568,277.0,2.012048,1.0,,android,2025-01-17,Dev_Train
1,3550529,44276b9b-9313-4ecc-ba97-5a1228b110c8,0.343583,Bi,2_New Applicant,0,0,6148.0,5.0,7.0,1.0,2.0,2.0,10000.0,0.773333,8.0,1.334,2.232351,3.285191,android,2025-07-11,Dev_Test
2,3624086,376622e1-2d1a-4f23-ac79-ce16787f763d,0.224131,Di,2_New Applicant,0,0,34044.0,6.0,6.0,1.0,,2.0,10000.0,1.134233,11.0,,0.622222,2.060502,android,2025-08-16,Dev_Test
3,3296320,685f03ad-d40e-4f4e-9df9-480e2129bc9e,0.320591,Bi,2_New Applicant,1,0,10670.0,,13.0,1.0,,,10670.0,,161.0,,,,android,2025-03-01,Dev_Test
4,3442497,2972225e-d8eb-434c-976e-8d1d40638ba0,0.309949,NH_Hi,2_New Applicant,1,0,6556.0,,12.0,1.0,,,2888.0,,529.0,,,,android,2025-05-17,Dev_Test


In [103]:
feature_column = ['ScoreRange',
       'ln_loan_level_user_type', 'flg_zero_non_granted_ever',
       'flg_zero_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'granted_contracts_cnt_6M', 'total_overdue_granted_contracts',
       'has_ever_been_overdue', 'cnt_nongranted_contracts_3M',
       'cnt_active_contracts', 'max_amt_granted_24M',
       'tot_active_contracts_util', 'days_since_last_closed',
       'vel_contract_nongranted_cnt_6on12', 'vel_contract_granted_amt_6on12',
       'vel_contract_closed_amt_3on12',]

dfd = transform_datav2(data, feature_column, a='c_cic_score', modelDisplayName='cic_model_sil', tc='Trench 1', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3195894,799a84c4-863b-47c0-81da-f3f3241f5008,6e73ee8b-63e6-4df3-8226-305cc4bbe6b2,0.482718,2026-01-16T19:12:23.883636,2026-01-16T19:12:23.883636,cic_model_sil,v2,"{""ScoreRange"": ""Ai"", ""ln_loan_level_user_type""...",Cash November 25 Models,444a2a68-c303-47f5-be8a-bd8a77dec966,2026-01-16T19:12:23.883636,{},Trench 1,android,Dev_Train,2025-01-17
1,3550529,44276b9b-9313-4ecc-ba97-5a1228b110c8,a5ed5ff9-f4bd-4b7a-b2b7-90e26209c660,0.343583,2026-01-16T19:12:23.883636,2026-01-16T19:12:23.883636,cic_model_sil,v2,"{""ScoreRange"": ""Bi"", ""ln_loan_level_user_type""...",Cash November 25 Models,b47c15be-ce09-4424-98a7-0f9861389aad,2026-01-16T19:12:23.883636,{},Trench 1,android,Dev_Test,2025-07-11
2,3624086,376622e1-2d1a-4f23-ac79-ce16787f763d,5954e08e-5b97-4bf2-b576-fcf1c587d1ee,0.224131,2026-01-16T19:12:23.883636,2026-01-16T19:12:23.883636,cic_model_sil,v2,"{""ScoreRange"": ""Di"", ""ln_loan_level_user_type""...",Cash November 25 Models,74711d40-675c-42ce-862d-e70e0cdfe453,2026-01-16T19:12:23.883636,{},Trench 1,android,Dev_Test,2025-08-16
3,3296320,685f03ad-d40e-4f4e-9df9-480e2129bc9e,a1e85e1b-910d-4a8c-bf0e-460ba1d5d085,0.320591,2026-01-16T19:12:23.884197,2026-01-16T19:12:23.884197,cic_model_sil,v2,"{""ScoreRange"": ""Bi"", ""ln_loan_level_user_type""...",Cash November 25 Models,4e9674bf-2ea1-4f80-a8e3-182df647df67,2026-01-16T19:12:23.884197,{},Trench 1,android,Dev_Test,2025-03-01
4,3442497,2972225e-d8eb-434c-976e-8d1d40638ba0,2466d620-e327-42f6-b163-93cce2db9ee0,0.309949,2026-01-16T19:12:23.884197,2026-01-16T19:12:23.884197,cic_model_sil,v2,"{""ScoreRange"": ""NH_Hi"", ""ln_loan_level_user_ty...",Cash November 25 Models,d5bb676f-679c-41dc-b613-f7ab3f42bdca,2026-01-16T19:12:23.884197,{},Trench 1,android,Dev_Test,2025-05-17


In [104]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,130029,2025-02-01,2025-11-15
1,Dev_Train,94689,2024-09-01,2025-01-31


In [105]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=43f030e0-07bc-4297-a617-0397551641c1>

##### Trench 2

In [106]:
sq = """ 
select 
r.customerId customer_id,
r.digitalLoanAccountId, 
r.c_cic_score ,
r.ScoreRange,
       ln_loan_level_user_type, flg_zero_non_granted_ever,
       flg_zero_granted_ever,
       Personal_Loans_granted_contracts_amt_24M,
       granted_contracts_cnt_6M, total_overdue_granted_contracts,
       has_ever_been_overdue, cnt_nongranted_contracts_3M,
       cnt_active_contracts, max_amt_granted_24M,
       tot_active_contracts_util, days_since_last_closed,
       vel_contract_nongranted_cnt_6on12,
       vel_contract_granted_amt_6on12,
       vel_contract_closed_amt_3on12,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
 case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.sil_alpha_cic_all_applied_backscored_20240901_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 2'
and r.c_cic_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
 ;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 8a6a376d-a8d5-44cf-993e-0b69143a92e2 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (10175, 22)


In [107]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'c_cic_score', 'ScoreRange',
       'ln_loan_level_user_type', 'flg_zero_non_granted_ever',
       'flg_zero_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'granted_contracts_cnt_6M', 'total_overdue_granted_contracts',
       'has_ever_been_overdue', 'cnt_nongranted_contracts_3M',
       'cnt_active_contracts', 'max_amt_granted_24M',
       'tot_active_contracts_util', 'days_since_last_closed',
       'vel_contract_nongranted_cnt_6on12', 'vel_contract_granted_amt_6on12',
       'vel_contract_closed_amt_3on12', 'osType', 'application_date',
       'Data_selection'],
      dtype='object')

In [108]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,c_cic_score,ScoreRange,ln_loan_level_user_type,flg_zero_non_granted_ever,flg_zero_granted_ever,Personal_Loans_granted_contracts_amt_24M,granted_contracts_cnt_6M,total_overdue_granted_contracts,has_ever_been_overdue,cnt_nongranted_contracts_3M,cnt_active_contracts,max_amt_granted_24M,tot_active_contracts_util,days_since_last_closed,vel_contract_nongranted_cnt_6on12,vel_contract_granted_amt_6on12,vel_contract_closed_amt_3on12,osType,application_date,Data_selection
0,3022916,d1382548-aeb1-45bf-b89d-cfcbb9477f0a,0.384996,Missing,2_New Applicant,0,0,966.0,,14.0,1.0,,1.0,9000.0,0.0,49.0,2.012048,,2.144019,android,2025-09-02,Dev_Test
1,2705834,e105ebed-d92c-4ee7-8505-ff0af79ca52e,0.417764,Missing,2_New Applicant,0,0,22540.0,,9.0,1.0,,1.0,42141.0,0.703306,108.0,,,,ios,2025-06-01,Dev_Test
2,2092600,fdc7321a-3fa8-4922-8694-09ab5eccaad3,0.379514,Missing,2_New Applicant,0,0,22291.0,,11.0,1.0,,1.0,17039.0,0.0,132.0,,,,ios,2025-03-13,Dev_Test
3,2101993,defcafdf-f102-4f65-94b9-bff5cac35060,0.289817,Bi,2_New Applicant,0,0,15183.0,1.0,16.0,1.0,,1.0,15183.0,1.074096,301.0,1.994012,1.0,,android,2024-12-22,Dev_Train
4,2041409,8f37f563-2baa-4892-a36b-3065e6611621,0.419148,Missing,2_New Applicant,0,0,65.0,,22.0,1.0,,1.0,19048.0,0.753097,106.0,,,,android,2025-06-05,Dev_Test


In [109]:
feature_column = ['ScoreRange',
       'ln_loan_level_user_type', 'flg_zero_non_granted_ever',
       'flg_zero_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'granted_contracts_cnt_6M', 'total_overdue_granted_contracts',
       'has_ever_been_overdue', 'cnt_nongranted_contracts_3M',
       'cnt_active_contracts', 'max_amt_granted_24M',
       'tot_active_contracts_util', 'days_since_last_closed',
       'vel_contract_nongranted_cnt_6on12', 'vel_contract_granted_amt_6on12',
       'vel_contract_closed_amt_3on12',]

dfd = transform_datav2(data, feature_column, a='c_cic_score', modelDisplayName='cic_model_sil', tc='Trench 2', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3022916,d1382548-aeb1-45bf-b89d-cfcbb9477f0a,03438d6f-10de-4c3e-9629-e24045f11d57,0.384996,2026-01-16T19:13:03.430941,2026-01-16T19:13:03.430941,cic_model_sil,v2,"{""ScoreRange"": ""Missing"", ""ln_loan_level_user_...",Cash November 25 Models,ff75a22c-1e4d-4b2b-ae50-33343b684c43,2026-01-16T19:13:03.430941,{},Trench 2,android,Dev_Test,2025-09-02
1,2705834,e105ebed-d92c-4ee7-8505-ff0af79ca52e,336c8201-8741-46ee-abd8-fc9877359563,0.417764,2026-01-16T19:13:03.430941,2026-01-16T19:13:03.430941,cic_model_sil,v2,"{""ScoreRange"": ""Missing"", ""ln_loan_level_user_...",Cash November 25 Models,836f0a84-b8a3-48e6-b9b6-c51ee4f49e9d,2026-01-16T19:13:03.430941,{},Trench 2,ios,Dev_Test,2025-06-01
2,2092600,fdc7321a-3fa8-4922-8694-09ab5eccaad3,98154147-af1b-4ad5-87da-994c594a4de0,0.379514,2026-01-16T19:13:03.430941,2026-01-16T19:13:03.430941,cic_model_sil,v2,"{""ScoreRange"": ""Missing"", ""ln_loan_level_user_...",Cash November 25 Models,48ad1814-759e-46d7-b981-4deb87964baa,2026-01-16T19:13:03.430941,{},Trench 2,ios,Dev_Test,2025-03-13
3,2101993,defcafdf-f102-4f65-94b9-bff5cac35060,5bbabb34-16aa-4348-b4ac-39574ef8ca05,0.289817,2026-01-16T19:13:03.430941,2026-01-16T19:13:03.430941,cic_model_sil,v2,"{""ScoreRange"": ""Bi"", ""ln_loan_level_user_type""...",Cash November 25 Models,5f54c0f8-cedf-4f51-8b72-92bc07f590d0,2026-01-16T19:13:03.430941,{},Trench 2,android,Dev_Train,2024-12-22
4,2041409,8f37f563-2baa-4892-a36b-3065e6611621,16d9f09d-5cf2-435c-af5b-90cc992e1e91,0.419148,2026-01-16T19:13:03.430941,2026-01-16T19:13:03.430941,cic_model_sil,v2,"{""ScoreRange"": ""Missing"", ""ln_loan_level_user_...",Cash November 25 Models,79863e30-4071-4c80-8a54-e6e45cf7fde1,2026-01-16T19:13:03.430941,{},Trench 2,android,Dev_Test,2025-06-05


In [110]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,6943,2025-02-01,2025-09-30
1,Dev_Train,3232,2024-09-01,2025-01-31


In [111]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=e1817af4-fddd-4520-8fd7-ef76fb97b18e>

##### Trench 3

In [112]:
sq = """ 
select 
r.customerId customer_id,
r.digitalLoanAccountId, 
r.c_cic_score ,
r.ScoreRange,
       ln_loan_level_user_type, flg_zero_non_granted_ever,
       flg_zero_granted_ever,
       Personal_Loans_granted_contracts_amt_24M,
       granted_contracts_cnt_6M, total_overdue_granted_contracts,
       has_ever_been_overdue, cnt_nongranted_contracts_3M,
       cnt_active_contracts, max_amt_granted_24M,
       tot_active_contracts_util, days_since_last_closed,
       vel_contract_nongranted_cnt_6on12,
       vel_contract_granted_amt_6on12,
       vel_contract_closed_amt_3on12,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
 case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-01-31' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection 
from worktable_data_analysis.sil_alpha_cic_all_applied_backscored_20240901_20250930 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 3'
and r.c_cic_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 259567ad-65d9-4988-95fb-02f29a5607b1 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (11515, 22)


In [113]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'c_cic_score', 'ScoreRange',
       'ln_loan_level_user_type', 'flg_zero_non_granted_ever',
       'flg_zero_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'granted_contracts_cnt_6M', 'total_overdue_granted_contracts',
       'has_ever_been_overdue', 'cnt_nongranted_contracts_3M',
       'cnt_active_contracts', 'max_amt_granted_24M',
       'tot_active_contracts_util', 'days_since_last_closed',
       'vel_contract_nongranted_cnt_6on12', 'vel_contract_granted_amt_6on12',
       'vel_contract_closed_amt_3on12', 'osType', 'application_date',
       'Data_selection'],
      dtype='object')

In [114]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,c_cic_score,ScoreRange,ln_loan_level_user_type,flg_zero_non_granted_ever,flg_zero_granted_ever,Personal_Loans_granted_contracts_amt_24M,granted_contracts_cnt_6M,total_overdue_granted_contracts,has_ever_been_overdue,cnt_nongranted_contracts_3M,cnt_active_contracts,max_amt_granted_24M,tot_active_contracts_util,days_since_last_closed,vel_contract_nongranted_cnt_6on12,vel_contract_granted_amt_6on12,vel_contract_closed_amt_3on12,osType,application_date,Data_selection
0,3213289,d4e72f5d-10bd-4f67-abf7-be40c360b33d,0.211146,NH_Ei,1_Repeat Applicant,0,0,,1.0,0.0,0.0,,1.0,5000.0,1.0096,,,1.0,,android,2025-06-18,Dev_Test
1,2339187,61769da3-9174-4bee-b091-819936c048f2,0.182873,Missing,1_Repeat Applicant,0,0,,,1.0,1.0,,2.0,57000.0,0.094968,35.0,,,1.0,android,2025-08-28,Dev_Test
2,2393831,ff553295-c468-4208-bdfb-25c9ee9dc176,0.085827,Ai,1_Repeat Applicant,0,0,53700.0,,0.0,0.0,,,53700.0,,127.0,,,,android,2025-03-22,Dev_Test
3,2716521,de15670f-bec3-4920-a76d-a4567bc61a10,0.151268,Ai,1_Repeat Applicant,0,0,,,0.0,0.0,,1.0,8789.0,0.579929,,,,,android,2025-03-01,Dev_Test
4,3280607,3214e007-ea81-4749-9d0f-9420b07f5135,0.151516,NH_Ii,1_Repeat Applicant,1,0,,1.0,0.0,0.0,,1.0,11782.0,1.009761,,,1.0,,android,2025-08-21,Dev_Test


In [115]:
feature_column = ['ScoreRange',
       'ln_loan_level_user_type', 'flg_zero_non_granted_ever',
       'flg_zero_granted_ever', 'Personal_Loans_granted_contracts_amt_24M',
       'granted_contracts_cnt_6M', 'total_overdue_granted_contracts',
       'has_ever_been_overdue', 'cnt_nongranted_contracts_3M',
       'cnt_active_contracts', 'max_amt_granted_24M',
       'tot_active_contracts_util', 'days_since_last_closed',
       'vel_contract_nongranted_cnt_6on12', 'vel_contract_granted_amt_6on12',
       'vel_contract_closed_amt_3on12',]

dfd = transform_datav2(data, feature_column, a='c_cic_score', modelDisplayName='cic_model_sil', tc='Trench 3', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3213289,d4e72f5d-10bd-4f67-abf7-be40c360b33d,aa6c7350-a30d-411a-9e13-b38565aa8615,0.211146,2026-01-16T19:13:09.830811,2026-01-16T19:13:09.830811,cic_model_sil,v2,"{""ScoreRange"": ""NH_Ei"", ""ln_loan_level_user_ty...",Cash November 25 Models,e606092d-22bb-4dd0-90ad-dd748150fa7a,2026-01-16T19:13:09.830811,{},Trench 3,android,Dev_Test,2025-06-18
1,2339187,61769da3-9174-4bee-b091-819936c048f2,c9aebc48-b1e3-4844-9954-7910cd4319b5,0.182873,2026-01-16T19:13:09.830811,2026-01-16T19:13:09.830811,cic_model_sil,v2,"{""ScoreRange"": ""Missing"", ""ln_loan_level_user_...",Cash November 25 Models,6cfe6252-b9e7-49ed-9d7d-a4551b2a3e08,2026-01-16T19:13:09.830811,{},Trench 3,android,Dev_Test,2025-08-28
2,2393831,ff553295-c468-4208-bdfb-25c9ee9dc176,54987100-f143-47cd-8372-9df440a5eb76,0.085827,2026-01-16T19:13:09.830811,2026-01-16T19:13:09.830811,cic_model_sil,v2,"{""ScoreRange"": ""Ai"", ""ln_loan_level_user_type""...",Cash November 25 Models,65d1ea2f-f5f2-4cb0-8d64-29930429d33c,2026-01-16T19:13:09.830811,{},Trench 3,android,Dev_Test,2025-03-22
3,2716521,de15670f-bec3-4920-a76d-a4567bc61a10,2a1e1bee-e505-4530-b5c7-e27b3ee36fdf,0.151268,2026-01-16T19:13:09.830811,2026-01-16T19:13:09.830811,cic_model_sil,v2,"{""ScoreRange"": ""Ai"", ""ln_loan_level_user_type""...",Cash November 25 Models,eb49ab9e-a0fc-4a38-bb40-4d267d55fb51,2026-01-16T19:13:09.830811,{},Trench 3,android,Dev_Test,2025-03-01
4,3280607,3214e007-ea81-4749-9d0f-9420b07f5135,f4cf0c98-dd27-4a67-b461-0fa02765fda6,0.151516,2026-01-16T19:13:09.830811,2026-01-16T19:13:09.830811,cic_model_sil,v2,"{""ScoreRange"": ""NH_Ii"", ""ln_loan_level_user_ty...",Cash November 25 Models,c6c92b57-97e0-4e41-834b-19ade2765d4e,2026-01-16T19:13:09.830811,{},Trench 3,android,Dev_Test,2025-08-21


In [116]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,7246,2025-02-01,2025-09-30
1,Dev_Train,4269,2024-09-01,2025-01-31


In [117]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ab5b71fe-3cf6-4717-92bd-3fa6455a52aa>

##### Alpha Sil Stack Model 

##### Trench 1

In [118]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,  
r.sa_stack_score ,
r.sb_demo_score,
r.apps_score,
r.s_credo_score,
r.sa_cic_score,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
ln_loan_type,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from worktable_data_analysis.sil_alpha_applied_loans_backscored_20240901_20251013_option3 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 1'
and sa_stack_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID f2c343cd-10e2-4cf9-8279-a8aedc93a02d successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (232107, 11)


In [119]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'sa_stack_score',
       'sb_demo_score', 'apps_score', 's_credo_score', 'sa_cic_score',
       'osType', 'ln_loan_type', 'application_date', 'Data_selection'],
      dtype='object')

In [120]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,sa_stack_score,sb_demo_score,apps_score,s_credo_score,sa_cic_score,osType,ln_loan_type,application_date,Data_selection
0,3736951,43e8bac7-770e-47b1-bb02-575c10ffee01,0.632541,0.631529,0.520775,0.098305,0.355412,android,SIL Competitor,2025-10-11,Dev_Test
1,3304431,b6113c3b-b3a4-4b91-83b7-69e826b4113d,0.49193,0.444923,,0.123621,0.359897,ios,SIL-Instore,2025-03-05,Dev_Test
2,3503641,0e1599c4-1e58-4b88-aa89-c12c6ed9ee76,0.067394,0.322977,0.114667,0.027916,0.149299,android,SIL Competitor,2025-06-17,Dev_Test
3,3449711,67b8a908-09dc-4431-93f4-e05391f091c2,0.242566,0.248605,0.388681,0.08945,0.321306,android,SIL Competitor,2025-05-21,Dev_Test
4,3620273,eb3d59c2-8e10-4175-92ca-b257225ae84b,0.579218,0.578734,0.552404,0.19277,0.227156,android,SIL Competitor,2025-08-14,Dev_Test


In [121]:
feature_column = ['sb_demo_score', 'apps_score', 's_credo_score', 'sa_cic_score',]

dfd = transform_datav2(data, feature_column, a='sa_stack_score', modelDisplayName='alpha_stack_model_sil', tc='Trench 1', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3736951,43e8bac7-770e-47b1-bb02-575c10ffee01,e7eed633-7e65-4744-aabd-90c753b6ad80,0.632541,2026-01-16T19:13:19.113352,2026-01-16T19:13:19.113352,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.6315289744907508, ""apps_sc...",Cash November 25 Models,41e72fb6-f8c4-4cdd-b8ed-4a17b5b20f43,2026-01-16T19:13:19.113352,{},Trench 1,android,Dev_Test,2025-10-11
1,3304431,b6113c3b-b3a4-4b91-83b7-69e826b4113d,cca9540c-89d7-493f-a296-ff18f0d8d9e0,0.49193,2026-01-16T19:13:19.113352,2026-01-16T19:13:19.113352,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.4449231011845208, ""s_credo...",Cash November 25 Models,1f8fb625-74c2-409a-af0f-5c481402e8bf,2026-01-16T19:13:19.113352,{},Trench 1,ios,Dev_Test,2025-03-05
2,3503641,0e1599c4-1e58-4b88-aa89-c12c6ed9ee76,2f897494-a9dc-4eb5-a6b5-26a9450a7abb,0.067394,2026-01-16T19:13:19.113352,2026-01-16T19:13:19.113352,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.3229769141644266, ""apps_sc...",Cash November 25 Models,158193b4-add3-485a-ac9e-3e65cb2e36ef,2026-01-16T19:13:19.113352,{},Trench 1,android,Dev_Test,2025-06-17
3,3449711,67b8a908-09dc-4431-93f4-e05391f091c2,5eb8959a-6dd2-4a15-a7f5-56048cdf09ab,0.242566,2026-01-16T19:13:19.113352,2026-01-16T19:13:19.113352,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.2486054811178959, ""apps_sc...",Cash November 25 Models,af2284dc-5a5a-4a4f-b7fb-51ed0bba06a8,2026-01-16T19:13:19.113352,{},Trench 1,android,Dev_Test,2025-05-21
4,3620273,eb3d59c2-8e10-4175-92ca-b257225ae84b,a58f040f-8f2e-43e0-848a-fa93421bdcb5,0.579218,2026-01-16T19:13:19.113352,2026-01-16T19:13:19.113352,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.578733934945953, ""apps_sco...",Cash November 25 Models,e8fa42a8-3e6b-4a6a-895e-c504ccab4f82,2026-01-16T19:13:19.113352,{},Trench 1,android,Dev_Test,2025-08-14


In [122]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,124744,2025-03-01,2025-11-15
1,Dev_Train,107363,2024-09-01,2025-02-28


In [123]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=919caff3-6818-45d6-814d-9d60e26d1391>

##### Trench 2

In [124]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,  
r.sa_stack_score ,
r.sb_demo_score,
r.apps_score,
r.s_credo_score,
r.sa_cic_score,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
ln_loan_type,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from worktable_data_analysis.sil_alpha_applied_loans_backscored_20240901_20251013_option3 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 2'
and sa_stack_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID 4c8ad6bf-0bf1-4d4b-97a9-f0f604666685 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (10592, 11)


In [125]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'sa_stack_score',
       'sb_demo_score', 'apps_score', 's_credo_score', 'sa_cic_score',
       'osType', 'ln_loan_type', 'application_date', 'Data_selection'],
      dtype='object')

In [126]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,sa_stack_score,sb_demo_score,apps_score,s_credo_score,sa_cic_score,osType,ln_loan_type,application_date,Data_selection
0,1706066,6f9b875d-f829-4aca-81a3-add1dd72d9bd,0.437056,0.393981,,0.085152,0.325654,ios,SIL Competitor,2025-04-06,Dev_Test
1,2881477,a607568c-2bc9-480f-b9cb-508b5176a773,0.6834,0.592346,0.409713,0.103203,0.500461,android,SIL Competitor,2025-07-27,Dev_Test
2,3066850,2ba5d05a-cb37-4395-93ff-aa00e9bf3aca,0.843675,0.718241,0.546141,0.29279,0.370947,android,SIL Competitor,2025-03-25,Dev_Test
3,3063973,f7e7becb-e14f-494f-859b-e03b19064be3,0.265898,0.414439,0.267552,0.067804,0.305599,android,SIL Competitor,2025-03-17,Dev_Test
4,3226035,bbd0c072-ae38-4a31-b209-511bc1d420eb,0.197171,0.45846,0.366575,0.052749,0.085191,android,SIL Competitor,2025-06-15,Dev_Test


In [127]:
feature_column = ['sb_demo_score', 'apps_score', 's_credo_score', 'sa_cic_score',]

dfd = transform_datav2(data, feature_column, a='sa_stack_score', modelDisplayName='alpha_stack_model_sil', tc='Trench 2', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,1706066,6f9b875d-f829-4aca-81a3-add1dd72d9bd,88dc4dee-95e2-4531-a2cc-917108275fd0,0.437056,2026-01-16T19:13:52.379683,2026-01-16T19:13:52.379683,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.3939805975736532, ""s_credo...",Cash November 25 Models,6dbbff05-5450-4ccb-b2c8-e96c6365bd11,2026-01-16T19:13:52.379683,{},Trench 2,ios,Dev_Test,2025-04-06
1,2881477,a607568c-2bc9-480f-b9cb-508b5176a773,9f0f3fcb-b612-4d33-a347-a6099bea7499,0.6834,2026-01-16T19:13:52.379683,2026-01-16T19:13:52.379683,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.5923459607500456, ""apps_sc...",Cash November 25 Models,0151155c-715c-4ba8-9dc9-4ef5f2d601a3,2026-01-16T19:13:52.379683,{},Trench 2,android,Dev_Test,2025-07-27
2,3066850,2ba5d05a-cb37-4395-93ff-aa00e9bf3aca,bb8eb3d1-5e23-41e8-9317-49c9db0b4ccf,0.843675,2026-01-16T19:13:52.379683,2026-01-16T19:13:52.379683,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.7182411102757954, ""apps_sc...",Cash November 25 Models,1399c398-8196-4fcb-b7a5-7452321503cf,2026-01-16T19:13:52.379683,{},Trench 2,android,Dev_Test,2025-03-25
3,3063973,f7e7becb-e14f-494f-859b-e03b19064be3,f1d7949c-1bb1-4551-9f81-2f97664d403d,0.265898,2026-01-16T19:13:52.379683,2026-01-16T19:13:52.379683,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.41443871122959447, ""apps_s...",Cash November 25 Models,e9a9d7b8-fe3a-427c-822b-c2ec06736835,2026-01-16T19:13:52.379683,{},Trench 2,android,Dev_Test,2025-03-17
4,3226035,bbd0c072-ae38-4a31-b209-511bc1d420eb,613c4cd8-e861-41fe-8f4a-7f224bf75a6b,0.197171,2026-01-16T19:13:52.379683,2026-01-16T19:13:52.379683,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.4584596087451035, ""apps_sc...",Cash November 25 Models,17cf23d3-a691-4e5b-bbc7-fda91bf087f1,2026-01-16T19:13:52.379683,{},Trench 2,android,Dev_Test,2025-06-15


In [128]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,6825,2025-03-01,2025-10-13
1,Dev_Train,3767,2024-09-01,2025-02-28


In [129]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=0207a7e0-0838-4f50-9704-54c3ca66187a>

##### Trench 3

In [130]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,  
r.sa_stack_score ,
r.sb_demo_score,
r.apps_score,
r.s_credo_score,
r.sa_cic_score,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
ln_loan_type,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-09-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-09-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from worktable_data_analysis.sil_alpha_applied_loans_backscored_20240901_20251013_option3 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 3'
and sa_stack_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID ccaae7bc-8aff-4593-9e75-937e7af34014 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (11874, 11)


In [131]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'sa_stack_score',
       'sb_demo_score', 'apps_score', 's_credo_score', 'sa_cic_score',
       'osType', 'ln_loan_type', 'application_date', 'Data_selection'],
      dtype='object')

In [132]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,sa_stack_score,sb_demo_score,apps_score,s_credo_score,sa_cic_score,osType,ln_loan_type,application_date,Data_selection
0,3341806,659b3d49-a4f9-4cc7-8632-01d73b0c02ec,0.616288,0.292474,0.561706,0.160925,0.424917,android,SIL-Instore,2025-10-09,Dev_Test
1,2744135,02e4edb7-a5de-4415-a034-07b63370b61b,0.113657,0.259625,0.277129,0.069794,0.084362,android,SIL-Instore,2025-05-01,Dev_Test
2,2892628,594e3af2-add4-4b39-b976-afc547f7576c,0.570522,0.305916,0.565756,0.400157,0.156382,android,SIL Competitor,2025-04-06,Dev_Test
3,1635782,32b92b17-bbf8-4321-937f-7b6b43444414,0.493745,0.331413,0.296944,0.144485,0.509808,android,SIL-Instore,2025-10-01,Dev_Test
4,2361363,cd611d84-e556-4f1d-9502-24d170b7fd2d,0.287853,0.272366,,0.126082,0.160988,ios,SIL-Instore,2025-08-17,Dev_Test


In [133]:
feature_column = ['sb_demo_score', 'apps_score', 's_credo_score', 'sa_cic_score',]

dfd = transform_datav2(data, feature_column, a='sa_stack_score', modelDisplayName='alpha_stack_model_sil', tc='Trench 3', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3341806,659b3d49-a4f9-4cc7-8632-01d73b0c02ec,2d6005e1-244e-4532-a8bc-64320c2f0947,0.616288,2026-01-16T19:13:59.362845,2026-01-16T19:13:59.362845,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.29247418964989663, ""apps_s...",Cash November 25 Models,6c0e5153-3bd3-4883-be95-631834fdab5f,2026-01-16T19:13:59.362845,{},Trench 3,android,Dev_Test,2025-10-09
1,2744135,02e4edb7-a5de-4415-a034-07b63370b61b,80beb5c0-65d9-4862-8536-8b96043a2be8,0.113657,2026-01-16T19:13:59.362845,2026-01-16T19:13:59.362845,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.2596253005765629, ""apps_sc...",Cash November 25 Models,26124102-dc9f-4b6c-9b28-8e6a751c5eaf,2026-01-16T19:13:59.362845,{},Trench 3,android,Dev_Test,2025-05-01
2,2892628,594e3af2-add4-4b39-b976-afc547f7576c,c9229657-4754-4246-99a3-b1160a03057d,0.570522,2026-01-16T19:13:59.362845,2026-01-16T19:13:59.362845,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.30591639031746076, ""apps_s...",Cash November 25 Models,0d7cdf41-2b1f-48fb-961b-50a1d53d270a,2026-01-16T19:13:59.362845,{},Trench 3,android,Dev_Test,2025-04-06
3,1635782,32b92b17-bbf8-4321-937f-7b6b43444414,a184b562-de9b-43fb-a1b3-0f8382788346,0.493745,2026-01-16T19:13:59.362845,2026-01-16T19:13:59.362845,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.33141313319742866, ""apps_s...",Cash November 25 Models,75f0b9ae-66b8-4090-9365-d59240a243a2,2026-01-16T19:13:59.362845,{},Trench 3,android,Dev_Test,2025-10-01
4,2361363,cd611d84-e556-4f1d-9502-24d170b7fd2d,f292fbd9-4896-4f26-82b0-9d303c8dc1eb,0.287853,2026-01-16T19:13:59.362845,2026-01-16T19:13:59.362845,alpha_stack_model_sil,v2,"{""sb_demo_score"": 0.27236630907116643, ""s_cred...",Cash November 25 Models,3349bd6e-4921-4e2d-8496-8f06a87289e2,2026-01-16T19:13:59.362845,{},Trench 3,ios,Dev_Test,2025-08-17


In [134]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,6833,2025-03-01,2025-10-13
1,Dev_Train,5041,2024-09-01,2025-02-28


In [135]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=5dca3d10-14fa-485e-bd29-0e0a752ddf09>

##### Beta SIL STACK Score Model

##### Trench 1

In [136]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,  
r.sb_stack_score ,
r.sb_demo_score,
r.apps_score,
r.s_credo_score,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
ln_loan_type,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from prj-prod-dataplatform.worktable_data_analysis.sil_beta_applied_loans_backscored_20240801_20251013_option3 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 1'
and sb_stack_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID 31de649b-422d-4cfd-b739-92ef399cd9ef successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (343968, 10)


In [137]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'sb_stack_score',
       'sb_demo_score', 'apps_score', 's_credo_score', 'osType',
       'ln_loan_type', 'application_date', 'Data_selection'],
      dtype='object')

In [138]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,sb_stack_score,sb_demo_score,apps_score,s_credo_score,osType,ln_loan_type,application_date,Data_selection
0,3742465,119ffc38-a362-43cc-9519-2aded0e8c245,0.189065,0.386711,0.183631,0.13005,android,SIL-Instore,2025-10-13,Dev_Test
1,3555012,ff68645d-cefb-4778-9989-135b51f3cfe3,0.146709,0.199517,0.295672,0.101228,android,SIL-Instore,2025-07-13,Dev_Test
2,3473255,1402bfc1-998e-411f-9dbc-711bbe9eee3d,0.262834,0.484231,0.271845,0.069499,android,SIL Competitor,2025-06-02,Dev_Test
3,3643282,57e63be4-5a36-435e-8ed8-1598a5ddeecf,0.474806,0.542294,0.463375,0.08446,android,SIL-Instore,2025-08-25,Dev_Test
4,3546641,6b348c01-993a-4dac-baf9-3decab0524bb,0.411039,0.458586,,0.07265,ios,SIL-Instore,2025-07-09,Dev_Test


In [139]:
feature_column = ['sb_demo_score', 'apps_score', 's_credo_score']

dfd = transform_datav2(data, feature_column, a='sb_stack_score', modelDisplayName='beta_stack_model_sil', tc='Trench 1', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3742465,119ffc38-a362-43cc-9519-2aded0e8c245,b942c1a8-d3a5-4fc5-a712-8e595c01e38e,0.189065,2026-01-16T19:14:10.596451,2026-01-16T19:14:10.596451,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.38671090314290235, ""apps_s...",Cash November 25 Models,3479ad66-efe6-4fd8-9654-3db602491812,2026-01-16T19:14:10.596451,{},Trench 1,android,Dev_Test,2025-10-13
1,3555012,ff68645d-cefb-4778-9989-135b51f3cfe3,3eef10d1-44eb-4089-94d8-b579339a9328,0.146709,2026-01-16T19:14:10.596451,2026-01-16T19:14:10.596451,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.19951719457009148, ""apps_s...",Cash November 25 Models,179023df-adae-4aa3-acc6-818f5b799cfd,2026-01-16T19:14:10.596451,{},Trench 1,android,Dev_Test,2025-07-13
2,3473255,1402bfc1-998e-411f-9dbc-711bbe9eee3d,1e8e76c7-86e4-4a36-9086-353b208de10a,0.262834,2026-01-16T19:14:10.596451,2026-01-16T19:14:10.596451,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.48423068408830183, ""apps_s...",Cash November 25 Models,63675234-5ae9-46aa-86aa-c47474a89572,2026-01-16T19:14:10.596451,{},Trench 1,android,Dev_Test,2025-06-02
3,3643282,57e63be4-5a36-435e-8ed8-1598a5ddeecf,5eee6b58-8724-4059-80c0-472a3e40ce3e,0.474806,2026-01-16T19:14:10.596451,2026-01-16T19:14:10.596451,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.5422941103330391, ""apps_sc...",Cash November 25 Models,4483132f-4f71-491e-82a5-ce457fb5aa5c,2026-01-16T19:14:10.596451,{},Trench 1,android,Dev_Test,2025-08-25
4,3546641,6b348c01-993a-4dac-baf9-3decab0524bb,782b215f-92c3-4a74-9079-3488acaa73b7,0.411039,2026-01-16T19:14:10.596451,2026-01-16T19:14:10.596451,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.4585859920361209, ""s_credo...",Cash November 25 Models,76be7fc7-7f40-4fb7-be95-4c93db1482e5,2026-01-16T19:14:10.596451,{},Trench 1,ios,Dev_Test,2025-07-09


In [140]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,165423,2025-03-01,2025-11-15
1,Dev_Train,178545,2024-08-01,2025-02-28


In [141]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=34a20a75-605b-4ef8-ad33-ca7df1991d5b>

##### Trench 2

In [142]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,  
r.sb_stack_score ,
r.sb_demo_score,
r.apps_score,
r.s_credo_score,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
ln_loan_type,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from prj-prod-dataplatform.worktable_data_analysis.sil_beta_applied_loans_backscored_20240801_20251013_option3 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 2'
and sb_stack_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID 39eea9ec-1e18-4732-aec8-d2d0202cf77f successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (13099, 10)


In [143]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'sb_stack_score',
       'sb_demo_score', 'apps_score', 's_credo_score', 'osType',
       'ln_loan_type', 'application_date', 'Data_selection'],
      dtype='object')

In [144]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,sb_stack_score,sb_demo_score,apps_score,s_credo_score,osType,ln_loan_type,application_date,Data_selection
0,3280189,f8adb417-86da-469b-a8a8-197410cf9776,0.462539,0.50378,0.469983,0.097081,android,SIL ZERO,2025-09-16,Dev_Test
1,2215173,a31b0b1f-caa6-4741-8c34-c23341898e7c,0.582103,0.435959,0.653687,0.108681,android,SIL ZERO,2025-05-03,Dev_Test
2,3115380,cc5b232a-ed90-4dec-9b07-cb6be5f90e0b,0.526366,0.594303,0.39885,0.159925,android,SIL ZERO,2025-04-11,Dev_Test
3,2189425,bf400964-9a14-428e-81b1-14a1d827e992,0.309712,0.400669,0.362381,0.117657,android,SIL ZERO,2025-10-09,Dev_Test
4,2880114,1a122e33-3f7c-4fad-9a08-8e4f3e0708ff,0.229878,0.299266,0.402037,0.053916,android,SIL-Instore,2025-06-03,Dev_Test


In [145]:
feature_column = ['sb_demo_score', 'apps_score', 's_credo_score']

dfd = transform_datav2(data, feature_column, a='sb_stack_score', modelDisplayName='beta_stack_model_sil', tc='Trench 2', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3280189,f8adb417-86da-469b-a8a8-197410cf9776,3a640b27-bc08-4d82-a11e-84c92f3c2010,0.462539,2026-01-16T19:14:51.274801,2026-01-16T19:14:51.274801,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.5037803522853171, ""apps_sc...",Cash November 25 Models,f0ff364d-2c59-4053-af8c-ea1006887b4c,2026-01-16T19:14:51.274801,{},Trench 2,android,Dev_Test,2025-09-16
1,2215173,a31b0b1f-caa6-4741-8c34-c23341898e7c,103a7f3e-fe89-4794-8ee9-f5ed7b92c4fa,0.582103,2026-01-16T19:14:51.274801,2026-01-16T19:14:51.274801,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.4359585807282797, ""apps_sc...",Cash November 25 Models,327c0a1c-db5b-427f-b60e-e25ce4333f7a,2026-01-16T19:14:51.274801,{},Trench 2,android,Dev_Test,2025-05-03
2,3115380,cc5b232a-ed90-4dec-9b07-cb6be5f90e0b,e0d42646-0b52-4fdd-807a-f9fc35a6b364,0.526366,2026-01-16T19:14:51.274801,2026-01-16T19:14:51.274801,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.5943026314634269, ""apps_sc...",Cash November 25 Models,01610e98-dd92-443b-a140-ae0528a65d99,2026-01-16T19:14:51.274801,{},Trench 2,android,Dev_Test,2025-04-11
3,2189425,bf400964-9a14-428e-81b1-14a1d827e992,dbda7fd1-a54c-49c0-b96b-f895f1f78ed6,0.309712,2026-01-16T19:14:51.274801,2026-01-16T19:14:51.274801,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.40066927917656237, ""apps_s...",Cash November 25 Models,b104a13b-77cd-4c99-9033-32d77df58d79,2026-01-16T19:14:51.274801,{},Trench 2,android,Dev_Test,2025-10-09
4,2880114,1a122e33-3f7c-4fad-9a08-8e4f3e0708ff,914858f1-6838-48d6-84ad-ec2b12f36f2a,0.229878,2026-01-16T19:14:51.274801,2026-01-16T19:14:51.274801,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.2992660611467991, ""apps_sc...",Cash November 25 Models,f3042257-bb2f-45ed-ab69-2b0b1bf87fda,2026-01-16T19:14:51.274801,{},Trench 2,android,Dev_Test,2025-06-03


In [146]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,7599,2025-03-01,2025-10-13
1,Dev_Train,5500,2024-08-01,2025-02-28


In [147]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=bbaab705-bec5-4959-834f-5a3c9f9d279b>

##### Trench 3

In [148]:
sq = """ 
select 
lmt.customerId customer_id,
r.digitalLoanAccountId,  
r.sb_stack_score ,
r.sb_demo_score,
r.apps_score,
r.s_credo_score,
case 
  when lower(r.ln_os_type) like '%andro%' then 'android'
  when lower(r.ln_os_type) like '%os%' then 'ios'
  else 'ios' end  osType,
ln_loan_type,
date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-02-28' then 'Dev_Train'
        when date(if(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from prj-prod-dataplatform.worktable_data_analysis.sil_beta_applied_loans_backscored_20240801_20251013_option3 r
left join `risk_credit_mis.loan_master_table` lmt on lmt.digitalLoanAccountId = r.digitalLoanAccountId
where r.trench_category = 'Trench 3'
and sb_stack_score is not null
and date(IF(lmt.new_loan_type = 'Flex-up', lmt.startApplyDateTime, lmt.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID eab6dc21-09ed-4055-ba95-dc401015b96c successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (13802, 10)


In [149]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'sb_stack_score',
       'sb_demo_score', 'apps_score', 's_credo_score', 'osType',
       'ln_loan_type', 'application_date', 'Data_selection'],
      dtype='object')

In [150]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,sb_stack_score,sb_demo_score,apps_score,s_credo_score,osType,ln_loan_type,application_date,Data_selection
0,2545858,9eb785e7-5567-4276-9cb8-7ad2a5bfb321,0.225571,0.175574,,0.081049,ios,SIL ZERO,2025-04-20,Dev_Test
1,2315681,e76a655f-2a13-4264-9609-2ff359438ce4,0.30801,0.304643,0.465938,0.095345,android,SIL ZERO,2025-03-06,Dev_Test
2,2503045,3f47ce34-da32-4ca4-946a-db3c6cfbe889,0.369089,0.25363,0.582042,0.101363,android,SIL Competitor,2025-04-04,Dev_Test
3,3309765,5c3fc7ba-0433-46f1-936c-a5e7c7287a77,0.452867,0.285785,0.56607,0.18796,android,SIL-Instore,2025-08-09,Dev_Test
4,2341857,92298a10-1d55-4264-8d37-7157bf53c413,0.341828,0.362918,0.480684,0.069894,android,SIL Competitor,2025-05-06,Dev_Test


In [151]:
feature_column = ['sb_demo_score', 'apps_score', 's_credo_score']

dfd = transform_datav2(data, feature_column, a='sb_stack_score', modelDisplayName='beta_stack_model_sil', tc='Trench 3', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2545858,9eb785e7-5567-4276-9cb8-7ad2a5bfb321,0734375a-b485-443f-a706-a027d88ca45e,0.225571,2026-01-16T19:14:58.546279,2026-01-16T19:14:58.546279,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.1755742572455905, ""s_credo...",Cash November 25 Models,f0929bee-af10-40d9-b631-53656befc986,2026-01-16T19:14:58.546279,{},Trench 3,ios,Dev_Test,2025-04-20
1,2315681,e76a655f-2a13-4264-9609-2ff359438ce4,5b4c6a02-b80f-4180-b444-fb0240c9b20d,0.30801,2026-01-16T19:14:58.546279,2026-01-16T19:14:58.546279,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.3046433132639846, ""apps_sc...",Cash November 25 Models,1b25c2c1-996d-4711-8d71-18babaed718a,2026-01-16T19:14:58.546279,{},Trench 3,android,Dev_Test,2025-03-06
2,2503045,3f47ce34-da32-4ca4-946a-db3c6cfbe889,22205394-87b4-4cb4-80bf-c13b2b871fb5,0.369089,2026-01-16T19:14:58.546279,2026-01-16T19:14:58.546279,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.25363013646064025, ""apps_s...",Cash November 25 Models,33c2c901-be08-4609-a4f0-ef4243da8e44,2026-01-16T19:14:58.546279,{},Trench 3,android,Dev_Test,2025-04-04
3,3309765,5c3fc7ba-0433-46f1-936c-a5e7c7287a77,abd47e40-b325-4454-b240-bf6077bc4dd2,0.452867,2026-01-16T19:14:58.546279,2026-01-16T19:14:58.546279,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.2857845839576205, ""apps_sc...",Cash November 25 Models,72f8931e-1a30-413d-8b35-7855c9412ebd,2026-01-16T19:14:58.546279,{},Trench 3,android,Dev_Test,2025-08-09
4,2341857,92298a10-1d55-4264-8d37-7157bf53c413,127c25f0-65ee-46e9-b73d-24a74c363ea6,0.341828,2026-01-16T19:14:58.546279,2026-01-16T19:14:58.546279,beta_stack_model_sil,v2,"{""sb_demo_score"": 0.3629180003646723, ""apps_sc...",Cash November 25 Models,45dc7c10-17ef-4571-bea4-beb80fd2d5b3,2026-01-16T19:14:58.546279,{},Trench 3,android,Dev_Test,2025-05-06


In [152]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,7383,2025-03-01,2025-10-13
1,Dev_Train,6419,2024-08-01,2025-02-28


In [153]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=4a42f5ec-de84-4a7b-a058-6b7847fc5308>

##### Beta Sil App Score

##### Trench 1

In [154]:
sq = """ 
select 
distinct 
loanmaster.customerId customer_id,
r.digitalLoanAccountId,
r.apps_score,
app_cnt_payday_ever,
app_cnt_finance_ever,
app_cnt_competitors_sil_ever,
app_cnt_competitors_ever,
app_cnt_finance_365d,
app_cnt_absence_tag_365d,
app_cnt_competitors_sil_365d,
app_cnt_finance_7d,
app_cnt_rated_for_3plus_ever,
app_cnt_payday_7d,
    case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-01-31' then 'Dev_Train'
        when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from  risk_mart.applied_sil_new_applicants_loan_20241001_20251124_app_scored r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where apps_score is not null
and trench_category = 'Trench 1'
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID 0b2d60ed-0261-4300-80f4-70b823db0e86 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (289957, 16)


In [155]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'apps_score',
       'app_cnt_payday_ever', 'app_cnt_finance_ever',
       'app_cnt_competitors_sil_ever', 'app_cnt_competitors_ever',
       'app_cnt_finance_365d', 'app_cnt_absence_tag_365d',
       'app_cnt_competitors_sil_365d', 'app_cnt_finance_7d',
       'app_cnt_rated_for_3plus_ever', 'app_cnt_payday_7d', 'osType',
       'application_date', 'Data_selection'],
      dtype='object')

In [156]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,apps_score,app_cnt_payday_ever,app_cnt_finance_ever,app_cnt_competitors_sil_ever,app_cnt_competitors_ever,app_cnt_finance_365d,app_cnt_absence_tag_365d,app_cnt_competitors_sil_365d,app_cnt_finance_7d,app_cnt_rated_for_3plus_ever,app_cnt_payday_7d,osType,application_date,Data_selection
0,3655004,8e5fe550-a1dc-4141-8663-5e784853a663,0.137928,2,19.0,2,5,4.0,18.0,0.0,0.0,50.0,0.0,android,2025-08-31,Dev_Test
1,3635320,73a54de1-e56c-465f-9ed3-e54b72c9673c,0.40406,10,19.0,5,11,9.0,14.0,5.0,0.0,90.0,0.0,android,2025-08-21,Dev_Test
2,3365227,e19d2222-0f19-449c-bfcd-41e54c9d9ac3,0.3265,4,14.0,2,7,14.0,18.0,2.0,0.0,70.0,0.0,android,2025-04-07,Dev_Test
3,3381407,4e0f9db0-5e8e-4233-a7e9-59b4c8523bb3,0.576843,4,12.0,4,5,12.0,14.0,4.0,4.0,40.0,3.0,android,2025-04-15,Dev_Test
4,3400685,84059931-ce71-4d8b-b14f-ae44ea67c7d7,0.477944,7,15.0,3,7,12.0,5.0,2.0,0.0,22.0,0.0,android,2025-04-25,Dev_Test


In [157]:
feature_column = ['app_cnt_payday_ever', 'app_cnt_finance_ever',
       'app_cnt_competitors_sil_ever', 'app_cnt_competitors_ever',
       'app_cnt_finance_365d', 'app_cnt_absence_tag_365d',
       'app_cnt_competitors_sil_365d', 'app_cnt_finance_7d',
       'app_cnt_rated_for_3plus_ever', 'app_cnt_payday_7d',]

dfd = transform_datav2(data, feature_column, a='apps_score', modelDisplayName='apps_score_model_sil', tc='Trench 1', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3655004,8e5fe550-a1dc-4141-8663-5e784853a663,177d24e7-0a9e-4c96-a302-4d98f795cffe,0.137928,2026-01-16T19:15:08.008460,2026-01-16T19:15:08.008460,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 2, ""app_cnt_finance_ev...",Cash November 25 Models,2db9134c-a47c-4486-9a4d-2693992c2d43,2026-01-16T19:15:08.008460,{},Trench 1,android,Dev_Test,2025-08-31
1,3635320,73a54de1-e56c-465f-9ed3-e54b72c9673c,841e2210-8933-430a-bdbb-97bafcf016c2,0.40406,2026-01-16T19:15:08.008460,2026-01-16T19:15:08.008460,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 10, ""app_cnt_finance_e...",Cash November 25 Models,617b8b1a-d6a7-46d8-a051-8496f03a8fd1,2026-01-16T19:15:08.008460,{},Trench 1,android,Dev_Test,2025-08-21
2,3365227,e19d2222-0f19-449c-bfcd-41e54c9d9ac3,2c294c67-c735-41a5-98b5-13554c89b53e,0.3265,2026-01-16T19:15:08.008460,2026-01-16T19:15:08.008460,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 4, ""app_cnt_finance_ev...",Cash November 25 Models,74e3ab5c-4134-4def-b591-d760c174732e,2026-01-16T19:15:08.008460,{},Trench 1,android,Dev_Test,2025-04-07
3,3381407,4e0f9db0-5e8e-4233-a7e9-59b4c8523bb3,369fc6a9-9461-40a8-9962-a2af44e41a3a,0.576843,2026-01-16T19:15:08.008460,2026-01-16T19:15:08.008460,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 4, ""app_cnt_finance_ev...",Cash November 25 Models,2a4a6f82-67ba-462d-ae11-cd80b04d3237,2026-01-16T19:15:08.008460,{},Trench 1,android,Dev_Test,2025-04-15
4,3400685,84059931-ce71-4d8b-b14f-ae44ea67c7d7,513913b9-4db9-464e-905d-db2bcb5683b2,0.477944,2026-01-16T19:15:08.008460,2026-01-16T19:15:08.008460,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 7, ""app_cnt_finance_ev...",Cash November 25 Models,dced5d72-bfbd-4762-b197-31a8945171b5,2026-01-16T19:15:08.008460,{},Trench 1,android,Dev_Test,2025-04-25


In [158]:

result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,187810,2025-02-01,2025-11-16
1,Dev_Train,102147,2024-10-01,2025-01-31


In [159]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f4f960c1-02ce-4e53-b99c-853d933b0311>

##### Trench 2

In [160]:
sq = """ 
select 
distinct 
loanmaster.customerId customer_id,
r.digitalLoanAccountId,
r.apps_score,
app_cnt_payday_ever,
app_cnt_finance_ever,
app_cnt_competitors_sil_ever,
app_cnt_competitors_ever,
app_cnt_finance_365d,
app_cnt_absence_tag_365d,
app_cnt_competitors_sil_365d,
app_cnt_finance_7d,
app_cnt_rated_for_3plus_ever,
app_cnt_payday_7d,
    case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-01-31' then 'Dev_Train'
        when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from  risk_mart.applied_sil_new_applicants_loan_20241001_20251124_app_scored r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where apps_score is not null
and trench_category = 'Trench 2'
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")


Job ID 79b530b9-2e24-4577-b122-4380cf831fdf successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (11680, 16)


In [161]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'apps_score',
       'app_cnt_payday_ever', 'app_cnt_finance_ever',
       'app_cnt_competitors_sil_ever', 'app_cnt_competitors_ever',
       'app_cnt_finance_365d', 'app_cnt_absence_tag_365d',
       'app_cnt_competitors_sil_365d', 'app_cnt_finance_7d',
       'app_cnt_rated_for_3plus_ever', 'app_cnt_payday_7d', 'osType',
       'application_date', 'Data_selection'],
      dtype='object')

In [162]:
data.head()

Unnamed: 0,customer_id,digitalLoanAccountId,apps_score,app_cnt_payday_ever,app_cnt_finance_ever,app_cnt_competitors_sil_ever,app_cnt_competitors_ever,app_cnt_finance_365d,app_cnt_absence_tag_365d,app_cnt_competitors_sil_365d,app_cnt_finance_7d,app_cnt_rated_for_3plus_ever,app_cnt_payday_7d,osType,application_date,Data_selection
0,2138075,1ddc22f5-026e-4150-9c2c-b64151154575,0.201415,4,19.0,2,10,10.0,11.0,1.0,1.0,59.0,1.0,android,2025-06-08,Dev_Test
1,2476027,2e05d89c-cd58-42f8-afce-eb0c437b9cfa,0.391375,7,18.0,4,10,15.0,3.0,4.0,1.0,46.0,1.0,android,2025-06-12,Dev_Test
2,3211173,c8524054-f1e7-44c3-a634-d3e7fb704a25,0.438699,11,22.0,2,8,19.0,7.0,2.0,3.0,54.0,2.0,android,2025-07-05,Dev_Test
3,2124531,15133f4e-4205-4ae3-b2a5-02e1163cd7c9,0.454678,2,18.0,2,4,18.0,9.0,2.0,18.0,31.0,2.0,android,2025-04-14,Dev_Test
4,3026188,4f058f3a-7e7b-4e40-9d50-0af3f084af8a,0.396767,12,21.0,5,13,13.0,10.0,3.0,0.0,74.0,0.0,android,2025-03-28,Dev_Test


In [163]:
feature_column = ['app_cnt_payday_ever', 'app_cnt_finance_ever',
       'app_cnt_competitors_sil_ever', 'app_cnt_competitors_ever',
       'app_cnt_finance_365d', 'app_cnt_absence_tag_365d',
       'app_cnt_competitors_sil_365d', 'app_cnt_finance_7d',
       'app_cnt_rated_for_3plus_ever', 'app_cnt_payday_7d',]

dfd = transform_datav2(data, feature_column, a='apps_score', modelDisplayName='apps_score_model_sil', tc='Trench 2', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2138075,1ddc22f5-026e-4150-9c2c-b64151154575,6be38363-8e2e-4997-b133-8f8719e56bdc,0.201415,2026-01-16T19:15:54.265915,2026-01-16T19:15:54.265915,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 4, ""app_cnt_finance_ev...",Cash November 25 Models,a1d25e83-3484-4a93-b359-0c40242751af,2026-01-16T19:15:54.265915,{},Trench 2,android,Dev_Test,2025-06-08
1,2476027,2e05d89c-cd58-42f8-afce-eb0c437b9cfa,97c3fe99-fa1b-409e-921a-a53db36a53a7,0.391375,2026-01-16T19:15:54.265915,2026-01-16T19:15:54.265915,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 7, ""app_cnt_finance_ev...",Cash November 25 Models,11a7eef2-db6c-45b6-a5ad-bfafdc5b7e85,2026-01-16T19:15:54.265915,{},Trench 2,android,Dev_Test,2025-06-12
2,3211173,c8524054-f1e7-44c3-a634-d3e7fb704a25,cfa0ae2f-322e-42ee-8eaa-e05a1654cf10,0.438699,2026-01-16T19:15:54.265915,2026-01-16T19:15:54.265915,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 11, ""app_cnt_finance_e...",Cash November 25 Models,4b8d8591-ea9d-4bda-b841-f37c46e23b55,2026-01-16T19:15:54.265915,{},Trench 2,android,Dev_Test,2025-07-05
3,2124531,15133f4e-4205-4ae3-b2a5-02e1163cd7c9,54e2a8dc-97fe-433f-adde-07a31d5f6727,0.454678,2026-01-16T19:15:54.266451,2026-01-16T19:15:54.266451,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 2, ""app_cnt_finance_ev...",Cash November 25 Models,efbc1b92-05af-4c35-8d91-96cbf219ab31,2026-01-16T19:15:54.266451,{},Trench 2,android,Dev_Test,2025-04-14
4,3026188,4f058f3a-7e7b-4e40-9d50-0af3f084af8a,e5fd092f-09ff-4829-8347-8527eb8134a0,0.396767,2026-01-16T19:15:54.266451,2026-01-16T19:15:54.266451,apps_score_model_sil,v2,"{""app_cnt_payday_ever"": 12, ""app_cnt_finance_e...",Cash November 25 Models,4e022fdd-fce4-4274-9d0f-1305f9ac5d3b,2026-01-16T19:15:54.266451,{},Trench 2,android,Dev_Test,2025-03-28


In [164]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,8445,2025-02-01,2025-11-16
1,Dev_Train,3235,2024-10-01,2025-01-31


In [165]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d70c3a82-e94b-463d-8f4c-b0aa9ff85471>

##### Trench 3

In [166]:
sq = """ 
select 
r.customerId customer_id,
r.digitalLoanAccountId,
r.apps_score,
app_cnt_productivity_ever,
app_cnt_rated_for_3plus_ever, 
app_cnt_books_and_reference_ever,
app_cnt_tools_ever, 
app_median_time_bw_installed_mins_3d,
app_median_time_bw_installed_mins_30d,
app_cnt_communication_ever, 
app_cnt_finance_90d,
app_cnt_absence_tag_180d, 
app_cnt_shopping_ever,
app_cnt_social_ever, 
app_cnt_driver_90d, 
app_cnt_payday_365d,
app_cnt_driver_365d, 
app_cnt_music_and_audio_ever,
app_cnt_finance_180d, 
app_cnt_art_and_design_ever,
app_cnt_gaming_90d, 
app_avg_time_bw_installed_mins_30d,
app_cnt_education_ever,
case when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%andro%' then 'android'
    when lower(coalesce(loanmaster.osversion_v2, loanmaster.osVersion)) like '%os%' then 'ios'
    when lower(loanmaster.deviceType) like '%andro%' then 'android'
    else 'ios' end osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-01-31' then 'Dev_Train'
        when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from risk_mart.applied_sil_repeat_applicants_loan_20241001_20251124_app_scored r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where apps_score is not null
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 40e4db9d-8f3c-4260-9241-a51ec9fa97e1 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (19768, 26)


In [167]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 'apps_score',
       'app_cnt_productivity_ever', 'app_cnt_rated_for_3plus_ever',
       'app_cnt_books_and_reference_ever', 'app_cnt_tools_ever',
       'app_median_time_bw_installed_mins_3d',
       'app_median_time_bw_installed_mins_30d', 'app_cnt_communication_ever',
       'app_cnt_finance_90d', 'app_cnt_absence_tag_180d',
       'app_cnt_shopping_ever', 'app_cnt_social_ever', 'app_cnt_driver_90d',
       'app_cnt_payday_365d', 'app_cnt_driver_365d',
       'app_cnt_music_and_audio_ever', 'app_cnt_finance_180d',
       'app_cnt_art_and_design_ever', 'app_cnt_gaming_90d',
       'app_avg_time_bw_installed_mins_30d', 'app_cnt_education_ever',
       'osType', 'application_date', 'Data_selection'],
      dtype='object')

In [168]:
feature_column = ['app_cnt_productivity_ever', 'app_cnt_rated_for_3plus_ever',
       'app_cnt_books_and_reference_ever', 'app_cnt_tools_ever',
       'app_median_time_bw_installed_mins_3d',
       'app_median_time_bw_installed_mins_30d', 'app_cnt_communication_ever',
       'app_cnt_finance_90d', 'app_cnt_absence_tag_180d',
       'app_cnt_shopping_ever', 'app_cnt_social_ever', 'app_cnt_driver_90d',
       'app_cnt_payday_365d', 'app_cnt_driver_365d',
       'app_cnt_music_and_audio_ever', 'app_cnt_finance_180d',
       'app_cnt_art_and_design_ever', 'app_cnt_gaming_90d',
       'app_avg_time_bw_installed_mins_30d', 'app_cnt_education_ever',]

dfd = transform_datav2(data, feature_column, a='apps_score', modelDisplayName='apps_score_model_sil', tc='Trench 3', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2654586,7c66a1d9-c8c8-4758-8be4-1c8df0ed0fe9,5a44d048-c16b-4503-8314-f302e97bd202,0.403125,2026-01-16T19:16:01.840840,2026-01-16T19:16:01.840840,apps_score_model_sil,v2,"{""app_cnt_productivity_ever"": 1.0, ""app_cnt_ra...",Cash November 25 Models,dc0531b4-9477-49b0-bcc3-698416e74117,2026-01-16T19:16:01.840840,{},Trench 3,android,Dev_Test,2025-04-12
1,2496300,bd9e7825-7ebb-4c2a-9fd0-3cc89a017e15,4e6783de-ae91-43f3-abff-6954ad016c20,0.427483,2026-01-16T19:16:01.840840,2026-01-16T19:16:01.840840,apps_score_model_sil,v2,"{""app_cnt_productivity_ever"": 0.0, ""app_cnt_ra...",Cash November 25 Models,8d2c4db1-bb0c-4041-870a-596bea621f6a,2026-01-16T19:16:01.840840,{},Trench 3,android,Dev_Train,2024-11-16
2,2708690,29b275ed-4938-4f6d-99ad-8cf47ef61339,778ed869-4943-4978-b005-3a90ec0e3e5d,0.261049,2026-01-16T19:16:01.840840,2026-01-16T19:16:01.840840,apps_score_model_sil,v2,"{""app_cnt_productivity_ever"": 5.0, ""app_cnt_ra...",Cash November 25 Models,63fa487e-40f1-4021-9004-edbaf13e319a,2026-01-16T19:16:01.840840,{},Trench 3,android,Dev_Train,2024-11-19
3,2841934,5a27f398-61f8-4719-a497-626c04cfcd37,6f60378e-2f01-4663-bda2-bd526c7496f0,0.330189,2026-01-16T19:16:01.840840,2026-01-16T19:16:01.840840,apps_score_model_sil,v2,"{""app_cnt_productivity_ever"": 5.0, ""app_cnt_ra...",Cash November 25 Models,35eaace9-53f7-4228-954d-8e5c43d27a8f,2026-01-16T19:16:01.840840,{},Trench 3,android,Dev_Train,2024-11-20
4,2476134,6a33e8be-263a-4190-ac73-d7a1629d63e6,c30baf3a-d1c8-4c93-8043-baee9ea5b4ad,0.369869,2026-01-16T19:16:01.840840,2026-01-16T19:16:01.840840,apps_score_model_sil,v2,"{""app_cnt_productivity_ever"": 2.0, ""app_cnt_ra...",Cash November 25 Models,b0e34e85-4710-4186-85ca-a3e78c2a0336,2026-01-16T19:16:01.840840,{},Trench 3,android,Dev_Train,2024-11-28


In [169]:
dfd['customerId'] = pd.to_numeric(dfd['customerId'], errors='coerce')

In [170]:
dfd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19768 entries, 0 to 19767
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   customerId            19768 non-null  int64  
 1   digitalLoanAccountId  19768 non-null  object 
 2   crifApplicationId     19768 non-null  object 
 3   prediction            19768 non-null  float64
 4   start_time            19768 non-null  object 
 5   end_time              19768 non-null  object 
 6   modelDisplayName      19768 non-null  object 
 7   modelVersionId        19768 non-null  object 
 8   calcFeature           19768 non-null  object 
 9   subscription_name     19768 non-null  object 
 10  message_id            19768 non-null  object 
 11  publish_time          19768 non-null  object 
 12  attributes            19768 non-null  object 
 13  trenchCategory        19768 non-null  object 
 14  deviceOs              19768 non-null  object 
 15  Data_selection     

In [171]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,14581,2025-02-01,2025-11-16
1,Dev_Train,5187,2024-10-01,2025-01-31


In [172]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=af6c71db-899d-4f14-9e44-dacbd61e622f>

##### Beta SIL Demo Score

##### Trench 1

In [173]:
sq = """select 
distinct 
loanmaster.customerId customer_id,
r.digitalLoanAccountId,
r.s_demo_score,
ln_vas_opted_flag,
ln_doc_type_rolled,
ln_industry_new_cat_bin,
ln_marital_status,
ln_age,
ln_education_level,
ln_cnt_dependents,
ln_ref2_type,
ln_loan_level_user_type,
ln_ref1_type,
ln_name_email_match_score,
ln_telconame,
ln_city_cat,
ln_brand_bin,
ln_apply_Is_Weekend,
r.ln_os_type osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-01-31' then 'Dev_Train'
        when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from worktable_data_analysis.sil_beta_demo_all_applied_backscored_20240801_20251015 r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where s_demo_score is not null
and trench_category = 'Trench 1'
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID b6e9b29c-9da7-4ea4-b6d1-deaf6d957cf1 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (345905, 21)


In [174]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 's_demo_score',
       'ln_vas_opted_flag', 'ln_doc_type_rolled', 'ln_industry_new_cat_bin',
       'ln_marital_status', 'ln_age', 'ln_education_level',
       'ln_cnt_dependents', 'ln_ref2_type', 'ln_loan_level_user_type',
       'ln_ref1_type', 'ln_name_email_match_score', 'ln_telconame',
       'ln_city_cat', 'ln_brand_bin', 'ln_apply_Is_Weekend', 'osType',
       'application_date', 'Data_selection'],
      dtype='object')

In [175]:
feature_column = [ 'ln_vas_opted_flag', 'ln_doc_type_rolled', 'ln_industry_new_cat_bin',
       'ln_marital_status', 'ln_age', 'ln_education_level',
       'ln_cnt_dependents', 'ln_ref2_type', 'ln_loan_level_user_type',
       'ln_ref1_type', 'ln_name_email_match_score', 'ln_telconame',
       'ln_city_cat', 'ln_brand_bin', 'ln_apply_Is_Weekend', 'osType']

dfd = transform_datav2(data, feature_column, a='s_demo_score', modelDisplayName='beta_demo_model_sil', tc='Trench 1', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3574100,8d7a46db-9936-4267-81d4-9b0377e57299,95ff4875-107d-4e37-8a8b-9b441b50a439,0.391914,2026-01-16T19:16:16.744794,2026-01-16T19:16:16.744794,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,8fd092ba-a0f9-47ff-bcf9-d0690b755f99,2026-01-16T19:16:16.744794,{},Trench 1,Android,Dev_Test,2025-07-23
1,3247541,905b4aae-8bfc-4ee3-9f10-65bfe51c5f5e,bdb4a4b6-5ae0-4cea-9950-9890b44ecdc1,0.246292,2026-01-16T19:16:16.744794,2026-01-16T19:16:16.744794,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,3a2c34e1-2c3e-4980-bb9e-70c58bca633b,2026-01-16T19:16:16.744794,{},Trench 1,Android,Dev_Test,2025-02-09
2,3298891,d281ff9b-2197-4e82-b612-44e7b4676338,2553c833-0d9a-4fc7-b91a-ac5f10f19912,0.335671,2026-01-16T19:16:16.744794,2026-01-16T19:16:16.744794,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,df99297e-7dd4-40e5-9f20-efbc50f999e7,2026-01-16T19:16:16.744794,{},Trench 1,Android,Dev_Test,2025-03-02
3,3165295,3697a6c3-e3ac-4508-901e-0cc65cfa6e75,b7ba2500-900c-491d-a380-67bc40cc84fa,0.324512,2026-01-16T19:16:16.745791,2026-01-16T19:16:16.745791,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,9fe20da1-fb6a-4669-ae7a-e8b7a6cbf0ad,2026-01-16T19:16:16.745791,{},Trench 1,Android,Dev_Train,2025-01-04
4,2766763,aed75b9d-c004-42db-bada-e523929d76b8,62af1736-dc9f-48d4-95cb-1b6947dd4e66,0.231494,2026-01-16T19:16:16.745791,2026-01-16T19:16:16.745791,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,3d73f1c5-9703-4e87-98d6-01ae6e29743a,2026-01-16T19:16:16.745791,{},Trench 1,Android,Dev_Train,2024-08-18


In [176]:
dfd['customerId'] = pd.to_numeric(dfd['customerId'], errors='coerce')

In [177]:
dfd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 345905 entries, 0 to 345904
Data columns (total 17 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   customerId            345905 non-null  int64  
 1   digitalLoanAccountId  345905 non-null  object 
 2   crifApplicationId     345905 non-null  object 
 3   prediction            345905 non-null  float64
 4   start_time            345905 non-null  object 
 5   end_time              345905 non-null  object 
 6   modelDisplayName      345905 non-null  object 
 7   modelVersionId        345905 non-null  object 
 8   calcFeature           345905 non-null  object 
 9   subscription_name     345905 non-null  object 
 10  message_id            345905 non-null  object 
 11  publish_time          345905 non-null  object 
 12  attributes            345905 non-null  object 
 13  trenchCategory        345905 non-null  object 
 14  deviceOs              345905 non-null  object 
 15  

In [178]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,185960,2025-02-01,2025-11-15
1,Dev_Train,159945,2024-08-01,2025-01-31


In [179]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=9e785917-cf0d-4c66-a17d-43f391dbcf8f>

##### Trench 2

In [180]:
sq = """select 
distinct 
loanmaster.customerId customer_id,
r.digitalLoanAccountId,
r.s_demo_score,
ln_vas_opted_flag,
ln_doc_type_rolled,
ln_industry_new_cat_bin,
ln_marital_status,
ln_age,
ln_education_level,
ln_cnt_dependents,
ln_ref2_type,
ln_loan_level_user_type,
ln_ref1_type,
ln_name_email_match_score,
ln_telconame,
ln_city_cat,
ln_brand_bin,
ln_apply_Is_Weekend,
r.ln_os_type osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-01-31' then 'Dev_Train'
        when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from worktable_data_analysis.sil_beta_demo_all_applied_backscored_20240801_20251015 r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where s_demo_score is not null
and trench_category = 'Trench 2'
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-11-17'
;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 1c5d19f2-09b9-4f72-b418-ae85264dca5f successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (13192, 21)


In [181]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 's_demo_score',
       'ln_vas_opted_flag', 'ln_doc_type_rolled', 'ln_industry_new_cat_bin',
       'ln_marital_status', 'ln_age', 'ln_education_level',
       'ln_cnt_dependents', 'ln_ref2_type', 'ln_loan_level_user_type',
       'ln_ref1_type', 'ln_name_email_match_score', 'ln_telconame',
       'ln_city_cat', 'ln_brand_bin', 'ln_apply_Is_Weekend', 'osType',
       'application_date', 'Data_selection'],
      dtype='object')

In [182]:
feature_column = [ 'ln_vas_opted_flag', 'ln_doc_type_rolled', 'ln_industry_new_cat_bin',
       'ln_marital_status', 'ln_age', 'ln_education_level',
       'ln_cnt_dependents', 'ln_ref2_type', 'ln_loan_level_user_type',
       'ln_ref1_type', 'ln_name_email_match_score', 'ln_telconame',
       'ln_city_cat', 'ln_brand_bin', 'ln_apply_Is_Weekend', 'osType']

dfd = transform_datav2(data, feature_column, a='s_demo_score', modelDisplayName='beta_demo_model_sil', tc='Trench 2', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,3147455,7bd2018d-6fa9-4abb-ad87-2067dfcb0174,c791505d-80a4-4630-b765-2a114a9fc91a,0.510103,2026-01-16T19:17:39.466952,2026-01-16T19:17:39.466952,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""1"", ""ln_doc_type_rolled...",Cash November 25 Models,9e875040-07d0-4bb0-b479-c10255dd1b22,2026-01-16T19:17:39.466952,{},Trench 2,iOS,Dev_Test,2025-10-05
1,3224825,36b25813-702a-4326-9154-f067b19d49ea,9e92aa66-18f2-4991-ace6-388cd7a5e287,0.475738,2026-01-16T19:17:39.467483,2026-01-16T19:17:39.467483,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""1"", ""ln_doc_type_rolled...",Cash November 25 Models,59f5ecbe-5930-4fc3-9871-b82536b996e8,2026-01-16T19:17:39.467483,{},Trench 2,Android,Dev_Test,2025-07-13
2,3497052,6072249c-1e69-4e58-ad5f-779c96e2df7b,a1d77add-4874-40bc-9c84-59e4e132c006,0.735441,2026-01-16T19:17:39.467483,2026-01-16T19:17:39.467483,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""1"", ""ln_doc_type_rolled...",Cash November 25 Models,ad564f49-668f-475a-a547-caa4a15ed111,2026-01-16T19:17:39.467483,{},Trench 2,Android,Dev_Test,2025-10-02
3,3083946,22e306b7-2b9e-4d80-82b9-9a938f91ef6c,c9d7429f-8c4b-46dd-aada-912317f5bc83,0.517193,2026-01-16T19:17:39.468032,2026-01-16T19:17:39.468032,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""1"", ""ln_doc_type_rolled...",Cash November 25 Models,87098952-668a-46a3-8f4d-9bb82bb139f4,2026-01-16T19:17:39.468032,{},Trench 2,iOS,Dev_Test,2025-07-13
4,2487157,685b73f4-7689-4b47-861c-b346a776b311,3c71e4f2-85cc-4b0e-8de4-46d5d3fbcc3c,0.595762,2026-01-16T19:17:39.468032,2026-01-16T19:17:39.468032,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""1"", ""ln_doc_type_rolled...",Cash November 25 Models,ca9b8486-3382-474d-9383-1cefcf2d1543,2026-01-16T19:17:39.468032,{},Trench 2,Android,Dev_Train,2024-10-08


In [183]:
dfd['customerId'] = pd.to_numeric(dfd['customerId'], errors='coerce')

In [184]:
dfd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13192 entries, 0 to 13191
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   customerId            13192 non-null  int64  
 1   digitalLoanAccountId  13192 non-null  object 
 2   crifApplicationId     13192 non-null  object 
 3   prediction            13192 non-null  float64
 4   start_time            13192 non-null  object 
 5   end_time              13192 non-null  object 
 6   modelDisplayName      13192 non-null  object 
 7   modelVersionId        13192 non-null  object 
 8   calcFeature           13192 non-null  object 
 9   subscription_name     13192 non-null  object 
 10  message_id            13192 non-null  object 
 11  publish_time          13192 non-null  object 
 12  attributes            13192 non-null  object 
 13  trenchCategory        13192 non-null  object 
 14  deviceOs              13192 non-null  object 
 15  Data_selection     

In [185]:
result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,8408,2025-02-01,2025-10-15
1,Dev_Train,4784,2024-08-01,2025-01-31


In [186]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c9aa10c9-9b85-46dd-a3a4-a1ad928257e0>

##### Trench 3

In [187]:
sq = """select 
distinct 
loanmaster.customerId customer_id,
r.digitalLoanAccountId,
r.s_demo_score,
ln_vas_opted_flag,
ln_doc_type_rolled,
ln_industry_new_cat_bin,
ln_marital_status,
ln_age,
ln_education_level,
ln_cnt_dependents,
ln_ref2_type,
ln_loan_level_user_type,
ln_ref1_type,
ln_name_email_match_score,
ln_telconame,
ln_city_cat,
ln_brand_bin,
ln_apply_Is_Weekend,
r.ln_os_type osType,
date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) application_date,
case when date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime))
        between '2024-08-01' and '2025-01-31' then 'Dev_Train'
        when date(if(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2024-08-01' then 'Pre_Train'
                else 'Dev_Test' end as Data_selection
from worktable_data_analysis.sil_beta_demo_all_applied_backscored_20240801_20251015 r
left join risk_credit_mis.loan_master_table loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
where s_demo_score is not null
and trench_category = 'Trench 3'
and date(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime)) < '2025-11-17'
 ;
"""
data = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the dataframe is:\t {data.shape}")

Job ID 0b0ca6f5-ac1d-444c-94e4-8b347f746b17 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the dataframe is:	 (13866, 21)


In [188]:
data.columns

Index(['customer_id', 'digitalLoanAccountId', 's_demo_score',
       'ln_vas_opted_flag', 'ln_doc_type_rolled', 'ln_industry_new_cat_bin',
       'ln_marital_status', 'ln_age', 'ln_education_level',
       'ln_cnt_dependents', 'ln_ref2_type', 'ln_loan_level_user_type',
       'ln_ref1_type', 'ln_name_email_match_score', 'ln_telconame',
       'ln_city_cat', 'ln_brand_bin', 'ln_apply_Is_Weekend', 'osType',
       'application_date', 'Data_selection'],
      dtype='object')

In [189]:
feature_column = [ 'ln_vas_opted_flag', 'ln_doc_type_rolled', 'ln_industry_new_cat_bin',
       'ln_marital_status', 'ln_age', 'ln_education_level',
       'ln_cnt_dependents', 'ln_ref2_type', 'ln_loan_level_user_type',
       'ln_ref1_type', 'ln_name_email_match_score', 'ln_telconame',
       'ln_city_cat', 'ln_brand_bin', 'ln_apply_Is_Weekend', 'osType']

dfd = transform_datav2(data, feature_column, a='s_demo_score', modelDisplayName='beta_demo_model_sil', tc='Trench 3', subscription_name = 'Cash November 25 Models') 
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,crifApplicationId,prediction,start_time,end_time,modelDisplayName,modelVersionId,calcFeature,subscription_name,message_id,publish_time,attributes,trenchCategory,deviceOs,Data_selection,Application_date
0,2603453,635ed803-deec-4b0b-8941-11b81674454b,795376b9-ff31-4b84-a498-e37c6f6393eb,0.135418,2026-01-16T19:17:48.287394,2026-01-16T19:17:48.287394,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,dec63272-e5e0-444c-9332-6eb75cbd28d9,2026-01-16T19:17:48.287394,{},Trench 3,Android,Dev_Train,2024-11-16
1,2990791,5b09c15b-1ef0-4e15-af2b-7111a8584964,78be01aa-efc7-40f9-9ac8-bcc5e5c81e48,0.100234,2026-01-16T19:17:48.287394,2026-01-16T19:17:48.287394,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,b8d13c79-b498-4b4c-aac3-9ae057c42d0f,2026-01-16T19:17:48.287394,{},Trench 3,Android,Dev_Test,2025-06-01
2,3334386,487a2bb8-03cc-4895-b561-4e81f519c19e,957678a7-18ab-4f59-9fa2-a27bb7c0d32e,0.206825,2026-01-16T19:17:48.287394,2026-01-16T19:17:48.287394,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""1"", ""ln_doc_type_rolled...",Cash November 25 Models,6f5b887a-8afc-4e48-a989-cf9f730dcfe4,2026-01-16T19:17:48.287394,{},Trench 3,Android,Dev_Test,2025-09-20
3,3131553,f3d34429-c090-4a35-8299-638a687b91d5,95792d62-1ef0-4077-96a9-e2c236130088,0.202157,2026-01-16T19:17:48.287394,2026-01-16T19:17:48.287394,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""1"", ""ln_doc_type_rolled...",Cash November 25 Models,c3629de9-c79a-4a93-8f9a-4b52e27ada21,2026-01-16T19:17:48.287394,{},Trench 3,Android,Dev_Test,2025-07-16
4,2496121,91d2b59b-6c4a-42c5-8208-f1f47e9a630e,189772d7-05ab-4570-9d4f-660658f0ce21,0.176284,2026-01-16T19:17:48.287394,2026-01-16T19:17:48.287394,beta_demo_model_sil,v2,"{""ln_vas_opted_flag"": ""0"", ""ln_doc_type_rolled...",Cash November 25 Models,675327e2-1e3a-4699-9243-4c85095a6c03,2026-01-16T19:17:48.287394,{},Trench 3,Android,Dev_Train,2024-09-27


In [190]:
dfd['customerId'] = pd.to_numeric(dfd['customerId'], errors='coerce')

In [191]:
dfd.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13866 entries, 0 to 13865
Data columns (total 17 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   customerId            13866 non-null  int64  
 1   digitalLoanAccountId  13866 non-null  object 
 2   crifApplicationId     13866 non-null  object 
 3   prediction            13866 non-null  float64
 4   start_time            13866 non-null  object 
 5   end_time              13866 non-null  object 
 6   modelDisplayName      13866 non-null  object 
 7   modelVersionId        13866 non-null  object 
 8   calcFeature           13866 non-null  object 
 9   subscription_name     13866 non-null  object 
 10  message_id            13866 non-null  object 
 11  publish_time          13866 non-null  object 
 12  attributes            13866 non-null  object 
 13  trenchCategory        13866 non-null  object 
 14  deviceOs              13866 non-null  object 
 15  Data_selection     

In [192]:

result = dfd.groupby('Data_selection').agg(
    digitalLoanAccountId_count=('digitalLoanAccountId', 'count'),
    Application_date_min=('Application_date', 'min'),
    Application_date_max=('Application_date', 'max')
).reset_index()

result

Unnamed: 0,Data_selection,digitalLoanAccountId_count,Application_date_min,Application_date_max
0,Dev_Test,8346,2025-02-01,2025-10-15
1,Dev_Train,5520,2024-08-01,2025-01-31


In [193]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details_20260116"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_APPEND",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(dfd, table_id, job_config=job_config)
job.result() 

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=025db971-6e40-479a-a13a-79e7081b675c>

# End