# Define Library

In [2570]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.
# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os
import tempfile
import time
from datetime import datetime
import uuid
import joblib
import uuid
from sklearn.metrics import roc_auc_score
from datetime import datetime, timedelta
import gcsfs
import duckdb as dd
import pickle
import joblib
from typing import Union
import io
# Credentials: respect GOOGLE_APPLICATION_CREDENTIALS when the environment already
# provides it, and only fall back to the workstation-specific ADC file otherwise.
# NOTE(review): the fallback is an absolute, user-specific Windows path; anyone
# else running this notebook must export GOOGLE_APPLICATION_CREDENTIALS first.
path = os.environ.setdefault(
    'GOOGLE_APPLICATION_CREDENTIALS',
    r'C:\Users\Dwaipayan\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json',
)
# Set the default project before the client is built so both agree.
os.environ["GOOGLE_CLOUD_PROJECT"] = "prj-prod-dataplatform"
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Show every column, and cap printed rows at 100.
# Use the canonical lower-case option keys instead of relying on pandas'
# case-insensitive pattern matching to resolve them.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Function

## calculate_gini_for_threedigitscore

In [2571]:
# def calculate_gini_for_threedigitscore(scores, labels):
#     """
#     Calculate Gini coefficient for three-digit scores and binary labels
    
#     Parameters:
#     scores: array-like, three-digit scores (higher is better)
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Combine scores and labels into a DataFrame
#     df = pd.DataFrame({'score': scores, 'label': labels})
    
#     # Sort by score in descending order (assuming higher score is better)
#     df = df.sort_values('score', ascending=False)
    
#     # Calculate cumulative values
#     total_pos = df['label'].sum()
#     total_neg = len(df) - total_pos
    
#     if total_pos == 0 or total_neg == 0:
#         return 0
    
#     # Calculate cumulative proportions
#     cum_pos = df['label'].cumsum()
#     cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
#     # Convert to proportions
#     cum_pos_prop = cum_pos / total_pos
#     cum_neg_prop = cum_neg / total_neg
    
#     # Calculate Gini
#     gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
    
#     return gini


## Modified version (active; supersedes the commented-out one above): scores ordered ascending, Gini = 2 * AUC - 1

def calculate_gini_for_threedigitscore(scores, labels):
    """
    Calculate Gini coefficient for three-digit scores and binary labels

    The score is read as "higher is better": the result is positive when
    defaulters (label 1) tend to have LOWER scores than non-defaulters.
    Gini = 2 * AUC - 1, where AUC is the share of (non-defaulter, defaulter)
    pairs in which the non-defaulter has the higher score; tied pairs count
    as one half, so the result does not depend on the input row order.

    Parameters:
    scores: array-like, three-digit scores (higher is better)
    labels: array-like, binary values (0 or 1, where 1 indicates default)

    Returns:
    float: Gini coefficient. 0.0 is returned when, after dropping rows with a
    missing score or label, only one class (or no row at all) remains.
    """
    # Combine scores and labels into a DataFrame; rows with a missing score or
    # label cannot be ranked/classified, so they are excluded up front.
    frame = pd.DataFrame({'score': scores, 'label': labels})
    frame = frame.dropna(subset=['score', 'label'])

    is_good = frame['label'] == 0
    n_good = int(is_good.sum())
    n_bad = len(frame) - n_good

    # Gini is undefined with a single class; keep the historical 0 return value.
    if n_bad == 0 or n_good == 0:
        return 0.0

    # Ascending average ranks: tied scores share the same rank, which makes the
    # Mann-Whitney U statistic (and therefore the AUC) tie-aware.
    ranks = frame['score'].rank(method='average')
    u_good = ranks[is_good].sum() - n_good * (n_good + 1) / 2.0

    # Probability that a random non-defaulter outranks a random defaulter
    auc = u_good / (n_good * n_bad)

    return float(2.0 * auc - 1.0)

## calculate_gini

In [2572]:
def calculate_gini(pd_scores, bad_indicators):
    """
    Gini coefficient (2 * ROC AUC - 1) of a score against a binary outcome.

    Parameters:
    pd_scores: array-like of scores/probabilities (cast to float)
    bad_indicators: array-like of binary outcomes, 0/1 (cast to int)

    Returns:
    float: Gini coefficient, or NaN when either input is empty, when fewer
    than two distinct outcome values are present, or when roc_auc_score
    raises ValueError.
    """
    score_values = np.array(pd_scores, dtype=float)
    outcome_flags = np.array(bad_indicators, dtype=int)

    # Nothing to evaluate, or only one class present: AUC is undefined
    nothing_to_score = len(score_values) == 0 or len(outcome_flags) == 0
    if nothing_to_score or len(np.unique(outcome_flags)) < 2:
        return np.nan

    try:
        return 2 * roc_auc_score(outcome_flags, score_values) - 1
    except ValueError:
        # sklearn rejected the inputs; report "no value"
        return np.nan

## calculate_hybrid_gini

In [2573]:
# def calculate_hybrid_gini(scores, labels):
#     """
#     Calculate Gini coefficient handling both PD values and three-digit scores
    
#     Parameters:
#     scores: array-like, contains either PD values (0-1) or three-digit scores
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Convert inputs to numpy arrays
#     scores = np.array(scores, dtype=float)
#     labels = np.array(labels, dtype=int)
    
#     # Basic validation
#     if len(scores) == 0 or len(labels) == 0:
#         return np.nan
    
#     if len(np.unique(labels)) < 2:
#         return np.nan
        
#     # Determine if scores are PD values or three-digit scores
#     # PD values are between 0 and 1
#     is_pd = np.all((scores >= 0) & (scores <= 1))
    
#     if is_pd:
#         try:
#             auc = roc_auc_score(labels, scores)
#             gini = 2 * auc - 1
#             return gini
#         except ValueError:
#             return np.nan
#     else:
#         # Handle as three-digit score
#         df = pd.DataFrame({'score': scores, 'label': labels})
#         df = df.sort_values('score', ascending=False)
        
#         total_pos = df['label'].sum()
#         total_neg = len(df) - total_pos
        
#         if total_pos == 0 or total_neg == 0:
#             return np.nan
        
#         cum_pos = df['label'].cumsum()
#         cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
#         cum_pos_prop = cum_pos / total_pos
#         cum_neg_prop = cum_neg / total_neg
        
#         gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
#         return gini

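## Modified version (active; supersedes the commented-out one above): three-digit branch orders scores ascending, Gini = 2 * AUC - 1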

def calculate_hybrid_gini(scores, labels):
    """
    Calculate Gini coefficient handling both PD values and three-digit scores

    The score type is auto-detected: if every (non-missing) score lies in
    [0, 1] the input is treated as PD values, otherwise as three-digit scores.

    Sign convention (Gini = 2 * AUC - 1 in both branches):
    - PD values: positive when defaulters tend to have HIGHER values.
    - Three-digit scores: positive when defaulters tend to have LOWER scores
      (i.e. a higher score means lower risk).

    Parameters:
    scores: array-like, contains either PD values (0-1) or three-digit scores
    labels: array-like, binary values (0 or 1, where 1 indicates default)

    Returns:
    float: Gini coefficient, or NaN when there is no data, when only one class
    is present, or when roc_auc_score rejects the PD inputs.
    """
    # Convert inputs to numpy arrays
    scores = np.array(scores, dtype=float)
    labels = np.array(labels, dtype=int)

    # Drop observations without a score. Otherwise a single NaN would push PD
    # inputs into the three-digit branch and be ranked as the highest score.
    if scores.ndim == 1 and scores.shape == labels.shape:
        observed = ~np.isnan(scores)
        scores, labels = scores[observed], labels[observed]

    # Basic validation
    if len(scores) == 0 or len(labels) == 0:
        return np.nan

    if len(np.unique(labels)) < 2:
        return np.nan

    # Determine if scores are PD values or three-digit scores
    # PD values are between 0 and 1
    is_pd = np.all((scores >= 0) & (scores <= 1))

    if is_pd:
        try:
            auc = roc_auc_score(labels, scores)
            return 2 * auc - 1
        except ValueError:
            return np.nan

    # Handle as three-digit score (higher score = lower risk)
    frame = pd.DataFrame({'score': scores, 'label': labels})
    is_good = frame['label'] == 0
    n_good = int(is_good.sum())
    n_bad = len(frame) - n_good

    if n_bad == 0 or n_good == 0:
        return np.nan

    # Ascending average ranks give a tie-aware Mann-Whitney U, so the result
    # no longer depends on the arbitrary order of rows with equal scores.
    ranks = frame['score'].rank(method='average')
    u_good = ranks[is_good].sum() - n_good * (n_good + 1) / 2.0

    # Probability that a random non-defaulter outranks a random defaulter
    auc = u_good / (n_good * n_bad)

    # Calculate Gini using the same formula as PD values
    return float(2.0 * auc - 1.0)

## calculate_periodic_gini_threedigit

In [2574]:
# Main processing code
def calculate_periodic_gini_threedigit(df, score_column, label_column, namecolumn):
    """
    Calculate weekly and monthly Gini coefficients for a three-digit score

    Rows are bucketed by the calendar week and calendar month of
    'disbursementdate', and calculate_gini_for_threedigitscore is evaluated on
    every bucket. No minimum bucket size is enforced. The caller's DataFrame
    is not modified.

    Parameters:
    df: DataFrame with a 'disbursementdate' column and the score/label columns
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: label of the bad-rate definition; stored in the 'bad_rate'
        column and used in the name of the Gini column

    Returns:
    DataFrame sorted by start_date with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period ('Week' or 'Month'),
    Model_Name, version and bad_rate.
    """
    # Work on a copy so the caller's frame keeps its original columns and dtypes
    df = df.copy()

    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])

    value_columns = [score_column, label_column]

    def _gini_per_period(freq, period_label):
        # Group only the score/label columns by the period key, so the grouping
        # key itself is never handed to the applied function.
        period_key = df['disbursementdate'].dt.to_period(freq).rename('bucket')
        per_period = (
            df.groupby(period_key)[value_columns]
            .apply(lambda grp: calculate_gini_for_threedigitscore(grp[score_column], grp[label_column]))
            .reset_index(name='gini')
        )
        per_period['period'] = period_label
        per_period['start_date'] = per_period['bucket'].apply(lambda p: p.to_timestamp())
        return per_period

    output_columns = ['start_date', 'end_date', 'gini', 'period']

    # Calculate weekly Gini (a week spans start_date .. start_date + 6 days)
    weekly_gini = _gini_per_period('W', 'Week')
    weekly_gini['end_date'] = weekly_gini['start_date'] + pd.Timedelta(days=6)
    weekly_gini = weekly_gini[output_columns]

    # Calculate monthly Gini (end_date is the last day of the month)
    monthly_gini = _gini_per_period('M', 'Month')
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[output_columns]

    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)

    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn

    return gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'})

## calculate_periodic_gini

In [2575]:
def calculate_periodic_gini(df, score_column, label_column, namecolumn):
    """
    Weekly and monthly Gini coefficients by disbursement date.

    Parameters:
    df: DataFrame holding 'disbursementdate' plus the score and label columns
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; written to 'bad_rate' and embedded in the
        name of the Gini column

    Returns:
    DataFrame sorted by start_date with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate.
    Weeks with fewer than 10 rows and months with fewer than 20 rows get NaN.

    Raises:
    ValueError: if a required column is missing from df
    """
    needed = ['disbursementdate', score_column, label_column]
    if any(col not in df.columns for col in needed):
        raise ValueError(f"Missing required columns. Need: {needed}")

    # Private copy: the caller's frame must stay untouched
    work = df.copy()
    work['disbursementdate'] = pd.to_datetime(work['disbursementdate'])

    # Coerce score and label to numbers, then discard rows that failed to parse
    for numeric_col in (score_column, label_column):
        work[numeric_col] = pd.to_numeric(work[numeric_col], errors='coerce')
    work = work.dropna(subset=[score_column, label_column])

    def _gini_if_enough(group, min_rows):
        # Buckets below the minimum size are reported as NaN
        if len(group) < min_rows:
            return np.nan
        return calculate_gini(group[score_column], group[label_column])

    output_columns = ['start_date', 'end_date', 'gini', 'period']

    # Weekly buckets
    work['week'] = work['disbursementdate'].dt.to_period('W')
    by_week = work.groupby('week').apply(lambda grp: _gini_if_enough(grp, 10))
    weekly = by_week.reset_index(name='gini')
    weekly['period'] = 'Week'
    weekly['start_date'] = weekly['week'].apply(lambda wk: wk.to_timestamp())
    weekly['end_date'] = weekly['start_date'] + timedelta(days=6)
    weekly = weekly[output_columns]

    # Monthly buckets
    work['month'] = work['disbursementdate'].dt.to_period('M')
    by_month = work.groupby('month').apply(lambda grp: _gini_if_enough(grp, 20))
    monthly = by_month.reset_index(name='gini')
    monthly['period'] = 'Month'
    monthly['start_date'] = monthly['month'].apply(lambda mo: mo.to_timestamp())
    monthly['end_date'] = monthly['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly = monthly[output_columns]

    # Stack both granularities and order chronologically
    combined = pd.concat([weekly, monthly])
    combined = combined.sort_values(by='start_date').reset_index(drop=True)

    # Metadata columns
    combined['Model_Name'] = score_column
    combined['version'] = '1.1.0'
    combined['bad_rate'] = namecolumn

    return combined.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'})

## calculate_periodic_hybrid_gini

In [2576]:
def calculate_periodic_hybrid_gini(df, score_column, label_column, namecolumn):
    """
    Weekly and monthly Gini coefficients for a score column that may hold
    either PD values or three-digit scores (delegates to calculate_hybrid_gini).

    Parameters:
    df: DataFrame holding 'disbursementdate' plus the score and label columns
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; written to 'bad_rate' and embedded in the
        name of the Gini column

    Returns:
    DataFrame sorted by start_date with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate.
    Weeks with fewer than 10 rows and months with fewer than 20 rows get NaN.

    Raises:
    ValueError: if a required column is missing from df
    """
    expected = ['disbursementdate', score_column, label_column]
    absent = [col for col in expected if col not in df.columns]
    if absent:
        raise ValueError(f"Missing required columns. Need: {expected}")

    # Operate on a copy so the input frame is left as the caller passed it
    data = df.copy()
    data['disbursementdate'] = pd.to_datetime(data['disbursementdate'])

    # Non-numeric scores/labels become NaN and are removed
    data[score_column] = pd.to_numeric(data[score_column], errors='coerce')
    data[label_column] = pd.to_numeric(data[label_column], errors='coerce')
    data = data.dropna(subset=[score_column, label_column])

    def _bucket_gini(bucket, minimum):
        # Too few observations in the bucket -> NaN
        if len(bucket) >= minimum:
            return calculate_hybrid_gini(bucket[score_column], bucket[label_column])
        return np.nan

    final_columns = ['start_date', 'end_date', 'gini', 'period']

    # --- weekly ---
    data['week'] = data['disbursementdate'].dt.to_period('W')
    week_table = (
        data.groupby('week')
        .apply(lambda bucket: _bucket_gini(bucket, 10))
        .reset_index(name='gini')
    )
    week_table['period'] = 'Week'
    week_table['start_date'] = week_table['week'].apply(lambda per: per.to_timestamp())
    week_table['end_date'] = week_table['start_date'] + pd.Timedelta(days=6)
    week_table = week_table[final_columns]

    # --- monthly ---
    data['month'] = data['disbursementdate'].dt.to_period('M')
    month_table = (
        data.groupby('month')
        .apply(lambda bucket: _bucket_gini(bucket, 20))
        .reset_index(name='gini')
    )
    month_table['period'] = 'Month'
    month_table['start_date'] = month_table['month'].apply(lambda per: per.to_timestamp())
    month_table['end_date'] = month_table['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    month_table = month_table[final_columns]

    # Merge both granularities in chronological order
    result = pd.concat([week_table, month_table])
    result = result.sort_values(by='start_date').reset_index(drop=True)

    # Metadata columns
    result['Model_Name'] = score_column
    result['version'] = '1.1.0'
    result['bad_rate'] = namecolumn

    return result.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'})

## calculate_periodic_gini_debug

In [2577]:
def calculate_periodic_gini_debug(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients with detailed debugging

    Same weekly/monthly pipeline as calculate_periodic_gini (it calls
    calculate_gini per period), but it prints diagnostics at every step:
    dtypes before/after conversion, null counts, dropped rows, group sizes
    and the Gini value of each period.

    Differences visible in the code below:
    - Weeks with fewer than 10 rows and months with fewer than 20 rows are
      skipped entirely (no NaN row is emitted for them).
    - If no period qualifies, a warning is printed and an empty DataFrame
      is returned.

    Parameters:
    df: DataFrame with 'disbursementdate' and the score/label columns
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; stored in 'bad_rate' and used in the name
        of the Gini column

    Returns:
    DataFrame sorted by start_date with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate
    (or an empty DataFrame, see above).

    Raises:
    ValueError: if a required column is missing from df
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    print(f"Original disbursementdate dtype: {df['disbursementdate'].dtype}")
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    print(f"Converted disbursementdate dtype: {df['disbursementdate'].dtype}")
    
    # Ensure score and label columns are numeric
    print(f"\nBefore conversion:")
    print(f"  {score_column} dtype: {df[score_column].dtype}, non-null: {df[score_column].notna().sum()}")
    print(f"  {label_column} dtype: {df[label_column].dtype}, non-null: {df[label_column].notna().sum()}")
    
    # errors='coerce' turns unparseable values into NaN; the counts printed
    # next show how many values were lost that way.
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    print(f"\nAfter conversion:")
    print(f"  {score_column} - non-null: {df[score_column].notna().sum()}, NaN: {df[score_column].isna().sum()}")
    print(f"  {label_column} - non-null: {df[label_column].notna().sum()}, NaN: {df[label_column].isna().sum()}")
    print(f"  {score_column} sample values: {df[score_column].dropna().head()}")
    print(f"  {label_column} unique values: {df[label_column].dropna().unique()}")
    
    # Drop rows with invalid values
    initial_rows = len(df)
    df = df.dropna(subset=[score_column, label_column])
    print(f"\nRows dropped: {initial_rows - len(df)}")
    print(f"Remaining rows: {len(df)}")
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_groups = df.groupby('week').size()
    print(f"\nWeekly groups: {len(weekly_groups)} weeks")
    print(f"Weekly group sizes:\n{weekly_groups.describe()}\n{weekly_groups.value_counts().head()}")
    print(f"Weeks with < 10 samples: {(weekly_groups < 10).sum()}")
    
    # Explicit loop (instead of groupby.apply) so each week can be logged
    weekly_results = []
    for week, group_df in df.groupby('week'):
        if len(group_df) >= 10:
            gini_val = calculate_gini(group_df[score_column], group_df[label_column])
            print(f"  Week {week}: n={len(group_df)}, gini={gini_val}")
            weekly_results.append({'week': week, 'gini': gini_val, 'count': len(group_df)})
        else:
            print(f"  Week {week}: n={len(group_df)} - SKIPPED (< 10 samples)")
    
    weekly_gini = pd.DataFrame(weekly_results) if weekly_results else pd.DataFrame()
    
    if len(weekly_gini) > 0:
        weekly_gini['period'] = 'Week'
        weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
        weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
        # The 'count' collected above is only used for logging; it is dropped here
        weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_groups = df.groupby('month').size()
    print(f"\nMonthly groups: {len(monthly_groups)} months")
    print(f"Monthly group sizes:\n{monthly_groups.describe()}\n{monthly_groups.value_counts().head()}")
    print(f"Months with < 20 samples: {(monthly_groups < 20).sum()}")
    
    monthly_results = []
    for month, group_df in df.groupby('month'):
        if len(group_df) >= 20:
            gini_val = calculate_gini(group_df[score_column], group_df[label_column])
            print(f"  Month {month}: n={len(group_df)}, gini={gini_val}")
            monthly_results.append({'month': month, 'gini': gini_val, 'count': len(group_df)})
        else:
            print(f"  Month {month}: n={len(group_df)} - SKIPPED (< 20 samples)")
    
    monthly_gini = pd.DataFrame(monthly_results) if monthly_results else pd.DataFrame()
    
    if len(monthly_gini) > 0:
        monthly_gini['period'] = 'Month'
        monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
        # Last day of the month: first day + 1 month - 1 day
        monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
        monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine results
    # Either table may be an empty, column-less DataFrame, so concatenate only
    # the ones that actually hold rows.
    if len(weekly_gini) > 0 and len(monthly_gini) > 0:
        gini_results = pd.concat([weekly_gini, monthly_gini], ignore_index=True)
    elif len(weekly_gini) > 0:
        gini_results = weekly_gini.copy()
    elif len(monthly_gini) > 0:
        gini_results = monthly_gini.copy()
    else:
        print("\n⚠️  WARNING: No valid Gini calculations produced!")
        return pd.DataFrame()
    
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    print(f"\n✓ Final results: {len(gini_results)} rows")
    return gini_results

## calculate_periodic_gini_producttype

In [None]:
def calculate_periodic_gini_producttype(df, score_column, label_column, namecolumn, product_column=None):
    """
    Weekly and monthly Gini coefficients for the whole portfolio ('Overall')
    and, optionally, for every non-null value of a product-type column.

    Parameters:
    df: DataFrame holding 'disbursementdate' plus the score and label columns
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; written to 'bad_rate' and embedded in the
        name of the Gini column
    product_column: (optional) name of the product type column to segment by

    Returns:
    DataFrame with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate
    and product_type. Segments are stacked in the order 'Overall', then the
    product values in order of first appearance; rows are sorted by
    start_date within each segment. Weeks with fewer than 10 rows and months
    with fewer than 20 rows get NaN.

    Raises:
    ValueError: if a required column or the product column is missing
    """
    needed = ['disbursementdate', score_column, label_column]
    if any(col not in df.columns for col in needed):
        raise ValueError(f"Missing required columns. Need: {needed}")

    if product_column and product_column not in df.columns:
        raise ValueError(f"Product column '{product_column}' not found in dataframe")

    # Clean a private copy: parse dates, coerce numerics, drop unusable rows
    cleaned = df.copy()
    cleaned['disbursementdate'] = pd.to_datetime(cleaned['disbursementdate'])
    for numeric_col in (score_column, label_column):
        cleaned[numeric_col] = pd.to_numeric(cleaned[numeric_col], errors='coerce')
    cleaned = cleaned.dropna(subset=[score_column, label_column])

    # Segments to evaluate: the full data first, then one slice per product
    segments = [('Overall', cleaned)]
    if product_column:
        segments.extend(
            (str(value), cleaned[cleaned[product_column] == value])
            for value in cleaned[product_column].unique()
            if pd.notna(value)
        )

    output_columns = ['start_date', 'end_date', 'gini', 'period']
    gini_column = f'{score_column}_{namecolumn}_gini'

    def _period_table(segment, freq, key, period_label, min_rows):
        # Gini per period bucket; buckets below min_rows are reported as NaN
        bucketed = segment.copy()
        bucketed[key] = bucketed['disbursementdate'].dt.to_period(freq)
        table = bucketed.groupby(key).apply(
            lambda grp: calculate_gini(grp[score_column], grp[label_column])
            if len(grp) >= min_rows else np.nan
        ).reset_index(name='gini')
        table['period'] = period_label
        table['start_date'] = table[key].apply(lambda per: per.to_timestamp())
        return table

    collected = []
    for segment_name, segment_df in segments:
        weekly = _period_table(segment_df, 'W', 'week', 'Week', 10)
        weekly['end_date'] = weekly['start_date'] + timedelta(days=6)

        monthly = _period_table(segment_df, 'M', 'month', 'Month', 20)
        monthly['end_date'] = monthly['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)

        # Stack both granularities for this segment, chronologically
        segment_result = pd.concat(
            [weekly[output_columns], monthly[output_columns]], ignore_index=True
        )
        segment_result = segment_result.sort_values(by='start_date').reset_index(drop=True)

        # Metadata columns
        segment_result['Model_Name'] = score_column
        segment_result['version'] = '1.1.0'
        segment_result['bad_rate'] = namecolumn
        segment_result['product_type'] = segment_name

        collected.append(segment_result.rename(columns={'gini': gini_column}))

    return pd.concat(collected, ignore_index=True)


# Usage:
# Calculate overall + by product type.
# The product-aware variant must be used here: calculate_periodic_gini does not
# accept a `product_column` argument and would raise TypeError.
# NOTE(review): `df_concat` is only built in a later cell, and the FPD0 queries
# shown in this notebook do not appear to select 'new_loan_type' -- confirm the
# frame contains that column before running this cell.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 
    'Alpha_cic_sil_score', 
    'deffpd0', 
    'FPD0',
    product_column='new_loan_type'
)

# Filter results by product type if needed:
# overall_gini = gini_results[gini_results['product_type'] == 'Overall']
# sil_instore_gini = gini_results[gini_results['product_type'] == 'sil_instore']
# sil_zero_gini = gini_results[gini_results['product_type'] == 'sil_zero']

# SIL

# Alpha - CIC-SIL-Model

## FPD0

## Test

In [2578]:
# "Test" sample for the Alpha CIC-SIL model, FPD0 target.
# The query takes predictions from audit_balance.ml_model_run_details for the two
# model display names, joins them to the loan master table and to the FPD flags
# derived from loan_deliquency_data, and keeps disbursed loans with a non-null
# score whose FPD0 observation has matured (flg_mature_fpd0 = 1).
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the following INNER JOIN both require loanmaster
# columns to be non-null.
# NOTE(review): calcFeature is selected in the CTE but not used in the final SELECT.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  ;
"""
# Run the query with the module-level BigQuery client and load the result into pandas
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2353324,a638d06b-1820-4071-998f-031169de6960,60823533240026,0.0876491921576961,2025-07-26 10:26:25,2025-07-26,2025-07,Test,1,1
1,3578708,4a21632d-2434-4d58-bd1e-b246d9f937c0,60835787080019,0.0880895386745904,2025-07-26 08:27:28,2025-07-26,2025-07,Test,0,1
2,3578714,8a2f49a6-30bb-40e3-a9d1-358b30035125,60835787140017,0.0917472866916413,2025-07-26 09:21:38,2025-07-26,2025-07,Test,0,1
3,3578737,4d1e6a04-90d0-4c3e-ac1a-60e1b70e34de,60835787370014,0.0442445577833325,2025-07-26 08:57:23,2025-07-26,2025-07,Test,0,1
4,3578779,079e1b66-3d2c-48a1-a73a-b877111d3e7b,60835787790014,0.1152634177788912,2025-07-26 09:29:34,2025-07-26,2025-07,Test,0,1


In [2579]:
# Keep an independent copy of the 'Test' rows: `dfd` is reassigned by the training query that follows.
df1 = dfd.copy()

## Train

In [2580]:
# "Train" sample for the Alpha CIC-SIL model, FPD0 target.
# Same query shape as the 'Test' cell, but predictions come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged
# with Data_selection = 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the following INNER JOIN both require loanmaster
# columns to be non-null.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
"""
# Run the query with the module-level BigQuery client and load the result into pandas
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2048463,e0d005fe-f325-49a6-80aa-2c541f6a48cd,60820484630015,0.096046,2023-05-17 12:31:15,2023-05-17,2023-05,Train,0,1
1,2005625,52227d60-7d71-4b7d-afa3-b7531b9da97a,60820056250013,0.155699,2023-04-20 16:11:51,2023-04-20,2023-04,Train,0,1
2,2110966,d2d971cf-2c20-4d2a-88cb-d93bf276a831,60821109660012,0.09394,2023-06-28 15:10:01,2023-06-28,2023-06,Train,0,1
3,2039652,842e9fd9-ca3d-40d0-b82e-aa0e4b63c4ad,60820396520017,0.153677,2023-05-11 16:49:08,2023-05-11,2023-05,Train,0,1
4,2102681,8e3d012a-852c-4330-9552-73bcbfedc26f,60821026810015,0.065193,2023-06-21 17:44:02,2023-06-24,2023-06,Train,0,1


In [2581]:
# Independent copy of the Train query result (the query cell above labels its
# rows 'Train'); `dfd` itself is reused by later query cells.
df2 = dfd.copy()

In [2582]:
# Stack the two snapshots (df1, df2) row-wise into one frame; ignore_index=True
# discards the original indexes and renumbers rows 0..n-1.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Print column dtypes / non-null counts as a quick sanity check.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 177174 entries, 0 to 177173
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             177174 non-null  object        
 1   digitalLoanAccountId   177174 non-null  object        
 2   loanAccountNumber      177174 non-null  object        
 3   Alpha_cic_sil_score    177174 non-null  object        
 4   appln_submit_datetime  177174 non-null  datetime64[us]
 5   disbursementdate       177174 non-null  dbdate        
 6   Application_month      177174 non-null  object        
 7   Data_selection         177174 non-null  object        
 8   deffpd0                177174 non-null  Int64         
 9   flg_mature_fpd0        177174 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 13.9+ MB


In [2583]:
# The score column is `object` dtype after the concat (see the .info() output
# above), so convert it to numeric first; errors='coerce' turns any
# unparseable value into NaN instead of raising.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')

# Row count per application month, in chronological order. This must be the
# LAST expression of the cell: a notebook only displays the value of the final
# expression, so in its original position (before the assignment) the result
# was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2584]:
# Write the combined FPD0 frame to a CSV in the current working directory.
# `index=False` is not passed, so the row index is written as the first column.
df_concat.to_csv(r"Alpha_cic_sil_score.csv")

In [2585]:
# Compute Gini for the 'Alpha_cic_sil_score' column against the 'deffpd0'
# target, tagged 'FPD0'. `calculate_periodic_gini` is defined elsewhere in the
# notebook; its exact grouping/return shape is not visible here.
gini_results = calculate_periodic_gini(df_concat, 'Alpha_cic_sil_score', 'deffpd0', 'FPD0')

In [2586]:
# Keep the FPD0 result under its own name; `gini_results` is overwritten by the
# FPD10 section below.
f0 = gini_results.copy()

## FPD10

## Test

In [2587]:
# FPD10 / Test: model scores read from audit_balance.ml_model_run_details,
# joined to the loan master table and the delinquency flags, keeping only
# disbursed loans whose FPD10 flag is mature. The SQL text itself is unchanged.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
  ;
"""
# Execute, materialise as a DataFrame and drop fully duplicated rows
# (first occurrence kept) in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3408574,50eb1c6b-2597-43a0-a9c3-dc1749fa9d75,60834085740012,0.0607213549074042,2025-04-29 12:35:01,2025-04-29,2025-04,Test,0,1
1,3408290,254ab510-1ab7-490a-b32a-49fe846e1183,60834082900013,0.098974218005349,2025-04-29 10:07:40,2025-04-29,2025-04,Test,0,1
2,3408368,fdd65398-7a8a-420e-bb01-dd10773c3aa9,60834083680018,0.1239280241100046,2025-04-29 10:52:27,2025-04-29,2025-04,Test,0,1
3,3408388,3dd8b1e8-9aa1-4528-bd12-1355901bcb58,60834083880012,0.1984644076411129,2025-04-29 11:06:04,2025-04-29,2025-04,Test,0,1
4,3408660,b62030f2-ebde-41b8-a6bb-6677605f0e7d,60834086600019,0.1508793489205254,2025-04-29 13:19:52,2025-04-29,2025-04,Test,0,1


In [2588]:
# Independent copy of the FPD10 Test query result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2589]:
# FPD10 / Train: model scores read from ml_training_model_run_details, joined
# to the loan master table and the delinquency flags, keeping only disbursed
# loans whose FPD10 flag is mature.
#
# FIX: the maturity filter previously read `flg_mature_fpd0 = 1` (copied from
# the FPD0 cell) while the cell selects deffpd10 / flg_mature_fpd10. Loans that
# had reached FPD0 maturity but not FPD10 maturity were therefore included with
# deffpd10 = 0. The filter now uses flg_mature_fpd10, matching the FPD10 Test
# query and the pattern used by every other target in this notebook.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
"""
# Run the query and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully duplicated rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2063866,b21045e8-1f35-413f-b000-058530986eb6,60820638660011,0.075149,2023-05-27 15:12:11,2023-05-27,2023-05,Train,0,1
1,2128581,ee471e6a-52bf-41b8-9338-64f3092aa4b0,60821285810016,0.089451,2023-07-10 12:58:41,2023-07-10,2023-07,Train,0,1
2,2067448,16c7b65f-ad13-4b4d-86de-d630dfcbf73d,60820674480012,0.066453,2023-05-29 18:17:39,2023-05-29,2023-05,Train,0,1
3,1905195,2af9de9b-2fd9-4fb1-a961-bcdd31c64065,60819051950019,0.090321,2023-02-17 19:07:38,2023-02-17,2023-02,Train,0,1
4,1917040,b8d79628-b025-4c6f-8a92-c2c3950e4285,60819170400014,0.092287,2023-02-25 16:54:12,2023-02-25,2023-02,Train,1,1


In [2590]:
# Independent copy of the FPD10 Train query result.
df2 = dfd.copy()

In [2591]:
# Stack the FPD10 Test (df1) and Train (df2) snapshots row-wise;
# ignore_index=True renumbers rows 0..n-1. This overwrites the FPD0 df_concat.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Print column dtypes / non-null counts as a quick sanity check.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173949 entries, 0 to 173948
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             173949 non-null  object        
 1   digitalLoanAccountId   173949 non-null  object        
 2   loanAccountNumber      173949 non-null  object        
 3   Alpha_cic_sil_score    173949 non-null  object        
 4   appln_submit_datetime  173949 non-null  datetime64[us]
 5   disbursementdate       173949 non-null  dbdate        
 6   Application_month      173949 non-null  object        
 7   Data_selection         173949 non-null  object        
 8   deffpd10               173949 non-null  Int64         
 9   flg_mature_fpd10       173949 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 13.6+ MB


In [2592]:
# The score column is `object` dtype after the concat; convert it to numeric.
# errors='coerce' turns any unparseable value into NaN instead of raising.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')
# Compute Gini of the score against the 'deffpd10' target, tagged 'FPD10'.
# `calculate_periodic_gini` is defined elsewhere in the notebook.
gini_results = calculate_periodic_gini(df_concat, 'Alpha_cic_sil_score', 'deffpd10', 'FPD10')

In [2593]:
# Keep the FPD10 result under its own name; `gini_results` is overwritten by
# the FPD30 section below.
f1=gini_results.copy()

## FPD30

## Test

In [2594]:
# FPD30 / Test: model scores read from audit_balance.ml_model_run_details,
# joined to the loan master table and the delinquency flags, keeping only
# disbursed loans whose FPD30 flag is mature. The SQL text itself is unchanged.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  ;
"""
# Execute, materialise as a DataFrame and drop fully duplicated rows
# (first occurrence kept) in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2291594,bf7b6fbb-695c-48ea-a56e-a229edf82e61,60822915940014,0.0866302467297128,2025-08-26 16:29:08,2025-08-26,2025-08,Test,0,1
1,3431774,147a4bb6-e14b-4643-9c01-1fcef9bab326,60834317740015,0.1608379585444575,2025-08-26 15:39:08,2025-08-26,2025-08,Test,0,1
2,3634235,6d640f14-ffd2-43a3-b1e4-62ec3e82b5d9,60836342350017,0.0579105286074249,2025-08-26 16:43:52,2025-08-26,2025-08,Test,1,1
3,3644816,2976be11-1111-40ad-9cac-877b2d01abeb,60836448160017,0.0797563850419823,2025-08-26 15:38:29,2025-08-26,2025-08,Test,0,1
4,3644821,4bcaa555-d60b-46f6-9730-d6c5934292b1,60836448210014,0.1173719307619322,2025-08-26 15:38:54,2025-08-26,2025-08,Test,0,1


In [2595]:
# Independent copy of the FPD30 Test query result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2596]:
# FPD30 / Train: model scores read from ml_training_model_run_details, joined
# to the loan master table and the delinquency flags, keeping only disbursed
# loans whose FPD30 flag is mature.
#
# FIX (consistency): this was the only query in the notebook filtering on
# `modelDisplayName = 'Alpha - CIC-SIL-Model'`; every other Train/Test query
# uses `in ('Alpha - CIC-SIL-Model', 'cic_model_sil')`. The filter is aligned
# so the FPD30 Train population is selected the same way as the other targets.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
"""
# Run the query and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully duplicated rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2147386,8ae91d78-b90e-44cf-8446-aa150e9ba327,60821473860011,0.158327,2023-07-20 17:59:02,2023-07-20,2023-07,Train,0,1
1,1971128,61fe6a8d-b2fe-4a4d-b29a-8304f1cc9b64,60819711280017,0.058531,2023-03-31 19:39:42,2023-04-01,2023-03,Train,0,1
2,1908771,abf37d88-5ece-4297-9d12-81954ab1c844,60819087710011,0.175721,2023-02-20 12:31:25,2023-02-20,2023-02,Train,0,1
3,1958097,534d661c-7e22-47c1-9a05-94feed085410,60819580970015,0.059622,2023-03-24 11:55:24,2023-03-24,2023-03,Train,0,1
4,1919457,a07bea81-2c45-432b-b6eb-d3ab756b1361,60819194570013,0.094258,2023-02-27 10:27:40,2023-02-27,2023-02,Train,0,1


In [2597]:
# Independent copy of the FPD30 Train query result.
df2 = dfd.copy()

In [2598]:
# Stack the FPD30 Test (df1) and Train (df2) snapshots row-wise;
# ignore_index=True renumbers rows 0..n-1. This overwrites the FPD10 df_concat.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Print column dtypes / non-null counts as a quick sanity check.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167483 entries, 0 to 167482
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             167483 non-null  object        
 1   digitalLoanAccountId   167483 non-null  object        
 2   loanAccountNumber      167483 non-null  object        
 3   Alpha_cic_sil_score    167483 non-null  object        
 4   appln_submit_datetime  167483 non-null  datetime64[us]
 5   disbursementdate       167483 non-null  dbdate        
 6   Application_month      167483 non-null  object        
 7   Data_selection         167483 non-null  object        
 8   deffpd30               167483 non-null  Int64         
 9   flg_mature_fpd30       167483 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 13.1+ MB


In [2599]:
# The score column is `object` dtype after the concat; convert it to numeric.
# errors='coerce' turns any unparseable value into NaN instead of raising.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')
# Compute Gini of the score against the 'deffpd30' target, tagged 'FPD30'.
# `calculate_periodic_gini` is defined elsewhere in the notebook.
gini_results = calculate_periodic_gini(df_concat, 'Alpha_cic_sil_score', 'deffpd30', 'FPD30')

In [2600]:
# Keep the FPD30 result under its own name; `gini_results` is overwritten by
# the FSPD30 section below.
f2=gini_results.copy()

## FSPD30

## Test

In [2601]:
# FSPD30 / Test: model scores read from audit_balance.ml_model_run_details,
# joined to the loan master table and the delinquency flags, keeping only
# disbursed loans whose FSPD30 flag is mature. The SQL text itself is unchanged.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  ;
"""
# Execute, materialise as a DataFrame and drop fully duplicated rows
# (first occurrence kept) in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,3636860,fa8495e7-3a83-43a9-ab31-876157bc0343,60836368600014,0.1019382396778828,2025-08-22 15:52:54,2025-08-22,2025-08,Test,0,1
1,3305921,2cfca166-b876-41ee-b341-2f00064b2b1f,60833059210013,0.1824453422160273,2025-08-22 16:49:35,2025-08-22,2025-08,Test,0,1
2,3630594,c5727901-5ae8-4878-9a9f-92f9686ad094,60836305940011,0.0684863849321493,2025-08-19 14:27:30,2025-08-22,2025-08,Test,0,1
3,3636785,b2ed62cd-ecec-4f8e-bb41-4869fc9a89a1,60836367850011,0.1477463758098642,2025-08-22 15:24:27,2025-08-22,2025-08,Test,0,1
4,3372802,0b78e779-840a-4722-a129-5ed5d28ccf95,60833728020011,0.1814924140831713,2025-08-22 12:42:03,2025-08-22,2025-08,Test,0,1


In [2602]:
# Independent copy of the FSPD30 Test query result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2603]:
# FSPD30 / Train: model scores read from ml_training_model_run_details, joined
# to the loan master table and the delinquency flags, keeping only disbursed
# loans whose FSPD30 flag is mature. The SQL text itself is unchanged.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
"""
# Execute, materialise as a DataFrame and drop fully duplicated rows
# (first occurrence kept) in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2150460,8db684cb-37ca-40c6-b6c9-b8ab8d5af7ca,60821504600011,0.125483,2023-07-22 16:34:56,2023-07-22,2023-07,Train,1,1
1,2046673,33990289-05ad-4c50-bb6f-97d307b9cadc,60820466730015,0.153677,2023-05-16 10:55:02,2023-05-16,2023-05,Train,0,1
2,2136221,23f98da6-becf-44c7-934a-e4a70bfc5dc6,60821362210012,0.126872,2023-07-14 10:43:17,2023-07-14,2023-07,Train,1,1
3,2015905,cca2ad51-2b82-4da7-a223-132ae54bd364,60820159050013,0.053252,2023-04-27 12:13:10,2023-04-27,2023-04,Train,0,1
4,1977804,3fe28d61-53a1-4f6c-8c4e-a505b2322b1a,60819778040014,0.05151,2023-05-11 15:12:46,2023-05-11,2023-05,Train,0,1


In [2604]:
# Independent copy of the FSPD30 Train query result.
df2 = dfd.copy()

In [2605]:
# Stack the FSPD30 Test (df1) and Train (df2) snapshots row-wise;
# ignore_index=True renumbers rows 0..n-1. This overwrites the FPD30 df_concat.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Print column dtypes / non-null counts as a quick sanity check.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158529 entries, 0 to 158528
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             158529 non-null  object        
 1   digitalLoanAccountId   158529 non-null  object        
 2   loanAccountNumber      158529 non-null  object        
 3   Alpha_cic_sil_score    158529 non-null  object        
 4   appln_submit_datetime  158529 non-null  datetime64[us]
 5   disbursementdate       158529 non-null  dbdate        
 6   Application_month      158529 non-null  object        
 7   Data_selection         158529 non-null  object        
 8   deffspd30              158529 non-null  Int64         
 9   flg_mature_fspd_30     158529 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 12.4+ MB


In [2606]:
# The score column is `object` dtype after the concat; convert it to numeric.
# errors='coerce' turns any unparseable value into NaN instead of raising.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')
# Compute Gini of the score against the 'deffspd30' target, tagged 'FSPD30'.
# `calculate_periodic_gini` is defined elsewhere in the notebook.
gini_results = calculate_periodic_gini(df_concat, 'Alpha_cic_sil_score', 'deffspd30', 'FSPD30')

In [2607]:
# Keep the FSPD30 result under its own name so a later reassignment of
# `gini_results` does not lose it.
f3=gini_results.copy()

## FSTPD30

## Test

In [2608]:
# FSTPD30 / Test: model scores read from audit_balance.ml_model_run_details,
# joined to the loan master table and the delinquency flags, keeping only
# disbursed loans whose FSTPD30 flag is mature. The SQL text itself is unchanged.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  ;
"""
# Execute, materialise as a DataFrame and drop fully duplicated rows
# (first occurrence kept) in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,3443378,af0340b3-8d7f-4872-a2a3-a6298832b88e,60834433780011,0.1255901246223767,2025-05-18 11:16:32,2025-05-18,2025-05,Test,0,1
1,3451896,4faedd37-79df-49f5-a482-347ce4974207,60834518960014,0.1637498084239941,2025-05-22 17:21:32,2025-05-22,2025-05,Test,0,1
2,3469217,7b4f51e1-c169-439a-951a-834b0f581a25,60834692170018,0.0717961623176633,2025-05-31 12:13:24,2025-05-31,2025-05,Test,0,1
3,3408928,3de107a6-41e3-4ca3-809a-469507fc7e88,60834089280018,0.1026495046491097,2025-04-29 15:00:23,2025-04-29,2025-04,Test,0,1
4,3431235,cd6fa742-75c0-4403-bdbb-4864cab5f6fe,60834312350013,0.1246860644705703,2025-05-11 13:46:32,2025-05-11,2025-05,Test,0,1


In [2609]:
# Independent copy of the FSTPD30 Test query result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2610]:
# FSTPD30 / Train: model scores read from ml_training_model_run_details, joined
# to the loan master table and the delinquency flags, keeping only disbursed
# loans whose FSTPD30 flag is mature. The SQL text itself is unchanged.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
"""
# Execute, materialise as a DataFrame and drop fully duplicated rows
# (first occurrence kept) in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,1899985,c2bb63a3-110f-4e98-9edd-128ae2dd1f08,60818999850016,0.088523,2023-03-04 18:58:20,2023-03-04,2023-03,Train,0,1
1,1987335,0acf5426-0384-4628-8873-834a8b752c60,60819873350011,0.089887,2023-04-10 15:43:34,2023-04-10,2023-04,Train,1,1
2,2051364,1d91dd58-1ff8-476b-8927-e2c5d514db06,60820513640011,0.125999,2023-05-19 10:43:25,2023-05-19,2023-05,Train,1,1
3,2114218,6e152f2b-6f13-4f60-9174-19ff922effef,60821142180019,0.073596,2023-06-30 19:37:18,2023-06-30,2023-06,Train,0,1
4,1958380,ebb446fa-7f0e-4a69-ba74-3a2a8fe10c66,60819583800015,0.10708,2023-03-24 14:58:14,2023-03-24,2023-03,Train,0,1


In [2611]:
# Snapshot the Train rows just fetched into `dfd`.
df2 = dfd.copy(deep=True)

In [2612]:
# Stack the Test (df1) and Train (df2) samples row-wise with a fresh 0..n-1 index,
# then print the schema summary.
df_concat = pd.concat((df1, df2), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148228 entries, 0 to 148227
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             148228 non-null  object        
 1   digitalLoanAccountId   148228 non-null  object        
 2   loanAccountNumber      148228 non-null  object        
 3   Alpha_cic_sil_score    148228 non-null  object        
 4   appln_submit_datetime  148228 non-null  datetime64[us]
 5   disbursementdate       148228 non-null  dbdate        
 6   Application_month      148228 non-null  object        
 7   Data_selection         148228 non-null  object        
 8   deffstpd30             148228 non-null  Int64         
 9   flg_mature_fstpd_30    148228 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 11.6+ MB


In [2613]:
# The score column arrives as object dtype (see df_concat.info() above), so
# convert it to numeric; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    Alpha_cic_sil_score=pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')
)
# Periodic Gini of the score against the deffstpd30 flag, labelled 'FSTPD30'.
gini_results = calculate_periodic_gini(df_concat, 'Alpha_cic_sil_score', 'deffstpd30', 'FSTPD30')

In [2614]:
# Keep the FSTPD30 Gini table under its own name; `gini_results` is reused by every section.
f4 = gini_results.copy(deep=True)

## Combining the dataframes

In [2615]:
import functools

# Per-target Gini tables built in the sections above.
dataframes = [f0, f1, f2, f3, f4]
# Columns used as the join key for every pairwise merge.
# NOTE(review): `bad_rate` is part of the key and appears to hold a different
# label in each frame (FPD0, FPD10, ...). Rows from different frames therefore
# never match, and each merged row has only one populated *_gini column (see
# the final_df.head() output). Confirm this sparse layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on `common_columns`."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left-to-right into a single table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'Alpha_cic_sil_score_FPD10_gini', 'Alpha_cic_sil_score_FPD30_gini',
       'Alpha_cic_sil_score_FSPD30_gini',
       'Alpha_cic_sil_score_FSTPD30_gini'], dtype=object)

In [2616]:
# Put the key columns first and the five Gini columns after them, then tag
# every row with constant descriptor columns.
final_df = (
    final_df.loc[:, [
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'Alpha_cic_sil_score_FPD0_gini',
        'Alpha_cic_sil_score_FPD10_gini',
        'Alpha_cic_sil_score_FPD30_gini',
        'Alpha_cic_sil_score_FSPD30_gini',
        'Alpha_cic_sil_score_FSTPD30_gini',
    ]]
    .copy()
    .assign(
        Trench_category='All',
        Model_display_name='cic_model_sil',
        Product_type='SIL',
    )
)
final_df.dtypes

start_date                          datetime64[ns]
end_date                            datetime64[ns]
period                                      object
Model_Name                                  object
version                                     object
bad_rate                                    object
Alpha_cic_sil_score_FPD0_gini              float64
Alpha_cic_sil_score_FPD10_gini             float64
Alpha_cic_sil_score_FPD30_gini             float64
Alpha_cic_sil_score_FSPD30_gini            float64
Alpha_cic_sil_score_FSTPD30_gini           float64
Trench_category                             object
Model_display_name                          object
Product_type                                object
dtype: object

In [2617]:
# Preview the first rows of the combined table before uploading it.
final_df.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,Alpha_cic_sil_score_FPD0_gini,Alpha_cic_sil_score_FPD10_gini,Alpha_cic_sil_score_FPD30_gini,Alpha_cic_sil_score_FSPD30_gini,Alpha_cic_sil_score_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,1.1.0,FPD0,0.205187,,,,,All,cic_model_sil,SIL
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,1.1.0,FPD0,0.226648,,,,,All,cic_model_sil,SIL
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,1.1.0,FPD0,0.375,,,,,All,cic_model_sil,SIL
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,1.1.0,FPD0,0.110811,,,,,All,cic_model_sil,SIL
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,1.1.0,FPD0,0.076412,,,,,All,cic_model_sil,SIL


In [2618]:
# Upload to BigQuery
# WRITE_TRUNCATE replaces whatever the destination table already contains.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.Alpha_cic_sil_score_gini_v4"
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # or "WRITE_APPEND"
job = client.load_table_from_dataframe(
    dataframe=final_df,
    destination=table_id,
    job_config=job_config,
)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=7037aaa5-579a-45c3-b2ea-087e9a4b2c89>

# Alpha Sil Stack Model

## FPD0

## Test

In [2619]:
# Test sample for the FPD0 target: predictions from
# audit_balance.ml_model_run_details for the Alpha stacking model, joined to
# the loan master table and the delinquency flags. Only disbursed loans with
# a non-null score and flg_mature_fpd0 = 1 are kept.
# NOTE(review): the `left join` to loan_master_table behaves like an inner
# join, because both the WHERE clause and the following inner join require
# non-null loanmaster columns.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  ;
"""
# Run the query, materialise it as a DataFrame and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3484482,2cbc6e1c-1f2d-42d4-a8ea-fe9cf8c10857,60834844820013,0.0222469884495415,2025-06-08 09:40:34,2025-06-08,2025-06,Test,0,1
1,3484555,31bd8c77-fed4-4f5b-8f0c-62364fa3cd27,60834845550013,0.0612996554543683,2025-06-08 10:14:46,2025-06-08,2025-06,Test,1,1
2,3484439,47448a16-dfcf-47e6-98b8-ee053ff7a52b,60834844390011,0.1618215703334135,2025-06-08 09:15:01,2025-06-08,2025-06,Test,0,1
3,3484470,4cb219c8-d978-4bcc-b699-e83f1faaaae0,60834844700019,0.1163728278670786,2025-06-08 09:29:56,2025-06-08,2025-06,Test,0,1
4,2727346,5f1a2a52-cb38-4b39-bb7e-32fc7a0a9873,60827273460027,0.0457478655466967,2025-06-08 09:33:44,2025-06-08,2025-06,Test,0,1


In [2620]:
# Snapshot the FPD0 Test rows before the Train query below rebinds `dfd`.
df1 = dfd.copy(deep=True)

## Train

In [2621]:
# Train sample for the FPD0 target: predictions from
# ml_training_model_run_details for the Alpha stacking model, joined to the
# loan master table and the delinquency flags. Only disbursed loans with a
# non-null score and flg_mature_fpd0 = 1 are kept.
# NOTE(review): the `left join` to loan_master_table behaves like an inner
# join, because both the WHERE clause and the following inner join require
# non-null loanmaster columns.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
"""
# Run the query, materialise it as a DataFrame and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1916914,d537acfd-bde3-4721-a217-98a816664976,60819169140015,0.127306,2023-02-25 15:35:12,2023-02-25,2023-02,Train,0,1
1,1900517,8a89c380-3b87-4bf5-9bdb-cdf8ff261891,60819005170017,0.072265,2023-02-14 17:18:07,2023-02-14,2023-02,Train,0,1
2,2140186,2ce1b52e-7058-4644-9326-c9efe367d75b,60821401860017,0.030852,2023-07-16 17:06:39,2023-07-16,2023-07,Train,0,1
3,1964698,60f7d25e-c48a-4d86-8874-2705e727754f,60819646980013,0.052047,2023-03-28 11:43:41,2023-03-28,2023-03,Train,0,1
4,1917023,01d7ecaa-3bfb-426c-9d07-da0ca8f546aa,60819170230013,0.038395,2023-02-25 16:44:08,2023-02-25,2023-02,Train,0,1


In [2622]:
# Snapshot the FPD0 Train rows just fetched into `dfd`.
df2 = dfd.copy(deep=True)

In [2623]:
# Stack the Test (df1) and Train (df2) samples row-wise with a fresh 0..n-1 index,
# then print the schema summary.
df_concat = pd.concat((df1, df2), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 177174 entries, 0 to 177173
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             177174 non-null  object        
 1   digitalLoanAccountId   177174 non-null  object        
 2   loanAccountNumber      177174 non-null  object        
 3   Sil_Alpha_Stack_score  177174 non-null  object        
 4   appln_submit_datetime  177174 non-null  datetime64[us]
 5   disbursementdate       177174 non-null  dbdate        
 6   Application_month      177174 non-null  object        
 7   Data_selection         177174 non-null  object        
 8   deffpd0                177174 non-null  Int64         
 9   flg_mature_fpd0        177174 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 13.9+ MB


In [2624]:
# The score column arrives as object dtype (see df_concat.info() above), so
# convert it to numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')

# Rows per application month (NaN months included), in chronological order.
# This must be the cell's last expression: a notebook only renders the value
# of the final statement, so in its original first position the counts were
# computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2625]:
# Dump the combined Test + Train sample to a CSV in the working directory.
# The row index is written as the first (unnamed) column.
df_concat.to_csv(path_or_buf=r"Sil_Alpha_Stack_score.csv", index=True)

In [2626]:
# Periodic Gini of Sil_Alpha_Stack_score against the deffpd0 flag, labelled 'FPD0'.
# NOTE(review): calculate_periodic_gini is defined elsewhere in the notebook;
# judging by the f0 preview below, it returns one row per Month/Week period —
# confirm against its definition.
gini_results = calculate_periodic_gini(df_concat, 'Sil_Alpha_Stack_score', 'deffpd0', 'FPD0')

In [2627]:
# Keep the FPD0 Gini table under its own name; `gini_results` is reused by every section.
f0 = gini_results.copy(deep=True)

In [2628]:
# Preview the first ten rows of the FPD0 Gini table.
f0.head(n=10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD0_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.252502,Month,Sil_Alpha_Stack_score,1.1.0,FPD0
1,2023-01-09,2023-01-15,0.343407,Week,Sil_Alpha_Stack_score,1.1.0,FPD0
2,2023-01-16,2023-01-22,0.660714,Week,Sil_Alpha_Stack_score,1.1.0,FPD0
3,2023-01-23,2023-01-29,0.113514,Week,Sil_Alpha_Stack_score,1.1.0,FPD0
4,2023-01-30,2023-02-05,0.182724,Week,Sil_Alpha_Stack_score,1.1.0,FPD0
5,2023-02-01,2023-02-28,0.193609,Month,Sil_Alpha_Stack_score,1.1.0,FPD0
6,2023-02-06,2023-02-12,0.313609,Week,Sil_Alpha_Stack_score,1.1.0,FPD0
7,2023-02-13,2023-02-19,-0.0625,Week,Sil_Alpha_Stack_score,1.1.0,FPD0
8,2023-02-20,2023-02-26,0.272727,Week,Sil_Alpha_Stack_score,1.1.0,FPD0
9,2023-02-27,2023-03-05,0.522222,Week,Sil_Alpha_Stack_score,1.1.0,FPD0


## FPD10

## Test

In [2629]:
# Test sample for the FPD10 target: predictions from
# audit_balance.ml_model_run_details for the Alpha stacking model, joined to
# the loan master table and the delinquency flags. Only disbursed loans with
# a non-null score and flg_mature_fpd10 = 1 are kept.
# NOTE(review): the `left join` to loan_master_table behaves like an inner
# join, because both the WHERE clause and the following inner join require
# non-null loanmaster columns.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""
# Run the query, materialise it as a DataFrame and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2344472,22e8defc-7474-4787-91f2-2ab7e0b167a4,60823444720034,0.1076338498813921,2025-08-17 11:56:39,2025-08-17,2025-08,Test,0,1
1,2535729,482c9abd-59d1-4909-96bf-e780fd890252,60825357290023,0.1481407373720184,2025-08-17 11:24:07,2025-08-17,2025-08,Test,0,1
2,3450187,c19c4b1f-95c8-4274-8469-0350568b287e,60834501870026,0.2466475165534453,2025-08-17 13:33:55,2025-08-17,2025-08,Test,0,1
3,3580060,ab965735-87c6-4393-934c-d749dbc6c079,60835800600015,0.1102071298683012,2025-08-17 12:32:58,2025-08-17,2025-08,Test,0,1
4,3625796,9f2c95ac-506d-47dd-92b0-5b0a4fca9fc3,60836257960014,0.0327909594693338,2025-08-17 11:10:57,2025-08-17,2025-08,Test,0,1


In [2630]:
# Snapshot the FPD10 Test rows before the Train query below rebinds `dfd`.
df1 = dfd.copy(deep=True)

## Train

In [2631]:
# Train sample for the FPD10 target: predictions from
# ml_training_model_run_details for the Alpha stacking model, joined to the
# loan master table and the delinquency flags. Only disbursed loans with a
# non-null score and flg_mature_fpd10 = 1 are kept.
# NOTE(review): the `left join` to loan_master_table behaves like an inner
# join, because both the WHERE clause and the following inner join require
# non-null loanmaster columns.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
"""
# Run the query, materialise it as a DataFrame and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2140051,df7fb1fd-74ab-4b67-8114-7dd9ec541068,60821400510013,0.023264,2023-07-16 15:52:10,2023-07-16,2023-07,Train,0,1
1,1778491,85ee1518-2ece-4529-8bd9-627050f1fbbe,60817784910019,0.110601,2023-05-28 18:03:22,2023-05-28,2023-05,Train,0,1
2,2117726,5a552855-edb9-4c6a-8c0e-7a6a1c93531b,60821177260016,0.162125,2023-07-03 15:26:21,2023-07-03,2023-07,Train,0,1
3,2007487,dfba99fe-81c5-4185-8f17-5cdfbd65ccd7,60820074870019,0.110336,2023-04-21 19:03:31,2023-04-21,2023-04,Train,1,1
4,2050734,31614f87-a041-4ff6-99db-2d01a47116d5,60820507340012,0.033421,2023-05-18 19:48:21,2023-05-18,2023-05,Train,0,1


In [2632]:
# Snapshot the FPD10 Train rows just fetched into `dfd`.
df2 = dfd.copy(deep=True)

In [2633]:
# Stack the Test (df1) and Train (df2) samples row-wise with a fresh 0..n-1 index,
# then print the schema summary.
df_concat = pd.concat((df1, df2), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173949 entries, 0 to 173948
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             173949 non-null  object        
 1   digitalLoanAccountId   173949 non-null  object        
 2   loanAccountNumber      173949 non-null  object        
 3   Sil_Alpha_Stack_score  173949 non-null  object        
 4   appln_submit_datetime  173949 non-null  datetime64[us]
 5   disbursementdate       173949 non-null  dbdate        
 6   Application_month      173949 non-null  object        
 7   Data_selection         173949 non-null  object        
 8   deffpd10               173949 non-null  Int64         
 9   flg_mature_fpd10       173949 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 13.6+ MB


In [2634]:
# The score column arrives as object dtype (see df_concat.info() above), so
# convert it to numeric; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    Sil_Alpha_Stack_score=pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
)
# Periodic Gini of the score against the deffpd10 flag, labelled 'FPD10'.
gini_results = calculate_periodic_gini(df_concat, 'Sil_Alpha_Stack_score', 'deffpd10', 'FPD10')

In [2635]:
# Keep the FPD10 Gini table under its own name; `gini_results` is reused by every section.
f1 = gini_results.copy(deep=True)

In [2636]:
# Preview the first ten rows of the FPD10 Gini table.
f1.head(n=10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.263158,Month,Sil_Alpha_Stack_score,1.1.0,FPD10
1,2023-01-09,2023-01-15,0.275862,Week,Sil_Alpha_Stack_score,1.1.0,FPD10
2,2023-01-16,2023-01-22,0.789474,Week,Sil_Alpha_Stack_score,1.1.0,FPD10
3,2023-01-23,2023-01-29,0.237179,Week,Sil_Alpha_Stack_score,1.1.0,FPD10
4,2023-01-30,2023-02-05,0.181818,Week,Sil_Alpha_Stack_score,1.1.0,FPD10
5,2023-02-01,2023-02-28,0.133333,Month,Sil_Alpha_Stack_score,1.1.0,FPD10
6,2023-02-06,2023-02-12,0.419355,Week,Sil_Alpha_Stack_score,1.1.0,FPD10
7,2023-02-13,2023-02-19,-0.110512,Week,Sil_Alpha_Stack_score,1.1.0,FPD10
8,2023-02-20,2023-02-26,-0.192157,Week,Sil_Alpha_Stack_score,1.1.0,FPD10
9,2023-02-27,2023-03-05,0.522222,Week,Sil_Alpha_Stack_score,1.1.0,FPD10


## FPD30

## Test

In [2637]:
# Test sample for the FPD30 target: predictions from
# audit_balance.ml_model_run_details for the Alpha stacking model, joined to
# the loan master table and the delinquency flags. Only disbursed loans with
# a non-null score and flg_mature_fpd30 = 1 are kept.
# NOTE(review): the `left join` to loan_master_table behaves like an inner
# join, because both the WHERE clause and the following inner join require
# non-null loanmaster columns.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""
# Run the query, materialise it as a DataFrame and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,3629192,02f829fc-53a6-4db8-ba97-abf39fb68fcf,60836291920018,0.109901226944326,2025-08-18 17:14:18,2025-08-18,2025-08,Test,0,1
1,3629098,0936a926-1e63-4950-ac46-d89228151795,60836290980011,0.0453385525963183,2025-08-18 16:45:39,2025-08-18,2025-08,Test,0,1
2,3628944,09947ba4-b3e2-44d8-b057-c48908ce0b80,60836289440011,0.0935035715855221,2025-08-18 15:46:46,2025-08-18,2025-08,Test,0,1
3,3629088,12397c8f-c968-4b14-ba76-20a12cdede63,60836290880019,0.0151322028295627,2025-08-18 16:40:21,2025-08-18,2025-08,Test,0,1
4,3629158,31f21a10-2d99-4bbf-af3f-2978989a7ef1,60836291580016,0.0712496208272338,2025-08-18 16:59:38,2025-08-18,2025-08,Test,0,1


In [2638]:
# Snapshot the FPD30 Test rows before the Train query below rebinds `dfd`.
df1 = dfd.copy(deep=True)

## Train

In [2639]:
# Train sample for the FPD30 target: predictions from
# ml_training_model_run_details for the Alpha stacking model, joined to the
# loan master table and the delinquency flags. Only disbursed loans with a
# non-null score and flg_mature_fpd30 = 1 are kept.
# NOTE(review): the `left join` to loan_master_table behaves like an inner
# join, because both the WHERE clause and the following inner join require
# non-null loanmaster columns.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
"""
# Run the query, materialise it as a DataFrame and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2022992,ef9760ef-711c-444d-a63f-e87555300d56,60820229920011,0.026755,2023-05-01 15:20:29,2023-05-01,2023-05,Train,0,1
1,2147322,776541de-d0a7-42a1-ab3e-3549ff36e7f7,60821473220015,0.122249,2023-07-20 17:19:47,2023-07-20,2023-07,Train,0,1
2,2042658,1d6d919c-f5f7-43e0-b077-57bff89d8ae5,60820426580011,0.069868,2023-05-13 16:48:23,2023-05-13,2023-05,Train,0,1
3,2109729,48dbc0f9-a2be-43c7-b381-72c565731364,60821097290012,0.192001,2023-06-27 14:16:08,2023-06-27,2023-06,Train,0,1
4,2008944,c6e3f8ba-158c-4014-87c4-85c21d296e07,60820089440017,0.08137,2023-04-22 17:24:26,2023-04-22,2023-04,Train,1,1


In [2640]:
# Snapshot the FPD30 Train rows just fetched into `dfd`.
df2 = dfd.copy(deep=True)

In [2641]:
# Stack the Test (df1) and Train (df2) samples row-wise with a fresh 0..n-1 index,
# then print the schema summary.
df_concat = pd.concat((df1, df2), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 167483 entries, 0 to 167482
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             167483 non-null  object        
 1   digitalLoanAccountId   167483 non-null  object        
 2   loanAccountNumber      167483 non-null  object        
 3   Sil_Alpha_Stack_score  167483 non-null  object        
 4   appln_submit_datetime  167483 non-null  datetime64[us]
 5   disbursementdate       167483 non-null  dbdate        
 6   Application_month      167483 non-null  object        
 7   Data_selection         167483 non-null  object        
 8   deffpd30               167483 non-null  Int64         
 9   flg_mature_fpd30       167483 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 13.1+ MB


In [2642]:
# The score column arrives as object dtype (see df_concat.info() above), so
# convert it to numeric; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    Sil_Alpha_Stack_score=pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
)
# Periodic Gini of the score against the deffpd30 flag, labelled 'FPD30'.
gini_results = calculate_periodic_gini(df_concat, 'Sil_Alpha_Stack_score', 'deffpd30', 'FPD30')

In [2643]:
# Keep the FPD30 Gini table under its own name; `gini_results` is reused by every section.
f2 = gini_results.copy(deep=True)

In [2644]:
# Preview the first ten rows of the FPD30 Gini table.
f2.head(n=10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.358431,Month,Sil_Alpha_Stack_score,1.1.0,FPD30
1,2023-01-09,2023-01-15,0.718033,Week,Sil_Alpha_Stack_score,1.1.0,FPD30
2,2023-01-16,2023-01-22,0.789474,Week,Sil_Alpha_Stack_score,1.1.0,FPD30
3,2023-01-23,2023-01-29,0.107143,Week,Sil_Alpha_Stack_score,1.1.0,FPD30
4,2023-01-30,2023-02-05,0.181818,Week,Sil_Alpha_Stack_score,1.1.0,FPD30
5,2023-02-01,2023-02-28,0.181795,Month,Sil_Alpha_Stack_score,1.1.0,FPD30
6,2023-02-06,2023-02-12,0.419355,Week,Sil_Alpha_Stack_score,1.1.0,FPD30
7,2023-02-13,2023-02-19,-0.110512,Week,Sil_Alpha_Stack_score,1.1.0,FPD30
8,2023-02-20,2023-02-26,0.009615,Week,Sil_Alpha_Stack_score,1.1.0,FPD30
9,2023-02-27,2023-03-05,0.87234,Week,Sil_Alpha_Stack_score,1.1.0,FPD30


## FSPD30

## Test

In [2645]:
# Test sample for the FSPD30 target: predictions from
# audit_balance.ml_model_run_details for the Alpha stacking model, joined to
# the loan master table and the delinquency flags. Only disbursed loans with
# a non-null score and flg_mature_fspd_30 = 1 are kept.
# NOTE(review): the `left join` to loan_master_table behaves like an inner
# join, because both the WHERE clause and the following inner join require
# non-null loanmaster columns.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""
# Run the query, materialise it as a DataFrame and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,3636860,fa8495e7-3a83-43a9-ab31-876157bc0343,60836368600014,0.0559534628304777,2025-08-22 15:52:54,2025-08-22,2025-08,Test,0,1
1,3305921,2cfca166-b876-41ee-b341-2f00064b2b1f,60833059210013,0.2957411110042598,2025-08-22 16:49:35,2025-08-22,2025-08,Test,0,1
2,3630594,c5727901-5ae8-4878-9a9f-92f9686ad094,60836305940011,0.0254931673768062,2025-08-19 14:27:30,2025-08-22,2025-08,Test,0,1
3,3636785,b2ed62cd-ecec-4f8e-bb41-4869fc9a89a1,60836367850011,0.0794075997083141,2025-08-22 15:24:27,2025-08-22,2025-08,Test,0,1
4,3372802,0b78e779-840a-4722-a129-5ed5d28ccf95,60833728020011,0.2457341528455508,2025-08-22 12:42:03,2025-08-22,2025-08,Test,0,1


In [2646]:
# Snapshot the Test pull as df1; dfd is rebound by the Train query that follows.
df1 = dfd.copy()

## Train

In [2647]:
# FSPD30 / Train: score rows come from
# dap_ds_poweruser_playground.ml_training_model_run_details for the two Alpha stacking
# model display names, are tagged 'Train', and are limited to disbursed loans with
# flg_mature_fspd_30 = 1.
# NOTE(review): the LEFT JOIN on loan_master_table behaves like an inner join, because
# the following INNER JOIN and the WHERE clause both need non-null loanmaster columns.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2041292,c2c51457-ddcc-4b12-a73f-2827c5dd3eab,60820412920016,0.258804,2023-05-12 17:41:42,2023-05-12,2023-05,Train,0,1
1,2107162,2b37c699-2455-440e-9e2f-a1ebefe04c17,60821071620017,0.113917,2023-06-25 12:10:17,2023-06-25,2023-06,Train,0,1
2,1899030,3e60c2d4-b979-42c9-8d39-8ac4ecc155e1,60818990300014,0.136407,2023-02-13 16:00:05,2023-02-13,2023-02,Train,0,1
3,1918068,2b5d5aab-d1c8-47ee-9ed1-656624304ad2,60819180680011,0.135868,2023-02-26 12:27:00,2023-02-26,2023-02,Train,0,1
4,2061921,0f1f0c4b-e188-4105-aa2a-e56e10b31711,60820619210012,0.085821,2023-05-26 12:54:30,2023-05-26,2023-05,Train,0,1


In [2648]:
# Snapshot the Train pull as df2 so it can be stacked with the Test rows in df1.
df2 = dfd.copy()

In [2649]:
# Stack Test (df1) and Train (df2) rows into one frame with a fresh 0..n-1 index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Show dtypes and non-null counts of the combined frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158529 entries, 0 to 158528
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             158529 non-null  object        
 1   digitalLoanAccountId   158529 non-null  object        
 2   loanAccountNumber      158529 non-null  object        
 3   Sil_Alpha_Stack_score  158529 non-null  object        
 4   appln_submit_datetime  158529 non-null  datetime64[us]
 5   disbursementdate       158529 non-null  dbdate        
 6   Application_month      158529 non-null  object        
 7   Data_selection         158529 non-null  object        
 8   deffspd30              158529 non-null  Int64         
 9   flg_mature_fspd_30     158529 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 12.4+ MB


In [2650]:
# Convert the score column to numeric; errors='coerce' turns any value that cannot be
# parsed into NaN instead of raising.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
# Periodic Gini of the score against the deffspd30 flag, labelled 'FSPD30'.
gini_results = calculate_periodic_gini(df_concat, 'Sil_Alpha_Stack_score', 'deffspd30', 'FSPD30')

In [2651]:
# Keep the FSPD30 Gini table as f3; gini_results is rebound for the next bad-rate definition.
f3=gini_results.copy()

In [2652]:
f3.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.258788,Month,Sil_Alpha_Stack_score,1.1.0,FSPD30
1,2023-01-09,2023-01-15,0.418103,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30
2,2023-01-16,2023-01-22,0.789474,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30
3,2023-01-23,2023-01-29,0.032051,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30
4,2023-01-30,2023-02-05,0.143631,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30
5,2023-02-01,2023-02-28,0.117886,Month,Sil_Alpha_Stack_score,1.1.0,FSPD30
6,2023-02-06,2023-02-12,0.362963,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30
7,2023-02-13,2023-02-19,-0.08061,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30
8,2023-02-20,2023-02-26,-0.02,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30
9,2023-02-27,2023-03-05,0.449612,Week,Sil_Alpha_Stack_score,1.1.0,FSPD30


## FSTPD30

## Test

In [2653]:
# FSTPD30 / Test: score rows come from audit_balance.ml_model_run_details for the two
# Alpha stacking model display names; they are joined to loan_master_table and to the
# per-loan delinquency flags, keeping disbursed loans with flg_mature_fstpd_30 = 1.
# NOTE(review): the LEFT JOIN on loan_master_table behaves like an inner join, because
# the following INNER JOIN and the WHERE clause both need non-null loanmaster columns.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,3443378,af0340b3-8d7f-4872-a2a3-a6298832b88e,60834433780011,0.0499349558672149,2025-05-18 11:16:32,2025-05-18,2025-05,Test,0,1
1,3451896,4faedd37-79df-49f5-a482-347ce4974207,60834518960014,0.126699436684828,2025-05-22 17:21:32,2025-05-22,2025-05,Test,0,1
2,3469217,7b4f51e1-c169-439a-951a-834b0f581a25,60834692170018,0.0325544470886858,2025-05-31 12:13:24,2025-05-31,2025-05,Test,0,1
3,3408928,3de107a6-41e3-4ca3-809a-469507fc7e88,60834089280018,0.0367540218498825,2025-04-29 15:00:23,2025-04-29,2025-04,Test,0,1
4,3431235,cd6fa742-75c0-4403-bdbb-4864cab5f6fe,60834312350013,0.0509591977578569,2025-05-11 13:46:32,2025-05-11,2025-05,Test,0,1


In [2654]:
# Snapshot the Test pull as df1; dfd is rebound by the Train query that follows.
df1 = dfd.copy()

## Train

In [2655]:
# FSTPD30 / Train: score rows come from
# dap_ds_poweruser_playground.ml_training_model_run_details for the two Alpha stacking
# model display names, are tagged 'Train', and are limited to disbursed loans with
# flg_mature_fstpd_30 = 1.
# NOTE(review): the LEFT JOIN on loan_master_table behaves like an inner join, because
# the following INNER JOIN and the WHERE clause both need non-null loanmaster columns.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,1939745,d3aa0801-f0e0-4dab-a958-4fd0dfa3991b,60819397450018,0.054005,2023-03-12 11:28:57,2023-03-12,2023-03,Train,0,1
1,1862731,94abcb5a-0361-4acf-a534-7b9873b854a0,60818627310013,0.189225,2023-01-15 17:43:36,2023-01-15,2023-01,Train,1,1
2,1998212,15d7638f-53a8-4b13-a5cb-cea1c737a80b,60819982120011,0.02608,2023-04-16 15:20:51,2023-04-16,2023-04,Train,0,1
3,1994333,541836ac-3718-44de-a2a6-78f76072eea5,60819943330012,0.082002,2023-04-14 10:38:30,2023-04-14,2023-04,Train,0,1
4,1867749,db942dec-8a80-4ec9-83f7-2b029eeb9988,60818677490013,0.083662,2023-01-20 11:20:39,2023-01-20,2023-01,Train,0,1


In [2656]:
# Snapshot the Train pull as df2 so it can be stacked with the Test rows in df1.
df2 = dfd.copy()

In [2657]:
# Stack Test (df1) and Train (df2) rows into one frame with a fresh 0..n-1 index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Show dtypes and non-null counts of the combined frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148228 entries, 0 to 148227
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             148228 non-null  object        
 1   digitalLoanAccountId   148228 non-null  object        
 2   loanAccountNumber      148228 non-null  object        
 3   Sil_Alpha_Stack_score  148228 non-null  object        
 4   appln_submit_datetime  148228 non-null  datetime64[us]
 5   disbursementdate       148228 non-null  dbdate        
 6   Application_month      148228 non-null  object        
 7   Data_selection         148228 non-null  object        
 8   deffstpd30             148228 non-null  Int64         
 9   flg_mature_fstpd_30    148228 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 11.6+ MB


In [2658]:
# Convert the score column to numeric; errors='coerce' turns any value that cannot be
# parsed into NaN instead of raising.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
# Periodic Gini of the score against the deffstpd30 flag, labelled 'FSTPD30'.
gini_results = calculate_periodic_gini(df_concat, 'Sil_Alpha_Stack_score', 'deffstpd30', 'FSTPD30')

In [2659]:
# Keep the FSTPD30 Gini table as f4 for the merge across bad-rate definitions.
f4=gini_results.copy()

In [2660]:
f4.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.265602,Month,Sil_Alpha_Stack_score,1.1.0,FSTPD30
1,2023-01-09,2023-01-15,0.302479,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30
2,2023-01-16,2023-01-22,0.575758,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30
3,2023-01-23,2023-01-29,0.157895,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30
4,2023-01-30,2023-02-05,0.324561,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30
5,2023-02-01,2023-02-28,0.202272,Month,Sil_Alpha_Stack_score,1.1.0,FSTPD30
6,2023-02-06,2023-02-12,0.393103,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30
7,2023-02-13,2023-02-19,0.028,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30
8,2023-02-20,2023-02-26,-0.036458,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30
9,2023-02-27,2023-03-05,0.347561,Week,Sil_Alpha_Stack_score,1.1.0,FSTPD30


## Combining the dataframes

In [2661]:
import functools

# Key columns present in every per-bad-rate Gini frame.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']
# One Gini frame per bad-rate definition, in merge order.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on the shared key columns in `common_columns`."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the frames left to right into a single wide frame.
# NOTE(review): 'bad_rate' is part of the join key and differs between the frames, so
# rows from different frames do not line up and each merged row carries a single
# non-null Gini column - confirm this layout is what the target table expects.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'Sil_Alpha_Stack_score_FPD10_gini',
       'Sil_Alpha_Stack_score_FPD30_gini',
       'Sil_Alpha_Stack_score_FSPD30_gini',
       'Sil_Alpha_Stack_score_FSTPD30_gini'], dtype=object)

In [2662]:
# Put the key columns first, then the five Gini columns, and append the constant
# descriptor columns for this model.
final_df = final_df[
    [
        'start_date',
        'end_date',
        'period',
        'Model_Name',
        'version',
        'bad_rate',
        'Sil_Alpha_Stack_score_FPD0_gini',
        'Sil_Alpha_Stack_score_FPD10_gini',
        'Sil_Alpha_Stack_score_FPD30_gini',
        'Sil_Alpha_Stack_score_FSPD30_gini',
        'Sil_Alpha_Stack_score_FSTPD30_gini',
    ]
].assign(
    Trench_category='All',
    Model_display_name='alpha_stack_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                            datetime64[ns]
end_date                              datetime64[ns]
period                                        object
Model_Name                                    object
version                                       object
bad_rate                                      object
Sil_Alpha_Stack_score_FPD0_gini              float64
Sil_Alpha_Stack_score_FPD10_gini             float64
Sil_Alpha_Stack_score_FPD30_gini             float64
Sil_Alpha_Stack_score_FSPD30_gini            float64
Sil_Alpha_Stack_score_FSTPD30_gini           float64
Trench_category                               object
Model_display_name                            object
Product_type                                  object
dtype: object

In [2663]:
final_df.head() 

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,Sil_Alpha_Stack_score_FPD0_gini,Sil_Alpha_Stack_score_FPD10_gini,Sil_Alpha_Stack_score_FPD30_gini,Sil_Alpha_Stack_score_FSPD30_gini,Sil_Alpha_Stack_score_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,1.1.0,FPD0,0.252502,,,,,All,alpha_stack_model_sil,SIL
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,1.1.0,FPD0,0.343407,,,,,All,alpha_stack_model_sil,SIL
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,1.1.0,FPD0,0.660714,,,,,All,alpha_stack_model_sil,SIL
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,1.1.0,FPD0,0.113514,,,,,All,alpha_stack_model_sil,SIL
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,1.1.0,FPD0,0.182724,,,,,All,alpha_stack_model_sil,SIL


In [2664]:
# Load final_df into the Alpha stack Gini table in BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.Sil_Alpha_Stack_scorev1_gini_v4"
job_config = bigquery.LoadJobConfig()
# WRITE_TRUNCATE replaces the table contents; "WRITE_APPEND" would add rows instead.
job_config.write_disposition = "WRITE_TRUNCATE"
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job completes; the returned job is the cell's output.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a1380ac8-0990-4879-a17c-371f7e1680d3>

# Beta Sil App Score

## FPD0

## Test

In [2665]:
# FPD0 / Test: rows come from audit_balance.ml_model_run_details for the two Beta apps-score
# model display names. The prediction payload is rewritten into JSON text (single quotes to
# double quotes, None to null) and $.combined_score is extracted as sil_beta_app_score.
# Only disbursed loans with a non-null score and flg_mature_fpd0 = 1 are kept.
# NOTE(review): the LEFT JOIN on loan_master_table behaves like an inner join, because
# the following INNER JOIN and the WHERE clause both need non-null loanmaster columns.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  ;
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1055817,87638f02-a75b-46c1-a287-15c391582302,60810558170041,0.377615,2025-07-01 14:18:42,2025-07-01,2025-07,Test,0,1
1,2815618,d1b0c8a2-c7da-4d10-8dae-f85431ff5186,60828156180025,0.458392,2025-07-01 14:40:56,2025-07-01,2025-07,Test,1,1
2,3135049,e77c630e-c203-4c76-a2fd-7a0d64749072,60831350490029,0.527765,2025-07-01 14:29:04,2025-07-01,2025-07,Test,0,1
3,3531616,e9a9c071-5a43-4974-a72f-2b73c8a136c4,60835316160018,0.522573,2025-07-01 14:06:36,2025-07-01,2025-07,Test,0,1
4,3531621,91d7199b-5f79-4fbe-96f3-6127e908d6af,60835316210015,0.452834,2025-07-01 14:00:36,2025-07-01,2025-07,Test,0,1


In [2666]:
# Snapshot the Test pull as df1; dfd is rebound by the Train query that follows.
df1 = dfd.copy()

## Train

In [2667]:
# FPD0 / Train: rows come from dap_ds_poweruser_playground.ml_training_model_run_details for
# the two Beta apps-score model display names. The score is the raw prediction when it is
# non-null, otherwise $.combined_score parsed from the stringified prediction.
# Only disbursed loans with a non-null score and flg_mature_fpd0 = 1 are kept.
# NOTE(review): the LEFT JOIN on loan_master_table behaves like an inner join, because
# the following INNER JOIN and the WHERE clause both need non-null loanmaster columns.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  ;

"""

dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2077493,11a57f9e-79bc-4257-95d9-d730412e9407,60820774930012,0.650773,2023-06-04 18:28:45,2023-06-04,2023-06,Train,0,1
1,1972051,85671b2f-4c18-47a9-80b6-d009beca2311,60819720510017,0.677378,2023-04-01 11:57:11,2023-04-01,2023-04,Train,0,1
2,2077259,dded8a17-1aba-447a-b4ba-fab0c270d4ac,60820772590014,0.608312,2023-06-04 16:05:37,2023-06-04,2023-06,Train,0,1
3,2095387,8e06b332-2578-4537-a107-3e8667f00657,60820953870013,0.68019,2023-06-16 10:22:24,2023-06-16,2023-06,Train,1,1
4,1998004,6e936bfd-d201-4c7c-a0be-8afb23521f48,60819980040014,0.489443,2023-04-16 14:04:04,2023-04-18,2023-04,Train,1,1


In [2668]:
# Snapshot the Train pull as df2 so it can be stacked with the Test rows in df1.
df2 = dfd.copy()

In [2669]:
# Stack Test (df1) and Train (df2) rows into one frame with a fresh 0..n-1 index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Show dtypes and non-null counts of the combined frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245200 entries, 0 to 245199
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             245200 non-null  object        
 1   digitalLoanAccountId   245200 non-null  object        
 2   loanAccountNumber      245200 non-null  object        
 3   sil_beta_app_score     245200 non-null  float64       
 4   appln_submit_datetime  245200 non-null  datetime64[us]
 5   disbursementdate       245200 non-null  dbdate        
 6   Application_month      245200 non-null  object        
 7   Data_selection         245200 non-null  object        
 8   deffpd0                245200 non-null  Int64         
 9   flg_mature_fpd0        245200 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 19.2+ MB


In [2670]:
# Make sure the score column is numeric; errors='coerce' turns any value that cannot be
# parsed into NaN instead of raising.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Row counts per application month (missing months included), in chronological order.
# Kept as the last expression so the notebook renders it; as the first statement of the
# cell its result was computed and then thrown away.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2671]:
# Write the combined Beta frame to a CSV in the current working directory.
# NOTE(review): the DataFrame index is written as an extra unnamed first column because
# index=False is not passed - confirm that downstream readers expect it.
# NOTE(review): the file holds customer and loan identifiers; confirm it is safe to keep on disk.
df_concat.to_csv(r"sil_beta_app_score.csv")

In [2672]:
# Periodic Gini of sil_beta_app_score against the deffpd0 flag, labelled 'FPD0'.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffpd0', 'FPD0')

In [2673]:
# Keep the Beta FPD0 Gini table as f0.
# NOTE(review): f0 is also the name the Alpha merge cell reads (dataframes = [f0, f1, ...]),
# so re-running that cell after this one would pick up the Beta frame instead.
f0 = gini_results.copy()

In [2674]:
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD0_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.156053,Month,sil_beta_app_score,1.1.0,FPD0
1,2023-01-02,2023-01-08,0.181917,Week,sil_beta_app_score,1.1.0,FPD0
2,2023-01-09,2023-01-15,0.297158,Week,sil_beta_app_score,1.1.0,FPD0
3,2023-01-16,2023-01-22,0.170139,Week,sil_beta_app_score,1.1.0,FPD0
4,2023-01-23,2023-01-29,0.083217,Week,sil_beta_app_score,1.1.0,FPD0
5,2023-01-30,2023-02-05,0.297507,Week,sil_beta_app_score,1.1.0,FPD0
6,2023-02-01,2023-02-28,0.293496,Month,sil_beta_app_score,1.1.0,FPD0
7,2023-02-06,2023-02-12,0.413246,Week,sil_beta_app_score,1.1.0,FPD0
8,2023-02-13,2023-02-19,0.164119,Week,sil_beta_app_score,1.1.0,FPD0
9,2023-02-20,2023-02-26,0.302374,Week,sil_beta_app_score,1.1.0,FPD0


## FPD10

## Test

In [2675]:
# FPD10 / Test: rows come from audit_balance.ml_model_run_details for the two Beta apps-score
# model display names. The prediction payload is rewritten into JSON text (single quotes to
# double quotes, None to null) and $.combined_score is extracted as sil_beta_app_score.
# Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are kept.
# NOTE(review): the LEFT JOIN on loan_master_table behaves like an inner join, because
# the following INNER JOIN and the WHERE clause both need non-null loanmaster columns.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3402562,8a2c3182-0779-4b8e-86f0-43fa5ea88f87,60834025620014,0.506557,2025-04-26 09:55:00,2025-04-26,2025-04,Test,0,1
1,3402574,ad60fe48-706a-46e2-b2e2-09f573a108ca,60834025740018,0.390252,2025-04-26 10:04:54,2025-04-26,2025-04,Test,0,1
2,3402518,b5710c73-1a8c-4e4a-bd2d-7464dcd77e91,60834025180011,0.643085,2025-04-26 09:38:15,2025-04-26,2025-04,Test,0,1
3,3402606,e7b79a25-f625-4f9f-a4ad-4e70c4400519,60834026060019,0.442921,2025-04-26 10:32:17,2025-04-26,2025-04,Test,0,1
4,3402653,b9e1b8fc-d825-4cde-a06a-38dd07634b1c,60834026530016,0.507898,2025-04-26 10:38:59,2025-04-26,2025-04,Test,0,1


In [2676]:
# Snapshot the Test pull as df1; dfd is rebound by the Train query that follows.
df1 = dfd.copy()

## Train

In [2677]:
# FPD10 / Train: rows come from dap_ds_poweruser_playground.ml_training_model_run_details for
# the two Beta apps-score model display names. The score is the raw prediction when it is
# non-null, otherwise $.combined_score parsed from the stringified prediction.
# Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are kept.
# NOTE(review): the LEFT JOIN on loan_master_table behaves like an inner join, because
# the following INNER JOIN and the WHERE clause both need non-null loanmaster columns.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  ;

"""

dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,1923407,a03dd006-3f61-4112-974c-5a5d976e1e90,60819234070014,0.345618,2023-03-01 19:18:39,2023-03-01,2023-03,Train,0,1
1,2025308,b5496680-3385-497c-bf11-26b604959546,60820253080013,0.654765,2023-05-02 20:05:06,2023-05-02,2023-05,Train,0,1
2,2065299,6d3f42c2-5675-4b94-8b0c-54c1ce9b2600,60820652990019,0.492965,2023-05-28 14:41:06,2023-05-28,2023-05,Train,0,1
3,2042837,61517169-3320-4565-b98c-8b435517c215,60820428370011,0.606918,2023-05-13 19:04:48,2023-05-13,2023-05,Train,0,1
4,2113607,5ebeca2c-8813-40ae-affc-41391ef372a6,60821136070014,0.522492,2023-06-30 12:55:14,2023-06-30,2023-06,Train,0,1


In [2678]:
# Snapshot the Train rows as `df2` so they can be stacked with the `df1` snapshot.
df2 = dfd.copy()

In [2679]:
# Stack the `df1` and `df2` snapshots into one frame with a fresh 0..n-1 index,
# then print its column dtypes and non-null counts.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 241037 entries, 0 to 241036
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             241037 non-null  object        
 1   digitalLoanAccountId   241037 non-null  object        
 2   loanAccountNumber      241037 non-null  object        
 3   sil_beta_app_score     241037 non-null  float64       
 4   appln_submit_datetime  241037 non-null  datetime64[us]
 5   disbursementdate       241037 non-null  dbdate        
 6   Application_month      241037 non-null  object        
 7   Data_selection         241037 non-null  object        
 8   deffpd10               241037 non-null  Int64         
 9   flg_mature_fpd10       241037 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 18.8+ MB


In [2680]:
# Coerce the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month (missing months included), ordered by month.
# Kept as the LAST expression of the cell: Jupyter only renders the final expression,
# so placing it before the assignment silently discarded this check.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2681]:
# Persist the combined FPD10 sample; the relative path resolves against the current
# working directory. With to_csv's default index=True the row index is written as the
# first, unnamed column.
df_concat.to_csv(r"sil_beta_app_scorefpd10.csv")

In [2682]:
# Gini of `sil_beta_app_score` against the `deffpd10` flag, labelled 'FPD10'.
# `calculate_periodic_gini` is defined elsewhere in this notebook; judging by the preview
# of its result it returns one row per Month/Week window — confirm against its definition.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffpd10', 'FPD10')

In [2683]:
# Keep the FPD10 result under its own name; `gini_results` is reassigned for the next
# bad-rate definition.
f1 = gini_results.copy()

In [2684]:
# Preview the first ten rows of the FPD10 Gini table.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.22585,Month,sil_beta_app_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.196935,Week,sil_beta_app_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,0.381288,Week,sil_beta_app_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.210526,Week,sil_beta_app_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,0.191667,Week,sil_beta_app_score,1.1.0,FPD10
5,2023-01-30,2023-02-05,0.411765,Week,sil_beta_app_score,1.1.0,FPD10
6,2023-02-01,2023-02-28,0.275626,Month,sil_beta_app_score,1.1.0,FPD10
7,2023-02-06,2023-02-12,0.378472,Week,sil_beta_app_score,1.1.0,FPD10
8,2023-02-13,2023-02-19,0.03794,Week,sil_beta_app_score,1.1.0,FPD10
9,2023-02-20,2023-02-26,0.318296,Week,sil_beta_app_score,1.1.0,FPD10


## FPD30

## Test

In [2685]:
# Test sample for the FPD30 Gini.
# - Scores: rows of audit_balance.ml_model_run_details for the two app-score model names;
#   the score is the "combined_score" field of `prediction`, read after rewriting single
#   quotes to double quotes and None to null (presumably so JSON_VALUE can parse it).
# - Label: `deffpd30` (obs_min_inst_def30 >= 1 and min_inst_def30 = 1) from loan_deliquency_data.
# - Filter: disbursed loans with a non-null score and flg_mature_fpd30 = 1.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""
# Run the query, drop rows that are identical in every column, and preview the result.
# NOTE(review): a loan scored more than once with different scores keeps one row per
# distinct score — confirm this is intended before computing Gini.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2989461,bad4a268-8745-4a69-9d11-f68f8d3db0a2,60829894610011,0.503622,2025-04-01 09:37:47,2025-04-01,2025-04,Test,0,1
1,3352360,b919beac-1a43-4a44-a896-51565863db3d,60833523600015,0.561767,2025-04-01 08:49:31,2025-04-01,2025-04,Test,0,1
2,3352367,37c95d5e-8d01-4967-8069-dee129384854,60833523670012,0.503878,2025-04-01 08:55:22,2025-04-01,2025-04,Test,0,1
3,3352416,7a79cb59-c29f-42fa-b11a-779cb376f381,60833524160014,0.584391,2025-04-01 09:35:56,2025-04-01,2025-04,Test,0,1
4,3352463,b340f7b0-3491-49a8-b0b6-8189d7750f97,60833524630011,0.556827,2025-04-01 09:47:54,2025-04-01,2025-04,Test,0,1


In [2686]:
# Snapshot the FPD30 Test rows as `df1`; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [2687]:
# Train sample for the FPD30 Gini.
# - Scores: rows of dap_ds_poweruser_playground.ml_training_model_run_details for the two
#   app-score model names; the score is `prediction` when populated, otherwise the
#   "combined_score" field read from `prediction` cast to a string (single quotes rewritten
#   to double quotes and None to null, presumably so JSON_VALUE can parse it).
# - Label: `deffpd30` (obs_min_inst_def30 >= 1 and min_inst_def30 = 1) from loan_deliquency_data.
# - Filter: disbursed loans with a non-null score and flg_mature_fpd30 = 1.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  ;

"""

# Run the query, drop rows that are identical in every column, and preview the result.
# NOTE(review): a loan scored more than once with different scores keeps one row per
# distinct score — confirm this is intended before computing Gini.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,1930574,eceb9a84-be27-424f-96dd-bc9aecfe9ddb,60819305740011,0.296253,2023-03-06 11:49:51,2023-03-06,2023-03,Train,0,1
1,2108891,139282ab-a4b1-4f9e-bc5f-ea1dbe1ccd02,60821088910017,0.607453,2023-06-26 19:13:44,2023-06-26,2023-06,Train,0,1
2,2076806,1bb51e13-eb45-43f4-9ecc-ab8c8a5f4b40,60820768060015,0.400603,2023-06-04 11:25:10,2023-06-04,2023-06,Train,0,1
3,2019634,f23e61d9-9ea5-4fd3-bbb3-bc97e44b67f5,60820196340015,0.561465,2023-04-29 16:46:51,2023-04-29,2023-04,Train,0,1
4,1907460,4eb8892b-a1f8-4ad0-8e0a-923cb038e564,60819074600015,0.452984,2023-02-19 14:12:36,2023-02-19,2023-02,Train,0,1


In [2688]:
# Snapshot the FPD30 Train rows as `df2` so they can be stacked with the `df1` snapshot.
df2 = dfd.copy()

In [2689]:
# Stack the `df1` and `df2` snapshots into one frame with a fresh 0..n-1 index,
# then print its column dtypes and non-null counts.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 232582 entries, 0 to 232581
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             232582 non-null  object        
 1   digitalLoanAccountId   232582 non-null  object        
 2   loanAccountNumber      232582 non-null  object        
 3   sil_beta_app_score     232582 non-null  float64       
 4   appln_submit_datetime  232582 non-null  datetime64[us]
 5   disbursementdate       232582 non-null  dbdate        
 6   Application_month      232582 non-null  object        
 7   Data_selection         232582 non-null  object        
 8   deffpd30               232582 non-null  Int64         
 9   flg_mature_fpd30       232582 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 18.2+ MB


In [2690]:
# Coerce the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month (missing months included), ordered by month.
# Kept as the LAST expression of the cell: Jupyter only renders the final expression,
# so placing it before the assignment silently discarded this check.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2691]:
# Persist the combined FPD30 sample; the relative path resolves against the current
# working directory. With to_csv's default index=True the row index is written as the
# first, unnamed column.
df_concat.to_csv(r"sil_beta_app_scorefpd30.csv")

In [2692]:
# Gini of `sil_beta_app_score` against the `deffpd30` flag, labelled 'FPD30'.
# `calculate_periodic_gini` is defined elsewhere in this notebook; judging by the preview
# of its result it returns one row per Month/Week window — confirm against its definition.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffpd30', 'FPD30')

In [2693]:
# Keep the FPD30 result under its own name; `gini_results` is reassigned for the next
# bad-rate definition.
f2 = gini_results.copy()

In [2694]:
# Preview the first ten rows of the FPD30 Gini table.
f2.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.26979,Month,sil_beta_app_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.257051,Week,sil_beta_app_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,0.555556,Week,sil_beta_app_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.210526,Week,sil_beta_app_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,0.130579,Week,sil_beta_app_score,1.1.0,FPD30
5,2023-01-30,2023-02-05,0.496911,Week,sil_beta_app_score,1.1.0,FPD30
6,2023-02-01,2023-02-28,0.305164,Month,sil_beta_app_score,1.1.0,FPD30
7,2023-02-06,2023-02-12,0.334583,Week,sil_beta_app_score,1.1.0,FPD30
8,2023-02-13,2023-02-19,0.096774,Week,sil_beta_app_score,1.1.0,FPD30
9,2023-02-20,2023-02-26,0.437681,Week,sil_beta_app_score,1.1.0,FPD30


## FSPD30

## Test

In [2695]:
# Test sample for the FSPD30 Gini.
# - Scores: rows of audit_balance.ml_model_run_details for the two app-score model names;
#   the score is the "combined_score" field of `prediction`, read after rewriting single
#   quotes to double quotes and None to null (presumably so JSON_VALUE can parse it).
# - Label: `deffspd30` (obs_min_inst_def30 >= 2 and min_inst_def30 in (1,2)) from
#   loan_deliquency_data.
# - Filter: disbursed loans with a non-null score and flg_mature_fspd_30 = 1.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""
# Run the query, drop rows that are identical in every column, and preview the result.
# NOTE(review): a loan scored more than once with different scores keeps one row per
# distinct score — confirm this is intended before computing Gini.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,3636860,fa8495e7-3a83-43a9-ab31-876157bc0343,60836368600014,0.439551,2025-08-22 15:52:54,2025-08-22,2025-08,Test,0,1
1,3636046,7360dd77-0fcd-4013-8140-18f2b649b230,60836360460012,0.525547,2025-08-22 09:54:07,2025-08-22,2025-08,Test,0,1
2,3636930,4c003d22-db7b-4e65-a30c-625d26eab024,60836369300011,0.595919,2025-08-22 16:17:54,2025-08-22,2025-08,Test,0,1
3,3305921,2cfca166-b876-41ee-b341-2f00064b2b1f,60833059210013,0.57256,2025-08-22 16:49:35,2025-08-22,2025-08,Test,0,1
4,3630594,c5727901-5ae8-4878-9a9f-92f9686ad094,60836305940011,0.448084,2025-08-19 14:27:30,2025-08-22,2025-08,Test,0,1


In [2696]:
# Snapshot the FSPD30 Test rows as `df1`; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [2697]:
# Train sample for the FSPD30 Gini.
# - Scores: rows of dap_ds_poweruser_playground.ml_training_model_run_details for the two
#   app-score model names; the score is `prediction` when populated, otherwise the
#   "combined_score" field read from `prediction` cast to a string (single quotes rewritten
#   to double quotes and None to null, presumably so JSON_VALUE can parse it).
# - Label: `deffspd30` (obs_min_inst_def30 >= 2 and min_inst_def30 in (1,2)) from
#   loan_deliquency_data.
# - Filter: disbursed loans with a non-null score and flg_mature_fspd_30 = 1.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  ;

"""

# Run the query, drop rows that are identical in every column, and preview the result.
# NOTE(review): a loan scored more than once with different scores keeps one row per
# distinct score — confirm this is intended before computing Gini.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,1930574,eceb9a84-be27-424f-96dd-bc9aecfe9ddb,60819305740011,0.296253,2023-03-06 11:49:51,2023-03-06,2023-03,Train,0,1
1,2108891,139282ab-a4b1-4f9e-bc5f-ea1dbe1ccd02,60821088910017,0.607453,2023-06-26 19:13:44,2023-06-26,2023-06,Train,0,1
2,2076806,1bb51e13-eb45-43f4-9ecc-ab8c8a5f4b40,60820768060015,0.400603,2023-06-04 11:25:10,2023-06-04,2023-06,Train,0,1
3,2019634,f23e61d9-9ea5-4fd3-bbb3-bc97e44b67f5,60820196340015,0.561465,2023-04-29 16:46:51,2023-04-29,2023-04,Train,0,1
4,1907460,4eb8892b-a1f8-4ad0-8e0a-923cb038e564,60819074600015,0.452984,2023-02-19 14:12:36,2023-02-19,2023-02,Train,0,1


In [2698]:
# Snapshot the FSPD30 Train rows as `df2` so they can be stacked with the `df1` snapshot.
df2 = dfd.copy()

In [2699]:
# Stack the `df1` and `df2` snapshots into one frame with a fresh 0..n-1 index,
# then print its column dtypes and non-null counts.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 220858 entries, 0 to 220857
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             220858 non-null  object        
 1   digitalLoanAccountId   220858 non-null  object        
 2   loanAccountNumber      220858 non-null  object        
 3   sil_beta_app_score     220858 non-null  float64       
 4   appln_submit_datetime  220858 non-null  datetime64[us]
 5   disbursementdate       220858 non-null  dbdate        
 6   Application_month      220858 non-null  object        
 7   Data_selection         220858 non-null  object        
 8   deffspd30              220858 non-null  Int64         
 9   flg_mature_fspd_30     220858 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 17.3+ MB


In [2700]:
# Coerce the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month (missing months included), ordered by month.
# Kept as the LAST expression of the cell: Jupyter only renders the final expression,
# so placing it before the assignment silently discarded this check.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2701]:
# Persist the combined FSPD30 sample; the relative path resolves against the current
# working directory. With to_csv's default index=True the row index is written as the
# first, unnamed column.
df_concat.to_csv(r"sil_beta_app_scorefspd30.csv")

In [2702]:
# Gini of `sil_beta_app_score` against the `deffspd30` flag, labelled 'FSPD30'.
# `calculate_periodic_gini` is defined elsewhere in this notebook; judging by the preview
# of its result it returns one row per Month/Week window — confirm against its definition.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffspd30', 'FSPD30')

In [2703]:
# Keep the FSPD30 result under its own name; `gini_results` is reassigned for the next
# bad-rate definition.
f3 = gini_results.copy()

In [2704]:
# Preview the first ten rows of the FSPD30 Gini table.
f3.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.247928,Month,sil_beta_app_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.124845,Week,sil_beta_app_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.399069,Week,sil_beta_app_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.364341,Week,sil_beta_app_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,0.246377,Week,sil_beta_app_score,1.1.0,FSPD30
5,2023-01-30,2023-02-05,0.419042,Week,sil_beta_app_score,1.1.0,FSPD30
6,2023-02-01,2023-02-28,0.330061,Month,sil_beta_app_score,1.1.0,FSPD30
7,2023-02-06,2023-02-12,0.419753,Week,sil_beta_app_score,1.1.0,FSPD30
8,2023-02-13,2023-02-19,0.188025,Week,sil_beta_app_score,1.1.0,FSPD30
9,2023-02-20,2023-02-26,0.318043,Week,sil_beta_app_score,1.1.0,FSPD30


## FSTPD30

## Test

In [2705]:
# Test sample for the FSTPD30 Gini.
# - Scores: rows of audit_balance.ml_model_run_details for the two app-score model names;
#   the score is the "combined_score" field of `prediction`, read after rewriting single
#   quotes to double quotes and None to null (presumably so JSON_VALUE can parse it).
# - Label: `deffstpd30` (obs_min_inst_def30 >= 3 and min_inst_def30 in (1,2,3)) from
#   loan_deliquency_data.
# - Filter: disbursed loans with a non-null score and flg_mature_fstpd_30 = 1.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""
# Run the query, drop rows that are identical in every column, and preview the result.
# NOTE(review): a loan scored more than once with different scores keeps one row per
# distinct score — confirm this is intended before computing Gini.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,3443378,af0340b3-8d7f-4872-a2a3-a6298832b88e,60834433780011,0.494163,2025-05-18 11:16:32,2025-05-18,2025-05,Test,0,1
1,3451896,4faedd37-79df-49f5-a482-347ce4974207,60834518960014,0.540002,2025-05-22 17:21:32,2025-05-22,2025-05,Test,0,1
2,3346427,172888b1-2bbf-4a9d-95a9-6a30ef577f76,60833464270018,0.436026,2025-03-29 10:29:29,2025-03-29,2025-03,Test,0,1
3,3446711,8a6b59c2-a53d-42f8-89e0-b56b77ae0e0c,60834467110015,0.429066,2025-05-19 20:05:04,2025-05-19,2025-05,Test,0,1
4,3408928,3de107a6-41e3-4ca3-809a-469507fc7e88,60834089280018,0.341658,2025-04-29 15:00:23,2025-04-29,2025-04,Test,0,1


In [2706]:
# Snapshot the FSTPD30 Test rows as `df1`; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [2707]:
# Train sample for the FSTPD30 Gini.
# - Scores: rows of dap_ds_poweruser_playground.ml_training_model_run_details for the two
#   app-score model names; the score is `prediction` when populated, otherwise the
#   "combined_score" field read from `prediction` cast to a string (single quotes rewritten
#   to double quotes and None to null, presumably so JSON_VALUE can parse it).
# - Label: `deffstpd30` (obs_min_inst_def30 >= 3 and min_inst_def30 in (1,2,3)) from
#   loan_deliquency_data.
# - Filter: disbursed loans with a non-null score and flg_mature_fstpd_30 = 1.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;

"""

# Run the query, drop rows that are identical in every column, and preview the result.
# NOTE(review): a loan scored more than once with different scores keeps one row per
# distinct score — confirm this is intended before computing Gini.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2042856,9f428e8d-773c-44e4-8455-bd2890bfec80,60820428560014,0.575913,2023-05-13 19:05:09,2023-05-13,2023-05,Train,0,1
1,1940506,417bfec1-dd87-40ea-9ff9-6171284a4bc7,60819405060014,0.543571,2023-06-17 19:09:04,2023-06-17,2023-06,Train,1,1
2,1871137,ab8fbf2d-d414-471d-82bf-3a5a2f5ad71f,60818711370012,0.443696,2023-01-23 13:40:42,2023-01-23,2023-01,Train,0,1
3,2006944,a3ddc8c3-11d6-4ac0-a9f0-f03435522fe8,60820069440019,0.572059,2023-04-21 12:30:33,2023-04-21,2023-04,Train,0,1
4,2070689,9515af0d-432b-4496-8395-a78df7882501,60820706890014,0.496503,2023-05-31 16:47:44,2023-05-31,2023-05,Train,0,1


In [2708]:
# Snapshot the FSTPD30 Train rows as `df2` so they can be stacked with the `df1` snapshot.
df2 = dfd.copy()

In [2709]:
# Stack the `df1` and `df2` snapshots into one frame with a fresh 0..n-1 index,
# then print its column dtypes and non-null counts.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207466 entries, 0 to 207465
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             207466 non-null  object        
 1   digitalLoanAccountId   207466 non-null  object        
 2   loanAccountNumber      207466 non-null  object        
 3   sil_beta_app_score     207466 non-null  float64       
 4   appln_submit_datetime  207466 non-null  datetime64[us]
 5   disbursementdate       207466 non-null  dbdate        
 6   Application_month      207466 non-null  object        
 7   Data_selection         207466 non-null  object        
 8   deffstpd30             207466 non-null  Int64         
 9   flg_mature_fstpd_30    207466 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 16.2+ MB


In [2710]:
# Coerce the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month (missing months included), ordered by month.
# Kept as the LAST expression of the cell: Jupyter only renders the final expression,
# so placing it before the assignment silently discarded this check.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2711]:
# Persist the combined FSTPD30 sample; the relative path resolves against the current
# working directory. With to_csv's default index=True the row index is written as the
# first, unnamed column.
df_concat.to_csv(r"sil_beta_app_scorefstpd30.csv")

In [2712]:
# Gini of `sil_beta_app_score` against the `deffstpd30` flag, labelled 'FSTPD30'.
# `calculate_periodic_gini` is defined elsewhere in this notebook; judging by the preview
# of its result it returns one row per Month/Week window — confirm against its definition.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffstpd30', 'FSTPD30')

In [2713]:
# Keep the FSTPD30 result under its own name so it can be merged with the other
# per-bad-rate tables.
f4 = gini_results.copy()

In [2714]:
# Preview the first ten rows of the FSTPD30 Gini table.
f4.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.260324,Month,sil_beta_app_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.145503,Week,sil_beta_app_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.362537,Week,sil_beta_app_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.261324,Week,sil_beta_app_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.30878,Week,sil_beta_app_score,1.1.0,FSTPD30
5,2023-01-30,2023-02-05,0.436764,Week,sil_beta_app_score,1.1.0,FSTPD30
6,2023-02-01,2023-02-28,0.356994,Month,sil_beta_app_score,1.1.0,FSTPD30
7,2023-02-06,2023-02-12,0.503968,Week,sil_beta_app_score,1.1.0,FSTPD30
8,2023-02-13,2023-02-19,0.242287,Week,sil_beta_app_score,1.1.0,FSTPD30
9,2023-02-20,2023-02-26,0.238095,Week,sil_beta_app_score,1.1.0,FSTPD30


## Combining the DataFrames

In [2715]:
import functools

dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two frames on the module-level ``common_columns`` key.

    Rows whose key columns match are combined into a single row; rows found
    in only one of the frames are kept, with NaN in the other frame's columns.
    """
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the list left to right into a single frame with one Gini column per target.
# NOTE(review): 'bad_rate' is part of the join key and the preview of final_df
# shows NaN in every other target's Gini column, so rows from different frames
# do not appear to match — confirm that stacking (rather than aligning) is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

# Column names of the merged frame.
final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_app_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate',
       'sil_beta_app_score_FPD10_gini', 'sil_beta_app_score_FPD30_gini',
       'sil_beta_app_score_FSPD30_gini',
       'sil_beta_app_score_FSTPD30_gini'], dtype=object)

In [2716]:
# Put the key columns first, then one Gini column per delinquency target, and
# attach the constant descriptor columns that are uploaded with every row.
final_df = (
    final_df
    .loc[:, [
        'start_date',
        'end_date',
        'period',
        'Model_Name',
        'version',
        'bad_rate',
        'sil_beta_app_score_FPD0_gini',
        'sil_beta_app_score_FPD10_gini',
        'sil_beta_app_score_FPD30_gini',
        'sil_beta_app_score_FSPD30_gini',
        'sil_beta_app_score_FSTPD30_gini',
    ]]
    .assign(
        Trench_category='All',
        Model_display_name='apps_score_model_sil',
        Product_type='SIL',
    )
)
final_df.dtypes

start_date                         datetime64[ns]
end_date                           datetime64[ns]
period                                     object
Model_Name                                 object
version                                    object
bad_rate                                   object
sil_beta_app_score_FPD0_gini              float64
sil_beta_app_score_FPD10_gini             float64
sil_beta_app_score_FPD30_gini             float64
sil_beta_app_score_FSPD30_gini            float64
sil_beta_app_score_FSTPD30_gini           float64
Trench_category                            object
Model_display_name                         object
Product_type                               object
dtype: object

In [2717]:
# Preview the reordered table before it is uploaded.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,sil_beta_app_score_FPD0_gini,sil_beta_app_score_FPD10_gini,sil_beta_app_score_FPD30_gini,sil_beta_app_score_FSPD30_gini,sil_beta_app_score_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,sil_beta_app_score,1.1.0,FPD0,0.156053,,,,,All,apps_score_model_sil,SIL
1,2023-01-02,2023-01-08,Week,sil_beta_app_score,1.1.0,FPD0,0.181917,,,,,All,apps_score_model_sil,SIL
2,2023-01-09,2023-01-15,Week,sil_beta_app_score,1.1.0,FPD0,0.297158,,,,,All,apps_score_model_sil,SIL
3,2023-01-16,2023-01-22,Week,sil_beta_app_score,1.1.0,FPD0,0.170139,,,,,All,apps_score_model_sil,SIL
4,2023-01-23,2023-01-29,Week,sil_beta_app_score,1.1.0,FPD0,0.083217,,,,,All,apps_score_model_sil,SIL


In [2718]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.sil_beta_app_score_v1_gini_v4"

# WRITE_TRUNCATE replaces the destination table's contents on every run;
# use "WRITE_APPEND" to add rows to the existing table instead.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(
    dataframe=final_df,
    destination=table_id,
    job_config=job_config,
)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=aa6d1290-a40e-48a9-9613-cadf5b1463f4>

# Beta SIL Demo Score

## FPD0

## Test

In [2719]:
# FPD0 / Test: scores for the two demo-score model display names read from
# audit_balance.ml_model_run_details, joined to the loan master table and to
# the delinquency flags. Rows are kept when flagDisbursement = 1, a
# disbursement timestamp exists, the score is non-null and flg_mature_fpd0 = 1.
# NOTE(review): the `cleaned` CTE builds calcFeature_cleaned but nothing
# downstream selects it; the WHERE filters on loanmaster columns also make the
# LEFT JOIN keep matched rows only.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  ;"""

# Run the query, load the result into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3406547,d58d4c78-e14a-40ed-b7bd-019caaa51f1f,60834065470011,0.169802564,2025-04-28 10:14:30,2025-04-28,2025-04,Test,0,1
1,3406567,0f277365-6230-4153-83c5-e4af6deb64da,60834065670015,0.0721067001,2025-04-28 10:21:05,2025-04-28,2025-04,Test,0,1
2,3406565,96a719f2-4b37-4504-96fd-32cbfd63be7a,60834065650013,0.0410550525,2025-04-28 10:27:57,2025-04-28,2025-04,Test,0,1
3,3406604,0639704e-695e-48bd-9816-ea97f0ae25ac,60834066040013,0.0466156812,2025-04-28 10:44:03,2025-04-28,2025-04,Test,0,1
4,3406633,5ef6f81d-f1f9-4839-8aec-b791bd964e30,60834066330018,0.0286626338,2025-04-28 10:54:27,2025-04-28,2025-04,Test,1,1


In [2720]:
# Keep the FPD0 Test result; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [2721]:
# FPD0 / Train: same query shape as the Test cell, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are
# labelled 'Train'. Rows are kept when flagDisbursement = 1, a disbursement
# timestamp exists, the score is non-null and flg_mature_fpd0 = 1.
# NOTE(review): the `cleaned` CTE builds calcFeature_cleaned but nothing
# downstream selects it.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  ;
"""

# Run the query, load the result into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2062528,00885e9f-5a86-4b78-a4fd-f0fc8a20ad4b,60820625280011,0.087039,2023-05-26 18:29:02,2023-05-26,2023-05,Train,0,1
1,1806766,16bd36f9-c18d-487b-bc17-b1b3b666c1ac,60818067660011,0.224749,2023-01-13 11:04:01,2023-01-13,2023-01,Train,0,1
2,2106186,84f7c30c-f5b4-4431-85b3-be66ce953d83,60821061860011,0.15748,2023-06-24 14:29:47,2023-06-24,2023-06,Train,0,1
3,2099641,82e7479b-ccb0-4b88-8ce6-3a16adec07c8,60820996410014,0.107514,2023-06-19 13:38:50,2023-06-19,2023-06,Train,0,1
4,1778491,85ee1518-2ece-4529-8bd9-627050f1fbbe,60817784910019,0.195919,2023-05-28 18:03:22,2023-05-28,2023-05,Train,1,1


In [2722]:
# Keep the FPD0 Train result alongside df1 (the Test result).
df2 = dfd.copy()

In [2723]:
# NOTE(review): this cell repeats the previous one verbatim; it is harmless
# but redundant and can be deleted.
df2 = dfd.copy()

In [2724]:
# Stack the Test (df1) and Train (df2) samples and renumber the rows from 0.
# NOTE(review): nothing here removes a loan that occurs in both sources —
# confirm the two run-detail tables do not overlap.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Check the row count and dtypes of the stacked frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273188 entries, 0 to 273187
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             273188 non-null  object        
 1   digitalLoanAccountId   273188 non-null  object        
 2   loanAccountNumber      273188 non-null  object        
 3   sil_beta_demo_score    273188 non-null  object        
 4   appln_submit_datetime  273188 non-null  datetime64[us]
 5   disbursementdate       273188 non-null  dbdate        
 6   Application_month      273188 non-null  object        
 7   Data_selection         273188 non-null  object        
 8   deffpd0                273188 non-null  Int64         
 9   flg_mature_fpd0        273188 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 21.4+ MB


In [2725]:
# The stacked score column has object dtype (see the info() output of the
# previous cell); coerce it to numeric. Values that cannot be parsed become NaN
# because of errors='coerce'.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Row count per application month (missing months included). This is kept as
# the cell's last expression so the notebook renders it; when it ran first its
# result was computed and then discarded without being shown.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2726]:
# Write df_concat to a CSV in the current working directory. With pandas'
# default index=True the row index is written as an unnamed first column.
df_concat.to_csv(r"sil_beta_demo_scorefpd0.csv")

In [2727]:
# Gini of sil_beta_demo_score against the deffpd0 flag, labelled 'FPD0'.
# NOTE(review): calculate_periodic_gini is defined elsewhere in the notebook —
# confirm how it treats NaN scores.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0')

In [2728]:
# Keep the demo-score FPD0 result under its own name.
# NOTE(review): this rebinds f0, which the app-score merge cell earlier in the
# notebook also reads; re-running that merge after this point would pick up
# this demo-score frame instead.
f0 = gini_results.copy()

In [2729]:
# Preview the first ten rows of the demo-score FPD0 Gini table.
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD0_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.141958,Month,sil_beta_demo_score,1.1.0,FPD0
1,2023-01-02,2023-01-08,0.279249,Week,sil_beta_demo_score,1.1.0,FPD0
2,2023-01-09,2023-01-15,0.118033,Week,sil_beta_demo_score,1.1.0,FPD0
3,2023-01-16,2023-01-22,0.15302,Week,sil_beta_demo_score,1.1.0,FPD0
4,2023-01-23,2023-01-29,-0.006286,Week,sil_beta_demo_score,1.1.0,FPD0
5,2023-01-30,2023-02-05,0.196649,Week,sil_beta_demo_score,1.1.0,FPD0
6,2023-02-01,2023-02-28,0.150587,Month,sil_beta_demo_score,1.1.0,FPD0
7,2023-02-06,2023-02-12,0.030928,Week,sil_beta_demo_score,1.1.0,FPD0
8,2023-02-13,2023-02-19,0.379882,Week,sil_beta_demo_score,1.1.0,FPD0
9,2023-02-20,2023-02-26,0.056522,Week,sil_beta_demo_score,1.1.0,FPD0


## FPD10

## Test

In [2730]:
# FPD10 / Test: scores for the two demo-score model display names read from
# audit_balance.ml_model_run_details, joined to the loan master table and to
# the delinquency flags. Rows are kept when flagDisbursement = 1, a
# disbursement timestamp exists, the score is non-null and flg_mature_fpd10 = 1.
# NOTE(review): the `cleaned` CTE builds calcFeature_cleaned but nothing
# downstream selects it; the WHERE filters on loanmaster columns also make the
# LEFT JOIN keep matched rows only.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  ;"""

# Run the query, load the result into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3467015,6c364614-55f0-4f10-9ad8-7913ed2a2366,60834670150012,0.0834439608,2025-05-30 10:30:27,2025-05-30,2025-05,Test,0,1
1,3467038,508a4cdb-c782-4995-91e3-281c63ac4339,60834670380019,0.0608743217,2025-05-30 10:47:21,2025-05-30,2025-05,Test,0,1
2,3467060,01f71dab-b8fc-4f18-9a28-c67e09e25011,60834670600017,0.051590939,2025-05-30 10:53:50,2025-05-30,2025-05,Test,0,1
3,3467089,57c7bc9a-b1fd-47af-84f4-8d4910a0d475,60834670890011,0.0449010996,2025-05-30 10:57:07,2025-05-30,2025-05,Test,0,1
4,3467085,7576062e-d827-49df-97d4-93e82fb64c06,60834670850016,0.0749216911,2025-05-30 11:12:15,2025-05-30,2025-05,Test,1,1


In [2731]:
# Keep the FPD10 Test result; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [2732]:
# FPD10 / Train: same query shape as the Test cell, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are
# labelled 'Train'. Rows are kept when flagDisbursement = 1, a disbursement
# timestamp exists, the score is non-null and flg_mature_fpd10 = 1.
# NOTE(review): the `cleaned` CTE builds calcFeature_cleaned but nothing
# downstream selects it.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""

# Run the query, load the result into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,1929423,299d3ae0-a8bc-45b6-aa57-38a5419d5af4,60819294230018,0.086464,2023-03-05 17:24:29,2023-03-05,2023-03,Train,0,1
1,1877238,8ab3595f-88a8-40d1-9aab-09ec00c53345,60818772380011,0.109116,2023-01-28 18:31:42,2023-01-28,2023-01,Train,0,1
2,2064940,01fdfb53-8bb0-4a8b-8360-4ec7a944872f,60820649400017,0.062144,2023-05-28 10:57:29,2023-05-28,2023-05,Train,0,1
3,1961561,befe7a2d-3807-46bd-88cf-0d7f3ff99136,60819615610015,0.137556,2023-03-26 16:22:21,2023-03-26,2023-03,Train,0,1
4,1922775,f4eb86ff-bdbd-4da9-b0db-16af0105c132,60819227750011,0.110434,2023-03-01 13:00:32,2023-03-01,2023-03,Train,0,1


In [2733]:
# Keep the FPD10 Train result alongside df1 (the Test result).
df2 = dfd.copy()

In [2734]:
# Stack the Test (df1) and Train (df2) samples and renumber the rows from 0.
# NOTE(review): nothing here removes a loan that occurs in both sources —
# confirm the two run-detail tables do not overlap.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Check the row count and dtypes of the stacked frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 268486 entries, 0 to 268485
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             268486 non-null  object        
 1   digitalLoanAccountId   268486 non-null  object        
 2   loanAccountNumber      268486 non-null  object        
 3   sil_beta_demo_score    268486 non-null  object        
 4   appln_submit_datetime  268486 non-null  datetime64[us]
 5   disbursementdate       268486 non-null  dbdate        
 6   Application_month      268486 non-null  object        
 7   Data_selection         268486 non-null  object        
 8   deffpd10               268486 non-null  Int64         
 9   flg_mature_fpd10       268486 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 21.0+ MB


In [2735]:
# The stacked score column has object dtype (see the info() output of the
# previous cell); coerce it to numeric. Values that cannot be parsed become NaN
# because of errors='coerce'.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Row count per application month (missing months included). This is kept as
# the cell's last expression so the notebook renders it; when it ran first its
# result was computed and then discarded without being shown.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2736]:
# Write df_concat to a CSV in the current working directory. With pandas'
# default index=True the row index is written as an unnamed first column.
df_concat.to_csv(r"sil_beta_demo_scorefpd10.csv")

In [2737]:
# Gini of sil_beta_demo_score against the deffpd10 flag, labelled 'FPD10'.
# NOTE(review): calculate_periodic_gini is defined elsewhere in the notebook —
# confirm how it treats NaN scores.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10')

In [2738]:
# Keep the demo-score FPD10 result under its own name.
# NOTE(review): f1 is also read by the app-score merge cell earlier in the
# notebook, so that cell must not be re-run after this point.
f1 = gini_results.copy()

In [2739]:
# Preview the first ten rows of the demo-score FPD10 Gini table.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.231433,Month,sil_beta_demo_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.410532,Week,sil_beta_demo_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,0.113578,Week,sil_beta_demo_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.485714,Week,sil_beta_demo_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,0.102941,Week,sil_beta_demo_score,1.1.0,FPD10
5,2023-01-30,2023-02-05,0.042026,Week,sil_beta_demo_score,1.1.0,FPD10
6,2023-02-01,2023-02-28,0.113077,Month,sil_beta_demo_score,1.1.0,FPD10
7,2023-02-06,2023-02-12,0.17038,Week,sil_beta_demo_score,1.1.0,FPD10
8,2023-02-13,2023-02-19,0.384306,Week,sil_beta_demo_score,1.1.0,FPD10
9,2023-02-20,2023-02-26,-0.136752,Week,sil_beta_demo_score,1.1.0,FPD10


## FPD30

## Test

In [2740]:
# FPD30 / Test: scores for the two demo-score model display names read from
# audit_balance.ml_model_run_details, joined to the loan master table and to
# the delinquency flags. Rows are kept when flagDisbursement = 1, a
# disbursement timestamp exists, the score is non-null and flg_mature_fpd30 = 1.
# NOTE(review): the `cleaned` CTE builds calcFeature_cleaned but nothing
# downstream selects it; the WHERE filters on loanmaster columns also make the
# LEFT JOIN keep matched rows only.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  ;"""

# Run the query, load the result into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2390994,9178b377-f11b-4270-9584-6842fe66f1d3,60823909940018,0.0908876778,2025-08-08 13:33:23,2025-08-08,2025-08,Test,1,1
1,2988081,8a2925f7-1cfd-4f92-9358-9dcc0e42f67f,60829880810027,0.1763140734,2025-08-08 12:30:38,2025-08-08,2025-08,Test,0,1
2,3002190,6ccdbd5b-d7b7-4b2d-9b09-d3b80bcd736e,60830021900012,0.1382441976,2025-08-08 10:38:59,2025-08-08,2025-08,Test,0,1
3,3121656,fb5a86db-3dea-406c-acd0-d23bf9ed1277,60831216560017,0.0743203121,2025-08-08 11:23:39,2025-08-08,2025-08,Test,0,1
4,3153113,c035147b-d796-494c-b03f-a92d5d2d9569,60831531130022,0.1326933257,2025-08-08 13:56:39,2025-08-08,2025-08,Test,0,1


In [2741]:
# Keep the FPD30 Test result; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [2742]:
# FPD30 / Train: same query shape as the Test cell, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are
# labelled 'Train'. Rows are kept when flagDisbursement = 1, a disbursement
# timestamp exists, the score is non-null and flg_mature_fpd30 = 1.
# NOTE(review): the `cleaned` CTE builds calcFeature_cleaned but nothing
# downstream selects it.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""

# Run the query, load the result into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2095992,d3202127-a5b3-4542-bc31-340526fd37a9,60820959920018,0.139643,2023-06-16 16:32:50,2023-06-16,2023-06,Train,0,1
1,2074935,93eadec0-dd64-4fbb-a44a-8ffcfe919700,60820749350015,0.116053,2023-06-08 11:04:07,2023-06-08,2023-06,Train,0,1
2,2075704,8ca4cb7c-7a5f-4533-9675-b2710fe5a653,60820757040016,0.141425,2023-06-03 16:21:44,2023-06-03,2023-06,Train,0,1
3,2089395,2d695825-77cc-4c29-ad4d-482c0e2eac9f,60820893950014,0.103269,2023-06-12 15:49:56,2023-06-12,2023-06,Train,0,1
4,1954987,95969fe4-d03d-4b70-95db-1e8d0c34875d,60819549870014,0.163692,2023-03-22 13:39:26,2023-03-22,2023-03,Train,0,1


In [2743]:
# Keep the FPD30 Train result alongside df1 (the Test result).
df2 = dfd.copy()

In [2744]:
# Stack the Test (df1) and Train (df2) samples and renumber the rows from 0.
# NOTE(review): nothing here removes a loan that occurs in both sources —
# confirm the two run-detail tables do not overlap.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Check the row count and dtypes of the stacked frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258928 entries, 0 to 258927
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             258928 non-null  object        
 1   digitalLoanAccountId   258928 non-null  object        
 2   loanAccountNumber      258928 non-null  object        
 3   sil_beta_demo_score    258928 non-null  object        
 4   appln_submit_datetime  258928 non-null  datetime64[us]
 5   disbursementdate       258928 non-null  dbdate        
 6   Application_month      258928 non-null  object        
 7   Data_selection         258928 non-null  object        
 8   deffpd30               258928 non-null  Int64         
 9   flg_mature_fpd30       258928 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 20.2+ MB


In [2745]:
# The stacked score column has object dtype (see the info() output of the
# previous cell); coerce it to numeric. Values that cannot be parsed become NaN
# because of errors='coerce'.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Row count per application month (missing months included). This is kept as
# the cell's last expression so the notebook renders it; when it ran first its
# result was computed and then discarded without being shown.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2746]:
# Write df_concat to a CSV in the current working directory. With pandas'
# default index=True the row index is written as an unnamed first column.
df_concat.to_csv(r"sil_beta_demo_scorefpd30.csv")

In [2747]:
# Gini of sil_beta_demo_score against the deffpd30 flag, labelled 'FPD30'.
# NOTE(review): calculate_periodic_gini is defined elsewhere in the notebook —
# confirm how it treats NaN scores.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd30', 'FPD30')

In [2748]:
# Keep the demo-score FPD30 result under its own name.
# NOTE(review): f2 is also read by the app-score merge cell earlier in the
# notebook, so that cell must not be re-run after this point.
f2 = gini_results.copy()

In [2749]:
# Preview the first ten rows of the demo-score FPD30 Gini table.
f2.head(10)

Flushing oldest 200 entries.
  warn('Output cache limit (currently {sz} entries) hit.\n'


Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.286133,Month,sil_beta_demo_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.382369,Week,sil_beta_demo_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,0.290358,Week,sil_beta_demo_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.485714,Week,sil_beta_demo_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,0.119526,Week,sil_beta_demo_score,1.1.0,FPD30
5,2023-01-30,2023-02-05,0.090598,Week,sil_beta_demo_score,1.1.0,FPD30
6,2023-02-01,2023-02-28,0.133196,Month,sil_beta_demo_score,1.1.0,FPD30
7,2023-02-06,2023-02-12,0.153846,Week,sil_beta_demo_score,1.1.0,FPD30
8,2023-02-13,2023-02-19,0.417967,Week,sil_beta_demo_score,1.1.0,FPD30
9,2023-02-20,2023-02-26,-0.04771,Week,sil_beta_demo_score,1.1.0,FPD30


## FSPD30

## Test

In [2750]:
# FSPD30 / Test: scores for the two demo-score model display names read from
# audit_balance.ml_model_run_details, joined to the loan master table and to
# the delinquency flags. Rows are kept when flagDisbursement = 1, a
# disbursement timestamp exists, the score is non-null and flg_mature_fspd_30 = 1.
# NOTE(review): the `cleaned` CTE builds calcFeature_cleaned but nothing
# downstream selects it; the WHERE filters on loanmaster columns also make the
# LEFT JOIN keep matched rows only.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  ;"""

# Run the query, load the result into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,3636860,fa8495e7-3a83-43a9-ab31-876157bc0343,60836368600014,0.065951985,2025-08-22 15:52:54,2025-08-22,2025-08,Test,0,1
1,3636046,7360dd77-0fcd-4013-8140-18f2b649b230,60836360460012,0.1217160363,2025-08-22 09:54:07,2025-08-22,2025-08,Test,0,1
2,3636930,4c003d22-db7b-4e65-a30c-625d26eab024,60836369300011,0.0501459143,2025-08-22 16:17:54,2025-08-22,2025-08,Test,0,1
3,3305921,2cfca166-b876-41ee-b341-2f00064b2b1f,60833059210013,0.1861481356,2025-08-22 16:49:35,2025-08-22,2025-08,Test,0,1
4,3630594,c5727901-5ae8-4878-9a9f-92f9686ad094,60836305940011,0.0275888996,2025-08-19 14:27:30,2025-08-22,2025-08,Test,0,1


In [2751]:
# Keep the Test pull under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [2752]:
# FSPD30 - Train population (demo model).
# Same shape as the Test query, but predictions come from
# `dap_ds_poweruser_playground.ml_training_model_run_details` and rows are
# labelled 'Train'. Keeps disbursed loans with a non-null score and
# flg_mature_fspd_30 = 1 (obs_min_inst_def30 >= 2).
# `calcFeature_cleaned` is built in the first CTE but not used afterwards.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""

# Run the query and drop fully identical rows in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,1862554,79869aff-229b-4447-8a92-5a732a561218,60818625540014,0.047235,2023-01-15 15:23:36,2023-01-15,2023-01,Train,0,1
1,2059837,5f9a2b85-6cfa-4200-9188-97b81a9df0b9,60820598370014,0.169916,2023-05-24 12:35:39,2023-05-24,2023-05,Train,0,1
2,1824836,672128c7-0257-4efe-acf9-810db4f80f98,60818248360019,0.286349,2023-03-31 18:27:18,2023-03-31,2023-03,Train,0,1
3,1960416,8fcad544-7ae1-4d60-8b7e-817288633189,60819604160013,0.055241,2023-03-25 19:16:53,2023-03-26,2023-03,Train,1,1
4,2006827,ca75b813-6074-4fa8-a6df-8c10bf02b79b,60820068270015,0.088839,2023-04-22 12:44:49,2023-04-22,2023-04,Train,0,1


In [2753]:
# Keep the Train pull as `df2` so it can be stacked with the Test pull held in `df1`.
df2 = dfd.copy()

In [2754]:
# Stack the Test (df1) and Train (df2) pulls row-wise under a fresh 0..n-1 index,
# then print the schema summary to check row counts and dtypes.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245665 entries, 0 to 245664
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             245665 non-null  object        
 1   digitalLoanAccountId   245665 non-null  object        
 2   loanAccountNumber      245665 non-null  object        
 3   sil_beta_demo_score    245665 non-null  object        
 4   appln_submit_datetime  245665 non-null  datetime64[us]
 5   disbursementdate       245665 non-null  dbdate        
 6   Application_month      245665 non-null  object        
 7   Data_selection         245665 non-null  object        
 8   deffspd30              245665 non-null  Int64         
 9   flg_mature_fspd_30     245665 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 19.2+ MB


In [2755]:
# The score column is object dtype in the df_concat.info() summary, so convert
# it to a numeric dtype first; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Rows per application month. Kept as the cell's LAST expression so the notebook
# actually renders it; as a non-final statement its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2756]:
# Persist the scored FSPD30 dataset next to the notebook (relative path). With the
# default index=True the RangeIndex is written as an unnamed first column.
df_concat.to_csv(r"sil_beta_demo_scorefspd30.csv")

In [2757]:
# Periodic gini of the demo score against the FSPD30 default flag.
# `calculate_periodic_gini` is defined outside this section; in the preview below
# the last argument appears in the gini column name and in the `bad_rate` column.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffspd30', 'FSPD30')

In [2758]:
# Keep the FSPD30 table as `f3`: `gini_results` is overwritten by the next bad-rate
# section, and `f3` is one of the inputs of the final merge.
f3 = gini_results.copy()

In [2759]:
# Preview the first ten period rows (monthly and weekly windows are interleaved).
f3.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.25096,Month,sil_beta_demo_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.355795,Week,sil_beta_demo_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.235256,Week,sil_beta_demo_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.371812,Week,sil_beta_demo_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,0.131773,Week,sil_beta_demo_score,1.1.0,FSPD30
5,2023-01-30,2023-02-05,0.066116,Week,sil_beta_demo_score,1.1.0,FSPD30
6,2023-02-01,2023-02-28,0.120064,Month,sil_beta_demo_score,1.1.0,FSPD30
7,2023-02-06,2023-02-12,0.150435,Week,sil_beta_demo_score,1.1.0,FSPD30
8,2023-02-13,2023-02-19,0.309559,Week,sil_beta_demo_score,1.1.0,FSPD30
9,2023-02-20,2023-02-26,-0.036571,Week,sil_beta_demo_score,1.1.0,FSPD30


## FSTPD30

## Test

In [2760]:
# FSTPD30 - Test population (demo model).
# Reads predictions from `audit_balance.ml_model_run_details`, joins them to the
# loan master on digitalLoanAccountId and to the delinquency flags on
# loanAccountNumber, and keeps disbursed loans with a non-null score and
# flg_mature_fstpd_30 = 1 (i.e. obs_min_inst_def30 >= 3).
# The WHERE filters on loanmaster columns discard unmatched rows, so the LEFT JOIN
# effectively acts as an inner join. `calcFeature_cleaned` is built in the first
# CTE but is not referenced further down.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;"""

# Run the query and drop fully identical rows in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,3443378,af0340b3-8d7f-4872-a2a3-a6298832b88e,60834433780011,0.027380532,2025-05-18 11:16:32,2025-05-18,2025-05,Test,0,1
1,3451896,4faedd37-79df-49f5-a482-347ce4974207,60834518960014,0.0629634574,2025-05-22 17:21:32,2025-05-22,2025-05,Test,0,1
2,3346427,172888b1-2bbf-4a9d-95a9-6a30ef577f76,60833464270018,0.0432430336,2025-03-29 10:29:29,2025-03-29,2025-03,Test,0,1
3,3469217,7b4f51e1-c169-439a-951a-834b0f581a25,60834692170018,0.0259871787,2025-05-31 12:13:24,2025-05-31,2025-05,Test,0,1
4,3446711,8a6b59c2-a53d-42f8-89e0-b56b77ae0e0c,60834467110015,0.1426732033,2025-05-19 20:05:04,2025-05-19,2025-05,Test,0,1


In [2761]:
# Keep the Test pull under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [2762]:
# FSTPD30 - Train population (demo model).
# Same shape as the Test query, but predictions come from
# `dap_ds_poweruser_playground.ml_training_model_run_details` and rows are
# labelled 'Train'. Keeps disbursed loans with a non-null score and
# flg_mature_fstpd_30 = 1 (obs_min_inst_def30 >= 3).
# `calcFeature_cleaned` is built in the first CTE but not used afterwards.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""

# Run the query and drop fully identical rows in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2021168,da877fa9-bfc1-4c3a-a1ff-08678b0e2af8,60820211680013,0.114377,2023-04-30 15:50:11,2023-04-30,2023-04,Train,0,1
1,2060868,234dd05a-5f23-452c-9b7b-9d5fd96c2dd1,60820608680011,0.229062,2023-05-25 10:31:49,2023-05-25,2023-05,Train,0,1
2,1940396,cd8857f1-76d8-442d-b33d-b724d867038e,60819403960016,0.054331,2023-03-12 18:56:30,2023-03-12,2023-03,Train,0,1
3,1942460,ff1653a1-b8e1-40fe-a62d-3c5823a70835,60819424600015,0.166225,2023-03-14 15:36:48,2023-03-14,2023-03,Train,0,1
4,1927449,5f3eef49-d134-4fe8-8646-0b39d43d50f3,60819274490011,0.149347,2023-03-04 12:18:34,2023-03-04,2023-03,Train,0,1


In [2763]:
# Keep the Train pull as `df2` so it can be stacked with the Test pull held in `df1`.
df2 = dfd.copy()

In [2764]:
# Stack the Test (df1) and Train (df2) pulls row-wise under a fresh 0..n-1 index,
# then print the schema summary to check row counts and dtypes.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230343 entries, 0 to 230342
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             230343 non-null  object        
 1   digitalLoanAccountId   230343 non-null  object        
 2   loanAccountNumber      230343 non-null  object        
 3   sil_beta_demo_score    230343 non-null  object        
 4   appln_submit_datetime  230343 non-null  datetime64[us]
 5   disbursementdate       230343 non-null  dbdate        
 6   Application_month      230343 non-null  object        
 7   Data_selection         230343 non-null  object        
 8   deffstpd30             230343 non-null  Int64         
 9   flg_mature_fstpd_30    230343 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 18.0+ MB


In [2765]:
# The score column is object dtype in the df_concat.info() summary, so convert
# it to a numeric dtype first; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Rows per application month. Kept as the cell's LAST expression so the notebook
# actually renders it; as a non-final statement its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2766]:
# Persist the scored FSTPD30 dataset next to the notebook (relative path). With the
# default index=True the RangeIndex is written as an unnamed first column.
df_concat.to_csv(r"sil_beta_demo_scorefstpd30.csv")

In [2767]:
# Periodic gini of the demo score against the FSTPD30 default flag.
# `calculate_periodic_gini` is defined outside this section; in the preview below
# the last argument appears in the gini column name and in the `bad_rate` column.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffstpd30', 'FSTPD30')

In [2768]:
# Keep the FSTPD30 table as `f4`; it is the last input of the merge that follows.
f4 = gini_results.copy()

In [2769]:
# Preview the first ten period rows (monthly and weekly windows are interleaved).
f4.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.253371,Month,sil_beta_demo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.335695,Week,sil_beta_demo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.219488,Week,sil_beta_demo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.363712,Week,sil_beta_demo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.146972,Week,sil_beta_demo_score,1.1.0,FSTPD30
5,2023-01-30,2023-02-05,0.121693,Week,sil_beta_demo_score,1.1.0,FSTPD30
6,2023-02-01,2023-02-28,0.128934,Month,sil_beta_demo_score,1.1.0,FSTPD30
7,2023-02-06,2023-02-12,0.118855,Week,sil_beta_demo_score,1.1.0,FSTPD30
8,2023-02-13,2023-02-19,0.343576,Week,sil_beta_demo_score,1.1.0,FSTPD30
9,2023-02-20,2023-02-26,-0.04775,Week,sil_beta_demo_score,1.1.0,FSTPD30


## Combining the gini dataframes

In [2770]:
import functools

# Per-bad-rate gini tables built in the sections above; the gini columns of the
# merged frame appear in this order.
dataframes = [f0, f1, f2, f3, f4]

# Join keys shared by every table.
# NOTE(review): 'bad_rate' is one of the keys and, in the previews, carries a
# different label in each table (FPD0, FPD30, FSPD30, FSTPD30, ...). Rows from
# different tables therefore never match, and the outer merge stacks them with NaN
# in the other gini columns - confirm this long layout is what consumers expect.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two gini tables on `common_columns`, keeping rows from both sides."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left to right into a single frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'sil_beta_demo_score_FPD10_gini', 'sil_beta_demo_score_FPD30_gini',
       'sil_beta_demo_score_FSPD30_gini',
       'sil_beta_demo_score_FSTPD30_gini'], dtype=object)

In [2771]:
# Put the key columns first, then the five gini columns, and tag every row with
# constant metadata columns. Selecting a column list and calling .assign() both
# return new frames, so the merged frame from the previous cell is not mutated.
final_df = final_df[
    [
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'sil_beta_demo_score_FPD0_gini',
        'sil_beta_demo_score_FPD10_gini',
        'sil_beta_demo_score_FPD30_gini',
        'sil_beta_demo_score_FSPD30_gini',
        'sil_beta_demo_score_FSTPD30_gini',
    ]
].assign(
    Trench_category='All',
    Model_display_name='beta_demo_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                          datetime64[ns]
end_date                            datetime64[ns]
period                                      object
Model_Name                                  object
version                                     object
bad_rate                                    object
sil_beta_demo_score_FPD0_gini              float64
sil_beta_demo_score_FPD10_gini             float64
sil_beta_demo_score_FPD30_gini             float64
sil_beta_demo_score_FSPD30_gini            float64
sil_beta_demo_score_FSTPD30_gini           float64
Trench_category                             object
Model_display_name                          object
Product_type                                object
dtype: object

In [2772]:
# Preview the merged table; in the output below each row fills only the gini
# column that matches its `bad_rate` label.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,sil_beta_demo_score_FPD0_gini,sil_beta_demo_score_FPD10_gini,sil_beta_demo_score_FPD30_gini,sil_beta_demo_score_FSPD30_gini,sil_beta_demo_score_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,sil_beta_demo_score,1.1.0,FPD0,0.141958,,,,,All,beta_demo_model_sil,SIL
1,2023-01-02,2023-01-08,Week,sil_beta_demo_score,1.1.0,FPD0,0.279249,,,,,All,beta_demo_model_sil,SIL
2,2023-01-09,2023-01-15,Week,sil_beta_demo_score,1.1.0,FPD0,0.118033,,,,,All,beta_demo_model_sil,SIL
3,2023-01-16,2023-01-22,Week,sil_beta_demo_score,1.1.0,FPD0,0.15302,,,,,All,beta_demo_model_sil,SIL
4,2023-01-23,2023-01-29,Week,sil_beta_demo_score,1.1.0,FPD0,-0.006286,,,,,All,beta_demo_model_sil,SIL


In [2773]:
# Upload to BigQuery
# Destination: fully qualified `project.dataset.table` id for the demo-score gini table.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.sil_beta_demo_score_v1_gini4"
# WRITE_TRUNCATE replaces the table's existing contents on every run of this cell.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
# Start the load job from the in-memory frame; result() blocks until it finishes
# and, as the last expression, renders the finished LoadJob.
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=caa4b023-178b-4524-9fb5-df87cf551c0f>

# Beta SIL STACK Score Model

## FPD0

## Test

In [2774]:
# FPD0 - Test population (stack model).
# Reads stack-model predictions from `audit_balance.ml_model_run_details`, joins
# them to the loan master on digitalLoanAccountId and to the delinquency flags on
# loanAccountNumber, and keeps disbursed loans with a non-null score and
# flg_mature_fpd0 = 1 (i.e. obs_min_inst_def0 >= 1).
# The WHERE filters on loanmaster columns discard unmatched rows, so the LEFT JOIN
# effectively acts as an inner join. The cleaned `calcFeature` column is built in
# the first CTE but is not referenced further down.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  ;
"""
# Run the query and drop fully identical rows in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2422071,0007f464-8632-4319-99c4-262a92e57540,60824220710021,0.0349195041596331,2025-09-05 17:12:09,2025-09-05,2025-09,Test,0,1
1,3666195,00922403-a7d8-498c-9677-7d656d214e93,60836661950019,0.010722357464055,2025-09-05 17:43:14,2025-09-05,2025-09,Test,0,1
2,3665581,00a7bee3-f6a8-4b3f-92e5-d3891abf0531,60836655810011,0.0231080654715041,2025-09-05 14:16:40,2025-09-05,2025-09,Test,0,1
3,3666246,012dcf6e-480a-4e64-a6cc-85a8608296c9,60836662460013,0.0260076928190445,2025-09-05 18:03:37,2025-09-05,2025-09,Test,0,1
4,3665838,028a1c48-cd78-48e4-aca8-efced85ebcd8,60836658380017,0.0570349702440121,2025-09-05 15:28:39,2025-09-05,2025-09,Test,0,1


In [2775]:
# Keep the Test pull under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [2776]:
# FPD0 - Train population (stack model).
# Same shape as the Test query, but predictions come from
# `dap_ds_poweruser_playground.ml_training_model_run_details` and rows are
# labelled 'Train'. Keeps disbursed loans with a non-null score and
# flg_mature_fpd0 = 1 (obs_min_inst_def0 >= 1).
# The cleaned `calcFeature` column is built in the first CTE but not used afterwards.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  ;

"""
# Run the query and drop fully identical rows in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.026586,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1
1,1863436,c04a9f9d-9035-43db-b5aa-c46a8925f5b8,60818634360013,0.150032,2023-01-16 13:22:43,2023-01-16,2023-01,Train,0,1
2,1970309,b9f16c72-a7cc-4cde-8398-bd915bb80afe,60819703090016,0.132745,2023-03-31 12:26:55,2023-03-31,2023-03,Train,0,1
3,2083972,e856b3f9-3f87-4c92-bb22-b5fd010b2bc6,60820839720011,0.021948,2023-06-08 18:02:02,2023-06-08,2023-06,Train,0,1
4,2086616,836f8cb6-3331-4dba-917d-affe9b7cc018,60820866160016,0.146285,2023-06-10 16:32:51,2023-06-10,2023-06,Train,0,1


In [2777]:
# Keep the Train pull as `df2` so it can be stacked with the Test pull held in `df1`.
df2 = dfd.copy()

In [2778]:
# Stack the Test (df1) and Train (df2) pulls row-wise under a fresh 0..n-1 index,
# then print the schema summary to check row counts and dtypes.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273188 entries, 0 to 273187
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             273188 non-null  object        
 1   digitalLoanAccountId   273188 non-null  object        
 2   loanAccountNumber      273188 non-null  object        
 3   sil_beta_stack_score   273188 non-null  object        
 4   appln_submit_datetime  273188 non-null  datetime64[us]
 5   disbursementdate       273188 non-null  dbdate        
 6   Application_month      273188 non-null  object        
 7   Data_selection         273188 non-null  object        
 8   deffpd0                273188 non-null  Int64         
 9   flg_mature_fpd0        273188 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 21.4+ MB


In [2779]:
# The score column is object dtype in the df_concat.info() summary, so convert
# it to a numeric dtype first; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')

# Rows per application month. Kept as the cell's LAST expression so the notebook
# actually renders it; as a non-final statement its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2780]:
# Persist the scored stack-model FPD0 dataset next to the notebook (relative path).
# With the default index=True the RangeIndex is written as an unnamed first column.
df_concat.to_csv(r"sil_beta_stack_scorefpd0.csv")

In [2781]:
# Periodic gini of the stack score against the FPD0 default flag.
# `calculate_periodic_gini` is defined outside this section; in the preview below
# the last argument appears in the gini column name and in the `bad_rate` column.
gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd0', 'FPD0')

In [2782]:
# Keep the stack-model FPD0 table as `f0`.
# NOTE: this reuses the name `f0` that the demo-model merge cell earlier in the
# notebook reads; re-running that merge after this point would pick up this
# stack-model table instead of the demo-model one.
f0 = gini_results.copy()

In [2783]:
# Preview the first ten period rows (monthly and weekly windows are interleaved).
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD0_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.207885,Month,sil_beta_stack_score,1.1.0,FPD0
1,2023-01-02,2023-01-08,0.330022,Week,sil_beta_stack_score,1.1.0,FPD0
2,2023-01-09,2023-01-15,0.260013,Week,sil_beta_stack_score,1.1.0,FPD0
3,2023-01-16,2023-01-22,0.234899,Week,sil_beta_stack_score,1.1.0,FPD0
4,2023-01-23,2023-01-29,0.045143,Week,sil_beta_stack_score,1.1.0,FPD0
5,2023-01-30,2023-02-05,0.348225,Week,sil_beta_stack_score,1.1.0,FPD0
6,2023-02-01,2023-02-28,0.28873,Month,sil_beta_stack_score,1.1.0,FPD0
7,2023-02-06,2023-02-12,0.316416,Week,sil_beta_stack_score,1.1.0,FPD0
8,2023-02-13,2023-02-19,0.318935,Week,sil_beta_stack_score,1.1.0,FPD0
9,2023-02-20,2023-02-26,0.257971,Week,sil_beta_stack_score,1.1.0,FPD0


## FPD10

## Test

In [2784]:
# FPD10 - Test population (stack model).
# Reads stack-model predictions from `audit_balance.ml_model_run_details`, joins
# them to the loan master on digitalLoanAccountId and to the delinquency flags on
# loanAccountNumber, and keeps disbursed loans with a non-null score and
# flg_mature_fpd10 = 1 (i.e. obs_min_inst_def10 >= 1).
# The WHERE filters on loanmaster columns discard unmatched rows, so the LEFT JOIN
# effectively acts as an inner join. The cleaned `calcFeature` column is built in
# the first CTE but is not referenced further down.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""
# Run the query and drop fully identical rows in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3736352,0b6eb0e7-d0af-40a3-bcaa-e273fc2ec45b,60837363520015,0.0746725822494509,2025-10-11 08:52:19,2025-10-11,2025-10,Test,0,1
1,3736371,eed323bd-ced4-434e-8a5d-a51b6c6a86d5,60837363710018,0.0897313948481277,2025-10-11 09:07:54,2025-10-11,2025-10,Test,0,1
2,3736333,f2ee0838-84e0-46cc-9bb5-bbf34d0a9b70,60837363330012,0.0329080842543231,2025-10-11 08:28:06,2025-10-11,2025-10,Test,0,1
3,3736347,b2b651c9-3c12-4af5-9890-a67d36bfcbac,60837363470018,0.0928341084785266,2025-10-11 08:51:56,2025-10-11,2025-10,Test,1,1
4,3736355,f9a94758-5f3f-4439-a129-691f5af50af9,60837363550018,0.0604283870059251,2025-10-11 08:55:59,2025-10-11,2025-10,Test,0,1


In [2785]:
# Keep the Test pull under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [2786]:
# FPD10 - Train population (stack model).
# Same shape as the Test query, but predictions come from
# `dap_ds_poweruser_playground.ml_training_model_run_details` and rows are
# labelled 'Train'. Keeps disbursed loans with a non-null score and
# flg_mature_fpd10 = 1 (obs_min_inst_def10 >= 1).
# The cleaned `calcFeature` column is built in the first CTE but not used afterwards.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  ;

"""
# Run the query and drop fully identical rows in a single chained expression.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,1930574,eceb9a84-be27-424f-96dd-bc9aecfe9ddb,60819305740011,0.01694,2023-03-06 11:49:51,2023-03-06,2023-03,Train,0,1
1,2108891,139282ab-a4b1-4f9e-bc5f-ea1dbe1ccd02,60821088910017,0.160689,2023-06-26 19:13:44,2023-06-26,2023-06,Train,0,1
2,2076806,1bb51e13-eb45-43f4-9ecc-ab8c8a5f4b40,60820768060015,0.014167,2023-06-04 11:25:10,2023-06-04,2023-06,Train,0,1
3,2019634,f23e61d9-9ea5-4fd3-bbb3-bc97e44b67f5,60820196340015,0.130995,2023-04-29 16:46:51,2023-04-29,2023-04,Train,0,1
4,1907460,4eb8892b-a1f8-4ad0-8e0a-923cb038e564,60819074600015,0.042436,2023-02-19 14:12:36,2023-02-19,2023-02,Train,0,1


In [2787]:
# Keep the Train-sample pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [2788]:
# Stack the Test (df1) and Train (df2) samples row-wise under a fresh 0..n-1 index.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 268486 entries, 0 to 268485
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             268486 non-null  object        
 1   digitalLoanAccountId   268486 non-null  object        
 2   loanAccountNumber      268486 non-null  object        
 3   sil_beta_stack_score   268486 non-null  object        
 4   appln_submit_datetime  268486 non-null  datetime64[us]
 5   disbursementdate       268486 non-null  dbdate        
 6   Application_month      268486 non-null  object        
 7   Data_selection         268486 non-null  object        
 8   deffpd10               268486 non-null  Int64         
 9   flg_mature_fpd10       268486 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 21.0+ MB


In [2789]:
# The concatenated score column is an object column (see the df_concat.info() output), so
# convert it to a numeric dtype; values that cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')

# Row count per application month. This must be the LAST expression of the cell: Jupyter only
# renders the final expression, so placing it before the assignment discarded the result.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2790]:
# Persist the FPD10 modelling sample; the row index is written as the first, unnamed column.
df_concat.to_csv(path_or_buf=r"sil_beta_stack_scorefpd10.csv")

In [2791]:
# Periodic Gini for the FPD10 label; the displayed result has one row per Month/Week window.
# NOTE(review): arguments are presumably (frame, score column, label column, label tag) —
# confirm against the definition of calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'sil_beta_stack_score',
    'deffpd10',
    'FPD10',
)

In [2792]:
# Keep the FPD10 Gini table under its own name; `gini_results` is reused for the next label.
f1 = gini_results.copy(deep=True)

In [2793]:
# Preview the first ten FPD10 Gini rows.
f1.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.295238,Month,sil_beta_stack_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.390082,Week,sil_beta_stack_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,0.302997,Week,sil_beta_stack_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.475325,Week,sil_beta_stack_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,0.189446,Week,sil_beta_stack_score,1.1.0,FPD10
5,2023-01-30,2023-02-05,0.30819,Week,sil_beta_stack_score,1.1.0,FPD10
6,2023-02-01,2023-02-28,0.269499,Month,sil_beta_stack_score,1.1.0,FPD10
7,2023-02-06,2023-02-12,0.338139,Week,sil_beta_stack_score,1.1.0,FPD10
8,2023-02-13,2023-02-19,0.272636,Week,sil_beta_stack_score,1.1.0,FPD10
9,2023-02-20,2023-02-26,0.201709,Week,sil_beta_stack_score,1.1.0,FPD10


## FPD30

## Test

In [2794]:
# Test sample for FPD30.
# The query takes stack-model predictions from audit_balance.ml_model_run_details, attaches loan
# details from loan_master_table and delinquency flags from loan_deliquency_data, and keeps
# disbursed loans with a non-null score whose flg_mature_fpd30 is 1.
# NOTE(review): `left join ... loanmaster` behaves as an inner join here, because the WHERE
# clause and the following inner join both require non-null loanmaster columns.
# NOTE(review): the `cleaned` CTE still rewrites calcFeature, but nothing downstream selects it.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""
# drop_duplicates() only removes rows that are identical in every selected column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2815618,d1b0c8a2-c7da-4d10-8dae-f85431ff5186,60828156180025,0.0584998026306635,2025-07-01 14:40:56,2025-07-01,2025-07,Test,1,1
1,3135049,e77c630e-c203-4c76-a2fd-7a0d64749072,60831350490029,0.1452804193728001,2025-07-01 14:29:04,2025-07-01,2025-07,Test,0,1
2,3208552,499040f5-23fa-4101-8763-b7ef9c70b82d,60832085520022,0.225644587433908,2025-07-01 15:07:43,2025-07-01,2025-07,Test,0,1
3,3421244,f5205576-3520-4ad4-9bf7-7344402e67ba,60834212440027,0.1020274754647371,2025-07-01 14:25:33,2025-07-01,2025-07,Test,0,1
4,3520719,9502d11c-a2d3-4f27-aac0-95c78c8096ad,60835207190015,0.0534763643376721,2025-07-01 15:18:39,2025-07-01,2025-07,Test,0,1


In [2795]:
# Keep the Test-sample pull under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2796]:
# Train sample for FPD30.
# The query takes stack-model predictions from ml_training_model_run_details, attaches loan
# details from loan_master_table and delinquency flags from loan_deliquency_data, and keeps
# disbursed loans with a non-null score whose flg_mature_fpd30 is 1.
# NOTE(review): `left join ... loanmaster` behaves as an inner join here, because the WHERE
# clause and the following inner join both require non-null loanmaster columns.
# NOTE(review): the `cleaned` CTE still rewrites calcFeature, but nothing downstream selects it.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  ;

"""
# drop_duplicates() only removes rows that are identical in every selected column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,1929423,299d3ae0-a8bc-45b6-aa57-38a5419d5af4,60819294230018,0.102729,2023-03-05 17:24:29,2023-03-05,2023-03,Train,0,1
1,1877238,8ab3595f-88a8-40d1-9aab-09ec00c53345,60818772380011,0.039279,2023-01-28 18:31:42,2023-01-28,2023-01,Train,0,1
2,2064940,01fdfb53-8bb0-4a8b-8360-4ec7a944872f,60820649400017,0.032968,2023-05-28 10:57:29,2023-05-28,2023-05,Train,0,1
3,1961561,befe7a2d-3807-46bd-88cf-0d7f3ff99136,60819615610015,0.115739,2023-03-26 16:22:21,2023-03-26,2023-03,Train,0,1
4,1922775,f4eb86ff-bdbd-4da9-b0db-16af0105c132,60819227750011,0.06942,2023-03-01 13:00:32,2023-03-01,2023-03,Train,0,1


In [2797]:
# Keep the Train-sample pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [2798]:
# Stack the Test (df1) and Train (df2) samples row-wise under a fresh 0..n-1 index.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 258928 entries, 0 to 258927
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             258928 non-null  object        
 1   digitalLoanAccountId   258928 non-null  object        
 2   loanAccountNumber      258928 non-null  object        
 3   sil_beta_stack_score   258928 non-null  object        
 4   appln_submit_datetime  258928 non-null  datetime64[us]
 5   disbursementdate       258928 non-null  dbdate        
 6   Application_month      258928 non-null  object        
 7   Data_selection         258928 non-null  object        
 8   deffpd30               258928 non-null  Int64         
 9   flg_mature_fpd30       258928 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 20.2+ MB


In [2799]:
# The concatenated score column is an object column (see the df_concat.info() output), so
# convert it to a numeric dtype; values that cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')

# Row count per application month. This must be the LAST expression of the cell: Jupyter only
# renders the final expression, so placing it before the assignment discarded the result.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2800]:
# Persist the FPD30 modelling sample; the row index is written as the first, unnamed column.
df_concat.to_csv(path_or_buf=r"sil_beta_stack_scorefpd30.csv")

In [2801]:
# Periodic Gini for the FPD30 label; the displayed result has one row per Month/Week window.
# NOTE(review): arguments are presumably (frame, score column, label column, label tag) —
# confirm against the definition of calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'sil_beta_stack_score',
    'deffpd30',
    'FPD30',
)

In [2802]:
# Keep the FPD30 Gini table under its own name; `gini_results` is reused for the next label.
f2 = gini_results.copy(deep=True)

In [2803]:
# Preview the first ten FPD30 Gini rows.
f2.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.353518,Month,sil_beta_stack_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.418182,Week,sil_beta_stack_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,0.563636,Week,sil_beta_stack_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.475325,Week,sil_beta_stack_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,0.133212,Week,sil_beta_stack_score,1.1.0,FPD30
5,2023-01-30,2023-02-05,0.349288,Week,sil_beta_stack_score,1.1.0,FPD30
6,2023-02-01,2023-02-28,0.284919,Month,sil_beta_stack_score,1.1.0,FPD30
7,2023-02-06,2023-02-12,0.282517,Week,sil_beta_stack_score,1.1.0,FPD30
8,2023-02-13,2023-02-19,0.288865,Week,sil_beta_stack_score,1.1.0,FPD30
9,2023-02-20,2023-02-26,0.332061,Week,sil_beta_stack_score,1.1.0,FPD30


## FSPD30

## Test

In [2804]:
# Test sample for FSPD30.
# The query takes stack-model predictions from audit_balance.ml_model_run_details, attaches loan
# details from loan_master_table and delinquency flags from loan_deliquency_data, and keeps
# disbursed loans with a non-null score whose flg_mature_fspd_30 is 1.
# NOTE(review): `left join ... loanmaster` behaves as an inner join here, because the WHERE
# clause and the following inner join both require non-null loanmaster columns.
# NOTE(review): the `cleaned` CTE still rewrites calcFeature, but nothing downstream selects it.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""
# drop_duplicates() only removes rows that are identical in every selected column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,3636860,fa8495e7-3a83-43a9-ab31-876157bc0343,60836368600014,0.0428506655795078,2025-08-22 15:52:54,2025-08-22,2025-08,Test,0,1
1,3636046,7360dd77-0fcd-4013-8140-18f2b649b230,60836360460012,0.0884252501231652,2025-08-22 09:54:07,2025-08-22,2025-08,Test,0,1
2,3636930,4c003d22-db7b-4e65-a30c-625d26eab024,60836369300011,0.1088460588011757,2025-08-22 16:17:54,2025-08-22,2025-08,Test,0,1
3,3305921,2cfca166-b876-41ee-b341-2f00064b2b1f,60833059210013,0.2617204576775592,2025-08-22 16:49:35,2025-08-22,2025-08,Test,0,1
4,3630594,c5727901-5ae8-4878-9a9f-92f9686ad094,60836305940011,0.0338344924242993,2025-08-19 14:27:30,2025-08-22,2025-08,Test,0,1


In [2805]:
# Keep the Test-sample pull under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2806]:
# Train sample for FSPD30.
# The query takes stack-model predictions from ml_training_model_run_details, attaches loan
# details from loan_master_table and delinquency flags from loan_deliquency_data, and keeps
# disbursed loans with a non-null score whose flg_mature_fspd_30 is 1.
# NOTE(review): `left join ... loanmaster` behaves as an inner join here, because the WHERE
# clause and the following inner join both require non-null loanmaster columns.
# NOTE(review): the `cleaned` CTE still rewrites calcFeature, but nothing downstream selects it.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  ;

"""
# drop_duplicates() only removes rows that are identical in every selected column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2094537,ecd62d48-e437-4c48-bdaf-d692e2820bb1,60820945370015,0.066527,2023-06-15 17:43:57,2023-06-15,2023-06,Train,0,1
1,1872122,4497c440-efd7-43b9-9eea-8ea79f71587a,60818721220019,0.280309,2023-01-24 12:08:09,2023-01-24,2023-01,Train,0,1
2,1923293,7c202e3d-8fdc-4e56-8c8b-485ae02efe81,60819232930012,0.019168,2023-03-01 17:46:10,2023-03-01,2023-03,Train,0,1
3,2014600,968cdfef-96c3-486a-a17f-ec68c8891a10,60820146000015,0.144451,2023-04-26 12:55:23,2023-04-26,2023-04,Train,0,1
4,2022896,440c4c5a-eb42-43ca-bd57-ab1d782e4d78,60820228960012,0.123124,2023-05-01 14:30:46,2023-05-01,2023-05,Train,0,1


In [2807]:
# Keep the Train-sample pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [2808]:
# Stack the Test (df1) and Train (df2) samples row-wise under a fresh 0..n-1 index.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245665 entries, 0 to 245664
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             245665 non-null  object        
 1   digitalLoanAccountId   245665 non-null  object        
 2   loanAccountNumber      245665 non-null  object        
 3   sil_beta_stack_score   245665 non-null  object        
 4   appln_submit_datetime  245665 non-null  datetime64[us]
 5   disbursementdate       245665 non-null  dbdate        
 6   Application_month      245665 non-null  object        
 7   Data_selection         245665 non-null  object        
 8   deffspd30              245665 non-null  Int64         
 9   flg_mature_fspd_30     245665 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 19.2+ MB


In [2809]:
# The concatenated score column is an object column (see the df_concat.info() output), so
# convert it to a numeric dtype; values that cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')

# Row count per application month. This must be the LAST expression of the cell: Jupyter only
# renders the final expression, so placing it before the assignment discarded the result.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2810]:
# Persist the FSPD30 modelling sample; the row index is written as the first, unnamed column.
df_concat.to_csv(path_or_buf=r"sil_beta_stack_scorefspd30.csv")

In [2811]:
# Periodic Gini for the FSPD30 label; the displayed result has one row per Month/Week window.
# NOTE(review): arguments are presumably (frame, score column, label column, label tag) —
# confirm against the definition of calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'sil_beta_stack_score',
    'deffspd30',
    'FSPD30',
)

In [2812]:
# Keep the FSPD30 Gini table under its own name; `gini_results` is reused for the next label.
f3 = gini_results.copy(deep=True)

In [2813]:
# Preview the first ten FSPD30 Gini rows.
f3.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.310982,Month,sil_beta_stack_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.309075,Week,sil_beta_stack_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.405128,Week,sil_beta_stack_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.485906,Week,sil_beta_stack_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,0.2,Week,sil_beta_stack_score,1.1.0,FSPD30
5,2023-01-30,2023-02-05,0.309917,Week,sil_beta_stack_score,1.1.0,FSPD30
6,2023-02-01,2023-02-28,0.300713,Month,sil_beta_stack_score,1.1.0,FSPD30
7,2023-02-06,2023-02-12,0.366957,Week,sil_beta_stack_score,1.1.0,FSPD30
8,2023-02-13,2023-02-19,0.311029,Week,sil_beta_stack_score,1.1.0,FSPD30
9,2023-02-20,2023-02-26,0.225143,Week,sil_beta_stack_score,1.1.0,FSPD30


## FSTPD30

## Test

In [2814]:
# Test sample for FSTPD30.
# The query takes stack-model predictions from audit_balance.ml_model_run_details, attaches loan
# details from loan_master_table and delinquency flags from loan_deliquency_data, and keeps
# disbursed loans with a non-null score whose flg_mature_fstpd_30 is 1.
# NOTE(review): `left join ... loanmaster` behaves as an inner join here, because the WHERE
# clause and the following inner join both require non-null loanmaster columns.
# NOTE(review): the `cleaned` CTE still rewrites calcFeature, but nothing downstream selects it.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""
# drop_duplicates() only removes rows that are identical in every selected column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,3443378,af0340b3-8d7f-4872-a2a3-a6298832b88e,60834433780011,0.0296603336684655,2025-05-18 11:16:32,2025-05-18,2025-05,Test,0,1
1,3451896,4faedd37-79df-49f5-a482-347ce4974207,60834518960014,0.0679457044130169,2025-05-22 17:21:32,2025-05-22,2025-05,Test,0,1
2,3346427,172888b1-2bbf-4a9d-95a9-6a30ef577f76,60833464270018,0.0275346748603456,2025-03-29 10:29:29,2025-03-29,2025-03,Test,0,1
3,3469217,7b4f51e1-c169-439a-951a-834b0f581a25,60834692170018,0.0426864664284067,2025-05-31 12:13:24,2025-05-31,2025-05,Test,0,1
4,3446711,8a6b59c2-a53d-42f8-89e0-b56b77ae0e0c,60834467110015,0.0703616200781203,2025-05-19 20:05:04,2025-05-19,2025-05,Test,0,1


In [2815]:
# Keep the Test-sample pull under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2816]:
# Train sample for FSTPD30.
# The query takes stack-model predictions from ml_training_model_run_details, attaches loan
# details from loan_master_table and delinquency flags from loan_deliquency_data, and keeps
# disbursed loans with a non-null score whose flg_mature_fstpd_30 is 1.
# NOTE(review): `left join ... loanmaster` behaves as an inner join here, because the WHERE
# clause and the following inner join both require non-null loanmaster columns.
# NOTE(review): the `cleaned` CTE still rewrites calcFeature, but nothing downstream selects it.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;

"""
# drop_duplicates() only removes rows that are identical in every selected column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2080302,8600beb4-68fb-4da9-8ecc-42f8c69b5d14,60820803020017,0.051647,2023-06-06 13:29:16,2023-06-06,2023-06,Train,0,1
1,1951727,87823dfb-14bf-4137-a787-be70fb7b25ef,60819517270014,0.023966,2023-03-20 14:56:02,2023-03-20,2023-03,Train,0,1
2,2023165,afb1f10d-4a91-49b1-866e-0092cae94d59,60820231650018,0.133853,2023-05-01 16:44:46,2023-05-01,2023-05,Train,0,1
3,2030181,93d41561-d826-4719-b337-d04f1e428689,60820301810011,0.170902,2023-05-05 16:21:22,2023-05-05,2023-05,Train,1,1
4,2098171,c99ddbed-2dcd-4f1c-88f9-53d717d3e1d3,60820981710011,0.241905,2023-06-18 10:57:08,2023-06-18,2023-06,Train,0,1


In [2817]:
# Keep the Train-sample pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [2818]:
# Stack the Test (df1) and Train (df2) samples row-wise under a fresh 0..n-1 index.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 230343 entries, 0 to 230342
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             230343 non-null  object        
 1   digitalLoanAccountId   230343 non-null  object        
 2   loanAccountNumber      230343 non-null  object        
 3   sil_beta_stack_score   230343 non-null  object        
 4   appln_submit_datetime  230343 non-null  datetime64[us]
 5   disbursementdate       230343 non-null  dbdate        
 6   Application_month      230343 non-null  object        
 7   Data_selection         230343 non-null  object        
 8   deffstpd30             230343 non-null  Int64         
 9   flg_mature_fstpd_30    230343 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(6)
memory usage: 18.0+ MB


In [2819]:
# The concatenated score column is an object column (see the df_concat.info() output), so
# convert it to a numeric dtype; values that cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')

# Row count per application month. This must be the LAST expression of the cell: Jupyter only
# renders the final expression, so placing it before the assignment discarded the result.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [2820]:
# Persist the FSTPD30 modelling sample; the row index is written as the first, unnamed column.
df_concat.to_csv(path_or_buf=r"sil_beta_stack_scorefstpd30.csv")

In [2821]:
# Periodic Gini for the FSTPD30 label; the displayed result has one row per Month/Week window.
# NOTE(review): arguments are presumably (frame, score column, label column, label tag) —
# confirm against the definition of calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'sil_beta_stack_score',
    'deffstpd30',
    'FSTPD30',
)

In [2822]:
# Keep the FSTPD30 Gini table under its own name before the tables are merged.
f4 = gini_results.copy(deep=True)

In [2823]:
# Preview the first ten FSTPD30 Gini rows.
f4.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.317218,Month,sil_beta_stack_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.304604,Week,sil_beta_stack_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.367139,Week,sil_beta_stack_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.373654,Week,sil_beta_stack_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.292769,Week,sil_beta_stack_score,1.1.0,FSTPD30
5,2023-01-30,2023-02-05,0.362257,Week,sil_beta_stack_score,1.1.0,FSTPD30
6,2023-02-01,2023-02-28,0.325214,Month,sil_beta_stack_score,1.1.0,FSTPD30
7,2023-02-06,2023-02-12,0.43507,Week,sil_beta_stack_score,1.1.0,FSTPD30
8,2023-02-13,2023-02-19,0.326577,Week,sil_beta_stack_score,1.1.0,FSTPD30
9,2023-02-20,2023-02-26,0.167126,Week,sil_beta_stack_score,1.1.0,FSTPD30


## Combining the DataFrames

In [2824]:
import functools

dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two frames on the key columns listed in ``common_columns``.

    Rows whose key exists in only one of the frames are kept; the columns
    contributed by the other frame are filled with NaN for those rows.
    """
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the frames left to right into a single table, one outer merge at a time.
final_df = dataframes[0]
for next_frame in dataframes[1:]:
    final_df = merge_dataframes(final_df, next_frame)

# Show the resulting column names to pick the final column order from.
final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'sil_beta_stack_score_FPD10_gini',
       'sil_beta_stack_score_FPD30_gini',
       'sil_beta_stack_score_FSPD30_gini',
       'sil_beta_stack_score_FSTPD30_gini'], dtype=object)

In [2825]:
# Key columns first, then one Gini column per target, in target order.
ordered_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
    'sil_beta_stack_score_FPD0_gini',
    'sil_beta_stack_score_FPD10_gini',
    'sil_beta_stack_score_FPD30_gini',
    'sil_beta_stack_score_FSPD30_gini',
    'sil_beta_stack_score_FSTPD30_gini',
]
# Select into a new frame and tag every row with the constant descriptor columns.
final_df = final_df.loc[:, ordered_columns].assign(
    Trench_category='All',
    Model_display_name='beta_stack_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                           datetime64[ns]
end_date                             datetime64[ns]
period                                       object
Model_Name                                   object
version                                      object
bad_rate                                     object
sil_beta_stack_score_FPD0_gini              float64
sil_beta_stack_score_FPD10_gini             float64
sil_beta_stack_score_FPD30_gini             float64
sil_beta_stack_score_FSPD30_gini            float64
sil_beta_stack_score_FSTPD30_gini           float64
Trench_category                              object
Model_display_name                           object
Product_type                                 object
dtype: object

In [2826]:
# Preview the merged frame before it is uploaded.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,sil_beta_stack_score_FPD0_gini,sil_beta_stack_score_FPD10_gini,sil_beta_stack_score_FPD30_gini,sil_beta_stack_score_FSPD30_gini,sil_beta_stack_score_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,sil_beta_stack_score,1.1.0,FPD0,0.207885,,,,,All,beta_stack_model_sil,SIL
1,2023-01-02,2023-01-08,Week,sil_beta_stack_score,1.1.0,FPD0,0.330022,,,,,All,beta_stack_model_sil,SIL
2,2023-01-09,2023-01-15,Week,sil_beta_stack_score,1.1.0,FPD0,0.260013,,,,,All,beta_stack_model_sil,SIL
3,2023-01-16,2023-01-22,Week,sil_beta_stack_score,1.1.0,FPD0,0.234899,,,,,All,beta_stack_model_sil,SIL
4,2023-01-23,2023-01-29,Week,sil_beta_stack_score,1.1.0,FPD0,0.045143,,,,,All,beta_stack_model_sil,SIL


In [2827]:
# Load the merged Gini frame into the playground table, replacing whatever it holds.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.sil_beta_stack_score_v1_gini4"
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # or "WRITE_APPEND"
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job finishes; result() raises if the job failed.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=8c3843ad-33d5-4068-a2d2-b092a36e3ae5>

# Alpha-Cash-CIC-Model

## Trench 1

## FPD0

## Test

In [2828]:
# Test population for Trench 1, FPD0 target.
# CTEs in the query:
#   parsed     - prediction rows from audit_balance.ml_model_run_details for the three
#                modelDisplayName spellings of the Alpha Cash CIC model.
#   model_run  - latest 'Alpha-Cash-Model-response' request per
#                (customerId, digitalLoanAccountId, modelName), ordered by publish_time.
#   modelname  - score plus trenchCategory; when the run row has no trenchCategory it is
#                regex-extracted from the request payload.
#   deliquency - default flags (def*) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def*.
# The final WHERE keeps disbursed loans with a non-null score that are mature for FPD0
# and tagged 'Trench 1'. Because it filters on loanmaster columns, the LEFT JOIN to
# loan_master_table effectively behaves like an inner join.
# The Python r-prefix keeps the backslashes in the regex literal exactly as written.
# NOTE(review): `parsed` is not de-duplicated per loan; a loan scored more than once with
# different predictions would contribute several rows - confirm this is intended.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3724609,4ba81131-474b-44a7-b127-4f1ea1de1fc4,60837246090018,0.504232128839248,Trench 1,2025-10-05 12:26:56,2025-10-06,2025-10,Test,1,1
1,3724716,ebfbd098-aa48-4161-a112-cf2e0b25ba9a,60837247160011,0.4563446486097042,Trench 1,2025-10-05 13:12:34,2025-10-05,2025-10,Test,0,1
2,3726073,b6e2666e-d8d8-409e-8315-907d922e8d23,60837260730016,0.4044819242828416,Trench 1,2025-10-05 20:20:43,2025-10-06,2025-10,Test,0,1
3,3726372,31d6e06d-01c1-489e-8e20-1525b5f6907e,60837263720014,0.4422389640299675,Trench 1,2025-10-06 02:36:52,2025-10-06,2025-10,Test,1,1
4,3755813,18a3a196-f9f7-480c-bae0-d6e2dd58e292,60837558130019,0.492525055294494,Trench 1,2025-10-20 08:18:32,2025-10-20,2025-10,Test,0,1


In [2829]:
# Keep the Test extract as df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [2830]:
# Train population for Trench 1, FPD0 target.
# Same shape as the Test query, except that scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and trenchCategory is read
# directly from that table (no request-payload fallback).
# The final WHERE keeps disbursed loans with a non-null score that are mature for FPD0
# and tagged 'Trench 1'; filtering on loanmaster columns makes the LEFT JOIN to
# loan_master_table effectively an inner join.
# NOTE(review): `parsed` is not de-duplicated per loan - confirm the training table holds
# one row per loan.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2800690,8071cdb5-d99c-4bea-86e2-9c84e8e1c40f,60828006900011,0.50613,Trench 1,2024-09-08 19:10:22,2024-09-08,2024-09,Train,1,1
1,2825994,106062cd-92e3-4051-9777-e3da1565c8b1,60828259940013,0.441568,Trench 1,2024-09-06 15:15:47,2024-09-06,2024-09,Train,1,1
2,2846597,6ab44aab-8919-4041-9219-74024497c5cd,60828465970018,0.397242,Trench 1,2024-09-15 06:30:25,2024-09-15,2024-09,Train,0,1
3,2849266,bce2350e-5917-4c6a-8cee-392ba66c364b,60828492660014,0.44412,Trench 1,2024-09-15 20:30:10,2024-09-16,2024-09,Train,0,1
4,2881731,67ec8b07-d420-40fa-b5c7-867547bf4f7f,60828817310016,0.472892,Trench 1,2024-09-26 09:33:11,2024-09-26,2024-09,Train,0,1


In [2831]:
# Keep the Train extract as df2 so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [2832]:
# Stack Train (df2) on top of Test (df1). Empty extracts are left out explicitly:
# passing an empty frame to pd.concat is deprecated (FutureWarning) and in a future
# pandas release it will start to influence the result dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both extracts are empty, fall back to df2 so the result still carries the columns.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17490 entries, 0 to 17489
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             17490 non-null  object        
 1   digitalLoanAccountId   17490 non-null  object        
 2   loanAccountNumber      17490 non-null  object        
 3   aCicScore              17490 non-null  object        
 4   trenchCategory         17490 non-null  object        
 5   appln_submit_datetime  17490 non-null  datetime64[us]
 6   disbursementdate       17490 non-null  dbdate        
 7   Application_month      17490 non-null  object        
 8   Data_selection         17490 non-null  object        
 9   deffpd0                17490 non-null  Int64         
 10  flg_mature_fpd0        17490 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.5+ MB


In [2833]:
# aCicScore is object dtype after the concat (see info() above); cast it to numeric,
# turning any value that cannot be parsed into NaN.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2834]:
# Snapshot the FPD0 population to the current working directory.
# With the default index=True the row index is written as the first, unnamed column.
df_concat.to_csv(r"aCicScorefpd0.csv")

In [2835]:
# Gini of aCicScore against the deffpd0 flag, labelled 'FPD0'.
# calculate_periodic_gini is defined outside this section; per the preview of its
# result it yields one row per Week/Month window.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd0', 'FPD0')

In [2836]:
# Keep the FPD0 result under its own name; gini_results is reused for the next target.
# NOTE(review): the name f0 was already used for the frames merged in the previous
# section, so this rebinds it - re-running that merge cell afterwards would pick this up.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,-0.17094,Week,aCicScore,1.1.0,FPD0
1,2024-09-01,2024-09-30,0.217332,Month,aCicScore,1.1.0,FPD0
2,2024-09-02,2024-09-08,0.18755,Week,aCicScore,1.1.0,FPD0
3,2024-09-09,2024-09-15,0.286166,Week,aCicScore,1.1.0,FPD0
4,2024-09-16,2024-09-22,0.271593,Week,aCicScore,1.1.0,FPD0


## FPD10

## Test

In [2837]:
# Test population for Trench 1, FPD10 target.
# CTEs in the query:
#   parsed     - prediction rows from audit_balance.ml_model_run_details for the three
#                modelDisplayName spellings of the Alpha Cash CIC model.
#   model_run  - latest 'Alpha-Cash-Model-response' request per
#                (customerId, digitalLoanAccountId, modelName), ordered by publish_time.
#   modelname  - score plus trenchCategory; when the run row has no trenchCategory it is
#                regex-extracted from the request payload.
#   deliquency - default flags (def*) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def*.
# The final WHERE keeps disbursed loans with a non-null score that are mature for FPD10
# and tagged 'Trench 1'. Because it filters on loanmaster columns, the LEFT JOIN to
# loan_master_table effectively behaves like an inner join.
# The Python r-prefix keeps the backslashes in the regex literal exactly as written.
# NOTE(review): `parsed` is not de-duplicated per loan; a loan scored more than once with
# different predictions would contribute several rows - confirm this is intended.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3704941,8c979bb5-6c9f-490a-b0da-ee86863eebc4,60837049410017,0.4581197085922175,Trench 1,2025-09-25 19:31:42,2025-09-26,2025-09,Test,0,1
2,3724609,4ba81131-474b-44a7-b127-4f1ea1de1fc4,60837246090018,0.504232128839248,Trench 1,2025-10-05 12:26:56,2025-10-06,2025-10,Test,1,1
3,3724716,ebfbd098-aa48-4161-a112-cf2e0b25ba9a,60837247160011,0.4563446486097042,Trench 1,2025-10-05 13:12:34,2025-10-05,2025-10,Test,0,1
4,3726073,b6e2666e-d8d8-409e-8315-907d922e8d23,60837260730016,0.4044819242828416,Trench 1,2025-10-05 20:20:43,2025-10-06,2025-10,Test,0,1
5,3726372,31d6e06d-01c1-489e-8e20-1525b5f6907e,60837263720014,0.4422389640299675,Trench 1,2025-10-06 02:36:52,2025-10-06,2025-10,Test,0,1


In [2838]:
# Keep the Test extract as df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [2839]:
# Train population for Trench 1, FPD10 target.
# Same shape as the Test query, except that scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and trenchCategory is read
# directly from that table (no request-payload fallback).
# The final WHERE keeps disbursed loans with a non-null score that are mature for FPD10
# and tagged 'Trench 1'; filtering on loanmaster columns makes the LEFT JOIN to
# loan_master_table effectively an inner join.
# NOTE(review): `parsed` is not de-duplicated per loan - confirm the training table holds
# one row per loan.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2800690,8071cdb5-d99c-4bea-86e2-9c84e8e1c40f,60828006900011,0.50613,Trench 1,2024-09-08 19:10:22,2024-09-08,2024-09,Train,1,1
1,2825994,106062cd-92e3-4051-9777-e3da1565c8b1,60828259940013,0.441568,Trench 1,2024-09-06 15:15:47,2024-09-06,2024-09,Train,1,1
2,2846597,6ab44aab-8919-4041-9219-74024497c5cd,60828465970018,0.397242,Trench 1,2024-09-15 06:30:25,2024-09-15,2024-09,Train,0,1
3,2849266,bce2350e-5917-4c6a-8cee-392ba66c364b,60828492660014,0.44412,Trench 1,2024-09-15 20:30:10,2024-09-16,2024-09,Train,0,1
4,2881731,67ec8b07-d420-40fa-b5c7-867547bf4f7f,60828817310016,0.472892,Trench 1,2024-09-26 09:33:11,2024-09-26,2024-09,Train,0,1


In [2840]:
# Keep the Train extract as df2 so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [2841]:
# Stack Train (df2) on top of Test (df1). Empty extracts are left out explicitly:
# passing an empty frame to pd.concat is deprecated (FutureWarning) and in a future
# pandas release it will start to influence the result dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both extracts are empty, fall back to df2 so the result still carries the columns.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16967 entries, 0 to 16966
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16967 non-null  object        
 1   digitalLoanAccountId   16967 non-null  object        
 2   loanAccountNumber      16967 non-null  object        
 3   aCicScore              16967 non-null  object        
 4   trenchCategory         16967 non-null  object        
 5   appln_submit_datetime  16967 non-null  datetime64[us]
 6   disbursementdate       16967 non-null  dbdate        
 7   Application_month      16967 non-null  object        
 8   Data_selection         16967 non-null  object        
 9   deffpd10               16967 non-null  Int64         
 10  flg_mature_fpd10       16967 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.5+ MB


In [2842]:
# aCicScore is object dtype after the concat (see info() above); cast it to numeric,
# turning any value that cannot be parsed into NaN.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2843]:
# Snapshot the FPD10 population to the current working directory.
# With the default index=True the row index is written as the first, unnamed column.
df_concat.to_csv(r"aCicScorefpd10.csv")

In [2844]:
# Gini of aCicScore against the deffpd10 flag, labelled 'FPD10'.
# calculate_periodic_gini is defined outside this section; per the preview of its
# result it yields one row per Week/Month window.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd10', 'FPD10')

In [2845]:
# Keep the FPD10 result under its own name; gini_results is reused for the next target.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.036585,Week,aCicScore,1.1.0,FPD10
1,2024-09-01,2024-09-30,0.236056,Month,aCicScore,1.1.0,FPD10
2,2024-09-02,2024-09-08,0.183358,Week,aCicScore,1.1.0,FPD10
3,2024-09-09,2024-09-15,0.334879,Week,aCicScore,1.1.0,FPD10
4,2024-09-16,2024-09-22,0.278113,Week,aCicScore,1.1.0,FPD10


## FPD30

## Test

In [2846]:
# Test population for Trench 1, FPD30 target.
# CTEs in the query:
#   parsed     - prediction rows from audit_balance.ml_model_run_details for the three
#                modelDisplayName spellings of the Alpha Cash CIC model.
#   model_run  - latest 'Alpha-Cash-Model-response' request per
#                (customerId, digitalLoanAccountId, modelName), ordered by publish_time.
#   modelname  - score plus trenchCategory; when the run row has no trenchCategory it is
#                regex-extracted from the request payload.
#   deliquency - default flags (def*) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def*.
# The final WHERE keeps disbursed loans with a non-null score that are mature for FPD30
# and tagged 'Trench 1'. Because it filters on loanmaster columns, the LEFT JOIN to
# loan_master_table effectively behaves like an inner join.
# The Python r-prefix keeps the backslashes in the regex literal exactly as written.
# In the recorded run this query returned no rows (the preview below is header-only),
# so downstream cells must cope with an empty Test frame.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [2847]:
# Keep the Test extract as df1 (empty in the recorded run); dfd is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2848]:
# Train population for Trench 1, FPD30 target.
# Same shape as the Test query, except that scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and trenchCategory is read
# directly from that table (no request-payload fallback).
# The final WHERE keeps disbursed loans with a non-null score that are mature for FPD30
# and tagged 'Trench 1'; filtering on loanmaster columns makes the LEFT JOIN to
# loan_master_table effectively an inner join.
# NOTE(review): `parsed` is not de-duplicated per loan - confirm the training table holds
# one row per loan.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2813811,2848e655-4aae-491f-81b1-8f5f7c409152,60828138110018,0.508106,Trench 1,2024-09-01 13:04:47,2024-09-01,2024-09,Train,0,1
1,2875278,2fadae7e-13fe-49af-a791-8cc585efccdd,60828752780017,0.485978,Trench 1,2024-09-23 21:20:22,2024-09-26,2024-09,Train,1,1
2,2845426,a934341f-d34f-48e6-99d1-8ed8766af55a,60828454260016,0.497255,Trench 1,2024-09-14 16:12:48,2024-09-14,2024-09,Train,1,1
3,2871487,6ee29463-4815-4878-828c-9df15a31e93c,60828714870018,0.348808,Trench 1,2024-09-22 20:15:07,2024-09-28,2024-09,Train,0,1
4,2837079,f6c94454-98dc-41e1-8833-a0fe1a133b64,60828370790011,0.500782,Trench 1,2024-09-11 10:45:27,2024-09-13,2024-09,Train,0,1


In [2849]:
# Keep the Train extract as df2 so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [2850]:
# Stack Train (df2) on top of Test (df1). The FPD30 Test extract can be empty, and
# passing an empty frame to pd.concat is deprecated (it emits a FutureWarning, and in a
# future pandas release the empty frame will start to influence the result dtypes).
# Leaving empty frames out explicitly keeps today's result and silences the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both extracts are empty, fall back to df2 so the result still carries the columns.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16215 entries, 0 to 16214
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16215 non-null  Int64         
 1   digitalLoanAccountId   16215 non-null  object        
 2   loanAccountNumber      16215 non-null  object        
 3   aCicScore              16215 non-null  float64       
 4   trenchCategory         16215 non-null  object        
 5   appln_submit_datetime  16215 non-null  datetime64[us]
 6   disbursementdate       16215 non-null  dbdate        
 7   Application_month      16215 non-null  object        
 8   Data_selection         16215 non-null  object        
 9   deffpd30               16215 non-null  Int64         
 10  flg_mature_fpd30       16215 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.4+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2851]:
# Kept for symmetry with the other targets: aCicScore is already float64 here (see
# info() above), so this cast leaves the values unchanged.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2852]:
# Snapshot the FPD30 population to the current working directory.
# With the default index=True the row index is written as the first, unnamed column.
df_concat.to_csv(r"aCicScorefpd30.csv")

In [2853]:
# Gini of aCicScore against the deffpd30 flag, labelled 'FPD30'.
# calculate_periodic_gini is defined outside this section; per the preview of its
# result it yields one row per Week/Month window.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd30', 'FPD30')

In [2854]:
# Keep the FPD30 result under its own name; gini_results is reused for the next target.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.036585,Week,aCicScore,1.1.0,FPD30
1,2024-09-01,2024-09-30,0.265266,Month,aCicScore,1.1.0,FPD30
2,2024-09-02,2024-09-08,0.24121,Week,aCicScore,1.1.0,FPD30
3,2024-09-09,2024-09-15,0.338013,Week,aCicScore,1.1.0,FPD30
4,2024-09-16,2024-09-22,0.2925,Week,aCicScore,1.1.0,FPD30


## FSPD30

## Test

In [2855]:
# Test population for Trench 1, FSPD30 target.
# CTEs in the query:
#   parsed     - prediction rows from audit_balance.ml_model_run_details for the three
#                modelDisplayName spellings of the Alpha Cash CIC model.
#   model_run  - latest 'Alpha-Cash-Model-response' request per
#                (customerId, digitalLoanAccountId, modelName), ordered by publish_time.
#   modelname  - score plus trenchCategory; when the run row has no trenchCategory it is
#                regex-extracted from the request payload.
#   deliquency - default flags (def*) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def*.
# Maturity filter: deffspd30 is only defined on loans with obs_min_inst_def30 >= 2, which
# is exactly flg_mature_fspd_30. Filtering on flg_mature_fpd30 (>= 1) instead would admit
# loans with a single observed installment, which deffspd30 can only label 0 and which
# would dilute the bad rate and distort the Gini. The filter therefore uses
# flg_mature_fspd_30.
# flg_mature_fpd30 is still selected so the result columns are unchanged; every row that
# passes the filter has it equal to 1.
# Because the WHERE filters on loanmaster columns, the LEFT JOIN to loan_master_table
# effectively behaves like an inner join.
# The Python r-prefix keeps the backslashes in the regex literal exactly as written.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fpd30


In [2856]:
# Keep an independent copy of the Test-split result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2857]:
# FSPD30 / Trench 1 / Train split.
# Reads Alpha Cash CIC scores from
# dap_ds_poweruser_playground.ml_training_model_run_details and joins them to the
# loan master and delinquency flags.  Only disbursed loans with a non-null score,
# flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 1' are kept; rows are
# labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2841091,28cd2859-9363-4838-9e73-b3bea2786f27,60828410910017,0.486808,Trench 1,2024-09-12 21:09:19,2024-09-13,2024-09,Train,1,1
1,2834059,4a301552-902d-4d69-a57e-e346208c384f,60828340590014,0.754935,Trench 1,2024-09-10 09:08:04,2024-09-11,2024-09,Train,1,1
2,2875045,6d38446a-f6d1-4a31-87b5-f0d3236f6310,60828750450012,0.478535,Trench 1,2024-09-23 19:57:14,2024-09-23,2024-09,Train,1,1
3,2891725,7696193d-cc6d-4a46-bc7d-c2261e014fc7,60828917250013,0.360785,Trench 1,2024-09-29 09:53:05,2024-09-29,2024-09,Train,1,1
4,2872240,34e239c3-e9ff-4928-97ae-b28f26a11dc2,60828722400011,0.465872,Trench 1,2024-09-23 02:56:57,2024-09-23,2024-09,Train,1,1


In [2858]:
# Keep an independent copy of the Train-split result under its own name.
df2 = dfd.copy()

In [2859]:
# Stack the Train (df2) and Test (df1) rows into one frame with a fresh index.
# Empty frames are left out of the concat: pandas deprecates letting empty
# entries take part in dtype/column resolution and emits a FutureWarning when
# one split returns no rows.  If every frame is empty, fall back to concatenating
# them as-is so the result still carries the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15042 entries, 0 to 15041
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15042 non-null  Int64         
 1   digitalLoanAccountId   15042 non-null  object        
 2   loanAccountNumber      15042 non-null  object        
 3   aCicScore              15042 non-null  float64       
 4   trenchCategory         15042 non-null  object        
 5   appln_submit_datetime  15042 non-null  datetime64[us]
 6   disbursementdate       15042 non-null  dbdate        
 7   Application_month      15042 non-null  object        
 8   Data_selection         15042 non-null  object        
 9   deffspd30              15042 non-null  Int64         
 10  flg_mature_fspd_30     15042 non-null  Int64         
 11  flg_mature_fpd30       0 non-null      object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2860]:
# Force the score column to a numeric dtype; values that cannot be parsed become
# NaN because of errors='coerce'.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2861]:
# Write the combined FSPD30 frame to a CSV in the current working directory
# (the DataFrame index is written as the first column by default).
df_concat.to_csv(r"aCicScorefspd30.csv")

In [2862]:
# Score column 'aCicScore' against label 'deffspd30', tagged 'FSPD30'.
# NOTE(review): calculate_periodic_gini is defined elsewhere in the notebook; the
# displayed result suggests it returns one Gini value per Week/Month window - confirm.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffspd30', 'FSPD30')

In [2863]:
# Keep the FSPD30 results as f3 so they survive the next reassignment of
# `gini_results`; f3 is one of the frames merged in the combining cell.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,aCicScore_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.182432,Week,aCicScore,1.1.0,FSPD30
1,2024-09-01,2024-09-30,0.254053,Month,aCicScore,1.1.0,FSPD30
2,2024-09-02,2024-09-08,0.29238,Week,aCicScore,1.1.0,FSPD30
3,2024-09-09,2024-09-15,0.265693,Week,aCicScore,1.1.0,FSPD30
4,2024-09-16,2024-09-22,0.260664,Week,aCicScore,1.1.0,FSPD30


## FSTPD30

## Test

In [2864]:
# FSTPD30 / Trench 1 / Test split.
# Reads Alpha Cash CIC scores from audit_balance.ml_model_run_details; when
# trenchCategory is NULL there, it is regex-extracted from the latest
# 'Alpha-Cash-Model-response' request payload for the same loan.  Scores are
# joined to the loan master and to delinquency flags, keeping disbursed loans
# with a non-null score, flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 1'.
# The string is raw (r"""...""") so the backslashes in the REGEXP_EXTRACT pattern
# reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [2865]:
# Keep an independent copy of the Test-split result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2866]:
# FSTPD30 / Trench 1 / Train split.
# Reads Alpha Cash CIC scores from
# dap_ds_poweruser_playground.ml_training_model_run_details and joins them to the
# loan master and delinquency flags.  Only disbursed loans with a non-null score,
# flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 1' are kept; rows are
# labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2781542,efd8664a-8c3c-4a56-aaf3-6e122452bd83,60827815420017,0.454842,Trench 1,2024-09-17 21:37:09,2024-09-19,2024-09,Train,1,1
1,2831825,67a6c3d9-aca9-4db5-a0ef-ed8c3c78c5e4,60828318250016,0.484412,Trench 1,2024-09-08 21:19:12,2024-09-08,2024-09,Train,0,1
2,2829156,6eabf27e-d454-477a-a28d-b8eaf8d39df4,60828291560019,0.412449,Trench 1,2024-09-21 00:39:37,2024-09-21,2024-09,Train,0,1
3,2884001,c42a12c0-6000-4cb7-a793-21ebf52dbfc0,60828840010011,0.565607,Trench 1,2024-09-29 02:12:41,2024-09-29,2024-09,Train,1,1
4,2894787,2e755c95-6a94-4885-bbde-5ac550194e31,60828947870019,0.340953,Trench 1,2024-09-30 09:35:46,2024-09-30,2024-09,Train,0,1


In [2867]:
# Keep an independent copy of the Train-split result under its own name.
df2 = dfd.copy()

In [2868]:
# Stack the Train (df2) and Test (df1) rows into one frame with a fresh index.
# Empty frames are left out of the concat: pandas deprecates letting empty
# entries take part in dtype/column resolution and emits a FutureWarning when
# one split returns no rows.  If every frame is empty, fall back to concatenating
# them as-is so the result still carries the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13892 entries, 0 to 13891
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13892 non-null  Int64         
 1   digitalLoanAccountId   13892 non-null  object        
 2   loanAccountNumber      13892 non-null  object        
 3   aCicScore              13892 non-null  float64       
 4   trenchCategory         13892 non-null  object        
 5   appln_submit_datetime  13892 non-null  datetime64[us]
 6   disbursementdate       13892 non-null  dbdate        
 7   Application_month      13892 non-null  object        
 8   Data_selection         13892 non-null  object        
 9   deffstpd30             13892 non-null  Int64         
 10  flg_mature_fstpd_30    13892 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.2+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2869]:
# Force the score column to a numeric dtype; values that cannot be parsed become
# NaN because of errors='coerce'.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2870]:
# Write the combined FSTPD30 frame to a CSV in the current working directory
# (the DataFrame index is written as the first column by default).
df_concat.to_csv(r"aCicScorefstpd30.csv")

In [2871]:
# Score column 'aCicScore' against label 'deffstpd30', tagged 'FSTPD30'.
# NOTE(review): calculate_periodic_gini is defined elsewhere in the notebook; the
# displayed result suggests it returns one Gini value per Week/Month window - confirm.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffstpd30', 'FSTPD30')

In [2872]:
# Keep the FSTPD30 results as f4 so they survive any later reassignment of
# `gini_results`; f4 is one of the frames merged in the combining cell.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,aCicScore_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.28877,Week,aCicScore,1.1.0,FSTPD30
1,2024-09-01,2024-09-30,0.254391,Month,aCicScore,1.1.0,FSTPD30
2,2024-09-02,2024-09-08,0.311464,Week,aCicScore,1.1.0,FSTPD30
3,2024-09-09,2024-09-15,0.243421,Week,aCicScore,1.1.0,FSTPD30
4,2024-09-16,2024-09-22,0.28589,Week,aCicScore,1.1.0,FSTPD30


## Combining the DataFrames

In [2873]:
import functools

# Per-metric Gini frames to combine.  f3 and f4 hold FSPD30 and FSTPD30; f0-f2
# are built in earlier cells (presumably FPD0, FPD10, FPD30 - verify).
dataframes = [f0, f1, f2, f3, f4]
# Key columns used for the outer merge.
# NOTE(review): 'bad_rate' is one of the keys and carries a different label in
# each frame (e.g. 'FSPD30' in f3, 'FSTPD30' in f4), so rows from different
# metrics never match: every output row has exactly one populated *_gini column
# and NaN in the others.  Confirm this long/sparse layout is intended before
# dropping 'bad_rate' from the keys.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-merge two Gini result frames on the module-level ``common_columns``."""
    return pd.merge(df1, df2, on=common_columns, how='outer')

# Fold the list left-to-right into a single frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aCicScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'aCicScore_FPD10_gini',
       'aCicScore_FPD30_gini', 'aCicScore_FSPD30_gini',
       'aCicScore_FSTPD30_gini'], dtype=object)

In [2874]:
# Put the key columns first, followed by one Gini column per bad-rate definition,
# then append constant descriptor columns identifying the model, trench and product.
final_df = (
    final_df.loc[:, [
        'start_date',
        'end_date',
        'period',
        'Model_Name',
        'version',
        'bad_rate',
        'aCicScore_FPD0_gini',
        'aCicScore_FPD10_gini',
        'aCicScore_FPD30_gini',
        'aCicScore_FSPD30_gini',
        'aCicScore_FSTPD30_gini',
    ]]
    .copy()
    .assign(
        Model_display_name='cic_model_cash',
        Trench_category='Trench 1',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                datetime64[ns]
end_date                  datetime64[ns]
period                            object
Model_Name                        object
version                           object
bad_rate                          object
aCicScore_FPD0_gini              float64
aCicScore_FPD10_gini             float64
aCicScore_FPD30_gini             float64
aCicScore_FSPD30_gini            float64
aCicScore_FSTPD30_gini           float64
Model_display_name                object
Trench_category                   object
Product_type                      object
dtype: object

In [2875]:
# Preview the first rows of the combined Trench 1 Gini table before upload.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,aCicScore_FPD0_gini,aCicScore_FPD10_gini,aCicScore_FPD30_gini,aCicScore_FSPD30_gini,aCicScore_FSTPD30_gini,Model_display_name,Trench_category,Product_type
0,2024-08-26,2024-09-01,Week,aCicScore,1.1.0,FPD0,-0.17094,,,,,cic_model_cash,Trench 1,CASH
1,2024-09-01,2024-09-30,Month,aCicScore,1.1.0,FPD0,0.217332,,,,,cic_model_cash,Trench 1,CASH
2,2024-09-02,2024-09-08,Week,aCicScore,1.1.0,FPD0,0.18755,,,,,cic_model_cash,Trench 1,CASH
3,2024-09-09,2024-09-15,Week,aCicScore,1.1.0,FPD0,0.286166,,,,,cic_model_cash,Trench 1,CASH
4,2024-09-16,2024-09-22,Week,aCicScore,1.1.0,FPD0,0.271593,,,,,cic_model_cash,Trench 1,CASH


In [2876]:
# Load the combined Gini table into BigQuery, replacing whatever the
# destination table currently holds.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_cic_model_v1_gini4"

job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # use "WRITE_APPEND" to keep existing rows

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job finishes; the finished job is the cell's output.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=206b4ffb-ca8e-4fba-af0f-e198e521e027>

## Trench 2

## FPD0

## Test

In [2877]:
# FPD0 / Trench 2 / Test split.
# Reads Alpha Cash CIC scores from audit_balance.ml_model_run_details; when
# trenchCategory is NULL there, it is regex-extracted from the latest
# 'Alpha-Cash-Model-response' request payload for the same loan.  Scores are
# joined to the loan master and to delinquency flags, keeping disbursed loans
# with a non-null score, flg_mature_fpd0 = 1 and trenchCategory = 'Trench 2'.
# The string is raw (r"""...""") so the backslashes in the REGEXP_EXTRACT pattern
# reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2600620,bab27c5b-a230-4d53-9e96-0e28d43ecbcc,60826006200014,0.4522276677448863,Trench 2,2025-10-07 13:23:10,2025-10-07,2025-10,Test,0,1
1,1507000,b0a512f7-843a-42a4-8f1e-7291ac91c955,60815070000017,0.4960021010712719,Trench 2,2025-10-07 20:34:00,2025-10-08,2025-10,Test,0,1
2,3535446,25b1d5a8-103c-4a78-a89c-241c47aee2b9,60835354460014,0.4346275397775516,Trench 2,2025-10-07 19:32:28,2025-10-10,2025-10,Test,0,1
3,3206558,377373c6-c886-4d7a-84b2-96c4bd0826f9,60832065580038,0.4873978190269432,Trench 2,2025-10-07 22:47:02,2025-10-09,2025-10,Test,0,1
4,3097865,52657965-b6d9-40fa-a657-a1bf3694d10b,60830978650017,0.4566640348878079,Trench 2,2025-10-07 23:09:46,2025-10-08,2025-10,Test,1,1


In [2878]:
# Keep an independent copy of the Test-split result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2879]:
# FPD0 / Trench 2 / Train split.
# Reads Alpha Cash CIC scores from
# dap_ds_poweruser_playground.ml_training_model_run_details and joins them to the
# loan master and delinquency flags.  Only disbursed loans with a non-null score,
# flg_mature_fpd0 = 1 and trenchCategory = 'Trench 2' are kept; rows are
# labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2598449,9d6b313c-ca97-4adf-9664-1b5985ada003,60825984490012,0.45939,Trench 2,2024-09-25 18:48:35,2024-09-25,2024-09,Train,1,1
1,2854362,52aed292-3279-4afc-a858-540c9f032611,60828543620018,0.673198,Trench 2,2024-09-17 13:53:09,2024-09-17,2024-09,Train,0,1
2,2240631,ccd34bfb-4c81-4874-a0f0-725104dbe3a3,60822406310013,0.583203,Trench 2,2024-09-12 16:21:09,2024-09-12,2024-09,Train,1,1
3,1279889,70303781-4c89-41cb-83fb-17a4eac6787a,60812798890053,0.558024,Trench 2,2024-09-28 11:14:02,2024-10-02,2024-09,Train,0,1
4,1215801,80370d10-c7a7-4d21-b798-b0c78279ce3e,60812158010011,0.519138,Trench 2,2024-09-18 15:40:10,2024-09-18,2024-09,Train,1,1


In [2880]:
# Keep an independent copy of the Train-split result under its own name.
df2 = dfd.copy()

In [2881]:
# Stack the Train (df2) and Test (df1) rows into one frame with a fresh index.
# Empty frames are left out of the concat: pandas deprecates letting empty
# entries take part in dtype/column resolution and emits a FutureWarning when
# one split returns no rows.  If every frame is empty, fall back to concatenating
# them as-is so the result still carries the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12790 entries, 0 to 12789
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12790 non-null  object        
 1   digitalLoanAccountId   12790 non-null  object        
 2   loanAccountNumber      12790 non-null  object        
 3   aCicScore              12790 non-null  object        
 4   trenchCategory         12790 non-null  object        
 5   appln_submit_datetime  12790 non-null  datetime64[us]
 6   disbursementdate       12790 non-null  dbdate        
 7   Application_month      12790 non-null  object        
 8   Data_selection         12790 non-null  object        
 9   deffpd0                12790 non-null  Int64         
 10  flg_mature_fpd0        12790 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.1+ MB


In [2882]:
# The concatenated score column is object dtype here (see the .info() output),
# so convert it to numeric; values that cannot be parsed become NaN because of
# errors='coerce'.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2883]:
# Write the combined Trench 2 FPD0 frame to a CSV in the current working
# directory (the DataFrame index is written as the first column by default).
df_concat.to_csv(r"aCicScoretrench2fpd0.csv")

In [2884]:
# Score column 'aCicScore' against label 'deffpd0', tagged 'FPD0'.
# NOTE(review): calculate_periodic_gini is defined elsewhere in the notebook; the
# displayed result suggests it returns one Gini value per Week/Month window - confirm.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd0', 'FPD0')

In [2885]:
# Keep the Trench 2 FPD0 results as f0 so they survive the next reassignment of
# `gini_results`.  This rebinds the f0 used earlier for Trench 1.
# NOTE(review): presumably merged with the other Trench 2 metrics further down - confirm.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.495652,Week,aCicScore,1.1.0,FPD0
1,2024-09-01,2024-09-30,0.29075,Month,aCicScore,1.1.0,FPD0
2,2024-09-02,2024-09-08,0.285824,Week,aCicScore,1.1.0,FPD0
3,2024-09-09,2024-09-15,0.301728,Week,aCicScore,1.1.0,FPD0
4,2024-09-16,2024-09-22,0.224814,Week,aCicScore,1.1.0,FPD0


## FPD10

## Test

In [2886]:
# FPD10 / Trench 2 / Test split.
# Reads Alpha Cash CIC scores from audit_balance.ml_model_run_details; when
# trenchCategory is NULL there, it is regex-extracted from the latest
# 'Alpha-Cash-Model-response' request payload for the same loan.  Scores are
# joined to the loan master and to delinquency flags, keeping disbursed loans
# with a non-null score, flg_mature_fpd10 = 1 and trenchCategory = 'Trench 2'.
# The string is raw (r"""...""") so the backslashes in the REGEXP_EXTRACT pattern
# reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,1509429,f14352f6-dd75-479a-a514-26f6dda85e9d,60815094290036,0.4571110175229723,Trench 2,2025-10-11 08:52:38,2025-10-11,2025-10,Test,1,1
1,2890332,4b3e0967-6f7b-4393-ac77-427c5178fbc3,60828903320024,0.4229075266481307,Trench 2,2025-10-11 08:47:49,2025-10-11,2025-10,Test,1,1
2,3370392,94b3dd32-183d-47ce-9736-5e7bc2d7ef4a,60833703920015,0.381769866306212,Trench 2,2025-10-11 10:03:05,2025-10-11,2025-10,Test,0,1
3,3606235,7fbb26d1-cc8e-4c52-b9b7-565ccc9b1bff,60836062350011,0.4646471296002878,Trench 2,2025-10-11 11:13:18,2025-10-14,2025-10,Test,1,1
4,2185008,7733a8d1-e171-4a5b-97cc-57c058513aba,60821850080011,0.6149299644822173,Trench 2,2025-10-11 12:25:48,2025-10-12,2025-10,Test,0,1


In [2887]:
# Keep an independent copy of the Test-split result; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [2888]:
# FPD10 / Trench 2 / Train split.
# Reads Alpha Cash CIC scores from
# dap_ds_poweruser_playground.ml_training_model_run_details and joins them to the
# loan master and delinquency flags.  Only disbursed loans with a non-null score,
# flg_mature_fpd10 = 1 and trenchCategory = 'Trench 2' are kept; rows are
# labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2537956,530e12c3-cbfc-4d66-862d-6bb763f0141b,60825379560012,0.440264,Trench 2,2024-09-09 14:50:02,2024-09-09,2024-09,Train,1,1
1,2074646,ce92e3b8-15ab-4679-ac3f-d077501c99bd,60820746460019,0.402035,Trench 2,2024-09-10 19:18:09,2024-09-11,2024-09,Train,0,1
2,2193493,b0eff5a9-f604-45c7-9a59-fca5c437cb20,60821934930012,0.472053,Trench 2,2024-09-25 16:56:33,2024-09-25,2024-09,Train,0,1
3,1327922,edfe00b6-d215-4a1d-a75e-0714e49906b4,60813279220068,0.42244,Trench 2,2024-09-17 09:36:38,2024-09-17,2024-09,Train,0,1
4,1167936,88af911d-2d0e-4e6e-a324-312e59145522,60811679360012,0.706974,Trench 2,2024-09-14 06:22:30,2024-09-17,2024-09,Train,1,1


In [2889]:
# Snapshot the Train pull; it is concatenated with df1 in the next cell.
df2 = dfd.copy()

In [2890]:
# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas deprecates concatenating empty or all-NA entries (FutureWarning).
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to df2 alone so the column layout survives when both pulls are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12590 entries, 0 to 12589
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12590 non-null  object        
 1   digitalLoanAccountId   12590 non-null  object        
 2   loanAccountNumber      12590 non-null  object        
 3   aCicScore              12590 non-null  object        
 4   trenchCategory         12590 non-null  object        
 5   appln_submit_datetime  12590 non-null  datetime64[us]
 6   disbursementdate       12590 non-null  dbdate        
 7   Application_month      12590 non-null  object        
 8   Data_selection         12590 non-null  object        
 9   deffpd10               12590 non-null  Int64         
 10  flg_mature_fpd10       12590 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.1+ MB


In [2891]:
# aCicScore is an object column in the info() output above; convert it to a
# numeric dtype. errors='coerce' turns unparseable values into NaN.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2892]:
# Persist the combined Train+Test frame to the working directory
# (the DataFrame index is written as the first, unnamed column).
df_concat.to_csv(r"aCicScoretrench2fpd10.csv")

In [2893]:
# Gini of aCicScore against the deffpd10 label, tagged 'FPD10'.
# calculate_periodic_gini is defined outside this cell.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd10', 'FPD10')

In [2894]:
# Keep the FPD10 Gini table as f1; it is merged with the other bad-rate tables later.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.495652,Week,aCicScore,1.1.0,FPD10
1,2024-09-01,2024-09-30,0.291819,Month,aCicScore,1.1.0,FPD10
2,2024-09-02,2024-09-08,0.298729,Week,aCicScore,1.1.0,FPD10
3,2024-09-09,2024-09-15,0.277186,Week,aCicScore,1.1.0,FPD10
4,2024-09-16,2024-09-22,0.284906,Week,aCicScore,1.1.0,FPD10


## FPD30

## Test

In [2895]:
# FPD30 / Test pull for Trench 2.
# - `parsed`: predictions from audit_balance.ml_model_run_details for the three
#   Alpha Cash CIC display names.
# - `model_run`: latest 'Alpha-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName), by publish_time.
# - `modelname`: uses the run's trenchCategory, falling back to a regex
#   extraction from the request payload when it is null.
# - `deliquency`: derives the deff* default flags and flg_mature_* flags.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd30 = 1
#   and trenchCategory = 'Trench 2'.
# Raw string so the backslashes in the regex reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [2896]:
# Snapshot the Test pull; `dfd` is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [2897]:
# FPD30 / Train pull for Trench 2.
# - `parsed` / `modelname`: predictions (aliased aCicScore) from
#   ml_training_model_run_details for the three Alpha Cash CIC display names.
# - `deliquency`: derives the deff* default flags and flg_mature_* flags from
#   obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# - Final select: joins to loan_master_table and keeps disbursed loans with a
#   non-null score, flg_mature_fpd30 = 1 and trenchCategory = 'Trench 2'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2739756,3c66ef49-c219-4dcf-a193-e9c05015e291,60827397560016,0.442862,Trench 2,2024-09-22 10:21:41,2024-09-22,2024-09,Train,0,1
1,2054782,d86156e4-785d-440f-b647-7091229fc580,60820547820016,0.468271,Trench 2,2024-09-20 02:46:18,2024-09-21,2024-09,Train,0,1
2,1086498,c8ea823c-bbcb-457e-a9ec-fbe67a8ac0bb,60810864980011,0.467182,Trench 2,2024-09-06 18:29:34,2024-09-11,2024-09,Train,0,1
3,2544544,f8d5a87c-6108-4fa8-b26f-ee1da954ee92,60825445440019,0.456046,Trench 2,2024-09-15 12:22:54,2024-09-16,2024-09,Train,0,1
4,2841638,0894c7d1-c707-4e20-80a4-51f209fe276e,60828416380011,0.303532,Trench 2,2024-09-13 10:14:10,2024-09-13,2024-09,Train,0,1


In [2898]:
# Snapshot the Train pull; it is concatenated with df1 in the next cell.
df2 = dfd.copy()

In [2899]:
# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas deprecates concatenating empty or all-NA entries (FutureWarning), and
# the Test pull can come back with no rows.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to df2 alone so the column layout survives when both pulls are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12245 entries, 0 to 12244
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12245 non-null  Int64         
 1   digitalLoanAccountId   12245 non-null  object        
 2   loanAccountNumber      12245 non-null  object        
 3   aCicScore              12245 non-null  float64       
 4   trenchCategory         12245 non-null  object        
 5   appln_submit_datetime  12245 non-null  datetime64[us]
 6   disbursementdate       12245 non-null  dbdate        
 7   Application_month      12245 non-null  object        
 8   Data_selection         12245 non-null  object        
 9   deffpd30               12245 non-null  Int64         
 10  flg_mature_fpd30       12245 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.1+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2900]:
# Ensure aCicScore is numeric; errors='coerce' turns unparseable values into NaN.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2901]:
# Persist the combined Train+Test frame to the working directory
# (the DataFrame index is written as the first, unnamed column).
df_concat.to_csv(r"aCicScoretrench2fpd30.csv")

In [2902]:
# Gini of aCicScore against the deffpd30 label, tagged 'FPD30'.
# calculate_periodic_gini is defined outside this cell.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd30', 'FPD30')

In [2903]:
# Keep the FPD30 Gini table as f2; it is merged with the other bad-rate tables later.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.495652,Week,aCicScore,1.1.0,FPD30
1,2024-09-01,2024-09-30,0.318568,Month,aCicScore,1.1.0,FPD30
2,2024-09-02,2024-09-08,0.291139,Week,aCicScore,1.1.0,FPD30
3,2024-09-09,2024-09-15,0.285386,Week,aCicScore,1.1.0,FPD30
4,2024-09-16,2024-09-22,0.317084,Week,aCicScore,1.1.0,FPD30


## FSPD30

## Test

In [2904]:
# FSPD30 / Test pull for Trench 2.
# - `parsed`: predictions from audit_balance.ml_model_run_details for the three
#   Alpha Cash CIC display names.
# - `model_run`: latest 'Alpha-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName), by publish_time.
# - `modelname`: uses the run's trenchCategory, falling back to a regex
#   extraction from the request payload when it is null.
# - `deliquency`: derives the deff* default flags and flg_mature_* flags.
# - Final select: disbursed loans with a non-null score, flg_mature_fspd_30 = 1
#   and trenchCategory = 'Trench 2'.
# The maturity flag must be flg_mature_fspd_30 (obs_min_inst_def30 >= 2), the
# one that pairs with deffspd30 and that the FSPD30 Train query uses. Filtering
# on flg_mature_fpd30 would admit loans with a single observed instalment
# (always labelled deffspd30 = 0) and would give Test a different column set
# from Train.
# Raw string so the backslashes in the regex reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fpd30


In [2905]:
# Snapshot the Test pull; `dfd` is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [2906]:
# FSPD30 / Train pull for Trench 2.
# - `parsed` / `modelname`: predictions (aliased aCicScore) from
#   ml_training_model_run_details for the three Alpha Cash CIC display names.
# - `deliquency`: derives the deff* default flags and flg_mature_* flags from
#   obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# - Final select: joins to loan_master_table and keeps disbursed loans with a
#   non-null score, flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 2'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2174562,172210a1-7a29-46ce-b466-9fecdafa8840,60821745620046,0.482082,Trench 2,2024-09-16 09:42:06,2024-09-16,2024-09,Train,1,1
1,2432622,caabceab-620c-4d53-a6da-9d48bf69cd37,60824326220017,0.621387,Trench 2,2024-09-05 06:04:06,2024-09-05,2024-09,Train,1,1
2,2000798,fa26359b-6dc8-4954-8a93-769ed7fa7ae6,60820007980013,0.535052,Trench 2,2024-09-07 16:07:29,2024-09-07,2024-09,Train,0,1
3,2582544,2bb16b5b-c96b-4d4f-8dc0-feb06cd8bfa0,60825825440011,0.412163,Trench 2,2024-09-29 18:59:10,2024-09-29,2024-09,Train,0,1
4,1775031,53eea465-4209-4930-b1db-23a5d7bc74bb,60817750310013,0.47386,Trench 2,2024-09-14 17:33:48,2024-09-15,2024-09,Train,0,1


In [2907]:
# Snapshot the Train pull; it is concatenated with df1 in the next cell.
df2 = dfd.copy()

In [2908]:
# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas deprecates concatenating empty or all-NA entries (FutureWarning), and
# an empty Test pull would otherwise still add its own columns as all-null
# columns in the combined frame.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to df2 alone so the column layout survives when both pulls are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11567 entries, 0 to 11566
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11567 non-null  Int64         
 1   digitalLoanAccountId   11567 non-null  object        
 2   loanAccountNumber      11567 non-null  object        
 3   aCicScore              11567 non-null  float64       
 4   trenchCategory         11567 non-null  object        
 5   appln_submit_datetime  11567 non-null  datetime64[us]
 6   disbursementdate       11567 non-null  dbdate        
 7   Application_month      11567 non-null  object        
 8   Data_selection         11567 non-null  object        
 9   deffspd30              11567 non-null  Int64         
 10  flg_mature_fspd_30     11567 non-null  Int64         
 11  flg_mature_fpd30       0 non-null      object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2909]:
# Ensure aCicScore is numeric; errors='coerce' turns unparseable values into NaN.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2910]:
# Persist the combined Train+Test frame to the working directory
# (the DataFrame index is written as the first, unnamed column).
df_concat.to_csv(r"aCicScoretrench2fspd30.csv")

In [2911]:
# Gini of aCicScore against the deffspd30 label, tagged 'FSPD30'.
# calculate_periodic_gini is defined outside this cell.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffspd30', 'FSPD30')

In [2912]:
# Keep the FSPD30 Gini table as f3; it is merged with the other bad-rate tables later.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,aCicScore_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.636364,Week,aCicScore,1.1.0,FSPD30
1,2024-09-01,2024-09-30,0.330574,Month,aCicScore,1.1.0,FSPD30
2,2024-09-02,2024-09-08,0.34218,Week,aCicScore,1.1.0,FSPD30
3,2024-09-09,2024-09-15,0.325465,Week,aCicScore,1.1.0,FSPD30
4,2024-09-16,2024-09-22,0.320003,Week,aCicScore,1.1.0,FSPD30


## FSTPD30

## Test

In [2913]:
# FSTPD30 / Test pull for Trench 2.
# - `parsed`: predictions from audit_balance.ml_model_run_details for the three
#   Alpha Cash CIC display names.
# - `model_run`: latest 'Alpha-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName), by publish_time.
# - `modelname`: uses the run's trenchCategory, falling back to a regex
#   extraction from the request payload when it is null.
# - `deliquency`: derives the deff* default flags and flg_mature_* flags.
# - Final select: disbursed loans with a non-null score, flg_mature_fstpd_30 = 1
#   and trenchCategory = 'Trench 2'.
# Raw string so the backslashes in the regex reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [2914]:
# Snapshot the Test pull; `dfd` is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [2915]:
# FSTPD30 / Train pull for Trench 2.
# - `parsed` / `modelname`: predictions (aliased aCicScore) from
#   ml_training_model_run_details for the three Alpha Cash CIC display names.
# - `deliquency`: derives the deff* default flags and flg_mature_* flags from
#   obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# - Final select: joins to loan_master_table and keeps disbursed loans with a
#   non-null score, flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 2'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2073189,90dacded-6757-48a3-9d9b-58f85efdaa1e,60820731890011,0.636984,Trench 2,2024-09-11 11:00:36,2024-09-11,2024-09,Train,1,1
1,2548361,6ebbafc0-0996-4f76-bf7b-6862e63279a5,60825483610011,0.683903,Trench 2,2024-09-16 17:58:35,2024-09-17,2024-09,Train,1,1
2,1878709,8f155e49-c842-4c3b-aa40-49aea8a2b93d,60818787090014,0.619431,Trench 2,2024-09-09 02:31:48,2024-09-09,2024-09,Train,1,1
3,2572970,3c967dca-8a11-4668-bb7b-a4805a84ef21,60825729700011,0.449146,Trench 2,2024-09-25 06:22:43,2024-09-25,2024-09,Train,0,1
4,2491029,27742d70-9912-4f49-b74c-009f9e775062,60824910290012,0.476187,Trench 2,2024-09-23 23:57:56,2024-09-25,2024-09,Train,0,1


In [2916]:
# Snapshot the Train pull; it is concatenated with df1 in the next cell.
df2 = dfd.copy()

In [2917]:
# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas deprecates concatenating empty or all-NA entries (FutureWarning), and
# the Test pull can come back with no rows.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to df2 alone so the column layout survives when both pulls are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10832 entries, 0 to 10831
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10832 non-null  Int64         
 1   digitalLoanAccountId   10832 non-null  object        
 2   loanAccountNumber      10832 non-null  object        
 3   aCicScore              10832 non-null  float64       
 4   trenchCategory         10832 non-null  object        
 5   appln_submit_datetime  10832 non-null  datetime64[us]
 6   disbursementdate       10832 non-null  dbdate        
 7   Application_month      10832 non-null  object        
 8   Data_selection         10832 non-null  object        
 9   deffstpd30             10832 non-null  Int64         
 10  flg_mature_fstpd_30    10832 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 962.7+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2918]:
# Ensure aCicScore is numeric; errors='coerce' turns unparseable values into NaN.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [2919]:
# Persist the combined Train+Test frame to the working directory
# (the DataFrame index is written as the first, unnamed column).
df_concat.to_csv(r"aCicScoretrench2fstpd30.csv")

In [2920]:
# Gini of aCicScore against the deffstpd30 label, tagged 'FSTPD30'.
# calculate_periodic_gini is defined outside this cell.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffstpd30', 'FSTPD30')

In [2921]:
# Keep the FSTPD30 Gini table as f4; it is merged with the other bad-rate tables next.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,aCicScore_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.426901,Week,aCicScore,1.1.0,FSTPD30
1,2024-09-01,2024-09-30,0.291161,Month,aCicScore,1.1.0,FSTPD30
2,2024-09-02,2024-09-08,0.308345,Week,aCicScore,1.1.0,FSTPD30
3,2024-09-09,2024-09-15,0.262134,Week,aCicScore,1.1.0,FSTPD30
4,2024-09-16,2024-09-22,0.30391,Week,aCicScore,1.1.0,FSTPD30


## Combining the dataframes

In [2922]:
import functools

# Key columns shared by every per-bad-rate Gini table.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']
# One Gini table per bad-rate definition, in merge order.
dataframes = [f0, f1, f2, f3, f4]

def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the shared key columns.

    NOTE(review): 'bad_rate' is one of the join keys and each table carries its
    own value in it (e.g. 'FPD10', 'FPD30'), so rows from different tables do
    not match. The outer join therefore stacks the tables, leaving a single
    populated *_gini column per row. Confirm this layout is intended before
    removing 'bad_rate' from the keys.
    """
    merged = df1.merge(df2, how='outer', on=common_columns)
    return merged

# Fold the list left to right into one frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aCicScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'aCicScore_FPD10_gini',
       'aCicScore_FPD30_gini', 'aCicScore_FSPD30_gini',
       'aCicScore_FSTPD30_gini'], dtype=object)

In [2923]:
# Key columns first, then the per-bad-rate Gini columns in a fixed order.
key_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate']
# Tag every Gini column with the trench ("t2"). Each source column is listed
# exactly once (the previous dict literal repeated the FSPD30 key).
gini_renames = {
    'aCicScore_FPD0_gini': 'aCicScore_t2_FPD0_gini',
    'aCicScore_FPD10_gini': 'aCicScore_t2_FPD10_gini',
    'aCicScore_FPD30_gini': 'aCicScore_t2_FPD30_gini',
    'aCicScore_FSPD30_gini': 'aCicScore_t2_FSPD30_gini',
    'aCicScore_FSTPD30_gini': 'aCicScore_t2_FSTPD30_gini',
}
# Build the final frame in one non-mutating chain instead of inplace edits.
final_df = (
    final_df[key_columns + list(gini_renames)]
    .rename(columns=gini_renames)
    .assign(
        Trench_category='Trench 2',
        Model_display_name='cic_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                   datetime64[ns]
end_date                     datetime64[ns]
period                               object
Model_Name                           object
version                              object
bad_rate                             object
aCicScore_t2_FPD0_gini              float64
aCicScore_t2_FPD10_gini             float64
aCicScore_t2_FPD30_gini             float64
aCicScore_t2_FSPD30_gini            float64
aCicScore_t2_FSTPD30_gini           float64
Trench_category                      object
Model_display_name                   object
Product_type                         object
dtype: object

In [2924]:
# Preview the assembled Trench 2 Gini table before it is uploaded.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,aCicScore_t2_FPD0_gini,aCicScore_t2_FPD10_gini,aCicScore_t2_FPD30_gini,aCicScore_t2_FSPD30_gini,aCicScore_t2_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,aCicScore,1.1.0,FPD0,0.495652,,,,,Trench 2,cic_model_cash,CASH
1,2024-09-01,2024-09-30,Month,aCicScore,1.1.0,FPD0,0.29075,,,,,Trench 2,cic_model_cash,CASH
2,2024-09-02,2024-09-08,Week,aCicScore,1.1.0,FPD0,0.285824,,,,,Trench 2,cic_model_cash,CASH
3,2024-09-09,2024-09-15,Week,aCicScore,1.1.0,FPD0,0.301728,,,,,Trench 2,cic_model_cash,CASH
4,2024-09-16,2024-09-22,Week,aCicScore,1.1.0,FPD0,0.224814,,,,,Trench 2,cic_model_cash,CASH


In [2925]:
# Load the Trench 2 Gini table into BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_cic_model_t2_v1_gini4"
# "WRITE_TRUNCATE" replaces the table contents; use "WRITE_APPEND" to add rows instead.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
# Block until the load job has finished.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=74c424d3-5333-47f9-b85f-370a1f210ae9>

## Trench 3

## FPD0

## Test

In [2926]:
# Trench 3 / FPD0 / Test population.
# Scores are read from audit_balance.ml_model_run_details; when a run has no trenchCategory,
# it is extracted by regex from the latest 'Alpha-Cash-Model-response' request payload.
# Only disbursed loans whose FPD0 observation is mature (flg_mature_fpd0 = 1) are kept.
# The string is raw (r"""...""") so the regex backslashes reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2161307,b09215e1-3284-479d-8f99-349b292fcd61,60821613070021,0.3702853189649125,Trench 3,2025-10-06 09:25:01,2025-10-06,2025-10,Test,0,1
1,2832430,89aa711e-cc9b-466e-a14f-7b1073fa1ce2,60828324300031,0.461063857225889,Trench 3,2025-10-06 13:24:58,2025-10-06,2025-10,Test,1,1
2,1113618,e978009d-0db1-40cf-a258-ba4277fd9acb,60811136180036,0.4246102939895185,Trench 3,2025-10-18 09:34:24,2025-10-18,2025-10,Test,0,1
3,3243707,9d48ef88-5f60-43a4-ab56-55b31fb775a0,60832437070023,0.4158824725973302,Trench 3,2025-10-18 10:22:38,2025-10-18,2025-10,Test,0,1
4,3363138,3573ac36-c975-47b5-9466-4c6e7fa89a63,60833631380028,0.3546283247780397,Trench 3,2025-10-18 12:48:54,2025-10-18,2025-10,Test,0,1


In [2927]:
# Keep an independent copy of the Test rows; dfd is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2928]:
# Trench 3 / FPD0 / Train population.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details and joined to the
# loan master and delinquency tables; only disbursed loans with a mature FPD0 observation
# (flg_mature_fpd0 = 1) are kept.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1932656,862fee44-1e47-451f-8f77-d236e4685484,60819326560036,0.629037,Trench 3,2024-09-14 00:23:16,2024-09-24,2024-09,Train,1,1
1,2181345,289bf6da-7c74-432d-b959-1e7eb3e0fedc,60821813450034,0.458429,Trench 3,2024-09-14 10:08:55,2024-09-14,2024-09,Train,1,1
2,1288053,0a13518e-2ac3-48fc-a7a8-760789f79c3e,60812880530032,0.333842,Trench 3,2024-09-18 10:33:24,2024-09-18,2024-09,Train,0,1
3,2179535,78a51e8a-a7f2-4ef3-8d20-d5229994deea,60821795350035,0.329342,Trench 3,2024-09-27 11:16:09,2024-09-27,2024-09,Train,0,1
4,2276151,913b6a73-24c5-4950-a53a-dbbad74dc709,60822761510037,0.338641,Trench 3,2024-09-28 11:47:35,2024-09-28,2024-09,Train,0,1


In [2929]:
# Keep an independent copy of the Train rows under their own name.
df2 = dfd.copy(deep=True)

In [2930]:
# Stack Train (df2) followed by Test (df1) into one frame with a fresh 0..n-1 index.
# Empty frames are excluded first: concatenating an empty frame raises pandas'
# FutureWarning about empty entries and, in future pandas versions, would let the
# empty frame influence the result dtypes. If both frames are empty, fall back to
# df2 alone so the result still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11586 entries, 0 to 11585
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11586 non-null  object        
 1   digitalLoanAccountId   11586 non-null  object        
 2   loanAccountNumber      11586 non-null  object        
 3   aCicScore              11586 non-null  object        
 4   trenchCategory         11586 non-null  object        
 5   appln_submit_datetime  11586 non-null  datetime64[us]
 6   disbursementdate       11586 non-null  dbdate        
 7   Application_month      11586 non-null  object        
 8   Data_selection         11586 non-null  object        
 9   deffpd0                11586 non-null  Int64         
 10  flg_mature_fpd0        11586 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1018.4+ KB


In [2931]:
# aCicScore is object dtype after the concat (see the info() output above); convert it to a
# numeric column. errors='coerce' turns values that cannot be parsed into NaN instead of raising.
df_concat['aCicScore'] = pd.to_numeric(
    df_concat['aCicScore'],
    errors='coerce',
)

In [2932]:
# Snapshot the combined Train+Test data to a CSV in the current working directory.
# The DataFrame index is written as the first (unnamed) column, which is the pandas default.
df_concat.to_csv(path_or_buf=r"aCicScoretrench3fpd0.csv")

In [2933]:
# Compute the periodic Gini of the aCicScore column against the deffpd0 flag using
# calculate_periodic_gini (defined earlier in this notebook, outside this section).
# NOTE(review): 'FPD0' appears to be the bad-rate label that ends up in the output's
# `bad_rate` column and in the `aCicScore_FPD0_gini` column name — confirm against the helper.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd0', 'FPD0')

In [2934]:
# Keep the FPD0 Gini table under its own name so it survives gini_results being reassigned.
f0 = gini_results.copy(deep=True)
f0.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.214286,Week,aCicScore,1.1.0,FPD0
1,2024-09-01,2024-09-30,0.168616,Month,aCicScore,1.1.0,FPD0
2,2024-09-02,2024-09-08,0.256825,Week,aCicScore,1.1.0,FPD0
3,2024-09-09,2024-09-15,0.14,Week,aCicScore,1.1.0,FPD0
4,2024-09-16,2024-09-22,0.108178,Week,aCicScore,1.1.0,FPD0


## FPD10

## Test

In [2935]:
# Trench 3 / FPD10 / Test population.
# Scores are read from audit_balance.ml_model_run_details; when a run has no trenchCategory,
# it is extracted by regex from the latest 'Alpha-Cash-Model-response' request payload.
# Only disbursed loans whose FPD10 observation is mature (flg_mature_fpd10 = 1) are kept.
# The string is raw (r"""...""") so the regex backslashes reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2826643,3f344a2a-fa6f-4916-8d53-b8aa6789925e,60828266430033,0.360487689665674,Trench 3,2025-10-14 11:43:37,2025-10-14,2025-10,Test,1,1
1,3043380,2c9602f2-2603-42e6-be02-610a2cb5cf96,60830433800036,0.3282533282345162,Trench 3,2025-10-14 09:57:25,2025-10-14,2025-10,Test,0,1
2,2779937,e9655051-8a0d-48b7-b6eb-a20b4bc17faa,60827799370035,0.5712002157704452,Trench 3,2025-10-14 16:56:02,2025-10-14,2025-10,Test,0,1
3,2954891,6b45935e-6986-4c7c-aa82-cf10f5d233c3,60829548910022,0.3833760540830279,Trench 3,2025-10-14 16:38:09,2025-10-15,2025-10,Test,0,1
4,3344057,005baedb-6523-449e-a360-dbd9dbe68a48,60833440570022,0.5317976483199327,Trench 3,2025-10-14 13:43:28,2025-10-14,2025-10,Test,0,1


In [2936]:
# Keep an independent copy of the Test rows; dfd is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2937]:
# Trench 3 / FPD10 / Train population.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details and joined to the
# loan master and delinquency tables; only disbursed loans with a mature FPD10 observation
# (flg_mature_fpd10 = 1) are kept.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2493993,b7b4632c-273b-4820-bbf3-6afcbe4750a1,60824939930023,0.352975,Trench 3,2024-09-06 13:31:57,2024-09-06,2024-09,Train,0,1
1,2150055,8f5bfa0d-2e79-485d-8336-2fcf97e8fcdc,60821500550062,0.419928,Trench 3,2024-09-05 17:53:47,2024-09-05,2024-09,Train,0,1
2,2076548,5491ec31-cd00-4cc0-895e-0f70f1eae0f6,60820765480041,0.535943,Trench 3,2024-09-08 11:47:01,2024-09-08,2024-09,Train,0,1
3,2149653,99675aee-145c-4a64-a95e-8b5c1e934782,60821496530035,0.331523,Trench 3,2024-09-05 01:32:09,2024-09-05,2024-09,Train,0,1
4,2377053,3cf143d6-16a7-431c-8a19-f2f9d9f86f15,60823770530028,0.465467,Trench 3,2024-09-06 22:27:33,2024-09-09,2024-09,Train,0,1


In [2938]:
# Keep an independent copy of the Train rows under their own name.
df2 = dfd.copy(deep=True)

In [2939]:
# Stack Train (df2) followed by Test (df1) into one frame with a fresh 0..n-1 index.
# Empty frames are excluded first: concatenating an empty frame raises pandas'
# FutureWarning about empty entries and, in future pandas versions, would let the
# empty frame influence the result dtypes. If both frames are empty, fall back to
# df2 alone so the result still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11418 entries, 0 to 11417
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11418 non-null  object        
 1   digitalLoanAccountId   11418 non-null  object        
 2   loanAccountNumber      11418 non-null  object        
 3   aCicScore              11418 non-null  object        
 4   trenchCategory         11418 non-null  object        
 5   appln_submit_datetime  11418 non-null  datetime64[us]
 6   disbursementdate       11418 non-null  dbdate        
 7   Application_month      11418 non-null  object        
 8   Data_selection         11418 non-null  object        
 9   deffpd10               11418 non-null  Int64         
 10  flg_mature_fpd10       11418 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1003.7+ KB


In [2940]:
# aCicScore is object dtype after the concat (see the info() output above); convert it to a
# numeric column. errors='coerce' turns values that cannot be parsed into NaN instead of raising.
df_concat['aCicScore'] = pd.to_numeric(
    df_concat['aCicScore'],
    errors='coerce',
)

In [2941]:
# Snapshot the combined Train+Test data to a CSV in the current working directory.
# The DataFrame index is written as the first (unnamed) column, which is the pandas default.
df_concat.to_csv(path_or_buf=r"aCicScoretrench3fpd10.csv")

In [2942]:
# Compute the periodic Gini of the aCicScore column against the deffpd10 flag using
# calculate_periodic_gini (defined earlier in this notebook, outside this section).
# NOTE(review): 'FPD10' appears to be the bad-rate label that ends up in the output's
# `bad_rate` column and in the `aCicScore_FPD10_gini` column name — confirm against the helper.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd10', 'FPD10')

In [2943]:
# Keep the FPD10 Gini table under its own name so it survives gini_results being reassigned.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,0.055556,Week,aCicScore,1.1.0,FPD10
1,2024-09-01,2024-09-30,0.191107,Month,aCicScore,1.1.0,FPD10
2,2024-09-02,2024-09-08,0.098118,Week,aCicScore,1.1.0,FPD10
3,2024-09-09,2024-09-15,0.497942,Week,aCicScore,1.1.0,FPD10
4,2024-09-16,2024-09-22,0.09188,Week,aCicScore,1.1.0,FPD10


## FPD30

## Test

In [2944]:
# Trench 3 / FPD30 / Test population.
# Scores are read from audit_balance.ml_model_run_details; when a run has no trenchCategory,
# it is extracted by regex from the latest 'Alpha-Cash-Model-response' request payload.
# Only disbursed loans whose FPD30 observation is mature (flg_mature_fpd30 = 1) are kept.
# The string is raw (r"""...""") so the regex backslashes reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [2945]:
# Keep an independent copy of the Test rows; dfd is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2946]:
# Trench 3 / FPD30 / Train population.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details and joined to the
# loan master and delinquency tables; only disbursed loans with a mature FPD30 observation
# (flg_mature_fpd30 = 1) are kept.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,1422398,34f14bd9-6aad-4929-8fd3-aaa60d9df8dd,60814223980107,0.376647,Trench 3,2024-09-04 10:06:06,2024-09-04,2024-09,Train,1,1
1,2660295,c016cfca-d09b-4231-a935-7e308beac9c4,60826602950029,0.45315,Trench 3,2024-09-03 17:20:35,2024-09-29,2024-09,Train,0,1
2,2602032,add2b3ed-b32d-435e-8a26-c80d517c6c2f,60826020320051,0.350905,Trench 3,2024-09-13 19:07:49,2024-09-13,2024-09,Train,0,1
3,2271217,ce5fb2b3-00a1-42bf-a173-c9c63d0d07ff,60822712170021,0.327088,Trench 3,2024-09-15 18:22:43,2024-09-15,2024-09,Train,0,1
4,2310005,0971ed41-7412-4b28-81aa-ffbea7a73388,60823100050022,0.396233,Trench 3,2024-09-23 14:21:23,2024-10-02,2024-09,Train,0,1


In [2947]:
# Keep an independent copy of the Train rows under their own name.
df2 = dfd.copy(deep=True)

In [2948]:
# Stack Train (df2) followed by Test (df1) into one frame with a fresh 0..n-1 index.
# The FPD30 Test query returned no rows, and concatenating that empty frame raises
# pandas' FutureWarning about empty entries; in future pandas versions the empty frame
# would also influence the result dtypes. Excluding empty frames first keeps today's
# result and silences the warning. If both frames are empty, fall back to df2 alone
# so the result still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10946 entries, 0 to 10945
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10946 non-null  Int64         
 1   digitalLoanAccountId   10946 non-null  object        
 2   loanAccountNumber      10946 non-null  object        
 3   aCicScore              10946 non-null  float64       
 4   trenchCategory         10946 non-null  object        
 5   appln_submit_datetime  10946 non-null  datetime64[us]
 6   disbursementdate       10946 non-null  dbdate        
 7   Application_month      10946 non-null  object        
 8   Data_selection         10946 non-null  object        
 9   deffpd30               10946 non-null  Int64         
 10  flg_mature_fpd30       10946 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 972.9+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2949]:
# Ensure aCicScore is numeric. errors='coerce' turns values that cannot be parsed into NaN
# instead of raising; the call leaves an already-numeric column unchanged.
df_concat['aCicScore'] = pd.to_numeric(
    df_concat['aCicScore'],
    errors='coerce',
)

In [2950]:
# Snapshot the combined Train+Test data to a CSV in the current working directory.
# The DataFrame index is written as the first (unnamed) column, which is the pandas default.
df_concat.to_csv(path_or_buf=r"aCicScoretrench3fpd30.csv")

In [2951]:
# Compute the periodic Gini of the aCicScore column against the deffpd30 flag using
# calculate_periodic_gini (defined earlier in this notebook, outside this section).
# NOTE(review): 'FPD30' appears to be the bad-rate label that ends up in the output's
# `bad_rate` column and in the `aCicScore_FPD30_gini` column name — confirm against the helper.
gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd30', 'FPD30')

In [2952]:
# Keep the FPD30 Gini table under its own name so it survives gini_results being reassigned.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,-0.230769,Week,aCicScore,1.1.0,FPD30
1,2024-09-01,2024-09-30,0.210121,Month,aCicScore,1.1.0,FPD30
2,2024-09-02,2024-09-08,0.168794,Week,aCicScore,1.1.0,FPD30
3,2024-09-09,2024-09-15,0.332143,Week,aCicScore,1.1.0,FPD30
4,2024-09-16,2024-09-22,0.455556,Week,aCicScore,1.1.0,FPD30


## FSPD30

## Test

In [2953]:
# Trench 3 / FSPD30 / Test population.
# Scores are read from audit_balance.ml_model_run_details; when a run has no trenchCategory,
# it is extracted by regex from the latest 'Alpha-Cash-Model-response' request payload.
# The string is raw (r"""...""") so the regex backslashes reach BigQuery unchanged.
#
# Maturity fix: FSPD30 (deffspd30) is defined on obs_min_inst_def30 >= 2, so the matching
# maturity flag is flg_mature_fspd_30. This cell previously selected and filtered on
# flg_mature_fpd30 (>= 1), which admits loans that are not yet mature for FSPD30 and gives
# the Test frame a different column name from the FSPD30 Train frame.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fpd30


In [2954]:
# Keep an independent copy of the Test rows; dfd is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2955]:
# Trench 3 / FSPD30 / Train population.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details and joined to the
# loan master and delinquency tables; only disbursed loans with a mature FSPD30 observation
# (flg_mature_fspd_30 = 1, i.e. obs_min_inst_def30 >= 2) are kept.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Execute the query and discard rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2346240,0b22f2d0-4f30-4c89-a1c5-d85346de8394,60823462400021,0.419928,Trench 3,2024-09-07 19:50:17,2024-09-07,2024-09,Train,0,1
1,2819404,99e3b101-48db-447e-aa71-90c8e87a09b5,60828194040029,0.461064,Trench 3,2024-09-13 17:18:16,2024-09-13,2024-09,Train,0,1
2,2829427,e3c02934-5e1b-411f-a70f-332908a3912c,60828294270021,0.413554,Trench 3,2024-09-23 04:28:57,2024-09-23,2024-09,Train,0,1
3,2055044,320d31e3-f4c6-445f-9f56-ef3d467a8808,60820550440021,0.486799,Trench 3,2024-09-01 07:28:21,2024-09-01,2024-09,Train,0,1
4,2329906,75ef0d69-574a-4866-9f68-09a3fc9a8f6e,60823299060039,0.436756,Trench 3,2024-09-28 13:44:07,2024-09-28,2024-09,Train,0,1


In [2956]:
# Independent snapshot of the Train-cohort result held in `dfd`.
df2 = dfd.copy(deep=True)

In [2957]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out of the concat: pandas raises a FutureWarning when an
# empty/all-NA frame takes part, and an empty frame whose columns differ from the
# other one would add an all-null column to the result.
_non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to the Train frame so pd.concat never receives an empty list.
df_concat = pd.concat(_non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9755 entries, 0 to 9754
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9755 non-null   Int64         
 1   digitalLoanAccountId   9755 non-null   object        
 2   loanAccountNumber      9755 non-null   object        
 3   aCicScore              9755 non-null   float64       
 4   trenchCategory         9755 non-null   object        
 5   appln_submit_datetime  9755 non-null   datetime64[us]
 6   disbursementdate       9755 non-null   dbdate        
 7   Application_month      9755 non-null   object        
 8   Data_selection         9755 non-null   object        
 9   deffspd30              9755 non-null   Int64         
 10  flg_mature_fspd_30     9755 non-null   Int64         
 11  flg_mature_fpd30       0 non-null      object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2958]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aCicScore=pd.to_numeric(df_concat['aCicScore'], errors='coerce')
)

In [2959]:
# Persist the Trench 3 / FSPD30 scoring set next to the notebook.
# The file name previously said "trench2" although this section filters on
# 'Trench 3'; it now matches the sibling export "aCicScoretrench3fstpd30.csv".
df_concat.to_csv(r"aCicScoretrench3fspd30.csv")

In [2960]:
# calculate_periodic_gini is defined earlier in the notebook; the positional
# arguments are the frame, the score column, the target column and the target label.
gini_results = calculate_periodic_gini(
    df_concat,
    'aCicScore',
    'deffspd30',
    'FSPD30',
)

In [2961]:
# Keep the FSPD30 Gini table under its own name before `gini_results` is reused.
f3 = gini_results.copy(deep=True)
f3.head(5)

Unnamed: 0,start_date,end_date,aCicScore_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,-0.230769,Week,aCicScore,1.1.0,FSPD30
1,2024-09-01,2024-09-30,0.276687,Month,aCicScore,1.1.0,FSPD30
2,2024-09-02,2024-09-08,0.234417,Week,aCicScore,1.1.0,FSPD30
3,2024-09-09,2024-09-15,0.499065,Week,aCicScore,1.1.0,FSPD30
4,2024-09-16,2024-09-22,0.316923,Week,aCicScore,1.1.0,FSPD30


## FSTPD30

## Test

In [2962]:
# Test cohort - Alpha Cash CIC model - Trench 3 - FSTPD30 target.
# Reads predictions from audit_balance.ml_model_run_details. When a score row has
# no trenchCategory, it is extracted by regex from the most recent
# 'Alpha-Cash-Model-response' request payload for the same loan. Only disbursed
# loans with flg_mature_fstpd_30 = 1 (obs_min_inst_def30 >= 3) are kept.
# The string is raw (r-prefix) because the regex contains backslashes.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [2963]:
# Independent snapshot of the Test-cohort result; `dfd` is reused by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2964]:
# Train cohort - Alpha Cash CIC model - Trench 3 - FSTPD30 target.
# Reads predictions from ml_training_model_run_details, joins them to disbursed
# loans (flagDisbursement = 1) and to the delinquency flags, and keeps only rows
# with flg_mature_fstpd_30 = 1 (i.e. obs_min_inst_def30 >= 3).
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,1422398,34f14bd9-6aad-4929-8fd3-aaa60d9df8dd,60814223980107,0.376647,Trench 3,2024-09-04 10:06:06,2024-09-04,2024-09,Train,1,1
1,2660295,c016cfca-d09b-4231-a935-7e308beac9c4,60826602950029,0.45315,Trench 3,2024-09-03 17:20:35,2024-09-29,2024-09,Train,0,1
2,2602032,add2b3ed-b32d-435e-8a26-c80d517c6c2f,60826020320051,0.350905,Trench 3,2024-09-13 19:07:49,2024-09-13,2024-09,Train,0,1
3,2271217,ce5fb2b3-00a1-42bf-a173-c9c63d0d07ff,60822712170021,0.327088,Trench 3,2024-09-15 18:22:43,2024-09-15,2024-09,Train,0,1
4,2310005,0971ed41-7412-4b28-81aa-ffbea7a73388,60823100050022,0.396233,Trench 3,2024-09-23 14:21:23,2024-10-02,2024-09,Train,1,1


In [2965]:
# Independent snapshot of the Train-cohort result held in `dfd`.
df2 = dfd.copy(deep=True)

In [2966]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out of the concat: pandas raises a FutureWarning when an
# empty/all-NA frame takes part (the Test query returned no rows here).
_non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to the Train frame so pd.concat never receives an empty list.
df_concat = pd.concat(_non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8521 entries, 0 to 8520
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8521 non-null   Int64         
 1   digitalLoanAccountId   8521 non-null   object        
 2   loanAccountNumber      8521 non-null   object        
 3   aCicScore              8521 non-null   float64       
 4   trenchCategory         8521 non-null   object        
 5   appln_submit_datetime  8521 non-null   datetime64[us]
 6   disbursementdate       8521 non-null   dbdate        
 7   Application_month      8521 non-null   object        
 8   Data_selection         8521 non-null   object        
 9   deffstpd30             8521 non-null   Int64         
 10  flg_mature_fstpd_30    8521 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 757.4+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2967]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aCicScore=pd.to_numeric(df_concat['aCicScore'], errors='coerce')
)

In [2968]:
# Persist the Trench 3 / FSTPD30 scoring set (index included) next to the notebook.
df_concat.to_csv(path_or_buf=r"aCicScoretrench3fstpd30.csv")

In [2969]:
# calculate_periodic_gini is defined earlier in the notebook; the positional
# arguments are the frame, the score column, the target column and the target label.
gini_results = calculate_periodic_gini(
    df_concat,
    'aCicScore',
    'deffstpd30',
    'FSTPD30',
)

In [2970]:
# Keep the FSTPD30 Gini table under its own name before `gini_results` is reused.
f4 = gini_results.copy(deep=True)
f4.head(5)

Unnamed: 0,start_date,end_date,aCicScore_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-08-26,2024-09-01,-0.272727,Week,aCicScore,1.1.0,FSTPD30
1,2024-09-01,2024-09-30,0.283827,Month,aCicScore,1.1.0,FSTPD30
2,2024-09-02,2024-09-08,0.234417,Week,aCicScore,1.1.0,FSTPD30
3,2024-09-09,2024-09-15,0.495425,Week,aCicScore,1.1.0,FSTPD30
4,2024-09-16,2024-09-22,0.307951,Week,aCicScore,1.1.0,FSTPD30


## Combining the Gini result dataframes

In [2971]:
import functools

# Per-target Gini tables built in the sections above (FPD0 ... FSTPD30).
dataframes = [f0, f1, f2, f3, f4]
# Columns shared by every Gini table; used as the merge key.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-merge two Gini tables on ``common_columns`` and return the result."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): `bad_rate` holds the target label (e.g. 'FSPD30' in f3, 'FSTPD30'
# in f4), so rows from different tables never share a full key and the outer merge
# ends up stacking them with NaN in the other Gini columns - confirm that this
# sparse layout is what the downstream table expects.
final_df = functools.reduce(merge_dataframes, dataframes[1:], dataframes[0])

final_df.columns.values

array(['start_date', 'end_date', 'aCicScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'aCicScore_FPD10_gini',
       'aCicScore_FPD30_gini', 'aCicScore_FSPD30_gini',
       'aCicScore_FSTPD30_gini'], dtype=object)

In [2972]:
# Order the columns (merge keys first, then one Gini column per target) and work
# on a copy so the assignments below do not touch the merged frame.
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
    'aCicScore_FPD0_gini', 'aCicScore_FPD10_gini', 'aCicScore_FPD30_gini',
    'aCicScore_FSPD30_gini', 'aCicScore_FSTPD30_gini',
]].copy()
# Tag every Gini column with the trench ("t3"). The original mapping listed
# 'aCicScore_FSPD30_gini' twice; each source column now appears exactly once, and
# the rename is assigned back instead of using inplace=True.
# Note: re-running this cell fails because the un-prefixed columns no longer exist.
final_df = final_df.rename(columns={
    'aCicScore_FPD0_gini': 'aCicScore_t3_FPD0_gini',
    'aCicScore_FPD10_gini': 'aCicScore_t3_FPD10_gini',
    'aCicScore_FPD30_gini': 'aCicScore_t3_FPD30_gini',
    'aCicScore_FSPD30_gini': 'aCicScore_t3_FSPD30_gini',
    'aCicScore_FSTPD30_gini': 'aCicScore_t3_FSTPD30_gini',
})
# Constant descriptor columns for the uploaded table.
final_df['Trench_category'] = 'Trench 3'
final_df['Model_display_name'] = 'cic_model_cash'
final_df['Product_type'] = 'CASH'
final_df.dtypes

start_date                   datetime64[ns]
end_date                     datetime64[ns]
period                               object
Model_Name                           object
version                              object
bad_rate                             object
aCicScore_t3_FPD0_gini              float64
aCicScore_t3_FPD10_gini             float64
aCicScore_t3_FPD30_gini             float64
aCicScore_t3_FSPD30_gini            float64
aCicScore_t3_FSTPD30_gini           float64
Trench_category                      object
Model_display_name                   object
Product_type                         object
dtype: object

In [2973]:
# Preview the first rows of the table that is about to be uploaded.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,aCicScore_t3_FPD0_gini,aCicScore_t3_FPD10_gini,aCicScore_t3_FPD30_gini,aCicScore_t3_FSPD30_gini,aCicScore_t3_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,aCicScore,1.1.0,FPD0,0.214286,,,,,Trench 3,cic_model_cash,CASH
1,2024-09-01,2024-09-30,Month,aCicScore,1.1.0,FPD0,0.168616,,,,,Trench 3,cic_model_cash,CASH
2,2024-09-02,2024-09-08,Week,aCicScore,1.1.0,FPD0,0.256825,,,,,Trench 3,cic_model_cash,CASH
3,2024-09-09,2024-09-15,Week,aCicScore,1.1.0,FPD0,0.14,,,,,Trench 3,cic_model_cash,CASH
4,2024-09-16,2024-09-22,Week,aCicScore,1.1.0,FPD0,0.108178,,,,,Trench 3,cic_model_cash,CASH


In [2974]:
# Load the Trench 3 Gini table into BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_cic_model_t3_v1_gini4"
# WRITE_TRUNCATE replaces the table contents on every run ("WRITE_APPEND" would add rows).
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
# Block until the load job has finished; the cell displays the returned job.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=e0659dd4-d885-4ed1-96ec-3a726dcb3a80>

# Alpha-Cash-Stack-Model

# Trench 1

## FPD0

## Test

In [2975]:
# Test cohort - Alpha Cash Stack model - Trench 1 - FPD0 target.
# Reads predictions from audit_balance.ml_model_run_details. When a score row has
# no trenchCategory, it is extracted by regex from the most recent
# 'Alpha-Cash-Model-response' request payload for the same loan. Only disbursed
# loans with flg_mature_fpd0 = 1 (obs_min_inst_def0 >= 1) are kept.
# The string is raw (r-prefix) because the regex contains backslashes.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3716231,64b86ec3-a037-4e33-bf27-c0f2603ab1b8,60837162310017,0.4126319703234993,Trench 1,2025-10-01 10:49:31,2025-10-01,2025-10,Test,0,1
1,3716782,1616f4ec-1811-4744-8ec4-9ec17cf862bb,60837167820013,0.4397932307036851,Trench 1,2025-10-01 14:33:37,2025-10-06,2025-10,Test,0,1
2,3716699,ebf77a21-f66f-4aad-a017-50f61cf5ff41,60837166990019,0.3100514426375333,Trench 1,2025-10-01 15:09:43,2025-10-01,2025-10,Test,0,1
3,3712217,6b85a5fb-2b6a-4b71-afde-838f71e3f3b2,60837122170013,0.4176009434556032,Trench 1,2025-10-01 19:16:49,2025-10-03,2025-10,Test,0,1
4,3758379,bb8b9304-72fd-409f-9707-348b90d4a571,60837583790014,0.3818239084103975,Trench 1,2025-10-21 12:18:01,2025-10-21,2025-10,Test,0,1


In [2976]:
# Independent snapshot of the Test-cohort result; `dfd` is reused by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2977]:
# Train cohort - Alpha Cash Stack model - Trench 1 - FPD0 target.
# Reads predictions from ml_training_model_run_details, joins them to disbursed
# loans (flagDisbursement = 1) and to the delinquency flags, and keeps only rows
# with flg_mature_fpd0 = 1 (i.e. obs_min_inst_def0 >= 1).
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2934979,f2aba3fb-56d9-4289-9fba-bd84644e5368,60829349790011,0.652905,Trench 1,2024-10-13 00:42:40,2024-10-13,2024-10,Train,1,1
1,2919780,c47cabfe-b223-4b2c-9ef9-44b67dcc9e57,60829197800018,0.361813,Trench 1,2024-10-08 13:21:53,2024-10-08,2024-10,Train,0,1
2,2961271,e65874ab-4389-4a06-a295-eea107f4273e,60829612710015,0.640347,Trench 1,2024-10-25 10:38:19,2024-10-26,2024-10,Train,0,1
3,2974608,6952c236-f5b5-4620-9825-e044ebe00aaf,60829746080017,0.458123,Trench 1,2024-10-26 16:42:48,2024-10-26,2024-10,Train,0,1
4,2951243,4814ce5c-62b5-41bb-afd1-f7096da3a424,60829512430016,0.454181,Trench 1,2024-10-18 12:48:22,2024-10-18,2024-10,Train,0,1


In [2978]:
# Independent snapshot of the Train-cohort result held in `dfd`.
df2 = dfd.copy(deep=True)

In [2979]:
# Train rows (df2) first, then Test rows (df1); the index is rebuilt from 0.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16046 entries, 0 to 16045
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16046 non-null  object        
 1   digitalLoanAccountId   16046 non-null  object        
 2   loanAccountNumber      16046 non-null  object        
 3   aStackScore            16046 non-null  object        
 4   trenchCategory         16046 non-null  object        
 5   appln_submit_datetime  16046 non-null  datetime64[us]
 6   disbursementdate       16046 non-null  dbdate        
 7   Application_month      16046 non-null  object        
 8   Data_selection         16046 non-null  object        
 9   deffpd0                16046 non-null  Int64         
 10  flg_mature_fpd0        16046 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.4+ MB


In [2980]:
# The score column is object-typed after the concat (see the info() output);
# convert it to a numeric dtype, turning unparseable values into NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [2981]:
# Persist the Trench 1 / FPD0 stack-score set (index included) next to the notebook.
df_concat.to_csv(path_or_buf=r"aStackScoretrench1fpd0.csv")

In [2982]:
# calculate_periodic_gini is defined earlier in the notebook; the positional
# arguments are the frame, the score column, the target column and the target label.
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffpd0',
    'FPD0',
)

In [2983]:
# Keep the FPD0 Gini table under its own name before `gini_results` is reused.
f0 = gini_results.copy(deep=True)
f0.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.492727,Week,aStackScore,1.1.0,FPD0
1,2024-10-01,2024-10-31,0.394791,Month,aStackScore,1.1.0,FPD0
2,2024-10-07,2024-10-13,0.379712,Week,aStackScore,1.1.0,FPD0
3,2024-10-14,2024-10-20,0.359015,Week,aStackScore,1.1.0,FPD0
4,2024-10-21,2024-10-27,0.380435,Week,aStackScore,1.1.0,FPD0


## FPD10

## Test

In [2984]:
# Test cohort - Alpha Cash Stack model - Trench 1 - FPD10 target.
# Reads predictions from audit_balance.ml_model_run_details. When a score row has
# no trenchCategory, it is extracted by regex from the most recent
# 'Alpha-Cash-Model-response' request payload for the same loan. Only disbursed
# loans with flg_mature_fpd10 = 1 (obs_min_inst_def10 >= 1) are kept.
# The string is raw (r-prefix) because the regex contains backslashes.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3740667,1c070070-9d84-4ce2-8eaf-bcdae6860b8e,60837406670012,0.3298613143112987,Trench 1,2025-10-13 11:16:58,2025-10-13,2025-10,Test,0,1
1,3741204,826ee67b-5d56-4089-b33d-7233cf57970e,60837412040019,0.3787663675484058,Trench 1,2025-10-13 10:23:05,2025-10-15,2025-10,Test,0,1
2,3741237,44ea0607-e683-4ac7-b018-589199fae419,60837412370018,0.4175571350332116,Trench 1,2025-10-13 10:45:10,2025-10-13,2025-10,Test,0,1
3,3741241,ff51cbfb-c7fa-4f71-94c2-fe0ec0fcff9d,60837412410014,0.2564180796210573,Trench 1,2025-10-13 11:06:31,2025-10-13,2025-10,Test,0,1
4,3741291,f7b44415-cf59-4083-8e70-ea9d18731be8,60837412910014,0.2223321585325679,Trench 1,2025-10-13 11:05:11,2025-10-13,2025-10,Test,0,1


In [2985]:
# Independent snapshot of the Test-cohort result; `dfd` is reused by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [2986]:
# Train cohort - Alpha Cash Stack model - Trench 1 - FPD10 target.
# Reads predictions from ml_training_model_run_details, joins them to disbursed
# loans (flagDisbursement = 1) and to the delinquency flags, and keeps only rows
# with flg_mature_fpd10 = 1 (i.e. obs_min_inst_def10 >= 1).
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2958080,7596f16b-6531-4e41-b591-35ec4059f945,60829580800013,0.98192,Trench 1,2024-10-29 15:07:37,2024-10-29,2024-10,Train,1,1
1,2943052,0510a9b7-ff0d-4e9e-a29a-d8fffe1f587d,60829430520014,0.407492,Trench 1,2024-10-15 17:39:40,2024-10-17,2024-10,Train,0,1
2,2974593,54cd05eb-7bd5-45b5-b244-926b33a68b01,60829745930017,0.653006,Trench 1,2024-10-26 20:31:16,2024-10-26,2024-10,Train,1,1
3,2942051,ce7ab08a-f507-40c7-8ab5-54d0bab8b715,60829420510019,0.305861,Trench 1,2024-10-17 16:05:58,2024-10-17,2024-10,Train,0,1
4,2918833,b0b5daee-b4b0-4232-9b74-c9a225a2792e,60829188330011,0.416347,Trench 1,2024-10-09 19:09:26,2024-10-09,2024-10,Train,0,1


In [2987]:
# Keep the Train result under its own name so it can be stacked with the Test frame (df1).
df2 = dfd.copy(deep=True)

In [2988]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are skipped before concatenating: pandas deprecates concatenation
# with empty entries (FutureWarning) and recommends excluding them explicitly.
frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(frames, ignore_index=True)
    if frames
    else df2.reset_index(drop=True)  # both pulls empty: keep the empty frame and its columns
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15523 entries, 0 to 15522
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15523 non-null  object        
 1   digitalLoanAccountId   15523 non-null  object        
 2   loanAccountNumber      15523 non-null  object        
 3   aStackScore            15523 non-null  object        
 4   trenchCategory         15523 non-null  object        
 5   appln_submit_datetime  15523 non-null  datetime64[us]
 6   disbursementdate       15523 non-null  dbdate        
 7   Application_month      15523 non-null  object        
 8   Data_selection         15523 non-null  object        
 9   deffpd10               15523 non-null  Int64         
 10  flg_mature_fpd10       15523 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.3+ MB


In [2989]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [2990]:
# Save the combined Train + Test frame as a CSV (relative path, index column included by default).
df_concat.to_csv(path_or_buf=r"aStackScoretrench1fpd10.csv")

In [2991]:
# Gini of aStackScore against the deffpd10 flag, labelled 'FPD10'.
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffpd10',
    'FPD10',
)

In [2992]:
# Keep the FPD10 Gini table under its own name; `gini_results` is overwritten by the next metric.
f1 = gini_results.copy(deep=True)
f1.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.544336,Week,aStackScore,1.1.0,FPD10
1,2024-10-01,2024-10-31,0.549608,Month,aStackScore,1.1.0,FPD10
2,2024-10-07,2024-10-13,0.585794,Week,aStackScore,1.1.0,FPD10
3,2024-10-14,2024-10-20,0.518239,Week,aStackScore,1.1.0,FPD10
4,2024-10-21,2024-10-27,0.562636,Week,aStackScore,1.1.0,FPD10


## FPD30

## Test

In [2993]:
# Test population for the FPD30 Gini.
# Scores come from audit_balance.ml_model_run_details (Alpha cash stack model). When
# trenchCategory is null there, it falls back to a value regex-extracted from the latest
# 'Alpha-Cash-Model-response' request payload for the same loan account. The result is
# joined to loan_master_table and loan_deliquency_data and filtered to disbursed,
# FPD30-mature (flg_mature_fpd30 = 1) 'Trench 1' loans with a non-null score.
# NOTE(review): `calcFeatures` and the other extra columns selected in `parsed` are not
# read by any later CTE in this query.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [2994]:
# Keep the Test result under its own name; `dfd` is reassigned by the Train query in the next cell.
df1 = dfd.copy(deep=True)

## Train

In [2995]:
# Train population for the FPD30 Gini.
# Scores come from ml_training_model_run_details (Alpha cash stack model) and are joined
# to loan_master_table and loan_deliquency_data. The WHERE clause keeps disbursed loans
# with a non-null score that are FPD30-mature (flg_mature_fpd30 = 1) and in 'Trench 1'.
# NOTE(review): `calcFeatures` and the other extra columns selected in `parsed` are not
# read by any later CTE in this query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2988891,bf18ea40-5f40-4ef2-ac8e-16b4970dec53,60829888910015,0.410083,Trench 1,2024-10-31 23:47:10,2024-11-02,2024-10,Train,1,1
1,2985036,5d1f2272-e554-43fe-a214-8d0082f4d03c,60829850360012,0.798153,Trench 1,2024-10-30 15:34:22,2024-10-30,2024-10,Train,1,1
2,2959520,f8ec19cd-d0a3-4f63-aa98-49dbacf224e0,60829595200011,0.760854,Trench 1,2024-10-21 17:36:54,2024-10-21,2024-10,Train,1,1
3,2898664,ef2abf4c-c474-47c0-8326-2fb491d28372,60828986640015,0.550294,Trench 1,2024-10-17 20:07:21,2024-10-18,2024-10,Train,1,1
4,2923194,4833c1f5-d707-4647-be65-3c159c157a19,60829231940017,0.499907,Trench 1,2024-10-08 20:32:07,2024-10-08,2024-10,Train,0,1


In [2996]:
# Keep the Train result under its own name so it can be stacked with the Test frame (df1).
df2 = dfd.copy(deep=True)

In [2997]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are skipped before concatenating: pandas deprecates concatenation
# with empty entries (FutureWarning) and recommends excluding them explicitly.
frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(frames, ignore_index=True)
    if frames
    else df2.reset_index(drop=True)  # both pulls empty: keep the empty frame and its columns
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14772 entries, 0 to 14771
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14772 non-null  Int64         
 1   digitalLoanAccountId   14772 non-null  object        
 2   loanAccountNumber      14772 non-null  object        
 3   aStackScore            14772 non-null  float64       
 4   trenchCategory         14772 non-null  object        
 5   appln_submit_datetime  14772 non-null  datetime64[us]
 6   disbursementdate       14772 non-null  dbdate        
 7   Application_month      14772 non-null  object        
 8   Data_selection         14772 non-null  object        
 9   deffpd30               14772 non-null  Int64         
 10  flg_mature_fpd30       14772 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.3+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [2998]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [2999]:
# Save the combined Train + Test frame as a CSV (relative path, index column included by default).
df_concat.to_csv(path_or_buf=r"aStackScoretrench1fpd30.csv")

In [3000]:
# Gini of aStackScore against the deffpd30 flag, labelled 'FPD30'.
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffpd30',
    'FPD30',
)

In [3001]:
# Keep the FPD30 Gini table under its own name; `gini_results` is overwritten by the next metric.
f2 = gini_results.copy(deep=True)
f2.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.609023,Week,aStackScore,1.1.0,FPD30
1,2024-10-01,2024-10-31,0.577049,Month,aStackScore,1.1.0,FPD30
2,2024-10-07,2024-10-13,0.592929,Week,aStackScore,1.1.0,FPD30
3,2024-10-14,2024-10-20,0.537136,Week,aStackScore,1.1.0,FPD30
4,2024-10-21,2024-10-27,0.582035,Week,aStackScore,1.1.0,FPD30


## FSPD30

## Test

In [3002]:
# Test population for the FSPD30 Gini.
# Scores come from audit_balance.ml_model_run_details (Alpha cash stack model). When
# trenchCategory is null there, it falls back to a value regex-extracted from the latest
# 'Alpha-Cash-Model-response' request payload for the same loan account. The result is
# joined to loan_master_table and loan_deliquency_data and filtered to disbursed,
# FSPD30-mature (flg_mature_fspd_30 = 1) 'Trench 1' loans with a non-null score.
# NOTE(review): `calcFeatures` and the other extra columns selected in `parsed` are not
# read by any later CTE in this query.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3003]:
# Keep the Test result under its own name; `dfd` is reassigned by the Train query in the next cell.
df1 = dfd.copy(deep=True)

## Train

In [3004]:
# Train population for the FSPD30 Gini.
# Scores come from ml_training_model_run_details (Alpha cash stack model) and are joined
# to loan_master_table and loan_deliquency_data. The WHERE clause keeps disbursed loans
# with a non-null score that are FSPD30-mature (flg_mature_fspd_30 = 1) and in 'Trench 1'.
# NOTE(review): `calcFeatures` and the other extra columns selected in `parsed` are not
# read by any later CTE in this query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2921407,d585838f-bb62-4d9b-ae80-2d87d8a9cf7f,60829214070013,0.071104,Trench 1,2024-10-08 11:12:15,2024-10-08,2024-10,Train,0,1
1,2963436,2cf708e0-4b78-4449-a761-54efe30e420c,60829634360016,0.601189,Trench 1,2024-10-22 11:53:05,2024-10-22,2024-10,Train,1,1
2,2913950,f3037a37-b289-410d-b86f-d041f0644dc9,60829139500014,0.601431,Trench 1,2024-10-05 22:40:49,2024-10-06,2024-10,Train,1,1
3,2963184,f5d26023-ad23-4ab6-ae0b-c24c3c7fd440,60829631840015,0.562391,Trench 1,2024-10-22 10:28:23,2024-10-22,2024-10,Train,1,1
4,2951331,37d636b5-0a4d-4d6f-801a-bbba383263c0,60829513310015,0.625926,Trench 1,2024-10-21 10:23:35,2024-10-21,2024-10,Train,0,1


In [3005]:
# Keep the Train result under its own name so it can be stacked with the Test frame (df1).
df2 = dfd.copy(deep=True)

In [3006]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are skipped before concatenating: pandas deprecates concatenation
# with empty entries (FutureWarning) and recommends excluding them explicitly.
frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(frames, ignore_index=True)
    if frames
    else df2.reset_index(drop=True)  # both pulls empty: keep the empty frame and its columns
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13599 entries, 0 to 13598
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13599 non-null  Int64         
 1   digitalLoanAccountId   13599 non-null  object        
 2   loanAccountNumber      13599 non-null  object        
 3   aStackScore            13599 non-null  float64       
 4   trenchCategory         13599 non-null  object        
 5   appln_submit_datetime  13599 non-null  datetime64[us]
 6   disbursementdate       13599 non-null  dbdate        
 7   Application_month      13599 non-null  object        
 8   Data_selection         13599 non-null  object        
 9   deffspd30              13599 non-null  Int64         
 10  flg_mature_fspd_30     13599 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.2+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3007]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [3008]:
# Save the combined Train + Test frame as a CSV (relative path, index column included by default).
df_concat.to_csv(path_or_buf=r"aStackScoretrench1fspd30.csv")

In [3009]:
# Gini of aStackScore against the deffspd30 flag, labelled 'FSPD30'.
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffspd30',
    'FSPD30',
)

In [3010]:
# Keep the FSPD30 Gini table under its own name; `gini_results` is overwritten by the next metric.
f3 = gini_results.copy(deep=True)
f3.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.630674,Week,aStackScore,1.1.0,FSPD30
1,2024-10-01,2024-10-31,0.552296,Month,aStackScore,1.1.0,FSPD30
2,2024-10-07,2024-10-13,0.589437,Week,aStackScore,1.1.0,FSPD30
3,2024-10-14,2024-10-20,0.455286,Week,aStackScore,1.1.0,FSPD30
4,2024-10-21,2024-10-27,0.559849,Week,aStackScore,1.1.0,FSPD30


## FSTPD30

## Test

In [3011]:
# Test population for the FSTPD30 Gini.
# Scores come from audit_balance.ml_model_run_details (Alpha cash stack model). When
# trenchCategory is null there, it falls back to a value regex-extracted from the latest
# 'Alpha-Cash-Model-response' request payload for the same loan account. The result is
# joined to loan_master_table and loan_deliquency_data and filtered to disbursed,
# FSTPD30-mature (flg_mature_fstpd_30 = 1) 'Trench 1' loans with a non-null score.
# NOTE(review): `calcFeatures` and the other extra columns selected in `parsed` are not
# read by any later CTE in this query.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3012]:
# Keep the Test result under its own name; `dfd` is reassigned by the Train query in the next cell.
df1 = dfd.copy(deep=True)

## Train

In [3013]:
# Train population for the FSTPD30 Gini.
# Scores come from ml_training_model_run_details (Alpha cash stack model) and are joined
# to loan_master_table and loan_deliquency_data. The WHERE clause keeps disbursed loans
# with a non-null score that are FSTPD30-mature (flg_mature_fstpd_30 = 1) and in 'Trench 1'.
# NOTE(review): `calcFeatures` and the other extra columns selected in `parsed` are not
# read by any later CTE in this query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2967699,0b519934-9ee3-4a4c-9e41-093606d0a065,60829676990013,0.403368,Trench 1,2024-10-24 06:38:41,2024-10-24,2024-10,Train,1,1
1,2964324,68be00cc-ff3f-4213-91e3-b5487839a8ac,60829643240013,0.61982,Trench 1,2024-10-22 17:04:36,2024-10-22,2024-10,Train,0,1
2,2923705,eec18a49-ea1f-4ffa-8f93-887030833704,60829237050018,0.734507,Trench 1,2024-10-09 11:24:48,2024-10-22,2024-10,Train,1,1
3,2936454,a58b71b2-ce08-462c-9fec-3fd665c13f82,60829364540015,0.474313,Trench 1,2024-10-13 19:42:07,2024-10-13,2024-10,Train,0,1
4,2968783,22487db3-2893-4338-990a-fa0bb3c83aa4,60829687830012,0.467374,Trench 1,2024-10-24 13:33:39,2024-10-30,2024-10,Train,0,1


In [3014]:
# Keep the Train result under its own name so it can be stacked with the Test frame (df1).
df2 = dfd.copy(deep=True)

In [3015]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are skipped before concatenating: pandas deprecates concatenation
# with empty entries (FutureWarning) and recommends excluding them explicitly.
frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(frames, ignore_index=True)
    if frames
    else df2.reset_index(drop=True)  # both pulls empty: keep the empty frame and its columns
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12449 entries, 0 to 12448
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12449 non-null  Int64         
 1   digitalLoanAccountId   12449 non-null  object        
 2   loanAccountNumber      12449 non-null  object        
 3   aStackScore            12449 non-null  float64       
 4   trenchCategory         12449 non-null  object        
 5   appln_submit_datetime  12449 non-null  datetime64[us]
 6   disbursementdate       12449 non-null  dbdate        
 7   Application_month      12449 non-null  object        
 8   Data_selection         12449 non-null  object        
 9   deffstpd30             12449 non-null  Int64         
 10  flg_mature_fstpd_30    12449 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.1+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3016]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [3017]:
# Save the combined Train + Test frame as a CSV (relative path, index column included by default).
df_concat.to_csv(path_or_buf=r"aStackScoretrench1fstpd30.csv")

In [3018]:
# Gini of aStackScore against the deffstpd30 flag, labelled 'FSTPD30'.
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffstpd30',
    'FSTPD30',
)

In [3019]:
# Keep the FSTPD30 Gini table under its own name for the merge that follows.
f4 = gini_results.copy(deep=True)
f4.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.493301,Week,aStackScore,1.1.0,FSTPD30
1,2024-10-01,2024-10-31,0.491028,Month,aStackScore,1.1.0,FSTPD30
2,2024-10-07,2024-10-13,0.500463,Week,aStackScore,1.1.0,FSTPD30
3,2024-10-14,2024-10-20,0.414163,Week,aStackScore,1.1.0,FSTPD30
4,2024-10-21,2024-10-27,0.516269,Week,aStackScore,1.1.0,FSTPD30


## Combining the DataFrames

In [3020]:
import functools

# Per-metric Gini tables to combine (f0 is built earlier in the notebook; f1..f4 above).
dataframes = [f0, f1, f2, f3, f4]

# Join keys shared by every table.
# NOTE(review): 'bad_rate' carries a different label in each table (e.g. 'FPD10' in f1,
# 'FPD30' in f2), so the outer merge never lines two metrics up on the same row. The saved
# preview of final_df shows NaN in the other Gini columns. Confirm this layout is intended
# before dropping 'bad_rate' from the keys.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on ``common_columns``, keeping rows found in either."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left to right: ((((f0 + f1) + f2) + f3) + f4).
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.to_numpy()

array(['start_date', 'end_date', 'aStackScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'aStackScore_FPD10_gini',
       'aStackScore_FPD30_gini', 'aStackScore_FSPD30_gini',
       'aStackScore_FSTPD30_gini'], dtype=object)

In [3021]:
# Generic Gini column -> Trench 1 ("t1") column name, in the output column order.
gini_renames = {
    'aStackScore_FPD0_gini': 'aStackScore_t1_FPD0_gini',
    'aStackScore_FPD10_gini': 'aStackScore_t1_FPD10_gini',
    'aStackScore_FPD30_gini': 'aStackScore_t1_FPD30_gini',
    'aStackScore_FSPD30_gini': 'aStackScore_t1_FSPD30_gini',
    'aStackScore_FSTPD30_gini': 'aStackScore_t1_FSTPD30_gini',
}
key_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate']

# Rename first, then select by the new names. rename() ignores keys that are absent,
# so re-running this cell on an already-renamed final_df is a no-op instead of the
# KeyError raised when the original column names were selected first.
final_df = (
    final_df
    .rename(columns=gini_renames)
    .loc[:, key_columns + list(gini_renames.values())]
    .assign(
        # Constant descriptors attached to every row of this table.
        Trench_category='Trench 1',
        Model_display_name='alpha_stack_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                     datetime64[ns]
end_date                       datetime64[ns]
period                                 object
Model_Name                             object
version                                object
bad_rate                               object
aStackScore_t1_FPD0_gini              float64
aStackScore_t1_FPD10_gini             float64
aStackScore_t1_FPD30_gini             float64
aStackScore_t1_FSPD30_gini            float64
aStackScore_t1_FSTPD30_gini           float64
Trench_category                        object
Model_display_name                     object
Product_type                           object
dtype: object

In [3022]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,aStackScore_t1_FPD0_gini,aStackScore_t1_FPD10_gini,aStackScore_t1_FPD30_gini,aStackScore_t1_FSPD30_gini,aStackScore_t1_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,aStackScore,1.1.0,FPD0,0.492727,,,,,Trench 1,alpha_stack_model_cash,CASH
1,2024-10-01,2024-10-31,Month,aStackScore,1.1.0,FPD0,0.394791,,,,,Trench 1,alpha_stack_model_cash,CASH
2,2024-10-07,2024-10-13,Week,aStackScore,1.1.0,FPD0,0.379712,,,,,Trench 1,alpha_stack_model_cash,CASH
3,2024-10-14,2024-10-20,Week,aStackScore,1.1.0,FPD0,0.359015,,,,,Trench 1,alpha_stack_model_cash,CASH
4,2024-10-21,2024-10-27,Week,aStackScore,1.1.0,FPD0,0.380435,,,,,Trench 1,alpha_stack_model_cash,CASH


In [3023]:
# Upload the Trench 1 Gini summary to BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_stack_model_t1_v1_gini4"

# WRITE_TRUNCATE replaces whatever the table already holds; "WRITE_APPEND" would add rows instead.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=97f639da-b661-4ee6-879f-0398dfae9980>

# Trench 2

## FPD0

## Test

In [3024]:
# Test population for the Trench 2 / FPD0 Gini.
# The query reads alpha cash stack scores from audit_balance.ml_model_run_details,
# fills a NULL trenchCategory from the most recent 'Alpha-Cash-Model-response'
# request payload (regex on requestPayload), joins loan_master_table and the
# delinquency flags, and keeps disbursed loans with a non-null score,
# flg_mature_fpd0 = 1 (obs_min_inst_def0 >= 1) and trenchCategory = 'Trench 2'.
# The r-prefix keeps the regex backslashes intact when the text reaches BigQuery.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
# NOTE(review): duplicates presumably come from the join on digitalLoanAccountId
# fanning out -- confirm.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1945496,a069a1f0-c467-4c7e-945f-609d46f5df01,60819454960027,0.4496350808305948,Trench 2,2025-10-12 08:09:23,2025-10-12,2025-10,Test,0,1
1,3508511,ce128857-2a6c-4c41-9c1c-b9ee1c15ace9,60835085110013,0.4869271108565844,Trench 2,2025-10-12 08:37:51,2025-10-12,2025-10,Test,0,1
2,3528771,d8b78718-d90a-43a3-94ad-d0cf842d4447,60835287710011,0.3224622592946706,Trench 2,2025-10-12 08:54:38,2025-10-13,2025-10,Test,1,1
3,2245137,ea96ebc8-11fd-4213-bc3c-bf5ed20153e0,60822451370014,0.2362121730477518,Trench 2,2025-10-12 11:47:52,2025-10-12,2025-10,Test,0,1
4,3520612,ded06602-1a34-4ac0-bedb-145da1a50ee4,60835206120015,0.2088737246766409,Trench 2,2025-10-12 11:21:55,2025-10-12,2025-10,Test,0,1


In [3025]:
# Keep the Test rows under df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [3026]:
# Train population for the Trench 2 / FPD0 Gini.
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details and
# trenchCategory is used as stored (no request-payload fallback). The join to
# loan_master_table / delinquency flags and the filters (disbursed, non-null
# score, flg_mature_fpd0 = 1, trenchCategory = 'Trench 2') match the Test query;
# rows are labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2841190,e9caa027-d893-42e6-8758-c22861583102,60828411900019,0.619318,Trench 2,2024-10-20 10:12:22,2024-10-20,2024-10,Train,0,1
1,1133667,c94d8c47-3ee6-4782-95d7-ba5a31073ce1,60811336670027,0.583106,Trench 2,2024-10-19 06:42:54,2024-10-20,2024-10,Train,0,1
2,1608256,736d427b-139d-4362-a6fd-9f4188de136e,60816082560019,0.826327,Trench 2,2024-10-30 09:22:03,2024-10-30,2024-10,Train,0,1
3,2231422,182cd65b-ff3e-4514-bc6c-aef821bc193b,60822314220023,0.744346,Trench 2,2024-10-31 05:04:00,2024-10-31,2024-10,Train,0,1
4,2140381,d8bbf7f7-6e6e-431f-b764-093183b5f2c0,60821403810018,0.956642,Trench 2,2024-10-29 12:51:34,2024-10-29,2024-10,Train,1,1


In [3027]:
# Keep the Train rows under df2 so they can be combined with the Test rows (df1).
df2 = dfd.copy()

In [3028]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are left out of the concat: an empty pull would otherwise trigger
# pandas' FutureWarning about empty entries and, in later pandas versions, take
# part in deciding the result dtypes. If both frames are empty, fall back to df2
# so the expected columns are still present.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11114 entries, 0 to 11113
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11114 non-null  object        
 1   digitalLoanAccountId   11114 non-null  object        
 2   loanAccountNumber      11114 non-null  object        
 3   aStackScore            11114 non-null  object        
 4   trenchCategory         11114 non-null  object        
 5   appln_submit_datetime  11114 non-null  datetime64[us]
 6   disbursementdate       11114 non-null  dbdate        
 7   Application_month      11114 non-null  object        
 8   Data_selection         11114 non-null  object        
 9   deffpd0                11114 non-null  Int64         
 10  flg_mature_fpd0        11114 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 976.9+ KB


In [3029]:
# aStackScore is an object column in the info() output above; convert it to numeric.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [3030]:
# Write the combined Trench 2 / FPD0 rows to a CSV in the current working directory.
df_concat.to_csv(r"aStackScoretrench2fpd0.csv")

In [3031]:
# Gini of aStackScore against the deffpd0 label, tagged 'FPD0'.
# calculate_periodic_gini is defined outside this section; the head shown in the
# next cell has one row per Week / Month period.
gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd0', 'FPD0')

In [3032]:
# Keep the FPD0 result under f0; gini_results is reassigned for the next metric.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.465684,Week,aStackScore,1.1.0,FPD0
1,2024-10-01,2024-10-31,0.422023,Month,aStackScore,1.1.0,FPD0
2,2024-10-07,2024-10-13,0.472046,Week,aStackScore,1.1.0,FPD0
3,2024-10-14,2024-10-20,0.415392,Week,aStackScore,1.1.0,FPD0
4,2024-10-21,2024-10-27,0.342969,Week,aStackScore,1.1.0,FPD0


## FPD10

## Test

In [3033]:
# Test population for the Trench 2 / FPD10 Gini.
# The query reads alpha cash stack scores from audit_balance.ml_model_run_details,
# fills a NULL trenchCategory from the most recent 'Alpha-Cash-Model-response'
# request payload (regex on requestPayload), joins loan_master_table and the
# delinquency flags, and keeps disbursed loans with a non-null score,
# flg_mature_fpd10 = 1 (obs_min_inst_def10 >= 1) and trenchCategory = 'Trench 2'.
# The r-prefix keeps the regex backslashes intact when the text reaches BigQuery.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
# NOTE(review): duplicates presumably come from the join on digitalLoanAccountId
# fanning out -- confirm.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3441995,80d0d64f-b71a-4952-bf26-c7543362d564,60834419950011,0.4662383913744448,Trench 2,2025-10-16 08:33:15,2025-10-16,2025-10,Test,0,1
1,2878150,782532a4-9443-4c7c-8da0-6655dd9e9fa4,60828781500022,0.2439797793875914,Trench 2,2025-10-16 09:24:26,2025-10-16,2025-10,Test,0,1
2,1000278,b5fdae72-70bf-46ab-b5db-137a59743ccd,60810002780025,0.2954564972998347,Trench 2,2025-10-16 10:57:48,2025-10-16,2025-10,Test,0,1
3,3559952,e09b35d8-81b2-457f-8b66-e48b0ee1cdeb,60835599520021,0.3554363273434955,Trench 2,2025-10-16 10:41:38,2025-10-16,2025-10,Test,1,1
4,3252879,515fa3fe-0614-4246-b131-136b0ae3cca3,60832528790015,0.3492246430719,Trench 2,2025-10-16 16:48:27,2025-10-16,2025-10,Test,0,1


In [3034]:
# Keep the Test rows under df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [3035]:
# Train population for the Trench 2 / FPD10 Gini.
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details and
# trenchCategory is used as stored (no request-payload fallback). The join to
# loan_master_table / delinquency flags and the filters (disbursed, non-null
# score, flg_mature_fpd10 = 1, trenchCategory = 'Trench 2') match the Test query;
# rows are labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2164917,d5700a48-39b0-40d1-8fb1-daa0adbd740e,60821649170014,0.789096,Trench 2,2024-10-05 19:51:55,2024-10-05,2024-10,Train,1,1
1,1985229,6d6ada92-fe0a-41bc-9175-c143da8553fb,60819852290012,0.14464,Trench 2,2024-10-02 18:23:26,2024-10-04,2024-10,Train,0,1
2,2660272,314a1226-2e4f-426f-a37e-d95c0d1464cb,60826602720014,0.277698,Trench 2,2024-10-08 10:46:17,2024-10-08,2024-10,Train,0,1
3,2472711,87d80caa-fc20-43ed-b6cb-e1bca37cfc53,60824727110017,0.394631,Trench 2,2024-10-01 18:42:24,2024-10-07,2024-10,Train,0,1
4,1504093,fc9edfd6-948a-4ea9-a62a-c803167853ad,60815040930016,0.428027,Trench 2,2024-10-25 21:37:38,2024-10-28,2024-10,Train,0,1


In [3036]:
# Keep the Train rows under df2 so they can be combined with the Test rows (df1).
df2 = dfd.copy()

In [3037]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are left out of the concat: an empty pull would otherwise trigger
# pandas' FutureWarning about empty entries and, in later pandas versions, take
# part in deciding the result dtypes. If both frames are empty, fall back to df2
# so the expected columns are still present.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10914 entries, 0 to 10913
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10914 non-null  object        
 1   digitalLoanAccountId   10914 non-null  object        
 2   loanAccountNumber      10914 non-null  object        
 3   aStackScore            10914 non-null  object        
 4   trenchCategory         10914 non-null  object        
 5   appln_submit_datetime  10914 non-null  datetime64[us]
 6   disbursementdate       10914 non-null  dbdate        
 7   Application_month      10914 non-null  object        
 8   Data_selection         10914 non-null  object        
 9   deffpd10               10914 non-null  Int64         
 10  flg_mature_fpd10       10914 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 959.4+ KB


In [3038]:
# aStackScore is an object column in the info() output above; convert it to numeric.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [3039]:
# Write the combined Trench 2 / FPD10 rows to a CSV in the current working directory.
# The file name now says "trench2": the previous "trench1" name mislabelled this
# Trench 2 extract and would overwrite a Trench 1 export of the same name.
df_concat.to_csv(r"aStackScoretrench2fpd10.csv")

In [3040]:
# Gini of aStackScore against the deffpd10 label, tagged 'FPD10'.
# calculate_periodic_gini is defined outside this section; the head shown in the
# next cell has one row per Week / Month period.
gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd10', 'FPD10')

In [3041]:
# Keep the FPD10 result under f1; gini_results is reassigned for the next metric.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.615146,Week,aStackScore,1.1.0,FPD10
1,2024-10-01,2024-10-31,0.547853,Month,aStackScore,1.1.0,FPD10
2,2024-10-07,2024-10-13,0.591411,Week,aStackScore,1.1.0,FPD10
3,2024-10-14,2024-10-20,0.555232,Week,aStackScore,1.1.0,FPD10
4,2024-10-21,2024-10-27,0.428945,Week,aStackScore,1.1.0,FPD10


## FPD30

## Test

In [3042]:
# Test population for the Trench 2 / FPD30 Gini.
# Brought in line with the FPD0, FPD10 and FSPD30 Test queries: a NULL
# trenchCategory in ml_model_run_details is filled from the most recent
# 'Alpha-Cash-Model-response' request payload (regex on requestPayload).
# Without that fallback this metric used a narrower Trench 2 population than
# its siblings. The rest of the query is unchanged: it joins loan_master_table
# and the delinquency flags and keeps disbursed loans with a non-null score,
# flg_mature_fpd30 = 1 (obs_min_inst_def30 >= 1) and trenchCategory = 'Trench 2'.
# The r-prefix is required so the regex backslashes reach BigQuery unchanged.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence
# (the join to model_run is on digitalLoanAccountId only and may fan out).
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [3043]:
# Keep the Test rows under df1; dfd is reassigned by the Train query that follows.
# In the recorded run the Test result displayed above has no rows.
df1 = dfd.copy()

## Train

In [3044]:
# Train population for the Trench 2 / FPD30 Gini.
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details and
# trenchCategory is used as stored. The query joins loan_master_table and the
# delinquency flags and keeps disbursed loans with a non-null score,
# flg_mature_fpd30 = 1 (obs_min_inst_def30 >= 1) and trenchCategory = 'Trench 2';
# rows are labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2562259,e879cb89-58cc-4e80-8138-239328086e13,60825622590019,0.637908,Trench 2,2024-10-15 05:24:08,2024-10-16,2024-10,Train,0,1
1,2389232,af2be9eb-c1e1-419e-b739-68ade1898f88,60823892320014,0.854924,Trench 2,2024-10-20 12:53:42,2024-10-20,2024-10,Train,0,1
2,2266723,9cd834e3-b1a3-4675-b4d3-88473266e72f,60822667230036,0.307032,Trench 2,2024-10-03 13:35:29,2024-10-03,2024-10,Train,0,1
3,2621691,36d01504-e86f-4854-8e75-02af1b8e05f2,60826216910013,0.602462,Trench 2,2024-10-09 21:37:30,2024-10-09,2024-10,Train,1,1
4,2277479,f817e161-d3e4-43fe-97b8-c63122ced66c,60822774790016,0.577268,Trench 2,2024-10-15 20:26:56,2024-10-16,2024-10,Train,0,1


In [3045]:
# Keep the Train rows under df2 so they can be combined with the Test rows (df1).
df2 = dfd.copy()

In [3046]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are left out of the concat: the Test pull for this metric can be
# empty, which triggers pandas' FutureWarning about empty entries and, in later
# pandas versions, would take part in deciding the result dtypes. If both frames
# are empty, fall back to df2 so the expected columns are still present.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10578 entries, 0 to 10577
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10578 non-null  Int64         
 1   digitalLoanAccountId   10578 non-null  object        
 2   loanAccountNumber      10578 non-null  object        
 3   aStackScore            10578 non-null  float64       
 4   trenchCategory         10578 non-null  object        
 5   appln_submit_datetime  10578 non-null  datetime64[us]
 6   disbursementdate       10578 non-null  dbdate        
 7   Application_month      10578 non-null  object        
 8   Data_selection         10578 non-null  object        
 9   deffpd30               10578 non-null  Int64         
 10  flg_mature_fpd30       10578 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 940.2+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3047]:
# Convert aStackScore to numeric; errors='coerce' turns unparsable values into NaN.
# In the info() output above the column is already float64.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [3048]:
# Write the combined Trench 2 / FPD30 rows to a CSV in the current working directory.
# The file name now says "trench2": the previous "trench1" name mislabelled this
# Trench 2 extract and would overwrite a Trench 1 export of the same name.
df_concat.to_csv(r"aStackScoretrench2fpd30.csv")

In [3049]:
# Gini of aStackScore against the deffpd30 label, tagged 'FPD30'.
# calculate_periodic_gini is defined outside this section; the head shown in the
# next cell has one row per Week / Month period.
gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd30', 'FPD30')

In [3050]:
# Keep the FPD30 result under its own name (f2), separate from gini_results.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.672161,Week,aStackScore,1.1.0,FPD30
1,2024-10-01,2024-10-31,0.559772,Month,aStackScore,1.1.0,FPD30
2,2024-10-07,2024-10-13,0.627584,Week,aStackScore,1.1.0,FPD30
3,2024-10-14,2024-10-20,0.529913,Week,aStackScore,1.1.0,FPD30
4,2024-10-21,2024-10-27,0.448268,Week,aStackScore,1.1.0,FPD30


## FSPD30

## Test

In [3051]:
# Test population for the Trench 2 / FSPD30 Gini.
# The query reads alpha cash stack scores from audit_balance.ml_model_run_details,
# fills a NULL trenchCategory from the most recent 'Alpha-Cash-Model-response'
# request payload (regex on requestPayload), joins loan_master_table and the
# delinquency flags, and keeps disbursed loans with a non-null score,
# flg_mature_fspd_30 = 1 (obs_min_inst_def30 >= 2) and trenchCategory = 'Trench 2'.
# The label deffspd30 is 1 when obs_min_inst_def30 >= 2 and min_inst_def30 is 1 or 2.
# The r-prefix keeps the regex backslashes intact when the text reaches BigQuery.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
# NOTE(review): duplicates presumably come from the join on digitalLoanAccountId
# fanning out -- confirm.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3052]:
# Keep the Test rows under df1; dfd is reassigned by the Train query that follows.
# In the recorded run the Test result displayed above has no rows.
df1 = dfd.copy()

## Train

In [3053]:
# Train population for the Trench 2 / FSPD30 Gini.
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details and
# trenchCategory is used as stored (no request-payload fallback). The join to
# loan_master_table / delinquency flags and the filters (disbursed, non-null
# score, flg_mature_fspd_30 = 1, trenchCategory = 'Trench 2') match the Test
# query; rows are labelled 'Train' in Data_selection.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2276313,a089d89c-832d-4329-b805-f996676851be,60822763130011,0.801892,Trench 2,2024-10-29 18:27:21,2024-10-29,2024-10,Train,1,1
1,2598949,112a4720-b222-45d9-b684-a25062568c69,60825989490017,0.585853,Trench 2,2024-10-02 17:09:11,2024-10-02,2024-10,Train,0,1
2,2428444,d241a2d6-aa07-4059-8932-5e7660b107c0,60824284440016,0.779096,Trench 2,2024-10-30 16:35:23,2024-10-30,2024-10,Train,1,1
3,1972301,13aae713-f548-493b-916c-1474f046290b,60819723010016,0.491881,Trench 2,2024-10-30 16:40:18,2024-10-30,2024-10,Train,0,1
4,2596718,bdb05cc3-80f7-4a24-9d0e-50a3cab7552f,60825967180016,0.726844,Trench 2,2024-10-08 10:33:28,2024-10-08,2024-10,Train,0,1


In [3054]:
# Keep the Train pull under its own name so it can be stacked with `df1`.
df2 = dfd.copy()

In [3055]:
# Stack the Train pull (df2) on top of the Test pull (df1).
# Empty frames are removed before concatenating: pandas has deprecated passing
# empty/all-NA entries to concat (FutureWarning) and will later let them affect
# the result dtypes, whereas today they are ignored. Filtering them out keeps
# the current dtypes and silences the warning. If every pull is empty, fall back
# to df2 alone so the cell still yields a zero-row frame instead of raising.
frames_to_stack = [frame for frame in (df2, df1) if not frame.empty] or [df2]
df_concat = pd.concat(frames_to_stack, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9921 entries, 0 to 9920
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9921 non-null   Int64         
 1   digitalLoanAccountId   9921 non-null   object        
 2   loanAccountNumber      9921 non-null   object        
 3   aStackScore            9921 non-null   float64       
 4   trenchCategory         9921 non-null   object        
 5   appln_submit_datetime  9921 non-null   datetime64[us]
 6   disbursementdate       9921 non-null   dbdate        
 7   Application_month      9921 non-null   object        
 8   Data_selection         9921 non-null   object        
 9   deffspd30              9921 non-null   Int64         
 10  flg_mature_fspd_30     9921 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 881.8+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3056]:
# Force the score column to numeric. The info() output above already reports
# float64 for this column, so this is defensive; errors='coerce' would turn any
# unparseable value into NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [3057]:
# Persist the stacked sample next to the notebook (relative path). With the
# default index=True the row index is written as the first, unnamed column.
df_concat.to_csv(r"aStackScoretrench2fspd30.csv")

In [3058]:
# Gini of `aStackScore` against the `deffspd30` flag, labelled 'FSPD30'.
# calculate_periodic_gini is defined earlier in the notebook; judging by the
# preview below it returns one row per weekly/monthly window -- TODO confirm.
gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffspd30', 'FSPD30')

In [3059]:
# Keep the FSPD30 result under its own name; the "combining" cell later merges
# f0..f4, and `gini_results` is rebound by the next metric's section.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,aStackScore_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.583149,Week,aStackScore,1.1.0,FSPD30
1,2024-10-01,2024-10-31,0.578261,Month,aStackScore,1.1.0,FSPD30
2,2024-10-07,2024-10-13,0.604414,Week,aStackScore,1.1.0,FSPD30
3,2024-10-14,2024-10-20,0.615705,Week,aStackScore,1.1.0,FSPD30
4,2024-10-21,2024-10-27,0.454545,Week,aStackScore,1.1.0,FSPD30


## FSTPD30

## Test

In [3060]:
# Test sample, Trench 2, FSTPD30 outcome.
# - `parsed`: Alpha cash stack-model rows from audit_balance.ml_model_run_details.
# - `model_run`: per (customerId, digitalLoanAccountId, modelName) the most recent
#   'Alpha-Cash-Model-response' request (ROW_NUMBER ... ORDER BY publish_time DESC).
# - `modelname`: exposes `prediction` as `aStackScore`; when the run row has no
#   trenchCategory it is extracted by regex from the request payload joined on
#   digitalLoanAccountId.
# - `deliquency`: per-loan default flags and flg_mature_* flags.
# - Final select keeps disbursed loans with a non-null score, a mature FSTPD30
#   flag (obs_min_inst_def30 >= 3) and trenchCategory = 'Trench 2'.
# The r-prefix keeps the backslashes of the regex literal intact.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the next join both require loanmaster columns.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, drop fully duplicated rows, and preview the first rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3061]:
# Keep the Test pull under its own name; the Train cell below rebinds `dfd`.
df1 = dfd.copy()

## Train

In [3062]:
# Train sample, Trench 2, FSTPD30 outcome.
# - `parsed`/`modelname`: Alpha cash stack-model rows from the training run-details
#   table; `prediction` is exposed as `aStackScore`. `calcFeatures` is built in
#   `parsed` but is not selected by any later CTE.
# - `deliquency`: per-loan default flags (deffpd0 ... deffstpd30) and the matching
#   flg_mature_* flags derived from obs_min_inst_def* / min_inst_def*.
# - Final select keeps disbursed loans with a non-null score, a mature FSTPD30
#   flag (obs_min_inst_def30 >= 3) and trenchCategory = 'Trench 2'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the next join both require loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, drop fully duplicated rows, and preview the first rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2543751,3e1517b5-d369-4605-9446-b9a565344e21,60825437510011,0.866397,Trench 2,2024-10-29 19:00:40,2024-11-02,2024-10,Train,1,1
1,1177581,18f38f40-555a-4d60-a676-d1a8d9e047d6,60811775810052,0.473575,Trench 2,2024-10-22 17:57:28,2024-10-24,2024-10,Train,1,1
2,2631539,cd14c9d1-0c0b-4453-83c2-d0333ef07016,60826315390011,0.143799,Trench 2,2024-10-04 21:36:27,2024-10-04,2024-10,Train,0,1
3,2605724,eba38865-675b-433d-ac3d-272077054769,60826057240011,0.434202,Trench 2,2024-10-11 20:28:47,2024-10-11,2024-10,Train,0,1
4,2792621,edfca120-d762-4d75-88cf-f284a093b464,60827926210014,0.080925,Trench 2,2024-10-25 09:48:12,2024-10-25,2024-10,Train,0,1


In [3063]:
# Keep the Train pull under its own name so it can be stacked with `df1`.
df2 = dfd.copy()

In [3064]:
# Stack the Train pull (df2) on top of the Test pull (df1).
# Empty frames are removed before concatenating: pandas has deprecated passing
# empty/all-NA entries to concat (FutureWarning) and will later let them affect
# the result dtypes, whereas today they are ignored. Filtering them out keeps
# the current dtypes and silences the warning. If every pull is empty, fall back
# to df2 alone so the cell still yields a zero-row frame instead of raising.
frames_to_stack = [frame for frame in (df2, df1) if not frame.empty] or [df2]
df_concat = pd.concat(frames_to_stack, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9206 entries, 0 to 9205
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9206 non-null   Int64         
 1   digitalLoanAccountId   9206 non-null   object        
 2   loanAccountNumber      9206 non-null   object        
 3   aStackScore            9206 non-null   float64       
 4   trenchCategory         9206 non-null   object        
 5   appln_submit_datetime  9206 non-null   datetime64[us]
 6   disbursementdate       9206 non-null   dbdate        
 7   Application_month      9206 non-null   object        
 8   Data_selection         9206 non-null   object        
 9   deffstpd30             9206 non-null   Int64         
 10  flg_mature_fstpd_30    9206 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 818.2+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3065]:
# Force the score column to numeric. The info() output above already reports
# float64 for this column, so this is defensive; errors='coerce' would turn any
# unparseable value into NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [3066]:
# Persist the stacked sample next to the notebook (relative path). With the
# default index=True the row index is written as the first, unnamed column.
df_concat.to_csv(r"aStackScoretrench2fstpd30.csv")

In [3067]:
# Gini of `aStackScore` against the `deffstpd30` flag, labelled 'FSTPD30'.
# calculate_periodic_gini is defined earlier in the notebook; judging by the
# preview below it returns one row per weekly/monthly window -- TODO confirm.
gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffstpd30', 'FSTPD30')

In [3068]:
# Keep the FSTPD30 result under its own name; the "combining" cell below merges
# f0..f4.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,aStackScore_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.505994,Week,aStackScore,1.1.0,FSTPD30
1,2024-10-01,2024-10-31,0.530044,Month,aStackScore,1.1.0,FSTPD30
2,2024-10-07,2024-10-13,0.566274,Week,aStackScore,1.1.0,FSTPD30
3,2024-10-14,2024-10-20,0.548177,Week,aStackScore,1.1.0,FSTPD30
4,2024-10-21,2024-10-27,0.449289,Week,aStackScore,1.1.0,FSTPD30


## Combining the dataframes

In [3069]:
import functools

# Per-metric Gini tables produced by the sections above, in merge order.
dataframes = [f0, f1, f2, f3, f4]

# Columns shared by every per-metric table; used as the join keys.
common_columns = [
    'start_date',
    'end_date',
    'period',
    'Model_Name',
    'version',
    'bad_rate',
]


def merge_dataframes(df1, df2):
    """Outer-join two per-metric Gini tables on ``common_columns``.

    Parameters:
    df1: left DataFrame (the running merge result)
    df2: right DataFrame (the next per-metric table)

    Returns:
    DataFrame: the outer merge of both inputs
    """
    # NOTE(review): `bad_rate` is part of the key and each table carries its own
    # label (FPD0, FPD10, ...), so rows from different metrics never match and
    # the result stacks them with NaN in the other metrics' columns (see the
    # final_df preview below). Confirm this long/sparse layout is intended.
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left-to-right into a single table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aStackScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'aStackScore_FPD10_gini',
       'aStackScore_FPD30_gini', 'aStackScore_FSPD30_gini',
       'aStackScore_FSTPD30_gini'], dtype=object)

In [3070]:
# Shape the combined Trench 2 table for upload: put the key columns first, tag
# every Gini column with the trench ("t2"), and add constant descriptor columns.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'aStackScore_FPD0_gini',
        'aStackScore_FPD10_gini',
        'aStackScore_FPD30_gini',
        'aStackScore_FSPD30_gini',
        'aStackScore_FSTPD30_gini',
    ]]
    .rename(columns={
        'aStackScore_FPD0_gini': 'aStackScore_t2_FPD0_gini',
        'aStackScore_FPD10_gini': 'aStackScore_t2_FPD10_gini',
        'aStackScore_FPD30_gini': 'aStackScore_t2_FPD30_gini',
        'aStackScore_FSPD30_gini': 'aStackScore_t2_FSPD30_gini',
        'aStackScore_FSTPD30_gini': 'aStackScore_t2_FSTPD30_gini',
    })
    .assign(
        Trench_category='Trench 2',
        Model_display_name='alpha_stack_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                     datetime64[ns]
end_date                       datetime64[ns]
period                                 object
Model_Name                             object
version                                object
bad_rate                               object
aStackScore_t2_FPD0_gini              float64
aStackScore_t2_FPD10_gini             float64
aStackScore_t2_FPD30_gini             float64
aStackScore_t2_FSPD30_gini            float64
aStackScore_t2_FSTPD30_gini           float64
Trench_category                        object
Model_display_name                     object
Product_type                           object
dtype: object

In [3071]:
# Preview the combined Trench 2 table before it is uploaded.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,aStackScore_t2_FPD0_gini,aStackScore_t2_FPD10_gini,aStackScore_t2_FPD30_gini,aStackScore_t2_FSPD30_gini,aStackScore_t2_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,aStackScore,1.1.0,FPD0,0.465684,,,,,Trench 2,alpha_stack_model_cash,CASH
1,2024-10-01,2024-10-31,Month,aStackScore,1.1.0,FPD0,0.422023,,,,,Trench 2,alpha_stack_model_cash,CASH
2,2024-10-07,2024-10-13,Week,aStackScore,1.1.0,FPD0,0.472046,,,,,Trench 2,alpha_stack_model_cash,CASH
3,2024-10-14,2024-10-20,Week,aStackScore,1.1.0,FPD0,0.415392,,,,,Trench 2,alpha_stack_model_cash,CASH
4,2024-10-21,2024-10-27,Week,aStackScore,1.1.0,FPD0,0.342969,,,,,Trench 2,alpha_stack_model_cash,CASH


In [3072]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_stack_model_t2_v1_gini4"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=1ee7c74a-d980-4aca-9529-d61a6296c604>

# Trench 3

## FPD0

## Test

In [3073]:
# Test sample, Trench 3, FPD0 outcome.
# - `parsed`: Alpha cash stack-model rows from audit_balance.ml_model_run_details.
# - `model_run`: per (customerId, digitalLoanAccountId, modelName) the most recent
#   'Alpha-Cash-Model-response' request (ROW_NUMBER ... ORDER BY publish_time DESC).
# - `modelname`: exposes `prediction` as `aStackScore`; when the run row has no
#   trenchCategory it is extracted by regex from the request payload joined on
#   digitalLoanAccountId.
# - `deliquency`: per-loan default flags and flg_mature_* flags.
# - Final select keeps disbursed loans with a non-null score, a mature FPD0 flag
#   (obs_min_inst_def0 >= 1) and trenchCategory = 'Trench 3'.
# The r-prefix keeps the backslashes of the regex literal intact.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the next join both require loanmaster columns.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop fully duplicated rows, and preview the first rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3284580,5347c420-2fd6-486f-a896-d270a8862f62,60832845800038,0.4894251214589769,Trench 3,2025-10-04 09:06:38,2025-10-05,2025-10,Test,0,1
1,3097677,6353794c-2484-4d7b-8d7e-cd7663ae3b3a,60830976770031,0.413338381428348,Trench 3,2025-10-04 16:38:06,2025-10-05,2025-10,Test,1,1
2,2523031,e6b7c0e2-e96f-457e-875e-87eb3063bfdb,60825230310023,0.4433331348560717,Trench 3,2025-10-04 20:47:58,2025-10-04,2025-10,Test,0,1
3,3338490,9da8823e-ac4c-4d0f-890a-05788b02b3a5,60833384900026,0.5400094902394105,Trench 3,2025-10-04 23:10:38,2025-10-05,2025-10,Test,0,1
4,1797275,9850659d-25e1-4d8e-b836-e3587713d60d,60817972750085,0.2642031438175278,Trench 3,2025-10-15 10:05:13,2025-10-15,2025-10,Test,0,1


In [3074]:
# Keep the Test pull under its own name; the Train cell below rebinds `dfd`.
df1 = dfd.copy()

## Train

In [3075]:
# Train sample, Trench 3, FPD0 outcome.
# - `parsed`/`modelname`: Alpha cash stack-model rows from the training run-details
#   table; `prediction` is exposed as `aStackScore`. `calcFeatures` is built in
#   `parsed` but is not selected by any later CTE.
# - `deliquency`: per-loan default flags (deffpd0 ... deffstpd30) and the matching
#   flg_mature_* flags derived from obs_min_inst_def* / min_inst_def*.
# - Final select keeps disbursed loans with a non-null score, a mature FPD0 flag
#   (obs_min_inst_def0 >= 1) and trenchCategory = 'Trench 3'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the next join both require loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop fully duplicated rows, and preview the first rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2867204,28b84943-6745-4387-9871-b8e23997faa6,60828672040021,0.707833,Trench 3,2024-10-20 01:43:28,2024-10-20,2024-10,Train,0,1
1,1995264,db7e1891-0598-4e31-99fc-94d35f0afca6,60819952640028,0.467746,Trench 3,2024-10-17 10:15:53,2024-10-17,2024-10,Train,0,1
2,1743906,50883fda-af8e-49a5-99d7-e51a901f184b,60817439060041,0.288597,Trench 3,2024-10-09 17:58:02,2024-10-09,2024-10,Train,0,1
3,2019824,bd220923-079c-4bc6-8424-8dd15bafa839,60820198240027,0.663142,Trench 3,2024-10-02 07:13:10,2024-10-02,2024-10,Train,1,1
4,2457904,83599f89-a6cd-4474-abd2-3ddae3c0a758,60824579040022,0.393647,Trench 3,2024-10-20 16:44:23,2024-10-20,2024-10,Train,0,1


In [3076]:
# Keep the Train pull under its own name so it can be stacked with `df1`.
df2 = dfd.copy()

In [3077]:
# Stack the Train pull (df2) on top of the Test pull (df1).
# Empty frames are removed before concatenating: pandas has deprecated passing
# empty/all-NA entries to concat (FutureWarning) and will later let them affect
# the result dtypes, whereas today they are ignored. Filtering them out keeps
# the result stable whenever one of the pulls returns no rows. If every pull is
# empty, fall back to df2 alone so the cell still yields a zero-row frame.
frames_to_stack = [frame for frame in (df2, df1) if not frame.empty] or [df2]
df_concat = pd.concat(frames_to_stack, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11069 entries, 0 to 11068
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11069 non-null  object        
 1   digitalLoanAccountId   11069 non-null  object        
 2   loanAccountNumber      11069 non-null  object        
 3   aStackScore            11069 non-null  object        
 4   trenchCategory         11069 non-null  object        
 5   appln_submit_datetime  11069 non-null  datetime64[us]
 6   disbursementdate       11069 non-null  dbdate        
 7   Application_month      11069 non-null  object        
 8   Data_selection         11069 non-null  object        
 9   deffpd0                11069 non-null  Int64         
 10  flg_mature_fpd0        11069 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 973.0+ KB


In [3078]:
# The stacked score column is object dtype here (see the info() output above),
# so convert it to numeric before scoring. errors='coerce' turns any
# unparseable value into NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [3079]:
# Persist the stacked sample next to the notebook (relative path). With the
# default index=True the row index is written as the first, unnamed column.
df_concat.to_csv(r"aStackScoretrench3fpd0.csv")

In [3080]:
# Gini of `aStackScore` against the `deffpd0` flag, labelled 'FPD0'.
# calculate_periodic_gini is defined earlier in the notebook; judging by the
# preview below it returns one row per weekly/monthly window -- TODO confirm.
gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd0', 'FPD0')

In [3081]:
# Keep the Trench 3 FPD0 result under its own name. This rebinds `f0`, which the
# Trench 2 combining cell above also used, so the cells must be run in order.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.380488,Week,aStackScore,1.1.0,FPD0
1,2024-10-01,2024-10-31,0.365971,Month,aStackScore,1.1.0,FPD0
2,2024-10-07,2024-10-13,0.357708,Week,aStackScore,1.1.0,FPD0
3,2024-10-14,2024-10-20,0.459742,Week,aStackScore,1.1.0,FPD0
4,2024-10-21,2024-10-27,0.378547,Week,aStackScore,1.1.0,FPD0


## FPD10

## Test

In [3082]:
# Test sample, Trench 3, FPD10 outcome.
# - `parsed`: Alpha cash stack-model rows from audit_balance.ml_model_run_details.
# - `model_run`: per (customerId, digitalLoanAccountId, modelName) the most recent
#   'Alpha-Cash-Model-response' request (ROW_NUMBER ... ORDER BY publish_time DESC).
# - `modelname`: exposes `prediction` as `aStackScore`; when the run row has no
#   trenchCategory it is extracted by regex from the request payload joined on
#   digitalLoanAccountId.
# - `deliquency`: per-loan default flags and flg_mature_* flags.
# - Final select keeps disbursed loans with a non-null score, a mature FPD10 flag
#   (obs_min_inst_def10 >= 1) and trenchCategory = 'Trench 3'.
# The r-prefix keeps the backslashes of the regex literal intact.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the next join both require loanmaster columns.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop fully duplicated rows, and preview the first rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2750896,8ad9a58b-9413-4578-b1cc-0d3ce10e1c48,60827508960029,0.379628839593531,Trench 3,2025-10-13 10:39:31,2025-10-13,2025-10,Test,0,1
1,3496099,50a5ce8a-9133-4200-84a5-2aaad9edccad,60834960990021,0.3692990178304161,Trench 3,2025-10-13 08:43:56,2025-10-13,2025-10,Test,0,1
2,2915510,386f23f3-7a9d-4c87-944b-b66b734d3ff2,60829155100021,0.5276025374775627,Trench 3,2025-10-13 08:01:18,2025-10-13,2025-10,Test,0,1
3,1019666,f088c453-c480-474d-b969-ddf0fde5dbbd,60810196660066,0.3551434225122978,Trench 3,2025-10-13 10:15:25,2025-10-13,2025-10,Test,0,1
4,3373318,c1754bc9-3dc3-4849-b13f-3c5df444305d,60833733180025,0.4797913306610957,Trench 3,2025-10-13 10:07:55,2025-10-14,2025-10,Test,1,1


In [3083]:
# Keep the Test pull under its own name; the Train cell below rebinds `dfd`.
df1 = dfd.copy()

## Train

In [3084]:
# Train sample, Trench 3, FPD10 outcome.
# - `parsed`/`modelname`: Alpha cash stack-model rows from the training run-details
#   table; `prediction` is exposed as `aStackScore`. `calcFeatures` is built in
#   `parsed` but is not selected by any later CTE.
# - `deliquency`: per-loan default flags (deffpd0 ... deffstpd30) and the matching
#   flg_mature_* flags derived from obs_min_inst_def* / min_inst_def*.
# - Final select keeps disbursed loans with a non-null score, a mature FPD10 flag
#   (obs_min_inst_def10 >= 1) and trenchCategory = 'Trench 3'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join,
# because the WHERE clause and the next join both require loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop fully duplicated rows, and preview the first rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2567762,997bc685-8f53-4f87-978d-565263e749a8,60825677620027,0.681296,Trench 3,2024-10-16 10:17:28,2024-10-16,2024-10,Train,1,1
1,2252914,8d6ea137-f243-466b-a55a-0f206e3202f5,60822529140022,0.336648,Trench 3,2024-10-02 11:45:35,2024-10-04,2024-10,Train,0,1
2,2574279,0c4f7047-7ba0-42aa-9ff5-591322734b4c,60825742790024,0.527494,Trench 3,2024-10-26 15:50:34,2024-10-26,2024-10,Train,0,1
3,2867204,28b84943-6745-4387-9871-b8e23997faa6,60828672040021,0.707833,Trench 3,2024-10-20 01:43:28,2024-10-20,2024-10,Train,0,1
4,1995264,db7e1891-0598-4e31-99fc-94d35f0afca6,60819952640028,0.467746,Trench 3,2024-10-17 10:15:53,2024-10-17,2024-10,Train,0,1


In [3085]:
# Keep the Train-set result under its own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [3086]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped first: pandas deprecates letting empty inputs take part in
# concat's dtype resolution, and an immature label can legitimately yield zero rows.
frames_to_stack = [frame for frame in (df2, df1) if not frame.empty]
# pd.concat raises on an empty list, so fall back to an (empty) copy of the Train frame.
df_concat = pd.concat(frames_to_stack, ignore_index=True) if frames_to_stack else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10901 entries, 0 to 10900
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10901 non-null  object        
 1   digitalLoanAccountId   10901 non-null  object        
 2   loanAccountNumber      10901 non-null  object        
 3   aStackScore            10901 non-null  object        
 4   trenchCategory         10901 non-null  object        
 5   appln_submit_datetime  10901 non-null  datetime64[us]
 6   disbursementdate       10901 non-null  dbdate        
 7   Application_month      10901 non-null  object        
 8   Data_selection         10901 non-null  object        
 9   deffpd10               10901 non-null  Int64         
 10  flg_mature_fpd10       10901 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 958.2+ KB


In [3087]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [3088]:
# Export the Trench 3 / FPD10 sample. The file name previously said "trench1", which
# did not match the 'Trench 3' filter in the queries and the trench3* names used by
# the other exports in this section.
df_concat.to_csv(r"aStackScoretrench3fpd10.csv")

In [3089]:
# Periodic Gini of aStackScore against the FPD10 label
# (calculate_periodic_gini is defined elsewhere in this notebook).
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffpd10',
    'FPD10',
)

In [3090]:
# Snapshot the FPD10 Gini table; `gini_results` is overwritten for the next label.
f1 = gini_results.copy(deep=True)
f1.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.61039,Week,aStackScore,1.1.0,FPD10
1,2024-10-01,2024-10-31,0.595177,Month,aStackScore,1.1.0,FPD10
2,2024-10-07,2024-10-13,0.45,Week,aStackScore,1.1.0,FPD10
3,2024-10-14,2024-10-20,0.759259,Week,aStackScore,1.1.0,FPD10
4,2024-10-21,2024-10-27,0.489115,Week,aStackScore,1.1.0,FPD10


## FPD30

## Test

In [3091]:
# Test sample for the FPD30 label.
# - `parsed`: Alpha stack predictions from audit_balance.ml_model_run_details.
# - `model_run`: latest request row per (customerId, digitalLoanAccountId, modelName),
#   ordered by publish_time, for 'Alpha-Cash-Model-response'.
# - `modelname`: uses the run's trenchCategory, falling back to the value extracted
#   from the request payload with REGEXP_EXTRACT.
# - `deliquency`: default flags and maturity flags derived from obs_min_inst_def* /
#   min_inst_def*.
# - Final select: disbursed 'Trench 3' loans with a non-null score and
#   flg_mature_fpd30 = 1.
# The string is a raw literal (r"""...""") so the backslashes in the regex are sent to
# BigQuery unchanged.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop rows that are exact duplicates across all columns, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [3092]:
# Keep the Test-set result under its own name; `dfd` is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [3093]:
# Train sample for the FPD30 label.
# - `parsed` / `modelname`: Alpha stack predictions (aliased aStackScore) from the
#   training run-details table. The model filter accepts both display-name spellings,
#   matching the other Train queries in this section (it previously matched only
#   'Alpha-Cash-Stack-Model').
# - `deliquency`: derives the default flags (deffpd0 ... deffstpd30) and the matching
#   maturity flags from the obs_min_inst_def* / min_inst_def* columns.
# - Final select: joins scores to the loan master and delinquency data and keeps
#   disbursed 'Trench 3' loans with a non-null score and flg_mature_fpd30 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop rows that are exact duplicates across all columns, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2668912,ec8fe243-36aa-4389-afcc-36f7dc208d9e,60826689120023,0.454604,Trench 3,2024-10-29 01:28:50,2024-10-29,2024-10,Train,0,1
1,2605440,10d2d4c9-97e7-47b0-9a9d-94570c79c28e,60826054400071,0.583808,Trench 3,2024-10-31 12:11:11,2024-10-31,2024-10,Train,0,1
2,1903954,12491693-9a13-4972-a0fa-196411a9b64d,60819039540032,0.385935,Trench 3,2024-10-25 17:24:52,2024-10-26,2024-10,Train,0,1
3,1904964,ca3aee69-6dc1-49c3-8722-b6bb52925015,60819049640071,0.838606,Trench 3,2024-10-13 17:20:54,2024-10-13,2024-10,Train,0,1
4,2539525,1f910b03-be20-4fa4-8ac2-bf8dd4172031,60825395250037,0.473852,Trench 3,2024-10-31 12:20:52,2024-10-31,2024-10,Train,0,1


In [3094]:
# Keep the Train-set result under its own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [3095]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped first: the Test query can return zero rows, and pandas
# emits a FutureWarning when empty inputs take part in concat's dtype resolution.
frames_to_stack = [frame for frame in (df2, df1) if not frame.empty]
# pd.concat raises on an empty list, so fall back to an (empty) copy of the Train frame.
df_concat = pd.concat(frames_to_stack, ignore_index=True) if frames_to_stack else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10433 entries, 0 to 10432
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10433 non-null  Int64         
 1   digitalLoanAccountId   10433 non-null  object        
 2   loanAccountNumber      10433 non-null  object        
 3   aStackScore            10433 non-null  float64       
 4   trenchCategory         10433 non-null  object        
 5   appln_submit_datetime  10433 non-null  datetime64[us]
 6   disbursementdate       10433 non-null  dbdate        
 7   Application_month      10433 non-null  object        
 8   Data_selection         10433 non-null  object        
 9   deffpd30               10433 non-null  Int64         
 10  flg_mature_fpd30       10433 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 927.3+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3096]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [3097]:
# Export the Trench 3 / FPD30 sample (row index included) to the working directory.
df_concat.to_csv(path_or_buf=r"aStackScoretrench3fpd30.csv", index=True)

In [3098]:
# Periodic Gini of aStackScore against the FPD30 label
# (calculate_periodic_gini is defined elsewhere in this notebook).
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffpd30',
    'FPD30',
)

In [3099]:
# Snapshot the FPD30 Gini table; `gini_results` is overwritten for the next label.
f2 = gini_results.copy(deep=True)
f2.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.61039,Week,aStackScore,1.1.0,FPD30
1,2024-10-01,2024-10-31,0.627072,Month,aStackScore,1.1.0,FPD30
2,2024-10-07,2024-10-13,0.516509,Week,aStackScore,1.1.0,FPD30
3,2024-10-14,2024-10-20,0.801835,Week,aStackScore,1.1.0,FPD30
4,2024-10-21,2024-10-27,0.457207,Week,aStackScore,1.1.0,FPD30


## FSPD30

## Test

In [3100]:
# Test sample for the FSPD30 label.
# - `parsed`: Alpha stack predictions from audit_balance.ml_model_run_details.
# - `model_run`: latest request row per (customerId, digitalLoanAccountId, modelName),
#   ordered by publish_time, for 'Alpha-Cash-Model-response'.
# - `modelname`: uses the run's trenchCategory, falling back to the value extracted
#   from the request payload with REGEXP_EXTRACT.
# - `deliquency`: default flags and maturity flags derived from obs_min_inst_def* /
#   min_inst_def*.
# - Final select: disbursed 'Trench 3' loans with a non-null score and
#   flg_mature_fspd_30 = 1.
# The string is a raw literal (r"""...""") so the backslashes in the regex are sent to
# BigQuery unchanged.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop rows that are exact duplicates across all columns, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3101]:
# Keep the Test-set result under its own name; `dfd` is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [3102]:
# Train sample for the FSPD30 label.
# - `parsed` / `modelname`: Alpha stack predictions (aliased aStackScore) from the
#   training run-details table, for either spelling of the model display name.
# - `deliquency`: derives the default flags (deffpd0 ... deffstpd30) and the matching
#   maturity flags from the obs_min_inst_def* / min_inst_def* columns.
# - Final select: joins scores to the loan master and delinquency data and keeps
#   disbursed 'Trench 3' loans with a non-null score and flg_mature_fspd_30 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop rows that are exact duplicates across all columns, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2842456,1d7d7b04-6916-4847-9472-a7f1f1085c98,60828424560028,0.504032,Trench 3,2024-10-04 11:06:40,2024-10-04,2024-10,Train,0,1
1,2679082,c175334e-0da8-4c3f-a4c7-316c59e59d84,60826790820029,0.487073,Trench 3,2024-10-30 22:53:26,2024-10-31,2024-10,Train,0,1
2,2847149,7dea0767-589c-45ba-8ae4-b3823f6f22d5,60828471490038,0.544386,Trench 3,2024-10-31 11:51:03,2024-10-31,2024-10,Train,0,1
3,1103729,fdba9cfe-0ed3-47e8-85e7-2ddb2dc1af16,60811037290178,0.130934,Trench 3,2024-10-27 17:45:13,2024-10-27,2024-10,Train,0,1
4,2942253,ccd3c2e7-e1dd-46c8-9f4f-9c0192979471,60829422530025,0.704917,Trench 3,2024-10-30 12:07:15,2024-10-30,2024-10,Train,0,1


In [3103]:
# Keep the Train-set result under its own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [3104]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped first: the Test query can return zero rows, and pandas
# emits a FutureWarning when empty inputs take part in concat's dtype resolution.
frames_to_stack = [frame for frame in (df2, df1) if not frame.empty]
# pd.concat raises on an empty list, so fall back to an (empty) copy of the Train frame.
df_concat = pd.concat(frames_to_stack, ignore_index=True) if frames_to_stack else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9249 entries, 0 to 9248
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9249 non-null   Int64         
 1   digitalLoanAccountId   9249 non-null   object        
 2   loanAccountNumber      9249 non-null   object        
 3   aStackScore            9249 non-null   float64       
 4   trenchCategory         9249 non-null   object        
 5   appln_submit_datetime  9249 non-null   datetime64[us]
 6   disbursementdate       9249 non-null   dbdate        
 7   Application_month      9249 non-null   object        
 8   Data_selection         9249 non-null   object        
 9   deffspd30              9249 non-null   Int64         
 10  flg_mature_fspd_30     9249 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 822.1+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3105]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [3106]:
# Export the Trench 3 / FSPD30 sample (row index included) to the working directory.
df_concat.to_csv(path_or_buf=r"aStackScoretrench3fspd30.csv", index=True)

In [3107]:
# Periodic Gini of aStackScore against the FSPD30 label
# (calculate_periodic_gini is defined elsewhere in this notebook).
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffspd30',
    'FSPD30',
)

In [3108]:
# Snapshot the FSPD30 Gini table; `gini_results` is overwritten for the next label.
f3 = gini_results.copy(deep=True)
f3.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.646802,Week,aStackScore,1.1.0,FSPD30
1,2024-10-01,2024-10-31,0.611353,Month,aStackScore,1.1.0,FSPD30
2,2024-10-07,2024-10-13,0.430312,Week,aStackScore,1.1.0,FSPD30
3,2024-10-14,2024-10-20,0.76335,Week,aStackScore,1.1.0,FSPD30
4,2024-10-21,2024-10-27,0.527891,Week,aStackScore,1.1.0,FSPD30


## FSTPD30

## Test

In [3109]:
# Test sample for the FSTPD30 label.
# - `parsed`: Alpha stack predictions from audit_balance.ml_model_run_details.
# - `model_run`: latest request row per (customerId, digitalLoanAccountId, modelName),
#   ordered by publish_time, for 'Alpha-Cash-Model-response'.
# - `modelname`: uses the run's trenchCategory, falling back to the value extracted
#   from the request payload with REGEXP_EXTRACT.
# - `deliquency`: default flags and maturity flags derived from obs_min_inst_def* /
#   min_inst_def*.
# - Final select: disbursed 'Trench 3' loans with a non-null score and
#   flg_mature_fstpd_30 = 1.
# The string is a raw literal (r"""...""") so the backslashes in the regex are sent to
# BigQuery unchanged.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop rows that are exact duplicates across all columns, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3110]:
# Keep the Test-set result under its own name; `dfd` is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [3111]:
# Train sample for the FSTPD30 label.
# - `parsed` / `modelname`: Alpha stack predictions (aliased aStackScore) from the
#   training run-details table, for either spelling of the model display name.
# - `deliquency`: derives the default flags (deffpd0 ... deffstpd30) and the matching
#   maturity flags from the obs_min_inst_def* / min_inst_def* columns.
# - Final select: joins scores to the loan master and delinquency data and keeps
#   disbursed 'Trench 3' loans with a non-null score and flg_mature_fstpd_30 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop rows that are exact duplicates across all columns, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2037876,f2c5eefd-8ed7-4a78-bfb5-89a457ebc768,60820378760021,0.488863,Trench 3,2024-10-01 13:45:03,2024-10-02,2024-10,Train,0,1
1,2206299,7319f2dd-ac6e-43e8-ad22-f0dfca4737c3,60822062990031,0.549859,Trench 3,2024-10-23 02:57:22,2024-10-23,2024-10,Train,0,1
2,2818339,06b68eb8-fd66-4d1c-b356-28e7ae97c19b,60828183390049,0.59948,Trench 3,2024-10-28 20:33:16,2024-10-29,2024-10,Train,0,1
3,2527102,0d8e9604-8de2-40dd-9eaa-fbdbb35171d0,60825271020027,0.560694,Trench 3,2024-10-26 19:42:08,2024-10-26,2024-10,Train,0,1
4,2536141,eb0ad1a4-0ef1-4f1c-a7fb-659ad6ac5eb6,60825361410025,0.710999,Trench 3,2024-10-28 11:11:02,2024-10-28,2024-10,Train,0,1


In [3112]:
# Keep the Train-set result under its own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [3113]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped first: the Test query can return zero rows, and pandas
# emits a FutureWarning when empty inputs take part in concat's dtype resolution.
frames_to_stack = [frame for frame in (df2, df1) if not frame.empty]
# pd.concat raises on an empty list, so fall back to an (empty) copy of the Train frame.
df_concat = pd.concat(frames_to_stack, ignore_index=True) if frames_to_stack else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8020 entries, 0 to 8019
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8020 non-null   Int64         
 1   digitalLoanAccountId   8020 non-null   object        
 2   loanAccountNumber      8020 non-null   object        
 3   aStackScore            8020 non-null   float64       
 4   trenchCategory         8020 non-null   object        
 5   appln_submit_datetime  8020 non-null   datetime64[us]
 6   disbursementdate       8020 non-null   dbdate        
 7   Application_month      8020 non-null   object        
 8   Data_selection         8020 non-null   object        
 9   deffstpd30             8020 non-null   Int64         
 10  flg_mature_fstpd_30    8020 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 712.8+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3114]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [3115]:
# Export the Trench 3 / FSTPD30 sample (row index included) to the working directory.
df_concat.to_csv(path_or_buf=r"aStackScoretrench3fstpd30.csv", index=True)

In [3116]:
# Periodic Gini of aStackScore against the FSTPD30 label
# (calculate_periodic_gini is defined elsewhere in this notebook).
gini_results = calculate_periodic_gini(
    df_concat,
    'aStackScore',
    'deffstpd30',
    'FSTPD30',
)

In [3117]:
# Snapshot the FSTPD30 Gini table under its own name for the merge step.
f4 = gini_results.copy(deep=True)
f4.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.543902,Week,aStackScore,1.1.0,FSTPD30
1,2024-10-01,2024-10-31,0.518087,Month,aStackScore,1.1.0,FSTPD30
2,2024-10-07,2024-10-13,0.30382,Week,aStackScore,1.1.0,FSTPD30
3,2024-10-14,2024-10-20,0.772277,Week,aStackScore,1.1.0,FSTPD30
4,2024-10-21,2024-10-27,0.523652,Week,aStackScore,1.1.0,FSTPD30


## Combining the DataFrames

In [3118]:
import functools

# Descriptor columns shared by every per-label Gini table; used as the join keys.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini result frames on the shared descriptor columns."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the five tables left to right into a single frame.
# NOTE(review): 'bad_rate' is one of the join keys but holds a different label in each
# input (FPD0, FPD10, ...), so rows from different tables never match and each row
# ends up with only its own Gini column populated. Confirm this layout is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aStackScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'aStackScore_FPD10_gini',
       'aStackScore_FPD30_gini', 'aStackScore_FSPD30_gini',
       'aStackScore_FSTPD30_gini'], dtype=object)

In [3119]:
# Descriptor columns first, then the per-label Gini columns renamed with a "t3" tag.
key_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate']
gini_columns = {
    'aStackScore_FPD0_gini': 'aStackScore_t3_FPD0_gini',
    'aStackScore_FPD10_gini': 'aStackScore_t3_FPD10_gini',
    'aStackScore_FPD30_gini': 'aStackScore_t3_FPD30_gini',
    'aStackScore_FSPD30_gini': 'aStackScore_t3_FSPD30_gini',
    'aStackScore_FSTPD30_gini': 'aStackScore_t3_FSTPD30_gini',
}

# Reorder, rename and tag every row with the constant trench / model / product labels.
final_df = (
    final_df[key_columns + list(gini_columns)]
    .rename(columns=gini_columns)
    .assign(
        Trench_category='Trench 3',
        Model_display_name='alpha_stack_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                     datetime64[ns]
end_date                       datetime64[ns]
period                                 object
Model_Name                             object
version                                object
bad_rate                               object
aStackScore_t3_FPD0_gini              float64
aStackScore_t3_FPD10_gini             float64
aStackScore_t3_FPD30_gini             float64
aStackScore_t3_FSPD30_gini            float64
aStackScore_t3_FSTPD30_gini           float64
Trench_category                        object
Model_display_name                     object
Product_type                           object
dtype: object

In [3120]:
# Preview the first five rows of the combined table before uploading it.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,aStackScore_t3_FPD0_gini,aStackScore_t3_FPD10_gini,aStackScore_t3_FPD30_gini,aStackScore_t3_FSPD30_gini,aStackScore_t3_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,aStackScore,1.1.0,FPD0,0.380488,,,,,Trench 3,alpha_stack_model_cash,CASH
1,2024-10-01,2024-10-31,Month,aStackScore,1.1.0,FPD0,0.365971,,,,,Trench 3,alpha_stack_model_cash,CASH
2,2024-10-07,2024-10-13,Week,aStackScore,1.1.0,FPD0,0.357708,,,,,Trench 3,alpha_stack_model_cash,CASH
3,2024-10-14,2024-10-20,Week,aStackScore,1.1.0,FPD0,0.459742,,,,,Trench 3,alpha_stack_model_cash,CASH
4,2024-10-21,2024-10-27,Week,aStackScore,1.1.0,FPD0,0.378547,,,,,Trench 3,alpha_stack_model_cash,CASH


In [3121]:
# Load the combined Trench 3 Gini table into BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_stack_model_t3_v1_gini4"

# WRITE_TRUNCATE overwrites the destination table; "WRITE_APPEND" would add rows instead.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job finishes; as the cell's last expression its result is displayed.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=660378f0-15c3-41a3-8a3e-cc8b828761d3>

# Beta-Cash-Demo-Model

## Trench 1

## Test

In [3122]:
# Test sample, Trench 1, FPD0.
# - parsed / latest_request: most recent run (by start_time) per customerId,
#   digitalLoanAccountId and modelDisplayName from audit_balance.ml_model_run_details.
# - model_run: most recent 'Beta-Cash-Model-response' request per loan; its payload is only
#   used as a fallback source for trenchCategory when the run record has none.
# - deliquency: default flags (deffpd*) and maturity flags (flg_mature_*) per loanAccountNumber.
# The final select keeps disbursed loans with a non-null score that are mature for FPD0.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3747792,04488cba-10ee-497a-a248-b9ce54082379,60837477920014,0.5307901305745478,Trench 1,2025-10-16 12:43:20,2025-10-17,2025-10,Test,1,1
1,3756411,57cbe0f5-29d5-4141-9ad9-28826f787a95,60837564110019,0.4308420284974494,Trench 1,2025-10-20 13:26:42,2025-10-20,2025-10,Test,0,1
2,3742304,220c3f42-48cd-4045-877b-dafc62e6639f,60837423040016,0.4171585881145368,Trench 1,2025-10-13 17:47:35,2025-10-16,2025-10,Test,0,1
3,3732440,03952368-14cf-4c6c-8ac3-840aff36b570,60837324400018,0.4427335940390416,Trench 1,2025-10-09 08:32:55,2025-10-09,2025-10,Test,0,1
4,3744671,47328125-2072-480f-afb7-ecae3e6c223f,60837446710014,0.5116666412268175,Trench 1,2025-10-14 19:52:40,2025-10-14,2025-10,Test,0,1


In [3123]:
# Keep the Test-sample rows under their own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [3124]:
# Train sample, Trench 1, FPD0.
# Scores come from the playground table ml_training_model_run_details (no latest-run
# filtering is applied in SQL here); they are joined to the loan master and to the
# delinquency flags, keeping disbursed loans with a non-null score that are mature for FPD0.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
 and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2931280,e8edbd75-2ea8-44c7-9a4b-3ebd964be1ab,60829312800011,0.639576,Trench 1,2024-10-11 17:24:41,2024-10-11,2024-10,Train,1,1
1,2910404,61d46255-656f-43bb-a50d-52c6286cdf95,60829104040011,0.377503,Trench 1,2024-10-07 08:01:35,2024-10-08,2024-10,Train,0,1
2,2911137,e3440620-cc51-4ca4-a626-7c68e8ca001e,60829111370015,0.495372,Trench 1,2024-10-05 09:08:02,2024-10-05,2024-10,Train,1,1
3,2911289,92cef2ac-9ab6-45ca-a55b-9caa2b597008,60829112890011,0.489178,Trench 1,2024-10-05 06:31:22,2024-10-05,2024-10,Train,0,1
4,2980843,1397ac70-98da-449c-a449-2be881422ff1,60829808430015,0.49903,Trench 1,2024-10-29 14:46:39,2024-10-29,2024-10,Train,1,1


In [3125]:
# Keep the Train-sample rows under their own name for the concat with df1.
df2 = dfd.copy(deep=True)

In [3126]:
# Stack the Train rows (df2) on top of the Test rows (df1) with a fresh 0..n-1 index.
# A query that returned no rows is left out of the concat: pandas deprecates letting
# empty frames take part in dtype resolution (FutureWarning), and excluding them keeps
# the dtypes of the populated frame. If both frames are empty, they are concatenated
# as-is so the result still carries the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16860 entries, 0 to 16859
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16860 non-null  object        
 1   digitalLoanAccountId   16860 non-null  object        
 2   loanAccountNumber      16860 non-null  object        
 3   Beta_Cash_Demo_Score   16860 non-null  object        
 4   trenchCategory         16860 non-null  object        
 5   appln_submit_datetime  16860 non-null  datetime64[us]
 6   disbursementdate       16860 non-null  dbdate        
 7   Application_month      16860 non-null  object        
 8   Data_selection         16860 non-null  object        
 9   deffpd0                16860 non-null  Int64         
 10  flg_mature_fpd0        16860 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.4+ MB


In [3127]:
# The score column is object-typed after the concat; convert it to numeric.
# errors="coerce" turns any value that cannot be parsed into NaN instead of raising.
df_concat["Beta_Cash_Demo_Score"] = df_concat["Beta_Cash_Demo_Score"].pipe(
    pd.to_numeric, errors="coerce"
)

In [3128]:
# Save the combined Train + Test rows to a CSV in the current working directory
# (the DataFrame index is written as the first column).
df_concat.to_csv(path_or_buf="Beta_Cash_Demo_Scoretrenchnunfpd0.csv")

In [3129]:
# Gini of Beta_Cash_Demo_Score against the deffpd0 flag, labelled 'FPD0'.
# calculate_periodic_gini is defined elsewhere in the notebook; judging by the preview of
# its result, it returns one row per Week/Month window — confirm against its definition.
gini_results = calculate_periodic_gini(
    df_concat,
    "Beta_Cash_Demo_Score",
    "deffpd0",
    "FPD0",
)

In [3130]:
# Keep the FPD0 Gini table under its own name (`gini_results` is reassigned by the
# FPD10 run further down) and preview it.
f0 = gini_results.copy(deep=True)
f0.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.162108,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
1,2024-10-01,2024-10-31,0.23941,Month,Beta_Cash_Demo_Score,1.1.0,FPD0
2,2024-10-07,2024-10-13,0.260402,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
3,2024-10-14,2024-10-20,0.270544,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
4,2024-10-21,2024-10-27,0.261032,Week,Beta_Cash_Demo_Score,1.1.0,FPD0


## FPD10

## Test

In [3131]:
# Test sample, Trench 1, FPD10.
# - parsed / latest_request: most recent run (by start_time) per customerId,
#   digitalLoanAccountId and modelDisplayName from audit_balance.ml_model_run_details.
# - model_run: most recent 'Beta-Cash-Model-response' request per loan; its payload is only
#   used as a fallback source for trenchCategory when the run record has none.
# - deliquency: default flags (deffpd*) and maturity flags (flg_mature_*) per loanAccountNumber.
# The final select keeps disbursed loans with a non-null score that are mature for FPD10.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3737064,7f1d3d4e-2859-4ab7-865f-934fdd05a6b2,60837370640014,0.349986439083384,Trench 1,2025-10-11 14:06:35,2025-10-11,2025-10,Test,0,1
1,3730502,d2fe391c-42e1-480d-af9e-80d8c7f7b631,60837305020017,0.5052199927462862,Trench 1,2025-10-08 00:35:10,2025-10-08,2025-10,Test,0,1
2,3747136,9e2e4499-afa2-475e-8aa3-19e91d18cfa8,60837471360018,0.4466912888550019,Trench 1,2025-10-16 01:04:07,2025-10-16,2025-10,Test,0,1
3,3706096,98118b0f-5bae-4544-8a77-f77404a58b50,60837060960013,0.3653376733470351,Trench 1,2025-09-26 17:13:14,2025-09-27,2025-09,Test,0,1
4,3733400,16003553-7248-400a-92d3-e5d2f7a2f03d,60837334000014,0.2841739249025936,Trench 1,2025-10-09 14:46:23,2025-10-09,2025-10,Test,0,1


In [3132]:
# Keep the Test-sample rows under their own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [3133]:
# Train sample, Trench 1, FPD10.
# Scores come from the playground table ml_training_model_run_details (no latest-run
# filtering is applied in SQL here); they are joined to the loan master and to the
# delinquency flags, keeping disbursed loans with a non-null score that are mature for FPD10.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2971121,8b50c64c-8b90-4cca-bab1-e0cda4bbb4ec,60829711210017,0.520827,Trench 1,2024-10-25 18:41:41,2024-10-25,2024-10,Train,1,1
1,2907528,3e9ad64f-fa23-4418-9f4c-414738eca555,60829075280015,0.71762,Trench 1,2024-10-03 19:04:26,2024-10-03,2024-10,Train,1,1
2,2983553,20024cbd-557c-4954-9028-bf26baad5897,60829835530011,0.388371,Trench 1,2024-10-29 22:13:15,2024-10-30,2024-10,Train,1,1
3,2946919,24972dac-b08d-4063-bf9c-d8bb8342f789,60829469190012,0.424286,Trench 1,2024-10-16 23:07:40,2024-10-17,2024-10,Train,0,1
4,2943204,a03c33e4-90de-4ad0-926d-9e8f45520f61,60829432040012,0.508775,Trench 1,2024-10-15 18:29:55,2024-10-15,2024-10,Train,0,1


In [3134]:
# Keep the Train-sample rows under their own name for the concat with df1.
df2 = dfd.copy(deep=True)

In [3135]:
# Stack the Train rows (df2) on top of the Test rows (df1) with a fresh 0..n-1 index.
# A query that returned no rows is left out of the concat: pandas deprecates letting
# empty frames take part in dtype resolution (FutureWarning), and excluding them keeps
# the dtypes of the populated frame. If both frames are empty, they are concatenated
# as-is so the result still carries the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16309 entries, 0 to 16308
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16309 non-null  object        
 1   digitalLoanAccountId   16309 non-null  object        
 2   loanAccountNumber      16309 non-null  object        
 3   Beta_Cash_Demo_Score   16309 non-null  object        
 4   trenchCategory         16309 non-null  object        
 5   appln_submit_datetime  16309 non-null  datetime64[us]
 6   disbursementdate       16309 non-null  dbdate        
 7   Application_month      16309 non-null  object        
 8   Data_selection         16309 non-null  object        
 9   deffpd10               16309 non-null  Int64         
 10  flg_mature_fpd10       16309 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.4+ MB


In [3136]:
# The score column is object-typed after the concat; convert it to numeric.
# errors="coerce" turns any value that cannot be parsed into NaN instead of raising.
df_concat["Beta_Cash_Demo_Score"] = df_concat["Beta_Cash_Demo_Score"].pipe(
    pd.to_numeric, errors="coerce"
)

In [3137]:
# Save the combined Train + Test rows to a CSV in the current working directory
# (the DataFrame index is written as the first column).
df_concat.to_csv(path_or_buf="Beta_Cash_Demo_Scoretrenchnunfpd10.csv")

In [3138]:
# Gini of Beta_Cash_Demo_Score against the deffpd10 flag, labelled 'FPD10'.
# calculate_periodic_gini is defined elsewhere in the notebook; judging by the preview of
# its result, it returns one row per Week/Month window — confirm against its definition.
gini_results = calculate_periodic_gini(
    df_concat,
    "Beta_Cash_Demo_Score",
    "deffpd10",
    "FPD10",
)

In [3139]:
# Keep the FPD10 Gini table under its own name (`gini_results` is reassigned by the
# FPD30 run further down) and preview it.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.235812,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
1,2024-10-01,2024-10-31,0.347381,Month,Beta_Cash_Demo_Score,1.1.0,FPD10
2,2024-10-07,2024-10-13,0.379712,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
3,2024-10-14,2024-10-20,0.319219,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
4,2024-10-21,2024-10-27,0.44878,Week,Beta_Cash_Demo_Score,1.1.0,FPD10


## FPD30

## Test

In [3140]:
# Test sample, Trench 1, FPD30.
# - parsed / latest_request: most recent run (by start_time) per customerId,
#   digitalLoanAccountId and modelDisplayName from audit_balance.ml_model_run_details.
# - model_run: most recent 'Beta-Cash-Model-response' request per loan; its payload is only
#   used as a fallback source for trenchCategory when the run record has none.
# - deliquency: default flags (deffpd*) and maturity flags (flg_mature_*) per loanAccountNumber.
# The final select keeps disbursed loans with a non-null score that are mature for FPD30.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [3141]:
# Keep the Test-sample rows under their own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [3142]:
# Train sample, Trench 1, FPD30.
# Scores come from the playground table ml_training_model_run_details (no latest-run
# filtering is applied in SQL here); they are joined to the loan master and to the
# delinquency flags, keeping disbursed loans with a non-null score that are mature for FPD30.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2913966,3f74b258-1494-4147-9725-d9aaf4e74152,60829139660012,0.601266,Trench 1,2024-10-08 10:50:06,2024-10-15,2024-10,Train,0,1
1,2963765,f46578b7-a83d-4813-aa43-912e9c9dfe81,60829637650011,0.468295,Trench 1,2024-10-22 14:04:27,2024-10-22,2024-10,Train,0,1
2,2945802,cd16eaa7-18b1-46b0-80aa-2233a893e292,60829458020016,0.543932,Trench 1,2024-10-16 15:57:50,2024-10-16,2024-10,Train,1,1
3,2908311,1bda74e5-5f92-4c28-b68b-613d7c06a2d4,60829083110014,0.458282,Trench 1,2024-10-07 06:17:48,2024-10-07,2024-10,Train,1,1
4,2983729,0ab7c82a-d521-4cb3-9d04-16879ca3ff29,60829837290016,0.523101,Trench 1,2024-10-30 00:12:35,2024-11-03,2024-10,Train,0,1


In [3143]:
# Keep the Train-sample rows under their own name for the concat with df1.
df2 = dfd.copy(deep=True)

In [3144]:
# Stack the Train rows (df2) on top of the Test rows (df1) with a fresh 0..n-1 index.
# The Test query for FPD30 returned no rows, so df1 is empty here. pandas deprecates
# letting empty frames take part in dtype resolution during concat (FutureWarning), so
# empty frames are excluded up front; this keeps the dtypes of the populated frame.
# If both frames are empty, they are concatenated as-is so the result still carries the
# expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15515 entries, 0 to 15514
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15515 non-null  Int64         
 1   digitalLoanAccountId   15515 non-null  object        
 2   loanAccountNumber      15515 non-null  object        
 3   Beta_Cash_Demo_Score   15515 non-null  float64       
 4   trenchCategory         15515 non-null  object        
 5   appln_submit_datetime  15515 non-null  datetime64[us]
 6   disbursementdate       15515 non-null  dbdate        
 7   Application_month      15515 non-null  object        
 8   Data_selection         15515 non-null  object        
 9   deffpd30               15515 non-null  Int64         
 10  flg_mature_fpd30       15515 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.3+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3145]:
# Make sure the score column is numeric before the Gini calculation.
# errors="coerce" turns any value that cannot be parsed into NaN instead of raising.
df_concat["Beta_Cash_Demo_Score"] = df_concat["Beta_Cash_Demo_Score"].pipe(
    pd.to_numeric, errors="coerce"
)

In [3146]:
# Save the combined Train + Test rows to a CSV in the current working directory
# (the DataFrame index is written as the first column).
df_concat.to_csv(path_or_buf="Beta_Cash_Demo_Scoretrenchnunfpd30.csv")

In [3147]:
# Gini of Beta_Cash_Demo_Score against the deffpd30 flag, labelled 'FPD30'.
# calculate_periodic_gini is defined elsewhere in the notebook; judging by the preview of
# its result, it returns one row per Week/Month window — confirm against its definition.
gini_results = calculate_periodic_gini(
    df_concat,
    "Beta_Cash_Demo_Score",
    "deffpd30",
    "FPD30",
)

In [3148]:
# Keep the FPD30 Gini table under its own name and preview it.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.285632,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
1,2024-10-01,2024-10-31,0.355098,Month,Beta_Cash_Demo_Score,1.1.0,FPD30
2,2024-10-07,2024-10-13,0.323583,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
3,2024-10-14,2024-10-20,0.327721,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
4,2024-10-21,2024-10-27,0.464095,Week,Beta_Cash_Demo_Score,1.1.0,FPD30


## FSPD30

## Test

In [3149]:
# Test sample, Trench 1, FSPD30.
# - parsed / latest_request: most recent run (by start_time) per customerId,
#   digitalLoanAccountId and modelDisplayName from audit_balance.ml_model_run_details.
# - model_run: most recent 'Beta-Cash-Model-response' request per loan; its payload is only
#   used as a fallback source for trenchCategory when the run record has none.
# - deliquency: default flags (deffpd*) and maturity flags (flg_mature_*) per loanAccountNumber.
# The final select keeps disbursed loans with a non-null score that are mature for FSPD30
# (flg_mature_fspd_30 = 1).
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3150]:
# Keep the Test-sample rows under their own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [3151]:
# Train sample, Trench 1, FSPD30.
# Scores come from the playground table ml_training_model_run_details (no latest-run
# filtering is applied in SQL here); they are joined to the loan master and to the
# delinquency flags, keeping disbursed loans with a non-null score that are mature for
# FSPD30 (flg_mature_fspd_30 = 1).
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop rows that are exact duplicates across every selected column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep="first")
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2943878,278db654-8c5d-4c9c-b3ad-353a5d89aa51,60829438780018,0.608458,Trench 1,2024-10-15 23:20:40,2024-10-16,2024-10,Train,0,1
1,2979749,a794e014-a57b-4c90-a044-a1e6863ee604,60829797490019,0.600384,Trench 1,2024-10-29 12:42:13,2024-10-29,2024-10,Train,1,1
2,2940348,41c33eaf-d67e-4bb5-bf77-76cb4a094743,60829403480023,0.461087,Trench 1,2024-10-17 16:55:06,2024-10-17,2024-10,Train,0,1
3,2973864,e05e40ec-64bd-40b0-a9ee-ee7c8e111763,60829738640017,0.680524,Trench 1,2024-10-26 13:31:08,2024-10-26,2024-10,Train,0,1
4,2914554,5db8fd55-745d-4ee9-a47d-928afe1bc9ab,60829145540011,0.526106,Trench 1,2024-10-06 09:24:25,2024-10-06,2024-10,Train,1,1


In [3152]:
# Snapshot the Train extract (Data_selection = 'Train') as df2 for the concat that follows.
df2 = dfd.copy()

In [3153]:
# Stack the Train extract (df2) and the earlier extract (df1) into one frame for the
# periodic Gini calculation.
# Empty extracts are excluded first: df1 had no rows in the recorded run, and concatenating
# an empty frame raises pandas' FutureWarning about empty/all-NA entries (future pandas
# versions will let the empty frame influence the result dtypes). If every extract is
# empty, fall back to concatenating them as-is so the result still carries the columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14299 entries, 0 to 14298
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14299 non-null  Int64         
 1   digitalLoanAccountId   14299 non-null  object        
 2   loanAccountNumber      14299 non-null  object        
 3   Beta_Cash_Demo_Score   14299 non-null  float64       
 4   trenchCategory         14299 non-null  object        
 5   appln_submit_datetime  14299 non-null  datetime64[us]
 6   disbursementdate       14299 non-null  dbdate        
 7   Application_month      14299 non-null  object        
 8   Data_selection         14299 non-null  object        
 9   deffspd30              14299 non-null  Int64         
 10  flg_mature_fspd_30     14299 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.2+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3154]:
# Force the score column to a numeric dtype before the Gini calculation; values that cannot
# be parsed become NaN (errors='coerce'). In the recorded run the column was already float64.
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3155]:
# Save the combined FSPD30 frame to the current working directory (the row index is written
# too, because `index` is left at its default).
# NOTE(review): the file name does not say which trench the rows belong to ('Trench 1' here);
# confirm it cannot collide with the export of another trench.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfspd30.csv")

In [3156]:
# calculate_periodic_gini is defined elsewhere in this notebook. The arguments appear to be
# (frame, score column, default-flag column, bad-rate label) — TODO confirm against its
# definition. The displayed result has one row per week/month with a
# Beta_Cash_Demo_Score_FSPD30_gini column.
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffspd30', 'FSPD30')

In [3157]:
# Keep the FSPD30 Gini table as f3; `gini_results` is overwritten by the next metric, and the
# combine cell later merges f0..f4 into one frame.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.319646,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
1,2024-10-01,2024-10-31,0.318281,Month,Beta_Cash_Demo_Score,1.1.0,FSPD30
2,2024-10-07,2024-10-13,0.332984,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
3,2024-10-14,2024-10-20,0.30673,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
4,2024-10-21,2024-10-27,0.339559,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30


## FSTPD30

## Test

In [3158]:
# Test extract for FSTPD30, Trench 1.
# - `latest_request` keeps the most recent model run per (customerId, digitalLoanAccountId,
#   modelDisplayName) from audit_balance.ml_model_run_details.
# - `model_run` keeps the most recent 'Beta-Cash-Model-response' request per loan; its payload
#   only supplies trenchCategory when the model-run row has none (coalesce in `modelname`).
# - `deliquency` derives the default flags (def*) and maturity flags (flg_mature_*).
# - Only disbursed loans with a non-null score and flg_mature_fstpd_30 = 1 are kept; the
#   WHERE filters on loanmaster columns make the LEFT JOIN behave like an inner join.
# - Rows are labelled Data_selection = 'Test'.
# NOTE(review): the recorded run of this cell returned no rows.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query and drop fully identical rows in one chained step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3159]:
# Snapshot the Test extract (Data_selection = 'Test') as df1; `dfd` is reassigned by the
# Train query in the next cell. In the recorded run this extract had no rows.
df1 = dfd.copy()

## Train

In [3160]:
# Train extract for FSTPD30, Trench 1.
# - Scores come from ml_training_model_run_details for the two Beta cash demo model names.
# - `deliquency` derives the default flags (def*) and maturity flags (flg_mature_*) from
#   obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# - Only disbursed loans with a non-null score and flg_mature_fstpd_30 = 1 are kept; the
#   WHERE filters on loanmaster columns make the LEFT JOIN behave like an inner join.
# - Rows are labelled Data_selection = 'Train'.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop fully identical rows in one chained step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2901668,068a7c48-172e-4443-9db1-f6442db10214,60829016680012,0.39711,Trench 1,2024-10-01 20:36:56,2024-10-03,2024-10,Train,1,1
1,2936699,8c71bdcc-9df4-4708-97a3-d9f6d7b51abf,60829366990014,0.574279,Trench 1,2024-10-13 14:57:07,2024-10-14,2024-10,Train,1,1
2,2969963,0496fee8-c7be-4c22-a602-22e6f246ff22,60829699630018,0.496295,Trench 1,2024-10-24 22:34:46,2024-10-25,2024-10,Train,1,1
3,2987267,0f38b852-f364-4e5e-9d5a-b9059b5eb2cb,60829872670013,0.566809,Trench 1,2024-10-31 12:54:28,2024-10-31,2024-10,Train,1,1
4,2963570,3da434ca-81f6-43db-8f8a-bffa2f6ae3f2,60829635700011,0.578933,Trench 1,2024-10-25 15:57:32,2024-10-25,2024-10,Train,0,1


In [3161]:
# Snapshot the Train extract (Data_selection = 'Train') as df2 for the concat that follows.
df2 = dfd.copy()

In [3162]:
# Stack the Train (df2) and Test (df1) extracts into one frame for the periodic Gini
# calculation.
# Empty extracts are excluded first: the Test extract had no rows in the recorded run, and
# concatenating an empty frame raises pandas' FutureWarning about empty/all-NA entries
# (future pandas versions will let the empty frame influence the result dtypes). If every
# extract is empty, fall back to concatenating them as-is so the result still carries the
# columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13106 entries, 0 to 13105
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13106 non-null  Int64         
 1   digitalLoanAccountId   13106 non-null  object        
 2   loanAccountNumber      13106 non-null  object        
 3   Beta_Cash_Demo_Score   13106 non-null  float64       
 4   trenchCategory         13106 non-null  object        
 5   appln_submit_datetime  13106 non-null  datetime64[us]
 6   disbursementdate       13106 non-null  dbdate        
 7   Application_month      13106 non-null  object        
 8   Data_selection         13106 non-null  object        
 9   deffstpd30             13106 non-null  Int64         
 10  flg_mature_fstpd_30    13106 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.1+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3163]:
# Force the score column to a numeric dtype before the Gini calculation; values that cannot
# be parsed become NaN (errors='coerce'). In the recorded run the column was already float64.
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3164]:
# Save the combined FSTPD30 frame to the current working directory (the row index is written
# too, because `index` is left at its default).
# NOTE(review): the file name does not say which trench the rows belong to ('Trench 1' here);
# confirm it cannot collide with the export of another trench.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfstpd30.csv")

In [3165]:
# calculate_periodic_gini is defined elsewhere in this notebook. The arguments appear to be
# (frame, score column, default-flag column, bad-rate label) — TODO confirm against its
# definition. The displayed result has one row per week/month with a
# Beta_Cash_Demo_Score_FSTPD30_gini column.
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffstpd30', 'FSTPD30')

In [3166]:
# Keep the FSTPD30 Gini table as f4; the combine cell merges f0..f4 into one frame.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.274895,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
1,2024-10-01,2024-10-31,0.293095,Month,Beta_Cash_Demo_Score,1.1.0,FSTPD30
2,2024-10-07,2024-10-13,0.326968,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
3,2024-10-14,2024-10-20,0.231325,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
4,2024-10-21,2024-10-27,0.333658,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30


## Combining the dataframes

In [3167]:
import functools

# Per-metric Gini frames to combine. f3 and f4 are the FSPD30 and FSTPD30 results built
# above; f0, f1 and f2 must already exist in the kernel — judging by the merged column
# names they hold the FPD0, FPD10 and FPD30 results (TODO confirm).
dataframes = [f0, f1, f2, f3, f4]
# Join keys present in every per-metric frame.
# NOTE(review): `bad_rate` carries a different label in each frame (e.g. 'FSPD30' vs
# 'FSTPD30'), so an outer merge on these keys never matches rows across frames. The result
# keeps one row per (period, metric) with a single populated *_gini column and NaN in the
# others, as the final_df.head() output shows. Confirm this sparse layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two frames on the notebook-level ``common_columns`` keys.

    Rows present in only one of the frames are kept; columns coming from the
    other frame are filled with NaN for those rows.
    """
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the list left-to-right into one frame: ((((f0 ⨝ f1) ⨝ f2) ⨝ f3) ⨝ f4), each step
# being the outer merge done by merge_dataframes.
final_df = functools.reduce(merge_dataframes, dataframes)

# Show the merged column names (used to pick and order columns in the next cell).
final_df.columns.values

array(['start_date', 'end_date', 'Beta_Cash_Demo_Score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'Beta_Cash_Demo_Score_FPD10_gini',
       'Beta_Cash_Demo_Score_FPD30_gini',
       'Beta_Cash_Demo_Score_FSPD30_gini',
       'Beta_Cash_Demo_Score_FSTPD30_gini'], dtype=object)

In [3168]:
# Shape the merged frame for upload:
#   1. order it as key columns followed by the five Gini columns,
#   2. tag each Gini column with the trench suffix `_t1`,
#   3. attach constant metadata columns describing this run.
# NOTE: the cell rebuilds `final_df` from itself, so it must run exactly once after the merge
# cell; running it again raises KeyError because the un-suffixed Gini columns are gone.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'Beta_Cash_Demo_Score_FPD0_gini',
        'Beta_Cash_Demo_Score_FPD10_gini',
        'Beta_Cash_Demo_Score_FPD30_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini',
    ]]
    .rename(columns={
        'Beta_Cash_Demo_Score_FPD0_gini': 'Beta_Cash_Demo_Score_FPD0_t1_gini',
        'Beta_Cash_Demo_Score_FPD10_gini': 'Beta_Cash_Demo_Score_FPD10_t1_gini',
        'Beta_Cash_Demo_Score_FPD30_gini': 'Beta_Cash_Demo_Score_FPD30_t1_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini': 'Beta_Cash_Demo_Score_FSPD30_t1_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini': 'Beta_Cash_Demo_Score_FSTPD30_t1_gini',
    })
    .assign(
        Trench_category='Trench 1',
        Model_display_name='beta_demo_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                              datetime64[ns]
end_date                                datetime64[ns]
period                                          object
Model_Name                                      object
version                                         object
bad_rate                                        object
Beta_Cash_Demo_Score_FPD0_t1_gini              float64
Beta_Cash_Demo_Score_FPD10_t1_gini             float64
Beta_Cash_Demo_Score_FPD30_t1_gini             float64
Beta_Cash_Demo_Score_FSPD30_t1_gini            float64
Beta_Cash_Demo_Score_FSTPD30_t1_gini           float64
Trench_category                                 object
Model_display_name                              object
Product_type                                    object
dtype: object

In [3169]:
# Preview the first rows of the frame that the next cell uploads to BigQuery.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,Beta_Cash_Demo_Score_FPD0_t1_gini,Beta_Cash_Demo_Score_FPD10_t1_gini,Beta_Cash_Demo_Score_FPD30_t1_gini,Beta_Cash_Demo_Score_FSPD30_t1_gini,Beta_Cash_Demo_Score_FSTPD30_t1_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.162108,,,,,Trench 1,beta_demo_model_cash,CASH
1,2024-10-01,2024-10-31,Month,Beta_Cash_Demo_Score,1.1.0,FPD0,0.23941,,,,,Trench 1,beta_demo_model_cash,CASH
2,2024-10-07,2024-10-13,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.260402,,,,,Trench 1,beta_demo_model_cash,CASH
3,2024-10-14,2024-10-20,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.270544,,,,,Trench 1,beta_demo_model_cash,CASH
4,2024-10-21,2024-10-27,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.261032,,,,,Trench 1,beta_demo_model_cash,CASH


In [3170]:
# Upload to BigQuery
# Destination table for the Trench 1 Gini summary held in `final_df`.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_cash_t1_v1_gini4"
# WRITE_TRUNCATE overwrites the destination table's existing data on every run of this cell.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
# Start the load job using the `client` created at the top of the notebook.
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=54db3864-d92b-496e-907c-470471d24d87>

## Trench 2

## FPD0

## Test

In [3171]:
# Test extract for FPD0, Trench 2.
# - `latest_request` keeps the most recent model run per (customerId, digitalLoanAccountId,
#   modelDisplayName) from audit_balance.ml_model_run_details.
# - `model_run` keeps the most recent 'Beta-Cash-Model-response' request per loan; its payload
#   only supplies trenchCategory when the model-run row has none (coalesce in `modelname`).
# - `deliquency` derives the default flags (def*) and maturity flags (flg_mature_*).
# - Only disbursed loans with a non-null score and flg_mature_fpd0 = 1 are kept; the
#   WHERE filters on loanmaster columns make the LEFT JOIN behave like an inner join.
# - Rows are labelled Data_selection = 'Test'.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and drop fully identical rows in one chained step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1395465,5f9991e8-7041-4f81-b540-41577433f14b,60813954650061,0.4613358178472596,Trench 2,2025-10-23 01:35:32,2025-10-23,2025-10,Test,0,1
1,3309587,7fa98ca1-7602-42de-84fd-328b5a220ec1,60833095870015,0.5945761015634671,Trench 2,2025-10-25 18:09:40,2025-10-25,2025-10,Test,0,1
2,3214810,c7165ad9-e130-459f-bbd0-2ac4f8a25c44,60832148100021,0.5631522553791855,Trench 2,2025-10-13 13:23:28,2025-10-13,2025-10,Test,0,1
3,3289432,44405cdc-fe81-41bf-a9ae-d3aad275e886,60832894320011,0.500990318905541,Trench 2,2025-10-21 11:46:59,2025-10-21,2025-10,Test,0,1
4,3280007,172d92b2-91bf-4e3f-9bcd-9f7557d9017e,60832800070012,0.473464879404791,Trench 2,2025-10-12 16:15:24,2025-10-12,2025-10,Test,1,1


In [3172]:
# Snapshot the Test extract (Data_selection = 'Test') as df1; `dfd` is reassigned by the
# Train query in the next cell.
df1 = dfd.copy()

## Train

In [3173]:
# Train extract for FPD0, Trench 2.
# - Scores come from ml_training_model_run_details for the two Beta cash demo model names.
# - `deliquency` derives the default flags (def*) and maturity flags (flg_mature_*) from
#   obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# - Only disbursed loans with a non-null score and flg_mature_fpd0 = 1 are kept; the
#   WHERE filters on loanmaster columns make the LEFT JOIN behave like an inner join.
# - Rows are labelled Data_selection = 'Train'.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
 and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully identical rows in one chained step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1034329,e028cc69-e00a-4ec9-aac0-cadb714b8f6b,60810343290021,0.46849,Trench 2,2024-10-17 01:17:34,2024-10-17,2024-10,Train,1,1
1,2509760,6fc34da2-6a3b-4ec1-af36-970e535b78d0,60825097600015,0.487978,Trench 2,2024-10-15 00:54:29,2024-10-15,2024-10,Train,0,1
2,2490952,80fd28e2-6845-4d60-9d9b-8bfbab4a60dd,60824909520014,0.699492,Trench 2,2024-10-05 11:46:18,2024-10-05,2024-10,Train,1,1
3,2441698,b59fb911-c22a-42ac-84cc-314c3d56142d,60824416980018,0.547507,Trench 2,2024-10-20 12:32:46,2024-10-23,2024-10,Train,1,1
4,1038187,18fa7b93-efa7-4ae9-9e45-973f506d29ae,60810381870153,0.565306,Trench 2,2024-10-30 20:18:03,2024-10-31,2024-10,Train,0,1


In [3174]:
# Snapshot the Train extract (Data_selection = 'Train') as df2 for the concat that follows.
df2 = dfd.copy()

In [3175]:
# Stack the Train (df2) and Test (df1) extracts into one frame for the periodic Gini
# calculation.
# Empty extracts are excluded first: concatenating an empty frame raises pandas'
# FutureWarning about empty/all-NA entries, and future pandas versions will let the empty
# frame influence the result dtypes. If every extract is empty, fall back to concatenating
# them as-is so the result still carries the columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12220 entries, 0 to 12219
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12220 non-null  object        
 1   digitalLoanAccountId   12220 non-null  object        
 2   loanAccountNumber      12220 non-null  object        
 3   Beta_Cash_Demo_Score   12220 non-null  object        
 4   trenchCategory         12220 non-null  object        
 5   appln_submit_datetime  12220 non-null  datetime64[us]
 6   disbursementdate       12220 non-null  dbdate        
 7   Application_month      12220 non-null  object        
 8   Data_selection         12220 non-null  object        
 9   deffpd0                12220 non-null  Int64         
 10  flg_mature_fpd0        12220 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.0+ MB


In [3176]:
# After the concat the score column has dtype `object` (see the df_concat.info() output),
# so cast it to a numeric dtype before the Gini calculation; values that cannot be parsed
# become NaN (errors='coerce').
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3177]:
# Save the combined FPD0 frame to the current working directory (the row index is written
# too, because `index` is left at its default).
# NOTE(review): the file name does not say which trench the rows belong to ('Trench 2' here);
# confirm it does not overwrite the FPD0 export of another trench.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd0.csv")

In [3178]:
# calculate_periodic_gini is defined elsewhere in this notebook. The arguments appear to be
# (frame, score column, default-flag column, bad-rate label) — TODO confirm against its
# definition. The displayed result has one row per week/month with a
# Beta_Cash_Demo_Score_FPD0_gini column.
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd0', 'FPD0')

In [3179]:
# Keep the Trench 2 FPD0 Gini table as f0. This rebinds f0, replacing whatever frame was
# stored under that name before (f0 was already used by the earlier combine cell).
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.256061,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
1,2024-10-01,2024-10-31,0.217154,Month,Beta_Cash_Demo_Score,1.1.0,FPD0
2,2024-10-07,2024-10-13,0.272374,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
3,2024-10-14,2024-10-20,0.252783,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
4,2024-10-21,2024-10-27,0.12816,Week,Beta_Cash_Demo_Score,1.1.0,FPD0


## FPD10

## Test

In [3180]:
# Test extract for FPD10, Trench 2.
# - `latest_request` keeps the most recent model run per (customerId, digitalLoanAccountId,
#   modelDisplayName) from audit_balance.ml_model_run_details.
# - `model_run` keeps the most recent 'Beta-Cash-Model-response' request per loan; its payload
#   only supplies trenchCategory when the model-run row has none (coalesce in `modelname`).
# - `deliquency` derives the default flags (def*) and maturity flags (flg_mature_*).
# - Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are kept; the
#   WHERE filters on loanmaster columns make the LEFT JOIN behave like an inner join.
# - Rows are labelled Data_selection = 'Test'.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query and drop fully identical rows in one chained step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3542301,17691502-e31b-4c89-a9b5-a831f59f3b44,60835423010011,0.2380665569706894,Trench 2,2025-10-10 13:53:26,2025-10-11,2025-10,Test,0,1
1,3238233,0c565a70-5dde-41c6-aff6-ee43b08d5f4a,60832382330017,0.5730692581451752,Trench 2,2025-10-13 08:23:04,2025-10-13,2025-10,Test,0,1
2,3414945,079d2cc8-7504-4ae9-b705-5c849d7dd1c3,60834149450018,0.4054937983050984,Trench 2,2025-10-15 05:59:53,2025-10-15,2025-10,Test,1,1
3,3441140,4200b450-7550-4b11-a73e-6e9be00f30da,60834411400012,0.4604311844471552,Trench 2,2025-10-10 10:55:23,2025-10-12,2025-10,Test,1,1
4,2972716,f3ddbb3e-afea-4f96-861b-768c1abb7e5b,60829727160012,0.5450310640829273,Trench 2,2025-09-29 09:20:58,2025-09-29,2025-09,Test,0,1


In [3181]:
# Snapshot the Test extract (Data_selection = 'Test') as df1; `dfd` is reassigned by the
# Train query in the next cell.
df1 = dfd.copy()

## Train

In [3182]:
# Train extract for FPD10, Trench 2.
# - Scores come from ml_training_model_run_details for the two Beta cash demo model names.
# - `deliquency` derives the default flags (def*) and maturity flags (flg_mature_*) from
#   obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# - Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are kept; the
#   WHERE filters on loanmaster columns make the LEFT JOIN behave like an inner join.
# - Rows are labelled Data_selection = 'Train'.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully identical rows in one chained step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2708256,ebb745c2-cd52-4a1c-b4c5-9fea96016b92,60827082560021,0.590725,Trench 2,2024-10-31 16:38:31,2024-10-31,2024-10,Train,0,1
1,2142046,52b312d4-ba99-4663-940b-23819858af58,60821420460014,0.466664,Trench 2,2024-10-25 20:14:06,2024-10-25,2024-10,Train,1,1
2,2470466,b9fa144b-4f06-4ea9-87b5-b59f0857025c,60824704660015,0.516886,Trench 2,2024-10-09 15:07:47,2024-10-09,2024-10,Train,0,1
3,2660847,2b8a516f-555d-48ea-9fc9-ee60ee2b0244,60826608470011,0.526341,Trench 2,2024-10-18 13:34:11,2024-10-18,2024-10,Train,0,1
4,2254279,37c91156-4df4-437e-8496-dc6845f2fa7b,60822542790018,0.4612,Trench 2,2024-10-24 12:56:14,2024-10-24,2024-10,Train,0,1


In [3183]:
# Keep the Train-population result as df2; `dfd` is a scratch name reused by every query cell.
df2 = dfd.copy(deep=True)

In [3184]:
# Stack Train (df2) on top of Test (df1) for the FPD10 analysis.
# Empty frames are left out of the concat: pandas has deprecated letting
# empty/all-NA entries take part in result-dtype inference (FutureWarning), and
# excluding them beforehand is the documented way to keep the current dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to [df2] so the column layout survives even if both frames are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12016 entries, 0 to 12015
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12016 non-null  object        
 1   digitalLoanAccountId   12016 non-null  object        
 2   loanAccountNumber      12016 non-null  object        
 3   Beta_Cash_Demo_Score   12016 non-null  object        
 4   trenchCategory         12016 non-null  object        
 5   appln_submit_datetime  12016 non-null  datetime64[us]
 6   disbursementdate       12016 non-null  dbdate        
 7   Application_month      12016 non-null  object        
 8   Data_selection         12016 non-null  object        
 9   deffpd10               12016 non-null  Int64         
 10  flg_mature_fpd10       12016 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.0+ MB


In [3185]:
# The score column is object-typed after the concat; coerce it to numeric.
# errors='coerce' turns any unparseable value into NaN instead of raising.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [3186]:
# Persist the combined Train+Test FPD10 scoring set as a CSV (relative path).
df_concat.to_csv(path_or_buf=r"Beta_Cash_Demo_Scoretrenchnunfpd10.csv")

In [3187]:
# Gini of Beta_Cash_Demo_Score against the deffpd10 flag.
# 'FPD10' is presumably the tag that ends up in the result's `bad_rate` column
# and gini column name -- confirm against calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'Beta_Cash_Demo_Score',
    'deffpd10',
    'FPD10',
)

In [3188]:
# Snapshot the FPD10 Gini table as f1, because `gini_results` is rebound for each metric.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.453619,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
1,2024-10-01,2024-10-31,0.28765,Month,Beta_Cash_Demo_Score,1.1.0,FPD10
2,2024-10-07,2024-10-13,0.274212,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
3,2024-10-14,2024-10-20,0.334702,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
4,2024-10-21,2024-10-27,0.11477,Week,Beta_Cash_Demo_Score,1.1.0,FPD10


## FPD30

## Test

In [3189]:
# Test population for the FPD30 Gini.
# For each (customerId, digitalLoanAccountId, modelDisplayName) the latest run by
# start_time is taken from ml_model_run_details. trenchCategory falls back to
# $.predictions.trenchCategory from the latest 'Beta-Cash-Model-response' request
# payload when the run row has none. Scores are then joined to the loan master
# table and to delinquency flags derived from loan_deliquency_data.
# Keeps disbursed loans with a non-null score, flg_mature_fpd30 = 1 and
# trenchCategory = 'Trench 2'; deffpd30 is the target flag.
# NOTE(review): model_run is de-duplicated per (customerId, digitalLoanAccountId,
# modelName) but joined on digitalLoanAccountId only -- confirm this cannot fan out rows.
# NOTE(review): `parsed` also selects modelVersionId, end_time and calcFeatures,
# which nothing later in this query reads.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,1346048,896716c5-e4e5-4b3c-9218-0b605d293c72,60813460480017,0.5258142934829556,Trench 2,2025-09-25 11:37:29,2025-09-25,2025-09,Test,0,1


In [3190]:
# Keep the Test-population result under its own name; the next query cell rebinds `dfd`.
df1 = dfd.copy(deep=True)

## Train

In [3191]:
# Train population for the FPD30 Gini.
# Reads Beta Cash Demo scores from the training model-run table and joins them to
# the loan master table and to per-loan delinquency flags derived from
# loan_deliquency_data. Keeps disbursed loans with a non-null score,
# flg_mature_fpd30 = 1 and trenchCategory = 'Trench 2'; deffpd30 is the target flag.
# NOTE(review): `parsed` also selects modelVersionId, end_time and calcFeatures,
# which nothing later in this query reads.
# NOTE(review): the WHERE filters on loanmaster plus the inner join on
# loanmaster.loanAccountNumber discard unmatched rows, so the LEFT JOIN behaves
# like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2708256,ebb745c2-cd52-4a1c-b4c5-9fea96016b92,60827082560021,0.590725,Trench 2,2024-10-31 16:38:31,2024-10-31,2024-10,Train,0,1
1,2142046,52b312d4-ba99-4663-940b-23819858af58,60821420460014,0.466664,Trench 2,2024-10-25 20:14:06,2024-10-25,2024-10,Train,1,1
2,2470466,b9fa144b-4f06-4ea9-87b5-b59f0857025c,60824704660015,0.516886,Trench 2,2024-10-09 15:07:47,2024-10-09,2024-10,Train,0,1
3,2660847,2b8a516f-555d-48ea-9fc9-ee60ee2b0244,60826608470011,0.526341,Trench 2,2024-10-18 13:34:11,2024-10-18,2024-10,Train,0,1
4,2254279,37c91156-4df4-437e-8496-dc6845f2fa7b,60822542790018,0.4612,Trench 2,2024-10-24 12:56:14,2024-10-24,2024-10,Train,0,1


In [3192]:
# Keep the Train-population result as df2; `dfd` is a scratch name reused by every query cell.
df2 = dfd.copy(deep=True)

In [3193]:
# Stack Train (df2) on top of Test (df1) for the FPD30 analysis.
# Empty frames are left out of the concat: pandas has deprecated letting
# empty/all-NA entries take part in result-dtype inference (FutureWarning), and
# excluding them beforehand is the documented way to keep the current dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to [df2] so the column layout survives even if both frames are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11660 entries, 0 to 11659
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11660 non-null  object        
 1   digitalLoanAccountId   11660 non-null  object        
 2   loanAccountNumber      11660 non-null  object        
 3   Beta_Cash_Demo_Score   11660 non-null  object        
 4   trenchCategory         11660 non-null  object        
 5   appln_submit_datetime  11660 non-null  datetime64[us]
 6   disbursementdate       11660 non-null  dbdate        
 7   Application_month      11660 non-null  object        
 8   Data_selection         11660 non-null  object        
 9   deffpd30               11660 non-null  Int64         
 10  flg_mature_fpd30       11660 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.0+ MB


In [3194]:
# The score column is object-typed after the concat; coerce it to numeric.
# errors='coerce' turns any unparseable value into NaN instead of raising.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [3195]:
# Persist the combined Train+Test FPD30 scoring set as a CSV (relative path).
df_concat.to_csv(path_or_buf=r"Beta_Cash_Demo_Scoretrenchnunfpd30.csv")

In [3196]:
# Gini of Beta_Cash_Demo_Score against the deffpd30 flag.
# 'FPD30' is presumably the tag that ends up in the result's `bad_rate` column
# and gini column name -- confirm against calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'Beta_Cash_Demo_Score',
    'deffpd30',
    'FPD30',
)

In [3197]:
# Snapshot the FPD30 Gini table as f2, because `gini_results` is rebound for each metric.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.435796,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
1,2024-10-01,2024-10-31,0.282906,Month,Beta_Cash_Demo_Score,1.1.0,FPD30
2,2024-10-07,2024-10-13,0.286411,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
3,2024-10-14,2024-10-20,0.30303,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
4,2024-10-21,2024-10-27,0.135635,Week,Beta_Cash_Demo_Score,1.1.0,FPD30


## FSPD30

## Test

In [3198]:
# Test population for the FSPD30 Gini.
# For each (customerId, digitalLoanAccountId, modelDisplayName) the latest run by
# start_time is taken from ml_model_run_details. trenchCategory falls back to
# $.predictions.trenchCategory from the latest 'Beta-Cash-Model-response' request
# payload when the run row has none. Scores are then joined to the loan master
# table and to delinquency flags derived from loan_deliquency_data.
# Keeps disbursed loans with a non-null score, flg_mature_fspd_30 = 1 and
# trenchCategory = 'Trench 2'; deffspd30 is the target flag.
# NOTE(review): model_run is de-duplicated per (customerId, digitalLoanAccountId,
# modelName) but joined on digitalLoanAccountId only -- confirm this cannot fan out rows.
# NOTE(review): `parsed` also selects modelVersionId, end_time and calcFeatures,
# which nothing later in this query reads.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3199]:
# Keep the Test-population result under its own name; the next query cell rebinds `dfd`.
df1 = dfd.copy(deep=True)

## Train

In [3200]:
# Train population for the FSPD30 Gini.
# Reads Beta Cash Demo scores from the training model-run table and joins them to
# the loan master table and to per-loan delinquency flags derived from
# loan_deliquency_data. Keeps disbursed loans with a non-null score,
# flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 2'; deffspd30 is the target flag.
# NOTE(review): `parsed` also selects modelVersionId, end_time and calcFeatures,
# which nothing later in this query reads.
# NOTE(review): the WHERE filters on loanmaster plus the inner join on
# loanmaster.loanAccountNumber discard unmatched rows, so the LEFT JOIN behaves
# like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2315941,4e6a9ac2-ed71-4122-90de-1d02c878d017,60823159410015,0.437452,Trench 2,2024-10-20 08:23:34,2024-10-31,2024-10,Train,1,1
1,2450280,47d316c5-5d6f-42e6-ab2f-1de3f898e345,60824502800017,0.45766,Trench 2,2024-10-26 21:13:26,2024-10-26,2024-10,Train,1,1
2,1001488,1d0a0b69-3d16-4de9-a467-b28a7ed3b3bd,60810014880051,0.481842,Trench 2,2024-10-28 21:30:34,2024-10-29,2024-10,Train,0,1
3,2566422,fc297aab-8566-4f63-bf90-e0b80d29ee20,60825664220018,0.469877,Trench 2,2024-10-27 08:07:20,2024-10-27,2024-10,Train,1,1
4,1308326,1ddc9d98-b447-4e7d-b3b0-a9a7e94d1984,60813083260018,0.432298,Trench 2,2024-10-22 04:10:50,2024-10-24,2024-10,Train,0,1


In [3201]:
# Keep the Train-population result as df2; `dfd` is a scratch name reused by every query cell.
df2 = dfd.copy(deep=True)

In [3202]:
# Stack Train (df2) on top of Test (df1) for the FSPD30 analysis.
# The Test frame can be empty for this metric, and concatenating an empty frame
# makes pandas emit a FutureWarning (empty/all-NA entries will stop being ignored
# during result-dtype inference). Excluding empty frames first is the documented
# way to keep the current dtypes and silences the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to [df2] so the column layout survives even if both frames are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10972 entries, 0 to 10971
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10972 non-null  Int64         
 1   digitalLoanAccountId   10972 non-null  object        
 2   loanAccountNumber      10972 non-null  object        
 3   Beta_Cash_Demo_Score   10972 non-null  float64       
 4   trenchCategory         10972 non-null  object        
 5   appln_submit_datetime  10972 non-null  datetime64[us]
 6   disbursementdate       10972 non-null  dbdate        
 7   Application_month      10972 non-null  object        
 8   Data_selection         10972 non-null  object        
 9   deffspd30              10972 non-null  Int64         
 10  flg_mature_fspd_30     10972 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 975.2+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3203]:
# Make sure the score column is numeric before computing Gini.
# errors='coerce' turns any unparseable value into NaN instead of raising.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [3204]:
# Persist the combined Train+Test FSPD30 scoring set as a CSV (relative path).
df_concat.to_csv(path_or_buf=r"Beta_Cash_Demo_Scoretrenchnunfspd30.csv")

In [3205]:
# Gini of Beta_Cash_Demo_Score against the deffspd30 flag.
# 'FSPD30' is presumably the tag that ends up in the result's `bad_rate` column
# and gini column name -- confirm against calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'Beta_Cash_Demo_Score',
    'deffspd30',
    'FSPD30',
)

In [3206]:
# Snapshot the FSPD30 Gini table as f3, because `gini_results` is rebound for each metric.
f3 = gini_results.copy(deep=True)
f3.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.427312,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
1,2024-10-01,2024-10-31,0.309332,Month,Beta_Cash_Demo_Score,1.1.0,FSPD30
2,2024-10-07,2024-10-13,0.358267,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
3,2024-10-14,2024-10-20,0.363044,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
4,2024-10-21,2024-10-27,0.163012,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30


## FSTPD30

## Test

In [3207]:
# Test population for the FSTPD30 Gini.
# For each (customerId, digitalLoanAccountId, modelDisplayName) the latest run by
# start_time is taken from ml_model_run_details. trenchCategory falls back to
# $.predictions.trenchCategory from the latest 'Beta-Cash-Model-response' request
# payload when the run row has none. Scores are then joined to the loan master
# table and to delinquency flags derived from loan_deliquency_data.
# Keeps disbursed loans with a non-null score, flg_mature_fstpd_30 = 1 and
# trenchCategory = 'Trench 2'; deffstpd30 is the target flag.
# NOTE(review): model_run is de-duplicated per (customerId, digitalLoanAccountId,
# modelName) but joined on digitalLoanAccountId only -- confirm this cannot fan out rows.
# NOTE(review): `parsed` also selects modelVersionId, end_time and calcFeatures,
# which nothing later in this query reads.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3208]:
# Keep the Test-population result under its own name; the next query cell rebinds `dfd`.
df1 = dfd.copy(deep=True)

## Train

In [3209]:
# Train population for the FSTPD30 Gini.
# Reads Beta Cash Demo scores from the training model-run table and joins them to
# the loan master table and to per-loan delinquency flags derived from
# loan_deliquency_data. Keeps disbursed loans with a non-null score,
# flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 2'; deffstpd30 is the target flag.
# NOTE(review): `parsed` also selects modelVersionId, end_time and calcFeatures,
# which nothing later in this query reads.
# NOTE(review): the WHERE filters on loanmaster plus the inner join on
# loanmaster.loanAccountNumber discard unmatched rows, so the LEFT JOIN behaves
# like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2449267,fbd2124c-0472-4942-ab63-cf748be7b855,60824492670011,0.47923,Trench 2,2024-10-08 21:53:13,2024-10-08,2024-10,Train,0,1
1,2823236,55813852-6451-4663-a9cb-9483a5a6fc34,60828232360014,0.544488,Trench 2,2024-10-17 16:31:08,2024-10-17,2024-10,Train,0,1
2,2207045,1b45b3d7-3ec6-4a12-9766-a1b21a6f8691,60822070450019,0.557384,Trench 2,2024-10-27 01:24:04,2024-10-27,2024-10,Train,1,1
3,2723775,be26d1ce-f427-4b95-b9e0-82389f29f6aa,60827237750011,0.397411,Trench 2,2024-10-03 09:23:20,2024-10-03,2024-10,Train,0,1
4,1178685,4d5388c9-11c7-4d6c-b5ed-a9352481e9e6,60811786850011,0.589641,Trench 2,2024-10-01 18:58:45,2024-10-01,2024-10,Train,0,1


In [3210]:
# Keep the Train-population result as df2; `dfd` is a scratch name reused by every query cell.
df2 = dfd.copy(deep=True)

In [3211]:
# Stack Train (df2) on top of Test (df1) for the FSTPD30 analysis.
# The Test frame can be empty for this metric, and concatenating an empty frame
# makes pandas emit a FutureWarning (empty/all-NA entries will stop being ignored
# during result-dtype inference). Excluding empty frames first is the documented
# way to keep the current dtypes and silences the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# Fall back to [df2] so the column layout survives even if both frames are empty.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10225 entries, 0 to 10224
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10225 non-null  Int64         
 1   digitalLoanAccountId   10225 non-null  object        
 2   loanAccountNumber      10225 non-null  object        
 3   Beta_Cash_Demo_Score   10225 non-null  float64       
 4   trenchCategory         10225 non-null  object        
 5   appln_submit_datetime  10225 non-null  datetime64[us]
 6   disbursementdate       10225 non-null  dbdate        
 7   Application_month      10225 non-null  object        
 8   Data_selection         10225 non-null  object        
 9   deffstpd30             10225 non-null  Int64         
 10  flg_mature_fstpd_30    10225 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 908.8+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3212]:
# Make sure the score column is numeric before computing Gini.
# errors='coerce' turns any unparseable value into NaN instead of raising.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [3213]:
# Persist the combined Train+Test FSTPD30 scoring set as a CSV (relative path).
df_concat.to_csv(path_or_buf=r"Beta_Cash_Demo_Scoretrenchnunfstpd30.csv")

In [3214]:
# Gini of Beta_Cash_Demo_Score against the deffstpd30 flag.
# 'FSTPD30' is presumably the tag that ends up in the result's `bad_rate` column
# and gini column name -- confirm against calculate_periodic_gini.
gini_results = calculate_periodic_gini(
    df_concat,
    'Beta_Cash_Demo_Score',
    'deffstpd30',
    'FSTPD30',
)

In [3215]:
# Snapshot the FSTPD30 Gini table as f4, because `gini_results` is rebound for each metric.
f4 = gini_results.copy(deep=True)
f4.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.3585,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
1,2024-10-01,2024-10-31,0.315782,Month,Beta_Cash_Demo_Score,1.1.0,FSTPD30
2,2024-10-07,2024-10-13,0.369074,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
3,2024-10-14,2024-10-20,0.3735,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
4,2024-10-21,2024-10-27,0.203213,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30


## Combining the dataframes

In [3216]:
import functools

# Key columns every per-metric Gini frame is merged on.
# NOTE(review): `bad_rate` is one of the keys, yet the f1-f4 previews show a
# different bad_rate value per frame (FPD10, FPD30, ...), so rows from different
# frames will not line up and the outer merge effectively stacks them -- confirm
# this is the intended layout.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-merge two Gini result frames on ``common_columns``."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the per-metric frames (f0 comes from an earlier section of the notebook)
# into a single table, left to right.
dataframes = [f0, f1, f2, f3, f4]
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Beta_Cash_Demo_Score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'Beta_Cash_Demo_Score_FPD10_gini',
       'Beta_Cash_Demo_Score_FPD30_gini',
       'Beta_Cash_Demo_Score_FSPD30_gini',
       'Beta_Cash_Demo_Score_FSTPD30_gini'], dtype=object)

In [3217]:
# Reorder to descriptor columns followed by the five Gini columns, suffix every
# Gini column with `_t2`, and attach the constant Trench 2 metadata columns.
final_df = (
    final_df[
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
            'Beta_Cash_Demo_Score_FPD0_gini',
            'Beta_Cash_Demo_Score_FPD10_gini',
            'Beta_Cash_Demo_Score_FPD30_gini',
            'Beta_Cash_Demo_Score_FSPD30_gini',
            'Beta_Cash_Demo_Score_FSTPD30_gini',
        ]
    ]
    .rename(
        columns={
            'Beta_Cash_Demo_Score_FPD0_gini': 'Beta_Cash_Demo_Score_FPD0_t2_gini',
            'Beta_Cash_Demo_Score_FPD10_gini': 'Beta_Cash_Demo_Score_FPD10_t2_gini',
            'Beta_Cash_Demo_Score_FPD30_gini': 'Beta_Cash_Demo_Score_FPD30_t2_gini',
            'Beta_Cash_Demo_Score_FSPD30_gini': 'Beta_Cash_Demo_Score_FSPD30_t2_gini',
            'Beta_Cash_Demo_Score_FSTPD30_gini': 'Beta_Cash_Demo_Score_FSTPD30_t2_gini',
        }
    )
    .assign(
        Trench_category='Trench 2',
        Model_display_name='beta_demo_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                              datetime64[ns]
end_date                                datetime64[ns]
period                                          object
Model_Name                                      object
version                                         object
bad_rate                                        object
Beta_Cash_Demo_Score_FPD0_t2_gini              float64
Beta_Cash_Demo_Score_FPD10_t2_gini             float64
Beta_Cash_Demo_Score_FPD30_t2_gini             float64
Beta_Cash_Demo_Score_FSPD30_t2_gini            float64
Beta_Cash_Demo_Score_FSTPD30_t2_gini           float64
Trench_category                                 object
Model_display_name                              object
Product_type                                    object
dtype: object

In [3218]:
# Preview the combined Trench 2 table before it is uploaded.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,Beta_Cash_Demo_Score_FPD0_t2_gini,Beta_Cash_Demo_Score_FPD10_t2_gini,Beta_Cash_Demo_Score_FPD30_t2_gini,Beta_Cash_Demo_Score_FSPD30_t2_gini,Beta_Cash_Demo_Score_FSTPD30_t2_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.256061,,,,,Trench 2,beta_demo_model_cash,CASH
1,2024-10-01,2024-10-31,Month,Beta_Cash_Demo_Score,1.1.0,FPD0,0.217154,,,,,Trench 2,beta_demo_model_cash,CASH
2,2024-10-07,2024-10-13,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.272374,,,,,Trench 2,beta_demo_model_cash,CASH
3,2024-10-14,2024-10-20,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.252783,,,,,Trench 2,beta_demo_model_cash,CASH
4,2024-10-21,2024-10-27,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.12816,,,,,Trench 2,beta_demo_model_cash,CASH


In [3219]:
# Upload to BigQuery
# The destination table is replaced on every run (WRITE_TRUNCATE);
# use "WRITE_APPEND" instead to add rows to the existing table.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_cash_t2_v1_gini4"
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job completes; the finished job is the cell's displayed value.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=830625bc-2200-4f5d-b9c0-fde3bd9c243b>

## Trench 3

## FPD0

## Test

In [3220]:
# Test pull for Trench 3 / FPD0.
# What the query does:
#   * parsed / latest_request: reads Beta Cash Demo runs from audit_balance.ml_model_run_details
#     and keeps the most recent run (by start_time) per customerId + digitalLoanAccountId + modelDisplayName.
#   * model_run: latest 'Beta-Cash-Model-response' request per loan; its cleaned JSON payload is
#     used only as a fallback source for trenchCategory.
#   * deliquency: builds the 0/1 columns deffpd0..deffstpd30 and flg_mature_* from
#     risk_credit_mis.loan_deliquency_data.
#   * final select: rows with flagDisbursement = 1, a disbursementDateTime, a non-null score,
#     flg_mature_fpd0 = 1 and trenchCategory = 'Trench 3', tagged 'Test'.
#     appln_submit_datetime is startApplyDateTime for 'Flex-up' loans, otherwise
#     termsAndConditionsSubmitDateTime, falling back to the model run's start_time.
# NOTE(review): calcFeatures built in `parsed` is not selected by any later CTE.
# NOTE(review): this SQL is repeated per bad-rate/trench with only the flag columns changed -
# a candidate for a single parameterised helper.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop rows that are exact duplicates across all columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1103892,5fd7d0be-13bb-495d-9caf-c2b4c1529c40,60811038920061,0.5011261539712852,Trench 3,2025-10-19 17:39:45,2025-10-19,2025-10,Test,1,1
1,2120741,f5a444be-5468-4177-9a7f-a9b76ff0873c,60821207410046,0.2213769065102644,Trench 3,2025-10-15 17:27:17,2025-10-15,2025-10,Test,1,1
2,1064330,6bbe8a4e-7b4d-4799-b18a-63db74121e24,60810643300061,0.384713221842678,Trench 3,2025-10-19 11:32:47,2025-10-19,2025-10,Test,0,1
3,1421326,22dc3ae4-e0c4-4dc8-a4b9-374ce8b6b2ba,60814213260123,0.3430881978052133,Trench 3,2025-10-24 18:30:37,2025-10-24,2025-10,Test,0,1
4,1732040,7e20cd1c-ce87-4a0e-9b8a-b901d5d72ab7,60817320400021,0.4271632260062578,Trench 3,2025-10-20 22:14:35,2025-10-21,2025-10,Test,0,1


In [3221]:
# Keep the Test pull as df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [3222]:
# Train pull for Trench 3 / FPD0.
# What the query does:
#   * parsed / modelname: reads Beta Cash Demo runs from
#     dap_ds_poweruser_playground.ml_training_model_run_details and uses every row as-is
#     (no latest-run filter and no trenchCategory fallback in this query).
#   * deliquency: builds the 0/1 columns deffpd0..deffstpd30 and flg_mature_* from
#     risk_credit_mis.loan_deliquency_data.
#   * final select: rows with flagDisbursement = 1, a disbursementDateTime, a non-null score,
#     flg_mature_fpd0 = 1 and trenchCategory = 'Trench 3', tagged 'Train'.
# NOTE(review): calcFeatures built in `parsed` is not selected by any later CTE.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
 and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop rows that are exact duplicates across all columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1261609,4e4e5ce4-db69-451f-a894-060b2d692b34,60812616090046,0.470081,Trench 3,2024-10-14 01:00:18,2024-10-14,2024-10,Train,1,1
1,2475072,34ff7eab-741f-4e15-8dcc-8970a7c32115,60824750720029,0.382201,Trench 3,2024-10-19 18:00:33,2024-10-19,2024-10,Train,1,1
2,2478152,2414fbc7-7ce6-48f2-ad78-f4757d0d5066,60824781520021,0.463289,Trench 3,2024-10-27 22:36:50,2024-10-29,2024-10,Train,0,1
3,1717485,bbea2b77-bf7b-4bb2-927f-1b9020dfa7ca,60817174850025,0.438143,Trench 3,2024-10-15 15:58:17,2024-10-15,2024-10,Train,0,1
4,2838946,0f4c7d20-2e2b-4b3a-806c-97d9ece4aa12,60828389460021,0.382831,Trench 3,2024-10-04 12:09:03,2024-10-05,2024-10,Train,0,1


In [3223]:
# Keep the Train pull as df2 so it can be stacked with the Test pull (df1).
df2 = dfd.copy()

In [3224]:
# Stack Train (df2) above Test (df1) with a fresh 0..n-1 index.
# Empty pulls are removed first: concatenating an empty frame with a non-empty one
# triggers pandas' FutureWarning about empty entries and, in a future pandas version,
# would let the empty frame influence the result dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns.
    df_concat = df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11455 entries, 0 to 11454
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11455 non-null  object        
 1   digitalLoanAccountId   11455 non-null  object        
 2   loanAccountNumber      11455 non-null  object        
 3   Beta_Cash_Demo_Score   11455 non-null  object        
 4   trenchCategory         11455 non-null  object        
 5   appln_submit_datetime  11455 non-null  datetime64[us]
 6   disbursementdate       11455 non-null  dbdate        
 7   Application_month      11455 non-null  object        
 8   Data_selection         11455 non-null  object        
 9   deffpd0                11455 non-null  Int64         
 10  flg_mature_fpd0        11455 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1006.9+ KB


In [3225]:
# The .info() output above lists Beta_Cash_Demo_Score as object after the concat, so convert it to a
# numeric dtype; errors='coerce' turns anything unparseable into NaN instead of raising.
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3226]:
# Write the combined FPD0 sample to the working directory (the DataFrame index is written as the first column).
# NOTE(review): the file name carries no trench number - confirm it cannot overwrite another trench's export.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd0.csv")

In [3227]:
# Gini of Beta_Cash_Demo_Score against the deffpd0 flag, labelled 'FPD0'.
# calculate_periodic_gini is defined elsewhere in this notebook; presumably one row per week/month (see the f0 preview).
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd0', 'FPD0')

In [3228]:
# Snapshot the FPD0 results as f0: gini_results is reassigned for every bad-rate definition.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.061224,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
1,2024-10-01,2024-10-31,0.123607,Month,Beta_Cash_Demo_Score,1.1.0,FPD0
2,2024-10-07,2024-10-13,-0.001855,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
3,2024-10-14,2024-10-20,0.195829,Week,Beta_Cash_Demo_Score,1.1.0,FPD0
4,2024-10-21,2024-10-27,0.309463,Week,Beta_Cash_Demo_Score,1.1.0,FPD0


## FPD10

## Test

In [3229]:
# Test pull for Trench 3 / FPD10.
# What the query does:
#   * parsed / latest_request: reads Beta Cash Demo runs from audit_balance.ml_model_run_details
#     and keeps the most recent run (by start_time) per customerId + digitalLoanAccountId + modelDisplayName.
#   * model_run: latest 'Beta-Cash-Model-response' request per loan; its cleaned JSON payload is
#     used only as a fallback source for trenchCategory.
#   * deliquency: builds the 0/1 columns deffpd0..deffstpd30 and flg_mature_* from
#     risk_credit_mis.loan_deliquency_data.
#   * final select: rows with flagDisbursement = 1, a disbursementDateTime, a non-null score,
#     flg_mature_fpd10 = 1 and trenchCategory = 'Trench 3', tagged 'Test'.
# NOTE(review): calcFeatures built in `parsed` is not selected by any later CTE.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop rows that are exact duplicates across all columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2248295,b90d3125-4f3e-4538-9f58-313adb001808,60822482950043,0.3985564650013071,Trench 3,2025-10-09 09:50:34,2025-10-09,2025-10,Test,0,1
1,2972614,d44ce295-3a60-470d-93aa-75e476e5b292,60829726140033,0.3769142160265958,Trench 3,2025-10-16 15:12:29,2025-10-16,2025-10,Test,0,1
2,2750896,8ad9a58b-9413-4578-b1cc-0d3ce10e1c48,60827508960029,0.3885018457902646,Trench 3,2025-10-13 10:39:31,2025-10-13,2025-10,Test,0,1
3,2642293,c81869ae-c522-4839-91a8-b8e7285e3c0f,60826422930033,0.5521915931467914,Trench 3,2025-09-26 15:49:49,2025-09-26,2025-09,Test,1,1
4,3114718,c8fd4ba4-3022-439e-8169-5a4329d0e400,60831147180045,0.3886167503162845,Trench 3,2025-10-08 21:08:16,2025-10-08,2025-10,Test,0,1


In [3230]:
# Keep the Test pull as df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [3231]:
# Train pull for Trench 3 / FPD10.
# What the query does:
#   * parsed / modelname: reads Beta Cash Demo runs from
#     dap_ds_poweruser_playground.ml_training_model_run_details and uses every row as-is
#     (no latest-run filter and no trenchCategory fallback in this query).
#   * deliquency: builds the 0/1 columns deffpd0..deffstpd30 and flg_mature_* from
#     risk_credit_mis.loan_deliquency_data.
#   * final select: rows with flagDisbursement = 1, a disbursementDateTime, a non-null score,
#     flg_mature_fpd10 = 1 and trenchCategory = 'Trench 3', tagged 'Train'.
# NOTE(review): calcFeatures built in `parsed` is not selected by any later CTE.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop rows that are exact duplicates across all columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,1579809,3dd2d8c0-90ec-4838-b39f-4b8960134bfc,60815798090039,0.39455,Trench 3,2024-10-02 11:26:41,2024-10-02,2024-10,Train,0,1
1,2435867,19b42d45-b2c8-4fa5-a730-d7c55d5ead03,60824358670026,0.564068,Trench 3,2024-10-29 17:42:08,2024-10-29,2024-10,Train,0,1
2,2501968,8550cddd-073d-4d02-8a5b-88b4a876a5c2,60825019680025,0.406863,Trench 3,2024-10-22 08:33:44,2024-10-22,2024-10,Train,1,1
3,2157119,4128534d-5d98-4945-90ce-9e9a0bf21840,60821571190027,0.373175,Trench 3,2024-10-23 19:32:57,2024-10-23,2024-10,Train,0,1
4,2926310,3f9bad05-7bdd-4e16-b04d-f11f81e7bc8b,60829263100023,0.247749,Trench 3,2024-10-10 20:50:14,2024-10-10,2024-10,Train,0,1


In [3232]:
# Keep the Train pull as df2 so it can be stacked with the Test pull (df1).
df2 = dfd.copy()

In [3233]:
# Stack Train (df2) above Test (df1) with a fresh 0..n-1 index.
# Empty pulls are removed first: concatenating an empty frame with a non-empty one
# triggers pandas' FutureWarning about empty entries and, in a future pandas version,
# would let the empty frame influence the result dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns.
    df_concat = df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11286 entries, 0 to 11285
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11286 non-null  object        
 1   digitalLoanAccountId   11286 non-null  object        
 2   loanAccountNumber      11286 non-null  object        
 3   Beta_Cash_Demo_Score   11286 non-null  object        
 4   trenchCategory         11286 non-null  object        
 5   appln_submit_datetime  11286 non-null  datetime64[us]
 6   disbursementdate       11286 non-null  dbdate        
 7   Application_month      11286 non-null  object        
 8   Data_selection         11286 non-null  object        
 9   deffpd10               11286 non-null  Int64         
 10  flg_mature_fpd10       11286 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 992.1+ KB


In [3234]:
# The .info() output above lists Beta_Cash_Demo_Score as object after the concat, so convert it to a
# numeric dtype; errors='coerce' turns anything unparseable into NaN instead of raising.
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3235]:
# Write the combined FPD10 sample to the working directory (the DataFrame index is written as the first column).
# NOTE(review): the file name carries no trench number - confirm it cannot overwrite another trench's export.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd10.csv")

In [3236]:
# Gini of Beta_Cash_Demo_Score against the deffpd10 flag, labelled 'FPD10'.
# calculate_periodic_gini is defined elsewhere in this notebook; presumably one row per week/month (see the f1 preview).
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd10', 'FPD10')

In [3237]:
# Snapshot the FPD10 results as f1: gini_results is reassigned for every bad-rate definition.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.211931,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
1,2024-10-01,2024-10-31,0.235389,Month,Beta_Cash_Demo_Score,1.1.0,FPD10
2,2024-10-07,2024-10-13,0.145455,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
3,2024-10-14,2024-10-20,0.281073,Week,Beta_Cash_Demo_Score,1.1.0,FPD10
4,2024-10-21,2024-10-27,0.323349,Week,Beta_Cash_Demo_Score,1.1.0,FPD10


## FPD30

## Test

In [3238]:
# Test pull for Trench 3 / FPD30.
# What the query does:
#   * parsed / latest_request: reads Beta Cash Demo runs from audit_balance.ml_model_run_details
#     and keeps the most recent run (by start_time) per customerId + digitalLoanAccountId + modelDisplayName.
#   * model_run: latest 'Beta-Cash-Model-response' request per loan; its cleaned JSON payload is
#     used only as a fallback source for trenchCategory.
#   * deliquency: builds the 0/1 columns deffpd0..deffstpd30 and flg_mature_* from
#     risk_credit_mis.loan_deliquency_data.
#   * final select: rows with flagDisbursement = 1, a disbursementDateTime, a non-null score,
#     flg_mature_fpd30 = 1 and trenchCategory = 'Trench 3', tagged 'Test'.
# NOTE(review): calcFeatures built in `parsed` is not selected by any later CTE.
# NOTE(review): the preview stored below this cell is header-only, i.e. this pull returned no rows
# in that run - downstream cells must cope with an empty Test frame.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop rows that are exact duplicates across all columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [3239]:
# Keep the Test pull as df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [3240]:
# Train pull for Trench 3 / FPD30.
# What the query does:
#   * parsed / modelname: reads Beta Cash Demo runs from
#     dap_ds_poweruser_playground.ml_training_model_run_details and uses every row as-is
#     (no latest-run filter and no trenchCategory fallback in this query).
#   * deliquency: builds the 0/1 columns deffpd0..deffstpd30 and flg_mature_* from
#     risk_credit_mis.loan_deliquency_data.
#   * final select: rows with flagDisbursement = 1, a disbursementDateTime, a non-null score,
#     flg_mature_fpd30 = 1 and trenchCategory = 'Trench 3', tagged 'Train'.
# NOTE(review): calcFeatures built in `parsed` is not selected by any later CTE.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop rows that are exact duplicates across all columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2382664,01c250ff-03f3-4b69-97a6-3384dbb281cb,60823826640024,0.403982,Trench 3,2024-10-18 08:36:11,2024-10-18,2024-10,Train,0,1
1,1515735,532a74d6-8842-454c-9b6b-d4d87888a8da,60815157350032,0.426699,Trench 3,2024-10-05 20:25:38,2024-10-05,2024-10,Train,0,1
2,2529603,b3351b68-0a36-4a56-8831-e6e881ab4320,60825296030021,0.395325,Trench 3,2024-10-09 14:07:21,2024-10-09,2024-10,Train,0,1
3,2512910,d1f187b7-b464-4fb6-a1d2-5934662e8e1a,60825129100026,0.448347,Trench 3,2024-10-30 16:01:54,2024-10-30,2024-10,Train,0,1
4,2847235,d8f28c98-faf5-467f-8927-43d3bdeea769,60828472350027,0.519244,Trench 3,2024-10-08 21:13:42,2024-10-09,2024-10,Train,0,1


In [3241]:
# Keep the Train pull as df2 so it can be stacked with the Test pull (df1).
df2 = dfd.copy()

In [3242]:
# Stack Train (df2) above Test (df1) with a fresh 0..n-1 index.
# The FPD30 Test pull returned no rows in the stored run (header-only preview), and passing
# that empty frame to pd.concat is what raises pandas' FutureWarning about empty entries;
# in a future pandas version the empty frame would also influence the result dtypes.
# Dropping empty pulls first keeps today's dtypes and silences the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns.
    df_concat = df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10792 entries, 0 to 10791
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10792 non-null  Int64         
 1   digitalLoanAccountId   10792 non-null  object        
 2   loanAccountNumber      10792 non-null  object        
 3   Beta_Cash_Demo_Score   10792 non-null  float64       
 4   trenchCategory         10792 non-null  object        
 5   appln_submit_datetime  10792 non-null  datetime64[us]
 6   disbursementdate       10792 non-null  dbdate        
 7   Application_month      10792 non-null  object        
 8   Data_selection         10792 non-null  object        
 9   deffpd30               10792 non-null  Int64         
 10  flg_mature_fpd30       10792 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 959.2+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3243]:
# Coerce the score column to numeric; errors='coerce' turns unparseable values into NaN instead of raising.
# The .info() output above already reports this column as float64, so this should be a no-op here.
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3244]:
# Write the combined FPD30 sample to the working directory (the DataFrame index is written as the first column).
# NOTE(review): the file name carries no trench number - confirm it cannot overwrite another trench's export.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd30.csv")

In [3245]:
# Gini of Beta_Cash_Demo_Score against the deffpd30 flag, labelled 'FPD30'.
# calculate_periodic_gini is defined elsewhere in this notebook; presumably one row per week/month (see the f2 preview).
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd30', 'FPD30')

In [3246]:
# Snapshot the FPD30 results as f2: gini_results is reassigned for every bad-rate definition.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.211931,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
1,2024-10-01,2024-10-31,0.243771,Month,Beta_Cash_Demo_Score,1.1.0,FPD30
2,2024-10-07,2024-10-13,0.169643,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
3,2024-10-14,2024-10-20,0.236058,Week,Beta_Cash_Demo_Score,1.1.0,FPD30
4,2024-10-21,2024-10-27,0.415254,Week,Beta_Cash_Demo_Score,1.1.0,FPD30


## FSPD30

## Test

In [3247]:
# Test sample for the FSPD30 target, restricted to trenchCategory = 'Trench 3'.
#
# What the query does:
#   * parsed / latest_request: Beta-Cash-Demo model runs from audit_balance.ml_model_run_details,
#     keeping the most recent run (by start_time) per customerId, digitalLoanAccountId and
#     modelDisplayName.
#   * model_run: the most recent 'Beta-Cash-Model-response' request per customer/loan; its
#     payload supplies trenchCategory when the model run row has none (coalesce).
#   * deliquency: default flags (deffpd0 ... deffstpd30) and maturity flags derived from
#     obs_min_inst_def* / min_inst_def*; deffspd30 = 1 when obs_min_inst_def30 >= 2 and
#     min_inst_def30 is 1 or 2.
#   * final select: disbursed loans with a non-null score and flg_mature_fspd_30 = 1.
#
# NOTE(review): the saved output of this cell shows only a header row, i.e. the query returned
# no rows when the notebook was last run -- confirm that is expected for Trench 3.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3248]:
# Snapshot the Test frame as df1; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3249]:
# Train sample for the FSPD30 target, restricted to trenchCategory = 'Trench 3'.
#
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details (prediction is used
# as Beta_Cash_Demo_Score and trenchCategory is taken directly from that table). Unlike the Test
# query there is no "latest run" de-duplication in SQL; exact duplicate rows are dropped in
# pandas after the fetch. The deliquency CTE and the final filters (disbursed loan, non-null
# score, flg_mature_fspd_30 = 1) are the same as in the Test query.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2407493,5ce5c7d7-cb83-4788-8b22-a0ef63aaa0f0,60824074930029,0.421418,Trench 3,2024-10-10 19:08:22,2024-10-10,2024-10,Train,0,1
1,2601300,1e7cbdba-540c-4493-aaf0-e4a08750a93a,60826013000023,0.396923,Trench 3,2024-10-01 21:34:09,2024-10-01,2024-10,Train,0,1
2,1371177,75515663-5030-48d4-a189-bc4eac9f8453,60813711770044,0.427336,Trench 3,2024-10-02 02:40:28,2024-10-02,2024-10,Train,0,1
3,2477699,92cd21cf-747e-4354-8009-51e2133bfdfa,60824776990026,0.551348,Trench 3,2024-10-03 12:52:06,2024-10-03,2024-10,Train,0,1
4,2255402,6882c9ca-06e9-4276-a5fd-9ceb949521de,60822554020033,0.52932,Trench 3,2024-10-27 01:02:29,2024-10-27,2024-10,Train,0,1


In [3250]:
# Snapshot the Train frame as df2 so it can be concatenated with the Test frame (df1).
df2 = dfd.copy()

In [3251]:
# Stack Train (df2) on top of Test (df1) and renumber the index from 0.
# NOTE(review): the saved output echoes this line as the source of a warning; the Test frame
# displayed no rows above, so this is presumably pandas' FutureWarning about concatenating
# empty frames -- confirm.
df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9594 entries, 0 to 9593
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9594 non-null   Int64         
 1   digitalLoanAccountId   9594 non-null   object        
 2   loanAccountNumber      9594 non-null   object        
 3   Beta_Cash_Demo_Score   9594 non-null   float64       
 4   trenchCategory         9594 non-null   object        
 5   appln_submit_datetime  9594 non-null   datetime64[us]
 6   disbursementdate       9594 non-null   dbdate        
 7   Application_month      9594 non-null   object        
 8   Data_selection         9594 non-null   object        
 9   deffspd30              9594 non-null   Int64         
 10  flg_mature_fspd_30     9594 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 852.7+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3252]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN (errors='coerce').
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3253]:
# Export the combined train+test frame to a CSV in the current working directory
# (the DataFrame index is written as the first, unnamed column).
# NOTE(review): the file name says 'trenchnun' although both queries above filter on
# trenchCategory = 'Trench 3' -- confirm the name is intended and does not overwrite another
# section's export.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfspd30.csv")

In [3254]:
# Periodic gini of Beta_Cash_Demo_Score against the deffspd30 flag. 'FSPD30' is the label argument;
# in the saved output it shows up in the gini column name and in the bad_rate column.
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffspd30', 'FSPD30')

In [3255]:
# Keep the FSPD30 result under its own name (f3); gini_results is reassigned by the next section
# and f3 is one of the frames merged in the "combining" cell.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.259831,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
1,2024-10-01,2024-10-31,0.283025,Month,Beta_Cash_Demo_Score,1.1.0,FSPD30
2,2024-10-07,2024-10-13,0.002157,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
3,2024-10-14,2024-10-20,0.363873,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30
4,2024-10-21,2024-10-27,0.404337,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30


## FSTPD30

## Test

In [3256]:
# Test sample for the FSTPD30 target, restricted to trenchCategory = 'Trench 3'.
#
# Same structure as the FSPD30 Test query: latest Beta-Cash-Demo model run per
# customer/loan/model name, trenchCategory falling back to the latest
# 'Beta-Cash-Model-response' request payload, joined to loan master and the deliquency flags.
# The target here is deffstpd30 (obs_min_inst_def30 >= 3 and min_inst_def30 in (1,2,3)) and only
# loans with flg_mature_fstpd_30 = 1 are kept.
#
# NOTE(review): the saved output of this cell shows only a header row, i.e. the query returned
# no rows when the notebook was last run -- confirm that is expected for Trench 3.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3257]:
# Snapshot the Test frame as df1; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3258]:
# Train sample for the FSTPD30 target, restricted to trenchCategory = 'Trench 3'.
#
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details (prediction is used
# as Beta_Cash_Demo_Score and trenchCategory is taken directly from that table). The target is
# deffstpd30 (obs_min_inst_def30 >= 3 and min_inst_def30 in (1,2,3)); only disbursed loans with a
# non-null score and flg_mature_fstpd_30 = 1 are returned.
#
# FIX: the trench filter previously read 'Trench 2' (copy-paste leftover). Every other query in
# this section filters on 'Trench 3', and the merged result is labelled 'Trench 3' and uploaded
# to the *_t3_* table, so the FSTPD30 gini was being computed on the wrong trench.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2449267,fbd2124c-0472-4942-ab63-cf748be7b855,60824492670011,0.47923,Trench 2,2024-10-08 21:53:13,2024-10-08,2024-10,Train,0,1
1,2823236,55813852-6451-4663-a9cb-9483a5a6fc34,60828232360014,0.544488,Trench 2,2024-10-17 16:31:08,2024-10-17,2024-10,Train,0,1
2,2207045,1b45b3d7-3ec6-4a12-9766-a1b21a6f8691,60822070450019,0.557384,Trench 2,2024-10-27 01:24:04,2024-10-27,2024-10,Train,1,1
3,2723775,be26d1ce-f427-4b95-b9e0-82389f29f6aa,60827237750011,0.397411,Trench 2,2024-10-03 09:23:20,2024-10-03,2024-10,Train,0,1
4,1178685,4d5388c9-11c7-4d6c-b5ed-a9352481e9e6,60811786850011,0.589641,Trench 2,2024-10-01 18:58:45,2024-10-01,2024-10,Train,0,1


In [3259]:
# Snapshot the Train frame as df2 so it can be concatenated with the Test frame (df1).
df2 = dfd.copy()

In [3260]:
# Stack Train (df2) on top of Test (df1) and renumber the index from 0.
# NOTE(review): the saved output echoes this line as the source of a warning; the Test frame
# displayed no rows above, so this is presumably pandas' FutureWarning about concatenating
# empty frames -- confirm.
df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10225 entries, 0 to 10224
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10225 non-null  Int64         
 1   digitalLoanAccountId   10225 non-null  object        
 2   loanAccountNumber      10225 non-null  object        
 3   Beta_Cash_Demo_Score   10225 non-null  float64       
 4   trenchCategory         10225 non-null  object        
 5   appln_submit_datetime  10225 non-null  datetime64[us]
 6   disbursementdate       10225 non-null  dbdate        
 7   Application_month      10225 non-null  object        
 8   Data_selection         10225 non-null  object        
 9   deffstpd30             10225 non-null  Int64         
 10  flg_mature_fstpd_30    10225 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 908.8+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3261]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN (errors='coerce').
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')

In [3262]:
# Export the combined train+test frame to a CSV in the current working directory
# (the DataFrame index is written as the first, unnamed column).
# NOTE(review): the file name says 'trenchnun' while this section's results are later labelled
# 'Trench 3' -- confirm the name is intended and does not overwrite another section's export.
df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfstpd30.csv")

In [3263]:
# Periodic gini of Beta_Cash_Demo_Score against the deffstpd30 flag. 'FSTPD30' is the label
# argument; in the saved output it shows up in the gini column name and in the bad_rate column.
gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffstpd30', 'FSTPD30')

In [3264]:
# Keep the FSTPD30 result under its own name (f4); it is the last of the frames merged in the
# "combining" cell.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-09-30,2024-10-06,0.3585,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
1,2024-10-01,2024-10-31,0.315782,Month,Beta_Cash_Demo_Score,1.1.0,FSTPD30
2,2024-10-07,2024-10-13,0.369074,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
3,2024-10-14,2024-10-20,0.3735,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30
4,2024-10-21,2024-10-27,0.203213,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30


## Combining the dataframes

In [3265]:
import functools

# Columns every periodic-gini frame shares; they form the join key.
# Note that 'bad_rate' is part of the key and each input frame carries its own bad_rate label
# (FPD0, FPD30, FSPD30, ...), so rows from different frames do not match each other: the outer
# join stacks them and leaves NaN in the other frames' gini columns (see final_df.head() below).
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

# One gini frame per bad-rate definition, in the order they are folded together.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two gini frames on the shared key columns and return the result."""
    merged = pd.merge(df1, df2, how='outer', on=common_columns)
    return merged


# Fold the list left to right: ((((f0 + f1) + f2) + f3) + f4).
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Beta_Cash_Demo_Score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'Beta_Cash_Demo_Score_FPD10_gini',
       'Beta_Cash_Demo_Score_FPD30_gini',
       'Beta_Cash_Demo_Score_FSPD30_gini',
       'Beta_Cash_Demo_Score_FSTPD30_gini'], dtype=object)

In [3266]:
# Reorder to "key columns, then gini columns", tag each gini column with the trench suffix
# (..._gini -> ..._t3_gini) and append the constant descriptor columns, all in one chain so
# final_df is rebound exactly once.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'Beta_Cash_Demo_Score_FPD0_gini',
        'Beta_Cash_Demo_Score_FPD10_gini',
        'Beta_Cash_Demo_Score_FPD30_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini',
    ]]
    .rename(columns={
        'Beta_Cash_Demo_Score_FPD0_gini': 'Beta_Cash_Demo_Score_FPD0_t3_gini',
        'Beta_Cash_Demo_Score_FPD10_gini': 'Beta_Cash_Demo_Score_FPD10_t3_gini',
        'Beta_Cash_Demo_Score_FPD30_gini': 'Beta_Cash_Demo_Score_FPD30_t3_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini': 'Beta_Cash_Demo_Score_FSPD30_t3_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini': 'Beta_Cash_Demo_Score_FSTPD30_t3_gini',
    })
    .assign(
        Trench_category='Trench 3',
        Model_display_name='beta_demo_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                              datetime64[ns]
end_date                                datetime64[ns]
period                                          object
Model_Name                                      object
version                                         object
bad_rate                                        object
Beta_Cash_Demo_Score_FPD0_t3_gini              float64
Beta_Cash_Demo_Score_FPD10_t3_gini             float64
Beta_Cash_Demo_Score_FPD30_t3_gini             float64
Beta_Cash_Demo_Score_FSPD30_t3_gini            float64
Beta_Cash_Demo_Score_FSTPD30_t3_gini           float64
Trench_category                                 object
Model_display_name                              object
Product_type                                    object
dtype: object

In [3267]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,Beta_Cash_Demo_Score_FPD0_t3_gini,Beta_Cash_Demo_Score_FPD10_t3_gini,Beta_Cash_Demo_Score_FPD30_t3_gini,Beta_Cash_Demo_Score_FSPD30_t3_gini,Beta_Cash_Demo_Score_FSTPD30_t3_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.061224,,,,,Trench 3,beta_demo_model_cash,CASH
1,2024-10-01,2024-10-31,Month,Beta_Cash_Demo_Score,1.1.0,FPD0,0.123607,,,,,Trench 3,beta_demo_model_cash,CASH
2,2024-10-07,2024-10-13,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,-0.001855,,,,,Trench 3,beta_demo_model_cash,CASH
3,2024-10-14,2024-10-20,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.195829,,,,,Trench 3,beta_demo_model_cash,CASH
4,2024-10-21,2024-10-27,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,0.309463,,,,,Trench 3,beta_demo_model_cash,CASH


In [3268]:
# Load the Trench 3 gini summary (final_df) into BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_cash_t3_v1_gini4"

# WRITE_TRUNCATE replaces the destination table's contents; use "WRITE_APPEND" to add rows instead.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
# Block until the load job completes; the finished job is also what the cell displays.
job.result()

Flushing oldest 200 entries.
  warn('Output cache limit (currently {sz} entries) hit.\n'


LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=72e0df40-91d8-4841-b01f-c78d64314048>

# Beta-Cash-AppScore-Model

## Trench 1

## FPD0

## Test

In [3269]:
# Test sample for the Beta-Cash-AppScore model, FPD0 target, trenchCategory = 'Trench 1'.
#
# What the query does:
#   * parsed / latest_request: AppScore model runs from audit_balance.ml_model_run_details,
#     keeping the most recent run (by start_time) per customerId, digitalLoanAccountId and
#     modelDisplayName. prediction is normalised (single -> double quotes, None -> null) into
#     prediction_clean so it can be read as JSON.
#   * modelname: the score is $.combined_score from prediction_clean, SAFE_CAST to Float64
#     (NULL when it cannot be cast); trenchCategory falls back to the latest
#     'Beta-Cash-Model-response' request payload when the run row has none.
#   * deliquency: default and maturity flags; deffpd0 = 1 when obs_min_inst_def0 >= 1 and
#     min_inst_def0 = 1.
#   * final select: disbursed loans with a non-null score and flg_mature_fpd0 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,3713908,ff29594e-6d08-4439-9aff-12debc426602,60837139080017,0.490016,Trench 1,2025-09-30 17:11:30,2025-09-30,2025-09,Test,1,1
1,3714828,b2e2d175-f153-4224-bfa0-3b02fa3e5c03,60837148280012,0.44969,Trench 1,2025-10-09 08:13:11,2025-10-09,2025-10,Test,0,1
2,3731055,f8a72aba-a39a-41f3-846a-825d55559343,60837310550019,0.44743,Trench 1,2025-10-08 12:04:26,2025-10-08,2025-10,Test,0,1
3,3741204,826ee67b-5d56-4089-b33d-7233cf57970e,60837412040019,0.4429,Trench 1,2025-10-13 10:23:05,2025-10-15,2025-10,Test,0,1
4,3741720,2ae087c6-1b95-426f-beef-d03352c34917,60837417200014,0.289677,Trench 1,2025-10-13 13:59:18,2025-10-13,2025-10,Test,0,1


In [3270]:
# Snapshot the Test frame as df1; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3271]:
# Train sample for the Beta-Cash-AppScore model, FPD0 target, trenchCategory = 'Trench 1'.
#
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details; the prediction
# column is used directly as beta_cash_app_score and trenchCategory is taken from the same table.
# NOTE(review): the Test query extracts $.combined_score from the prediction JSON, whereas this
# query uses the raw prediction column -- confirm both represent the same score.
# The deliquency CTE and final filters (disbursed loan, non-null score, flg_mature_fpd0 = 1)
# match the Test query.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2362474,92bfd19b-af22-4656-aa19-a2b30250aba4,60823624740012,0.41215,Trench 1,2024-01-08 14:04:44,2024-01-08,2024-01,Train,0,1
1,2370367,2b5700af-142e-4d5f-aec1-17749a13aa23,60823703670017,0.501125,Trench 1,2024-01-16 13:13:10,2024-01-18,2024-01,Train,1,1
2,2378762,37e79ce0-96ef-4b7d-9f39-310e2448b0fb,60823787620016,0.572945,Trench 1,2024-01-24 19:34:56,2024-01-25,2024-01,Train,1,1
3,2381621,1cc3fdc5-c5e5-4bb0-a6f6-ad03ee7f6201,60823816210011,0.48846,Trench 1,2024-01-27 16:53:41,2024-01-28,2024-01,Train,0,1
4,2365447,a4cc6507-81ab-45f4-a246-0a1fce3eb51c,60823654470011,0.472277,Trench 1,2024-01-11 14:40:08,2024-01-11,2024-01,Train,0,1


In [3272]:
# Snapshot the Train frame as df2 so it can be concatenated with the Test frame (df1).
df2 = dfd.copy()

In [3273]:
# Stack Train (df2) on top of Test (df1) and renumber the index from 0.
# NOTE(review): the saved info() output lists customerId as object here (it is Int64 in the
# Beta-Cash-Demo sections) -- presumably the train and test sources return it with different
# types; confirm before using it as a key.
df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10404 entries, 0 to 10403
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10404 non-null  object        
 1   digitalLoanAccountId   10404 non-null  object        
 2   loanAccountNumber      10404 non-null  object        
 3   beta_cash_app_score    10404 non-null  float64       
 4   trenchCategory         10404 non-null  object        
 5   appln_submit_datetime  10404 non-null  datetime64[us]
 6   disbursementdate       10404 non-null  dbdate        
 7   Application_month      10404 non-null  object        
 8   Data_selection         10404 non-null  object        
 9   deffpd0                10404 non-null  Int64         
 10  flg_mature_fpd0        10404 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(6)
memory usage: 914.5+ KB


In [3274]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN (errors='coerce').
df_concat['beta_cash_app_score'] = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')

In [3275]:
# Export the combined Trench 1 train+test frame to a CSV in the current working directory
# (the DataFrame index is written as the first, unnamed column).
df_concat.to_csv(r"beta_cash_app_scoretrench1fpd0.csv")

In [3276]:
# Periodic gini of beta_cash_app_score against the deffpd0 flag. 'FPD0' is the label argument;
# in the saved output it shows up in the gini column name and in the bad_rate column.
gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd0', 'FPD0')

In [3277]:
# Keep the FPD0 result under its own name (f0). This rebinds the f0 used earlier for the
# Beta-Cash-Demo merge, so that earlier merge cell must not be re-run after this point
# without regenerating its inputs.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,-0.076923,Week,beta_cash_app_score,1.1.0,FPD0
1,2024-01-01,2024-01-31,0.255906,Month,beta_cash_app_score,1.1.0,FPD0
2,2024-01-08,2024-01-14,0.875,Week,beta_cash_app_score,1.1.0,FPD0
3,2024-01-15,2024-01-21,0.430233,Week,beta_cash_app_score,1.1.0,FPD0
4,2024-01-22,2024-01-28,-0.051429,Week,beta_cash_app_score,1.1.0,FPD0


## FPD10

## Test

In [3278]:
# Test sample, FPD10 target.
# Scores come from audit_balance.ml_model_run_details, keeping the most recent
# run per (customerId, digitalLoanAccountId, modelDisplayName). The final
# select keeps disbursed 'Trench 1' loans with a non-null score whose
# flg_mature_fpd10 flag equals 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3742304,220c3f42-48cd-4045-877b-dafc62e6639f,60837423040016,0.421974,Trench 1,2025-10-13 17:47:35,2025-10-16,2025-10,Test,0,1
1,3732440,03952368-14cf-4c6c-8ac3-840aff36b570,60837324400018,0.471122,Trench 1,2025-10-09 08:32:55,2025-10-09,2025-10,Test,0,1
2,3743051,4260b30d-ec1d-470d-abf3-2e9055dbae56,60837430510018,0.510704,Trench 1,2025-10-14 07:16:04,2025-10-15,2025-10,Test,0,1
3,3728879,e6a42269-5af2-472a-9d07-387c9f61a9af,60837288790014,0.374427,Trench 1,2025-10-07 22:46:41,2025-10-08,2025-10,Test,0,1
4,3740732,1f0631d9-e42a-421c-b90f-2206714d2d0a,60837407320014,0.454868,Trench 1,2025-10-12 23:29:14,2025-10-13,2025-10,Test,0,1


In [3279]:
# Snapshot the Test extract; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3280]:
# Train sample, FPD10 target.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details
# (the prediction column is used as the score as-is). The final select keeps
# disbursed 'Trench 1' loans with a non-null score whose flg_mature_fpd10
# flag equals 1.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2380527,cd836049-d388-4b91-9304-1970ce11f0a7,60823805270011,0.52279,Trench 1,2024-01-26 15:47:00,2024-01-26,2024-01,Train,0,1
1,2375304,3e17511a-ae6b-4ad0-a1be-c0e88b51d23d,60823753040012,0.682634,Trench 1,2024-01-21 14:01:25,2024-01-24,2024-01,Train,0,1
2,2374217,590870ae-1caf-46c1-b1c8-68e2cc58e515,60823742170011,0.603269,Trench 1,2024-01-20 13:46:14,2024-01-20,2024-01,Train,0,1
3,2374479,7b650e5d-e32c-4fc9-b2a9-3b09dd70910a,60823744790011,0.433075,Trench 1,2024-01-20 16:50:13,2024-01-20,2024-01,Train,0,1
4,2385016,8f9d2965-137e-4671-9703-42f496ca5d76,60823850160012,0.588631,Trench 1,2024-01-31 00:36:32,2024-02-03,2024-01,Train,0,1


In [3281]:
# Snapshot the Train extract so it survives later reuse of dfd.
df2 = dfd.copy()

In [3282]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped before concatenating: pandas deprecates letting
# empty entries take part in dtype resolution and warns when one is passed.
# If both frames are empty the original list is used so the column layout is
# still preserved.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10081 entries, 0 to 10080
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10081 non-null  object        
 1   digitalLoanAccountId   10081 non-null  object        
 2   loanAccountNumber      10081 non-null  object        
 3   beta_cash_app_score    10081 non-null  float64       
 4   trenchCategory         10081 non-null  object        
 5   appln_submit_datetime  10081 non-null  datetime64[us]
 6   disbursementdate       10081 non-null  dbdate        
 7   Application_month      10081 non-null  object        
 8   Data_selection         10081 non-null  object        
 9   deffpd10               10081 non-null  Int64         
 10  flg_mature_fpd10       10081 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(6)
memory usage: 886.2+ KB


In [3283]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
df_concat['beta_cash_app_score'] = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')

In [3284]:
# Persist the combined FPD10 sample. The file name carries the target
# ("fpd10") so this export no longer overwrites the FPD0 sample that the
# notebook writes to beta_cash_app_scoretrench1fpd0.csv.
df_concat.to_csv(r"beta_cash_app_scoretrench1fpd10.csv")

In [3285]:
# Periodic Gini of beta_cash_app_score against the deffpd10 flag, labelled
# 'FPD10'. calculate_periodic_gini is defined outside this section.
gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd10', 'FPD10')

In [3286]:
# Keep an independent copy of the FPD10 Gini table; gini_results is reassigned
# by the sections for the other targets.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,0.071429,Week,beta_cash_app_score,1.1.0,FPD10
1,2024-01-01,2024-01-31,0.289216,Month,beta_cash_app_score,1.1.0,FPD10
2,2024-01-08,2024-01-14,0.69697,Week,beta_cash_app_score,1.1.0,FPD10
3,2024-01-15,2024-01-21,0.691489,Week,beta_cash_app_score,1.1.0,FPD10
4,2024-01-22,2024-01-28,-0.081481,Week,beta_cash_app_score,1.1.0,FPD10


## FPD30

## Test

In [3287]:
# Test sample, FPD30 target.
# Scores come from audit_balance.ml_model_run_details, keeping the most recent
# run per (customerId, digitalLoanAccountId, modelDisplayName). The final
# select keeps disbursed 'Trench 1' loans with a non-null score whose
# flg_mature_fpd30 flag equals 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [3288]:
# Snapshot the Test extract; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3289]:
# Train sample, FPD30 target.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details
# (the prediction column is used as the score as-is). The final select keeps
# disbursed 'Trench 1' loans with a non-null score whose flg_mature_fpd30
# flag equals 1.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,2363182,8ff60ac4-91b5-432d-8fc8-ead3d1354aa2,60823631820017,0.533065,Trench 1,2024-01-09 04:46:10,2024-01-10,2024-01,Train,0,1
1,2372310,9c10c1ee-3c2e-4235-9469-a16b79378a32,60823723100018,0.449566,Trench 1,2024-01-18 13:08:34,2024-01-18,2024-01,Train,0,1
2,2376924,984c431e-f25e-45cb-ab3a-b1a462cb4eb0,60823769240026,0.502811,Trench 1,2024-01-23 00:18:33,2024-01-23,2024-01,Train,0,1
3,2355213,924cbca0-b08f-4e11-bf2b-a9e08bbca47f,60823552130011,0.643342,Trench 1,2024-01-01 06:36:41,2024-01-01,2024-01,Train,1,1
4,2372273,7429134e-0d90-4cb2-b292-22a6f1589525,60823722730011,0.454981,Trench 1,2024-01-18 13:03:22,2024-01-27,2024-01,Train,0,1


In [3290]:
# Snapshot the Train extract so it survives later reuse of dfd.
df2 = dfd.copy()

In [3291]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped before concatenating: the Test extract for this
# target returned no rows, and pandas deprecates letting empty entries take
# part in dtype resolution (it warns when one is passed). Filtering them out
# keeps the current result. If both frames are empty the original list is
# used so the column layout is still preserved.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9630 entries, 0 to 9629
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9630 non-null   Int64         
 1   digitalLoanAccountId   9630 non-null   object        
 2   loanAccountNumber      9630 non-null   object        
 3   beta_cash_app_score    9630 non-null   float64       
 4   trenchCategory         9630 non-null   object        
 5   appln_submit_datetime  9630 non-null   datetime64[us]
 6   disbursementdate       9630 non-null   dbdate        
 7   Application_month      9630 non-null   object        
 8   Data_selection         9630 non-null   object        
 9   deffpd30               9630 non-null   Int64         
 10  flg_mature_fpd30       9630 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 855.9+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3292]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
df_concat['beta_cash_app_score'] = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')

In [3293]:
# Persist the combined FPD30 sample to the working directory (the DataFrame
# index is written as the first, unnamed column).
df_concat.to_csv(r"beta_cash_app_scoretrench1fpd30.csv")

In [3294]:
# Periodic Gini of beta_cash_app_score against the deffpd30 flag, labelled
# 'FPD30'. calculate_periodic_gini is defined outside this section.
gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd30', 'FPD30')

In [3295]:
# Keep an independent copy of the FPD30 Gini table; gini_results is reassigned
# by the sections for the other targets.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,0.071429,Week,beta_cash_app_score,1.1.0,FPD30
1,2024-01-01,2024-01-31,0.210702,Month,beta_cash_app_score,1.1.0,FPD30
2,2024-01-08,2024-01-14,0.69697,Week,beta_cash_app_score,1.1.0,FPD30
3,2024-01-15,2024-01-21,0.652778,Week,beta_cash_app_score,1.1.0,FPD30
4,2024-01-22,2024-01-28,-0.303571,Week,beta_cash_app_score,1.1.0,FPD30


## FSPD30

## Test

In [3296]:
# Test sample, FSPD30 target.
# Scores come from audit_balance.ml_model_run_details, keeping the most recent
# run per (customerId, digitalLoanAccountId, modelDisplayName). The final
# select keeps disbursed 'Trench 1' loans with a non-null score whose
# flg_mature_fspd_30 flag equals 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3297]:
# Snapshot the Test extract; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3298]:
# Train sample, FSPD30 target.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details
# (the prediction column is used as the score as-is). The final select keeps
# disbursed 'Trench 1' loans with a non-null score whose flg_mature_fspd_30
# flag equals 1.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2382128,8b098f90-02df-49bc-8f89-ceafed9ed06e,60823821280017,0.586119,Trench 1,2024-01-28 10:43:40,2024-02-27,2024-01,Train,0,1
1,2376280,a3d94f32-3f85-4e0c-80d4-dbf8a3657026,60823762800015,0.309969,Trench 1,2024-01-26 12:43:17,2024-01-30,2024-01,Train,0,1
2,2371202,bebfb712-e0c5-43f7-9a8d-c4736a8f9700,60823712020011,0.481233,Trench 1,2024-01-17 17:17:07,2024-01-17,2024-01,Train,0,1
3,2375605,bf6d8639-f48f-4341-9d8f-796058dc0445,60823756050012,0.458095,Trench 1,2024-01-21 17:38:24,2024-01-21,2024-01,Train,0,1
4,2385016,8f9d2965-137e-4671-9703-42f496ca5d76,60823850160012,0.588631,Trench 1,2024-01-31 00:36:32,2024-02-03,2024-01,Train,1,1


In [3299]:
# Snapshot the Train extract so it survives later reuse of dfd.
df2 = dfd.copy()

In [3300]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped before concatenating: the Test extract for this
# target returned no rows, and pandas deprecates letting empty entries take
# part in dtype resolution (it warns when one is passed). Filtering them out
# keeps the current result. If both frames are empty the original list is
# used so the column layout is still preserved.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9163 entries, 0 to 9162
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9163 non-null   Int64         
 1   digitalLoanAccountId   9163 non-null   object        
 2   loanAccountNumber      9163 non-null   object        
 3   beta_cash_app_score    9163 non-null   float64       
 4   trenchCategory         9163 non-null   object        
 5   appln_submit_datetime  9163 non-null   datetime64[us]
 6   disbursementdate       9163 non-null   dbdate        
 7   Application_month      9163 non-null   object        
 8   Data_selection         9163 non-null   object        
 9   deffspd30              9163 non-null   Int64         
 10  flg_mature_fspd_30     9163 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 814.4+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3301]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
df_concat['beta_cash_app_score'] = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')

In [3302]:
# Persist the combined FSPD30 sample to the working directory (the DataFrame
# index is written as the first, unnamed column).
df_concat.to_csv(r"beta_cash_app_scoretrench1fspd30.csv")

In [3303]:
# Periodic Gini of beta_cash_app_score against the deffspd30 flag, labelled
# 'FSPD30'. calculate_periodic_gini is defined outside this section.
gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffspd30', 'FSPD30')

In [3304]:
# Keep an independent copy of the FSPD30 Gini table so it is not lost if
# gini_results is reassigned.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,-0.090909,Week,beta_cash_app_score,1.1.0,FSPD30
1,2024-01-01,2024-01-31,0.116168,Month,beta_cash_app_score,1.1.0,FSPD30
2,2024-01-08,2024-01-14,0.875,Week,beta_cash_app_score,1.1.0,FSPD30
3,2024-01-15,2024-01-21,0.185185,Week,beta_cash_app_score,1.1.0,FSPD30
4,2024-01-22,2024-01-28,-0.081481,Week,beta_cash_app_score,1.1.0,FSPD30


## FSTPD30

## Test

In [3305]:
# Test sample, FSTPD30 target.
# Scores come from audit_balance.ml_model_run_details, keeping the most recent
# run per (customerId, digitalLoanAccountId, modelDisplayName). The final
# select keeps disbursed 'Trench 1' loans with a non-null score whose
# flg_mature_fstpd_30 flag equals 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3306]:
# Snapshot the Test extract; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3307]:
# Train sample, FSTPD30 target.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details
# (the prediction column is used as the score as-is). The final select keeps
# disbursed 'Trench 1' loans with a non-null score whose flg_mature_fstpd_30
# flag equals 1.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Fetch the result and discard fully identical rows (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2369174,2319c638-c967-4bb7-b766-fea089e2f371,60823691740011,0.391338,Trench 1,2024-01-15 17:44:19,2024-01-16,2024-01,Train,0,1
1,2371598,a8548a1e-ed9d-427c-b309-2bf04f3586a9,60823715980014,0.512296,Trench 1,2024-01-17 16:13:44,2024-01-18,2024-01,Train,0,1
2,2376051,e918fb74-00be-4668-8dce-afb174f002b6,60823760510014,0.373666,Trench 1,2024-01-22 09:52:21,2024-01-28,2024-01,Train,0,1
3,2369816,e54c1e64-616f-4a4a-89e3-199f712a2bf3,60823698160012,0.514735,Trench 1,2024-01-15 19:44:37,2024-01-16,2024-01,Train,1,1
4,2368621,30724107-65e0-4b97-b109-479f22282400,60823686210019,0.423962,Trench 1,2024-01-14 17:40:22,2024-01-14,2024-01,Train,0,1


In [3308]:
# Snapshot of the Train query result, independent of later reassignments of `dfd`.
df2 = dfd.copy(deep=True)

In [3309]:
# Stack df2 (Train) on top of df1, renumbering the index.
# Empty frames are skipped explicitly: handing an empty frame to pd.concat is
# deprecated in recent pandas (it raises a FutureWarning) and, in a future
# release, empty inputs will start to influence the resulting column dtypes.
_frames_to_stack = [frame for frame in (df2, df1) if not frame.empty]
if _frames_to_stack:
    df_concat = pd.concat(_frames_to_stack, ignore_index=True)
else:
    # Both inputs are empty: keep a zero-row frame with df2's columns and dtypes.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8770 entries, 0 to 8769
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8770 non-null   Int64         
 1   digitalLoanAccountId   8770 non-null   object        
 2   loanAccountNumber      8770 non-null   object        
 3   beta_cash_app_score    8770 non-null   float64       
 4   trenchCategory         8770 non-null   object        
 5   appln_submit_datetime  8770 non-null   datetime64[us]
 6   disbursementdate       8770 non-null   dbdate        
 7   Application_month      8770 non-null   object        
 8   Data_selection         8770 non-null   object        
 9   deffstpd30             8770 non-null   Int64         
 10  flg_mature_fstpd_30    8770 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 779.5+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3310]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
_numeric_scores = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = _numeric_scores

In [3311]:
# Persist the combined Trench 1 / FSTPD30 rows to the working directory.
df_concat.to_csv(
    r"beta_cash_app_scoretrench1fstpd30.csv",
)

In [3312]:
# Gini of the score against the FSTPD30 default flag, per period.
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffstpd30',
    'FSTPD30',
)

In [3313]:
# Keep the FSTPD30 result under its own name; `gini_results` is reused for each target.
f4 = gini_results.copy(deep=True)
f4.head(5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,-0.135802,Week,beta_cash_app_score,1.1.0,FSTPD30
1,2024-01-01,2024-01-31,0.13273,Month,beta_cash_app_score,1.1.0,FSTPD30
2,2024-01-08,2024-01-14,0.569892,Week,beta_cash_app_score,1.1.0,FSTPD30
3,2024-01-15,2024-01-21,0.086364,Week,beta_cash_app_score,1.1.0,FSTPD30
4,2024-01-22,2024-01-28,0.005714,Week,beta_cash_app_score,1.1.0,FSTPD30


## Combining the dataframes

In [3314]:
import functools

# Columns present in every per-target Gini frame; used as the join keys.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on ``common_columns``."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the frames pairwise, left to right, into one wide table.
# NOTE(review): 'bad_rate' is a join key and carries a different label in each
# frame, so rows from different frames do not line up — each merged row has
# only one populated *_gini column. Confirm this sparse layout is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_cash_app_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'beta_cash_app_score_FPD10_gini', 'beta_cash_app_score_FPD30_gini',
       'beta_cash_app_score_FSPD30_gini',
       'beta_cash_app_score_FSTPD30_gini'], dtype=object)

In [3315]:
# Old -> new names for the Gini columns: each gets a "_t1" tag before "_gini".
_gini_renames = {
    'beta_cash_app_score_FPD0_gini': 'beta_cash_app_score_FPD0_t1_gini',
    'beta_cash_app_score_FPD10_gini': 'beta_cash_app_score_FPD10_t1_gini',
    'beta_cash_app_score_FPD30_gini': 'beta_cash_app_score_FPD30_t1_gini',
    'beta_cash_app_score_FSPD30_gini': 'beta_cash_app_score_FSPD30_t1_gini',
    'beta_cash_app_score_FSTPD30_gini': 'beta_cash_app_score_FSTPD30_t1_gini',
}

# Key columns first, then the Gini columns in dict order; rename returns a new frame.
final_df = final_df[
    ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', *_gini_renames]
].rename(columns=_gini_renames)

# Constant descriptor columns for this output table.
final_df['Trench_category'] = 'Trench 1'
final_df['Model_display_name'] = 'apps_score_cash - t1&t2'
final_df['Product_type'] = 'CASH'
final_df.dtypes

start_date                             datetime64[ns]
end_date                               datetime64[ns]
period                                         object
Model_Name                                     object
version                                        object
bad_rate                                       object
beta_cash_app_score_FPD0_t1_gini              float64
beta_cash_app_score_FPD10_t1_gini             float64
beta_cash_app_score_FPD30_t1_gini             float64
beta_cash_app_score_FSPD30_t1_gini            float64
beta_cash_app_score_FSTPD30_t1_gini           float64
Trench_category                                object
Model_display_name                             object
Product_type                                   object
dtype: object

In [3316]:
# Preview the first rows of the table that is about to be uploaded.
final_df.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,beta_cash_app_score_FPD0_t1_gini,beta_cash_app_score_FPD10_t1_gini,beta_cash_app_score_FPD30_t1_gini,beta_cash_app_score_FSPD30_t1_gini,beta_cash_app_score_FSTPD30_t1_gini,Trench_category,Model_display_name,Product_type
0,2024-01-01,2024-01-07,Week,beta_cash_app_score,1.1.0,FPD0,-0.076923,,,,,Trench 1,apps_score_cash - t1&t2,CASH
1,2024-01-01,2024-01-31,Month,beta_cash_app_score,1.1.0,FPD0,0.255906,,,,,Trench 1,apps_score_cash - t1&t2,CASH
2,2024-01-08,2024-01-14,Week,beta_cash_app_score,1.1.0,FPD0,0.875,,,,,Trench 1,apps_score_cash - t1&t2,CASH
3,2024-01-15,2024-01-21,Week,beta_cash_app_score,1.1.0,FPD0,0.430233,,,,,Trench 1,apps_score_cash - t1&t2,CASH
4,2024-01-22,2024-01-28,Week,beta_cash_app_score,1.1.0,FPD0,-0.051429,,,,,Trench 1,apps_score_cash - t1&t2,CASH


In [3317]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.app_score_cash_t1_v1_gini4"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=8d71c206-e4d8-48ab-893f-323333a0f8c5>

## Trench 2

## FPD0

## Test

In [3318]:
# Test rows for Trench 2 / FPD0.
# Takes the most recent model run per (customerId, digitalLoanAccountId,
# modelDisplayName), reads combined_score from the prediction JSON, and joins
# to the loan master and delinquency flags. Only disbursed loans with a
# non-null score and flg_mature_fpd0 = 1 are kept.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the query and discard fully duplicated rows (first occurrence is kept).
dfd = client.query(sq).to_dataframe().drop_duplicates()
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2900835,09acb9dc-b1dd-4070-bebc-60c4efcd8473,60829008350015,0.473203,Trench 2,2025-10-20 09:26:01,2025-10-20,2025-10,Test,0,1
1,3192456,6c7ce98d-8119-43a9-b96e-a4a77632406a,60831924560011,0.454498,Trench 2,2025-10-07 22:44:26,2025-10-08,2025-10,Test,0,1
2,3384522,7c841859-a890-4050-b951-0bbeee5ed342,60833845220018,0.444418,Trench 2,2025-10-20 21:18:53,2025-10-21,2025-10,Test,0,1
3,3305506,fa820cc8-6ea8-40f6-90a5-310f698925a8,60833055060012,0.474032,Trench 2,2025-10-21 18:38:11,2025-10-21,2025-10,Test,0,1
4,3181001,e4f78e4b-1ab2-47ef-929c-469f7c552cd5,60831810010014,0.437396,Trench 2,2025-10-15 22:32:57,2025-10-16,2025-10,Test,0,1


In [3319]:
# Snapshot of the Test query result; `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [3320]:
# Train rows for Trench 2 / FPD0.
# Scores are read from the training run-details table and joined to the loan
# master and the delinquency flags; only disbursed loans with a non-null score
# and flg_mature_fpd0 = 1 are kept.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the query and discard fully duplicated rows (first occurrence is kept).
dfd = client.query(sq).to_dataframe().drop_duplicates()
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1963212,f64d6609-b0a4-4a79-9c0a-bff60ed781d1,60819632120031,0.434867,Trench 2,2024-01-17 00:26:29,2024-01-18,2024-01,Train,1,1
1,2135604,cff5676c-1977-4b63-be60-d39577785d79,60821356040019,0.452397,Trench 2,2024-01-12 22:38:24,2024-01-13,2024-01,Train,0,1
2,1881046,80d0b2cc-fee2-40bf-9b87-2770b242b740,60818810460023,0.454475,Trench 2,2024-01-14 19:56:03,2024-01-27,2024-01,Train,0,1
3,2290166,d60a2e8e-4b7d-4562-a84d-b9883aa01555,60822901660014,0.465123,Trench 2,2024-01-08 15:15:33,2024-01-08,2024-01,Train,0,1
4,1235435,df77ce69-9ee9-46cb-a6b9-ba39d7b744bd,60812354350019,0.510893,Trench 2,2024-01-25 11:16:34,2024-01-26,2024-01,Train,0,1


In [3321]:
# Snapshot of the Train query result, independent of later reassignments of `dfd`.
df2 = dfd.copy(deep=True)

In [3322]:
# Stack Train (df2) on top of Test (df1), renumbering the index.
# Empty frames are skipped explicitly: handing an empty frame to pd.concat is
# deprecated in recent pandas (it raises a FutureWarning) and, in a future
# release, empty inputs will start to influence the resulting column dtypes.
# NOTE(review): customerId is reported as object dtype in the .info() output
# for this cell — presumably Train and Test carry different customerId dtypes;
# confirm before using it as a key.
_frames_to_stack = [frame for frame in (df2, df1) if not frame.empty]
if _frames_to_stack:
    df_concat = pd.concat(_frames_to_stack, ignore_index=True)
else:
    # Both inputs are empty: keep a zero-row frame with df2's columns and dtypes.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8501 entries, 0 to 8500
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8501 non-null   object        
 1   digitalLoanAccountId   8501 non-null   object        
 2   loanAccountNumber      8501 non-null   object        
 3   beta_cash_app_score    8501 non-null   float64       
 4   trenchCategory         8501 non-null   object        
 5   appln_submit_datetime  8501 non-null   datetime64[us]
 6   disbursementdate       8501 non-null   dbdate        
 7   Application_month      8501 non-null   object        
 8   Data_selection         8501 non-null   object        
 9   deffpd0                8501 non-null   Int64         
 10  flg_mature_fpd0        8501 non-null   Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(6)
memory usage: 747.3+ KB


In [3323]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
_numeric_scores = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = _numeric_scores

In [3324]:
# Persist the combined Trench 2 / FPD0 rows to the working directory.
df_concat.to_csv(
    r"beta_cash_app_scoretrench2fpd0.csv",
)

In [3325]:
# Gini of the score against the FPD0 default flag, per period.
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffpd0',
    'FPD0',
)

In [3326]:
# Keep the FPD0 result under its own name; `gini_results` is reused for each target.
f0 = gini_results.copy(deep=True)
f0.head(5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD0_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FPD0
1,2024-01-01,2024-01-31,0.448521,Month,beta_cash_app_score,1.1.0,FPD0
2,2024-01-08,2024-01-14,0.59375,Week,beta_cash_app_score,1.1.0,FPD0
3,2024-01-15,2024-01-21,0.404762,Week,beta_cash_app_score,1.1.0,FPD0
4,2024-01-22,2024-01-28,0.769231,Week,beta_cash_app_score,1.1.0,FPD0


## FPD10

## Test

In [3327]:
# Test rows for Trench 2 / FPD10.
# Takes the most recent model run per (customerId, digitalLoanAccountId,
# modelDisplayName), reads combined_score from the prediction JSON, and joins
# to the loan master and delinquency flags. Only disbursed loans with a
# non-null score and flg_mature_fpd10 = 1 are kept.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the query and discard fully duplicated rows (first occurrence is kept).
dfd = client.query(sq).to_dataframe().drop_duplicates()
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3301484,96d1d523-5824-4b1c-abb7-5c6b43f74c75,60833014840017,0.416162,Trench 2,2025-09-26 19:26:32,2025-09-30,2025-09,Test,0,1
1,3393196,8532a086-368d-439b-b5aa-783507807f46,60833931960015,0.426932,Trench 2,2025-10-08 09:08:00,2025-10-08,2025-10,Test,0,1
2,2818584,11a6e7c7-5733-49ee-b6e0-fd575421485c,60828185840016,0.362118,Trench 2,2025-10-05 15:55:29,2025-10-15,2025-10,Test,0,1
3,2918075,b22c3b40-2913-4c6c-a5df-b0ec0c0654e3,60829180750016,0.468057,Trench 2,2025-10-10 01:02:34,2025-10-10,2025-10,Test,0,1
4,3524672,ff9f5d93-edfa-434b-b3fe-9eb6fa4239b4,60835246720013,0.507323,Trench 2,2025-10-11 07:14:15,2025-10-11,2025-10,Test,0,1


In [3328]:
# Snapshot of the Test query result; `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [3329]:
# Train rows for Trench 2 / FPD10.
# Scores are read from the training run-details table and joined to the loan
# master and the delinquency flags; only disbursed loans with a non-null score
# and flg_mature_fpd10 = 1 are kept.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the query and discard fully duplicated rows (first occurrence is kept).
dfd = client.query(sq).to_dataframe().drop_duplicates()
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,2253207,3f43d407-c02d-4791-8d4a-7f73daea7848,60822532070026,0.463107,Trench 2,2024-01-14 01:06:35,2024-02-01,2024-01,Train,0,1
1,2343393,3e295588-7eee-47ee-845e-3879fd4a2b12,60823433930016,0.529612,Trench 2,2024-01-23 18:40:10,2024-01-23,2024-01,Train,0,1
2,1490321,17467efb-ee68-4a88-837d-567d1e129780,60814903210012,0.531591,Trench 2,2024-01-09 11:03:12,2024-01-09,2024-01,Train,0,1
3,1344038,ab3c1442-1b97-422e-bce9-a997035abd3b,60813440380025,0.492479,Trench 2,2024-01-23 21:28:30,2024-02-02,2024-01,Train,1,1
4,1414505,5dba5b9f-64e2-4748-998c-29234b140c4f,60814145050014,0.409939,Trench 2,2024-01-19 16:54:22,2024-01-19,2024-01,Train,0,1


In [3330]:
# Snapshot of the Train query result, independent of later reassignments of `dfd`.
df2 = dfd.copy(deep=True)

In [3331]:
# Stack Train (df2) on top of Test (df1), renumbering the index.
# Empty frames are skipped explicitly: handing an empty frame to pd.concat is
# deprecated in recent pandas (it raises a FutureWarning) and, in a future
# release, empty inputs will start to influence the resulting column dtypes.
_frames_to_stack = [frame for frame in (df2, df1) if not frame.empty]
if _frames_to_stack:
    df_concat = pd.concat(_frames_to_stack, ignore_index=True)
else:
    # Both inputs are empty: keep a zero-row frame with df2's columns and dtypes.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8383 entries, 0 to 8382
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8383 non-null   object        
 1   digitalLoanAccountId   8383 non-null   object        
 2   loanAccountNumber      8383 non-null   object        
 3   beta_cash_app_score    8383 non-null   float64       
 4   trenchCategory         8383 non-null   object        
 5   appln_submit_datetime  8383 non-null   datetime64[us]
 6   disbursementdate       8383 non-null   dbdate        
 7   Application_month      8383 non-null   object        
 8   Data_selection         8383 non-null   object        
 9   deffpd10               8383 non-null   Int64         
 10  flg_mature_fpd10       8383 non-null   Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(6)
memory usage: 736.9+ KB


In [3332]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
_numeric_scores = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = _numeric_scores

In [3333]:
# Persist the combined Trench 2 / FPD10 rows to the working directory.
# The file name previously read "trench1fpd0" (a copy-paste leftover), which
# mislabelled this export and overwrote the Trench 1 / FPD0 file. It now follows
# the trench<N><target> pattern used by the other exports in this notebook.
df_concat.to_csv(r"beta_cash_app_scoretrench2fpd10.csv")

In [3334]:
# Gini of the score against the FPD10 default flag, per period.
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffpd10',
    'FPD10',
)

In [3335]:
# Keep the FPD10 result under its own name; `gini_results` is reused for each target.
f1 = gini_results.copy(deep=True)
f1.head(5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FPD10
1,2024-01-01,2024-01-31,0.429412,Month,beta_cash_app_score,1.1.0,FPD10
2,2024-01-08,2024-01-14,0.59375,Week,beta_cash_app_score,1.1.0,FPD10
3,2024-01-15,2024-01-21,0.4,Week,beta_cash_app_score,1.1.0,FPD10
4,2024-01-22,2024-01-28,,Week,beta_cash_app_score,1.1.0,FPD10


## FPD30

## Test

In [3336]:
# Test rows for Trench 2 / FPD30.
# Takes the most recent model run per (customerId, digitalLoanAccountId,
# modelDisplayName), reads combined_score from the prediction JSON, and joins
# to the loan master and delinquency flags. Only disbursed loans with a
# non-null score and flg_mature_fpd30 = 1 are kept.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the query and discard fully duplicated rows (first occurrence is kept).
dfd = client.query(sq).to_dataframe().drop_duplicates()
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,1346048,896716c5-e4e5-4b3c-9218-0b605d293c72,60813460480017,0.59739,Trench 2,2025-09-25 11:37:29,2025-09-25,2025-09,Test,0,1


In [3337]:
# Snapshot of the Test query result; `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [3338]:
# Train rows for Trench 2 / FPD30.
# Scores are read from the training run-details table and joined to the loan
# master and the delinquency flags; only disbursed loans with a non-null score
# and flg_mature_fpd30 = 1 are kept.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the query and discard fully duplicated rows (first occurrence is kept).
dfd = client.query(sq).to_dataframe().drop_duplicates()
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,1904439,0b9a0b6c-9f29-4050-b3f0-e09726136f7b,60819044390016,0.422564,Trench 2,2024-01-26 20:38:24,2024-01-26,2024-01,Train,0,1
1,2249854,6685f7fe-828e-43e9-8e3c-dee593dcc0b6,60822498540012,0.498946,Trench 2,2024-01-15 02:21:24,2024-01-15,2024-01,Train,0,1
2,2042637,2ce0348a-d51d-4b24-bf57-223aa3b9380e,60820426370015,0.471704,Trench 2,2024-01-11 22:07:41,2024-01-12,2024-01,Train,1,1
3,1719998,de75f9ac-4ef6-4170-88c9-a4934e6d164d,60817199980015,0.653952,Trench 2,2024-01-11 11:22:34,2024-01-11,2024-01,Train,1,1
4,1302809,a473b0b4-c355-4135-b4a5-2fa04de93417,60813028090018,0.401579,Trench 2,2024-01-21 19:13:13,2024-01-21,2024-01,Train,0,1


In [3339]:
# Keep the Train-sample result (Data_selection = 'Train'); it is stacked with the
# Test sample (df1) in the next cell.
df2 = dfd.copy()

In [3340]:
# Stack the Train (df2) and Test (df1) samples into one frame.
# Empty frames are dropped before concatenating: pandas has deprecated letting
# empty entries take part in result-dtype resolution (it raises a FutureWarning and
# the behaviour changes in a later release), and either query can return no rows.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    # Both samples empty: keep an empty frame with the expected columns
    # rather than letting pd.concat([]) raise ValueError.
    else df2.iloc[0:0].copy()
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8175 entries, 0 to 8174
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8175 non-null   object        
 1   digitalLoanAccountId   8175 non-null   object        
 2   loanAccountNumber      8175 non-null   object        
 3   beta_cash_app_score    8175 non-null   float64       
 4   trenchCategory         8175 non-null   object        
 5   appln_submit_datetime  8175 non-null   datetime64[us]
 6   disbursementdate       8175 non-null   dbdate        
 7   Application_month      8175 non-null   object        
 8   Data_selection         8175 non-null   object        
 9   deffpd30               8175 non-null   Int64         
 10  flg_mature_fpd30       8175 non-null   Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(6)
memory usage: 718.6+ KB


In [3341]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
df_concat['beta_cash_app_score'] = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')

In [3342]:
# Persist the Trench 2 FPD30 sample (Train + Test) to CSV.
# The file name previously said "trench1", which did not match the
# trenchCategory = 'Trench 2' filter used to build df_concat and would clobber any
# Trench 1 export written under that name.
df_concat.to_csv(r"beta_cash_app_scoretrench2fpd30.csv")

In [3343]:
# Periodic gini of beta_cash_app_score against the deffpd30 flag, labelled 'FPD30'.
# calculate_periodic_gini is defined elsewhere in this notebook; the displayed result
# contains both 'Week' and 'Month' rows.
gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd30', 'FPD30')

In [3344]:
# Keep the FPD30 gini table as f2; it is one of the frames merged into final_df later.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FPD30
1,2024-01-01,2024-01-31,0.429412,Month,beta_cash_app_score,1.1.0,FPD30
2,2024-01-08,2024-01-14,0.59375,Week,beta_cash_app_score,1.1.0,FPD30
3,2024-01-15,2024-01-21,0.4,Week,beta_cash_app_score,1.1.0,FPD30
4,2024-01-22,2024-01-28,,Week,beta_cash_app_score,1.1.0,FPD30


## FSPD30

## Test

In [3345]:
# Test sample for the Trench 2 / FSPD30 gini.
# - Scores come from audit_balance.ml_model_run_details: for each
#   (customerId, digitalLoanAccountId, modelDisplayName) only the row with the latest
#   start_time is kept, and beta_cash_app_score is read from the "combined_score" key of
#   the prediction text after quotes/None are rewritten to JSON form.
# - trenchCategory falls back to the latest 'Beta-Cash-Model-response' request payload
#   ("$.predictions.trenchCategory") when it is null on the model-run row.
# - Rows are kept only when flagDisbursement = 1, disbursementDateTime and the score are
#   not null, flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 2'.
# - Every row is tagged Data_selection = 'Test'.
# NOTE(review): latest_request is de-duplicated per modelDisplayName and two display
# names are selected, so a loan scored under both names would yield two rows - confirm.
# NOTE(review): coalesce(...) around SAFE_CAST has a single argument, so it is a no-op.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join because the
# WHERE clause filters on loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query and load the result into a pandas DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3346]:
# Keep the Test-sample result (Data_selection = 'Test') under its own name;
# `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3347]:
# Train sample for the Trench 2 / FSPD30 gini.
# - Scores come from dap_ds_poweruser_playground.ml_training_model_run_details; the raw
#   `prediction` column is exposed as beta_cash_app_score.
# - Joined to loan_master_table on digitalLoanAccountId and to the `deliquency` CTE
#   (built from loan_deliquency_data) on loanAccountNumber.
# - Rows are kept only when flagDisbursement = 1, disbursementDateTime and the score are
#   not null, flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 2'.
# - Every row is tagged Data_selection = 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join because the
# WHERE clause filters on loanmaster columns.
# NOTE(review): `parsed` also selects modelDisplayName, modelVersionId, end_time and
# calcFeatures, which nothing later in this query reads.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query and load the result into a pandas DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,2090858,5e70b003-58c3-4cf0-8c9b-6742fd7ef8f3,60820908580013,0.476913,Trench 2,2024-01-19 12:03:00,2024-01-24,2024-01,Train,0,1
1,2093691,7a4d835a-7666-42e2-9bed-1568fc75f915,60820936910011,0.370256,Trench 2,2024-01-30 10:25:21,2024-02-01,2024-01,Train,0,1
2,2244767,12c50ebc-9a23-4d23-a6e4-58bd8c03387e,60822447670014,0.510171,Trench 2,2024-01-21 04:46:54,2024-01-21,2024-01,Train,1,1
3,2165749,a042d84f-417f-4d68-8b1e-edc7406a6b0c,60821657490011,0.526756,Trench 2,2024-01-03 20:20:27,2024-01-09,2024-01,Train,0,1
4,1334303,633480ab-01ea-4568-80e0-9af0d12aa1ad,60813343030011,0.413218,Trench 2,2024-01-28 13:38:00,2024-01-28,2024-01,Train,0,1


In [3348]:
# Keep the Train-sample result (Data_selection = 'Train'); it is stacked with the
# Test sample (df1) in the next cell.
df2 = dfd.copy()

In [3349]:
# Stack the Train (df2) and Test (df1) samples into one frame.
# Empty frames are dropped before concatenating: pandas has deprecated letting
# empty entries take part in result-dtype resolution (it raises a FutureWarning and
# the behaviour changes in a later release), and either query can return no rows.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    # Both samples empty: keep an empty frame with the expected columns
    # rather than letting pd.concat([]) raise ValueError.
    else df2.iloc[0:0].copy()
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7858 entries, 0 to 7857
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7858 non-null   Int64         
 1   digitalLoanAccountId   7858 non-null   object        
 2   loanAccountNumber      7858 non-null   object        
 3   beta_cash_app_score    7858 non-null   float64       
 4   trenchCategory         7858 non-null   object        
 5   appln_submit_datetime  7858 non-null   datetime64[us]
 6   disbursementdate       7858 non-null   dbdate        
 7   Application_month      7858 non-null   object        
 8   Data_selection         7858 non-null   object        
 9   deffspd30              7858 non-null   Int64         
 10  flg_mature_fspd_30     7858 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 698.4+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3350]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
df_concat['beta_cash_app_score'] = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')

In [3351]:
# Persist the Trench 2 FSPD30 sample (Train + Test) to CSV.
# The file name previously said "trench1", which did not match the
# trenchCategory = 'Trench 2' filter used to build df_concat and would clobber any
# Trench 1 export written under that name.
df_concat.to_csv(r"beta_cash_app_scoretrench2fspd30.csv")

In [3352]:
# Periodic gini of beta_cash_app_score against the deffspd30 flag, labelled 'FSPD30'.
# calculate_periodic_gini is defined elsewhere in this notebook; the displayed result
# contains both 'Week' and 'Month' rows.
gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffspd30', 'FSPD30')

In [3353]:
# Keep the FSPD30 gini table as f3; it is one of the frames merged into final_df later.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FSPD30
1,2024-01-01,2024-01-31,0.371429,Month,beta_cash_app_score,1.1.0,FSPD30
2,2024-01-08,2024-01-14,0.261905,Week,beta_cash_app_score,1.1.0,FSPD30
3,2024-01-15,2024-01-21,0.52381,Week,beta_cash_app_score,1.1.0,FSPD30
4,2024-01-22,2024-01-28,0.230769,Week,beta_cash_app_score,1.1.0,FSPD30


## FSTPD30

## Test

In [3354]:
# Test sample for the Trench 2 / FSTPD30 gini.
# - Scores come from audit_balance.ml_model_run_details: for each
#   (customerId, digitalLoanAccountId, modelDisplayName) only the row with the latest
#   start_time is kept, and beta_cash_app_score is read from the "combined_score" key of
#   the prediction text after quotes/None are rewritten to JSON form.
# - trenchCategory falls back to the latest 'Beta-Cash-Model-response' request payload
#   ("$.predictions.trenchCategory") when it is null on the model-run row.
# - Rows are kept only when flagDisbursement = 1, disbursementDateTime and the score are
#   not null, flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 2'.
# - Every row is tagged Data_selection = 'Test'.
# NOTE(review): latest_request is de-duplicated per modelDisplayName and two display
# names are selected, so a loan scored under both names would yield two rows - confirm.
# NOTE(review): coalesce(...) around SAFE_CAST has a single argument, so it is a no-op.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join because the
# WHERE clause filters on loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query and load the result into a pandas DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3355]:
# Keep the Test-sample result (Data_selection = 'Test') under its own name;
# `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3356]:
# Train sample for the Trench 2 / FSTPD30 gini.
# - Scores come from dap_ds_poweruser_playground.ml_training_model_run_details; the raw
#   `prediction` column is exposed as beta_cash_app_score.
# - Joined to loan_master_table on digitalLoanAccountId and to the `deliquency` CTE
#   (built from loan_deliquency_data) on loanAccountNumber.
# - Rows are kept only when flagDisbursement = 1, disbursementDateTime and the score are
#   not null, flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 2'.
# - Every row is tagged Data_selection = 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join because the
# WHERE clause filters on loanmaster columns.
# NOTE(review): `parsed` also selects modelDisplayName, modelVersionId, end_time and
# calcFeatures, which nothing later in this query reads.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query and load the result into a pandas DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,2174868,72b05611-fb40-4c68-a38a-c7dc3355fd94,60821748680017,0.446971,Trench 2,2024-01-05 18:05:33,2024-01-05,2024-01,Train,0,1
1,1935488,03cc33e2-dc27-4f65-b5ac-ef20c5e9f479,60819354880014,0.654685,Trench 2,2024-01-18 12:46:13,2024-01-18,2024-01,Train,1,1
2,2095790,3c43d7df-a837-47c6-8d24-3b22ae9a201a,60820957900011,0.522231,Trench 2,2024-01-03 18:01:57,2024-01-03,2024-01,Train,0,1
3,2286814,88ad469d-23b7-416f-8fa2-ed8c8dbcae88,60822868140012,0.494798,Trench 2,2024-01-26 14:19:19,2024-01-26,2024-01,Train,1,1
4,2164270,5b83735a-8e19-4372-bf45-edca6e959f0d,60821642700018,0.431495,Trench 2,2024-01-30 12:57:22,2024-01-30,2024-01,Train,0,1


In [3357]:
# Keep the Train-sample result (Data_selection = 'Train'); it is stacked with the
# Test sample (df1) in the next cell.
df2 = dfd.copy()

In [3358]:
# Stack the Train (df2) and Test (df1) samples into one frame.
# Empty frames are dropped before concatenating: pandas has deprecated letting
# empty entries take part in result-dtype resolution (it raises a FutureWarning and
# the behaviour changes in a later release), and either query can return no rows.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    # Both samples empty: keep an empty frame with the expected columns
    # rather than letting pd.concat([]) raise ValueError.
    else df2.iloc[0:0].copy()
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7525 entries, 0 to 7524
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7525 non-null   Int64         
 1   digitalLoanAccountId   7525 non-null   object        
 2   loanAccountNumber      7525 non-null   object        
 3   beta_cash_app_score    7525 non-null   float64       
 4   trenchCategory         7525 non-null   object        
 5   appln_submit_datetime  7525 non-null   datetime64[us]
 6   disbursementdate       7525 non-null   dbdate        
 7   Application_month      7525 non-null   object        
 8   Data_selection         7525 non-null   object        
 9   deffstpd30             7525 non-null   Int64         
 10  flg_mature_fstpd_30    7525 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 668.9+ KB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3359]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
df_concat['beta_cash_app_score'] = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')

In [3360]:
# Persist the Trench 2 FSTPD30 sample (Train + Test) to CSV.
# The file name previously said "trench1", which did not match the
# trenchCategory = 'Trench 2' filter used to build df_concat and would clobber any
# Trench 1 export written under that name.
df_concat.to_csv(r"beta_cash_app_scoretrench2fstpd30.csv")

In [3361]:
# Periodic gini of beta_cash_app_score against the deffstpd30 flag, labelled 'FSTPD30'.
# calculate_periodic_gini is defined elsewhere in this notebook; the displayed result
# contains both 'Week' and 'Month' rows.
gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffstpd30', 'FSTPD30')

In [3362]:
# Keep the FSTPD30 gini table as f4; it is one of the frames merged into final_df later.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FSTPD30
1,2024-01-01,2024-01-31,0.283333,Month,beta_cash_app_score,1.1.0,FSTPD30
2,2024-01-08,2024-01-14,0.274725,Week,beta_cash_app_score,1.1.0,FSTPD30
3,2024-01-15,2024-01-21,0.340909,Week,beta_cash_app_score,1.1.0,FSTPD30
4,2024-01-22,2024-01-28,0.04,Week,beta_cash_app_score,1.1.0,FSTPD30


## Combining the dataframes

In [3363]:
import functools

# Key columns shared by every per-metric gini table.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']
# Per-metric gini tables, in the order their gini columns should appear.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two gini tables on ``common_columns`` and return the result."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the tables left to right into a single frame holding every gini column.
# NOTE(review): 'bad_rate' is one of the join keys and appears to hold a different
# label in each table (FPD0, FPD30, FSPD30, ...), so rows from different tables do not
# line up and each output row carries a single non-null gini - confirm this is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_cash_app_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'beta_cash_app_score_FPD10_gini', 'beta_cash_app_score_FPD30_gini',
       'beta_cash_app_score_FSPD30_gini',
       'beta_cash_app_score_FSTPD30_gini'], dtype=object)

In [3364]:
# Reorder the merged columns (keys first, then one gini column per metric), suffix the
# gini columns with "_t2", and tag every row with the trench / model / product labels.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'beta_cash_app_score_FPD0_gini',
        'beta_cash_app_score_FPD10_gini',
        'beta_cash_app_score_FPD30_gini',
        'beta_cash_app_score_FSPD30_gini',
        'beta_cash_app_score_FSTPD30_gini',
    ]]
    .rename(columns={
        'beta_cash_app_score_FPD0_gini': 'beta_cash_app_score_FPD0_t2_gini',
        'beta_cash_app_score_FPD10_gini': 'beta_cash_app_score_FPD10_t2_gini',
        'beta_cash_app_score_FPD30_gini': 'beta_cash_app_score_FPD30_t2_gini',
        'beta_cash_app_score_FSPD30_gini': 'beta_cash_app_score_FSPD30_t2_gini',
        'beta_cash_app_score_FSTPD30_gini': 'beta_cash_app_score_FSTPD30_t2_gini',
    })
    .assign(
        Trench_category='Trench 2',
        Model_display_name='apps_score_cash - t1&t2',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                             datetime64[ns]
end_date                               datetime64[ns]
period                                         object
Model_Name                                     object
version                                        object
bad_rate                                       object
beta_cash_app_score_FPD0_t2_gini              float64
beta_cash_app_score_FPD10_t2_gini             float64
beta_cash_app_score_FPD30_t2_gini             float64
beta_cash_app_score_FSPD30_t2_gini            float64
beta_cash_app_score_FSTPD30_t2_gini           float64
Trench_category                                object
Model_display_name                             object
Product_type                                   object
dtype: object

In [3365]:
# Preview the combined Trench 2 gini table before it is uploaded to BigQuery.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,beta_cash_app_score_FPD0_t2_gini,beta_cash_app_score_FPD10_t2_gini,beta_cash_app_score_FPD30_t2_gini,beta_cash_app_score_FSPD30_t2_gini,beta_cash_app_score_FSTPD30_t2_gini,Trench_category,Model_display_name,Product_type
0,2024-01-01,2024-01-07,Week,beta_cash_app_score,1.1.0,FPD0,,,,,,Trench 2,apps_score_cash - t1&t2,CASH
1,2024-01-01,2024-01-31,Month,beta_cash_app_score,1.1.0,FPD0,0.448521,,,,,Trench 2,apps_score_cash - t1&t2,CASH
2,2024-01-08,2024-01-14,Week,beta_cash_app_score,1.1.0,FPD0,0.59375,,,,,Trench 2,apps_score_cash - t1&t2,CASH
3,2024-01-15,2024-01-21,Week,beta_cash_app_score,1.1.0,FPD0,0.404762,,,,,Trench 2,apps_score_cash - t1&t2,CASH
4,2024-01-22,2024-01-28,Week,beta_cash_app_score,1.1.0,FPD0,0.769231,,,,,Trench 2,apps_score_cash - t1&t2,CASH


In [3366]:
# Load final_df into the Trench 2 gini table in BigQuery, replacing its current contents.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.app_score_cash_t2_v1_gini4"

job_config = bigquery.LoadJobConfig()
# "WRITE_TRUNCATE" overwrites the table; use "WRITE_APPEND" to add rows instead.
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job finishes (raises if the job failed).
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=de914b6d-9809-4be5-b01f-9f259eb3d53a>

## Trench 3

## FPD0

## Test

In [3367]:
# Test sample for the Trench 3 / FPD0 gini.
# - Scores come from audit_balance.ml_model_run_details: for each
#   (customerId, digitalLoanAccountId, modelDisplayName) only the row with the latest
#   start_time is kept, and beta_cash_app_score is read from the "combined_score" key of
#   the prediction text after quotes/None are rewritten to JSON form.
# - trenchCategory falls back to the latest 'Beta-Cash-Model-response' request payload
#   ("$.predictions.trenchCategory") when it is null on the model-run row.
# - Rows are kept only when flagDisbursement = 1, disbursementDateTime and the score are
#   not null, flg_mature_fpd0 = 1 and trenchCategory = 'Trench 3'.
# - Every row is tagged Data_selection = 'Test'.
# NOTE(review): latest_request is de-duplicated per modelDisplayName and two display
# names are selected, so a loan scored under both names would yield two rows - confirm.
# NOTE(review): coalesce(...) around SAFE_CAST has a single argument, so it is a no-op.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join because the
# WHERE clause filters on loanmaster columns.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query and load the result into a pandas DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,2643990,a36bcf6d-6f58-4055-9e3d-1d9494e07516,60826439900027,0.507023,Trench 3,2025-10-13 06:48:32,2025-10-13,2025-10,Test,0,1
1,2900148,a788f0dc-708e-466c-a8b4-334d307e7208,60829001480035,0.425958,Trench 3,2025-10-13 18:11:15,2025-10-13,2025-10,Test,0,1
2,2802174,b6613875-164f-4ca7-95db-46c1f3618293,60828021740021,0.530636,Trench 3,2025-10-10 15:26:23,2025-10-11,2025-10,Test,0,1
3,3088103,89ce46eb-ebeb-429f-bc01-1d78fb33a4df,60830881030029,0.503477,Trench 3,2025-10-13 09:52:24,2025-10-14,2025-10,Test,1,1
4,2915510,386f23f3-7a9d-4c87-944b-b66b734d3ff2,60829155100021,0.526072,Trench 3,2025-10-13 08:01:18,2025-10-13,2025-10,Test,0,1


In [3368]:
# Keep the Test-sample result (Data_selection = 'Test') under its own name;
# `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [3369]:
# Train sample for the Trench 3 / FPD0 gini.
# - Scores come from dap_ds_poweruser_playground.ml_training_model_run_details; the raw
#   `prediction` column is exposed as beta_cash_app_score.
# - Joined to loan_master_table on digitalLoanAccountId and to the `deliquency` CTE
#   (built from loan_deliquency_data) on loanAccountNumber.
# - Rows are kept only when flagDisbursement = 1, disbursementDateTime and the score are
#   not null, flg_mature_fpd0 = 1 and trenchCategory = 'Trench 3'.
# - Every row is tagged Data_selection = 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves like an inner join because the
# WHERE clause filters on loanmaster columns.
# NOTE(review): `parsed` also selects modelDisplayName, modelVersionId, end_time and
# calcFeatures, which nothing later in this query reads.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query and load the result into a pandas DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0
0,1750689,f75af30d-9c10-4f89-8b4d-cc906dc760e4,60817506890027,0.524279,Trench 3,2023-05-10 09:29:27,2023-05-12,2023-05,Train,0,1
1,1726422,559a340a-1f44-43ea-a78a-2cd5f6988dc0,60817264220021,0.446255,Trench 3,2023-05-16 10:38:19,2023-05-17,2023-05,Train,0,1
2,1913357,f20ff19c-dd9d-4cc4-ac0c-121ec7d4f65b,60819133570024,0.430654,Trench 3,2023-05-26 07:34:35,2023-05-26,2023-05,Train,0,1
3,1668194,1786d2a7-d743-4522-8381-6607108227e8,60816681940038,0.481926,Trench 3,2023-06-26 16:18:32,2023-06-27,2023-06,Train,0,1
4,1950175,e11256eb-2c1b-4fdc-8a0c-639840aa18ce,60819501750028,0.494012,Trench 3,2023-05-27 14:34:48,2023-05-27,2023-05,Train,0,1


In [3370]:
# Keep an independent copy of the Train query result as df2.
df2 = dfd.copy(deep=True)

In [3371]:
# Stack the Train (df2) and Test (df1) samples into one frame with a fresh index.
# Empty frames are excluded first: concatenating an empty frame raises pandas'
# FutureWarning about empty entries, and their dtypes will start to influence the
# result in a future pandas release. If both frames are empty, df2 alone is used
# so the column layout is preserved.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42566 entries, 0 to 42565
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             42566 non-null  object        
 1   digitalLoanAccountId   42566 non-null  object        
 2   loanAccountNumber      42566 non-null  object        
 3   beta_cash_app_score    42566 non-null  float64       
 4   trenchCategory         42566 non-null  object        
 5   appln_submit_datetime  42566 non-null  datetime64[us]
 6   disbursementdate       42566 non-null  dbdate        
 7   Application_month      42566 non-null  object        
 8   Data_selection         42566 non-null  object        
 9   deffpd0                42566 non-null  Int64         
 10  flg_mature_fpd0        42566 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(6)
memory usage: 3.7+ MB


In [3372]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = pd.to_numeric(
    df_concat['beta_cash_app_score'],
    errors='coerce',
)

In [3373]:
# Persist the combined Train/Test FPD0 sample.
# The file name now matches the data written (Trench 3, FPD0). The previous name said
# "trench2", which mislabels the export and can overwrite another trench's file.
df_concat.to_csv(r"beta_cash_app_scoretrench3fpd0.csv")

In [3374]:
# Periodic Gini for the FPD0 target; calculate_periodic_gini is defined earlier in the notebook.
# Arguments are presumably: data frame, score column, target flag column, label — confirm against its definition.
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffpd0',
    'FPD0',
)

In [3375]:
# Snapshot the FPD0 Gini table as f0, since `gini_results` is reused for the next target.
f0 = gini_results.copy(deep=True)
f0.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD0_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.085346,Month,beta_cash_app_score,1.1.0,FPD0
1,2023-01-02,2023-01-08,0.05,Week,beta_cash_app_score,1.1.0,FPD0
2,2023-01-09,2023-01-15,0.0,Week,beta_cash_app_score,1.1.0,FPD0
3,2023-01-16,2023-01-22,0.121212,Week,beta_cash_app_score,1.1.0,FPD0
4,2023-01-23,2023-01-29,0.222222,Week,beta_cash_app_score,1.1.0,FPD0


## FPD10

## Test

In [3376]:
# Test sample for FPD10.
# - `parsed`: rows from audit_balance.ml_model_run_details for the two app-score model
#   display names; `prediction` is normalised (single -> double quotes, None -> null)
#   into prediction_clean so it can be read as JSON.
# - `latest_request`: most recent row by start_time per
#   (customerId, digitalLoanAccountId, modelDisplayName).
# - `model_run`: most recent 'Beta-Cash-Model-response' request by publish_time per
#   (customerId, digitalLoanAccountId, modelName), with a normalised request payload.
# - `modelname`: score is $.combined_score from prediction_clean (SAFE_CAST to Float64);
#   trenchCategory falls back to $.predictions.trenchCategory from the request payload.
# - `deliquency`: per-loan default flags and the matching flg_mature_* flags.
# - Final select: joins loan_master_table and `deliquency`, tags rows as 'Test', and keeps
#   disbursed loans with a non-null score, flg_mature_fpd10 = 1 and trenchCategory = 'Trench 3'.
# NOTE(review): the single-argument COALESCE around SAFE_CAST has no effect, and the WHERE
# filters on loanmaster columns make the LEFT JOIN act as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query through the BigQuery client and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical across every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,3342183,14461507-8dad-4033-ad2d-1d87db375402,60833421830037,0.475069,Trench 3,2025-10-11 20:40:22,2025-10-11,2025-10,Test,0,1
1,3528452,bd357d92-fe0d-4eb2-9e40-a7fb7b612b75,60835284520027,0.467884,Trench 3,2025-10-10 17:49:11,2025-10-10,2025-10,Test,0,1
2,2826643,3f344a2a-fa6f-4916-8d53-b8aa6789925e,60828266430033,0.481588,Trench 3,2025-10-14 11:43:37,2025-10-14,2025-10,Test,1,1
3,2786515,73d6047f-62af-499a-bb58-1455bb78e1e1,60827865150031,0.472286,Trench 3,2025-10-11 07:33:05,2025-10-11,2025-10,Test,0,1
4,3373318,c1754bc9-3dc3-4849-b13f-3c5df444305d,60833733180025,0.500644,Trench 3,2025-10-13 10:07:55,2025-10-14,2025-10,Test,1,1


In [3377]:
# Keep an independent copy of the Test FPD10 result as df1, because `dfd`
# is reassigned by the Train query cell that follows.
df1 = dfd.copy(deep=True)

## Train

In [3378]:
# Train sample for FPD10.
# - `parsed` / `modelname`: rows from ml_training_model_run_details for the two
#   app-score model display names; `prediction` is exposed as beta_cash_app_score.
# - `deliquency`: per-loan default flags and the matching flg_mature_* flags.
# - Final select: joins loan_master_table on digitalLoanAccountId and `deliquency`
#   on loanAccountNumber, tags rows as 'Train', and keeps disbursed loans with a
#   non-null score, flg_mature_fpd10 = 1 and trenchCategory = 'Trench 3'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN act as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query through the BigQuery client and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical across every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10
0,1575532,748d8d2b-6aa6-41c7-a69d-e2e48eaecab8,60815755320025,0.459146,Trench 3,2023-05-31 10:14:04,2023-06-18,2023-05,Train,0,1
1,1074894,fe3eeb96-10a1-4fc5-aa80-2251358b9b99,60810748940032,0.435077,Trench 3,2023-05-23 21:50:26,2023-05-23,2023-05,Train,0,1
2,1947253,f115a92a-f82f-47c6-bd82-100af2beda89,60819472530028,0.451023,Trench 3,2023-10-25 12:36:26,2023-10-25,2023-10,Train,0,1
3,1655570,7af5bf0a-58de-402f-b36f-f29e2c87d2a6,60816555700035,0.507299,Trench 3,2023-05-21 06:48:25,2023-05-21,2023-05,Train,0,1
4,1829626,8ddb22da-e0ab-40a1-a579-f77a96ae5050,60818296260029,0.453202,Trench 3,2023-10-25 10:21:52,2023-10-25,2023-10,Train,0,1


In [3379]:
# Keep an independent copy of the Train FPD10 result as df2.
df2 = dfd.copy(deep=True)

In [3380]:
# Stack the Train (df2) and Test (df1) samples into one frame with a fresh index.
# Empty frames are excluded first: concatenating an empty frame raises pandas'
# FutureWarning about empty entries, and their dtypes will start to influence the
# result in a future pandas release. If both frames are empty, df2 alone is used
# so the column layout is preserved.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42331 entries, 0 to 42330
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             42331 non-null  object        
 1   digitalLoanAccountId   42331 non-null  object        
 2   loanAccountNumber      42331 non-null  object        
 3   beta_cash_app_score    42331 non-null  float64       
 4   trenchCategory         42331 non-null  object        
 5   appln_submit_datetime  42331 non-null  datetime64[us]
 6   disbursementdate       42331 non-null  dbdate        
 7   Application_month      42331 non-null  object        
 8   Data_selection         42331 non-null  object        
 9   deffpd10               42331 non-null  Int64         
 10  flg_mature_fpd10       42331 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(6)
memory usage: 3.6+ MB


In [3381]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = pd.to_numeric(
    df_concat['beta_cash_app_score'],
    errors='coerce',
)

In [3382]:
# Persist the combined Train/Test FPD10 sample.
# The file name now matches the data written (Trench 3, FPD10). The previous name said
# "trench1fpd0", which mislabels both the trench and the target and can overwrite the
# export of another section.
df_concat.to_csv(r"beta_cash_app_scoretrench3fpd10.csv")

In [3383]:
# Periodic Gini for the FPD10 target; calculate_periodic_gini is defined earlier in the notebook.
# Arguments are presumably: data frame, score column, target flag column, label — confirm against its definition.
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffpd10',
    'FPD10',
)

In [3384]:
# Snapshot the FPD10 Gini table as f1, since `gini_results` is reused for the next target.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,-0.212418,Month,beta_cash_app_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.083333,Week,beta_cash_app_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,-0.444444,Week,beta_cash_app_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.266667,Week,beta_cash_app_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,-0.575758,Week,beta_cash_app_score,1.1.0,FPD10


## FPD30

## Test

In [3385]:
# Test sample for FPD30.
# - `parsed`: rows from audit_balance.ml_model_run_details for the two app-score model
#   display names; `prediction` is normalised (single -> double quotes, None -> null)
#   into prediction_clean so it can be read as JSON.
# - `latest_request`: most recent row by start_time per
#   (customerId, digitalLoanAccountId, modelDisplayName).
# - `model_run`: most recent 'Beta-Cash-Model-response' request by publish_time per
#   (customerId, digitalLoanAccountId, modelName), with a normalised request payload.
# - `modelname`: score is $.combined_score from prediction_clean (SAFE_CAST to Float64);
#   trenchCategory falls back to $.predictions.trenchCategory from the request payload.
# - `deliquency`: per-loan default flags and the matching flg_mature_* flags.
# - Final select: joins loan_master_table and `deliquency`, tags rows as 'Test', and keeps
#   disbursed loans with a non-null score, flg_mature_fpd30 = 1 and trenchCategory = 'Trench 3'.
# NOTE(review): the single-argument COALESCE around SAFE_CAST has no effect, and the WHERE
# filters on loanmaster columns make the LEFT JOIN act as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query through the BigQuery client and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical across every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30


In [3386]:
# Keep an independent copy of the Test FPD30 result as df1, because `dfd`
# is reassigned by the Train query cell that follows.
df1 = dfd.copy(deep=True)

## Train

In [3387]:
# Train sample for FPD30.
# - `parsed` / `modelname`: rows from ml_training_model_run_details for the two
#   app-score model display names; `prediction` is exposed as beta_cash_app_score.
# - `deliquency`: per-loan default flags and the matching flg_mature_* flags.
# - Final select: joins loan_master_table on digitalLoanAccountId and `deliquency`
#   on loanAccountNumber, tags rows as 'Train', and keeps disbursed loans with a
#   non-null score, flg_mature_fpd30 = 1 and trenchCategory = 'Trench 3'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN act as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query through the BigQuery client and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical across every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30
0,1459184,ca25e25d-bd63-4c76-b794-8ca2ab5f11db,60814591840025,0.476371,Trench 3,2023-04-11 11:17:07,2023-04-11,2023-04,Train,0,1
1,2269961,d2e856b4-7f57-4251-a822-e1ede897df79,60822699610022,0.493158,Trench 3,2023-10-19 11:39:36,2023-10-20,2023-10,Train,0,1
2,1657938,44304a34-2ace-439d-8d41-5931d0858efa,60816579380035,0.536394,Trench 3,2023-06-01 11:14:35,2023-06-01,2023-06,Train,0,1
3,1999674,e9f5e38c-d436-4d3c-be0d-8a09a3d32cf0,60819996740028,0.466496,Trench 3,2023-10-28 13:51:51,2023-10-28,2023-10,Train,0,1
4,1502114,e7c15ec8-123b-4dd9-aaec-71d60c2be54a,60815021140024,0.477969,Trench 3,2023-01-04 15:45:16,2023-01-04,2023-01,Train,0,1


In [3388]:
# Keep an independent copy of the Train FPD30 result as df2.
df2 = dfd.copy(deep=True)

In [3389]:
# Stack the Train (df2) and Test (df1) samples into one frame with a fresh index.
# The Test frame can come back with no rows; concatenating an empty frame raises
# pandas' FutureWarning about empty entries, and their dtypes will start to influence
# the result in a future pandas release. Empty frames are therefore excluded first.
# If both frames are empty, df2 alone is used so the column layout is preserved.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40832 entries, 0 to 40831
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             40832 non-null  Int64         
 1   digitalLoanAccountId   40832 non-null  object        
 2   loanAccountNumber      40832 non-null  object        
 3   beta_cash_app_score    40832 non-null  float64       
 4   trenchCategory         40832 non-null  object        
 5   appln_submit_datetime  40832 non-null  datetime64[us]
 6   disbursementdate       40832 non-null  dbdate        
 7   Application_month      40832 non-null  object        
 8   Data_selection         40832 non-null  object        
 9   deffpd30               40832 non-null  Int64         
 10  flg_mature_fpd30       40832 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 3.5+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3390]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = pd.to_numeric(
    df_concat['beta_cash_app_score'],
    errors='coerce',
)

In [3391]:
# Persist the combined Train/Test FPD30 sample.
# The file name now matches the data written (Trench 3, FPD30). The previous name said
# "trench1", which mislabels the export and can overwrite another trench's file.
df_concat.to_csv(r"beta_cash_app_scoretrench3fpd30.csv")

In [3392]:
# Periodic Gini for the FPD30 target; calculate_periodic_gini is defined earlier in the notebook.
# Arguments are presumably: data frame, score column, target flag column, label — confirm against its definition.
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffpd30',
    'FPD30',
)

In [3393]:
# Snapshot the FPD30 Gini table as f2, since `gini_results` is reused for the next target.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,-0.25,Month,beta_cash_app_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.083333,Week,beta_cash_app_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,-0.444444,Week,beta_cash_app_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.125,Week,beta_cash_app_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,-0.575758,Week,beta_cash_app_score,1.1.0,FPD30


## FSPD30

## Test

In [3394]:
# Test sample for FSPD30.
# - `parsed`: rows from audit_balance.ml_model_run_details for the two app-score model
#   display names; `prediction` is normalised (single -> double quotes, None -> null)
#   into prediction_clean so it can be read as JSON.
# - `latest_request`: most recent row by start_time per
#   (customerId, digitalLoanAccountId, modelDisplayName).
# - `model_run`: most recent 'Beta-Cash-Model-response' request by publish_time per
#   (customerId, digitalLoanAccountId, modelName), with a normalised request payload.
# - `modelname`: score is $.combined_score from prediction_clean (SAFE_CAST to Float64);
#   trenchCategory falls back to $.predictions.trenchCategory from the request payload.
# - `deliquency`: per-loan default flags and the matching flg_mature_* flags.
# - Final select: joins loan_master_table and `deliquency`, tags rows as 'Test', and keeps
#   disbursed loans with a non-null score, flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 3'.
# NOTE(review): the single-argument COALESCE around SAFE_CAST has no effect, and the WHERE
# filters on loanmaster columns make the LEFT JOIN act as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query through the BigQuery client and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical across every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30


In [3395]:
# Keep an independent copy of the Test FSPD30 result as df1, because `dfd`
# is reassigned by the Train query cell that follows.
df1 = dfd.copy(deep=True)

## Train

In [3396]:
# Train sample for FSPD30.
# - `parsed` / `modelname`: rows from ml_training_model_run_details for the two
#   app-score model display names; `prediction` is exposed as beta_cash_app_score.
# - `deliquency`: per-loan default flags and the matching flg_mature_* flags.
# - Final select: joins loan_master_table on digitalLoanAccountId and `deliquency`
#   on loanAccountNumber, tags rows as 'Train', and keeps disbursed loans with a
#   non-null score, flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 3'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN act as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query through the BigQuery client and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical across every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30
0,1949158,95258325-e632-47f0-9a0e-29fd4c56498b,60819491580028,0.539017,Trench 3,2023-03-24 14:37:45,2023-03-30,2023-03,Train,0,1
1,1517405,9f21b1de-0674-43f6-8a08-fd4fd424be69,60815174050077,0.473077,Trench 3,2023-03-09 13:02:57,2023-03-09,2023-03,Train,0,1
2,1344293,fad03ced-a84f-4989-9f1e-a41684d91da4,60813442930028,0.462802,Trench 3,2023-02-28 12:40:11,2023-02-28,2023-02,Train,0,1
3,1513854,cad2ee27-37c2-403c-9462-17290d680acc,60815138540022,0.480921,Trench 3,2023-02-17 14:35:49,2023-02-17,2023-02,Train,0,1
4,1489909,0e6df6a0-6322-492d-aa5c-187446c8fa94,60814899090049,0.458079,Trench 3,2023-03-07 20:18:19,2023-03-07,2023-03,Train,0,1


In [3397]:
# Keep an independent copy of the Train FSPD30 result as df2.
df2 = dfd.copy(deep=True)

In [3398]:
# Stack the Train (df2) and Test (df1) samples into one frame with a fresh index.
# The Test frame can come back with no rows; concatenating an empty frame raises
# pandas' FutureWarning about empty entries, and their dtypes will start to influence
# the result in a future pandas release. Empty frames are therefore excluded first.
# If both frames are empty, df2 alone is used so the column layout is preserved.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36397 entries, 0 to 36396
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             36397 non-null  Int64         
 1   digitalLoanAccountId   36397 non-null  object        
 2   loanAccountNumber      36397 non-null  object        
 3   beta_cash_app_score    36397 non-null  float64       
 4   trenchCategory         36397 non-null  object        
 5   appln_submit_datetime  36397 non-null  datetime64[us]
 6   disbursementdate       36397 non-null  dbdate        
 7   Application_month      36397 non-null  object        
 8   Data_selection         36397 non-null  object        
 9   deffspd30              36397 non-null  Int64         
 10  flg_mature_fspd_30     36397 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 3.2+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3399]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = pd.to_numeric(
    df_concat['beta_cash_app_score'],
    errors='coerce',
)

In [3400]:
# Persist the combined Train/Test FSPD30 sample.
# The file name now matches the data written (Trench 3, FSPD30). The previous name said
# "trench1", which mislabels the export and can overwrite another trench's file.
df_concat.to_csv(r"beta_cash_app_scoretrench3fspd30.csv")

In [3401]:
# Periodic Gini for the FSPD30 target; calculate_periodic_gini is defined earlier in the notebook.
# Arguments are presumably: data frame, score column, target flag column, label — confirm against its definition.
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffspd30',
    'FSPD30',
)

In [3402]:
# Snapshot the FSPD30 Gini table as f3, since `gini_results` is reused for the next target.
f3 = gini_results.copy(deep=True)
f3.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,-0.045082,Month,beta_cash_app_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.272727,Week,beta_cash_app_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.083333,Week,beta_cash_app_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.153846,Week,beta_cash_app_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,-0.18,Week,beta_cash_app_score,1.1.0,FSPD30


## FSTPD30

## Test

In [3403]:
# Test population for the FSTPD30 Gini (Trench 3).
# The query keeps the most recent run per (customerId, digitalLoanAccountId,
# modelDisplayName), reads the score from the "combined_score" key of the cleaned
# prediction payload, falls back to the request payload for a missing trenchCategory,
# and keeps disbursed loans whose FSTPD30 observation is mature.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run on BigQuery and drop exact duplicate rows (first occurrence wins).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30


In [3404]:
# Test rows; copied because `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [3405]:
# Train population for the FSTPD30 Gini (Trench 3).
# Scores come from the training run-details table (the `prediction` column is used
# as the score as-is) and are joined to the loan master and delinquency flags with
# the same disbursement / maturity / trench filters as the Test query.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run on BigQuery and drop exact duplicate rows (first occurrence wins).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(n=5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30
0,1620070,6105b681-7dd2-4726-a25b-22a139c98f69,60816200700027,0.500896,Trench 3,2023-01-08 07:49:21,2023-01-09,2023-01,Train,0,1
1,1501663,37d30751-08a7-4d4a-a825-e144a683e271,60815016630024,0.504762,Trench 3,2023-02-12 13:11:25,2023-02-12,2023-02,Train,0,1
2,1501718,4028c456-8bff-4570-b9b4-caebc02cd9fa,60815017180022,0.475654,Trench 3,2023-03-23 13:59:47,2023-03-23,2023-03,Train,0,1
3,1580958,0fe72668-338a-4b8d-85ee-1db765d40832,60815809580022,0.420875,Trench 3,2023-01-27 21:04:07,2023-01-30,2023-01,Train,1,1
4,1528735,dab3ed1f-76d4-4a9f-89f2-e3f9f26275fe,60815287350039,0.484893,Trench 3,2023-03-06 11:57:37,2023-03-06,2023-03,Train,0,1


In [3406]:
# Train rows; copied so later reassignments of `dfd` cannot affect them.
df2 = dfd.copy(deep=True)

In [3407]:
# Stack Train (df2) on top of Test (df1).
# Empty frames are dropped first: concatenating an empty frame triggers pandas'
# FutureWarning about empty/all-NA entries (the Test query above returned no rows),
# and excluding such entries is the remedy pandas recommends.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both queries returned nothing: keep an empty frame with the expected columns.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31592 entries, 0 to 31591
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             31592 non-null  Int64         
 1   digitalLoanAccountId   31592 non-null  object        
 2   loanAccountNumber      31592 non-null  object        
 3   beta_cash_app_score    31592 non-null  float64       
 4   trenchCategory         31592 non-null  object        
 5   appln_submit_datetime  31592 non-null  datetime64[us]
 6   disbursementdate       31592 non-null  dbdate        
 7   Application_month      31592 non-null  object        
 8   Data_selection         31592 non-null  object        
 9   deffstpd30             31592 non-null  Int64         
 10  flg_mature_fstpd_30    31592 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 2.7+ MB


  df_concat = pd.concat([df2, df1], ignore_index=True)


In [3408]:
# Coerce the app score to a numeric dtype; values that cannot be parsed become NaN.
score_as_number = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = score_as_number

In [3409]:
# Snapshot of the Train+Test frame used for the FSTPD30 Gini.
# Both source queries filter trenchCategory = 'Trench 3', so the file is labelled
# trench3; the previous "trench1" name mislabelled the data and collided with the
# Trench 1 export name.
# NOTE(review): confirm nothing downstream still reads the old "trench1" file name.
df_concat.to_csv(r"beta_cash_app_scoretrench3fstpd30.csv")

In [3410]:
# Per-period Gini of the app score against the FSTPD30 default flag
# (calculate_periodic_gini is defined earlier in the notebook).
gini_results = calculate_periodic_gini(
    df_concat,
    'beta_cash_app_score',
    'deffstpd30',
    'FSTPD30',
)

In [3411]:
# Keep an independent copy of the FSTPD30 result; `gini_results` is reused by later cells.
f4 = gini_results.copy(deep=True)
f4.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.074074,Month,beta_cash_app_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.3,Week,beta_cash_app_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.357143,Week,beta_cash_app_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.151515,Week,beta_cash_app_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.031746,Week,beta_cash_app_score,1.1.0,FSTPD30


## Combining the dataframes

In [3412]:
import functools

# Descriptor columns shared by every per-bad-rate Gini table.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']
# One Gini table per bad-rate definition: FPD0, FPD10, FPD30, FSPD30, FSTPD30.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the shared descriptor columns.

    `bad_rate` is one of the join keys, so rows from tables with different
    bad-rate labels do not match each other; each input row is kept and the
    Gini columns that come from the other table are left as NaN.
    """
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left-to-right into a single frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_cash_app_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'beta_cash_app_score_FPD10_gini', 'beta_cash_app_score_FPD30_gini',
       'beta_cash_app_score_FSPD30_gini',
       'beta_cash_app_score_FSTPD30_gini'], dtype=object)

In [3413]:
# Put the descriptor columns first, followed by one Gini column per bad-rate definition.
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','beta_cash_app_score_FPD0_gini'
                     ,'beta_cash_app_score_FPD10_gini', 'beta_cash_app_score_FPD30_gini',
       'beta_cash_app_score_FSPD30_gini',
       'beta_cash_app_score_FSTPD30_gini']].copy()
# Tag every Gini column with the trench (t3). The result is reassigned instead of
# using inplace=True so that re-running the cell behaves predictably.
final_df = final_df.rename(columns={'beta_cash_app_score_FPD0_gini':'beta_cash_app_score_FPD0_t3_gini'
                         , 'beta_cash_app_score_FPD10_gini':'beta_cash_app_score_FPD10_t3_gini'
                         , 'beta_cash_app_score_FPD30_gini':'beta_cash_app_score_FPD30_t3_gini'
                         , 'beta_cash_app_score_FSPD30_gini':'beta_cash_app_score_FSPD30_t3_gini'
                        , 'beta_cash_app_score_FSTPD30_gini':'beta_cash_app_score_FSTPD30_t3_gini',
                        })
# This summary is built from queries filtered on trenchCategory = 'Trench 3' and is
# uploaded to the *_t3_* table, so the label must be 'Trench 3' (it was 'Trench 2',
# a copy-paste leftover that mislabelled every uploaded row).
final_df['Trench_category'] = 'Trench 3'
final_df['Model_display_name'] = 'apps_score_cash - t3'
final_df['Product_type'] = 'CASH'
final_df.dtypes

start_date                             datetime64[ns]
end_date                               datetime64[ns]
period                                         object
Model_Name                                     object
version                                        object
bad_rate                                       object
beta_cash_app_score_FPD0_t3_gini              float64
beta_cash_app_score_FPD10_t3_gini             float64
beta_cash_app_score_FPD30_t3_gini             float64
beta_cash_app_score_FSPD30_t3_gini            float64
beta_cash_app_score_FSTPD30_t3_gini           float64
Trench_category                                object
Model_display_name                             object
Product_type                                   object
dtype: object

In [3414]:
# Preview the first rows of the combined Trench 3 summary before uploading.
final_df.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,beta_cash_app_score_FPD0_t3_gini,beta_cash_app_score_FPD10_t3_gini,beta_cash_app_score_FPD30_t3_gini,beta_cash_app_score_FSPD30_t3_gini,beta_cash_app_score_FSTPD30_t3_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,beta_cash_app_score,1.1.0,FPD0,0.085346,,,,,Trench 2,apps_score_cash - t3,CASH
1,2023-01-02,2023-01-08,Week,beta_cash_app_score,1.1.0,FPD0,0.05,,,,,Trench 2,apps_score_cash - t3,CASH
2,2023-01-09,2023-01-15,Week,beta_cash_app_score,1.1.0,FPD0,0.0,,,,,Trench 2,apps_score_cash - t3,CASH
3,2023-01-16,2023-01-22,Week,beta_cash_app_score,1.1.0,FPD0,0.121212,,,,,Trench 2,apps_score_cash - t3,CASH
4,2023-01-23,2023-01-29,Week,beta_cash_app_score,1.1.0,FPD0,0.222222,,,,,Trench 2,apps_score_cash - t3,CASH


In [3415]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.app_score_cash_t3_v1_gini4"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c859f3b5-4c33-4010-860a-3726b1f820b0>

# Beta-Cash-Stack-Model

## Trench 1

## FPD0

In [3416]:
## Trench 1
# Beta-Cash-Stack-Model, FPD0: builds `f0`, the per-period Gini of the stack score
# against the FPD0 default flag, from the Test and Train populations.

## Test
# Keeps the most recent run per (customerId, digitalLoanAccountId, modelDisplayName),
# falls back to the request payload for a missing trenchCategory, and keeps disbursed
# Trench 1 loans whose FPD0 observation is mature.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run on BigQuery and drop exact duplicate rows. The former mid-cell `dfd.head()`
# was removed: only the last expression of a cell is displayed, so it did nothing.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# Scores come from the training run-details table (`prediction` is used as the score
# as-is) with the same disbursement / maturity / trench filters as the Test query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train on top of Test. Empty frames are dropped first because concatenating
# an empty frame triggers pandas' FutureWarning about empty/all-NA entries.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both queries returned nothing: keep an empty frame with the expected columns.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

# Coerce the score to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# calculate_periodic_gini is defined earlier in the notebook.
gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffpd0', 'FPD0')

# Independent copy: `gini_results` is overwritten by the next bad-rate section.
f0 = gini_results.copy()
# Last expression of the cell, so the frame gets the notebook's rich display
# (this replaces print(), which truncated the table to plain text).
f0.head()



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16857 entries, 0 to 16856
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16857 non-null  object        
 1   digitalLoanAccountId   16857 non-null  object        
 2   loanAccountNumber      16857 non-null  object        
 3   Beta_cash_stack_score  16857 non-null  object        
 4   trenchCategory         16857 non-null  object        
 5   appln_submit_datetime  16857 non-null  datetime64[us]
 6   disbursementdate       16857 non-null  dbdate        
 7   Application_month      16857 non-null  object        
 8   Data_selection         16857 non-null  object        
 9   deffpd0                16857 non-null  Int64         
 10  flg_mature_fpd0        16857 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.4+ MB
  start_date   end_date  Beta_cash_stack_sco

## FPD10

In [3417]:
# Beta-Cash-Stack-Model, Trench 1, FPD10: builds `f1`, the per-period Gini of the
# stack score against the FPD10 default flag, from the Test and Train populations.

## Test
# Keeps the most recent run per (customerId, digitalLoanAccountId, modelDisplayName),
# falls back to the request payload for a missing trenchCategory, and keeps disbursed
# Trench 1 loans whose FPD10 observation is mature.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run on BigQuery and drop exact duplicate rows. The former mid-cell `dfd.head()`
# was removed: only the last expression of a cell is displayed, so it did nothing.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# Scores come from the training run-details table (`prediction` is used as the score
# as-is) with the same disbursement / maturity / trench filters as the Test query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train on top of Test. Empty frames are dropped first because concatenating
# an empty frame triggers pandas' FutureWarning about empty/all-NA entries.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both queries returned nothing: keep an empty frame with the expected columns.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

# Coerce the score to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# calculate_periodic_gini is defined earlier in the notebook.
gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffpd10', 'FPD10')

# Independent copy: `gini_results` is overwritten by the next bad-rate section.
f1 = gini_results.copy()
# Last expression of the cell, so the frame gets the notebook's rich display
# (this replaces print(), which truncated the table to plain text).
f1.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16306 entries, 0 to 16305
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16306 non-null  object        
 1   digitalLoanAccountId   16306 non-null  object        
 2   loanAccountNumber      16306 non-null  object        
 3   Beta_cash_stack_score  16306 non-null  object        
 4   trenchCategory         16306 non-null  object        
 5   appln_submit_datetime  16306 non-null  datetime64[us]
 6   disbursementdate       16306 non-null  dbdate        
 7   Application_month      16306 non-null  object        
 8   Data_selection         16306 non-null  object        
 9   deffpd10               16306 non-null  Int64         
 10  flg_mature_fpd10       16306 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1.4+ MB
  start_date   end_date  Beta_cash_stack_sco

## FPD30

In [3418]:
# Beta-Cash-Stack-Model, Trench 1, FPD30: builds `f2`, the per-period Gini of the
# stack score against the FPD30 default flag, from the Test and Train populations.

## Test
# Keeps the most recent run per (customerId, digitalLoanAccountId, modelDisplayName),
# falls back to the request payload for a missing trenchCategory, and keeps disbursed
# Trench 1 loans whose FPD30 observation is mature.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run on BigQuery and drop exact duplicate rows. The former mid-cell `dfd.head()`
# was removed: only the last expression of a cell is displayed, so it did nothing.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# Scores come from the training run-details table (`prediction` is used as the score
# as-is) with the same disbursement / maturity / trench filters as the Test query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train on top of Test. Empty frames are dropped first: this cell's plain
# pd.concat([df2, df1]) emitted pandas' FutureWarning about empty/all-NA entries,
# and excluding such entries is the remedy pandas recommends.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both queries returned nothing: keep an empty frame with the expected columns.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

# Coerce the score to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# calculate_periodic_gini is defined earlier in the notebook.
gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffpd30', 'FPD30')

# Independent copy: `gini_results` is overwritten by the next bad-rate section.
f2 = gini_results.copy()
# Last expression of the cell, so the frame gets the notebook's rich display
# (this replaces print(), which truncated the table to plain text).
f2.head()

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15513 entries, 0 to 15512
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15513 non-null  Int64         
 1   digitalLoanAccountId   15513 non-null  object        
 2   loanAccountNumber      15513 non-null  object        
 3   Beta_cash_stack_score  15513 non-null  float64       
 4   trenchCategory         15513 non-null  object        
 5   appln_submit_datetime  15513 non-null  datetime64[us]
 6   disbursementdate       15513 non-null  dbdate        
 7   Application_month      15513 non-null  object        
 8   Data_selection         15513 non-null  object        
 9   deffpd30               15513 non-null  Int64         
 10  flg_mature_fpd30       15513 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.3+ MB
  start_date   end_date  Beta_ca

## FSPD30

In [3419]:
# 'Test' population: scores logged in audit_balance.ml_model_run_details,
# keeping the most recent run per (customerId, digitalLoanAccountId,
# modelDisplayName). trenchCategory falls back to $.predictions.trenchCategory
# from the latest 'Beta-Cash-Model-response' request payload when the run row
# has none. Only disbursed 'Trench 1' loans with flg_mature_fspd_30 = 1 are kept.
# The WHERE filters on loanmaster columns make the LEFT JOIN behave like an
# inner join: rows without a loan_master_table match are dropped.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Execute the Test query and drop rows that are exact duplicates.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# 'Train' population: scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details (no latest-run
# de-duplication is applied in this query), with the same loan/delinquency
# joins and the same FSPD30 maturity and 'Trench 1' filters as the Test query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas raises a FutureWarning when an empty frame takes part in concat and
# will stop ignoring such frames when deciding result dtypes in a future
# release. If both frames are empty, fall back to the original inputs so the
# result is still an empty frame carrying the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames if non_empty_frames else [df2, df1], ignore_index=True)
df_concat.info()

# errors='coerce' turns any score that cannot be parsed as a number into NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Periodic Gini of the score against the FSPD30 default flag.
gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffspd30', 'FSPD30')

f3 = gini_results.copy()
print(f3.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14297 entries, 0 to 14296
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14297 non-null  Int64         
 1   digitalLoanAccountId   14297 non-null  object        
 2   loanAccountNumber      14297 non-null  object        
 3   Beta_cash_stack_score  14297 non-null  float64       
 4   trenchCategory         14297 non-null  object        
 5   appln_submit_datetime  14297 non-null  datetime64[us]
 6   disbursementdate       14297 non-null  dbdate        
 7   Application_month      14297 non-null  object        
 8   Data_selection         14297 non-null  object        
 9   deffspd30              14297 non-null  Int64         
 10  flg_mature_fspd_30     14297 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.2+ MB
  start_date   end_date  Beta_ca

  df_concat = pd.concat([df2, df1], ignore_index=True)


## FSTPD30

In [3420]:
# 'Test' population: scores logged in audit_balance.ml_model_run_details,
# keeping the most recent run per (customerId, digitalLoanAccountId,
# modelDisplayName). trenchCategory falls back to $.predictions.trenchCategory
# from the latest 'Beta-Cash-Model-response' request payload when the run row
# has none. Only disbursed 'Trench 1' loans with flg_mature_fstpd_30 = 1 are kept.
# The WHERE filters on loanmaster columns make the LEFT JOIN behave like an
# inner join: rows without a loan_master_table match are dropped.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Execute the Test query and drop rows that are exact duplicates.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# 'Train' population: scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details (no latest-run
# de-duplication is applied in this query), with the same loan/delinquency
# joins and the same FSTPD30 maturity and 'Trench 1' filters as the Test query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas raises a FutureWarning when an empty frame takes part in concat and
# will stop ignoring such frames when deciding result dtypes in a future
# release. If both frames are empty, fall back to the original inputs so the
# result is still an empty frame carrying the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames if non_empty_frames else [df2, df1], ignore_index=True)
df_concat.info()

# errors='coerce' turns any score that cannot be parsed as a number into NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Periodic Gini of the score against the FSTPD30 default flag.
gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffstpd30', 'FSTPD30')

f4 = gini_results.copy()
print(f4.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13104 entries, 0 to 13103
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13104 non-null  Int64         
 1   digitalLoanAccountId   13104 non-null  object        
 2   loanAccountNumber      13104 non-null  object        
 3   Beta_cash_stack_score  13104 non-null  float64       
 4   trenchCategory         13104 non-null  object        
 5   appln_submit_datetime  13104 non-null  datetime64[us]
 6   disbursementdate       13104 non-null  dbdate        
 7   Application_month      13104 non-null  object        
 8   Data_selection         13104 non-null  object        
 9   deffstpd30             13104 non-null  Int64         
 10  flg_mature_fstpd_30    13104 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 1.1+ MB
  start_date   end_date  Beta_ca

  df_concat = pd.concat([df2, df1], ignore_index=True)


## Combining the dataframes

In [3421]:
import functools

# Per-target Gini tables built by the preceding cells. f2/f3/f4 hold the
# FPD30/FSPD30/FSTPD30 results; f0/f1 are presumably FPD0/FPD10 (defined
# earlier in the notebook — confirm they are the Trench 1 versions).
dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two Gini result frames on the shared key columns in `common_columns`."""
    return pd.merge(df1, df2, on=common_columns, how='outer')

# NOTE(review): 'bad_rate' is part of the join key. If calculate_periodic_gini
# reports a target-specific bad rate, rows for different targets will not line
# up on it and the outer join will keep them as separate rows — confirm.
final_df = functools.reduce(merge_dataframes, dataframes)

# Keep the key columns plus one Gini column per target, tagging each Gini
# column with the trench suffix ("_t1") so it is distinguishable downstream.
gini_targets = ['FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30']
gini_rename = {
    f'Beta_cash_stack_score_{target}_gini': f'Beta_cash_stack_score_{target}_t1_gini'
    for target in gini_targets
}
final_df = final_df[common_columns + list(gini_rename)].rename(columns=gini_rename)

# Constant descriptor columns for this model/trench combination.
final_df['Trench_category'] = 'Trench 1'
final_df['Model_display_name'] = 'beta_stack_model_cash - t1'
final_df['Product_type'] = 'CASH'

# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_cash_t1_v1_gini_v4"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=4bab9ec6-d4a9-46b2-8b81-bc10dfeefa7b>

## Trench 2

In [3422]:
## Trench 2
## Test
# 'Test' population: scores logged in audit_balance.ml_model_run_details,
# keeping the most recent run per (customerId, digitalLoanAccountId,
# modelDisplayName). trenchCategory falls back to $.predictions.trenchCategory
# from the latest 'Beta-Cash-Model-response' request payload when the run row
# has none. Only disbursed 'Trench 2' loans with flg_mature_fpd0 = 1 are kept.
# The WHERE filters on loanmaster columns make the LEFT JOIN behave like an
# inner join: rows without a loan_master_table match are dropped.

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Execute the Test query and drop rows that are exact duplicates.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# 'Train' population: scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details (no latest-run
# de-duplication is applied in this query), with the same loan/delinquency
# joins and the same FPD0 maturity and 'Trench 2' filters as the Test query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas raises a FutureWarning when an empty frame takes part in concat and
# will stop ignoring such frames when deciding result dtypes in a future
# release. If both frames are empty, fall back to the original inputs so the
# result is still an empty frame carrying the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames if non_empty_frames else [df2, df1], ignore_index=True)
df_concat.info()

# errors='coerce' turns any score that cannot be parsed as a number into NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Periodic Gini of the score against the FPD0 default flag.
gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffpd0', 'FPD0')

f0 = gini_results.copy()
print(f0.head())

## FPD10
# 'Test' population: scores logged in audit_balance.ml_model_run_details,
# keeping the most recent run per (customerId, digitalLoanAccountId,
# modelDisplayName). trenchCategory falls back to $.predictions.trenchCategory
# from the latest 'Beta-Cash-Model-response' request payload when the run row
# has none. Only disbursed 'Trench 2' loans with flg_mature_fpd10 = 1 are kept.
# The WHERE filters on loanmaster columns make the LEFT JOIN behave like an
# inner join: rows without a loan_master_table match are dropped.

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Execute the Test query and drop rows that are exact duplicates.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# 'Train' population: scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details (no latest-run
# de-duplication is applied in this query), with the same loan/delinquency
# joins and the same FPD10 maturity and 'Trench 2' filters as the Test query.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train (df2) on top of Test (df1). Empty frames are excluded first:
# pandas raises a FutureWarning when an empty frame takes part in concat and
# will stop ignoring such frames when deciding result dtypes in a future
# release. If both frames are empty, fall back to the original inputs so the
# result is still an empty frame carrying the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames if non_empty_frames else [df2, df1], ignore_index=True)
df_concat.info()

# errors='coerce' turns any score that cannot be parsed as a number into NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Periodic Gini of the score against the FPD10 default flag.
gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffpd10', 'FPD10')

f1 = gini_results.copy()
print(f1.head())

## FPD30 / FSPD30 / FSTPD30

# The Test and Train pulls for these three targets used to be pasted six times.
# They differ only in the outcome column, its maturity flag, the Data_selection
# label and the trench filter, so the SQL is assembled once from the pieces below.

# Score-source CTEs (everything up to and including `modelname`), keyed by the
# Data_selection label. These strings contain no `{}` placeholders and are
# concatenated verbatim in front of the shared outcome SQL.
_BETA_CASH_SCORE_CTES = {
    # Test: latest audit-log model run per customer / loan / model display name;
    # trenchCategory falls back to the value inside the latest request payload.
    'Test': """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details`
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
,
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
""",
    # Train: every row of the training run table for the same model names
    # (no "latest run" de-duplication is applied in SQL for this source).
    'Train': """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
""",
}

# Shared tail: delinquency flags plus the final SELECT. Placeholders:
#   {data_selection} - literal written to the Data_selection column
#   {target_col}     - default flag returned for the target
#   {mature_col}     - maturity flag returned and required to equal 1
#   {trench}         - trenchCategory value to keep
# The WHERE filters on loanmaster and the inner join on
# loanmaster.loanAccountNumber mean the `left join` keeps only matched loans.
_BETA_CASH_OUTCOME_SQL = """
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  '{data_selection}' Data_selection,
  del.{target_col},
  del.{mature_col}
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{mature_col} = 1
  and r.trenchCategory = '{trench}'
  ;
"""

# Gini label -> (default-flag column, maturity-flag column) in the `deliquency` CTE.
# Column names are only ever taken from this table, never from free text.
_BETA_CASH_TARGETS = {
    'FPD30': ('deffpd30', 'flg_mature_fpd30'),
    'FSPD30': ('deffspd30', 'flg_mature_fspd_30'),
    'FSTPD30': ('deffstpd30', 'flg_mature_fstpd_30'),
}


def _build_beta_cash_query(data_selection, target_label, trench):
    """Return the SQL text for one Test or Train pull of one target.

    Parameters:
    data_selection: 'Test' or 'Train'; picks the score-source CTEs and is
        written to the Data_selection column.
    target_label: key of _BETA_CASH_TARGETS ('FPD30', 'FSPD30', 'FSTPD30').
    trench: trenchCategory value to filter on, e.g. 'Trench 2'.

    Returns:
    str: the complete query.

    Raises:
    KeyError: if data_selection or target_label is not a known key.
    """
    target_col, mature_col = _BETA_CASH_TARGETS[target_label]
    outcome_sql = _BETA_CASH_OUTCOME_SQL.format(
        data_selection=data_selection,
        target_col=target_col,
        mature_col=mature_col,
        trench=trench,
    )
    return _BETA_CASH_SCORE_CTES[data_selection] + outcome_sql


def _beta_cash_periodic_gini(target_label, trench):
    """Pull Test and Train rows for one target and compute its periodic Gini.

    Runs the Test query, then the Train query, drops exact duplicate rows from
    each result, stacks Train on top of Test, prints the combined schema,
    coerces the score to numeric (unparseable values become NaN) and passes
    the frame to calculate_periodic_gini.

    Parameters:
    target_label: key of _BETA_CASH_TARGETS; also used as the Gini label.
    trench: trenchCategory value to filter on.

    Returns:
    Whatever calculate_periodic_gini returns for the combined frame.
    """
    target_col, _ = _BETA_CASH_TARGETS[target_label]

    # Same execution order as before: Test first, then Train.
    test_scores = (
        client.query(_build_beta_cash_query('Test', target_label, trench))
        .to_dataframe()
        .drop_duplicates(keep='first')
    )
    train_scores = (
        client.query(_build_beta_cash_query('Train', target_label, trench))
        .to_dataframe()
        .drop_duplicates(keep='first')
    )

    # Train rows first, Test rows second, index renumbered from 0.
    scored = pd.concat([train_scores, test_scores], ignore_index=True)
    scored.info()

    scored['Beta_cash_stack_score'] = pd.to_numeric(scored['Beta_cash_stack_score'], errors='coerce')

    return calculate_periodic_gini(scored, 'Beta_cash_stack_score', target_col, target_label)


## FPD30
f2 = _beta_cash_periodic_gini('FPD30', 'Trench 2')
print(f2.head())

## FSPD30
f3 = _beta_cash_periodic_gini('FSPD30', 'Trench 2')
print(f3.head())

## FSTPD30
f4 = _beta_cash_periodic_gini('FSTPD30', 'Trench 2')
print(f4.head())

import functools

# Per-target Gini tables to be merged, in the order f0..f4
# (f1 = FPD10, f2 = FPD30, f3 = FSPD30, f4 = FSTPD30; f0 is built earlier in the cell).
dataframes = [
    f0,
    f1,
    f2,
    f3,
    f4,
]
# Columns every per-target table is joined on.
# NOTE(review): 'bad_rate' is part of the join key. If calculate_periodic_gini
# computes it per target, the values will differ between tables and the outer
# merge will not line the rows up -- confirm against that function.
common_columns = [
    'start_date',
    'end_date',
    'period',
    'Model_Name',
    'version',
    'bad_rate',
]

def merge_dataframes(df1, df2):
    """Outer-join two frames on the module-level ``common_columns`` list.

    Parameters:
    df1: left DataFrame.
    df2: right DataFrame.

    Returns:
    DataFrame holding every key combination found in either input; columns
    that exist on only one side are NaN where the other side has no match.
    """
    merged = df1.merge(df2, how='outer', on=common_columns)
    return merged

# Fold the per-target tables into one wide frame, merging pairwise left to right.
final_df = functools.reduce(merge_dataframes, dataframes)

# Keep the key columns plus one Gini column per target, tag each Gini column
# with the trench suffix, then append the constant descriptor columns.
final_df = (
    final_df[
        [
            'start_date',
            'end_date',
            'period',
            'Model_Name',
            'version',
            'bad_rate',
            'Beta_cash_stack_score_FPD0_gini',
            'Beta_cash_stack_score_FPD10_gini',
            'Beta_cash_stack_score_FPD30_gini',
            'Beta_cash_stack_score_FSPD30_gini',
            'Beta_cash_stack_score_FSTPD30_gini',
        ]
    ]
    .rename(
        columns={
            'Beta_cash_stack_score_FPD0_gini': 'Beta_cash_stack_score_FPD0_t2_gini',
            'Beta_cash_stack_score_FPD10_gini': 'Beta_cash_stack_score_FPD10_t2_gini',
            'Beta_cash_stack_score_FPD30_gini': 'Beta_cash_stack_score_FPD30_t2_gini',
            'Beta_cash_stack_score_FSPD30_gini': 'Beta_cash_stack_score_FSPD30_t2_gini',
            'Beta_cash_stack_score_FSTPD30_gini': 'Beta_cash_stack_score_FSTPD30_t2_gini',
        }
    )
    .assign(
        Trench_category='Trench 2',
        Model_display_name='beta_stack_model_cash - t2',
        Product_type='CASH',
    )
)

final_df.head()

# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_cash_t2_v1_gini_v4"

# WRITE_TRUNCATE overwrites the destination table on every run;
# use "WRITE_APPEND" instead to add rows to what is already there.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(
    dataframe=final_df,
    destination=table_id,
    job_config=job_config,
)
job.result()  # Block until the load job has finished

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11621 entries, 0 to 11620
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11621 non-null  object        
 1   digitalLoanAccountId   11621 non-null  object        
 2   loanAccountNumber      11621 non-null  object        
 3   Beta_cash_stack_score  11621 non-null  object        
 4   trenchCategory         11621 non-null  object        
 5   appln_submit_datetime  11621 non-null  datetime64[us]
 6   disbursementdate       11621 non-null  dbdate        
 7   Application_month      11621 non-null  object        
 8   Data_selection         11621 non-null  object        
 9   deffpd0                11621 non-null  Int64         
 10  flg_mature_fpd0        11621 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1021.5+ KB
  start_date   end_date  Beta_cash_stack_

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10405 entries, 0 to 10404
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10405 non-null  Int64         
 1   digitalLoanAccountId   10405 non-null  object        
 2   loanAccountNumber      10405 non-null  object        
 3   Beta_cash_stack_score  10405 non-null  float64       
 4   trenchCategory         10405 non-null  object        
 5   appln_submit_datetime  10405 non-null  datetime64[us]
 6   disbursementdate       10405 non-null  dbdate        
 7   Application_month      10405 non-null  object        
 8   Data_selection         10405 non-null  object        
 9   deffspd30              10405 non-null  Int64         
 10  flg_mature_fspd_30     10405 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 924.8+ KB
  start_date   end_date  Beta_

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9678 entries, 0 to 9677
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9678 non-null   Int64         
 1   digitalLoanAccountId   9678 non-null   object        
 2   loanAccountNumber      9678 non-null   object        
 3   Beta_cash_stack_score  9678 non-null   float64       
 4   trenchCategory         9678 non-null   object        
 5   appln_submit_datetime  9678 non-null   datetime64[us]
 6   disbursementdate       9678 non-null   dbdate        
 7   Application_month      9678 non-null   object        
 8   Data_selection         9678 non-null   object        
 9   deffstpd30             9678 non-null   Int64         
 10  flg_mature_fstpd_30    9678 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 860.2+ KB
  start_date   end_date  Beta_ca



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=3528e996-cbfd-4e91-b976-7d8b7e666361>

## Trench 3

In [3423]:
## Trench 3
## Test

# Test pull for FPD0, Trench 3.
#   parsed         - model-run audit rows for the two Beta cash stack display names
#                    (calcFeatures / prediction_clean are built but not used below).
#   latest_request - newest row per customerId / digitalLoanAccountId /
#                    modelDisplayName, ordered by start_time.
#   model_run      - newest 'Beta-Cash-Model-response' request per customerId /
#                    digitalLoanAccountId / modelName, ordered by publish_time.
#   modelname      - raw `prediction` exposed as Beta_cash_stack_score; trenchCategory
#                    falls back to the value inside the request payload JSON.
#   deliquency     - default flags (deffpd0 .. deffstpd30) and maturity flags
#                    derived from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score that are mature
# for FPD0 and sit in 'Trench 3', and labels them 'Test'. The WHERE filters on
# loanmaster (and the inner join on its loanAccountNumber) mean the `left join`
# only keeps loans found in loan_master_table.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the Test query held in `sq` and drop rows that are exact duplicates
# across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

# df1 holds the Test rows until the Train rows are stacked on top of them.
df1 = dfd.copy(deep=True)

## Train

# Train pull for FPD0, Trench 3.
#   parsed     - rows of the training model-run table for the two Beta cash stack
#                display names (calcFeatures is built but not used below).
#   modelname  - raw `prediction` exposed as Beta_cash_stack_score. Unlike the Test
#                query there is no "latest row" QUALIFY step and no request-payload
#                fallback for trenchCategory.
#   deliquency - default flags (deffpd0 .. deffstpd30) and maturity flags
#                derived from obs_min_inst_def* / min_inst_def*.
# The final SELECT applies the same filters as the Test query (disbursed,
# non-null score, mature for FPD0, 'Trench 3') and labels the rows 'Train'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
  """
  
# Run the Train query held in `sq` and drop rows that are exact duplicates
# across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy(deep=True)

# Stack Train (df2) on top of Test (df1) and renumber the index from 0;
# info() prints the combined schema.
df_concat = pd.concat(objs=[df2, df1], ignore_index=True)
df_concat.info()

# The score can arrive as text; values that do not parse become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(
    df_concat['Beta_cash_stack_score'],
    errors='coerce',
)

gini_results = calculate_periodic_gini(
    df_concat,
    'Beta_cash_stack_score',
    'deffpd0',
    'FPD0',
)

# Keep an independent copy of the FPD0 table for the later merge.
f0 = gini_results.copy(deep=True)
print(f0.head())

## FPD10

# Test pull for FPD10, Trench 3.
# Same CTEs as the FPD0 Test query: latest audit-log model run per customerId /
# digitalLoanAccountId / modelDisplayName, trenchCategory falling back to the
# request payload JSON, and the `deliquency` default / maturity flags.
# Only the outcome differs: the SELECT returns deffpd10 and flg_mature_fpd10
# and requires flg_mature_fpd10 = 1. Rows are labelled 'Test'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query held in `sq` and collapse exact duplicate rows in one step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

# Keep an independent snapshot under the name the later concat step reads.
df1 = dfd.copy()

## Train

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
  """

# Pull the rows tagged 'Train' by the query above and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy()

# Stack the 'Train' rows first, followed by the rows already held in df1.
df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(
    df_concat['Beta_cash_stack_score'],
    errors='coerce',
)

# Gini of the stack score against the deffpd10 flag, labelled 'FPD10'.
gini_results = calculate_periodic_gini(
    df_concat,
    'Beta_cash_stack_score',
    'deffpd10',
    'FPD10',
)

f1 = gini_results.copy()
print(f1.head())

## FPD30, FSPD30 and FSTPD30

# The three sections below used to be three copies of the same two queries that
# differed only in the delinquency target column and its maturity flag. They are
# now generated from two templates so a fix to the SQL is made exactly once.

# Delinquency target column -> maturity flag column, both produced by the
# `deliquency` CTE in the templates below. Only these names may be substituted
# into the SQL text.
TRENCH3_MATURITY_FLAGS = {
    'deffpd0': 'flg_mature_fpd0',
    'deffpd10': 'flg_mature_fpd10',
    'deffpd30': 'flg_mature_fpd30',
    'deffspd30': 'flg_mature_fspd_30',
    'deffstpd30': 'flg_mature_fstpd_30',
}

# Rows tagged 'Test': latest run per (customerId, digitalLoanAccountId,
# modelDisplayName) from audit_balance.ml_model_run_details, with trenchCategory
# falling back to the value found in the latest request payload.
TRENCH3_TEST_SQL_TEMPLATE = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details`
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
,
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  'Test' Data_selection,
  del.{target_col},
  del.{maturity_flag}
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{maturity_flag} = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Rows tagged 'Train': every matching row of
# dap_ds_poweruser_playground.ml_training_model_run_details (no latest-run
# filter is applied here, as in the original query).
TRENCH3_TRAIN_SQL_TEMPLATE = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  'Train' Data_selection,
  del.{target_col},
  del.{maturity_flag}
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{maturity_flag} = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""


def build_trench3_sql(sql_template, target_col):
    """Fill one of the Trench 3 SQL templates for a delinquency target.

    Parameters:
    sql_template: str, template containing {target_col} and {maturity_flag}
    target_col: str, a key of TRENCH3_MATURITY_FLAGS (e.g. 'deffpd30')

    Returns:
    str: the SQL text with the target and its maturity flag substituted

    Raises:
    ValueError: if target_col is not a known delinquency target
    """
    # Column names cannot be passed as query parameters, so they are spliced
    # into the text; restricting them to the known mapping keeps that safe.
    if target_col not in TRENCH3_MATURITY_FLAGS:
        raise ValueError(
            f"Unknown delinquency target {target_col!r}; "
            f"expected one of {sorted(TRENCH3_MATURITY_FLAGS)}"
        )
    return sql_template.format(
        target_col=target_col,
        maturity_flag=TRENCH3_MATURITY_FLAGS[target_col],
    )


def fetch_trench3_rows(sql_template, target_col):
    """Run a filled Trench 3 template and return its de-duplicated rows.

    Parameters:
    sql_template: str, TRENCH3_TEST_SQL_TEMPLATE or TRENCH3_TRAIN_SQL_TEMPLATE
    target_col: str, a key of TRENCH3_MATURITY_FLAGS

    Returns:
    pandas.DataFrame: query result with exact duplicate rows removed
    """
    rows = client.query(build_trench3_sql(sql_template, target_col)).to_dataframe()
    return rows.drop_duplicates(keep='first')


def run_trench3_gini(target_col, label):
    """Fetch test and train rows for one target and compute its periodic gini.

    Parameters:
    target_col: str, delinquency flag used as the label column (e.g. 'deffpd30')
    label: str, tag passed through to calculate_periodic_gini (e.g. 'FPD30')

    Returns:
    tuple: (test_rows, train_rows, combined_rows, gini_frame)
    """
    test_rows = fetch_trench3_rows(TRENCH3_TEST_SQL_TEMPLATE, target_col)
    train_rows = fetch_trench3_rows(TRENCH3_TRAIN_SQL_TEMPLATE, target_col)

    # Train rows first, then test rows, matching the original concat order.
    combined_rows = pd.concat([train_rows, test_rows], ignore_index=True)
    combined_rows.info()

    # Values that cannot be parsed as numbers become NaN (errors='coerce').
    combined_rows['Beta_cash_stack_score'] = pd.to_numeric(
        combined_rows['Beta_cash_stack_score'], errors='coerce'
    )

    gini_frame = calculate_periodic_gini(
        combined_rows, 'Beta_cash_stack_score', target_col, label
    )
    return test_rows, train_rows, combined_rows, gini_frame


# df1 / df2 / df_concat / gini_results are rebound after every run, exactly as
# the copy-pasted cells did, so any later cell reading them sees the same values.
# The scratch names `sq` and `dfd` are no longer rebound in this section.
df1, df2, df_concat, gini_results = run_trench3_gini('deffpd30', 'FPD30')
f2 = gini_results.copy()
print(f2.head())

df1, df2, df_concat, gini_results = run_trench3_gini('deffspd30', 'FSPD30')
f3 = gini_results.copy()
print(f3.head())

df1, df2, df_concat, gini_results = run_trench3_gini('deffstpd30', 'FSTPD30')
f4 = gini_results.copy()
print(f4.head())

import functools

# One gini frame per delinquency target, in the order they should be joined.
dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two frames on the columns listed in `common_columns`."""
    return df1.merge(df2, on=common_columns, how='outer')


# Fold the list left to right: ((((f0 + f1) + f2) + f3) + f4).
final_df = dataframes[0]
for next_frame in dataframes[1:]:
    final_df = merge_dataframes(final_df, next_frame)

final_df.columns.values

# Original gini column name -> name carrying the "_t3" marker.
gini_column_renames = {
    'Beta_cash_stack_score_FPD0_gini': 'Beta_cash_stack_score_FPD0_t3_gini',
    'Beta_cash_stack_score_FPD10_gini': 'Beta_cash_stack_score_FPD10_t3_gini',
    'Beta_cash_stack_score_FPD30_gini': 'Beta_cash_stack_score_FPD30_t3_gini',
    'Beta_cash_stack_score_FSPD30_gini': 'Beta_cash_stack_score_FSPD30_t3_gini',
    'Beta_cash_stack_score_FSTPD30_gini': 'Beta_cash_stack_score_FSTPD30_t3_gini',
}

# Key columns first, then the gini columns in the order listed above.
report_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
    *gini_column_renames,
]

# Select, rename and tag in one chain; each step returns a new frame.
final_df = (
    final_df[report_columns]
    .rename(columns=gini_column_renames)
    .assign(
        Trench_category='Trench 3',
        Model_display_name='beta_stack_model_cash - t3',
        Product_type='CASH',
    )
)
final_df.dtypes

final_df.head()

# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_cash_t3_v1_gini_v4"

# WRITE_TRUNCATE is the disposition used here; "WRITE_APPEND" is the alternative.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11439 entries, 0 to 11438
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11439 non-null  object        
 1   digitalLoanAccountId   11439 non-null  object        
 2   loanAccountNumber      11439 non-null  object        
 3   Beta_cash_stack_score  11439 non-null  object        
 4   trenchCategory         11439 non-null  object        
 5   appln_submit_datetime  11439 non-null  datetime64[us]
 6   disbursementdate       11439 non-null  dbdate        
 7   Application_month      11439 non-null  object        
 8   Data_selection         11439 non-null  object        
 9   deffpd0                11439 non-null  Int64         
 10  flg_mature_fpd0        11439 non-null  Int64         
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(7)
memory usage: 1005.5+ KB
  start_date   end_date  Beta_cash_stack_

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10780 entries, 0 to 10779
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10780 non-null  Int64         
 1   digitalLoanAccountId   10780 non-null  object        
 2   loanAccountNumber      10780 non-null  object        
 3   Beta_cash_stack_score  10780 non-null  float64       
 4   trenchCategory         10780 non-null  object        
 5   appln_submit_datetime  10780 non-null  datetime64[us]
 6   disbursementdate       10780 non-null  dbdate        
 7   Application_month      10780 non-null  object        
 8   Data_selection         10780 non-null  object        
 9   deffpd30               10780 non-null  Int64         
 10  flg_mature_fpd30       10780 non-null  Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 958.1+ KB
  start_date   end_date  Beta_

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9589 entries, 0 to 9588
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9589 non-null   Int64         
 1   digitalLoanAccountId   9589 non-null   object        
 2   loanAccountNumber      9589 non-null   object        
 3   Beta_cash_stack_score  9589 non-null   float64       
 4   trenchCategory         9589 non-null   object        
 5   appln_submit_datetime  9589 non-null   datetime64[us]
 6   disbursementdate       9589 non-null   dbdate        
 7   Application_month      9589 non-null   object        
 8   Data_selection         9589 non-null   object        
 9   deffspd30              9589 non-null   Int64         
 10  flg_mature_fspd_30     9589 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 852.3+ KB
  start_date   end_date  Beta_ca

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8355 entries, 0 to 8354
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8355 non-null   Int64         
 1   digitalLoanAccountId   8355 non-null   object        
 2   loanAccountNumber      8355 non-null   object        
 3   Beta_cash_stack_score  8355 non-null   float64       
 4   trenchCategory         8355 non-null   object        
 5   appln_submit_datetime  8355 non-null   datetime64[us]
 6   disbursementdate       8355 non-null   dbdate        
 7   Application_month      8355 non-null   object        
 8   Data_selection         8355 non-null   object        
 9   deffstpd30             8355 non-null   Int64         
 10  flg_mature_fstpd_30    8355 non-null   Int64         
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(5)
memory usage: 742.6+ KB
  start_date   end_date  Beta_ca

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=2e5fb005-511b-473e-b636-918dfd0fd89b>