# Define Library

In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.
# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os
import tempfile
import time
from datetime import datetime
import uuid
import joblib
import uuid
from sklearn.metrics import roc_auc_score
from datetime import datetime, timedelta
import gcsfs
import duckdb as dd
import pickle
import joblib
from typing import Union
import io
# Single source of truth for the GCP project used throughout this notebook.
PROJECT_ID = 'prj-prod-dataplatform'

# Machine-specific fallback for local runs. A GOOGLE_APPLICATION_CREDENTIALS value
# that is already exported takes precedence, so the notebook also runs on machines
# where this Windows path does not exist.
path = r'C:\Users\Dwaipayan\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', path)

# Export the default project before building the client so that every Google Cloud
# library used in this kernel resolves the same project.
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID
client = bigquery.Client(project=PROJECT_ID)

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Show every column of wide frames, but cap the number of rows rendered at 100.
# Use the canonical lower-case option keys: "Display.max_rows" only resolved
# through pandas' case-insensitive pattern matching of option names.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Function

## calculate_gini_for_threedigitscore

In [2]:
# def calculate_gini_for_threedigitscore(scores, labels):
#     """
#     Calculate Gini coefficient for three-digit scores and binary labels
    
#     Parameters:
#     scores: array-like, three-digit scores (higher is better)
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Combine scores and labels into a DataFrame
#     df = pd.DataFrame({'score': scores, 'label': labels})
    
#     # Sort by score in descending order (assuming higher score is better)
#     df = df.sort_values('score', ascending=False)
    
#     # Calculate cumulative values
#     total_pos = df['label'].sum()
#     total_neg = len(df) - total_pos
    
#     if total_pos == 0 or total_neg == 0:
#         return 0
    
#     # Calculate cumulative proportions
#     cum_pos = df['label'].cumsum()
#     cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
#     # Convert to proportions
#     cum_pos_prop = cum_pos / total_pos
#     cum_neg_prop = cum_neg / total_neg
    
#     # Calculate Gini
#     gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
    
#     return gini


## Modified one

def calculate_gini_for_threedigitscore(scores, labels):
    """
    Calculate Gini coefficient for three-digit scores and binary labels

    Rows are walked from the lowest score to the highest while accumulating the
    share of defaults (label == 1) against the share of non-defaults. The area
    under that curve is the AUC and Gini = 2 * AUC - 1, so the result is
    positive when defaults are concentrated at the low end of the score range
    (i.e. a higher score means lower risk).

    Rows that share the same score are treated as a single step of the curve,
    so the result does not depend on the order in which tied rows are supplied.

    Parameters:
    scores: array-like, three-digit scores (higher is better)
    labels: array-like, binary values (0 or 1, where 1 indicates default)

    Returns:
    float: Gini coefficient, or 0 when the labels contain a single class
    """
    # Combine scores and labels into a DataFrame and walk it from low to high score
    df = pd.DataFrame({'score': scores, 'label': labels})
    df = df.sort_values('score', ascending=True)

    # Class totals
    total_pos = df['label'].sum()
    total_neg = len(df) - total_pos

    # The curve is undefined when only one class is present
    if total_pos == 0 or total_neg == 0:
        return 0

    # Running counts of defaults / non-defaults along the sorted rows
    cum_pos = df['label'].cumsum().to_numpy()
    cum_neg = np.arange(1, len(df) + 1) - cum_pos

    # Keep only the last row of every run of identical scores: a block of tied
    # scores must move the curve in one (diagonal) step, otherwise the area
    # depends on the arbitrary order of the tied rows.
    sorted_scores = df['score'].to_numpy()
    ends_tie_block = np.append(sorted_scores[1:] != sorted_scores[:-1], True)

    # Convert to proportions and anchor the curve at the origin
    cum_pos_prop = np.concatenate(([0.0], cum_pos[ends_tie_block] / total_pos))
    cum_neg_prop = np.concatenate(([0.0], cum_neg[ends_tie_block] / total_neg))

    # Trapezoidal area under the curve, written out explicitly because
    # np.trapz is deprecated in NumPy 2.x.
    auc = np.sum(np.diff(cum_neg_prop) * (cum_pos_prop[1:] + cum_pos_prop[:-1]) / 2.0)

    # Calculate Gini
    gini = 2 * auc - 1

    return gini

## calculate_gini

In [3]:
def calculate_gini(pd_scores, bad_indicators):
    """
    Gini coefficient (2 * AUC - 1) of scores against binary outcomes.

    Parameters:
    pd_scores: array-like of scores/probabilities
    bad_indicators: array-like of binary outcomes (0/1)

    Returns:
    float: Gini coefficient, or NaN when either input is empty, when only one
           outcome class is present, or when the AUC computation raises
           ValueError
    """
    # Coerce both inputs to numeric numpy arrays up front
    score_arr = np.array(pd_scores, dtype=float)
    outcome_arr = np.array(bad_indicators, dtype=int)

    # Nothing to rank
    if min(len(score_arr), len(outcome_arr)) == 0:
        return np.nan

    # ROC AUC needs both good and bad cases
    distinct_outcomes = np.unique(outcome_arr)
    if len(distinct_outcomes) < 2:
        return np.nan

    try:
        return 2 * roc_auc_score(outcome_arr, score_arr) - 1
    except ValueError:
        return np.nan

## calculate_hybrid_gini

In [4]:
# def calculate_hybrid_gini(scores, labels):
#     """
#     Calculate Gini coefficient handling both PD values and three-digit scores
    
#     Parameters:
#     scores: array-like, contains either PD values (0-1) or three-digit scores
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Convert inputs to numpy arrays
#     scores = np.array(scores, dtype=float)
#     labels = np.array(labels, dtype=int)
    
#     # Basic validation
#     if len(scores) == 0 or len(labels) == 0:
#         return np.nan
    
#     if len(np.unique(labels)) < 2:
#         return np.nan
        
#     # Determine if scores are PD values or three-digit scores
#     # PD values are between 0 and 1
#     is_pd = np.all((scores >= 0) & (scores <= 1))
    
#     if is_pd:
#         try:
#             auc = roc_auc_score(labels, scores)
#             gini = 2 * auc - 1
#             return gini
#         except ValueError:
#             return np.nan
#     else:
#         # Handle as three-digit score
#         df = pd.DataFrame({'score': scores, 'label': labels})
#         df = df.sort_values('score', ascending=False)
        
#         total_pos = df['label'].sum()
#         total_neg = len(df) - total_pos
        
#         if total_pos == 0 or total_neg == 0:
#             return np.nan
        
#         cum_pos = df['label'].cumsum()
#         cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
#         cum_pos_prop = cum_pos / total_pos
#         cum_neg_prop = cum_neg / total_neg
        
#         gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
#         return gini

## Modified one

def calculate_hybrid_gini(scores, labels):
    """
    Calculate Gini coefficient handling both PD values and three-digit scores

    When every score lies in [0, 1] the scores are treated as PD values and the
    Gini is 2 * roc_auc_score(labels, scores) - 1 (higher PD = higher risk).
    Otherwise the scores are treated as three-digit scores: rows are walked from
    the lowest score upwards, so the Gini is positive when defaults sit at the
    low end of the range (higher score = lower risk). Rows sharing the same
    score are treated as one step, so the result does not depend on the order
    of tied rows.

    Parameters:
    scores: array-like, contains either PD values (0-1) or three-digit scores
    labels: array-like, binary values (0 or 1, where 1 indicates default)

    Returns:
    float: Gini coefficient, or NaN when the inputs are empty, contain a single
           label class, or the AUC computation raises ValueError
    """
    # Convert inputs to numpy arrays
    scores = np.array(scores, dtype=float)
    labels = np.array(labels, dtype=int)

    # Basic validation
    if len(scores) == 0 or len(labels) == 0:
        return np.nan

    if len(np.unique(labels)) < 2:
        return np.nan

    # Determine if scores are PD values or three-digit scores
    # PD values are between 0 and 1
    is_pd = np.all((scores >= 0) & (scores <= 1))

    if is_pd:
        try:
            auc = roc_auc_score(labels, scores)
            gini = 2 * auc - 1
            return gini
        except ValueError:
            return np.nan

    # Handle as three-digit score: walk from the lowest score to the highest.
    # Defaults are expected at the low end because a higher score means lower risk.
    df = pd.DataFrame({'score': scores, 'label': labels})
    df = df.sort_values('score', ascending=True)

    total_pos = df['label'].sum()
    total_neg = len(df) - total_pos

    if total_pos == 0 or total_neg == 0:
        return np.nan

    cum_pos = df['label'].cumsum().to_numpy()
    cum_neg = np.arange(1, len(df) + 1) - cum_pos

    # Keep only the last row of every run of identical scores: a block of tied
    # scores must move the curve in one (diagonal) step, otherwise the area
    # depends on the arbitrary order of the tied rows.
    sorted_scores = df['score'].to_numpy()
    ends_tie_block = np.append(sorted_scores[1:] != sorted_scores[:-1], True)

    # Convert to proportions and anchor the curve at the origin
    cum_pos_prop = np.concatenate(([0.0], cum_pos[ends_tie_block] / total_pos))
    cum_neg_prop = np.concatenate(([0.0], cum_neg[ends_tie_block] / total_neg))

    # Trapezoidal area under the curve, written out explicitly because
    # np.trapz is deprecated in NumPy 2.x.
    auc = np.sum(np.diff(cum_neg_prop) * (cum_pos_prop[1:] + cum_pos_prop[:-1]) / 2.0)

    # Calculate Gini using the same formula as PD values
    gini = 2 * auc - 1
    return gini

## calculate_periodic_gini_threedigit

In [5]:
# Main processing code
def calculate_periodic_gini_threedigit(df, score_column, label_column, namecolumn):
    """
    Calculate weekly and monthly Gini coefficients for a three-digit score

    Parameters:
    df: DataFrame with a 'disbursementdate' column and the score/label columns
        (left unmodified; the function works on a copy)
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; stored in 'bad_rate' and used in the name of
                the Gini column

    Returns:
    DataFrame with one row per week and per month, sorted by start_date, with
    columns start_date, end_date, '<score_column>_<namecolumn>_gini', period,
    Model_Name, version and bad_rate
    """
    # Work on a copy so the datetime conversion and the helper columns
    # ('week', 'month') never leak into the caller's dataframe.
    df = df.copy()

    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])

    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]

    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    # Last day of the month: first day of the next month minus one day
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]

    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)

    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)

    return gini_results

## calculate_periodic_gini

In [6]:
def calculate_periodic_gini(df, score_column, label_column, namecolumn):
    """
    Weekly and monthly Gini coefficients of a score against a binary label.

    Parameters:
    df: DataFrame with a 'disbursementdate' column and the score/label columns
        (not modified; a copy is used)
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; stored in 'bad_rate' and used in the name of
                the Gini column

    Returns:
    DataFrame sorted by start_date with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate.
    Weeks with fewer than 10 rows and months with fewer than 20 rows get NaN.

    Raises:
    ValueError: when a required column is missing from df
    """
    needed = ['disbursementdate', score_column, label_column]
    if any(col not in df.columns for col in needed):
        raise ValueError(f"Missing required columns. Need: {needed}")

    # Private working copy: typed dates, numeric score/label, unusable rows removed
    work = df.copy()
    work['disbursementdate'] = pd.to_datetime(work['disbursementdate'])
    for numeric_col in (score_column, label_column):
        work[numeric_col] = pd.to_numeric(work[numeric_col], errors='coerce')
    work = work.dropna(subset=[score_column, label_column])

    def _gini_if_enough(group, min_rows):
        # Groups below the sample-size floor are reported as NaN
        if len(group) >= min_rows:
            return calculate_gini(group[score_column], group[label_column])
        return np.nan

    output_cols = ['start_date', 'end_date', 'gini', 'period']

    # Weekly figures
    work['week'] = work['disbursementdate'].dt.to_period('W')
    per_week = work.groupby('week').apply(lambda grp: _gini_if_enough(grp, 10))
    weekly = per_week.reset_index(name='gini')
    weekly['period'] = 'Week'
    weekly['start_date'] = weekly['week'].apply(lambda wk: wk.to_timestamp())
    weekly['end_date'] = weekly['start_date'] + timedelta(days=6)
    weekly = weekly[output_cols]

    # Monthly figures
    work['month'] = work['disbursementdate'].dt.to_period('M')
    per_month = work.groupby('month').apply(lambda grp: _gini_if_enough(grp, 20))
    monthly = per_month.reset_index(name='gini')
    monthly['period'] = 'Month'
    monthly['start_date'] = monthly['month'].apply(lambda mo: mo.to_timestamp())
    monthly['end_date'] = monthly['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly = monthly[output_cols]

    # Stack both granularities in chronological order
    combined = pd.concat([weekly, monthly])
    combined = combined.sort_values(by='start_date').reset_index(drop=True)

    # Metadata
    combined['Model_Name'] = score_column
    combined['version'] = '1.1.0'
    combined['bad_rate'] = namecolumn

    return combined.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'})

## calculate_periodic_hybrid_gini

In [7]:
def calculate_periodic_hybrid_gini(df, score_column, label_column, namecolumn):
    """
    Weekly and monthly Gini coefficients for a score column that may hold
    either PD values or three-digit scores (delegates to calculate_hybrid_gini).

    Parameters:
    df: DataFrame with a 'disbursementdate' column and the score/label columns
        (not modified; a copy is used)
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; stored in 'bad_rate' and used in the name of
                the Gini column

    Returns:
    DataFrame sorted by start_date with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate.
    Weeks with fewer than 10 rows and months with fewer than 20 rows get NaN.

    Raises:
    ValueError: when a required column is missing from df
    """
    expected_cols = ['disbursementdate', score_column, label_column]
    absent = [col for col in expected_cols if col not in df.columns]
    if absent:
        raise ValueError(f"Missing required columns. Need: {expected_cols}")

    # Never touch the caller's frame
    data = df.copy()
    data['disbursementdate'] = pd.to_datetime(data['disbursementdate'])

    # Non-numeric scores/labels become NaN and are then discarded
    data[score_column] = pd.to_numeric(data[score_column], errors='coerce')
    data[label_column] = pd.to_numeric(data[label_column], errors='coerce')
    data = data.dropna(subset=[score_column, label_column])

    def _hybrid_or_nan(chunk, floor):
        # Only calculate if the group has enough samples
        if len(chunk) < floor:
            return np.nan
        return calculate_hybrid_gini(chunk[score_column], chunk[label_column])

    selected = ['start_date', 'end_date', 'gini', 'period']

    # Weekly Gini (minimum 10 rows per week)
    data['week'] = data['disbursementdate'].dt.to_period('W')
    week_table = (
        data.groupby('week')
        .apply(lambda chunk: _hybrid_or_nan(chunk, 10))
        .reset_index(name='gini')
    )
    week_table['period'] = 'Week'
    week_table['start_date'] = week_table['week'].apply(lambda per: per.to_timestamp())
    week_table['end_date'] = week_table['start_date'] + pd.Timedelta(days=6)
    week_table = week_table[selected]

    # Monthly Gini (minimum 20 rows per month)
    data['month'] = data['disbursementdate'].dt.to_period('M')
    month_table = (
        data.groupby('month')
        .apply(lambda chunk: _hybrid_or_nan(chunk, 20))
        .reset_index(name='gini')
    )
    month_table['period'] = 'Month'
    month_table['start_date'] = month_table['month'].apply(lambda per: per.to_timestamp())
    month_table['end_date'] = month_table['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    month_table = month_table[selected]

    # Combine and order chronologically
    result = pd.concat([week_table, month_table])
    result = result.sort_values(by='start_date').reset_index(drop=True)

    # Metadata columns, then give the Gini column its model-specific name
    result['Model_Name'] = score_column
    result['version'] = '1.1.0'
    result['bad_rate'] = namecolumn
    result = result.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'})

    return result

## calculate_periodic_gini_debug

In [8]:
def calculate_periodic_gini_debug(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients with detailed debugging

    Same weekly/monthly computation as the non-debug periodic helpers (it calls
    calculate_gini per group), but every stage prints diagnostics: dtypes before
    and after conversion, null counts, rows dropped, group-size summaries and
    the Gini of every period.

    Unlike the groupby/apply based helpers, periods below the sample-size floor
    (10 rows per week, 20 rows per month) are skipped entirely instead of being
    emitted with a NaN Gini.

    Parameters:
    df: DataFrame with a 'disbursementdate' column and the score/label columns
        (not modified; a copy is used)
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: bad-rate label; stored in 'bad_rate' and used in the name of
                the Gini column

    Returns:
    DataFrame sorted by start_date with columns start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate;
    an empty DataFrame (no columns) when no period met its sample-size floor.

    Raises:
    ValueError: when a required column is missing from df
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    print(f"Original disbursementdate dtype: {df['disbursementdate'].dtype}")
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    print(f"Converted disbursementdate dtype: {df['disbursementdate'].dtype}")
    
    # Ensure score and label columns are numeric
    print(f"\nBefore conversion:")
    print(f"  {score_column} dtype: {df[score_column].dtype}, non-null: {df[score_column].notna().sum()}")
    print(f"  {label_column} dtype: {df[label_column].dtype}, non-null: {df[label_column].notna().sum()}")
    
    # errors='coerce' turns unparseable values into NaN so they show up in the counts below
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    print(f"\nAfter conversion:")
    print(f"  {score_column} - non-null: {df[score_column].notna().sum()}, NaN: {df[score_column].isna().sum()}")
    print(f"  {label_column} - non-null: {df[label_column].notna().sum()}, NaN: {df[label_column].isna().sum()}")
    print(f"  {score_column} sample values: {df[score_column].dropna().head()}")
    print(f"  {label_column} unique values: {df[label_column].dropna().unique()}")
    
    # Drop rows with invalid values
    initial_rows = len(df)
    df = df.dropna(subset=[score_column, label_column])
    print(f"\nRows dropped: {initial_rows - len(df)}")
    print(f"Remaining rows: {len(df)}")
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_groups = df.groupby('week').size()
    print(f"\nWeekly groups: {len(weekly_groups)} weeks")
    print(f"Weekly group sizes:\n{weekly_groups.describe()}\n{weekly_groups.value_counts().head()}")
    print(f"Weeks with < 10 samples: {(weekly_groups < 10).sum()}")
    
    # Explicit loop (instead of groupby.apply) so each week can be logged individually
    weekly_results = []
    for week, group_df in df.groupby('week'):
        if len(group_df) >= 10:
            gini_val = calculate_gini(group_df[score_column], group_df[label_column])
            print(f"  Week {week}: n={len(group_df)}, gini={gini_val}")
            weekly_results.append({'week': week, 'gini': gini_val, 'count': len(group_df)})
        else:
            print(f"  Week {week}: n={len(group_df)} - SKIPPED (< 10 samples)")
    
    # No qualifying week -> completely empty frame (no columns)
    weekly_gini = pd.DataFrame(weekly_results) if weekly_results else pd.DataFrame()
    
    if len(weekly_gini) > 0:
        weekly_gini['period'] = 'Week'
        weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
        weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
        # The per-group 'count' is only used for logging and is dropped here
        weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_groups = df.groupby('month').size()
    print(f"\nMonthly groups: {len(monthly_groups)} months")
    print(f"Monthly group sizes:\n{monthly_groups.describe()}\n{monthly_groups.value_counts().head()}")
    print(f"Months with < 20 samples: {(monthly_groups < 20).sum()}")
    
    monthly_results = []
    for month, group_df in df.groupby('month'):
        if len(group_df) >= 20:
            gini_val = calculate_gini(group_df[score_column], group_df[label_column])
            print(f"  Month {month}: n={len(group_df)}, gini={gini_val}")
            monthly_results.append({'month': month, 'gini': gini_val, 'count': len(group_df)})
        else:
            print(f"  Month {month}: n={len(group_df)} - SKIPPED (< 20 samples)")
    
    # No qualifying month -> completely empty frame (no columns)
    monthly_gini = pd.DataFrame(monthly_results) if monthly_results else pd.DataFrame()
    
    if len(monthly_gini) > 0:
        monthly_gini['period'] = 'Month'
        monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
        # Last day of the month: first day of the next month minus one day
        monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
        monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine results (only the non-empty tables take part)
    if len(weekly_gini) > 0 and len(monthly_gini) > 0:
        gini_results = pd.concat([weekly_gini, monthly_gini], ignore_index=True)
    elif len(weekly_gini) > 0:
        gini_results = weekly_gini.copy()
    elif len(monthly_gini) > 0:
        gini_results = monthly_gini.copy()
    else:
        print("\n⚠️  WARNING: No valid Gini calculations produced!")
        return pd.DataFrame()
    
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    print(f"\n✓ Final results: {len(gini_results)} rows")
    return gini_results

## calculate_periodic_gini_producttype

In [9]:
def calculate_periodic_gini_producttype(df, score_column, label_column, namecolumn, product_column=None):
    """
    Weekly and monthly Gini coefficients for the whole portfolio and,
    optionally, for every product type.

    Parameters:
    df: DataFrame with a 'disbursementdate' column and the score/label columns
        (not modified; a copy is used)
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: name for the bad rate label
    product_column: (optional) name of product type column to segment by

    Returns:
    DataFrame with the 'Overall' block first, followed by one block per
    non-null product value in order of first appearance. Columns: start_date,
    end_date, '<score_column>_<namecolumn>_gini', period, Model_Name, version,
    bad_rate, loan_type. Weeks with fewer than 10 rows and months with fewer
    than 20 rows get NaN.

    Raises:
    ValueError: when a required column or the product column is missing
    """
    needed = ['disbursementdate', score_column, label_column]
    if any(col not in df.columns for col in needed):
        raise ValueError(f"Missing required columns. Need: {needed}")

    if product_column and product_column not in df.columns:
        raise ValueError(f"Product column '{product_column}' not found in dataframe")

    # Private working copy: typed dates, numeric score/label, unusable rows removed
    base = df.copy()
    base['disbursementdate'] = pd.to_datetime(base['disbursementdate'])
    for numeric_col in (score_column, label_column):
        base[numeric_col] = pd.to_numeric(base[numeric_col], errors='coerce')
    base = base.dropna(subset=[score_column, label_column])

    # Segments to evaluate: the full portfolio first, then each product type
    segments = [('Overall', base)]
    if product_column:
        for product_value in base[product_column].unique():
            if pd.isna(product_value):
                continue
            segments.append((str(product_value), base[base[product_column] == product_value]))

    def _gini_if_enough(group, min_rows):
        # Groups below the sample-size floor are reported as NaN
        if len(group) >= min_rows:
            return calculate_gini(group[score_column], group[label_column])
        return np.nan

    output_cols = ['start_date', 'end_date', 'gini', 'period']
    segment_tables = []

    for segment_name, segment_df in segments:
        # Weekly figures
        week_frame = segment_df.copy()
        week_frame['week'] = week_frame['disbursementdate'].dt.to_period('W')
        weekly = (
            week_frame.groupby('week')
            .apply(lambda grp: _gini_if_enough(grp, 10))
            .reset_index(name='gini')
        )
        weekly['period'] = 'Week'
        weekly['start_date'] = weekly['week'].apply(lambda wk: wk.to_timestamp())
        weekly['end_date'] = weekly['start_date'] + timedelta(days=6)
        weekly = weekly[output_cols]

        # Monthly figures
        month_frame = segment_df.copy()
        month_frame['month'] = month_frame['disbursementdate'].dt.to_period('M')
        monthly = (
            month_frame.groupby('month')
            .apply(lambda grp: _gini_if_enough(grp, 20))
            .reset_index(name='gini')
        )
        monthly['period'] = 'Month'
        monthly['start_date'] = monthly['month'].apply(lambda mo: mo.to_timestamp())
        monthly['end_date'] = monthly['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
        monthly = monthly[output_cols]

        # Both granularities for this segment, in chronological order
        segment_table = pd.concat([weekly, monthly], ignore_index=True)
        segment_table = segment_table.sort_values(by='start_date').reset_index(drop=True)

        # Metadata
        segment_table['Model_Name'] = score_column
        segment_table['version'] = '1.1.0'
        segment_table['bad_rate'] = namecolumn
        segment_table['loan_type'] = segment_name
        segment_table = segment_table.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'})

        segment_tables.append(segment_table)

    # Stack every segment into a single frame
    return pd.concat(segment_tables, ignore_index=True)


# Usage:
# Calculate overall + by product type
# gini_results = calculate_periodic_gini_producttype(
#     df_concat, 
#     'Alpha_cic_sil_score', 
#     'deffpd0', 
#     'FPD0',
#     product_column='new_loan_type'
# )

# Filter results by product type if needed. The segment label is stored in the
# 'loan_type' column; product values must match the strings found in
# `product_column` exactly.
# overall_gini = gini_results[gini_results['loan_type'] == 'Overall']
# sil_instore_gini = gini_results[gini_results['loan_type'] == 'sil_instore']
# sil_zero_gini = gini_results[gini_results['loan_type'] == 'sil_zero']

## calculate_periodic_gini_prod_ver_trench

In [10]:
def calculate_periodic_gini_prod_ver_trench(df, score_column, label_column, namecolumn, 
                                        model_version_column=None, trench_column=None, product_column=None):
    """
    Calculate periodic Gini coefficients overall and by product type, tagging
    the results with the model version and trench category found in the data

    Parameters:
    df: DataFrame with disbursement dates and score/label columns
        (not modified; a copy is used)
    score_column: name of the score column
    label_column: name of the label column
    namecolumn: name for the bad rate label
    model_version_column: (optional) name of column to extract model version from;
                          when omitted the version is reported as '1.1.0'
    trench_column: (optional) name of column to extract trench category from;
                   when omitted no 'trench_category' column is produced
    product_column: (optional) name of product type column to segment by

    Returns:
    DataFrame with the 'Overall' block first, followed by one block per
    non-null product value. Columns: start_date, end_date,
    '<score_column>_<namecolumn>_gini', period, Model_Name, version, bad_rate,
    loan_type and (only when trench_column is given) trench_category.
    Weeks with fewer than 10 rows and months with fewer than 20 rows get NaN.

    Raises:
    ValueError: when a required or optional column name is not present in df
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")

    if product_column and product_column not in df.columns:
        raise ValueError(f"Product column '{product_column}' not found in dataframe")

    if model_version_column and model_version_column not in df.columns:
        raise ValueError(f"Model version column '{model_version_column}' not found in dataframe")

    if trench_column and trench_column not in df.columns:
        raise ValueError(f"Trench column '{trench_column}' not found in dataframe")

    # Create a copy to avoid modifying original dataframe
    df = df.copy()

    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])

    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')

    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])

    def _first_non_null(column_name):
        # First populated value of the column, or None when it has none.
        # Taking .iloc[0] directly would pick up a null whenever the first
        # surviving row happens to be unpopulated.
        populated = df[column_name].dropna()
        return populated.iloc[0] if len(populated) > 0 else None

    # Get version and trench values (first non-null value, as they should be consistent)
    version_value = _first_non_null(model_version_column) if model_version_column else '1.1.0'
    trench_value = _first_non_null(trench_column) if trench_column else None

    # Define list of datasets to process: overall + each product type
    datasets_to_process = [('Overall', df)]

    if product_column:
        for product_type in df[product_column].unique():
            if pd.notna(product_type):
                product_df = df[df[product_column] == product_type]
                datasets_to_process.append((str(product_type), product_df))

    all_results = []

    # Process each dataset
    for dataset_name, dataset_df in datasets_to_process:
        # Calculate weekly Gini (NaN for weeks with fewer than 10 rows)
        dataset_df_copy = dataset_df.copy()
        dataset_df_copy['week'] = dataset_df_copy['disbursementdate'].dt.to_period('W')
        weekly_gini = dataset_df_copy.groupby('week').apply(
            lambda x: calculate_gini(x[score_column], x[label_column])
            if len(x) >= 10 else np.nan
        ).reset_index(name='gini')
        weekly_gini['period'] = 'Week'
        weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
        weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
        weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]

        # Calculate monthly Gini (NaN for months with fewer than 20 rows)
        dataset_df_copy = dataset_df.copy()
        dataset_df_copy['month'] = dataset_df_copy['disbursementdate'].dt.to_period('M')
        monthly_gini = dataset_df_copy.groupby('month').apply(
            lambda x: calculate_gini(x[score_column], x[label_column])
            if len(x) >= 20 else np.nan
        ).reset_index(name='gini')
        monthly_gini['period'] = 'Month'
        monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
        # Last day of the month: first day of the next month minus one day
        monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
        monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]

        # Combine results for this dataset
        gini_results = pd.concat([weekly_gini, monthly_gini], ignore_index=True)
        gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)

        # Add metadata columns
        gini_results['Model_Name'] = score_column
        gini_results['version'] = version_value
        gini_results['bad_rate'] = namecolumn
        gini_results['loan_type'] = dataset_name
        if trench_column:
            gini_results['trench_category'] = trench_value
        gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)

        all_results.append(gini_results)

    # Combine all results
    final_results = pd.concat(all_results, ignore_index=True)

    return final_results


# Usage:
# gini_results = calculate_periodic_gini_prod_ver_trench(
#     df_concat, 
#     'Alpha_cic_sil_score', 
#     'deffpd0', 
#     'FPD0',
#     model_version_column='modelVersionId',
#     trench_column='trenchCategory',
#     product_column='new_loan_type'
# )

# SIL

# Alpha - CIC-SIL-Model

## V1

## FPD0

## Test

In [11]:
# Test-set extract for the Alpha CIC SIL model, version v1, FPD0 target.
#   * `modelname`  - model-run rows for the three SIL display names with
#                    modelVersionId = 'v1'; a null or empty trenchCategory is
#                    reported as 'ALL'.
#   * `deliquency` - per-loan default flags (deffpd0 ... deffstpd30) and the
#                    matching maturity flags (flg_mature_*).
# The final select keeps disbursed loans with a non-null score whose FPD0
# observation window has matured (flg_mature_fpd0 = 1).
# NOTE(review): loan_master_table is LEFT JOINed, but the WHERE clause filters on
# loanmaster columns and `deliquency` is INNER JOINed through loanmaster, so
# unmatched model runs are dropped anyway - confirm this is intended.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  ;
"""
# Run the query with the notebook-level BigQuery client and load the result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every selected column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
# Preview only; the rendered rows contain customer and loan identifiers.
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2757224,7d9e9b1b-9704-48dc-b5d4-e1f00d6367bc,60827572240023,cic_model_sil,0.0670188900177304,2025-06-27 14:12:43,2025-06-27,2025-06,Test,0,1,SIL-Instore,v1,ALL
1,3064349,1b14fc74-e072-41c0-8cb4-e0bc76641324,60830643490023,cic_model_sil,0.1312233188466483,2025-06-27 14:47:01,2025-06-27,2025-06,Test,0,1,SIL-Instore,v1,ALL
2,3280410,57332877-b6e1-4b70-9cca-c9cc826a440e,60832804100019,cic_model_sil,0.0854430199656074,2025-06-27 14:19:24,2025-06-27,2025-06,Test,0,1,SIL-Instore,v1,ALL
3,3522986,b73bc9cc-6670-4bc9-97b2-1360fd36e885,60835229860011,cic_model_sil,0.1022984160986753,2025-06-27 13:52:00,2025-06-27,2025-06,Test,0,1,SIL-Instore,v1,ALL
4,3522992,528b20b5-72d2-4061-92d0-7fed6709294b,60835229920018,cic_model_sil,0.1656376977469904,2025-06-27 13:56:16,2025-06-27,2025-06,Test,0,1,SIL-Instore,v1,ALL


In [12]:
# Keep the Test-period FPD0 pull under its own name; `dfd` is overwritten by the Train query.
df1 = dfd.copy()

In [13]:
# Row count per model version in the Test pull (the query filters to 'v1').
df1['modelVersionId'].value_counts()

modelVersionId
v1    49314
Name: count, dtype: int64

## Train

In [14]:
# Train-period pull for the FPD0 target.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged
# Data_selection = 'Train'. Only disbursed loans with a non-null score and
# flg_mature_fpd0 = 1 are kept; blank or NULL trenchCategory becomes 'ALL'.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
    trenchCategory,
    from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
"""
# Run the query, load the result into pandas and drop fully duplicated rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,1906261,aa42d0e6-aca4-4917-a065-8cfdf17ccc0d,60819062610016,Alpha - CIC-SIL-Model,0.065794,2023-02-18 14:49:12,2023-02-18,2023-02,Train,0,1,SIL-Instore,v1,ALL
1,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,Alpha - CIC-SIL-Model,0.107859,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
2,1861570,38bad92e-ad5c-4d5e-b1b5-7e4add8c233c,60818615700011,Alpha - CIC-SIL-Model,0.072651,2023-01-14 15:41:55,2023-01-15,2023-01,Train,0,1,SIL-Instore,v1,ALL
3,2120659,065b5161-0246-4da2-abf7-4b2dd21dc03d,60821206590019,Alpha - CIC-SIL-Model,0.12559,2023-07-05 17:19:01,2023-07-05,2023-07,Train,0,1,SIL-Instore,v1,ALL
4,1416748,78e01765-e029-4f14-a04d-75c576ad66f2,60814167480027,Alpha - CIC-SIL-Model,0.137399,2023-04-17 18:23:51,2023-04-17,2023-04,Train,0,1,SIL-Instore,v1,ALL


In [15]:
# Keep the Train-period FPD0 pull under its own name, alongside df1 (Test).
df2 = dfd.copy()

In [16]:
# Row count per (model version, model display name) pair in the Train pull.
df2[['modelVersionId', 'modelDisplayName']].value_counts()    

modelVersionId  modelDisplayName     
v1              Alpha - CIC-SIL-Model    132827
Name: count, dtype: int64

In [17]:
# Stack the Test (df1) and Train (df2) FPD0 pulls into one frame with a fresh
# 0..n-1 index, then print its schema and non-null counts.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 182141 entries, 0 to 182140
Data columns (total 14 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             182141 non-null  object        
 1   digitalLoanAccountId   182141 non-null  object        
 2   loanAccountNumber      182141 non-null  object        
 3   modelDisplayName       182141 non-null  object        
 4   Alpha_cic_sil_score    182141 non-null  object        
 5   appln_submit_datetime  182141 non-null  datetime64[us]
 6   disbursementdate       182141 non-null  dbdate        
 7   Application_month      182141 non-null  object        
 8   Data_selection         182141 non-null  object        
 9   deffpd0                182141 non-null  Int64         
 10  flg_mature_fpd0        182141 non-null  Int64         
 11  new_loan_type          182141 non-null  object        
 12  modelVersionId         182141 non-null  obje

In [18]:
# The score column arrives as dtype `object` (see df_concat.info()); convert it to
# numeric before it is handed to the Gini helper. Unparseable values become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')

# Row count per application month, NaN included, in chronological order.
# Kept as the LAST expression of the cell so the notebook renders it; when it
# was the first statement its result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [19]:
# Persist the combined FPD0 Test+Train frame as a CSV at a relative path.
# to_csv is left at its default index=True, so the row index is written as an
# unnamed first column.
df_concat.to_csv(r"Alpha_cic_sil_scorefpd0.csv")

In [20]:
# Periodic Gini of the Alpha CIC SIL score against the FPD0 flag, broken down by
# loan type, model version and trench category.
# The simpler helpers (calculate_periodic_gini, calculate_periodic_gini_producttype)
# were earlier alternatives for the same inputs and are not used here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [21]:
# Keep the FPD0 Gini table under its own name; `gini_results` is reassigned by
# the FPD10 / FPD30 sections further down.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.205187,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL
1,2023-01-09,2023-01-15,0.226648,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL
2,2023-01-16,2023-01-22,0.375,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL
3,2023-01-23,2023-01-29,0.110811,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL
4,2023-01-30,2023-02-05,0.076412,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL


In [22]:
# List the result columns; the Gini column name embeds the score and target names.
f0.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [23]:
# Reshape the FPD0 Gini table: fixed column order, Gini column renamed to the
# bare target name, plus a constant `category` tag identifying this run.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FPD0_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FPD0_gini': 'FPD0'})
    .assign(category='cic_sil_score_fpd0_v1_all')
)

In [24]:
# Number of Gini result rows per loan type / model version / trench combination.
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v1       ALL                181
SIL Competitor  v1       ALL                 51
SIL Repeat      v1       ALL                 14
SIL ZERO        v1       ALL                 81
SIL-Instore     v1       ALL                181
dtype: int64

In [25]:
# Preview the reshaped FPD0 Gini table.
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all


## FPD10

## Test

In [26]:
# Test-period pull for the FPD10 target.
# Reads v1 scores from audit_balance.ml_model_run_details, attaches loan master
# attributes and delinquency flags, and tags rows Data_selection = 'Test'.
# Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are kept.
# NOTE(review): the FPD0 queries in this notebook also accept the display name
# 'Sil-Alpha-CIC-SIL-Model'; this list does not. Confirm the difference is intended.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
  ;
"""
# Run the query, load the result into pandas and drop fully duplicated rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3426010,a6985485-b408-4ccb-9015-664964e6e567,60834260100013,0.2229344139129745,2025-05-08 13:03:08,2025-05-08,2025-05,Test,1,1,SIL-Instore,v1,ALL
1,3425862,d45ceaba-c40f-4d7c-bc1f-e5ee47ba5b72,60834258620015,0.1262708673650728,2025-05-08 11:22:41,2025-05-08,2025-05,Test,0,1,SIL ZERO,v1,ALL
2,3425891,9e45a458-b613-4d74-85cd-4372e0eea9d5,60834258910011,0.078509374217436,2025-05-08 11:39:43,2025-05-08,2025-05,Test,0,1,SIL-Instore,v1,ALL
3,3425892,796f990f-e937-4e46-910c-2213d06c1b53,60834258920011,0.0596137167345508,2025-05-08 11:40:10,2025-05-08,2025-05,Test,0,1,SIL-Instore,v1,ALL
4,3425927,4ab62816-df9c-41a8-838d-412568c28abd,60834259270015,0.1027507450243416,2025-05-08 11:56:27,2025-05-08,2025-05,Test,0,1,SIL-Instore,v1,ALL


In [27]:
# Keep the Test-period FPD10 pull; `dfd` is reused by the Train query that follows.
df1 = dfd.copy()

## Train

In [28]:
# Train-period pull for the FPD10 target.
# Reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details,
# attaches loan master attributes and delinquency flags, and tags rows
# Data_selection = 'Train'. Only disbursed loans with a non-null score are kept.
#
# The maturity filter is flg_mature_fpd10, matching the deffpd10 target and the
# FPD10 Test query. It previously read flg_mature_fpd0 (copy-paste from the FPD0
# section), which admits loans whose 10-day window has not matured; for those
# rows deffpd10 is forced to 0 by its CASE expression, diluting the bad rate.
#
# NOTE(review): the FPD0 queries in this notebook also accept the display name
# 'Sil-Alpha-CIC-SIL-Model'; this list does not. Confirm the difference is intended.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
   and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
"""
# Run the query, load the result into pandas and drop fully duplicated rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2141786,44fc8f86-5601-45cf-b7dc-da0b1fb733a9,60821417860019,0.165001,2023-07-21 12:31:29,2023-07-21,2023-07,Train,0,1,SIL-Instore,v1,ALL
1,1863820,99f48e3e-3bb3-4a1b-986f-5c13e0b1401f,60818638200017,0.125541,2023-01-16 18:05:17,2023-01-16,2023-01,Train,0,1,SIL-Instore,v1,ALL
2,2136695,174429d8-70c5-47f6-b093-afb971b9acbb,60821366950012,0.065391,2023-07-14 14:20:14,2023-07-14,2023-07,Train,0,1,SIL-Instore,v1,ALL
3,1879674,4e372a56-e549-4a6b-b370-92742ce48ae9,60818796740014,0.14576,2023-01-30 18:34:57,2023-01-31,2023-01,Train,1,1,SIL-Instore,v1,ALL
4,1961799,4c88ad53-a283-4d81-b7b1-003e17d26e06,60819617990015,0.125483,2023-03-26 17:22:57,2023-03-26,2023-03,Train,0,1,SIL-Instore,v1,ALL


In [29]:
# Keep the Train-period FPD10 pull under its own name, alongside df1 (Test).
df2 = dfd.copy()

In [30]:
# Row count per model version in the FPD10 Train pull.
df2['modelVersionId'].value_counts()

modelVersionId
v1    132827
Name: count, dtype: int64

In [31]:
# Stack the Test (df1) and Train (df2) FPD10 pulls into one frame with a fresh
# 0..n-1 index, then print its schema and non-null counts.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 177174 entries, 0 to 177173
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             177174 non-null  object        
 1   digitalLoanAccountId   177174 non-null  object        
 2   loanAccountNumber      177174 non-null  object        
 3   Alpha_cic_sil_score    177174 non-null  object        
 4   appln_submit_datetime  177174 non-null  datetime64[us]
 5   disbursementdate       177174 non-null  dbdate        
 6   Application_month      177174 non-null  object        
 7   Data_selection         177174 non-null  object        
 8   deffpd10               177174 non-null  Int64         
 9   flg_mature_fpd10       177174 non-null  Int64         
 10  new_loan_type          177174 non-null  object        
 11  modelVersionId         177174 non-null  object        
 12  trenchCategory         177174 non-null  obje

In [32]:
# The score column arrives as dtype `object` (see df_concat.info()); convert it
# to numeric first. Unparseable values become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')

# Periodic Gini of the Alpha CIC SIL score against the FPD10 flag, broken down by
# loan type, model version and trench category.
# The simpler helpers (calculate_periodic_gini, calculate_periodic_gini_producttype)
# were earlier alternatives for the same inputs and are not used here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [33]:
# Keep the FPD10 Gini table under its own name (`gini_results` is reassigned
# later), then count result rows per loan type.
f1=gini_results.copy()
f1.groupby('loan_type').size()

loan_type
Overall           178
SIL Competitor     48
SIL Repeat         14
SIL ZERO           78
SIL-Instore       178
dtype: int64

In [34]:
# List the FPD10 result columns.
f1.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [35]:
# Reshape the FPD10 Gini table: fixed column order, Gini column renamed to the
# bare target name, plus a constant `category` tag identifying this run.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FPD10_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FPD10_gini': 'FPD10'})
    .assign(category='cic_sil_score_fpd10_v1_all')
)

## FPD30

## Test

In [36]:
# Test-period pull for the FPD30 target.
# Reads v1 scores from audit_balance.ml_model_run_details, attaches loan master
# attributes and delinquency flags, and tags rows Data_selection = 'Test'.
# Only disbursed loans with a non-null score and flg_mature_fpd30 = 1 are kept.
# NOTE(review): the FPD0 queries in this notebook also accept the display name
# 'Sil-Alpha-CIC-SIL-Model'; this list does not. Confirm the difference is intended.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  ;
"""
# Run the query, load the result into pandas and drop fully duplicated rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,3388385,317b896a-1ad4-4771-8656-dd48e9301661,60833883850013,0.1141257415129711,2025-04-19 11:08:29,2025-04-19,2025-04,Test,0,1,SIL-Instore,v1,ALL
1,3388242,5b6dc3b2-98b7-4eca-8e42-52e2f69610b1,60833882420019,0.05539151271959,2025-04-19 10:25:30,2025-04-19,2025-04,Test,0,1,SIL ZERO,v1,ALL
2,3388247,8ba0951d-f590-49db-a199-e9626120de96,60833882470014,0.0948654829632301,2025-04-19 10:29:06,2025-04-19,2025-04,Test,0,1,SIL-Instore,v1,ALL
3,3388449,6523ad01-5b33-40be-895c-eb8e0df294e8,60833884490012,0.0647777688403419,2025-04-19 11:45:16,2025-04-19,2025-04,Test,0,1,SIL-Instore,v1,ALL
4,3388484,af6bc244-126c-4b47-b69a-5bbf49b292f1,60833884840015,0.1034354109242058,2025-04-19 11:48:38,2025-04-19,2025-04,Test,0,1,SIL-Instore,v1,ALL


In [37]:
# Keep the Test-period FPD30 pull; `dfd` is reused by the Train query that follows.
df1 = dfd.copy()

## Train

In [38]:
# Train-period pull for the FPD30 target.
# Reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details,
# attaches loan master attributes and delinquency flags, and tags rows
# Data_selection = 'Train'. Only disbursed loans with a non-null score and
# flg_mature_fpd30 = 1 are kept.
# NOTE(review): the FPD0 queries in this notebook also accept the display name
# 'Sil-Alpha-CIC-SIL-Model'; this list does not. Confirm the difference is intended.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
"""
# Run the query, load the result into pandas and drop fully duplicated rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2019225,84b799a9-0494-42a8-ac11-66c1bf293f0b,60820192250012,0.092452,2023-04-29 13:18:58,2023-04-29,2023-04,Train,0,1,SIL-Instore,v1,ALL
1,1964698,60f7d25e-c48a-4d86-8874-2705e727754f,60819646980013,0.04872,2023-03-28 11:43:41,2023-03-28,2023-03,Train,0,1,SIL-Instore,v1,ALL
2,1874879,54fdcfef-90e3-44c3-a338-efced1047721,60818748790015,0.175205,2023-01-26 17:06:12,2023-01-26,2023-01,Train,0,1,SIL-Instore,v1,ALL
3,2010226,8e3bfb4d-563b-4a39-8a75-26bf69b8184d,60820102260017,0.171776,2023-04-23 15:02:43,2023-04-23,2023-04,Train,0,1,SIL-Instore,v1,ALL
4,2117533,19924a73-d771-486d-9706-bcadd7231914,60821175330019,0.123671,2023-07-03 13:22:05,2023-07-03,2023-07,Train,0,1,SIL-Instore,v1,ALL


In [39]:
# Keep the Train-period FPD30 pull under its own name, alongside df1 (Test).
df2 = dfd.copy()

In [40]:
# Row count per model version in the FPD30 Train pull.
df2['modelVersionId'].value_counts()

modelVersionId
v1    132827
Name: count, dtype: int64

In [41]:
# Stack the Test (df1) and Train (df2) FPD30 pulls into one frame with a fresh
# 0..n-1 index, then print its schema and non-null counts.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170947 entries, 0 to 170946
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             170947 non-null  object        
 1   digitalLoanAccountId   170947 non-null  object        
 2   loanAccountNumber      170947 non-null  object        
 3   Alpha_cic_sil_score    170947 non-null  object        
 4   appln_submit_datetime  170947 non-null  datetime64[us]
 5   disbursementdate       170947 non-null  dbdate        
 6   Application_month      170947 non-null  object        
 7   Data_selection         170947 non-null  object        
 8   deffpd30               170947 non-null  Int64         
 9   flg_mature_fpd30       170947 non-null  Int64         
 10  new_loan_type          170947 non-null  object        
 11  modelVersionId         170947 non-null  object        
 12  trenchCategory         170947 non-null  obje

In [42]:
# The score column arrives as dtype `object` (see df_concat.info()); convert it
# to numeric first. Unparseable values become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')

# Periodic Gini of the Alpha CIC SIL score against the FPD30 flag, broken down by
# loan type, model version and trench category.
# The simpler helpers (calculate_periodic_gini, calculate_periodic_gini_producttype)
# were earlier alternatives for the same inputs and are not used here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [43]:
# Keep the FPD30 Gini table under its own name, then count result rows per loan type.
f2=gini_results.copy()
f2.groupby('loan_type').size()

loan_type
Overall           176
SIL Competitor     46
SIL Repeat         14
SIL ZERO           76
SIL-Instore       176
dtype: int64

In [44]:
# List the FPD30 result columns.
f2.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [45]:
# Reshape the FPD30 Gini table: fixed column order, Gini column renamed to the
# bare target name, plus a constant `category` tag identifying this run.
f20 = (
    f2[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FPD30_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FPD30_gini': 'FPD30'})
    .assign(category='cic_sil_score_fpd30_v1_all')
)

## FSPD30

## Test

In [46]:
# Test-period pull for the FSPD30 target (deffspd30).
# Reads v1 scores from audit_balance.ml_model_run_details, attaches loan master
# attributes and delinquency flags, and tags rows Data_selection = 'Test'.
# Only disbursed loans with a non-null score and flg_mature_fspd_30 = 1 are kept.
# NOTE(review): the FPD0 queries in this notebook also accept the display name
# 'Sil-Alpha-CIC-SIL-Model'; this list does not. Confirm the difference is intended.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  ;
"""
# Run the query, load the result into pandas and drop fully duplicated rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3667643,4f9d9181-a9f3-4550-b2d0-9cd5f47c7e8e,60836676430016,0.09060850999242,2025-09-06 13:42:54,2025-09-06,2025-09,Test,0,1,SIL-Instore,v1,ALL
1,3672802,3ed24d4c-71ab-4407-a100-7a93d7ffb0b8,60836728020018,0.1848570855795629,2025-09-08 17:39:37,2025-09-08,2025-09,Test,0,1,SIL-Instore,v1,ALL
2,3667766,210089f8-2643-4085-831b-df09ff59ab8f,60836677660016,0.1027676391646116,2025-09-06 14:28:15,2025-09-06,2025-09,Test,0,1,SIL-Instore,v1,ALL
3,3664307,0a429fdc-81af-409c-b9ad-30ebb02f4ab1,60836643070011,0.065295151950431,2025-09-04 19:16:55,2025-09-04,2025-09,Test,0,1,SIL Competitor,v1,ALL
4,3668457,11d08ad4-f4fa-4def-b3ca-9069209eb5ee,60836684570011,0.1098027291805355,2025-09-06 18:40:08,2025-09-06,2025-09,Test,0,1,SIL-Instore,v1,ALL


In [47]:
# Keep the Test pull under its own name; the next query cell reassigns dfd.
df1 = dfd.copy()

## Train

In [48]:
# FSPD30 / Train population for model version v1.
# Same shape as the Test query, but predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged 'Train'.
# Filters: disbursed loans, non-null score, flg_mature_fspd_30 = 1.
# NOTE(review): the `left join` to loan_master_table behaves like an inner join, because the
# following inner join and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): calcFeature and end_time are projected by the CTE but not used by the final select.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  flg_mature_fspd_30,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
"""
# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,1906261,aa42d0e6-aca4-4917-a065-8cfdf17ccc0d,60819062610016,0.065794,2023-02-18 14:49:12,2023-02-18,2023-02,Train,0,1,SIL-Instore,v1,ALL
1,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,0.107859,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
2,1861570,38bad92e-ad5c-4d5e-b1b5-7e4add8c233c,60818615700011,0.072651,2023-01-14 15:41:55,2023-01-15,2023-01,Train,0,1,SIL-Instore,v1,ALL
3,2120659,065b5161-0246-4da2-abf7-4b2dd21dc03d,60821206590019,0.12559,2023-07-05 17:19:01,2023-07-05,2023-07,Train,0,1,SIL-Instore,v1,ALL
4,1416748,78e01765-e029-4f14-a04d-75c576ad66f2,60814167480027,0.137399,2023-04-17 18:23:51,2023-04-17,2023-04,Train,0,1,SIL-Instore,v1,ALL


In [49]:
# Keep the Train pull as df2 so it can be stacked with the Test pull (df1).
df2 = dfd.copy()

In [50]:
# Sanity check: row count per modelVersionId in the Train pull (the query filters to 'v1').
df2['modelVersionId'].value_counts()

modelVersionId
v1    132827
Name: count, dtype: int64

In [51]:
# Stack the Test (df1) and Train (df2) pulls row-wise under a fresh 0..n-1 index,
# then print the schema summary to check dtypes and null counts.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 161997 entries, 0 to 161996
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             161997 non-null  object        
 1   digitalLoanAccountId   161997 non-null  object        
 2   loanAccountNumber      161997 non-null  object        
 3   Alpha_cic_sil_score    161997 non-null  object        
 4   appln_submit_datetime  161997 non-null  datetime64[us]
 5   disbursementdate       161997 non-null  dbdate        
 6   Application_month      161997 non-null  object        
 7   Data_selection         161997 non-null  object        
 8   deffspd30              161997 non-null  Int64         
 9   flg_mature_fspd_30     161997 non-null  Int64         
 10  new_loan_type          161997 non-null  object        
 11  modelVersionId         161997 non-null  object        
 12  trenchCategory         161997 non-null  obje

In [52]:
# The score column is object dtype after the concat (see the df_concat.info() output),
# so coerce it to numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(
    df_concat['Alpha_cic_sil_score'], errors='coerce'
)

# Periodic Gini of the score against the FSPD30 default flag, broken down by
# product type, model version and trench.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [53]:
# Snapshot the FSPD30 Gini results as f3; gini_results is reassigned by the next metric.
f3=gini_results.copy()

In [54]:
# Inspect f3's column names before selecting and renaming them in the next cell.
f3.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [55]:
# Build the FSPD30 slice of f3: keep the key columns plus the Gini column,
# expose the Gini under the short metric name, and label the rows with their category.
f30 = (
    f3[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FSPD30_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FSPD30_gini': 'FSPD30'})
    .assign(category='cic_sil_score_fspd30_v1_all')
)

## FSTPD30

## Test

In [56]:
# FSTPD30 / Test population for model version v1.
#   * `modelname` CTE: v1 predictions for modelDisplayName in
#     ('Alpha - CIC-SIL-Model', 'cic_model_sil') from audit_balance.ml_model_run_details;
#     a NULL or empty trenchCategory is mapped to 'ALL'.
#   * `deliquency` CTE: derives 0/1 default flags (deffpd0 ... deffstpd30) and maturity
#     flags (flg_mature_*) from obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
#   * Final select: joins both to loan_master_table, keeps disbursed loans with a non-null
#     score and flg_mature_fstpd_30 = 1, and tags every row as 'Test'.
# NOTE(review): the `left join` to loan_master_table behaves like an inner join, because the
# following inner join and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): calcFeature and end_time are projected by the CTE but not used by the final select.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  ;
"""
# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3603221,67c928fa-e469-4471-b6fa-0ccfc459d6b5,60836032210012,0.1581296026312763,2025-08-06 15:08:17,2025-08-06,2025-08,Test,0,1,SIL-Instore,v1,ALL
1,3452144,e5ced157-e148-4f14-8843-9e9a78bd3455,60834521440015,0.1665979767606213,2025-05-22 19:37:58,2025-05-22,2025-05,Test,0,1,SIL-Instore,v1,ALL
2,3381741,64dc0d73-2ee4-402f-a011-75b2a711d917,60833817410015,0.0633023828072635,2025-04-15 18:26:12,2025-04-15,2025-04,Test,0,1,SIL-Instore,v1,ALL
3,3605049,20dc9554-bb7a-47fe-abbf-a79febabfaf3,60836050490016,0.1113608502014433,2025-08-07 13:28:53,2025-08-07,2025-08,Test,0,1,SIL-Instore,v1,ALL
4,3603759,838f536b-7d43-444a-b856-250535d2975f,60836037590011,0.1501846923878645,2025-08-06 17:59:33,2025-08-06,2025-08,Test,0,1,SIL-Instore,v1,ALL


In [57]:
# Keep the Test pull under its own name; the next query cell reassigns dfd.
df1 = dfd.copy()

## Train

In [58]:
# FSTPD30 / Train population for model version v1.
# Same shape as the Test query, but predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged 'Train'.
# Filters: disbursed loans, non-null score, flg_mature_fstpd_30 = 1.
# NOTE(review): the `left join` to loan_master_table behaves like an inner join, because the
# following inner join and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): calcFeature and end_time are projected by the CTE but not used by the final select.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
"""
# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,1906261,aa42d0e6-aca4-4917-a065-8cfdf17ccc0d,60819062610016,0.065794,2023-02-18 14:49:12,2023-02-18,2023-02,Train,0,1,SIL-Instore,v1,ALL
1,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,0.107859,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
2,1861570,38bad92e-ad5c-4d5e-b1b5-7e4add8c233c,60818615700011,0.072651,2023-01-14 15:41:55,2023-01-15,2023-01,Train,0,1,SIL-Instore,v1,ALL
3,2120659,065b5161-0246-4da2-abf7-4b2dd21dc03d,60821206590019,0.12559,2023-07-05 17:19:01,2023-07-05,2023-07,Train,0,1,SIL-Instore,v1,ALL
4,1416748,78e01765-e029-4f14-a04d-75c576ad66f2,60814167480027,0.137399,2023-04-17 18:23:51,2023-04-17,2023-04,Train,0,1,SIL-Instore,v1,ALL


In [59]:
# Keep the Train pull as df2 so it can be stacked with the Test pull (df1).
df2 = dfd.copy()

In [60]:
# Sanity check: row count per modelVersionId in the Train pull (the query filters to 'v1').
df2['modelVersionId'].value_counts()

modelVersionId
v1    132827
Name: count, dtype: int64

In [61]:
# Stack the Test (df1) and Train (df2) pulls row-wise under a fresh 0..n-1 index,
# then print the schema summary to check dtypes and null counts.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152436 entries, 0 to 152435
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             152436 non-null  object        
 1   digitalLoanAccountId   152436 non-null  object        
 2   loanAccountNumber      152436 non-null  object        
 3   Alpha_cic_sil_score    152436 non-null  object        
 4   appln_submit_datetime  152436 non-null  datetime64[us]
 5   disbursementdate       152436 non-null  dbdate        
 6   Application_month      152436 non-null  object        
 7   Data_selection         152436 non-null  object        
 8   deffstpd30             152436 non-null  Int64         
 9   flg_mature_fstpd_30    152436 non-null  Int64         
 10  new_loan_type          152436 non-null  object        
 11  modelVersionId         152436 non-null  object        
 12  trenchCategory         152436 non-null  obje

In [62]:
# The score column is object dtype after the concat (see the df_concat.info() output),
# so coerce it to numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(
    df_concat['Alpha_cic_sil_score'], errors='coerce'
)

# Periodic Gini of the score against the FSTPD30 default flag, broken down by
# product type, model version and trench.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [63]:
# Snapshot the FSTPD30 Gini results as f4 and display them.
f4 = gini_results.copy()
f4

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.091195,Month,Alpha_cic_sil_score,v1,FSTPD30,Overall,ALL
1,2023-01-09,2023-01-15,0.175207,Week,Alpha_cic_sil_score,v1,FSTPD30,Overall,ALL
2,2023-01-16,2023-01-22,-0.133333,Week,Alpha_cic_sil_score,v1,FSTPD30,Overall,ALL
3,2023-01-23,2023-01-29,-0.035088,Week,Alpha_cic_sil_score,v1,FSTPD30,Overall,ALL
4,2023-01-30,2023-02-05,0.285088,Week,Alpha_cic_sil_score,v1,FSTPD30,Overall,ALL
...,...,...,...,...,...,...,...,...,...
439,2024-12-23,2024-12-29,0.118359,Week,Alpha_cic_sil_score,v1,FSTPD30,SIL Repeat,ALL
440,2024-12-30,2025-01-05,0.234065,Week,Alpha_cic_sil_score,v1,FSTPD30,SIL Repeat,ALL
441,2025-01-01,2025-01-31,0.326338,Month,Alpha_cic_sil_score,v1,FSTPD30,SIL Repeat,ALL
442,2025-01-06,2025-01-12,0.329517,Week,Alpha_cic_sil_score,v1,FSTPD30,SIL Repeat,ALL


In [64]:
# Sanity check: number of result rows per model version.
f4['version'].value_counts()

version
v1    444
Name: count, dtype: int64

In [65]:
# Build the FSTPD30 slice of f4: keep the key columns plus the Gini column,
# expose the Gini under the short metric name, and label the rows with their category.
f40 = (
    f4[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FSTPD30_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='cic_sil_score_fstpd30_v1_all')
)

## Combining the dataframes

In [66]:
import functools

# Per-metric Gini frames for v1, folded pairwise into one wide frame.
dataframes = [f0, f1, f2, f3, f4]
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name',
    'version', 'loan_type', 'trench_category', 'bad_rate',
]


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on the notebook-level ``common_columns`` list."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): `bad_rate` is a join key and appears to hold the metric label
# (e.g. 'FSTPD30' in f4), so rows from different frames presumably never share a key
# and the outer merge stacks them, leaving NaN in the other Gini columns - confirm
# this long layout is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Alpha_cic_sil_score_FPD10_gini',
       'Alpha_cic_sil_score_FPD30_gini',
       'Alpha_cic_sil_score_FSPD30_gini',
       'Alpha_cic_sil_score_FSTPD30_gini'], dtype=object)

In [67]:
# Distribution of trench_category in the merged frame; dropna=False counts NaN as its own bucket.
final_df['trench_category'].value_counts(dropna=False)

trench_category
ALL    2404
Name: count, dtype: int64

In [68]:
# Capitalise the trench column and tag every Gini column with the v1 model version.
final_df = final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        'Alpha_cic_sil_score_FPD0_gini': 'Alpha_cic_sil_score_FPD0_v1_gini',
        'Alpha_cic_sil_score_FPD10_gini': 'Alpha_cic_sil_score_FPD10_v1_gini',
        'Alpha_cic_sil_score_FPD30_gini': 'Alpha_cic_sil_score_FPD30_v1_gini',
        'Alpha_cic_sil_score_FSPD30_gini': 'Alpha_cic_sil_score_FSPD30_v1_gini',
        'Alpha_cic_sil_score_FSTPD30_gini': 'Alpha_cic_sil_score_FSTPD30_v1_gini',
    }
)
final_df.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_v1_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'Trench_category',
       'Alpha_cic_sil_score_FPD10_v1_gini',
       'Alpha_cic_sil_score_FPD30_v1_gini',
       'Alpha_cic_sil_score_FSPD30_v1_gini',
       'Alpha_cic_sil_score_FSTPD30_v1_gini'],
      dtype='object')

In [69]:
# Put the key columns first, followed by the five v1 Gini columns, then append the
# constant model/product descriptors. Finish by showing the dtypes.
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'Trench_category',
    'Alpha_cic_sil_score_FPD0_v1_gini',
    'Alpha_cic_sil_score_FPD10_v1_gini',
    'Alpha_cic_sil_score_FPD30_v1_gini',
    'Alpha_cic_sil_score_FSPD30_v1_gini',
    'Alpha_cic_sil_score_FSTPD30_v1_gini',
]].assign(
    Model_display_name='cic_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                             datetime64[ns]
end_date                               datetime64[ns]
period                                         object
Model_Name                                     object
version                                        object
bad_rate                                       object
loan_type                                      object
Trench_category                                object
Alpha_cic_sil_score_FPD0_v1_gini              float64
Alpha_cic_sil_score_FPD10_v1_gini             float64
Alpha_cic_sil_score_FPD30_v1_gini             float64
Alpha_cic_sil_score_FSPD30_v1_gini            float64
Alpha_cic_sil_score_FSTPD30_v1_gini           float64
Model_display_name                             object
Product_type                                   object
dtype: object

In [70]:
# Preview the first rows of the frame that the next cell uploads.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,Trench_category,Alpha_cic_sil_score_FPD0_v1_gini,Alpha_cic_sil_score_FPD10_v1_gini,Alpha_cic_sil_score_FPD30_v1_gini,Alpha_cic_sil_score_FSPD30_v1_gini,Alpha_cic_sil_score_FSTPD30_v1_gini,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,,,,,cic_model_sil,SIL
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,,,,,cic_model_sil,SIL
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,,,,,cic_model_sil,SIL
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,,,,,cic_model_sil,SIL
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,,,,,cic_model_sil,SIL


In [71]:
# Load final_df into BigQuery. WRITE_TRUNCATE replaces whatever the destination
# table currently holds (use "WRITE_APPEND" to add rows instead).
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.cic_sil_score_v1_all_giniv1"
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job has finished; the finished job is the cell's output.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f24f7062-053f-4942-ad0a-60e93a0456ae>

In [72]:
# Check f01's columns before merging it with f10, f20, f30 and f40 in the next cell.
f01.columns

Index(['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
       'loan_type', 'trench_category', 'FPD0', 'category'],
      dtype='object')

In [73]:
import functools

# Short-named per-metric frames (FPD0 ... FSTPD30) for v1, folded into one frame.
dataframes = [f01, f10, f20, f30, f40]
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'trench_category', 'bad_rate', 'category',
]


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on the notebook-level ``common_columns`` list."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): `category` is a join key and f20/f30/f40 each carry a different
# constant category, so rows from different frames do not match and the outer merge
# effectively stacks them (other metric columns are NaN) - confirm this is intended.
cicsilscorev1all = functools.reduce(merge_dataframes, dataframes)

cicsilscorev1all.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [74]:
# Start the master frame from a copy of the v1 "ALL" results, then preview it.
masterdf = cicsilscorev1all.copy()
masterdf.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## V2

## Trench 1

## FPD0

## Test

In [75]:
# FPD0 / Test population for model version v2, restricted to trenchCategory 'Trench 1'.
#   * `modelname` CTE: v2 predictions for modelDisplayName in ('Alpha - CIC-SIL-Model',
#     'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model') from audit_balance.ml_model_run_details;
#     a NULL or empty trenchCategory is mapped to 'ALL'.
#   * `deliquency` CTE: derives 0/1 default flags (deffpd0 ... deffstpd30) and maturity
#     flags (flg_mature_*) from obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
#   * Final select: joins both to loan_master_table, keeps disbursed loans with a non-null
#     score, flg_mature_fpd0 = 1 and trenchCategory = 'Trench 1'; rows are tagged 'Test'.
# NOTE(review): the `left join` to loan_master_table behaves like an inner join, because the
# following inner join and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): calcFeature and end_time are projected by the CTE but not used by the final select.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [76]:
# Keep the Test pull under its own name; the next query cell reassigns dfd.
df1 = dfd.copy()

In [77]:
# Sanity check: row count per modelVersionId in the Test pull (an empty result means no rows matched).
df1['modelVersionId'].value_counts()

Series([], Name: count, dtype: int64)

## Train

In [78]:
# FPD0 / Train population for model version v2, restricted to trenchCategory 'Trench 1'.
# Same shape as the Test query, but predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged 'Train'.
# Filters: disbursed loans, non-null score, flg_mature_fpd0 = 1, trenchCategory = 'Trench 1'.
# NOTE(review): the `left join` to loan_master_table behaves like an inner join, because the
# following inner join and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): calcFeature and end_time are projected by the CTE but not used by the final select.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
    trenchCategory,
    from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
"""
# Run the query, then drop rows that are exact duplicates across all selected columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,cic_model_sil,0.394028,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2824212,f2d289fd-12f0-4ae0-8f1f-5f5b348f9680,60828242120011,cic_model_sil,0.455857,2024-09-05 17:24:48,2024-09-05,2024-09,Train,1,1,SIL-Instore,v2,Trench 1
2,2893305,4c510d3e-a46f-4cdd-ad3e-a62129a48f9a,60828933050015,cic_model_sil,0.304766,2024-09-29 15:01:04,2024-09-29,2024-09,Train,1,1,SIL-Instore,v2,Trench 1
3,2880072,73acd3db-b0eb-4981-82ee-bcc331de48d8,60828800720011,cic_model_sil,0.53182,2024-09-25 15:12:46,2024-09-25,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
4,2860511,fc323956-fbfe-49cb-9618-ee85883f8d28,60828605110012,cic_model_sil,0.255585,2024-09-19 14:24:01,2024-09-19,2024-09,Train,1,1,SIL-Instore,v2,Trench 1


In [79]:
# Snapshot the Train pull as df2 so it can be stacked with df1 later.
df2 = dfd.copy()

In [80]:
# Row count per (model version, model display name) pair in the Train pull.
df2[['modelVersionId', 'modelDisplayName']].value_counts()

modelVersionId  modelDisplayName
v2              cic_model_sil       147502
Name: count, dtype: int64

In [81]:
# Stack the df1 and df2 pulls into one frame for the Gini run.
# Empty pulls are dropped first: concatenating an empty frame raises pandas'
# FutureWarning about empty/all-NA entries, and in future pandas versions the
# empty frame would take part in result-dtype inference. Both pulls expose the
# same columns, so skipping an empty one loses nothing.
frames_to_stack = [frame for frame in (df1, df2) if not frame.empty]
if frames_to_stack:
    df_concat = pd.concat(frames_to_stack, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries the columns.
    df_concat = df1.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147502 entries, 0 to 147501
Data columns (total 14 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             147502 non-null  Int64         
 1   digitalLoanAccountId   147502 non-null  object        
 2   loanAccountNumber      147502 non-null  object        
 3   modelDisplayName       147502 non-null  object        
 4   Alpha_cic_sil_score    147502 non-null  float64       
 5   appln_submit_datetime  147502 non-null  datetime64[us]
 6   disbursementdate       147502 non-null  dbdate        
 7   Application_month      147502 non-null  object        
 8   Data_selection         147502 non-null  object        
 9   deffpd0                147502 non-null  Int64         
 10  flg_mature_fpd0        147502 non-null  Int64         
 11  new_loan_type          147502 non-null  object        
 12  modelVersionId         147502 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [82]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')

# Rows per application month (NaN months included), in chronological order.
# Kept as the LAST expression so the notebook actually displays it; as the
# first line of the cell its result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [83]:
# Periodic Gini of the score against the FPD0 flag, segmented by loan type,
# model version and trench category.
# (calculate_periodic_gini and calculate_periodic_gini_producttype are the
# less granular alternatives that were tried here previously.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [84]:
# Keep the FPD0 Gini table under its own name; `gini_results` is reused for the
# other delinquency metrics further down.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.285491,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1
1,2024-09-01,2024-09-30,0.20148,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1
2,2024-09-02,2024-09-08,0.229599,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1
3,2024-09-09,2024-09-15,0.183286,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1
4,2024-09-16,2024-09-22,0.17828,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1


In [85]:
# Number of Gini rows (periods) available per loan type / version / trench segment.
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 1           73
SIL Competitor  v2       Trench 1           60
SIL ZERO        v2       Trench 1           71
SIL-Instore     v2       Trench 1           73
dtype: int64

In [86]:
# Tidy FPD0 table: fixed column order, Gini column exposed as 'FPD0', and a
# constant label identifying score / metric / version / trench.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FPD0_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FPD0_gini': 'FPD0'})
    .assign(category='cic_sil_score_fpd0_v2_t1')
)

## FPD10

## Test (FPD10)

In [87]:
# FPD10 / Test pull.
# Reads v2 scores from audit_balance.ml_model_run_details, joins the loan master
# and the per-loan delinquency flags, and keeps disbursed Trench 1 loans that
# have a non-null score and a mature FPD10 observation.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, materialise it as a DataFrame and drop fully duplicated rows
# (first occurrence wins).
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [88]:
# Snapshot the FPD10 Test pull as df1; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train (FPD10)

In [89]:
# FPD10 / Train pull.
# Reads v2 scores from the training run-details table, joins the loan master and
# the per-loan delinquency flags, and keeps disbursed Trench 1 loans that have a
# non-null score and a mature FPD10 observation.
#
# NOTE: the maturity filter is `flg_mature_fpd10 = 1`, matching the `deffpd10`
# target selected here and the filter used by the FPD10 Test query. It previously
# read `flg_mature_fpd0 = 1` (copied from the FPD0 cell), which also admitted
# loans whose FPD10 window had not been observed yet. Row counts downstream may
# change slightly after re-running.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
   and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
"""
# Run the query, materialise it as a DataFrame and drop fully duplicated rows
# (first occurrence wins).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.394028,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2829034,f678f079-9b86-4332-bc59-826c944653f3,60828290340011,0.430764,2024-09-07 18:35:31,2024-09-07,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
2,2828901,2571d522-9ca0-43ae-a786-1f41a75c7bb4,60828289010014,0.405341,2024-09-07 17:48:42,2024-09-07,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
3,2895821,a391388b-7643-4ffd-a6a6-1e807f6f0043,60828958210018,0.180259,2024-09-30 09:54:26,2024-09-30,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
4,2819749,1d23cf0f-271d-45ec-9485-ad81b40dccdd,60828197490013,0.286202,2024-09-03 14:52:27,2024-09-03,2024-09,Train,1,1,SIL-Instore,v2,Trench 1


In [90]:
# Snapshot the FPD10 Train pull as df2 so it can be stacked with df1.
df2 = dfd.copy()

In [91]:
# Row count per model version in the FPD10 Train pull.
df2['modelVersionId'].value_counts()

modelVersionId
v2    147502
Name: count, dtype: int64

In [92]:
# Stack the Test (df1) and Train (df2) FPD10 pulls into one frame for the Gini run.
# Empty pulls are dropped first: concatenating an empty frame raises pandas'
# FutureWarning about empty/all-NA entries, and in future pandas versions the
# empty frame would take part in result-dtype inference. Both pulls expose the
# same columns, so skipping an empty one loses nothing.
frames_to_stack = [frame for frame in (df1, df2) if not frame.empty]
if frames_to_stack:
    df_concat = pd.concat(frames_to_stack, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries the columns.
    df_concat = df1.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147502 entries, 0 to 147501
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             147502 non-null  Int64         
 1   digitalLoanAccountId   147502 non-null  object        
 2   loanAccountNumber      147502 non-null  object        
 3   Alpha_cic_sil_score    147502 non-null  float64       
 4   appln_submit_datetime  147502 non-null  datetime64[us]
 5   disbursementdate       147502 non-null  dbdate        
 6   Application_month      147502 non-null  object        
 7   Data_selection         147502 non-null  object        
 8   deffpd10               147502 non-null  Int64         
 9   flg_mature_fpd10       147502 non-null  Int64         
 10  new_loan_type          147502 non-null  object        
 11  modelVersionId         147502 non-null  object        
 12  trenchCategory         147502 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [93]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(
    df_concat['Alpha_cic_sil_score'],
    errors='coerce',
)

# Periodic Gini of the score against the FPD10 flag, segmented by loan type,
# model version and trench category.
# (calculate_periodic_gini and calculate_periodic_gini_producttype are the
# less granular alternatives that were tried here previously.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [94]:
# Keep the FPD10 Gini table under its own name, then show how many Gini rows
# (periods) each loan type has.
f1=gini_results.copy()
f1.groupby('loan_type').size()

loan_type
Overall           73
SIL Competitor    60
SIL ZERO          71
SIL-Instore       73
dtype: int64

In [95]:
# Preview the first FPD10 Gini rows.
f1.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.320699,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 1
1,2024-09-01,2024-09-30,0.269141,Month,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 1
2,2024-09-02,2024-09-08,0.281195,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 1
3,2024-09-09,2024-09-15,0.27376,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 1
4,2024-09-16,2024-09-22,0.193007,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 1


In [96]:
# Tidy FPD10 table: fixed column order, Gini column exposed as 'FPD10', and a
# constant label identifying score / metric / version / trench.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FPD10_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FPD10_gini': 'FPD10'})
    .assign(category='cic_sil_score_fpd10_v2_t1')
)

## FPD30

## Test (FPD30)

In [97]:
# FPD30 / Test pull.
# Reads v2 scores from audit_balance.ml_model_run_details, joins the loan master
# and the per-loan delinquency flags, and keeps disbursed Trench 1 loans that
# have a non-null score and a mature FPD30 observation.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, materialise it as a DataFrame and drop fully duplicated rows
# (first occurrence wins).
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [98]:
# Snapshot the FPD30 Test pull as df1; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train (FPD30)

In [99]:
# FPD30 / Train pull.
# Reads v2 scores from the training run-details table, joins the loan master and
# the per-loan delinquency flags, and keeps disbursed Trench 1 loans that have a
# non-null score and a mature FPD30 observation.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
"""
# Run the query, materialise it as a DataFrame and drop fully duplicated rows
# (first occurrence wins).
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.394028,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2847778,6f1166e4-efcd-418f-965a-b140a9f93d09,60828477780015,0.293337,2024-09-15 13:47:31,2024-09-15,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
2,2870277,8c415e77-df3a-4b93-aaec-86d269cbf026,60828702770016,0.28651,2024-09-22 15:34:30,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
3,2848589,7ec3f6f7-bbb7-43f5-b906-edc636263e0f,60828485890016,0.365858,2024-09-15 16:52:16,2024-09-15,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
4,2823669,ed4b5dea-c501-4a09-8de2-d6219f39ef6a,60828236690015,0.466448,2024-09-05 14:09:58,2024-09-05,2024-09,Train,0,1,SIL-Instore,v2,Trench 1


In [100]:
# Snapshot the FPD30 Train pull as df2 so it can be stacked with df1.
df2 = dfd.copy()

In [101]:
# Row count per model version in the FPD30 Train pull.
df2['modelVersionId'].value_counts()

modelVersionId
v2    147493
Name: count, dtype: int64

In [102]:
# Stack the Test (df1) and Train (df2) FPD30 pulls into one frame for the Gini run.
# Empty pulls are dropped first: concatenating an empty frame raises pandas'
# FutureWarning about empty/all-NA entries, and in future pandas versions the
# empty frame would take part in result-dtype inference. Both pulls expose the
# same columns, so skipping an empty one loses nothing.
frames_to_stack = [frame for frame in (df1, df2) if not frame.empty]
if frames_to_stack:
    df_concat = pd.concat(frames_to_stack, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries the columns.
    df_concat = df1.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 147493 entries, 0 to 147492
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             147493 non-null  Int64         
 1   digitalLoanAccountId   147493 non-null  object        
 2   loanAccountNumber      147493 non-null  object        
 3   Alpha_cic_sil_score    147493 non-null  float64       
 4   appln_submit_datetime  147493 non-null  datetime64[us]
 5   disbursementdate       147493 non-null  dbdate        
 6   Application_month      147493 non-null  object        
 7   Data_selection         147493 non-null  object        
 8   deffpd30               147493 non-null  Int64         
 9   flg_mature_fpd30       147493 non-null  Int64         
 10  new_loan_type          147493 non-null  object        
 11  modelVersionId         147493 non-null  object        
 12  trenchCategory         147493 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [103]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(
    df_concat['Alpha_cic_sil_score'],
    errors='coerce',
)

# Periodic Gini of the score against the FPD30 flag, segmented by loan type,
# model version and trench category.
# (calculate_periodic_gini and calculate_periodic_gini_producttype are the
# less granular alternatives that were tried here previously.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [104]:
# Keep the FPD30 Gini table under its own name, then show how many Gini rows
# (periods) each loan type has.
f2=gini_results.copy()
f2.groupby('loan_type').size()

loan_type
Overall           72
SIL Competitor    60
SIL ZERO          71
SIL-Instore       72
dtype: int64

In [105]:
# Preview the first FPD30 Gini rows.
f2.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.351741,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 1
1,2024-09-01,2024-09-30,0.290666,Month,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 1
2,2024-09-02,2024-09-08,0.28842,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 1
3,2024-09-09,2024-09-15,0.316883,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 1
4,2024-09-16,2024-09-22,0.225854,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 1


In [106]:
# Tidy FPD30 table: fixed column order, Gini column exposed as 'FPD30', and a
# constant label identifying score / metric / version / trench.
f20 = (
    f2[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Alpha_cic_sil_score_FPD30_gini',
    ]]
    .rename(columns={'Alpha_cic_sil_score_FPD30_gini': 'FPD30'})
    .assign(category='cic_sil_score_fpd30_v2_t1')
)

## FSPD30

## Test (FSPD30)

In [107]:
# FSPD30 / Test pull.
# Reads v2 scores from audit_balance.ml_model_run_details, joins the loan master
# and the per-loan delinquency flags, and keeps disbursed Trench 1 loans that
# have a non-null score and a mature FSPD30 observation.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, materialise it as a DataFrame and drop fully duplicated rows
# (first occurrence wins).
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [108]:
# Snapshot the FSPD30 Test pull as df1; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train (FSPD30)

In [109]:
# FSPD30 / Train pull.
# Reads v2 scores from the training run-details table, joins the loan master and
# the per-loan delinquency flags, and keeps disbursed Trench 1 loans that have a
# non-null score and a mature FSPD30 observation.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  flg_mature_fspd_30,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
"""
# Run the query, materialise it as a DataFrame and drop fully duplicated rows
# (first occurrence wins).
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.394028,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2892182,bef7195c-ff08-4200-8b8b-6a26a9e7be3b,60828921820018,0.4486,2024-09-29 11:21:30,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
2,2870501,b1d4cf4e-86b5-4fd2-ad90-8c69a81b22a4,60828705010015,0.304766,2024-09-22 16:08:53,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
3,2828272,35678cc7-93e6-4a6e-a0a9-e37efff3a15b,60828282720018,0.561725,2024-09-07 14:49:03,2024-09-07,2024-09,Train,0,1,SIL ZERO,v2,Trench 1
4,2883063,f7078bb5-91aa-4fd3-9171-3ca4547d6cd7,60828830630011,0.255585,2024-09-26 16:03:03,2024-09-26,2024-09,Train,0,1,SIL-Instore,v2,Trench 1


In [110]:
# Keep the Train-side pull as df2 so `dfd` can be reused by later queries.
df2 = dfd.copy()

In [111]:
# Sanity check: row count per model version in the Train pull (the query filters on 'v2').
df2['modelVersionId'].value_counts()

modelVersionId
v2    139067
Name: count, dtype: int64

In [112]:
# Stack the previous pull (df1) and the Train pull (df2) into one frame.
# Empty frames are dropped first: passing an empty DataFrame to pd.concat raises pandas'
# FutureWarning about empty/all-NA entries, and in future versions the empty frame's
# dtypes would take part in determining the result dtypes.
# If both pulls are empty, fall back to df2 alone so the column layout is preserved.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty] or [df2]
df_concat = pd.concat(non_empty_frames, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 139067 entries, 0 to 139066
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             139067 non-null  Int64         
 1   digitalLoanAccountId   139067 non-null  object        
 2   loanAccountNumber      139067 non-null  object        
 3   Alpha_cic_sil_score    139067 non-null  float64       
 4   appln_submit_datetime  139067 non-null  datetime64[us]
 5   disbursementdate       139067 non-null  dbdate        
 6   Application_month      139067 non-null  object        
 7   Data_selection         139067 non-null  object        
 8   deffspd30              139067 non-null  Int64         
 9   flg_mature_fspd_30     139067 non-null  Int64         
 10  new_loan_type          139067 non-null  object        
 11  modelVersionId         139067 non-null  object        
 12  trenchCategory         139067 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [113]:
# Coerce the score column to numeric; values that cannot be parsed become NaN.
score_column = 'Alpha_cic_sil_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

# Gini of the score against the FSPD30 default flag, computed by the notebook helper
# with model-version, trench and product-type columns supplied for the breakdown.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, score_column, 'deffspd30', 'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [114]:
# Keep the FSPD30 Gini results under their own name; `gini_results` is reassigned by later sections.
f3=gini_results.copy()

In [115]:
# Preview the first FSPD30 Gini rows.
f3.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.401137,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 1
1,2024-09-01,2024-09-30,0.30552,Month,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 1
2,2024-09-02,2024-09-08,0.337141,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 1
3,2024-09-09,2024-09-15,0.310992,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 1
4,2024-09-16,2024-09-22,0.252785,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 1


In [116]:
# Long-format FSPD30 slice: key columns plus the Gini value renamed to the bare metric
# name, tagged with a category label for this metric / version / trench.
fspd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Alpha_cic_sil_score_FSPD30_gini',
]
f30 = (
    f3[fspd30_columns]
    .rename(columns={'Alpha_cic_sil_score_FSPD30_gini':'FSPD30'})
    .assign(category='cic_sil_score_fspd30_v2_t1')
)

## FSTPD30

## Test

In [117]:
# Test-side FSTPD30 pull: v2 scores from audit_balance.ml_model_run_details, joined to the
# loan master and to the per-loan delinquency flags built in the `deliquency` CTE.
# Rows are kept only for disbursed loans with a non-null score, flg_mature_fstpd_30 = 1
# (CTE: obs_min_inst_def30 >= 3) and trenchCategory = 'Trench 1'.
# NOTE(review): the saved output below shows only a header row, i.e. no rows came back.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query and drop fully identical rows (first occurrence kept) in one step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [118]:
# Keep the Test-side FSTPD30 pull as df1 before `dfd` is overwritten by the Train query.
df1 = dfd.copy()

## Train

In [119]:
# Train-side FSTPD30 pull: v2 scores from the training run-details table, joined to the
# loan master and to the per-loan delinquency flags built in the `deliquency` CTE.
# Rows are kept only for disbursed loans with a non-null score, flg_mature_fstpd_30 = 1
# (CTE: obs_min_inst_def30 >= 3) and trenchCategory = 'Trench 1'.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
"""
# Run the query and drop fully identical rows (first occurrence kept) in one step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.394028,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2823203,343d0e35-6a53-488c-a286-4505224b262a,60828232030015,0.31363,2024-09-05 11:00:25,2024-09-05,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
2,2816773,3c7d1017-1fba-43cd-9b5c-2bf47bf6b217,60828167730011,0.344301,2024-09-02 11:40:49,2024-09-02,2024-09,Train,1,1,SIL-Instore,v2,Trench 1
3,2876912,9ea7b7ea-31fd-43cb-98a7-f6f5d15bcbb5,60828769120014,0.494136,2024-09-24 13:39:48,2024-09-24,2024-09,Train,0,1,SIL ZERO,v2,Trench 1
4,2815390,1254f6cc-10d6-4ec4-8c64-8d25aaa5521f,60828153900016,0.510943,2024-09-01 18:16:48,2024-09-01,2024-09,Train,1,1,SIL-Instore,v2,Trench 1


In [120]:
# Keep the Train-side FSTPD30 pull as df2 so `dfd` can be reused by later queries.
df2 = dfd.copy()

In [121]:
# Sanity check: row count per model version in the Train pull (the query filters on 'v2').
df2['modelVersionId'].value_counts()

modelVersionId
v2    128404
Name: count, dtype: int64

In [122]:
# Stack the Test pull (df1) and the Train pull (df2) into one frame.
# Empty frames are dropped first: passing an empty DataFrame to pd.concat raises pandas'
# FutureWarning about empty/all-NA entries, and in future versions the empty frame's
# dtypes would take part in determining the result dtypes.
# If both pulls are empty, fall back to df2 alone so the column layout is preserved.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty] or [df2]
df_concat = pd.concat(non_empty_frames, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128404 entries, 0 to 128403
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             128404 non-null  Int64         
 1   digitalLoanAccountId   128404 non-null  object        
 2   loanAccountNumber      128404 non-null  object        
 3   Alpha_cic_sil_score    128404 non-null  float64       
 4   appln_submit_datetime  128404 non-null  datetime64[us]
 5   disbursementdate       128404 non-null  dbdate        
 6   Application_month      128404 non-null  object        
 7   Data_selection         128404 non-null  object        
 8   deffstpd30             128404 non-null  Int64         
 9   flg_mature_fstpd_30    128404 non-null  Int64         
 10  new_loan_type          128404 non-null  object        
 11  modelVersionId         128404 non-null  object        
 12  trenchCategory         128404 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [123]:
# Coerce the score column to numeric; values that cannot be parsed become NaN.
score_column = 'Alpha_cic_sil_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

# Gini of the score against the FSTPD30 default flag, computed by the notebook helper
# with model-version, trench and product-type columns supplied for the breakdown.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, score_column, 'deffstpd30', 'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [124]:
# Keep the FSTPD30 Gini results under their own name; `gini_results` is reassigned by later sections.
f4 = gini_results.copy()
# NOTE(review): a bare `f4` renders the whole (truncated) frame; `f4.head()` would match the FSPD30 section.
f4

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.333759,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 1
1,2024-09-01,2024-09-30,0.308531,Month,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 1
2,2024-09-02,2024-09-08,0.329879,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 1
3,2024-09-09,2024-09-15,0.285094,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 1
4,2024-09-16,2024-09-22,0.276597,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 1
...,...,...,...,...,...,...,...,...,...
231,2025-07-14,2025-07-20,0.413800,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 1
232,2025-07-21,2025-07-27,0.399109,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 1
233,2025-07-28,2025-08-03,0.426709,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 1
234,2025-08-01,2025-08-31,0.360611,Month,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 1


In [125]:
# Sanity check: number of Gini rows per model version.
f4['version'].value_counts()

version
v2    236
Name: count, dtype: int64

In [126]:
# Long-format FSTPD30 slice: key columns plus the Gini value renamed to the bare metric
# name, tagged with a category label for this metric / version / trench.
fstpd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Alpha_cic_sil_score_FSTPD30_gini',
]
f40 = (
    f4[fstpd30_columns]
    .rename(columns={'Alpha_cic_sil_score_FSTPD30_gini':'FSTPD30'})
    # Label fixed from 'cic_sil_score_fstpd10_v2_t1': this frame holds the FSTPD30 metric
    # and every sibling label follows the pattern cic_sil_score_<metric>_<version>_<trench>.
    .assign(category='cic_sil_score_fstpd30_v2_t1')
)

## Combining the dataframes

In [127]:
import functools

# Per-metric Gini frames built in earlier cells, merged left to right in this order.
dataframes = [f0, f1, f2, f3, f4]
# Join keys shared by every frame; the remaining column of each frame is its Gini value.
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate'
]

def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Alpha_cic_sil_score_FPD10_gini',
       'Alpha_cic_sil_score_FPD30_gini',
       'Alpha_cic_sil_score_FSPD30_gini',
       'Alpha_cic_sil_score_FSTPD30_gini'], dtype=object)

In [128]:
# Preview the merged wide frame (one Gini column per metric).
final_df.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category,Alpha_cic_sil_score_FPD10_gini,Alpha_cic_sil_score_FPD30_gini,Alpha_cic_sil_score_FSPD30_gini,Alpha_cic_sil_score_FSTPD30_gini
0,2024-08-26,2024-09-01,0.285491,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,,,,
1,2024-09-01,2024-09-30,0.20148,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,,,,
2,2024-09-02,2024-09-08,0.229599,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,,,,
3,2024-09-09,2024-09-15,0.183286,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,,,,
4,2024-09-16,2024-09-22,0.17828,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,,,,


In [129]:
# Sanity check: row count per (trench, version) combination, NaN combinations included.
final_df[['trench_category', 'version']].value_counts(dropna=False)

trench_category  version
Trench 1         v2         1325
Name: count, dtype: int64

In [130]:
# Column renames: capitalise the trench column and suffix each Gini column with the
# model version and trench ("_v2_t1").
gini_column_renames = {
    'trench_category':'Trench_category',
    'Alpha_cic_sil_score_FPD0_gini':'Alpha_cic_sil_score_FPD0_v2_t1_gini',
    'Alpha_cic_sil_score_FPD10_gini':'Alpha_cic_sil_score_FPD10_v2_t1_gini',
    'Alpha_cic_sil_score_FPD30_gini':'Alpha_cic_sil_score_FPD30_v2_t1_gini',
    'Alpha_cic_sil_score_FSPD30_gini':'Alpha_cic_sil_score_FSPD30_v2_t1_gini',
    'Alpha_cic_sil_score_FSTPD30_gini':'Alpha_cic_sil_score_FSTPD30_v2_t1_gini',
}
final_df.rename(columns=gini_column_renames, inplace=True)
final_df.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_v2_t1_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'Alpha_cic_sil_score_FPD10_v2_t1_gini',
       'Alpha_cic_sil_score_FPD30_v2_t1_gini',
       'Alpha_cic_sil_score_FSPD30_v2_t1_gini',
       'Alpha_cic_sil_score_FSTPD30_v2_t1_gini'],
      dtype='object')

In [131]:
# Put the key columns first, followed by the five Gini metrics, then add two constant
# descriptor columns.
ordered_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'Trench_category',
    'Alpha_cic_sil_score_FPD0_v2_t1_gini',
    'Alpha_cic_sil_score_FPD10_v2_t1_gini',
    'Alpha_cic_sil_score_FPD30_v2_t1_gini',
    'Alpha_cic_sil_score_FSPD30_v2_t1_gini',
    'Alpha_cic_sil_score_FSTPD30_v2_t1_gini',
]
final_df = final_df.loc[:, ordered_columns].copy()
final_df['Model_display_name'] = 'cic_model_sil'
final_df['Product_type'] = 'SIL'
final_df.dtypes

start_date                                datetime64[ns]
end_date                                  datetime64[ns]
period                                            object
Model_Name                                        object
version                                           object
bad_rate                                          object
loan_type                                         object
Trench_category                                   object
Alpha_cic_sil_score_FPD0_v2_t1_gini              float64
Alpha_cic_sil_score_FPD10_v2_t1_gini             float64
Alpha_cic_sil_score_FPD30_v2_t1_gini             float64
Alpha_cic_sil_score_FSPD30_v2_t1_gini            float64
Alpha_cic_sil_score_FSTPD30_v2_t1_gini           float64
Model_display_name                                object
Product_type                                      object
dtype: object

In [132]:
# Preview the reordered frame before it is uploaded.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,Trench_category,Alpha_cic_sil_score_FPD0_v2_t1_gini,Alpha_cic_sil_score_FPD10_v2_t1_gini,Alpha_cic_sil_score_FPD30_v2_t1_gini,Alpha_cic_sil_score_FSPD30_v2_t1_gini,Alpha_cic_sil_score_FSTPD30_v2_t1_gini,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.285491,,,,,cic_model_sil,SIL
1,2024-09-01,2024-09-30,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.20148,,,,,cic_model_sil,SIL
2,2024-09-02,2024-09-08,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.229599,,,,,cic_model_sil,SIL
3,2024-09-09,2024-09-15,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.183286,,,,,cic_model_sil,SIL
4,2024-09-16,2024-09-22,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.17828,,,,,cic_model_sil,SIL


In [133]:
# Upload to BigQuery
# Destination table for the merged v2 / Trench 1 Gini frame (`final_df`).
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.cic_sil_score_v2_t1_giniv1"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # replaces the table's existing data on every run; "WRITE_APPEND" would add rows instead
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=4a109e82-6a14-48fd-982a-8557bbe9ee7e>

In [134]:
import functools

# Long-format per-metric frames (each carries its own `category` label), merged in this order.
dataframes = [f01, f10, f20, f30, f40]
# Join keys shared by every frame; the remaining column of each frame is its metric value.
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category'
]

def merge_dataframes(df1, df2):
    """Outer-join two metric frames on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)

cicsilscorev2t1 = functools.reduce(merge_dataframes, dataframes)

cicsilscorev2t1.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [135]:
# Preview the merged long-format v2 / Trench 1 frame.
cicsilscorev2t1.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2024-08-26,2024-09-01,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.285491,cic_sil_score_fpd0_v2_t1,,,,
1,2024-09-01,2024-09-30,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.20148,cic_sil_score_fpd0_v2_t1,,,,
2,2024-09-02,2024-09-08,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.229599,cic_sil_score_fpd0_v2_t1,,,,
3,2024-09-09,2024-09-15,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.183286,cic_sil_score_fpd0_v2_t1,,,,
4,2024-09-16,2024-09-22,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 1,0.17828,cic_sil_score_fpd0_v2_t1,,,,


In [136]:
# Stack the v1 frame (`cicsilscorev1all`, built in an earlier cell) on top of the v2 / Trench 1
# frame, renumbering the index from 0.
result = pd.concat([cicsilscorev1all, cicsilscorev2t1], ignore_index=True)
result.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [137]:
# Keep an independent copy of the combined frame under the name `masterdf`.
masterdf = result.copy()
masterdf.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## Trench 2

## FPD0

## Test

In [138]:
# Test-side FPD0 pull for Trench 2: v2 scores from audit_balance.ml_model_run_details
# (three model display names accepted), joined to the loan master and to the per-loan
# delinquency flags built in the `deliquency` CTE.
# Rows are kept only for disbursed loans with a non-null score, flg_mature_fpd0 = 1
# (CTE: obs_min_inst_def0 >= 1) and trenchCategory = 'Trench 2'.
# NOTE(review): the saved output below shows only a header row, i.e. no rows came back.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully identical rows (first occurrence kept) in one step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [139]:
# Keep the Test-side Trench 2 FPD0 pull as df1 before `dfd` is overwritten by the Train query.
df1 = dfd.copy()

In [140]:
# Sanity check: row count per model version in the Test pull (an empty Series means no rows).
df1['modelVersionId'].value_counts()

Series([], Name: count, dtype: int64)

## Train

In [141]:
# Train-side FPD0 pull for Trench 2: v2 scores from the training run-details table
# (three model display names accepted), joined to the loan master and to the per-loan
# delinquency flags built in the `deliquency` CTE.
# Rows are kept only for disbursed loans with a non-null score, flg_mature_fpd0 = 1
# (CTE: obs_min_inst_def0 >= 1) and trenchCategory = 'Trench 2'.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
    trenchCategory,
    from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
"""
# Run the query and drop fully identical rows (first occurrence kept) in one step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2104763,f5208059-7ceb-4d8c-a664-d4c2e11ce249,60821047630022,cic_model_sil,0.499639,2024-09-24 14:15:47,2024-09-24,2024-09,Train,1,1,SIL-Instore,v2,Trench 2
1,2275327,0eba084a-e278-4005-a9f5-5f4954d38e55,60822753270013,cic_model_sil,0.17034,2024-09-30 13:42:33,2024-09-30,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
2,2739003,6d8b43b1-7b24-4b98-914c-2f68c737e928,60827390030012,cic_model_sil,0.452874,2024-09-22 15:48:29,2024-09-22,2024-09,Train,1,1,SIL-Instore,v2,Trench 2
3,2804764,ff66b523-10c1-4730-aae5-2c9368ff1bc8,60828047640017,cic_model_sil,0.241805,2024-09-28 10:40:04,2024-09-28,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,2060871,55f74bb9-6c3e-4da4-8297-487125735295,60820608710015,cic_model_sil,0.350887,2024-09-10 10:10:01,2024-09-10,2024-09,Train,0,1,SIL-Instore,v2,Trench 2


In [142]:
# Keep the Train-side Trench 2 FPD0 pull as df2 so `dfd` can be reused by later queries.
df2 = dfd.copy()

In [143]:
# Sanity check: row count per (model version, model display name) pair in the Train pull.
df2[['modelVersionId', 'modelDisplayName']].value_counts()    

modelVersionId  modelDisplayName
v2              cic_model_sil       3869
Name: count, dtype: int64

In [144]:
# Stack the Test pull (df1) and the Train pull (df2) into one frame.
# Empty frames are dropped first: passing an empty DataFrame to pd.concat raises pandas'
# FutureWarning about empty/all-NA entries, and in future versions the empty frame's
# dtypes would take part in determining the result dtypes.
# If both pulls are empty, fall back to df2 alone so the column layout is preserved.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty] or [df2]
df_concat = pd.concat(non_empty_frames, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3869 entries, 0 to 3868
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3869 non-null   Int64         
 1   digitalLoanAccountId   3869 non-null   object        
 2   loanAccountNumber      3869 non-null   object        
 3   modelDisplayName       3869 non-null   object        
 4   Alpha_cic_sil_score    3869 non-null   float64       
 5   appln_submit_datetime  3869 non-null   datetime64[us]
 6   disbursementdate       3869 non-null   dbdate        
 7   Application_month      3869 non-null   object        
 8   Data_selection         3869 non-null   object        
 9   deffpd0                3869 non-null   Int64         
 10  flg_mature_fpd0        3869 non-null   Int64         
 11  new_loan_type          3869 non-null   object        
 12  modelVersionId         3869 non-null   object        
 13  tre

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [145]:
# Coerce the score column to numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')
# Show the row count per application month (NaN months included), in month order.
# This expression used to come first in the cell, so its result was computed and then
# discarded; as the last expression it is now rendered as the cell output.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [146]:
# Gini of the score against the FPD0 default flag, computed by the notebook helper
# with model-version, trench and product-type columns supplied for the breakdown.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd0', 'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [147]:
# Keep the Trench 2 FPD0 Gini results under their own name; this rebinds the `f0` that
# the earlier `dataframes = [f0, f1, f2, f3, f4]` merge cell used.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2
1,2024-09-01,2024-09-30,0.200312,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2
2,2024-09-02,2024-09-08,0.182609,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2
3,2024-09-09,2024-09-15,-0.451613,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2
4,2024-09-16,2024-09-22,0.373333,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2


In [148]:
# Sanity check: number of Gini rows per (loan type, version, trench) group.
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 2           73
SIL Competitor  v2       Trench 2           59
SIL ZERO        v2       Trench 2           65
SIL-Instore     v2       Trench 2           73
dtype: int64

In [149]:
# Long-format FPD0 slice for Trench 2: key columns plus the Gini value renamed to the
# bare metric name, tagged with a category label for this metric / version / trench.
fpd0_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Alpha_cic_sil_score_FPD0_gini',
]
f01 = (
    f0[fpd0_columns]
    .rename(columns={'Alpha_cic_sil_score_FPD0_gini':'FPD0'})
    .assign(category='cic_sil_score_fpd0_v2_t2')
)

## FPD10

## Test

In [150]:
# Test-side FPD10 pull for Trench 2: v2 scores from audit_balance.ml_model_run_details,
# joined to the loan master and to the per-loan delinquency flags built in the
# `deliquency` CTE. Rows are kept only for disbursed loans with a non-null score,
# flg_mature_fpd10 = 1 (CTE: obs_min_inst_def10 >= 1) and trenchCategory = 'Trench 2'.
# NOTE(review): this filter accepts two model display names, whereas the Trench 2 FPD0
# queries also accept 'Sil-Alpha-CIC-SIL-Model' - confirm which list is intended.
# NOTE(review): the saved output below shows only a header row, i.e. no rows came back.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully identical rows (first occurrence kept) in one step.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [151]:
# Snapshot the FPD10 Test extract; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [152]:
# FPD10 / Train extract: v2 scores from
# dap_ds_poweruser_playground.ml_training_model_run_details, joined to the
# loan master and the delinquency flags. Only disbursed loans with a non-null
# score and trenchCategory 'Trench 2' are kept; rows are labelled 'Train'.
#
# The maturity filter must match the label being evaluated: this cell selects
# deffpd10, so it filters on flg_mature_fpd10 (it previously filtered on
# flg_mature_fpd0, which let loans not yet observable for FPD10 through with
# deffpd10 = 0).
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
   and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
"""
# Run the query, then drop fully identical rows (first occurrence is kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,1452097,0f62cc46-3250-45e7-807f-5d00a6516c96,60814520970011,0.338034,2024-09-22 13:04:49,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
1,2387799,9e56bffc-5898-4fc8-836a-18c37bc7c893,60823877990011,0.46755,2024-09-05 14:51:13,2024-09-05,2024-09,Train,0,1,SIL ZERO,v2,Trench 2
2,2345935,42f31946-be24-40e8-8373-80cb455d42c8,60823459350012,0.270001,2024-09-30 14:55:12,2024-09-30,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
3,2606821,1011268c-0ba9-400b-808e-ef0d8241d6e3,60826068210015,0.189677,2024-09-17 12:55:34,2024-09-17,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,2741718,e2e504e1-150f-41b1-b586-b1262af0d68e,60827417180013,0.227389,2024-09-29 14:37:02,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 2


In [153]:
# Snapshot the FPD10 Train extract under its own name.
df2 = dfd.copy(deep=True)

In [154]:
# Row count per model version in the Train extract.
df2.loc[:, 'modelVersionId'].value_counts()

modelVersionId
v2    3869
Name: count, dtype: int64

In [155]:
# Stack the Test and Train extracts into one frame with a fresh RangeIndex.
# Empty extracts are left out of the concat: pandas deprecates concatenating
# empty frames (FutureWarning about result dtypes) and recommends excluding
# them. If both extracts are empty, fall back to [df1] so the result is still
# an empty frame with the expected columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3869 entries, 0 to 3868
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3869 non-null   Int64         
 1   digitalLoanAccountId   3869 non-null   object        
 2   loanAccountNumber      3869 non-null   object        
 3   Alpha_cic_sil_score    3869 non-null   float64       
 4   appln_submit_datetime  3869 non-null   datetime64[us]
 5   disbursementdate       3869 non-null   dbdate        
 6   Application_month      3869 non-null   object        
 7   Data_selection         3869 non-null   object        
 8   deffpd10               3869 non-null   Int64         
 9   flg_mature_fpd10       3869 non-null   Int64         
 10  new_loan_type          3869 non-null   object        
 11  modelVersionId         3869 non-null   object        
 12  trenchCategory         3869 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [156]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(
    df_concat['Alpha_cic_sil_score'], errors='coerce'
)

# Gini of Alpha_cic_sil_score against the deffpd10 label, labelled 'FPD10'.
# The helper is defined earlier in the notebook; the product, model-version
# and trench columns are passed so it can break the results down by them.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [157]:
# Keep the FPD10 results under their own name, then count rows per loan type.
f1 = gini_results.copy(deep=True)
f1.groupby(by='loan_type').size()

loan_type
Overall           73
SIL Competitor    59
SIL ZERO          65
SIL-Instore       73
dtype: int64

In [158]:
# Keep the reporting columns of f1, expose the Gini column under the short
# name 'FPD10', and tag every row with its category label.
f10 = (
    f1[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Alpha_cic_sil_score_FPD10_gini']]
    .rename(columns={'Alpha_cic_sil_score_FPD10_gini': 'FPD10'})
    .assign(category='cic_sil_score_fpd10_v2_t2')
)

## FPD30

## Test

In [159]:
# FPD30 / Test extract: v2 scores from audit_balance.ml_model_run_details,
# joined to the loan master and the delinquency flags. Only disbursed loans
# with a non-null score, flg_mature_fpd30 = 1 and trenchCategory 'Trench 2'
# are kept; rows are labelled 'Test' in Data_selection.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully identical rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [160]:
# Snapshot the FPD30 Test extract; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [161]:
# FPD30 / Train extract: v2 scores from
# dap_ds_poweruser_playground.ml_training_model_run_details, joined to the
# loan master and the delinquency flags. Only disbursed loans with a non-null
# score, flg_mature_fpd30 = 1 and trenchCategory 'Trench 2' are kept; rows
# are labelled 'Train' in Data_selection.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
"""
# Run the query and drop fully identical rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2433735,20d0edff-0109-45ed-a926-400d5be295e5,60824337350019,0.340672,2024-09-07 19:28:39,2024-09-07,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
1,2609084,720a90b7-ae56-4a06-9ee4-05151efd5042,60826090840018,0.28651,2024-09-21 14:04:42,2024-09-21,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
2,2577510,073b3c5d-c108-4428-b836-4f16d8afbed5,60825775100016,0.551389,2024-09-24 17:50:24,2024-09-24,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
3,2520989,626af283-ea98-49e1-8f01-71c2f58f493f,60825209890018,0.37396,2024-09-22 14:46:12,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,2475754,96f540e2-81e7-4f0f-9801-ac0d9af4dc62,60824757540011,0.409057,2024-09-22 16:28:52,2024-09-22,2024-09,Train,1,1,SIL-Instore,v2,Trench 2


In [162]:
# Snapshot the FPD30 Train extract under its own name.
df2 = dfd.copy(deep=True)

In [163]:
# Row count per model version in the Train extract.
df2.loc[:, 'modelVersionId'].value_counts()

modelVersionId
v2    3866
Name: count, dtype: int64

In [164]:
# Stack the Test and Train extracts into one frame with a fresh RangeIndex.
# Empty extracts are left out of the concat: pandas deprecates concatenating
# empty frames (FutureWarning about result dtypes) and recommends excluding
# them. If both extracts are empty, fall back to [df1] so the result is still
# an empty frame with the expected columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3866 entries, 0 to 3865
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3866 non-null   Int64         
 1   digitalLoanAccountId   3866 non-null   object        
 2   loanAccountNumber      3866 non-null   object        
 3   Alpha_cic_sil_score    3866 non-null   float64       
 4   appln_submit_datetime  3866 non-null   datetime64[us]
 5   disbursementdate       3866 non-null   dbdate        
 6   Application_month      3866 non-null   object        
 7   Data_selection         3866 non-null   object        
 8   deffpd30               3866 non-null   Int64         
 9   flg_mature_fpd30       3866 non-null   Int64         
 10  new_loan_type          3866 non-null   object        
 11  modelVersionId         3866 non-null   object        
 12  trenchCategory         3866 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [165]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(
    df_concat['Alpha_cic_sil_score'], errors='coerce'
)

# Gini of Alpha_cic_sil_score against the deffpd30 label, labelled 'FPD30'.
# The helper is defined earlier in the notebook; the product, model-version
# and trench columns are passed so it can break the results down by them.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [166]:
# Keep the FPD30 results under their own name, then count rows per loan type.
f2 = gini_results.copy(deep=True)
f2.groupby(by='loan_type').size()

loan_type
Overall           72
SIL Competitor    58
SIL ZERO          65
SIL-Instore       72
dtype: int64

In [167]:
# Preview the first rows of the FPD30 Gini results.
f2.head(n=5)

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 2
1,2024-09-01,2024-09-30,0.183509,Month,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 2
2,2024-09-02,2024-09-08,0.093333,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 2
3,2024-09-09,2024-09-15,,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 2
4,2024-09-16,2024-09-22,0.215686,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 2


In [168]:
# Keep the reporting columns of f2, expose the Gini column under the short
# name 'FPD30', and tag every row with its category label.
f20 = (
    f2[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Alpha_cic_sil_score_FPD30_gini']]
    .rename(columns={'Alpha_cic_sil_score_FPD30_gini': 'FPD30'})
    .assign(category='cic_sil_score_fpd30_v2_t2')
)

## FSPD30

## Test

In [169]:
# FSPD30 / Test extract: v2 scores from audit_balance.ml_model_run_details,
# joined to the loan master and the delinquency flags. Only disbursed loans
# with a non-null score, flg_mature_fspd_30 = 1 and trenchCategory 'Trench 2'
# are kept; rows are labelled 'Test' in Data_selection.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully identical rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [170]:
# Snapshot the FSPD30 Test extract; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [171]:
# FSPD30 / Train extract: v2 scores from
# dap_ds_poweruser_playground.ml_training_model_run_details, joined to the
# loan master and the delinquency flags. Only disbursed loans with a non-null
# score, flg_mature_fspd_30 = 1 and trenchCategory 'Trench 2' are kept; rows
# are labelled 'Train' in Data_selection.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  flg_mature_fspd_30,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
"""
# Run the query and drop fully identical rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2707025,4ce97484-22a7-4c7f-a0f7-0e532ffa8758,60827070250015,0.353054,2024-09-20 16:55:35,2024-09-20,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
1,2488255,6d744495-7fea-4328-9ef9-a8a626b2295e,60824882550013,0.254551,2024-09-06 14:20:53,2024-09-06,2024-09,Train,0,1,SIL ZERO,v2,Trench 2
2,2598467,461c0814-9d49-4261-96eb-ec85f4cf9686,60825984670014,0.215851,2024-09-29 15:14:12,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
3,2589228,0a29da1c-b53b-491c-a690-6f90329536a7,60825892280011,0.241676,2024-09-28 12:06:43,2024-09-28,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,2787477,afea5757-346f-4a54-9716-6d9d85dac3e5,60827874770019,0.182126,2024-09-27 10:44:34,2024-09-27,2024-09,Train,0,1,SIL-Instore,v2,Trench 2


In [172]:
# Snapshot the FSPD30 Train extract under its own name.
df2 = dfd.copy(deep=True)

In [173]:
# Row count per model version in the Train extract.
df2.loc[:, 'modelVersionId'].value_counts()

modelVersionId
v2    3622
Name: count, dtype: int64

In [174]:
# Stack the Test and Train extracts into one frame with a fresh RangeIndex.
# Empty extracts are left out of the concat: pandas deprecates concatenating
# empty frames (FutureWarning about result dtypes) and recommends excluding
# them. If both extracts are empty, fall back to [df1] so the result is still
# an empty frame with the expected columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3622 entries, 0 to 3621
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3622 non-null   Int64         
 1   digitalLoanAccountId   3622 non-null   object        
 2   loanAccountNumber      3622 non-null   object        
 3   Alpha_cic_sil_score    3622 non-null   float64       
 4   appln_submit_datetime  3622 non-null   datetime64[us]
 5   disbursementdate       3622 non-null   dbdate        
 6   Application_month      3622 non-null   object        
 7   Data_selection         3622 non-null   object        
 8   deffspd30              3622 non-null   Int64         
 9   flg_mature_fspd_30     3622 non-null   Int64         
 10  new_loan_type          3622 non-null   object        
 11  modelVersionId         3622 non-null   object        
 12  trenchCategory         3622 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [175]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(
    df_concat['Alpha_cic_sil_score'], errors='coerce'
)

# Gini of Alpha_cic_sil_score against the deffspd30 label, labelled 'FSPD30'.
# The helper is defined earlier in the notebook; the product, model-version
# and trench columns are passed so it can break the results down by them.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [176]:
# Keep the FSPD30 results under their own name.
f3 = gini_results.copy(deep=True)

In [177]:
# Preview the first rows of the FSPD30 Gini results.
f3.head(n=5)

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 2
1,2024-09-01,2024-09-30,0.196839,Month,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 2
2,2024-09-02,2024-09-08,0.25,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 2
3,2024-09-09,2024-09-15,0.677419,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 2
4,2024-09-16,2024-09-22,0.099567,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 2


In [178]:
# Keep the reporting columns of f3, expose the Gini column under the short
# name 'FSPD30', and tag every row with its category label.
f30 = (
    f3[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Alpha_cic_sil_score_FSPD30_gini']]
    .rename(columns={'Alpha_cic_sil_score_FSPD30_gini': 'FSPD30'})
    .assign(category='cic_sil_score_fspd30_v2_t2')
)

## FSTPD30

## Test

In [179]:
# FSTPD30 / Test extract: v2 scores from audit_balance.ml_model_run_details,
# joined to the loan master and the delinquency flags. Only disbursed loans
# with a non-null score, flg_mature_fstpd_30 = 1 and trenchCategory
# 'Trench 2' are kept; rows are labelled 'Test' in Data_selection.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully identical rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [180]:
# Snapshot the FSTPD30 Test extract; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [181]:
# FSTPD30 / Train extract: v2 scores from
# dap_ds_poweruser_playground.ml_training_model_run_details, joined to the
# loan master and the delinquency flags. Only disbursed loans with a non-null
# score, flg_mature_fstpd_30 = 1 and trenchCategory 'Trench 2' are kept; rows
# are labelled 'Train' in Data_selection.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
"""
# Run the query and drop fully identical rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2441503,07263a3b-e735-418d-8371-454e9814a544,60824415030012,0.290722,2025-07-12 14:18:21,2025-07-12,2025-07,Train,1,1,SIL Competitor,v2,Trench 2
1,3254309,1afa61dd-1f15-469a-b13e-70b67bf072a2,60832543090014,0.118481,2025-07-30 12:18:28,2025-07-30,2025-07,Train,0,1,SIL Competitor,v2,Trench 2
2,1625502,802d614e-c90f-4d8f-bb79-ef6ef0c0e6be,60816255020021,0.217631,2025-05-04 18:46:31,2025-05-04,2025-05,Train,0,1,SIL Competitor,v2,Trench 2
3,2744558,891c9ea1-c977-4b41-af32-b26e28f4e4e3,60827445580017,0.346224,2025-05-22 14:10:44,2025-05-22,2025-05,Train,1,1,SIL-Instore,v2,Trench 2
4,3109678,fb11673a-b9b9-470f-9710-0513c31df07d,60831096780015,0.193054,2025-07-22 10:05:01,2025-07-22,2025-07,Train,1,1,SIL Competitor,v2,Trench 2


In [182]:
# Keep the Train pull as df2, independent of later reassignments of dfd.
df2 = dfd.copy()

In [183]:
# Row count per model version in the Train pull.
df2['modelVersionId'].value_counts()

modelVersionId
v2    3306
Name: count, dtype: int64

In [184]:
# Stack the two query pulls (df1, df2) into a single frame with a fresh RangeIndex.
# Empty frames are dropped before concatenating: passing an empty frame to pd.concat
# raises pandas' FutureWarning about empty/all-NA entries, and future pandas versions
# will let such entries influence the result dtypes.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries df2's columns.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3306 entries, 0 to 3305
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3306 non-null   Int64         
 1   digitalLoanAccountId   3306 non-null   object        
 2   loanAccountNumber      3306 non-null   object        
 3   Alpha_cic_sil_score    3306 non-null   float64       
 4   appln_submit_datetime  3306 non-null   datetime64[us]
 5   disbursementdate       3306 non-null   dbdate        
 6   Application_month      3306 non-null   object        
 7   Data_selection         3306 non-null   object        
 8   deffstpd30             3306 non-null   Int64         
 9   flg_mature_fstpd_30    3306 non-null   Int64         
 10  new_loan_type          3306 non-null   object        
 11  modelVersionId         3306 non-null   object        
 12  trenchCategory         3306 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [185]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
score_column = 'Alpha_cic_sil_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

# Periodic Gini of the score against the FSTPD30 flag, split by model version,
# trench and product type. (The earlier calculate_periodic_gini /
# calculate_periodic_gini_producttype variants are superseded by this call.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffstpd30', 'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [186]:
# Keep the FSTPD30 Gini results under their own name; gini_results is overwritten by later runs.
f4 = gini_results.copy()
f4

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 2
1,2024-09-01,2024-09-30,0.272189,Month,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 2
2,2024-09-02,2024-09-08,0.286957,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 2
3,2024-09-09,2024-09-15,0.116667,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 2
4,2024-09-16,2024-09-22,0.322884,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 2
...,...,...,...,...,...,...,...,...,...
225,2025-07-01,2025-07-31,,Month,Alpha_cic_sil_score,v2,FSTPD30,SIL ZERO,Trench 2
226,2025-07-07,2025-07-13,,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL ZERO,Trench 2
227,2025-07-21,2025-07-27,,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL ZERO,Trench 2
228,2025-07-28,2025-08-03,,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL ZERO,Trench 2


In [187]:
# Narrow the FSTPD30 results to the shared key columns plus the Gini value
# (renamed to 'FSTPD30') and tag every row with its category label.
f40 = (
    f4[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Alpha_cic_sil_score_FSTPD30_gini']]
    .rename(columns={'Alpha_cic_sil_score_FSTPD30_gini':'FSTPD30'})
    .assign(category='cic_sil_score_fstpd30_v2_t2')
)

## Combining the dataframes

In [188]:
import functools

# Gini frames to combine; the merged output has one Gini column per frame.
dataframes = [f0, f1, f2, f3, f4]
# Key columns shared by every frame; the outer join is performed on all of them.
# NOTE(review): 'bad_rate' is part of the key and appears to differ per frame, so rows
# from different frames do not line up (see the NaN pattern in the output) - confirm intent.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two frames on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the list pairwise, left to right, into a single wide frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Alpha_cic_sil_score_FPD10_gini',
       'Alpha_cic_sil_score_FPD30_gini',
       'Alpha_cic_sil_score_FSPD30_gini',
       'Alpha_cic_sil_score_FSTPD30_gini'], dtype=object)

In [189]:
# Preview the first rows of the merged Gini frame.
final_df.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category,Alpha_cic_sil_score_FPD10_gini,Alpha_cic_sil_score_FPD30_gini,Alpha_cic_sil_score_FSPD30_gini,Alpha_cic_sil_score_FSTPD30_gini
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,,,,
1,2024-09-01,2024-09-30,0.200312,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,,,,
2,2024-09-02,2024-09-08,0.182609,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,,,,
3,2024-09-09,2024-09-15,-0.451613,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,,,,
4,2024-09-16,2024-09-22,0.373333,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,,,,


In [190]:
# Old -> new column names: capitalise the trench key and tag each Gini column
# with the model version / trench suffix (v2_t2).
v2_t2_column_names = {
    'trench_category': 'Trench_category',
    'Alpha_cic_sil_score_FPD0_gini': 'Alpha_cic_sil_score_FPD0_v2_t2_gini',
    'Alpha_cic_sil_score_FPD10_gini': 'Alpha_cic_sil_score_FPD10_v2_t2_gini',
    'Alpha_cic_sil_score_FPD30_gini': 'Alpha_cic_sil_score_FPD30_v2_t2_gini',
    'Alpha_cic_sil_score_FSPD30_gini': 'Alpha_cic_sil_score_FSPD30_v2_t2_gini',
    'Alpha_cic_sil_score_FSTPD30_gini': 'Alpha_cic_sil_score_FSTPD30_v2_t2_gini',
}
# The frame is renamed in place.
final_df.rename(columns=v2_t2_column_names, inplace=True)
final_df.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_v2_t2_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'Alpha_cic_sil_score_FPD10_v2_t2_gini',
       'Alpha_cic_sil_score_FPD30_v2_t2_gini',
       'Alpha_cic_sil_score_FSPD30_v2_t2_gini',
       'Alpha_cic_sil_score_FSTPD30_v2_t2_gini'],
      dtype='object')

In [191]:
# Column order of the final table: key columns first, then the five Gini columns.
ordered_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'Trench_category',
    'Alpha_cic_sil_score_FPD0_v2_t2_gini',
    'Alpha_cic_sil_score_FPD10_v2_t2_gini',
    'Alpha_cic_sil_score_FPD30_v2_t2_gini',
    'Alpha_cic_sil_score_FSPD30_v2_t2_gini',
    'Alpha_cic_sil_score_FSTPD30_v2_t2_gini',
]
# Reorder, then append two constant descriptor columns (assign returns a new frame).
final_df = final_df[ordered_columns].assign(
    Model_display_name='cic_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                datetime64[ns]
end_date                                  datetime64[ns]
period                                            object
Model_Name                                        object
version                                           object
bad_rate                                          object
loan_type                                         object
Trench_category                                   object
Alpha_cic_sil_score_FPD0_v2_t2_gini              float64
Alpha_cic_sil_score_FPD10_v2_t2_gini             float64
Alpha_cic_sil_score_FPD30_v2_t2_gini             float64
Alpha_cic_sil_score_FSPD30_v2_t2_gini            float64
Alpha_cic_sil_score_FSTPD30_v2_t2_gini           float64
Model_display_name                                object
Product_type                                      object
dtype: object

In [192]:
# Preview the reordered frame, now including Model_display_name and Product_type.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,Trench_category,Alpha_cic_sil_score_FPD0_v2_t2_gini,Alpha_cic_sil_score_FPD10_v2_t2_gini,Alpha_cic_sil_score_FPD30_v2_t2_gini,Alpha_cic_sil_score_FSPD30_v2_t2_gini,Alpha_cic_sil_score_FSTPD30_v2_t2_gini,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,,,,,,cic_model_sil,SIL
1,2024-09-01,2024-09-30,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,0.200312,,,,,cic_model_sil,SIL
2,2024-09-02,2024-09-08,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,0.182609,,,,,cic_model_sil,SIL
3,2024-09-09,2024-09-15,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,-0.451613,,,,,cic_model_sil,SIL
4,2024-09-16,2024-09-22,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 2,0.373333,,,,,cic_model_sil,SIL


In [193]:
# Upload to BigQuery
# Destination table (project.dataset.table) for the merged Gini frame.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.cic_sil_score_v2_t2_giniv1"
# WRITE_TRUNCATE asks BigQuery to replace any existing table contents on load.
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
# Start the load job for final_df using the module-level BigQuery client.
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d89dc111-adf4-4e8b-86eb-22afda4a6d63>

In [194]:
import functools

# Narrow per-bad-rate frames to combine; f40 carries the '..._v2_t2' category label,
# the others are built earlier in the notebook.
dataframes = [f01, f10, f20, f30, f40]
# Join keys: the shared descriptor columns plus the per-frame 'category' label.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']

def merge_dataframes(df1, df2):
    """Outer-join two frames on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the list pairwise, left to right, into one frame.
cicsilscorev2t2 = functools.reduce(merge_dataframes, dataframes)

print(cicsilscorev2t2.columns.values)

# Stack this frame under the two frames built earlier, renumbering the index.
result = pd.concat([cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2], ignore_index=True)
result.head()



['start_date' 'end_date' 'period' 'Model_Name' 'version' 'bad_rate'
 'loan_type' 'trench_category' 'FPD0' 'category' 'FPD10' 'FPD30' 'FSPD30'
 'FSTPD30']


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [195]:
# Row count per category label in the stacked frame; dropna=False also counts unlabelled rows.
result['category'].value_counts(dropna=False)

category
cic_sil_score_fpd0_v1_all       508
cic_sil_score_fpd10_v1_all      496
cic_sil_score_fpd30_v1_all      488
cic_sil_score_fspd30_v1_all     468
cic_sil_score_fstpd30_v1_all    444
cic_sil_score_fpd0_v2_t1        277
cic_sil_score_fpd10_v2_t1       277
cic_sil_score_fpd30_v2_t1       275
cic_sil_score_fpd0_v2_t2        270
cic_sil_score_fpd10_v2_t2       270
cic_sil_score_fpd30_v2_t2       267
cic_sil_score_fspd30_v2_t1      260
cic_sil_score_fspd30_v2_t2      250
cic_sil_score_fstpd10_v2_t1     236
cic_sil_score_fstpd30_v2_t2     230
Name: count, dtype: int64

In [196]:
# Keep an independent copy of the stacked results under the name masterdf.
masterdf = result.copy()
masterdf.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## Trench 3

## FPD0

## Test

In [197]:
# Test-side pull for the FPD0 / Trench 3 Gini.
# The query reads v2 scores from audit_balance.ml_model_run_details, joins them to
# loan_master_table and to delinquency flags derived from loan_deliquency_data, and keeps
# disbursed, scored loans whose flg_mature_fpd0 flag is 1 and whose trench is 'Trench 3'.
# Every returned row is labelled 'Test' in the Data_selection column.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query through the module-level BigQuery client and materialise it as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully duplicated rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [198]:
# Keep the Test pull as df1; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

In [199]:
# Row count per model version in the Test pull.
df1['modelVersionId'].value_counts()

Series([], Name: count, dtype: int64)

## Train

In [200]:
# Train-side pull for the FPD0 / Trench 3 Gini.
# The query reads v2 scores from ml_training_model_run_details, joins them to
# loan_master_table and to delinquency flags derived from loan_deliquency_data, and keeps
# disbursed, scored loans whose flg_mature_fpd0 flag is 1 and whose trench is 'Trench 3'.
# Every returned row is labelled 'Train' in the Data_selection column.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil', 'Sil-Alpha-CIC-SIL-Model')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.modelDisplayName,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
    trenchCategory,
    from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
"""
# Run the query through the module-level BigQuery client and materialise it as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully duplicated rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,modelDisplayName,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2321974,21dfb821-cc2c-495d-bcf3-75b151375397,60823219740021,cic_model_sil,0.158369,2024-09-06 17:46:54,2024-09-06,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
1,2361418,0278a148-bbbe-427f-b720-40da4c16e137,60823614180028,cic_model_sil,0.153611,2024-09-03 11:39:02,2024-09-03,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
2,2345890,a631449f-d75a-47e3-9479-9adb2beaf2ff,60823458900024,cic_model_sil,0.1395,2024-09-22 18:33:23,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
3,2317325,b30ea9f5-ea64-40c5-8253-cb6f89570b89,60823173250023,cic_model_sil,0.145376,2024-09-28 18:48:57,2024-09-28,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2382286,e53e8349-398d-47f6-a570-9efb8f7e0b1c,60823822860044,cic_model_sil,0.124408,2024-09-08 16:59:58,2024-09-08,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [201]:
# Keep the Train pull as df2, independent of later reassignments of dfd.
df2 = dfd.copy()

In [202]:
# Row count per (model version, model display name, trench) combination in the Train pull.
df2[['modelVersionId', 'modelDisplayName', 'trenchCategory']].value_counts()    

modelVersionId  modelDisplayName  trenchCategory
v2              cic_model_sil     Trench 3          7983
Name: count, dtype: int64

In [203]:
# Stack the Test (df1) and Train (df2) pulls into a single frame with a fresh RangeIndex.
# Empty frames are dropped before concatenating: passing an empty frame to pd.concat
# raises pandas' FutureWarning about empty/all-NA entries, and future pandas versions
# will let such entries influence the result dtypes.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries df2's columns.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7983 entries, 0 to 7982
Data columns (total 14 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7983 non-null   Int64         
 1   digitalLoanAccountId   7983 non-null   object        
 2   loanAccountNumber      7983 non-null   object        
 3   modelDisplayName       7983 non-null   object        
 4   Alpha_cic_sil_score    7983 non-null   float64       
 5   appln_submit_datetime  7983 non-null   datetime64[us]
 6   disbursementdate       7983 non-null   dbdate        
 7   Application_month      7983 non-null   object        
 8   Data_selection         7983 non-null   object        
 9   deffpd0                7983 non-null   Int64         
 10  flg_mature_fpd0        7983 non-null   Int64         
 11  new_loan_type          7983 non-null   object        
 12  modelVersionId         7983 non-null   object        
 13  tre

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [204]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
df_concat['Alpha_cic_sil_score'] = pd.to_numeric(df_concat['Alpha_cic_sil_score'], errors='coerce')
# Row count per application month, in chronological order. This is kept as the cell's
# last expression so the counts are displayed; previously the result was computed
# first and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [205]:
# Periodic Gini of the score against the FPD0 flag, split by model version,
# trench and product type. (The earlier calculate_periodic_gini /
# calculate_periodic_gini_producttype variants are superseded by this call.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd0', 'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [206]:
# Keep the FPD0 Gini results under their own name; gini_results is overwritten by later runs.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3
1,2024-09-01,2024-09-30,0.003041,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3
2,2024-09-02,2024-09-08,0.05772,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3
3,2024-09-09,2024-09-15,0.298246,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3
4,2024-09-16,2024-09-22,-0.466891,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3


In [207]:
# Number of result rows per (loan type, model version, trench) combination.
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 3           72
SIL Competitor  v2       Trench 3           60
SIL ZERO        v2       Trench 3           71
SIL-Instore     v2       Trench 3           71
dtype: int64

In [208]:
# Narrow the FPD0 results to the shared key columns plus the Gini value
# (renamed to 'FPD0') and tag every row with its category label.
f01 = (
    f0[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Alpha_cic_sil_score_FPD0_gini']]
    .rename(columns={'Alpha_cic_sil_score_FPD0_gini':'FPD0'})
    .assign(category='cic_sil_score_fpd0_v2_t3')
)
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2024-08-26,2024-09-01,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,,cic_sil_score_fpd0_v2_t3
1,2024-09-01,2024-09-30,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,0.003041,cic_sil_score_fpd0_v2_t3
2,2024-09-02,2024-09-08,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,0.05772,cic_sil_score_fpd0_v2_t3
3,2024-09-09,2024-09-15,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,0.298246,cic_sil_score_fpd0_v2_t3
4,2024-09-16,2024-09-22,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,-0.466891,cic_sil_score_fpd0_v2_t3


## FPD10

## Test

In [209]:
# Test-side pull for the FPD10 / Trench 3 Gini.
# The query reads v2 scores from audit_balance.ml_model_run_details, joins them to
# loan_master_table and to delinquency flags derived from loan_deliquency_data, and keeps
# disbursed, scored loans whose flg_mature_fpd10 flag is 1 and whose trench is 'Trench 3'.
# Every returned row is labelled 'Test' in the Data_selection column.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query through the module-level BigQuery client and materialise it as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully duplicated rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [210]:
# Keep the Test pull as df1; dfd is overwritten by the Train query that follows.
df1 = dfd.copy()

## Train

In [211]:
# Train-side pull for the FPD10 / Trench 3 Gini.
# The query reads v2 scores from ml_training_model_run_details, joins them to
# loan_master_table and to delinquency flags derived from loan_deliquency_data, and keeps
# disbursed, scored loans in 'Trench 3' that are mature for FPD10.
# Fix: the maturity filter now uses flg_mature_fpd10 (it previously filtered on
# flg_mature_fpd0, a copy-paste from the FPD0 cell), matching the Test-side FPD10 query
# and the deffpd10 target selected here.
# Every returned row is labelled 'Train' in the Data_selection column.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
   and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
"""
# Run the query through the module-level BigQuery client and materialise it as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully duplicated rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2241314,e401d83b-7ff0-4f82-9498-c3ba8c41804d,60822413140025,0.143551,2024-09-29 17:23:03,2024-09-29,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
1,2503193,447f8bb2-4e2f-4895-b7ca-411386ccd8b3,60825031930021,0.180096,2024-09-17 13:56:24,2024-09-17,2024-09,Train,1,1,SIL-Instore,v2,Trench 3
2,2544317,b536f69d-26df-4df8-ae76-36fd5201e906,60825443170028,0.180096,2024-09-29 14:36:16,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
3,2412132,07e2b79c-a4c1-46f2-827c-70d78713377a,60824121320022,0.136189,2024-09-27 17:54:20,2024-09-27,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2492634,d0014c07-8709-4c00-9543-b5047cef5cc8,60824926340029,0.167149,2024-09-29 11:33:31,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [212]:
# Keep the Train pull as df2, independent of later reassignments of dfd.
df2 = dfd.copy()

In [213]:
# Row count per model version in the Train pull.
df2['modelVersionId'].value_counts()

modelVersionId
v2    7983
Name: count, dtype: int64

In [214]:
# Stack the Test (df1) and Train (df2) pulls into a single frame with a fresh RangeIndex.
# Empty frames are dropped before concatenating: passing an empty frame to pd.concat
# raises pandas' FutureWarning about empty/all-NA entries, and future pandas versions
# will let such entries influence the result dtypes.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries df2's columns.
    df_concat = df2.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7983 entries, 0 to 7982
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7983 non-null   Int64         
 1   digitalLoanAccountId   7983 non-null   object        
 2   loanAccountNumber      7983 non-null   object        
 3   Alpha_cic_sil_score    7983 non-null   float64       
 4   appln_submit_datetime  7983 non-null   datetime64[us]
 5   disbursementdate       7983 non-null   dbdate        
 6   Application_month      7983 non-null   object        
 7   Data_selection         7983 non-null   object        
 8   deffpd10               7983 non-null   Int64         
 9   flg_mature_fpd10       7983 non-null   Int64         
 10  new_loan_type          7983 non-null   object        
 11  modelVersionId         7983 non-null   object        
 12  trenchCategory         7983 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [215]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
score_column = 'Alpha_cic_sil_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

# Periodic Gini of the score against the FPD10 flag, split by model version,
# trench and product type. (The earlier calculate_periodic_gini /
# calculate_periodic_gini_producttype variants are superseded by this call.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Alpha_cic_sil_score', 'deffpd10', 'FPD10',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [216]:
# Keep the FPD10 Gini results under their own name; gini_results is overwritten by later runs.
f1=gini_results.copy()
# Number of result rows per (loan type, trench) combination.
f1.groupby(['loan_type','trench_category']).size()

loan_type       trench_category
Overall         Trench 3           72
SIL Competitor  Trench 3           60
SIL ZERO        Trench 3           71
SIL-Instore     Trench 3           71
dtype: int64

In [217]:
# Preview the first rows of the FPD10 Gini results.
f1.head()

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 3
1,2024-09-01,2024-09-30,0.311703,Month,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 3
2,2024-09-02,2024-09-08,0.62716,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 3
3,2024-09-09,2024-09-15,-0.033613,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 3
4,2024-09-16,2024-09-22,-0.183521,Week,Alpha_cic_sil_score,v2,FPD10,Overall,Trench 3


In [218]:
# Narrow the FPD10 results to the shared key columns plus the Gini value
# (renamed to 'FPD10') and tag every row with its category label.
f10 = (
    f1[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Alpha_cic_sil_score_FPD10_gini']]
    .rename(columns={'Alpha_cic_sil_score_FPD10_gini':'FPD10'})
    .assign(category='cic_sil_score_fpd10_v2_t3')
)

## FPD30

## Test

In [219]:
# Test-side pull for the FPD30 / Trench 3 Gini.
# The query reads v2 scores from audit_balance.ml_model_run_details, joins them to
# loan_master_table and to delinquency flags derived from loan_deliquency_data, and keeps
# disbursed, scored loans whose flg_mature_fpd30 flag is 1 and whose trench is 'Trench 3'.
# Every returned row is labelled 'Test' in the Data_selection column.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query through the module-level BigQuery client and materialise it as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully duplicated rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [220]:
# Keep the Test extract under its own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [221]:
# Train extract for the FPD30 Gini: same shape as the Test query, but the v2
# scores are read from dap_ds_poweruser_playground.ml_training_model_run_details
# and the rows are labelled 'Train'.
# Only disbursed loans with flg_mature_fpd30 = 1 in 'Trench 3' are kept.
# Note: the LEFT JOIN to loan_master_table acts as an inner join, because the
# INNER JOIN that follows and the WHERE clause both require loanmaster columns.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2506080,d8bd7c3b-8e4d-4e6d-889a-f6bc2e67c5b6,60825060800024,0.158192,2024-09-08 12:49:43,2024-09-08,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
1,2291210,12fb5948-0bc6-4c56-af2d-d4259ba58117,60822912100023,0.153492,2024-09-26 10:22:47,2024-09-26,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
2,2409631,e1c154d0-855e-40b0-9784-2d881d2b9edd,60824096310029,0.14004,2024-09-15 13:30:28,2024-09-15,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
3,2247281,fc3fa0d4-c691-4953-8126-f33f61b26825,60822472810027,0.11452,2024-09-08 15:36:52,2024-09-08,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2799963,221d4e1a-b13e-4a6a-850b-ca40dec79e58,60827999630029,0.194094,2024-09-29 19:45:34,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [222]:
# Keep the Train extract under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [223]:
# Sanity check: row count per model version in the Train extract.
df2.loc[:, 'modelVersionId'].value_counts()

modelVersionId
v2    7982
Name: count, dtype: int64

In [224]:
# Stack the Test (df1) and Train (df2) extracts into a single frame.
# Empty extracts are dropped before concatenating: passing an empty frame to
# pd.concat raises the pandas FutureWarning about empty/all-NA entries, and
# excluding them up front is the behaviour-preserving fix that warning
# recommends. Both extracts share the same columns, so nothing is lost.
# If both are empty, df1 is kept so the result still carries those columns.
frames_to_stack = [part for part in (df1, df2) if not part.empty] or [df1]
df_concat = pd.concat(frames_to_stack, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7982 entries, 0 to 7981
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7982 non-null   Int64         
 1   digitalLoanAccountId   7982 non-null   object        
 2   loanAccountNumber      7982 non-null   object        
 3   Alpha_cic_sil_score    7982 non-null   float64       
 4   appln_submit_datetime  7982 non-null   datetime64[us]
 5   disbursementdate       7982 non-null   dbdate        
 6   Application_month      7982 non-null   object        
 7   Data_selection         7982 non-null   object        
 8   deffpd30               7982 non-null   Int64         
 9   flg_mature_fpd30       7982 non-null   Int64         
 10  new_loan_type          7982 non-null   object        
 11  modelVersionId         7982 non-null   object        
 12  trenchCategory         7982 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [225]:
# Make sure the score column is numeric (unparseable values become NaN), then
# compute the periodic FPD30 Gini broken down by product, model version and
# trench. Earlier variants (calculate_periodic_gini and
# calculate_periodic_gini_producttype) are not run here.
score_column = 'Alpha_cic_sil_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    score_column,
    'deffpd30',
    'FPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [226]:
# Snapshot the FPD30 Gini results (gini_results is reassigned per outcome) and
# count the result rows per loan type / trench combination.
f2 = gini_results.copy(deep=True)
f2.groupby(by=['loan_type', 'trench_category']).size()

loan_type       trench_category
Overall         Trench 3           71
SIL Competitor  Trench 3           59
SIL ZERO        Trench 3           71
SIL-Instore     Trench 3           71
dtype: int64

In [227]:
# Preview the first rows of the FPD30 Gini results.
f2.head(5)

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 3
1,2024-09-01,2024-09-30,0.32816,Month,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 3
2,2024-09-02,2024-09-08,0.75502,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 3
3,2024-09-09,2024-09-15,-0.533333,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 3
4,2024-09-16,2024-09-22,-0.111111,Week,Alpha_cic_sil_score,v2,FPD30,Overall,Trench 3


In [228]:
# Long-format view of the FPD30 Gini results: keep the key columns, expose the
# Gini value under the short name 'FPD30', and tag the rows with their segment.
fpd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Alpha_cic_sil_score_FPD30_gini',
]
f20 = (
    f2[fpd30_columns]
    .rename(columns={'Alpha_cic_sil_score_FPD30_gini': 'FPD30'})
    .assign(category='cic_sil_score_fpd30_v2_t3')
)

## FSPD30

## Test

In [229]:
# Test extract for the FSPD30 Gini: v2 scores of the CIC SIL model read from
# audit_balance.ml_model_run_details, joined to loan_master_table and to the
# delinquency flags derived from loan_deliquency_data.
# Only disbursed loans with flg_mature_fspd_30 = 1 in 'Trench 3' are kept.
# Note: the LEFT JOIN to loan_master_table acts as an inner join, because the
# INNER JOIN that follows and the WHERE clause both require loanmaster columns.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [230]:
# Keep the Test extract under its own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [231]:
# Train extract for the FSPD30 Gini: same shape as the Test query, but the v2
# scores are read from dap_ds_poweruser_playground.ml_training_model_run_details
# and the rows are labelled 'Train'.
# Only disbursed loans with flg_mature_fspd_30 = 1 in 'Trench 3' are kept.
# Note: the LEFT JOIN to loan_master_table acts as an inner join, because the
# INNER JOIN that follows and the WHERE clause both require loanmaster columns.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  flg_mature_fspd_30,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2277939,b2733db1-e11d-465c-84db-cb35e205014c,60822779390039,0.071705,2024-09-30 15:36:36,2024-09-30,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
1,2013237,6b4d7f66-e51c-4844-8d4f-5c43fff40e2a,60820132370021,0.165546,2024-09-25 09:41:57,2024-09-25,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
2,2451216,ab49536b-dd7c-40df-aee0-205d0ff50514,60824512160021,0.145155,2024-09-16 10:01:04,2024-09-16,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
3,2556109,d0ce3632-2e9f-4a5f-94c7-b8960d24faa6,60825561090025,0.060824,2024-09-24 19:16:35,2024-09-25,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2694757,56f95a69-85d4-4028-a500-ff86b05a44e8,60826947570029,0.188876,2024-09-09 10:45:44,2024-09-09,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [232]:
# Keep the Train extract under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [233]:
# Sanity check: row count per model version in the Train extract.
df2.loc[:, 'modelVersionId'].value_counts()

modelVersionId
v2    7551
Name: count, dtype: int64

In [234]:
# Stack the Test (df1) and Train (df2) extracts into a single frame.
# Empty extracts are dropped before concatenating: passing an empty frame to
# pd.concat raises the pandas FutureWarning about empty/all-NA entries, and
# excluding them up front is the behaviour-preserving fix that warning
# recommends. Both extracts share the same columns, so nothing is lost.
# If both are empty, df1 is kept so the result still carries those columns.
frames_to_stack = [part for part in (df1, df2) if not part.empty] or [df1]
df_concat = pd.concat(frames_to_stack, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7551 entries, 0 to 7550
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7551 non-null   Int64         
 1   digitalLoanAccountId   7551 non-null   object        
 2   loanAccountNumber      7551 non-null   object        
 3   Alpha_cic_sil_score    7551 non-null   float64       
 4   appln_submit_datetime  7551 non-null   datetime64[us]
 5   disbursementdate       7551 non-null   dbdate        
 6   Application_month      7551 non-null   object        
 7   Data_selection         7551 non-null   object        
 8   deffspd30              7551 non-null   Int64         
 9   flg_mature_fspd_30     7551 non-null   Int64         
 10  new_loan_type          7551 non-null   object        
 11  modelVersionId         7551 non-null   object        
 12  trenchCategory         7551 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [235]:
# Make sure the score column is numeric (unparseable values become NaN), then
# compute the periodic FSPD30 Gini broken down by product, model version and
# trench. Earlier variants (calculate_periodic_gini and
# calculate_periodic_gini_producttype) are not run here.
score_column = 'Alpha_cic_sil_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    score_column,
    'deffspd30',
    'FSPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [236]:
# Snapshot the FSPD30 Gini results; gini_results is reassigned per outcome.
f3 = gini_results.copy(deep=True)

In [237]:
# Preview the first rows of the FSPD30 Gini results.
f3.head(5)

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,1.0,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 3
1,2024-09-01,2024-09-30,0.47761,Month,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 3
2,2024-09-02,2024-09-08,0.432692,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 3
3,2024-09-09,2024-09-15,0.399573,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 3
4,2024-09-16,2024-09-22,0.220974,Week,Alpha_cic_sil_score,v2,FSPD30,Overall,Trench 3


In [238]:
# Long-format view of the FSPD30 Gini results: keep the key columns, expose the
# Gini value under the short name 'FSPD30', and tag the rows with their segment.
fspd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Alpha_cic_sil_score_FSPD30_gini',
]
f30 = (
    f3[fspd30_columns]
    .rename(columns={'Alpha_cic_sil_score_FSPD30_gini': 'FSPD30'})
    .assign(category='cic_sil_score_fspd30_v2_t3')
)

## FSTPD30

## Test

In [239]:
# Test extract for the FSTPD30 Gini: v2 scores of the CIC SIL model read from
# audit_balance.ml_model_run_details, joined to loan_master_table and to the
# delinquency flags derived from loan_deliquency_data.
# Only disbursed loans with flg_mature_fstpd_30 = 1 in 'Trench 3' are kept.
# Note: the LEFT JOIN to loan_master_table acts as an inner join, because the
# INNER JOIN that follows and the WHERE clause both require loanmaster columns.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId, 
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.audit_balance.ml_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [240]:
# Keep the Test extract under its own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [241]:
# Train extract for the FSTPD30 Gini: same shape as the Test query, but the v2
# scores are read from dap_ds_poweruser_playground.ml_training_model_run_details
# and the rows are labelled 'Train'.
# Only disbursed loans with flg_mature_fstpd_30 = 1 in 'Trench 3' are kept.
# Note: the LEFT JOIN to loan_master_table acts as an inner join, because the
# INNER JOIN that follows and the WHERE clause both require loanmaster columns.
sq = """ 
with modelname as 
  (SELECT
    customerId,digitalLoanAccountId,prediction Alpha_cic_sil_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL' 
        when trenchCategory = ''then 'ALL' 
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Alpha - CIC-SIL-Model', 'cic_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Alpha_cic_sil_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Alpha_cic_sil_score is not null
  and flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Alpha_cic_sil_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2429274,b0bae04c-7054-46c7-b94f-79491d830487,60824292740028,0.152214,2024-09-23 15:30:59,2024-09-23,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
1,2455017,bf2dc0c3-c764-44fc-93af-402395180d19,60824550170022,0.120363,2024-09-21 14:36:16,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
2,2314232,44b30e6d-1a15-4009-9d6a-e3667940aa1f,60823142320027,0.145376,2024-09-29 13:07:54,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
3,2548919,8b1915e1-d19d-4b39-9fc4-3f929f3e9978,60825489190024,0.255582,2024-09-01 17:35:28,2024-09-01,2024-09,Train,1,1,SIL-Instore,v2,Trench 3
4,2459427,cf60c5c0-87e4-43a9-9b84-79a2861d73e7,60824594270022,0.101543,2024-09-07 15:45:27,2024-09-07,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [242]:
# Keep the Train extract under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [243]:
# Sanity check: row count per model version in the Train extract.
df2.loc[:, 'modelVersionId'].value_counts()

modelVersionId
v2    6985
Name: count, dtype: int64

In [244]:
# Stack the Test (df1) and Train (df2) extracts into a single frame.
# Empty extracts are dropped before concatenating: passing an empty frame to
# pd.concat raises the pandas FutureWarning about empty/all-NA entries, and
# excluding them up front is the behaviour-preserving fix that warning
# recommends. Both extracts share the same columns, so nothing is lost.
# If both are empty, df1 is kept so the result still carries those columns.
frames_to_stack = [part for part in (df1, df2) if not part.empty] or [df1]
df_concat = pd.concat(frames_to_stack, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6985 entries, 0 to 6984
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             6985 non-null   Int64         
 1   digitalLoanAccountId   6985 non-null   object        
 2   loanAccountNumber      6985 non-null   object        
 3   Alpha_cic_sil_score    6985 non-null   float64       
 4   appln_submit_datetime  6985 non-null   datetime64[us]
 5   disbursementdate       6985 non-null   dbdate        
 6   Application_month      6985 non-null   object        
 7   Data_selection         6985 non-null   object        
 8   deffstpd30             6985 non-null   Int64         
 9   flg_mature_fstpd_30    6985 non-null   Int64         
 10  new_loan_type          6985 non-null   object        
 11  modelVersionId         6985 non-null   object        
 12  trenchCategory         6985 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [245]:
# Make sure the score column is numeric (unparseable values become NaN), then
# compute the periodic FSTPD30 Gini broken down by product, model version and
# trench. Earlier variants (calculate_periodic_gini and
# calculate_periodic_gini_producttype) are not run here.
score_column = 'Alpha_cic_sil_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    score_column,
    'deffstpd30',
    'FSTPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [246]:
# Snapshot the FSTPD30 Gini results and display them (pandas truncates the view).
f4 = gini_results.copy(deep=True)
f4

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,1.000000,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 3
1,2024-09-01,2024-09-30,0.349802,Month,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 3
2,2024-09-02,2024-09-08,0.318421,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 3
3,2024-09-09,2024-09-15,0.094203,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 3
4,2024-09-16,2024-09-22,0.397287,Week,Alpha_cic_sil_score,v2,FSTPD30,Overall,Trench 3
...,...,...,...,...,...,...,...,...,...
231,2025-07-14,2025-07-20,-0.014706,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 3
232,2025-07-21,2025-07-27,0.666667,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 3
233,2025-07-28,2025-08-03,-0.272727,Week,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 3
234,2025-08-01,2025-08-31,,Month,Alpha_cic_sil_score,v2,FSTPD30,SIL Competitor,Trench 3


In [247]:
# Long-format view of the FSTPD30 Gini results: keep the key columns, expose the
# Gini value under the short name 'FSTPD30', and tag the rows with their segment.
fstpd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Alpha_cic_sil_score_FSTPD30_gini',
]
f40 = (
    f4[fstpd30_columns]
    .rename(columns={'Alpha_cic_sil_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='cic_sil_score_fstpd30_v2_t3')
)

## Combining the dataframes

In [248]:
import functools

# Per-outcome Gini frames (FPD0, FPD10, FPD30, FSPD30, FSTPD30) and the key
# columns they are joined on.
# NOTE(review): 'bad_rate' is part of the join key and, in the previews shown,
# holds a different constant in each frame, so rows from different frames do
# not line up; each merged row has only one populated Gini column. Confirm
# this stacked layout is intended.
dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on the key columns listed in `common_columns`."""
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the frames left to right, starting from the first one.
final_df = functools.reduce(merge_dataframes, dataframes[1:], dataframes[0])

final_df.columns.values

array(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Alpha_cic_sil_score_FPD10_gini',
       'Alpha_cic_sil_score_FPD30_gini',
       'Alpha_cic_sil_score_FSPD30_gini',
       'Alpha_cic_sil_score_FSTPD30_gini'], dtype=object)

In [249]:
# Preview the merged Gini table.
final_df.head(5)

Unnamed: 0,start_date,end_date,Alpha_cic_sil_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category,Alpha_cic_sil_score_FPD10_gini,Alpha_cic_sil_score_FPD30_gini,Alpha_cic_sil_score_FSPD30_gini,Alpha_cic_sil_score_FSTPD30_gini
0,2024-08-26,2024-09-01,,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,,,,
1,2024-09-01,2024-09-30,0.003041,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,,,,
2,2024-09-02,2024-09-08,0.05772,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,,,,
3,2024-09-09,2024-09-15,0.298246,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,,,,
4,2024-09-16,2024-09-22,-0.466891,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,,,,


In [250]:
# Tag every Gini column with the model version and trench (v2 / t3) and
# capitalise the trench column; the frame is renamed in place.
v2_t3_column_names = {
    'trench_category': 'Trench_category',
    'Alpha_cic_sil_score_FPD0_gini': 'Alpha_cic_sil_score_FPD0_v2_t3_gini',
    'Alpha_cic_sil_score_FPD10_gini': 'Alpha_cic_sil_score_FPD10_v2_t3_gini',
    'Alpha_cic_sil_score_FPD30_gini': 'Alpha_cic_sil_score_FPD30_v2_t3_gini',
    'Alpha_cic_sil_score_FSPD30_gini': 'Alpha_cic_sil_score_FSPD30_v2_t3_gini',
    'Alpha_cic_sil_score_FSTPD30_gini': 'Alpha_cic_sil_score_FSTPD30_v2_t3_gini',
}
final_df.rename(v2_t3_column_names, axis='columns', inplace=True)
final_df.columns

Index(['start_date', 'end_date', 'Alpha_cic_sil_score_FPD0_v2_t3_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'Alpha_cic_sil_score_FPD10_v2_t3_gini',
       'Alpha_cic_sil_score_FPD30_v2_t3_gini',
       'Alpha_cic_sil_score_FSPD30_v2_t3_gini',
       'Alpha_cic_sil_score_FSTPD30_v2_t3_gini'],
      dtype='object')

In [251]:
# Put the key columns first, followed by the five Gini columns, then append the
# constant model/product labels used by the output table.
output_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'Trench_category',
    'Alpha_cic_sil_score_FPD0_v2_t3_gini',
    'Alpha_cic_sil_score_FPD10_v2_t3_gini',
    'Alpha_cic_sil_score_FPD30_v2_t3_gini',
    'Alpha_cic_sil_score_FSPD30_v2_t3_gini',
    'Alpha_cic_sil_score_FSTPD30_v2_t3_gini',
]
final_df = final_df[output_columns].assign(
    Model_display_name='cic_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                datetime64[ns]
end_date                                  datetime64[ns]
period                                            object
Model_Name                                        object
version                                           object
bad_rate                                          object
loan_type                                         object
Trench_category                                   object
Alpha_cic_sil_score_FPD0_v2_t3_gini              float64
Alpha_cic_sil_score_FPD10_v2_t3_gini             float64
Alpha_cic_sil_score_FPD30_v2_t3_gini             float64
Alpha_cic_sil_score_FSPD30_v2_t3_gini            float64
Alpha_cic_sil_score_FSTPD30_v2_t3_gini           float64
Model_display_name                                object
Product_type                                      object
dtype: object

In [252]:
# Preview the table as it will be uploaded.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,Trench_category,Alpha_cic_sil_score_FPD0_v2_t3_gini,Alpha_cic_sil_score_FPD10_v2_t3_gini,Alpha_cic_sil_score_FPD30_v2_t3_gini,Alpha_cic_sil_score_FSPD30_v2_t3_gini,Alpha_cic_sil_score_FSTPD30_v2_t3_gini,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,,,,,,cic_model_sil,SIL
1,2024-09-01,2024-09-30,Month,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,0.003041,,,,,cic_model_sil,SIL
2,2024-09-02,2024-09-08,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,0.05772,,,,,cic_model_sil,SIL
3,2024-09-09,2024-09-15,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,0.298246,,,,,cic_model_sil,SIL
4,2024-09-16,2024-09-22,Week,Alpha_cic_sil_score,v2,FPD0,Overall,Trench 3,-0.466891,,,,,cic_model_sil,SIL


In [253]:
# Upload to BigQuery: load final_df into the playground table, replacing its
# current contents.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.cic_sil_score_v2_t3_giniv1"
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # or "WRITE_APPEND"
job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
job.result()  # Block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=cf4755e8-47ac-4217-a75a-404b8cab8794>

In [254]:
import functools

# Per-target Gini frames that are folded into a single frame below.
dataframes = [
    f01,
    f10,
    f20,
    f30,
    f40,
]

# Columns used as the merge keys in merge_dataframes.
common_columns = [
    'start_date',
    'end_date',
    'period',
    'Model_Name',
    'version',
    'loan_type',
    'trench_category',
    'bad_rate',
    'category',
]


def merge_dataframes(df1, df2):
    """Outer-merge ``df1`` with ``df2`` on the notebook-level ``common_columns``."""
    merged = df1.merge(df2, how='outer', on=common_columns)
    return merged


# Fold the list pairwise, left to right, into one frame.
cicsilscorev2t3 = functools.reduce(merge_dataframes, dataframes)

print(cicsilscorev2t3.columns.values)

# Stack the per-version / per-trench frames row-wise with a fresh index.
result = pd.concat(
    [cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3],
    ignore_index=True,
)
result.head()



['start_date' 'end_date' 'period' 'Model_Name' 'version' 'bad_rate'
 'loan_type' 'trench_category' 'FPD0' 'category' 'FPD10' 'FPD30' 'FSPD30'
 'FSTPD30']


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [255]:
# Row count per category label (NaN labels included) as a sanity check on the concat.
# NOTE(review): the displayed output lists 'cic_sil_score_fstpd10_v2_t1' while the
# all/t2/t3 variants use 'fstpd30' - looks like a mislabelled category upstream; confirm.
result['category'].value_counts(dropna=False)

category
cic_sil_score_fpd0_v1_all       508
cic_sil_score_fpd10_v1_all      496
cic_sil_score_fpd30_v1_all      488
cic_sil_score_fspd30_v1_all     468
cic_sil_score_fstpd30_v1_all    444
cic_sil_score_fpd0_v2_t1        277
cic_sil_score_fpd10_v2_t1       277
cic_sil_score_fpd30_v2_t1       275
cic_sil_score_fpd0_v2_t3        274
cic_sil_score_fpd10_v2_t3       274
cic_sil_score_fpd30_v2_t3       272
cic_sil_score_fpd0_v2_t2        270
cic_sil_score_fpd10_v2_t2       270
cic_sil_score_fpd30_v2_t2       267
cic_sil_score_fspd30_v2_t1      260
cic_sil_score_fspd30_v2_t3      259
cic_sil_score_fspd30_v2_t2      250
cic_sil_score_fstpd10_v2_t1     236
cic_sil_score_fstpd30_v2_t3     236
cic_sil_score_fstpd30_v2_t2     230
Name: count, dtype: int64

In [256]:
# Work on an independent copy so later changes to masterdf do not touch `result`.
masterdf = result.copy()
masterdf.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [257]:
# (rows, columns) of the combined master frame.
masterdf.shape

(6331, 14)

# Alpha Sil Stack Model

##### V1

## FPD0

## Test

In [258]:
# FPD0 / Test extract: scored loans for the stacking model (modelVersionId 'v1')
# read from audit_balance.ml_model_run_details, joined to the loan master table
# and to the delinquency flags.
#   - `modelname` CTE: predictions for the two display names listed in the query;
#     a NULL or empty trenchCategory is mapped to 'ALL'.
#   - `deliquency` CTE: derives the default flags (deffpd0 ... deffstpd30) and the
#     maturity flags from loan_deliquency_data; only deffpd0 / flg_mature_fpd0
#     are selected by this query.
#   - Final select: disbursed loans with a non-null score and flg_mature_fpd0 = 1,
#     labelled Data_selection = 'Test'.
# NOTE(review): the WHERE clause filters on loanmaster columns and the join to
# `deliquency` is INNER on loanmaster.loanAccountNumber, so the LEFT JOIN to
# loan_master_table behaves like an inner join.
# NOTE(review): calcFeature is computed in the CTE but never selected afterwards.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  ;
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,3471049,1f8a3528-811a-4c8f-b756-75062f6b405f,60834710490013,0.0724932903139941,2025-06-01 10:39:16,2025-06-01,2025-06,Test,0,1,SIL-Instore,v1,ALL
1,3471028,a5df93ad-9f48-41a4-bb34-4caebc0d6961,60834710280018,0.0958976907312423,2025-06-01 10:41:09,2025-06-01,2025-06,Test,0,1,SIL-Instore,v1,ALL
2,1936885,85b6c772-723e-4e6b-8962-41a3fc6c288a,60819368850025,0.0748627402078637,2025-06-01 10:44:44,2025-06-01,2025-06,Test,0,1,SIL-Instore,v1,ALL
3,2892700,cfdde0f0-579c-41fb-87b2-d7f8abaf162e,60828927000026,0.1182751058142081,2025-06-01 09:32:52,2025-06-01,2025-06,Test,0,1,SIL-Instore,v1,ALL
4,3471159,0a7e070f-ed82-4a06-86f0-3fe27f5f0916,60834711590018,0.0710208021234906,2025-06-01 11:38:29,2025-06-01,2025-06,Test,0,1,SIL-Instore,v1,ALL


In [259]:
# Keep the 'Test' FPD0 extract under its own name; `dfd` is reassigned by the next query.
df1 = dfd.copy()

## Train

In [260]:
# FPD0 / Train extract: same shape as the 'Test' query, but the predictions come
# from dap_ds_poweruser_playground.ml_training_model_run_details and the rows
# are labelled Data_selection = 'Train'.
#   - `modelname` CTE: predictions for the two display names listed in the query,
#     modelVersionId 'v1'; NULL or empty trenchCategory is mapped to 'ALL'.
#   - `deliquency` CTE: default and maturity flags derived from loan_deliquency_data;
#     only deffpd0 / flg_mature_fpd0 are selected by this query.
#   - Final select: disbursed loans with a non-null score and flg_mature_fpd0 = 1.
# NOTE(review): the WHERE filters on loanmaster plus the INNER join to `deliquency`
# make the LEFT JOIN to loan_master_table behave like an inner join.
# NOTE(review): calcFeature is computed in the CTE but never selected afterwards.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2109962,11ed3c5d-fe0d-44dd-bdeb-2064237407e7,60821099620019,0.149775,2023-06-27 17:26:24,2023-06-27,2023-06,Train,1,1,SIL-Instore,v1,ALL
1,1878133,e710c269-ed84-4a5b-a997-23446995f249,60818781330016,0.087053,2023-01-29 15:40:53,2023-01-29,2023-01,Train,1,1,SIL-Instore,v1,ALL
2,2079178,d837179c-92a9-47aa-9f86-cc360b7e18f5,60820791780012,0.045936,2023-06-05 17:50:37,2023-06-05,2023-06,Train,0,1,SIL-Instore,v1,ALL
3,1859491,a4b870ac-f110-4c53-890d-f7b54463a8bf,60818594910019,0.116156,2023-01-12 18:28:06,2023-01-12,2023-01,Train,0,1,SIL-Instore,v1,ALL
4,2033375,157e3b15-f091-47df-ad03-2ab71a301b9a,60820333750011,0.025256,2023-05-07 16:39:21,2023-05-07,2023-05,Train,0,1,SIL-Instore,v1,ALL


In [261]:
# Keep the 'Train' FPD0 extract under its own name, independent of `dfd`.
df2 = dfd.copy()

In [262]:
# Stack the Test (df1) and Train (df2) extracts row-wise with a fresh index,
# then print the schema / non-null counts of the combined frame.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 182141 entries, 0 to 182140
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             182141 non-null  object        
 1   digitalLoanAccountId   182141 non-null  object        
 2   loanAccountNumber      182141 non-null  object        
 3   Sil_Alpha_Stack_score  182141 non-null  object        
 4   appln_submit_datetime  182141 non-null  datetime64[us]
 5   disbursementdate       182141 non-null  dbdate        
 6   Application_month      182141 non-null  object        
 7   Data_selection         182141 non-null  object        
 8   deffpd0                182141 non-null  Int64         
 9   flg_mature_fpd0        182141 non-null  Int64         
 10  new_loan_type          182141 non-null  object        
 11  modelVersionId         182141 non-null  object        
 12  trenchCategory         182141 non-null  obje

In [263]:
# Convert the score column to a numeric dtype. errors='coerce' turns anything
# unparseable into NaN, so count the values lost that way instead of hiding them.
raw_scores = df_concat['Sil_Alpha_Stack_score']
numeric_scores = pd.to_numeric(raw_scores, errors='coerce')
n_coerced = int(numeric_scores.isna().sum() - raw_scores.isna().sum())
if n_coerced:
    print(f"Warning: {n_coerced} Sil_Alpha_Stack_score value(s) could not be parsed and were set to NaN")
df_concat['Sil_Alpha_Stack_score'] = numeric_scores

# Row count per application month (NaN included), sorted chronologically.
# Kept as the LAST expression so the notebook actually displays it; as the first
# statement of the cell its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [264]:
# Compute the periodic Gini results for the stacking-model score against the
# FPD0 default flag, passing the model-version, trench and product columns.
# Earlier variants of this call (calculate_periodic_gini and
# calculate_periodic_gini_producttype) are superseded by the helper used below,
# which is defined outside this cell.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'Sil_Alpha_Stack_score', 
    'deffpd0', 
    'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [265]:
# Snapshot the FPD0 Gini results; `gini_results` is reassigned for the next target.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.252502,Month,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL
1,2023-01-09,2023-01-15,0.343407,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL
2,2023-01-16,2023-01-22,0.660714,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL
3,2023-01-23,2023-01-29,0.113514,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL
4,2023-01-30,2023-02-05,0.182724,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL


In [266]:
# List the column names of the FPD0 result frame (used for the selection below).
f0.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [267]:
# Keep the key columns plus the FPD0 Gini value, expose that value as 'FPD0',
# and tag every row with the category label for this model/target/version.
f01 = (
    f0[[
        'start_date',
        'end_date',
        'period',
        'Model_Name',
        'version',
        'bad_rate',
        'loan_type',
        'trench_category',
        'Sil_Alpha_Stack_score_FPD0_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FPD0_gini': 'FPD0'})
    .assign(category='alpha_stack_model_sil_fpd0_v1_all')
)

In [268]:
# Number of Gini result rows per loan type / version / trench combination.
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v1       ALL                181
SIL Competitor  v1       ALL                 51
SIL Repeat      v1       ALL                 14
SIL ZERO        v1       ALL                 81
SIL-Instore     v1       ALL                181
dtype: int64

In [269]:
# Preview the reshaped FPD0 frame (renamed metric column plus category label).
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.252502,alpha_stack_model_sil_fpd0_v1_all
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.343407,alpha_stack_model_sil_fpd0_v1_all
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.660714,alpha_stack_model_sil_fpd0_v1_all
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.113514,alpha_stack_model_sil_fpd0_v1_all
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.182724,alpha_stack_model_sil_fpd0_v1_all


## FPD10

## Test

In [270]:
# FPD10 / Test extract: scored loans for the stacking model (modelVersionId 'v1')
# read from audit_balance.ml_model_run_details, joined to the loan master table
# and to the delinquency flags.
#   - `modelname` CTE: predictions for the two display names listed in the query;
#     a NULL or empty trenchCategory is mapped to 'ALL'.
#   - `deliquency` CTE: default and maturity flags derived from loan_deliquency_data;
#     only deffpd10 / flg_mature_fpd10 are selected by this query.
#   - Final select: disbursed loans with a non-null score and flg_mature_fpd10 = 1,
#     labelled Data_selection = 'Test'.
# NOTE(review): the WHERE filters on loanmaster plus the INNER join to `deliquency`
# make the LEFT JOIN to loan_master_table behave like an inner join.
# NOTE(review): calcFeature is computed in the CTE but never selected afterwards.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3669726,d7e3adc8-dc60-4e3a-85a8-f43acf316821,60836697260016,0.1141377774934147,2025-09-07 15:02:13,2025-09-07,2025-09,Test,0,1,SIL-Instore,v1,ALL
1,3670056,376c8538-c464-4d90-9cca-f9025aac1481,60836700560011,0.0666054671766312,2025-09-07 14:18:04,2025-09-07,2025-09,Test,0,1,SIL Competitor,v1,ALL
2,3670083,a0659ff8-75bc-41dd-89cf-ff322b38b4e3,60836700830013,0.0482093380638206,2025-09-07 14:24:25,2025-09-07,2025-09,Test,0,1,SIL Competitor,v1,ALL
3,3670110,21a43a9f-1bd6-452a-b26f-cebaf889d83d,60836701100019,0.1395609446074279,2025-09-07 14:26:37,2025-09-07,2025-09,Test,0,1,SIL-Instore,v1,ALL
4,3670337,dde6d177-1946-444b-ac74-f3d3a2c338ce,60836703370016,0.0754201042848481,2025-09-07 15:25:29,2025-09-07,2025-09,Test,0,1,SIL-Instore,v1,ALL


In [271]:
# Keep the 'Test' FPD10 extract under its own name; `dfd` is reassigned by the next query.
df1 = dfd.copy()

## Train

In [272]:
# FPD10 / Train extract: same shape as the FPD10 'Test' query, but the predictions
# come from dap_ds_poweruser_playground.ml_training_model_run_details and the
# rows are labelled Data_selection = 'Train'.
#   - `modelname` CTE: predictions for the two display names listed in the query,
#     modelVersionId 'v1'; NULL or empty trenchCategory is mapped to 'ALL'.
#   - `deliquency` CTE: default and maturity flags derived from loan_deliquency_data;
#     only deffpd10 / flg_mature_fpd10 are selected by this query.
#   - Final select: disbursed loans with a non-null score and flg_mature_fpd10 = 1.
# NOTE(review): the select list ends with a trailing comma before FROM. The cell
# output shows the query ran, but the matching 'Test' query has no trailing comma.
# NOTE(review): the WHERE filters on loanmaster plus the INNER join to `deliquency`
# make the LEFT JOIN to loan_master_table behave like an inner join.
# NOTE(review): calcFeature is computed in the CTE but never selected afterwards.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  del.flg_mature_fpd10,
    loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2076031,a06beaf8-3423-44be-9659-d10c5eff3d0b,60820760310012,0.075169,2023-06-03 19:38:43,2023-06-03,2023-06,Train,0,1,SIL-Instore,v1,ALL
1,2116490,19b6c48e-4351-446c-95fa-01fb6ee1ac9d,60821164900011,0.131845,2023-07-02 16:38:56,2023-07-02,2023-07,Train,1,1,SIL-Instore,v1,ALL
2,1778491,85ee1518-2ece-4529-8bd9-627050f1fbbe,60817784910019,0.110601,2023-05-28 18:03:22,2023-05-28,2023-05,Train,0,1,SIL-Instore,v1,ALL
3,2127098,98fe8661-d666-4d26-ab29-f4d2526aaf95,60821270980016,0.282929,2023-07-09 15:23:01,2023-07-09,2023-07,Train,1,1,SIL-Instore,v1,ALL
4,2140444,406c81ca-c84c-4152-b0ab-2004ed3ac8c0,60821404440016,0.105448,2023-07-18 19:16:22,2023-07-18,2023-07,Train,0,1,SIL-Instore,v1,ALL


In [273]:
# Keep the 'Train' FPD10 extract under its own name, independent of `dfd`.
df2 = dfd.copy()

In [274]:
# Stack the FPD10 Test (df1) and Train (df2) extracts row-wise with a fresh index,
# then print the schema / non-null counts of the combined frame.
df_concat = pd.concat([df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 177174 entries, 0 to 177173
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             177174 non-null  object        
 1   digitalLoanAccountId   177174 non-null  object        
 2   loanAccountNumber      177174 non-null  object        
 3   Sil_Alpha_Stack_score  177174 non-null  object        
 4   appln_submit_datetime  177174 non-null  datetime64[us]
 5   disbursementdate       177174 non-null  dbdate        
 6   Application_month      177174 non-null  object        
 7   Data_selection         177174 non-null  object        
 8   deffpd10               177174 non-null  Int64         
 9   flg_mature_fpd10       177174 non-null  Int64         
 10  new_loan_type          177174 non-null  object        
 11  modelVersionId         177174 non-null  object        
 12  trenchCategory         177174 non-null  obje

In [275]:
# Convert the score column to a numeric dtype; errors='coerce' turns any value
# that cannot be parsed into NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
# Compute the periodic Gini results for the score against the FPD10 default flag,
# passing the model-version, trench and product columns. The helper is defined
# outside this cell and supersedes the earlier calculate_periodic_gini call.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'Sil_Alpha_Stack_score', 
    'deffpd10', 
    'FPD10',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [276]:
# Snapshot the FPD10 Gini results under their own name.
f1=gini_results.copy()

In [277]:
# Show the first ten rows of the FPD10 Gini results.
f1.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.263158,Month,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
1,2023-01-09,2023-01-15,0.275862,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
2,2023-01-16,2023-01-22,0.789474,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
3,2023-01-23,2023-01-29,0.237179,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
4,2023-01-30,2023-02-05,0.181818,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
5,2023-02-01,2023-02-28,0.133333,Month,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
6,2023-02-06,2023-02-12,0.419355,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
7,2023-02-13,2023-02-19,-0.110512,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
8,2023-02-20,2023-02-26,-0.192157,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL
9,2023-02-27,2023-03-05,0.522222,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL


In [278]:
# Keep the key columns plus the FPD10 Gini value, expose that value as 'FPD10',
# and tag every row with the category label for this model/target/version.
f10 = (
    f1[[
        'start_date',
        'end_date',
        'period',
        'Model_Name',
        'version',
        'bad_rate',
        'loan_type',
        'trench_category',
        'Sil_Alpha_Stack_score_FPD10_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FPD10_gini': 'FPD10'})
    .assign(category='alpha_stack_model_sil_fpd10_v1_all')
)

In [279]:
# Preview the reshaped FPD10 frame (renamed metric column plus category label).
f10.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD10,category
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL,0.263158,alpha_stack_model_sil_fpd10_v1_all
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL,0.275862,alpha_stack_model_sil_fpd10_v1_all
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL,0.789474,alpha_stack_model_sil_fpd10_v1_all
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL,0.237179,alpha_stack_model_sil_fpd10_v1_all
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,v1,FPD10,Overall,ALL,0.181818,alpha_stack_model_sil_fpd10_v1_all


## FPD30

## Test

In [280]:
# FPD30 / Test extract: scored loans for the stacking model (modelVersionId 'v1')
# read from audit_balance.ml_model_run_details, joined to the loan master table
# and to the delinquency flags.
#   - `modelname` CTE: predictions for the two display names listed in the query;
#     a NULL or empty trenchCategory is mapped to 'ALL'.
#   - `deliquency` CTE: default and maturity flags derived from loan_deliquency_data;
#     only deffpd30 / flg_mature_fpd30 are selected by this query.
#   - Final select: disbursed loans with a non-null score and flg_mature_fpd30 = 1,
#     labelled Data_selection = 'Test'.
# NOTE(review): the WHERE filters on loanmaster plus the INNER join to `deliquency`
# make the LEFT JOIN to loan_master_table behave like an inner join.
# NOTE(review): calcFeature is computed in the CTE but never selected afterwards.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
       case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,3438031,57fb7dda-2157-4cba-a811-a1dc8ef2563e,60834380310011,0.0737187493553919,2025-05-15 11:41:07,2025-05-15,2025-05,Test,0,1,SIL-Instore,v1,ALL
1,3438304,bc4c8159-d2b8-4475-8bd4-5737b4b131f1,60834383040017,0.0563791737920084,2025-05-15 13:51:18,2025-05-15,2025-05,Test,0,1,SIL-Instore,v1,ALL
2,3438326,79360329-f5aa-4f52-b8c4-c9847e57739d,60834383260013,0.1205279068782283,2025-05-15 14:00:04,2025-05-15,2025-05,Test,0,1,SIL-Instore,v1,ALL
3,3438393,36025e29-afec-4102-8c8e-bf992f8e71f5,60834383930014,0.034337345104186,2025-05-15 14:29:07,2025-05-15,2025-05,Test,0,1,SIL-Instore,v1,ALL
4,3437904,5f2834a1-4a7f-4447-b72e-2edae25a5694,60834379040011,0.1271706167859121,2025-05-15 10:47:47,2025-05-15,2025-05,Test,0,1,SIL-Instore,v1,ALL


In [281]:
# Keep the 'Test' FPD30 extract under its own name; `dfd` is reassigned by the next query.
df1 = dfd.copy()

## Train

In [282]:
# FPD30 / Train extract: same shape as the FPD30 'Test' query, but the predictions
# come from dap_ds_poweruser_playground.ml_training_model_run_details and the
# rows are labelled Data_selection = 'Train'.
#   - `modelname` CTE: predictions for the two display names listed in the query,
#     modelVersionId 'v1'; NULL or empty trenchCategory is mapped to 'ALL'.
#   - `deliquency` CTE: default and maturity flags derived from loan_deliquency_data;
#     only deffpd30 / flg_mature_fpd30 are selected by this query.
#   - Final select: disbursed loans with a non-null score and flg_mature_fpd30 = 1.
# NOTE(review): the select list ends with a trailing comma before FROM. The cell
# output shows the query ran, but the matching 'Test' query has no trailing comma.
# NOTE(review): the WHERE filters on loanmaster plus the INNER join to `deliquency`
# make the LEFT JOIN to loan_master_table behave like an inner join.
# NOTE(review): calcFeature is computed in the CTE but never selected afterwards.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
"""
# Run the query and materialise the result as a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence of each.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,1941498,9af7e49f-8403-43d8-af12-15c9adec3948,60819414980015,0.086393,2023-03-13 16:09:53,2023-03-13,2023-03,Train,0,1,SIL-Instore,v1,ALL
1,2035102,b92e34f3-8cfc-4a26-95c3-4956f91517b0,60820351020016,0.113308,2023-05-08 18:30:41,2023-05-08,2023-05,Train,0,1,SIL-Instore,v1,ALL
2,1907278,06d8355e-efbd-4a58-986b-e2e254180669,60819072780019,0.031877,2023-02-19 11:39:06,2023-02-19,2023-02,Train,0,1,SIL-Instore,v1,ALL
3,1973718,4df3c8a0-a54d-4d77-b583-d1dd7d98d9c4,60819737180011,0.067572,2023-04-02 11:14:45,2023-04-02,2023-04,Train,0,1,SIL-Instore,v1,ALL
4,1958557,1cf1c7ea-51dc-4ad0-b97b-75a5837983af,60819585570012,0.093191,2023-03-24 16:48:49,2023-03-24,2023-03,Train,0,1,SIL-Instore,v1,ALL


In [283]:
# Keep the Train-side FPD30 pull as df2 so it can be stacked with df1 (Test).
df2 = dfd.copy()

In [284]:
# Stack the Test (df1) and Train (df2) rows; ignore_index=True renumbers the rows from 0.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Check non-null counts and dtypes: the score column is `object` here and is cast to numeric in the next cell.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 170947 entries, 0 to 170946
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             170947 non-null  object        
 1   digitalLoanAccountId   170947 non-null  object        
 2   loanAccountNumber      170947 non-null  object        
 3   Sil_Alpha_Stack_score  170947 non-null  object        
 4   appln_submit_datetime  170947 non-null  datetime64[us]
 5   disbursementdate       170947 non-null  dbdate        
 6   Application_month      170947 non-null  object        
 7   Data_selection         170947 non-null  object        
 8   deffpd30               170947 non-null  Int64         
 9   flg_mature_fpd30       170947 non-null  Int64         
 10  new_loan_type          170947 non-null  object        
 11  modelVersionId         170947 non-null  object        
 12  trenchCategory         170947 non-null  obje

In [285]:
# The score column is `object` dtype after the concat (see df_concat.info()), so cast it
# to a numeric dtype before computing Gini. errors='coerce' turns anything unparseable
# into NaN; report how many values were lost that way instead of letting them vanish silently.
_missing_before = int(df_concat['Sil_Alpha_Stack_score'].isna().sum())
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
_coerced_to_nan = int(df_concat['Sil_Alpha_Stack_score'].isna().sum()) - _missing_before
if _coerced_to_nan:
    print(f"WARNING: {_coerced_to_nan} Sil_Alpha_Stack_score value(s) were non-numeric and are now NaN")

# Gini of the score against the FPD30 default flag ('deffpd30'), labelled 'FPD30',
# using the model-version, trench and product columns named below as grouping inputs.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffpd30',
    'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [286]:
# Keep the FPD30 results under their own name; `gini_results` is reassigned by the FSPD30 section below.
f2=gini_results.copy()

In [287]:
# Preview the FPD30 Gini table. Month and Week rows share one frame and are told apart by `period`;
# the weekly values in this preview range from about -0.11 to 0.87.
f2.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.358431,Month,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
1,2023-01-09,2023-01-15,0.718033,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
2,2023-01-16,2023-01-22,0.789474,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
3,2023-01-23,2023-01-29,0.107143,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
4,2023-01-30,2023-02-05,0.181818,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
5,2023-02-01,2023-02-28,0.181795,Month,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
6,2023-02-06,2023-02-12,0.419355,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
7,2023-02-13,2023-02-19,-0.110512,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
8,2023-02-20,2023-02-26,0.009615,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL
9,2023-02-27,2023-03-05,0.87234,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL


In [288]:
# Long-format view of the FPD30 results: keep the key columns, expose the metric
# under the short name 'FPD30', and tag every row with its category identifier.
# f20 later feeds the `alphastacksilv1all` merge.
f20 = (
    f2[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Sil_Alpha_Stack_score_FPD30_gini']]
    .rename(columns={'Sil_Alpha_Stack_score_FPD30_gini': 'FPD30'})
    .assign(category='alpha_stack_model_sil_fpd30_v1_all')
)

In [289]:
# Confirm the metric column is now called 'FPD30' and that the `category` tag was added.
f20.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL,0.358431,alpha_stack_model_sil_fpd30_v1_all
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL,0.718033,alpha_stack_model_sil_fpd30_v1_all
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL,0.789474,alpha_stack_model_sil_fpd30_v1_all
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL,0.107143,alpha_stack_model_sil_fpd30_v1_all
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,v1,FPD30,Overall,ALL,0.181818,alpha_stack_model_sil_fpd30_v1_all


## FSPD30

## Test

In [290]:
# Test-side FSPD30 pull: v1 scores for 'Alpha - StackingModel' / 'alpha_stack_model_sil' from
# audit_balance.ml_model_run_details, joined to loan_master_table and to the per-loan delinquency flags.
# Only disbursed loans with a non-null score and flg_mature_fspd_30 = 1 (obs_min_inst_def30 >= 2) are kept.
# The target is deffspd30: obs_min_inst_def30 >= 2 and min_inst_def30 in (1, 2).
# NOTE(review): the WHERE clause filters on loanmaster/del columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
# NOTE(review): end_time, modelDisplayName and calcFeature are produced by the `modelname` CTE but
# are not referenced by the final SELECT.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""
# Run the query, then drop rows that are identical in every column.
# `dfd` is a scratch name reused by each pull, so it is snapshotted into df1 in the next cell.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3664338,a5872ed1-a008-45d6-b6e6-43d70c488255,60836643380018,0.0742009831785215,2025-09-04 19:38:34,2025-09-04,2025-09,Test,0,1,SIL ZERO,v1,ALL
1,3667324,be0913e6-ae5c-49f8-86e6-7017cf6cffe2,60836673240014,0.0859257227245609,2025-09-06 11:48:09,2025-09-06,2025-09,Test,0,1,SIL Competitor,v1,ALL
2,3667268,a40e54d5-2b2c-4191-a97f-d051504e75d6,60836672680013,0.0208208271284212,2025-09-06 11:22:34,2025-09-06,2025-09,Test,0,1,SIL-Instore,v1,ALL
3,3674553,3d7a381e-525f-4e35-b9b5-d4bda1f0ced9,60836745530018,0.0507355141854219,2025-09-09 16:37:50,2025-09-09,2025-09,Test,0,1,SIL Competitor,v1,ALL
4,3667051,f87795fd-f6d6-4dae-b637-d2c078d4908f,60836670510018,0.1256571988215703,2025-09-06 10:06:22,2025-09-06,2025-09,Test,0,1,SIL-Instore,v1,ALL


In [291]:
# Keep the Test-side FSPD30 pull as df1; `dfd` is overwritten by the Train query in the next cell.
df1 = dfd.copy()

## Train

In [292]:
# Train-side FSPD30 pull: same shape as the Test query above, but the scores come from the
# training run-details table and the rows are labelled 'Train'.
# Only disbursed loans with a non-null score and flg_mature_fspd_30 = 1 (obs_min_inst_def30 >= 2) are kept.
# NOTE(review): the WHERE clause filters on loanmaster/del columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
# NOTE(review): end_time, modelDisplayName and calcFeature are produced by the `modelname` CTE but
# are not referenced by the final SELECT.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
"""
# Run the query, then drop rows that are identical in every column.
# `dfd` is a scratch name reused by each pull, so it is snapshotted into df2 in the next cell.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,1976135,57ed1603-d85f-4e86-8722-5bcdd59cf96c,60819761350016,0.06326,2023-04-03 17:23:10,2023-04-03,2023-04,Train,0,1,SIL-Instore,v1,ALL
1,2120539,9100d829-0b32-4627-82bf-fcba7f5d2758,60821205390012,0.031276,2023-07-05 16:07:18,2023-07-05,2023-07,Train,0,1,SIL-Instore,v1,ALL
2,1940396,cd8857f1-76d8-442d-b33d-b724d867038e,60819403960016,0.081149,2023-03-12 18:56:30,2023-03-12,2023-03,Train,0,1,SIL-Instore,v1,ALL
3,2064936,c2f0c3d6-e7d2-4434-b3fd-1cce98dd6f7f,60820649360011,0.06982,2023-05-28 11:01:49,2023-05-28,2023-05,Train,0,1,SIL-Instore,v1,ALL
4,2008764,48c75694-889c-4605-a37c-ec34c08f74e2,60820087640015,0.113428,2023-04-22 14:55:59,2023-04-24,2023-04,Train,1,1,SIL-Instore,v1,ALL


In [293]:
# Keep the Train-side FSPD30 pull as df2 so it can be stacked with df1 (Test).
df2 = dfd.copy()

In [294]:
# Stack the Test (df1) and Train (df2) FSPD30 rows; ignore_index=True renumbers the rows from 0.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Check non-null counts and dtypes: the score column is `object` here and is cast to numeric in the next cell.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 161997 entries, 0 to 161996
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             161997 non-null  object        
 1   digitalLoanAccountId   161997 non-null  object        
 2   loanAccountNumber      161997 non-null  object        
 3   Sil_Alpha_Stack_score  161997 non-null  object        
 4   appln_submit_datetime  161997 non-null  datetime64[us]
 5   disbursementdate       161997 non-null  dbdate        
 6   Application_month      161997 non-null  object        
 7   Data_selection         161997 non-null  object        
 8   deffspd30              161997 non-null  Int64         
 9   flg_mature_fspd_30     161997 non-null  Int64         
 10  new_loan_type          161997 non-null  object        
 11  modelVersionId         161997 non-null  object        
 12  trenchCategory         161997 non-null  obje

In [295]:
# The score column is `object` dtype after the concat (see df_concat.info()), so cast it
# to a numeric dtype before computing Gini. errors='coerce' turns anything unparseable
# into NaN; report how many values were lost that way instead of letting them vanish silently.
_missing_before = int(df_concat['Sil_Alpha_Stack_score'].isna().sum())
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
_coerced_to_nan = int(df_concat['Sil_Alpha_Stack_score'].isna().sum()) - _missing_before
if _coerced_to_nan:
    print(f"WARNING: {_coerced_to_nan} Sil_Alpha_Stack_score value(s) were non-numeric and are now NaN")

# Gini of the score against the FSPD30 default flag ('deffspd30'), labelled 'FSPD30',
# using the model-version, trench and product columns named below as grouping inputs.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffspd30',
    'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [296]:
# Keep the FSPD30 results under their own name; `gini_results` is reassigned by the FSTPD30 section below.
f3=gini_results.copy()

In [297]:
# Preview the FSPD30 Gini table; Month and Week rows are interleaved and told apart by `period`.
f3.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.258788,Month,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
1,2023-01-09,2023-01-15,0.418103,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
2,2023-01-16,2023-01-22,0.789474,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
3,2023-01-23,2023-01-29,0.032051,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
4,2023-01-30,2023-02-05,0.143631,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
5,2023-02-01,2023-02-28,0.117886,Month,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
6,2023-02-06,2023-02-12,0.362963,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
7,2023-02-13,2023-02-19,-0.08061,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
8,2023-02-20,2023-02-26,-0.02,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL
9,2023-02-27,2023-03-05,0.449612,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL


In [298]:
# Long-format view of the FSPD30 results: keep the key columns, expose the metric
# under the short name 'FSPD30', and tag every row with its category identifier.
# f30 later feeds the `alphastacksilv1all` merge.
f30 = (
    f3[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Sil_Alpha_Stack_score_FSPD30_gini']]
    .rename(columns={'Sil_Alpha_Stack_score_FSPD30_gini': 'FSPD30'})
    .assign(category='alpha_stack_model_sil_fspd30_v1_all')
)

In [299]:
# Confirm the metric column is now called 'FSPD30' and that the `category` tag was added.
f30.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL,0.258788,alpha_stack_model_sil_fspd30_v1_all
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL,0.418103,alpha_stack_model_sil_fspd30_v1_all
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL,0.789474,alpha_stack_model_sil_fspd30_v1_all
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL,0.032051,alpha_stack_model_sil_fspd30_v1_all
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,v1,FSPD30,Overall,ALL,0.143631,alpha_stack_model_sil_fspd30_v1_all


## FSTPD30

## Test

In [300]:
# Test-side FSTPD30 pull: v1 scores for 'Alpha - StackingModel' / 'alpha_stack_model_sil' from
# audit_balance.ml_model_run_details, joined to loan_master_table and to the per-loan delinquency flags.
# Only disbursed loans with a non-null score and flg_mature_fstpd_30 = 1 (obs_min_inst_def30 >= 3) are kept.
# The target is deffstpd30: obs_min_inst_def30 >= 3 and min_inst_def30 in (1, 2, 3).
# NOTE(review): the WHERE clause filters on loanmaster/del columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
# NOTE(review): end_time, modelDisplayName and calcFeature are produced by the `modelname` CTE but
# are not referenced by the final SELECT.
# NOTE(review): the final select list ends with a trailing comma before FROM, unlike the sibling
# FSPD30 queries.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""
# Run the query, then drop rows that are identical in every column.
# `dfd` is a scratch name reused by each pull, so it is snapshotted into df1 in the next cell.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3603221,67c928fa-e469-4471-b6fa-0ccfc459d6b5,60836032210012,0.1378389890116292,2025-08-06 15:08:17,2025-08-06,2025-08,Test,0,1,SIL-Instore,v1,ALL
1,3452144,e5ced157-e148-4f14-8843-9e9a78bd3455,60834521440015,0.1406512064560185,2025-05-22 19:37:58,2025-05-22,2025-05,Test,0,1,SIL-Instore,v1,ALL
2,3381741,64dc0d73-2ee4-402f-a011-75b2a711d917,60833817410015,0.1269618471306287,2025-04-15 18:26:12,2025-04-15,2025-04,Test,0,1,SIL-Instore,v1,ALL
3,3605049,20dc9554-bb7a-47fe-abbf-a79febabfaf3,60836050490016,0.0677952189315499,2025-08-07 13:28:53,2025-08-07,2025-08,Test,0,1,SIL-Instore,v1,ALL
4,3603759,838f536b-7d43-444a-b856-250535d2975f,60836037590011,0.1163170846421517,2025-08-06 17:59:33,2025-08-06,2025-08,Test,0,1,SIL-Instore,v1,ALL


In [301]:
# Keep the Test-side FSTPD30 pull as df1; `dfd` is overwritten by the Train query in the next cell.
df1 = dfd.copy()

## Train

In [302]:
# Train-side FSTPD30 pull: same shape as the Test query above, but the scores come from the
# training run-details table and the rows are labelled 'Train'.
# Only disbursed loans with a non-null score and flg_mature_fstpd_30 = 1 (obs_min_inst_def30 >= 3) are kept.
# NOTE(review): the WHERE clause filters on loanmaster/del columns, so the LEFT JOIN to
# loan_master_table effectively behaves as an inner join.
# NOTE(review): end_time, modelDisplayName and calcFeature are produced by the `modelname` CTE but
# are not referenced by the final SELECT.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
"""
# Run the query, then drop rows that are identical in every column.
# `dfd` is a scratch name reused by each pull, so it is snapshotted into df2 in the next cell.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,1906261,aa42d0e6-aca4-4917-a065-8cfdf17ccc0d,60819062610016,0.097471,2023-02-18 14:49:12,2023-02-18,2023-02,Train,0,1,SIL-Instore,v1,ALL
1,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,0.099913,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
2,1861570,38bad92e-ad5c-4d5e-b1b5-7e4add8c233c,60818615700011,0.100869,2023-01-14 15:41:55,2023-01-15,2023-01,Train,0,1,SIL-Instore,v1,ALL
3,2120659,065b5161-0246-4da2-abf7-4b2dd21dc03d,60821206590019,0.059746,2023-07-05 17:19:01,2023-07-05,2023-07,Train,0,1,SIL-Instore,v1,ALL
4,1416748,78e01765-e029-4f14-a04d-75c576ad66f2,60814167480027,0.14059,2023-04-17 18:23:51,2023-04-17,2023-04,Train,0,1,SIL-Instore,v1,ALL


In [303]:
# Keep the Train-side FSTPD30 pull as df2 so it can be stacked with df1 (Test).
df2 = dfd.copy()

In [304]:
# Stack the Test (df1) and Train (df2) FSTPD30 rows; ignore_index=True renumbers the rows from 0.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Check non-null counts and dtypes: the score column is `object` here and is cast to numeric in the next cell.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152436 entries, 0 to 152435
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             152436 non-null  object        
 1   digitalLoanAccountId   152436 non-null  object        
 2   loanAccountNumber      152436 non-null  object        
 3   Sil_Alpha_Stack_score  152436 non-null  object        
 4   appln_submit_datetime  152436 non-null  datetime64[us]
 5   disbursementdate       152436 non-null  dbdate        
 6   Application_month      152436 non-null  object        
 7   Data_selection         152436 non-null  object        
 8   deffstpd30             152436 non-null  Int64         
 9   flg_mature_fstpd_30    152436 non-null  Int64         
 10  new_loan_type          152436 non-null  object        
 11  modelVersionId         152436 non-null  object        
 12  trenchCategory         152436 non-null  obje

In [305]:
# The score column is `object` dtype after the concat (see df_concat.info()), so cast it
# to a numeric dtype before computing Gini. errors='coerce' turns anything unparseable
# into NaN; report how many values were lost that way instead of letting them vanish silently.
_missing_before = int(df_concat['Sil_Alpha_Stack_score'].isna().sum())
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')
_coerced_to_nan = int(df_concat['Sil_Alpha_Stack_score'].isna().sum()) - _missing_before
if _coerced_to_nan:
    print(f"WARNING: {_coerced_to_nan} Sil_Alpha_Stack_score value(s) were non-numeric and are now NaN")

# Gini of the score against the FSTPD30 default flag ('deffstpd30'), labelled 'FSTPD30',
# using the model-version, trench and product columns named below as grouping inputs.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffstpd30',
    'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [306]:
# Keep the FSTPD30 results under their own name (f4); they are merged with f0..f3 further down.
f4=gini_results.copy()

In [307]:
# Preview the FSTPD30 Gini table; Month and Week rows are interleaved and told apart by `period`.
f4.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.265602,Month,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
1,2023-01-09,2023-01-15,0.302479,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
2,2023-01-16,2023-01-22,0.575758,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
3,2023-01-23,2023-01-29,0.157895,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
4,2023-01-30,2023-02-05,0.324561,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
5,2023-02-01,2023-02-28,0.202272,Month,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
6,2023-02-06,2023-02-12,0.393103,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
7,2023-02-13,2023-02-19,0.028,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
8,2023-02-20,2023-02-26,-0.036458,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL
9,2023-02-27,2023-03-05,0.347561,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL


In [308]:
# Long-format view of the FSTPD30 results: keep the key columns, expose the metric
# under the short name 'FSTPD30', and tag every row with its category identifier.
# f40 later feeds the `alphastacksilv1all` merge.
f40 = (
    f4[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'Sil_Alpha_Stack_score_FSTPD30_gini']]
    .rename(columns={'Sil_Alpha_Stack_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='alpha_stack_model_sil_fstpd30_v1_all')
)

In [309]:
# Confirm the metric column is now called 'FSTPD30' and that the `category` tag was added.
f40.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL,0.265602,alpha_stack_model_sil_fstpd30_v1_all
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL,0.302479,alpha_stack_model_sil_fstpd30_v1_all
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL,0.575758,alpha_stack_model_sil_fstpd30_v1_all
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL,0.157895,alpha_stack_model_sil_fstpd30_v1_all
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,v1,FSTPD30,Overall,ALL,0.324561,alpha_stack_model_sil_fstpd30_v1_all


## Combining the dataframes

In [310]:
import functools

# Per-target Gini tables, in the order they are folded together.
dataframes = [f0, f1, f2, f3, f4]
# Key columns shared by every table; read by merge_dataframes at call time.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category','bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the module-level `common_columns` keys.

    NOTE(review): `bad_rate` is part of the key and, judging by the previews of
    f2/f3/f4, holds a different constant in each input, so rows from different
    inputs would not pair up and the result is effectively stacked - confirm
    this is intended.
    """
    merged = df1.merge(df2, how='outer', on=common_columns)
    return merged


# Fold the list left to right: ((((f0 + f1) + f2) + f3) + f4).
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Sil_Alpha_Stack_score_FPD10_gini',
       'Sil_Alpha_Stack_score_FPD30_gini',
       'Sil_Alpha_Stack_score_FSPD30_gini',
       'Sil_Alpha_Stack_score_FSTPD30_gini'], dtype=object)

In [311]:
# Add the '_v1_all' qualifier to every Gini column and capitalise the trench
# column name. The frame is rebound rather than mutated in place.
final_df = final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        'Sil_Alpha_Stack_score_FPD0_gini': 'Sil_Alpha_Stack_score_FPD0_v1_all_gini',
        'Sil_Alpha_Stack_score_FPD10_gini': 'Sil_Alpha_Stack_score_FPD10_v1_all_gini',
        'Sil_Alpha_Stack_score_FPD30_gini': 'Sil_Alpha_Stack_score_FPD30_v1_all_gini',
        'Sil_Alpha_Stack_score_FSPD30_gini': 'Sil_Alpha_Stack_score_FSPD30_v1_all_gini',
        'Sil_Alpha_Stack_score_FSTPD30_gini': 'Sil_Alpha_Stack_score_FSTPD30_v1_all_gini',
    }
)
final_df.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_v1_all_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'Sil_Alpha_Stack_score_FPD10_v1_all_gini',
       'Sil_Alpha_Stack_score_FPD30_v1_all_gini',
       'Sil_Alpha_Stack_score_FSPD30_v1_all_gini',
       'Sil_Alpha_Stack_score_FSTPD30_v1_all_gini'],
      dtype='object')

In [312]:
# Fix the output column order and attach the constant descriptor columns.
# The selection leaves out the data-derived 'Trench_category' column, which is
# then re-created as the constant 'All'.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
        'Sil_Alpha_Stack_score_FPD0_v1_all_gini',
        'Sil_Alpha_Stack_score_FPD10_v1_all_gini',
        'Sil_Alpha_Stack_score_FPD30_v1_all_gini',
        'Sil_Alpha_Stack_score_FSPD30_v1_all_gini',
        'Sil_Alpha_Stack_score_FSTPD30_v1_all_gini',
    ]]
    .assign(
        Trench_category='All',
        Model_display_name='alpha_stack_model_sil',
        Product_type='SIL',
    )
)
final_df.dtypes

start_date                                   datetime64[ns]
end_date                                     datetime64[ns]
period                                               object
Model_Name                                           object
version                                              object
loan_type                                            object
bad_rate                                             object
Sil_Alpha_Stack_score_FPD0_v1_all_gini              float64
Sil_Alpha_Stack_score_FPD10_v1_all_gini             float64
Sil_Alpha_Stack_score_FPD30_v1_all_gini             float64
Sil_Alpha_Stack_score_FSPD30_v1_all_gini            float64
Sil_Alpha_Stack_score_FSTPD30_v1_all_gini           float64
Trench_category                                      object
Model_display_name                                   object
Product_type                                         object
dtype: object

In [313]:
# Inspect the combined table. In this preview each row has a value in only one of the five Gini
# columns (the others are NaN); `bad_rate` was part of the merge key.
final_df.head() 

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Sil_Alpha_Stack_score_FPD0_v1_all_gini,Sil_Alpha_Stack_score_FPD10_v1_all_gini,Sil_Alpha_Stack_score_FPD30_v1_all_gini,Sil_Alpha_Stack_score_FSPD30_v1_all_gini,Sil_Alpha_Stack_score_FSTPD30_v1_all_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,v1,Overall,FPD0,0.252502,,,,,All,alpha_stack_model_sil,SIL
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,v1,Overall,FPD0,0.343407,,,,,All,alpha_stack_model_sil,SIL
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,v1,Overall,FPD0,0.660714,,,,,All,alpha_stack_model_sil,SIL
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,v1,Overall,FPD0,0.113514,,,,,All,alpha_stack_model_sil,SIL
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,v1,Overall,FPD0,0.182724,,,,,All,alpha_stack_model_sil,SIL


In [314]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_stack_model_sil_v1_all_gini_v1"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a4e79772-0ee2-4846-9034-47f1c930d26e>

In [315]:
import functools

# Long-format per-target tables (metric column plus `category` tag), in fold order.
dataframes = [f01, f10, f20, f30, f40]
# Key columns shared by every table; read by merge_dataframes at call time.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']


def merge_dataframes(df1, df2):
    """Outer-join two long-format Gini tables on the module-level `common_columns` keys.

    `category` is part of the key and f20/f30/f40 each carry a different constant
    category, so their rows never pair up: the result keeps one row per input row,
    with only that row's own metric column populated.
    """
    return pd.merge(left=df1, right=df2, how='outer', on=common_columns)


# Fold the list left to right into a single table.
alphastacksilv1all = functools.reduce(merge_dataframes, dataframes)

In [316]:
# Preview the merged frame.
alphastacksilv1all.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.252502,alpha_stack_model_sil_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.343407,alpha_stack_model_sil_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.660714,alpha_stack_model_sil_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.113514,alpha_stack_model_sil_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Sil_Alpha_Stack_score,v1,FPD0,Overall,ALL,0.182724,alpha_stack_model_sil_fpd0_v1_all,,,,


In [317]:
# Stack the per-model Gini frames row-wise with a fresh 0..n-1 index.
result = pd.concat(
    [
        cicsilscorev1all,
        cicsilscorev2t1,
        cicsilscorev2t2,
        cicsilscorev2t3,
        alphastacksilv1all,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (8735, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [318]:
# Row counts per (Model_Name, version, category) combination; NaN keys are counted too.
result[['Model_Name', 'version', 'category']].value_counts(dropna=False)

Model_Name             version  category                            
Alpha_cic_sil_score    v1       cic_sil_score_fpd0_v1_all               508
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd0_v1_all       508
Alpha_cic_sil_score    v1       cic_sil_score_fpd10_v1_all              496
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd10_v1_all      496
Alpha_cic_sil_score    v1       cic_sil_score_fpd30_v1_all              488
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd30_v1_all      488
Alpha_cic_sil_score    v1       cic_sil_score_fspd30_v1_all             468
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fspd30_v1_all     468
                                alpha_stack_model_sil_fstpd30_v1_all    444
Alpha_cic_sil_score    v1       cic_sil_score_fstpd30_v1_all            444
                       v2       cic_sil_score_fpd10_v2_t1               277
                                cic_sil_score_fpd0_v2_t1                277
                   

In [319]:
# Keep a separate copy of `result` under the name `masterdf` and preview it.
masterdf = result.copy()
masterdf.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


##### V2

##### Trench 1

## FPD0

## Test

In [320]:
# Test split, FPD0 target.
# Reads model version 'v2' scores from audit_balance.ml_model_run_details, joins them
# to the loan master table and to the per-loan default / maturity flags derived in the
# `deliquency` CTE, and keeps disbursed loans with a non-null score,
# flg_mature_fpd0 = 1 and trenchCategory = 'Trench 1'. Rows are labelled 'Test'.
# NOTE(review): the saved output of this cell is a header row only (no data) —
# confirm that an empty Test pull is expected for v2 / Trench 1.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  and trenchCategory = 'Trench 1'
  ;
"""
# Run the query and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [321]:
# Keep the Test-split rows as `df1`; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [322]:
# Train split, FPD0 target.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train'. Keeps disbursed loans with a non-null score, flg_mature_fpd0 = 1 and
# trenchCategory = 'Trench 1'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  and trenchCategory = 'Trench 1'
"""
# Run the query and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.507937,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2814450,bec8e418-3bfd-4b2d-8101-4ce87de7695e,60828144500017,0.452499,2024-09-01 15:01:19,2024-09-01,2024-09,Train,1,1,SIL ZERO,v2,Trench 1
2,2857531,00d400a3-b41e-43af-8939-7c002cc15e7e,60828575310019,0.821519,2024-09-18 13:38:26,2024-09-18,2024-09,Train,1,1,SIL ZERO,v2,Trench 1
3,2828930,733e6327-f0c9-4ff3-874f-f85eed70c4a2,60828289300019,0.639456,2024-09-07 17:54:03,2024-09-07,2024-09,Train,0,1,SIL ZERO,v2,Trench 1
4,2874011,9c6f5e53-15c5-4a7c-9cfc-9c519fced96a,60828740110018,0.174391,2024-09-23 15:06:21,2024-09-23,2024-09,Train,0,1,SIL ZERO,v2,Trench 1


In [323]:
# Keep the Train-split rows as `df2`.
df2 = dfd.copy()

In [324]:
# Stack the Test (df1) and Train (df2) pulls into one frame.
# Empty frames are left out of the concat: recent pandas emits a FutureWarning when an
# empty frame takes part, because a future version will stop ignoring such frames when
# working out the result dtypes. Excluding them keeps today's dtypes and removes the
# warning. If both pulls are empty, fall back to the original inputs so the result
# still carries their columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152093 entries, 0 to 152092
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             152093 non-null  Int64         
 1   digitalLoanAccountId   152093 non-null  object        
 2   loanAccountNumber      152093 non-null  object        
 3   Sil_Alpha_Stack_score  152093 non-null  float64       
 4   appln_submit_datetime  152093 non-null  datetime64[us]
 5   disbursementdate       152093 non-null  dbdate        
 6   Application_month      152093 non-null  object        
 7   Data_selection         152093 non-null  object        
 8   deffpd0                152093 non-null  Int64         
 9   flg_mature_fpd0        152093 non-null  Int64         
 10  new_loan_type          152093 non-null  object        
 11  modelVersionId         152093 non-null  object        
 12  trenchCategory         152093 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [325]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')

# Application counts per month (NaN included), in chronological order.
# This must be the cell's last expression: a bare expression followed by another
# statement is evaluated and thrown away, so the counts were never displayed before.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [326]:
# Spot-check the combined Test + Train frame.
df_concat.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.507937,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2814450,bec8e418-3bfd-4b2d-8101-4ce87de7695e,60828144500017,0.452499,2024-09-01 15:01:19,2024-09-01,2024-09,Train,1,1,SIL ZERO,v2,Trench 1
2,2857531,00d400a3-b41e-43af-8939-7c002cc15e7e,60828575310019,0.821519,2024-09-18 13:38:26,2024-09-18,2024-09,Train,1,1,SIL ZERO,v2,Trench 1
3,2828930,733e6327-f0c9-4ff3-874f-f85eed70c4a2,60828289300019,0.639456,2024-09-07 17:54:03,2024-09-07,2024-09,Train,0,1,SIL ZERO,v2,Trench 1
4,2874011,9c6f5e53-15c5-4a7c-9cfc-9c519fced96a,60828740110018,0.174391,2024-09-23 15:06:21,2024-09-23,2024-09,Train,0,1,SIL ZERO,v2,Trench 1


In [327]:
# Periodic FPD0 Gini for the stacked SIL score, passing the product, model-version
# and trench columns to the helper. This call supersedes the earlier
# calculate_periodic_gini / calculate_periodic_gini_producttype variants that were
# previously kept here as commented-out code.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffpd0',
    'FPD0',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [328]:
# Snapshot the FPD0 Gini results as `f0`; `gini_results` is reassigned for the next target.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.416175,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1
1,2024-09-01,2024-09-30,0.343366,Month,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1
2,2024-09-02,2024-09-08,0.341003,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1
3,2024-09-09,2024-09-15,0.338116,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1
4,2024-09-16,2024-09-22,0.330955,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1


In [329]:
# List the column names of `f0` (the Gini column is selected and renamed in the next cell).
f0.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [330]:
# Keep the identifying columns plus the FPD0 Gini, expose the Gini under the short
# name 'FPD0', and tag every row with the category label for this series.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'Sil_Alpha_Stack_score_FPD0_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FPD0_gini': 'FPD0'})
    .assign(category='alpha_stack_model_sil_fpd0_v2_t1')
)

In [331]:
# Number of Gini rows per loan type / version / trench combination.
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 1           75
SIL Competitor  v2       Trench 1           62
SIL ZERO        v2       Trench 1           75
SIL-Instore     v2       Trench 1           74
dtype: int64

In [332]:
# Preview the reshaped FPD0 frame.
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1,0.416175,alpha_stack_model_sil_fpd0_v2_t1
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1,0.343366,alpha_stack_model_sil_fpd0_v2_t1
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1,0.341003,alpha_stack_model_sil_fpd0_v2_t1
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1,0.338116,alpha_stack_model_sil_fpd0_v2_t1
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 1,0.330955,alpha_stack_model_sil_fpd0_v2_t1


## FPD10

## Test

In [333]:
# Test split, FPD10 target.
# Reads model version 'v2' scores from audit_balance.ml_model_run_details, joins them
# to the loan master table and to the per-loan default / maturity flags derived in the
# `deliquency` CTE, and keeps disbursed loans with a non-null score,
# flg_mature_fpd10 = 1 and trenchCategory = 'Trench 1'. Rows are labelled 'Test'.
# NOTE(review): the saved output of this cell is a header row only (no data) —
# confirm that an empty Test pull is expected for v2 / Trench 1.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  and trenchCategory = 'Trench 1'
  ;
"""
# Run the query and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [334]:
# Keep the Test-split rows as `df1`; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [335]:
# Train split, FPD10 target.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train'. Keeps disbursed loans with a non-null score, flg_mature_fpd10 = 1 and
# trenchCategory = 'Trench 1'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  del.flg_mature_fpd10,
    loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  and trenchCategory = 'Trench 1'
"""
# Run the query and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.507937,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2830422,87214b63-3e4c-431f-9791-37e40ce4664b,60828304220017,0.404952,2024-09-08 13:21:20,2024-09-08,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
2,2885726,935df5d0-8866-4210-968f-f02ef95d3007,60828857260015,0.568998,2024-09-27 14:00:22,2024-09-27,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
3,2813156,ece40240-f67d-4079-a0ea-1c668a1a046b,60828131560011,0.537611,2024-09-01 10:29:25,2024-09-01,2024-09,Train,1,1,SIL-Instore,v2,Trench 1
4,2868621,283c0b51-fcb5-4624-84ec-850da4501a72,60828686210019,0.493953,2024-09-22 08:54:05,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 1


In [336]:
# Keep the Train-split rows as `df2`.
df2 = dfd.copy()

In [337]:
# Stack the Test (df1) and Train (df2) pulls into one frame.
# Empty frames are left out of the concat: recent pandas emits a FutureWarning when an
# empty frame takes part, because a future version will stop ignoring such frames when
# working out the result dtypes. Excluding them keeps today's dtypes and removes the
# warning. If both pulls are empty, fall back to the original inputs so the result
# still carries their columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152093 entries, 0 to 152092
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             152093 non-null  Int64         
 1   digitalLoanAccountId   152093 non-null  object        
 2   loanAccountNumber      152093 non-null  object        
 3   Sil_Alpha_Stack_score  152093 non-null  float64       
 4   appln_submit_datetime  152093 non-null  datetime64[us]
 5   disbursementdate       152093 non-null  dbdate        
 6   Application_month      152093 non-null  object        
 7   Data_selection         152093 non-null  object        
 8   deffpd10               152093 non-null  Int64         
 9   flg_mature_fpd10       152093 non-null  Int64         
 10  new_loan_type          152093 non-null  object        
 11  modelVersionId         152093 non-null  object        
 12  trenchCategory         152093 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [338]:
# Check which trench categories are present in the combined frame (NaN counted too).
df_concat['trenchCategory'].value_counts(dropna=False)

trenchCategory
Trench 1    152093
Name: count, dtype: int64

In [339]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'],
    errors='coerce',
)

# Periodic FPD10 Gini for the stacked SIL score, passing the product, model-version
# and trench columns to the helper (supersedes the plain calculate_periodic_gini call).
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffpd10',
    'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [340]:
# Snapshot the FPD10 Gini results as `f1`.
f1=gini_results.copy()

In [341]:
# Preview the first ten FPD10 Gini rows.
f1.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.56121,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
1,2024-09-01,2024-09-30,0.451286,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
2,2024-09-02,2024-09-08,0.469647,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
3,2024-09-09,2024-09-15,0.46499,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
4,2024-09-16,2024-09-22,0.385632,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
5,2024-09-23,2024-09-29,0.462157,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
6,2024-09-30,2024-10-06,0.430138,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
7,2024-10-01,2024-10-31,0.468474,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
8,2024-10-07,2024-10-13,0.46592,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1
9,2024-10-14,2024-10-20,0.547029,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1


In [342]:
# Keep the identifying columns plus the FPD10 Gini, expose the Gini under the short
# name 'FPD10', and tag every row with the category label for this series.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'Sil_Alpha_Stack_score_FPD10_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FPD10_gini': 'FPD10'})
    .assign(category='alpha_stack_model_sil_fpd10_v2_t1')
)

In [343]:
# Preview the reshaped FPD10 frame.
f10.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD10,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1,0.56121,alpha_stack_model_sil_fpd10_v2_t1
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1,0.451286,alpha_stack_model_sil_fpd10_v2_t1
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1,0.469647,alpha_stack_model_sil_fpd10_v2_t1
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1,0.46499,alpha_stack_model_sil_fpd10_v2_t1
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 1,0.385632,alpha_stack_model_sil_fpd10_v2_t1


## FPD30

## Test

In [344]:
# Test split, FPD30 target.
# Reads model version 'v2' scores from audit_balance.ml_model_run_details, joins them
# to the loan master table and to the per-loan default / maturity flags derived in the
# `deliquency` CTE, and keeps disbursed loans with a non-null score,
# flg_mature_fpd30 = 1 and trenchCategory = 'Trench 1'. Rows are labelled 'Test'.
# NOTE(review): the saved output of this cell is a header row only (no data) —
# confirm that an empty Test pull is expected for v2 / Trench 1.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
       case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  and trenchCategory = 'Trench 1'
  ;
"""
# Run the query and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [345]:
# Keep the Test-split rows as `df1`; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [346]:
# Train split, FPD30 target.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train'. Keeps disbursed loans with a non-null score, flg_mature_fpd30 = 1 and
# trenchCategory = 'Trench 1'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  and trenchCategory = 'Trench 1'
"""
# Run the query and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across every selected column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.507937,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2867428,4c69e99b-9611-415e-bd30-a528007601eb,60828674280016,0.08146,2024-09-21 18:05:23,2024-09-21,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
2,2828230,66f6d89a-ed01-4acb-bba7-edede135d506,60828282300018,0.617658,2024-09-07 14:39:02,2024-09-07,2024-09,Train,0,1,SIL ZERO,v2,Trench 1
3,2829270,2fbee2d6-dbef-4ddb-bd38-dfc8c4e482db,60828292700011,0.788095,2024-09-07 20:29:51,2024-09-07,2024-09,Train,0,1,SIL-Instore,v2,Trench 1
4,2869067,ada7caa2-f0b6-4453-ae39-5dbe60eb61be,60828690670019,0.663884,2024-09-22 11:05:32,2024-09-22,2024-09,Train,1,1,SIL ZERO,v2,Trench 1


In [347]:
# Snapshot the Train FPD30 query result as df2 (concatenated with df1 in the next cell).
df2 = dfd.copy()

In [348]:
# Stack the two snapshots (df1 and df2) into a single frame for Gini calculation.
# Empty frames are dropped before concatenating: the saved output shows a warning
# raised on this concat line (presumably pandas' FutureWarning about empty or
# all-NA entries, since the preceding preview of df1's source is header-only).
# Excluding empty entries up front keeps today's result dtypes and silences it.
# If both frames are empty, df1 alone is used so the expected columns survive.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148817 entries, 0 to 148816
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             148817 non-null  Int64         
 1   digitalLoanAccountId   148817 non-null  object        
 2   loanAccountNumber      148817 non-null  object        
 3   Sil_Alpha_Stack_score  148817 non-null  float64       
 4   appln_submit_datetime  148817 non-null  datetime64[us]
 5   disbursementdate       148817 non-null  dbdate        
 6   Application_month      148817 non-null  object        
 7   Data_selection         148817 non-null  object        
 8   deffpd30               148817 non-null  Int64         
 9   flg_mature_fpd30       148817 non-null  Int64         
 10  new_loan_type          148817 non-null  object        
 11  modelVersionId         148817 non-null  object        
 12  trenchCategory         148817 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [349]:
# Force the score column to a numeric dtype; unparseable values become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'], errors='coerce'
)

# Gini of the score against the FPD30 default flag, computed by the notebook's
# helper with model version, trench and product columns passed explicitly.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffpd30', 'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [350]:
# Keep the FPD30 Gini results under their own name; `gini_results` is reassigned
# when the next metric is computed.
f2=gini_results.copy()

In [351]:
# Preview the first ten rows of the FPD30 Gini results.
f2.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.669121,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
1,2024-09-01,2024-09-30,0.480352,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
2,2024-09-02,2024-09-08,0.480446,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
3,2024-09-09,2024-09-15,0.503425,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
4,2024-09-16,2024-09-22,0.405969,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
5,2024-09-23,2024-09-29,0.494087,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
6,2024-09-30,2024-10-06,0.48058,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
7,2024-10-01,2024-10-31,0.499822,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
8,2024-10-07,2024-10-13,0.514678,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1
9,2024-10-14,2024-10-20,0.55459,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1


In [352]:
# Long-format FPD30 table: keep the identifying columns, rename the Gini column
# to the bare metric name, and tag every row with its model/metric/version/trench.
f20 = (
    f2[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Sil_Alpha_Stack_score_FPD30_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FPD30_gini': 'FPD30'})
    .assign(category='alpha_stack_model_sil_fpd30_v2_t1')
)

In [353]:
# Preview the reshaped FPD30 table (Gini column renamed to FPD30, category tag added).
f20.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1,0.669121,alpha_stack_model_sil_fpd30_v2_t1
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1,0.480352,alpha_stack_model_sil_fpd30_v2_t1
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1,0.480446,alpha_stack_model_sil_fpd30_v2_t1
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1,0.503425,alpha_stack_model_sil_fpd30_v2_t1
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 1,0.405969,alpha_stack_model_sil_fpd30_v2_t1


## FSPD30

## Test

In [354]:
# Test split, FSPD30.
# Reads v2 scores for the Alpha stacking model from audit_balance.ml_model_run_details,
# joins them to loan_master_table and to per-loan delinquency flags, and keeps only
# disbursed 'Trench 1' loans with flg_mature_fspd_30 = 1.
# The LEFT JOIN to loan_master_table effectively acts as an inner join, because the
# WHERE clause filters on loanmaster columns.
# In the saved run this query returned no rows (the preview below is header-only).
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [355]:
# Snapshot the Test FSPD30 query result as df1; `dfd` is overwritten by the
# Train query in the next code cell.
df1 = dfd.copy()

## Train

In [356]:
# Train split, FSPD30.
# Reads v2 scores for the Alpha stacking model from the training run-details table,
# joins them to loan_master_table and to per-loan delinquency flags, and keeps only
# disbursed 'Trench 1' loans with flg_mature_fspd_30 = 1.
# The LEFT JOIN to loan_master_table effectively acts as an inner join, because the
# WHERE clause filters on loanmaster columns.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and trenchCategory = 'Trench 1'
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.507937,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2820364,45401557-7aa9-4eb0-9a3f-ce30ad2421e8,60828203640019,0.764233,2024-09-03 18:11:12,2024-09-03,2024-09,Train,1,1,SIL-Instore,v2,Trench 1
2,2825988,a0341700-9a88-40f3-876e-9743d2a6e1a6,60828259880015,0.659743,2024-09-06 15:14:12,2024-09-06,2024-09,Train,1,1,SIL-Instore,v2,Trench 1
3,2813866,8f67b668-147d-4893-abb3-55013f856d80,60828138660013,0.582195,2024-09-01 13:10:56,2024-09-01,2024-09,Train,1,1,SIL-Instore,v2,Trench 1
4,2885845,bf77ebaf-cd33-4faf-a614-125e52380964,60828858450011,0.778172,2024-09-27 14:33:40,2024-09-27,2024-09,Train,0,1,SIL-Instore,v2,Trench 1


In [357]:
# Snapshot the Train FSPD30 query result as df2 (concatenated with df1 in the next cell).
df2 = dfd.copy()

In [358]:
# Stack the Test (df1) and Train (df2) FSPD30 snapshots into a single frame.
# Empty frames are dropped before concatenating: the Test query returned no rows
# in the saved run and the output shows a warning raised on this concat line
# (presumably pandas' FutureWarning about empty or all-NA entries). Excluding
# empty entries up front keeps today's result dtypes and silences it.
# If both frames are empty, df1 alone is used so the expected columns survive.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 138821 entries, 0 to 138820
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             138821 non-null  Int64         
 1   digitalLoanAccountId   138821 non-null  object        
 2   loanAccountNumber      138821 non-null  object        
 3   Sil_Alpha_Stack_score  138821 non-null  float64       
 4   appln_submit_datetime  138821 non-null  datetime64[us]
 5   disbursementdate       138821 non-null  dbdate        
 6   Application_month      138821 non-null  object        
 7   Data_selection         138821 non-null  object        
 8   deffspd30              138821 non-null  Int64         
 9   flg_mature_fspd_30     138821 non-null  Int64         
 10  new_loan_type          138821 non-null  object        
 11  modelVersionId         138821 non-null  object        
 12  trenchCategory         138821 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [359]:
# Force the score column to a numeric dtype; unparseable values become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'], errors='coerce'
)

# Gini of the score against the FSPD30 default flag, computed by the notebook's
# helper with model version, trench and product columns passed explicitly.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffspd30', 'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [360]:
# Keep the FSPD30 Gini results under their own name; `gini_results` is reassigned
# when the next metric is computed.
f3=gini_results.copy()

In [361]:
# Preview the first ten rows of the FSPD30 Gini results.
f3.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.611959,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
1,2024-09-01,2024-09-30,0.489682,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
2,2024-09-02,2024-09-08,0.540785,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
3,2024-09-09,2024-09-15,0.467597,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
4,2024-09-16,2024-09-22,0.430687,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
5,2024-09-23,2024-09-29,0.493013,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
6,2024-09-30,2024-10-06,0.498793,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
7,2024-10-01,2024-10-31,0.502221,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
8,2024-10-07,2024-10-13,0.523623,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1
9,2024-10-14,2024-10-20,0.509917,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1


In [362]:
# Long-format FSPD30 table: keep the identifying columns, rename the Gini column
# to the bare metric name, and tag every row with its model/metric/version/trench.
f30 = (
    f3[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Sil_Alpha_Stack_score_FSPD30_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FSPD30_gini': 'FSPD30'})
    .assign(category='alpha_stack_model_sil_fspd30_v2_t1')
)

In [363]:
# Preview the reshaped FSPD30 table (Gini column renamed to FSPD30, category tag added).
f30.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1,0.611959,alpha_stack_model_sil_fspd30_v2_t1
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1,0.489682,alpha_stack_model_sil_fspd30_v2_t1
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1,0.540785,alpha_stack_model_sil_fspd30_v2_t1
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1,0.467597,alpha_stack_model_sil_fspd30_v2_t1
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 1,0.430687,alpha_stack_model_sil_fspd30_v2_t1


## FSTPD30

## Test

In [364]:
# Test split, FSTPD30.
# Reads v2 scores for the Alpha stacking model from audit_balance.ml_model_run_details,
# joins them to loan_master_table and to per-loan delinquency flags, and keeps only
# disbursed 'Trench 1' loans with flg_mature_fstpd_30 = 1.
# The LEFT JOIN to loan_master_table effectively acts as an inner join, because the
# WHERE clause filters on loanmaster columns.
# In the saved run this query returned no rows (the preview below is header-only).
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [365]:
# Snapshot the Test FSTPD30 query result as df1; `dfd` is overwritten by the
# Train query in the next code cell.
df1 = dfd.copy()

## Train

In [366]:
# Train split, FSTPD30.
# Reads v2 scores for the Alpha stacking model from the training run-details table,
# joins them to loan_master_table and to per-loan delinquency flags, and keeps only
# disbursed 'Trench 1' loans with flg_mature_fstpd_30 = 1.
# The LEFT JOIN to loan_master_table effectively acts as an inner join, because the
# WHERE clause filters on loanmaster columns.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and trenchCategory = 'Trench 1'
"""
# Run the query, drop exact duplicate rows, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.507937,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2814450,bec8e418-3bfd-4b2d-8101-4ce87de7695e,60828144500017,0.452499,2024-09-01 15:01:19,2024-09-01,2024-09,Train,1,1,SIL ZERO,v2,Trench 1
2,2857531,00d400a3-b41e-43af-8939-7c002cc15e7e,60828575310019,0.821519,2024-09-18 13:38:26,2024-09-18,2024-09,Train,1,1,SIL ZERO,v2,Trench 1
3,2828930,733e6327-f0c9-4ff3-874f-f85eed70c4a2,60828289300019,0.639456,2024-09-07 17:54:03,2024-09-07,2024-09,Train,0,1,SIL ZERO,v2,Trench 1
4,2874011,9c6f5e53-15c5-4a7c-9cfc-9c519fced96a,60828740110018,0.174391,2024-09-23 15:06:21,2024-09-23,2024-09,Train,0,1,SIL ZERO,v2,Trench 1


In [367]:
# Snapshot the Train FSTPD30 query result as df2 (concatenated with df1 in the next cell).
df2 = dfd.copy()

In [368]:
# Stack the Test (df1) and Train (df2) FSTPD30 snapshots into a single frame.
# Empty frames are dropped before concatenating: the Test query returned no rows
# in the saved run and the output shows a warning raised on this concat line
# (presumably pandas' FutureWarning about empty or all-NA entries). Excluding
# empty entries up front keeps today's result dtypes and silences it.
# If both frames are empty, df1 alone is used so the expected columns survive.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 128176 entries, 0 to 128175
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             128176 non-null  Int64         
 1   digitalLoanAccountId   128176 non-null  object        
 2   loanAccountNumber      128176 non-null  object        
 3   Sil_Alpha_Stack_score  128176 non-null  float64       
 4   appln_submit_datetime  128176 non-null  datetime64[us]
 5   disbursementdate       128176 non-null  dbdate        
 6   Application_month      128176 non-null  object        
 7   Data_selection         128176 non-null  object        
 8   deffstpd30             128176 non-null  Int64         
 9   flg_mature_fstpd_30    128176 non-null  Int64         
 10  new_loan_type          128176 non-null  object        
 11  modelVersionId         128176 non-null  object        
 12  trenchCategory         128176 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [369]:
# Force the score column to a numeric dtype; unparseable values become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'], errors='coerce'
)

# Gini of the score against the FSTPD30 default flag, computed by the notebook's
# helper with model version, trench and product columns passed explicitly.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffstpd30', 'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [370]:
# Keep a separate copy of the FSTPD30 Gini results as f4 (used in the combined merge below).
f4=gini_results.copy()

In [371]:
# Preview the first ten rows of the FSTPD30 Gini results.
f4.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.520009,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
1,2024-09-01,2024-09-30,0.470711,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
2,2024-09-02,2024-09-08,0.515699,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
3,2024-09-09,2024-09-15,0.431272,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
4,2024-09-16,2024-09-22,0.443845,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
5,2024-09-23,2024-09-29,0.477663,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
6,2024-09-30,2024-10-06,0.449542,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
7,2024-10-01,2024-10-31,0.452209,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
8,2024-10-07,2024-10-13,0.485459,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1
9,2024-10-14,2024-10-20,0.456156,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1


In [372]:
# Long-format FSTPD30 table: keep the identifying columns, rename the Gini column
# to the bare metric name, and tag every row with its model/metric/version/trench.
f40 = (
    f4[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Sil_Alpha_Stack_score_FSTPD30_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='alpha_stack_model_sil_fstpd30_v2_t1')
)

In [373]:
# Preview the reshaped FSTPD30 table (Gini column renamed to FSTPD30, category tag added).
f40.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1,0.520009,alpha_stack_model_sil_fstpd30_v2_t1
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1,0.470711,alpha_stack_model_sil_fstpd30_v2_t1
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1,0.515699,alpha_stack_model_sil_fstpd30_v2_t1
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1,0.431272,alpha_stack_model_sil_fstpd30_v2_t1
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 1,0.443845,alpha_stack_model_sil_fstpd30_v2_t1


## combining the dataframe

In [374]:
import functools

# Per-metric Gini tables for this model/version/trench. f2, f3 and f4 are the
# FPD30, FSPD30 and FSTPD30 results built above; f0 and f1 come from earlier cells.
dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category','bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): 'bad_rate' is part of the join key and each input frame carries its
# own bad_rate label (e.g. FPD30 in f2, FSPD30 in f3), so rows from different frames
# do not match. The outer merge therefore stacks the frames and leaves the other
# metrics' Gini columns NaN, as the final_df preview further down shows. Confirm
# this long-with-gaps layout is what the downstream table expects.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Sil_Alpha_Stack_score_FPD10_gini',
       'Sil_Alpha_Stack_score_FPD30_gini',
       'Sil_Alpha_Stack_score_FSPD30_gini',
       'Sil_Alpha_Stack_score_FSTPD30_gini'], dtype=object)

In [375]:
# Suffix every Gini column with the model version and trench (v2 / t1) and
# capitalise the trench column name.
final_df = final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        'Sil_Alpha_Stack_score_FPD0_gini': 'Sil_Alpha_Stack_score_FPD0_v2_t1_gini',
        'Sil_Alpha_Stack_score_FPD10_gini': 'Sil_Alpha_Stack_score_FPD10_v2_t1_gini',
        'Sil_Alpha_Stack_score_FPD30_gini': 'Sil_Alpha_Stack_score_FPD30_v2_t1_gini',
        'Sil_Alpha_Stack_score_FSPD30_gini': 'Sil_Alpha_Stack_score_FSPD30_v2_t1_gini',
        'Sil_Alpha_Stack_score_FSTPD30_gini': 'Sil_Alpha_Stack_score_FSTPD30_v2_t1_gini',
    }
)
final_df.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_v2_t1_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'Sil_Alpha_Stack_score_FPD10_v2_t1_gini',
       'Sil_Alpha_Stack_score_FPD30_v2_t1_gini',
       'Sil_Alpha_Stack_score_FSPD30_v2_t1_gini',
       'Sil_Alpha_Stack_score_FSTPD30_v2_t1_gini'],
      dtype='object')

In [376]:
# Put the identifying columns first, then the five Gini columns, and append
# constant model / product labels for the uploaded table.
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'bad_rate', 'Trench_category',
    'Sil_Alpha_Stack_score_FPD0_v2_t1_gini',
    'Sil_Alpha_Stack_score_FPD10_v2_t1_gini',
    'Sil_Alpha_Stack_score_FPD30_v2_t1_gini',
    'Sil_Alpha_Stack_score_FSPD30_v2_t1_gini',
    'Sil_Alpha_Stack_score_FSTPD30_v2_t1_gini',
]].assign(
    Model_display_name='alpha_stack_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                  datetime64[ns]
end_date                                    datetime64[ns]
period                                              object
Model_Name                                          object
version                                             object
loan_type                                           object
bad_rate                                            object
Trench_category                                     object
Sil_Alpha_Stack_score_FPD0_v2_t1_gini              float64
Sil_Alpha_Stack_score_FPD10_v2_t1_gini             float64
Sil_Alpha_Stack_score_FPD30_v2_t1_gini             float64
Sil_Alpha_Stack_score_FSPD30_v2_t1_gini            float64
Sil_Alpha_Stack_score_FSTPD30_v2_t1_gini           float64
Model_display_name                                  object
Product_type                                        object
dtype: object

In [377]:
# Preview the table that is uploaded to BigQuery in the next cell.
final_df.head() 

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,Sil_Alpha_Stack_score_FPD0_v2_t1_gini,Sil_Alpha_Stack_score_FPD10_v2_t1_gini,Sil_Alpha_Stack_score_FPD30_v2_t1_gini,Sil_Alpha_Stack_score_FSPD30_v2_t1_gini,Sil_Alpha_Stack_score_FSTPD30_v2_t1_gini,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 1,0.416175,,,,,alpha_stack_model_sil,SIL
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 1,0.343366,,,,,alpha_stack_model_sil,SIL
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 1,0.341003,,,,,alpha_stack_model_sil,SIL
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 1,0.338116,,,,,alpha_stack_model_sil,SIL
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 1,0.330955,,,,,alpha_stack_model_sil,SIL


In [378]:
# Upload final_df to BigQuery as the v2 / Trench 1 Gini table for the Alpha stacking model.
# WRITE_TRUNCATE replaces whatever the destination table currently holds, so every run
# overwrites the previous upload.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_stack_model_sil_v2_t1_gini_v1"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # use "WRITE_APPEND" to add rows instead of replacing
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Block until the load job completes; raises if the job failed



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=4ff4c303-272e-4025-864b-024d633cf4e0>

In [379]:
import functools

# Long-format per-metric tables. f20, f30 and f40 are the FPD30, FSPD30 and FSTPD30
# tables built above; f01 and f10 come from earlier cells.
dataframes = [f01, f10, f20, f30, f40]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): 'category' is part of the join key and is a distinct constant in
# each visible input frame, so no rows match across frames. The outer merge stacks
# them, with each metric column populated only on its own rows.
alphastacksilv2t1 = functools.reduce(merge_dataframes, dataframes)

In [380]:
# Row count per category label in the merged frame; dropna=False also counts
# rows whose category is missing.
alphastacksilv2t1['category'].value_counts(dropna=False)

category
alpha_stack_model_sil_fpd0_v2_t1       286
alpha_stack_model_sil_fpd10_v2_t1      286
alpha_stack_model_sil_fpd30_v2_t1      280
alpha_stack_model_sil_fspd30_v2_t1     260
alpha_stack_model_sil_fstpd30_v2_t1    236
Name: count, dtype: int64

In [381]:
# Stack every per-model Gini frame into one table with a fresh 0..n-1 index.
result = pd.concat(
    [
        cicsilscorev1all,
        cicsilscorev2t1,
        cicsilscorev2t2,
        cicsilscorev2t3,
        alphastacksilv1all,
        alphastacksilv2t1,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (10083, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [382]:
# Row count per (Model_Name, version, category) combination in the stacked
# frame; dropna=False keeps combinations that contain missing values.
result[['Model_Name', 'version', 'category']].value_counts(dropna=False)

Model_Name             version  category                            
Alpha_cic_sil_score    v1       cic_sil_score_fpd0_v1_all               508
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd0_v1_all       508
Alpha_cic_sil_score    v1       cic_sil_score_fpd10_v1_all              496
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd10_v1_all      496
Alpha_cic_sil_score    v1       cic_sil_score_fpd30_v1_all              488
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd30_v1_all      488
Alpha_cic_sil_score    v1       cic_sil_score_fspd30_v1_all             468
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fspd30_v1_all     468
                                alpha_stack_model_sil_fstpd30_v1_all    444
Alpha_cic_sil_score    v1       cic_sil_score_fstpd30_v1_all            444
Sil_Alpha_Stack_score  v2       alpha_stack_model_sil_fpd0_v2_t1        286
                                alpha_stack_model_sil_fpd10_v2_t1       286
                   

In [383]:
# masterdf is an independent copy of result, so the two frames can be
# modified separately from here on.
masterdf = result.copy()
masterdf.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


##### Trench 2

## FPD0

## Test

In [384]:
# Test population for the FPD0 Gini of Sil_Alpha_Stack_score (model version 'v2').
# The query reads predictions from audit_balance.ml_model_run_details, joins them to
# the loan master table and to per-loan delinquency flags, and keeps disbursed loans
# with a non-null score, flg_mature_fpd0 = 1 and trenchCategory = 'Trench 2'.
# Every returned row is labelled Data_selection = 'Test'.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  and trenchCategory = 'Trench 2'
  ;
"""
# Run the query with the notebook's BigQuery client and load the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
# NOTE(review): the recorded output of this cell shows column headers only,
# i.e. the query returned no rows in that run.
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [385]:
# Snapshot the Test result: dfd is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [386]:
# Train population for the FPD0 Gini of Sil_Alpha_Stack_score (model version 'v2').
# Same shape as the Test query, but predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and every returned row
# is labelled Data_selection = 'Train'. Filters: disbursed loans, non-null score,
# flg_mature_fpd0 = 1 and trenchCategory = 'Trench 2'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  and trenchCategory = 'Trench 2'
"""
# Run the query with the notebook's BigQuery client and load the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2621237,88fbae32-01f0-43b9-a838-979a3ada4c36,60826212370015,0.793867,2024-09-03 11:36:32,2024-09-03,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
1,2534345,7300ec82-8916-459c-bb8c-373fafdeefa4,60825343450019,0.105926,2024-09-02 10:54:30,2024-09-02,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
2,1528189,86782387-6264-43db-a5eb-c5cfbdc6d2b4,60815281890019,0.208607,2024-09-11 13:54:59,2024-09-11,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
3,1673823,4884ad2e-ee4e-48ce-8000-e1de520e6bea,60816738230013,0.896384,2024-09-24 15:34:12,2024-09-24,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,1719580,218f4ec0-821a-4198-bfe9-a5ab744d0d0d,60817195800013,0.672252,2024-09-13 18:12:14,2024-09-13,2024-09,Train,0,1,SIL-Instore,v2,Trench 2


In [387]:
# Snapshot the Train result under its own name before dfd is reused.
df2 = dfd.copy()

In [388]:
# Combine the Test (df1) and Train (df2) populations into one frame.
# Either query can return zero rows, and pandas deprecates concatenating empty
# frames (it warns about how they affect the result dtypes). Dropping empty
# frames first keeps the same rows and dtypes and avoids the warning.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both populations are empty: keep an empty frame with the expected columns.
    df_concat = df1.head(0).copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3995 entries, 0 to 3994
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3995 non-null   Int64         
 1   digitalLoanAccountId   3995 non-null   object        
 2   loanAccountNumber      3995 non-null   object        
 3   Sil_Alpha_Stack_score  3995 non-null   float64       
 4   appln_submit_datetime  3995 non-null   datetime64[us]
 5   disbursementdate       3995 non-null   dbdate        
 6   Application_month      3995 non-null   object        
 7   Data_selection         3995 non-null   object        
 8   deffpd0                3995 non-null   Int64         
 9   flg_mature_fpd0        3995 non-null   Int64         
 10  new_loan_type          3995 non-null   object        
 11  modelVersionId         3995 non-null   object        
 12  trenchCategory         3995 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [389]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')

# Rows per application month, in chronological order. This is the cell's last
# expression so the notebook renders it; as the first statement its result was
# computed and then discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [390]:
# Periodic Gini of Sil_Alpha_Stack_score against the deffpd0 flag, labelled 'FPD0',
# broken down by model version, trench and product (loan type).
# NOTE(review): calculate_periodic_gini_prod_ver_trench is defined elsewhere in the
# notebook; the recorded output shows Week and Month rows per loan type.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffpd0',
    'FPD0',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [391]:
# Keep the FPD0 Gini table under its own name; gini_results is reassigned by the
# later bad-rate sections.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2
1,2024-09-01,2024-09-30,0.187166,Month,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2
2,2024-09-02,2024-09-08,0.565217,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2
3,2024-09-09,2024-09-15,-0.032258,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2
4,2024-09-16,2024-09-22,0.453333,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2


In [392]:
f0.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [393]:
# Keep the identifying columns plus the FPD0 Gini, shorten the Gini column name to
# 'FPD0', and tag every row with this model / bad-rate / version / trench label.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Sil_Alpha_Stack_score_FPD0_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FPD0_gini': 'FPD0'})
    .assign(category='alpha_stack_model_sil_fpd0_v2_t2')
)

In [394]:
# Number of Gini rows per loan type / model version / trench in f0.
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 2           74
SIL Competitor  v2       Trench 2           61
SIL ZERO        v2       Trench 2           69
SIL-Instore     v2       Trench 2           74
dtype: int64

In [395]:
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2,,alpha_stack_model_sil_fpd0_v2_t2
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2,0.187166,alpha_stack_model_sil_fpd0_v2_t2
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2,0.565217,alpha_stack_model_sil_fpd0_v2_t2
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2,-0.032258,alpha_stack_model_sil_fpd0_v2_t2
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 2,0.453333,alpha_stack_model_sil_fpd0_v2_t2


## FPD10

## Test

In [396]:
# Test population for the FPD10 Gini of Sil_Alpha_Stack_score (model version 'v2').
# The query reads predictions from audit_balance.ml_model_run_details, joins them to
# the loan master table and to per-loan delinquency flags, and keeps disbursed loans
# with a non-null score, flg_mature_fpd10 = 1 and trenchCategory = 'Trench 2'.
# Every returned row is labelled Data_selection = 'Test'.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  and trenchCategory = 'Trench 2'
  ;
"""
# Run the query with the notebook's BigQuery client and load the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
# NOTE(review): the recorded output of this cell shows column headers only,
# i.e. the query returned no rows in that run.
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [397]:
# Snapshot the Test result: dfd is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [398]:
# Train population for the FPD10 Gini of Sil_Alpha_Stack_score (model version 'v2').
# Same shape as the Test query, but predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and every returned row
# is labelled Data_selection = 'Train'. Filters: disbursed loans, non-null score,
# flg_mature_fpd10 = 1 and trenchCategory = 'Trench 2'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  del.flg_mature_fpd10,
    loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  and trenchCategory = 'Trench 2'
"""
# Run the query with the notebook's BigQuery client and load the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2320482,6bcef948-c404-4e60-b87d-4817a5fcee29,60823204820014,0.542238,2024-09-15 16:08:17,2024-09-15,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
1,2540950,5b243cf7-dfc3-4aad-aae5-91de46d51600,60825409500013,0.333223,2024-09-24 16:30:22,2024-09-24,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
2,2717921,eac2f561-19a4-4228-847c-8f42c36d5684,60827179210013,0.685761,2024-09-17 15:49:12,2024-09-17,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
3,2590423,f4be4837-58c0-4b53-ba10-744cc645e136,60825904230011,0.212503,2024-09-26 15:00:16,2024-10-01,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,2695046,cad800ae-d92a-4965-9ee8-bb4a6a3cd2a2,60826950460011,0.492258,2024-09-17 18:03:57,2024-09-17,2024-09,Train,0,1,SIL-Instore,v2,Trench 2


In [399]:
# Snapshot the Train result under its own name before dfd is reused.
df2 = dfd.copy()

In [400]:
# Combine the Test (df1) and Train (df2) populations into one frame.
# Either query can return zero rows, and pandas deprecates concatenating empty
# frames (it warns about how they affect the result dtypes). Dropping empty
# frames first keeps the same rows and dtypes and avoids the warning.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both populations are empty: keep an empty frame with the expected columns.
    df_concat = df1.head(0).copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3995 entries, 0 to 3994
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3995 non-null   Int64         
 1   digitalLoanAccountId   3995 non-null   object        
 2   loanAccountNumber      3995 non-null   object        
 3   Sil_Alpha_Stack_score  3995 non-null   float64       
 4   appln_submit_datetime  3995 non-null   datetime64[us]
 5   disbursementdate       3995 non-null   dbdate        
 6   Application_month      3995 non-null   object        
 7   Data_selection         3995 non-null   object        
 8   deffpd10               3995 non-null   Int64         
 9   flg_mature_fpd10       3995 non-null   Int64         
 10  new_loan_type          3995 non-null   object        
 11  modelVersionId         3995 non-null   object        
 12  trenchCategory         3995 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [401]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'],
    errors='coerce',
)

# Periodic Gini of Sil_Alpha_Stack_score against the deffpd10 flag, labelled 'FPD10',
# broken down by model version, trench and product (loan type).
# NOTE(review): calculate_periodic_gini_prod_ver_trench is defined elsewhere in the
# notebook; the recorded output shows Week and Month rows.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffpd10',
    'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [402]:
# Keep the FPD10 Gini table under its own name; gini_results is reassigned by the
# other bad-rate sections.
f1=gini_results.copy()

In [403]:
f1.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
1,2024-09-01,2024-09-30,0.30386,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
2,2024-09-02,2024-09-08,0.653333,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
3,2024-09-09,2024-09-15,,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
4,2024-09-16,2024-09-22,0.480392,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
5,2024-09-23,2024-09-29,0.20098,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
6,2024-09-30,2024-10-06,0.854545,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
7,2024-10-01,2024-10-31,0.542742,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
8,2024-10-07,2024-10-13,0.710526,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2
9,2024-10-14,2024-10-20,0.287879,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2


In [404]:
# Keep the identifying columns plus the FPD10 Gini, shorten the Gini column name to
# 'FPD10', and tag every row with this model / bad-rate / version / trench label.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'Sil_Alpha_Stack_score_FPD10_gini',
    ]]
    .rename(columns={'Sil_Alpha_Stack_score_FPD10_gini': 'FPD10'})
    .assign(category='alpha_stack_model_sil_fpd10_v2_t2')
)

In [405]:
f10.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD10,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2,,alpha_stack_model_sil_fpd10_v2_t2
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2,0.30386,alpha_stack_model_sil_fpd10_v2_t2
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2,0.653333,alpha_stack_model_sil_fpd10_v2_t2
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2,,alpha_stack_model_sil_fpd10_v2_t2
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 2,0.480392,alpha_stack_model_sil_fpd10_v2_t2


## FPD30

## Test

In [406]:
# Test population for the FPD30 Gini of Sil_Alpha_Stack_score (model version 'v2').
# The query reads predictions from audit_balance.ml_model_run_details, joins them to
# the loan master table and to per-loan delinquency flags, and keeps disbursed loans
# with a non-null score, flg_mature_fpd30 = 1 and trenchCategory = 'Trench 2'.
# Every returned row is labelled Data_selection = 'Test'.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
       case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  and trenchCategory = 'Trench 2'
  ;
"""
# Run the query with the notebook's BigQuery client and load the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
# NOTE(review): the recorded output of this cell shows column headers only,
# i.e. the query returned no rows in that run.
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [407]:
# Snapshot the Test result: dfd is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [408]:
# Train population for the FPD30 Gini of Sil_Alpha_Stack_score (model version 'v2').
# Same shape as the Test query, but predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and every returned row
# is labelled Data_selection = 'Train'. Filters: disbursed loans, non-null score,
# flg_mature_fpd30 = 1 and trenchCategory = 'Trench 2'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  and trenchCategory = 'Trench 2'
"""
# Run the query with the notebook's BigQuery client and load the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,1528189,86782387-6264-43db-a5eb-c5cfbdc6d2b4,60815281890019,0.208607,2024-09-11 13:54:59,2024-09-11,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
1,1673823,4884ad2e-ee4e-48ce-8000-e1de520e6bea,60816738230013,0.896384,2024-09-24 15:34:12,2024-09-24,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
2,2645326,698a1393-47a9-4d27-b8f3-c2ae222e4470,60826453260011,0.790144,2024-09-13 15:07:36,2024-09-13,2024-09,Train,0,1,SIL ZERO,v2,Trench 2
3,2333362,ecd5356a-839a-411b-b4ac-5f9275999713,60823333620014,0.638585,2024-09-14 15:38:48,2024-09-14,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,2711475,ade64bc3-8743-46fd-bf6f-be0b7baaab80,60827114750018,0.814106,2024-09-07 15:31:59,2024-09-07,2024-09,Train,0,1,SIL-Instore,v2,Trench 2


In [409]:
# Snapshot the Train-split FPD30 frame so it survives later reassignments of `dfd`.
df2 = dfd.copy()

In [410]:
# Stack the Test (df1) and Train (df2) FPD30 samples into one frame.
# A split can legitimately come back with zero rows (the recorded output above shows
# a header-only frame), and concatenating an empty frame is deprecated in pandas when
# it would influence the result dtypes. Only the non-empty frames are concatenated so
# the dtypes always come from real data.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    else df2.iloc[0:0].copy()  # both splits empty: keep the columns, zero rows
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3899 entries, 0 to 3898
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3899 non-null   Int64         
 1   digitalLoanAccountId   3899 non-null   object        
 2   loanAccountNumber      3899 non-null   object        
 3   Sil_Alpha_Stack_score  3899 non-null   float64       
 4   appln_submit_datetime  3899 non-null   datetime64[us]
 5   disbursementdate       3899 non-null   dbdate        
 6   Application_month      3899 non-null   object        
 7   Data_selection         3899 non-null   object        
 8   deffpd30               3899 non-null   Int64         
 9   flg_mature_fpd30       3899 non-null   Int64         
 10  new_loan_type          3899 non-null   object        
 11  modelVersionId         3899 non-null   object        
 12  trenchCategory         3899 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [411]:
# Force the score column to numeric; anything unparseable becomes NaN.
df_concat['Sil_Alpha_Stack_score'] = df_concat['Sil_Alpha_Stack_score'].pipe(
    pd.to_numeric, errors='coerce'
)

# Periodic FPD30 gini, broken down by model version, trench and product.
# The helper is defined earlier in the notebook; the recorded output below shows
# one row per Week/Month period.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffpd30',
    'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [412]:
# Keep the FPD30 gini table under its own name; `gini_results` is reassigned
# by the FSPD30 and FSTPD30 sections further down.
f2=gini_results.copy()

In [413]:
# Preview the FPD30 gini rows. In the recorded output Week and Month periods are
# interleaved by start_date and some weeks have a NaN gini.
f2.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
1,2024-09-01,2024-09-30,0.30386,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
2,2024-09-02,2024-09-08,0.653333,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
3,2024-09-09,2024-09-15,,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
4,2024-09-16,2024-09-22,0.480392,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
5,2024-09-23,2024-09-29,0.20098,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
6,2024-09-30,2024-10-06,0.854545,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
7,2024-10-01,2024-10-31,0.513194,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
8,2024-10-07,2024-10-13,0.710526,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2
9,2024-10-14,2024-10-20,0.176471,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2


In [414]:
# Long-format FPD30 table: keep the descriptive columns, expose the gini under the
# short metric name 'FPD30', and tag every row with its category label.
f20 = (
    f2.loc[
        :,
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version',
            'bad_rate', 'loan_type', 'trench_category',
            'Sil_Alpha_Stack_score_FPD30_gini',
        ],
    ]
    .rename(columns={'Sil_Alpha_Stack_score_FPD30_gini': 'FPD30'})
    .assign(category='alpha_stack_model_sil_fpd30_v2_t2')
)

In [415]:
# Sanity-check the renamed metric column and the category tag.
f20.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2,,alpha_stack_model_sil_fpd30_v2_t2
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2,0.30386,alpha_stack_model_sil_fpd30_v2_t2
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2,0.653333,alpha_stack_model_sil_fpd30_v2_t2
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2,,alpha_stack_model_sil_fpd30_v2_t2
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 2,0.480392,alpha_stack_model_sil_fpd30_v2_t2


## FSPD30

## Test

In [416]:
# Test-split FSPD30 sample for the alpha stack SIL model (version v2, 'Trench 2').
# Scores come from `audit_balance.ml_model_run_details` and are joined to the loan
# master and to per-loan delinquency flags; only disbursed loans with a matured
# FSPD30 observation (flg_mature_fspd_30 = 1) are kept. Rows are tagged 'Test'.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop exact duplicate rows in one pass.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [417]:
# Snapshot the Test-split FSPD30 frame before `dfd` is overwritten by the Train
# query below. The recorded output above shows only a header row (no rows returned).
df1 = dfd.copy()

## Train

In [418]:
# Train-split FSPD30 sample for the alpha stack SIL model (version v2, 'Trench 2').
# Scores come from the training run-details table and are joined to the loan master
# and to per-loan delinquency flags; only disbursed loans with a matured FSPD30
# observation (flg_mature_fspd_30 = 1) are kept. Rows are tagged 'Train'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and trenchCategory = 'Trench 2'
"""
# Run the query and drop exact duplicate rows in one pass.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2599974,63147312-c7e6-421c-a16c-348fc302f0f2,60825999740012,0.673824,2024-09-11 10:12:34,2024-09-11,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
1,2099472,873b56cd-66de-4864-ac76-ca2fa23b4c16,60820994720015,0.530946,2024-09-22 17:59:19,2024-09-29,2024-09,Train,1,1,SIL-Instore,v2,Trench 2
2,2140656,d89c500f-93ce-4d11-be4e-28e679d65138,60821406560016,0.356198,2024-09-01 09:44:48,2024-09-01,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
3,1776918,19a6f2c0-d92b-4cd7-9c7e-62d6ba5b6f84,60817769180017,0.674027,2024-09-13 17:30:25,2024-09-13,2024-09,Train,0,1,SIL-Instore,v2,Trench 2
4,2421050,9401c957-8de1-4e81-a44f-030d48068656,60824210500014,0.486287,2024-09-14 18:39:39,2024-09-14,2024-09,Train,0,1,SIL-Instore,v2,Trench 2


In [419]:
# Snapshot the Train-split FSPD30 frame so it survives later reassignments of `dfd`.
df2 = dfd.copy()

In [420]:
# Stack the Test (df1) and Train (df2) FSPD30 samples into one frame.
# A split can legitimately come back with zero rows (the recorded Test output above is
# header-only), and concatenating an empty frame is deprecated in pandas when it would
# influence the result dtypes. Only the non-empty frames are concatenated so the
# dtypes always come from real data.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    else df2.iloc[0:0].copy()  # both splits empty: keep the columns, zero rows
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3617 entries, 0 to 3616
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3617 non-null   Int64         
 1   digitalLoanAccountId   3617 non-null   object        
 2   loanAccountNumber      3617 non-null   object        
 3   Sil_Alpha_Stack_score  3617 non-null   float64       
 4   appln_submit_datetime  3617 non-null   datetime64[us]
 5   disbursementdate       3617 non-null   dbdate        
 6   Application_month      3617 non-null   object        
 7   Data_selection         3617 non-null   object        
 8   deffspd30              3617 non-null   Int64         
 9   flg_mature_fspd_30     3617 non-null   Int64         
 10  new_loan_type          3617 non-null   object        
 11  modelVersionId         3617 non-null   object        
 12  trenchCategory         3617 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [421]:
# Force the score column to numeric; anything unparseable becomes NaN.
df_concat['Sil_Alpha_Stack_score'] = df_concat['Sil_Alpha_Stack_score'].pipe(
    pd.to_numeric, errors='coerce'
)

# Periodic FSPD30 gini, broken down by model version, trench and product.
# The helper is defined earlier in the notebook; the recorded output below shows
# one row per Week/Month period.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffspd30',
    'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [422]:
# Keep the FSPD30 gini table under its own name; `gini_results` is reassigned
# by the FSTPD30 section further down.
f3=gini_results.copy()

In [423]:
# Preview the FSPD30 gini rows (Week and Month periods interleaved by start_date
# in the recorded output).
f3.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
1,2024-09-01,2024-09-30,0.372414,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
2,2024-09-02,2024-09-08,0.75,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
3,2024-09-09,2024-09-15,0.870968,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
4,2024-09-16,2024-09-22,0.324675,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
5,2024-09-23,2024-09-29,0.306667,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
6,2024-09-30,2024-10-06,0.834101,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
7,2024-10-01,2024-10-31,0.551781,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
8,2024-10-07,2024-10-13,0.705628,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2
9,2024-10-14,2024-10-20,0.0875,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2


In [424]:
# Long-format FSPD30 table: keep the descriptive columns, expose the gini under the
# short metric name 'FSPD30', and tag every row with its category label.
f30 = (
    f3.loc[
        :,
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version',
            'bad_rate', 'loan_type', 'trench_category',
            'Sil_Alpha_Stack_score_FSPD30_gini',
        ],
    ]
    .rename(columns={'Sil_Alpha_Stack_score_FSPD30_gini': 'FSPD30'})
    .assign(category='alpha_stack_model_sil_fspd30_v2_t2')
)

In [425]:
# Sanity-check the renamed metric column and the category tag.
f30.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2,,alpha_stack_model_sil_fspd30_v2_t2
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2,0.372414,alpha_stack_model_sil_fspd30_v2_t2
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2,0.75,alpha_stack_model_sil_fspd30_v2_t2
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2,0.870968,alpha_stack_model_sil_fspd30_v2_t2
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 2,0.324675,alpha_stack_model_sil_fspd30_v2_t2


## FSTPD30

## Test

In [426]:
# Test-split FSTPD30 sample for the alpha stack SIL model (version v2, 'Trench 2').
# Scores come from `audit_balance.ml_model_run_details` and are joined to the loan
# master and to per-loan delinquency flags; only disbursed loans with a matured
# FSTPD30 observation (flg_mature_fstpd_30 = 1) are kept. Rows are tagged 'Test'.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop exact duplicate rows in one pass.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [427]:
# Snapshot the Test-split FSTPD30 frame before `dfd` is overwritten by the Train
# query below. The recorded output above shows only a header row (no rows returned).
df1 = dfd.copy()

## Train

In [428]:
# Train-split FSTPD30 sample for the alpha stack SIL model (version v2, 'Trench 2').
# Scores come from the training run-details table and are joined to the loan master
# and to per-loan delinquency flags; only disbursed loans with a matured FSTPD30
# observation (flg_mature_fstpd_30 = 1) are kept. Rows are tagged 'Train'.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and trenchCategory = 'Trench 2'
"""
# Run the query and drop exact duplicate rows in one pass.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2619293,fde74a8c-c49d-4e38-a421-8a378e570891,60826192930011,0.654918,2024-12-06 17:46:31,2024-12-06,2024-12,Train,1,1,SIL-Instore,v2,Trench 2
1,3220712,43545d82-3baa-4102-a7ae-a914e2c12273,60832207120011,0.609357,2025-05-21 11:44:15,2025-05-21,2025-05,Train,0,1,SIL-Instore,v2,Trench 2
2,2749434,a91e9dab-ba95-4db2-b0ff-8e980291be0b,60827494340016,0.467953,2025-01-31 11:36:14,2025-01-31,2025-01,Train,0,1,SIL-Instore,v2,Trench 2
3,1376007,64129a5b-20a0-4d18-9e91-f09643d04a6b,60813760070013,0.892365,2025-04-29 17:31:09,2025-04-29,2025-04,Train,1,1,SIL-Instore,v2,Trench 2
4,3236492,375fb478-9987-4a44-b631-929f2ee385ff,60832364920016,0.742149,2025-02-04 10:48:57,2025-02-04,2025-02,Train,0,1,SIL-Instore,v2,Trench 2


In [429]:
# Snapshot the Train-split FSTPD30 frame so it survives later reassignments of `dfd`.
df2 = dfd.copy()

In [430]:
# Stack the Test (df1) and Train (df2) FSTPD30 samples into one frame.
# A split can legitimately come back with zero rows (the recorded Test output above is
# header-only), and concatenating an empty frame is deprecated in pandas when it would
# influence the result dtypes. Only the non-empty frames are concatenated so the
# dtypes always come from real data.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    else df2.iloc[0:0].copy()  # both splits empty: keep the columns, zero rows
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3302 entries, 0 to 3301
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3302 non-null   Int64         
 1   digitalLoanAccountId   3302 non-null   object        
 2   loanAccountNumber      3302 non-null   object        
 3   Sil_Alpha_Stack_score  3302 non-null   float64       
 4   appln_submit_datetime  3302 non-null   datetime64[us]
 5   disbursementdate       3302 non-null   dbdate        
 6   Application_month      3302 non-null   object        
 7   Data_selection         3302 non-null   object        
 8   deffstpd30             3302 non-null   Int64         
 9   flg_mature_fstpd_30    3302 non-null   Int64         
 10  new_loan_type          3302 non-null   object        
 11  modelVersionId         3302 non-null   object        
 12  trenchCategory         3302 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [431]:
# Force the score column to numeric; anything unparseable becomes NaN.
df_concat['Sil_Alpha_Stack_score'] = df_concat['Sil_Alpha_Stack_score'].pipe(
    pd.to_numeric, errors='coerce'
)

# Periodic FSTPD30 gini, broken down by model version, trench and product.
# The helper is defined earlier in the notebook; the recorded output below shows
# one row per Week/Month period.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'Sil_Alpha_Stack_score',
    'deffstpd30',
    'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [432]:
# Keep the FSTPD30 gini table under its own name, independent of `gini_results`.
f4=gini_results.copy()

In [433]:
# Preview the FSTPD30 gini rows. The recorded output includes a negative weekly
# gini (2024-10-14 week).
f4.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
1,2024-09-01,2024-09-30,0.389349,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
2,2024-09-02,2024-09-08,0.46087,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
3,2024-09-09,2024-09-15,0.433333,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
4,2024-09-16,2024-09-22,0.586207,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
5,2024-09-23,2024-09-29,0.380814,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
6,2024-09-30,2024-10-06,0.8,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
7,2024-10-01,2024-10-31,0.507143,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
8,2024-10-07,2024-10-13,0.69906,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2
9,2024-10-14,2024-10-20,-0.086022,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2


In [434]:
# Long-format FSTPD30 table: keep the descriptive columns, expose the gini under the
# short metric name 'FSTPD30', and tag every row with its category label.
f40 = (
    f4.loc[
        :,
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version',
            'bad_rate', 'loan_type', 'trench_category',
            'Sil_Alpha_Stack_score_FSTPD30_gini',
        ],
    ]
    .rename(columns={'Sil_Alpha_Stack_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='alpha_stack_model_sil_fstpd30_v2_t2')
)

In [435]:
# Sanity-check the renamed metric column and the category tag.
f40.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2,,alpha_stack_model_sil_fstpd30_v2_t2
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2,0.389349,alpha_stack_model_sil_fstpd30_v2_t2
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2,0.46087,alpha_stack_model_sil_fstpd30_v2_t2
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2,0.433333,alpha_stack_model_sil_fstpd30_v2_t2
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 2,0.586207,alpha_stack_model_sil_fstpd30_v2_t2


## Combining the dataframes

In [436]:
import functools

# Per-metric gini tables (FPD0, FPD10, FPD30, FSPD30, FSTPD30) and the key columns
# they share.
dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category','bad_rate']


def merge_dataframes(df1, df2):
    """Outer-merge two gini tables on the module-level ``common_columns``.

    NOTE(review): ``bad_rate`` is one of the keys and, in the recorded outputs, holds a
    different constant in every input table, so rows from different tables never
    match. The result is effectively the tables stacked, each row carrying only its
    own gini column (see the NaN pattern in the preview of ``final_df``).
    """
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list pairwise, left to right, into a single wide frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Sil_Alpha_Stack_score_FPD10_gini',
       'Sil_Alpha_Stack_score_FPD30_gini',
       'Sil_Alpha_Stack_score_FSPD30_gini',
       'Sil_Alpha_Stack_score_FSTPD30_gini'], dtype=object)

In [437]:
# Stamp the model version / trench ("v2_t2") into every gini column name and
# capitalise the trench column. The per-metric entries are generated from the metric
# list rather than written out one by one.
final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        **{
            f'Sil_Alpha_Stack_score_{metric}_gini': f'Sil_Alpha_Stack_score_{metric}_v2_t2_gini'
            for metric in ('FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30')
        },
    },
    inplace=True,
)
final_df.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_v2_t2_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'Sil_Alpha_Stack_score_FPD10_v2_t2_gini',
       'Sil_Alpha_Stack_score_FPD30_v2_t2_gini',
       'Sil_Alpha_Stack_score_FSPD30_v2_t2_gini',
       'Sil_Alpha_Stack_score_FSTPD30_v2_t2_gini'],
      dtype='object')

In [438]:
# Put the descriptive columns first, then the five gini columns, and append the
# constant model / product labels used by the uploaded table.
final_df = (
    final_df.loc[
        :,
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version',
            'loan_type', 'bad_rate', 'Trench_category',
            'Sil_Alpha_Stack_score_FPD0_v2_t2_gini',
            'Sil_Alpha_Stack_score_FPD10_v2_t2_gini',
            'Sil_Alpha_Stack_score_FPD30_v2_t2_gini',
            'Sil_Alpha_Stack_score_FSPD30_v2_t2_gini',
            'Sil_Alpha_Stack_score_FSTPD30_v2_t2_gini',
        ],
    ]
    .copy()
    .assign(Model_display_name='alpha_stack_model_sil', Product_type='SIL')
)
final_df.dtypes

start_date                                  datetime64[ns]
end_date                                    datetime64[ns]
period                                              object
Model_Name                                          object
version                                             object
loan_type                                           object
bad_rate                                            object
Trench_category                                     object
Sil_Alpha_Stack_score_FPD0_v2_t2_gini              float64
Sil_Alpha_Stack_score_FPD10_v2_t2_gini             float64
Sil_Alpha_Stack_score_FPD30_v2_t2_gini             float64
Sil_Alpha_Stack_score_FSPD30_v2_t2_gini            float64
Sil_Alpha_Stack_score_FSTPD30_v2_t2_gini           float64
Model_display_name                                  object
Product_type                                        object
dtype: object

In [439]:
# Preview the combined table. In the recorded output each row carries a value in at
# most one gini column (here FPD0); the other metric columns are NaN.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,Sil_Alpha_Stack_score_FPD0_v2_t2_gini,Sil_Alpha_Stack_score_FPD10_v2_t2_gini,Sil_Alpha_Stack_score_FPD30_v2_t2_gini,Sil_Alpha_Stack_score_FSPD30_v2_t2_gini,Sil_Alpha_Stack_score_FSTPD30_v2_t2_gini,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 2,,,,,,alpha_stack_model_sil,SIL
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 2,0.187166,,,,,alpha_stack_model_sil,SIL
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 2,0.565217,,,,,alpha_stack_model_sil,SIL
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 2,-0.032258,,,,,alpha_stack_model_sil,SIL
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 2,0.453333,,,,,alpha_stack_model_sil,SIL


In [440]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_stack_model_sil_v2_t2_gini_v1"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=b631b460-5ba1-4ce6-b45a-8801114f028f>

In [441]:
import functools

# Long-format per-metric tables (each already tagged with its own `category`) and
# the key columns they share.
dataframes = [f01, f10, f20, f30, f40]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']


def merge_dataframes(df1, df2):
    """Outer-merge two long-format gini tables on the module-level ``common_columns``.

    NOTE(review): this redefines the helper of the same name from the earlier
    combining cell; it behaves the same but reads the ``common_columns`` list assigned
    just above. ``category`` is a key and is constant per input table, so rows from
    different tables presumably never match and the result is the tables stacked.
    """
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list pairwise, left to right, into the v2 / Trench 2 long table.
alphastacksilv2t2 = functools.reduce(merge_dataframes, dataframes)

In [442]:
alphastacksilv2t2['category'].value_counts(dropna=False)

category
alpha_stack_model_sil_fpd0_v2_t2       278
alpha_stack_model_sil_fpd10_v2_t2      278
alpha_stack_model_sil_fpd30_v2_t2      271
alpha_stack_model_sil_fspd30_v2_t2     250
alpha_stack_model_sil_fstpd30_v2_t2    230
Name: count, dtype: int64

In [443]:
# Stack every per-model / per-version / per-trench Gini frame into one table
# with a fresh 0..n-1 index.
result = pd.concat(
    [
        cicsilscorev1all,
        cicsilscorev2t1,
        cicsilscorev2t2,
        cicsilscorev2t3,
        alphastacksilv1all,
        alphastacksilv2t1,
        alphastacksilv2t2,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (11390, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [444]:
result[['Model_Name', 'version', 'category']].value_counts(dropna=False)

Model_Name             version  category                            
Alpha_cic_sil_score    v1       cic_sil_score_fpd0_v1_all               508
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd0_v1_all       508
Alpha_cic_sil_score    v1       cic_sil_score_fpd10_v1_all              496
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd10_v1_all      496
Alpha_cic_sil_score    v1       cic_sil_score_fpd30_v1_all              488
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd30_v1_all      488
Alpha_cic_sil_score    v1       cic_sil_score_fspd30_v1_all             468
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fspd30_v1_all     468
Alpha_cic_sil_score    v1       cic_sil_score_fstpd30_v1_all            444
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fstpd30_v1_all    444
                       v2       alpha_stack_model_sil_fpd10_v2_t1       286
                                alpha_stack_model_sil_fpd0_v2_t1        286
                   

In [445]:
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (11390, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


##### Trench 3

## FPD0

## Test

In [446]:
# FPD0 / "Test" extract for the v2 Sil_Alpha_Stack_score, Trench 3.
#   * modelname  : v2 predictions from audit_balance.ml_model_run_details for the two
#                  stacking-model display names; a null/empty trenchCategory is mapped to 'ALL'.
#   * deliquency : per-loan default flags (deffpd0 ... deffstpd30) and maturity flags
#                  derived from the obs_min_inst_def* / min_inst_def* columns.
#   * final select keeps disbursed loans with a non-null score, flg_mature_fpd0 = 1 and
#     trenchCategory = 'Trench 3', and labels every row 'Test'.
# NOTE(review): loan_master_table is LEFT JOINed, but the INNER JOIN on
# loanmaster.loanAccountNumber and the WHERE filters on loanmaster.* drop unmatched
# rows, so it effectively behaves as an inner join.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  and trenchCategory = 'Trench 3'
  ;
"""
# Run the query, load the result into a DataFrame and drop rows that are exact
# duplicates across all columns (keeping the first occurrence).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
# Preview; in the saved run this extract came back with a header and no rows.
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [447]:
df1 = dfd.copy()

## Train

In [448]:
# FPD0 / "Train" extract for the v2 Sil_Alpha_Stack_score, Trench 3.
# Same shape as the Test extract, but predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled 'Train'.
#   * modelname  : v2 predictions for the two stacking-model display names; a
#                  null/empty trenchCategory is mapped to 'ALL'.
#   * deliquency : per-loan default flags and maturity flags derived from the
#                  obs_min_inst_def* / min_inst_def* columns.
#   * final select keeps disbursed loans with a non-null score, flg_mature_fpd0 = 1 and
#     trenchCategory = 'Trench 3'.
# NOTE(review): the LEFT JOIN to loan_master_table effectively behaves as an inner join
# because of the subsequent INNER JOIN and the WHERE filters on loanmaster.*.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    deffpd0,
  flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and flg_mature_fpd0 = 1
  and trenchCategory = 'Trench 3'
"""
# Run the query, load the result into a DataFrame and drop rows that are exact
# duplicates across all columns (keeping the first occurrence).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2138642,58417a5a-b7f5-4c54-8b80-49af5d941611,60821386420025,0.292525,2024-09-29 15:05:34,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
1,2460817,fa32b21a-3e43-4e3b-a306-665f5a71b0b9,60824608170021,0.258839,2024-09-09 11:55:53,2024-09-09,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
2,2308085,2ee5eb6f-ed2d-49e8-8aeb-b34c2245e4ca,60823080850025,0.461505,2024-09-07 19:26:16,2024-09-07,2024-09,Train,1,1,SIL-Instore,v2,Trench 3
3,2417073,4a63f8f6-683c-4242-8b76-95890560688f,60824170730028,0.10998,2024-09-14 09:59:00,2024-09-14,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2186702,9a1bd3d2-208d-46a5-abf4-0bcbccaf216d,60821867020027,0.230262,2024-09-29 15:40:44,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [449]:
df2 = dfd.copy()

In [450]:
# Combine the Test (df1) and Train (df2) FPD0 extracts into one frame.
# An extract can legitimately come back with zero rows. Passing an empty frame to
# pd.concat raises pandas' FutureWarning about empty/all-NA entries, and in a future
# pandas release those entries will take part in dtype resolution. Stack only the
# non-empty parts; if every part is empty, keep df1's (empty) schema.
_non_empty_parts = [part for part in (df1, df2) if not part.empty]
if _non_empty_parts:
    df_concat = pd.concat(_non_empty_parts, ignore_index=True)
else:
    df_concat = df1.reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8253 entries, 0 to 8252
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8253 non-null   Int64         
 1   digitalLoanAccountId   8253 non-null   object        
 2   loanAccountNumber      8253 non-null   object        
 3   Sil_Alpha_Stack_score  8253 non-null   float64       
 4   appln_submit_datetime  8253 non-null   datetime64[us]
 5   disbursementdate       8253 non-null   dbdate        
 6   Application_month      8253 non-null   object        
 7   Data_selection         8253 non-null   object        
 8   deffpd0                8253 non-null   Int64         
 9   flg_mature_fpd0        8253 non-null   Int64         
 10  new_loan_type          8253 non-null   object        
 11  modelVersionId         8253 non-null   object        
 12  trenchCategory         8253 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [451]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(df_concat['Sil_Alpha_Stack_score'], errors='coerce')

# Row count per application month (NaN included), in chronological order.
# Kept as the LAST expression so the notebook actually renders it; previously it
# ran first and its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [452]:
# Periodic Gini of Sil_Alpha_Stack_score against the FPD0 default flag, broken
# down by model version, trench and product (loan type).
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffpd0', 'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [453]:
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3
1,2024-09-01,2024-09-30,0.295499,Month,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3
2,2024-09-02,2024-09-08,0.209235,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3
3,2024-09-09,2024-09-15,0.388471,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3
4,2024-09-16,2024-09-22,-0.01459,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3


In [454]:
f0.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category'],
      dtype='object')

In [455]:
# Keep the identifying columns plus the FPD0 Gini, expose the Gini under the short
# name 'FPD0', and tag every row with the category label for this slice.
f01 = (
    f0[
        [
            'start_date',
            'end_date',
            'period',
            'Model_Name',
            'version',
            'bad_rate',
            'loan_type',
            'trench_category',
            'Sil_Alpha_Stack_score_FPD0_gini',
        ]
    ]
    .rename(columns={'Sil_Alpha_Stack_score_FPD0_gini': 'FPD0'})
    .assign(category='alpha_stack_model_sil_fpd0_v2_t3')
)

In [456]:
f0.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 3           74
SIL Competitor  v2       Trench 3           62
SIL ZERO        v2       Trench 3           73
SIL-Instore     v2       Trench 3           74
dtype: int64

In [457]:
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3,,alpha_stack_model_sil_fpd0_v2_t3
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3,0.295499,alpha_stack_model_sil_fpd0_v2_t3
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3,0.209235,alpha_stack_model_sil_fpd0_v2_t3
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3,0.388471,alpha_stack_model_sil_fpd0_v2_t3
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD0,Overall,Trench 3,-0.01459,alpha_stack_model_sil_fpd0_v2_t3


## FPD10

## Test

In [458]:
# FPD10 / "Test" extract for the v2 Sil_Alpha_Stack_score, Trench 3.
#   * modelname  : v2 predictions from audit_balance.ml_model_run_details for the two
#                  stacking-model display names; a null/empty trenchCategory is mapped to 'ALL'.
#   * deliquency : per-loan default flags and maturity flags derived from the
#                  obs_min_inst_def* / min_inst_def* columns.
#   * final select keeps disbursed loans with a non-null score, del.flg_mature_fpd10 = 1
#     and trenchCategory = 'Trench 3', and labels every row 'Test'.
# NOTE(review): the LEFT JOIN to loan_master_table effectively behaves as an inner join
# because of the subsequent INNER JOIN and the WHERE filters on loanmaster.*.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  and trenchCategory = 'Trench 3'
  ;
"""
# Run the query, load the result into a DataFrame and drop rows that are exact
# duplicates across all columns (keeping the first occurrence).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
# Preview; in the saved run this extract came back with a header and no rows.
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [459]:
df1 = dfd.copy()

## Train

In [460]:
# FPD10 / "Train" extract for the v2 Sil_Alpha_Stack_score, Trench 3.
# Predictions are read from dap_ds_poweruser_playground.ml_training_model_run_details
# and rows are labelled 'Train'.
#   * modelname  : v2 predictions for the two stacking-model display names; a
#                  null/empty trenchCategory is mapped to 'ALL'.
#   * deliquency : per-loan default flags and maturity flags derived from the
#                  obs_min_inst_def* / min_inst_def* columns.
#   * final select keeps disbursed loans with a non-null score, del.flg_mature_fpd10 = 1
#     and trenchCategory = 'Trench 3'.
# NOTE(review): the select list ends with a trailing comma before FROM; the saved
# output shows the query ran, but confirm this is intended rather than a leftover.
# NOTE(review): the LEFT JOIN to loan_master_table effectively behaves as an inner join
# because of the subsequent INNER JOIN and the WHERE filters on loanmaster.*.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd10,
  del.flg_mature_fpd10,
    loanmaster.new_loan_type,
    modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd10 = 1
  and trenchCategory = 'Trench 3'
"""
# Run the query, load the result into a DataFrame and drop rows that are exact
# duplicates across all columns (keeping the first occurrence).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2247904,15145d50-6068-4c7d-b523-9088aa0551b3,60822479040025,0.145021,2024-09-12 20:00:43,2024-09-12,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
1,2394079,d73f8027-7dd9-4ad0-a180-3defa1e110e7,60823940790034,0.241557,2024-09-26 19:55:58,2024-09-26,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
2,2505846,b4b94e79-966a-461e-b0b1-0765bfa842b2,60825058460022,0.428271,2024-09-06 14:14:18,2024-09-06,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
3,2322219,ea435a0e-9fc8-439d-b3e0-4964c275366a,60823222190027,0.285529,2024-09-08 18:33:42,2024-09-08,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2711094,f398ca14-b85f-49c7-b825-116fc0d76091,60827110940027,0.255193,2024-09-25 14:18:45,2024-09-25,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [461]:
df2 = dfd.copy()

In [462]:
# Combine the Test (df1) and Train (df2) FPD10 extracts into one frame.
# An extract can legitimately come back with zero rows. Passing an empty frame to
# pd.concat raises pandas' FutureWarning about empty/all-NA entries, and in a future
# pandas release those entries will take part in dtype resolution. Stack only the
# non-empty parts; if every part is empty, keep df1's (empty) schema.
_non_empty_parts = [part for part in (df1, df2) if not part.empty]
if _non_empty_parts:
    df_concat = pd.concat(_non_empty_parts, ignore_index=True)
else:
    df_concat = df1.reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8253 entries, 0 to 8252
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8253 non-null   Int64         
 1   digitalLoanAccountId   8253 non-null   object        
 2   loanAccountNumber      8253 non-null   object        
 3   Sil_Alpha_Stack_score  8253 non-null   float64       
 4   appln_submit_datetime  8253 non-null   datetime64[us]
 5   disbursementdate       8253 non-null   dbdate        
 6   Application_month      8253 non-null   object        
 7   Data_selection         8253 non-null   object        
 8   deffpd10               8253 non-null   Int64         
 9   flg_mature_fpd10       8253 non-null   Int64         
 10  new_loan_type          8253 non-null   object        
 11  modelVersionId         8253 non-null   object        
 12  trenchCategory         8253 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [463]:
# Make sure the score column is numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = df_concat['Sil_Alpha_Stack_score'].pipe(
    pd.to_numeric, errors='coerce'
)

# Periodic Gini of Sil_Alpha_Stack_score against the FPD10 default flag, broken
# down by model version, trench and product (loan type).
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffpd10', 'FPD10',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [464]:
f1=gini_results.copy()

In [465]:
f1.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
1,2024-09-01,2024-09-30,0.375286,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
2,2024-09-02,2024-09-08,0.441975,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
3,2024-09-09,2024-09-15,-0.042017,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
4,2024-09-16,2024-09-22,0.310861,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
5,2024-09-23,2024-09-29,0.684211,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
6,2024-09-30,2024-10-06,0.337097,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
7,2024-10-01,2024-10-31,0.478074,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
8,2024-10-07,2024-10-13,0.33945,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3
9,2024-10-14,2024-10-20,0.563559,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3


In [466]:
# Keep the identifying columns plus the FPD10 Gini, expose the Gini under the short
# name 'FPD10', and tag every row with the category label for this slice.
f10 = (
    f1[
        [
            'start_date',
            'end_date',
            'period',
            'Model_Name',
            'version',
            'bad_rate',
            'loan_type',
            'trench_category',
            'Sil_Alpha_Stack_score_FPD10_gini',
        ]
    ]
    .rename(columns={'Sil_Alpha_Stack_score_FPD10_gini': 'FPD10'})
    .assign(category='alpha_stack_model_sil_fpd10_v2_t3')
)

In [467]:
f10.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD10,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3,,alpha_stack_model_sil_fpd10_v2_t3
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3,0.375286,alpha_stack_model_sil_fpd10_v2_t3
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3,0.441975,alpha_stack_model_sil_fpd10_v2_t3
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3,-0.042017,alpha_stack_model_sil_fpd10_v2_t3
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD10,Overall,Trench 3,0.310861,alpha_stack_model_sil_fpd10_v2_t3


## FPD30

## Test

In [468]:
# FPD30 / "Test" extract for the v2 Sil_Alpha_Stack_score, Trench 3.
#   * modelname  : v2 predictions from audit_balance.ml_model_run_details for the two
#                  stacking-model display names; a null/empty trenchCategory is mapped to 'ALL'.
#   * deliquency : per-loan default flags and maturity flags derived from the
#                  obs_min_inst_def* / min_inst_def* columns.
#   * final select keeps disbursed loans with a non-null score, del.flg_mature_fpd30 = 1
#     and trenchCategory = 'Trench 3', and labels every row 'Test'.
# NOTE(review): the LEFT JOIN to loan_master_table effectively behaves as an inner join
# because of the subsequent INNER JOIN and the WHERE filters on loanmaster.*.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
       case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  and trenchCategory = 'Trench 3'
  ;
"""
# Run the query, load the result into a DataFrame and drop rows that are exact
# duplicates across all columns (keeping the first occurrence).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
# Preview; in the saved run this extract came back with a header and no rows.
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [469]:
df1 = dfd.copy()

## Train

In [470]:
# FPD30 / "Train" extract for the v2 Sil_Alpha_Stack_score, Trench 3.
# Predictions are read from dap_ds_poweruser_playground.ml_training_model_run_details
# and rows are labelled 'Train'.
#   * modelname  : v2 predictions for the two stacking-model display names; a
#                  null/empty trenchCategory is mapped to 'ALL'.
#   * deliquency : per-loan default flags and maturity flags derived from the
#                  obs_min_inst_def* / min_inst_def* columns.
#   * final select keeps disbursed loans with a non-null score, del.flg_mature_fpd30 = 1
#     and trenchCategory = 'Trench 3'.
# NOTE(review): the select list ends with a trailing comma before FROM; the saved
# output shows the query ran, but confirm this is intended rather than a leftover.
# NOTE(review): the LEFT JOIN to loan_master_table effectively behaves as an inner join
# because of the subsequent INNER JOIN and the WHERE filters on loanmaster.*.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fpd30 = 1
  and trenchCategory = 'Trench 3'
"""
# Run the query, load the result into a DataFrame and drop rows that are exact
# duplicates across all columns (keeping the first occurrence).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2040537,0ed20b81-6b13-47bb-8fdb-aa50fdfb24e3,60820405370031,0.418129,2024-09-15 18:02:02,2024-09-15,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
1,2523804,87885247-afd2-4b9a-bc93-cbc16be107e4,60825238040024,0.224186,2024-09-30 16:57:01,2024-09-30,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
2,2624258,3107ba14-1c31-47ec-b349-9aa29b40a4f7,60826242580021,0.237433,2024-09-11 19:17:29,2024-09-11,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
3,2454938,dd4bb9ba-87b1-4aa7-9c84-435617c13cbd,60824549380021,0.477409,2024-09-22 10:59:29,2024-09-22,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2185140,b1c992aa-2bad-433d-8ea7-8bd425e66840,60821851400021,0.177543,2024-09-08 09:43:55,2024-09-08,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [471]:
# Snapshot the Train extract (FPD30) as df2.
df2 = dfd.copy()

In [472]:
# Stack the df1 and df2 extracts into a single frame.
# Empty extracts are left out of the concat: pandas emits a FutureWarning when an
# empty frame takes part, because empty frames will start to influence the result
# dtypes in a future release. The previews of both extracts show the same columns,
# so dropping an empty one keeps the column layout and the current dtypes.
# If both extracts are empty, the original pair is concatenated so the columns
# are still carried over.
df_concat = pd.concat(
    [extract for extract in (df1, df2) if not extract.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8057 entries, 0 to 8056
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8057 non-null   Int64         
 1   digitalLoanAccountId   8057 non-null   object        
 2   loanAccountNumber      8057 non-null   object        
 3   Sil_Alpha_Stack_score  8057 non-null   float64       
 4   appln_submit_datetime  8057 non-null   datetime64[us]
 5   disbursementdate       8057 non-null   dbdate        
 6   Application_month      8057 non-null   object        
 7   Data_selection         8057 non-null   object        
 8   deffpd30               8057 non-null   Int64         
 9   flg_mature_fpd30       8057 non-null   Int64         
 10  new_loan_type          8057 non-null   object        
 11  modelVersionId         8057 non-null   object        
 12  trenchCategory         8057 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [473]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'], errors='coerce'
)

# Periodic Gini of the score against the deffpd30 label (tagged 'FPD30'); the helper
# is told which columns hold the model version, trench and product.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffpd30', 'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [474]:
# Keep the FPD30 Gini results under their own name; gini_results is reused for the next target.
f2=gini_results.copy()

In [475]:
# Preview the first 10 rows of the FPD30 Gini results.
f2.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
1,2024-09-01,2024-09-30,0.486456,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
2,2024-09-02,2024-09-08,0.670683,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
3,2024-09-09,2024-09-15,-0.283333,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
4,2024-09-16,2024-09-22,0.466667,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
5,2024-09-23,2024-09-29,0.982759,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
6,2024-09-30,2024-10-06,0.408425,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
7,2024-10-01,2024-10-31,0.550186,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
8,2024-10-07,2024-10-13,0.790909,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3
9,2024-10-14,2024-10-20,0.371901,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3


In [476]:
# Long-format view of the FPD30 results: reorder the columns, shorten the metric
# column to 'FPD30' and tag every row with its category label.
f20 = f2[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Sil_Alpha_Stack_score_FPD30_gini',
]].rename(columns={'Sil_Alpha_Stack_score_FPD30_gini': 'FPD30'})  # returns a new frame; f2 is untouched
f20['category'] = 'alpha_stack_model_sil_fpd30_v2_t3'

In [477]:
# Preview the reshaped FPD30 frame.
f20.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3,,alpha_stack_model_sil_fpd30_v2_t3
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3,0.486456,alpha_stack_model_sil_fpd30_v2_t3
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3,0.670683,alpha_stack_model_sil_fpd30_v2_t3
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3,-0.283333,alpha_stack_model_sil_fpd30_v2_t3
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FPD30,Overall,Trench 3,0.466667,alpha_stack_model_sil_fpd30_v2_t3


## FSPD30

## Test

In [478]:
# Test extract for the FSPD30 target: v2 / 'Trench 3' scores from the audit
# run-details table, joined to the loan master and the delinquency flags.
# Only disbursed loans with a non-null score and flg_mature_fspd_30 = 1 are kept.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop fully duplicated rows (first occurrence kept) and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [479]:
# Snapshot the Test extract (FSPD30) as df1; dfd is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [480]:
# Train extract for the FSPD30 target: v2 / 'Trench 3' scores from the training
# run-details table, joined to the loan master and the delinquency flags.
# Only disbursed loans with a non-null score and flg_mature_fspd_30 = 1 are kept.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and trenchCategory = 'Trench 3'
"""
# Run the query, then drop fully duplicated rows (first occurrence kept) and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2055071,30845fa6-de8f-472f-b534-211b34bf79a0,60820550710024,0.343464,2024-09-23 16:26:54,2024-09-23,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
1,2474730,d2703da9-6afd-454e-98dc-8bc2f3583304,60824747300026,0.141776,2024-09-14 12:03:08,2024-09-14,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
2,2363394,f259696e-6a5c-459d-bc92-13c521cc75a2,60823633940025,0.241045,2024-09-15 11:31:07,2024-09-15,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
3,2235819,db9a8371-d4dd-41e6-adfe-e4bafa8bcb0f,60822358190034,0.106311,2024-09-18 15:59:57,2024-09-18,2024-09,Train,0,1,SIL ZERO,v2,Trench 3
4,2420023,5cde8e3e-a9bb-42c1-968d-3b36563e2e8c,60824200230025,0.091217,2024-09-25 13:36:10,2024-09-25,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [481]:
# Snapshot the Train extract (FSPD30) as df2.
df2 = dfd.copy()

In [482]:
# Stack the Test (df1) and Train (df2) FSPD30 extracts into a single frame.
# Empty extracts are left out of the concat: pandas emits a FutureWarning when an
# empty frame takes part, because empty frames will start to influence the result
# dtypes in a future release. Both queries select the same columns, so dropping an
# empty extract keeps the column layout and the current dtypes.
# If both extracts are empty, the original pair is concatenated so the columns
# are still carried over.
df_concat = pd.concat(
    [extract for extract in (df1, df2) if not extract.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7546 entries, 0 to 7545
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7546 non-null   Int64         
 1   digitalLoanAccountId   7546 non-null   object        
 2   loanAccountNumber      7546 non-null   object        
 3   Sil_Alpha_Stack_score  7546 non-null   float64       
 4   appln_submit_datetime  7546 non-null   datetime64[us]
 5   disbursementdate       7546 non-null   dbdate        
 6   Application_month      7546 non-null   object        
 7   Data_selection         7546 non-null   object        
 8   deffspd30              7546 non-null   Int64         
 9   flg_mature_fspd_30     7546 non-null   Int64         
 10  new_loan_type          7546 non-null   object        
 11  modelVersionId         7546 non-null   object        
 12  trenchCategory         7546 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [483]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'], errors='coerce'
)

# Periodic Gini of the score against the deffspd30 label (tagged 'FSPD30'); the helper
# is told which columns hold the model version, trench and product.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffspd30', 'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [484]:
# Keep the FSPD30 Gini results under their own name; gini_results is reused for the next target.
f3=gini_results.copy()

In [485]:
# Preview the first 10 rows of the FSPD30 Gini results.
f3.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.785714,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
1,2024-09-01,2024-09-30,0.482831,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
2,2024-09-02,2024-09-08,0.413462,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
3,2024-09-09,2024-09-15,0.358974,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
4,2024-09-16,2024-09-22,0.400749,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
5,2024-09-23,2024-09-29,0.852174,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
6,2024-09-30,2024-10-06,0.437908,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
7,2024-10-01,2024-10-31,0.506863,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
8,2024-10-07,2024-10-13,0.790909,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3
9,2024-10-14,2024-10-20,0.559322,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3


In [486]:
# Long-format view of the FSPD30 results: reorder the columns, shorten the metric
# column to 'FSPD30' and tag every row with its category label.
f30 = f3[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Sil_Alpha_Stack_score_FSPD30_gini',
]].rename(columns={'Sil_Alpha_Stack_score_FSPD30_gini': 'FSPD30'})  # returns a new frame; f3 is untouched
f30['category'] = 'alpha_stack_model_sil_fspd30_v2_t3'

In [487]:
# Preview the reshaped FSPD30 frame.
f30.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3,0.785714,alpha_stack_model_sil_fspd30_v2_t3
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3,0.482831,alpha_stack_model_sil_fspd30_v2_t3
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3,0.413462,alpha_stack_model_sil_fspd30_v2_t3
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3,0.358974,alpha_stack_model_sil_fspd30_v2_t3
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FSPD30,Overall,Trench 3,0.400749,alpha_stack_model_sil_fspd30_v2_t3


## FSTPD30

## Test

In [488]:
# Test extract for the FSTPD30 target: v2 / 'Trench 3' scores from the audit
# run-details table, joined to the loan master and the delinquency flags.
# Only disbursed loans with a non-null score and flg_mature_fstpd_30 = 1 are kept.
sq = """ 
with modelname as 
(
  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop fully duplicated rows (first occurrence kept) and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()



Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [489]:
# Snapshot the Test extract (FSTPD30) as df1; dfd is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [490]:
# Train extract for the FSTPD30 target: v2 / 'Trench 3' scores from the training
# run-details table, joined to the loan master and the delinquency flags.
# Only disbursed loans with a non-null score and flg_mature_fstpd_30 = 1 are kept.
sq = """ 
  with modelname as 
  (  SELECT
    customerId,digitalLoanAccountId,prediction Sil_Alpha_Stack_score,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in  ('Alpha - StackingModel', 'alpha_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Sil_Alpha_Stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
    del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Sil_Alpha_Stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and trenchCategory = 'Trench 3'
"""
# Run the query, then drop fully duplicated rows (first occurrence kept) and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Sil_Alpha_Stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2577077,2e128c92-cace-4712-a131-dd5c5c8cb8d2,60825770770028,0.3157,2024-09-13 15:24:48,2024-09-13,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
1,2664139,9c326eb8-08e6-4c8c-91ff-e61bbeaf6a87,60826641390024,0.473754,2024-09-14 11:26:18,2024-09-14,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
2,2418601,169316a3-8920-492a-8377-0de3faa044ec,60824186010024,0.381505,2024-09-20 15:29:08,2024-09-20,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
3,2356400,35954a1b-ac5d-4799-bbaa-74a058bd6098,60823564000023,0.098992,2024-09-29 17:53:07,2024-09-29,2024-09,Train,0,1,SIL-Instore,v2,Trench 3
4,2566973,0be70cc9-ecce-4d7f-b697-4469b7c9398b,60825669730022,0.222947,2024-09-02 14:47:52,2024-09-02,2024-09,Train,0,1,SIL-Instore,v2,Trench 3


In [491]:
# Snapshot the Train extract (FSTPD30) as df2.
df2 = dfd.copy()

In [492]:
# Stack the Test (df1) and Train (df2) FSTPD30 extracts into a single frame.
# Empty extracts are left out of the concat: pandas emits a FutureWarning when an
# empty frame takes part, because empty frames will start to influence the result
# dtypes in a future release. Both queries select the same columns, so dropping an
# empty extract keeps the column layout and the current dtypes.
# If both extracts are empty, the original pair is concatenated so the columns
# are still carried over.
df_concat = pd.concat(
    [extract for extract in (df1, df2) if not extract.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6981 entries, 0 to 6980
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             6981 non-null   Int64         
 1   digitalLoanAccountId   6981 non-null   object        
 2   loanAccountNumber      6981 non-null   object        
 3   Sil_Alpha_Stack_score  6981 non-null   float64       
 4   appln_submit_datetime  6981 non-null   datetime64[us]
 5   disbursementdate       6981 non-null   dbdate        
 6   Application_month      6981 non-null   object        
 7   Data_selection         6981 non-null   object        
 8   deffstpd30             6981 non-null   Int64         
 9   flg_mature_fstpd_30    6981 non-null   Int64         
 10  new_loan_type          6981 non-null   object        
 11  modelVersionId         6981 non-null   object        
 12  trenchCategory         6981 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [493]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['Sil_Alpha_Stack_score'] = pd.to_numeric(
    df_concat['Sil_Alpha_Stack_score'], errors='coerce'
)

# Periodic Gini of the score against the deffstpd30 label (tagged 'FSTPD30'); the helper
# is told which columns hold the model version, trench and product.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'Sil_Alpha_Stack_score', 'deffstpd30', 'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [494]:
# Keep the FSTPD30 Gini results under their own name.
f4=gini_results.copy()

In [495]:
# Preview the first 10 rows of the FSTPD30 Gini results.
f4.head(10)

Unnamed: 0,start_date,end_date,Sil_Alpha_Stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-08-26,2024-09-01,0.785714,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
1,2024-09-01,2024-09-30,0.37593,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
2,2024-09-02,2024-09-08,0.302632,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
3,2024-09-09,2024-09-15,0.286957,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
4,2024-09-16,2024-09-22,0.275194,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
5,2024-09-23,2024-09-29,0.582143,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
6,2024-09-30,2024-10-06,0.342163,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
7,2024-10-01,2024-10-31,0.341516,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
8,2024-10-07,2024-10-13,0.393519,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3
9,2024-10-14,2024-10-20,0.243421,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3


In [496]:
# Long-format view of the FSTPD30 results: reorder the columns, shorten the metric
# column to 'FSTPD30' and tag every row with its category label.
f40 = f4[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category',
    'Sil_Alpha_Stack_score_FSTPD30_gini',
]].rename(columns={'Sil_Alpha_Stack_score_FSTPD30_gini': 'FSTPD30'})  # returns a new frame; f4 is untouched
f40['category'] = 'alpha_stack_model_sil_fstpd30_v2_t3'

In [497]:
# Preview the reshaped FSTPD30 frame.
f40.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3,0.785714,alpha_stack_model_sil_fstpd30_v2_t3
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3,0.37593,alpha_stack_model_sil_fstpd30_v2_t3
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3,0.302632,alpha_stack_model_sil_fstpd30_v2_t3
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3,0.286957,alpha_stack_model_sil_fstpd30_v2_t3
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,FSTPD30,Overall,Trench 3,0.275194,alpha_stack_model_sil_fstpd30_v2_t3


## Combining the dataframes

In [498]:
import functools

# Key columns the per-target Gini frames are joined on.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category','bad_rate']
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on the notebook-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list pairwise, left to right, into one wide frame.
# NOTE(review): 'bad_rate' is a join key and the previews of f2, f3 and f4 show a
# different bad_rate label in each (FPD30 / FSPD30 / FSTPD30), so rows from different
# frames presumably never match and the result is a row-wise stack with NaN in the
# other targets' Gini columns. Confirm this layout is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'Sil_Alpha_Stack_score_FPD10_gini',
       'Sil_Alpha_Stack_score_FPD30_gini',
       'Sil_Alpha_Stack_score_FSPD30_gini',
       'Sil_Alpha_Stack_score_FSTPD30_gini'], dtype=object)

In [499]:
# Capitalise the trench column and add the version/trench suffix (_v2_t3) to each
# Gini column name; the frame is modified in place.
final_df.rename(
    {
        'trench_category': 'Trench_category',
        'Sil_Alpha_Stack_score_FPD0_gini': 'Sil_Alpha_Stack_score_FPD0_v2_t3_gini',
        'Sil_Alpha_Stack_score_FPD10_gini': 'Sil_Alpha_Stack_score_FPD10_v2_t3_gini',
        'Sil_Alpha_Stack_score_FPD30_gini': 'Sil_Alpha_Stack_score_FPD30_v2_t3_gini',
        'Sil_Alpha_Stack_score_FSPD30_gini': 'Sil_Alpha_Stack_score_FSPD30_v2_t3_gini',
        'Sil_Alpha_Stack_score_FSTPD30_gini': 'Sil_Alpha_Stack_score_FSTPD30_v2_t3_gini',
    },
    axis='columns',
    inplace=True,
)
final_df.columns

Index(['start_date', 'end_date', 'Sil_Alpha_Stack_score_FPD0_v2_t3_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'Sil_Alpha_Stack_score_FPD10_v2_t3_gini',
       'Sil_Alpha_Stack_score_FPD30_v2_t3_gini',
       'Sil_Alpha_Stack_score_FSPD30_v2_t3_gini',
       'Sil_Alpha_Stack_score_FSTPD30_v2_t3_gini'],
      dtype='object')

In [500]:
# Put the key columns first and the five Gini columns after them, working on an
# independent copy of the selection.
final_df = final_df.loc[:, [
    'start_date',
    'end_date',
    'period',
    'Model_Name',
    'version',
    'loan_type',
    'bad_rate',
    'Trench_category',
    'Sil_Alpha_Stack_score_FPD0_v2_t3_gini',
    'Sil_Alpha_Stack_score_FPD10_v2_t3_gini',
    'Sil_Alpha_Stack_score_FPD30_v2_t3_gini',
    'Sil_Alpha_Stack_score_FSPD30_v2_t3_gini',
    'Sil_Alpha_Stack_score_FSTPD30_v2_t3_gini',
]].copy()

# Constant descriptor columns identifying the model and product of this table.
final_df['Model_display_name'] = 'alpha_stack_model_sil'
final_df['Product_type'] = 'SIL'

final_df.dtypes

start_date                                  datetime64[ns]
end_date                                    datetime64[ns]
period                                              object
Model_Name                                          object
version                                             object
loan_type                                           object
bad_rate                                            object
Trench_category                                     object
Sil_Alpha_Stack_score_FPD0_v2_t3_gini              float64
Sil_Alpha_Stack_score_FPD10_v2_t3_gini             float64
Sil_Alpha_Stack_score_FPD30_v2_t3_gini             float64
Sil_Alpha_Stack_score_FSPD30_v2_t3_gini            float64
Sil_Alpha_Stack_score_FSTPD30_v2_t3_gini           float64
Model_display_name                                  object
Product_type                                        object
dtype: object

In [501]:
# Preview the combined frame before it is loaded to BigQuery.
final_df.head() 

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,Sil_Alpha_Stack_score_FPD0_v2_t3_gini,Sil_Alpha_Stack_score_FPD10_v2_t3_gini,Sil_Alpha_Stack_score_FPD30_v2_t3_gini,Sil_Alpha_Stack_score_FSPD30_v2_t3_gini,Sil_Alpha_Stack_score_FSTPD30_v2_t3_gini,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 3,,,,,,alpha_stack_model_sil,SIL
1,2024-09-01,2024-09-30,Month,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 3,0.295499,,,,,alpha_stack_model_sil,SIL
2,2024-09-02,2024-09-08,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 3,0.209235,,,,,alpha_stack_model_sil,SIL
3,2024-09-09,2024-09-15,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 3,0.388471,,,,,alpha_stack_model_sil,SIL
4,2024-09-16,2024-09-22,Week,Sil_Alpha_Stack_score,v2,Overall,FPD0,Trench 3,-0.01459,,,,,alpha_stack_model_sil,SIL


In [502]:
# Load final_df into the BigQuery destination table, replacing its current contents.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_stack_model_sil_v2_t3_gini_v1"

job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # overwrite; "WRITE_APPEND" would add rows instead

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c9e906f3-cafb-41d8-b0b2-5dc033478c6a>

In [503]:
import functools

# Key columns the long-format frames are joined on; 'category' is part of the key here.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']
dataframes = [f01, f10, f20, f30, f40]


# NOTE(review): this re-defines merge_dataframes, which an earlier cell already defines
# with the same body; both read the notebook-level common_columns at call time.
def merge_dataframes(df1, df2):
    """Outer-join two frames on the notebook-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): f20, f30 and f40 each carry a different 'category' value, so rows from
# different frames presumably never match and this behaves as a row-wise stack. Confirm.
alphastacksilv2t3 = functools.reduce(merge_dataframes, dataframes)

In [504]:
# Row count per category label (NaN kept as its own bucket).
alphastacksilv2t3.loc[:, 'category'].value_counts(dropna=False)

category
alpha_stack_model_sil_fpd0_v2_t3       283
alpha_stack_model_sil_fpd10_v2_t3      283
alpha_stack_model_sil_fpd30_v2_t3      279
alpha_stack_model_sil_fspd30_v2_t3     259
alpha_stack_model_sil_fstpd30_v2_t3    236
Name: count, dtype: int64

In [505]:
# Stack the eight per-model/version frames row-wise with a fresh 0..n-1 index.
result = pd.concat(
    [
        cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3,
        alphastacksilv1all, alphastacksilv2t1, alphastacksilv2t2, alphastacksilv2t3,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head(n=5)

The shape of the concatenated dataframe result is:	 (12730, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [506]:
# Row count per (Model_Name, version, category) combination, NaN combinations included.
result.value_counts(subset=['Model_Name', 'version', 'category'], dropna=False)

Model_Name             version  category                            
Alpha_cic_sil_score    v1       cic_sil_score_fpd0_v1_all               508
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd0_v1_all       508
                                alpha_stack_model_sil_fpd10_v1_all      496
Alpha_cic_sil_score    v1       cic_sil_score_fpd10_v1_all              496
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fpd30_v1_all      488
Alpha_cic_sil_score    v1       cic_sil_score_fpd30_v1_all              488
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fspd30_v1_all     468
Alpha_cic_sil_score    v1       cic_sil_score_fspd30_v1_all             468
                                cic_sil_score_fstpd30_v1_all            444
Sil_Alpha_Stack_score  v1       alpha_stack_model_sil_fstpd30_v1_all    444
                       v2       alpha_stack_model_sil_fpd0_v2_t1        286
                                alpha_stack_model_sil_fpd10_v2_t1       286
                   

In [507]:
# Work on an independent copy so later edits to masterdf do not touch `result`.
masterdf = result.copy(deep=True)
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head(n=5)

The shape of masterdf is:	 (12730, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


# Beta SIL STACK Score Model

## V1

## FPD0

## Test

In [508]:
# Beta SIL stack model v1, FPD0, rows labelled 'Test'.
# The query reads scores from audit_balance.ml_model_run_details for the two display
# names listed in the WHERE clause, maps a NULL/empty trenchCategory to 'ALL', and joins
# them to loan_master_table and to the delinquency flags derived from loan_deliquency_data.
# Only disbursed loans with a non-null score and flg_mature_fpd0 = 1 are returned.
# appln_submit_datetime is startApplyDateTime when new_loan_type = 'Flex-up', otherwise
# termsAndConditionsSubmitDateTime, falling back to the model run's start_time.
# NOTE(review): end_time, modelDisplayName and the quote-normalised calcFeature selected
# in `cleaned` are not read by any later CTE or by the final select.
# NOTE(review): the LEFT JOIN on loan_master_table behaves as an inner join, because both
# the INNER JOIN condition and the WHERE filters require non-null loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  ;
"""
# Run the query, then drop rows that are exact duplicates across every returned column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,3476982,d7db9058-2876-41ec-a8e9-0708784ed77f,60834769820011,0.0588782122265403,2025-06-04 10:58:40,2025-06-04,2025-06,Test,0,1,SIL-Instore,v1,ALL
1,3476983,baaec35f-d41e-4009-a0f2-d32b7e5560e1,60834769830012,0.1888923831891509,2025-06-04 11:02:41,2025-06-04,2025-06,Test,0,1,SIL-Instore,v1,ALL
2,3477006,d39f2cb2-7a70-4200-bf99-6399a83ec9ec,60834770060016,0.1210550030032858,2025-06-04 11:04:30,2025-06-04,2025-06,Test,0,1,SIL-Instore,v1,ALL
3,3477050,503cfdad-7222-45d0-93b1-5318367f3eab,60834770500011,0.0956118793732722,2025-06-04 11:21:53,2025-06-04,2025-06,Test,0,1,SIL ZERO,v1,ALL
4,3477063,252fbc6d-2b57-485a-861a-19894ab65e48,60834770630015,0.0622375412237768,2025-06-04 11:31:29,2025-06-04,2025-06,Test,0,1,SIL-Instore,v1,ALL


In [509]:
# Snapshot the 'Test' pull; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy(deep=True)

## Train

In [510]:
# Beta SIL stack model v1, FPD0, rows labelled 'Train'.
# Same shape as the 'Test' query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details.
# A NULL/empty trenchCategory is mapped to 'ALL'; only disbursed loans with a non-null
# score and flg_mature_fpd0 = 1 are returned.
# NOTE(review): end_time, modelDisplayName and the quote-normalised calcFeature selected
# in `cleaned` are not read by any later CTE or by the final select.
# NOTE(review): the LEFT JOIN on loan_master_table behaves as an inner join, because both
# the INNER JOIN condition and the WHERE filters require non-null loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  ;

"""
# Run the query, then drop rows that are exact duplicates across every returned column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,1906261,aa42d0e6-aca4-4917-a065-8cfdf17ccc0d,60819062610016,0.120782,2023-02-18 14:49:12,2023-02-18,2023-02,Train,0,1,SIL-Instore,v1,ALL
1,2065194,71d1a2d3-d5c0-465e-a923-17a4cc7fd818,60820651940011,0.159329,2023-05-28 14:10:16,2023-05-28,2023-05,Train,1,1,SIL-Instore,v1,ALL
2,1968323,75d352e3-89e5-445d-8b5a-ee769f52c4d8,60819683230011,0.097269,2023-03-30 12:17:45,2023-03-30,2023-03,Train,1,1,SIL-Instore,v1,ALL
3,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,0.109295,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
4,1846848,ac41223e-0d42-4569-b21f-789bce021291,60818468480011,0.057879,2023-01-02 10:50:43,2023-01-02,2023-01,Train,0,1,SIL-Instore,v1,ALL


In [511]:
# Snapshot the 'Train' pull under its own name.
df2 = dfd.copy(deep=True)

In [512]:
# Row-bind df1 and df2 with a fresh index, then inspect dtypes and non-null counts.
df_concat = pd.concat(objs=[df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280404 entries, 0 to 280403
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             280404 non-null  object        
 1   digitalLoanAccountId   280404 non-null  object        
 2   loanAccountNumber      280404 non-null  object        
 3   sil_beta_stack_score   280404 non-null  object        
 4   appln_submit_datetime  280404 non-null  datetime64[us]
 5   disbursementdate       280404 non-null  dbdate        
 6   Application_month      280404 non-null  object        
 7   Data_selection         280404 non-null  object        
 8   deffpd0                280404 non-null  Int64         
 9   flg_mature_fpd0        280404 non-null  Int64         
 10  new_loan_type          280404 non-null  object        
 11  modelVersionId         280404 non-null  object        
 12  trenchCategory         280404 non-null  obje

In [513]:
# The score column is object dtype after the concat (see the df_concat.info() output),
# so coerce it to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Rows per application month in chronological order. This must be the cell's last
# expression: a bare expression anywhere else in the cell is evaluated and discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [514]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd0.csv")

In [515]:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd0', 'FPD0')
# Score column, FPD0 label column and the 'FPD0' tag go in positionally; the keyword
# arguments name the version, trench and product columns of df_concat.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffpd0', 'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [516]:
# Keep the FPD0 Gini table under its own name; `gini_results` is reused for FPD10.
f0 = gini_results.copy(deep=True)

In [517]:
# First ten rows of the FPD0 Gini table.
f0.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.207885,Month,sil_beta_stack_score,v1,FPD0,Overall,ALL
1,2023-01-02,2023-01-08,0.330022,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL
2,2023-01-09,2023-01-15,0.260013,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL
3,2023-01-16,2023-01-22,0.234899,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL
4,2023-01-23,2023-01-29,0.045143,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL
5,2023-01-30,2023-02-05,0.348225,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL
6,2023-02-01,2023-02-28,0.28873,Month,sil_beta_stack_score,v1,FPD0,Overall,ALL
7,2023-02-06,2023-02-12,0.316416,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL
8,2023-02-13,2023-02-19,0.318935,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL
9,2023-02-20,2023-02-26,0.257971,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL


In [518]:
# Select the key columns plus the FPD0 Gini, expose the Gini under the short name
# 'FPD0', and label every row with the category string for this model/version/trench.
f01 = (
    f0[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_stack_score_FPD0_gini']]
    .rename(columns={'sil_beta_stack_score_FPD0_gini': 'FPD0'})
    .assign(category='beta_stack_model_sil_fpd0_v1_all')
)

In [519]:
# Number of Gini rows per loan type / version / trench combination.
f0.groupby(by=['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v1       ALL                184
SIL Competitor  v1       ALL                 51
SIL Repeat      v1       ALL                 14
SIL ZERO        v1       ALL                 83
SIL-Instore     v1       ALL                184
dtype: int64

In [520]:
# Preview the reshaped FPD0 table.
f01.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2023-01-01,2023-01-31,Month,sil_beta_stack_score,v1,FPD0,Overall,ALL,0.207885,beta_stack_model_sil_fpd0_v1_all
1,2023-01-02,2023-01-08,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL,0.330022,beta_stack_model_sil_fpd0_v1_all
2,2023-01-09,2023-01-15,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL,0.260013,beta_stack_model_sil_fpd0_v1_all
3,2023-01-16,2023-01-22,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL,0.234899,beta_stack_model_sil_fpd0_v1_all
4,2023-01-23,2023-01-29,Week,sil_beta_stack_score,v1,FPD0,Overall,ALL,0.045143,beta_stack_model_sil_fpd0_v1_all


## FPD10

## Test

In [521]:
# Beta SIL stack model v1, FPD10, rows labelled 'Test'.
# The query reads scores from audit_balance.ml_model_run_details for the two display
# names listed in the WHERE clause, maps a NULL/empty trenchCategory to 'ALL', and joins
# them to loan_master_table and to the delinquency flags derived from loan_deliquency_data.
# Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are returned.
# NOTE(review): end_time, modelDisplayName and the quote-normalised calcFeature selected
# in `cleaned` are not read by any later CTE or by the final select.
# NOTE(review): the LEFT JOIN on loan_master_table behaves as an inner join, because both
# the INNER JOIN condition and the WHERE filters require non-null loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""
# Run the query, then drop rows that are exact duplicates across every returned column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2989461,bad4a268-8745-4a69-9d11-f68f8d3db0a2,60829894610011,0.0581568024427897,2025-04-01 09:37:47,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
1,3352360,b919beac-1a43-4a44-a896-51565863db3d,60833523600015,0.0899882088913893,2025-04-01 08:49:31,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
2,3352367,37c95d5e-8d01-4967-8069-dee129384854,60833523670012,0.0613390568526263,2025-04-01 08:55:22,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
3,3352416,7a79cb59-c29f-42fa-b11a-779cb376f381,60833524160014,0.1140411734398674,2025-04-01 09:35:56,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
4,3352463,b340f7b0-3491-49a8-b0b6-8189d7750f97,60833524630011,0.093775966002715,2025-04-01 09:47:54,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL


In [522]:
# Snapshot the 'Test' pull; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy(deep=True)

## Train

In [523]:
# Beta SIL stack model v1, FPD10, rows labelled 'Train'.
# Same shape as the 'Test' query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details.
# A NULL/empty trenchCategory is mapped to 'ALL'; only disbursed loans with a non-null
# score and flg_mature_fpd10 = 1 are returned.
# NOTE(review): end_time, modelDisplayName and the quote-normalised calcFeature selected
# in `cleaned` are not read by any later CTE or by the final select.
# NOTE(review): the LEFT JOIN on loan_master_table behaves as an inner join, because both
# the INNER JOIN condition and the WHERE filters require non-null loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  ;

"""
# Run the query, then drop rows that are exact duplicates across every returned column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2097290,51c33b30-591e-4e5e-b0fc-540d3c1aa151,60820972900013,0.02859,2023-06-17 15:54:16,2023-06-17,2023-06,Train,1,1,SIL-Instore,v1,ALL
1,1974309,38d6101f-dd27-494f-b3e3-24e04c0956ef,60819743090012,0.125401,2023-04-02 17:10:22,2023-04-02,2023-04,Train,0,1,SIL-Instore,v1,ALL
2,1863509,f713850a-1344-4783-83f5-73af3fd0ecaa,60818635090013,0.083476,2023-01-16 14:05:42,2023-01-16,2023-01,Train,0,1,SIL-Instore,v1,ALL
3,1863820,99f48e3e-3bb3-4a1b-986f-5c13e0b1401f,60818638200017,0.027621,2023-01-16 18:05:17,2023-01-16,2023-01,Train,0,1,SIL-Instore,v1,ALL
4,2048685,25f2609f-5e8d-4dd0-96a1-fdadcbeccfde,60820486850017,0.096803,2023-05-17 14:43:28,2023-05-17,2023-05,Train,0,1,SIL-Instore,v1,ALL


In [524]:
# Snapshot the 'Train' pull under its own name.
df2 = dfd.copy(deep=True)

In [525]:
# Row-bind df1 and df2 with a fresh index, then inspect dtypes and non-null counts.
df_concat = pd.concat(objs=[df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273188 entries, 0 to 273187
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             273188 non-null  object        
 1   digitalLoanAccountId   273188 non-null  object        
 2   loanAccountNumber      273188 non-null  object        
 3   sil_beta_stack_score   273188 non-null  object        
 4   appln_submit_datetime  273188 non-null  datetime64[us]
 5   disbursementdate       273188 non-null  dbdate        
 6   Application_month      273188 non-null  object        
 7   Data_selection         273188 non-null  object        
 8   deffpd10               273188 non-null  Int64         
 9   flg_mature_fpd10       273188 non-null  Int64         
 10  new_loan_type          273188 non-null  object        
 11  modelVersionId         273188 non-null  object        
 12  trenchCategory         273188 non-null  obje

In [526]:
# The score column is object dtype after the concat; coerce it to numeric, turning
# anything unparseable into NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(
    df_concat['sil_beta_stack_score'],
    errors='coerce',
)
# Rows per application month, latest month first (NaN months kept).
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending=False)

Application_month
2025-11        1
2025-10    11155
2025-09    13007
2025-08    14729
2025-07    15211
2025-06    11176
2025-05      961
2025-04      641
2025-03     4513
2025-02    12185
2025-01    13753
2024-12    29802
2024-11    15175
2024-10    14120
2024-09    15010
2024-08    15076
2024-07    12114
2024-06    12013
2024-05    10173
2024-04     9563
2024-03     5564
2024-02     3769
2024-01     4660
2023-12     9545
2023-11     4551
2023-10     4061
2023-09     3064
2023-08     2159
2023-07     1243
2023-06      804
2023-05      787
2023-04      727
2023-03      633
2023-02      534
2023-01      709
Name: count, dtype: int64

In [527]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd10.csv")

In [528]:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd10', 'FPD10')
# Score column, FPD10 label column and the 'FPD10' tag go in positionally; the keyword
# arguments name the version, trench and product columns of df_concat.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffpd10', 'FPD10',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [529]:
# Keep the FPD10 Gini table under its own name, independent of `gini_results`.
f1 = gini_results.copy(deep=True)

In [530]:
# First ten rows of the FPD10 Gini table.
f1.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.295238,Month,sil_beta_stack_score,v1,FPD10,Overall,ALL
1,2023-01-02,2023-01-08,0.390082,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL
2,2023-01-09,2023-01-15,0.302997,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL
3,2023-01-16,2023-01-22,0.475325,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL
4,2023-01-23,2023-01-29,0.189446,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL
5,2023-01-30,2023-02-05,0.30819,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL
6,2023-02-01,2023-02-28,0.269499,Month,sil_beta_stack_score,v1,FPD10,Overall,ALL
7,2023-02-06,2023-02-12,0.338139,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL
8,2023-02-13,2023-02-19,0.272636,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL
9,2023-02-20,2023-02-26,0.201709,Week,sil_beta_stack_score,v1,FPD10,Overall,ALL


In [531]:
# Select the key columns plus the FPD10 Gini, expose the Gini under the short name
# 'FPD10', and label every row with the category string for this model/version/trench.
f10 = (
    f1[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_stack_score_FPD10_gini']]
    .rename(columns={'sil_beta_stack_score_FPD10_gini': 'FPD10'})
    .assign(category='beta_stack_model_sil_fpd10_v1_all')
)

In [532]:
# Number of Gini rows per loan type / version / trench combination.
f10.groupby(by=['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v1       ALL                181
SIL Competitor  v1       ALL                 48
SIL Repeat      v1       ALL                 14
SIL ZERO        v1       ALL                 80
SIL-Instore     v1       ALL                181
dtype: int64

## FPD30

## Test

In [533]:
# Beta SIL stack model v1, FPD30, rows labelled 'Test'.
# The query reads scores from audit_balance.ml_model_run_details for the two display
# names listed in the WHERE clause, maps a NULL/empty trenchCategory to 'ALL', and joins
# them to loan_master_table and to the delinquency flags derived from loan_deliquency_data.
# Only disbursed loans with a non-null score and flg_mature_fpd30 = 1 are returned.
# NOTE(review): end_time, modelDisplayName and the quote-normalised calcFeature selected
# in `cleaned` are not read by any later CTE or by the final select.
# NOTE(review): the LEFT JOIN on loan_master_table behaves as an inner join, because both
# the INNER JOIN condition and the WHERE filters require non-null loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""
# Run the query, then drop rows that are exact duplicates across every returned column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2989461,bad4a268-8745-4a69-9d11-f68f8d3db0a2,60829894610011,0.0581568024427897,2025-04-01 09:37:47,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
1,3352360,b919beac-1a43-4a44-a896-51565863db3d,60833523600015,0.0899882088913893,2025-04-01 08:49:31,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
2,3352367,37c95d5e-8d01-4967-8069-dee129384854,60833523670012,0.0613390568526263,2025-04-01 08:55:22,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
3,3352416,7a79cb59-c29f-42fa-b11a-779cb376f381,60833524160014,0.1140411734398674,2025-04-01 09:35:56,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
4,3352463,b340f7b0-3491-49a8-b0b6-8189d7750f97,60833524630011,0.093775966002715,2025-04-01 09:47:54,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL


In [534]:
# Snapshot the 'Test' pull; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy(deep=True)

## Train

In [535]:
# Beta SIL stack model v1, FPD30, rows labelled 'Train'.
# Same shape as the 'Test' query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details.
# A NULL/empty trenchCategory is mapped to 'ALL'; only disbursed loans with a non-null
# score and flg_mature_fpd30 = 1 are returned.
# NOTE(review): end_time, modelDisplayName and the quote-normalised calcFeature selected
# in `cleaned` are not read by any later CTE or by the final select.
# NOTE(review): the LEFT JOIN on loan_master_table behaves as an inner join, because both
# the INNER JOIN condition and the WHERE filters require non-null loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  ;

"""
# Run the query, then drop rows that are exact duplicates across every returned column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,1861394,1854e131-211c-4197-b47a-c5a5ebc9a75f,60818613940012,0.036134,2023-01-14 13:32:29,2023-01-14,2023-01,Train,0,1,SIL-Instore,v1,ALL
1,1937329,a313c40b-6d2e-4f8d-82b2-0ae52857e51b,60819373290018,0.083061,2023-03-10 14:56:46,2023-03-10,2023-03,Train,0,1,SIL-Instore,v1,ALL
2,2094239,b5e80c5f-e1c4-4d1c-974e-ef574733780b,60820942390018,0.066222,2023-06-15 14:47:36,2023-06-15,2023-06,Train,0,1,SIL-Instore,v1,ALL
3,1984214,f1fb9407-c3a0-4a9c-bdb0-a5c3af157f79,60819842140011,0.067712,2023-04-08 17:50:41,2023-04-08,2023-04,Train,0,1,SIL-Instore,v1,ALL
4,2106205,559cca28-a32f-4f43-a8b7-9f3ac088a2d2,60821062050017,0.017755,2023-06-24 14:37:04,2023-06-24,2023-06,Train,0,1,SIL-Instore,v1,ALL


In [536]:
# Keep the 'Train' extract under its own name; df1 and df2 are concatenated in the next cell.
df2 = dfd.copy()

In [537]:
# Stack the df1 and df2 extracts into one frame with a fresh 0..n-1 index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Schema / null-count check; note that sil_beta_stack_score is reported as dtype object here.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 264110 entries, 0 to 264109
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             264110 non-null  object        
 1   digitalLoanAccountId   264110 non-null  object        
 2   loanAccountNumber      264110 non-null  object        
 3   sil_beta_stack_score   264110 non-null  object        
 4   appln_submit_datetime  264110 non-null  datetime64[us]
 5   disbursementdate       264110 non-null  dbdate        
 6   Application_month      264110 non-null  object        
 7   Data_selection         264110 non-null  object        
 8   deffpd30               264110 non-null  Int64         
 9   flg_mature_fpd30       264110 non-null  Int64         
 10  new_loan_type          264110 non-null  object        
 11  modelVersionId         264110 non-null  object        
 12  trenchCategory         264110 non-null  obje

In [538]:
# Cast the score column (dtype object after the concat) to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Row counts per application month, most recent month first (NaN months included).
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending = False)

Application_month
2025-11        1
2025-10     2090
2025-09    12994
2025-08    14729
2025-07    15211
2025-06    11176
2025-05      961
2025-04      641
2025-03     4513
2025-02    12185
2025-01    13753
2024-12    29802
2024-11    15175
2024-10    14120
2024-09    15010
2024-08    15076
2024-07    12114
2024-06    12013
2024-05    10173
2024-04     9563
2024-03     5564
2024-02     3769
2024-01     4660
2023-12     9545
2023-11     4551
2023-10     4061
2023-09     3064
2023-08     2159
2023-07     1243
2023-06      804
2023-05      787
2023-04      727
2023-03      633
2023-02      534
2023-01      709
Name: count, dtype: int64

In [539]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd30.csv")

In [540]:
# Build the Gini table for the FPD30 label. Score column, label column and output tag are
# passed positionally; the keyword arguments name the columns used for the
# version / trench / product breakdown. The helper is defined outside this section.
# Earlier call, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffpd30', 
    'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [541]:
# Keep the FPD30 Gini table under its own name: `gini_results` is reassigned in the FSPD30 section.
f2 = gini_results.copy()

In [542]:
f2.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.353518,Month,sil_beta_stack_score,v1,FPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.418182,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.563636,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.475325,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.133212,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.349288,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.284919,Month,sil_beta_stack_score,v1,FPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.282517,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.288865,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL
9,2023-02-20,2023-02-26,0.332061,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL


In [543]:
# Tidy view of the FPD30 Gini table: descriptor columns first, the Gini value renamed
# to 'FPD30', plus a constant 'category' tag identifying model / target / version / trench.
f20 = (
    f2[
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version',
            'bad_rate', 'loan_type', 'trench_category',
            'sil_beta_stack_score_FPD30_gini',
        ]
    ]
    .rename(columns={'sil_beta_stack_score_FPD30_gini': 'FPD30'})
    .assign(category='beta_stack_model_sil_fpd30_v1_all')
)
# Number of period rows available per product / version / trench / category.
f20.groupby(['loan_type', 'version', 'trench_category', 'category']).size()

loan_type       version  trench_category  category                         
Overall         v1       ALL              beta_stack_model_sil_fpd30_v1_all    179
SIL Competitor  v1       ALL              beta_stack_model_sil_fpd30_v1_all     46
SIL Repeat      v1       ALL              beta_stack_model_sil_fpd30_v1_all     14
SIL ZERO        v1       ALL              beta_stack_model_sil_fpd30_v1_all     78
SIL-Instore     v1       ALL              beta_stack_model_sil_fpd30_v1_all    179
dtype: int64

In [544]:
f20.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2023-01-01,2023-01-31,Month,sil_beta_stack_score,v1,FPD30,Overall,ALL,0.353518,beta_stack_model_sil_fpd30_v1_all
1,2023-01-02,2023-01-08,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL,0.418182,beta_stack_model_sil_fpd30_v1_all
2,2023-01-09,2023-01-15,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL,0.563636,beta_stack_model_sil_fpd30_v1_all
3,2023-01-16,2023-01-22,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL,0.475325,beta_stack_model_sil_fpd30_v1_all
4,2023-01-23,2023-01-29,Week,sil_beta_stack_score,v1,FPD30,Overall,ALL,0.133212,beta_stack_model_sil_fpd30_v1_all


## FSPD30

## Test

In [545]:
# Test-side FSPD30 extract: v1 scores of the beta stack SIL model read from
# audit_balance.ml_model_run_details, joined to loan master and per-loan delinquency flags.
#   * NULL / empty trenchCategory is mapped to 'ALL'.
#   * Kept rows: disbursed loans with a non-null score and flg_mature_fspd_30 = 1
#     (i.e. obs_min_inst_def30 >= 2); every row is labelled 'Test'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join, because the
# following INNER JOIN and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): `cleaned` also selects end_time, modelDisplayName and calcFeature,
# which no later part of this query reads.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""
# Runs on the notebook-level BigQuery `client`; `dfd` is the scratch name reused by every extract cell.
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
# NOTE(review): a loan scored more than once with different predictions would still appear repeatedly - confirm.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3316303,95418f08-9591-4687-b7af-09e732b6d459,60833163030012,0.0729578867866871,2025-09-05 14:55:21,2025-09-05,2025-09,Test,0,1,SIL Competitor,v1,ALL
1,3664338,a5872ed1-a008-45d6-b6e6-43d70c488255,60836643380018,0.0480576546551213,2025-09-04 19:38:34,2025-09-04,2025-09,Test,0,1,SIL ZERO,v1,ALL
2,3667455,03699ad5-dec4-4971-8820-858ebbf9bfa5,60836674550014,0.080972389690661,2025-09-06 12:36:43,2025-09-06,2025-09,Test,0,1,SIL-Instore,v1,ALL
3,3665873,3d1f7f4e-b17b-414c-acfe-ffcb265ee6f2,60836658730011,0.0371792452596862,2025-09-05 15:36:50,2025-09-05,2025-09,Test,0,1,SIL-Instore,v1,ALL
4,3664029,f0d83587-2353-4d64-a721-89e88481bfc6,60836640290018,0.0497725014376084,2025-09-04 17:11:21,2025-09-04,2025-09,Test,0,1,SIL ZERO,v1,ALL


In [546]:
# Keep the 'Test' FSPD30 extract under its own name: `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [547]:
# Train-side FSPD30 extract: v1 scores of the beta stack SIL model read from the
# training run-details table, joined to loan master and per-loan delinquency flags.
#   * NULL / empty trenchCategory is mapped to 'ALL'.
#   * Kept rows: disbursed loans with a non-null score and flg_mature_fspd_30 = 1
#     (i.e. obs_min_inst_def30 >= 2); every row is labelled 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join, because the
# following INNER JOIN and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): `cleaned` also selects end_time, modelDisplayName and calcFeature,
# which no later part of this query reads.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  ;

"""
# Runs on the notebook-level BigQuery `client`; `dfd` is the scratch name reused by every extract cell.
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
# NOTE(review): a loan scored more than once with different predictions would still appear repeatedly - confirm.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2077360,7d6f9c92-5e10-4093-bbf6-cdf641f806f9,60820773600011,0.075193,2023-06-04 17:09:37,2023-06-04,2023-06,Train,0,1,SIL-Instore,v1,ALL
1,2053611,2c31d42f-3e31-43d5-9a68-545b8d5ea1db,60820536110014,0.087423,2023-05-20 19:08:18,2023-05-20,2023-05,Train,0,1,SIL-Instore,v1,ALL
2,2089040,013130ee-781d-46b2-873c-82eb73434731,60820890400011,0.187408,2023-06-13 15:23:47,2023-06-13,2023-06,Train,1,1,SIL-Instore,v1,ALL
3,1940231,8aead027-23d5-4d1c-ad7f-bf9cbe4f72c0,60819402310016,0.046236,2023-03-12 16:38:50,2023-03-12,2023-03,Train,0,1,SIL-Instore,v1,ALL
4,1862319,81a49477-b05b-462c-be3b-00ac3549a2ad,60818623190015,0.095997,2023-01-15 12:13:43,2023-01-15,2023-01,Train,0,1,SIL-Instore,v1,ALL


In [548]:
# Keep the 'Train' FSPD30 extract under its own name; df1 and df2 are concatenated in the next cell.
df2 = dfd.copy()

In [549]:
# Stack the df1 and df2 FSPD30 extracts into one frame with a fresh 0..n-1 index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Schema / null-count check; note that sil_beta_stack_score is reported as dtype object here.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250819 entries, 0 to 250818
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             250819 non-null  object        
 1   digitalLoanAccountId   250819 non-null  object        
 2   loanAccountNumber      250819 non-null  object        
 3   sil_beta_stack_score   250819 non-null  object        
 4   appln_submit_datetime  250819 non-null  datetime64[us]
 5   disbursementdate       250819 non-null  dbdate        
 6   Application_month      250819 non-null  object        
 7   Data_selection         250819 non-null  object        
 8   deffspd30              250819 non-null  Int64         
 9   flg_mature_fspd_30     250819 non-null  Int64         
 10  new_loan_type          250819 non-null  object        
 11  modelVersionId         250819 non-null  object        
 12  trenchCategory         250819 non-null  obje

In [550]:
# Cast the score column (dtype object after the concat) to numeric before the Gini step;
# values that cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Kept as the cell's LAST expression so the per-month row counts are rendered.
# As a non-final statement the result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [551]:
# df_concat.to_csv(r"sil_beta_stack_scorefspd30.csv")

In [552]:
# Build the Gini table for the FSPD30 label. Score column, label column and output tag are
# passed positionally; the keyword arguments name the columns used for the
# version / trench / product breakdown. The helper is defined outside this section.
# Earlier call, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffspd30', 'FSPD30')
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffspd30', 
    'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [553]:
# Keep the FSPD30 Gini table under its own name: `gini_results` is reassigned in the FSTPD30 section.
f3 = gini_results.copy()

In [554]:
f3.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.310982,Month,sil_beta_stack_score,v1,FSPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.309075,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.405128,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.485906,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.2,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.309917,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.300713,Month,sil_beta_stack_score,v1,FSPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.366957,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.311029,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL
9,2023-02-20,2023-02-26,0.225143,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL


In [555]:
# Tidy view of the FSPD30 Gini table: descriptor columns first, the Gini value renamed
# to 'FSPD30', plus a constant 'category' tag identifying model / target / version / trench.
f30 = (
    f3[
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version',
            'bad_rate', 'loan_type', 'trench_category',
            'sil_beta_stack_score_FSPD30_gini',
        ]
    ]
    .rename(columns={'sil_beta_stack_score_FSPD30_gini': 'FSPD30'})
    .assign(category='beta_stack_model_sil_fspd30_v1_all')
)
# Number of period rows available per product / version / trench.
f30.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v1       ALL                174
SIL Competitor  v1       ALL                 41
SIL Repeat      v1       ALL                 14
SIL ZERO        v1       ALL                 73
SIL-Instore     v1       ALL                174
dtype: int64

In [556]:
f30.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2023-01-01,2023-01-31,Month,sil_beta_stack_score,v1,FSPD30,Overall,ALL,0.310982,beta_stack_model_sil_fspd30_v1_all
1,2023-01-02,2023-01-08,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL,0.309075,beta_stack_model_sil_fspd30_v1_all
2,2023-01-09,2023-01-15,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL,0.405128,beta_stack_model_sil_fspd30_v1_all
3,2023-01-16,2023-01-22,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL,0.485906,beta_stack_model_sil_fspd30_v1_all
4,2023-01-23,2023-01-29,Week,sil_beta_stack_score,v1,FSPD30,Overall,ALL,0.2,beta_stack_model_sil_fspd30_v1_all


## FSTPD30

## Test

In [557]:
# Test-side FSTPD30 extract: v1 scores of the beta stack SIL model read from
# audit_balance.ml_model_run_details, joined to loan master and per-loan delinquency flags.
#   * NULL / empty trenchCategory is mapped to 'ALL'.
#   * Kept rows: disbursed loans with a non-null score and flg_mature_fstpd_30 = 1
#     (i.e. obs_min_inst_def30 >= 3); every row is labelled 'Test'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join, because the
# following INNER JOIN and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): `cleaned` also selects end_time, modelDisplayName and calcFeature,
# which no later part of this query reads.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""
# Runs on the notebook-level BigQuery `client`; `dfd` is the scratch name reused by every extract cell.
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
# NOTE(review): a loan scored more than once with different predictions would still appear repeatedly - confirm.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3606879,c5b823b4-a2b9-4a3a-836f-cbffbba5bcfd,60836068790011,0.0448243631570753,2025-08-08 11:20:04,2025-08-08,2025-08,Test,0,1,SIL-Instore,v1,ALL
1,3603572,c62ab930-a2ec-48a1-be00-44f6e56d106c,60836035720012,0.054308379755642,2025-08-06 17:01:13,2025-08-06,2025-08,Test,0,1,SIL Competitor,v1,ALL
2,3603221,67c928fa-e469-4471-b6fa-0ccfc459d6b5,60836032210012,0.0827362045698328,2025-08-06 15:08:17,2025-08-06,2025-08,Test,0,1,SIL-Instore,v1,ALL
3,3452144,e5ced157-e148-4f14-8843-9e9a78bd3455,60834521440015,0.0586241028363989,2025-05-22 19:37:58,2025-05-22,2025-05,Test,0,1,SIL-Instore,v1,ALL
4,3381741,64dc0d73-2ee4-402f-a011-75b2a711d917,60833817410015,0.2030394711375377,2025-04-15 18:26:12,2025-04-15,2025-04,Test,0,1,SIL-Instore,v1,ALL


In [558]:
# Keep the 'Test' FSTPD30 extract under its own name: `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [559]:
# Train-side FSTPD30 extract: v1 scores of the beta stack SIL model read from the
# training run-details table, joined to loan master and per-loan delinquency flags.
#   * NULL / empty trenchCategory is mapped to 'ALL'.
#   * Kept rows: disbursed loans with a non-null score and flg_mature_fstpd_30 = 1
#     (i.e. obs_min_inst_def30 >= 3); every row is labelled 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join, because the
# following INNER JOIN and the WHERE clause both require non-null loanmaster columns.
# NOTE(review): `cleaned` also selects end_time, modelDisplayName and calcFeature,
# which no later part of this query reads.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;

"""
# Runs on the notebook-level BigQuery `client`; `dfd` is the scratch name reused by every extract cell.
dfd = client.query(sq).to_dataframe()
# Removes only rows that are identical in every column.
# NOTE(review): a loan scored more than once with different predictions would still appear repeatedly - confirm.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,1906261,aa42d0e6-aca4-4917-a065-8cfdf17ccc0d,60819062610016,0.120782,2023-02-18 14:49:12,2023-02-18,2023-02,Train,0,1,SIL-Instore,v1,ALL
1,2065194,71d1a2d3-d5c0-465e-a923-17a4cc7fd818,60820651940011,0.159329,2023-05-28 14:10:16,2023-05-28,2023-05,Train,0,1,SIL-Instore,v1,ALL
2,1968323,75d352e3-89e5-445d-8b5a-ee769f52c4d8,60819683230011,0.097269,2023-03-30 12:17:45,2023-03-30,2023-03,Train,1,1,SIL-Instore,v1,ALL
3,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,0.109295,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
4,1846848,ac41223e-0d42-4569-b21f-789bce021291,60818468480011,0.057879,2023-01-02 10:50:43,2023-01-02,2023-01,Train,0,1,SIL-Instore,v1,ALL


In [560]:
# Keep the 'Train' FSTPD30 extract under its own name; df1 and df2 are concatenated in the next cell.
df2 = dfd.copy()

In [561]:
# Stack the df1 and df2 FSTPD30 extracts into one frame with a fresh 0..n-1 index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Schema / null-count check; note that sil_beta_stack_score is reported as dtype object here.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 236609 entries, 0 to 236608
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             236609 non-null  object        
 1   digitalLoanAccountId   236609 non-null  object        
 2   loanAccountNumber      236609 non-null  object        
 3   sil_beta_stack_score   236609 non-null  object        
 4   appln_submit_datetime  236609 non-null  datetime64[us]
 5   disbursementdate       236609 non-null  dbdate        
 6   Application_month      236609 non-null  object        
 7   Data_selection         236609 non-null  object        
 8   deffstpd30             236609 non-null  Int64         
 9   flg_mature_fstpd_30    236609 non-null  Int64         
 10  new_loan_type          236609 non-null  object        
 11  modelVersionId         236609 non-null  object        
 12  trenchCategory         236609 non-null  obje

In [562]:
# Cast the score column (dtype object after the concat) to numeric before the Gini step;
# values that cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Kept as the cell's LAST expression so the per-month row counts are rendered.
# As a non-final statement the result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [563]:
# df_concat.to_csv(r"sil_beta_stack_scorefstpd30.csv")

In [564]:
# Build the Gini table for the FSTPD30 label. Score column, label column and output tag are
# passed positionally; the keyword arguments name the columns used for the
# version / trench / product breakdown. The helper is defined outside this section.
# Earlier call, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffstpd30', 'FSTPD30')
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffstpd30', 
    'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [565]:
# Keep the FSTPD30 Gini table under its own name so it can be merged with f0..f3 later.
f4 = gini_results.copy()

In [566]:
f4.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.317218,Month,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.304604,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.367139,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.373654,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.292769,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.362257,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.325214,Month,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.43507,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.326577,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL
9,2023-02-20,2023-02-26,0.167126,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL


In [567]:
# Tidy view of the FSTPD30 Gini table: descriptor columns first, the Gini value renamed
# to 'FSTPD30', plus a constant 'category' tag identifying model / target / version / trench.
f40 = (
    f4[
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version',
            'bad_rate', 'loan_type', 'trench_category',
            'sil_beta_stack_score_FSTPD30_gini',
        ]
    ]
    .rename(columns={'sil_beta_stack_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='beta_stack_model_sil_fstpd30_v1_all')
)
# Number of period rows available per product / version / trench.
f40.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v1       ALL                168
SIL Competitor  v1       ALL                 35
SIL Repeat      v1       ALL                 14
SIL ZERO        v1       ALL                 67
SIL-Instore     v1       ALL                168
dtype: int64

In [568]:
f40.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2023-01-01,2023-01-31,Month,sil_beta_stack_score,v1,FSTPD30,Overall,ALL,0.317218,beta_stack_model_sil_fstpd30_v1_all
1,2023-01-02,2023-01-08,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL,0.304604,beta_stack_model_sil_fstpd30_v1_all
2,2023-01-09,2023-01-15,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL,0.367139,beta_stack_model_sil_fstpd30_v1_all
3,2023-01-16,2023-01-22,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL,0.373654,beta_stack_model_sil_fstpd30_v1_all
4,2023-01-23,2023-01-29,Week,sil_beta_stack_score,v1,FSTPD30,Overall,ALL,0.292769,beta_stack_model_sil_fstpd30_v1_all


## Combining the dataframes

In [569]:
import functools

# Per-target Gini tables, merged left to right (f2 / f3 / f4 are the FPD30 / FSPD30 /
# FSTPD30 tables; f0 and f1 are presumably the FPD0 / FPD10 ones - built outside this section).
dataframes = [f0, f1, f2, f3, f4]
# Descriptor columns shared by every Gini table; they form the join key.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category','bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the notebook-level ``common_columns`` key.

    Parameters:
    df1: left DataFrame (the running result when used with ``functools.reduce``)
    df2: right DataFrame to fold in

    Returns:
    DataFrame: all rows of both inputs; rows are combined only where every key column matches
    """
    # NOTE(review): 'bad_rate' is part of the key and differs per table (e.g. 'FPD30' in f2,
    # 'FSPD30' in f3 per the previews above), so rows from different tables never match:
    # the result stacks the tables with one populated Gini column per row. Confirm this
    # layout is intended.
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the list pairwise: ((((f0 + f1) + f2) + f3) + f4).
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_stack_score_FPD10_gini',
       'sil_beta_stack_score_FPD30_gini',
       'sil_beta_stack_score_FSPD30_gini',
       'sil_beta_stack_score_FSTPD30_gini'], dtype=object)

In [570]:
# Rename the trench key and add the "_v1_all" tag to every Gini column name.
gini_column_renames = {
    'trench_category': 'Trench_category',
    'sil_beta_stack_score_FPD0_gini': 'sil_beta_stack_score_FPD0_v1_all_gini',
    'sil_beta_stack_score_FPD10_gini': 'sil_beta_stack_score_FPD10_v1_all_gini',
    'sil_beta_stack_score_FPD30_gini': 'sil_beta_stack_score_FPD30_v1_all_gini',
    'sil_beta_stack_score_FSPD30_gini': 'sil_beta_stack_score_FSPD30_v1_all_gini',
    'sil_beta_stack_score_FSTPD30_gini': 'sil_beta_stack_score_FSTPD30_v1_all_gini',
}
# Applied in place, so the same final_df object is modified.
final_df.rename(columns=gini_column_renames, inplace=True)
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_v1_all_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_stack_score_FPD10_v1_all_gini',
       'sil_beta_stack_score_FPD30_v1_all_gini',
       'sil_beta_stack_score_FSPD30_v1_all_gini',
       'sil_beta_stack_score_FSTPD30_v1_all_gini'],
      dtype='object')

In [571]:
# Columns carried over from the merged frame, in output order.
selected_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
    'sil_beta_stack_score_FPD0_v1_all_gini',
    'sil_beta_stack_score_FPD10_v1_all_gini',
    'sil_beta_stack_score_FPD30_v1_all_gini',
    'sil_beta_stack_score_FSPD30_v1_all_gini',
    'sil_beta_stack_score_FSTPD30_v1_all_gini',
]
# The original Trench_category column is not selected; it is re-created as the
# constant 'All', together with two further constant descriptor columns.
final_df = final_df[selected_columns].assign(
    Trench_category='All',
    Model_display_name='beta_stack_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                  datetime64[ns]
end_date                                    datetime64[ns]
period                                              object
Model_Name                                          object
version                                             object
loan_type                                           object
bad_rate                                            object
sil_beta_stack_score_FPD0_v1_all_gini              float64
sil_beta_stack_score_FPD10_v1_all_gini             float64
sil_beta_stack_score_FPD30_v1_all_gini             float64
sil_beta_stack_score_FSPD30_v1_all_gini            float64
sil_beta_stack_score_FSTPD30_v1_all_gini           float64
Trench_category                                     object
Model_display_name                                  object
Product_type                                        object
dtype: object

In [572]:
# Preview the first rows of the table that is uploaded in the next cell.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,sil_beta_stack_score_FPD0_v1_all_gini,sil_beta_stack_score_FPD10_v1_all_gini,sil_beta_stack_score_FPD30_v1_all_gini,sil_beta_stack_score_FSPD30_v1_all_gini,sil_beta_stack_score_FSTPD30_v1_all_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,sil_beta_stack_score,v1,Overall,FPD0,0.207885,,,,,All,beta_stack_model_sil,SIL
1,2023-01-02,2023-01-08,Week,sil_beta_stack_score,v1,Overall,FPD0,0.330022,,,,,All,beta_stack_model_sil,SIL
2,2023-01-09,2023-01-15,Week,sil_beta_stack_score,v1,Overall,FPD0,0.260013,,,,,All,beta_stack_model_sil,SIL
3,2023-01-16,2023-01-22,Week,sil_beta_stack_score,v1,Overall,FPD0,0.234899,,,,,All,beta_stack_model_sil,SIL
4,2023-01-23,2023-01-29,Week,sil_beta_stack_score,v1,Overall,FPD0,0.045143,,,,,All,beta_stack_model_sil,SIL


In [573]:
# Upload final_df to BigQuery using the module-level `client`.
# Destination is given as a fully qualified "project.dataset.table" id.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_sil_v1_all_gini_v1"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # replaces the existing table contents; "WRITE_APPEND" would add rows instead
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=39aabde0-1501-47da-acb0-84bed478c591>

In [574]:
import functools

# Long-format Gini frames (one per bad-rate definition) for the beta stack model.
dataframes = [f01, f10, f20, f30, f40]
# Join key for every pairwise merge; includes 'bad_rate' and 'category'.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']

def merge_dataframes(df1, df2):
    """Outer-join two frames on `common_columns` (looked up at call time from module scope)."""
    return df1.merge(df2, how='outer', on=common_columns)

# NOTE(review): each input appears to carry its own bad_rate/category value, so
# keys rarely match across frames and the outer join mostly stacks the rows --
# confirm this is the intended layout.
betastacksilv1all = functools.reduce(merge_dataframes, dataframes)

In [575]:
# Row count per category; dropna=False also reports rows with a missing category.
betastacksilv1all['category'].value_counts(dropna=False)

category
beta_stack_model_sil_fpd0_v1_all       516
beta_stack_model_sil_fpd10_v1_all      504
beta_stack_model_sil_fpd30_v1_all      496
beta_stack_model_sil_fspd30_v1_all     476
beta_stack_model_sil_fstpd30_v1_all    452
Name: count, dtype: int64

In [576]:
# Frames to stack, in output order: CIC score, alpha stack, then beta stack.
frames_to_stack = [
    cicsilscorev1all,
    cicsilscorev2t1,
    cicsilscorev2t2,
    cicsilscorev2t3,
    alphastacksilv1all,
    alphastacksilv2t1,
    alphastacksilv2t2,
    alphastacksilv2t3,
    betastacksilv1all,
]
# ignore_index=True gives the stacked frame a fresh 0..n-1 index.
result = pd.concat(frames_to_stack, ignore_index=True)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (15174, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [577]:
# Row count per (Model_Name, version, category), listed in descending index order.
result[['Model_Name', 'version', 'category']].value_counts(dropna=False).sort_index(ascending = False)

Model_Name             version  category                            
sil_beta_stack_score   v1       beta_stack_model_sil_fstpd30_v1_all     452
                                beta_stack_model_sil_fspd30_v1_all      476
                                beta_stack_model_sil_fpd30_v1_all       496
                                beta_stack_model_sil_fpd10_v1_all       504
                                beta_stack_model_sil_fpd0_v1_all        516
Sil_Alpha_Stack_score  v2       alpha_stack_model_sil_fstpd30_v2_t3     236
                                alpha_stack_model_sil_fstpd30_v2_t2     230
                                alpha_stack_model_sil_fstpd30_v2_t1     236
                                alpha_stack_model_sil_fspd30_v2_t3      259
                                alpha_stack_model_sil_fspd30_v2_t2      250
                                alpha_stack_model_sil_fspd30_v2_t1      260
                                alpha_stack_model_sil_fpd30_v2_t3       279
                   

In [578]:
# Independent copy of the stacked result, kept under the name masterdf.
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (15174, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## V2

## Trench 1

## FPD0

## Test

In [579]:
# Test sample: FPD0, model version v2, Trench 1.
# Scores are read from audit_balance.ml_model_run_details for the two beta
# stack display names; NULL/blank trenchCategory is mapped to 'ALL'. Rows are
# joined to the loan master table and to per-loan delinquency flags, then
# restricted to disbursed loans with a non-null score and flg_mature_fpd0 = 1.
# NOTE(review): the WHERE filters on loanmaster columns (and the inner join on
# loanmaster.loanAccountNumber) discard unmatched rows, so the LEFT JOIN
# effectively behaves as an inner join.
# NOTE(review): end_time, modelDisplayName and calcFeature are selected in
# `cleaned` but are not used by the later CTEs.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query with the module-level BigQuery client and load the result.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [580]:
# Keep the Test sample as df1; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [581]:
# Train sample: FPD0, model version v2, Trench 1.
# Same structure as the Test query, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are
# labelled 'Train'. NULL/blank trenchCategory is mapped to 'ALL'; the result is
# restricted to disbursed loans with a non-null score and flg_mature_fpd0 = 1.
# NOTE(review): the WHERE filters on loanmaster columns (and the inner join on
# loanmaster.loanAccountNumber) discard unmatched rows, so the LEFT JOIN
# effectively behaves as an inner join.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;

"""
# Run the query with the module-level BigQuery client and load the result.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.427991,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2811986,5e68b21a-8ddb-436e-b0b5-f1b2af5fa318,60828119860012,0.526576,2024-08-31 18:52:06,2024-08-31,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
2,2784995,87bf8950-4415-4c7f-bdd3-eade2e247690,60827849950014,0.286204,2024-08-23 18:43:26,2024-08-23,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
3,2808529,41b1cd2d-3a34-4f5a-a4fc-444ddab59873,60828085290014,0.582252,2024-08-30 18:39:55,2024-08-30,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
4,2760131,b3aa4afa-65a8-4bd2-b975-24f3db218d37,60827601310018,0.465169,2024-08-16 15:26:18,2024-08-16,2024-08,Train,0,1,SIL ZERO,v2,Trench 1


In [582]:
# Keep the Train sample as df2 for the concat in the next cell.
df2 = dfd.copy()

In [583]:
# Stack the Test (df1) and Train (df2) samples into one frame.
# Empty frames are excluded first: concatenating an empty frame triggers a
# pandas FutureWarning, and pandas recommends dropping such entries before the
# concat to keep the current result dtypes. Both queries select the same
# columns, so dropping an empty sample does not change the column layout.
non_empty_samples = [sample for sample in (df1, df2) if not sample.empty]
# If every sample is empty, fall back to the original inputs so the result is
# still an empty frame with the expected columns (pd.concat([]) would raise).
df_concat = pd.concat(non_empty_samples or [df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 212101 entries, 0 to 212100
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             212101 non-null  Int64         
 1   digitalLoanAccountId   212101 non-null  object        
 2   loanAccountNumber      212101 non-null  object        
 3   sil_beta_stack_score   212101 non-null  float64       
 4   appln_submit_datetime  212101 non-null  datetime64[us]
 5   disbursementdate       212101 non-null  dbdate        
 6   Application_month      212101 non-null  object        
 7   Data_selection         212101 non-null  object        
 8   deffpd0                212101 non-null  Int64         
 9   flg_mature_fpd0        212101 non-null  Int64         
 10  new_loan_type          212101 non-null  object        
 11  modelVersionId         212101 non-null  object        
 12  trenchCategory         212101 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [584]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Row count per application month (missing months included), newest month first.
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending = False)

Application_month
2025-11        1
2025-10     5984
2025-09    12181
2025-08    13776
2025-07    14176
2025-06    13899
2025-05    15237
2025-04    15102
2025-03    14155
2025-02    11430
2025-01    12676
2024-12    27319
2024-11    14070
2024-10    13318
2024-09    14356
2024-08    14421
Name: count, dtype: int64

In [585]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd0.csv")

In [586]:
# Earlier call, without the version / trench / product arguments:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd0', 'FPD0')
# Periodic Gini of the score against the deffpd0 flag, labelled 'FPD0'.
# NOTE(review): the helper is defined elsewhere in the notebook; the meaning of
# the four positional arguments is inferred from the values passed -- confirm
# against its signature.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffpd0', 
    'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [587]:
# Keep the FPD0 Gini table as f0; `gini_results` is reassigned in the FPD10 section.
f0 = gini_results.copy()

In [588]:
# Preview the first ten rows of the FPD0 Gini table.
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.294608,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
1,2024-08-01,2024-08-31,0.325917,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
2,2024-08-05,2024-08-11,0.322586,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
3,2024-08-12,2024-08-18,0.350014,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
4,2024-08-19,2024-08-25,0.335327,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
5,2024-08-26,2024-09-01,0.319576,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
6,2024-09-01,2024-09-30,0.329625,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
7,2024-09-02,2024-09-08,0.320618,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
8,2024-09-09,2024-09-15,0.329537,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1
9,2024-09-16,2024-09-22,0.31117,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1


In [589]:
# FPD0 Gini table (v2, Trench 1) in long format: the Gini column is renamed to
# the bad-rate label and every row is tagged with its category.
f01 = (
    f0[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_stack_score_FPD0_gini']]
    .rename(columns={'sil_beta_stack_score_FPD0_gini': 'FPD0'})
    .assign(category='beta_stack_model_sil_fpd0_v2_t1')
)

In [590]:
# Number of rows per category / loan type / version / trench combination.
f01.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
beta_stack_model_sil_fpd0_v2_t1  Overall         v2       Trench 1           80
                                 SIL Competitor  v2       Trench 1           62
                                 SIL ZERO        v2       Trench 1           80
                                 SIL-Instore     v2       Trench 1           79
dtype: int64

In [591]:
# Preview the first rows of f01.
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1,0.294608,beta_stack_model_sil_fpd0_v2_t1
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 1,0.325917,beta_stack_model_sil_fpd0_v2_t1
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1,0.322586,beta_stack_model_sil_fpd0_v2_t1
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1,0.350014,beta_stack_model_sil_fpd0_v2_t1
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 1,0.335327,beta_stack_model_sil_fpd0_v2_t1


## FPD10

## Test

In [592]:
# Test sample: FPD10, model version v2, Trench 1.
# Scores are read from audit_balance.ml_model_run_details for the two beta
# stack display names; NULL/blank trenchCategory is mapped to 'ALL'. Rows are
# joined to the loan master table and to per-loan delinquency flags, then
# restricted to disbursed loans with a non-null score and flg_mature_fpd10 = 1.
# NOTE(review): the WHERE filters on loanmaster columns (and the inner join on
# loanmaster.loanAccountNumber) discard unmatched rows, so the LEFT JOIN
# effectively behaves as an inner join.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query with the module-level BigQuery client and load the result.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [593]:
# Keep the Test sample as df1; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [594]:
# Train sample: FPD10, model version v2, Trench 1.
# Same structure as the Test query, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are
# labelled 'Train'. NULL/blank trenchCategory is mapped to 'ALL'; the result is
# restricted to disbursed loans with a non-null score and flg_mature_fpd10 = 1.
# NOTE(review): the WHERE filters on loanmaster columns (and the inner join on
# loanmaster.loanAccountNumber) discard unmatched rows, so the LEFT JOIN
# effectively behaves as an inner join.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;

"""
# Run the query with the module-level BigQuery client and load the result.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.427991,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2774894,3237ec0e-aa75-4e08-8c9f-d3f9cdae2bf5,60827748940015,0.449809,2024-08-20 20:10:50,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
2,2771370,42c4b96e-77ef-4f83-b0c8-91874e646f23,60827713700011,0.552988,2024-08-19 18:47:47,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
3,2756900,0a66d88b-ed53-44f6-b5f6-422ff6df02cf,60827569000014,0.750267,2024-08-15 16:27:02,2024-08-22,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
4,2770836,95ab512a-8b16-4c41-915d-e8b2d97767f5,60827708360019,0.450691,2024-08-19 16:40:05,2024-08-19,2024-08,Train,0,1,SIL-Instore,v2,Trench 1


In [595]:
# Keep the Train sample as df2 for the concat in the next cell.
df2 = dfd.copy()

In [596]:
# Stack the Test (df1) and Train (df2) samples into one frame.
# Empty frames are excluded first: concatenating an empty frame triggers a
# pandas FutureWarning, and pandas recommends dropping such entries before the
# concat to keep the current result dtypes. Both queries select the same
# columns, so dropping an empty sample does not change the column layout.
non_empty_samples = [sample for sample in (df1, df2) if not sample.empty]
# If every sample is empty, fall back to the original inputs so the result is
# still an empty frame with the expected columns (pd.concat([]) would raise).
df_concat = pd.concat(non_empty_samples or [df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 212101 entries, 0 to 212100
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             212101 non-null  Int64         
 1   digitalLoanAccountId   212101 non-null  object        
 2   loanAccountNumber      212101 non-null  object        
 3   sil_beta_stack_score   212101 non-null  float64       
 4   appln_submit_datetime  212101 non-null  datetime64[us]
 5   disbursementdate       212101 non-null  dbdate        
 6   Application_month      212101 non-null  object        
 7   Data_selection         212101 non-null  object        
 8   deffpd10               212101 non-null  Int64         
 9   flg_mature_fpd10       212101 non-null  Int64         
 10  new_loan_type          212101 non-null  object        
 11  modelVersionId         212101 non-null  object        
 12  trenchCategory         212101 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [597]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Row count per application month (missing months included), newest month first.
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending = False)

Application_month
2025-11        1
2025-10     5984
2025-09    12181
2025-08    13776
2025-07    14176
2025-06    13899
2025-05    15237
2025-04    15102
2025-03    14155
2025-02    11430
2025-01    12676
2024-12    27319
2024-11    14070
2024-10    13318
2024-09    14356
2024-08    14421
Name: count, dtype: int64

In [598]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd10.csv")

In [599]:
# Earlier call, without the version / trench / product arguments:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd10', 'FPD10')
# Periodic Gini of the score against the deffpd10 flag, labelled 'FPD10'.
# NOTE(review): the helper is defined elsewhere in the notebook; the meaning of
# the four positional arguments is inferred from the values passed -- confirm
# against its signature.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffpd10', 
    'FPD10',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [600]:
# Keep the FPD10 Gini table as f1 so later reassignments of `gini_results` do not affect it.
f1 = gini_results.copy()

In [601]:
# Preview the first ten rows of the FPD10 Gini table.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.344337,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
1,2024-08-01,2024-08-31,0.398907,Month,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
2,2024-08-05,2024-08-11,0.397431,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
3,2024-08-12,2024-08-18,0.406379,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
4,2024-08-19,2024-08-25,0.408915,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
5,2024-08-26,2024-09-01,0.424225,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
6,2024-09-01,2024-09-30,0.427045,Month,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
7,2024-09-02,2024-09-08,0.440392,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
8,2024-09-09,2024-09-15,0.44225,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1
9,2024-09-16,2024-09-22,0.384412,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 1


In [602]:
# FPD10 Gini table (v2, Trench 1) in long format: the Gini column is renamed to
# the bad-rate label and every row is tagged with its category.
f10 = (
    f1[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_stack_score_FPD10_gini']]
    .rename(columns={'sil_beta_stack_score_FPD10_gini': 'FPD10'})
    .assign(category='beta_stack_model_sil_fpd10_v2_t1')
)

In [603]:
# Number of rows per category / loan type / version / trench combination.
f10.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
beta_stack_model_sil_fpd10_v2_t1  Overall         v2       Trench 1           80
                                  SIL Competitor  v2       Trench 1           62
                                  SIL ZERO        v2       Trench 1           80
                                  SIL-Instore     v2       Trench 1           79
dtype: int64

## FPD30

## Test

In [604]:
# Test sample: FPD30, model version v2, Trench 1.
# Scores are read from audit_balance.ml_model_run_details for the two beta
# stack display names; NULL/blank trenchCategory is mapped to 'ALL'. Rows are
# joined to the loan master table and to per-loan delinquency flags, then
# restricted to disbursed loans with a non-null score and flg_mature_fpd30 = 1.
# NOTE(review): the WHERE filters on loanmaster columns (and the inner join on
# loanmaster.loanAccountNumber) discard unmatched rows, so the LEFT JOIN
# effectively behaves as an inner join.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query with the module-level BigQuery client and load the result.
dfd = client.query(sq).to_dataframe()
# Drop rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [605]:
# Snapshot the Test pull: `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [606]:
# FPD30 / Train pull.
# Same shape as the Test query, but the scores are read from
# `dap_ds_poweruser_playground.ml_training_model_run_details` and rows are tagged Data_selection = 'Train'.
# The query below:
#   * keeps rows whose modelDisplayName is 'Beta - StackScoreModel' or 'beta_stack_model_sil' with
#     modelVersionId = 'v2', and maps a NULL/empty trenchCategory to 'ALL';
#   * derives 0/1 default flags and 0/1 maturity flags from `risk_credit_mis.loan_deliquency_data`
#     (deffpd30 = 1 when obs_min_inst_def30 >= 1 and min_inst_def30 = 1;
#      flg_mature_fpd30 = 1 when obs_min_inst_def30 >= 1);
#   * joins to `risk_credit_mis.loan_master_table` on digitalLoanAccountId and keeps disbursed loans
#     (flagDisbursement = 1, non-null disbursementDateTime) with a non-null score and flg_mature_fpd30 = 1;
#   * returns only trenchCategory = 'Trench 1' rows.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN behave like an INNER JOIN.
# NOTE(review): end_time, modelDisplayName and calcFeature are selected in `cleaned` but never used later.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;

"""
# Run the query on BigQuery and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across all columns, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.427991,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2773647,cae4f100-252c-41a9-bee5-f0dd2af5677a,60827736470018,0.562468,2024-08-20 14:34:46,2024-08-20,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
2,2781164,7acf194f-7b5d-4b4b-91c3-3ef5ed93d0c1,60827811640011,0.693714,2024-08-22 17:22:50,2024-08-24,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
3,2772556,fdca6a0c-ee77-44bd-86c4-52dda2382428,60827725560012,0.542104,2024-08-20 10:03:34,2024-08-20,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
4,2811941,d002a93d-45c7-4893-b213-2c370c43f7e5,60828119410019,0.757711,2024-08-31 18:38:53,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 1


In [607]:
# Snapshot the Train pull under its own name so it can be stacked with the Test pull (df1).
df2 = dfd.copy(deep=True)

In [608]:
# Stack the Test (df1) and Train (df2) pulls into a single frame.
# Empty frames are dropped first: passing an empty DataFrame to pd.concat raises pandas'
# FutureWarning about empty/all-NA entries, and the Test pull can legitimately return no rows.
# Excluding them up front keeps the current result dtypes, as the warning itself recommends.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 208073 entries, 0 to 208072
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             208073 non-null  Int64         
 1   digitalLoanAccountId   208073 non-null  object        
 2   loanAccountNumber      208073 non-null  object        
 3   sil_beta_stack_score   208073 non-null  float64       
 4   appln_submit_datetime  208073 non-null  datetime64[us]
 5   disbursementdate       208073 non-null  dbdate        
 6   Application_month      208073 non-null  object        
 7   Data_selection         208073 non-null  object        
 8   deffpd30               208073 non-null  Int64         
 9   flg_mature_fpd30       208073 non-null  Int64         
 10  new_loan_type          208073 non-null  object        
 11  modelVersionId         208073 non-null  object        
 12  trenchCategory         208073 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [609]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
score_column = 'sil_beta_stack_score'
df_concat[score_column] = pd.to_numeric(df_concat[score_column], errors='coerce')

# Row count per application month (missing months included), newest month first.
rows_per_month = df_concat['Application_month'].value_counts(dropna=False)
rows_per_month.sort_index(ascending=False)

Application_month
2025-11        1
2025-10     1965
2025-09    12172
2025-08    13776
2025-07    14176
2025-06    13899
2025-05    15237
2025-04    15102
2025-03    14155
2025-02    11430
2025-01    12676
2024-12    27319
2024-11    14070
2024-10    13318
2024-09    14356
2024-08    14421
Name: count, dtype: int64

In [610]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd30.csv")

In [611]:
# Periodic Gini of the stack score against the deffpd30 flag, labelled 'FPD30'.
# The keyword arguments name the columns holding model version, trench and product.
segment_columns = {
    'model_version_column': 'modelVersionId',
    'trench_column': 'trenchCategory',
    'product_column': 'new_loan_type',
}
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffpd30', 'FPD30', **segment_columns
)

In [612]:
# Keep the FPD30 Gini table under its own name; `gini_results` is reused for the next metric.
f2 = gini_results.copy(deep=True)

In [613]:
# Preview the first ten FPD30 Gini rows.
f2.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.387794,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
1,2024-08-01,2024-08-31,0.417446,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
2,2024-08-05,2024-08-11,0.401827,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
3,2024-08-12,2024-08-18,0.445501,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
4,2024-08-19,2024-08-25,0.417025,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
5,2024-08-26,2024-09-01,0.450725,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
6,2024-09-01,2024-09-30,0.450391,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
7,2024-09-02,2024-09-08,0.444771,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
8,2024-09-09,2024-09-15,0.466446,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1
9,2024-09-16,2024-09-22,0.394327,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1


In [614]:
# Reorder the FPD30 Gini table, expose the metric under the short name 'FPD30'
# and tag every row with the category label for this model/metric.
fpd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category', 'sil_beta_stack_score_FPD30_gini',
]
f20 = (
    f2.loc[:, fpd30_columns]
    .rename(columns={'sil_beta_stack_score_FPD30_gini': 'FPD30'})
    .assign(category='beta_stack_model_sil_fpd30_v2_t1')
)

# Sanity check: number of Gini rows per segment.
f20.groupby(['loan_type', 'version', 'trench_category', 'category']).size()

loan_type       version  trench_category  category                        
Overall         v2       Trench 1         beta_stack_model_sil_fpd30_v2_t1    78
SIL Competitor  v2       Trench 1         beta_stack_model_sil_fpd30_v2_t1    61
SIL ZERO        v2       Trench 1         beta_stack_model_sil_fpd30_v2_t1    78
SIL-Instore     v2       Trench 1         beta_stack_model_sil_fpd30_v2_t1    78
dtype: int64

In [615]:
# Preview the first five rows of the reshaped FPD30 table.
f20.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1,0.387794,beta_stack_model_sil_fpd30_v2_t1
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 1,0.417446,beta_stack_model_sil_fpd30_v2_t1
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1,0.401827,beta_stack_model_sil_fpd30_v2_t1
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1,0.445501,beta_stack_model_sil_fpd30_v2_t1
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 1,0.417025,beta_stack_model_sil_fpd30_v2_t1


## FSPD30

## Test

In [616]:
# FSPD30 / Test pull.
# The query below:
#   * reads `audit_balance.ml_model_run_details`, keeping rows whose modelDisplayName is
#     'Beta - StackScoreModel' or 'beta_stack_model_sil' with modelVersionId = 'v2', and maps a
#     NULL/empty trenchCategory to 'ALL';
#   * derives 0/1 default flags and 0/1 maturity flags from `risk_credit_mis.loan_deliquency_data`
#     (deffspd30 = 1 when obs_min_inst_def30 >= 2 and min_inst_def30 in (1,2);
#      flg_mature_fspd_30 = 1 when obs_min_inst_def30 >= 2);
#   * joins to `risk_credit_mis.loan_master_table` on digitalLoanAccountId and keeps disbursed loans
#     (flagDisbursement = 1, non-null disbursementDateTime) with a non-null score and flg_mature_fspd_30 = 1;
#   * returns only trenchCategory = 'Trench 1' rows, tagged Data_selection = 'Test'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN behave like an INNER JOIN.
# NOTE(review): end_time, modelDisplayName and calcFeature are selected in `cleaned` but never used later.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query on BigQuery and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across all columns, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [617]:
# Snapshot the Test pull: `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [618]:
# FSPD30 / Train pull.
# Same shape as the Test query, but the scores are read from
# `dap_ds_poweruser_playground.ml_training_model_run_details` and rows are tagged Data_selection = 'Train'.
# The query below:
#   * keeps rows whose modelDisplayName is 'Beta - StackScoreModel' or 'beta_stack_model_sil' with
#     modelVersionId = 'v2', and maps a NULL/empty trenchCategory to 'ALL';
#   * derives 0/1 default flags and 0/1 maturity flags from `risk_credit_mis.loan_deliquency_data`
#     (deffspd30 = 1 when obs_min_inst_def30 >= 2 and min_inst_def30 in (1,2);
#      flg_mature_fspd_30 = 1 when obs_min_inst_def30 >= 2);
#   * joins to `risk_credit_mis.loan_master_table` on digitalLoanAccountId and keeps disbursed loans
#     (flagDisbursement = 1, non-null disbursementDateTime) with a non-null score and flg_mature_fspd_30 = 1;
#   * returns only trenchCategory = 'Trench 1' rows.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN behave like an INNER JOIN.
# NOTE(review): end_time, modelDisplayName and calcFeature are selected in `cleaned` but never used later.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;

"""
# Run the query on BigQuery and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across all columns, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.427991,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2774229,c0e33380-c602-42bf-9d87-be559a15762a,60827742290018,0.666342,2024-08-20 16:48:27,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
2,2767885,4254f9cf-5508-4fe4-8d6e-d0dce887b453,60827678850011,0.574751,2024-08-18 17:47:27,2024-08-18,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
3,2739789,a02030e0-f40d-4cf8-8b0c-133cfd4fc169,60827397890015,0.727596,2024-08-10 13:38:45,2024-08-10,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
4,2762709,5594a8be-d65a-4555-8354-b9a22843659a,60827627090016,0.735065,2024-08-17 12:27:15,2024-08-17,2024-08,Train,0,1,SIL-Instore,v2,Trench 1


In [619]:
# Snapshot the Train pull under its own name so it can be stacked with the Test pull (df1).
df2 = dfd.copy(deep=True)

In [620]:
# Stack the Test (df1) and Train (df2) pulls into a single frame.
# Empty frames are dropped first: passing an empty DataFrame to pd.concat raises pandas'
# FutureWarning about empty/all-NA entries, and the Test pull can legitimately return no rows.
# Excluding them up front keeps the current result dtypes, as the warning itself recommends.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 195633 entries, 0 to 195632
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             195633 non-null  Int64         
 1   digitalLoanAccountId   195633 non-null  object        
 2   loanAccountNumber      195633 non-null  object        
 3   sil_beta_stack_score   195633 non-null  float64       
 4   appln_submit_datetime  195633 non-null  datetime64[us]
 5   disbursementdate       195633 non-null  dbdate        
 6   Application_month      195633 non-null  object        
 7   Data_selection         195633 non-null  object        
 8   deffspd30              195633 non-null  Int64         
 9   flg_mature_fspd_30     195633 non-null  Int64         
 10  new_loan_type          195633 non-null  object        
 11  modelVersionId         195633 non-null  object        
 12  trenchCategory         195633 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [621]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')

# Row count per application month (missing months included).
# This must be the cell's last expression to be displayed; it used to come first,
# so its result was silently discarded and the cell showed no output.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [622]:
# df_concat.to_csv(r"sil_beta_stack_scorefspd30.csv")

In [623]:
# Periodic Gini of the stack score against the deffspd30 flag, labelled 'FSPD30'.
# The keyword arguments name the columns holding model version, trench and product.
segment_columns = {
    'model_version_column': 'modelVersionId',
    'trench_column': 'trenchCategory',
    'product_column': 'new_loan_type',
}
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffspd30', 'FSPD30', **segment_columns
)

In [624]:
# Keep the FSPD30 Gini table under its own name; `gini_results` is reused for the next metric.
f3 = gini_results.copy(deep=True)

In [625]:
# Preview the first ten FSPD30 Gini rows.
f3.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.405229,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
1,2024-08-01,2024-08-31,0.431823,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
2,2024-08-05,2024-08-11,0.409753,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
3,2024-08-12,2024-08-18,0.438655,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
4,2024-08-19,2024-08-25,0.449343,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
5,2024-08-26,2024-09-01,0.461408,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
6,2024-09-01,2024-09-30,0.462457,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
7,2024-09-02,2024-09-08,0.490284,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
8,2024-09-09,2024-09-15,0.453157,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1
9,2024-09-16,2024-09-22,0.416626,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1


In [626]:
# Reorder the FSPD30 Gini table, expose the metric under the short name 'FSPD30'
# and tag every row with the category label for this model/metric.
fspd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category', 'sil_beta_stack_score_FSPD30_gini',
]
f30 = (
    f3.loc[:, fspd30_columns]
    .rename(columns={'sil_beta_stack_score_FSPD30_gini': 'FSPD30'})
    .assign(category='beta_stack_model_sil_fspd30_v2_t1')
)

# Sanity check: number of Gini rows per segment.
f30.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 1           73
SIL Competitor  v2       Trench 1           56
SIL ZERO        v2       Trench 1           73
SIL-Instore     v2       Trench 1           73
dtype: int64

In [627]:
# Preview the first five rows of the reshaped FSPD30 table.
f30.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1,0.405229,beta_stack_model_sil_fspd30_v2_t1
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1,0.431823,beta_stack_model_sil_fspd30_v2_t1
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1,0.409753,beta_stack_model_sil_fspd30_v2_t1
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1,0.438655,beta_stack_model_sil_fspd30_v2_t1
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 1,0.449343,beta_stack_model_sil_fspd30_v2_t1


## FSTPD30

## Test

In [628]:
# FSTPD30 / Test pull.
# The query below:
#   * reads `audit_balance.ml_model_run_details`, keeping rows whose modelDisplayName is
#     'Beta - StackScoreModel' or 'beta_stack_model_sil' with modelVersionId = 'v2', and maps a
#     NULL/empty trenchCategory to 'ALL';
#   * derives 0/1 default flags and 0/1 maturity flags from `risk_credit_mis.loan_deliquency_data`
#     (deffstpd30 = 1 when obs_min_inst_def30 >= 3 and min_inst_def30 in (1,2,3);
#      flg_mature_fstpd_30 = 1 when obs_min_inst_def30 >= 3);
#   * joins to `risk_credit_mis.loan_master_table` on digitalLoanAccountId and keeps disbursed loans
#     (flagDisbursement = 1, non-null disbursementDateTime) with a non-null score and flg_mature_fstpd_30 = 1;
#   * returns only trenchCategory = 'Trench 1' rows, tagged Data_selection = 'Test'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN behave like an INNER JOIN.
# NOTE(review): end_time, modelDisplayName and calcFeature are selected in `cleaned` but never used later.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query on BigQuery and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across all columns, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [629]:
# Snapshot the Test pull: `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [630]:
# FSTPD30 / Train pull.
# Same shape as the Test query, but the scores are read from
# `dap_ds_poweruser_playground.ml_training_model_run_details` and rows are tagged Data_selection = 'Train'.
# The query below:
#   * keeps rows whose modelDisplayName is 'Beta - StackScoreModel' or 'beta_stack_model_sil' with
#     modelVersionId = 'v2', and maps a NULL/empty trenchCategory to 'ALL';
#   * derives 0/1 default flags and 0/1 maturity flags from `risk_credit_mis.loan_deliquency_data`
#     (deffstpd30 = 1 when obs_min_inst_def30 >= 3 and min_inst_def30 in (1,2,3);
#      flg_mature_fstpd_30 = 1 when obs_min_inst_def30 >= 3);
#   * joins to `risk_credit_mis.loan_master_table` on digitalLoanAccountId and keeps disbursed loans
#     (flagDisbursement = 1, non-null disbursementDateTime) with a non-null score and flg_mature_fstpd_30 = 1;
#   * returns only trenchCategory = 'Trench 1' rows.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN behave like an INNER JOIN.
# NOTE(review): end_time, modelDisplayName and calcFeature are selected in `cleaned` but never used later.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;

"""
# Run the query on BigQuery and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are exact duplicates across all columns, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.427991,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2790965,26f55e4b-a160-4305-96cb-2586e6b15e9b,60827909650017,0.771481,2024-08-25 15:16:10,2024-08-25,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
2,2797294,98025c22-581f-442a-9654-c7b18f3b3f31,60827972940019,0.754107,2024-08-27 13:22:40,2024-08-27,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
3,2787551,0eab69e3-3ee8-45f9-b430-6afb7f9e5de9,60827875510012,0.634805,2024-08-24 15:39:14,2024-08-24,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
4,2750690,1ccb60bc-2a44-4d1d-94af-8061251d024e,60827506900019,0.775729,2024-08-13 16:56:01,2024-08-13,2024-08,Train,1,1,SIL-Instore,v2,Trench 1


In [631]:
# Snapshot the Train pull under its own name so it can be stacked with the Test pull (df1).
df2 = dfd.copy(deep=True)

In [632]:
# Stack the Test (df1) and Train (df2) pulls into a single frame.
# Empty frames are dropped first: passing an empty DataFrame to pd.concat raises pandas'
# FutureWarning about empty/all-NA entries, and the Test pull can legitimately return no rows.
# Excluding them up front keeps the current result dtypes, as the warning itself recommends.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 182319 entries, 0 to 182318
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             182319 non-null  Int64         
 1   digitalLoanAccountId   182319 non-null  object        
 2   loanAccountNumber      182319 non-null  object        
 3   sil_beta_stack_score   182319 non-null  float64       
 4   appln_submit_datetime  182319 non-null  datetime64[us]
 5   disbursementdate       182319 non-null  dbdate        
 6   Application_month      182319 non-null  object        
 7   Data_selection         182319 non-null  object        
 8   deffstpd30             182319 non-null  Int64         
 9   flg_mature_fstpd_30    182319 non-null  Int64         
 10  new_loan_type          182319 non-null  object        
 11  modelVersionId         182319 non-null  object        
 12  trenchCategory         182319 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [633]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')

# Row count per application month (missing months included).
# This must be the cell's last expression to be displayed; it used to come first,
# so its result was silently discarded and the cell showed no output.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [634]:
# df_concat.to_csv(r"sil_beta_stack_scorefstpd30.csv")

In [635]:
# Periodic Gini of the stack score against the deffstpd30 flag, labelled 'FSTPD30'.
# The keyword arguments name the columns holding model version, trench and product.
segment_columns = {
    'model_version_column': 'modelVersionId',
    'trench_column': 'trenchCategory',
    'product_column': 'new_loan_type',
}
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffstpd30', 'FSTPD30', **segment_columns
)

In [636]:
# Keep the FSTPD30 Gini table under its own name so `gini_results` can be reused.
f4 = gini_results.copy(deep=True)

In [637]:
# Preview the first ten FSTPD30 Gini rows.
f4.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.367333,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
1,2024-08-01,2024-08-31,0.398688,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
2,2024-08-05,2024-08-11,0.385219,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
3,2024-08-12,2024-08-18,0.404098,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
4,2024-08-19,2024-08-25,0.413418,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
5,2024-08-26,2024-09-01,0.42359,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
6,2024-09-01,2024-09-30,0.432913,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
7,2024-09-02,2024-09-08,0.472669,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
8,2024-09-09,2024-09-15,0.414764,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1
9,2024-09-16,2024-09-22,0.40741,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1


In [638]:
# Reorder the FSTPD30 Gini table, expose the metric under the short name 'FSTPD30'
# and tag every row with the category label for this model/metric.
fstpd30_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'bad_rate', 'loan_type', 'trench_category', 'sil_beta_stack_score_FSTPD30_gini',
]
f40 = (
    f4.loc[:, fstpd30_columns]
    .rename(columns={'sil_beta_stack_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='beta_stack_model_sil_fstpd30_v2_t1')
)

# Sanity check: number of Gini rows per segment.
f40.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 1           67
SIL Competitor  v2       Trench 1           50
SIL ZERO        v2       Trench 1           67
SIL-Instore     v2       Trench 1           67
dtype: int64

In [639]:
# Preview the first five rows of the reshaped FSTPD30 table.
f40.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1,0.367333,beta_stack_model_sil_fstpd30_v2_t1
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1,0.398688,beta_stack_model_sil_fstpd30_v2_t1
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1,0.385219,beta_stack_model_sil_fstpd30_v2_t1
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1,0.404098,beta_stack_model_sil_fstpd30_v2_t1
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 1,0.413418,beta_stack_model_sil_fstpd30_v2_t1


## Combining the dataframes

In [640]:
import functools

# Per-metric Gini tables (FPD0, FPD10, FPD30, FSPD30, FSTPD30) for this model/trench.
dataframes = [f0, f1, f2, f3, f4]
# Key columns shared by every per-metric table.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category','bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the module-level ``common_columns`` keys."""
    # NOTE(review): 'bad_rate' is one of the join keys and looks like it holds a
    # different constant in each table, so rows probably never match and the
    # result is effectively a row-wise stack with NaNs - confirm this is intended.
    return df1.merge(df2, on=common_columns, how='outer')

# Fold the list pairwise from left to right into one combined table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_stack_score_FPD10_gini',
       'sil_beta_stack_score_FPD30_gini',
       'sil_beta_stack_score_FSPD30_gini',
       'sil_beta_stack_score_FSTPD30_gini'], dtype=object)

In [641]:
# Capitalise the trench column and add the version/trench suffix (v2_t1) to every
# per-metric Gini column name.
final_df = final_df.rename(columns={
    'trench_category': 'Trench_category',
    **{
        f'sil_beta_stack_score_{metric}_gini': f'sil_beta_stack_score_{metric}_v2_t1_gini'
        for metric in ('FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30')
    },
})
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_v2_t1_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_stack_score_FPD10_v2_t1_gini',
       'sil_beta_stack_score_FPD30_v2_t1_gini',
       'sil_beta_stack_score_FSPD30_v2_t1_gini',
       'sil_beta_stack_score_FSTPD30_v2_t1_gini'],
      dtype='object')

In [642]:
# Put the key columns first, then the five Gini metrics, and append the two
# constant descriptor columns for this model / product.
final_df = (
    final_df[
        ['start_date', 'end_date', 'period', 'Model_Name', 'version',
         'loan_type', 'bad_rate', 'Trench_category']
        + [
            f'sil_beta_stack_score_{metric}_v2_t1_gini'
            for metric in ('FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30')
        ]
    ]
    .assign(Model_display_name='beta_stack_model_sil', Product_type='SIL')
)
final_df.dtypes

start_date                                 datetime64[ns]
end_date                                   datetime64[ns]
period                                             object
Model_Name                                         object
version                                            object
loan_type                                          object
bad_rate                                           object
Trench_category                                    object
sil_beta_stack_score_FPD0_v2_t1_gini              float64
sil_beta_stack_score_FPD10_v2_t1_gini             float64
sil_beta_stack_score_FPD30_v2_t1_gini             float64
sil_beta_stack_score_FSPD30_v2_t1_gini            float64
sil_beta_stack_score_FSTPD30_v2_t1_gini           float64
Model_display_name                                 object
Product_type                                       object
dtype: object

In [643]:
# Preview the first rows of the table that the next cell uploads.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_stack_score_FPD0_v2_t1_gini,sil_beta_stack_score_FPD10_v2_t1_gini,sil_beta_stack_score_FPD30_v2_t1_gini,sil_beta_stack_score_FSPD30_v2_t1_gini,sil_beta_stack_score_FSTPD30_v2_t1_gini,Model_display_name,Product_type
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 1,0.294608,,,,,beta_stack_model_sil,SIL
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,Overall,FPD0,Trench 1,0.325917,,,,,beta_stack_model_sil,SIL
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 1,0.322586,,,,,beta_stack_model_sil,SIL
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 1,0.350014,,,,,beta_stack_model_sil,SIL
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 1,0.335327,,,,,beta_stack_model_sil,SIL


In [644]:
# Upload final_df to BigQuery, replacing whatever the destination table holds.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_sil_v2_t1_gini_v1"

job_config = bigquery.LoadJobConfig()
# WRITE_TRUNCATE overwrites the table; switch to "WRITE_APPEND" to add rows instead.
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(
    dataframe=final_df,
    destination=table_id,
    job_config=job_config,
)
# Block until the load job finishes; the finished job is the cell's output.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=0bd622a3-debe-47f8-90cc-02c6742ac010>

In [645]:
import functools

# Long-format per-metric tables for beta stack SIL v2 / Trench 1.
dataframes = [f01, f10, f20, f30, f40]
# Join keys; compared with the wide-table merge this list also contains 'category'.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']

def merge_dataframes(df1, df2):
    """Outer-join two long-format Gini tables on the module-level ``common_columns``."""
    # NOTE(review): 'category' (and apparently 'bad_rate') differ per input table,
    # so this outer merge appears to act as a row-wise stack - confirm intended.
    return df1.merge(df2, on=common_columns, how='outer')

# Fold the list pairwise from left to right into one combined table.
betastacksilv2t1 = functools.reduce(merge_dataframes, dataframes)

In [646]:
# Rows per category after the merge; dropna=False also counts missing categories.
betastacksilv2t1['category'].value_counts(dropna=False)

category
beta_stack_model_sil_fpd0_v2_t1       301
beta_stack_model_sil_fpd10_v2_t1      301
beta_stack_model_sil_fpd30_v2_t1      295
beta_stack_model_sil_fspd30_v2_t1     275
beta_stack_model_sil_fstpd30_v2_t1    251
Name: count, dtype: int64

In [647]:
# Stack every model / version / trench table built so far into one frame with a
# fresh 0..n-1 index.
result = pd.concat(
    objs=[
        cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3,
        alphastacksilv1all, alphastacksilv2t1, alphastacksilv2t2, alphastacksilv2t3,
        betastacksilv1all, betastacksilv2t1,
    ],
    axis=0,
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (16597, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [648]:
# Rows per (model, version, category), listed in descending index order.
result.value_counts(
    subset=['Model_Name', 'version', 'category'],
    dropna=False,
).sort_index(ascending=False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t1        301
                                beta_stack_model_sil_fpd0_v2_t1         301
                       v1       beta_stack_model_sil_fstpd30_v1_all     452
                                beta_stack_model_sil_fspd30_v1_all      476
                                beta_stack_model_sil_fpd30_v1_all       496
                                beta_stack_model_sil_fpd10_v1_all       504
                                beta_stack_model_sil_fpd0_v1_all        516
Sil_Alpha_Stack_score  v2       alpha_stack_model_sil_fstpd30_v2_t3     236
                                alpha_stack_model_sil_fstpd30_v2_t2     230
                   

In [649]:
# Keep a copy of the combined table under the name masterdf and show its
# shape and first rows.
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (16597, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## Trench 2

## FPD0

## Test

In [650]:
# Test extract for FPD0 / Trench 2.
# The query reads v2 beta stack scores from audit_balance.ml_model_run_details,
# joins the loan master and delinquency tables, and keeps disbursed loans with a
# non-null score and flg_mature_fpd0 = 1, restricted to trenchCategory 'Trench 2'.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully duplicated rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [651]:
# Keep the Test extract under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy()

## Train

In [652]:
# Train extract for FPD0 / Trench 2.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are
# labelled 'Train' in Data_selection.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query and drop fully duplicated rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2514315,6280a4da-5ff3-4ba5-a339-a5a87b6a2363,60825143150013,0.725813,2024-08-20 14:52:04,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2369419,e40203cb-960b-4ab7-9f47-0d041bd027ff,60823694190013,0.542575,2024-08-09 11:26:20,2024-08-09,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,2438659,211b519c-ebf1-4696-a82f-e1bd68adf6af,60824386590014,0.54342,2024-08-31 17:17:08,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2771290,0d32056c-5a19-42ba-99dc-51831475c069,60827712900011,0.384534,2024-08-19 18:25:10,2024-08-19,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,2217283,fb2ba986-2b47-4598-8098-f0cf1e4d2bbf,60822172830016,0.437553,2024-08-15 15:08:28,2024-08-15,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [653]:
# Keep the Train extract under its own name before dfd is reused by later queries.
df2 = dfd.copy()

In [654]:
# Combine the Test (df1) and Train (df2) extracts into one frame.
# Empty extracts are dropped first: concatenating an empty frame is deprecated in
# pandas (FutureWarning about empty or all-NA entries) and will change how result
# dtypes are inferred. Excluding them is the documented way to keep today's result.
# If both extracts are empty, fall back to df1 so the expected columns survive
# (pd.concat raises ValueError on an empty list).
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4429 entries, 0 to 4428
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4429 non-null   Int64         
 1   digitalLoanAccountId   4429 non-null   object        
 2   loanAccountNumber      4429 non-null   object        
 3   sil_beta_stack_score   4429 non-null   float64       
 4   appln_submit_datetime  4429 non-null   datetime64[us]
 5   disbursementdate       4429 non-null   dbdate        
 6   Application_month      4429 non-null   object        
 7   Data_selection         4429 non-null   object        
 8   deffpd0                4429 non-null   Int64         
 9   flg_mature_fpd0        4429 non-null   Int64         
 10  new_loan_type          4429 non-null   object        
 11  modelVersionId         4429 non-null   object        
 12  trenchCategory         4429 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [655]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    sil_beta_stack_score=pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
)
# Loans per application month, newest month first (missing months included).
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending=False)

Application_month
2025-10    135
2025-09    291
2025-08    349
2025-07    389
2025-06    357
2025-05    417
2025-04    477
2025-03    339
2025-02    221
2025-01    197
2024-12    459
2024-11    269
2024-10    175
2024-09    176
2024-08    178
Name: count, dtype: int64

In [656]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd0.csv")

In [657]:
# Build the FPD0 Gini table from the combined Trench 2 extract.
# NOTE(review): calculate_periodic_gini_prod_ver_trench is defined outside this
# section; judging by the preview of its result it presumably yields weekly and
# monthly Gini values per loan type, version and trench - confirm against its definition.
# Superseded call, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd0', 'FPD0')
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score',  # score column produced by the extract queries
    'deffpd0',  # 0/1 default flag column from the delinquency CTE
    'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [658]:
# Snapshot the FPD0 result; gini_results is overwritten by the FPD10 run further down.
# NOTE(review): f0 was also used for the Trench 1 tables earlier, so it is rebound here.
f0 = gini_results.copy()

In [659]:
# Preview the first ten rows of the FPD0 Gini table.
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.684211,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
1,2024-08-01,2024-08-31,0.458693,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
2,2024-08-05,2024-08-11,0.544643,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
3,2024-08-12,2024-08-18,0.546875,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
4,2024-08-19,2024-08-25,0.320955,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
5,2024-08-26,2024-09-01,0.122507,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
6,2024-09-01,2024-09-30,0.124866,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
7,2024-09-02,2024-09-08,0.633333,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
8,2024-09-09,2024-09-15,0.294118,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2
9,2024-09-16,2024-09-22,0.354839,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2


In [660]:
# Long-format FPD0 slice for Trench 2: keep the key columns, give the metric
# column its short name and tag every row with the category label.
# NOTE(review): f01 was already used for the Trench 1 merge above; it is rebound here.
f01 = (
    f0[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_stack_score_FPD0_gini']]
    .rename(columns={'sil_beta_stack_score_FPD0_gini': 'FPD0'})
    .assign(category='beta_stack_model_sil_fpd0_v2_t2')
)

In [661]:
# Row count per category / loan type / version / trench as a completeness check.
f01.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
beta_stack_model_sil_fpd0_v2_t2  Overall         v2       Trench 2           79
                                 SIL Competitor  v2       Trench 2           61
                                 SIL ZERO        v2       Trench 2           73
                                 SIL-Instore     v2       Trench 2           79
dtype: int64

In [662]:
# Preview the first rows of the FPD0 long-format frame.
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2,0.684211,beta_stack_model_sil_fpd0_v2_t2
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 2,0.458693,beta_stack_model_sil_fpd0_v2_t2
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2,0.544643,beta_stack_model_sil_fpd0_v2_t2
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2,0.546875,beta_stack_model_sil_fpd0_v2_t2
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 2,0.320955,beta_stack_model_sil_fpd0_v2_t2


## FPD10

## Test

In [663]:
# Test extract for FPD10 / Trench 2.
# The query reads v2 beta stack scores from audit_balance.ml_model_run_details,
# joins the loan master and delinquency tables, and keeps disbursed loans with a
# non-null score and flg_mature_fpd10 = 1, restricted to trenchCategory 'Trench 2'.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully duplicated rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [664]:
# Keep the FPD10 Test extract under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy()

## Train

In [665]:
# Train extract for FPD10 / Trench 2.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are
# labelled 'Train' in Data_selection.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query and drop fully duplicated rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,1819166,b834523a-d6d1-448c-8da8-33c6611c6073,60818191660019,0.309829,2024-08-24 15:13:52,2024-08-24,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2615689,4294b46c-60b2-4404-a03b-463d4529b580,60826156890011,0.637936,2024-08-05 15:21:47,2024-08-05,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,2271249,753bf2ac-7cfc-467a-b5df-3806908a22d3,60822712490011,0.635813,2024-08-25 15:03:24,2024-08-25,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2648167,8a56cc1d-2037-40e0-a7b2-e13614c00c22,60826481670016,0.324315,2024-08-17 18:07:36,2024-08-17,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,2514811,da494684-2dca-42ca-a214-711fe33cdd67,60825148110014,0.612396,2024-08-30 13:30:56,2024-08-30,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [666]:
# Keep the FPD10 Train extract under its own name before dfd is reused by later queries.
df2 = dfd.copy()

In [667]:
# Combine the FPD10 Test (df1) and Train (df2) extracts into one frame.
# Empty extracts are dropped first: concatenating an empty frame is deprecated in
# pandas (FutureWarning about empty or all-NA entries) and will change how result
# dtypes are inferred. Excluding them is the documented way to keep today's result.
# If both extracts are empty, fall back to df1 so the expected columns survive
# (pd.concat raises ValueError on an empty list).
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4429 entries, 0 to 4428
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4429 non-null   Int64         
 1   digitalLoanAccountId   4429 non-null   object        
 2   loanAccountNumber      4429 non-null   object        
 3   sil_beta_stack_score   4429 non-null   float64       
 4   appln_submit_datetime  4429 non-null   datetime64[us]
 5   disbursementdate       4429 non-null   dbdate        
 6   Application_month      4429 non-null   object        
 7   Data_selection         4429 non-null   object        
 8   deffpd10               4429 non-null   Int64         
 9   flg_mature_fpd10       4429 non-null   Int64         
 10  new_loan_type          4429 non-null   object        
 11  modelVersionId         4429 non-null   object        
 12  trenchCategory         4429 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [668]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    sil_beta_stack_score=pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
)
# Loans per application month, newest month first (missing months included).
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending=False)

Application_month
2025-10    135
2025-09    291
2025-08    349
2025-07    389
2025-06    357
2025-05    417
2025-04    477
2025-03    339
2025-02    221
2025-01    197
2024-12    459
2024-11    269
2024-10    175
2024-09    176
2024-08    178
Name: count, dtype: int64

In [669]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd10.csv")

In [670]:
# Build the FPD10 Gini table from the combined Trench 2 extract.
# NOTE(review): calculate_periodic_gini_prod_ver_trench is defined outside this
# section; judging by the preview of its result it presumably yields weekly and
# monthly Gini values per loan type, version and trench - confirm against its definition.
# Superseded call, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score',  # score column produced by the extract queries
    'deffpd10',  # 0/1 default flag column from the delinquency CTE
    'FPD10',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [671]:
# Snapshot the FPD10 result so later metric runs can reuse the gini_results name.
f1 = gini_results.copy()

In [672]:
# Preview the first ten rows of the FPD10 Gini table.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.492063,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
1,2024-08-01,2024-08-31,0.497976,Month,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
2,2024-08-05,2024-08-11,0.694581,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
3,2024-08-12,2024-08-18,0.676471,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
4,2024-08-19,2024-08-25,0.373737,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
5,2024-08-26,2024-09-01,0.114983,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
6,2024-09-01,2024-09-30,0.252971,Month,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
7,2024-09-02,2024-09-08,0.769231,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
8,2024-09-09,2024-09-15,,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2
9,2024-09-16,2024-09-22,0.514286,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 2


In [673]:
# Long-format FPD10 slice for Trench 2: keep the key columns, give the metric
# column its short name and tag every row with the category label.
# NOTE(review): f10 was already used for the Trench 1 merge above; it is rebound here.
f10 = (
    f1[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_stack_score_FPD10_gini']]
    .rename(columns={'sil_beta_stack_score_FPD10_gini': 'FPD10'})
    .assign(category='beta_stack_model_sil_fpd10_v2_t2')
)

In [674]:
# Row count per category / loan type / version / trench as a completeness check.
f10.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
beta_stack_model_sil_fpd10_v2_t2  Overall         v2       Trench 2           79
                                  SIL Competitor  v2       Trench 2           61
                                  SIL ZERO        v2       Trench 2           73
                                  SIL-Instore     v2       Trench 2           79
dtype: int64

## FPD30

## Test

In [675]:
# Test extract for FPD30 / Trench 2.
# The query reads v2 beta stack scores from audit_balance.ml_model_run_details,
# joins the loan master and delinquency tables, and keeps disbursed loans with a
# non-null score and flg_mature_fpd30 = 1, restricted to trenchCategory 'Trench 2'.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop fully duplicated rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [676]:
# Keep the 'Test' extract under its own name; the Train cell below reassigns dfd.
df1 = dfd.copy()

## Train

In [677]:
# FPD30 / 'Train' extract: same shape as the 'Test' query, but the scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged
# Data_selection = 'Train'.
#   cleaned    - run-detail rows for the two model display names and modelVersionId 'v2';
#                a NULL or empty trenchCategory is mapped to 'ALL'.
#   modelname  - exposes prediction as sil_beta_stack_score.
#   deliquency - derives default flags (def*) and maturity flags (flg_mature_*).
#   base       - joins scores to loan_master_table and deliquency; keeps disbursed loans
#                with a non-null score and flg_mature_fpd30 = 1.
# The final SELECT keeps trenchCategory = 'Trench 2' only.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query, drop rows that are exact duplicates across every column, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2495801,962ae643-3e5b-4c81-a3c2-e681346fadc3,60824958010011,0.642597,2024-08-11 17:55:30,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2395216,f11eb09f-8ce6-4465-b691-2c9b61cd142b,60823952160013,0.562196,2024-08-16 18:11:06,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,2181410,e1487276-90f9-4811-b531-5c185e351cd0,60821814100011,0.30698,2024-08-11 17:54:45,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2355343,1ec1e62a-486b-43cb-b0aa-9d4cb5f44a49,60823553430019,0.510647,2024-08-05 17:52:16,2024-08-05,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,2483002,010348e9-caa4-43e5-a682-bcf5575e9892,60824830020014,0.325687,2024-08-12 18:52:18,2024-08-12,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [678]:
# Keep the 'Train' extract under its own name, independent of dfd.
df2 = dfd.copy()

In [679]:
# Stack the 'Test' and 'Train' extracts into one frame for the Gini calculation.
# An extract can come back with zero rows (the 'Test' one did in the saved run), and
# pandas emits a FutureWarning when an empty frame takes part in a concat because empty
# inputs will stop being ignored when the result dtypes are inferred. Pass only the
# non-empty extracts so the dtypes come from real data; if every extract is empty, fall
# back to the full list so df_concat still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4332 entries, 0 to 4331
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4332 non-null   Int64         
 1   digitalLoanAccountId   4332 non-null   object        
 2   loanAccountNumber      4332 non-null   object        
 3   sil_beta_stack_score   4332 non-null   float64       
 4   appln_submit_datetime  4332 non-null   datetime64[us]
 5   disbursementdate       4332 non-null   dbdate        
 6   Application_month      4332 non-null   object        
 7   Data_selection         4332 non-null   object        
 8   deffpd30               4332 non-null   Int64         
 9   flg_mature_fpd30       4332 non-null   Int64         
 10  new_loan_type          4332 non-null   object        
 11  modelVersionId         4332 non-null   object        
 12  trenchCategory         4332 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [680]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(
    df_concat['sil_beta_stack_score'],
    errors='coerce',
)
# Row count per application month (NaN months included), newest month first.
(
    df_concat['Application_month']
    .value_counts(dropna=False)
    .sort_index(ascending=False)
)

Application_month
2025-10     41
2025-09    288
2025-08    349
2025-07    389
2025-06    357
2025-05    417
2025-04    477
2025-03    339
2025-02    221
2025-01    197
2024-12    459
2024-11    269
2024-10    175
2024-09    176
2024-08    178
Name: count, dtype: int64

In [681]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd30.csv")

In [682]:
# Earlier variant, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffpd30', 'FPD30')
# Compute the Gini of sil_beta_stack_score against the deffpd30 label, labelled 'FPD30'.
# Model version, trench and product are taken from the named columns; judging by the
# preview in the following cells, the helper returns one row per period window
# ('Week' / 'Month'), loan type, version and trench - see the helper's definition for
# the exact contract.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffpd30', 
    'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [683]:
# Snapshot the FPD30 Gini table as f2: gini_results is reassigned by the FSPD30 and
# FSTPD30 sections, and f2 is one of the inputs of the final merge.
f2 = gini_results.copy()

In [684]:
# Preview the first ten rows of the FPD30 Gini table.
f2.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.492063,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
1,2024-08-01,2024-08-31,0.480784,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
2,2024-08-05,2024-08-11,0.622222,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
3,2024-08-12,2024-08-18,0.676471,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
4,2024-08-19,2024-08-25,0.373737,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
5,2024-08-26,2024-09-01,0.114983,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
6,2024-09-01,2024-09-30,0.252971,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
7,2024-09-02,2024-09-08,0.769231,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
8,2024-09-09,2024-09-15,,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2
9,2024-09-16,2024-09-22,0.514286,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2


In [685]:
# Long-format view of the FPD30 Gini table: key columns first, the Gini column renamed
# to the bare label 'FPD30', plus a constant 'category' tag identifying model, label,
# version and trench.
f20 = (
    f2[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FPD30_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FPD30_gini': 'FPD30'})
    .assign(category='beta_stack_model_sil_fpd30_v2_t2')
)
# Number of period rows per loan type / version / trench / category.
f20.groupby(['loan_type', 'version', 'trench_category', 'category']).size()

loan_type       version  trench_category  category                        
Overall         v2       Trench 2         beta_stack_model_sil_fpd30_v2_t2    78
SIL Competitor  v2       Trench 2         beta_stack_model_sil_fpd30_v2_t2    60
SIL ZERO        v2       Trench 2         beta_stack_model_sil_fpd30_v2_t2    69
SIL-Instore     v2       Trench 2         beta_stack_model_sil_fpd30_v2_t2    78
dtype: int64

In [686]:
# Preview the reshaped FPD30 table.
f20.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2,0.492063,beta_stack_model_sil_fpd30_v2_t2
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 2,0.480784,beta_stack_model_sil_fpd30_v2_t2
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2,0.622222,beta_stack_model_sil_fpd30_v2_t2
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2,0.676471,beta_stack_model_sil_fpd30_v2_t2
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 2,0.373737,beta_stack_model_sil_fpd30_v2_t2


## FSPD30

## Test

In [687]:
# FSPD30 / 'Test' extract: scores logged in audit_balance.ml_model_run_details for the
# SIL beta stack model (modelVersionId 'v2'), restricted to trenchCategory 'Trench 2'.
#   cleaned    - run-detail rows for the two model display names; a NULL or empty
#                trenchCategory is mapped to 'ALL'.
#   modelname  - exposes prediction as sil_beta_stack_score.
#   deliquency - derives default flags (def*) and maturity flags (flg_mature_*) from
#                obs_min_inst_def* / min_inst_def*.
#   base       - joins scores to loan_master_table and deliquency; keeps disbursed loans
#                with a non-null score and flg_mature_fspd_30 = 1; selects the deffspd30
#                label and tags rows Data_selection = 'Test'.
# NOTE(review): loanmaster is LEFT JOINed but is then filtered in WHERE and used as the
# key of an INNER JOIN, so score rows without a loan-master match are dropped anyway -
# confirm that this is intended.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query, drop rows that are exact duplicates across every column, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [688]:
# Keep the 'Test' extract under its own name; the Train cell below reassigns dfd.
df1 = dfd.copy()

## Train

In [689]:
# FSPD30 / 'Train' extract: same shape as the 'Test' query, but the scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged
# Data_selection = 'Train'.
#   cleaned    - run-detail rows for the two model display names and modelVersionId 'v2';
#                a NULL or empty trenchCategory is mapped to 'ALL'.
#   modelname  - exposes prediction as sil_beta_stack_score.
#   deliquency - derives default flags (def*) and maturity flags (flg_mature_*).
#   base       - joins scores to loan_master_table and deliquency; keeps disbursed loans
#                with a non-null score and flg_mature_fspd_30 = 1; selects deffspd30.
# The final SELECT keeps trenchCategory = 'Trench 2' only.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query, drop rows that are exact duplicates across every column, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2495801,962ae643-3e5b-4c81-a3c2-e681346fadc3,60824958010011,0.642597,2024-08-11 17:55:30,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2395216,f11eb09f-8ce6-4465-b691-2c9b61cd142b,60823952160013,0.562196,2024-08-16 18:11:06,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,1442992,fd1f4b53-faee-494c-b113-cb3648839fa3,60814429920018,0.87427,2024-08-31 12:20:02,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2400041,d018f5ac-6aa7-4267-95da-b246e6bdf8df,60824000410019,0.287637,2024-08-24 10:41:12,2024-08-24,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,2204311,d6245f38-1c70-4bd4-9ea6-b4ff22e858cb,60822043110016,0.470954,2024-08-10 17:10:35,2024-08-10,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [690]:
# Keep the 'Train' extract under its own name, independent of dfd.
df2 = dfd.copy()

In [691]:
# Stack the 'Test' and 'Train' extracts into one frame for the Gini calculation.
# An extract can come back with zero rows (the 'Test' one did in the saved run), and
# pandas emits a FutureWarning when an empty frame takes part in a concat because empty
# inputs will stop being ignored when the result dtypes are inferred. Pass only the
# non-empty extracts so the dtypes come from real data; if every extract is empty, fall
# back to the full list so df_concat still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4036 entries, 0 to 4035
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4036 non-null   Int64         
 1   digitalLoanAccountId   4036 non-null   object        
 2   loanAccountNumber      4036 non-null   object        
 3   sil_beta_stack_score   4036 non-null   float64       
 4   appln_submit_datetime  4036 non-null   datetime64[us]
 5   disbursementdate       4036 non-null   dbdate        
 6   Application_month      4036 non-null   object        
 7   Data_selection         4036 non-null   object        
 8   deffspd30              4036 non-null   Int64         
 9   flg_mature_fspd_30     4036 non-null   Int64         
 10  new_loan_type          4036 non-null   object        
 11  modelVersionId         4036 non-null   object        
 12  trenchCategory         4036 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [692]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Row count per application month (NaN months included). This must be the cell's last
# expression: a notebook only displays the final expression, so with the statements in
# the opposite order the counts were computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [693]:
# df_concat.to_csv(r"sil_beta_stack_scorefspd30.csv")

In [694]:
# Earlier variant, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffspd30', 'FSPD30')
# Compute the Gini of sil_beta_stack_score against the deffspd30 label, labelled 'FSPD30'.
# Model version, trench and product are taken from the named columns; judging by the
# preview in the following cells, the helper returns one row per period window
# ('Week' / 'Month'), loan type, version and trench - see the helper's definition for
# the exact contract.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffspd30', 
    'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [695]:
# Snapshot the FSPD30 Gini table as f3: gini_results is reassigned by the FSTPD30
# section, and f3 is one of the inputs of the final merge.
f3 = gini_results.copy()

In [696]:
# Preview the first ten rows of the FSPD30 Gini table.
f3.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.492063,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
1,2024-08-01,2024-08-31,0.445658,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
2,2024-08-05,2024-08-11,0.515385,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
3,2024-08-12,2024-08-18,0.359375,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
4,2024-08-19,2024-08-25,0.384615,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
5,2024-08-26,2024-09-01,0.389474,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
6,2024-09-01,2024-09-30,0.355556,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
7,2024-09-02,2024-09-08,0.82,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
8,2024-09-09,2024-09-15,0.823529,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2
9,2024-09-16,2024-09-22,0.394958,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2


In [697]:
# Long-format view of the FSPD30 Gini table: key columns first, the Gini column renamed
# to the bare label 'FSPD30', plus a constant 'category' tag identifying model, label,
# version and trench.
f30 = (
    f3[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FSPD30_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FSPD30_gini': 'FSPD30'})
    .assign(category='beta_stack_model_sil_fspd30_v2_t2')
)
# Number of period rows per loan type / version / trench.
f30.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 2           73
SIL Competitor  v2       Trench 2           55
SIL ZERO        v2       Trench 2           64
SIL-Instore     v2       Trench 2           72
dtype: int64

In [698]:
# Preview the reshaped FSPD30 table.
f30.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2,0.492063,beta_stack_model_sil_fspd30_v2_t2
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2,0.445658,beta_stack_model_sil_fspd30_v2_t2
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2,0.515385,beta_stack_model_sil_fspd30_v2_t2
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2,0.359375,beta_stack_model_sil_fspd30_v2_t2
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 2,0.384615,beta_stack_model_sil_fspd30_v2_t2


## FSTPD30

## Test

In [699]:
# FSTPD30 / 'Test' extract: scores logged in audit_balance.ml_model_run_details for the
# SIL beta stack model (modelVersionId 'v2'), restricted to trenchCategory 'Trench 2'.
#   cleaned    - run-detail rows for the two model display names; a NULL or empty
#                trenchCategory is mapped to 'ALL'.
#   modelname  - exposes prediction as sil_beta_stack_score.
#   deliquency - derives default flags (def*) and maturity flags (flg_mature_*) from
#                obs_min_inst_def* / min_inst_def*.
#   base       - joins scores to loan_master_table and deliquency; keeps disbursed loans
#                with a non-null score and flg_mature_fstpd_30 = 1; selects the
#                deffstpd30 label and tags rows Data_selection = 'Test'.
# NOTE(review): loanmaster is LEFT JOINed but is then filtered in WHERE and used as the
# key of an INNER JOIN, so score rows without a loan-master match are dropped anyway -
# confirm that this is intended.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query, drop rows that are exact duplicates across every column, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [700]:
# Keep the 'Test' extract under its own name; the Train cell below reassigns dfd.
df1 = dfd.copy()

## Train

In [701]:
# FSTPD30 / 'Train' extract: same shape as the 'Test' query, but the scores are read
# from dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged
# Data_selection = 'Train'.
#   cleaned    - run-detail rows for the two model display names and modelVersionId 'v2';
#                a NULL or empty trenchCategory is mapped to 'ALL'.
#   modelname  - exposes prediction as sil_beta_stack_score.
#   deliquency - derives default flags (def*) and maturity flags (flg_mature_*).
#   base       - joins scores to loan_master_table and deliquency; keeps disbursed loans
#                with a non-null score and flg_mature_fstpd_30 = 1; selects deffstpd30.
# The final SELECT keeps trenchCategory = 'Trench 2' only.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query, drop rows that are exact duplicates across every column, and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2495801,962ae643-3e5b-4c81-a3c2-e681346fadc3,60824958010011,0.642597,2024-08-11 17:55:30,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2395216,f11eb09f-8ce6-4465-b691-2c9b61cd142b,60823952160013,0.562196,2024-08-16 18:11:06,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,1705144,27f6902e-d2a5-4c47-b35c-eb04d64c9ab7,60817051440016,0.404213,2024-08-05 17:00:22,2024-08-05,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2288526,17b99fa5-9fd3-4f0f-9df7-6befe6a32b25,60822885260015,0.530081,2024-08-11 19:27:35,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,2531361,5de7241e-7a11-4776-a736-3cc8c0860f39,60825313610017,0.496057,2024-08-24 13:55:20,2024-08-24,2024-08,Train,1,1,SIL-Instore,v2,Trench 2


In [702]:
# Keep the 'Train' extract under its own name, independent of dfd.
df2 = dfd.copy()

In [703]:
# Stack the 'Test' and 'Train' extracts into one frame for the Gini calculation.
# An extract can come back with zero rows (the 'Test' one did in the saved run), and
# pandas emits a FutureWarning when an empty frame takes part in a concat because empty
# inputs will stop being ignored when the result dtypes are inferred. Pass only the
# non-empty extracts so the dtypes come from real data; if every extract is empty, fall
# back to the full list so df_concat still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3711 entries, 0 to 3710
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3711 non-null   Int64         
 1   digitalLoanAccountId   3711 non-null   object        
 2   loanAccountNumber      3711 non-null   object        
 3   sil_beta_stack_score   3711 non-null   float64       
 4   appln_submit_datetime  3711 non-null   datetime64[us]
 5   disbursementdate       3711 non-null   dbdate        
 6   Application_month      3711 non-null   object        
 7   Data_selection         3711 non-null   object        
 8   deffstpd30             3711 non-null   Int64         
 9   flg_mature_fstpd_30    3711 non-null   Int64         
 10  new_loan_type          3711 non-null   object        
 11  modelVersionId         3711 non-null   object        
 12  trenchCategory         3711 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [704]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Row count per application month (NaN months included). This must be the cell's last
# expression: a notebook only displays the final expression, so with the statements in
# the opposite order the counts were computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [705]:
# df_concat.to_csv(r"sil_beta_stack_scorefstpd30.csv")

In [706]:
# Earlier variant, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_stack_score', 'deffstpd30', 'FSTPD30')
# Compute the Gini of sil_beta_stack_score against the deffstpd30 label, labelled
# 'FSTPD30'. Model version, trench and product are taken from the named columns; judging
# by the preview in the following cells, the helper returns one row per period window
# ('Week' / 'Month'), loan type, version and trench - see the helper's definition for
# the exact contract.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_stack_score', 
    'deffstpd30', 
    'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [707]:
# Snapshot the FSTPD30 Gini table as f4; it is one of the inputs of the final merge.
f4 = gini_results.copy()

In [708]:
# Preview the first ten rows of the FSTPD30 Gini table.
f4.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.284211,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
1,2024-08-01,2024-08-31,0.390056,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
2,2024-08-05,2024-08-11,0.515385,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
3,2024-08-12,2024-08-18,0.359375,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
4,2024-08-19,2024-08-25,0.288462,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
5,2024-08-26,2024-09-01,0.310185,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
6,2024-09-01,2024-09-30,0.329155,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
7,2024-09-02,2024-09-08,0.483333,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
8,2024-09-09,2024-09-15,0.575758,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2
9,2024-09-16,2024-09-22,0.521212,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2


In [709]:
# Long-format view of the FSTPD30 Gini table: key columns first, the Gini column renamed
# to the bare label 'FSTPD30', plus a constant 'category' tag identifying model, label,
# version and trench.
f40 = (
    f4[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FSTPD30_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='beta_stack_model_sil_fstpd30_v2_t2')
)
# Number of period rows per loan type / version / trench.
f40.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 2           67
SIL Competitor  v2       Trench 2           49
SIL ZERO        v2       Trench 2           61
SIL-Instore     v2       Trench 2           67
dtype: int64

In [710]:
# Preview the reshaped FSTPD30 table.
f40.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2,0.284211,beta_stack_model_sil_fstpd30_v2_t2
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2,0.390056,beta_stack_model_sil_fstpd30_v2_t2
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2,0.515385,beta_stack_model_sil_fstpd30_v2_t2
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2,0.359375,beta_stack_model_sil_fstpd30_v2_t2
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 2,0.288462,beta_stack_model_sil_fstpd30_v2_t2


## Combining the dataframes

In [711]:
import functools

# One gini frame per bad-rate definition; they are joined on the shared key columns.
dataframes = [f0, f1, f2, f3, f4]
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name',
    'version', 'loan_type', 'trench_category', 'bad_rate',
]


def merge_dataframes(df1, df2):
    """Outer-join two gini frames on the key columns listed in ``common_columns``."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): 'bad_rate' is part of the join key and appears to hold a different
# constant in each frame, so rows presumably never align across frames and each
# gini column stays NaN on the other frames' rows -- confirm this is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_stack_score_FPD10_gini',
       'sil_beta_stack_score_FPD30_gini',
       'sil_beta_stack_score_FSPD30_gini',
       'sil_beta_stack_score_FSTPD30_gini'], dtype=object)

In [712]:
# Capitalise the trench column name and insert the '_v2_t2' tag into every
# gini column name (…_<target>_gini -> …_<target>_v2_t2_gini).
final_df = final_df.rename(columns={
    'trench_category': 'Trench_category',
    **{
        f'sil_beta_stack_score_{target}_gini': f'sil_beta_stack_score_{target}_v2_t2_gini'
        for target in ('FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30')
    },
})
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_v2_t2_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_stack_score_FPD10_v2_t2_gini',
       'sil_beta_stack_score_FPD30_v2_t2_gini',
       'sil_beta_stack_score_FSPD30_v2_t2_gini',
       'sil_beta_stack_score_FSTPD30_v2_t2_gini'],
      dtype='object')

In [713]:
# Fix the column order (keys first, then the five gini columns) and append the
# constant model / product labels.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'loan_type', 'bad_rate', 'Trench_category',
        'sil_beta_stack_score_FPD0_v2_t2_gini',
        'sil_beta_stack_score_FPD10_v2_t2_gini',
        'sil_beta_stack_score_FPD30_v2_t2_gini',
        'sil_beta_stack_score_FSPD30_v2_t2_gini',
        'sil_beta_stack_score_FSTPD30_v2_t2_gini',
    ]]
    .assign(Model_display_name='beta_stack_model_sil', Product_type='SIL')
)
final_df.dtypes

start_date                                 datetime64[ns]
end_date                                   datetime64[ns]
period                                             object
Model_Name                                         object
version                                            object
loan_type                                          object
bad_rate                                           object
Trench_category                                    object
sil_beta_stack_score_FPD0_v2_t2_gini              float64
sil_beta_stack_score_FPD10_v2_t2_gini             float64
sil_beta_stack_score_FPD30_v2_t2_gini             float64
sil_beta_stack_score_FSPD30_v2_t2_gini            float64
sil_beta_stack_score_FSTPD30_v2_t2_gini           float64
Model_display_name                                 object
Product_type                                       object
dtype: object

In [714]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_stack_score_FPD0_v2_t2_gini,sil_beta_stack_score_FPD10_v2_t2_gini,sil_beta_stack_score_FPD30_v2_t2_gini,sil_beta_stack_score_FSPD30_v2_t2_gini,sil_beta_stack_score_FSTPD30_v2_t2_gini,Model_display_name,Product_type
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 2,0.684211,,,,,beta_stack_model_sil,SIL
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,Overall,FPD0,Trench 2,0.458693,,,,,beta_stack_model_sil,SIL
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 2,0.544643,,,,,beta_stack_model_sil,SIL
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 2,0.546875,,,,,beta_stack_model_sil,SIL
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 2,0.320955,,,,,beta_stack_model_sil,SIL


In [715]:
# Load final_df into the playground table. WRITE_TRUNCATE replaces whatever the
# table currently holds (use "WRITE_APPEND" to add rows instead).
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_sil_v2_t2_gini_v1"
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
# Block until the load job has finished.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a50d9430-cb0e-48d9-902e-834b6af07c04>

In [716]:
import functools

# Long-format gini frames built above, one per bad-rate definition.
dataframes = [f01, f10, f20, f30, f40]
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'trench_category', 'bad_rate', 'category',
]


def merge_dataframes(df1, df2):
    """Outer-join two gini frames on the key columns listed in ``common_columns``."""
    return df1.merge(df2, how='outer', on=common_columns)


# NOTE(review): 'bad_rate' and 'category' are part of the join key and appear to hold
# a different constant in each frame, so this outer merge presumably just stacks the
# rows (each metric column stays NaN on the other frames' rows) -- confirm intended.
betastacksilv2t2 = functools.reduce(merge_dataframes, dataframes)

In [717]:
betastacksilv2t2['category'].value_counts(dropna=False)

category
beta_stack_model_sil_fpd0_v2_t2       292
beta_stack_model_sil_fpd10_v2_t2      292
beta_stack_model_sil_fpd30_v2_t2      285
beta_stack_model_sil_fspd30_v2_t2     264
beta_stack_model_sil_fstpd30_v2_t2    244
Name: count, dtype: int64

In [718]:
# Stack every per-model / per-version / per-trench gini frame built so far into
# one long frame with a fresh 0..n-1 index.
result = pd.concat(
    [
        cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3,
        alphastacksilv1all, alphastacksilv2t1, alphastacksilv2t2, alphastacksilv2t3,
        betastacksilv1all, betastacksilv2t1, betastacksilv2t2,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (17974, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [719]:
result[['Model_Name', 'version', 'category']].value_counts(dropna=False).sort_index(ascending = False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                                beta_stack_model_sil_fpd0_v2_t2         292
                                beta_stack_model_sil_fpd0_v2_t1         301
                       v1       beta_stack_model_sil_fstpd30_v1_all     452
                                beta_stack_model_sil_fspd30_v1_all      476
                   

In [720]:
# Snapshot the combined frame under its own name; the deep copy keeps `result`
# unaffected by later changes to masterdf.
masterdf = result.copy(deep=True)
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (17974, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## Trench 3

## FPD0

## Test

In [721]:
# Test pull for FPD0 / Trench 3.
# Reads v2 beta stack scores from audit_balance.ml_model_run_details (null or empty
# trenchCategory is mapped to 'ALL'), joins them to the loan master on
# digitalLoanAccountId and to the delinquency flags on loanAccountNumber, and keeps
# disbursed loans with a non-null score and a matured FPD0 observation
# (flg_mature_fpd0 = 1). The final select keeps only trenchCategory = 'Trench 3'.
# Rows are labelled 'Test' in Data_selection.
# NOTE(review): end_time, modelDisplayName and the cleaned calcFeature are selected in
# `cleaned` but not read by the later CTEs of this query.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query and drop rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [722]:
df1 = dfd.copy()

## Train

In [723]:
# Train pull for FPD0 / Trench 3.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train' in Data_selection. Keeps disbursed loans with a non-null score and a
# matured FPD0 observation (flg_mature_fpd0 = 1), restricted to trenchCategory = 'Trench 3'.
# NOTE(review): end_time, modelDisplayName and the cleaned calcFeature are selected in
# `cleaned` but not read by the later CTEs of this query.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;

"""
# Run the query and drop rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2295508,f4194b66-257d-4486-8ed7-1a4f70b5b026,60822955080024,0.342711,2024-08-18 17:59:05,2024-08-18,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2367320,7a0ecde9-587e-42bc-a506-13594461a2e0,60823673200023,0.233013,2024-08-01 16:21:08,2024-08-01,2024-08,Train,1,1,SIL-Instore,v2,Trench 3
2,2458151,2e353874-648b-46d8-bb0f-1875cb4ddd2b,60824581510029,0.116084,2024-08-19 13:41:28,2024-08-19,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2280974,e7fdfd3d-c4e9-4a13-add6-e4768a97901a,60822809740021,0.135066,2024-08-26 19:12:44,2024-08-30,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2362659,67918f2f-9bbd-4597-8aaf-bba2e9af826d,60823626590027,0.21215,2024-08-02 16:50:01,2024-08-02,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [724]:
df2 = dfd.copy()

In [725]:
# Combine the Test (df1) and Train (df2) pulls into one frame with a fresh index.
# Empty pulls are left out of the concat: the Test pull for this trench came back
# empty, and concatenating an empty frame triggers pandas' FutureWarning about
# empty entries (they will start to influence the result dtypes in a future
# version). Excluding them keeps the current dtypes and silences the warning.
non_empty_pulls = [pull for pull in (df1, df2) if not pull.empty]
if non_empty_pulls:
    df_concat = pd.concat(non_empty_pulls, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries the columns.
    df_concat = df1.reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9108 entries, 0 to 9107
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9108 non-null   Int64         
 1   digitalLoanAccountId   9108 non-null   object        
 2   loanAccountNumber      9108 non-null   object        
 3   sil_beta_stack_score   9108 non-null   float64       
 4   appln_submit_datetime  9108 non-null   datetime64[us]
 5   disbursementdate       9108 non-null   dbdate        
 6   Application_month      9108 non-null   object        
 7   Data_selection         9108 non-null   object        
 8   deffpd0                9108 non-null   Int64         
 9   flg_mature_fpd0        9108 non-null   Int64         
 10  new_loan_type          9108 non-null   object        
 11  modelVersionId         9108 non-null   object        
 12  trenchCategory         9108 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [726]:
# Force the score to a numeric dtype (values that cannot be parsed become NaN),
# then list the row count per application month, newest month first.
df_concat = df_concat.assign(
    sil_beta_stack_score=pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
)
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending=False)

Application_month
2025-10    276
2025-09    500
2025-08    600
2025-07    665
2025-06    701
2025-05    650
2025-04    843
2025-03    721
2025-02    496
2025-01    468
2024-12    900
2024-11    799
2024-10    564
2024-09    461
2024-08    464
Name: count, dtype: int64

In [727]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd0.csv")

In [728]:
# Gini of sil_beta_stack_score against the deffpd0 flag, labelled 'FPD0', using the
# version/trench/product-aware helper (it replaced the plain calculate_periodic_gini call).
# NOTE(review): the helper is defined elsewhere in the notebook; its exact
# grouping logic is not visible here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'sil_beta_stack_score',
    'deffpd0',
    'FPD0',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [729]:
f0 = gini_results.copy()

In [730]:
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.099206,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
1,2024-08-01,2024-08-31,0.080773,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
2,2024-08-05,2024-08-11,0.109091,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
3,2024-08-12,2024-08-18,-0.051821,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
4,2024-08-19,2024-08-25,0.322188,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
5,2024-08-26,2024-09-01,-0.037778,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
6,2024-09-01,2024-09-30,0.303682,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
7,2024-09-02,2024-09-08,0.13253,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
8,2024-09-09,2024-09-15,0.342365,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3
9,2024-09-16,2024-09-22,0.110865,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3


In [731]:
# Long-format view of the FPD0 gini frame: keep the key columns, expose the gini
# under the short name 'FPD0' and tag every row with its category label.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FPD0_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FPD0_gini': 'FPD0'})
    .assign(category='beta_stack_model_sil_fpd0_v2_t3')
)

In [732]:
f01.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
beta_stack_model_sil_fpd0_v2_t3  Overall         v2       Trench 3           79
                                 SIL Competitor  v2       Trench 3           62
                                 SIL ZERO        v2       Trench 3           76
                                 SIL-Instore     v2       Trench 3           79
dtype: int64

In [733]:
f01.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3,0.099206,beta_stack_model_sil_fpd0_v2_t3
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FPD0,Overall,Trench 3,0.080773,beta_stack_model_sil_fpd0_v2_t3
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3,0.109091,beta_stack_model_sil_fpd0_v2_t3
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3,-0.051821,beta_stack_model_sil_fpd0_v2_t3
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FPD0,Overall,Trench 3,0.322188,beta_stack_model_sil_fpd0_v2_t3


## FPD10

## Test

In [734]:
# Test pull for FPD10 / Trench 3.
# Reads v2 beta stack scores from audit_balance.ml_model_run_details (null or empty
# trenchCategory is mapped to 'ALL'), joins them to the loan master on
# digitalLoanAccountId and to the delinquency flags on loanAccountNumber, and keeps
# disbursed loans with a non-null score and a matured FPD10 observation
# (flg_mature_fpd10 = 1). The final select keeps only trenchCategory = 'Trench 3'.
# Rows are labelled 'Test' in Data_selection.
# NOTE(review): end_time, modelDisplayName and the cleaned calcFeature are selected in
# `cleaned` but not read by the later CTEs of this query.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query and drop rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [735]:
df1 = dfd.copy()

## Train

In [736]:
# Train pull for FPD10 / Trench 3.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train' in Data_selection. Keeps disbursed loans with a non-null score and a
# matured FPD10 observation (flg_mature_fpd10 = 1), restricted to trenchCategory = 'Trench 3'.
# NOTE(review): end_time, modelDisplayName and the cleaned calcFeature are selected in
# `cleaned` but not read by the later CTEs of this query.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;

"""
# Run the query and drop rows that are exact duplicates (first occurrence kept).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2346530,345c4095-7096-461f-9503-79ac240e0c42,60823465300027,0.435051,2024-08-18 14:21:58,2024-08-18,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2417040,ae00ff0d-02cb-4dd1-b74e-ee68f9fb2318,60824170400029,0.405188,2024-08-22 09:44:29,2024-08-22,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2335858,38b2e0fe-6d25-4a62-9ca3-8a74dc731173,60823358580029,0.292322,2024-08-05 16:50:22,2024-08-05,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2128479,9a525b40-3369-4984-b406-0376f1c0d1ca,60821284790027,0.335267,2024-08-19 17:11:36,2024-08-19,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2354787,84c2e766-da4e-4cda-9bc0-d134158f9622,60823547870027,0.513333,2024-08-04 18:05:13,2024-08-04,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [737]:
df2 = dfd.copy()

In [738]:
# Combine the Test (df1) and Train (df2) pulls into one frame with a fresh index.
# Empty pulls are left out of the concat: the Test pull for this trench came back
# empty, and concatenating an empty frame triggers pandas' FutureWarning about
# empty entries (they will start to influence the result dtypes in a future
# version). Excluding them keeps the current dtypes and silences the warning.
non_empty_pulls = [pull for pull in (df1, df2) if not pull.empty]
if non_empty_pulls:
    df_concat = pd.concat(non_empty_pulls, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame that still carries the columns.
    df_concat = df1.reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9108 entries, 0 to 9107
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9108 non-null   Int64         
 1   digitalLoanAccountId   9108 non-null   object        
 2   loanAccountNumber      9108 non-null   object        
 3   sil_beta_stack_score   9108 non-null   float64       
 4   appln_submit_datetime  9108 non-null   datetime64[us]
 5   disbursementdate       9108 non-null   dbdate        
 6   Application_month      9108 non-null   object        
 7   Data_selection         9108 non-null   object        
 8   deffpd10               9108 non-null   Int64         
 9   flg_mature_fpd10       9108 non-null   Int64         
 10  new_loan_type          9108 non-null   object        
 11  modelVersionId         9108 non-null   object        
 12  trenchCategory         9108 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [739]:
# Force the score to a numeric dtype (values that cannot be parsed become NaN),
# then list the row count per application month, newest month first.
df_concat = df_concat.assign(
    sil_beta_stack_score=pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
)
df_concat['Application_month'].value_counts(dropna=False).sort_index(ascending=False)

Application_month
2025-10    276
2025-09    500
2025-08    600
2025-07    665
2025-06    701
2025-05    650
2025-04    843
2025-03    721
2025-02    496
2025-01    468
2024-12    900
2024-11    799
2024-10    564
2024-09    461
2024-08    464
Name: count, dtype: int64

In [740]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd10.csv")

In [741]:
# Gini of sil_beta_stack_score against the deffpd10 flag, labelled 'FPD10', using the
# version/trench/product-aware helper (it replaced the plain calculate_periodic_gini call).
# NOTE(review): the helper is defined elsewhere in the notebook; its exact
# grouping logic is not visible here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'sil_beta_stack_score',
    'deffpd10',
    'FPD10',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [742]:
f1 = gini_results.copy()

In [743]:
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.1,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
1,2024-08-01,2024-08-31,0.078577,Month,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
2,2024-08-05,2024-08-11,0.17029,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
3,2024-08-12,2024-08-18,-0.015152,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
4,2024-08-19,2024-08-25,0.8,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
5,2024-08-26,2024-09-01,-0.477663,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
6,2024-09-01,2024-09-30,0.321189,Month,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
7,2024-09-02,2024-09-08,0.250575,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
8,2024-09-09,2024-09-15,-0.041322,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3
9,2024-09-16,2024-09-22,0.437037,Week,sil_beta_stack_score,v2,FPD10,Overall,Trench 3


In [744]:
# Long-format view of the FPD10 gini frame: keep the key columns, expose the gini
# under the short name 'FPD10' and tag every row with its category label.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FPD10_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FPD10_gini': 'FPD10'})
    .assign(category='beta_stack_model_sil_fpd10_v2_t3')
)

In [745]:
f10.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
beta_stack_model_sil_fpd10_v2_t3  Overall         v2       Trench 3           79
                                  SIL Competitor  v2       Trench 3           62
                                  SIL ZERO        v2       Trench 3           76
                                  SIL-Instore     v2       Trench 3           79
dtype: int64

## FPD30

## Test

In [746]:
# Test pull for FPD30 / Trench 3.
# Reads v2 beta stack scores from audit_balance.ml_model_run_details (null or empty
# trenchCategory is mapped to 'ALL'), joins them to the loan master on
# digitalLoanAccountId and to the delinquency flags on loanAccountNumber, and keeps
# disbursed loans with a non-null score and a matured FPD30 observation
# (flg_mature_fpd30 = 1). The final select keeps only trenchCategory = 'Trench 3'.
# Rows are labelled 'Test' in Data_selection.
# NOTE(review): end_time, modelDisplayName and the cleaned calcFeature are selected in
# `cleaned` but not read by the later CTEs of this query.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
            case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [747]:
# Keep the 'Test' FPD30 pull under its own name; `dfd` is reassigned by the next query cell.
df1 = dfd.copy(deep=True)

## Train

In [748]:
# Pull the model-run scores labelled 'Train' for the FPD30 Gini calculation.
# Same CTE layout as the 'Test' pull, but `cleaned` reads from
# dap_ds_poweruser_playground.ml_training_model_run_details:
#   * cleaned    - rows for the two listed display names at modelVersionId 'v2';
#                  a NULL/empty trenchCategory becomes 'ALL'.
#   * modelname  - exposes `prediction` as sil_beta_stack_score.
#   * deliquency - per-loan 0/1 default flags and 0/1 maturity flags derived from
#                  the obs_min_inst_def* / min_inst_def* columns.
#   * base       - joins scores to loan_master_table and the flags, keeping
#                  disbursed loans with a non-null score and flg_mature_fpd30 = 1.
# Only rows whose trenchCategory is 'Trench 3' are returned.
# NOTE(review): the LEFT JOIN on loan_master_table effectively acts as an inner
# join, because the WHERE clause and the INNER JOIN both need loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
         when trenchCategory = '' then 'ALL'
         else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;

"""
# Run the query on BigQuery, drop rows that are exact duplicates across all
# columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2368076,a919037b-013e-4d76-b1cb-2b20ef0f3273,60823680760024,0.182039,2024-08-23 10:36:51,2024-08-23,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2488629,8b3d8025-3eee-4123-883e-8aeb26702709,60824886290021,0.255145,2024-08-11 10:56:01,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2263342,9be7b406-c74a-4bb5-8261-80cab45b36fc,60822633420035,0.326006,2024-08-01 11:47:48,2024-08-01,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2330538,ab3ae0a2-169b-4c7a-a459-ba89416c467c,60823305380026,0.184152,2024-08-16 09:35:23,2024-08-16,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2252748,8bfb5997-ce9b-4e3c-a250-120ada9bf2be,60822527480026,0.135292,2024-08-31 17:23:48,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [749]:
# Keep the 'Train' FPD30 pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [750]:
# Stack the 'Test' (df1) and 'Train' (df2) FPD30 pulls into one frame.
# Empty pulls are excluded first: concatenating an empty frame makes pandas emit
# a FutureWarning about empty/all-NA entries affecting the result dtypes (the
# 'Test' pull was empty in the recorded run). Both pulls select the same
# columns, so leaving an empty one out does not change the resulting schema.
# If every pull is empty, fall back to the unfiltered list so the result is
# still an empty frame with the expected columns instead of a ValueError.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8912 entries, 0 to 8911
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8912 non-null   Int64         
 1   digitalLoanAccountId   8912 non-null   object        
 2   loanAccountNumber      8912 non-null   object        
 3   sil_beta_stack_score   8912 non-null   float64       
 4   appln_submit_datetime  8912 non-null   datetime64[us]
 5   disbursementdate       8912 non-null   dbdate        
 6   Application_month      8912 non-null   object        
 7   Data_selection         8912 non-null   object        
 8   deffpd30               8912 non-null   Int64         
 9   flg_mature_fpd30       8912 non-null   Int64         
 10  new_loan_type          8912 non-null   object        
 11  modelVersionId         8912 non-null   object        
 12  trenchCategory         8912 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [751]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(
    df_concat['sil_beta_stack_score'],
    errors='coerce',
)
# Row count per application month (NaN months included), newest month first.
(
    df_concat['Application_month']
    .value_counts(dropna=False)
    .sort_index(ascending=False)
)

Application_month
2025-10     81
2025-09    499
2025-08    600
2025-07    665
2025-06    701
2025-05    650
2025-04    843
2025-03    721
2025-02    496
2025-01    468
2024-12    900
2024-11    799
2024-10    564
2024-09    461
2024-08    464
Name: count, dtype: int64

In [752]:
# df_concat.to_csv(r"sil_beta_stack_scorefpd30.csv")

In [753]:
# Periodic Gini of sil_beta_stack_score against the deffpd30 flag, labelled 'FPD30'.
# The helper is told which columns hold the product, model version and trench
# (this call replaces the earlier calculate_periodic_gini variant).
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [754]:
# Snapshot the FPD30 Gini table; `gini_results` is overwritten by the later FSPD30 run.
f2 = gini_results.copy(deep=True)

In [755]:
# Preview the first ten rows of the FPD30 Gini table.
f2.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.295082,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
1,2024-08-01,2024-08-31,0.233817,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
2,2024-08-05,2024-08-11,0.494681,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
3,2024-08-12,2024-08-18,0.165179,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
4,2024-08-19,2024-08-25,0.8,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
5,2024-08-26,2024-09-01,-0.477663,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
6,2024-09-01,2024-09-30,0.406181,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
7,2024-09-02,2024-09-08,0.400749,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
8,2024-09-09,2024-09-15,-0.180328,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3
9,2024-09-16,2024-09-22,0.626374,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3


In [756]:
# Tidy copy of the FPD30 Gini table: fixed column order, the Gini column renamed
# to 'FPD30', and a constant `category` tag identifying this model/trench run.
f20 = (
    f2[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FPD30_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FPD30_gini': 'FPD30'})
    .assign(category='beta_stack_model_sil_fpd30_v2_t3')
)
# Number of Gini rows per loan type / version / trench / category.
f20.groupby(['loan_type', 'version', 'trench_category', 'category']).size()

loan_type       version  trench_category  category                        
Overall         v2       Trench 3         beta_stack_model_sil_fpd30_v2_t3    78
SIL Competitor  v2       Trench 3         beta_stack_model_sil_fpd30_v2_t3    61
SIL ZERO        v2       Trench 3         beta_stack_model_sil_fpd30_v2_t3    75
SIL-Instore     v2       Trench 3         beta_stack_model_sil_fpd30_v2_t3    78
dtype: int64

In [757]:
# Preview the first five rows of the tidy FPD30 table.
f20.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3,0.295082,beta_stack_model_sil_fpd30_v2_t3
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FPD30,Overall,Trench 3,0.233817,beta_stack_model_sil_fpd30_v2_t3
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3,0.494681,beta_stack_model_sil_fpd30_v2_t3
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3,0.165179,beta_stack_model_sil_fpd30_v2_t3
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FPD30,Overall,Trench 3,0.8,beta_stack_model_sil_fpd30_v2_t3


## FSPD30

## Test

In [758]:
# Pull the model-run scores labelled 'Test' for the FSPD30 Gini calculation.
# The SQL below is built from four CTEs:
#   * cleaned    - rows of ml_model_run_details for the two listed display names
#                  at modelVersionId 'v2'; a NULL/empty trenchCategory becomes 'ALL'.
#                  (end_time, modelDisplayName and calcFeature are selected here
#                  but not used by the later CTEs.)
#   * modelname  - exposes `prediction` as sil_beta_stack_score.
#   * deliquency - per-loan 0/1 default flags and 0/1 maturity flags derived from
#                  the obs_min_inst_def* / min_inst_def* columns.
#   * base       - joins scores to loan_master_table and the flags, keeping
#                  disbursed loans with a non-null score and flg_mature_fspd_30 = 1;
#                  the target column returned is deffspd30.
# Only rows whose trenchCategory is 'Trench 3' are returned.
# NOTE(review): the LEFT JOIN on loan_master_table effectively acts as an inner
# join, because the WHERE clause and the INNER JOIN both need loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query on BigQuery, drop rows that are exact duplicates across all
# columns, and preview the result.
# NOTE(review): the recorded output of this cell shows only the column header
# (no rows) - confirm that an empty 'Test' pull is expected for Trench 3.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [759]:
# Keep the 'Test' FSPD30 pull under its own name; `dfd` is reassigned by the next query cell.
df1 = dfd.copy(deep=True)

## Train

In [760]:
# Pull the model-run scores labelled 'Train' for the FSPD30 Gini calculation.
# Same CTE layout as the 'Test' pull, but `cleaned` reads from
# dap_ds_poweruser_playground.ml_training_model_run_details:
#   * cleaned    - rows for the two listed display names at modelVersionId 'v2';
#                  a NULL/empty trenchCategory becomes 'ALL'.
#   * modelname  - exposes `prediction` as sil_beta_stack_score.
#   * deliquency - per-loan 0/1 default flags and 0/1 maturity flags derived from
#                  the obs_min_inst_def* / min_inst_def* columns.
#   * base       - joins scores to loan_master_table and the flags, keeping
#                  disbursed loans with a non-null score and flg_mature_fspd_30 = 1;
#                  the target column returned is deffspd30.
# Only rows whose trenchCategory is 'Trench 3' are returned.
# NOTE(review): the LEFT JOIN on loan_master_table effectively acts as an inner
# join, because the WHERE clause and the INNER JOIN both need loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;

"""
# Run the query on BigQuery, drop rows that are exact duplicates across all
# columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2259741,61ce90d9-047b-41be-9389-d92b559a7064,60822597410027,0.212368,2024-08-07 20:05:12,2024-08-07,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2192505,03935a3a-146e-4fde-8317-d2f5b6686314,60821925050023,0.12251,2024-08-06 15:00:20,2024-08-06,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2477180,030809d5-ebdb-4c4f-905e-730d508d17bb,60824771800021,0.178938,2024-08-12 15:21:44,2024-08-12,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2207922,c054ec57-f0da-4a6f-8e41-b21bfbae17df,60822079220027,0.35683,2024-08-29 11:27:46,2024-08-29,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2222536,aadadf3f-f98c-4a4e-9ff4-2a8168a78c9b,60822225360021,0.264306,2024-08-29 10:33:14,2024-08-29,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [761]:
# Keep the 'Train' FSPD30 pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [762]:
# Stack the 'Test' (df1) and 'Train' (df2) FSPD30 pulls into one frame.
# Empty pulls are excluded first: concatenating an empty frame makes pandas emit
# a FutureWarning about empty/all-NA entries affecting the result dtypes (the
# 'Test' pull was empty in the recorded run). Both pulls select the same
# columns, so leaving an empty one out does not change the resulting schema.
# If every pull is empty, fall back to the unfiltered list so the result is
# still an empty frame with the expected columns instead of a ValueError.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8393 entries, 0 to 8392
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8393 non-null   Int64         
 1   digitalLoanAccountId   8393 non-null   object        
 2   loanAccountNumber      8393 non-null   object        
 3   sil_beta_stack_score   8393 non-null   float64       
 4   appln_submit_datetime  8393 non-null   datetime64[us]
 5   disbursementdate       8393 non-null   dbdate        
 6   Application_month      8393 non-null   object        
 7   Data_selection         8393 non-null   object        
 8   deffspd30              8393 non-null   Int64         
 9   flg_mature_fspd_30     8393 non-null   Int64         
 10  new_loan_type          8393 non-null   object        
 11  modelVersionId         8393 non-null   object        
 12  trenchCategory         8393 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [763]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Row count per application month (NaN months included), oldest month first.
# Kept as the cell's last expression so the counts are actually displayed;
# as the first statement its result was computed and then discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [764]:
# df_concat.to_csv(r"sil_beta_stack_scorefspd30.csv")

In [765]:
# Periodic Gini of sil_beta_stack_score against the deffspd30 flag, labelled 'FSPD30'.
# The helper is told which columns hold the product, model version and trench
# (this call replaces the earlier calculate_periodic_gini variant).
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [766]:
# Snapshot the FSPD30 Gini table; `gini_results` is overwritten by the later FSTPD30 run.
f3 = gini_results.copy(deep=True)

In [767]:
# Preview the first ten rows of the FSPD30 Gini table.
f3.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.350282,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
1,2024-08-01,2024-08-31,0.322235,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
2,2024-08-05,2024-08-11,0.6,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
3,2024-08-12,2024-08-18,0.009091,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
4,2024-08-19,2024-08-25,0.591837,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
5,2024-08-26,2024-09-01,0.291113,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
6,2024-09-01,2024-09-30,0.384354,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
7,2024-09-02,2024-09-08,0.297619,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
8,2024-09-09,2024-09-15,0.298319,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3
9,2024-09-16,2024-09-22,0.407407,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3


In [768]:
# Tidy copy of the FSPD30 Gini table: fixed column order, the Gini column renamed
# to 'FSPD30', and a constant `category` tag identifying this model/trench run.
f30 = (
    f3[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FSPD30_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FSPD30_gini': 'FSPD30'})
    .assign(category='beta_stack_model_sil_fspd30_v2_t3')
)
# Number of Gini rows per loan type / version / trench.
f30.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 3           73
SIL Competitor  v2       Trench 3           56
SIL ZERO        v2       Trench 3           70
SIL-Instore     v2       Trench 3           73
dtype: int64

In [769]:
# Preview the first five rows of the tidy FSPD30 table.
f30.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3,0.350282,beta_stack_model_sil_fspd30_v2_t3
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3,0.322235,beta_stack_model_sil_fspd30_v2_t3
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3,0.6,beta_stack_model_sil_fspd30_v2_t3
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3,0.009091,beta_stack_model_sil_fspd30_v2_t3
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FSPD30,Overall,Trench 3,0.591837,beta_stack_model_sil_fspd30_v2_t3


## FSTPD30

## Test

In [770]:
# Pull the model-run scores labelled 'Test' for the FSTPD30 Gini calculation.
# The SQL below is built from four CTEs:
#   * cleaned    - rows of ml_model_run_details for the two listed display names
#                  at modelVersionId 'v2'; a NULL/empty trenchCategory becomes 'ALL'.
#                  (end_time, modelDisplayName and calcFeature are selected here
#                  but not used by the later CTEs.)
#   * modelname  - exposes `prediction` as sil_beta_stack_score.
#   * deliquency - per-loan 0/1 default flags and 0/1 maturity flags derived from
#                  the obs_min_inst_def* / min_inst_def* columns.
#   * base       - joins scores to loan_master_table and the flags, keeping
#                  disbursed loans with a non-null score and flg_mature_fstpd_30 = 1;
#                  the target column returned is deffstpd30.
# Only rows whose trenchCategory is 'Trench 3' are returned.
# NOTE(review): the LEFT JOIN on loan_master_table effectively acts as an inner
# join, because the WHERE clause and the INNER JOIN both need loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query on BigQuery, drop rows that are exact duplicates across all
# columns, and preview the result.
# NOTE(review): the recorded output of this cell shows only the column header
# (no rows) - confirm that an empty 'Test' pull is expected for Trench 3.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [771]:
# Keep the 'Test' FSTPD30 pull under its own name; `dfd` is reassigned by the next query cell.
df1 = dfd.copy(deep=True)

## Train

In [772]:
# Pull the model-run scores labelled 'Train' for the FSTPD30 Gini calculation.
# Same CTE layout as the 'Test' pull, but `cleaned` reads from
# dap_ds_poweruser_playground.ml_training_model_run_details:
#   * cleaned    - rows for the two listed display names at modelVersionId 'v2';
#                  a NULL/empty trenchCategory becomes 'ALL'.
#   * modelname  - exposes `prediction` as sil_beta_stack_score.
#   * deliquency - per-loan 0/1 default flags and 0/1 maturity flags derived from
#                  the obs_min_inst_def* / min_inst_def* columns.
#   * base       - joins scores to loan_master_table and the flags, keeping
#                  disbursed loans with a non-null score and flg_mature_fstpd_30 = 1;
#                  the target column returned is deffstpd30.
# Only rows whose trenchCategory is 'Trench 3' are returned.
# NOTE(review): the LEFT JOIN on loan_master_table effectively acts as an inner
# join, because the WHERE clause and the INNER JOIN both need loanmaster columns.
sq = """
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
        when trenchCategory = '' then 'ALL'
        else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature
  FROM  prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - StackScoreModel', 'beta_stack_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_stack_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_stack_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
    loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;

"""
# Run the query on BigQuery, drop rows that are exact duplicates across all
# columns, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_stack_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2185617,ba9e57cf-a44d-4a76-a37d-414630e9f0b6,60821856170027,0.250084,2024-08-04 17:24:35,2024-08-04,2024-08,Train,1,1,SIL ZERO,v2,Trench 3
1,2402814,bc606984-230d-430a-bd4f-4581577c0f44,60824028140026,0.225884,2024-08-14 14:41:48,2024-08-14,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2680796,185ca044-14d2-4a40-a520-cdd42a8a296a,60826807960025,0.241854,2024-08-20 18:16:50,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2235548,aa3280d7-ce38-4010-a003-66b6a675d6e0,60822355480022,0.283752,2024-08-31 17:52:52,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2252748,8bfb5997-ce9b-4e3c-a250-120ada9bf2be,60822527480026,0.135292,2024-08-31 17:23:48,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [773]:
# Keep the 'Train' FSTPD30 pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [774]:
# Stack the 'Test' (df1) and 'Train' (df2) FSTPD30 pulls into one frame.
# Empty pulls are excluded first: concatenating an empty frame makes pandas emit
# a FutureWarning about empty/all-NA entries affecting the result dtypes (the
# 'Test' pull was empty in the recorded run). Both pulls select the same
# columns, so leaving an empty one out does not change the resulting schema.
# If every pull is empty, fall back to the unfiltered list so the result is
# still an empty frame with the expected columns instead of a ValueError.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1, df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7828 entries, 0 to 7827
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7828 non-null   Int64         
 1   digitalLoanAccountId   7828 non-null   object        
 2   loanAccountNumber      7828 non-null   object        
 3   sil_beta_stack_score   7828 non-null   float64       
 4   appln_submit_datetime  7828 non-null   datetime64[us]
 5   disbursementdate       7828 non-null   dbdate        
 6   Application_month      7828 non-null   object        
 7   Data_selection         7828 non-null   object        
 8   deffstpd30             7828 non-null   Int64         
 9   flg_mature_fstpd_30    7828 non-null   Int64         
 10  new_loan_type          7828 non-null   object        
 11  modelVersionId         7828 non-null   object        
 12  trenchCategory         7828 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [775]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_stack_score'] = pd.to_numeric(df_concat['sil_beta_stack_score'], errors='coerce')
# Row count per application month (NaN months included), oldest month first.
# Kept as the cell's last expression so the counts are actually displayed;
# as the first statement its result was computed and then discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [776]:
# df_concat.to_csv(r"sil_beta_stack_scorefstpd30.csv")

In [777]:
# Periodic Gini of sil_beta_stack_score against the deffstpd30 flag, labelled 'FSTPD30'.
# The helper is told which columns hold the product, model version and trench
# (this call replaces the earlier calculate_periodic_gini variant).
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_stack_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [778]:
# Snapshot the FSTPD30 Gini table under its own name; f4 is merged with f0-f3 below.
f4 = gini_results.copy(deep=True)

In [779]:
# Preview the first ten rows of the FSTPD30 Gini table.
f4.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.192982,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
1,2024-08-01,2024-08-31,0.26642,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
2,2024-08-05,2024-08-11,0.544741,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
3,2024-08-12,2024-08-18,0.09434,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
4,2024-08-19,2024-08-25,0.416667,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
5,2024-08-26,2024-09-01,0.246212,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
6,2024-09-01,2024-09-30,0.2942,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
7,2024-09-02,2024-09-08,0.219512,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
8,2024-09-09,2024-09-15,0.253561,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3
9,2024-09-16,2024-09-22,0.195402,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3


In [780]:
# Tidy copy of the FSTPD30 Gini table: fixed column order, the Gini column renamed
# to 'FSTPD30', and a constant `category` tag identifying this model/trench run.
f40 = (
    f4[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_stack_score_FSTPD30_gini',
    ]]
    .rename(columns={'sil_beta_stack_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='beta_stack_model_sil_fstpd30_v2_t3')
)
# Number of Gini rows per loan type / version / trench.
f40.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 3           67
SIL Competitor  v2       Trench 3           50
SIL ZERO        v2       Trench 3           65
SIL-Instore     v2       Trench 3           67
dtype: int64

In [781]:
# Preview the first five rows of the tidy FSTPD30 table.
f40.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FSTPD30,category
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3,0.192982,beta_stack_model_sil_fstpd30_v2_t3
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3,0.26642,beta_stack_model_sil_fstpd30_v2_t3
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3,0.544741,beta_stack_model_sil_fstpd30_v2_t3
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3,0.09434,beta_stack_model_sil_fstpd30_v2_t3
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,FSTPD30,Overall,Trench 3,0.416667,beta_stack_model_sil_fstpd30_v2_t3


## Combining the dataframes

In [782]:
import functools

# Per-target Gini frames, merged left to right.
dataframes = [f0, f1, f2, f3, f4]
# Key columns shared by every frame; they form the join key.
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name',
    'version', 'loan_type', 'trench_category', 'bad_rate',
]


def merge_dataframes(df1, df2):
    """Outer-join two frames on the module-level ``common_columns`` key."""
    return df1.merge(df2, how='outer', on=common_columns)


final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_stack_score_FPD10_gini',
       'sil_beta_stack_score_FPD30_gini',
       'sil_beta_stack_score_FSPD30_gini',
       'sil_beta_stack_score_FSTPD30_gini'], dtype=object)

In [783]:
# Old -> new column names: capitalise the trench column and add the
# "_v2_t3" tag to each Gini column.
column_renames = {
    'trench_category':'Trench_category',
    'sil_beta_stack_score_FPD0_gini':'sil_beta_stack_score_FPD0_v2_t3_gini',
    'sil_beta_stack_score_FPD10_gini':'sil_beta_stack_score_FPD10_v2_t3_gini',
    'sil_beta_stack_score_FPD30_gini':'sil_beta_stack_score_FPD30_v2_t3_gini',
    'sil_beta_stack_score_FSPD30_gini':'sil_beta_stack_score_FSPD30_v2_t3_gini',
    'sil_beta_stack_score_FSTPD30_gini':'sil_beta_stack_score_FSTPD30_v2_t3_gini',
}
final_df.rename(columns=column_renames, inplace=True)
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_stack_score_FPD0_v2_t3_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_stack_score_FPD10_v2_t3_gini',
       'sil_beta_stack_score_FPD30_v2_t3_gini',
       'sil_beta_stack_score_FSPD30_v2_t3_gini',
       'sil_beta_stack_score_FSTPD30_v2_t3_gini'],
      dtype='object')

In [784]:
# Column order for the uploaded table: keys first, then the five Gini columns.
ordered_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate', 'Trench_category',
    'sil_beta_stack_score_FPD0_v2_t3_gini',
    'sil_beta_stack_score_FPD10_v2_t3_gini',
    'sil_beta_stack_score_FPD30_v2_t3_gini',
    'sil_beta_stack_score_FSPD30_v2_t3_gini',
    'sil_beta_stack_score_FSTPD30_v2_t3_gini',
]
# Reorder, then append two constant descriptor columns.
final_df = (
    final_df.loc[:, ordered_columns]
    .copy()
    .assign(Model_display_name='beta_stack_model_sil', Product_type='SIL')
)
final_df.dtypes

start_date                                 datetime64[ns]
end_date                                   datetime64[ns]
period                                             object
Model_Name                                         object
version                                            object
loan_type                                          object
bad_rate                                           object
Trench_category                                    object
sil_beta_stack_score_FPD0_v2_t3_gini              float64
sil_beta_stack_score_FPD10_v2_t3_gini             float64
sil_beta_stack_score_FPD30_v2_t3_gini             float64
sil_beta_stack_score_FSPD30_v2_t3_gini            float64
sil_beta_stack_score_FSTPD30_v2_t3_gini           float64
Model_display_name                                 object
Product_type                                       object
dtype: object

In [785]:
# Preview the reordered frame, including the two descriptor columns.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_stack_score_FPD0_v2_t3_gini,sil_beta_stack_score_FPD10_v2_t3_gini,sil_beta_stack_score_FPD30_v2_t3_gini,sil_beta_stack_score_FSPD30_v2_t3_gini,sil_beta_stack_score_FSTPD30_v2_t3_gini,Model_display_name,Product_type
0,2024-07-29,2024-08-04,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 3,0.099206,,,,,beta_stack_model_sil,SIL
1,2024-08-01,2024-08-31,Month,sil_beta_stack_score,v2,Overall,FPD0,Trench 3,0.080773,,,,,beta_stack_model_sil,SIL
2,2024-08-05,2024-08-11,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 3,0.109091,,,,,beta_stack_model_sil,SIL
3,2024-08-12,2024-08-18,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 3,-0.051821,,,,,beta_stack_model_sil,SIL
4,2024-08-19,2024-08-25,Week,sil_beta_stack_score,v2,Overall,FPD0,Trench 3,0.322188,,,,,beta_stack_model_sil,SIL


In [786]:
# Upload the combined Gini frame to BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_sil_v2_t3_gini_v1"

# WRITE_TRUNCATE replaces the table contents; "WRITE_APPEND" would add rows instead.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=2b443e62-82ad-49d8-9417-f0586ad403c2>

In [787]:
import functools

# Short-named per-target frames (one metric column each), merged left to right.
dataframes = [f01, f10, f20, f30, f40]
# Join key: the shared descriptor columns plus the per-frame category label.
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'trench_category', 'bad_rate', 'category',
]


def merge_dataframes(df1, df2):
    """Outer-join two frames on the module-level ``common_columns`` key."""
    return df1.merge(df2, how='outer', on=common_columns)


betastacksilv2t3 = functools.reduce(merge_dataframes, dataframes)

In [788]:
# Row count per category label (missing labels included) in the merged frame.
betastacksilv2t3['category'].value_counts(dropna=False)

category
beta_stack_model_sil_fpd0_v2_t3       296
beta_stack_model_sil_fpd10_v2_t3      296
beta_stack_model_sil_fpd30_v2_t3      292
beta_stack_model_sil_fspd30_v2_t3     272
beta_stack_model_sil_fstpd30_v2_t3    249
Name: count, dtype: int64

In [789]:
# Every per-model / per-version / per-trench frame, stacked in this order.
frames_to_stack = [
    cicsilscorev1all,
    cicsilscorev2t1,
    cicsilscorev2t2,
    cicsilscorev2t3,
    alphastacksilv1all,
    alphastacksilv2t1,
    alphastacksilv2t2,
    alphastacksilv2t3,
    betastacksilv1all,
    betastacksilv2t1,
    betastacksilv2t2,
    betastacksilv2t3,
]
# ignore_index=True gives the stacked frame a fresh 0..n-1 index.
result = pd.concat(frames_to_stack, axis=0, ignore_index=True)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (19379, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [790]:
# Row count per (Model_Name, version, category) combination, sorted by index in descending order.
result[['Model_Name', 'version', 'category']].value_counts(dropna=False).sort_index(ascending = False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [791]:
# Work on an independent (deep) copy so later edits to masterdf leave result untouched.
masterdf = result.copy(deep=True)
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head(5)

The shape of masterdf is:	 (19379, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


# Beta Sil App Score

## V1

## FPD0

## Test

In [792]:
# FPD0 target, scored ("Test") sample.
# The query is built from three CTEs:
#   cleaned    - v1 rows of audit_balance.ml_model_run_details for the two
#                apps-score display names; a NULL or empty trenchCategory becomes
#                'ALL', and the prediction text has ' replaced by " and None by
#                null (presumably so JSON_VALUE can parse it - confirm).
#   modelname  - reads $.combined_score from the rewritten prediction as
#                sil_beta_app_score.
#   deliquency - per-loan default flags (deffpd*) and maturity flags
#                (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and
# flg_mature_fpd0 = 1, and labels every row 'Test'.
# NOTE(review): loan_master_table is LEFT JOINed, but the WHERE clause filters on
# loanmaster columns and the INNER JOIN keys on loanmaster.loanAccountNumber, so
# unmatched rows are dropped anyway.
# NOTE(review): risk_credit_mis.loan_master_table carries no project prefix, so
# it resolves against the client's default project.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  ;
"""
# Run the query, then drop rows that are identical in every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2948381,5a7fd43c-4b1c-437e-958d-4ed37fd62cca,60829483810018,0.505596,2025-07-28 12:14:11,2025-07-28,2025-07,Test,0,1,SIL Competitor,v1,ALL
1,3290908,94a727e2-6f4b-4cf3-93a9-8cc436933a55,60832909080023,0.397194,2025-07-28 13:27:44,2025-07-28,2025-07,Test,0,1,SIL-Instore,v1,ALL
2,3580172,86400237-863d-4fc8-9bdb-e8db3ea41daa,60835801720012,0.583031,2025-07-28 12:54:30,2025-07-28,2025-07,Test,0,1,SIL-Instore,v1,ALL
3,3583775,4d621b31-5d4a-4b4d-991b-1d58771ae75e,60835837750015,0.55658,2025-07-28 12:03:20,2025-07-28,2025-07,Test,0,1,SIL Competitor,v1,ALL
4,3583782,427ad290-c66c-464b-ae28-f74ddf291658,60835837820014,0.201762,2025-07-28 11:58:01,2025-07-28,2025-07,Test,0,1,SIL Competitor,v1,ALL


In [793]:
# Snapshot the Test-sample rows as df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [794]:
# FPD0 target, training ("Train") sample.
# Same shape as the Test query, with two differences visible in the SQL:
#   - rows come from dap_ds_poweruser_playground.ml_training_model_run_details;
#   - the score is coalesce(prediction, $.combined_score of the rewritten
#     prediction text), so the raw prediction column wins when it is not NULL.
# The final SELECT keeps disbursed loans with a non-null score and
# flg_mature_fpd0 = 1, and labels every row 'Train'.
# NOTE(review): loan_master_table is LEFT JOINed, but the WHERE clause filters on
# loanmaster columns and the INNER JOIN keys on loanmaster.loanAccountNumber, so
# unmatched rows are dropped anyway.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  ;

"""

# Run the query, then drop rows that are identical in every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2065194,71d1a2d3-d5c0-465e-a923-17a4cc7fd818,60820651940011,0.620883,2023-05-28 14:10:16,2023-05-28,2023-05,Train,1,1,SIL-Instore,v1,ALL
1,1968323,75d352e3-89e5-445d-8b5a-ee769f52c4d8,60819683230011,0.52573,2023-03-30 12:17:45,2023-03-30,2023-03,Train,1,1,SIL-Instore,v1,ALL
2,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,0.447169,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
3,1846848,ac41223e-0d42-4569-b21f-789bce021291,60818468480011,0.518034,2023-01-02 10:50:43,2023-01-02,2023-01,Train,0,1,SIL-Instore,v1,ALL
4,1861570,38bad92e-ad5c-4d5e-b1b5-7e4add8c233c,60818615700011,0.548146,2023-01-14 15:41:55,2023-01-15,2023-01,Train,0,1,SIL-Instore,v1,ALL


In [795]:
# Snapshot the Train-sample rows as df2 so they can be stacked with df1.
df2 = dfd.copy()

In [796]:
# Stack the Test (df1) and Train (df2) samples row-wise under a fresh index.
samples = [df1, df2]
df_concat = pd.concat(samples, axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251565 entries, 0 to 251564
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             251565 non-null  object        
 1   digitalLoanAccountId   251565 non-null  object        
 2   loanAccountNumber      251565 non-null  object        
 3   sil_beta_app_score     251565 non-null  float64       
 4   appln_submit_datetime  251565 non-null  datetime64[us]
 5   disbursementdate       251565 non-null  dbdate        
 6   Application_month      251565 non-null  object        
 7   Data_selection         251565 non-null  object        
 8   deffpd0                251565 non-null  Int64         
 9   flg_mature_fpd0        251565 non-null  Int64         
 10  new_loan_type          251565 non-null  object        
 11  modelVersionId         251565 non-null  object        
 12  trenchCategory         251565 non-null  obje

In [797]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Rows per application month, in chronological order. This is kept as the last
# expression of the cell so the notebook renders it; when an assignment followed
# it, the result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [798]:
# df_concat.to_csv(r"sil_beta_app_score.csv")

In [799]:
# Periodic Gini of sil_beta_app_score against the deffpd0 flag, labelled 'FPD0'.
# The keyword arguments name the model-version, trench and product columns.
grouping_columns = {
    'model_version_column': 'modelVersionId',
    'trench_column': 'trenchCategory',
    'product_column': 'new_loan_type',
}
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd0', 'FPD0', **grouping_columns
)

In [800]:
# Keep the FPD0 Gini table under its own name; gini_results is reassigned for the next target.
f0 = gini_results.copy()

In [801]:
# First ten rows of the FPD0 Gini table.
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.156053,Month,sil_beta_app_score,v1,FPD0,Overall,ALL
1,2023-01-02,2023-01-08,0.181917,Week,sil_beta_app_score,v1,FPD0,Overall,ALL
2,2023-01-09,2023-01-15,0.297158,Week,sil_beta_app_score,v1,FPD0,Overall,ALL
3,2023-01-16,2023-01-22,0.170139,Week,sil_beta_app_score,v1,FPD0,Overall,ALL
4,2023-01-23,2023-01-29,0.083217,Week,sil_beta_app_score,v1,FPD0,Overall,ALL
5,2023-01-30,2023-02-05,0.297507,Week,sil_beta_app_score,v1,FPD0,Overall,ALL
6,2023-02-01,2023-02-28,0.293496,Month,sil_beta_app_score,v1,FPD0,Overall,ALL
7,2023-02-06,2023-02-12,0.413246,Week,sil_beta_app_score,v1,FPD0,Overall,ALL
8,2023-02-13,2023-02-19,0.164119,Week,sil_beta_app_score,v1,FPD0,Overall,ALL
9,2023-02-20,2023-02-26,0.302374,Week,sil_beta_app_score,v1,FPD0,Overall,ALL


In [802]:
# Key columns plus the FPD0 Gini, with the Gini exposed under the short name
# 'FPD0' and every row labelled with the model/target/version/trench tag.
f01 = (
    f0[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_app_score_FPD0_gini']]
    .rename(columns={'sil_beta_app_score_FPD0_gini':'FPD0'})
    .assign(category='sil_beta_app_score_FPD0_v1_all')
)
# Row counts per loan type / version / trench.
f01.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v1       ALL                184
SIL Competitor  v1       ALL                 51
SIL Repeat      v1       ALL                 14
SIL ZERO        v1       ALL                 83
SIL-Instore     v1       ALL                184
dtype: int64

## FPD10

## Test

In [803]:
# FPD10 target, scored ("Test") sample.
# The query is built from three CTEs:
#   cleaned    - v1 rows of audit_balance.ml_model_run_details for the two
#                apps-score display names; a NULL or empty trenchCategory becomes
#                'ALL', and the prediction text has ' replaced by " and None by
#                null (presumably so JSON_VALUE can parse it - confirm).
#   modelname  - reads $.combined_score from the rewritten prediction as
#                sil_beta_app_score.
#   deliquency - per-loan default flags (deffpd*) and maturity flags
#                (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and
# flg_mature_fpd10 = 1, and labels every row 'Test'.
# NOTE(review): loan_master_table is LEFT JOINed, but the WHERE clause filters on
# loanmaster columns and the INNER JOIN keys on loanmaster.loanAccountNumber, so
# unmatched rows are dropped anyway.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""
# Run the query, then drop rows that are identical in every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3412343,2f95f289-bf8d-4a17-ba4c-1be18dac848d,60834123430011,0.528953,2025-05-01 10:43:20,2025-05-01,2025-05,Test,0,1,SIL-Instore,v1,ALL
1,3412450,8e2fbf41-931c-4a69-ae11-6c881c1b46d5,60834124500012,0.499481,2025-05-01 11:14:33,2025-05-01,2025-05,Test,0,1,SIL-Instore,v1,ALL
2,3412615,333801f4-a06b-4dad-b1cb-d7e0463250e6,60834126150015,0.432927,2025-05-01 12:19:03,2025-05-01,2025-05,Test,0,1,SIL-Instore,v1,ALL
3,3412756,de27053b-ee7b-49e3-a0d9-c5bfe4b2926e,60834127560017,0.501764,2025-05-01 12:53:15,2025-05-01,2025-05,Test,0,1,SIL-Instore,v1,ALL
4,3412640,3b7fab95-05dd-4465-91eb-665752cfca13,60834126400016,0.389994,2025-05-01 12:53:39,2025-05-01,2025-05,Test,0,1,SIL-Instore,v1,ALL


In [804]:
# Snapshot the FPD10 Test-sample rows as df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [805]:
# FPD10 target, training ("Train") sample.
# Same shape as the Test query, with two differences visible in the SQL:
#   - rows come from dap_ds_poweruser_playground.ml_training_model_run_details;
#   - the score is coalesce(prediction, $.combined_score of the rewritten
#     prediction text), so the raw prediction column wins when it is not NULL.
# The final SELECT keeps disbursed loans with a non-null score and
# flg_mature_fpd10 = 1, and labels every row 'Train'.
# NOTE(review): loan_master_table is LEFT JOINed, but the WHERE clause filters on
# loanmaster columns and the INNER JOIN keys on loanmaster.loanAccountNumber, so
# unmatched rows are dropped anyway.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  ;

"""

# Run the query, then drop rows that are identical in every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,1973747,36a4c276-3fef-4ee1-9946-79a8fc524bed,60819737470016,0.471495,2023-04-02 11:27:11,2023-04-02,2023-04,Train,0,1,SIL-Instore,v1,ALL
1,2065522,3fe094d1-232c-4c18-9975-f137b88fea87,60820655220017,0.694932,2023-05-28 16:42:49,2023-05-28,2023-05,Train,1,1,SIL-Instore,v1,ALL
2,2106435,ceaac794-f314-4903-bc91-e1a21c373008,60821064350019,0.560792,2023-06-24 17:42:25,2023-06-24,2023-06,Train,0,1,SIL-Instore,v1,ALL
3,2037756,13be6e8c-9a85-4016-8ddd-8bf646efed81,60820377560016,0.627201,2023-05-10 14:31:13,2023-05-10,2023-05,Train,0,1,SIL-Instore,v1,ALL
4,1920144,68303aac-138d-4f57-a96e-c3a0f1062e30,60819201440018,0.593074,2023-02-27 16:53:26,2023-02-27,2023-02,Train,0,1,SIL-Instore,v1,ALL


In [806]:
# Snapshot the FPD10 Train-sample rows as df2 so they can be stacked with df1.
df2 = dfd.copy()

In [807]:
# Stack the Test (df1) and Train (df2) samples row-wise under a fresh index.
samples = [df1, df2]
df_concat = pd.concat(samples, axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 245200 entries, 0 to 245199
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             245200 non-null  object        
 1   digitalLoanAccountId   245200 non-null  object        
 2   loanAccountNumber      245200 non-null  object        
 3   sil_beta_app_score     245200 non-null  float64       
 4   appln_submit_datetime  245200 non-null  datetime64[us]
 5   disbursementdate       245200 non-null  dbdate        
 6   Application_month      245200 non-null  object        
 7   Data_selection         245200 non-null  object        
 8   deffpd10               245200 non-null  Int64         
 9   flg_mature_fpd10       245200 non-null  Int64         
 10  new_loan_type          245200 non-null  object        
 11  modelVersionId         245200 non-null  object        
 12  trenchCategory         245200 non-null  obje

In [808]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Rows per application month, in chronological order. This is kept as the last
# expression of the cell so the notebook renders it; when an assignment followed
# it, the result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [809]:
# df_concat.to_csv(r"sil_beta_app_scorefpd10.csv")

In [810]:
# Periodic Gini of sil_beta_app_score against the deffpd10 flag, labelled 'FPD10'.
# The keyword arguments name the model-version, trench and product columns.
grouping_columns = {
    'model_version_column': 'modelVersionId',
    'trench_column': 'trenchCategory',
    'product_column': 'new_loan_type',
}
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd10', 'FPD10', **grouping_columns
)

In [811]:
# Keep the FPD10 Gini table under its own name, independent of gini_results.
f1 = gini_results.copy()

In [812]:
# First ten rows of the FPD10 Gini table.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.22585,Month,sil_beta_app_score,v1,FPD10,Overall,ALL
1,2023-01-02,2023-01-08,0.196935,Week,sil_beta_app_score,v1,FPD10,Overall,ALL
2,2023-01-09,2023-01-15,0.381288,Week,sil_beta_app_score,v1,FPD10,Overall,ALL
3,2023-01-16,2023-01-22,0.210526,Week,sil_beta_app_score,v1,FPD10,Overall,ALL
4,2023-01-23,2023-01-29,0.191667,Week,sil_beta_app_score,v1,FPD10,Overall,ALL
5,2023-01-30,2023-02-05,0.411765,Week,sil_beta_app_score,v1,FPD10,Overall,ALL
6,2023-02-01,2023-02-28,0.275626,Month,sil_beta_app_score,v1,FPD10,Overall,ALL
7,2023-02-06,2023-02-12,0.378472,Week,sil_beta_app_score,v1,FPD10,Overall,ALL
8,2023-02-13,2023-02-19,0.03794,Week,sil_beta_app_score,v1,FPD10,Overall,ALL
9,2023-02-20,2023-02-26,0.318296,Week,sil_beta_app_score,v1,FPD10,Overall,ALL


In [813]:
# Key columns plus the FPD10 Gini, with the Gini exposed under the short name
# 'FPD10' and every row labelled with the model/target/version/trench tag.
f10 = (
    f1[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_app_score_FPD10_gini']]
    .rename(columns={'sil_beta_app_score_FPD10_gini':'FPD10'})
    .assign(category='sil_beta_app_score_FPD10_v1_all')
)
# Row counts per category / loan type / version / trench.
f10.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_app_score_FPD10_v1_all  Overall         v1       ALL                181
                                 SIL Competitor  v1       ALL                 48
                                 SIL Repeat      v1       ALL                 14
                                 SIL ZERO        v1       ALL                 80
                                 SIL-Instore     v1       ALL                181
dtype: int64

## FPD30

## Test

In [814]:
# FPD30 target, scored ("Test") sample.
# The query is built from three CTEs:
#   cleaned    - v1 rows of audit_balance.ml_model_run_details for the two
#                apps-score display names; a NULL or empty trenchCategory becomes
#                'ALL', and the prediction text has ' replaced by " and None by
#                null (presumably so JSON_VALUE can parse it - confirm).
#   modelname  - reads $.combined_score from the rewritten prediction as
#                sil_beta_app_score.
#   deliquency - per-loan default flags (deffpd*) and maturity flags
#                (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and
# flg_mature_fpd30 = 1, and labels every row 'Test'.
# NOTE(review): loan_master_table is LEFT JOINed, but the WHERE clause filters on
# loanmaster columns and the INNER JOIN keys on loanmaster.loanAccountNumber, so
# unmatched rows are dropped anyway.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""
# Run the query, then drop rows that are identical in every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2995479,479e92bd-c5f2-49be-a59b-d12f48f0d291,60829954790028,0.395407,2025-04-19 10:38:59,2025-04-19,2025-04,Test,0,1,SIL ZERO,v1,ALL
1,3388407,cf69e38c-9731-4d6f-aac9-d54043ce8c30,60833884070012,0.526122,2025-04-19 11:15:58,2025-04-19,2025-04,Test,0,1,SIL-Instore,v1,ALL
2,3388242,5b6dc3b2-98b7-4eca-8e42-52e2f69610b1,60833882420019,0.407495,2025-04-19 10:25:30,2025-04-19,2025-04,Test,0,1,SIL ZERO,v1,ALL
3,3388247,8ba0951d-f590-49db-a199-e9626120de96,60833882470014,0.555504,2025-04-19 10:29:06,2025-04-19,2025-04,Test,0,1,SIL-Instore,v1,ALL
4,3388385,317b896a-1ad4-4771-8656-dd48e9301661,60833883850013,0.520217,2025-04-19 11:08:29,2025-04-19,2025-04,Test,0,1,SIL-Instore,v1,ALL


In [815]:
# Keep an independent snapshot of the Test extract; `dfd` is reused for the Train extract.
df1 = dfd.copy(deep=True)

## Train

In [816]:
# FPD30 / Train: same extract as the Test query, but sourced from
# `dap_ds_poweruser_playground.ml_training_model_run_details`.
#   cleaned    - `prediction` is cast to STRING before the quote/None clean-up.
#   modelname  - score = `prediction` when populated, otherwise `combined_score`
#                parsed from the cleaned text (implies `prediction` is numeric in
#                this table - TODO confirm).
#   deliquency - derives default flags and maturity flags from
#                obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and flg_mature_fpd30 = 1.
# NOTE(review): the WHERE filters on `loanmaster` and the inner join to `deliquency`
# reject unmatched rows, so the LEFT JOIN effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  ;

"""

# Run the query and drop rows that are identical in every column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2061957,9f4760f5-6e05-4773-8f9a-6043c200bb92,60820619570014,0.431396,2023-05-26 13:37:18,2023-05-26,2023-05,Train,0,1,SIL-Instore,v1,ALL
1,1851563,2d3d7193-6273-4bf0-9498-1edf8f54ef2b,60818515630016,0.440519,2023-01-05 19:02:20,2023-01-05,2023-01,Train,0,1,SIL-Instore,v1,ALL
2,1870869,f7f2798c-af9a-4a26-8e6e-413d3db8d2e7,60818708690017,0.418685,2023-01-23 10:31:07,2023-01-23,2023-01,Train,0,1,SIL-Instore,v1,ALL
3,1953421,255d2710-a522-432e-ae1c-685fb395e9ae,60819534210017,0.279308,2023-03-21 15:49:57,2023-03-21,2023-03,Train,0,1,SIL-Instore,v1,ALL
4,2013190,b0a5c934-96a1-4646-821c-dc84204ea298,60820131900014,0.396386,2023-04-25 13:46:12,2023-04-25,2023-04,Train,0,1,SIL-Instore,v1,ALL


In [817]:
# Independent snapshot of the Train extract.
df2 = dfd.copy(deep=True)

In [818]:
# Stack the Test (df1) and Train (df2) extracts row-wise under a fresh 0..n-1 index,
# then print the column/dtype/null summary.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237205 entries, 0 to 237204
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             237205 non-null  object        
 1   digitalLoanAccountId   237205 non-null  object        
 2   loanAccountNumber      237205 non-null  object        
 3   sil_beta_app_score     237205 non-null  float64       
 4   appln_submit_datetime  237205 non-null  datetime64[us]
 5   disbursementdate       237205 non-null  dbdate        
 6   Application_month      237205 non-null  object        
 7   Data_selection         237205 non-null  object        
 8   deffpd30               237205 non-null  Int64         
 9   flg_mature_fpd30       237205 non-null  Int64         
 10  new_loan_type          237205 non-null  object        
 11  modelVersionId         237205 non-null  object        
 12  trenchCategory         237205 non-null  obje

In [819]:
# Make sure the score column is numeric; anything unparsable becomes NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month (NaN months included), in chronological order.
# This must be the LAST expression of the cell: a bare expression anywhere else is
# evaluated and discarded, so the distribution was never displayed before.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [820]:
# df_concat.to_csv(r"sil_beta_app_scorefpd30.csv")

In [821]:
# Earlier, un-segmented call kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffpd30', 'FPD30')

# Periodic Gini of sil_beta_app_score against the FPD30 default flag, segmented by
# product (new_loan_type), model version and trench category.
# The helper is defined elsewhere in this notebook.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [822]:
# Keep the FPD30 Gini table under its own name; `gini_results` is overwritten by later sections.
f2 = gini_results.copy(deep=True)

In [823]:
# Preview the first ten FPD30 Gini rows.
f2.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.26979,Month,sil_beta_app_score,v1,FPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.257051,Week,sil_beta_app_score,v1,FPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.555556,Week,sil_beta_app_score,v1,FPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.210526,Week,sil_beta_app_score,v1,FPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.130579,Week,sil_beta_app_score,v1,FPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.496911,Week,sil_beta_app_score,v1,FPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.305164,Month,sil_beta_app_score,v1,FPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.334583,Week,sil_beta_app_score,v1,FPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.096774,Week,sil_beta_app_score,v1,FPD30,Overall,ALL
9,2023-02-20,2023-02-26,0.437681,Week,sil_beta_app_score,v1,FPD30,Overall,ALL


In [824]:
# Long-format FPD30 view: key columns first, the Gini column renamed to 'FPD30',
# plus a constant `category` label.
f20 = (
    f2[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FPD30_gini': 'FPD30'})
    .assign(category='sil_beta_app_score_FPD30_v1_all')
)
# Sanity check: number of period rows per product / version / trench segment.
f20.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_app_score_FPD30_v1_all  Overall         v1       ALL                179
                                 SIL Competitor  v1       ALL                 46
                                 SIL Repeat      v1       ALL                 14
                                 SIL ZERO        v1       ALL                 78
                                 SIL-Instore     v1       ALL                179
dtype: int64

## FSPD30

## Test

In [825]:
# FSPD30 / Test: scored, disbursed loans from the production run log
# (`audit_balance.ml_model_run_details`) joined to their FSPD30 outcome.
#   cleaned    - v1 runs of the two SIL app-score model names; blank/NULL trenchCategory
#                becomes 'ALL'; quotes and None in `prediction` are rewritten so that
#                JSON_VALUE can parse it (presumably a Python dict repr - verify).
#   modelname  - extracts `combined_score` as sil_beta_app_score.
#   deliquency - derives default flags and maturity flags from
#                obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and flg_mature_fspd_30 = 1.
# NOTE(review): the WHERE filters on `loanmaster` and the inner join to `deliquency`
# reject unmatched rows, so the LEFT JOIN effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,trenchCategory

  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""
# Run the query and drop rows that are identical in every column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3352367,37c95d5e-8d01-4967-8069-dee129384854,60833523670012,0.503878,2025-04-01 08:55:22,2025-04-01,2025-04,Test,0,1,SIL-Instore,v1,ALL
1,3431554,19c9c6b1-2d3b-4a0d-8c74-943294573ead,60834315540015,0.441843,2025-05-11 15:30:58,2025-05-11,2025-05,Test,0,1,SIL-Instore,v1,ALL
2,3342144,800047a9-e040-4eb7-89e5-442fff0b4feb,60833421440014,0.517661,2025-03-26 18:34:35,2025-03-26,2025-03,Test,0,1,SIL-Instore,v1,ALL
3,3438031,57fb7dda-2157-4cba-a811-a1dc8ef2563e,60834380310011,0.423818,2025-05-15 11:41:07,2025-05-15,2025-05,Test,0,1,SIL-Instore,v1,ALL
4,3341205,749070f3-2fc8-41dd-a163-6dd0d92e52fa,60833412050016,0.366467,2025-03-26 11:26:34,2025-03-26,2025-03,Test,0,1,SIL-Instore,v1,ALL


In [826]:
# Keep an independent snapshot of the Test extract; `dfd` is reused for the Train extract.
df1 = dfd.copy(deep=True)

## Train

In [827]:
# FSPD30 / Train: same extract as the Test query, but sourced from
# `dap_ds_poweruser_playground.ml_training_model_run_details`.
#   cleaned    - `prediction` is cast to STRING before the quote/None clean-up.
#   modelname  - score = `prediction` when populated, otherwise `combined_score`
#                parsed from the cleaned text (implies `prediction` is numeric in
#                this table - TODO confirm).
#   deliquency - derives default flags and maturity flags from
#                obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and flg_mature_fspd_30 = 1.
# NOTE(review): the WHERE filters on `loanmaster` and the inner join to `deliquency`
# reject unmatched rows, so the LEFT JOIN effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  ;

"""

# Run the query and drop rows that are identical in every column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,1922965,1ce8af34-6d72-425d-bf1f-01c0404e2f95,60819229650015,0.4431,2023-03-01 14:56:45,2023-03-01,2023-03,Train,0,1,SIL-Instore,v1,ALL
1,2059837,5f9a2b85-6cfa-4200-9188-97b81a9df0b9,60820598370014,0.359386,2023-05-24 12:35:39,2023-05-24,2023-05,Train,0,1,SIL-Instore,v1,ALL
2,1983662,fe24995f-db9c-4d6d-942e-54806fdda594,60819836620017,0.491796,2023-04-08 12:16:07,2023-04-08,2023-04,Train,0,1,SIL-Instore,v1,ALL
3,2008762,d730cc64-57cd-4c2b-b96e-19aa05759a9a,60820087620013,0.355909,2023-04-22 15:31:05,2023-04-22,2023-04,Train,1,1,SIL-Instore,v1,ALL
4,1960101,804ee12d-6647-46c5-90c9-99422bf75782,60819601010017,0.418632,2023-03-25 15:50:20,2023-03-25,2023-03,Train,0,1,SIL-Instore,v1,ALL


In [828]:
# Independent snapshot of the Train extract.
df2 = dfd.copy(deep=True)

In [829]:
# Stack the Test (df1) and Train (df2) extracts row-wise under a fresh 0..n-1 index,
# then print the column/dtype/null summary.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 225408 entries, 0 to 225407
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             225408 non-null  object        
 1   digitalLoanAccountId   225408 non-null  object        
 2   loanAccountNumber      225408 non-null  object        
 3   sil_beta_app_score     225408 non-null  float64       
 4   appln_submit_datetime  225408 non-null  datetime64[us]
 5   disbursementdate       225408 non-null  dbdate        
 6   Application_month      225408 non-null  object        
 7   Data_selection         225408 non-null  object        
 8   deffspd30              225408 non-null  Int64         
 9   flg_mature_fspd_30     225408 non-null  Int64         
 10  new_loan_type          225408 non-null  object        
 11  modelVersionId         225408 non-null  object        
 12  trenchCategory         225408 non-null  obje

In [830]:
# Make sure the score column is numeric; anything unparsable becomes NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month (NaN months included), in chronological order.
# This must be the LAST expression of the cell: a bare expression anywhere else is
# evaluated and discarded, so the distribution was never displayed before.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [831]:
# df_concat.to_csv(r"sil_beta_app_scorefspd30.csv")

In [832]:
# Earlier, un-segmented call kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffspd30', 'FSPD30')

# Periodic Gini of sil_beta_app_score against the FSPD30 default flag, segmented by
# product (new_loan_type), model version and trench category.
# The helper is defined elsewhere in this notebook.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [833]:
# Keep the FSPD30 Gini table under its own name; `gini_results` is overwritten by later sections.
f3 = gini_results.copy(deep=True)

In [834]:
# Preview the first ten FSPD30 Gini rows.
f3.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.247928,Month,sil_beta_app_score,v1,FSPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.124845,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.399069,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.364341,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.246377,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.419042,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.330061,Month,sil_beta_app_score,v1,FSPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.419753,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.188025,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL
9,2023-02-20,2023-02-26,0.318043,Week,sil_beta_app_score,v1,FSPD30,Overall,ALL


In [835]:
# Long-format FSPD30 view: key columns first, the Gini column renamed to 'FSPD30',
# plus a constant `category` label.
f30 = (
    f3[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FSPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FSPD30_gini': 'FSPD30'})
    .assign(category='sil_beta_app_score_FSPD30_v1_all')
)
# Sanity check: number of period rows per product / version / trench segment.
f30.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_app_score_FSPD30_v1_all  Overall         v1       ALL                174
                                  SIL Competitor  v1       ALL                 41
                                  SIL Repeat      v1       ALL                 14
                                  SIL ZERO        v1       ALL                 73
                                  SIL-Instore     v1       ALL                174
dtype: int64

## FSTPD30

## Test

In [836]:
# FSTPD30 / Test: scored, disbursed loans from the production run log
# (`audit_balance.ml_model_run_details`) joined to their FSTPD30 outcome.
#   cleaned    - v1 runs of the two SIL app-score model names; blank/NULL trenchCategory
#                becomes 'ALL'; quotes and None in `prediction` are rewritten so that
#                JSON_VALUE can parse it (presumably a Python dict repr - verify).
#   modelname  - extracts `combined_score` as sil_beta_app_score.
#   deliquency - derives default flags and maturity flags from
#                obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and flg_mature_fstpd_30 = 1.
# NOTE(review): the WHERE filters on `loanmaster` and the inner join to `deliquency`
# reject unmatched rows, so the LEFT JOIN effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""
# Run the query and drop rows that are identical in every column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3603572,c62ab930-a2ec-48a1-be00-44f6e56d106c,60836035720012,0.501711,2025-08-06 17:01:13,2025-08-06,2025-08,Test,0,1,SIL Competitor,v1,ALL
1,3603221,67c928fa-e469-4471-b6fa-0ccfc459d6b5,60836032210012,0.564346,2025-08-06 15:08:17,2025-08-06,2025-08,Test,0,1,SIL-Instore,v1,ALL
2,3452144,e5ced157-e148-4f14-8843-9e9a78bd3455,60834521440015,0.498759,2025-05-22 19:37:58,2025-05-22,2025-05,Test,0,1,SIL-Instore,v1,ALL
3,3381741,64dc0d73-2ee4-402f-a011-75b2a711d917,60833817410015,0.626785,2025-04-15 18:26:12,2025-04-15,2025-04,Test,0,1,SIL-Instore,v1,ALL
4,3602957,b5f94b5c-787d-4f0a-ba63-09c685bd936a,60836029570011,0.585708,2025-08-06 13:18:26,2025-08-06,2025-08,Test,0,1,SIL Competitor,v1,ALL


In [837]:
# Keep an independent snapshot of the Test extract; `dfd` is reused for the Train extract.
df1 = dfd.copy(deep=True)

## Train

In [838]:
# FSTPD30 / Train: same extract as the Test query, but sourced from
# `dap_ds_poweruser_playground.ml_training_model_run_details`.
#   cleaned    - `prediction` is cast to STRING before the quote/None clean-up.
#   modelname  - score = `prediction` when populated, otherwise `combined_score`
#                parsed from the cleaned text (implies `prediction` is numeric in
#                this table - TODO confirm).
#   deliquency - derives default flags and maturity flags from
#                obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed loans with a non-null score and flg_mature_fstpd_30 = 1.
# NOTE(review): the WHERE filters on `loanmaster` and the inner join to `deliquency`
# reject unmatched rows, so the LEFT JOIN effectively behaves as an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;

"""

# Run the query and drop rows that are identical in every column.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2030392,6ab899d1-ba7f-4ae3-90e7-8ff5908f0336,60820303920011,0.315418,2023-05-05 18:19:53,2023-05-05,2023-05,Train,0,1,SIL-Instore,v1,ALL
1,1979435,6d7c565a-8efe-4720-838d-9e3fce1e38ff,60819794350017,0.350361,2023-04-05 15:07:14,2023-04-05,2023-04,Train,0,1,SIL-Instore,v1,ALL
2,1989058,a65ffb87-ddd6-4be9-b8d6-8867e80201b9,60819890580017,0.581028,2023-04-11 14:01:01,2023-04-11,2023-04,Train,1,1,SIL-Instore,v1,ALL
3,1896563,a191c085-c9ed-4b00-b3b4-a5ea749668b7,60818965630016,0.30821,2023-02-11 18:52:58,2023-02-11,2023-02,Train,0,1,SIL-Instore,v1,ALL
4,2107156,c5c1ba2d-2d7c-4fa6-8a83-d88ae267a187,60821071560019,0.47372,2023-06-25 11:59:32,2023-06-25,2023-06,Train,0,1,SIL-Instore,v1,ALL


In [839]:
# Independent snapshot of the Train extract.
df2 = dfd.copy(deep=True)

In [840]:
# Stack the Test (df1) and Train (df2) extracts row-wise under a fresh 0..n-1 index,
# then print the column/dtype/null summary.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 212889 entries, 0 to 212888
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             212889 non-null  object        
 1   digitalLoanAccountId   212889 non-null  object        
 2   loanAccountNumber      212889 non-null  object        
 3   sil_beta_app_score     212889 non-null  float64       
 4   appln_submit_datetime  212889 non-null  datetime64[us]
 5   disbursementdate       212889 non-null  dbdate        
 6   Application_month      212889 non-null  object        
 7   Data_selection         212889 non-null  object        
 8   deffstpd30             212889 non-null  Int64         
 9   flg_mature_fstpd_30    212889 non-null  Int64         
 10  new_loan_type          212889 non-null  object        
 11  modelVersionId         212889 non-null  object        
 12  trenchCategory         212889 non-null  obje

In [841]:
# Make sure the score column is numeric; anything unparsable becomes NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month (NaN months included), in chronological order.
# This must be the LAST expression of the cell: a bare expression anywhere else is
# evaluated and discarded, so the distribution was never displayed before.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [842]:
# df_concat.to_csv(r"sil_beta_app_scorefstpd30.csv")

In [843]:
# Earlier, un-segmented call kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffstpd30', 'FSTPD30')

# Periodic Gini of sil_beta_app_score against the FSTPD30 default flag, segmented by
# product (new_loan_type), model version and trench category.
# The helper is defined elsewhere in this notebook.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [844]:
# Keep the FSTPD30 Gini table under its own name so it survives later reuse of `gini_results`.
f4 = gini_results.copy(deep=True)

In [845]:
# Preview the first ten FSTPD30 Gini rows.
f4.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.260324,Month,sil_beta_app_score,v1,FSTPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.145503,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.362537,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.261324,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.30878,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.436764,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.356994,Month,sil_beta_app_score,v1,FSTPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.503968,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.242287,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL
9,2023-02-20,2023-02-26,0.238095,Week,sil_beta_app_score,v1,FSTPD30,Overall,ALL


In [846]:
# Long-format FSTPD30 view: key columns first, the Gini column renamed to 'FSTPD30',
# plus a constant `category` label.
f40 = (
    f4[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FSTPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='sil_beta_app_score_FSTPD30_v1_all')
)
# Sanity check: number of period rows per product / version / trench segment.
f40.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                           loan_type       version  trench_category
sil_beta_app_score_FSTPD30_v1_all  Overall         v1       ALL                168
                                   SIL Competitor  v1       ALL                 35
                                   SIL Repeat      v1       ALL                 14
                                   SIL ZERO        v1       ALL                 67
                                   SIL-Instore     v1       ALL                168
dtype: int64

## Combining the dataframes

In [847]:
import functools

# Key columns present in every per-metric Gini frame.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate', 'trench_category']
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join ``df1`` and ``df2`` on the notebook-level ``common_columns``.

    NOTE(review): 'bad_rate' is one of the join keys, and the recorded preview
    of the merged frame shows each row populated for a single metric only, so
    this merge appears to stack the frames rather than align them side by
    side -- confirm that is the intended layout.
    """
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the per-metric frames into a single frame, left to right.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_app_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_app_score_FPD10_gini',
       'sil_beta_app_score_FPD30_gini', 'sil_beta_app_score_FSPD30_gini',
       'sil_beta_app_score_FSTPD30_gini'], dtype=object)

In [848]:
# Tag each Gini column with its version/trench suffix ("v1_all") and
# capitalise the trench key column.
v1_all_column_names = {
    'trench_category': 'Trench_category',
    'sil_beta_app_score_FPD0_gini': 'sil_beta_app_score_FPD0_v1_all_gini',
    'sil_beta_app_score_FPD10_gini': 'sil_beta_app_score_FPD10_v1_all_gini',
    'sil_beta_app_score_FPD30_gini': 'sil_beta_app_score_FPD30_v1_all_gini',
    'sil_beta_app_score_FSPD30_gini': 'sil_beta_app_score_FSPD30_v1_all_gini',
    'sil_beta_app_score_FSTPD30_gini': 'sil_beta_app_score_FSTPD30_v1_all_gini',
}
final_df = final_df.rename(columns=v1_all_column_names)
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_app_score_FPD0_v1_all_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_app_score_FPD10_v1_all_gini',
       'sil_beta_app_score_FPD30_v1_all_gini',
       'sil_beta_app_score_FSPD30_v1_all_gini',
       'sil_beta_app_score_FSTPD30_v1_all_gini'],
      dtype='object')

In [849]:
# Output layout: key columns first, then one Gini column per bad-rate definition.
output_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type',
    'bad_rate', 'Trench_category',
    'sil_beta_app_score_FPD0_v1_all_gini',
    'sil_beta_app_score_FPD10_v1_all_gini',
    'sil_beta_app_score_FPD30_v1_all_gini',
    'sil_beta_app_score_FSPD30_v1_all_gini',
    'sil_beta_app_score_FSTPD30_v1_all_gini',
]
# Reorder the columns and append two constant label columns.
final_df = final_df[output_columns].assign(
    Model_display_name='apps_score_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                datetime64[ns]
end_date                                  datetime64[ns]
period                                            object
Model_Name                                        object
version                                           object
loan_type                                         object
bad_rate                                          object
Trench_category                                   object
sil_beta_app_score_FPD0_v1_all_gini              float64
sil_beta_app_score_FPD10_v1_all_gini             float64
sil_beta_app_score_FPD30_v1_all_gini             float64
sil_beta_app_score_FSPD30_v1_all_gini            float64
sil_beta_app_score_FSTPD30_v1_all_gini           float64
Model_display_name                                object
Product_type                                      object
dtype: object

In [850]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_app_score_FPD0_v1_all_gini,sil_beta_app_score_FPD10_v1_all_gini,sil_beta_app_score_FPD30_v1_all_gini,sil_beta_app_score_FSPD30_v1_all_gini,sil_beta_app_score_FSTPD30_v1_all_gini,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,sil_beta_app_score,v1,Overall,FPD0,ALL,0.156053,,,,,apps_score_model_sil,SIL
1,2023-01-02,2023-01-08,Week,sil_beta_app_score,v1,Overall,FPD0,ALL,0.181917,,,,,apps_score_model_sil,SIL
2,2023-01-09,2023-01-15,Week,sil_beta_app_score,v1,Overall,FPD0,ALL,0.297158,,,,,apps_score_model_sil,SIL
3,2023-01-16,2023-01-22,Week,sil_beta_app_score,v1,Overall,FPD0,ALL,0.170139,,,,,apps_score_model_sil,SIL
4,2023-01-23,2023-01-29,Week,sil_beta_app_score,v1,Overall,FPD0,ALL,0.083217,,,,,apps_score_model_sil,SIL


In [851]:
# Load the combined Gini frame into BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.apps_score_model_sil_v1_all_gini_v1"
job_config = bigquery.LoadJobConfig()
# WRITE_TRUNCATE replaces the table's existing rows; "WRITE_APPEND" would add to them.
job_config.write_disposition = "WRITE_TRUNCATE"
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=850a87c8-c89e-4457-9351-045803d0dd93>

In [852]:
import functools

# Key columns shared by the short-named per-metric frames (includes 'category').
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']
dataframes = [f01, f10, f20, f30, f40]


def merge_dataframes(df1, df2):
    """Outer-join ``df1`` and ``df2`` on the notebook-level ``common_columns``."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the per-metric frames into a single frame, left to right.
appscoresilv1all = functools.reduce(merge_dataframes, dataframes)

appscoresilv1all.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [853]:
# Every model / version / trench Gini frame built so far, in stacking order.
score_frames = [
    cicsilscorev1all,
    cicsilscorev2t1,
    cicsilscorev2t2,
    cicsilscorev2t3,
    alphastacksilv1all,
    alphastacksilv2t1,
    alphastacksilv2t2,
    alphastacksilv2t3,
    betastacksilv1all,
    betastacksilv2t1,
    betastacksilv2t2,
    betastacksilv2t3,
    appscoresilv1all,
]
# Stack them row-wise with a fresh 0..n-1 index.
result = pd.concat(score_frames, ignore_index=True)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (21823, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [854]:
result[['Model_Name', 'version', 'category']].value_counts(dropna=False).sort_index(ascending = False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [855]:
result['Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
sil_beta_app_score       2444
Name: count, dtype: int64

In [856]:
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (21823, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## V2

##### Trench 1

## FPD0

## Test

In [857]:
# FPD0 pull labelled 'Test' for the v2 app score, Trench 1.
# The query reads v2 rows for the two app-score model display names from
# audit_balance.ml_model_run_details, parses `combined_score` out of the
# prediction JSON as sil_beta_app_score, joins the loan master table and the
# delinquency flags, and keeps disbursed loans with a non-null score and
# flg_mature_fpd0 = 1.
# NOTE(review): the recorded output of this cell shows only the header row,
# i.e. the query returned no rows in that run -- confirm this is expected.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query, load the rows into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [858]:
df1 = dfd.copy()

## Train

In [859]:
# FPD0 pull labelled 'Train' for the v2 app score, Trench 1.
# Same shape as the 'Test' pull, but reads from
# dap_ds_poweruser_playground.ml_training_model_run_details, and the score is
# `prediction` when it is non-null, otherwise `combined_score` parsed from the
# stringified prediction. Rows are limited to disbursed loans with a non-null
# score and flg_mature_fpd0 = 1.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;

"""

# Run the query, load the rows into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2969102,a78c66d9-6558-49a0-9ff3-04d9e5176e2b,60829691020011,0.499377,2024-10-24 15:13:31,2024-10-24,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
1,2912160,dbcf891f-cf00-4cc7-9039-1e9ad5ab91ba,60829121600018,0.496877,2024-10-05 12:49:54,2024-10-05,2024-10,Train,1,1,SIL-Instore,v2,Trench 1
2,2934218,71d583de-5293-4845-9722-92346f91aa6f,60829342180017,0.607613,2024-10-12 17:39:48,2024-10-12,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
3,2913536,4f996c68-3b78-41df-ae5f-a58bf04be932,60829135360014,0.448717,2024-10-05 18:32:40,2024-10-05,2024-10,Train,0,1,SIL ZERO,v2,Trench 1
4,2909206,ceafeecb-3d20-4b1e-8d0f-4afe5b2b9135,60829092060018,0.456315,2024-10-04 12:48:08,2024-10-04,2024-10,Train,0,1,SIL-Instore,v2,Trench 1


In [860]:
df2 = dfd.copy()

In [861]:
# Stack the 'Test' and 'Train' pulls into one frame.
# Empty pulls are excluded first: passing an empty frame to pd.concat raises
# pandas' FutureWarning about empty entries and dtype inference, and leaving
# such frames out is the documented way to keep the current result dtypes.
non_empty_pulls = [pull for pull in (df1, df2) if not pull.empty]
# If both pulls are empty, fall back to the original inputs so the columns survive.
df_concat = pd.concat(non_empty_pulls or [df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 172771 entries, 0 to 172770
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             172771 non-null  Int64         
 1   digitalLoanAccountId   172771 non-null  object        
 2   loanAccountNumber      172771 non-null  object        
 3   sil_beta_app_score     172771 non-null  float64       
 4   appln_submit_datetime  172771 non-null  datetime64[us]
 5   disbursementdate       172771 non-null  dbdate        
 6   Application_month      172771 non-null  object        
 7   Data_selection         172771 non-null  object        
 8   deffpd0                172771 non-null  Int64         
 9   flg_mature_fpd0        172771 non-null  Int64         
 10  new_loan_type          172771 non-null  object        
 11  modelVersionId         172771 non-null  object        
 12  trenchCategory         172771 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [862]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Rows per application month. This is the cell's last expression so that the
# notebook displays it; as the first statement its result was discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [863]:
# df_concat.to_csv(r"sil_beta_app_score.csv")

In [864]:
# FPD0 Gini for sil_beta_app_score on the stacked pulls, using the helper that
# takes product, model-version and trench columns. The earlier
# calculate_periodic_gini / calculate_periodic_gini_producttype calls are no
# longer used in this cell.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [865]:
f0 = gini_results.copy()

In [866]:
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.264068,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1
1,2024-10-01,2024-10-31,0.290981,Month,sil_beta_app_score,v2,FPD0,Overall,Trench 1
2,2024-10-07,2024-10-13,0.295555,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1
3,2024-10-14,2024-10-20,0.299003,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1
4,2024-10-21,2024-10-27,0.311144,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1
5,2024-10-28,2024-11-03,0.283795,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1
6,2024-11-01,2024-11-30,0.236036,Month,sil_beta_app_score,v2,FPD0,Overall,Trench 1
7,2024-11-04,2024-11-10,0.2244,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1
8,2024-11-11,2024-11-17,0.270207,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1
9,2024-11-18,2024-11-24,0.232413,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 1


In [867]:
# Keep the shared key columns plus the FPD0 Gini column, give the metric its
# short name and tag every row with the category label for v2 / Trench 1.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FPD0_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FPD0_gini': 'FPD0'})
    .assign(category='sil_beta_app_score_FPD0_v2_t1')
)
# Row count per loan type / version / trench combination.
f01.groupby(['loan_type', 'version', 'trench_category']).size()

loan_type       version  trench_category
Overall         v2       Trench 1           72
SIL Competitor  v2       Trench 1           66
SIL ZERO        v2       Trench 1           72
SIL-Instore     v2       Trench 1           72
dtype: int64

## FPD10

## Test

In [868]:
# FPD10 pull labelled 'Test' for the v2 app score, Trench 1.
# The query reads v2 rows for the two app-score model display names from
# audit_balance.ml_model_run_details, parses `combined_score` out of the
# prediction JSON as sil_beta_app_score, joins the loan master table and the
# delinquency flags, and keeps disbursed loans with a non-null score and
# flg_mature_fpd10 = 1.
# NOTE(review): the recorded output of this cell shows only the header row,
# i.e. the query returned no rows in that run -- confirm this is expected.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query, load the rows into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [869]:
df1 = dfd.copy()

## Train

In [870]:
# FPD10 pull labelled 'Train' for the v2 app score, Trench 1.
# Same shape as the 'Test' pull, but reads from
# dap_ds_poweruser_playground.ml_training_model_run_details, and the score is
# `prediction` when it is non-null, otherwise `combined_score` parsed from the
# stringified prediction. Rows are limited to disbursed loans with a non-null
# score and flg_mature_fpd10 = 1.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;

"""

# Run the query, load the rows into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2952513,ec8f9927-1be7-4d34-a51b-ea396d511481,60829525130013,0.432033,2024-10-18 18:57:02,2024-10-18,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
1,2964254,56b12975-0296-4343-bfc7-d608e0f89f03,60829642540016,0.606603,2024-10-22 16:00:42,2024-10-22,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
2,2926922,6e2217d1-1331-4f2a-b1fe-e9b485aceaf1,60829269220017,0.410664,2024-10-10 09:08:58,2024-10-10,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
3,2964678,bef16581-4adc-44d9-a7aa-1cd977ca4803,60829646780016,0.551255,2024-10-22 18:11:46,2024-10-22,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
4,2910267,c76c7407-823b-4185-be52-4004e0093b72,60829102670011,0.585778,2024-10-04 17:39:52,2024-10-06,2024-10,Train,1,1,SIL-Instore,v2,Trench 1


In [871]:
df2 = dfd.copy()

In [872]:
# Stack the 'Test' and 'Train' pulls into one frame.
# Empty pulls are excluded first: passing an empty frame to pd.concat raises
# pandas' FutureWarning about empty entries and dtype inference, and leaving
# such frames out is the documented way to keep the current result dtypes.
non_empty_pulls = [pull for pull in (df1, df2) if not pull.empty]
# If both pulls are empty, fall back to the original inputs so the columns survive.
df_concat = pd.concat(non_empty_pulls or [df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 166795 entries, 0 to 166794
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             166795 non-null  Int64         
 1   digitalLoanAccountId   166795 non-null  object        
 2   loanAccountNumber      166795 non-null  object        
 3   sil_beta_app_score     166795 non-null  float64       
 4   appln_submit_datetime  166795 non-null  datetime64[us]
 5   disbursementdate       166795 non-null  dbdate        
 6   Application_month      166795 non-null  object        
 7   Data_selection         166795 non-null  object        
 8   deffpd10               166795 non-null  Int64         
 9   flg_mature_fpd10       166795 non-null  Int64         
 10  new_loan_type          166795 non-null  object        
 11  modelVersionId         166795 non-null  object        
 12  trenchCategory         166795 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [873]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Rows per application month. This is the cell's last expression so that the
# notebook displays it; as the first statement its result was discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [874]:
# df_concat.to_csv(r"sil_beta_app_scorefpd10.csv")

In [875]:
# FPD10 Gini for sil_beta_app_score on the stacked pulls, using the helper that
# takes product, model-version and trench columns. The earlier
# calculate_periodic_gini call is no longer used in this cell.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [876]:
f1 = gini_results.copy()

In [877]:
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.350988,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1
1,2024-10-01,2024-10-31,0.394195,Month,sil_beta_app_score,v2,FPD10,Overall,Trench 1
2,2024-10-07,2024-10-13,0.414287,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1
3,2024-10-14,2024-10-20,0.393558,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1
4,2024-10-21,2024-10-27,0.415844,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1
5,2024-10-28,2024-11-03,0.396005,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1
6,2024-11-01,2024-11-30,0.366641,Month,sil_beta_app_score,v2,FPD10,Overall,Trench 1
7,2024-11-04,2024-11-10,0.379745,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1
8,2024-11-11,2024-11-17,0.413245,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1
9,2024-11-18,2024-11-24,0.349191,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 1


In [878]:
# Keep the shared key columns plus the FPD10 Gini column, give the metric its
# short name and tag every row with the category label for v2 / Trench 1.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FPD10_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FPD10_gini': 'FPD10'})
    .assign(category='sil_beta_app_score_FPD10_v2_t1')
)
# Row count per category / loan type / version / trench combination.
f10.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_app_score_FPD10_v2_t1  Overall         v2       Trench 1           69
                                SIL Competitor  v2       Trench 1           63
                                SIL ZERO        v2       Trench 1           69
                                SIL-Instore     v2       Trench 1           69
dtype: int64

## FPD30

## Test

In [879]:
# FPD30 pull labelled 'Test' for the v2 app score, Trench 1.
# The query reads v2 rows for the two app-score model display names from
# audit_balance.ml_model_run_details, parses `combined_score` out of the
# prediction JSON as sil_beta_app_score, joins the loan master table and the
# delinquency flags, and keeps disbursed loans with a non-null score and
# flg_mature_fpd30 = 1.
# NOTE(review): the recorded output of this cell shows only the header row,
# i.e. the query returned no rows in that run -- confirm this is expected.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query, load the rows into pandas and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [880]:
# Keep the FPD30 Test extract under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy()

## Train

In [881]:
# FPD30 / Train sample.
# Same shape as the Test query, but the model runs come from
# dap_ds_poweruser_playground.ml_training_model_run_details. There `prediction` is cast
# to string before the quote/None clean-up, and the score is the raw `prediction` value
# when it is non-null, otherwise the JSON "$.combined_score" field.
# Rows are tagged 'Train'; only disbursed loans with flg_mature_fpd30 = 1 and
# trenchCategory = 'Trench 1' are returned.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  )
    select * from base where trenchCategory = 'Trench 1'
  ;

"""

# Run the query, drop fully duplicated rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2921531,e7913ce6-26df-4328-bc35-992c8dce55e8,60829215310016,0.532104,2024-10-08 11:45:50,2024-10-08,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
1,2955432,14e2dac2-6049-485a-a18a-5152295059ec,60829554320015,0.570684,2024-10-19 18:22:11,2024-10-19,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
2,2982119,1a21d701-398e-4bdf-9c77-64de83cd1b77,60829821190012,0.419606,2024-10-29 13:19:09,2024-10-29,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
3,2904762,55ef64e4-8995-4219-a320-5875f33c593d,60829047620011,0.546531,2024-10-02 20:28:20,2024-10-02,2024-10,Train,0,1,SIL ZERO,v2,Trench 1
4,2903405,c71d6fcf-fb3f-4f54-a9a2-726cf9d087a6,60829034050019,0.595519,2024-10-02 14:00:35,2024-10-02,2024-10,Train,0,1,SIL-Instore,v2,Trench 1


In [882]:
# Keep the FPD30 Train extract under its own name so it can be stacked with df1.
df2 = dfd.copy()

In [883]:
# Stack the Test (df1) and Train (df2) extracts into one frame.
# The Test extract may contain zero rows (its saved preview shows none). Passing an
# empty frame to pd.concat triggers pandas' FutureWarning about empty entries being
# excluded from result-dtype inference, so empty frames are dropped explicitly first,
# which keeps today's result. If both frames are empty, fall back to df1 so the
# column schema survives and pd.concat is never given an empty list (ValueError).
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 159287 entries, 0 to 159286
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             159287 non-null  Int64         
 1   digitalLoanAccountId   159287 non-null  object        
 2   loanAccountNumber      159287 non-null  object        
 3   sil_beta_app_score     159287 non-null  float64       
 4   appln_submit_datetime  159287 non-null  datetime64[us]
 5   disbursementdate       159287 non-null  dbdate        
 6   Application_month      159287 non-null  object        
 7   Data_selection         159287 non-null  object        
 8   deffpd30               159287 non-null  Int64         
 9   flg_mature_fpd30       159287 non-null  Int64         
 10  new_loan_type          159287 non-null  object        
 11  modelVersionId         159287 non-null  object        
 12  trenchCategory         159287 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [884]:
# Coerce the score to a numeric dtype; unparseable values become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month. This must be the last expression of the cell,
# otherwise the notebook computes it and silently discards the result.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [885]:
# df_concat.to_csv(r"sil_beta_app_scorefpd30.csv")

In [886]:
# Periodic Gini of sil_beta_app_score against the deffpd30 flag (labelled 'FPD30'),
# split by model version, trench and loan product. The helper is defined outside this
# section of the notebook; the saved preview of its result shows Week and Month rows.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [887]:
# Snapshot the FPD30 Gini table; gini_results is reassigned for the next metric and
# f2 is one of the frames merged into final_df later on.
f2 = gini_results.copy()

In [888]:
# Preview the first ten rows of the FPD30 Gini table.
f2.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.386285,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1
1,2024-10-01,2024-10-31,0.421869,Month,sil_beta_app_score,v2,FPD30,Overall,Trench 1
2,2024-10-07,2024-10-13,0.454113,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1
3,2024-10-14,2024-10-20,0.40735,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1
4,2024-10-21,2024-10-27,0.451489,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1
5,2024-10-28,2024-11-03,0.403375,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1
6,2024-11-01,2024-11-30,0.400245,Month,sil_beta_app_score,v2,FPD30,Overall,Trench 1
7,2024-11-04,2024-11-10,0.397934,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1
8,2024-11-11,2024-11-17,0.425261,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1
9,2024-11-18,2024-11-24,0.399516,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 1


In [889]:
# Tidy copy of the FPD30 Gini table: fixed column order, the Gini column renamed to
# 'FPD30', plus a constant 'category' label.
# NOTE(review): the visible combine step merges f2 rather than f20 - confirm f20 is
# still needed downstream.
f20 = (
    f2.loc[:, [
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FPD30_gini': 'FPD30'})
    .assign(category='sil_beta_app_score_FPD30_v2_t1')
)

# Sanity check: number of period rows per loan type.
f20.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_app_score_FPD30_v2_t1  Overall         v2       Trench 1           67
                                SIL Competitor  v2       Trench 1           61
                                SIL ZERO        v2       Trench 1           67
                                SIL-Instore     v2       Trench 1           67
dtype: int64

## FSPD30

## Test

In [890]:
# FSPD30 / Test sample.
# BigQuery SQL that:
#   * reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil' from
#     audit_balance.ml_model_run_details, mapping a NULL or empty trenchCategory to 'ALL';
#   * rewrites the prediction text (single -> double quotes, None -> null) so that
#     JSON_VALUE can read "$.combined_score" as sil_beta_app_score;
#   * joins the loan master table and the delinquency flags, keeping disbursed loans
#     with a non-null score and flg_mature_fspd_30 = 1;
#   * returns deffspd30 as the target, tags every row as 'Test' and keeps only
#     trenchCategory = 'Trench 1'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,trenchCategory

  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop fully duplicated rows and preview the result.
# NOTE(review): the saved preview for this cell shows only the header (no rows) -
# confirm an empty Test sample is expected for v2 / 'Trench 1'.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [891]:
# Keep the FSPD30 Test extract under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy()

## Train

In [892]:
# FSPD30 / Train sample.
# Same shape as the Test query, but the model runs come from
# dap_ds_poweruser_playground.ml_training_model_run_details. There `prediction` is cast
# to string before the quote/None clean-up, and the score is the raw `prediction` value
# when it is non-null, otherwise the JSON "$.combined_score" field.
# Rows are tagged 'Train'; only disbursed loans with flg_mature_fspd_30 = 1 and
# trenchCategory = 'Trench 1' are returned, with deffspd30 as the target.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 1'
  ;

"""

# Run the query, drop fully duplicated rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2922445,d22f63ee-a5b0-4218-a4f3-4cadd040336c,60829224450013,0.378607,2024-10-08 16:03:33,2024-10-08,2024-10,Train,1,1,SIL ZERO,v2,Trench 1
1,2984429,f0c0a0d2-f6f7-4916-89d4-b1ec9a1f2db6,60829844290011,0.119076,2024-10-30 11:41:28,2024-10-30,2024-10,Train,0,1,SIL ZERO,v2,Trench 1
2,2970697,6ed0fd85-8c4a-484d-96ea-c07cec92c472,60829706970018,0.440519,2024-10-25 11:18:36,2024-10-25,2024-10,Train,0,1,SIL ZERO,v2,Trench 1
3,2935447,49c9f761-5956-4f5d-a017-e976f73e31ef,60829354470012,0.387836,2024-10-13 09:51:57,2024-10-13,2024-10,Train,0,1,SIL ZERO,v2,Trench 1
4,2927105,30a1c5b3-8a0b-4f30-9734-0ca0b294ce16,60829271050016,0.572523,2024-10-10 10:24:43,2024-10-10,2024-10,Train,1,1,SIL ZERO,v2,Trench 1


In [893]:
# Keep the FSPD30 Train extract under its own name so it can be stacked with df1.
df2 = dfd.copy()

In [894]:
# Stack the Test (df1) and Train (df2) extracts into one frame.
# The Test extract may contain zero rows (its saved preview shows none). Passing an
# empty frame to pd.concat triggers pandas' FutureWarning about empty entries being
# excluded from result-dtype inference, so empty frames are dropped explicitly first,
# which keeps today's result. If both frames are empty, fall back to df1 so the
# column schema survives and pd.concat is never given an empty list (ValueError).
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148240 entries, 0 to 148239
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             148240 non-null  Int64         
 1   digitalLoanAccountId   148240 non-null  object        
 2   loanAccountNumber      148240 non-null  object        
 3   sil_beta_app_score     148240 non-null  float64       
 4   appln_submit_datetime  148240 non-null  datetime64[us]
 5   disbursementdate       148240 non-null  dbdate        
 6   Application_month      148240 non-null  object        
 7   Data_selection         148240 non-null  object        
 8   deffspd30              148240 non-null  Int64         
 9   flg_mature_fspd_30     148240 non-null  Int64         
 10  new_loan_type          148240 non-null  object        
 11  modelVersionId         148240 non-null  object        
 12  trenchCategory         148240 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [895]:
# Coerce the score to a numeric dtype; unparseable values become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month. This must be the last expression of the cell,
# otherwise the notebook computes it and silently discards the result.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [896]:
# df_concat.to_csv(r"sil_beta_app_scorefspd30.csv")

In [897]:
# Periodic Gini of sil_beta_app_score against the deffspd30 flag (labelled 'FSPD30'),
# split by model version, trench and loan product. The helper is defined outside this
# section of the notebook; the saved preview of its result shows Week and Month rows.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [898]:
# Snapshot the FSPD30 Gini table; gini_results is reassigned for the next metric and
# f3 is one of the frames merged into final_df later on.
f3 = gini_results.copy()

In [899]:
# Preview the first ten rows of the FSPD30 Gini table.
f3.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.401351,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
1,2024-10-01,2024-10-31,0.41115,Month,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
2,2024-10-07,2024-10-13,0.418286,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
3,2024-10-14,2024-10-20,0.38458,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
4,2024-10-21,2024-10-27,0.439563,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
5,2024-10-28,2024-11-03,0.408498,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
6,2024-11-01,2024-11-30,0.379455,Month,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
7,2024-11-04,2024-11-10,0.365291,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
8,2024-11-11,2024-11-17,0.398157,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1
9,2024-11-18,2024-11-24,0.370609,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 1


In [900]:
# Tidy copy of the FSPD30 Gini table: fixed column order, the Gini column renamed to
# 'FSPD30', plus a constant 'category' label.
# NOTE(review): the visible combine step merges f3 rather than f30 - confirm f30 is
# still needed downstream.
f30 = (
    f3.loc[:, [
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FSPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FSPD30_gini': 'FSPD30'})
    .assign(category='sil_beta_app_score_FSPD30_v2_t1')
)

# Sanity check: number of period rows per loan type.
f30.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_app_score_FSPD30_v2_t1  Overall         v2       Trench 1           62
                                 SIL Competitor  v2       Trench 1           56
                                 SIL ZERO        v2       Trench 1           62
                                 SIL-Instore     v2       Trench 1           62
dtype: int64

## FSTPD30

## Test

In [901]:
# FSTPD30 / Test sample.
# BigQuery SQL that:
#   * reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil' from
#     audit_balance.ml_model_run_details, mapping a NULL or empty trenchCategory to 'ALL';
#   * rewrites the prediction text (single -> double quotes, None -> null) so that
#     JSON_VALUE can read "$.combined_score" as sil_beta_app_score;
#   * joins the loan master table and the delinquency flags, keeping disbursed loans
#     with a non-null score and flg_mature_fstpd_30 = 1;
#   * returns deffstpd30 as the target, tags every row as 'Test' and keeps only
#     trenchCategory = 'Trench 1'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop fully duplicated rows and preview the result.
# NOTE(review): the saved preview for this cell shows only the header (no rows) -
# confirm an empty Test sample is expected for v2 / 'Trench 1'.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [902]:
# Keep the FSTPD30 Test extract under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy()

## Train

In [903]:
# FSTPD30 / Train sample.
# Same shape as the Test query, but the model runs come from
# dap_ds_poweruser_playground.ml_training_model_run_details. There `prediction` is cast
# to string before the quote/None clean-up, and the score is the raw `prediction` value
# when it is non-null, otherwise the JSON "$.combined_score" field.
# Rows are tagged 'Train'; only disbursed loans with flg_mature_fstpd_30 = 1 and
# trenchCategory = 'Trench 1' are returned, with deffstpd30 as the target.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 1'
  ;

"""

# Run the query, drop fully duplicated rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2979248,46cb9f0c-fd32-4d39-a5e5-92b47c458f8a,60829792480013,0.570292,2024-10-28 11:05:50,2024-10-28,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
1,2983270,29e6d60d-7c25-4b71-8f20-3487c8ada855,60829832700012,0.597914,2024-10-29 19:27:23,2024-10-29,2024-10,Train,1,1,SIL-Instore,v2,Trench 1
2,2946032,f2506883-2759-4524-90fa-999f6cff7e96,60829460320012,0.617579,2024-10-16 16:52:21,2024-10-16,2024-10,Train,1,1,SIL-Instore,v2,Trench 1
3,2966808,fe40545d-9a53-4f16-9687-7b37eb0428dc,60829668080016,0.412828,2024-10-23 15:58:09,2024-10-23,2024-10,Train,0,1,SIL-Instore,v2,Trench 1
4,2935498,27961a35-d3ee-4e86-a3ba-2d78ffd91171,60829354980013,0.56553,2024-10-13 10:08:54,2024-10-13,2024-10,Train,1,1,SIL-Instore,v2,Trench 1


In [904]:
# Keep the FSTPD30 Train extract under its own name so it can be stacked with df1.
df2 = dfd.copy()

In [905]:
# Stack the Test (df1) and Train (df2) extracts into one frame.
# The Test extract may contain zero rows (its saved preview shows none). Passing an
# empty frame to pd.concat triggers pandas' FutureWarning about empty entries being
# excluded from result-dtype inference, so empty frames are dropped explicitly first,
# which keeps today's result. If both frames are empty, fall back to df1 so the
# column schema survives and pd.concat is never given an empty list (ValueError).
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136479 entries, 0 to 136478
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             136479 non-null  Int64         
 1   digitalLoanAccountId   136479 non-null  object        
 2   loanAccountNumber      136479 non-null  object        
 3   sil_beta_app_score     136479 non-null  float64       
 4   appln_submit_datetime  136479 non-null  datetime64[us]
 5   disbursementdate       136479 non-null  dbdate        
 6   Application_month      136479 non-null  object        
 7   Data_selection         136479 non-null  object        
 8   deffstpd30             136479 non-null  Int64         
 9   flg_mature_fstpd_30    136479 non-null  Int64         
 10  new_loan_type          136479 non-null  object        
 11  modelVersionId         136479 non-null  object        
 12  trenchCategory         136479 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [906]:
# Coerce the score to a numeric dtype; unparseable values become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row count per application month. This must be the last expression of the cell,
# otherwise the notebook computes it and silently discards the result.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [907]:
# df_concat.to_csv(r"sil_beta_app_scorefstpd30.csv")

In [908]:
# Periodic Gini of sil_beta_app_score against the deffstpd30 flag (labelled 'FSTPD30'),
# split by model version, trench and loan product. The helper is defined outside this
# section of the notebook; the saved preview of its result shows Week and Month rows.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [909]:
# Snapshot the FSTPD30 Gini table; f4 is one of the frames merged into final_df later on.
f4 = gini_results.copy()

In [910]:
# Preview the first ten rows of the FSTPD30 Gini table.
f4.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.343117,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
1,2024-10-01,2024-10-31,0.35586,Month,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
2,2024-10-07,2024-10-13,0.373467,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
3,2024-10-14,2024-10-20,0.325243,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
4,2024-10-21,2024-10-27,0.386966,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
5,2024-10-28,2024-11-03,0.34591,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
6,2024-11-01,2024-11-30,0.360927,Month,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
7,2024-11-04,2024-11-10,0.349722,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
8,2024-11-11,2024-11-17,0.391807,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1
9,2024-11-18,2024-11-24,0.34032,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 1


In [911]:
# Tidy copy of the FSTPD30 Gini table: fixed column order, the Gini column renamed to
# 'FSTPD30', plus a constant 'category' label.
# NOTE(review): the visible combine step merges f4 rather than f40 - confirm f40 is
# still needed downstream.
f40 = (
    f4.loc[:, [
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FSTPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='sil_beta_app_score_FSTPD30_v2_t1')
)

# Sanity check: number of period rows per loan type.
f40.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_app_score_FSTPD30_v2_t1  Overall         v2       Trench 1           56
                                  SIL Competitor  v2       Trench 1           50
                                  SIL ZERO        v2       Trench 1           56
                                  SIL-Instore     v2       Trench 1           56
dtype: int64

## Combining the dataframes

In [912]:
import functools

# Columns shared by every per-metric Gini table; they form the join key.
# NOTE(review): 'bad_rate' is part of the key, and the f2/f3/f4 previews show a
# different constant in that column ('FPD30', 'FSPD30', 'FSTPD30'), so rows from
# different metrics would not line up on one row - confirm this stacked layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate', 'trench_category']

# Per-metric Gini tables, in the order they are folded together.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on ``common_columns`` and return the merged frame."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left to right into a single wide table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_app_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_app_score_FPD10_gini',
       'sil_beta_app_score_FPD30_gini', 'sil_beta_app_score_FSPD30_gini',
       'sil_beta_app_score_FSTPD30_gini'], dtype=object)

In [913]:
# Capitalise the trench key and stamp each Gini column with its
# version/trench suffix (v2_t1).
final_df = final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        'sil_beta_app_score_FPD0_gini': 'sil_beta_app_score_FPD0_v2_t1_gini',
        'sil_beta_app_score_FPD10_gini': 'sil_beta_app_score_FPD10_v2_t1_gini',
        'sil_beta_app_score_FPD30_gini': 'sil_beta_app_score_FPD30_v2_t1_gini',
        'sil_beta_app_score_FSPD30_gini': 'sil_beta_app_score_FSPD30_v2_t1_gini',
        'sil_beta_app_score_FSTPD30_gini': 'sil_beta_app_score_FSTPD30_v2_t1_gini',
    }
)
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_app_score_FPD0_v2_t1_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_app_score_FPD10_v2_t1_gini',
       'sil_beta_app_score_FPD30_v2_t1_gini',
       'sil_beta_app_score_FSPD30_v2_t1_gini',
       'sil_beta_app_score_FSTPD30_v2_t1_gini'],
      dtype='object')

In [914]:
# Put the key columns first, then the five Gini columns, and append the
# constant model/product labels.
final_df = final_df.loc[
    :,
    [
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'loan_type', 'bad_rate', 'Trench_category',
        'sil_beta_app_score_FPD0_v2_t1_gini',
        'sil_beta_app_score_FPD10_v2_t1_gini',
        'sil_beta_app_score_FPD30_v2_t1_gini',
        'sil_beta_app_score_FSPD30_v2_t1_gini',
        'sil_beta_app_score_FSTPD30_v2_t1_gini',
    ],
].assign(
    Model_display_name='apps_score_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                               datetime64[ns]
end_date                                 datetime64[ns]
period                                           object
Model_Name                                       object
version                                          object
loan_type                                        object
bad_rate                                         object
Trench_category                                  object
sil_beta_app_score_FPD0_v2_t1_gini              float64
sil_beta_app_score_FPD10_v2_t1_gini             float64
sil_beta_app_score_FPD30_v2_t1_gini             float64
sil_beta_app_score_FSPD30_v2_t1_gini            float64
sil_beta_app_score_FSTPD30_v2_t1_gini           float64
Model_display_name                               object
Product_type                                     object
dtype: object

In [915]:
# Preview the first five rows of the frame that is uploaded next.
final_df.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_app_score_FPD0_v2_t1_gini,sil_beta_app_score_FPD10_v2_t1_gini,sil_beta_app_score_FPD30_v2_t1_gini,sil_beta_app_score_FSPD30_v2_t1_gini,sil_beta_app_score_FSTPD30_v2_t1_gini,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 1,0.264068,,,,,apps_score_model_sil,SIL
1,2024-10-01,2024-10-31,Month,sil_beta_app_score,v2,Overall,FPD0,Trench 1,0.290981,,,,,apps_score_model_sil,SIL
2,2024-10-07,2024-10-13,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 1,0.295555,,,,,apps_score_model_sil,SIL
3,2024-10-14,2024-10-20,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 1,0.299003,,,,,apps_score_model_sil,SIL
4,2024-10-21,2024-10-27,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 1,0.311144,,,,,apps_score_model_sil,SIL


In [916]:
# Load final_df into the BigQuery table below.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.apps_score_model_sil_v2_t1_gini_v1"
job_config = bigquery.LoadJobConfig(
    # WRITE_TRUNCATE overwrites the table; WRITE_APPEND would add rows instead.
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=739c3a30-e261-467e-b2dd-c32e3afebfcf>

In [917]:
import functools

# Long-format Gini frames and the key columns they all share.
dataframes = [f01, f10, f20, f30, f40]
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'trench_category', 'bad_rate', 'category',
]

def merge_dataframes(df1, df2):
    """Outer-merge ``df1`` with ``df2`` on the module-level ``common_columns``."""
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the pairwise merge over the list, left to right.
appscoresilv2t1 = functools.reduce(merge_dataframes, dataframes)

appscoresilv2t1.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [918]:
# Stack every per-model / per-version / per-trench Gini frame built so far
# into one long frame with a fresh 0..n-1 index.
result = pd.concat(
    [
        cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3,
        alphastacksilv1all, alphastacksilv2t1, alphastacksilv2t2, alphastacksilv2t3,
        betastacksilv1all, betastacksilv2t1, betastacksilv2t2, betastacksilv2t3,
        appscoresilv1all, appscoresilv2t1,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head(n=5)

The shape of the concatenated dataframe result is:	 (23097, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [919]:
# Row count per (model, version, category), NaN keys included, listed in
# descending index order.
result.value_counts(
    subset=['Model_Name', 'version', 'category'], dropna=False
).sort_index(ascending=False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [920]:
# Row count per model name, NaN included.
result.loc[:, 'Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
sil_beta_app_score       3718
Name: count, dtype: int64

In [921]:
# Independent copy, so later edits to masterdf leave `result` untouched.
masterdf = result.copy(deep=True)
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head(n=5)

The shape of masterdf is:	 (23097, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


#### Trench 2

## FPD0

## Test

In [922]:
# Test split, FPD0, Trench 2.
# The query:
#   * reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil' from
#     audit_balance.ml_model_run_details, mapping a NULL or empty trenchCategory to 'ALL';
#   * parses `$.combined_score` out of the quote-normalised prediction string as
#     sil_beta_app_score (calcFeature is cleaned too but not selected further down);
#   * derives the default / maturity flags from loan_deliquency_data;
#   * keeps disbursed loans with a non-null score and flg_mature_fpd0 = 1, labelled 'Test';
#   * finally keeps only trenchCategory = 'Trench 2'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query through the BigQuery client and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all columns, keeping the first.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [923]:
# Snapshot the Test-split result; `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [924]:
# Train split, FPD0, Trench 2.
# Same shape as the Test query, with these differences visible in the SQL:
#   * the source table is dap_ds_poweruser_playground.ml_training_model_run_details;
#   * `prediction` is cast to string before the quote clean-up, and the score is
#     coalesce(prediction, `$.combined_score` parsed from the cleaned string);
#   * rows are labelled 'Train' in Data_selection.
# Filters are unchanged: disbursed loans, non-null score, flg_mature_fpd0 = 1,
# trenchCategory = 'Trench 2'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;

"""

# Run the query through the BigQuery client and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all columns, keeping the first.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2193183,eb565ad6-655c-4eda-ba50-9ed2f1dd19fa,60821931830011,0.589935,2024-10-01 19:34:41,2024-10-01,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
1,2792262,c20a7736-e584-40e2-8a4b-c566ad773959,60827922620011,0.579315,2024-10-10 17:43:21,2024-10-10,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
2,2348861,35ba1fb8-88d1-45bc-92d4-0e3351380cd7,60823488610013,0.579582,2024-10-04 09:31:34,2024-10-04,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
3,2642190,e4467704-aa36-434d-8001-124e3806fe5f,60826421900011,0.482599,2024-10-22 17:53:07,2024-10-22,2024-10,Train,0,1,SIL ZERO,v2,Trench 2
4,2344988,627b4d97-5f57-4320-97d7-d863f44f1171,60823449880011,0.396725,2024-10-22 15:39:36,2024-10-23,2024-10,Train,0,1,SIL-Instore,v2,Trench 2


In [925]:
# Snapshot the Train-split result so it survives the next reuse of `dfd`.
df2 = dfd.copy(deep=True)

In [926]:
# Combine the Test (df1) and Train (df2) splits into one frame.
# The Test query can return zero rows (it does for this trench), and
# concatenating an empty frame with a populated one makes pandas emit a
# FutureWarning about empty entries affecting the result dtypes. As that
# warning recommends, empty splits are excluded before the concat; the
# resulting rows and dtypes are the same as before.
non_empty_splits = [split for split in (df1, df2) if not split.empty]
if non_empty_splits:
    df_concat = pd.concat(non_empty_splits, ignore_index=True)
else:
    # Both splits are empty: keep an empty frame that still has the query's columns.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3783 entries, 0 to 3782
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3783 non-null   Int64         
 1   digitalLoanAccountId   3783 non-null   object        
 2   loanAccountNumber      3783 non-null   object        
 3   sil_beta_app_score     3783 non-null   float64       
 4   appln_submit_datetime  3783 non-null   datetime64[us]
 5   disbursementdate       3783 non-null   dbdate        
 6   Application_month      3783 non-null   object        
 7   Data_selection         3783 non-null   object        
 8   deffpd0                3783 non-null   Int64         
 9   flg_mature_fpd0        3783 non-null   Int64         
 10  new_loan_type          3783 non-null   object        
 11  modelVersionId         3783 non-null   object        
 12  trenchCategory         3783 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [927]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Rows per application month, in chronological order. This must be the LAST
# expression of the cell to be rendered; it used to sit above the assignment,
# so its result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [928]:
# df_concat.to_csv(r"sil_beta_app_score.csv")

In [929]:
# FPD0 Gini of sil_beta_app_score per period, broken down by model version,
# trench and loan product. Earlier variants of this call
# (calculate_periodic_gini, calculate_periodic_gini_producttype) are
# superseded by the helper used here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [930]:
# Keep the FPD0 results under their own name; `gini_results` is reused for the next bad rate.
f0 = gini_results.copy(deep=True)

In [931]:
# Peek at the first ten FPD0 Gini rows.
f0.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.138889,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2
1,2024-10-01,2024-10-31,0.195903,Month,sil_beta_app_score,v2,FPD0,Overall,Trench 2
2,2024-10-07,2024-10-13,0.425455,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2
3,2024-10-14,2024-10-20,-0.209877,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2
4,2024-10-21,2024-10-27,0.149425,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2
5,2024-10-28,2024-11-03,0.278846,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2
6,2024-11-01,2024-11-30,0.214887,Month,sil_beta_app_score,v2,FPD0,Overall,Trench 2
7,2024-11-04,2024-11-10,0.446154,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2
8,2024-11-11,2024-11-17,0.124579,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2
9,2024-11-18,2024-11-24,0.136364,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 2


In [932]:
# Long-format FPD0 frame: keep the key columns, expose the Gini under the
# generic 'FPD0' name and tag every row with its category label.
f01 = (
    f0.loc[:, ['start_date', 'end_date', 'period', 'Model_Name', 'version',
               'bad_rate', 'loan_type', 'trench_category',
               'sil_beta_app_score_FPD0_gini']]
    .rename(columns={'sil_beta_app_score_FPD0_gini': 'FPD0'})
    .assign(category='sil_beta_app_score_FPD0_v2_t2')
)
# Row count per category / loan type / version / trench as a quick sanity check.
f01.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                       loan_type       version  trench_category
sil_beta_app_score_FPD0_v2_t2  Overall         v2       Trench 2           72
                               SIL Competitor  v2       Trench 2           65
                               SIL Repeat      v2       Trench 2           26
                               SIL ZERO        v2       Trench 2           67
                               SIL-Instore     v2       Trench 2           72
dtype: int64

## FPD10

## Test

In [933]:
# Test split, FPD10, Trench 2.
# The query:
#   * reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil' from
#     audit_balance.ml_model_run_details, mapping a NULL or empty trenchCategory to 'ALL';
#   * parses `$.combined_score` out of the quote-normalised prediction string as
#     sil_beta_app_score (calcFeature is cleaned too but not selected further down);
#   * derives the default / maturity flags from loan_deliquency_data;
#   * keeps disbursed loans with a non-null score and flg_mature_fpd10 = 1, labelled 'Test';
#   * finally keeps only trenchCategory = 'Trench 2'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query through the BigQuery client and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all columns, keeping the first.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [934]:
# Snapshot the Test-split result; `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [935]:
# Train split, FPD10, Trench 2.
# Same shape as the Test query, with these differences visible in the SQL:
#   * the source table is dap_ds_poweruser_playground.ml_training_model_run_details;
#   * `prediction` is cast to string before the quote clean-up, and the score is
#     coalesce(prediction, `$.combined_score` parsed from the cleaned string);
#   * rows are labelled 'Train' in Data_selection.
# Filters are unchanged: disbursed loans, non-null score, flg_mature_fpd10 = 1,
# trenchCategory = 'Trench 2'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;

"""

# Run the query through the BigQuery client and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all columns, keeping the first.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2193183,eb565ad6-655c-4eda-ba50-9ed2f1dd19fa,60821931830011,0.589935,2024-10-01 19:34:41,2024-10-01,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
1,2792262,c20a7736-e584-40e2-8a4b-c566ad773959,60827922620011,0.579315,2024-10-10 17:43:21,2024-10-10,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
2,2956901,af997d2e-ee7b-4a6a-9ff8-1de6a63c6e08,60829569010017,0.251322,2024-10-20 10:53:29,2024-10-20,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
3,2573343,15e6e87c-50ae-4fd8-b46e-f3f942c01d7b,60825733430013,0.253257,2024-10-20 12:40:15,2024-10-20,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
4,2202477,b98fb5f0-d714-4ec2-b051-f40e86572b3f,60822024770019,0.581572,2024-10-19 15:26:05,2024-10-19,2024-10,Train,0,1,SIL-Instore,v2,Trench 2


In [936]:
# Snapshot the Train-split result so it survives the next reuse of `dfd`.
df2 = dfd.copy(deep=True)

In [937]:
# Combine the Test (df1) and Train (df2) splits into one frame.
# The Test query can return zero rows (it does for this trench), and
# concatenating an empty frame with a populated one makes pandas emit a
# FutureWarning about empty entries affecting the result dtypes. As that
# warning recommends, empty splits are excluded before the concat; the
# resulting rows and dtypes are the same as before.
non_empty_splits = [split for split in (df1, df2) if not split.empty]
if non_empty_splits:
    df_concat = pd.concat(non_empty_splits, ignore_index=True)
else:
    # Both splits are empty: keep an empty frame that still has the query's columns.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3579 entries, 0 to 3578
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3579 non-null   Int64         
 1   digitalLoanAccountId   3579 non-null   object        
 2   loanAccountNumber      3579 non-null   object        
 3   sil_beta_app_score     3579 non-null   float64       
 4   appln_submit_datetime  3579 non-null   datetime64[us]
 5   disbursementdate       3579 non-null   dbdate        
 6   Application_month      3579 non-null   object        
 7   Data_selection         3579 non-null   object        
 8   deffpd10               3579 non-null   Int64         
 9   flg_mature_fpd10       3579 non-null   Int64         
 10  new_loan_type          3579 non-null   object        
 11  modelVersionId         3579 non-null   object        
 12  trenchCategory         3579 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [938]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Rows per application month, in chronological order. This must be the LAST
# expression of the cell to be rendered; it used to sit above the assignment,
# so its result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [939]:
# df_concat.to_csv(r"sil_beta_app_scorefpd10.csv")

In [940]:
# FPD10 Gini of sil_beta_app_score per period, broken down by model version,
# trench and loan product. The earlier calculate_periodic_gini variant of
# this call is superseded by the helper used here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [941]:
# Keep the FPD10 results under their own name; `gini_results` is reused for the next bad rate.
f1 = gini_results.copy(deep=True)

In [942]:
# Peek at the first ten FPD10 Gini rows.
f1.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.685714,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2
1,2024-10-01,2024-10-31,0.486966,Month,sil_beta_app_score,v2,FPD10,Overall,Trench 2
2,2024-10-07,2024-10-13,0.941176,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2
3,2024-10-14,2024-10-20,-0.035714,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2
4,2024-10-21,2024-10-27,0.354167,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2
5,2024-10-28,2024-11-03,0.238095,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2
6,2024-11-01,2024-11-30,0.215962,Month,sil_beta_app_score,v2,FPD10,Overall,Trench 2
7,2024-11-04,2024-11-10,0.659574,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2
8,2024-11-11,2024-11-17,0.166667,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2
9,2024-11-18,2024-11-24,-0.136364,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 2


In [943]:
# Long-format FPD10 frame: keep the key columns, expose the Gini under the
# generic 'FPD10' name and tag every row with its category label.
f10 = (
    f1.loc[:, ['start_date', 'end_date', 'period', 'Model_Name', 'version',
               'bad_rate', 'loan_type', 'trench_category',
               'sil_beta_app_score_FPD10_gini']]
    .rename(columns={'sil_beta_app_score_FPD10_gini': 'FPD10'})
    .assign(category='sil_beta_app_score_FPD10_v2_t2')
)
# Row count per category / loan type / version / trench as a quick sanity check.
f10.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_app_score_FPD10_v2_t2  Overall         v2       Trench 2           69
                                SIL Competitor  v2       Trench 2           62
                                SIL Repeat      v2       Trench 2           26
                                SIL ZERO        v2       Trench 2           63
                                SIL-Instore     v2       Trench 2           69
dtype: int64

## FPD30

## Test

In [944]:
# Test split, FPD30, Trench 2.
# The query:
#   * reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil' from
#     audit_balance.ml_model_run_details, mapping a NULL or empty trenchCategory to 'ALL';
#   * parses `$.combined_score` out of the quote-normalised prediction string as
#     sil_beta_app_score (calcFeature is cleaned too but not selected further down);
#   * derives the default / maturity flags from loan_deliquency_data;
#   * keeps disbursed loans with a non-null score and flg_mature_fpd30 = 1, labelled 'Test';
#   * finally keeps only trenchCategory = 'Trench 2'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query through the BigQuery client and pull the result into pandas.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all columns, keeping the first.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [945]:
# Snapshot the Test pull under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [946]:
# FPD30 / Train sample.
# Reads modelVersionId 'v2' runs for the two app-score display names from
# dap_ds_poweruser_playground.ml_training_model_run_details. The score is `prediction`
# itself, falling back to combined_score parsed from its string form. Disbursed loans
# are joined to the delinquency flags and only 'Trench 2' rows with
# flg_mature_fpd30 = 1 are kept. Every row is tagged Data_selection = 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  )
    select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query and drop exact duplicate rows in one step, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2193183,eb565ad6-655c-4eda-ba50-9ed2f1dd19fa,60821931830011,0.589935,2024-10-01 19:34:41,2024-10-01,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
1,2792262,c20a7736-e584-40e2-8a4b-c566ad773959,60827922620011,0.579315,2024-10-10 17:43:21,2024-10-10,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
2,2481376,0eafd783-9a84-4968-9dfd-7d00653d4453,60824813760013,0.310524,2024-10-20 10:40:58,2024-10-20,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
3,2347953,6fd0d946-7a5a-437a-9c30-6ef665d888d5,60823479530012,0.609754,2024-10-03 18:45:26,2024-10-03,2024-10,Train,0,1,SIL-Instore,v2,Trench 2
4,2987308,651b548d-89ff-4013-b9fb-0ed86ef5384f,60829873080015,0.406469,2024-10-31 13:13:41,2024-10-31,2024-10,Train,0,1,SIL-Instore,v2,Trench 2


In [947]:
# Snapshot the Train pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [948]:
# Stack the Test (df1) and Train (df2) pulls into one frame.
# Empty frames are dropped first: handing an empty DataFrame to pd.concat triggers a
# pandas FutureWarning about dtype inference with empty entries, and the Test pull can
# come back with no rows (as in the saved output of this notebook).
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep a zero-row frame with the expected columns so the
    # following cells still find the columns they reference.
    df_concat = df1.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3407 entries, 0 to 3406
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3407 non-null   Int64         
 1   digitalLoanAccountId   3407 non-null   object        
 2   loanAccountNumber      3407 non-null   object        
 3   sil_beta_app_score     3407 non-null   float64       
 4   appln_submit_datetime  3407 non-null   datetime64[us]
 5   disbursementdate       3407 non-null   dbdate        
 6   Application_month      3407 non-null   object        
 7   Data_selection         3407 non-null   object        
 8   deffpd30               3407 non-null   Int64         
 9   flg_mature_fpd30       3407 non-null   Int64         
 10  new_loan_type          3407 non-null   object        
 11  modelVersionId         3407 non-null   object        
 12  trenchCategory         3407 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [949]:
# Force the score column to numeric; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row counts per application month, in chronological order. This is kept as the last
# expression so the notebook actually displays it; previously it ran first and its
# result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [950]:
# df_concat.to_csv(r"sil_beta_app_scorefpd30.csv")

In [951]:
# Periodic Gini of sil_beta_app_score against the deffpd30 flag, labelled 'FPD30'.
# The model-version, trench and loan-type columns are passed so the helper can report
# them alongside each period. This call is used in place of the earlier
# calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffpd30', 'FPD30').
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd30', 'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [952]:
# Keep the FPD30 Gini table under its own name; gini_results is reassigned per metric.
f2 = gini_results.copy(deep=True)

In [953]:
# Preview the first ten FPD30 Gini rows.
f2.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.685714,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2
1,2024-10-01,2024-10-31,0.504796,Month,sil_beta_app_score,v2,FPD30,Overall,Trench 2
2,2024-10-07,2024-10-13,0.941176,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2
3,2024-10-14,2024-10-20,-0.655172,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2
4,2024-10-21,2024-10-27,0.354167,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2
5,2024-10-28,2024-11-03,0.27381,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2
6,2024-11-01,2024-11-30,0.249049,Month,sil_beta_app_score,v2,FPD30,Overall,Trench 2
7,2024-11-04,2024-11-10,0.916667,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2
8,2024-11-11,2024-11-17,0.295567,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2
9,2024-11-18,2024-11-24,-0.136364,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 2


In [954]:
# Reshape the FPD30 Gini table: fixed column order, the metric column renamed to
# 'FPD30', and a constant category label appended as the last column.
f20 = (
    f2.loc[:, [
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FPD30_gini': 'FPD30'})
    .assign(category='sil_beta_app_score_FPD30_v2_t2')
)
# Row counts per category / loan type / version / trench as a sanity check.
f20.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_app_score_FPD30_v2_t2  Overall         v2       Trench 2           67
                                SIL Competitor  v2       Trench 2           60
                                SIL Repeat      v2       Trench 2           25
                                SIL ZERO        v2       Trench 2           60
                                SIL-Instore     v2       Trench 2           67
dtype: int64

## FSPD30

## Test

In [955]:
# FSPD30 / Test sample.
# Reads modelVersionId 'v2' runs for the two app-score display names from
# audit_balance.ml_model_run_details, parses combined_score out of the prediction
# payload, joins disbursed loans to the delinquency flags, and keeps 'Trench 2' rows
# where flg_mature_fspd_30 = 1. Every row is tagged Data_selection = 'Test'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,trenchCategory

  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop exact duplicate rows in one step, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [956]:
# Snapshot the Test pull under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [957]:
# FSPD30 / Train sample.
# Reads modelVersionId 'v2' runs for the two app-score display names from
# dap_ds_poweruser_playground.ml_training_model_run_details. The score is `prediction`
# itself, falling back to combined_score parsed from its string form. Disbursed loans
# are joined to the delinquency flags and only 'Trench 2' rows with
# flg_mature_fspd_30 = 1 are kept. Every row is tagged Data_selection = 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query and drop exact duplicate rows in one step, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,1211534,c46e7944-f696-4bbd-9129-1ecb1b1064ff,60812115340057,0.439479,2024-11-09 18:14:30,2024-11-09,2024-11,Train,0,1,SIL ZERO,v2,Trench 2
1,1836996,9a146e57-835e-47b3-a288-c7b1a524ed3b,60818369960017,0.443889,2025-02-16 13:39:44,2025-02-16,2025-02,Train,0,1,SIL ZERO,v2,Trench 2
2,2938272,a5959958-3d88-4cde-bd7b-e691fe508e59,60829382720019,0.249782,2025-01-09 17:43:55,2025-01-09,2025-01,Train,0,1,SIL-Instore,v2,Trench 2
3,1816505,40828e3f-fb55-42cb-8e98-0795e675450b,60818165050024,0.389787,2025-08-02 13:33:42,2025-08-02,2025-08,Train,0,1,SIL Competitor,v2,Trench 2
4,2060755,c0b668bc-77d2-40a7-a866-2ed5be22b3fb,60820607550012,0.237103,2025-02-04 09:05:25,2025-02-04,2025-02,Train,0,1,SIL-Instore,v2,Trench 2


In [958]:
# Snapshot the Train pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [959]:
# Stack the Test (df1) and Train (df2) pulls into one frame.
# Empty frames are dropped first: handing an empty DataFrame to pd.concat triggers a
# pandas FutureWarning about dtype inference with empty entries, and the Test pull can
# come back with no rows (as in the saved output of this notebook).
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep a zero-row frame with the expected columns so the
    # following cells still find the columns they reference.
    df_concat = df1.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3156 entries, 0 to 3155
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3156 non-null   Int64         
 1   digitalLoanAccountId   3156 non-null   object        
 2   loanAccountNumber      3156 non-null   object        
 3   sil_beta_app_score     3156 non-null   float64       
 4   appln_submit_datetime  3156 non-null   datetime64[us]
 5   disbursementdate       3156 non-null   dbdate        
 6   Application_month      3156 non-null   object        
 7   Data_selection         3156 non-null   object        
 8   deffspd30              3156 non-null   Int64         
 9   flg_mature_fspd_30     3156 non-null   Int64         
 10  new_loan_type          3156 non-null   object        
 11  modelVersionId         3156 non-null   object        
 12  trenchCategory         3156 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [960]:
# Force the score column to numeric; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row counts per application month, in chronological order. This is kept as the last
# expression so the notebook actually displays it; previously it ran first and its
# result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [961]:
# df_concat.to_csv(r"sil_beta_app_scorefspd30.csv")

In [962]:
# Periodic Gini of sil_beta_app_score against the deffspd30 flag, labelled 'FSPD30'.
# The model-version, trench and loan-type columns are passed so the helper can report
# them alongside each period. This call is used in place of the earlier
# calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffspd30', 'FSPD30').
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffspd30', 'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [963]:
# Keep the FSPD30 Gini table under its own name; gini_results is reassigned per metric.
f3 = gini_results.copy(deep=True)

In [964]:
# Preview the first ten FSPD30 Gini rows.
f3.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.483516,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
1,2024-10-01,2024-10-31,0.364341,Month,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
2,2024-10-07,2024-10-13,0.522222,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
3,2024-10-14,2024-10-20,-0.209877,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
4,2024-10-21,2024-10-27,0.4,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
5,2024-10-28,2024-11-03,0.216931,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
6,2024-11-01,2024-11-30,0.299656,Month,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
7,2024-11-04,2024-11-10,0.381818,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
8,2024-11-11,2024-11-17,0.357143,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2
9,2024-11-18,2024-11-24,0.337121,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 2


In [965]:
# Reshape the FSPD30 Gini table: fixed column order, the metric column renamed to
# 'FSPD30', and a constant category label appended as the last column.
f30 = (
    f3.loc[:, [
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FSPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FSPD30_gini': 'FSPD30'})
    .assign(category='sil_beta_app_score_FSPD30_v2_t2')
)
# Row counts per category / loan type / version / trench as a sanity check.
f30.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_app_score_FSPD30_v2_t2  Overall         v2       Trench 2           62
                                 SIL Competitor  v2       Trench 2           55
                                 SIL Repeat      v2       Trench 2           21
                                 SIL ZERO        v2       Trench 2           55
                                 SIL-Instore     v2       Trench 2           61
dtype: int64

## FSTPD30

## Test

In [966]:
# FSTPD30 / Test sample.
# Reads modelVersionId 'v2' runs for the two app-score display names from
# audit_balance.ml_model_run_details, parses combined_score out of the prediction
# payload, joins disbursed loans to the delinquency flags, and keeps 'Trench 2' rows
# where flg_mature_fstpd_30 = 1. Every row is tagged Data_selection = 'Test'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop exact duplicate rows in one step, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [967]:
# Snapshot the Test pull under its own name; dfd is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [968]:
# FSTPD30 / Train sample.
# Reads modelVersionId 'v2' runs for the two app-score display names from
# dap_ds_poweruser_playground.ml_training_model_run_details. The score is `prediction`
# itself, falling back to combined_score parsed from its string form. Disbursed loans
# are joined to the delinquency flags and only 'Trench 2' rows with
# flg_mature_fstpd_30 = 1 are kept. Every row is tagged Data_selection = 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 2'
  ;

"""
# Run the query and drop exact duplicate rows in one step, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,1211534,c46e7944-f696-4bbd-9129-1ecb1b1064ff,60812115340057,0.439479,2024-11-09 18:14:30,2024-11-09,2024-11,Train,0,1,SIL ZERO,v2,Trench 2
1,1836996,9a146e57-835e-47b3-a288-c7b1a524ed3b,60818369960017,0.443889,2025-02-16 13:39:44,2025-02-16,2025-02,Train,0,1,SIL ZERO,v2,Trench 2
2,2938272,a5959958-3d88-4cde-bd7b-e691fe508e59,60829382720019,0.249782,2025-01-09 17:43:55,2025-01-09,2025-01,Train,0,1,SIL-Instore,v2,Trench 2
3,2060755,c0b668bc-77d2-40a7-a866-2ed5be22b3fb,60820607550012,0.237103,2025-02-04 09:05:25,2025-02-04,2025-02,Train,0,1,SIL-Instore,v2,Trench 2
4,2959465,35066347-d0be-45b2-b563-56863a89d25d,60829594650011,0.598608,2025-01-24 11:16:07,2025-01-24,2025-01,Train,0,1,SIL-Instore,v2,Trench 2


In [969]:
# Snapshot the Train pull under its own name so it can be stacked with df1.
df2 = dfd.copy(deep=True)

In [970]:
# Stack the Test (df1) and Train (df2) pulls into one frame.
# Empty frames are dropped first: handing an empty DataFrame to pd.concat triggers a
# pandas FutureWarning about dtype inference with empty entries, and the Test pull can
# come back with no rows (as in the saved output of this notebook).
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both pulls are empty: keep a zero-row frame with the expected columns so the
    # following cells still find the columns they reference.
    df_concat = df1.iloc[0:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2889 entries, 0 to 2888
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             2889 non-null   Int64         
 1   digitalLoanAccountId   2889 non-null   object        
 2   loanAccountNumber      2889 non-null   object        
 3   sil_beta_app_score     2889 non-null   float64       
 4   appln_submit_datetime  2889 non-null   datetime64[us]
 5   disbursementdate       2889 non-null   dbdate        
 6   Application_month      2889 non-null   object        
 7   Data_selection         2889 non-null   object        
 8   deffstpd30             2889 non-null   Int64         
 9   flg_mature_fstpd_30    2889 non-null   Int64         
 10  new_loan_type          2889 non-null   object        
 11  modelVersionId         2889 non-null   object        
 12  trenchCategory         2889 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [971]:
# Force the score column to numeric; values that cannot be parsed become NaN
# (errors='coerce') instead of raising.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')

# Row counts per application month, in chronological order. This is kept as the last
# expression so the notebook actually displays it; previously it ran first and its
# result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [972]:
# df_concat.to_csv(r"sil_beta_app_scorefstpd30.csv")

In [973]:
# Periodic Gini of sil_beta_app_score against the deffstpd30 flag, labelled 'FSTPD30'.
# The model-version, trench and loan-type columns are passed so the helper can report
# them alongside each period. This call is used in place of the earlier
# calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffstpd30', 'FSTPD30').
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffstpd30', 'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type',
)

In [974]:
# Keep the FSTPD30 Gini table under its own name before the frames are merged.
f4 = gini_results.copy(deep=True)

In [975]:
# Preview the first ten FSTPD30 Gini rows.
f4.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.57,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
1,2024-10-01,2024-10-31,0.395592,Month,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
2,2024-10-07,2024-10-13,0.580247,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
3,2024-10-14,2024-10-20,-0.442308,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
4,2024-10-21,2024-10-27,0.469388,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
5,2024-10-28,2024-11-03,0.326923,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
6,2024-11-01,2024-11-30,0.321526,Month,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
7,2024-11-04,2024-11-10,0.518293,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
8,2024-11-11,2024-11-17,0.403361,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2
9,2024-11-18,2024-11-24,0.209302,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 2


In [976]:
# Reshape the FSTPD30 Gini table: fixed column order, the metric column renamed to
# 'FSTPD30', and a constant category label appended as the last column.
f40 = (
    f4.loc[:, [
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FSTPD30_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='sil_beta_app_score_FSTPD30_v2_t2')
)
# Row counts per category / loan type / version / trench as a sanity check.
f40.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_app_score_FSTPD30_v2_t2  Overall         v2       Trench 2           56
                                  SIL Competitor  v2       Trench 2           49
                                  SIL Repeat      v2       Trench 2           18
                                  SIL ZERO        v2       Trench 2           52
                                  SIL-Instore     v2       Trench 2           56
dtype: int64

## Combining the dataframes

In [977]:
import functools

# Descriptor columns shared by every per-metric Gini table; used as the join key.
# NOTE(review): 'bad_rate' is part of the key and the previews show a different
# bad_rate label per table (e.g. FPD30 vs FSPD30), so rows from different metrics
# presumably never align and each output row carries a single populated Gini
# column -- confirm this stacked layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate', 'trench_category']

# Per-metric Gini tables, merged left to right in this order.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini result frames on ``common_columns``.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames that both contain every column listed in ``common_columns``.

    Returns
    -------
    pandas.DataFrame
        Outer merge of ``df1`` and ``df2``; keys present in only one input are kept.
    """
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list pairwise into one wide frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_app_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_app_score_FPD10_gini',
       'sil_beta_app_score_FPD30_gini', 'sil_beta_app_score_FSPD30_gini',
       'sil_beta_app_score_FSTPD30_gini'], dtype=object)

In [978]:
# Stamp each gini column with the version/trench suffix (v2, t2) and switch the
# trench column to the capitalised name selected in the following cell.
final_df = final_df.rename(columns={
    'trench_category': 'Trench_category',
    **{
        f'sil_beta_app_score_{metric}_gini': f'sil_beta_app_score_{metric}_v2_t2_gini'
        for metric in ('FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30')
    },
})
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_app_score_FPD0_v2_t2_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_app_score_FPD10_v2_t2_gini',
       'sil_beta_app_score_FPD30_v2_t2_gini',
       'sil_beta_app_score_FSPD30_v2_t2_gini',
       'sil_beta_app_score_FSTPD30_v2_t2_gini'],
      dtype='object')

In [979]:
# Put the descriptor columns first, followed by the five gini columns, then
# append the constant model / product labels.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'loan_type', 'bad_rate', 'Trench_category',
        'sil_beta_app_score_FPD0_v2_t2_gini',
        'sil_beta_app_score_FPD10_v2_t2_gini',
        'sil_beta_app_score_FPD30_v2_t2_gini',
        'sil_beta_app_score_FSPD30_v2_t2_gini',
        'sil_beta_app_score_FSTPD30_v2_t2_gini',
    ]]
    .assign(Model_display_name='apps_score_model_sil', Product_type='SIL')
)
final_df.dtypes

start_date                               datetime64[ns]
end_date                                 datetime64[ns]
period                                           object
Model_Name                                       object
version                                          object
loan_type                                        object
bad_rate                                         object
Trench_category                                  object
sil_beta_app_score_FPD0_v2_t2_gini              float64
sil_beta_app_score_FPD10_v2_t2_gini             float64
sil_beta_app_score_FPD30_v2_t2_gini             float64
sil_beta_app_score_FSPD30_v2_t2_gini            float64
sil_beta_app_score_FSTPD30_v2_t2_gini           float64
Model_display_name                               object
Product_type                                     object
dtype: object

In [980]:
# Preview the first rows of the combined table before it is uploaded.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_app_score_FPD0_v2_t2_gini,sil_beta_app_score_FPD10_v2_t2_gini,sil_beta_app_score_FPD30_v2_t2_gini,sil_beta_app_score_FSPD30_v2_t2_gini,sil_beta_app_score_FSTPD30_v2_t2_gini,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 2,0.138889,,,,,apps_score_model_sil,SIL
1,2024-10-01,2024-10-31,Month,sil_beta_app_score,v2,Overall,FPD0,Trench 2,0.195903,,,,,apps_score_model_sil,SIL
2,2024-10-07,2024-10-13,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 2,0.425455,,,,,apps_score_model_sil,SIL
3,2024-10-14,2024-10-20,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 2,-0.209877,,,,,apps_score_model_sil,SIL
4,2024-10-21,2024-10-27,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 2,0.149425,,,,,apps_score_model_sil,SIL


In [981]:
# Upload to BigQuery: load final_df into the playground table, replacing
# whatever the table currently holds.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.apps_score_model_sil_v2_t2_gini_v1"
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # or "WRITE_APPEND"
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Wait for the job to complete; the finished LoadJob is shown as the cell output.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ab28529f-3858-4ec9-a41b-92c6531f829c>

In [982]:
import functools

# Join key for the long-format frames; it includes `bad_rate` and `category`.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'trench_category', 'bad_rate', 'category']
# One long-format frame per bad-rate metric (FPD0, FPD10, FPD30, FSPD30, FSTPD30).
dataframes = [f01, f10, f20, f30, f40]


def merge_dataframes(df1, df2):
    """Outer-join two long-format gini frames on ``common_columns`` (notebook global)."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the frames left to right into the combined app-score table.
appscoresilv2t2 = functools.reduce(merge_dataframes, dataframes)

appscoresilv2t2.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [983]:
# Stack every per-model / per-trench gini frame into one long table with a
# fresh 0..n-1 index.
result = pd.concat(
    [
        # CIC SIL score
        cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3,
        # Alpha stack
        alphastacksilv1all, alphastacksilv2t1, alphastacksilv2t2, alphastacksilv2t3,
        # Beta stack
        betastacksilv1all, betastacksilv2t1, betastacksilv2t2, betastacksilv2t3,
        # App score (no appscoresilv2t3 in this list)
        appscoresilv1all, appscoresilv2t1, appscoresilv2t2,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (24452, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [984]:
# Row counts per (Model_Name, version, category) combination, NaN keys included,
# listed in descending index order.
result[['Model_Name', 'version', 'category']].value_counts(dropna=False).sort_index(ascending = False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [985]:
# Row counts per model name (NaN included) as a sanity check on the concatenation.
result['Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
sil_beta_app_score       5073
Name: count, dtype: int64

In [986]:
# Keep an independent copy of `result`; changes made to masterdf do not
# propagate back to `result`.
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (24452, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


#### Trench 3

## FPD0

## Test

In [987]:
# Test sample for FPD0, Trench 3.
# The query reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil'
# from audit_balance.ml_model_run_details, maps a NULL or blank trenchCategory
# to 'ALL', and rewrites the prediction text (single quotes -> double quotes,
# None -> null), presumably so JSON_VALUE can read `$.combined_score`, which
# becomes sil_beta_app_score. The scores are joined to the loan master and to
# the delinquency flags; only disbursed loans with a non-null score and a
# mature FPD0 observation (flg_mature_fpd0 = 1) are kept, and the final select
# restricts to trenchCategory = 'Trench 3'. Rows are labelled 'Test'.
# NOTE: the WHERE filters on loanmaster columns and the inner join on
# loanmaster.loanAccountNumber make the LEFT JOIN behave like an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,

    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,
  trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [988]:
# Snapshot the Test extract; `dfd` is overwritten by the Train query below.
df1 = dfd.copy()

## Train

In [989]:
# Train sample for FPD0, Trench 3.
# Same shape as the Test query, but the runs come from
# dap_ds_poweruser_playground.ml_training_model_run_details. Here `prediction`
# is cast to string before the quote / None clean-up, and the score is
# coalesce(prediction, JSON `$.combined_score`), i.e. the raw prediction is used
# when present and the parsed JSON value otherwise. Only disbursed loans with a
# non-null score and flg_mature_fpd0 = 1 are kept, restricted to
# trenchCategory = 'Trench 3'. Rows are labelled 'Train'.
# NOTE: the WHERE filters on loanmaster columns and the inner join on
# loanmaster.loanAccountNumber make the LEFT JOIN behave like an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
        case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId,
  trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
    modelVersionId,
  trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;

"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2080718,c7505cfe-c37c-4418-a7bb-ff439f860758,60820807180025,0.56548,2024-10-17 12:51:10,2024-10-17,2024-10,Train,1,1,SIL-Instore,v2,Trench 3
1,2379111,53435ea9-8baf-40f5-ae47-24fa73a17c8f,60823791110029,0.50682,2024-10-25 13:47:46,2024-10-25,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
2,2569062,cb420a73-f5f9-4aeb-9e01-35cbb439feac,60825690620024,0.435182,2024-10-08 13:29:48,2024-10-08,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
3,2125603,5d5a432b-e500-44f4-9048-b7d5543c3c29,60821256030021,0.352294,2024-10-07 17:21:12,2024-10-07,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
4,2488537,612c5b09-1db4-44b6-a74b-e3c39e84b65a,60824885370028,0.401322,2024-10-07 17:34:25,2024-10-07,2024-10,Train,0,1,SIL-Instore,v2,Trench 3


In [990]:
# Snapshot the Train extract under its own name before the two samples are combined.
df2 = dfd.copy()

In [991]:
# Combine the Test (df1) and Train (df2) extracts into one frame.
# An extract can come back empty (the Test query returns no rows for this
# trench). Passing an empty frame to pd.concat raises pandas' FutureWarning
# about empty entries and dtype inference, so empty frames are dropped first.
# If every extract is empty, fall back to df1 so the result is an empty frame
# with the expected columns instead of a "No objects to concatenate" error.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14172 entries, 0 to 14171
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14172 non-null  Int64         
 1   digitalLoanAccountId   14172 non-null  object        
 2   loanAccountNumber      14172 non-null  object        
 3   sil_beta_app_score     14172 non-null  float64       
 4   appln_submit_datetime  14172 non-null  datetime64[us]
 5   disbursementdate       14172 non-null  dbdate        
 6   Application_month      14172 non-null  object        
 7   Data_selection         14172 non-null  object        
 8   deffpd0                14172 non-null  Int64         
 9   flg_mature_fpd0        14172 non-null  Int64         
 10  new_loan_type          14172 non-null  object        
 11  modelVersionId         14172 non-null  object        
 12  trenchCategory         14172 non-null  object        
dtypes

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [992]:
# Make sure the score is numeric before the gini calculation; values that
# cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Rows per application month, NaN included. This is the cell's last expression
# so the counts are displayed; as a non-final statement the result was discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [993]:
# df_concat.to_csv(r"sil_beta_app_score.csv")

In [994]:
# Periodic gini of sil_beta_app_score against the deffpd0 flag, labelled 'FPD0'.
# The product, model-version and trench columns are passed so the helper can
# report them alongside each period (the result shows loan_type, version and
# trench_category columns).
# The superseded calls to calculate_periodic_gini and
# calculate_periodic_gini_producttype that used to be commented out here were removed.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [995]:
# Keep the FPD0 gini results under their own name; `gini_results` is reassigned
# when the next metric is computed.
f0 = gini_results.copy()

In [996]:
# Preview the first ten FPD0 gini rows.
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.116809,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3
1,2024-10-01,2024-10-31,0.268929,Month,sil_beta_app_score,v2,FPD0,Overall,Trench 3
2,2024-10-07,2024-10-13,0.098947,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3
3,2024-10-14,2024-10-20,0.495238,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3
4,2024-10-21,2024-10-27,0.319088,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3
5,2024-10-28,2024-11-03,0.275748,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3
6,2024-11-01,2024-11-30,0.126551,Month,sil_beta_app_score,v2,FPD0,Overall,Trench 3
7,2024-11-04,2024-11-10,0.160305,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3
8,2024-11-11,2024-11-17,-0.012912,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3
9,2024-11-18,2024-11-24,0.199459,Week,sil_beta_app_score,v2,FPD0,Overall,Trench 3


In [997]:
# Long-format view of the FPD0 gini results: keep the descriptor columns,
# expose the gini value under the plain metric name and tag every row with
# the category label used when the per-metric frames are combined.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FPD0_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FPD0_gini': 'FPD0'})
    .assign(category='sil_beta_app_score_FPD0_v2_t3')
)
# Number of periods available per loan type (quick completeness check).
f01.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                       loan_type       version  trench_category
sil_beta_app_score_FPD0_v2_t3  Overall         v2       Trench 3           72
                               SIL Competitor  v2       Trench 3           66
                               SIL Repeat      v2       Trench 3           51
                               SIL ZERO        v2       Trench 3           72
                               SIL-Instore     v2       Trench 3           72
dtype: int64

## FPD10

## Test

In [998]:
# Test sample for FPD10, Trench 3.
# The query reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil'
# from audit_balance.ml_model_run_details, maps a NULL or blank trenchCategory
# to 'ALL', and rewrites the prediction text (single quotes -> double quotes,
# None -> null), presumably so JSON_VALUE can read `$.combined_score`, which
# becomes sil_beta_app_score. The scores are joined to the loan master and to
# the delinquency flags; only disbursed loans with a non-null score and a
# mature FPD10 observation (flg_mature_fpd10 = 1) are kept, and the final select
# restricts to trenchCategory = 'Trench 3'. Rows are labelled 'Test'.
# NOTE: the WHERE filters on loanmaster columns and the inner join on
# loanmaster.loanAccountNumber make the LEFT JOIN behave like an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
    when trenchCategory = '' then 'ALL'
    else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [999]:
# Snapshot the Test extract; `dfd` is overwritten by the Train query below.
df1 = dfd.copy()

## Train

In [1000]:
# Train sample for FPD10, Trench 3.
# Same shape as the Test query, but the runs come from
# dap_ds_poweruser_playground.ml_training_model_run_details. Here `prediction`
# is cast to string before the quote / None clean-up, and the score is
# coalesce(prediction, JSON `$.combined_score`), i.e. the raw prediction is used
# when present and the parsed JSON value otherwise. Only disbursed loans with a
# non-null score and flg_mature_fpd10 = 1 are kept, restricted to
# trenchCategory = 'Trench 3'. Rows are labelled 'Train'.
# NOTE: the WHERE filters on loanmaster columns and the inner join on
# loanmaster.loanAccountNumber make the LEFT JOIN behave like an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
    modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;

"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2370349,28a4359a-3ffa-48e4-809d-9dbb2fb1250c,60823703490023,0.496912,2024-10-20 19:06:14,2024-10-25,2024-10,Train,1,1,SIL-Instore,v2,Trench 3
1,2466237,1d8bb534-c347-4c99-9e88-f1d6b58261de,60824662370039,0.345876,2024-10-22 17:53:36,2024-10-22,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
2,2381627,cfcbde3b-ba86-4102-a870-c66c8f6c8b0e,60823816270025,0.322694,2024-10-18 13:27:27,2024-10-18,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
3,2698445,9c8838a3-8b91-4e02-acc2-ec57b86033d8,60826984450022,0.546891,2024-10-25 11:47:14,2024-10-25,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
4,2172493,7c0fe0c6-53de-40d1-a10f-e70f38cc453c,60821724930034,0.43405,2024-10-20 15:50:41,2024-10-20,2024-10,Train,0,1,SIL-Instore,v2,Trench 3


In [1001]:
# Snapshot the Train extract under its own name before the two samples are combined.
df2 = dfd.copy()

In [1002]:
# Combine the Test (df1) and Train (df2) extracts into one frame.
# An extract can come back empty (the Test query returns no rows for this
# trench). Passing an empty frame to pd.concat raises pandas' FutureWarning
# about empty entries and dtype inference, so empty frames are dropped first.
# If every extract is empty, fall back to df1 so the result is an empty frame
# with the expected columns instead of a "No objects to concatenate" error.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13613 entries, 0 to 13612
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13613 non-null  Int64         
 1   digitalLoanAccountId   13613 non-null  object        
 2   loanAccountNumber      13613 non-null  object        
 3   sil_beta_app_score     13613 non-null  float64       
 4   appln_submit_datetime  13613 non-null  datetime64[us]
 5   disbursementdate       13613 non-null  dbdate        
 6   Application_month      13613 non-null  object        
 7   Data_selection         13613 non-null  object        
 8   deffpd10               13613 non-null  Int64         
 9   flg_mature_fpd10       13613 non-null  Int64         
 10  new_loan_type          13613 non-null  object        
 11  modelVersionId         13613 non-null  object        
 12  trenchCategory         13613 non-null  object        
dtypes

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1003]:
# Make sure the score is numeric before the gini calculation; values that
# cannot be parsed become NaN (errors='coerce').
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Rows per application month, NaN included. This is the cell's last expression
# so the counts are displayed; as a non-final statement the result was discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1004]:
# df_concat.to_csv(r"sil_beta_app_scorefpd10.csv")

In [1005]:
# Periodic gini of sil_beta_app_score against the deffpd10 flag, labelled 'FPD10'.
# The product, model-version and trench columns are passed so the helper can
# report them alongside each period (the result shows loan_type, version and
# trench_category columns).
# The superseded calculate_periodic_gini call that used to be commented out here was removed.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_app_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1006]:
# Keep the FPD10 gini results under their own name; `gini_results` is reassigned
# when the next metric is computed.
f1 = gini_results.copy()

In [1007]:
# Preview the first ten FPD10 gini rows.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.058548,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3
1,2024-10-01,2024-10-31,0.267951,Month,sil_beta_app_score,v2,FPD10,Overall,Trench 3
2,2024-10-07,2024-10-13,0.20915,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3
3,2024-10-14,2024-10-20,0.430435,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3
4,2024-10-21,2024-10-27,0.483696,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3
5,2024-10-28,2024-11-03,0.408521,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3
6,2024-11-01,2024-11-30,0.072046,Month,sil_beta_app_score,v2,FPD10,Overall,Trench 3
7,2024-11-04,2024-11-10,0.985401,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3
8,2024-11-11,2024-11-17,0.182857,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3
9,2024-11-18,2024-11-24,0.037518,Week,sil_beta_app_score,v2,FPD10,Overall,Trench 3


In [1008]:
# Long-format view of the FPD10 gini results: keep the descriptor columns,
# expose the gini value under the plain metric name and tag every row with
# the category label used when the per-metric frames are combined.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_app_score_FPD10_gini',
    ]]
    .rename(columns={'sil_beta_app_score_FPD10_gini': 'FPD10'})
    .assign(category='sil_beta_app_score_FPD10_v2_t3')
)
# Number of periods available per loan type (quick completeness check).
f10.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_app_score_FPD10_v2_t3  Overall         v2       Trench 3           69
                                SIL Competitor  v2       Trench 3           63
                                SIL Repeat      v2       Trench 3           48
                                SIL ZERO        v2       Trench 3           68
                                SIL-Instore     v2       Trench 3           69
dtype: int64

## FPD30

## Test

In [1009]:
# Test sample for FPD30, Trench 3.
# The query reads v2 runs of 'Beta - AppsScoreModel' / 'apps_score_model_sil'
# from audit_balance.ml_model_run_details, maps a NULL or blank trenchCategory
# to 'ALL', and rewrites the prediction text (single quotes -> double quotes,
# None -> null), presumably so JSON_VALUE can read `$.combined_score`, which
# becomes sil_beta_app_score. The scores are joined to the loan master and to
# the delinquency flags; only disbursed loans with a non-null score and a
# mature FPD30 observation (flg_mature_fpd30 = 1) are kept, and the final select
# restricts to trenchCategory = 'Trench 3'. Rows are labelled 'Test'.
# NOTE: the WHERE filters on loanmaster columns and the inner join on
# loanmaster.loanAccountNumber make the LEFT JOIN behave like an inner join.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [1010]:
# Keep the FPD30 Test extract under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [1011]:
# FPD30 "Train" extract: v2 scores stored in
# dap_ds_poweruser_playground.ml_training_model_run_details.
# Same shape as the Test query, with two differences visible in the SQL:
#   - `prediction` is cast to string before the quote/None clean-up;
#   - the score is `prediction` itself when it is non-null, otherwise `combined_score`
#     parsed from the cleaned JSON payload (coalesce).
# Rows are limited to disbursed loans with a non-null score, flg_mature_fpd30 = 1 and
# trenchCategory = 'Trench 3', and are labelled Data_selection = 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fpd30 = 1
  )
    select * from base where trenchCategory = 'Trench 3'
  ;

"""

# Run the query, then drop rows that are exact duplicates across every selected column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2450318,77fc052c-c220-419d-9816-e7cef1499fcb,60824503180022,0.345065,2024-10-05 15:07:41,2024-10-05,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
1,2676216,8d74b6e7-1175-4f09-a088-e83a72980051,60826762160023,0.525737,2024-10-29 18:29:16,2024-10-29,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
2,2320436,1c52890c-5f95-4bfa-abb7-bd87ed1ecbb4,60823204360026,0.345932,2024-10-02 11:43:10,2024-10-02,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
3,2513484,c78ad0de-9c82-4e86-9d65-4621527caa6a,60825134840023,0.321243,2024-10-16 10:43:33,2024-10-16,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
4,2625785,47dd3923-2106-4c83-ad5c-586ee23a12f8,60826257850022,0.38346,2024-10-01 16:43:30,2024-10-01,2024-10,Train,0,1,SIL-Instore,v2,Trench 3


In [1012]:
# Keep the FPD30 Train extract under its own name so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [1013]:
# Stack the Test (df1) and Train (df2) extracts into a single frame.
# Empty extracts are excluded first: concatenating an empty frame makes pandas emit a
# FutureWarning, because empty entries will stop being ignored when result dtypes are inferred.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both extracts are empty: keep an empty frame with the expected columns instead of
    # letting pd.concat raise on an empty list.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12665 entries, 0 to 12664
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12665 non-null  Int64         
 1   digitalLoanAccountId   12665 non-null  object        
 2   loanAccountNumber      12665 non-null  object        
 3   sil_beta_app_score     12665 non-null  float64       
 4   appln_submit_datetime  12665 non-null  datetime64[us]
 5   disbursementdate       12665 non-null  dbdate        
 6   Application_month      12665 non-null  object        
 7   Data_selection         12665 non-null  object        
 8   deffpd30               12665 non-null  Int64         
 9   flg_mature_fpd30       12665 non-null  Int64         
 10  new_loan_type          12665 non-null  object        
 11  modelVersionId         12665 non-null  object        
 12  trenchCategory         12665 non-null  object        
dtypes

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1014]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Row counts per application month. This has to be the cell's last expression:
# Jupyter only echoes the final expression of a cell, so anywhere else the result is discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1015]:
# df_concat.to_csv(r"sil_beta_app_scorefpd30.csv")

In [1016]:
# Earlier call, left commented out; the segmented helper below is used instead.
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffpd30', 'FPD30')

# Periodic Gini of sil_beta_app_score against the FPD30 default flag, segmented by the
# model-version, trench and product columns named in the keyword arguments.
# The saved preview of the result shows both 'Week' and 'Month' rows.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_app_score', 
    'deffpd30', 
    'FPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [1017]:
# Keep the FPD30 results under their own name; `gini_results` is reused for the next metric.
f2 = gini_results.copy()

In [1018]:
# Spot-check the first FPD30 Gini rows before reshaping.
f2.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.100271,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3
1,2024-10-01,2024-10-31,0.276267,Month,sil_beta_app_score,v2,FPD30,Overall,Trench 3
2,2024-10-07,2024-10-13,0.718447,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3
3,2024-10-14,2024-10-20,0.457627,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3
4,2024-10-21,2024-10-27,0.304659,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3
5,2024-10-28,2024-11-03,0.408521,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3
6,2024-11-01,2024-11-30,0.076868,Month,sil_beta_app_score,v2,FPD30,Overall,Trench 3
7,2024-11-04,2024-11-10,,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3
8,2024-11-11,2024-11-17,0.182857,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3
9,2024-11-18,2024-11-24,0.19598,Week,sil_beta_app_score,v2,FPD30,Overall,Trench 3


In [1019]:
# Long-format view of the FPD30 results: the key columns plus the Gini value renamed to
# 'FPD30', tagged with a category label for version v2 / Trench 3.
f20 = (
    f2[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_app_score_FPD30_gini']]
    .rename(columns={'sil_beta_app_score_FPD30_gini': 'FPD30'})
    .assign(category='sil_beta_app_score_FPD30_v2_t3')
)
# Number of period rows available per segment.
f20.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_app_score_FPD30_v2_t3  Overall         v2       Trench 3           67
                                SIL Competitor  v2       Trench 3           61
                                SIL Repeat      v2       Trench 3           46
                                SIL ZERO        v2       Trench 3           66
                                SIL-Instore     v2       Trench 3           67
dtype: int64

## FSPD30

## Test

In [1020]:
# FSPD30 "Test" extract: v2 scores logged in audit_balance.ml_model_run_details.
# The query below
#   1. rewrites the stringified `prediction` payload so it parses as JSON
#      (single -> double quotes, None -> null) and maps a NULL/empty trenchCategory to 'ALL';
#   2. reads `combined_score` from that JSON as sil_beta_app_score;
#   3. derives per-loan default flags and maturity flags from loan_deliquency_data;
#   4. keeps disbursed loans with a non-null score whose flg_mature_fspd_30 is 1,
#      restricted to trenchCategory = 'Trench 3'.
# NOTE(review): in the saved output this extract shows only a header row (no data) - confirm
# that an empty Test set is expected for Trench 3.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId,trenchCategory

  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId,trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every selected column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [1021]:
# Keep the FSPD30 Test extract under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [1022]:
# FSPD30 "Train" extract: v2 scores stored in
# dap_ds_poweruser_playground.ml_training_model_run_details.
# Same shape as the Test query, with two differences visible in the SQL:
#   - `prediction` is cast to string before the quote/None clean-up;
#   - the score is `prediction` itself when it is non-null, otherwise `combined_score`
#     parsed from the cleaned JSON payload (coalesce).
# Rows are limited to disbursed loans with a non-null score, flg_mature_fspd_30 = 1 and
# trenchCategory = 'Trench 3', and are labelled Data_selection = 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fspd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 3'
  ;

"""

# Run the query, then drop rows that are exact duplicates across every selected column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2450318,77fc052c-c220-419d-9816-e7cef1499fcb,60824503180022,0.345065,2024-10-05 15:07:41,2024-10-05,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
1,2676216,8d74b6e7-1175-4f09-a088-e83a72980051,60826762160023,0.525737,2024-10-29 18:29:16,2024-10-29,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
2,2320436,1c52890c-5f95-4bfa-abb7-bd87ed1ecbb4,60823204360026,0.345932,2024-10-02 11:43:10,2024-10-02,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
3,2513484,c78ad0de-9c82-4e86-9d65-4621527caa6a,60825134840023,0.321243,2024-10-16 10:43:33,2024-10-16,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
4,2625785,47dd3923-2106-4c83-ad5c-586ee23a12f8,60826257850022,0.38346,2024-10-01 16:43:30,2024-10-01,2024-10,Train,0,1,SIL-Instore,v2,Trench 3


In [1023]:
# Keep the FSPD30 Train extract under its own name so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [1024]:
# Stack the Test (df1) and Train (df2) extracts into a single frame.
# Empty extracts are excluded first: concatenating an empty frame makes pandas emit a
# FutureWarning, because empty entries will stop being ignored when result dtypes are inferred.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both extracts are empty: keep an empty frame with the expected columns instead of
    # letting pd.concat raise on an empty list.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11109 entries, 0 to 11108
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11109 non-null  Int64         
 1   digitalLoanAccountId   11109 non-null  object        
 2   loanAccountNumber      11109 non-null  object        
 3   sil_beta_app_score     11109 non-null  float64       
 4   appln_submit_datetime  11109 non-null  datetime64[us]
 5   disbursementdate       11109 non-null  dbdate        
 6   Application_month      11109 non-null  object        
 7   Data_selection         11109 non-null  object        
 8   deffspd30              11109 non-null  Int64         
 9   flg_mature_fspd_30     11109 non-null  Int64         
 10  new_loan_type          11109 non-null  object        
 11  modelVersionId         11109 non-null  object        
 12  trenchCategory         11109 non-null  object        
dtypes

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1025]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Row counts per application month. This has to be the cell's last expression:
# Jupyter only echoes the final expression of a cell, so anywhere else the result is discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1026]:
# df_concat.to_csv(r"sil_beta_app_scorefspd30.csv")

In [1027]:
# Earlier call, left commented out; the segmented helper below is used instead.
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffspd30', 'FSPD30')
# Periodic Gini of sil_beta_app_score against the FSPD30 default flag, segmented by the
# model-version, trench and product columns named in the keyword arguments.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_app_score', 
    'deffspd30', 
    'FSPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [1028]:
# Keep the FSPD30 results under their own name; `gini_results` is reused for the next metric.
f3 = gini_results.copy()

In [1029]:
# Spot-check the first FSPD30 Gini rows before reshaping.
f3.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.131481,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
1,2024-10-01,2024-10-31,0.286614,Month,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
2,2024-10-07,2024-10-13,0.718447,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
3,2024-10-14,2024-10-20,0.404348,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
4,2024-10-21,2024-10-27,0.13587,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
5,2024-10-28,2024-11-03,0.441221,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
6,2024-11-01,2024-11-30,0.219844,Month,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
7,2024-11-04,2024-11-10,0.595062,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
8,2024-11-11,2024-11-17,0.260989,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3
9,2024-11-18,2024-11-24,0.247166,Week,sil_beta_app_score,v2,FSPD30,Overall,Trench 3


In [1030]:
# Long-format view of the FSPD30 results: the key columns plus the Gini value renamed to
# 'FSPD30', tagged with a category label for version v2 / Trench 3.
f30 = (
    f3[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_app_score_FSPD30_gini']]
    .rename(columns={'sil_beta_app_score_FSPD30_gini': 'FSPD30'})
    .assign(category='sil_beta_app_score_FSPD30_v2_t3')
)
# Number of period rows available per segment.
f30.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_app_score_FSPD30_v2_t3  Overall         v2       Trench 3           62
                                 SIL Competitor  v2       Trench 3           56
                                 SIL Repeat      v2       Trench 3           41
                                 SIL ZERO        v2       Trench 3           61
                                 SIL-Instore     v2       Trench 3           62
dtype: int64

## FSTPD30

## Test

In [1031]:
# FSTPD30 "Test" extract: v2 scores logged in audit_balance.ml_model_run_details.
# The query below
#   1. rewrites the stringified `prediction` payload so it parses as JSON
#      (single -> double quotes, None -> null) and maps a NULL/empty trenchCategory to 'ALL';
#   2. reads `combined_score` from that JSON as sil_beta_app_score;
#   3. derives per-loan default flags and maturity flags from loan_deliquency_data;
#   4. keeps disbursed loans with a non-null score whose flg_mature_fstpd_30 is 1,
#      restricted to trenchCategory = 'Trench 3'.
# NOTE(review): in the saved output this extract shows only a header row (no data) - confirm
# that an empty Test set is expected for Trench 3.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
    case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64) as sil_beta_app_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every selected column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [1032]:
# Keep the FSTPD30 Test extract under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy()

## Train

In [1033]:
# FSTPD30 "Train" extract: v2 scores stored in
# dap_ds_poweruser_playground.ml_training_model_run_details.
# Same shape as the Test query, with two differences visible in the SQL:
#   - `prediction` is cast to string before the quote/None clean-up;
#   - the score is `prediction` itself when it is non-null, otherwise `combined_score`
#     parsed from the cleaned JSON payload (coalesce).
# Rows are limited to disbursed loans with a non-null score, flg_mature_fstpd_30 = 1 and
# trenchCategory = 'Trench 3', and are labelled Data_selection = 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
    customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
    REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature,
    REPLACE(REPLACE(cast(prediction as string), "'", '"'), "None", "null") AS prediction_clean
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - AppsScoreModel', 'apps_score_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  coalesce(prediction, safe_cast(JSON_VALUE(prediction_clean, "$.combined_score") AS float64)) as sil_beta_app_score, 
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_app_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
    select * from base where trenchCategory = 'Trench 3'
  ;

"""

# Run the query, then drop rows that are exact duplicates across every selected column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_app_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2336621,12ebef47-9f06-4531-a3db-08a438b8b5c3,60823366210024,0.609814,2024-10-20 09:44:52,2024-10-20,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
1,2298884,a94b35b1-bc72-4a18-a16f-24eecbf617a0,60822988840027,0.266736,2024-10-21 14:39:14,2024-10-21,2024-10,Train,0,1,SIL ZERO,v2,Trench 3
2,2346517,1d47b1c5-5d6e-4c7f-a4d9-f9ba4fb36b66,60823465170021,0.431236,2024-10-06 19:35:02,2024-10-06,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
3,2446469,08a5cc09-9a17-4901-84d0-ef7820167b87,60824464690025,0.435758,2024-10-20 16:51:27,2024-10-20,2024-10,Train,0,1,SIL-Instore,v2,Trench 3
4,2520219,c07b91cb-45d3-4de2-9579-29e943f7ce0a,60825202190021,0.462936,2024-10-06 18:09:17,2024-10-06,2024-10,Train,0,1,SIL-Instore,v2,Trench 3


In [1034]:
# Keep the FSTPD30 Train extract under its own name so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [1035]:
# Stack the Test (df1) and Train (df2) extracts into a single frame.
# Empty extracts are excluded first: concatenating an empty frame makes pandas emit a
# FutureWarning, because empty entries will stop being ignored when result dtypes are inferred.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
if non_empty_frames:
    df_concat = pd.concat(non_empty_frames, ignore_index=True)
else:
    # Both extracts are empty: keep an empty frame with the expected columns instead of
    # letting pd.concat raise on an empty list.
    df_concat = df1.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9629 entries, 0 to 9628
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9629 non-null   Int64         
 1   digitalLoanAccountId   9629 non-null   object        
 2   loanAccountNumber      9629 non-null   object        
 3   sil_beta_app_score     9629 non-null   float64       
 4   appln_submit_datetime  9629 non-null   datetime64[us]
 5   disbursementdate       9629 non-null   dbdate        
 6   Application_month      9629 non-null   object        
 7   Data_selection         9629 non-null   object        
 8   deffstpd30             9629 non-null   Int64         
 9   flg_mature_fstpd_30    9629 non-null   Int64         
 10  new_loan_type          9629 non-null   object        
 11  modelVersionId         9629 non-null   object        
 12  trenchCategory         9629 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1036]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_app_score'] = pd.to_numeric(df_concat['sil_beta_app_score'], errors='coerce')
# Row counts per application month. This has to be the cell's last expression:
# Jupyter only echoes the final expression of a cell, so anywhere else the result is discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1037]:
# df_concat.to_csv(r"sil_beta_app_scorefstpd30.csv")

In [1038]:
# Earlier call, left commented out; the segmented helper below is used instead.
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_app_score', 'deffstpd30', 'FSTPD30')
# Periodic Gini of sil_beta_app_score against the FSTPD30 default flag, segmented by the
# model-version, trench and product columns named in the keyword arguments.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_app_score', 
    'deffstpd30', 
    'FSTPD30',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [1039]:
# Keep the FSTPD30 results under their own name for the final merge of all metric tables.
f4 = gini_results.copy()

In [1040]:
# Spot-check the first FSTPD30 Gini rows before reshaping.
f4.head(10)

Unnamed: 0,start_date,end_date,sil_beta_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-09-30,2024-10-06,0.05547,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
1,2024-10-01,2024-10-31,0.174895,Month,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
2,2024-10-07,2024-10-13,0.183168,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
3,2024-10-14,2024-10-20,0.288288,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
4,2024-10-21,2024-10-27,0.067688,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
5,2024-10-28,2024-11-03,0.507692,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
6,2024-11-01,2024-11-30,0.277088,Month,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
7,2024-11-04,2024-11-10,0.5,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
8,2024-11-11,2024-11-17,0.331727,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3
9,2024-11-18,2024-11-24,0.266667,Week,sil_beta_app_score,v2,FSTPD30,Overall,Trench 3


In [1041]:
# Long-format view of the FSTPD30 results: the key columns plus the Gini value renamed to
# 'FSTPD30', tagged with a category label for version v2 / Trench 3.
f40 = (
    f4[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_app_score_FSTPD30_gini']]
    .rename(columns={'sil_beta_app_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='sil_beta_app_score_FSTPD30_v2_t3')
)
# Number of period rows available per segment.
f40.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_app_score_FSTPD30_v2_t3  Overall         v2       Trench 3           56
                                  SIL Competitor  v2       Trench 3           50
                                  SIL Repeat      v2       Trench 3           35
                                  SIL ZERO        v2       Trench 3           56
                                  SIL-Instore     v2       Trench 3           56
dtype: int64

## Combining the dataframes

In [1042]:
import functools

# Per-metric Gini tables, in the order they are folded together.
dataframes = [f0, f1, f2, f3, f4]
# Join keys shared by every per-metric table; the remaining column in each table is its Gini value.
# NOTE(review): 'bad_rate' is one of the keys, yet the previews show a different label per
# table ('FPD30' in f2, 'FSPD30' in f3, 'FSTPD30' in f4). Rows from different tables therefore
# presumably never match and the outer merge stacks them - confirm this layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate', 'trench_category']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the shared key columns in ``common_columns``."""
    return df1.merge(df2, on=common_columns, how='outer')


# Fold the list pairwise, left to right, into a single wide table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_app_score_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_app_score_FPD10_gini',
       'sil_beta_app_score_FPD30_gini', 'sil_beta_app_score_FSPD30_gini',
       'sil_beta_app_score_FSTPD30_gini'], dtype=object)

In [1043]:
# Rename in place: capitalise the trench column and insert the '_v2_t3' tag
# into each gini column name.
final_df.rename(columns={'trench_category':'Trench_category',
'sil_beta_app_score_FPD0_gini':'sil_beta_app_score_FPD0_v2_t3_gini',
'sil_beta_app_score_FPD10_gini':'sil_beta_app_score_FPD10_v2_t3_gini',
'sil_beta_app_score_FPD30_gini':'sil_beta_app_score_FPD30_v2_t3_gini',
'sil_beta_app_score_FSPD30_gini':'sil_beta_app_score_FSPD30_v2_t3_gini', 
'sil_beta_app_score_FSTPD30_gini':'sil_beta_app_score_FSTPD30_v2_t3_gini'
}, inplace=True)
# Display the resulting column index to confirm the new names.
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_app_score_FPD0_v2_t3_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_app_score_FPD10_v2_t3_gini',
       'sil_beta_app_score_FPD30_v2_t3_gini',
       'sil_beta_app_score_FSPD30_v2_t3_gini',
       'sil_beta_app_score_FSTPD30_v2_t3_gini'],
      dtype='object')

In [1044]:
# Put the key columns first and the five gini columns last, then stamp the
# constant model / product labels onto every row.
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate', 'Trench_category',
    'sil_beta_app_score_FPD0_v2_t3_gini',
    'sil_beta_app_score_FPD10_v2_t3_gini',
    'sil_beta_app_score_FPD30_v2_t3_gini',
    'sil_beta_app_score_FSPD30_v2_t3_gini',
    'sil_beta_app_score_FSTPD30_v2_t3_gini',
]].assign(
    Model_display_name='apps_score_model_sil',
    Product_type='SIL',
)
# Show the dtypes of the frame that the next cells preview and upload.
final_df.dtypes

start_date                               datetime64[ns]
end_date                                 datetime64[ns]
period                                           object
Model_Name                                       object
version                                          object
loan_type                                        object
bad_rate                                         object
Trench_category                                  object
sil_beta_app_score_FPD0_v2_t3_gini              float64
sil_beta_app_score_FPD10_v2_t3_gini             float64
sil_beta_app_score_FPD30_v2_t3_gini             float64
sil_beta_app_score_FSPD30_v2_t3_gini            float64
sil_beta_app_score_FSTPD30_v2_t3_gini           float64
Model_display_name                               object
Product_type                                     object
dtype: object

In [1045]:
# Preview the first rows of the combined frame before uploading it.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_app_score_FPD0_v2_t3_gini,sil_beta_app_score_FPD10_v2_t3_gini,sil_beta_app_score_FPD30_v2_t3_gini,sil_beta_app_score_FSPD30_v2_t3_gini,sil_beta_app_score_FSTPD30_v2_t3_gini,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 3,0.116809,,,,,apps_score_model_sil,SIL
1,2024-10-01,2024-10-31,Month,sil_beta_app_score,v2,Overall,FPD0,Trench 3,0.268929,,,,,apps_score_model_sil,SIL
2,2024-10-07,2024-10-13,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 3,0.098947,,,,,apps_score_model_sil,SIL
3,2024-10-14,2024-10-20,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 3,0.495238,,,,,apps_score_model_sil,SIL
4,2024-10-21,2024-10-27,Week,sil_beta_app_score,v2,Overall,FPD0,Trench 3,0.319088,,,,,apps_score_model_sil,SIL


In [1046]:
# Upload to BigQuery
# Loads final_df into the table named below and blocks until the load job ends.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.apps_score_model_sil_v2_t3_gini_v1"
job_config = bigquery.LoadJobConfig(
    # WRITE_TRUNCATE overwrites the existing table contents on every run.
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a41eeb6c-26d4-4b36-966c-62f6ff8e8bdc>

In [1047]:
import functools

# Join keys for the renamed per-bad-rate frames; 'category' is part of the key.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'trench_category', 'bad_rate', 'category']
dataframes = [f01, f10, f20, f30, f40]


def merge_dataframes(df1, df2):
    """Outer-join two gini frames on the module-level ``common_columns`` keys."""
    return df1.merge(df2, on=common_columns, how='outer')


# Fold the list left to right into the combined app-score v2 / trench 3 frame.
appscoresilv2t3 = functools.reduce(merge_dataframes, dataframes)

appscoresilv2t3.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [1048]:
# Stack the sixteen per-model frames row-wise into one frame with a fresh
# 0..n-1 index. Judging by the variable names, these are the CIC, alpha-stack,
# beta-stack and app-score frames for v1 (ALL) and v2 (trench 1-3).
result = pd.concat([cicsilscorev1all, 
                    cicsilscorev2t1, 
                    cicsilscorev2t2, 
                    cicsilscorev2t3, 
                    alphastacksilv1all,
                    alphastacksilv2t1,
                    alphastacksilv2t2,
                    alphastacksilv2t3,
                    betastacksilv1all,
                    betastacksilv2t1,
                    betastacksilv2t2,
                    betastacksilv2t3,
                    appscoresilv1all,
                    appscoresilv2t1,
                    appscoresilv2t2,
                    appscoresilv2t3,
                    ], ignore_index=True)
# Report the combined size, then preview the first rows.
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (25944, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [1049]:
# Row counts per (Model_Name, version, category), NaN combinations included,
# listed in descending index order.
result[['Model_Name', 'version', 'category']].value_counts(dropna=False).sort_index(ascending = False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [1050]:
# Row counts per model name (NaN included).
result['Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
sil_beta_app_score       6565
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
Name: count, dtype: int64

In [1051]:
# Snapshot the combined frame under its own name so later changes to `result`
# do not affect it.
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (25944, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


# Beta SIL Demo Score

## V1

## FPD0

## Test

In [1052]:
# FPD0 'Test' population: v1 Beta demo-score predictions read from
# audit_balance.ml_model_run_details, joined to the loan master table and to
# per-loan delinquency flags.
# - NULL / empty trenchCategory is normalised to 'ALL'.
# - Only disbursed loans with a non-null score and flg_mature_fpd0 = 1 are kept.
# NOTE(review): calcFeature_cleaned is built in the first CTE but never
# selected downstream.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  ;"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2941291,2f28c592-7e0f-4d9f-85a4-82e9fb20331f,60829412910019,0.1481726806,2025-09-15 12:15:49,2025-09-15,2025-09,Test,0,1,SIL-Instore,v1,ALL
1,3236486,dd803806-7507-45f9-b78b-e51af49293e2,60832364860018,0.1610862924,2025-09-15 12:27:17,2025-09-15,2025-09,Test,0,1,SIL Competitor,v1,ALL
2,3614141,4ca18655-1a6e-4752-aaf1-79e5ccdf1aec,60836141410021,0.0729577436,2025-09-15 12:53:47,2025-09-15,2025-09,Test,0,1,SIL Competitor,v1,ALL
3,3679161,f859c9af-94be-4847-8409-394e0a54412d,60836791610016,0.0478485902,2025-09-15 12:33:36,2025-09-15,2025-09,Test,0,1,SIL-Instore,v1,ALL
4,3684950,bcb6eaef-d14b-4e65-8d7f-a33cf15341da,60836849500012,0.0667055448,2025-09-15 12:36:04,2025-09-15,2025-09,Test,0,1,SIL Competitor,v1,ALL


In [1053]:
# Keep the 'Test' rows under their own name; `dfd` is reused by the next query.
df1 = dfd.copy()

## Train

In [1054]:
# FPD0 'Train' population: same shape as the 'Test' query, but the v1 Beta
# demo-score predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and the rows are
# labelled 'Train'.
# - NULL / empty trenchCategory is normalised to 'ALL'.
# - Only disbursed loans with a non-null score and flg_mature_fpd0 = 1 are kept.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2088188,30f5f6c6-bd89-49f3-b092-f387cc6b91bf,60820881880015,0.086305,2023-06-11 19:15:47,2023-06-11,2023-06,Train,1,1,SIL-Instore,v1,ALL
1,2074935,93eadec0-dd64-4fbb-a44a-8ffcfe919700,60820749350015,0.116053,2023-06-08 11:04:07,2023-06-08,2023-06,Train,0,1,SIL-Instore,v1,ALL
2,1968160,ba53edd4-1bd5-481a-b6b2-e00fbcfcbd44,60819681600011,0.066137,2023-03-30 10:50:42,2023-03-30,2023-03,Train,0,1,SIL-Instore,v1,ALL
3,1869115,0a2e6d9c-be65-4dc1-92c3-17bbc2389fa1,60818691150013,0.118876,2023-01-21 15:09:20,2023-01-21,2023-01,Train,0,1,SIL-Instore,v1,ALL
4,2065320,27a621fb-037d-43b5-af02-62d0edb518b8,60820653200019,0.086217,2023-05-28 15:01:03,2023-05-28,2023-05,Train,0,1,SIL-Instore,v1,ALL


In [1055]:
# Keep the 'Train' rows under their own name for concatenation with df1.
df2 = dfd.copy()

In [1056]:
# NOTE(review): this repeats the assignment made in the previous cell; it is
# harmless but redundant.
df2 = dfd.copy()

In [1057]:
# Stack the 'Test' (df1) and 'Train' (df2) rows into one frame with a fresh index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Print column dtypes and non-null counts for the stacked frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 280404 entries, 0 to 280403
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             280404 non-null  object        
 1   digitalLoanAccountId   280404 non-null  object        
 2   loanAccountNumber      280404 non-null  object        
 3   sil_beta_demo_score    280404 non-null  object        
 4   appln_submit_datetime  280404 non-null  datetime64[us]
 5   disbursementdate       280404 non-null  dbdate        
 6   Application_month      280404 non-null  object        
 7   Data_selection         280404 non-null  object        
 8   deffpd0                280404 non-null  Int64         
 9   flg_mature_fpd0        280404 non-null  Int64         
 10  new_loan_type          280404 non-null  object        
 11  modelVersionId         280404 non-null  object        
 12  trenchCategory         280404 non-null  obje

In [1058]:
# The score column is loaded with `object` dtype (see the df_concat.info()
# output above), so cast it to a numeric dtype first. errors='coerce' turns any
# value that cannot be parsed into NaN instead of raising.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Per-month row counts. This must be the cell's last expression to be shown;
# as the first statement its result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1059]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd0.csv")

In [1060]:
# Earlier helper variants, kept commented out for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0')
# gini_results = calculate_periodic_gini_producttype(
#     df_concat, 
#     'sil_beta_demo_score', 
#     'deffpd0', 
#     'FPD0',
#     product_column='new_loan_type'
# )

# FPD0 gini of sil_beta_demo_score against the deffpd0 label; the helper is
# given the model-version, trench and loan-type columns.
# NOTE(review): the helper is defined elsewhere in the notebook; how it groups
# rows by those columns is not visible from this cell.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_demo_score', 
    'deffpd0', 
    'FPD0',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [1061]:
# Keep the FPD0 gini results under their own name; `gini_results` is
# overwritten by the next bad-rate section.
f0 = gini_results.copy()

In [1062]:
# Preview the first ten FPD0 gini rows.
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.141958,Month,sil_beta_demo_score,v1,FPD0,Overall,ALL
1,2023-01-02,2023-01-08,0.279249,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL
2,2023-01-09,2023-01-15,0.118033,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL
3,2023-01-16,2023-01-22,0.15302,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL
4,2023-01-23,2023-01-29,-0.006286,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL
5,2023-01-30,2023-02-05,0.196649,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL
6,2023-02-01,2023-02-28,0.150587,Month,sil_beta_demo_score,v1,FPD0,Overall,ALL
7,2023-02-06,2023-02-12,0.030928,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL
8,2023-02-13,2023-02-19,0.379882,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL
9,2023-02-20,2023-02-26,0.056522,Week,sil_beta_demo_score,v1,FPD0,Overall,ALL


In [1063]:
# Keep the key columns plus the FPD0 gini, expose the gini under the generic
# name 'FPD0', and tag every row with the series identifier.
f01 = (
    f0[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FPD0_gini']]
    .rename(columns={'sil_beta_demo_score_FPD0_gini': 'FPD0'})
    .assign(category='sil_beta_demo_score_FPD0_v1_all')
)
# Row count per loan type / version / trench as a quick sanity check.
f01.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_demo_score_FPD0_v1_all  Overall         v1       ALL                184
                                 SIL Competitor  v1       ALL                 51
                                 SIL Repeat      v1       ALL                 14
                                 SIL ZERO        v1       ALL                 83
                                 SIL-Instore     v1       ALL                184
dtype: int64

## FPD10

## Test

In [1064]:
# FPD10 'Test' population: v1 Beta demo-score predictions read from
# audit_balance.ml_model_run_details, joined to the loan master table and to
# per-loan delinquency flags.
# - NULL / empty trenchCategory is normalised to 'ALL'.
# - Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are kept.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  ;"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3747348,17b2d3b5-01ba-40dd-af8a-bc69cae866c2,60837473480018,0.0551821368,2025-10-16 09:17:39,2025-10-16,2025-10,Test,0,1,SIL Competitor,v1,ALL
1,3747379,1e224ff2-65f8-4811-a992-affe83f52fb1,60837473790015,0.0194045076,2025-10-16 09:32:23,2025-10-16,2025-10,Test,0,1,SIL-Instore,v1,ALL
2,3747305,3f7cadb5-80bf-436f-8633-dace8e784a92,60837473050017,0.080498859,2025-10-16 09:01:16,2025-10-16,2025-10,Test,1,1,SIL-Instore,v1,ALL
3,3747313,e1015f19-a695-41dd-aee1-3e0acb0202e6,60837473130017,0.0470407015,2025-10-16 08:53:23,2025-10-16,2025-10,Test,0,1,SIL Competitor,v1,ALL
4,3747745,48e043b2-628a-4b0e-b124-25e690e69b3d,60837477450017,0.0357624129,2025-10-16 12:19:07,2025-10-16,2025-10,Test,0,1,SIL Competitor,v1,ALL


In [1065]:
# Keep the FPD10 'Test' rows under their own name; `dfd` is reused by the next query.
df1 = dfd.copy()

## Train

In [1066]:
# FPD10 'Train' population: same shape as the 'Test' query, but the v1 Beta
# demo-score predictions are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and the rows are
# labelled 'Train'.
# - NULL / empty trenchCategory is normalised to 'ALL'.
# - Only disbursed loans with a non-null score and flg_mature_fpd10 = 1 are kept.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,1906261,aa42d0e6-aca4-4917-a065-8cfdf17ccc0d,60819062610016,0.101238,2023-02-18 14:49:12,2023-02-18,2023-02,Train,0,1,SIL-Instore,v1,ALL
1,2065194,71d1a2d3-d5c0-465e-a923-17a4cc7fd818,60820651940011,0.095539,2023-05-28 14:10:16,2023-05-28,2023-05,Train,0,1,SIL-Instore,v1,ALL
2,1968323,75d352e3-89e5-445d-8b5a-ee769f52c4d8,60819683230011,0.079822,2023-03-30 12:17:45,2023-03-30,2023-03,Train,1,1,SIL-Instore,v1,ALL
3,1891979,07d71b05-eebb-4e3f-b2c9-77a057d91539,60818919790016,0.130782,2023-02-08 16:19:56,2023-02-08,2023-02,Train,0,1,SIL-Instore,v1,ALL
4,1846848,ac41223e-0d42-4569-b21f-789bce021291,60818468480011,0.052463,2023-01-02 10:50:43,2023-01-02,2023-01,Train,0,1,SIL-Instore,v1,ALL


In [1067]:
# Keep the FPD10 'Train' rows under their own name for concatenation with df1.
df2 = dfd.copy()

In [1068]:
# Stack the 'Test' (df1) and 'Train' (df2) rows into one frame with a fresh index.
df_concat = pd.concat([df1, df2], ignore_index=True)
# Print column dtypes and non-null counts for the stacked frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 273188 entries, 0 to 273187
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             273188 non-null  object        
 1   digitalLoanAccountId   273188 non-null  object        
 2   loanAccountNumber      273188 non-null  object        
 3   sil_beta_demo_score    273188 non-null  object        
 4   appln_submit_datetime  273188 non-null  datetime64[us]
 5   disbursementdate       273188 non-null  dbdate        
 6   Application_month      273188 non-null  object        
 7   Data_selection         273188 non-null  object        
 8   deffpd10               273188 non-null  Int64         
 9   flg_mature_fpd10       273188 non-null  Int64         
 10  new_loan_type          273188 non-null  object        
 11  modelVersionId         273188 non-null  object        
 12  trenchCategory         273188 non-null  obje

In [1069]:
# The score column is loaded with `object` dtype (see the df_concat.info()
# output above), so cast it to a numeric dtype first. errors='coerce' turns any
# value that cannot be parsed into NaN instead of raising.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Per-month row counts. This must be the cell's last expression to be shown;
# as the first statement its result was computed and silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1070]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd10.csv")

In [1071]:
# Earlier helper variants, kept commented out for reference:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10')
# gini_results = calculate_periodic_gini_producttype(
#     df_concat, 
#     'sil_beta_demo_score', 
#     'deffpd10', 
#     'FPD10',
#     product_column='new_loan_type'
# )

# FPD10 gini of sil_beta_demo_score against the deffpd10 label; the helper is
# given the model-version, trench and loan-type columns.
# NOTE(review): the helper is defined elsewhere in the notebook; how it groups
# rows by those columns is not visible from this cell.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 
    'sil_beta_demo_score', 
    'deffpd10', 
    'FPD10',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
    product_column='new_loan_type'
)

In [1072]:
# Keep the FPD10 gini results under their own name; `gini_results` is reused
# for each bad-rate section.
f1 = gini_results.copy()

In [1073]:
# Preview the first ten FPD10 gini rows.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.231433,Month,sil_beta_demo_score,v1,FPD10,Overall,ALL
1,2023-01-02,2023-01-08,0.410532,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL
2,2023-01-09,2023-01-15,0.113578,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL
3,2023-01-16,2023-01-22,0.485714,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL
4,2023-01-23,2023-01-29,0.102941,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL
5,2023-01-30,2023-02-05,0.042026,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL
6,2023-02-01,2023-02-28,0.113077,Month,sil_beta_demo_score,v1,FPD10,Overall,ALL
7,2023-02-06,2023-02-12,0.17038,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL
8,2023-02-13,2023-02-19,0.384306,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL
9,2023-02-20,2023-02-26,-0.136752,Week,sil_beta_demo_score,v1,FPD10,Overall,ALL


In [1074]:
# Keep the key columns plus the FPD10 gini, expose the gini under the generic
# name 'FPD10', and tag every row with the series identifier.
f10 = (
    f1[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FPD10_gini']]
    .rename(columns={'sil_beta_demo_score_FPD10_gini': 'FPD10'})
    .assign(category='sil_beta_demo_score_FPD10_v1_all')
)
# Row count per loan type / version / trench as a quick sanity check.
f10.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_demo_score_FPD10_v1_all  Overall         v1       ALL                181
                                  SIL Competitor  v1       ALL                 48
                                  SIL Repeat      v1       ALL                 14
                                  SIL ZERO        v1       ALL                 80
                                  SIL-Instore     v1       ALL                181
dtype: int64

## FPD30

## Test

In [1075]:
# FPD30 'Test' population: v1 Beta demo-score predictions read from
# audit_balance.ml_model_run_details, joined to the loan master table and to
# per-loan delinquency flags.
# - NULL / empty trenchCategory is normalised to 'ALL'.
# - Only disbursed loans with a non-null score and flg_mature_fpd30 = 1 are kept.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  ;"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,3727034,264d0e2b-58d3-4ee5-ba1a-52c341fc5492,60837270340013,0.0579940987,2025-10-06 12:44:27,2025-10-06,2025-10,Test,0,1,SIL Competitor,v1,ALL
1,3726989,31eaa9f0-bb68-4e56-a787-7f5fbaf1776c,60837269890011,0.0171313726,2025-10-06 12:28:13,2025-10-06,2025-10,Test,0,1,SIL-Instore,v1,ALL
2,3727116,45e88a56-6b91-4afa-89ec-82f221ae3ede,60837271160014,0.0410995396,2025-10-06 13:21:58,2025-10-06,2025-10,Test,0,1,SIL-Instore,v1,ALL
3,3726963,57a49b0a-e9a0-4d98-8c3c-00af0beb0aff,60837269630011,0.0426564884,2025-10-06 13:10:14,2025-10-06,2025-10,Test,0,1,SIL-Instore,v1,ALL
4,3727104,71884179-dd00-4974-91a9-09e55f6312b0,60837271040011,0.0569256112,2025-10-06 13:18:03,2025-10-06,2025-10,Test,0,1,SIL-Instore,v1,ALL


In [1076]:
# Keep an independent copy of the FPD30 Test extract before `dfd` is reassigned.
df1 = dfd.copy(deep=True)

## Train

In [1077]:
# FPD30 'Train' extract: same shape as the Test query, but v1 scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are tagged 'Train'.
# Only disbursed loans with a non-null score and a matured FPD30 observation are returned.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  ;
"""

# Run the query on BigQuery and discard rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2075704,8ca4cb7c-7a5f-4533-9675-b2710fe5a653,60820757040016,0.141425,2023-06-03 16:21:44,2023-06-03,2023-06,Train,0,1,SIL-Instore,v1,ALL
1,2010489,a5584976-92c4-4136-89dc-ff39807d2f1f,60820104890018,0.086428,2023-04-23 17:28:02,2023-04-23,2023-04,Train,0,1,SIL-Instore,v1,ALL
2,1961049,49c69ee5-8556-4c08-aa44-65bb804abb5a,60819610490014,0.094472,2023-03-26 09:52:05,2023-03-26,2023-03,Train,0,1,SIL-Instore,v1,ALL
3,1862581,88ebd919-a6c1-4c88-8d68-3c8954db9007,60818625810017,0.081355,2023-01-15 15:40:14,2023-01-15,2023-01,Train,0,1,SIL-Instore,v1,ALL
4,1915610,d0f89c37-2c48-4074-976c-57c39db897ff,60819156100018,0.12808,2023-02-24 17:03:03,2023-02-24,2023-02,Train,0,1,SIL-Instore,v1,ALL


In [1078]:
# Keep an independent copy of the FPD30 Train extract.
df2 = dfd.copy(deep=True)

In [1079]:
# Stack the Test rows (df1) above the Train rows (df2) under a fresh 0..n-1 index,
# then print the resulting schema and non-null counts.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 264110 entries, 0 to 264109
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             264110 non-null  object        
 1   digitalLoanAccountId   264110 non-null  object        
 2   loanAccountNumber      264110 non-null  object        
 3   sil_beta_demo_score    264110 non-null  object        
 4   appln_submit_datetime  264110 non-null  datetime64[us]
 5   disbursementdate       264110 non-null  dbdate        
 6   Application_month      264110 non-null  object        
 7   Data_selection         264110 non-null  object        
 8   deffpd30               264110 non-null  Int64         
 9   flg_mature_fpd30       264110 non-null  Int64         
 10  new_loan_type          264110 non-null  object        
 11  modelVersionId         264110 non-null  object        
 12  trenchCategory         264110 non-null  obje

In [1080]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Rows per application month (NaN included), in chronological order. This must be the LAST
# expression in the cell: previously it came first, so its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1081]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd30.csv")

In [1082]:
# Gini of `sil_beta_demo_score` against the `deffpd30` flag, labelled 'FPD30', split by
# product type, model version and trench. Judging by the head() output further down, the
# result has one row per period (Month/Week) and loan type.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'sil_beta_demo_score',
    'deffpd30',
    'FPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1083]:
# Independent copy of the FPD30 Gini table (later merged with the other metrics).
f2 = gini_results.copy(deep=True)

In [1084]:
# Peek at the first ten FPD30 Gini rows.
f2.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.286133,Month,sil_beta_demo_score,v1,FPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.382369,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.290358,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.485714,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.119526,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.090598,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.133196,Month,sil_beta_demo_score,v1,FPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.153846,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.417967,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL
9,2023-02-20,2023-02-26,-0.04771,Week,sil_beta_demo_score,v1,FPD30,Overall,ALL


In [1085]:
# Reduce the FPD30 Gini table to the reporting columns, shorten the metric column name to
# 'FPD30' and tag every row with its category label.
f20 = (
    f2[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_demo_score_FPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FPD30_gini':'FPD30'})
    .assign(category='sil_beta_demo_score_FPD30_v1_all')
)

# Number of Gini rows per category / loan type / version / trench.
f20.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_demo_score_FPD30_v1_all  Overall         v1       ALL                179
                                  SIL Competitor  v1       ALL                 46
                                  SIL Repeat      v1       ALL                 14
                                  SIL ZERO        v1       ALL                 78
                                  SIL-Instore     v1       ALL                179
dtype: int64

## FSPD30

## Test

In [1086]:
# FSPD30 'Test' extract: v1 scores read from audit_balance.ml_model_run_details, joined to the
# loan master table and to per-loan delinquency flags. Only disbursed loans with a non-null
# score and a matured FSPD30 observation (flg_mature_fspd_30 = 1) are returned.
#
# FIX: the version filter was 'v2', unlike the matching Train query and the
# 'sil_beta_demo_score_FSPD30_v1_all' label applied downstream, and the extract came back
# empty, so FSPD30 Gini was computed on Train rows only. It now selects 'v1'.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base
  ;"""

# Run the query on BigQuery and discard rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [1087]:
# Keep an independent copy of the FSPD30 Test extract before `dfd` is reassigned.
df1 = dfd.copy(deep=True)

## Train

In [1088]:
# FSPD30 'Train' extract: v1 scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details, joined to the loan master table
# and the delinquency flags. Only disbursed loans with a non-null score and a matured FSPD30
# observation (flg_mature_fspd_30 = 1) are returned; rows are tagged 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
   modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  ;
"""

# Run the query on BigQuery and discard rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2071931,e585b35d-bc13-494d-970d-2272d976991b,60820719310019,0.102814,2023-06-01 12:50:57,2023-06-01,2023-06,Train,0,1,SIL-Instore,v1,ALL
1,1846820,73902306-ce07-4625-9d1e-a611eeb83166,60818468200019,0.07975,2023-01-02 10:16:04,2023-01-02,2023-01,Train,0,1,SIL-Instore,v1,ALL
2,2112184,ea999d53-c130-4982-9b65-f38cc33fc1a4,60821121840018,0.233475,2023-06-29 14:25:00,2023-06-29,2023-06,Train,0,1,SIL-Instore,v1,ALL
3,1977804,3fe28d61-53a1-4f6c-8c4e-a505b2322b1a,60819778040014,0.267969,2023-05-11 15:12:46,2023-05-11,2023-05,Train,0,1,SIL-Instore,v1,ALL
4,1886434,b904ad8e-32d0-4094-a9cb-87f7107447fc,60818864340013,0.101886,2023-02-04 18:01:28,2023-02-04,2023-02,Train,0,1,SIL-Instore,v1,ALL


In [1089]:
# Keep an independent copy of the FSPD30 Train extract.
df2 = dfd.copy(deep=True)

In [1090]:
# Stack the Test rows (df1) above the Train rows (df2) under a fresh 0..n-1 index,
# then print the resulting schema and non-null counts.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 206014 entries, 0 to 206013
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             206014 non-null  Int64         
 1   digitalLoanAccountId   206014 non-null  object        
 2   loanAccountNumber      206014 non-null  object        
 3   sil_beta_demo_score    206014 non-null  float64       
 4   appln_submit_datetime  206014 non-null  datetime64[us]
 5   disbursementdate       206014 non-null  dbdate        
 6   Application_month      206014 non-null  object        
 7   Data_selection         206014 non-null  object        
 8   deffspd30              206014 non-null  Int64         
 9   flg_mature_fspd_30     206014 non-null  Int64         
 10  new_loan_type          206014 non-null  object        
 11  modelVersionId         206014 non-null  object        
 12  trenchCategory         206014 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1091]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Rows per application month (NaN included), in chronological order. This must be the LAST
# expression in the cell: previously it came first, so its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1092]:
# df_concat.to_csv(r"sil_beta_demo_scorefspd30.csv")

In [1093]:
# Gini of `sil_beta_demo_score` against the `deffspd30` flag, labelled 'FSPD30', split by
# product type, model version and trench. Judging by the head() output further down, the
# result has one row per period (Month/Week) and loan type.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'sil_beta_demo_score',
    'deffspd30',
    'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1094]:
# Independent copy of the FSPD30 Gini table (later merged with the other metrics).
f3 = gini_results.copy(deep=True)

In [1095]:
# Peek at the first ten FSPD30 Gini rows.
f3.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.25096,Month,sil_beta_demo_score,v1,FSPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.355795,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.235256,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.371812,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.131773,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.066116,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.120064,Month,sil_beta_demo_score,v1,FSPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.150435,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.309559,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL
9,2023-02-20,2023-02-26,-0.036571,Week,sil_beta_demo_score,v1,FSPD30,Overall,ALL


In [1096]:
# Reduce the FSPD30 Gini table to the reporting columns, shorten the metric column name to
# 'FSPD30' and tag every row with its category label.
f30 = (
    f3[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_demo_score_FSPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSPD30_gini':'FSPD30'})
    .assign(category='sil_beta_demo_score_FSPD30_v1_all')
)

# Number of Gini rows per category / loan type / version / trench.
f30.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                           loan_type       version  trench_category
sil_beta_demo_score_FSPD30_v1_all  Overall         v1       ALL                142
                                   SIL Competitor  v1       ALL                 24
                                   SIL Repeat      v1       ALL                 14
                                   SIL ZERO        v1       ALL                 41
                                   SIL-Instore     v1       ALL                142
dtype: int64

## FSTPD30

## Test

In [1097]:
# FSTPD30 'Test' extract: v1 scores read from audit_balance.ml_model_run_details, joined to
# the loan master table and to per-loan delinquency flags. Only disbursed loans with a
# non-null score and a matured FSTPD30 observation (flg_mature_fstpd_30 = 1) are returned.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score, 
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;"""

# Run the query on BigQuery and discard rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3606879,c5b823b4-a2b9-4a3a-836f-cbffbba5bcfd,60836068790011,0.0462201083,2025-08-08 11:20:04,2025-08-08,2025-08,Test,0,1,SIL-Instore,v1,ALL
1,3603572,c62ab930-a2ec-48a1-be00-44f6e56d106c,60836035720012,0.0553451698,2025-08-06 17:01:13,2025-08-06,2025-08,Test,0,1,SIL Competitor,v1,ALL
2,3603221,67c928fa-e469-4471-b6fa-0ccfc459d6b5,60836032210012,0.0827231193,2025-08-06 15:08:17,2025-08-06,2025-08,Test,0,1,SIL-Instore,v1,ALL
3,3452144,e5ced157-e148-4f14-8843-9e9a78bd3455,60834521440015,0.074376438,2025-05-22 19:37:58,2025-05-22,2025-05,Test,0,1,SIL-Instore,v1,ALL
4,3381741,64dc0d73-2ee4-402f-a011-75b2a711d917,60833817410015,0.0892592066,2025-04-15 18:26:12,2025-04-15,2025-04,Test,0,1,SIL-Instore,v1,ALL


In [1098]:
# Keep an independent copy of the FSTPD30 Test extract before `dfd` is reassigned.
df1 = dfd.copy(deep=True)

## Train

In [1099]:
# FSTPD30 'Train' extract: v1 scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details, joined to the loan master table
# and the delinquency flags. Only disbursed loans with a non-null score and a matured FSTPD30
# observation (flg_mature_fstpd_30 = 1) are returned; rows are tagged 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v1'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
 modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  ;
"""

# Run the query on BigQuery and discard rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2033056,ca535f9e-4c93-4d7b-8590-204375be8582,60820330560019,0.071975,2023-05-07 13:24:43,2023-05-07,2023-05,Train,0,1,SIL-Instore,v1,ALL
1,1269032,8569c6a5-74be-439a-9fe3-d86fb82c3e7b,60812690320015,0.212552,2023-01-23 16:09:44,2023-01-23,2023-01,Train,0,1,SIL-Instore,v1,ALL
2,1087315,9fa761ba-f20e-4ecc-9bf2-5df7ed14d78a,60810873150013,0.287521,2023-03-31 10:43:00,2023-03-31,2023-03,Train,0,1,SIL-Instore,v1,ALL
3,2096155,5d846bd4-85c4-4b58-94ee-536a38f15a4b,60820961550013,0.168554,2023-06-16 18:30:55,2023-06-16,2023-06,Train,0,1,SIL-Instore,v1,ALL
4,2032759,1bfc5662-ba92-45f3-8dfc-c04f194147e3,60820327590019,0.098765,2023-05-07 10:28:07,2023-05-07,2023-05,Train,1,1,SIL-Instore,v1,ALL


In [1100]:
# Keep an independent copy of the FSTPD30 Train extract.
df2 = dfd.copy(deep=True)

In [1101]:
# Stack the Test rows (df1) above the Train rows (df2) under a fresh 0..n-1 index,
# then print the resulting schema and non-null counts.
df_concat = pd.concat(objs=[df1, df2], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 236609 entries, 0 to 236608
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             236609 non-null  object        
 1   digitalLoanAccountId   236609 non-null  object        
 2   loanAccountNumber      236609 non-null  object        
 3   sil_beta_demo_score    236609 non-null  object        
 4   appln_submit_datetime  236609 non-null  datetime64[us]
 5   disbursementdate       236609 non-null  dbdate        
 6   Application_month      236609 non-null  object        
 7   Data_selection         236609 non-null  object        
 8   deffstpd30             236609 non-null  Int64         
 9   flg_mature_fstpd_30    236609 non-null  Int64         
 10  new_loan_type          236609 non-null  object        
 11  modelVersionId         236609 non-null  object        
 12  trenchCategory         236609 non-null  obje

In [1102]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Rows per application month (NaN included), in chronological order. This must be the LAST
# expression in the cell: previously it came first, so its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1103]:
# df_concat.to_csv(r"sil_beta_demo_scorefstpd30.csv")

In [1104]:
# Gini of `sil_beta_demo_score` against the `deffstpd30` flag, labelled 'FSTPD30', split by
# product type, model version and trench. Judging by the head() output further down, the
# result has one row per period (Month/Week) and loan type.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat,
    'sil_beta_demo_score',
    'deffstpd30',
    'FSTPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1105]:
# Independent copy of the FSTPD30 Gini table (later merged with the other metrics).
f4 = gini_results.copy(deep=True)

In [1106]:
# Peek at the first ten FSTPD30 Gini rows.
f4.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2023-01-01,2023-01-31,0.253371,Month,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
1,2023-01-02,2023-01-08,0.335695,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
2,2023-01-09,2023-01-15,0.219488,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
3,2023-01-16,2023-01-22,0.363712,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
4,2023-01-23,2023-01-29,0.146972,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
5,2023-01-30,2023-02-05,0.121693,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
6,2023-02-01,2023-02-28,0.128934,Month,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
7,2023-02-06,2023-02-12,0.118855,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
8,2023-02-13,2023-02-19,0.343576,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL
9,2023-02-20,2023-02-26,-0.04775,Week,sil_beta_demo_score,v1,FSTPD30,Overall,ALL


In [1107]:
# Reduce the FSTPD30 Gini table to the reporting columns, shorten the metric column name to
# 'FSTPD30' and tag every row with its category label.
f40 = (
    f4[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'loan_type', 'trench_category', 'sil_beta_demo_score_FSTPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSTPD30_gini':'FSTPD30'})
    .assign(category='sil_beta_demo_score_FSTPD30_v1_all')
)

# Number of Gini rows per category / loan type / version / trench.
f40.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                            loan_type       version  trench_category
sil_beta_demo_score_FSTPD30_v1_all  Overall         v1       ALL                168
                                    SIL Competitor  v1       ALL                 35
                                    SIL Repeat      v1       ALL                 14
                                    SIL ZERO        v1       ALL                 67
                                    SIL-Instore     v1       ALL                168
dtype: int64

## Combining the dataframes

In [1108]:
import functools

# Per-metric Gini tables, listed in the order they are merged (f0 and f1 come from earlier cells).
dataframes = [f0, f1, f2, f3, f4]

# Columns used as the join key for every pairwise merge.
# NOTE(review): 'bad_rate' is part of the key, and the head() outputs show it holds a different
# constant per table ('FPD30' in f2, 'FSPD30' in f3, 'FSTPD30' in f4). Rows from different
# tables would therefore never match, leaving one populated Gini column per row. Confirm this
# layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate', 'trench_category']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the module-level ``common_columns`` key."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left-to-right into a single wide table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_demo_score_FPD10_gini',
       'sil_beta_demo_score_FPD30_gini',
       'sil_beta_demo_score_FSPD30_gini',
       'sil_beta_demo_score_FSTPD30_gini'], dtype=object)

In [1109]:
# Capitalise the trench column and suffix every Gini column with the
# version/trench tag (v1, all trenches).
final_df = final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        'sil_beta_demo_score_FPD0_gini': 'sil_beta_demo_score_FPD0_v1_all_gini',
        'sil_beta_demo_score_FPD10_gini': 'sil_beta_demo_score_FPD10_v1_all_gini',
        'sil_beta_demo_score_FPD30_gini': 'sil_beta_demo_score_FPD30_v1_all_gini',
        'sil_beta_demo_score_FSPD30_gini': 'sil_beta_demo_score_FSPD30_v1_all_gini',
        'sil_beta_demo_score_FSTPD30_gini': 'sil_beta_demo_score_FSTPD30_v1_all_gini',
    }
)
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_v1_all_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_demo_score_FPD10_v1_all_gini',
       'sil_beta_demo_score_FPD30_v1_all_gini',
       'sil_beta_demo_score_FSPD30_v1_all_gini',
       'sil_beta_demo_score_FSTPD30_v1_all_gini'],
      dtype='object')

In [1110]:
# Put the key columns first, then the five Gini columns, and append two
# constant descriptor columns for the model and the product.
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'bad_rate', 'Trench_category',
    'sil_beta_demo_score_FPD0_v1_all_gini',
    'sil_beta_demo_score_FPD10_v1_all_gini',
    'sil_beta_demo_score_FPD30_v1_all_gini',
    'sil_beta_demo_score_FSPD30_v1_all_gini',
    'sil_beta_demo_score_FSTPD30_v1_all_gini',
]].assign(
    Model_display_name='beta_demo_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                 datetime64[ns]
end_date                                   datetime64[ns]
period                                             object
Model_Name                                         object
version                                            object
loan_type                                          object
bad_rate                                           object
Trench_category                                    object
sil_beta_demo_score_FPD0_v1_all_gini              float64
sil_beta_demo_score_FPD10_v1_all_gini             float64
sil_beta_demo_score_FPD30_v1_all_gini             float64
sil_beta_demo_score_FSPD30_v1_all_gini            float64
sil_beta_demo_score_FSTPD30_v1_all_gini           float64
Model_display_name                                 object
Product_type                                       object
dtype: object

In [1111]:
# Preview the first five rows of the table that is about to be uploaded.
final_df.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_demo_score_FPD0_v1_all_gini,sil_beta_demo_score_FPD10_v1_all_gini,sil_beta_demo_score_FPD30_v1_all_gini,sil_beta_demo_score_FSPD30_v1_all_gini,sil_beta_demo_score_FSTPD30_v1_all_gini,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,sil_beta_demo_score,v1,Overall,FPD0,ALL,0.141958,,,,,beta_demo_model_sil,SIL
1,2023-01-02,2023-01-08,Week,sil_beta_demo_score,v1,Overall,FPD0,ALL,0.279249,,,,,beta_demo_model_sil,SIL
2,2023-01-09,2023-01-15,Week,sil_beta_demo_score,v1,Overall,FPD0,ALL,0.118033,,,,,beta_demo_model_sil,SIL
3,2023-01-16,2023-01-22,Week,sil_beta_demo_score,v1,Overall,FPD0,ALL,0.15302,,,,,beta_demo_model_sil,SIL
4,2023-01-23,2023-01-29,Week,sil_beta_demo_score,v1,Overall,FPD0,ALL,-0.006286,,,,,beta_demo_model_sil,SIL


In [1112]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_sil_v1_all_gini_v1"
# WRITE_TRUNCATE overwrites the destination table; use "WRITE_APPEND" to add rows instead.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ef1316a1-8a73-4272-be22-c86111ea8ddb>

In [1113]:
import functools

# Key columns shared by the long-format frames (these also carry 'category').
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']
dataframes = [f01, f10, f20, f30, f40]


def merge_dataframes(df1, df2):
    """Outer-merge two frames on the module-level ``common_columns`` keys."""
    merged = df1.merge(df2, how='outer', on=common_columns)
    return merged


# Fold the list left to right into the combined demo-score v1 frame.
demoscoresilv1all = functools.reduce(merge_dataframes, dataframes)

demoscoresilv1all.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [1114]:
# Stack every per-model / per-version / per-trench Gini frame into one long
# table, renumbering the index from zero.
result = pd.concat(
    objs=[
        cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3,
        alphastacksilv1all, alphastacksilv2t1, alphastacksilv2t2, alphastacksilv2t3,
        betastacksilv1all, betastacksilv2t1, betastacksilv2t2, betastacksilv2t3,
        appscoresilv1all, appscoresilv2t1, appscoresilv2t2, appscoresilv2t3,
        demoscoresilv1all,
    ],
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head(n=5)

The shape of the concatenated dataframe result is:	 (28275, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [1115]:
# Row count per (model, version, category), NaN combinations included, listed in descending key order.
result.value_counts(subset=['Model_Name', 'version', 'category'], dropna=False).sort_index(ascending=False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [1116]:
# Row count per model name, counting missing names as their own bucket.
result.loc[:, 'Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
sil_beta_app_score       6565
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
sil_beta_demo_score      2331
Name: count, dtype: int64

In [1117]:
# Snapshot the combined table so later cells can extend it without touching `result`.
masterdf = result.copy(deep=True)
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head(n=5)

The shape of masterdf is:	 (28275, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## V2

## Trench 1

## FPD0

## Test

In [1118]:
# Test-side FPD0 pull. The SQL reads scored rows for the two demo-score model
# display names at modelVersionId 'v2' from audit_balance.ml_model_run_details,
# maps NULL/empty trenchCategory to 'ALL', joins the loan master table and the
# derived delinquency flags, keeps disbursed loans with a non-null score and
# flg_mature_fpd0 = 1, and finally filters to trenchCategory = 'Trench 1'.
# Every returned row is labelled Data_selection = 'Test'.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery, drop exact duplicate rows (keeping the first
# occurrence), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [1119]:
# Keep an independent copy of the Test pull before `dfd` is reused for Train.
df1 = dfd.copy(deep=True)

## Train

In [1120]:
# Train-side FPD0 pull. Same shape as the Test query, but the scored rows come
# from dap_ds_poweruser_playground.ml_training_model_run_details and every
# returned row is labelled Data_selection = 'Train'. The filter keeps disbursed
# loans with a non-null score and flg_mature_fpd0 = 1, restricted to
# trenchCategory = 'Trench 1' for modelVersionId 'v2'.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""

# Run the query on BigQuery, drop exact duplicate rows (keeping the first
# occurrence), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.496388,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2812017,df809fae-2184-4654-b3b6-771c5b5fd2ab,60828120170016,0.636443,2024-08-31 18:59:01,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
2,2755726,94965dff-18a1-4768-b2be-f274560811cc,60827557260014,0.376856,2024-08-15 12:08:41,2024-08-15,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
3,2788431,f58f08af-a7b7-4d34-8b1d-e6ee614f117f,60827884310019,0.228,2024-08-24 19:00:10,2024-08-24,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
4,2744696,5864d3d3-df54-4116-bc52-d4f5f4a7cef4,60827446960016,0.332302,2024-08-11 18:52:20,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 1


In [1121]:
# Keep an independent copy of the Train pull.
df2 = dfd.copy(deep=True)

In [1122]:
# Stack the Test and Train pulls into one frame.
# The Test pull can come back empty; passing an empty frame to pd.concat raises
# a FutureWarning because empty entries will stop being ignored when result
# dtypes are determined. Excluding empty frames up front keeps the current
# dtypes and silences the warning.
_non_empty_parts = [part for part in (df1, df2) if not part.empty]
if _non_empty_parts:
    df_concat = pd.concat(_non_empty_parts, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns
    # instead of letting pd.concat([]) raise ValueError.
    df_concat = df2.iloc[:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213250 entries, 0 to 213249
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             213250 non-null  Int64         
 1   digitalLoanAccountId   213250 non-null  object        
 2   loanAccountNumber      213250 non-null  object        
 3   sil_beta_demo_score    213250 non-null  float64       
 4   appln_submit_datetime  213250 non-null  datetime64[us]
 5   disbursementdate       213250 non-null  dbdate        
 6   Application_month      213250 non-null  object        
 7   Data_selection         213250 non-null  object        
 8   deffpd0                213250 non-null  Int64         
 9   flg_mature_fpd0        213250 non-null  Int64         
 10  new_loan_type          213250 non-null  object        
 11  modelVersionId         213250 non-null  object        
 12  trenchCategory         213250 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1123]:
# Coerce the score to numeric first (values that cannot be parsed become NaN).
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')
# End the cell with the month distribution so it is actually rendered: a bare
# expression that is not the last statement of a cell is evaluated and discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1124]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd0.csv")

In [1125]:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0')
# gini_results = calculate_periodic_gini_producttype(
#     df_concat, 
#     'sil_beta_demo_score', 
#     'deffpd0', 
#     'FPD0',
#     product_column='new_loan_type'
# )

# Gini of the demo score against the FPD0 default flag; the helper is defined
# earlier in the notebook and presumably splits the result by period, loan
# type, model version and trench.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [1126]:
# Freeze the FPD0 Gini table under its own name before `gini_results` is overwritten.
f0 = gini_results.copy(deep=True)

In [1127]:
# Preview the first ten FPD0 Gini rows.
f0.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.168274,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
1,2024-08-01,2024-08-31,0.205176,Month,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
2,2024-08-05,2024-08-11,0.21809,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
3,2024-08-12,2024-08-18,0.237965,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
4,2024-08-19,2024-08-25,0.210336,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
5,2024-08-26,2024-09-01,0.183367,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
6,2024-09-01,2024-09-30,0.210041,Month,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
7,2024-09-02,2024-09-08,0.198951,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
8,2024-09-09,2024-09-15,0.232366,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1
9,2024-09-16,2024-09-22,0.185164,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 1


In [1128]:
# Long-format FPD0 slice for v2 / Trench 1: keep the key columns, expose the
# Gini value under the bad-rate name, and tag every row with its category label.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_demo_score_FPD0_gini',
    ]]
    .rename(columns={'sil_beta_demo_score_FPD0_gini': 'FPD0'})
    .assign(category='sil_beta_demo_score_FPD0_v2_t1')
)
# Row count per (category, loan type, version, trench) combination.
f01.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_demo_score_FPD0_v2_t1  Overall         v2       Trench 1           80
                                SIL Competitor  v2       Trench 1           63
                                SIL ZERO        v2       Trench 1           80
                                SIL-Instore     v2       Trench 1           79
dtype: int64

## FPD10

## Test

In [1129]:
# Test-side FPD10 pull. The SQL reads scored rows for the two demo-score model
# display names at modelVersionId 'v2' from audit_balance.ml_model_run_details,
# maps NULL/empty trenchCategory to 'ALL', joins the loan master table and the
# derived delinquency flags, keeps disbursed loans with a non-null score and
# flg_mature_fpd10 = 1, and finally filters to trenchCategory = 'Trench 1'.
# Every returned row is labelled Data_selection = 'Test'.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery, drop exact duplicate rows (keeping the first
# occurrence), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [1130]:
# Keep an independent copy of the Test pull before `dfd` is reused for Train.
df1 = dfd.copy(deep=True)

## Train

In [1131]:
# Train-side FPD10 pull. Same shape as the Test query, but the scored rows come
# from dap_ds_poweruser_playground.ml_training_model_run_details and every
# returned row is labelled Data_selection = 'Train'. The filter keeps disbursed
# loans with a non-null score and flg_mature_fpd10 = 1, restricted to
# trenchCategory = 'Trench 1' for modelVersionId 'v2'.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""

# Run the query on BigQuery, drop exact duplicate rows (keeping the first
# occurrence), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.496388,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2794134,3c7e8bec-d91f-44ee-bd67-1139565a6b37,60827941340012,0.569275,2024-08-26 14:00:09,2024-08-26,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
2,2721030,a6af46be-9edd-4b82-885a-cea7e56a5d30,60827210300018,0.659967,2024-08-04 15:05:31,2024-08-04,2024-08,Train,0,1,SIL-Instore,v2,Trench 1
3,2804104,7ad822af-3d57-4732-b3b8-543891bfb207,60828041040017,0.559946,2024-08-29 13:34:51,2024-08-29,2024-08,Train,1,1,SIL-Instore,v2,Trench 1
4,2789672,cb66a339-ca39-4649-a892-4114e6f7265b,60827896720018,0.641302,2024-08-25 10:56:23,2024-08-25,2024-08,Train,0,1,SIL-Instore,v2,Trench 1


In [1132]:
# Keep an independent copy of the Train pull.
df2 = dfd.copy(deep=True)

In [1133]:
# Stack the Test and Train pulls into one frame.
# The Test pull can come back empty; passing an empty frame to pd.concat raises
# a FutureWarning because empty entries will stop being ignored when result
# dtypes are determined. Excluding empty frames up front keeps the current
# dtypes and silences the warning.
_non_empty_parts = [part for part in (df1, df2) if not part.empty]
if _non_empty_parts:
    df_concat = pd.concat(_non_empty_parts, ignore_index=True)
else:
    # Both pulls are empty: keep an empty frame with the expected columns
    # instead of letting pd.concat([]) raise ValueError.
    df_concat = df2.iloc[:0].copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213250 entries, 0 to 213249
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             213250 non-null  Int64         
 1   digitalLoanAccountId   213250 non-null  object        
 2   loanAccountNumber      213250 non-null  object        
 3   sil_beta_demo_score    213250 non-null  float64       
 4   appln_submit_datetime  213250 non-null  datetime64[us]
 5   disbursementdate       213250 non-null  dbdate        
 6   Application_month      213250 non-null  object        
 7   Data_selection         213250 non-null  object        
 8   deffpd10               213250 non-null  Int64         
 9   flg_mature_fpd10       213250 non-null  Int64         
 10  new_loan_type          213250 non-null  object        
 11  modelVersionId         213250 non-null  object        
 12  trenchCategory         213250 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1134]:
# Coerce the score to numeric first (values that cannot be parsed become NaN).
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')
# End the cell with the month distribution so it is actually rendered: a bare
# expression that is not the last statement of a cell is evaluated and discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1135]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd10.csv")

In [1136]:
# gini_results = calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10')
# gini_results = calculate_periodic_gini_producttype(
#     df_concat, 
#     'sil_beta_demo_score', 
#     'deffpd10', 
#     'FPD10',
#     product_column='new_loan_type'
# )

# Gini of the demo score against the FPD10 default flag; the helper is defined
# earlier in the notebook and presumably splits the result by period, loan
# type, model version and trench.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    trench_column='trenchCategory',
    model_version_column='modelVersionId',
)

In [1137]:
# Freeze the FPD10 Gini table under its own name before `gini_results` is overwritten.
f1 = gini_results.copy(deep=True)

In [1138]:
# Preview the first ten FPD10 Gini rows.
f1.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.231188,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
1,2024-08-01,2024-08-31,0.26573,Month,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
2,2024-08-05,2024-08-11,0.270589,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
3,2024-08-12,2024-08-18,0.264299,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
4,2024-08-19,2024-08-25,0.285619,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
5,2024-08-26,2024-09-01,0.258792,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
6,2024-09-01,2024-09-30,0.270889,Month,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
7,2024-09-02,2024-09-08,0.264672,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
8,2024-09-09,2024-09-15,0.307507,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1
9,2024-09-16,2024-09-22,0.251742,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 1


In [1139]:
# Long-format FPD10 slice for v2 / Trench 1: keep the key columns, expose the
# Gini value under the bad-rate name, and tag every row with its category label.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_demo_score_FPD10_gini',
    ]]
    .rename(columns={'sil_beta_demo_score_FPD10_gini': 'FPD10'})
    .assign(category='sil_beta_demo_score_FPD10_v2_t1')
)
# Row count per (category, loan type, version, trench) combination.
f10.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_demo_score_FPD10_v2_t1  Overall         v2       Trench 1           80
                                 SIL Competitor  v2       Trench 1           63
                                 SIL ZERO        v2       Trench 1           80
                                 SIL-Instore     v2       Trench 1           79
dtype: int64

## FPD30

## Test

In [1140]:
# Test-side FPD30 pull. The SQL reads scored rows for the two demo-score model
# display names at modelVersionId 'v2' from audit_balance.ml_model_run_details,
# maps NULL/empty trenchCategory to 'ALL', joins the loan master table and the
# derived delinquency flags, keeps disbursed loans with a non-null score and
# flg_mature_fpd30 = 1, and finally filters to trenchCategory = 'Trench 1'.
# Every returned row is labelled Data_selection = 'Test'.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery, drop exact duplicate rows (keeping the first
# occurrence), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [1141]:
# Snapshot the Test pull; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy(deep=True)

## Train

In [1142]:
# FPD30 / Train: same shape as the Test pull, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled 'Train'.
# The SQL text itself is unchanged.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""

# Run the query, keep the first occurrence of any fully duplicated row, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.496388,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2811986,5e68b21a-8ddb-436e-b0b5-f1b2af5fa318,60828119860012,0.4238,2024-08-31 18:52:06,2024-08-31,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
2,2784995,87bf8950-4415-4c7f-bdd3-eade2e247690,60827849950014,0.427894,2024-08-23 18:43:26,2024-08-23,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
3,2808529,41b1cd2d-3a34-4f5a-a4fc-444ddab59873,60828085290014,0.572579,2024-08-30 18:39:55,2024-08-30,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
4,2760131,b3aa4afa-65a8-4bd2-b975-24f3db218d37,60827601310018,0.322471,2024-08-16 15:26:18,2024-08-16,2024-08,Train,0,1,SIL ZERO,v2,Trench 1


In [1143]:
# Snapshot the Train pull under its own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [1144]:
# The Test pull can come back with zero rows (its recorded preview shows only the header).
# Stacking an empty frame makes pandas emit a FutureWarning, and a future pandas release will
# let the empty frame's dtypes influence the result, so only non-empty pulls are concatenated.
# If both pulls are empty, fall back to df1 so the expected columns are still present.
df_concat = pd.concat(
    [pull for pull in (df1, df2) if not pull.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 208492 entries, 0 to 208491
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             208492 non-null  Int64         
 1   digitalLoanAccountId   208492 non-null  object        
 2   loanAccountNumber      208492 non-null  object        
 3   sil_beta_demo_score    208492 non-null  float64       
 4   appln_submit_datetime  208492 non-null  datetime64[us]
 5   disbursementdate       208492 non-null  dbdate        
 6   Application_month      208492 non-null  object        
 7   Data_selection         208492 non-null  object        
 8   deffpd30               208492 non-null  Int64         
 9   flg_mature_fpd30       208492 non-null  Int64         
 10  new_loan_type          208492 non-null  object        
 11  modelVersionId         208492 non-null  object        
 12  trenchCategory         208492 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1145]:
# Coerce the score to numeric first (values that cannot be parsed become NaN), then end the
# cell on the per-month row counts: only the last expression of a cell is rendered, so the
# counts were previously computed and thrown away.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1146]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd30.csv")

In [1147]:
# FPD30 gini of the demo score per period, split by loan type, model version and trench.
# The simpler helpers (calculate_periodic_gini, calculate_periodic_gini_producttype)
# are intentionally not called here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1148]:
# Keep the FPD30 gini table as f2; it is one of the frames merged in the combining step.
f2 = gini_results.copy(deep=True)

In [1149]:
# Preview the first ten FPD30 gini rows.
f2.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.270218,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
1,2024-08-01,2024-08-31,0.281032,Month,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
2,2024-08-05,2024-08-11,0.275998,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
3,2024-08-12,2024-08-18,0.289641,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
4,2024-08-19,2024-08-25,0.300175,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
5,2024-08-26,2024-09-01,0.280547,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
6,2024-09-01,2024-09-30,0.291224,Month,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
7,2024-09-02,2024-09-08,0.274611,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
8,2024-09-09,2024-09-15,0.326392,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1
9,2024-09-16,2024-09-22,0.281685,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 1


In [1150]:
# Reordered copy of the FPD30 gini table: the metric column is renamed to 'FPD30' and every
# row is tagged with a category label. The cell ends on the row count per loan type.
f20 = (
    f2[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FPD30_gini': 'FPD30'})
    .assign(category='sil_beta_demo_score_FPD30_v2_t1')
)
f20.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_demo_score_FPD30_v2_t1  Overall         v2       Trench 1           78
                                 SIL Competitor  v2       Trench 1           61
                                 SIL ZERO        v2       Trench 1           78
                                 SIL-Instore     v2       Trench 1           78
dtype: int64

## FSPD30

## Test

In [1151]:
# FSPD30 / Test: model-run scores from audit_balance.ml_model_run_details (v2 beta demo model)
# joined to disbursed loans and their delinquency flags, restricted to loans whose
# FSPD30 maturity flag is set and to Trench 1. The SQL text itself is unchanged.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;"""

# Run the query, keep the first occurrence of any fully duplicated row, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [1152]:
# Snapshot the Test pull; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy(deep=True)

## Train

In [1153]:
# FSPD30 / Train: same shape as the Test pull, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled 'Train'.
# The SQL text itself is unchanged.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
   modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""

# Run the query, keep the first occurrence of any fully duplicated row, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.496388,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2811986,5e68b21a-8ddb-436e-b0b5-f1b2af5fa318,60828119860012,0.4238,2024-08-31 18:52:06,2024-08-31,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
2,2784995,87bf8950-4415-4c7f-bdd3-eade2e247690,60827849950014,0.427894,2024-08-23 18:43:26,2024-08-23,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
3,2808529,41b1cd2d-3a34-4f5a-a4fc-444ddab59873,60828085290014,0.572579,2024-08-30 18:39:55,2024-08-30,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
4,2760131,b3aa4afa-65a8-4bd2-b975-24f3db218d37,60827601310018,0.322471,2024-08-16 15:26:18,2024-08-16,2024-08,Train,0,1,SIL ZERO,v2,Trench 1


In [1154]:
# Snapshot the Train pull under its own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [1155]:
# The Test pull can come back with zero rows (its recorded preview shows only the header).
# Stacking an empty frame makes pandas emit a FutureWarning, and a future pandas release will
# let the empty frame's dtypes influence the result, so only non-empty pulls are concatenated.
# If both pulls are empty, fall back to df1 so the expected columns are still present.
df_concat = pd.concat(
    [pull for pull in (df1, df2) if not pull.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 196009 entries, 0 to 196008
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             196009 non-null  Int64         
 1   digitalLoanAccountId   196009 non-null  object        
 2   loanAccountNumber      196009 non-null  object        
 3   sil_beta_demo_score    196009 non-null  float64       
 4   appln_submit_datetime  196009 non-null  datetime64[us]
 5   disbursementdate       196009 non-null  dbdate        
 6   Application_month      196009 non-null  object        
 7   Data_selection         196009 non-null  object        
 8   deffspd30              196009 non-null  Int64         
 9   flg_mature_fspd_30     196009 non-null  Int64         
 10  new_loan_type          196009 non-null  object        
 11  modelVersionId         196009 non-null  object        
 12  trenchCategory         196009 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1156]:
# Coerce the score to numeric first (values that cannot be parsed become NaN), then end the
# cell on the per-month row counts: only the last expression of a cell is rendered, so the
# counts were previously computed and thrown away.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1157]:
# df_concat.to_csv(r"sil_beta_demo_scorefspd30.csv")

In [1158]:
# FSPD30 gini of the demo score per period, split by loan type, model version and trench.
# The simpler helpers (calculate_periodic_gini, calculate_periodic_gini_producttype)
# are intentionally not called here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1159]:
# Keep the FSPD30 gini table as f3; it is one of the frames merged in the combining step.
f3 = gini_results.copy(deep=True)

In [1160]:
# Preview the first ten FSPD30 gini rows.
f3.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.294607,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
1,2024-08-01,2024-08-31,0.293022,Month,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
2,2024-08-05,2024-08-11,0.271511,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
3,2024-08-12,2024-08-18,0.274348,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
4,2024-08-19,2024-08-25,0.316393,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
5,2024-08-26,2024-09-01,0.315728,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
6,2024-09-01,2024-09-30,0.316201,Month,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
7,2024-09-02,2024-09-08,0.332711,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
8,2024-09-09,2024-09-15,0.340255,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1
9,2024-09-16,2024-09-22,0.285471,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 1


In [1161]:
# Reordered copy of the FSPD30 gini table: the metric column is renamed to 'FSPD30' and every
# row is tagged with a category label. The cell ends on the row count per loan type.
f30 = (
    f3[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FSPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSPD30_gini': 'FSPD30'})
    .assign(category='sil_beta_demo_score_FSPD30_v2_t1')
)
f30.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_demo_score_FSPD30_v2_t1  Overall         v2       Trench 1           73
                                  SIL Competitor  v2       Trench 1           56
                                  SIL ZERO        v2       Trench 1           73
                                  SIL-Instore     v2       Trench 1           73
dtype: int64

## FSTPD30

## Test

In [1162]:
# FSTPD30 / Test: model-run scores from audit_balance.ml_model_run_details (v2 beta demo model)
# joined to disbursed loans and their delinquency flags, restricted to loans whose
# FSTPD30 maturity flag is set and to Trench 1. The SQL text itself is unchanged.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score, 
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;"""

# Run the query, keep the first occurrence of any fully duplicated row, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [1163]:
# Snapshot the Test pull; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy(deep=True)

## Train

In [1164]:
# FSTPD30 / Train: same shape as the Test pull, but scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled 'Train'.
# The SQL text itself is unchanged.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
 modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 1'
  ;
"""

# Run the query, keep the first occurrence of any fully duplicated row, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,3206286,6b2d3c9c-efd4-4d62-a4d3-db28826ffd7a,60832062860017,0.496388,2025-11-15 10:23:43,2025-01-21,2025-11,Train,0,1,SIL-Instore,v2,Trench 1
1,2811986,5e68b21a-8ddb-436e-b0b5-f1b2af5fa318,60828119860012,0.4238,2024-08-31 18:52:06,2024-08-31,2024-08,Train,1,1,SIL ZERO,v2,Trench 1
2,2784995,87bf8950-4415-4c7f-bdd3-eade2e247690,60827849950014,0.427894,2024-08-23 18:43:26,2024-08-23,2024-08,Train,1,1,SIL ZERO,v2,Trench 1
3,2808529,41b1cd2d-3a34-4f5a-a4fc-444ddab59873,60828085290014,0.572579,2024-08-30 18:39:55,2024-08-30,2024-08,Train,0,1,SIL ZERO,v2,Trench 1
4,2760131,b3aa4afa-65a8-4bd2-b975-24f3db218d37,60827601310018,0.322471,2024-08-16 15:26:18,2024-08-16,2024-08,Train,0,1,SIL ZERO,v2,Trench 1


In [1165]:
# Snapshot the Train pull under its own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [1166]:
# The Test pull can come back with zero rows (its recorded preview shows only the header).
# Stacking an empty frame makes pandas emit a FutureWarning, and a future pandas release will
# let the empty frame's dtypes influence the result, so only non-empty pulls are concatenated.
# If both pulls are empty, fall back to df1 so the expected columns are still present.
df_concat = pd.concat(
    [pull for pull in (df1, df2) if not pull.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 182673 entries, 0 to 182672
Data columns (total 13 columns):
 #   Column                 Non-Null Count   Dtype         
---  ------                 --------------   -----         
 0   customerId             182673 non-null  Int64         
 1   digitalLoanAccountId   182673 non-null  object        
 2   loanAccountNumber      182673 non-null  object        
 3   sil_beta_demo_score    182673 non-null  float64       
 4   appln_submit_datetime  182673 non-null  datetime64[us]
 5   disbursementdate       182673 non-null  dbdate        
 6   Application_month      182673 non-null  object        
 7   Data_selection         182673 non-null  object        
 8   deffstpd30             182673 non-null  Int64         
 9   flg_mature_fstpd_30    182673 non-null  Int64         
 10  new_loan_type          182673 non-null  object        
 11  modelVersionId         182673 non-null  object        
 12  trenchCategory         182673 non-null  obje

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1167]:
# Coerce the score to numeric first (values that cannot be parsed become NaN), then end the
# cell on the per-month row counts: only the last expression of a cell is rendered, so the
# counts were previously computed and thrown away.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1168]:
# df_concat.to_csv(r"sil_beta_demo_scorefstpd30.csv")

In [1169]:
# FSTPD30 gini of the demo score per period, split by loan type, model version and trench.
# The simpler helper calculate_periodic_gini is intentionally not called here.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1170]:
# Keep the FSTPD30 gini table as f4; it is one of the frames merged in the combining step.
f4 = gini_results.copy(deep=True)

In [1171]:
# Preview the first ten FSTPD30 gini rows.
f4.head(n=10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.243295,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
1,2024-08-01,2024-08-31,0.269567,Month,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
2,2024-08-05,2024-08-11,0.267788,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
3,2024-08-12,2024-08-18,0.248642,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
4,2024-08-19,2024-08-25,0.2983,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
5,2024-08-26,2024-09-01,0.295662,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
6,2024-09-01,2024-09-30,0.299529,Month,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
7,2024-09-02,2024-09-08,0.32952,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
8,2024-09-09,2024-09-15,0.308249,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1
9,2024-09-16,2024-09-22,0.274893,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 1


In [1172]:
# Reordered copy of the FSTPD30 gini table: the metric column is renamed to 'FSTPD30' and every
# row is tagged with a category label. The cell ends on the row count per loan type.
f40 = (
    f4[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FSTPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='sil_beta_demo_score_FSTPD30_v2_t1')
)
f40.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                           loan_type       version  trench_category
sil_beta_demo_score_FSTPD30_v2_t1  Overall         v2       Trench 1           67
                                   SIL Competitor  v2       Trench 1           50
                                   SIL ZERO        v2       Trench 1           67
                                   SIL-Instore     v2       Trench 1           67
dtype: int64

## combining the dataframe

In [1173]:
import functools

# Key columns shared by every per-metric gini table.
# NOTE(review): 'bad_rate' holds the metric label in the FPD30/FSPD30/FSTPD30 previews, so
# rows from different metrics will not line up on it and the outer join stacks them with NaN
# in the other gini columns. Confirm that this layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate', 'trench_category']
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two gini tables on `common_columns`, keeping rows found in either one."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left to right into a single wide table, then list its columns.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_demo_score_FPD10_gini',
       'sil_beta_demo_score_FPD30_gini',
       'sil_beta_demo_score_FSPD30_gini',
       'sil_beta_demo_score_FSTPD30_gini'], dtype=object)

In [1174]:
# Add the model version / trench suffix (v2, Trench 1) to every Gini column and
# capitalise the trench key column.
final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        **{
            f'sil_beta_demo_score_{bad_rate_name}_gini': f'sil_beta_demo_score_{bad_rate_name}_v2_t1_gini'
            for bad_rate_name in ('FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30')
        },
    },
    inplace=True,
)
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_v2_t1_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_demo_score_FPD10_v2_t1_gini',
       'sil_beta_demo_score_FPD30_v2_t1_gini',
       'sil_beta_demo_score_FSPD30_v2_t1_gini',
       'sil_beta_demo_score_FSTPD30_v2_t1_gini'],
      dtype='object')

In [1175]:
# Put the key columns first, then the five Gini columns, and append the constant
# model / product labels.
final_df = final_df.loc[:, [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate', 'Trench_category',
    'sil_beta_demo_score_FPD0_v2_t1_gini',
    'sil_beta_demo_score_FPD10_v2_t1_gini',
    'sil_beta_demo_score_FPD30_v2_t1_gini',
    'sil_beta_demo_score_FSPD30_v2_t1_gini',
    'sil_beta_demo_score_FSTPD30_v2_t1_gini',
]].assign(
    Model_display_name='beta_demo_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                datetime64[ns]
end_date                                  datetime64[ns]
period                                            object
Model_Name                                        object
version                                           object
loan_type                                         object
bad_rate                                          object
Trench_category                                   object
sil_beta_demo_score_FPD0_v2_t1_gini              float64
sil_beta_demo_score_FPD10_v2_t1_gini             float64
sil_beta_demo_score_FPD30_v2_t1_gini             float64
sil_beta_demo_score_FSPD30_v2_t1_gini            float64
sil_beta_demo_score_FSTPD30_v2_t1_gini           float64
Model_display_name                                object
Product_type                                      object
dtype: object

In [1176]:
# Preview the first five rows of the frame that is about to be uploaded.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_demo_score_FPD0_v2_t1_gini,sil_beta_demo_score_FPD10_v2_t1_gini,sil_beta_demo_score_FPD30_v2_t1_gini,sil_beta_demo_score_FSPD30_v2_t1_gini,sil_beta_demo_score_FSTPD30_v2_t1_gini,Model_display_name,Product_type
0,2024-07-29,2024-08-04,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 1,0.168274,,,,,beta_demo_model_sil,SIL
1,2024-08-01,2024-08-31,Month,sil_beta_demo_score,v2,Overall,FPD0,Trench 1,0.205176,,,,,beta_demo_model_sil,SIL
2,2024-08-05,2024-08-11,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 1,0.21809,,,,,beta_demo_model_sil,SIL
3,2024-08-12,2024-08-18,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 1,0.237965,,,,,beta_demo_model_sil,SIL
4,2024-08-19,2024-08-25,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 1,0.210336,,,,,beta_demo_model_sil,SIL


In [1177]:
# Upload final_df to BigQuery, replacing whatever the destination table holds.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_sil_v2_t1_gini_v1"
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # or "WRITE_APPEND"
job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=00eefad4-d84a-4201-9a71-26857569d7d1>

In [1178]:
import functools

# Long-format frames (one per bad rate), each carrying its own 'category' label.
dataframes = [f01, f10, f20, f30, f40]
# Key columns present in every frame; the outer merge aligns rows on these.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']

def merge_dataframes(df1, df2):
    """Outer-merge two long-format frames on the notebook-level ``common_columns``."""
    return df1.merge(df2, on=common_columns, how='outer')

# Fold the list pairwise, left to right, into a single frame.
demoscoresilv2t1 = functools.reduce(merge_dataframes, dataframes)

demoscoresilv2t1.columns.to_numpy()

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [1179]:
# Stack the per-model / per-version / per-trench frames into one long table,
# renumbering the rows from zero.
result = pd.concat(
    [
        cicsilscorev1all, cicsilscorev2t1, cicsilscorev2t2, cicsilscorev2t3,
        alphastacksilv1all, alphastacksilv2t1, alphastacksilv2t2, alphastacksilv2t3,
        betastacksilv1all, betastacksilv2t1, betastacksilv2t2, betastacksilv2t3,
        appscoresilv1all, appscoresilv2t1, appscoresilv2t2, appscoresilv2t3,
        demoscoresilv1all, demoscoresilv2t1,
    ],
    axis=0,
    ignore_index=True,
)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head(5)

The shape of the concatenated dataframe result is:	 (29700, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [1180]:
# Rows per (model, version, category) combination, NaNs included, listed in
# descending key order.
result.value_counts(subset=['Model_Name', 'version', 'category'], dropna=False).sort_index(ascending=False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [1181]:
# Rows contributed by each model (NaN model names would be counted too).
result.loc[:, 'Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
sil_beta_app_score       6565
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
sil_beta_demo_score      3756
Name: count, dtype: int64

In [1182]:
# Independent snapshot of the combined results so later cells can extend it
# without touching `result`.
masterdf = result.copy(deep=True)
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head(5)

The shape of masterdf is:	 (29700, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## Trench 2

## FPD0

## Test

In [1183]:
# Test split, FPD0, Trench 2: v2 demo-score predictions from the model run audit
# table, joined to disbursed loans whose FPD0 observation is mature.
# NOTE(review): the WHERE filters on loanmaster columns (and the inner join that
# keys on loanmaster.loanAccountNumber) make the LEFT JOIN behave as an inner join.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;"""

# Run the query and drop exact duplicate rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(5)


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [1184]:
# Keep the Test-split rows under their own name before `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1185]:
# Train split, FPD0, Trench 2: same shape as the Test query, but the predictions
# come from the training model run table and rows are labelled 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""

# Run the query and drop exact duplicate rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2171768,2e88cb4c-04fd-4d14-9947-42b4fe33b4b1,60821717680012,0.700301,2024-08-16 15:03:39,2024-08-16,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2495801,962ae643-3e5b-4c81-a3c2-e681346fadc3,60824958010011,0.371613,2024-08-11 17:55:30,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,2395216,f11eb09f-8ce6-4465-b691-2c9b61cd142b,60823952160013,0.552395,2024-08-16 18:11:06,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,1698128,0fa6acbf-3493-4269-801e-f9f4a462b0e1,60816981280017,0.328849,2024-08-04 14:23:05,2024-08-04,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,2290846,c3912e2c-5e3e-4b2c-868f-d7823ced3955,60822908460011,0.649085,2024-08-26 19:14:37,2024-08-26,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [1186]:
# Keep the Train-split rows under their own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [1187]:
# Stack the Test and Train rows into one frame with a fresh 0..n-1 index.
# Either split can come back empty (the Test query returned no Trench 2 rows),
# and concatenating an empty frame raises pandas' FutureWarning about
# "empty or all-NA entries". Dropping empty frames first is the documented way
# to keep the current result dtypes without the warning.
non_empty_splits = [split for split in (df1, df2) if not split.empty]
# If both splits are empty, fall back to the originals so the result is an
# empty frame with the expected columns rather than a "No objects" ValueError.
df_concat = pd.concat(non_empty_splits or [df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4454 entries, 0 to 4453
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4454 non-null   Int64         
 1   digitalLoanAccountId   4454 non-null   object        
 2   loanAccountNumber      4454 non-null   object        
 3   sil_beta_demo_score    4454 non-null   float64       
 4   appln_submit_datetime  4454 non-null   datetime64[us]
 5   disbursementdate       4454 non-null   dbdate        
 6   Application_month      4454 non-null   object        
 7   Data_selection         4454 non-null   object        
 8   deffpd0                4454 non-null   Int64         
 9   flg_mature_fpd0        4454 non-null   Int64         
 10  new_loan_type          4454 non-null   object        
 11  modelVersionId         4454 non-null   object        
 12  trenchCategory         4454 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1188]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')
# Rows per application month, in chronological order. This must be the cell's
# last expression: as a leading statement its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1189]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd0.csv")

In [1190]:
# Gini of the demo score against the FPD0 flag, split by loan type, model
# version and trench. The earlier variants calculate_periodic_gini and
# calculate_periodic_gini_producttype (same score / label / 'FPD0' arguments)
# are superseded by this call.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1191]:
# Snapshot the FPD0 Gini table before `gini_results` is overwritten by the next bad rate.
f0 = gini_results.copy(deep=True)

In [1192]:
# Preview the first ten rows of the FPD0 Gini table.
f0.iloc[:10]

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.536842,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
1,2024-08-01,2024-08-31,0.152386,Month,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
2,2024-08-05,2024-08-11,0.241071,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
3,2024-08-12,2024-08-18,-0.109375,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
4,2024-08-19,2024-08-25,0.183024,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
5,2024-08-26,2024-09-01,-0.242165,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
6,2024-09-01,2024-09-30,0.135611,Month,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
7,2024-09-02,2024-09-08,0.4,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
8,2024-09-09,2024-09-15,-0.058824,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2
9,2024-09-16,2024-09-22,0.412903,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 2


In [1193]:
# Long-format view of the FPD0 results: keep the key columns, expose the Gini
# value under the plain bad-rate name and tag every row with its category label.
f01 = (
    f0.loc[:, ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
               'loan_type', 'trench_category', 'sil_beta_demo_score_FPD0_gini']]
    .rename(columns={'sil_beta_demo_score_FPD0_gini': 'FPD0'})
    .assign(category='sil_beta_demo_score_FPD0_v2_t2')
)
# Row count per category / loan type / version / trench as a quick sanity check.
f01.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_demo_score_FPD0_v2_t2  Overall         v2       Trench 2           80
                                SIL Competitor  v2       Trench 2           61
                                SIL ZERO        v2       Trench 2           73
                                SIL-Instore     v2       Trench 2           80
dtype: int64

## FPD10

## Test

In [1194]:
# Test split, FPD10, Trench 2: v2 demo-score predictions from the model run audit
# table, joined to disbursed loans whose FPD10 observation is mature.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;"""

# Run the query and drop exact duplicate rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(5)


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [1195]:
# Keep the Test-split rows under their own name before `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1196]:
# Train split, FPD10, Trench 2: same shape as the Test query, but the predictions
# come from the training model run table and rows are labelled 'Train'.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""

# Run the query and drop exact duplicate rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(5)

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2495801,962ae643-3e5b-4c81-a3c2-e681346fadc3,60824958010011,0.371613,2024-08-11 17:55:30,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2395216,f11eb09f-8ce6-4465-b691-2c9b61cd142b,60823952160013,0.552395,2024-08-16 18:11:06,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,1819166,b834523a-d6d1-448c-8da8-33c6611c6073,60818191660019,0.325173,2024-08-24 15:13:52,2024-08-24,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2171768,2e88cb4c-04fd-4d14-9947-42b4fe33b4b1,60821717680012,0.700301,2024-08-16 15:03:39,2024-08-16,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,1183628,ffc243c6-6423-4bc1-8362-38ce49b28d6a,60811836280033,0.317891,2024-08-11 13:54:14,2024-08-11,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [1197]:
# Keep the Train-split rows under their own name, independent of `dfd`.
df2 = dfd.copy(deep=True)

In [1198]:
# Stack the Test and Train rows into one frame with a fresh 0..n-1 index.
# Either split can come back empty (the Test query returned no Trench 2 rows),
# and concatenating an empty frame raises pandas' FutureWarning about
# "empty or all-NA entries". Dropping empty frames first is the documented way
# to keep the current result dtypes without the warning.
non_empty_splits = [split for split in (df1, df2) if not split.empty]
# If both splits are empty, fall back to the originals so the result is an
# empty frame with the expected columns rather than a "No objects" ValueError.
df_concat = pd.concat(non_empty_splits or [df1, df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4454 entries, 0 to 4453
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4454 non-null   Int64         
 1   digitalLoanAccountId   4454 non-null   object        
 2   loanAccountNumber      4454 non-null   object        
 3   sil_beta_demo_score    4454 non-null   float64       
 4   appln_submit_datetime  4454 non-null   datetime64[us]
 5   disbursementdate       4454 non-null   dbdate        
 6   Application_month      4454 non-null   object        
 7   Data_selection         4454 non-null   object        
 8   deffpd10               4454 non-null   Int64         
 9   flg_mature_fpd10       4454 non-null   Int64         
 10  new_loan_type          4454 non-null   object        
 11  modelVersionId         4454 non-null   object        
 12  trenchCategory         4454 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1199]:
# Force the score column to numeric; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')
# Rows per application month, in chronological order. This must be the cell's
# last expression: as a leading statement its result was silently discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1200]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd10.csv")

In [1201]:
# Gini of the demo score against the FPD10 flag, split by loan type, model
# version and trench. The earlier variants calculate_periodic_gini and
# calculate_periodic_gini_producttype (same score / label / 'FPD10' arguments)
# are superseded by this call.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1202]:
# Snapshot the FPD10 Gini table before `gini_results` is overwritten by the next bad rate.
f1 = gini_results.copy(deep=True)

In [1203]:
# Preview the first ten rows of the FPD10 Gini table.
f1.iloc[:10]

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.365079,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
1,2024-08-01,2024-08-31,0.223178,Month,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
2,2024-08-05,2024-08-11,0.349754,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
3,2024-08-12,2024-08-18,-0.058824,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
4,2024-08-19,2024-08-25,0.185185,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
5,2024-08-26,2024-09-01,-0.017422,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
6,2024-09-01,2024-09-30,0.29983,Month,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
7,2024-09-02,2024-09-08,0.538462,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
8,2024-09-09,2024-09-15,,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2
9,2024-09-16,2024-09-22,0.438095,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 2


In [1204]:
# Long-format view of the FPD10 results: keep the key columns, expose the Gini
# value under the plain bad-rate name and tag every row with its category label.
f10 = (
    f1.loc[:, ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
               'loan_type', 'trench_category', 'sil_beta_demo_score_FPD10_gini']]
    .rename(columns={'sil_beta_demo_score_FPD10_gini': 'FPD10'})
    .assign(category='sil_beta_demo_score_FPD10_v2_t2')
)
# Row count per category / loan type / version / trench as a quick sanity check.
f10.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_demo_score_FPD10_v2_t2  Overall         v2       Trench 2           80
                                 SIL Competitor  v2       Trench 2           61
                                 SIL ZERO        v2       Trench 2           73
                                 SIL-Instore     v2       Trench 2           80
dtype: int64

## FPD30

## Test

In [1205]:
# Test split, FPD30, Trench 2: v2 demo-score predictions from the model run audit
# table, joined to disbursed loans whose FPD30 observation is mature.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;"""

# Run the query and drop exact duplicate rows, keeping the first occurrence.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head(5)


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [1206]:
# Keep the Test-split rows under their own name; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1207]:
# FPD30 / Train split.
# Same shape as the Test query, but the scores come from the training run log
# (dap_ds_poweruser_playground.ml_training_model_run_details) and rows are
# labelled 'Train'. Keeps disbursed 'Trench 2' loans with flg_mature_fpd30 = 1.
# NOTE(review): calcFeature_cleaned is computed in `cleaned` but not used by the
# later CTEs - confirm it can be dropped.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""

# Fetch the result and discard rows that are exact duplicates (first occurrence wins).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2283424,a61feee2-81f1-449d-80bc-713247456551,60822834240011,0.349314,2024-08-10 19:07:26,2024-08-10,2024-08,Train,0,1,SIL ZERO,v2,Trench 2
1,2592724,53583a1f-c02b-4acc-95dd-88a14d19a1e6,60825927240018,0.62108,2024-08-01 11:21:13,2024-08-01,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,2351981,9971c415-2217-4e6c-bbd0-600b1180810e,60823519810017,0.506248,2024-08-04 13:36:02,2024-08-04,2024-08,Train,1,1,SIL-Instore,v2,Trench 2
3,2332121,d8ba22e0-2c4e-463d-968f-ed3bb4e633b2,60823321210015,0.516379,2024-08-19 17:11:25,2024-08-19,2024-08,Train,1,1,SIL-Instore,v2,Trench 2
4,2218435,10c4c853-38b4-4e6f-a3f4-e47a6fbd5cd1,60822184350018,0.289931,2024-08-17 16:29:55,2024-08-17,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [1208]:
# Snapshot the Train-split rows so they can be stacked with the Test split (df1).
df2 = dfd.copy()

In [1209]:
# Stack the Test and Train splits into one frame for the FPD30 Gini run.
# A split can legitimately be empty (the Test query returns no rows in the
# recorded output). Passing an empty frame to pd.concat triggers the pandas
# FutureWarning about empty / all-NA entries, so empty splits are excluded
# before concatenating, which is what pandas recommends to keep current dtypes.
non_empty_splits = [split for split in (df1, df2) if not split.empty]
if non_empty_splits:
    df_concat = pd.concat(non_empty_splits, ignore_index=True)
else:
    # Both splits are empty: keep the schema so downstream column lookups still work.
    df_concat = df1.reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4338 entries, 0 to 4337
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4338 non-null   Int64         
 1   digitalLoanAccountId   4338 non-null   object        
 2   loanAccountNumber      4338 non-null   object        
 3   sil_beta_demo_score    4338 non-null   float64       
 4   appln_submit_datetime  4338 non-null   datetime64[us]
 5   disbursementdate       4338 non-null   dbdate        
 6   Application_month      4338 non-null   object        
 7   Data_selection         4338 non-null   object        
 8   deffpd30               4338 non-null   Int64         
 9   flg_mature_fpd30       4338 non-null   Int64         
 10  new_loan_type          4338 non-null   object        
 11  modelVersionId         4338 non-null   object        
 12  trenchCategory         4338 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1210]:
# Force the score to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce') rather than raising.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Loans per application month. Keep this as the last expression of the cell:
# Jupyter only renders a cell's final expression, so placing it before the
# assignment would compute the counts and silently discard them.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1211]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd30.csv")

In [1212]:
# Periodic Gini of sil_beta_demo_score against the deffpd30 flag, labelled 'FPD30'.
# The helper is given the product, model-version and trench columns; the result
# previewed below carries loan_type / version / trench_category columns.
# (Replaces the earlier calculate_periodic_gini and
# calculate_periodic_gini_producttype variants of this call.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1213]:
# Keep the FPD30 Gini table under its own name; `gini_results` is reused for the next metric.
f2 = gini_results.copy()

In [1214]:
# Preview: one row per period window (Week / Month) with its FPD30 Gini value.
f2.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.365079,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
1,2024-08-01,2024-08-31,0.199477,Month,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
2,2024-08-05,2024-08-11,0.222222,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
3,2024-08-12,2024-08-18,-0.058824,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
4,2024-08-19,2024-08-25,0.185185,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
5,2024-08-26,2024-09-01,-0.017422,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
6,2024-09-01,2024-09-30,0.29983,Month,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
7,2024-09-02,2024-09-08,0.538462,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
8,2024-09-09,2024-09-15,,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2
9,2024-09-16,2024-09-22,0.438095,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 2


In [1215]:
# Tidy view of the FPD30 table: key columns first, the Gini column exposed under
# the bare metric name, and a label identifying score / metric / version / trench.
f20 = (
    f2[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FPD30_gini': 'FPD30'})
    .assign(category='sil_beta_demo_score_FPD30_v2_t2')
)

# Number of period rows per (category, loan type, version, trench).
f20.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_demo_score_FPD30_v2_t2  Overall         v2       Trench 2           78
                                 SIL Competitor  v2       Trench 2           60
                                 SIL ZERO        v2       Trench 2           69
                                 SIL-Instore     v2       Trench 2           78
dtype: int64

## FSPD30

## Test

In [1216]:
# FSPD30 / Test split.
# Reads v2 beta demo scores from the production run log
# (audit_balance.ml_model_run_details), joins them to the loan master and the
# delinquency flags, and keeps disbursed 'Trench 2' loans with
# flg_mature_fspd_30 = 1 (obs_min_inst_def30 >= 2).
# The target deffspd30 is 1 when min_inst_def30 is 1 or 2 on a matured loan.
# NOTE(review): calcFeature_cleaned is built in `cleaned` but never selected
# downstream - confirm it can be dropped.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;"""

# Fetch the result and discard rows that are exact duplicates (first occurrence wins).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [1217]:
# Keep the FSPD30 Test-split rows under their own name; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1218]:
# FSPD30 / Train split.
# Scores come from the training run log
# (dap_ds_poweruser_playground.ml_training_model_run_details) and rows are
# labelled 'Train'. Keeps disbursed 'Trench 2' loans with flg_mature_fspd_30 = 1
# (obs_min_inst_def30 >= 2) and exposes deffspd30 as the target.
# NOTE(review): calcFeature_cleaned is computed in `cleaned` but not used by the
# later CTEs - confirm it can be dropped.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
   modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""

# Fetch the result and discard rows that are exact duplicates (first occurrence wins).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2347495,a96979c7-db9e-4e1a-b8af-10a22e314f32,60823474950017,0.584098,2024-08-29 14:15:23,2024-08-29,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2514315,6280a4da-5ff3-4ba5-a339-a5a87b6a2363,60825143150013,0.637662,2024-08-20 14:52:04,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,2369419,e40203cb-960b-4ab7-9f47-0d041bd027ff,60823694190013,0.576901,2024-08-09 11:26:20,2024-08-09,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2438659,211b519c-ebf1-4696-a82f-e1bd68adf6af,60824386590014,0.6109,2024-08-31 17:17:08,2024-08-31,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
4,2171768,2e88cb4c-04fd-4d14-9947-42b4fe33b4b1,60821717680012,0.700301,2024-08-16 15:03:39,2024-08-16,2024-08,Train,0,1,SIL-Instore,v2,Trench 2


In [1219]:
# Snapshot the FSPD30 Train-split rows so they can be stacked with the Test split (df1).
df2 = dfd.copy()

In [1220]:
# Stack the Test and Train splits into one frame for the FSPD30 Gini run.
# A split can legitimately be empty (the Test query returns no rows in the
# recorded output). Passing an empty frame to pd.concat triggers the pandas
# FutureWarning about empty / all-NA entries, so empty splits are excluded
# before concatenating, which is what pandas recommends to keep current dtypes.
non_empty_splits = [split for split in (df1, df2) if not split.empty]
if non_empty_splits:
    df_concat = pd.concat(non_empty_splits, ignore_index=True)
else:
    # Both splits are empty: keep the schema so downstream column lookups still work.
    df_concat = df1.reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4042 entries, 0 to 4041
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             4042 non-null   Int64         
 1   digitalLoanAccountId   4042 non-null   object        
 2   loanAccountNumber      4042 non-null   object        
 3   sil_beta_demo_score    4042 non-null   float64       
 4   appln_submit_datetime  4042 non-null   datetime64[us]
 5   disbursementdate       4042 non-null   dbdate        
 6   Application_month      4042 non-null   object        
 7   Data_selection         4042 non-null   object        
 8   deffspd30              4042 non-null   Int64         
 9   flg_mature_fspd_30     4042 non-null   Int64         
 10  new_loan_type          4042 non-null   object        
 11  modelVersionId         4042 non-null   object        
 12  trenchCategory         4042 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1221]:
# Force the score to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce') rather than raising.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Loans per application month. Keep this as the last expression of the cell:
# Jupyter only renders a cell's final expression, so placing it before the
# assignment would compute the counts and silently discard them.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1222]:
# df_concat.to_csv(r"sil_beta_demo_scorefspd30.csv")

In [1223]:
# Periodic Gini of sil_beta_demo_score against the deffspd30 flag, labelled 'FSPD30'.
# The helper is given the product, model-version and trench columns; the result
# previewed below carries loan_type / version / trench_category columns.
# (Replaces the earlier calculate_periodic_gini and
# calculate_periodic_gini_producttype variants of this call.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1224]:
# Keep the FSPD30 Gini table under its own name; `gini_results` is reused for the next metric.
f3 = gini_results.copy()

In [1225]:
# Preview: one row per period window (Week / Month) with its FSPD30 Gini value.
f3.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.365079,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
1,2024-08-01,2024-08-31,0.252827,Month,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
2,2024-08-05,2024-08-11,0.384615,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
3,2024-08-12,2024-08-18,0.234375,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
4,2024-08-19,2024-08-25,0.230769,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
5,2024-08-26,2024-09-01,0.010526,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
6,2024-09-01,2024-09-30,0.314444,Month,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
7,2024-09-02,2024-09-08,0.64,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
8,2024-09-09,2024-09-15,0.294118,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2
9,2024-09-16,2024-09-22,0.260504,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 2


In [1226]:
# Tidy view of the FSPD30 table: key columns first, the Gini column exposed under
# the bare metric name, and a label identifying score / metric / version / trench.
f30 = (
    f3[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FSPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSPD30_gini': 'FSPD30'})
    .assign(category='sil_beta_demo_score_FSPD30_v2_t2')
)

# Number of period rows per (category, loan type, version, trench).
f30.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_demo_score_FSPD30_v2_t2  Overall         v2       Trench 2           73
                                  SIL Competitor  v2       Trench 2           55
                                  SIL ZERO        v2       Trench 2           64
                                  SIL-Instore     v2       Trench 2           72
dtype: int64

## FSTPD30

## Test

In [1227]:
# FSTPD30 / Test split.
# Reads v2 beta demo scores from the production run log
# (audit_balance.ml_model_run_details), joins them to the loan master and the
# delinquency flags, and keeps disbursed 'Trench 2' loans with
# flg_mature_fstpd_30 = 1 (obs_min_inst_def30 >= 3).
# The target deffstpd30 is 1 when min_inst_def30 is 1, 2 or 3 on a matured loan.
# NOTE(review): calcFeature_cleaned is built in `cleaned` but never selected
# downstream - confirm it can be dropped.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score, 
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;"""

# Fetch the result and discard rows that are exact duplicates (first occurrence wins).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [1228]:
# Keep the FSTPD30 Test-split rows under their own name; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1229]:
# FSTPD30 / Train split.
# Scores come from the training run log
# (dap_ds_poweruser_playground.ml_training_model_run_details) and rows are
# labelled 'Train'. Keeps disbursed 'Trench 2' loans with flg_mature_fstpd_30 = 1
# (obs_min_inst_def30 >= 3) and exposes deffstpd30 as the target.
# NOTE(review): calcFeature_cleaned is computed in `cleaned` but not used by the
# later CTEs - confirm it can be dropped.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
 modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 2'
  ;
"""

# Fetch the result and discard rows that are exact duplicates (first occurrence wins).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2309326,38df0c6a-580c-4c8a-8a29-e43ddb744cfc,60823093260019,0.321187,2024-08-28 18:47:42,2024-08-28,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
1,2328777,78fa440f-1fa9-4d50-9a80-c46c34f2141d,60823287770018,0.493765,2024-08-16 16:23:39,2024-08-16,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
2,2305871,dc132c9d-02ce-4b19-8151-f15196eb9a81,60823058710013,0.431187,2024-08-20 15:14:57,2024-08-20,2024-08,Train,0,1,SIL-Instore,v2,Trench 2
3,2412930,3da9952f-250e-4277-bfcb-6bf38f61cd1a,60824129300016,0.57813,2024-08-12 11:40:35,2024-08-15,2024-08,Train,1,1,SIL-Instore,v2,Trench 2
4,2476855,c57e5ee2-8947-43a1-8d43-477345bcd843,60824768550018,0.699393,2024-08-26 17:33:53,2024-08-26,2024-08,Train,1,1,SIL-Instore,v2,Trench 2


In [1230]:
# Snapshot the FSTPD30 Train-split rows so they can be stacked with the Test split (df1).
df2 = dfd.copy()

In [1231]:
# Stack the Test and Train splits into one frame for the FSTPD30 Gini run.
# A split can legitimately be empty (the Test query returns no rows in the
# recorded output). Passing an empty frame to pd.concat triggers the pandas
# FutureWarning about empty / all-NA entries, so empty splits are excluded
# before concatenating, which is what pandas recommends to keep current dtypes.
non_empty_splits = [split for split in (df1, df2) if not split.empty]
if non_empty_splits:
    df_concat = pd.concat(non_empty_splits, ignore_index=True)
else:
    # Both splits are empty: keep the schema so downstream column lookups still work.
    df_concat = df1.reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3716 entries, 0 to 3715
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             3716 non-null   Int64         
 1   digitalLoanAccountId   3716 non-null   object        
 2   loanAccountNumber      3716 non-null   object        
 3   sil_beta_demo_score    3716 non-null   float64       
 4   appln_submit_datetime  3716 non-null   datetime64[us]
 5   disbursementdate       3716 non-null   dbdate        
 6   Application_month      3716 non-null   object        
 7   Data_selection         3716 non-null   object        
 8   deffstpd30             3716 non-null   Int64         
 9   flg_mature_fstpd_30    3716 non-null   Int64         
 10  new_loan_type          3716 non-null   object        
 11  modelVersionId         3716 non-null   object        
 12  trenchCategory         3716 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1232]:
# Force the score to a numeric dtype; values that cannot be parsed become NaN
# (errors='coerce') rather than raising.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Loans per application month. Keep this as the last expression of the cell:
# Jupyter only renders a cell's final expression, so placing it before the
# assignment would compute the counts and silently discard them.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1233]:
# df_concat.to_csv(r"sil_beta_demo_scorefstpd30.csv")

In [1234]:
# Periodic Gini of sil_beta_demo_score against the deffstpd30 flag, labelled 'FSTPD30'.
# The helper is given the product, model-version and trench columns; the result
# previewed below carries loan_type / version / trench_category columns.
# (Replaces the earlier calculate_periodic_gini variant of this call.)
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1235]:
# Keep the FSTPD30 Gini table under its own name; it is merged with f0-f3 in the combining step.
f4 = gini_results.copy()

In [1236]:
# Preview: one row per period window (Week / Month) with its FSTPD30 Gini value.
f4.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.326316,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
1,2024-08-01,2024-08-31,0.216387,Month,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
2,2024-08-05,2024-08-11,0.384615,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
3,2024-08-12,2024-08-18,0.234375,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
4,2024-08-19,2024-08-25,0.115385,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
5,2024-08-26,2024-09-01,0.138889,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
6,2024-09-01,2024-09-30,0.28471,Month,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
7,2024-09-02,2024-09-08,0.316667,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
8,2024-09-09,2024-09-15,0.121212,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2
9,2024-09-16,2024-09-22,0.345455,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 2


In [1237]:
# Tidy view of the FSTPD30 table: key columns first, the Gini column exposed under
# the bare metric name, and a label identifying score / metric / version / trench.
f40 = (
    f4[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FSTPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='sil_beta_demo_score_FSTPD30_v2_t2')
)

# Number of period rows per (category, loan type, version, trench).
f40.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                           loan_type       version  trench_category
sil_beta_demo_score_FSTPD30_v2_t2  Overall         v2       Trench 2           67
                                   SIL Competitor  v2       Trench 2           49
                                   SIL ZERO        v2       Trench 2           61
                                   SIL-Instore     v2       Trench 2           67
dtype: int64

## Combining the dataframes

In [1238]:
import functools

# Per-metric Gini tables to combine. f2, f3 and f4 hold FPD30, FSPD30 and
# FSTPD30; f0 and f1 are built earlier in the notebook (presumably FPD0 and
# FPD10, matching the column order of the merged result).
dataframes = [f0, f1, f2, f3, f4]

# Columns shared by every table and used as the join key.
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name',
    'version', 'loan_type', 'bad_rate', 'trench_category',
]


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on ``common_columns``.

    Rows present in only one of the tables are kept; the columns contributed by
    the other table are left as NaN for those rows.
    """
    # NOTE(review): 'bad_rate' is part of the key, and each table carries its own
    # constant bad_rate label (e.g. 'FPD30', 'FSPD30', 'FSTPD30'), so rows from
    # different tables never match and the join effectively stacks them.
    # Confirm this long layout is what the downstream consumer expects.
    return df1.merge(df2, on=common_columns, how='outer')


# Fold the list pairwise, left to right, into one combined table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_demo_score_FPD10_gini',
       'sil_beta_demo_score_FPD30_gini',
       'sil_beta_demo_score_FSPD30_gini',
       'sil_beta_demo_score_FSTPD30_gini'], dtype=object)

In [1239]:
# Give the key column its capitalised name and suffix every Gini column with the
# model version / trench tag (v2, t2).
final_df = final_df.rename(
    columns={
        'trench_category': 'Trench_category',
        'sil_beta_demo_score_FPD0_gini': 'sil_beta_demo_score_FPD0_v2_t2_gini',
        'sil_beta_demo_score_FPD10_gini': 'sil_beta_demo_score_FPD10_v2_t2_gini',
        'sil_beta_demo_score_FPD30_gini': 'sil_beta_demo_score_FPD30_v2_t2_gini',
        'sil_beta_demo_score_FSPD30_gini': 'sil_beta_demo_score_FSPD30_v2_t2_gini',
        'sil_beta_demo_score_FSTPD30_gini': 'sil_beta_demo_score_FSTPD30_v2_t2_gini',
    }
)
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_v2_t2_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_demo_score_FPD10_v2_t2_gini',
       'sil_beta_demo_score_FPD30_v2_t2_gini',
       'sil_beta_demo_score_FSPD30_v2_t2_gini',
       'sil_beta_demo_score_FSTPD30_v2_t2_gini'],
      dtype='object')

In [1240]:
# Put the key columns first, followed by the five Gini metrics, then append two
# constant descriptor columns (model display name and product type).
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'bad_rate', 'Trench_category',
    'sil_beta_demo_score_FPD0_v2_t2_gini',
    'sil_beta_demo_score_FPD10_v2_t2_gini',
    'sil_beta_demo_score_FPD30_v2_t2_gini',
    'sil_beta_demo_score_FSPD30_v2_t2_gini',
    'sil_beta_demo_score_FSTPD30_v2_t2_gini',
]].assign(
    Model_display_name='beta_demo_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                datetime64[ns]
end_date                                  datetime64[ns]
period                                            object
Model_Name                                        object
version                                           object
loan_type                                         object
bad_rate                                          object
Trench_category                                   object
sil_beta_demo_score_FPD0_v2_t2_gini              float64
sil_beta_demo_score_FPD10_v2_t2_gini             float64
sil_beta_demo_score_FPD30_v2_t2_gini             float64
sil_beta_demo_score_FSPD30_v2_t2_gini            float64
sil_beta_demo_score_FSTPD30_v2_t2_gini           float64
Model_display_name                                object
Product_type                                      object
dtype: object

In [1241]:
# Preview the first rows of the assembled wide frame.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_demo_score_FPD0_v2_t2_gini,sil_beta_demo_score_FPD10_v2_t2_gini,sil_beta_demo_score_FPD30_v2_t2_gini,sil_beta_demo_score_FSPD30_v2_t2_gini,sil_beta_demo_score_FSTPD30_v2_t2_gini,Model_display_name,Product_type
0,2024-07-29,2024-08-04,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 2,0.536842,,,,,beta_demo_model_sil,SIL
1,2024-08-01,2024-08-31,Month,sil_beta_demo_score,v2,Overall,FPD0,Trench 2,0.152386,,,,,beta_demo_model_sil,SIL
2,2024-08-05,2024-08-11,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 2,0.241071,,,,,beta_demo_model_sil,SIL
3,2024-08-12,2024-08-18,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 2,-0.109375,,,,,beta_demo_model_sil,SIL
4,2024-08-19,2024-08-25,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 2,0.183024,,,,,beta_demo_model_sil,SIL


In [1242]:
# Load `final_df` into the playground table, replacing whatever the table held.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_sil_v2_t2_gini_v1"
# WRITE_TRUNCATE overwrites the table contents; WriteDisposition.WRITE_APPEND would add rows instead.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job has finished; the finished job is the cell's displayed value.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f20f4574-f46e-40fa-9b36-6b85be1a10b6>

In [1243]:
import functools

# Long-format per-metric frames (one metric column + 'category' each) and their shared keys.
dataframes = [f01, f10, f20, f30, f40]
common_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version',
    'loan_type', 'trench_category', 'bad_rate', 'category',
]


def merge_dataframes(df1, df2):
    """Outer-join two frames on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left-to-right into one frame holding all five metric columns.
# NOTE(review): 'bad_rate' and 'category' are join keys; if they differ between
# the input frames, no rows match and the result is a row-wise stack with NaN in
# the metric columns that do not apply — confirm that this is intended.
demoscoresilv2t2 = functools.reduce(merge_dataframes, dataframes)

demoscoresilv2t2.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [1244]:
# Stack every per-model / per-version / per-trench Gini frame built so far into one
# long table with a fresh 0..n-1 index.
result = pd.concat([cicsilscorev1all,
                    cicsilscorev2t1,
                    cicsilscorev2t2,
                    cicsilscorev2t3,
                    alphastacksilv1all,
                    alphastacksilv2t1,
                    alphastacksilv2t2,
                    alphastacksilv2t3,
                    betastacksilv1all,
                    betastacksilv2t1,
                    betastacksilv2t2,
                    betastacksilv2t3,
                    appscoresilv1all,
                    appscoresilv2t1,
                    appscoresilv2t2,
                    appscoresilv2t3,
                    demoscoresilv1all,
                    demoscoresilv2t1,
                    # The demo-score Trench 2 frame is built by the merge cell just
                    # before this one; without it here its rows never reach `result`.
                    demoscoresilv2t2,
                    ], ignore_index=True)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (29700, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [1245]:
# Row counts per (Model_Name, version, category) combination, NaN keys included,
# listed in descending index order.
result[['Model_Name', 'version', 'category']].value_counts(dropna=False).sort_index(ascending = False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [1246]:
# Row counts per model name in the combined frame (NaN counted as its own bucket).
result['Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
sil_beta_app_score       6565
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
sil_beta_demo_score      3756
Name: count, dtype: int64

In [1247]:
# Snapshot the combined results as `masterdf`; working on a copy leaves `result` untouched.
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (29700, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


## Trench 3

## FPD0

## Test

In [1248]:
# Test split, FPD0 target.
# The query reads v2 predictions for the two demo-score model display names from
# audit_balance.ml_model_run_details (null/empty trenchCategory is relabelled 'ALL'),
# joins them to the loan master table and to the delinquency flags, keeps disbursed
# loans with a non-null score and flg_mature_fpd0 = 1, labels the rows 'Test', and
# finally filters to trenchCategory = 'Trench 3'.
# NOTE(review): calcFeature_cleaned, end_time and modelDisplayName are selected in the
# `cleaned` CTE but are not referenced by the later CTEs.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory,
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;"""

# Run the query, drop fully identical rows (first occurrence kept) and preview.
# NOTE(review): the recorded output for this cell shows column headers only,
# i.e. the pull returned no rows when it was last executed.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory


In [1249]:
# Park the Test pull in df1; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy()

## Train

In [1250]:
# Train split, FPD0 target.
# Same shape of query as the Test pull, but the predictions come from
# dap_ds_poweruser_playground.ml_training_model_run_details and the rows are
# labelled 'Train': v2 demo-score predictions are joined to the loan master table
# and the delinquency flags, restricted to disbursed loans with a non-null score
# and flg_mature_fpd0 = 1, then filtered to trenchCategory = 'Trench 3'.
# NOTE(review): calcFeature_cleaned, end_time and modelDisplayName are selected in the
# `cleaned` CTE but are not referenced by the later CTEs.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd0 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop fully identical rows (first occurrence kept) and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type,modelVersionId,trenchCategory
0,2519802,66136370-781e-415d-ad71-e8d8ab56d07c,60825198020021,0.292755,2024-08-26 16:25:10,2024-08-26,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2453032,81934d09-1247-4f83-a594-66290d947ddf,60824530320023,0.235507,2024-08-16 17:14:12,2024-08-16,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2410803,b7eac8da-f766-4d80-9585-9029a1331980,60824108030021,0.31332,2024-08-29 14:42:45,2024-08-29,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2382771,89be31af-17f3-44cb-951e-35ff211b5e65,60823827710026,0.439321,2024-08-10 19:36:36,2024-08-10,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2330483,952bdb58-9f20-4a88-837a-9ba76db919af,60823304830028,0.273143,2024-08-03 12:51:49,2024-08-03,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [1251]:
# Park the Train pull in df2 (a copy, independent of `dfd`).
df2 = dfd.copy()

In [1252]:
# Combine the Test (df1) and Train (df2) pulls into one frame with a fresh index.
# Empty pulls are left out first: concatenating an empty frame makes pandas emit
# "FutureWarning: The behavior of DataFrame concatenation with empty or all-NA
# entries is deprecated", and the remedy pandas itself recommends is to exclude
# such entries before the concat. If both pulls are empty, fall back to df1 so
# the result is still an (empty) frame that keeps the column layout.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9151 entries, 0 to 9150
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9151 non-null   Int64         
 1   digitalLoanAccountId   9151 non-null   object        
 2   loanAccountNumber      9151 non-null   object        
 3   sil_beta_demo_score    9151 non-null   float64       
 4   appln_submit_datetime  9151 non-null   datetime64[us]
 5   disbursementdate       9151 non-null   dbdate        
 6   Application_month      9151 non-null   object        
 7   Data_selection         9151 non-null   object        
 8   deffpd0                9151 non-null   Int64         
 9   flg_mature_fpd0        9151 non-null   Int64         
 10  new_loan_type          9151 non-null   object        
 11  modelVersionId         9151 non-null   object        
 12  trenchCategory         9151 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1253]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become
# NaN (errors='coerce') instead of raising.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Rows per application month (NaN months included), ordered by the month label.
# This is deliberately the last expression of the cell: a bare expression placed
# before another statement is evaluated and then discarded without being shown.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1254]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd0.csv")

In [1255]:
# Periodic Gini of the demo score against the FPD0 flag, broken down by product
# (new_loan_type), model version and trench.
# Earlier variants tried in this cell and left disabled:
#   calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0')
#   calculate_periodic_gini_producttype(df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0',
#                                       product_column='new_loan_type')
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1256]:
# Keep the FPD0 results as f0; `gini_results` is overwritten by the next metric's run.
f0 = gini_results.copy()

In [1257]:
# Inspect the first ten FPD0 Gini rows.
f0.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.335714,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
1,2024-08-01,2024-08-31,0.067159,Month,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
2,2024-08-05,2024-08-11,0.047727,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
3,2024-08-12,2024-08-18,-0.151261,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
4,2024-08-19,2024-08-25,-0.027356,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
5,2024-08-26,2024-09-01,0.066667,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
6,2024-09-01,2024-09-30,0.070071,Month,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
7,2024-09-02,2024-09-08,-0.060241,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
8,2024-09-09,2024-09-15,0.403941,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3
9,2024-09-16,2024-09-22,-0.252772,Week,sil_beta_demo_score,v2,FPD0,Overall,Trench 3


In [1258]:
# Long-format view of the FPD0 Gini results: keep the key columns, expose the
# metric under the short name 'FPD0', and tag every row with its category label.
f01 = (
    f0[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_demo_score_FPD0_gini',
    ]]
    .rename(columns={'sil_beta_demo_score_FPD0_gini': 'FPD0'})
    .assign(category='sil_beta_demo_score_FPD0_v2_t3')
)
# Number of rows per category / loan type / version / trench.
f01.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                        loan_type       version  trench_category
sil_beta_demo_score_FPD0_v2_t3  Overall         v2       Trench 3           79
                                SIL Competitor  v2       Trench 3           62
                                SIL ZERO        v2       Trench 3           77
                                SIL-Instore     v2       Trench 3           79
dtype: int64

## FPD10

## Test

In [1259]:
# Test split, FPD10 target.
# The query reads v2 predictions for the two demo-score model display names from
# audit_balance.ml_model_run_details (null/empty trenchCategory is relabelled 'ALL'),
# joins them to the loan master table and to the delinquency flags, keeps disbursed
# loans with a non-null score and flg_mature_fpd10 = 1, labels the rows 'Test', and
# finally filters to trenchCategory = 'Trench 3'.
# NOTE(review): calcFeature_cleaned, end_time and modelDisplayName are selected in the
# `cleaned` CTE but are not referenced by the later CTEs.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;"""

# Run the query, drop fully identical rows (first occurrence kept) and preview.
# NOTE(review): the recorded output for this cell shows column headers only,
# i.e. the pull returned no rows when it was last executed.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory


In [1260]:
# Park the FPD10 Test pull in df1; `dfd` is reassigned by the Train query further down.
df1 = dfd.copy()

## Train

In [1261]:
# Train split, FPD10 target.
# Same shape of query as the Test pull, but the predictions come from
# dap_ds_poweruser_playground.ml_training_model_run_details and the rows are
# labelled 'Train': v2 demo-score predictions are joined to the loan master table
# and the delinquency flags, restricted to disbursed loans with a non-null score
# and flg_mature_fpd10 = 1, then filtered to trenchCategory = 'Trench 3'.
# NOTE(review): calcFeature_cleaned, end_time and modelDisplayName are selected in the
# `cleaned` CTE but are not referenced by the later CTEs.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd10 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop fully identical rows (first occurrence kept) and preview.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type,modelVersionId,trenchCategory
0,2252864,d94fe5bc-2735-41a6-8438-747e6c4e7351,60822528640037,0.332564,2024-08-25 17:15:42,2024-08-25,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2335858,38b2e0fe-6d25-4a62-9ca3-8a74dc731173,60823358580029,0.246256,2024-08-05 16:50:22,2024-08-05,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2128479,9a525b40-3369-4984-b406-0376f1c0d1ca,60821284790027,0.217349,2024-08-19 17:11:36,2024-08-19,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2354787,84c2e766-da4e-4cda-9bc0-d134158f9622,60823547870027,0.387714,2024-08-04 18:05:13,2024-08-04,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2432367,4f609f3d-32d5-428e-a5bb-f930faf80904,60824323670029,0.318072,2024-08-18 14:52:29,2024-08-18,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [1262]:
# Park the FPD10 Train pull in df2 (a copy, independent of `dfd`).
df2 = dfd.copy()

In [1263]:
# Combine the Test (df1) and Train (df2) pulls into one frame with a fresh index.
# Empty pulls are left out first: concatenating an empty frame makes pandas emit
# "FutureWarning: The behavior of DataFrame concatenation with empty or all-NA
# entries is deprecated", and the remedy pandas itself recommends is to exclude
# such entries before the concat. If both pulls are empty, fall back to df1 so
# the result is still an (empty) frame that keeps the column layout.
non_empty_frames = [frame for frame in (df1, df2) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9151 entries, 0 to 9150
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9151 non-null   Int64         
 1   digitalLoanAccountId   9151 non-null   object        
 2   loanAccountNumber      9151 non-null   object        
 3   sil_beta_demo_score    9151 non-null   float64       
 4   appln_submit_datetime  9151 non-null   datetime64[us]
 5   disbursementdate       9151 non-null   dbdate        
 6   Application_month      9151 non-null   object        
 7   Data_selection         9151 non-null   object        
 8   deffpd10               9151 non-null   Int64         
 9   flg_mature_fpd10       9151 non-null   Int64         
 10  new_loan_type          9151 non-null   object        
 11  modelVersionId         9151 non-null   object        
 12  trenchCategory         9151 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1264]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become
# NaN (errors='coerce') instead of raising.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Rows per application month (NaN months included), ordered by the month label.
# This is deliberately the last expression of the cell: a bare expression placed
# before another statement is evaluated and then discarded without being shown.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1265]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd10.csv")

In [1266]:
# Periodic Gini of the demo score against the FPD10 flag, broken down by product
# (new_loan_type), model version and trench.
# Earlier variants tried in this cell and left disabled:
#   calculate_periodic_gini(df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10')
#   calculate_periodic_gini_producttype(df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10',
#                                       product_column='new_loan_type')
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1267]:
# Keep the FPD10 results under their own name (f1), independent of `gini_results`.
f1 = gini_results.copy()

In [1268]:
# Inspect the first ten FPD10 Gini rows.
f1.head(10)

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.35,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
1,2024-08-01,2024-08-31,0.130926,Month,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
2,2024-08-05,2024-08-11,0.224638,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
3,2024-08-12,2024-08-18,0.006061,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
4,2024-08-19,2024-08-25,0.38,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
5,2024-08-26,2024-09-01,-0.333333,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
6,2024-09-01,2024-09-30,-0.082135,Month,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
7,2024-09-02,2024-09-08,-0.029885,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
8,2024-09-09,2024-09-15,-0.132231,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3
9,2024-09-16,2024-09-22,-0.274074,Week,sil_beta_demo_score,v2,FPD10,Overall,Trench 3


In [1269]:
# Long-format view of the FPD10 Gini results: keep the key columns, expose the
# metric under the short name 'FPD10', and tag every row with its category label.
f10 = (
    f1[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version',
        'bad_rate', 'loan_type', 'trench_category',
        'sil_beta_demo_score_FPD10_gini',
    ]]
    .rename(columns={'sil_beta_demo_score_FPD10_gini': 'FPD10'})
    .assign(category='sil_beta_demo_score_FPD10_v2_t3')
)
# Number of rows per category / loan type / version / trench.
f10.groupby(['category', 'loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_demo_score_FPD10_v2_t3  Overall         v2       Trench 3           79
                                 SIL Competitor  v2       Trench 3           62
                                 SIL ZERO        v2       Trench 3           77
                                 SIL-Instore     v2       Trench 3           79
dtype: int64

## FPD30

## Test

In [1270]:
# Test split, FPD30 target.
# The query reads v2 predictions for the two demo-score model display names from
# audit_balance.ml_model_run_details (null/empty trenchCategory is relabelled 'ALL'),
# joins them to the loan master table and to the delinquency flags, keeps disbursed
# loans with a non-null score and flg_mature_fpd30 = 1, labels the rows 'Test', and
# finally filters to trenchCategory = 'Trench 3'.
# NOTE(review): calcFeature_cleaned, end_time and modelDisplayName are selected in the
# `cleaned` CTE but are not referenced by the later CTEs.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;"""

# Run the query, drop fully identical rows (first occurrence kept) and preview.
# NOTE(review): the recorded output for this cell shows column headers only,
# i.e. the pull returned no rows when it was last executed.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory


In [1271]:
# Keep the Test pull under its own name; `dfd` is reassigned by the Train pull.
df1 = dfd.copy(deep=True)

## Train

In [1272]:
# Train pull for FPD30: same shape as the Test pull, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train' in Data_selection.
# Rows kept: disbursed loans with a non-null score and flg_mature_fpd30 = 1
# (obs_min_inst_def30 >= 1), restricted to trenchCategory 'Trench 3'. A NULL or
# empty trenchCategory is relabelled 'ALL' before that filter is applied.
# NOTE(review): the LEFT JOIN to the loan master behaves like an inner join because
# the WHERE clause filters on loanmaster columns - confirm that is intended.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fpd30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""

# Run the query and keep the first occurrence of rows duplicated across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type,modelVersionId,trenchCategory
0,2523806,8a03b1c3-68fa-4b06-82c7-c9c0aff96a3b,60825238060026,0.15851,2024-08-16 14:56:45,2024-08-16,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2451834,b3130267-4df8-4eb5-8417-1b64786c1d75,60824518340021,0.38073,2024-08-15 16:09:09,2024-08-15,2024-08,Train,1,1,SIL-Instore,v2,Trench 3
2,2227950,7e765b7a-dc4c-4ef7-b958-fd799b6a1201,60822279500021,0.297801,2024-08-18 15:47:33,2024-08-18,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2451693,bacd63e3-cfed-4280-a895-c3daf4580386,60824516930026,0.213946,2024-08-15 13:00:10,2024-08-15,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2457073,3be341a1-9a41-4db5-9d12-a6dc20a876a9,60824570730028,0.132292,2024-08-10 18:33:38,2024-08-10,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [1273]:
# Keep the Train pull under its own name, independent of later changes to `dfd`.
df2 = dfd.copy(deep=True)

In [1274]:
# Stack the Test (df1) and Train (df2) pulls into a single frame.
# Empty frames are excluded first: concatenating an empty DataFrame triggers the
# pandas deprecation warning about empty entries, and excluding them is what the
# warning itself recommends. If both pulls are empty, fall back to df1 alone so
# the column schema is kept and pd.concat is not handed an empty list.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8924 entries, 0 to 8923
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8924 non-null   Int64         
 1   digitalLoanAccountId   8924 non-null   object        
 2   loanAccountNumber      8924 non-null   object        
 3   sil_beta_demo_score    8924 non-null   float64       
 4   appln_submit_datetime  8924 non-null   datetime64[us]
 5   disbursementdate       8924 non-null   dbdate        
 6   Application_month      8924 non-null   object        
 7   Data_selection         8924 non-null   object        
 8   deffpd30               8924 non-null   Int64         
 9   flg_mature_fpd30       8924 non-null   Int64         
 10  new_loan_type          8924 non-null   object        
 11  modelVersionId         8924 non-null   object        
 12  trenchCategory         8924 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1275]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Row count per application month (NaN included), in chronological order.
# Kept as the cell's last expression so the notebook displays it; as a
# non-final statement its result was computed and then discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1276]:
# df_concat.to_csv(r"sil_beta_demo_scorefpd30.csv")

In [1277]:
# Gini of sil_beta_demo_score against the deffpd30 flag, labelled 'FPD30', broken
# down by loan type, model version and trench. The helper is defined elsewhere in
# this notebook; per its saved output it returns one row per Week/Month period.
# The earlier calculate_periodic_gini / calculate_periodic_gini_producttype calls
# that were commented out here appear to be superseded by this one.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1278]:
# Snapshot the FPD30 Gini table; `gini_results` is overwritten by the next section.
f2 = gini_results.copy(deep=True)

In [1279]:
# Preview the first ten rows of the FPD30 Gini table.
f2.iloc[:10]

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.580328,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
1,2024-08-01,2024-08-31,0.272584,Month,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
2,2024-08-05,2024-08-11,0.484043,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
3,2024-08-12,2024-08-18,0.133929,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
4,2024-08-19,2024-08-25,0.38,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
5,2024-08-26,2024-09-01,-0.333333,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
6,2024-09-01,2024-09-30,0.201435,Month,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
7,2024-09-02,2024-09-08,0.385768,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
8,2024-09-09,2024-09-15,0.131148,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3
9,2024-09-16,2024-09-22,0.087912,Week,sil_beta_demo_score,v2,FPD30,Overall,Trench 3


In [1280]:
# Reordered copy of the FPD30 Gini table: the Gini column is renamed to 'FPD30'
# and a constant `category` label is appended.
f20 = (
    f2[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FPD30_gini': 'FPD30'})
    .assign(category='sil_beta_demo_score_FPD30_v2_t3')
)

# Row count per (category, loan type, version, trench) as a quick sanity check.
f20.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                         loan_type       version  trench_category
sil_beta_demo_score_FPD30_v2_t3  Overall         v2       Trench 3           78
                                 SIL Competitor  v2       Trench 3           61
                                 SIL ZERO        v2       Trench 3           75
                                 SIL-Instore     v2       Trench 3           78
dtype: int64

## FSPD30

## Test

In [1281]:
# Test pull for FSPD30: v2 scores logged in audit_balance.ml_model_run_details for the
# two SIL beta-demo model names, joined to the loan master (on digitalLoanAccountId)
# and to the delinquency flags (on loanAccountNumber).
# Rows kept: disbursed loans with a non-null score and flg_mature_fspd_30 = 1
# (obs_min_inst_def30 >= 2), restricted to trenchCategory 'Trench 3'. A NULL or
# empty trenchCategory is relabelled 'ALL' before that filter is applied.
# NOTE(review): the LEFT JOIN to the loan master behaves like an inner join because
# the WHERE clause filters on loanmaster columns - confirm that is intended.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;"""

# Run the query and keep the first occurrence of rows duplicated across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory


In [1282]:
# Keep the Test pull under its own name; `dfd` is reassigned by the Train pull.
df1 = dfd.copy(deep=True)

## Train

In [1283]:
# Train pull for FSPD30: same shape as the Test pull, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train' in Data_selection.
# Rows kept: disbursed loans with a non-null score and flg_mature_fspd_30 = 1
# (obs_min_inst_def30 >= 2), restricted to trenchCategory 'Trench 3'. A NULL or
# empty trenchCategory is relabelled 'ALL' before that filter is applied.
# NOTE(review): the LEFT JOIN to the loan master behaves like an inner join because
# the WHERE clause filters on loanmaster columns - confirm that is intended.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
   modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
   modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fspd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""

# Run the query and keep the first occurrence of rows duplicated across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type,modelVersionId,trenchCategory
0,2141386,9bde22e6-55ad-4e3e-a6af-d1b4d2377d98,60821413860025,0.212792,2024-08-26 18:08:54,2024-08-26,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2387675,56a43bee-478d-4f1e-959b-465e911f0326,60823876750027,0.406685,2024-08-08 12:24:40,2024-08-08,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2365767,c11ce271-cedc-42ee-bad6-1529a452600c,60823657670022,0.333178,2024-08-17 14:47:11,2024-08-18,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2374597,a0e9ae23-af69-4b63-afc4-c93011bb1dd1,60823745970023,0.466367,2024-08-02 10:22:46,2024-08-02,2024-08,Train,1,1,SIL-Instore,v2,Trench 3
4,2431954,f49ec5ca-7df6-4b6c-8450-5c6aebc95021,60824319540028,0.379059,2024-08-24 10:21:35,2024-08-24,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [1284]:
# Keep the Train pull under its own name, independent of later changes to `dfd`.
df2 = dfd.copy(deep=True)

In [1285]:
# Stack the Test (df1) and Train (df2) pulls into a single frame.
# Empty frames are excluded first: concatenating an empty DataFrame triggers the
# pandas deprecation warning about empty entries, and excluding them is what the
# warning itself recommends. If both pulls are empty, fall back to df1 alone so
# the column schema is kept and pd.concat is not handed an empty list.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8404 entries, 0 to 8403
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8404 non-null   Int64         
 1   digitalLoanAccountId   8404 non-null   object        
 2   loanAccountNumber      8404 non-null   object        
 3   sil_beta_demo_score    8404 non-null   float64       
 4   appln_submit_datetime  8404 non-null   datetime64[us]
 5   disbursementdate       8404 non-null   dbdate        
 6   Application_month      8404 non-null   object        
 7   Data_selection         8404 non-null   object        
 8   deffspd30              8404 non-null   Int64         
 9   flg_mature_fspd_30     8404 non-null   Int64         
 10  new_loan_type          8404 non-null   object        
 11  modelVersionId         8404 non-null   object        
 12  trenchCategory         8404 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1286]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Row count per application month (NaN included), in chronological order.
# Kept as the cell's last expression so the notebook displays it; as a
# non-final statement its result was computed and then discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1287]:
# df_concat.to_csv(r"sil_beta_demo_scorefspd30.csv")

In [1288]:
# Gini of sil_beta_demo_score against the deffspd30 flag, labelled 'FSPD30', broken
# down by loan type, model version and trench. The helper is defined elsewhere in
# this notebook; per its saved output it returns one row per Week/Month period.
# The earlier calculate_periodic_gini / calculate_periodic_gini_producttype calls
# that were commented out here appear to be superseded by this one.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1289]:
# Snapshot the FSPD30 Gini table; `gini_results` is overwritten by the next section.
f3 = gini_results.copy(deep=True)

In [1290]:
# Preview the first ten rows of the FSPD30 Gini table.
f3.iloc[:10]

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.622276,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
1,2024-08-01,2024-08-31,0.295172,Month,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
2,2024-08-05,2024-08-11,0.406452,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
3,2024-08-12,2024-08-18,-0.090909,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
4,2024-08-19,2024-08-25,0.496599,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
5,2024-08-26,2024-09-01,0.264556,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
6,2024-09-01,2024-09-30,0.437868,Month,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
7,2024-09-02,2024-09-08,0.383929,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
8,2024-09-09,2024-09-15,0.588235,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3
9,2024-09-16,2024-09-22,0.274074,Week,sil_beta_demo_score,v2,FSPD30,Overall,Trench 3


In [1291]:
# Reordered copy of the FSPD30 Gini table: the Gini column is renamed to 'FSPD30'
# and a constant `category` label is appended.
f30 = (
    f3[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FSPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSPD30_gini': 'FSPD30'})
    .assign(category='sil_beta_demo_score_FSPD30_v2_t3')
)

# Row count per (category, loan type, version, trench) as a quick sanity check.
f30.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                          loan_type       version  trench_category
sil_beta_demo_score_FSPD30_v2_t3  Overall         v2       Trench 3           73
                                  SIL Competitor  v2       Trench 3           56
                                  SIL ZERO        v2       Trench 3           70
                                  SIL-Instore     v2       Trench 3           73
dtype: int64

## FSTPD30

## Test

In [1292]:
# Test pull for FSTPD30: v2 scores logged in audit_balance.ml_model_run_details for the
# two SIL beta-demo model names, joined to the loan master (on digitalLoanAccountId)
# and to the delinquency flags (on loanAccountNumber).
# Rows kept: disbursed loans with a non-null score and flg_mature_fstpd_30 = 1
# (obs_min_inst_def30 >= 3), restricted to trenchCategory 'Trench 3'. A NULL or
# empty trenchCategory is relabelled 'ALL' before that filter is applied.
# NOTE(review): the LEFT JOIN to the loan master behaves like an inner join because
# the WHERE clause filters on loanmaster columns - confirm that is intended.
sq = """WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ), 
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score, 
  modelVersionId, trenchCategory
  from cleaned
  ),
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;"""

# Run the query and keep the first occurrence of rows duplicated across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()


Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory


In [1293]:
# Keep the Test pull under its own name; `dfd` is reassigned by the Train pull.
df1 = dfd.copy(deep=True)

## Train

In [1294]:
# Train pull for FSTPD30: same shape as the Test pull, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and rows are labelled
# 'Train' in Data_selection.
# Rows kept: disbursed loans with a non-null score and flg_mature_fstpd_30 = 1
# (obs_min_inst_def30 >= 3), restricted to trenchCategory 'Trench 3'. A NULL or
# empty trenchCategory is relabelled 'ALL' before that filter is applied.
# NOTE(review): the LEFT JOIN to the loan master behaves like an inner join because
# the WHERE clause filters on loanmaster columns - confirm that is intended.
sq = """ 
WITH cleaned AS (
  SELECT
  customerId,digitalLoanAccountId,prediction,start_time,end_time,modelDisplayName,modelVersionId,
  case when trenchCategory is null then 'ALL'
 when trenchCategory = '' then 'ALL'
 else trenchCategory end as trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeature_cleaned
  FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
  WHERE modelDisplayName in ('Beta - DemoScoreModel', 'beta_demo_model_sil')
  and modelVersionId = 'v2'
  ),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction sil_beta_demo_score,
  modelVersionId, trenchCategory
  from cleaned
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data),
base as 
  (select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.sil_beta_demo_score,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
 modelVersionId, trenchCategory
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.sil_beta_demo_score is not null
  and del.flg_mature_fstpd_30 = 1
  )
  select * from base where trenchCategory = 'Trench 3'
  ;
"""

# Run the query and keep the first occurrence of rows duplicated across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,sil_beta_demo_score,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type,modelVersionId,trenchCategory
0,2307909,57a7640a-06ae-477e-8cf7-054a5dcef032,60823079090026,0.399219,2024-08-10 11:46:52,2024-08-10,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
1,2447566,62ce527c-562b-4304-960b-9b9bcaed9d9d,60824475660029,0.530679,2024-08-29 15:32:09,2024-08-29,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
2,2274175,f735febb-886b-4d33-898a-fa085fa55383,60822741750029,0.323333,2024-08-04 13:12:06,2024-08-04,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
3,2216861,570b4235-3218-4628-b82a-405110797fee,60822168610022,0.391103,2024-08-01 14:10:57,2024-08-01,2024-08,Train,0,1,SIL-Instore,v2,Trench 3
4,2240673,3f0be3fe-a2d1-4f90-a9a7-9a0b010b793a,60822406730021,0.188793,2024-08-05 09:38:24,2024-08-05,2024-08,Train,0,1,SIL-Instore,v2,Trench 3


In [1295]:
# Keep the Train pull under its own name, independent of later changes to `dfd`.
df2 = dfd.copy(deep=True)

In [1296]:
# Stack the Test (df1) and Train (df2) pulls into a single frame.
# Empty frames are excluded first: concatenating an empty DataFrame triggers the
# pandas deprecation warning about empty entries, and excluding them is what the
# warning itself recommends. If both pulls are empty, fall back to df1 alone so
# the column schema is kept and pd.concat is not handed an empty list.
df_concat = pd.concat(
    [frame for frame in (df1, df2) if not frame.empty] or [df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7838 entries, 0 to 7837
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7838 non-null   Int64         
 1   digitalLoanAccountId   7838 non-null   object        
 2   loanAccountNumber      7838 non-null   object        
 3   sil_beta_demo_score    7838 non-null   float64       
 4   appln_submit_datetime  7838 non-null   datetime64[us]
 5   disbursementdate       7838 non-null   dbdate        
 6   Application_month      7838 non-null   object        
 7   Data_selection         7838 non-null   object        
 8   deffstpd30             7838 non-null   Int64         
 9   flg_mature_fstpd_30    7838 non-null   Int64         
 10  new_loan_type          7838 non-null   object        
 11  modelVersionId         7838 non-null   object        
 12  trenchCategory         7838 non-null   object        
dtypes: 

  df_concat = pd.concat([df1, df2], ignore_index=True)


In [1297]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['sil_beta_demo_score'] = pd.to_numeric(df_concat['sil_beta_demo_score'], errors='coerce')

# Row count per application month (NaN included), in chronological order.
# Kept as the cell's last expression so the notebook displays it; as a
# non-final statement its result was computed and then discarded.
df_concat['Application_month'].value_counts(dropna=False).sort_index()

In [1298]:
# df_concat.to_csv(r"sil_beta_demo_scorefstpd30.csv")

In [1299]:
# Gini of sil_beta_demo_score against the deffstpd30 flag, labelled 'FSTPD30', broken
# down by loan type, model version and trench. The helper is defined elsewhere in
# this notebook; per its saved output it returns one row per Week/Month period.
# The earlier calculate_periodic_gini call that was commented out here appears
# to be superseded by this one.
gini_results = calculate_periodic_gini_prod_ver_trench(
    df_concat, 'sil_beta_demo_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
    model_version_column='modelVersionId',
    trench_column='trenchCategory',
)

In [1300]:
# Snapshot the FSTPD30 Gini table under its own name for the final merge.
f4 = gini_results.copy(deep=True)

In [1301]:
# Preview the first ten rows of the FSTPD30 Gini table.
f4.iloc[:10]

Unnamed: 0,start_date,end_date,sil_beta_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type,trench_category
0,2024-07-29,2024-08-04,0.532164,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
1,2024-08-01,2024-08-31,0.203736,Month,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
2,2024-08-05,2024-08-11,0.196232,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
3,2024-08-12,2024-08-18,0.015094,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
4,2024-08-19,2024-08-25,0.137097,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
5,2024-08-26,2024-09-01,0.248106,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
6,2024-09-01,2024-09-30,0.228306,Month,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
7,2024-09-02,2024-09-08,0.187805,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
8,2024-09-09,2024-09-15,0.390313,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3
9,2024-09-16,2024-09-22,0.111111,Week,sil_beta_demo_score,v2,FSTPD30,Overall,Trench 3


In [1302]:
# Reordered copy of the FSTPD30 Gini table: the Gini column is renamed to 'FSTPD30'
# and a constant `category` label is appended.
f40 = (
    f4[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate',
        'loan_type', 'trench_category', 'sil_beta_demo_score_FSTPD30_gini']]
    .rename(columns={'sil_beta_demo_score_FSTPD30_gini': 'FSTPD30'})
    .assign(category='sil_beta_demo_score_FSTPD30_v2_t3')
)

# Row count per (category, loan type, version, trench) as a quick sanity check.
f40.groupby(['category','loan_type', 'version', 'trench_category']).size()

category                           loan_type       version  trench_category
sil_beta_demo_score_FSTPD30_v2_t3  Overall         v2       Trench 3           67
                                   SIL Competitor  v2       Trench 3           50
                                   SIL ZERO        v2       Trench 3           65
                                   SIL-Instore     v2       Trench 3           67
dtype: int64

## Combining the dataframes

In [1303]:
import functools

# Gini tables to combine, one per bad-rate definition (f2/f3/f4 are the
# FPD30/FSPD30/FSTPD30 tables; f0/f1 are built earlier in the notebook).
dataframes = [f0, f1, f2, f3, f4]

# Columns shared by every Gini table; used as the join key.
# NOTE(review): `bad_rate` is part of the key, and in f2/f3/f4 it holds a different
# constant label per frame (FPD30 / FSPD30 / FSTPD30 in their head() outputs), so
# rows from those frames cannot match each other - the outer merge then stacks
# them with NaN in the other Gini columns. Confirm this layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate', 'trench_category']


def merge_dataframes(df1, df2):
    """Outer-merge two Gini tables on the module-level ``common_columns`` keys."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left to right into a single wide table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'trench_category', 'sil_beta_demo_score_FPD10_gini',
       'sil_beta_demo_score_FPD30_gini',
       'sil_beta_demo_score_FSPD30_gini',
       'sil_beta_demo_score_FSTPD30_gini'], dtype=object)

In [1304]:
# Capitalise the trench key and suffix every Gini column with the version / trench tag.
final_df = final_df.rename(columns={
    'trench_category': 'Trench_category',
    **{
        f'sil_beta_demo_score_{bad_rate}_gini': f'sil_beta_demo_score_{bad_rate}_v2_t3_gini'
        for bad_rate in ('FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30')
    },
})
final_df.columns

Index(['start_date', 'end_date', 'sil_beta_demo_score_FPD0_v2_t3_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Trench_category', 'sil_beta_demo_score_FPD10_v2_t3_gini',
       'sil_beta_demo_score_FPD30_v2_t3_gini',
       'sil_beta_demo_score_FSPD30_v2_t3_gini',
       'sil_beta_demo_score_FSTPD30_v2_t3_gini'],
      dtype='object')

In [1305]:
# Put the key columns first, the five Gini columns next, then append the constant
# model / product descriptors.
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type',
    'bad_rate', 'Trench_category',
    'sil_beta_demo_score_FPD0_v2_t3_gini',
    'sil_beta_demo_score_FPD10_v2_t3_gini',
    'sil_beta_demo_score_FPD30_v2_t3_gini',
    'sil_beta_demo_score_FSPD30_v2_t3_gini',
    'sil_beta_demo_score_FSTPD30_v2_t3_gini',
]].assign(
    Model_display_name='beta_demo_model_sil',
    Product_type='SIL',
)
final_df.dtypes

start_date                                datetime64[ns]
end_date                                  datetime64[ns]
period                                            object
Model_Name                                        object
version                                           object
loan_type                                         object
bad_rate                                          object
Trench_category                                   object
sil_beta_demo_score_FPD0_v2_t3_gini              float64
sil_beta_demo_score_FPD10_v2_t3_gini             float64
sil_beta_demo_score_FPD30_v2_t3_gini             float64
sil_beta_demo_score_FSPD30_v2_t3_gini            float64
sil_beta_demo_score_FSTPD30_v2_t3_gini           float64
Model_display_name                                object
Product_type                                      object
dtype: object

In [1306]:
# Preview the first rows of final_df.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Trench_category,sil_beta_demo_score_FPD0_v2_t3_gini,sil_beta_demo_score_FPD10_v2_t3_gini,sil_beta_demo_score_FPD30_v2_t3_gini,sil_beta_demo_score_FSPD30_v2_t3_gini,sil_beta_demo_score_FSTPD30_v2_t3_gini,Model_display_name,Product_type
0,2024-07-29,2024-08-04,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 3,0.335714,,,,,beta_demo_model_sil,SIL
1,2024-08-01,2024-08-31,Month,sil_beta_demo_score,v2,Overall,FPD0,Trench 3,0.067159,,,,,beta_demo_model_sil,SIL
2,2024-08-05,2024-08-11,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 3,0.047727,,,,,beta_demo_model_sil,SIL
3,2024-08-12,2024-08-18,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 3,-0.151261,,,,,beta_demo_model_sil,SIL
4,2024-08-19,2024-08-25,Week,sil_beta_demo_score,v2,Overall,FPD0,Trench 3,-0.027356,,,,,beta_demo_model_sil,SIL


In [1307]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_sil_v2_t3_gini_v1"

# Replace the table contents on every run.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"  # or "WRITE_APPEND"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job finishes; the returned job object is the cell output.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=e9e3ec3e-c9f3-4421-b711-61e42b32525e>

In [1308]:
import functools

# Join keys for the long-format frames, which also carry the 'category' label.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type','trench_category', 'bad_rate', 'category']
dataframes = [f01, f10, f20, f30, f40]


def merge_dataframes(df1, df2):
    """Outer-join two frames on the notebook-level ``common_columns``."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the per-bad-rate frames into one frame for this model / version / trench.
demoscoresilv2t3 = functools.reduce(merge_dataframes, dataframes)

demoscoresilv2t3.columns.values

array(['start_date', 'end_date', 'period', 'Model_Name', 'version',
       'bad_rate', 'loan_type', 'trench_category', 'FPD0', 'category',
       'FPD10', 'FPD30', 'FSPD30', 'FSTPD30'], dtype=object)

In [1309]:
# Stack every per-model Gini frame (CIC, alpha stack, beta stack, app score, demo
# score) into one long frame with a fresh 0..n-1 index.
# Fix: demoscoresilv2t2 was absent from this list although every other model
# contributes its v1-all, t1, t2 and t3 frames, so the demo-score Trench 2 rows
# never reached the combined frame.
# NOTE(review): assumes demoscoresilv2t2 is built upstream like its t1 / t3
# siblings — confirm before re-running.
result = pd.concat([cicsilscorev1all,
                    cicsilscorev2t1,
                    cicsilscorev2t2,
                    cicsilscorev2t3,
                    alphastacksilv1all,
                    alphastacksilv2t1,
                    alphastacksilv2t2,
                    alphastacksilv2t3,
                    betastacksilv1all,
                    betastacksilv2t1,
                    betastacksilv2t2,
                    betastacksilv2t3,
                    appscoresilv1all,
                    appscoresilv2t1,
                    appscoresilv2t2,
                    appscoresilv2t3,
                    demoscoresilv1all,
                    demoscoresilv2t1,
                    demoscoresilv2t2,
                    demoscoresilv2t3,
                    ], ignore_index=True)
print(f"The shape of the concatenated dataframe result is:\t {result.shape}")
result.head()

The shape of the concatenated dataframe result is:	 (31107, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


In [1310]:
# Rows per (model, version, category), NaN keys included, listed in descending key order.
result.value_counts(
    subset=['Model_Name', 'version', 'category'],
    dropna=False,
).sort_index(ascending=False)

Model_Name             version  category                            
sil_beta_stack_score   v2       beta_stack_model_sil_fstpd30_v2_t3      249
                                beta_stack_model_sil_fstpd30_v2_t2      244
                                beta_stack_model_sil_fstpd30_v2_t1      251
                                beta_stack_model_sil_fspd30_v2_t3       272
                                beta_stack_model_sil_fspd30_v2_t2       264
                                beta_stack_model_sil_fspd30_v2_t1       275
                                beta_stack_model_sil_fpd30_v2_t3        292
                                beta_stack_model_sil_fpd30_v2_t2        285
                                beta_stack_model_sil_fpd30_v2_t1        295
                                beta_stack_model_sil_fpd10_v2_t3        296
                                beta_stack_model_sil_fpd10_v2_t2        292
                                beta_stack_model_sil_fpd10_v2_t1        301
                   

In [1311]:
# Rows contributed by each model (NaN model names are counted too).
result['Model_Name'].value_counts(dropna=False)

Model_Name
sil_beta_stack_score     6649
sil_beta_app_score       6565
Sil_Alpha_Stack_score    6399
Alpha_cic_sil_score      6331
sil_beta_demo_score      5163
Name: count, dtype: int64

In [1312]:
# Keep the combined frame under a separate name as an independent copy of result.
masterdf = result.copy()
print(f"The shape of masterdf is:\t {masterdf.shape}")
masterdf.head()

The shape of masterdf is:	 (31107, 14)


Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,loan_type,trench_category,FPD0,category,FPD10,FPD30,FSPD30,FSTPD30
0,2023-01-01,2023-01-31,Month,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.205187,cic_sil_score_fpd0_v1_all,,,,
1,2023-01-09,2023-01-15,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.226648,cic_sil_score_fpd0_v1_all,,,,
2,2023-01-16,2023-01-22,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.375,cic_sil_score_fpd0_v1_all,,,,
3,2023-01-23,2023-01-29,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.110811,cic_sil_score_fpd0_v1_all,,,,
4,2023-01-30,2023-02-05,Week,Alpha_cic_sil_score,v1,FPD0,Overall,ALL,0.076412,cic_sil_score_fpd0_v1_all,,,,


# Cash

# Alpha-Cash-CIC-Model

## Trench 1

## FPD0

## Test

In [1313]:
# Test cohort, FPD0.
# Reads Alpha Cash CIC model (modelVersionId 'v1') runs from
# audit_balance.ml_model_run_details. When trenchCategory is NULL on the run row it
# is recovered by regex from the most recent 'Alpha-Cash-Model-response' request
# payload for the same loan. Rows are kept only for disbursed 'Trench 1' loans with
# a non-null score whose FPD0 observation has matured (flg_mature_fpd0 = 1), and are
# tagged Data_selection = 'Test'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN to
# loan_master_table behave like an inner join — confirm that is intended.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,3798319,12773d68-9fcd-4b43-aab2-9c93b7385ed4,60837983190012,0.4487196246566582,Trench 1,2025-11-08 09:03:15,2025-11-08,2025-11,Test,1,1,Quick
1,3797977,eb68c531-a006-40c8-b7bd-1dacd227c81c,60837979770016,0.5112243246311323,Trench 1,2025-11-08 11:25:33,2025-11-08,2025-11,Test,0,1,Quick
2,3733169,4377218c-4ed8-4eb0-a0ee-e0af18a2267c,60837331690016,0.4274363928135837,Trench 1,2025-11-08 15:06:53,2025-11-08,2025-11,Test,0,1,Quick
3,3799065,213dd1da-2882-4e03-83e5-716bcb306058,60837990650013,0.4571110175229723,Trench 1,2025-11-08 15:05:31,2025-11-08,2025-11,Test,0,1,Quick
4,3799399,df45f96b-1e68-41a6-be55-2ca00cdd45d5,60837993990012,0.6071263995088564,Trench 1,2025-11-08 15:31:50,2025-11-08,2025-11,Test,0,1,Quick


In [1314]:
# Snapshot the Test-cohort FPD0 pull as df1; dfd is reassigned by the Train query.
df1 = dfd.copy()

## Train

In [1315]:
# Train cohort, FPD0.
# Reads Alpha Cash CIC model (modelVersionId 'v1') rows from
# dap_ds_poweruser_playground.ml_training_model_run_details and uses trenchCategory
# exactly as stored there. Rows are kept only for disbursed 'Trench 1' loans with a
# non-null score whose FPD0 observation has matured (flg_mature_fpd0 = 1), and are
# tagged Data_selection = 'Train'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN to
# loan_master_table behave like an inner join — confirm that is intended.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2841091,28cd2859-9363-4838-9e73-b3bea2786f27,60828410910017,0.486808,Trench 1,2024-09-12 21:09:19,2024-09-13,2024-09,Train,1,1,Quick
1,2848494,1b46bd15-2ba4-4819-ab42-43fbdc6961cd,60828484940011,0.467935,Trench 1,2024-09-15 16:35:14,2024-09-15,2024-09,Train,0,1,Quick
2,2832393,1f748243-3dee-4e3b-a139-c57fd5065073,60828323930017,0.419965,Trench 1,2024-09-09 10:06:58,2024-09-09,2024-09,Train,0,1,Quick
3,2888226,3637c20b-03f0-4eae-9f91-ef954944bfad,60828882260012,0.494328,Trench 1,2024-09-28 10:40:34,2024-09-28,2024-09,Train,0,1,Quick
4,2836778,4c7748ba-029e-41a9-bdb7-bba247cf3220,60828367780016,0.668219,Trench 1,2024-09-11 07:41:46,2024-09-11,2024-09,Train,0,1,Quick


In [1316]:
# Snapshot the Train-cohort FPD0 pull as df2.
df2 = dfd.copy()

In [1317]:
# Stack the Train rows (df2) followed by the Test rows (df1) under a fresh 0..n-1
# index, then inspect dtypes and non-null counts.
df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18923 entries, 0 to 18922
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             18923 non-null  object        
 1   digitalLoanAccountId   18923 non-null  object        
 2   loanAccountNumber      18923 non-null  object        
 3   aCicScore              18923 non-null  object        
 4   trenchCategory         18923 non-null  object        
 5   appln_submit_datetime  18923 non-null  datetime64[us]
 6   disbursementdate       18923 non-null  dbdate        
 7   Application_month      18923 non-null  object        
 8   Data_selection         18923 non-null  object        
 9   deffpd0                18923 non-null  Int64         
 10  flg_mature_fpd0        18923 non-null  Int64         
 11  new_loan_type          18923 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1318]:
# aCicScore is an object column after the concat; cast it to numeric, turning
# unparseable values into NaN.
df_concat = df_concat.assign(
    aCicScore=pd.to_numeric(df_concat['aCicScore'], errors='coerce')
)

In [1319]:
# df_concat.to_csv(r"aCicScorefpd0.csv")

In [1320]:
# Gini of aCicScore against the deffpd0 flag (labelled 'FPD0'), with new_loan_type
# passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aCicScore', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1321]:
# Keep the FPD0 Gini table as f0; gini_results is reassigned for the next bad rate.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,-0.17094,Week,aCicScore,1.1.0,FPD0,Overall
1,2024-09-01,2024-09-30,0.217332,Month,aCicScore,1.1.0,FPD0,Overall
2,2024-09-02,2024-09-08,0.18755,Week,aCicScore,1.1.0,FPD0,Overall
3,2024-09-09,2024-09-15,0.286166,Week,aCicScore,1.1.0,FPD0,Overall
4,2024-09-16,2024-09-22,0.271593,Week,aCicScore,1.1.0,FPD0,Overall


In [1322]:
# Number of Gini rows per loan_type value.
f0['loan_type'].value_counts()

loan_type
Overall    79
Quick      79
Name: count, dtype: int64

## FPD10

## Test

In [1323]:
# Test cohort, FPD10.
# Reads Alpha Cash CIC model (modelVersionId 'v1') runs from
# audit_balance.ml_model_run_details. When trenchCategory is NULL on the run row it
# is recovered by regex from the most recent 'Alpha-Cash-Model-response' request
# payload for the same loan. Rows are kept only for disbursed 'Trench 1' loans with
# a non-null score whose FPD10 observation has matured (flg_mature_fpd10 = 1), and
# are tagged Data_selection = 'Test'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN to
# loan_master_table behave like an inner join — confirm that is intended.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
    loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3717801,33965df9-ab2a-4ee9-bafa-47d6c6c35a1f,60837178010013,0.4841089394467445,Trench 1,2025-10-02 14:58:23,2025-10-09,2025-10,Test,1,1,Quick
1,3718881,7c1dead3-748f-4299-a825-7e76c7d10c81,60837188810013,0.411287055313931,Trench 1,2025-10-02 15:31:34,2025-10-02,2025-10,Test,0,1,Quick
2,3719077,70b0e190-3125-4e0a-acab-ce37fa7ea02a,60837190770017,0.4541537426360085,Trench 1,2025-10-02 16:31:08,2025-10-02,2025-10,Test,0,1,Quick
3,3719554,2d648aa0-e65e-4b23-9547-a1c3fcb1f740,60837195540015,0.4389477231871976,Trench 1,2025-10-02 20:04:28,2025-10-02,2025-10,Test,0,1,Quick
4,3719610,4625afe9-909b-49fb-b373-307600575bc0,60837196100016,0.5500553489622988,Trench 1,2025-10-02 20:43:28,2025-10-07,2025-10,Test,1,1,Quick


In [1324]:
# Snapshot the Test-cohort FPD10 pull as df1; dfd is reassigned by the Train query.
df1 = dfd.copy()

## Train

In [1325]:
# Train cohort, FPD10.
# Reads Alpha Cash CIC model (modelVersionId 'v1') rows from
# dap_ds_poweruser_playground.ml_training_model_run_details and uses trenchCategory
# exactly as stored there. Rows are kept only for disbursed 'Trench 1' loans with a
# non-null score whose FPD10 observation has matured (flg_mature_fpd10 = 1), and are
# tagged Data_selection = 'Train'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN to
# loan_master_table behave like an inner join — confirm that is intended.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2823628,13487bcc-35bf-4edf-866a-9180dfbf7a9d,60828236280016,0.48659,Trench 1,2024-09-05 13:51:35,2024-09-05,2024-09,Train,0,1,Quick
1,2816431,b70fdd85-51d3-40b4-8062-4b7800ce21b9,60828164310012,0.735493,Trench 1,2024-09-02 12:44:21,2024-09-02,2024-09,Train,1,1,Quick
2,2846247,143e38da-2ff5-4f03-9c27-a839486fdb57,60828462470019,0.604074,Trench 1,2024-09-18 05:18:44,2024-09-19,2024-09,Train,0,1,Quick
3,2819814,d8cbe09a-af6c-4ec1-a62b-d69d80b1b3f4,60828198140015,0.466235,Trench 1,2024-09-03 15:10:17,2024-09-03,2024-09,Train,0,1,Quick
4,2849413,585163b0-6668-4ea3-a157-f88fef68f9ef,60828494130017,0.535448,Trench 1,2024-09-15 21:54:36,2024-09-16,2024-09,Train,0,1,Quick


In [1326]:
# Snapshot the Train-cohort FPD10 pull as df2.
df2 = dfd.copy()

In [1327]:
# Stack the Train rows (df2) followed by the Test rows (df1) under a fresh 0..n-1
# index, then inspect dtypes and non-null counts.
df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17490 entries, 0 to 17489
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             17490 non-null  object        
 1   digitalLoanAccountId   17490 non-null  object        
 2   loanAccountNumber      17490 non-null  object        
 3   aCicScore              17490 non-null  object        
 4   trenchCategory         17490 non-null  object        
 5   appln_submit_datetime  17490 non-null  datetime64[us]
 6   disbursementdate       17490 non-null  dbdate        
 7   Application_month      17490 non-null  object        
 8   Data_selection         17490 non-null  object        
 9   deffpd10               17490 non-null  Int64         
 10  flg_mature_fpd10       17490 non-null  Int64         
 11  new_loan_type          17490 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1328]:
# aCicScore is an object column after the concat; cast it to numeric, turning
# unparseable values into NaN.
df_concat = df_concat.assign(
    aCicScore=pd.to_numeric(df_concat['aCicScore'], errors='coerce')
)

In [1329]:
# df_concat.to_csv(r"aCicScorefpd10.csv")

In [1330]:
# Gini of aCicScore against the deffpd10 flag (labelled 'FPD10'), with new_loan_type
# passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aCicScore', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1331]:
# Keep the FPD10 Gini table as f1; gini_results is reassigned for the next bad rate.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.036585,Week,aCicScore,1.1.0,FPD10,Overall
1,2024-09-01,2024-09-30,0.236056,Month,aCicScore,1.1.0,FPD10,Overall
2,2024-09-02,2024-09-08,0.183358,Week,aCicScore,1.1.0,FPD10,Overall
3,2024-09-09,2024-09-15,0.334879,Week,aCicScore,1.1.0,FPD10,Overall
4,2024-09-16,2024-09-22,0.278113,Week,aCicScore,1.1.0,FPD10,Overall


## FPD30

## Test

In [1332]:
# Test cohort, FPD30.
# Reads Alpha Cash CIC model (modelVersionId 'v1') runs from
# audit_balance.ml_model_run_details. When trenchCategory is NULL on the run row it
# is recovered by regex from the most recent 'Alpha-Cash-Model-response' request
# payload for the same loan. Rows are kept only for disbursed 'Trench 1' loans with
# a non-null score whose FPD30 observation has matured (flg_mature_fpd30 = 1), and
# are tagged Data_selection = 'Test'.
# NOTE(review): the WHERE filters on loanmaster columns make the LEFT JOIN to
# loan_master_table behave like an inner join — confirm that is intended.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,3713545,3d4efb65-52d5-4ff2-8740-8f499b6491c7,60837135450011,0.4424019440049859,Trench 1,2025-09-29 19:36:21,2025-09-30,2025-09,Test,0,1,Quick
1,3711150,2ff5369d-dfcb-4d1a-8c33-d1c5997e5f9b,60837111500017,0.4188424801220896,Trench 1,2025-10-08 10:52:09,2025-10-08,2025-10,Test,0,1,Quick
2,3730759,e268d2a0-90e3-4293-bdd6-a67346cca12f,60837307590011,0.3661832061136901,Trench 1,2025-10-08 09:18:43,2025-10-08,2025-10,Test,0,1,Quick
3,3730944,acabba1e-4470-41d0-b943-fd46b34617af,60837309440027,0.4797426329688257,Trench 1,2025-10-08 11:04:29,2025-10-08,2025-10,Test,0,1,Quick
4,3731216,2bed3fa3-7f38-4619-b83d-45b855ccfe70,60837312160018,0.4588800216754293,Trench 1,2025-10-08 13:25:23,2025-10-08,2025-10,Test,1,1,Quick


In [1333]:
# Keep the Test cohort under its own name; `dfd` is overwritten by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1334]:
# Trench 1 / FPD30 - "Train" cohort.
# Same shape as the Test query, but the scores come from
# dap_ds_poweruser_playground.ml_training_model_run_details and trenchCategory
# is taken directly from that table (no request-payload fallback).
# Keeps disbursed loans with a non-null score, flg_mature_fpd30 = 1 and
# trenchCategory = 'Trench 1'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop exact duplicate rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2841091,28cd2859-9363-4838-9e73-b3bea2786f27,60828410910017,0.486808,Trench 1,2024-09-12 21:09:19,2024-09-13,2024-09,Train,1,1,Quick
1,2848494,1b46bd15-2ba4-4819-ab42-43fbdc6961cd,60828484940011,0.467935,Trench 1,2024-09-15 16:35:14,2024-09-15,2024-09,Train,0,1,Quick
2,2832393,1f748243-3dee-4e3b-a139-c57fd5065073,60828323930017,0.419965,Trench 1,2024-09-09 10:06:58,2024-09-09,2024-09,Train,0,1,Quick
3,2888226,3637c20b-03f0-4eae-9f91-ef954944bfad,60828882260012,0.494328,Trench 1,2024-09-28 10:40:34,2024-09-28,2024-09,Train,0,1,Quick
4,2836778,4c7748ba-029e-41a9-bdb7-bba247cf3220,60828367780016,0.668219,Trench 1,2024-09-11 07:41:46,2024-09-11,2024-09,Train,0,1,Quick


In [1335]:
# Keep the Train cohort under its own name, independent of later changes to `dfd`.
df2 = dfd.copy(deep=True)

In [1336]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index, then show the schema.
df_concat = pd.concat(objs=(df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16532 entries, 0 to 16531
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16532 non-null  object        
 1   digitalLoanAccountId   16532 non-null  object        
 2   loanAccountNumber      16532 non-null  object        
 3   aCicScore              16532 non-null  object        
 4   trenchCategory         16532 non-null  object        
 5   appln_submit_datetime  16532 non-null  datetime64[us]
 6   disbursementdate       16532 non-null  dbdate        
 7   Application_month      16532 non-null  object        
 8   Data_selection         16532 non-null  object        
 9   deffpd30               16532 non-null  Int64         
 10  flg_mature_fpd30       16532 non-null  Int64         
 11  new_loan_type          16532 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1337]:
# aCicScore arrives as object dtype after the concat; coerce it to numeric
# (values that cannot be parsed become NaN).
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1338]:
# df_concat.to_csv(r"aCicScorefpd30.csv")

In [1339]:
# Periodic Gini of aCicScore against the deffpd30 flag (labelled 'FPD30'),
# with new_loan_type supplied as the product column.
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffpd30', 'FPD30', product_column='new_loan_type')

In [1340]:
# Keep the FPD30 Gini table as f2; `gini_results` is reassigned by the next section.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.036585,Week,aCicScore,1.1.0,FPD30,Overall
1,2024-09-01,2024-09-30,0.265266,Month,aCicScore,1.1.0,FPD30,Overall
2,2024-09-02,2024-09-08,0.24121,Week,aCicScore,1.1.0,FPD30,Overall
3,2024-09-09,2024-09-15,0.338013,Week,aCicScore,1.1.0,FPD30,Overall
4,2024-09-16,2024-09-22,0.2925,Week,aCicScore,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1341]:
# Trench 1 / FSPD30 - "Test" cohort.
# The query below:
#   * takes v1 Alpha Cash CIC model runs from audit_balance.ml_model_run_details,
#   * fills a missing trenchCategory from the most recent
#     'Alpha-Cash-Model-response' request payload (REGEXP_EXTRACT),
#   * joins loan_master_table and the derived delinquency flags, and
#   * keeps disbursed loans with a non-null score, flg_mature_fspd_30 = 1 and
#     trenchCategory = 'Trench 1'.
# FIX: the maturity flag selected and filtered on is now flg_mature_fspd_30
# (it was flg_mature_fpd30). With the old flag, loans that had not yet reached
# their second instalment were kept with deffspd30 = 0, and the column set did
# not match the Train query, so the concatenated frame had an extra column.
# The raw-string prefix keeps the backslashes of the regex pattern intact.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, drop exact duplicate rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fpd30,new_loan_type
0,3705340,026a6ada-efc6-4e5e-968a-0d37e382d9ad,60837053400012,0.439280357053198,Trench 1,2025-09-26 14:04:27,2025-09-26,2025-09,Test,0,1,Quick
1,3705891,ec2bf9f2-ee68-44de-9f2e-9690f53ec824,60837058910018,0.444046831474933,Trench 1,2025-09-26 14:01:57,2025-10-02,2025-09,Test,0,1,Quick
2,3706096,98118b0f-5bae-4544-8a77-f77404a58b50,60837060960013,0.4359886495899309,Trench 1,2025-09-26 17:13:14,2025-09-27,2025-09,Test,0,1,Quick
4,3704941,8c979bb5-6c9f-490a-b0da-ee86863eebc4,60837049410017,0.4581197085922175,Trench 1,2025-09-25 19:31:42,2025-09-26,2025-09,Test,0,1,Quick
6,3716231,64b86ec3-a037-4e33-bf27-c0f2603ab1b8,60837162310017,0.557901400349205,Trench 1,2025-10-01 10:49:31,2025-10-01,2025-10,Test,0,1,Quick


In [1342]:
# Keep the FSPD30 Test cohort under its own name; `dfd` is overwritten by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1343]:
# Trench 1 / FSPD30 - "Train" cohort.
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details;
# trenchCategory is taken directly from that table.
# Keeps disbursed loans with a non-null score, flg_mature_fspd_30 = 1 and
# trenchCategory = 'Trench 1'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop exact duplicate rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2869554,43c3f197-bb3b-475a-8436-7c2144510677,60828695540019,0.489558,Trench 1,2024-09-22 12:56:49,2024-09-22,2024-09,Train,0,1,Quick
1,2869737,5f571fc7-282c-4b52-b7c1-ac9a25d189c8,60828697370014,0.403478,Trench 1,2024-09-22 13:51:42,2024-09-22,2024-09,Train,1,1,Quick
2,2875240,1d277875-fc3e-416a-ae7f-0ea19f3cb510,60828752400013,0.466261,Trench 1,2024-09-24 18:04:43,2024-09-25,2024-09,Train,0,1,Quick
3,2864713,d60df532-3f84-4ed4-a36a-8fdf2bdde342,60828647130016,0.304707,Trench 1,2024-09-20 23:51:10,2024-09-21,2024-09,Train,0,1,Quick
4,2840711,04229c47-359d-4694-863c-7693599691cc,60828407110018,0.491717,Trench 1,2024-09-17 14:17:18,2024-09-17,2024-09,Train,0,1,Quick


In [1344]:
# Keep the FSPD30 Train cohort under its own name, independent of later changes to `dfd`.
df2 = dfd.copy(deep=True)

In [1345]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index, then show the schema.
df_concat = pd.concat(objs=(df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15606 entries, 0 to 15605
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15606 non-null  object        
 1   digitalLoanAccountId   15606 non-null  object        
 2   loanAccountNumber      15606 non-null  object        
 3   aCicScore              15606 non-null  object        
 4   trenchCategory         15606 non-null  object        
 5   appln_submit_datetime  15606 non-null  datetime64[us]
 6   disbursementdate       15606 non-null  dbdate        
 7   Application_month      15606 non-null  object        
 8   Data_selection         15606 non-null  object        
 9   deffspd30              15606 non-null  Int64         
 10  flg_mature_fspd_30     15564 non-null  Int64         
 11  new_loan_type          15606 non-null  object        
 12  flg_mature_fpd30       42 non-null     Int64         
dtypes

In [1346]:
# aCicScore arrives as object dtype after the concat; coerce it to numeric
# (values that cannot be parsed become NaN).
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1347]:
# df_concat.to_csv(r"aCicScorefspd30.csv")

In [1348]:
# Periodic Gini of aCicScore against the deffspd30 flag (labelled 'FSPD30'),
# with new_loan_type supplied as the product column.
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffspd30', 'FSPD30', product_column='new_loan_type')

In [1349]:
# Keep the FSPD30 Gini table as f3; `gini_results` is reassigned by the next section.
f3 = gini_results.copy(deep=True)
f3.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.182432,Week,aCicScore,1.1.0,FSPD30,Overall
1,2024-09-01,2024-09-30,0.254053,Month,aCicScore,1.1.0,FSPD30,Overall
2,2024-09-02,2024-09-08,0.29238,Week,aCicScore,1.1.0,FSPD30,Overall
3,2024-09-09,2024-09-15,0.265693,Week,aCicScore,1.1.0,FSPD30,Overall
4,2024-09-16,2024-09-22,0.260664,Week,aCicScore,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1350]:
# Trench 1 / FSTPD30 - "Test" cohort.
# The query below:
#   * takes v1 Alpha Cash CIC model runs from audit_balance.ml_model_run_details,
#   * fills a missing trenchCategory from the most recent
#     'Alpha-Cash-Model-response' request payload (REGEXP_EXTRACT),
#   * joins loan_master_table and the derived delinquency flags, and
#   * keeps disbursed loans with a non-null score, flg_mature_fstpd_30 = 1 and
#     trenchCategory = 'Trench 1'.
# The raw-string prefix keeps the backslashes of the regex pattern intact.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, drop exact duplicate rows and preview the result.
# NOTE(review): the saved output of this cell shows only a header row, i.e. the
# query returned no rows at the time it was run.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1351]:
# Keep the FSTPD30 Test cohort under its own name; `dfd` is overwritten by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1352]:
# Trench 1 / FSTPD30 - "Train" cohort.
# Scores come from dap_ds_poweruser_playground.ml_training_model_run_details;
# trenchCategory is taken directly from that table.
# Keeps disbursed loans with a non-null score, flg_mature_fstpd_30 = 1 and
# trenchCategory = 'Trench 1'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop exact duplicate rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2841091,28cd2859-9363-4838-9e73-b3bea2786f27,60828410910017,0.486808,Trench 1,2024-09-12 21:09:19,2024-09-13,2024-09,Train,1,1,Quick
1,2848494,1b46bd15-2ba4-4819-ab42-43fbdc6961cd,60828484940011,0.467935,Trench 1,2024-09-15 16:35:14,2024-09-15,2024-09,Train,1,1,Quick
2,2832393,1f748243-3dee-4e3b-a139-c57fd5065073,60828323930017,0.419965,Trench 1,2024-09-09 10:06:58,2024-09-09,2024-09,Train,0,1,Quick
3,2888226,3637c20b-03f0-4eae-9f91-ef954944bfad,60828882260012,0.494328,Trench 1,2024-09-28 10:40:34,2024-09-28,2024-09,Train,0,1,Quick
4,2836778,4c7748ba-029e-41a9-bdb7-bba247cf3220,60828367780016,0.668219,Trench 1,2024-09-11 07:41:46,2024-09-11,2024-09,Train,0,1,Quick


In [1353]:
# Keep the FSTPD30 Train cohort under its own name, independent of later changes to `dfd`.
df2 = dfd.copy(deep=True)

In [1354]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# The Test frame can be empty here (the saved output of the Test query shows a
# header only). Passing an empty frame to pd.concat is deprecated: pandas emits
# a FutureWarning, and a future version will let the empty frame's dtypes
# influence the result. Dropping empty frames first keeps today's result and
# removes the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    else df2.iloc[0:0].copy()  # both cohorts empty: keep the column layout
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14375 entries, 0 to 14374
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14375 non-null  Int64         
 1   digitalLoanAccountId   14375 non-null  object        
 2   loanAccountNumber      14375 non-null  object        
 3   aCicScore              14375 non-null  float64       
 4   trenchCategory         14375 non-null  object        
 5   appln_submit_datetime  14375 non-null  datetime64[us]
 6   disbursementdate       14375 non-null  dbdate        
 7   Application_month      14375 non-null  object        
 8   Data_selection         14375 non-null  object        
 9   deffstpd30             14375 non-null  Int64         
 10  flg_mature_fstpd_30    14375 non-null  Int64         
 11  new_loan_type          14375 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1355]:
# Make sure aCicScore is numeric (values that cannot be parsed become NaN).
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1356]:
# df_concat.to_csv(r"aCicScorefstpd30.csv")

In [1357]:
# Periodic Gini of aCicScore against the deffstpd30 flag (labelled 'FSTPD30'),
# with new_loan_type supplied as the product column.
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffstpd30', 'FSTPD30', product_column='new_loan_type')

In [1358]:
# Keep the FSTPD30 Gini table as f4 for the combined result.
f4 = gini_results.copy(deep=True)
f4.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.28877,Week,aCicScore,1.1.0,FSTPD30,Overall
1,2024-09-01,2024-09-30,0.254391,Month,aCicScore,1.1.0,FSTPD30,Overall
2,2024-09-02,2024-09-08,0.311464,Week,aCicScore,1.1.0,FSTPD30,Overall
3,2024-09-09,2024-09-15,0.243421,Week,aCicScore,1.1.0,FSTPD30,Overall
4,2024-09-16,2024-09-22,0.28589,Week,aCicScore,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1359]:
import functools

# Descriptor columns shared by every per-bad-rate Gini frame; used as join keys.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate']

# f2, f3 and f4 hold the FPD30, FSPD30 and FSTPD30 results; f0 and f1 are
# presumably the FPD0 and FPD10 results built earlier in the notebook.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini result frames on the shared descriptor columns."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the frames left to right into one table.
# NOTE(review): 'bad_rate' is one of the join keys and the frames carry
# different bad_rate labels, so rows from different frames do not match and
# each output row has only one Gini column populated. Confirm that this
# stacked layout is what the downstream table expects.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aCicScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'aCicScore_FPD10_gini', 'aCicScore_FPD30_gini',
       'aCicScore_FSPD30_gini', 'aCicScore_FSTPD30_gini'], dtype=object)

In [1360]:
# Put the descriptor columns first and the five Gini columns after them, then
# append the constant labels that identify this result set.
final_df = final_df.loc[:, [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
    'aCicScore_FPD0_gini', 'aCicScore_FPD10_gini', 'aCicScore_FPD30_gini',
    'aCicScore_FSPD30_gini', 'aCicScore_FSTPD30_gini',
]].assign(
    Model_display_name='cic_model_cash',
    Trench_category='Trench 1',
    Product_type='CASH',
)
final_df.dtypes

start_date                datetime64[ns]
end_date                  datetime64[ns]
period                            object
Model_Name                        object
version                           object
loan_type                         object
bad_rate                          object
aCicScore_FPD0_gini              float64
aCicScore_FPD10_gini             float64
aCicScore_FPD30_gini             float64
aCicScore_FSPD30_gini            float64
aCicScore_FSTPD30_gini           float64
Model_display_name                object
Trench_category                   object
Product_type                      object
dtype: object

In [1361]:
# Preview the first rows of the combined Trench 1 table.
final_df.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,aCicScore_FPD0_gini,aCicScore_FPD10_gini,aCicScore_FPD30_gini,aCicScore_FSPD30_gini,aCicScore_FSTPD30_gini,Model_display_name,Trench_category,Product_type
0,2024-08-26,2024-09-01,Week,aCicScore,1.1.0,Overall,FPD0,-0.17094,,,,,cic_model_cash,Trench 1,CASH
1,2024-09-01,2024-09-30,Month,aCicScore,1.1.0,Overall,FPD0,0.217332,,,,,cic_model_cash,Trench 1,CASH
2,2024-09-02,2024-09-08,Week,aCicScore,1.1.0,Overall,FPD0,0.18755,,,,,cic_model_cash,Trench 1,CASH
3,2024-09-09,2024-09-15,Week,aCicScore,1.1.0,Overall,FPD0,0.286166,,,,,cic_model_cash,Trench 1,CASH
4,2024-09-16,2024-09-22,Week,aCicScore,1.1.0,Overall,FPD0,0.271593,,,,,cic_model_cash,Trench 1,CASH


In [1362]:
# Load the combined Trench 1 Gini table into BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_cic_model_v1_gini5"

# WRITE_TRUNCATE replaces the existing table contents on each run
# ("WRITE_APPEND" would add rows instead).
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job has finished.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=6d5b9b48-2bbb-4838-a7c7-7b3a775c73eb>

## Trench 2

## FPD0

## Test

In [1363]:
# Trench 2 / FPD0 - "Test" cohort.
# The query below:
#   * takes v1 Alpha Cash CIC model runs from audit_balance.ml_model_run_details,
#   * fills a missing trenchCategory from the most recent
#     'Alpha-Cash-Model-response' request payload (REGEXP_EXTRACT),
#   * joins loan_master_table and the derived delinquency flags, and
#   * keeps disbursed loans with a non-null score, flg_mature_fpd0 = 1 and
#     trenchCategory = 'Trench 2'.
# The raw-string prefix keeps the backslashes of the regex pattern intact.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, drop exact duplicate rows and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2201439,4e133e80-686c-4a84-820f-46fa2ba43fbc,60822014390019,0.3312969066093038,Trench 2,2025-10-15 10:10:37,2025-10-15,2025-10,Test,0,1,Quick
1,3476976,f76608cc-a9c5-41f7-9d02-f07f2f4ee025,60834769760013,0.6401060405040472,Trench 2,2025-10-15 11:01:32,2025-10-15,2025-10,Test,0,1,Quick
2,1698528,7797c0cd-4620-4065-b4e7-83feb03ac102,60816985280019,0.53544804475275,Trench 2,2025-10-15 09:56:26,2025-10-17,2025-10,Test,0,1,Quick
3,3225619,848c8c30-7524-4f0a-a58b-701784756560,60832256190014,0.4399004864192977,Trench 2,2025-10-15 13:19:08,2025-10-15,2025-10,Test,0,1,Quick
4,2690502,466b970e-a42c-4930-95f5-241ba4065268,60826905020014,0.4979958272343716,Trench 2,2025-10-15 17:24:29,2025-11-05,2025-10,Test,0,1,Quick


In [1364]:
# Keep the FPD0 Test rows as df1; the Train cell below reassigns dfd.
df1 = dfd.copy()

## Train

In [1365]:
# FPD0 / Train cohort: Alpha Cash CIC model (v1) scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details. trenchCategory is taken
# straight from that table (no request-payload fallback in this query).
#   parsed     - v1 runs for the three spellings of the model display name
#                (calcFeatures is built here but not selected by any later CTE).
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags derived
#                from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed 'Trench 2' loans with a non-null score and
# flg_mature_fpd0 = 1 (obs_min_inst_def0 >= 1), tagged Data_selection = 'Train'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query on BigQuery and load the full result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2673775,14cb4a69-8812-4ac8-9081-7be4cf5b4846,60826737750019,0.465478,Trench 2,2024-09-03 23:30:16,2024-09-04,2024-09,Train,0,1,Quick
1,2585854,8b235f56-6a43-4c99-b8f8-383c8ecd2594,60825858540014,0.590978,Trench 2,2024-09-26 08:54:17,2024-09-26,2024-09,Train,1,1,Quick
2,2385358,1fc395dc-86a6-4ec8-a73a-a8efa306edd0,60823853580011,0.717951,Trench 2,2024-09-19 20:48:54,2024-09-19,2024-09,Train,1,1,Quick
3,2409718,a42b4d99-0772-4549-9f04-0691703cdaae,60824097180017,0.464905,Trench 2,2024-09-18 15:31:44,2024-09-18,2024-09,Train,1,1,Quick
4,2564711,689008fe-2ca9-4483-be8e-3c06a799d39a,60825647110016,0.492682,Trench 2,2024-09-14 13:57:18,2024-09-14,2024-09,Train,0,1,Quick


In [1366]:
# Keep the FPD0 Train rows as df2 so they can be stacked with df1 (Test).
df2 = dfd.copy()

In [1367]:
# Stack the Train rows (df2) above the Test rows (df1) and renumber the index 0..n-1.
df_concat = pd.concat(objs=(df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13337 entries, 0 to 13336
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13337 non-null  object        
 1   digitalLoanAccountId   13337 non-null  object        
 2   loanAccountNumber      13337 non-null  object        
 3   aCicScore              13337 non-null  object        
 4   trenchCategory         13337 non-null  object        
 5   appln_submit_datetime  13337 non-null  datetime64[us]
 6   disbursementdate       13337 non-null  dbdate        
 7   Application_month      13337 non-null  object        
 8   Data_selection         13337 non-null  object        
 9   deffpd0                13337 non-null  Int64         
 10  flg_mature_fpd0        13337 non-null  Int64         
 11  new_loan_type          13337 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1368]:
# aCicScore is loaded as object dtype (see the info() output above); convert it to a
# numeric column. errors='coerce' turns any value that cannot be parsed into NaN
# instead of raising.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [1369]:
# df_concat.to_csv(r"aCicScoretrench2fpd0.csv")

In [1370]:
# Periodic Gini of aCicScore against the deffpd0 flag, labelled 'FPD0'.
# product_column presumably adds a per-new_loan_type breakdown - confirm against the
# helper's definition earlier in the notebook.
# Variant without the product split, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd0', 'FPD0')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aCicScore', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1371]:
# Keep the FPD0 Gini table as f0; gini_results is reassigned in the FPD10 section.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.495652,Week,aCicScore,1.1.0,FPD0,Overall
1,2024-09-01,2024-09-30,0.29075,Month,aCicScore,1.1.0,FPD0,Overall
2,2024-09-02,2024-09-08,0.285824,Week,aCicScore,1.1.0,FPD0,Overall
3,2024-09-09,2024-09-15,0.301728,Week,aCicScore,1.1.0,FPD0,Overall
4,2024-09-16,2024-09-22,0.224814,Week,aCicScore,1.1.0,FPD0,Overall


## FPD10

## Test

In [1372]:
# FPD10 / Test cohort: Alpha Cash CIC model (v1) scores logged in
# audit_balance.ml_model_run_details, joined to the loan master and delinquency data.
#   parsed     - v1 runs for the three spellings of the model display name
#                (calcFeatures is built here but not selected by any later CTE).
#   model_run  - latest 'Alpha-Cash-Model-response' request per
#                (customerId, digitalLoanAccountId, modelName); its payload is only a
#                fallback source for trenchCategory when the run row has none.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags derived
#                from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed 'Trench 2' loans with a non-null score and
# flg_mature_fpd10 = 1 (obs_min_inst_def10 >= 1).
# Raw string on purpose: the REGEXP_EXTRACT pattern contains \" escapes that must
# reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query on BigQuery and load the full result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3129817,df6278b7-1352-465f-a22d-fd7a93e92553,60831298170018,0.4960154881210718,Trench 2,2025-10-05 13:43:34,2025-10-05,2025-10,Test,0,1,Quick
1,1908767,e96ef5a1-cf4e-4ab2-b22d-e32bc455ebe0,60819087670015,0.2969885369804143,Trench 2,2025-10-05 12:53:23,2025-10-08,2025-10,Test,0,1,Quick
2,3230169,9ed41d23-400d-4654-b0a7-dbd7e514bdfa,60832301690022,0.4807114066732938,Trench 2,2025-10-05 09:38:47,2025-10-05,2025-10,Test,0,1,Quick
3,3190544,e5324fe5-34e5-4718-803c-84b1b2402b7d,60831905440011,0.3992577745696508,Trench 2,2025-10-05 12:24:42,2025-10-06,2025-10,Test,0,1,Quick
4,1079075,7b455735-e63f-4e4a-a46d-e30d6f4fea87,60810790750051,0.3654532038749399,Trench 2,2025-10-05 14:26:54,2025-10-15,2025-10,Test,0,1,Quick


In [1373]:
# Keep the FPD10 Test rows as df1; the Train cell below reassigns dfd.
df1 = dfd.copy()

## Train

In [1374]:
# FPD10 / Train cohort: Alpha Cash CIC model (v1) scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details. trenchCategory is taken
# straight from that table (no request-payload fallback in this query).
#   parsed     - v1 runs for the three spellings of the model display name
#                (calcFeatures is built here but not selected by any later CTE).
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags derived
#                from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed 'Trench 2' loans with a non-null score and
# flg_mature_fpd10 = 1 (obs_min_inst_def10 >= 1), tagged Data_selection = 'Train'.
# NOTE: this is a plain (non-raw) string, so a trailing backslash would act as a
# Python line continuation and glue two SQL lines together; keep the text free of
# backslashes.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query on BigQuery and load the full result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,1094222,9e0a31bd-1ac4-43bb-a93e-0218a600983e,60810942220012,0.557349,Trench 2,2024-09-23 14:23:55,2024-09-24,2024-09,Train,0,1,Quick
1,2559993,615c0271-d382-41d5-9a08-e825f3eff702,60825599930015,0.419086,Trench 2,2024-09-18 17:07:53,2024-09-18,2024-09,Train,1,1,Quick
2,2666406,cca54523-bc8b-4ceb-ac3a-c35b54a1f256,60826664060014,0.395616,Trench 2,2024-09-10 14:53:23,2024-09-11,2024-09,Train,0,1,Quick
3,2709839,b3512aa7-8291-4744-87e4-ad76becea5e8,60827098390013,0.705006,Trench 2,2024-09-03 12:29:01,2024-09-05,2024-09,Train,0,1,Quick
4,2432013,a6901bfe-de18-4019-a7ca-65ef72d6d43d,60824320130018,0.480359,Trench 2,2024-09-29 10:32:43,2024-09-29,2024-09,Train,1,1,Quick


In [1375]:
# Keep the FPD10 Train rows as df2 so they can be stacked with df1 (Test).
df2 = dfd.copy()

In [1376]:
# Stack the Train rows (df2) above the Test rows (df1) and renumber the index 0..n-1.
df_concat = pd.concat(objs=(df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12790 entries, 0 to 12789
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12790 non-null  object        
 1   digitalLoanAccountId   12790 non-null  object        
 2   loanAccountNumber      12790 non-null  object        
 3   aCicScore              12790 non-null  object        
 4   trenchCategory         12790 non-null  object        
 5   appln_submit_datetime  12790 non-null  datetime64[us]
 6   disbursementdate       12790 non-null  dbdate        
 7   Application_month      12790 non-null  object        
 8   Data_selection         12790 non-null  object        
 9   deffpd10               12790 non-null  Int64         
 10  flg_mature_fpd10       12790 non-null  Int64         
 11  new_loan_type          12790 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1377]:
# aCicScore is loaded as object dtype (see the info() output above); convert it to a
# numeric column. errors='coerce' turns any value that cannot be parsed into NaN
# instead of raising.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [1378]:
# df_concat.to_csv(r"aCicScoretrench2fpd10.csv")

In [1379]:
# Periodic Gini of aCicScore against the deffpd10 flag, labelled 'FPD10'.
# product_column presumably adds a per-new_loan_type breakdown - confirm against the
# helper's definition earlier in the notebook.
# Variant without the product split, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aCicScore', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1380]:
# Keep the FPD10 Gini table as f1; gini_results is reassigned in the FPD30 section.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.495652,Week,aCicScore,1.1.0,FPD10,Overall
1,2024-09-01,2024-09-30,0.291819,Month,aCicScore,1.1.0,FPD10,Overall
2,2024-09-02,2024-09-08,0.298729,Week,aCicScore,1.1.0,FPD10,Overall
3,2024-09-09,2024-09-15,0.277186,Week,aCicScore,1.1.0,FPD10,Overall
4,2024-09-16,2024-09-22,0.284906,Week,aCicScore,1.1.0,FPD10,Overall


## FPD30

## Test

In [1381]:
# FPD30 / Test cohort: Alpha Cash CIC model (v1) scores logged in
# audit_balance.ml_model_run_details, joined to the loan master and delinquency data.
#   parsed     - v1 runs for the three spellings of the model display name
#                (calcFeatures is built here but not selected by any later CTE).
#   model_run  - latest 'Alpha-Cash-Model-response' request per
#                (customerId, digitalLoanAccountId, modelName); its payload is only a
#                fallback source for trenchCategory when the run row has none.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags derived
#                from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed 'Trench 2' loans with a non-null score and
# flg_mature_fpd30 = 1 (obs_min_inst_def30 >= 1).
# Raw string on purpose: the REGEXP_EXTRACT pattern contains \" escapes that must
# reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query on BigQuery and load the full result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,3177894,940a265b-e43b-4985-905f-675f8ea3787b,60831778940018,0.4908542455071541,Trench 2,2025-10-01 18:05:45,2025-10-03,2025-10,Test,0,1,Quick
1,1242524,059f2133-161e-43bd-b5fa-0192e86991ab,60812425240046,0.4379257241528742,Trench 2,2025-10-01 21:40:29,2025-10-01,2025-10,Test,0,1,Quick
2,3230169,9ed41d23-400d-4654-b0a7-dbd7e514bdfa,60832301690022,0.4807114066732938,Trench 2,2025-10-05 09:38:47,2025-10-05,2025-10,Test,0,1,Quick
3,3190544,e5324fe5-34e5-4718-803c-84b1b2402b7d,60831905440011,0.3992577745696508,Trench 2,2025-10-05 12:24:42,2025-10-06,2025-10,Test,0,1,Quick
4,3090214,8b4e0f30-db10-4bff-ba1d-fd96ffdc34b1,60830902140011,0.5541771663277403,Trench 2,2025-10-05 20:30:05,2025-10-05,2025-10,Test,0,1,Quick


In [1382]:
# Keep the FPD30 Test rows as df1; the Train cell below reassigns dfd.
df1 = dfd.copy()

## Train

In [1383]:
# FPD30 / Train cohort: Alpha Cash CIC model (v1) scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details. trenchCategory is taken
# straight from that table (no request-payload fallback in this query).
#   parsed     - v1 runs for the three spellings of the model display name
#                (calcFeatures is built here but not selected by any later CTE).
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags derived
#                from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed 'Trench 2' loans with a non-null score and
# flg_mature_fpd30 = 1 (obs_min_inst_def30 >= 1), tagged Data_selection = 'Train'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query on BigQuery and load the full result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2202480,eafb0906-9de0-4826-b34e-4e2235ab9968,60822024800014,0.460595,Trench 2,2024-09-28 08:06:47,2024-10-01,2024-09,Train,0,1,Quick
1,2113512,64235a1f-6ad6-4e60-b09b-dd0809b5d4e0,60821135120018,0.750957,Trench 2,2024-09-13 20:41:03,2024-09-14,2024-09,Train,1,1,Quick
2,1451936,57faf4f3-e7fe-4f44-8752-1941579a4918,60814519360029,0.509975,Trench 2,2024-09-17 20:09:22,2024-09-17,2024-09,Train,1,1,Quick
3,1215801,80370d10-c7a7-4d21-b798-b0c78279ce3e,60812158010011,0.519138,Trench 2,2024-09-18 15:40:10,2024-09-18,2024-09,Train,1,1,Quick
4,2425255,e991e074-1208-4dac-8e90-6802c71712ee,60824252550011,0.51695,Trench 2,2024-09-08 01:46:10,2024-09-08,2024-09,Train,1,1,Quick


In [1384]:
# Keep the FPD30 Train rows as df2 so they can be stacked with df1 (Test).
df2 = dfd.copy()

In [1385]:
# Stack the Train rows (df2) above the Test rows (df1) and renumber the index 0..n-1.
df_concat = pd.concat(objs=(df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12410 entries, 0 to 12409
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12410 non-null  object        
 1   digitalLoanAccountId   12410 non-null  object        
 2   loanAccountNumber      12410 non-null  object        
 3   aCicScore              12410 non-null  object        
 4   trenchCategory         12410 non-null  object        
 5   appln_submit_datetime  12410 non-null  datetime64[us]
 6   disbursementdate       12410 non-null  dbdate        
 7   Application_month      12410 non-null  object        
 8   Data_selection         12410 non-null  object        
 9   deffpd30               12410 non-null  Int64         
 10  flg_mature_fpd30       12410 non-null  Int64         
 11  new_loan_type          12410 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1386]:
# aCicScore is loaded as object dtype (see the info() output above); convert it to a
# numeric column. errors='coerce' turns any value that cannot be parsed into NaN
# instead of raising.
df_concat['aCicScore'] = pd.to_numeric(df_concat['aCicScore'], errors='coerce')

In [1387]:
# df_concat.to_csv(r"aCicScoretrench2fpd30.csv")

In [1388]:
# Periodic Gini of aCicScore against the deffpd30 flag, labelled 'FPD30'.
# product_column presumably adds a per-new_loan_type breakdown - confirm against the
# helper's definition earlier in the notebook.
# Variant without the product split, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aCicScore', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1389]:
# Keep the FPD30 Gini table under its own name (f2), as done with f0 and f1 above.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,aCicScore_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.495652,Week,aCicScore,1.1.0,FPD30,Overall
1,2024-09-01,2024-09-30,0.318568,Month,aCicScore,1.1.0,FPD30,Overall
2,2024-09-02,2024-09-08,0.291139,Week,aCicScore,1.1.0,FPD30,Overall
3,2024-09-09,2024-09-15,0.285386,Week,aCicScore,1.1.0,FPD30,Overall
4,2024-09-16,2024-09-22,0.317084,Week,aCicScore,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1390]:
# FSPD30 / Test cohort: Alpha Cash CIC model (v1) scores logged in
# audit_balance.ml_model_run_details, joined to the loan master and delinquency data.
#   parsed     - v1 runs for the three spellings of the model display name
#                (calcFeatures is built here but not selected by any later CTE).
#   model_run  - latest 'Alpha-Cash-Model-response' request per
#                (customerId, digitalLoanAccountId, modelName); its payload is only a
#                fallback source for trenchCategory when the run row has none.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags derived
#                from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed 'Trench 2' loans with a non-null score and
# flg_mature_fspd_30 = 1 (obs_min_inst_def30 >= 2).
# The maturity flag must be the FSPD one: deffspd30 can only be 1 when
# obs_min_inst_def30 >= 2, so filtering on flg_mature_fpd30 (>= 1) would admit loans
# with a single observed instalment as guaranteed non-defaults. It also keeps the
# column set identical to the FSPD30 Train query so the two frames concat cleanly.
# Raw string on purpose: the REGEXP_EXTRACT pattern contains \" escapes that must
# reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query on BigQuery and load the full result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fpd30,new_loan_type
0,2473116,70dedacb-577f-433e-bfbf-4d57973ff5ea,60824731160018,0.4733630958229373,Trench 2,2025-10-03 20:45:57,2025-10-03,2025-10,Test,0,1,Quick
1,3479077,628bb8f9-78bd-4a36-bd17-80d4f85852d9,60834790770019,0.430884140338313,Trench 2,2025-10-03 19:44:09,2025-10-03,2025-10,Test,0,1,Quick
2,3486362,988a25a1-676d-4f79-831c-5190216e5f8d,60834863620014,0.421151556864262,Trench 2,2025-09-27 08:38:21,2025-09-27,2025-09,Test,0,1,Quick
3,3427045,399798f3-47e7-4cf6-8c2b-11b4a0afba4e,60834270450018,0.3903972847576768,Trench 2,2025-09-27 14:40:49,2025-10-03,2025-09,Test,0,1,Quick
4,1081564,eb47dde4-59bc-49fc-a2f0-d384fa91bcf0,60810815640056,0.4873423812357773,Trench 2,2025-10-08 22:13:17,2025-10-09,2025-10,Test,0,1,Quick


In [1391]:
# Keep the FSPD30 Test rows as df1; the Train cell below reassigns dfd.
df1 = dfd.copy()

## Train

In [1392]:
# FSPD30 / Train cohort: Alpha Cash CIC model (v1) scores read from
# dap_ds_poweruser_playground.ml_training_model_run_details. trenchCategory is taken
# straight from that table (no request-payload fallback in this query).
#   parsed     - v1 runs for the three spellings of the model display name
#                (calcFeatures is built here but not selected by any later CTE).
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags derived
#                from obs_min_inst_def* / min_inst_def*.
# The final SELECT keeps disbursed 'Trench 2' loans with a non-null score and
# flg_mature_fspd_30 = 1 (obs_min_inst_def30 >= 2), tagged Data_selection = 'Train'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query on BigQuery and load the full result into pandas.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,1440049,43e2f5b0-87fd-4fd3-9328-a7c7ec69476f,60814400490028,0.62116,Trench 2,2024-09-22 09:01:03,2024-09-22,2024-09,Train,1,1,Quick
1,1885720,c7c5d016-c623-4fa0-92c7-5617d7fa9965,60818857200012,0.468707,Trench 2,2024-09-18 16:15:19,2024-09-18,2024-09,Train,1,1,Quick
2,1756094,8cc0cc6b-05e5-4ac9-a8ae-60afd78f0c23,60817560940012,0.381581,Trench 2,2024-09-05 17:15:31,2024-09-05,2024-09,Train,0,1,Quick
3,2729762,0724524a-5a77-4e93-afc2-fc6402947f0a,60827297620019,0.575914,Trench 2,2024-09-11 13:51:06,2024-09-22,2024-09,Train,1,1,Quick
4,2162269,de0b2dd3-6be3-4a73-82b7-95cf058157ac,60821622690017,0.487814,Trench 2,2024-09-10 09:02:36,2024-09-10,2024-09,Train,1,1,Quick


In [1393]:
# Keep the Train-sample result under its own name so it can be stacked with the Test sample (df1).
df2 = dfd.copy(deep=True)

In [1394]:
# Stack the Train sample (df2) on top of the Test sample (df1) with a fresh index.
# Empty samples are left out of the concat: pandas deprecates letting empty
# entries take part in result-dtype determination and recommends excluding them.
# If both samples are empty, the original two-frame concat is used so the
# (empty) result still carries the expected columns.
# NOTE(review): the recorded .info() output for this cell lists 13 columns, with
# flg_mature_fspd_30 non-null on 11889 rows and flg_mature_fpd30 non-null on 20
# rows, i.e. df1 and df2 do not share the same maturity-flag column. Check which
# flag the Test query selects.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11909 entries, 0 to 11908
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11909 non-null  object        
 1   digitalLoanAccountId   11909 non-null  object        
 2   loanAccountNumber      11909 non-null  object        
 3   aCicScore              11909 non-null  object        
 4   trenchCategory         11909 non-null  object        
 5   appln_submit_datetime  11909 non-null  datetime64[us]
 6   disbursementdate       11909 non-null  dbdate        
 7   Application_month      11909 non-null  object        
 8   Data_selection         11909 non-null  object        
 9   deffspd30              11909 non-null  Int64         
 10  flg_mature_fspd_30     11889 non-null  Int64         
 11  new_loan_type          11909 non-null  object        
 12  flg_mature_fpd30       20 non-null     Int64         
dtypes

In [1395]:
# Scores arrive as an object column after the concat; convert them to numbers,
# turning anything unparsable into NaN.
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1396]:
# df_concat.to_csv(r"aCicScoretrench2fspd30.csv")

In [1397]:
# Gini of aCicScore against the deffspd30 flag, labelled 'FSPD30', with
# new_loan_type passed as the product column.
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffspd30', 'FSPD30', product_column='new_loan_type')

In [1398]:
# Keep the FSPD30 gini table as f3; `gini_results` is reused for the next target.
f3 = gini_results.copy(deep=True)
f3.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.636364,Week,aCicScore,1.1.0,FSPD30,Overall
1,2024-09-01,2024-09-30,0.330574,Month,aCicScore,1.1.0,FSPD30,Overall
2,2024-09-02,2024-09-08,0.34218,Week,aCicScore,1.1.0,FSPD30,Overall
3,2024-09-09,2024-09-15,0.325465,Week,aCicScore,1.1.0,FSPD30,Overall
4,2024-09-16,2024-09-22,0.320003,Week,aCicScore,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1399]:
# Test sample for the FSTPD30 target, Trench 2.
# - `parsed`: v1 Alpha Cash CIC model predictions from audit_balance.ml_model_run_details.
# - `model_run`: latest 'Alpha-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName), ordered by publish_time.
# - `modelname`: when trenchCategory is NULL on the prediction row, it is pulled
#   out of the request payload with REGEXP_EXTRACT.
# - `deliquency`: default flags (deffpd0 ... deffstpd30) and maturity flags
#   (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# - Final select: disbursed loans with a non-null score, flg_mature_fstpd_30 = 1
#   and trenchCategory = 'Trench 2', labelled Data_selection = 'Test'.
# The Python string is raw (r-prefix) so the backslashes in the regex reach BigQuery unchanged.
# NOTE(review): `modelname` joins `model_run` on digitalLoanAccountId only, although
# `model_run` is de-duplicated per (customerId, digitalLoanAccountId, modelName).
# NOTE(review): the recorded output of this cell shows only the header row (no data rows).
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query on BigQuery, then drop fully duplicated rows (first occurrence kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1400]:
# Keep the Test-sample result under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [1401]:
# Train sample for the FSTPD30 target, Trench 2.
# - `parsed`/`modelname`: v1 Alpha Cash CIC model predictions read from the
#   training run-details table (prediction is exposed as aCicScore).
# - `deliquency`: default flags (deffpd0 ... deffstpd30) and maturity flags
#   (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# - Final select: disbursed loans with a non-null score, flg_mature_fstpd_30 = 1
#   and trenchCategory = 'Trench 2', labelled Data_selection = 'Train'.
# NOTE(review): `calcFeatures` built in `parsed` is not referenced later in this query.
# NOTE(review): the WHERE filters on `loanmaster` make the LEFT JOIN behave like an inner join.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query on BigQuery, then drop fully duplicated rows (first occurrence kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,1830538,da59d10d-7de0-4ad7-910e-5bf3373f8dd6,60818305380011,0.49162,Trench 2,2024-09-06 11:57:53,2024-09-06,2024-09,Train,0,1,Quick
1,2159359,1e9dc588-3b62-469f-95a4-9c6fe2526114,60821593590011,0.659491,Trench 2,2024-09-03 14:58:44,2024-09-03,2024-09,Train,1,1,Quick
2,2090987,ccec1be9-a608-4210-a0a5-f4b654945057,60820909870011,0.504782,Trench 2,2024-09-27 10:22:34,2024-09-28,2024-09,Train,1,1,Quick
3,1744576,0a8bfa81-8345-4799-b269-568bb1ae9b56,60817445760012,0.530501,Trench 2,2024-09-18 06:38:44,2024-09-25,2024-09,Train,0,1,Quick
4,2591654,47b3b1dc-9ed8-4c1c-9974-b5c447a3890a,60825916540017,0.498306,Trench 2,2024-09-23 21:05:36,2024-09-26,2024-09,Train,1,1,Quick


In [1402]:
# Keep the Train-sample result under its own name so it can be stacked with the Test sample (df1).
df2 = dfd.copy(deep=True)

In [1403]:
# Stack the Train sample (df2) on top of the Test sample (df1) with a fresh index.
# The Test sample for this target can be empty (the recorded Test query output
# shows no data rows), and concatenating an empty frame makes pandas warn that
# empty entries will stop being ignored for dtype determination. Empty frames
# are therefore excluded before the concat, as the pandas warning recommends.
# If both samples are empty, the original two-frame concat is used so the
# (empty) result still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11178 entries, 0 to 11177
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11178 non-null  Int64         
 1   digitalLoanAccountId   11178 non-null  object        
 2   loanAccountNumber      11178 non-null  object        
 3   aCicScore              11178 non-null  float64       
 4   trenchCategory         11178 non-null  object        
 5   appln_submit_datetime  11178 non-null  datetime64[us]
 6   disbursementdate       11178 non-null  dbdate        
 7   Application_month      11178 non-null  object        
 8   Data_selection         11178 non-null  object        
 9   deffstpd30             11178 non-null  Int64         
 10  flg_mature_fstpd_30    11178 non-null  Int64         
 11  new_loan_type          11178 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1404]:
# Make sure the score column is numeric; anything unparsable becomes NaN.
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1405]:
# df_concat.to_csv(r"aCicScoretrench2fstpd30.csv")

In [1406]:
# Gini of aCicScore against the deffstpd30 flag, labelled 'FSTPD30', with
# new_loan_type passed as the product column.
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffstpd30', 'FSTPD30', product_column='new_loan_type')

In [1407]:
# Keep the FSTPD30 gini table as f4 for the combine step.
f4 = gini_results.copy(deep=True)
f4.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.426901,Week,aCicScore,1.1.0,FSTPD30,Overall
1,2024-09-01,2024-09-30,0.291161,Month,aCicScore,1.1.0,FSTPD30,Overall
2,2024-09-02,2024-09-08,0.308345,Week,aCicScore,1.1.0,FSTPD30,Overall
3,2024-09-09,2024-09-15,0.262134,Week,aCicScore,1.1.0,FSTPD30,Overall
4,2024-09-16,2024-09-22,0.30391,Week,aCicScore,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1408]:
import functools

# Key columns present in every per-target gini frame.
common_columns = [
    'start_date',
    'end_date',
    'period',
    'Model_Name',
    'version',
    'loan_type',
    'bad_rate',
]
# Per-target gini frames, in the order they are folded together.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two gini frames on the key columns listed in `common_columns`."""
    combined = df1.merge(df2, how='outer', on=common_columns)
    return combined


# Left fold: ((((f0 + f1) + f2) + f3) + f4), each step an outer join.
# NOTE(review): 'bad_rate' is part of the join key and each input frame carries
# its own bad_rate label (e.g. 'FSPD30' in f3, 'FSTPD30' in f4), so rows coming
# from different frames do not match each other. The result keeps one row per
# input row, with NaN in the other targets' gini columns. Confirm this layout is
# what the destination table expects.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aCicScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'aCicScore_FPD10_gini', 'aCicScore_FPD30_gini',
       'aCicScore_FSPD30_gini', 'aCicScore_FSTPD30_gini'], dtype=object)

In [1409]:
# Reorder the columns (keys first, then one gini column per target), tag the
# gini columns with the trench ("t2") and add the constant descriptor columns.
# Built as one non-mutating chain instead of rename(inplace=True) followed by
# column-by-column assignment. The rename mapping lists each source column
# exactly once; the previous mapping repeated the 'aCicScore_FSPD30_gini' key.
ordered_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
    'aCicScore_FPD0_gini', 'aCicScore_FPD10_gini', 'aCicScore_FPD30_gini',
    'aCicScore_FSPD30_gini', 'aCicScore_FSTPD30_gini',
]
gini_renames = {
    'aCicScore_FPD0_gini': 'aCicScore_t2_FPD0_gini',
    'aCicScore_FPD10_gini': 'aCicScore_t2_FPD10_gini',
    'aCicScore_FPD30_gini': 'aCicScore_t2_FPD30_gini',
    'aCicScore_FSPD30_gini': 'aCicScore_t2_FSPD30_gini',
    'aCicScore_FSTPD30_gini': 'aCicScore_t2_FSTPD30_gini',
}
final_df = (
    final_df[ordered_columns]
    .rename(columns=gini_renames)
    .assign(
        Trench_category='Trench 2',
        Model_display_name='cic_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                   datetime64[ns]
end_date                     datetime64[ns]
period                               object
Model_Name                           object
version                              object
loan_type                            object
bad_rate                             object
aCicScore_t2_FPD0_gini              float64
aCicScore_t2_FPD10_gini             float64
aCicScore_t2_FPD30_gini             float64
aCicScore_t2_FSPD30_gini            float64
aCicScore_t2_FSTPD30_gini           float64
Trench_category                      object
Model_display_name                   object
Product_type                         object
dtype: object

In [1410]:
# Preview the first rows of the combined Trench 2 table before uploading it.
final_df.head(n=5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,aCicScore_t2_FPD0_gini,aCicScore_t2_FPD10_gini,aCicScore_t2_FPD30_gini,aCicScore_t2_FSPD30_gini,aCicScore_t2_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,aCicScore,1.1.0,Overall,FPD0,0.495652,,,,,Trench 2,cic_model_cash,CASH
1,2024-09-01,2024-09-30,Month,aCicScore,1.1.0,Overall,FPD0,0.29075,,,,,Trench 2,cic_model_cash,CASH
2,2024-09-02,2024-09-08,Week,aCicScore,1.1.0,Overall,FPD0,0.285824,,,,,Trench 2,cic_model_cash,CASH
3,2024-09-09,2024-09-15,Week,aCicScore,1.1.0,Overall,FPD0,0.301728,,,,,Trench 2,cic_model_cash,CASH
4,2024-09-16,2024-09-22,Week,aCicScore,1.1.0,Overall,FPD0,0.224814,,,,,Trench 2,cic_model_cash,CASH


In [1411]:
# Publish the combined Trench 2 gini table to BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_cic_model_t2_v1_gini5"

# WRITE_TRUNCATE replaces the table contents on every run
# (bigquery.WriteDisposition.WRITE_APPEND would add rows instead).
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job has finished; raises if the job failed.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=8511853f-4bc5-4751-a908-4be4d65ce2a0>

## Trench 3

## FPD0

## Test

In [1412]:
# Test sample for the FPD0 target, Trench 3.
# - `parsed`: v1 Alpha Cash CIC model predictions from audit_balance.ml_model_run_details.
# - `model_run`: latest 'Alpha-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName), ordered by publish_time.
# - `modelname`: when trenchCategory is NULL on the prediction row, it is pulled
#   out of the request payload with REGEXP_EXTRACT.
# - `deliquency`: default flags (deffpd0 ... deffstpd30) and maturity flags
#   (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd0 = 1
#   and trenchCategory = 'Trench 3', labelled Data_selection = 'Test'.
# The Python string is raw (r-prefix) so the backslashes in the regex reach BigQuery unchanged.
# NOTE(review): `modelname` joins `model_run` on digitalLoanAccountId only, although
# `model_run` is de-duplicated per (customerId, digitalLoanAccountId, modelName).
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query on BigQuery, then drop fully duplicated rows (first occurrence kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2832384,03847a36-0b78-468b-9248-919edac720f1,60828323840024,0.4704949606606728,Trench 3,2025-10-01 08:15:53,2025-10-01,2025-10,Test,0,1,Quick
1,1886292,08ce90dd-2e33-485b-88f3-b684c6bc63ae,60818862920033,0.4199278976748951,Trench 3,2025-10-01 19:08:33,2025-10-01,2025-10,Test,0,1,Quick
2,1080796,27ed2366-6b6c-4c53-b984-ef57da6466bd,60810807960064,0.3507751055641749,Trench 3,2025-10-02 01:05:15,2025-10-02,2025-10,Test,0,1,Quick
3,3396420,0c018cbd-c4b5-40f4-aa1b-d44701b3b67d,60833964200024,0.4637373874807495,Trench 3,2025-10-28 12:34:47,2025-10-28,2025-10,Test,0,1,Quick
4,3292833,d514eded-344f-4685-8f74-7c0db8a0b11b,60832928330029,0.3920736445483647,Trench 3,2025-10-28 13:03:54,2025-10-28,2025-10,Test,0,1,Quick


In [1413]:
# Keep the Test-sample result under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [1414]:
# Train sample for the FPD0 target, Trench 3.
# - `parsed`/`modelname`: v1 Alpha Cash CIC model predictions read from the
#   training run-details table (prediction is exposed as aCicScore).
# - `deliquency`: default flags (deffpd0 ... deffstpd30) and maturity flags
#   (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd0 = 1
#   and trenchCategory = 'Trench 3', labelled Data_selection = 'Train'.
# NOTE(review): `calcFeatures` built in `parsed` is not referenced later in this query.
# NOTE(review): the WHERE filters on `loanmaster` make the LEFT JOIN behave like an inner join.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query on BigQuery, then drop fully duplicated rows (first occurrence kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,1521878,087361a6-81a1-4187-9f9b-9b2b278992f6,60815218780035,0.370621,Trench 3,2024-09-26 12:12:37,2024-09-30,2024-09,Train,0,1,Quick
1,2253203,3babffa7-43bf-4609-9de4-47d02a3f9ecf,60822532030031,0.413417,Trench 3,2024-09-15 01:07:02,2024-09-15,2024-09,Train,0,1,Quick
2,1772915,b46887cf-fe0a-4d69-a00b-dac93daa8bda,60817729150034,0.392878,Trench 3,2024-09-23 13:05:32,2024-09-23,2024-09,Train,0,1,Quick
3,2469247,9cc45a6f-9d51-4dd7-b523-520c8414462a,60824692470025,0.463737,Trench 3,2024-09-25 06:23:51,2024-09-25,2024-09,Train,0,1,Quick
4,2162422,97a88cf7-5468-4760-94f9-567d441f67b7,60821624220034,0.426346,Trench 3,2024-09-02 19:29:14,2024-09-02,2024-09,Train,0,1,Quick


In [1415]:
# Keep the Train-sample result under its own name so it can be stacked with the Test sample (df1).
df2 = dfd.copy(deep=True)

In [1416]:
# Stack the Train sample (df2) on top of the Test sample (df1) with a fresh index.
# Empty samples are left out of the concat: pandas deprecates letting empty
# entries take part in result-dtype determination and recommends excluding them.
# If both samples are empty, the original two-frame concat is used so the
# (empty) result still carries the expected columns.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2, df1],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11836 entries, 0 to 11835
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11836 non-null  object        
 1   digitalLoanAccountId   11836 non-null  object        
 2   loanAccountNumber      11836 non-null  object        
 3   aCicScore              11836 non-null  object        
 4   trenchCategory         11836 non-null  object        
 5   appln_submit_datetime  11836 non-null  datetime64[us]
 6   disbursementdate       11836 non-null  dbdate        
 7   Application_month      11836 non-null  object        
 8   Data_selection         11836 non-null  object        
 9   deffpd0                11836 non-null  Int64         
 10  flg_mature_fpd0        11836 non-null  Int64         
 11  new_loan_type          11836 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1417]:
# Scores arrive as an object column after the concat; convert them to numbers,
# turning anything unparsable into NaN.
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1418]:
# df_concat.to_csv(r"aCicScoretrench3fpd0.csv")

In [1419]:
# Gini of aCicScore against the deffpd0 flag, labelled 'FPD0', with
# new_loan_type passed as the product column.
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffpd0', 'FPD0', product_column='new_loan_type')

In [1420]:
# Keep the FPD0 gini table as f0; `gini_results` is reused for the next target.
f0 = gini_results.copy(deep=True)
f0.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.214286,Week,aCicScore,1.1.0,FPD0,Overall
1,2024-09-01,2024-09-30,0.168616,Month,aCicScore,1.1.0,FPD0,Overall
2,2024-09-02,2024-09-08,0.256825,Week,aCicScore,1.1.0,FPD0,Overall
3,2024-09-09,2024-09-15,0.14,Week,aCicScore,1.1.0,FPD0,Overall
4,2024-09-16,2024-09-22,0.108178,Week,aCicScore,1.1.0,FPD0,Overall


## FPD10

## Test

In [1421]:
# Test sample for the FPD10 target, Trench 3.
# - `parsed`: v1 Alpha Cash CIC model predictions from audit_balance.ml_model_run_details.
# - `model_run`: latest 'Alpha-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName), ordered by publish_time.
# - `modelname`: when trenchCategory is NULL on the prediction row, it is pulled
#   out of the request payload with REGEXP_EXTRACT.
# - `deliquency`: default flags (deffpd0 ... deffstpd30) and maturity flags
#   (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd10 = 1
#   and trenchCategory = 'Trench 3', labelled Data_selection = 'Test'.
# The Python string is raw (r-prefix) so the backslashes in the regex reach BigQuery unchanged.
# NOTE(review): `modelname` joins `model_run` on digitalLoanAccountId only, although
# `model_run` is de-duplicated per (customerId, digitalLoanAccountId, modelName).
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query on BigQuery, then drop fully duplicated rows (first occurrence kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3143160,bd0717cf-5fff-4740-8ee4-40440d77a993,60831431600024,0.4342129550812156,Trench 3,2025-10-11 10:30:58,2025-10-11,2025-10,Test,0,1,Quick
1,2393282,7b00492c-efe6-4b28-915e-be3f68a4df57,60823932820122,0.5761393664191302,Trench 3,2025-10-11 09:54:53,2025-10-11,2025-10,Test,0,1,Quick
2,2464608,f0299d7f-09b2-4c28-8451-3109f46ced8c,60824646080021,0.4307681478691512,Trench 3,2025-10-11 10:44:33,2025-10-11,2025-10,Test,0,1,Quick
3,3415580,2e3e64b2-fc63-4a5c-8a98-7227a7f018b9,60834155800021,0.405258883320838,Trench 3,2025-10-11 10:09:31,2025-10-11,2025-10,Test,0,1,Quick
4,2722716,1f24fed3-c7e8-49f3-a3aa-c7a963e112e9,60827227160049,0.3374019652742286,Trench 3,2025-10-11 12:16:52,2025-10-11,2025-10,Test,0,1,Quick


In [1422]:
# Keep the Test-sample result under its own name; `dfd` is reassigned by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [1423]:
# Train sample for the FPD10 target, Trench 3.
# - `parsed`/`modelname`: v1 Alpha Cash CIC model predictions read from the
#   training run-details table (prediction is exposed as aCicScore).
# - `deliquency`: default flags (deffpd0 ... deffstpd30) and maturity flags
#   (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd10 = 1
#   and trenchCategory = 'Trench 3', labelled Data_selection = 'Train'.
# NOTE(review): `calcFeatures` built in `parsed` is not referenced later in this query.
# NOTE(review): the WHERE filters on `loanmaster` make the LEFT JOIN behave like an inner join.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query on BigQuery, then drop fully duplicated rows (first occurrence kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,1642321,49392d58-97a1-4499-9508-e01568bc47ce,60816423210041,0.426552,Trench 3,2024-09-10 13:58:58,2024-09-10,2024-09,Train,0,1,Quick
1,2312617,ba792404-4a6a-4b4b-aae0-3175f9ff1ac8,60823126170022,0.406736,Trench 3,2024-09-01 14:06:55,2024-09-01,2024-09,Train,0,1,Quick
2,2752560,fefd6251-9e92-42d1-b7ba-e5aac5458236,60827525600034,0.432835,Trench 3,2024-09-06 21:07:37,2024-09-06,2024-09,Train,0,1,Quick
3,2654776,91606244-419a-46ad-b5a0-a8b8e40b3c76,60826547760031,0.430747,Trench 3,2024-09-05 16:18:32,2024-09-05,2024-09,Train,0,1,Quick
4,2238795,9aaf55e8-9b6e-4597-bb7c-91938fd6300a,60822387950035,0.461064,Trench 3,2024-09-22 12:17:27,2024-09-22,2024-09,Train,0,1,Quick


In [1424]:
# Snapshot the Train rows; df1 already holds the Test rows.
df2 = dfd.copy(deep=True)

In [1425]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped first: pandas deprecates letting empty entries take part in
# pd.concat's dtype resolution, and an empty query result would otherwise trigger that warning.
# If both frames are empty, fall back to df2 alone so the result is still a (empty) DataFrame.
frames = [frame for frame in (df2, df1) if not frame.empty] or [df2]
df_concat = pd.concat(frames, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11586 entries, 0 to 11585
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11586 non-null  object        
 1   digitalLoanAccountId   11586 non-null  object        
 2   loanAccountNumber      11586 non-null  object        
 3   aCicScore              11586 non-null  object        
 4   trenchCategory         11586 non-null  object        
 5   appln_submit_datetime  11586 non-null  datetime64[us]
 6   disbursementdate       11586 non-null  dbdate        
 7   Application_month      11586 non-null  object        
 8   Data_selection         11586 non-null  object        
 9   deffpd10               11586 non-null  Int64         
 10  flg_mature_fpd10       11586 non-null  Int64         
 11  new_loan_type          11586 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1426]:
# info() above reports aCicScore as object dtype; cast it to numeric.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1427]:
# df_concat.to_csv(r"aCicScoretrench3fpd10.csv")

In [1428]:
# Gini of aCicScore against the deffpd10 flag, labelled 'FPD10', broken down by new_loan_type.
# NOTE(review): calculate_periodic_gini_producttype is defined earlier in the notebook; the meaning
# of the positional arguments is inferred from the result columns shown below - confirm.
# gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffpd10', 'FPD10', product_column='new_loan_type')

In [1429]:
# Keep the FPD10 gini table under its own name; gini_results is reassigned for the next target.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,0.055556,Week,aCicScore,1.1.0,FPD10,Overall
1,2024-09-01,2024-09-30,0.191107,Month,aCicScore,1.1.0,FPD10,Overall
2,2024-09-02,2024-09-08,0.098118,Week,aCicScore,1.1.0,FPD10,Overall
3,2024-09-09,2024-09-15,0.497942,Week,aCicScore,1.1.0,FPD10,Overall
4,2024-09-16,2024-09-22,0.09188,Week,aCicScore,1.1.0,FPD10,Overall


## FPD30

## Test

In [1430]:
# Test sample for the FPD30 target, Trench 3.
# CTEs in the SQL below:
#   parsed     - rows of audit_balance.ml_model_run_details whose modelDisplayName is one of three
#                Alpha Cash CIC spellings and whose modelVersionId is 'v1'.
#   model_run  - from audit_balance.ml_request_details with modelName = 'Alpha-Cash-Model-response',
#                the row with the latest publish_time per (customerId, digitalLoanAccountId, modelName).
#   modelname  - `prediction` exposed as aCicScore; trenchCategory comes from `parsed` and, when that
#                is NULL, is regex-extracted from the request payload of `model_run`.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# Final select - joins loan_master_table and `deliquency`, keeps disbursed loans with a non-null score,
#                flg_mature_fpd30 = 1 and trenchCategory = 'Trench 3', and labels every row 'Test'.
# The Python string is raw (r-prefix) so the backslashes in the regex reach BigQuery unchanged.
# NOTE(review): `calcFeatures` is computed in `parsed` but never selected downstream.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, load the result into a DataFrame, and drop rows that are exact duplicates
# (the first occurrence is kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,1502268,64a52fd7-fb8d-4a90-886d-25f1cbab3583,60815022680039,0.3939906186241853,Trench 3,2025-10-08 20:05:56,2025-10-09,2025-10,Test,0,1,Quick
1,3486965,39925c0b-76bc-45b7-ad04-1706dc7ddafe,60834869650031,0.3515130088875563,Trench 3,2025-10-08 22:29:20,2025-10-09,2025-10,Test,0,1,Quick
2,3056951,ef88f86a-58f8-44a7-8989-6f3fefc444f1,60830569510044,0.4482637631008859,Trench 3,2025-10-09 11:43:56,2025-10-09,2025-10,Test,0,1,Quick
3,2311054,f70db2ce-13de-48e0-8ea3-0ef3279fd38e,60823110540033,0.3504171765025647,Trench 3,2025-10-09 10:48:02,2025-10-09,2025-10,Test,0,1,Quick
4,3071399,fc78cc72-651b-4e92-b0df-a906c23fc167,60830713990042,0.366063895189204,Trench 3,2025-09-26 12:34:08,2025-09-26,2025-09,Test,0,1,Quick


In [1431]:
# Snapshot the FPD30 Test rows before `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [1432]:
# Train sample for the FPD30 target, Trench 3.
# CTEs in the SQL below:
#   parsed     - rows of ml_training_model_run_details whose modelDisplayName is one of three
#                Alpha Cash CIC spellings and whose modelVersionId is 'v1'.
#   modelname  - ids, start_time and trenchCategory from `parsed`, with `prediction` exposed as aCicScore.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# Final select - joins loan_master_table and `deliquency`, keeps disbursed loans with a non-null score,
#                flg_mature_fpd30 = 1 and trenchCategory = 'Trench 3', and labels every row 'Train'.
# NOTE(review): `calcFeatures` is computed in `parsed` but never selected downstream.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, load the result into a DataFrame, and drop rows that are exact duplicates
# (the first occurrence is kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2383791,e6809e3c-8445-4220-ba3f-bee9ea64d4ee,60823837910024,0.463737,Trench 3,2024-09-14 19:09:58,2024-09-14,2024-09,Train,0,1,Quick
1,1988300,c51ca036-95b7-426c-a610-8a9f8853860e,60819883000022,0.514885,Trench 3,2024-09-26 09:08:17,2024-09-26,2024-09,Train,0,1,Quick
2,2096245,02c4dc18-2c48-46c4-a97c-74449b3033d3,60820962450022,0.389844,Trench 3,2024-09-16 13:06:34,2024-09-16,2024-09,Train,0,1,Quick
3,2149653,99675aee-145c-4a64-a95e-8b5c1e934782,60821496530035,0.331523,Trench 3,2024-09-05 01:32:09,2024-09-05,2024-09,Train,0,1,Quick
4,1128445,9a211ff3-9908-42be-9fc9-287b186e4ff1,60811284450327,0.567695,Trench 3,2024-09-02 17:05:49,2024-09-02,2024-09,Train,0,1,Quick


In [1433]:
# Snapshot the FPD30 Train rows; df1 already holds the Test rows.
df2 = dfd.copy(deep=True)

In [1434]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped first: pandas deprecates letting empty entries take part in
# pd.concat's dtype resolution, and an empty query result would otherwise trigger that warning.
# If both frames are empty, fall back to df2 alone so the result is still a (empty) DataFrame.
frames = [frame for frame in (df2, df1) if not frame.empty] or [df2]
df_concat = pd.concat(frames, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11246 entries, 0 to 11245
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11246 non-null  object        
 1   digitalLoanAccountId   11246 non-null  object        
 2   loanAccountNumber      11246 non-null  object        
 3   aCicScore              11246 non-null  object        
 4   trenchCategory         11246 non-null  object        
 5   appln_submit_datetime  11246 non-null  datetime64[us]
 6   disbursementdate       11246 non-null  dbdate        
 7   Application_month      11246 non-null  object        
 8   Data_selection         11246 non-null  object        
 9   deffpd30               11246 non-null  Int64         
 10  flg_mature_fpd30       11246 non-null  Int64         
 11  new_loan_type          11246 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1435]:
# info() above reports aCicScore as object dtype; cast it to numeric.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1436]:
# df_concat.to_csv(r"aCicScoretrench3fpd30.csv")

In [1437]:
# Gini of aCicScore against the deffpd30 flag, labelled 'FPD30', broken down by new_loan_type.
# NOTE(review): calculate_periodic_gini_producttype is defined earlier in the notebook; the meaning
# of the positional arguments is inferred from the result columns shown below - confirm.
# gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffpd30', 'FPD30', product_column='new_loan_type')

In [1438]:
# Keep the FPD30 gini table under its own name; gini_results is reassigned for the next target.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,-0.230769,Week,aCicScore,1.1.0,FPD30,Overall
1,2024-09-01,2024-09-30,0.210121,Month,aCicScore,1.1.0,FPD30,Overall
2,2024-09-02,2024-09-08,0.168794,Week,aCicScore,1.1.0,FPD30,Overall
3,2024-09-09,2024-09-15,0.332143,Week,aCicScore,1.1.0,FPD30,Overall
4,2024-09-16,2024-09-22,0.455556,Week,aCicScore,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1439]:
# Test sample for the FSPD30 target, Trench 3.
# CTEs in the SQL below:
#   parsed     - rows of audit_balance.ml_model_run_details whose modelDisplayName is one of three
#                Alpha Cash CIC spellings and whose modelVersionId is 'v1'.
#   model_run  - from audit_balance.ml_request_details with modelName = 'Alpha-Cash-Model-response',
#                the row with the latest publish_time per (customerId, digitalLoanAccountId, modelName).
#   modelname  - `prediction` exposed as aCicScore; trenchCategory comes from `parsed` and, when that
#                is NULL, is regex-extracted from the request payload of `model_run`.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# Final select - joins loan_master_table and `deliquency`, keeps disbursed loans with a non-null score,
#                flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 3', and labels every row 'Test'.
#
# FIX: the maturity flag selected and filtered on is flg_mature_fspd_30 (obs_min_inst_def30 >= 2),
# the same flag the FSPD30 Train query uses. The previous flg_mature_fpd30 (obs_min_inst_def30 >= 1)
# admitted loans with a single observed instalment, for which deffspd30 is 0 by construction, and
# produced a column that did not line up with the Train frame when the two were concatenated.
# The Python string is raw (r-prefix) so the backslashes in the regex reach BigQuery unchanged.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, load the result into a DataFrame, and drop rows that are exact duplicates
# (the first occurrence is kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fpd30,new_loan_type
0,3357515,58f0e9c2-1805-4158-a3d6-c8f5759aa2af,60833575150024,0.4195854222094245,Trench 3,2025-10-02 16:13:23,2025-10-02,2025-10,Test,0,1,Quick
1,2863259,78fce405-fd35-4832-ab50-eb15cbf54b89,60828632590029,0.52303092228732,Trench 3,2025-10-02 10:17:03,2025-10-02,2025-10,Test,0,1,Quick
2,3140719,f755aeb4-fbc4-4d90-975d-8980f8da5a1f,60831407190029,0.5130243931598192,Trench 3,2025-09-27 20:07:11,2025-09-27,2025-09,Test,0,1,Quick
3,2356365,d01e4c2d-6453-4ea5-8aa1-98882a04b299,60823563650077,0.5777952129511873,Trench 3,2025-09-28 22:28:18,2025-09-30,2025-09,Test,0,1,Quick
4,1886335,883ef202-4aa4-41d7-99ff-fd6be2ef69db,60818863350053,0.3961675848446604,Trench 3,2025-09-28 18:26:57,2025-09-28,2025-09,Test,0,1,Quick


In [1440]:
# Snapshot the FSPD30 Test rows before `dfd` is overwritten by the Train query below.
df1 = dfd.copy(deep=True)

## Train

In [1441]:
# Train sample for the FSPD30 target, Trench 3.
# CTEs in the SQL below:
#   parsed     - rows of ml_training_model_run_details whose modelDisplayName is one of three
#                Alpha Cash CIC spellings and whose modelVersionId is 'v1'.
#   modelname  - ids, start_time and trenchCategory from `parsed`, with `prediction` exposed as aCicScore.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# Final select - joins loan_master_table and `deliquency`, keeps disbursed loans with a non-null score,
#                flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 3', and labels every row 'Train'.
# NOTE(review): `calcFeatures` is computed in `parsed` but never selected downstream.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, load the result into a DataFrame, and drop rows that are exact duplicates
# (the first occurrence is kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2480704,b786d8be-2128-48bc-a5e1-411253a13daa,60824807040049,0.55264,Trench 3,2024-09-28 05:57:54,2024-09-28,2024-09,Train,0,1,Quick
1,2243133,a79a6400-52f6-42b4-9e25-61e94c532513,60822431330021,0.439444,Trench 3,2024-09-21 08:42:28,2024-09-23,2024-09,Train,0,1,Quick
2,2214170,fb0006ab-efb8-4b5f-a61c-58ea4b5807cb,60822141700024,0.438316,Trench 3,2024-09-03 18:40:33,2024-09-03,2024-09,Train,0,1,Quick
3,1291122,458ba937-e531-47da-a27e-0d44de5d281d,60812911220035,0.366785,Trench 3,2024-09-09 08:22:18,2024-09-09,2024-09,Train,0,1,Quick
4,2762819,dcbbbb25-aa21-45ec-b217-4722d4daebf2,60827628190029,0.360694,Trench 3,2024-09-17 11:27:46,2024-09-17,2024-09,Train,0,1,Quick


In [1442]:
# Snapshot the FSPD30 Train rows; df1 already holds the Test rows.
df2 = dfd.copy(deep=True)

In [1443]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# Empty frames are dropped first: pandas deprecates letting empty entries take part in
# pd.concat's dtype resolution, and an empty query result would otherwise trigger that warning.
# If both frames are empty, fall back to df2 alone so the result is still a (empty) DataFrame.
frames = [frame for frame in (df2, df1) if not frame.empty] or [df2]
df_concat = pd.concat(frames, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10285 entries, 0 to 10284
Data columns (total 13 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10285 non-null  object        
 1   digitalLoanAccountId   10285 non-null  object        
 2   loanAccountNumber      10285 non-null  object        
 3   aCicScore              10285 non-null  object        
 4   trenchCategory         10285 non-null  object        
 5   appln_submit_datetime  10285 non-null  datetime64[us]
 6   disbursementdate       10285 non-null  dbdate        
 7   Application_month      10285 non-null  object        
 8   Data_selection         10285 non-null  object        
 9   deffspd30              10285 non-null  Int64         
 10  flg_mature_fspd_30     10258 non-null  Int64         
 11  new_loan_type          10285 non-null  object        
 12  flg_mature_fpd30       27 non-null     Int64         
dtypes

In [1444]:
# info() above reports aCicScore as object dtype; cast it to numeric.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1445]:
# df_concat.to_csv(r"aCicScoretrench3fspd30.csv")

In [1446]:
# Gini of aCicScore against the deffspd30 flag, labelled 'FSPD30', broken down by new_loan_type.
# NOTE(review): calculate_periodic_gini_producttype is defined earlier in the notebook; the meaning
# of the positional arguments is inferred from the result columns shown below - confirm.
# gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffspd30', 'FSPD30')
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffspd30', 'FSPD30', product_column='new_loan_type')

In [1447]:
# Keep the FSPD30 gini table under its own name; gini_results is reassigned for the next target.
f3 = gini_results.copy(deep=True)
f3.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,-0.230769,Week,aCicScore,1.1.0,FSPD30,Overall
1,2024-09-01,2024-09-30,0.276687,Month,aCicScore,1.1.0,FSPD30,Overall
2,2024-09-02,2024-09-08,0.234417,Week,aCicScore,1.1.0,FSPD30,Overall
3,2024-09-09,2024-09-15,0.499065,Week,aCicScore,1.1.0,FSPD30,Overall
4,2024-09-16,2024-09-22,0.316923,Week,aCicScore,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1448]:
# Test sample for the FSTPD30 target, Trench 3.
# CTEs in the SQL below:
#   parsed     - rows of audit_balance.ml_model_run_details whose modelDisplayName is one of three
#                Alpha Cash CIC spellings and whose modelVersionId is 'v1'.
#   model_run  - from audit_balance.ml_request_details with modelName = 'Alpha-Cash-Model-response',
#                the row with the latest publish_time per (customerId, digitalLoanAccountId, modelName).
#   modelname  - `prediction` exposed as aCicScore; trenchCategory comes from `parsed` and, when that
#                is NULL, is regex-extracted from the request payload of `model_run`.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# Final select - joins loan_master_table and `deliquency`, keeps disbursed loans with a non-null score,
#                flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 3', and labels every row 'Test'.
# The Python string is raw (r-prefix) so the backslashes in the regex reach BigQuery unchanged.
# NOTE(review): `calcFeatures` is computed in `parsed` but never selected downstream.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
--REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aCicScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, load the result into a DataFrame, and drop rows that are exact duplicates
# (the first occurrence is kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1449]:
# Snapshot the FSTPD30 Test result (no rows in the output above) before `dfd` is overwritten.
df1 = dfd.copy(deep=True)

## Train

In [1450]:
# Train sample for the FSTPD30 target, Trench 3.
# CTEs in the SQL below:
#   parsed     - rows of ml_training_model_run_details whose modelDisplayName is one of three
#                Alpha Cash CIC spellings and whose modelVersionId is 'v1'.
#   modelname  - ids, start_time and trenchCategory from `parsed`, with `prediction` exposed as aCicScore.
#   deliquency - default flags (deffpd0 ... deffstpd30) and maturity flags (flg_mature_*) derived from
#                obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
# Final select - joins loan_master_table and `deliquency`, keeps disbursed loans with a non-null score,
#                flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 3', and labels every row 'Train'.
# NOTE(review): `calcFeatures` is computed in `parsed` but never selected downstream.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-CIC-Model','Alpha Cash CIC Model','cic_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aCicScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aCicScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aCicScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, load the result into a DataFrame, and drop rows that are exact duplicates
# (the first occurrence is kept).
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aCicScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2763792,8ec5d6ae-9cbd-4760-aaad-87b83ca008c5,60827637920029,0.385735,Trench 3,2024-09-25 11:02:29,2024-09-28,2024-09,Train,1,1,Quick
1,2553621,b290bdeb-d549-4463-ab44-85e23b6a6727,60825536210024,0.6859,Trench 3,2024-09-07 11:38:54,2024-09-07,2024-09,Train,1,1,Quick
2,2719810,22fb5bdf-bf39-42fb-9fd5-a178f2c7a4d8,60827198100023,0.400342,Trench 3,2024-09-02 19:47:57,2024-09-02,2024-09,Train,1,1,Quick
3,2179535,78a51e8a-a7f2-4ef3-8d20-d5229994deea,60821795350035,0.329342,Trench 3,2024-09-27 11:16:09,2024-09-27,2024-09,Train,0,1,Quick
4,1941378,52ca309f-ce7a-4a23-b016-b3fdffd28f78,60819413780026,0.576495,Trench 3,2024-09-04 23:34:24,2024-09-05,2024-09,Train,0,1,Quick


In [1451]:
# Snapshot the FSTPD30 Train rows; df1 already holds the Test result.
df2 = dfd.copy(deep=True)

In [1452]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index.
# The FSTPD30 Test query returned no rows, and concatenating an empty frame makes pandas emit a
# deprecation warning about empty entries in pd.concat's dtype resolution. Dropping empty frames
# first is the remedy pandas recommends and keeps the current result dtypes.
# If both frames are empty, fall back to df2 alone so the result is still a (empty) DataFrame.
frames = [frame for frame in (df2, df1) if not frame.empty] or [df2]
df_concat = pd.concat(frames, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9038 entries, 0 to 9037
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9038 non-null   Int64         
 1   digitalLoanAccountId   9038 non-null   object        
 2   loanAccountNumber      9038 non-null   object        
 3   aCicScore              9038 non-null   float64       
 4   trenchCategory         9038 non-null   object        
 5   appln_submit_datetime  9038 non-null   datetime64[us]
 6   disbursementdate       9038 non-null   dbdate        
 7   Application_month      9038 non-null   object        
 8   Data_selection         9038 non-null   object        
 9   deffstpd30             9038 non-null   Int64         
 10  flg_mature_fstpd_30    9038 non-null   Int64         
 11  new_loan_type          9038 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1453]:
# Cast aCicScore to numeric for consistency with the other targets (info() above already shows
# float64 here). errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
df_concat['aCicScore'] = df_concat['aCicScore'].pipe(pd.to_numeric, errors='coerce')

In [1454]:
# df_concat.to_csv(r"aCicScoretrench3fstpd30.csv")

In [1455]:
# Gini of aCicScore against the deffstpd30 flag, labelled 'FSTPD30', broken down by new_loan_type.
# NOTE(review): calculate_periodic_gini_producttype is defined earlier in the notebook; the meaning
# of the positional arguments is inferred from the result columns shown below - confirm.
# gini_results = calculate_periodic_gini(df_concat, 'aCicScore', 'deffstpd30', 'FSTPD30')
gini_results = calculate_periodic_gini_producttype(df_concat, 'aCicScore', 'deffstpd30', 'FSTPD30', product_column='new_loan_type')

In [1456]:
# Keep the FSTPD30 gini table under its own name for the merge step that follows.
f4 = gini_results.copy(deep=True)
f4.head(n=5)

Unnamed: 0,start_date,end_date,aCicScore_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-08-26,2024-09-01,-0.272727,Week,aCicScore,1.1.0,FSTPD30,Overall
1,2024-09-01,2024-09-30,0.283827,Month,aCicScore,1.1.0,FSTPD30,Overall
2,2024-09-02,2024-09-08,0.234417,Week,aCicScore,1.1.0,FSTPD30,Overall
3,2024-09-09,2024-09-15,0.495425,Week,aCicScore,1.1.0,FSTPD30,Overall
4,2024-09-16,2024-09-22,0.307951,Week,aCicScore,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1457]:
import functools

# The five per-outcome Gini tables, in the order their Gini columns should appear.
dataframes = [f0, f1, f2, f3, f4]
# Join key shared by every table; each table contributes one extra Gini column.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on ``common_columns`` (read from notebook scope)."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the tables left-to-right into a single frame.
# NOTE(review): 'bad_rate' is part of the join key and its value differs per table in
# the previews (e.g. FPD0 vs FSTPD30), so rows from different tables do not line up --
# the head() output of final_df shows NaN in the other Gini columns. Confirm that this
# stacked layout is what the downstream table expects.
final_df = functools.reduce(merge_dataframes, dataframes[1:], dataframes[0])

final_df.columns.values

array(['start_date', 'end_date', 'aCicScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'aCicScore_FPD10_gini', 'aCicScore_FPD30_gini',
       'aCicScore_FSPD30_gini', 'aCicScore_FSTPD30_gini'], dtype=object)

In [1458]:
# Put the identifying columns first, followed by one Gini column per bad-rate definition.
final_df = final_df[[
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
    'aCicScore_FPD0_gini', 'aCicScore_FPD10_gini', 'aCicScore_FPD30_gini',
    'aCicScore_FSPD30_gini', 'aCicScore_FSTPD30_gini',
]].copy()

# Insert the "t3" tag into every Gini column name.
# The original mapping listed 'aCicScore_FSPD30_gini' twice; the duplicate key is removed
# and the rename is done by reassignment instead of inplace=True.
final_df = final_df.rename(columns={
    'aCicScore_FPD0_gini': 'aCicScore_t3_FPD0_gini',
    'aCicScore_FPD10_gini': 'aCicScore_t3_FPD10_gini',
    'aCicScore_FPD30_gini': 'aCicScore_t3_FPD30_gini',
    'aCicScore_FSPD30_gini': 'aCicScore_t3_FSPD30_gini',
    'aCicScore_FSTPD30_gini': 'aCicScore_t3_FSTPD30_gini',
})

# Constant descriptor columns written alongside the Gini values.
final_df['Trench_category'] = 'Trench 3'
final_df['Model_display_name'] = 'cic_model_cash'
final_df['Product_type'] = 'CASH'
final_df.dtypes

start_date                   datetime64[ns]
end_date                     datetime64[ns]
period                               object
Model_Name                           object
version                              object
loan_type                            object
bad_rate                             object
aCicScore_t3_FPD0_gini              float64
aCicScore_t3_FPD10_gini             float64
aCicScore_t3_FPD30_gini             float64
aCicScore_t3_FSPD30_gini            float64
aCicScore_t3_FSTPD30_gini           float64
Trench_category                      object
Model_display_name                   object
Product_type                         object
dtype: object

In [1459]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,aCicScore_t3_FPD0_gini,aCicScore_t3_FPD10_gini,aCicScore_t3_FPD30_gini,aCicScore_t3_FSPD30_gini,aCicScore_t3_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-08-26,2024-09-01,Week,aCicScore,1.1.0,Overall,FPD0,0.214286,,,,,Trench 3,cic_model_cash,CASH
1,2024-09-01,2024-09-30,Month,aCicScore,1.1.0,Overall,FPD0,0.168616,,,,,Trench 3,cic_model_cash,CASH
2,2024-09-02,2024-09-08,Week,aCicScore,1.1.0,Overall,FPD0,0.256825,,,,,Trench 3,cic_model_cash,CASH
3,2024-09-09,2024-09-15,Week,aCicScore,1.1.0,Overall,FPD0,0.14,,,,,Trench 3,cic_model_cash,CASH
4,2024-09-16,2024-09-22,Week,aCicScore,1.1.0,Overall,FPD0,0.108178,,,,,Trench 3,cic_model_cash,CASH


In [1460]:
# Upload the combined Trench 3 Gini table to BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_cic_model_t3_v1_gini5"

# WRITE_TRUNCATE overwrites the destination table; switch to "WRITE_APPEND" to add rows instead.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
job.result()  # block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d4b9bd19-b245-42a9-b7cd-d72e7c8f845e>

# Alpha-Cash-Stack-Model

# Trench 1

## FPD0

## Test

In [1461]:
# Trench 1 / FPD0 -- "Test" population.
# Reads 'Alpha-Cash-Stack-Model' / 'alpha_stack_model_cash' v1 predictions from
# audit_balance.ml_model_run_details. When the run row has no trenchCategory, the value
# is extracted from the most recent 'Alpha-Cash-Model-response' request payload.
# The loan master and delinquency tables are joined to attach deffpd0 / flg_mature_fpd0;
# only disbursed loans with flg_mature_fpd0 = 1 and trenchCategory = 'Trench 1' are kept.
# The string is raw (r"...") because the REGEXP_EXTRACT pattern contains backslashes.
# NOTE(review): calcFeatures is computed in `parsed` but not referenced by any later CTE.
# NOTE(review): the WHERE filters on loanmaster plus the inner join to `deliquency`
# make the `left join` to the loan master behave like an inner join.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop rows that are identical in every column, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,3766090,fccbee0f-d902-4bfa-bf21-9ca93f76c095,60837660900017,0.1098297773522561,Trench 1,2025-10-25 10:30:49,2025-10-25,2025-10,Test,0,1,Quick
1,3766760,805f33c2-27e0-4a2d-afb2-8a2b1abdbc0e,60837667600012,0.3702994663389881,Trench 1,2025-10-25 10:16:32,2025-10-26,2025-10,Test,0,1,Quick
2,3766811,2de78a90-a954-4d3e-8496-52e0771e2dd8,60837668110016,0.5087177026924209,Trench 1,2025-10-25 10:05:27,2025-10-25,2025-10,Test,0,1,Quick
3,3766856,5eb074b1-244a-42fc-9662-906641174d85,60837668560019,0.4284959086882738,Trench 1,2025-10-25 10:23:11,2025-10-27,2025-10,Test,1,1,Quick
4,3766869,52141216-4c71-4185-8d6f-2cfff92b172f,60837668690014,0.5360550406439649,Trench 1,2025-10-25 10:26:58,2025-10-25,2025-10,Test,0,1,Quick


In [1462]:
# Keep the Test pull under its own name; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1463]:
# Trench 1 / FPD0 -- "Train" population.
# Reads 'Alpha-Cash-Stack-Model' / 'alpha_stack_model_cash' v1 predictions from
# dap_ds_poweruser_playground.ml_training_model_run_details and takes trenchCategory
# directly from that table. The loan master and delinquency tables are joined to attach
# deffpd0 / flg_mature_fpd0; only disbursed loans with flg_mature_fpd0 = 1 and
# trenchCategory = 'Trench 1' are kept.
# NOTE(review): calcFeatures is computed in `parsed` but not selected by `modelname`.
# NOTE(review): the WHERE filters on loanmaster plus the inner join to `deliquency`
# make the `left join` to the loan master behave like an inner join.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, drop rows that are identical in every column, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2982607,e9cf444d-9229-42c0-a61d-600e44a6e96b,60829826070013,0.627298,Trench 1,2024-10-29 19:24:24,2024-10-29,2024-10,Train,0,1,Quick
1,2981453,12078be5-4b89-49c2-8d2c-e2994eab50a0,60829814530019,0.871545,Trench 1,2024-10-29 08:37:40,2024-10-30,2024-10,Train,1,1,Quick
2,2934790,89b4f6df-32c0-451f-affe-25dc972b72e6,60829347900011,0.446132,Trench 1,2024-10-12 22:06:37,2024-10-13,2024-10,Train,0,1,Quick
3,2946919,24972dac-b08d-4063-bf9c-d8bb8342f789,60829469190012,0.347415,Trench 1,2024-10-16 23:07:40,2024-10-17,2024-10,Train,0,1,Quick
4,2983758,2f340de5-c3f4-4dc0-85b9-cb7404c31286,60829837580011,0.295679,Trench 1,2024-10-30 00:44:12,2024-10-30,2024-10,Train,0,1,Quick


In [1464]:
# Keep the Train pull under its own name so it can be stacked with the Test pull (df1).
df2 = dfd.copy(deep=True)

In [1465]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index, then inspect the dtypes.
df_concat = pd.concat((df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17479 entries, 0 to 17478
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             17479 non-null  object        
 1   digitalLoanAccountId   17479 non-null  object        
 2   loanAccountNumber      17479 non-null  object        
 3   aStackScore            17479 non-null  object        
 4   trenchCategory         17479 non-null  object        
 5   appln_submit_datetime  17479 non-null  datetime64[us]
 6   disbursementdate       17479 non-null  dbdate        
 7   Application_month      17479 non-null  object        
 8   Data_selection         17479 non-null  object        
 9   deffpd0                17479 non-null  Int64         
 10  flg_mature_fpd0        17479 non-null  Int64         
 11  new_loan_type          17479 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1466]:
# aStackScore is object dtype after the concat (see df_concat.info()), so convert it
# to a numeric column. errors='coerce' turns unparseable values into NaN; report how
# many were affected instead of letting them pass silently into the Gini calculation.
# Nothing is printed when every value converts cleanly.
_scores_before = df_concat['aStackScore']
df_concat['aStackScore'] = pd.to_numeric(_scores_before, errors='coerce')
_n_coerced = int(df_concat['aStackScore'].isna().sum() - _scores_before.isna().sum())
if _n_coerced:
    print(f"WARNING: {_n_coerced} aStackScore value(s) were not numeric and became NaN")

In [1467]:
# df_concat.to_csv(r"aStackScoretrench1fpd0.csv")

In [1468]:
# Product-type variant of the Gini helper; it supersedes the plain
# calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd0', 'FPD0') call.
# Positional arguments are presumably (frame, score column, label column, bad-rate tag)
# -- verify against the helper's definition.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1469]:
# Keep the FPD0 Gini table under its own name (f0); `gini_results` is reassigned by the
# FPD10 section below.
f0 = gini_results.copy(deep=True)
f0.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.492727,Week,aStackScore,1.1.0,FPD0,Overall
1,2024-10-01,2024-10-31,0.394791,Month,aStackScore,1.1.0,FPD0,Overall
2,2024-10-07,2024-10-13,0.379712,Week,aStackScore,1.1.0,FPD0,Overall
3,2024-10-14,2024-10-20,0.359015,Week,aStackScore,1.1.0,FPD0,Overall
4,2024-10-21,2024-10-27,0.380435,Week,aStackScore,1.1.0,FPD0,Overall


## FPD10

## Test

In [1470]:
# Trench 1 / FPD10 -- "Test" population.
# Reads 'Alpha-Cash-Stack-Model' / 'alpha_stack_model_cash' v1 predictions from
# audit_balance.ml_model_run_details. When the run row has no trenchCategory, the value
# is extracted from the most recent 'Alpha-Cash-Model-response' request payload.
# The loan master and delinquency tables are joined to attach deffpd10 / flg_mature_fpd10;
# only disbursed loans with flg_mature_fpd10 = 1 and trenchCategory = 'Trench 1' are kept.
# The string is raw (r"...") because the REGEXP_EXTRACT pattern contains backslashes.
# NOTE(review): calcFeatures is computed in `parsed` but not referenced by any later CTE.
# NOTE(review): the WHERE filters on loanmaster plus the inner join to `deliquency`
# make the `left join` to the loan master behave like an inner join.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop rows that are identical in every column, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3724335,c4744db6-29c2-40ee-a840-85828101d45d,60837243350011,0.3151859393989577,Trench 1,2025-10-06 11:27:21,2025-10-06,2025-10,Test,0,1,Quick
1,3726839,7598ece1-3cd0-4782-b0bc-ad3cea9d009d,60837268390018,0.350270682513245,Trench 1,2025-10-06 11:37:41,2025-10-07,2025-10,Test,0,1,Quick
2,3727978,fb1ae638-3fd0-42e0-9c44-81a2e88d188e,60837279780012,0.3438103813753864,Trench 1,2025-10-06 18:44:36,2025-10-06,2025-10,Test,0,1,Quick
3,3695391,4f204ff6-121a-4b1d-a922-adc2b9863d4a,60836953910012,0.3847156817766144,Trench 1,2025-10-06 16:39:03,2025-10-06,2025-10,Test,1,1,Quick
4,3727354,8e379611-21ba-40ea-b1e8-ad52ea9cd44e,60837273540016,0.4254365161026788,Trench 1,2025-10-06 17:04:21,2025-10-07,2025-10,Test,0,1,Quick


In [1471]:
# Keep the Test pull under its own name; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1472]:
# Trench 1 / FPD10 -- "Train" population.
# Reads 'Alpha-Cash-Stack-Model' / 'alpha_stack_model_cash' v1 predictions from
# dap_ds_poweruser_playground.ml_training_model_run_details and takes trenchCategory
# directly from that table. The loan master and delinquency tables are joined to attach
# deffpd10 / flg_mature_fpd10; only disbursed loans with flg_mature_fpd10 = 1 and
# trenchCategory = 'Trench 1' are kept.
# NOTE(review): calcFeatures is computed in `parsed` but not selected by `modelname`.
# NOTE(review): the WHERE filters on loanmaster plus the inner join to `deliquency`
# make the `left join` to the loan master behave like an inner join.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, drop rows that are identical in every column, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2953294,afcccea6-37c8-47cb-80da-42be6bf4774f,60829532940015,0.772567,Trench 1,2024-10-19 06:34:13,2024-10-22,2024-10,Train,1,1,Quick
1,2961381,40add69d-7aff-40c1-b5b1-6e36f96ec0eb,60829613810011,0.863181,Trench 1,2024-10-21 17:35:53,2024-10-21,2024-10,Train,1,1,Quick
2,2977595,8f2242f2-0551-428e-b811-7ba5909aed5d,60829775950011,0.552098,Trench 1,2024-10-27 16:39:01,2024-10-28,2024-10,Train,1,1,Quick
3,2947252,666ed811-ddeb-4f7e-bc86-98a4063f19ec,60829472520016,0.550942,Trench 1,2024-10-17 06:56:39,2024-10-17,2024-10,Train,1,1,Quick
4,2943754,a3a18b37-34ee-4986-bd27-958ff316af47,60829437540017,0.84635,Trench 1,2024-10-15 22:07:57,2024-10-16,2024-10,Train,1,1,Quick


In [1473]:
# Keep the Train pull under its own name so it can be stacked with the Test pull (df1).
df2 = dfd.copy(deep=True)

In [1474]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index, then inspect the dtypes.
df_concat = pd.concat((df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16046 entries, 0 to 16045
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16046 non-null  object        
 1   digitalLoanAccountId   16046 non-null  object        
 2   loanAccountNumber      16046 non-null  object        
 3   aStackScore            16046 non-null  object        
 4   trenchCategory         16046 non-null  object        
 5   appln_submit_datetime  16046 non-null  datetime64[us]
 6   disbursementdate       16046 non-null  dbdate        
 7   Application_month      16046 non-null  object        
 8   Data_selection         16046 non-null  object        
 9   deffpd10               16046 non-null  Int64         
 10  flg_mature_fpd10       16046 non-null  Int64         
 11  new_loan_type          16046 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1475]:
# aStackScore is object dtype after the concat (see df_concat.info()), so convert it
# to a numeric column. errors='coerce' turns unparseable values into NaN; report how
# many were affected instead of letting them pass silently into the Gini calculation.
# Nothing is printed when every value converts cleanly.
_scores_before = df_concat['aStackScore']
df_concat['aStackScore'] = pd.to_numeric(_scores_before, errors='coerce')
_n_coerced = int(df_concat['aStackScore'].isna().sum() - _scores_before.isna().sum())
if _n_coerced:
    print(f"WARNING: {_n_coerced} aStackScore value(s) were not numeric and became NaN")

In [1476]:
# df_concat.to_csv(r"aStackScoretrench1fpd10.csv")

In [1477]:
# Product-type variant of the Gini helper; it supersedes the plain
# calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd10', 'FPD10') call.
# Positional arguments are presumably (frame, score column, label column, bad-rate tag)
# -- verify against the helper's definition.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1478]:
# Keep the FPD10 Gini table under its own name (f1); `gini_results` is reassigned by the
# FPD30 section below.
f1 = gini_results.copy(deep=True)
f1.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.544336,Week,aStackScore,1.1.0,FPD10,Overall
1,2024-10-01,2024-10-31,0.549608,Month,aStackScore,1.1.0,FPD10,Overall
2,2024-10-07,2024-10-13,0.585794,Week,aStackScore,1.1.0,FPD10,Overall
3,2024-10-14,2024-10-20,0.518239,Week,aStackScore,1.1.0,FPD10,Overall
4,2024-10-21,2024-10-27,0.562636,Week,aStackScore,1.1.0,FPD10,Overall


## FPD30

## Test

In [1479]:
# Trench 1 / FPD30 -- "Test" population.
# Reads 'Alpha-Cash-Stack-Model' / 'alpha_stack_model_cash' v1 predictions from
# audit_balance.ml_model_run_details. When the run row has no trenchCategory, the value
# is extracted from the most recent 'Alpha-Cash-Model-response' request payload.
# The loan master and delinquency tables are joined to attach deffpd30 / flg_mature_fpd30;
# only disbursed loans with flg_mature_fpd30 = 1 and trenchCategory = 'Trench 1' are kept.
# The string is raw (r"...") because the REGEXP_EXTRACT pattern contains backslashes.
# NOTE(review): calcFeatures is computed in `parsed` but not referenced by any later CTE.
# NOTE(review): the WHERE filters on loanmaster plus the inner join to `deliquency`
# make the `left join` to the loan master behave like an inner join.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, drop rows that are identical in every column, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,3732908,c06b6d3a-5c1c-4159-b032-eb5734e961e7,60837329080013,0.3341210259629258,Trench 1,2025-10-09 11:13:44,2025-10-09,2025-10,Test,0,1,Quick
1,3732985,a51a6129-a36f-43b2-9b56-f5897824a443,60837329850016,0.400134108741866,Trench 1,2025-10-09 11:56:58,2025-10-09,2025-10,Test,0,1,Quick
2,3733014,fe0e6523-44f8-4c52-8e5c-cc1596f581c7,60837330140018,0.3822674066697173,Trench 1,2025-10-09 12:15:10,2025-10-09,2025-10,Test,0,1,Quick
3,3733252,034a1064-7f3b-4087-8337-fe0f2eb91e6f,60837332520011,0.3378641043975119,Trench 1,2025-10-09 13:52:08,2025-10-09,2025-10,Test,0,1,Quick
4,3732818,22250cb0-db22-4624-8a9e-d6708df8d378,60837328180012,0.3405226629133607,Trench 1,2025-10-09 13:38:25,2025-10-09,2025-10,Test,0,1,Quick


In [1480]:
# Keep the Test pull under its own name; `dfd` is overwritten by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1481]:
# Trench 1 / FPD30 -- "Train" population.
# Reads 'Alpha-Cash-Stack-Model' / 'alpha_stack_model_cash' v1 predictions from
# dap_ds_poweruser_playground.ml_training_model_run_details and takes trenchCategory
# directly from that table. The loan master and delinquency tables are joined to attach
# deffpd30 / flg_mature_fpd30; only disbursed loans with flg_mature_fpd30 = 1 and
# trenchCategory = 'Trench 1' are kept.
# NOTE(review): calcFeatures is computed in `parsed` but not selected by `modelname`.
# NOTE(review): the WHERE filters on loanmaster plus the inner join to `deliquency`
# make the `left join` to the loan master behave like an inner join.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, drop rows that are identical in every column, and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2982607,e9cf444d-9229-42c0-a61d-600e44a6e96b,60829826070013,0.627298,Trench 1,2024-10-29 19:24:24,2024-10-29,2024-10,Train,0,1,Quick
1,2981453,12078be5-4b89-49c2-8d2c-e2994eab50a0,60829814530019,0.871545,Trench 1,2024-10-29 08:37:40,2024-10-30,2024-10,Train,0,1,Quick
2,2934790,89b4f6df-32c0-451f-affe-25dc972b72e6,60829347900011,0.446132,Trench 1,2024-10-12 22:06:37,2024-10-13,2024-10,Train,0,1,Quick
3,2946919,24972dac-b08d-4063-bf9c-d8bb8342f789,60829469190012,0.347415,Trench 1,2024-10-16 23:07:40,2024-10-17,2024-10,Train,0,1,Quick
4,2983758,2f340de5-c3f4-4dc0-85b9-cb7404c31286,60829837580011,0.295679,Trench 1,2024-10-30 00:44:12,2024-10-30,2024-10,Train,0,1,Quick


In [1482]:
# Keep the Train pull under its own name so it can be stacked with the Test pull (df1).
df2 = dfd.copy(deep=True)

In [1483]:
# Stack Train (df2) on top of Test (df1) with a fresh 0..n-1 index, then inspect the dtypes.
df_concat = pd.concat((df2, df1), axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15088 entries, 0 to 15087
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15088 non-null  object        
 1   digitalLoanAccountId   15088 non-null  object        
 2   loanAccountNumber      15088 non-null  object        
 3   aStackScore            15088 non-null  object        
 4   trenchCategory         15088 non-null  object        
 5   appln_submit_datetime  15088 non-null  datetime64[us]
 6   disbursementdate       15088 non-null  dbdate        
 7   Application_month      15088 non-null  object        
 8   Data_selection         15088 non-null  object        
 9   deffpd30               15088 non-null  Int64         
 10  flg_mature_fpd30       15088 non-null  Int64         
 11  new_loan_type          15088 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1484]:
# aStackScore is object dtype after the concat (see df_concat.info()), so convert it
# to a numeric column. errors='coerce' turns unparseable values into NaN; report how
# many were affected instead of letting them pass silently into the Gini calculation.
# Nothing is printed when every value converts cleanly.
_scores_before = df_concat['aStackScore']
df_concat['aStackScore'] = pd.to_numeric(_scores_before, errors='coerce')
_n_coerced = int(df_concat['aStackScore'].isna().sum() - _scores_before.isna().sum())
if _n_coerced:
    print(f"WARNING: {_n_coerced} aStackScore value(s) were not numeric and became NaN")

In [1485]:
# df_concat.to_csv(r"aStackScoretrench1fpd30.csv")

In [1486]:
# Product-type variant of the Gini helper; it supersedes the plain
# calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd30', 'FPD30') call.
# Positional arguments are presumably (frame, score column, label column, bad-rate tag)
# -- verify against the helper's definition.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1487]:
# Keep a separate copy of the FPD30 Gini table under its own name (f2) and preview it.
f2 = gini_results.copy(deep=True)
f2.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.609023,Week,aStackScore,1.1.0,FPD30,Overall
1,2024-10-01,2024-10-31,0.577049,Month,aStackScore,1.1.0,FPD30,Overall
2,2024-10-07,2024-10-13,0.592929,Week,aStackScore,1.1.0,FPD30,Overall
3,2024-10-14,2024-10-20,0.537136,Week,aStackScore,1.1.0,FPD30,Overall
4,2024-10-21,2024-10-27,0.582035,Week,aStackScore,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1488]:
# Test extract (audit_balance.ml_model_run_details): Trench 1 disbursed loans whose
# FSPD30 observation window has matured (flg_mature_fspd_30 = 1).
#
# Changes versus the previous version of this cell (result rows are unchanged):
#   * `parsed` no longer builds the unused calcFeatures string or selects unused columns.
#   * every table is fully qualified and back-quoted (the client project is
#     prj-prod-dataplatform, so the loan master reference resolves to the same table).
#   * the loan master join is written as INNER JOIN, which is what the WHERE filters on
#     loanmaster columns already made it.
#
# NOTE(review): `parsed` is not de-duplicated per loan, so a loan scored more than once
# contributes one row per distinct prediction/start_time; drop_duplicates() below only
# removes rows identical across all selected columns. Confirm this is intended.
sq = r"""
WITH parsed AS (
  -- Stack-model scores; only the columns consumed downstream are selected.
  SELECT customerId, digitalLoanAccountId, start_time, prediction, trenchCategory
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName IN ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
    AND modelVersionId = 'v1'
),
model_run AS (
  -- Latest request payload per (customer, loan, model); fallback source for trenchCategory.
  SELECT customerId, digitalLoanAccountId, modelName, publish_time, requestPayload AS requestPayload_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_request_details`
  WHERE modelName = 'Alpha-Cash-Model-response'
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId, modelName ORDER BY publish_time DESC) = 1
),
modelname AS (
  SELECT
    p.customerId,
    p.digitalLoanAccountId,
    p.start_time,
    p.prediction AS aStackScore,
    COALESCE(p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) AS trenchCategory
  FROM parsed p
  LEFT JOIN model_run m ON m.digitalLoanAccountId = p.digitalLoanAccountId
),
deliquency AS (
  -- Default flags (def*) and maturity flags (flg_mature_*) for every target;
  -- this cell reads deffspd30 and flg_mature_fspd_30.
  SELECT
    loanAccountNumber,
    CASE WHEN obs_min_inst_def0 >= 1 AND min_inst_def0 = 1 THEN 1 ELSE 0 END AS deffpd0,
    CASE WHEN obs_min_inst_def10 >= 1 AND min_inst_def10 = 1 THEN 1 ELSE 0 END AS deffpd10,
    CASE WHEN obs_min_inst_def30 >= 1 AND min_inst_def30 = 1 THEN 1 ELSE 0 END AS deffpd30,
    CASE WHEN obs_min_inst_def30 >= 2 AND min_inst_def30 IN (1, 2) THEN 1 ELSE 0 END AS deffspd30,
    CASE WHEN obs_min_inst_def30 >= 3 AND min_inst_def30 IN (1, 2, 3) THEN 1 ELSE 0 END AS deffstpd30,
    CASE WHEN obs_min_inst_def0 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd0,
    CASE WHEN obs_min_inst_def10 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd10,
    CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd30,
    CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END AS flg_mature_fspd_30,
    CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END AS flg_mature_fstpd_30
  FROM `prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data`
)
SELECT
  r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME)) AS appln_submit_datetime,
  DATE(loanmaster.disbursementDateTime) AS disbursementdate,
  FORMAT_DATE('%Y-%m', COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME))) AS Application_month,
  'Test' AS Data_selection,
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type
FROM modelname r
INNER JOIN `prj-prod-dataplatform.risk_credit_mis.loan_master_table` loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
INNER JOIN deliquency del
  ON del.loanAccountNumber = loanmaster.loanAccountNumber
WHERE loanmaster.flagDisbursement = 1
  AND loanmaster.disbursementDateTime IS NOT NULL
  AND r.aStackScore IS NOT NULL
  AND del.flg_mature_fspd_30 = 1
  AND r.trenchCategory = 'Trench 1'
"""
# Run the query and drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1489]:
# Hold the Test extract as df1; dfd is reused by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1490]:
# Train extract (dap_ds_poweruser_playground.ml_training_model_run_details): Trench 1
# disbursed loans whose FSPD30 observation window has matured (flg_mature_fspd_30 = 1).
#
# Changes versus the previous version of this cell (result rows are unchanged):
#   * `parsed` no longer builds the unused calcFeatures string or selects unused columns.
#   * every table is fully qualified and back-quoted (the client project is
#     prj-prod-dataplatform, so the loan master reference resolves to the same table).
#   * the loan master join is written as INNER JOIN, which is what the WHERE filters on
#     loanmaster columns already made it.
#
# NOTE(review): `parsed` is not de-duplicated per loan; drop_duplicates() below only
# removes rows identical across all selected columns. Confirm this is intended.
sq = """
WITH parsed AS (
  -- Stack-model scores; only the columns consumed downstream are selected.
  SELECT customerId, digitalLoanAccountId, start_time, prediction, trenchCategory
  FROM `prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details`
  WHERE modelDisplayName IN ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
    AND modelVersionId = 'v1'
),
modelname AS (
  SELECT
    customerId,
    digitalLoanAccountId,
    start_time,
    prediction AS aStackScore,
    trenchCategory
  FROM parsed
),
deliquency AS (
  -- Default flags (def*) and maturity flags (flg_mature_*) for every target;
  -- this cell reads deffspd30 and flg_mature_fspd_30.
  SELECT
    loanAccountNumber,
    CASE WHEN obs_min_inst_def0 >= 1 AND min_inst_def0 = 1 THEN 1 ELSE 0 END AS deffpd0,
    CASE WHEN obs_min_inst_def10 >= 1 AND min_inst_def10 = 1 THEN 1 ELSE 0 END AS deffpd10,
    CASE WHEN obs_min_inst_def30 >= 1 AND min_inst_def30 = 1 THEN 1 ELSE 0 END AS deffpd30,
    CASE WHEN obs_min_inst_def30 >= 2 AND min_inst_def30 IN (1, 2) THEN 1 ELSE 0 END AS deffspd30,
    CASE WHEN obs_min_inst_def30 >= 3 AND min_inst_def30 IN (1, 2, 3) THEN 1 ELSE 0 END AS deffstpd30,
    CASE WHEN obs_min_inst_def0 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd0,
    CASE WHEN obs_min_inst_def10 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd10,
    CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd30,
    CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END AS flg_mature_fspd_30,
    CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END AS flg_mature_fstpd_30
  FROM `prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data`
)
SELECT
  r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME)) AS appln_submit_datetime,
  DATE(loanmaster.disbursementDateTime) AS disbursementdate,
  FORMAT_DATE('%Y-%m', COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME))) AS Application_month,
  'Train' AS Data_selection,
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type
FROM modelname r
INNER JOIN `prj-prod-dataplatform.risk_credit_mis.loan_master_table` loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
INNER JOIN deliquency del
  ON del.loanAccountNumber = loanmaster.loanAccountNumber
WHERE loanmaster.flagDisbursement = 1
  AND loanmaster.disbursementDateTime IS NOT NULL
  AND r.aStackScore IS NOT NULL
  AND del.flg_mature_fspd_30 = 1
  AND r.trenchCategory = 'Trench 1'
"""

# Run the query and drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2973109,c8e9412b-8f02-4c18-bf67-260481887ca2,60829731090019,0.367219,Trench 1,2024-10-26 09:41:38,2024-10-27,2024-10,Train,0,1,Quick
1,2907528,3e9ad64f-fa23-4418-9f4c-414738eca555,60829075280015,0.870859,Trench 1,2024-10-03 19:04:26,2024-10-03,2024-10,Train,1,1,Quick
2,2925374,629296c0-7417-45a8-946d-8ea315851b8e,60829253740017,0.714348,Trench 1,2024-10-09 16:14:52,2024-10-09,2024-10,Train,1,1,Quick
3,2936747,cb7d42d6-f7a5-4589-ac8b-5c81feda5859,60829367470015,0.297963,Trench 1,2024-10-13 15:01:25,2024-10-13,2024-10,Train,1,1,Quick
4,2933360,057b42a3-5852-45fe-a2d9-fc6a6c45b79b,60829333600018,0.595993,Trench 1,2024-10-12 13:54:22,2024-10-12,2024-10,Train,1,1,Quick


In [1491]:
# Hold the Train extract as df2 so it can be stacked with the Test extract (df1).
df2 = dfd.copy(deep=True)

In [1492]:
# Stack Train (df2) on top of Test (df1).
# Empty extracts are left out before concatenating: passing an empty frame to
# pd.concat raises pandas' FutureWarning about empty entries, and in future pandas
# versions its dtypes would take part in deciding the result dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    else df2.iloc[0:0].copy()  # both extracts empty: keep an empty frame with the expected columns
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14121 entries, 0 to 14120
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14121 non-null  Int64         
 1   digitalLoanAccountId   14121 non-null  object        
 2   loanAccountNumber      14121 non-null  object        
 3   aStackScore            14121 non-null  float64       
 4   trenchCategory         14121 non-null  object        
 5   appln_submit_datetime  14121 non-null  datetime64[us]
 6   disbursementdate       14121 non-null  dbdate        
 7   Application_month      14121 non-null  object        
 8   Data_selection         14121 non-null  object        
 9   deffspd30              14121 non-null  Int64         
 10  flg_mature_fspd_30     14121 non-null  Int64         
 11  new_loan_type          14121 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1493]:
# Convert aStackScore to numeric; errors='coerce' maps unparseable values to NaN.
# In the .info() output above the column is already float64, so this acts as a guard
# for runs where the scores arrive as strings.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [1494]:
# df_concat.to_csv(r"aStackScoretrench1fspd30.csv")

In [1495]:
# Gini of aStackScore against the deffspd30 flag, labelled 'FSPD30', with
# new_loan_type passed as the product column. This call replaces the earlier
# calculate_periodic_gini(df_concat, 'aStackScore', 'deffspd30', 'FSPD30') variant.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffspd30', 'FSPD30', product_column='new_loan_type'
)

In [1496]:
# Keep the FSPD30 result under its own name; gini_results is reassigned for the next target.
f3 = gini_results.copy(deep=True)
f3.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.630674,Week,aStackScore,1.1.0,FSPD30,Overall
1,2024-10-01,2024-10-31,0.552296,Month,aStackScore,1.1.0,FSPD30,Overall
2,2024-10-07,2024-10-13,0.589437,Week,aStackScore,1.1.0,FSPD30,Overall
3,2024-10-14,2024-10-20,0.455286,Week,aStackScore,1.1.0,FSPD30,Overall
4,2024-10-21,2024-10-27,0.559849,Week,aStackScore,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1497]:
# Test extract (audit_balance.ml_model_run_details): Trench 1 disbursed loans whose
# FSTPD30 observation window has matured (flg_mature_fstpd_30 = 1).
#
# Changes versus the previous version of this cell (result rows are unchanged):
#   * `parsed` no longer builds the unused calcFeatures string or selects unused columns.
#   * every table is fully qualified and back-quoted (the client project is
#     prj-prod-dataplatform, so the loan master reference resolves to the same table).
#   * the loan master join is written as INNER JOIN, which is what the WHERE filters on
#     loanmaster columns already made it.
#
# NOTE(review): `parsed` is not de-duplicated per loan, so a loan scored more than once
# contributes one row per distinct prediction/start_time; drop_duplicates() below only
# removes rows identical across all selected columns. Confirm this is intended.
sq = r"""
WITH parsed AS (
  -- Stack-model scores; only the columns consumed downstream are selected.
  SELECT customerId, digitalLoanAccountId, start_time, prediction, trenchCategory
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName IN ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
    AND modelVersionId = 'v1'
),
model_run AS (
  -- Latest request payload per (customer, loan, model); fallback source for trenchCategory.
  SELECT customerId, digitalLoanAccountId, modelName, publish_time, requestPayload AS requestPayload_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_request_details`
  WHERE modelName = 'Alpha-Cash-Model-response'
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId, modelName ORDER BY publish_time DESC) = 1
),
modelname AS (
  SELECT
    p.customerId,
    p.digitalLoanAccountId,
    p.start_time,
    p.prediction AS aStackScore,
    COALESCE(p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) AS trenchCategory
  FROM parsed p
  LEFT JOIN model_run m ON m.digitalLoanAccountId = p.digitalLoanAccountId
),
deliquency AS (
  -- Default flags (def*) and maturity flags (flg_mature_*) for every target;
  -- this cell reads deffstpd30 and flg_mature_fstpd_30.
  SELECT
    loanAccountNumber,
    CASE WHEN obs_min_inst_def0 >= 1 AND min_inst_def0 = 1 THEN 1 ELSE 0 END AS deffpd0,
    CASE WHEN obs_min_inst_def10 >= 1 AND min_inst_def10 = 1 THEN 1 ELSE 0 END AS deffpd10,
    CASE WHEN obs_min_inst_def30 >= 1 AND min_inst_def30 = 1 THEN 1 ELSE 0 END AS deffpd30,
    CASE WHEN obs_min_inst_def30 >= 2 AND min_inst_def30 IN (1, 2) THEN 1 ELSE 0 END AS deffspd30,
    CASE WHEN obs_min_inst_def30 >= 3 AND min_inst_def30 IN (1, 2, 3) THEN 1 ELSE 0 END AS deffstpd30,
    CASE WHEN obs_min_inst_def0 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd0,
    CASE WHEN obs_min_inst_def10 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd10,
    CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd30,
    CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END AS flg_mature_fspd_30,
    CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END AS flg_mature_fstpd_30
  FROM `prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data`
)
SELECT
  r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME)) AS appln_submit_datetime,
  DATE(loanmaster.disbursementDateTime) AS disbursementdate,
  FORMAT_DATE('%Y-%m', COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME))) AS Application_month,
  'Test' AS Data_selection,
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type
FROM modelname r
INNER JOIN `prj-prod-dataplatform.risk_credit_mis.loan_master_table` loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
INNER JOIN deliquency del
  ON del.loanAccountNumber = loanmaster.loanAccountNumber
WHERE loanmaster.flagDisbursement = 1
  AND loanmaster.disbursementDateTime IS NOT NULL
  AND r.aStackScore IS NOT NULL
  AND del.flg_mature_fstpd_30 = 1
  AND r.trenchCategory = 'Trench 1'
"""
# Run the query and drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1498]:
# Hold the Test extract as df1; dfd is reused by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1499]:
# Train extract (dap_ds_poweruser_playground.ml_training_model_run_details): Trench 1
# disbursed loans whose FSTPD30 observation window has matured (flg_mature_fstpd_30 = 1).
#
# Changes versus the previous version of this cell (result rows are unchanged):
#   * `parsed` no longer builds the unused calcFeatures string or selects unused columns.
#   * every table is fully qualified and back-quoted (the client project is
#     prj-prod-dataplatform, so the loan master reference resolves to the same table).
#   * the loan master join is written as INNER JOIN, which is what the WHERE filters on
#     loanmaster columns already made it.
#
# NOTE(review): `parsed` is not de-duplicated per loan; drop_duplicates() below only
# removes rows identical across all selected columns. Confirm this is intended.
sq = """
WITH parsed AS (
  -- Stack-model scores; only the columns consumed downstream are selected.
  SELECT customerId, digitalLoanAccountId, start_time, prediction, trenchCategory
  FROM `prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details`
  WHERE modelDisplayName IN ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
    AND modelVersionId = 'v1'
),
modelname AS (
  SELECT
    customerId,
    digitalLoanAccountId,
    start_time,
    prediction AS aStackScore,
    trenchCategory
  FROM parsed
),
deliquency AS (
  -- Default flags (def*) and maturity flags (flg_mature_*) for every target;
  -- this cell reads deffstpd30 and flg_mature_fstpd_30.
  SELECT
    loanAccountNumber,
    CASE WHEN obs_min_inst_def0 >= 1 AND min_inst_def0 = 1 THEN 1 ELSE 0 END AS deffpd0,
    CASE WHEN obs_min_inst_def10 >= 1 AND min_inst_def10 = 1 THEN 1 ELSE 0 END AS deffpd10,
    CASE WHEN obs_min_inst_def30 >= 1 AND min_inst_def30 = 1 THEN 1 ELSE 0 END AS deffpd30,
    CASE WHEN obs_min_inst_def30 >= 2 AND min_inst_def30 IN (1, 2) THEN 1 ELSE 0 END AS deffspd30,
    CASE WHEN obs_min_inst_def30 >= 3 AND min_inst_def30 IN (1, 2, 3) THEN 1 ELSE 0 END AS deffstpd30,
    CASE WHEN obs_min_inst_def0 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd0,
    CASE WHEN obs_min_inst_def10 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd10,
    CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd30,
    CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END AS flg_mature_fspd_30,
    CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END AS flg_mature_fstpd_30
  FROM `prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data`
)
SELECT
  r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME)) AS appln_submit_datetime,
  DATE(loanmaster.disbursementDateTime) AS disbursementdate,
  FORMAT_DATE('%Y-%m', COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME))) AS Application_month,
  'Train' AS Data_selection,
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type
FROM modelname r
INNER JOIN `prj-prod-dataplatform.risk_credit_mis.loan_master_table` loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
INNER JOIN deliquency del
  ON del.loanAccountNumber = loanmaster.loanAccountNumber
WHERE loanmaster.flagDisbursement = 1
  AND loanmaster.disbursementDateTime IS NOT NULL
  AND r.aStackScore IS NOT NULL
  AND del.flg_mature_fstpd_30 = 1
  AND r.trenchCategory = 'Trench 1'
"""

# Run the query and drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2973109,c8e9412b-8f02-4c18-bf67-260481887ca2,60829731090019,0.367219,Trench 1,2024-10-26 09:41:38,2024-10-27,2024-10,Train,0,1,Quick
1,2907528,3e9ad64f-fa23-4418-9f4c-414738eca555,60829075280015,0.870859,Trench 1,2024-10-03 19:04:26,2024-10-03,2024-10,Train,1,1,Quick
2,2925374,629296c0-7417-45a8-946d-8ea315851b8e,60829253740017,0.714348,Trench 1,2024-10-09 16:14:52,2024-10-09,2024-10,Train,1,1,Quick
3,2936747,cb7d42d6-f7a5-4589-ac8b-5c81feda5859,60829367470015,0.297963,Trench 1,2024-10-13 15:01:25,2024-10-13,2024-10,Train,1,1,Quick
4,2933360,057b42a3-5852-45fe-a2d9-fc6a6c45b79b,60829333600018,0.595993,Trench 1,2024-10-12 13:54:22,2024-10-12,2024-10,Train,1,1,Quick


In [1500]:
# Hold the Train extract as df2 so it can be stacked with the Test extract (df1).
df2 = dfd.copy(deep=True)

In [1501]:
# Stack Train (df2) on top of Test (df1).
# Empty extracts are left out before concatenating: passing an empty frame to
# pd.concat raises pandas' FutureWarning about empty entries, and in future pandas
# versions its dtypes would take part in deciding the result dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    else df2.iloc[0:0].copy()  # both extracts empty: keep an empty frame with the expected columns
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12932 entries, 0 to 12931
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12932 non-null  Int64         
 1   digitalLoanAccountId   12932 non-null  object        
 2   loanAccountNumber      12932 non-null  object        
 3   aStackScore            12932 non-null  float64       
 4   trenchCategory         12932 non-null  object        
 5   appln_submit_datetime  12932 non-null  datetime64[us]
 6   disbursementdate       12932 non-null  dbdate        
 7   Application_month      12932 non-null  object        
 8   Data_selection         12932 non-null  object        
 9   deffstpd30             12932 non-null  Int64         
 10  flg_mature_fstpd_30    12932 non-null  Int64         
 11  new_loan_type          12932 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1502]:
# Convert aStackScore to numeric; errors='coerce' maps unparseable values to NaN.
# In the .info() output above the column is already float64, so this acts as a guard
# for runs where the scores arrive as strings.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [1503]:
# df_concat.to_csv(r"aStackScoretrench1fstpd30.csv")

In [1504]:
# Gini of aStackScore against the deffstpd30 flag, labelled 'FSTPD30', with
# new_loan_type passed as the product column. This call replaces the earlier
# calculate_periodic_gini(df_concat, 'aStackScore', 'deffstpd30', 'FSTPD30') variant.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffstpd30', 'FSTPD30', product_column='new_loan_type'
)

In [1505]:
# Keep the FSTPD30 result under its own name before the per-target tables are merged.
f4 = gini_results.copy(deep=True)
f4.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.493301,Week,aStackScore,1.1.0,FSTPD30,Overall
1,2024-10-01,2024-10-31,0.491028,Month,aStackScore,1.1.0,FSTPD30,Overall
2,2024-10-07,2024-10-13,0.500463,Week,aStackScore,1.1.0,FSTPD30,Overall
3,2024-10-14,2024-10-20,0.414163,Week,aStackScore,1.1.0,FSTPD30,Overall
4,2024-10-21,2024-10-27,0.516269,Week,aStackScore,1.1.0,FSTPD30,Overall


## Combining the Gini DataFrames

In [1506]:
import functools

# Per-target Gini tables, merged left to right in this order.
dataframes = [f0, f1, f2, f3, f4]

# NOTE(review): 'bad_rate' is one of the join keys and holds a different label in each
# table (FPD0, FPD30, FSPD30, ...), so rows from different targets never match. The outer
# merge therefore stacks the tables, leaving every other target's Gini column NaN on each
# row (visible in final_df.head() below). Confirm this layout is what the consumers expect.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on the shared key columns in ``common_columns``."""
    return df1.merge(df2, on=common_columns, how='outer')


final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aStackScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'aStackScore_FPD10_gini', 'aStackScore_FPD30_gini',
       'aStackScore_FSPD30_gini', 'aStackScore_FSTPD30_gini'],
      dtype=object)

In [1507]:
# Put the key columns first, tag each Gini column with the trench (t1), and append
# the constant descriptor columns. Built as one chain so no step mutates in place.
# Note: the cell overwrites final_df, so it can only be run once per merge (the
# un-prefixed Gini column names no longer exist afterwards).
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
        'aStackScore_FPD0_gini',
        'aStackScore_FPD10_gini',
        'aStackScore_FPD30_gini',
        'aStackScore_FSPD30_gini',
        'aStackScore_FSTPD30_gini',
    ]]
    .rename(columns={
        'aStackScore_FPD0_gini': 'aStackScore_t1_FPD0_gini',
        'aStackScore_FPD10_gini': 'aStackScore_t1_FPD10_gini',
        'aStackScore_FPD30_gini': 'aStackScore_t1_FPD30_gini',
        'aStackScore_FSPD30_gini': 'aStackScore_t1_FSPD30_gini',
        'aStackScore_FSTPD30_gini': 'aStackScore_t1_FSTPD30_gini',
    })
    .assign(
        Trench_category='Trench 1',
        Model_display_name='alpha_stack_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                     datetime64[ns]
end_date                       datetime64[ns]
period                                 object
Model_Name                             object
version                                object
loan_type                              object
bad_rate                               object
aStackScore_t1_FPD0_gini              float64
aStackScore_t1_FPD10_gini             float64
aStackScore_t1_FPD30_gini             float64
aStackScore_t1_FSPD30_gini            float64
aStackScore_t1_FSTPD30_gini           float64
Trench_category                        object
Model_display_name                     object
Product_type                           object
dtype: object

In [1508]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,aStackScore_t1_FPD0_gini,aStackScore_t1_FPD10_gini,aStackScore_t1_FPD30_gini,aStackScore_t1_FSPD30_gini,aStackScore_t1_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,aStackScore,1.1.0,Overall,FPD0,0.492727,,,,,Trench 1,alpha_stack_model_cash,CASH
1,2024-10-01,2024-10-31,Month,aStackScore,1.1.0,Overall,FPD0,0.394791,,,,,Trench 1,alpha_stack_model_cash,CASH
2,2024-10-07,2024-10-13,Week,aStackScore,1.1.0,Overall,FPD0,0.379712,,,,,Trench 1,alpha_stack_model_cash,CASH
3,2024-10-14,2024-10-20,Week,aStackScore,1.1.0,Overall,FPD0,0.359015,,,,,Trench 1,alpha_stack_model_cash,CASH
4,2024-10-21,2024-10-27,Week,aStackScore,1.1.0,Overall,FPD0,0.380435,,,,,Trench 1,alpha_stack_model_cash,CASH


In [1509]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_stack_model_t1_v1_gini5"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ec5ec5e7-4be8-43e0-b188-824f12860315>

# Trench 2

## FPD0

## Test

In [1510]:
# Test extract (audit_balance.ml_model_run_details): Trench 2 disbursed loans whose
# FPD0 observation window has matured (flg_mature_fpd0 = 1).
#
# Changes versus the previous version of this cell (result rows are unchanged):
#   * `parsed` no longer builds the unused calcFeatures string or selects unused columns.
#   * every table is fully qualified and back-quoted (the client project is
#     prj-prod-dataplatform, so the loan master reference resolves to the same table).
#   * the loan master join is written as INNER JOIN, which is what the WHERE filters on
#     loanmaster columns already made it.
#
# NOTE(review): `parsed` is not de-duplicated per loan, so a loan scored more than once
# contributes one row per distinct prediction/start_time; drop_duplicates() below only
# removes rows identical across all selected columns. Confirm this is intended.
sq = r"""
WITH parsed AS (
  -- Stack-model scores; only the columns consumed downstream are selected.
  SELECT customerId, digitalLoanAccountId, start_time, prediction, trenchCategory
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName IN ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
    AND modelVersionId = 'v1'
),
model_run AS (
  -- Latest request payload per (customer, loan, model); fallback source for trenchCategory.
  SELECT customerId, digitalLoanAccountId, modelName, publish_time, requestPayload AS requestPayload_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_request_details`
  WHERE modelName = 'Alpha-Cash-Model-response'
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId, modelName ORDER BY publish_time DESC) = 1
),
modelname AS (
  SELECT
    p.customerId,
    p.digitalLoanAccountId,
    p.start_time,
    p.prediction AS aStackScore,
    COALESCE(p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) AS trenchCategory
  FROM parsed p
  LEFT JOIN model_run m ON m.digitalLoanAccountId = p.digitalLoanAccountId
),
deliquency AS (
  -- Default flags (def*) and maturity flags (flg_mature_*) for every target;
  -- this cell reads deffpd0 and flg_mature_fpd0.
  SELECT
    loanAccountNumber,
    CASE WHEN obs_min_inst_def0 >= 1 AND min_inst_def0 = 1 THEN 1 ELSE 0 END AS deffpd0,
    CASE WHEN obs_min_inst_def10 >= 1 AND min_inst_def10 = 1 THEN 1 ELSE 0 END AS deffpd10,
    CASE WHEN obs_min_inst_def30 >= 1 AND min_inst_def30 = 1 THEN 1 ELSE 0 END AS deffpd30,
    CASE WHEN obs_min_inst_def30 >= 2 AND min_inst_def30 IN (1, 2) THEN 1 ELSE 0 END AS deffspd30,
    CASE WHEN obs_min_inst_def30 >= 3 AND min_inst_def30 IN (1, 2, 3) THEN 1 ELSE 0 END AS deffstpd30,
    CASE WHEN obs_min_inst_def0 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd0,
    CASE WHEN obs_min_inst_def10 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd10,
    CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd30,
    CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END AS flg_mature_fspd_30,
    CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END AS flg_mature_fstpd_30
  FROM `prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data`
)
SELECT
  r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME)) AS appln_submit_datetime,
  DATE(loanmaster.disbursementDateTime) AS disbursementdate,
  FORMAT_DATE('%Y-%m', COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME))) AS Application_month,
  'Test' AS Data_selection,
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type
FROM modelname r
INNER JOIN `prj-prod-dataplatform.risk_credit_mis.loan_master_table` loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
INNER JOIN deliquency del
  ON del.loanAccountNumber = loanmaster.loanAccountNumber
WHERE loanmaster.flagDisbursement = 1
  AND loanmaster.disbursementDateTime IS NOT NULL
  AND r.aStackScore IS NOT NULL
  AND del.flg_mature_fpd0 = 1
  AND r.trenchCategory = 'Trench 2'
"""
# Run the query and drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,1454987,b9606784-15f2-47b0-8bae-0720ee5ac25d,60814549870014,0.1214058412835968,Trench 2,2025-10-25 10:59:14,2025-10-25,2025-10,Test,0,1,Quick
1,3516740,45a68f17-c2ea-489f-84b7-4f82a10a51cc,60835167400011,0.5347171614132483,Trench 2,2025-10-25 09:44:57,2025-10-26,2025-10,Test,0,1,Quick
2,3534958,a126b4ee-2f18-46ca-899e-31db97f88866,60835349580019,0.4266963455199357,Trench 2,2025-10-25 10:09:55,2025-10-25,2025-10,Test,0,1,Quick
3,3225809,dab33f31-f4bd-4ebe-bbf1-60cd938498f3,60832258090018,0.3126232420942927,Trench 2,2025-10-25 11:51:55,2025-10-26,2025-10,Test,0,1,Quick
4,3299101,467fd287-e078-41cd-9d9b-78fe4fa796f0,60832991010011,0.4946866534618446,Trench 2,2025-10-25 11:15:41,2025-10-25,2025-10,Test,0,1,Quick


In [1511]:
# Hold the Test extract as df1; dfd is reused by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1512]:
# Train extract (dap_ds_poweruser_playground.ml_training_model_run_details): Trench 2
# disbursed loans whose FPD0 observation window has matured (flg_mature_fpd0 = 1).
#
# Changes versus the previous version of this cell (result rows are unchanged):
#   * `parsed` no longer builds the unused calcFeatures string or selects unused columns.
#   * every table is fully qualified and back-quoted (the client project is
#     prj-prod-dataplatform, so the loan master reference resolves to the same table).
#   * the loan master join is written as INNER JOIN, which is what the WHERE filters on
#     loanmaster columns already made it.
#
# NOTE(review): `parsed` is not de-duplicated per loan; drop_duplicates() below only
# removes rows identical across all selected columns. Confirm this is intended.
sq = """
WITH parsed AS (
  -- Stack-model scores; only the columns consumed downstream are selected.
  SELECT customerId, digitalLoanAccountId, start_time, prediction, trenchCategory
  FROM `prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details`
  WHERE modelDisplayName IN ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
    AND modelVersionId = 'v1'
),
modelname AS (
  SELECT
    customerId,
    digitalLoanAccountId,
    start_time,
    prediction AS aStackScore,
    trenchCategory
  FROM parsed
),
deliquency AS (
  -- Default flags (def*) and maturity flags (flg_mature_*) for every target;
  -- this cell reads deffpd0 and flg_mature_fpd0.
  SELECT
    loanAccountNumber,
    CASE WHEN obs_min_inst_def0 >= 1 AND min_inst_def0 = 1 THEN 1 ELSE 0 END AS deffpd0,
    CASE WHEN obs_min_inst_def10 >= 1 AND min_inst_def10 = 1 THEN 1 ELSE 0 END AS deffpd10,
    CASE WHEN obs_min_inst_def30 >= 1 AND min_inst_def30 = 1 THEN 1 ELSE 0 END AS deffpd30,
    CASE WHEN obs_min_inst_def30 >= 2 AND min_inst_def30 IN (1, 2) THEN 1 ELSE 0 END AS deffspd30,
    CASE WHEN obs_min_inst_def30 >= 3 AND min_inst_def30 IN (1, 2, 3) THEN 1 ELSE 0 END AS deffstpd30,
    CASE WHEN obs_min_inst_def0 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd0,
    CASE WHEN obs_min_inst_def10 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd10,
    CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd30,
    CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END AS flg_mature_fspd_30,
    CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END AS flg_mature_fstpd_30
  FROM `prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data`
)
SELECT
  r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME)) AS appln_submit_datetime,
  DATE(loanmaster.disbursementDateTime) AS disbursementdate,
  FORMAT_DATE('%Y-%m', COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME))) AS Application_month,
  'Train' AS Data_selection,
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type
FROM modelname r
INNER JOIN `prj-prod-dataplatform.risk_credit_mis.loan_master_table` loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
INNER JOIN deliquency del
  ON del.loanAccountNumber = loanmaster.loanAccountNumber
WHERE loanmaster.flagDisbursement = 1
  AND loanmaster.disbursementDateTime IS NOT NULL
  AND r.aStackScore IS NOT NULL
  AND del.flg_mature_fpd0 = 1
  AND r.trenchCategory = 'Trench 2'
"""

# Run the query and drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,1871036,a2d4eb8c-3152-40ad-b9a2-9a4c53373ad8,60818710360018,0.571088,Trench 2,2024-10-06 10:05:56,2024-10-06,2024-10,Train,1,1,Quick
1,2641361,87448c7b-f7f8-4b75-bf38-1f3627f4f74c,60826413610018,0.898309,Trench 2,2024-10-08 21:55:58,2024-10-09,2024-10,Train,1,1,Quick
2,2675827,eb08be48-6bfc-4edd-9598-025470a4a1bf,60826758270012,0.721784,Trench 2,2024-10-21 17:10:14,2024-10-22,2024-10,Train,0,1,Quick
3,2399461,862d2734-ff89-4fbe-af7d-78a2b4e14136,60823994610028,0.115474,Trench 2,2024-10-01 00:31:27,2024-10-01,2024-10,Train,0,1,Quick
4,2500985,5234da51-d143-4932-a820-e0eb6e82ed41,60825009850014,0.680799,Trench 2,2024-10-23 16:04:23,2024-10-23,2024-10,Train,1,1,Quick


In [1513]:
# Hold the Train extract as df2 so it can be stacked with the Test extract (df1).
df2 = dfd.copy(deep=True)

In [1514]:
# Stack Train (df2) on top of Test (df1).
# Empty extracts are left out before concatenating: passing an empty frame to
# pd.concat raises pandas' FutureWarning about empty entries, and in future pandas
# versions its dtypes would take part in deciding the result dtypes.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(non_empty_frames, ignore_index=True)
    if non_empty_frames
    else df2.iloc[0:0].copy()  # both extracts empty: keep an empty frame with the expected columns
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11661 entries, 0 to 11660
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11661 non-null  object        
 1   digitalLoanAccountId   11661 non-null  object        
 2   loanAccountNumber      11661 non-null  object        
 3   aStackScore            11661 non-null  object        
 4   trenchCategory         11661 non-null  object        
 5   appln_submit_datetime  11661 non-null  datetime64[us]
 6   disbursementdate       11661 non-null  dbdate        
 7   Application_month      11661 non-null  object        
 8   Data_selection         11661 non-null  object        
 9   deffpd0                11661 non-null  Int64         
 10  flg_mature_fpd0        11661 non-null  Int64         
 11  new_loan_type          11661 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1515]:
# aStackScore is an object column in the .info() output above; convert it to numeric.
# errors='coerce' turns any value that cannot be parsed into NaN instead of raising.
df_concat = df_concat.assign(
    aStackScore=pd.to_numeric(df_concat['aStackScore'], errors='coerce')
)

In [1516]:
# df_concat.to_csv(r"aStackScoretrench2fpd0.csv")

In [1517]:
# Gini of aStackScore against the deffpd0 flag, labelled 'FPD0', with
# new_loan_type passed as the product column. This call replaces the earlier
# calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd0', 'FPD0') variant.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1518]:
# Keep the Trench 2 FPD0 result as f0; this rebinds the f0 that the earlier merge cell used.
f0 = gini_results.copy(deep=True)
f0.head(5)

Unnamed: 0,start_date,end_date,aStackScore_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.465684,Week,aStackScore,1.1.0,FPD0,Overall
1,2024-10-01,2024-10-31,0.422023,Month,aStackScore,1.1.0,FPD0,Overall
2,2024-10-07,2024-10-13,0.472046,Week,aStackScore,1.1.0,FPD0,Overall
3,2024-10-14,2024-10-20,0.415392,Week,aStackScore,1.1.0,FPD0,Overall
4,2024-10-21,2024-10-27,0.342969,Week,aStackScore,1.1.0,FPD0,Overall


## FPD10

## Test

In [1519]:
# Test extract (audit_balance.ml_model_run_details): Trench 2 disbursed loans whose
# FPD10 observation window has matured (flg_mature_fpd10 = 1).
#
# Changes versus the previous version of this cell (result rows are unchanged):
#   * `parsed` no longer builds the unused calcFeatures string or selects unused columns.
#   * every table is fully qualified and back-quoted (the client project is
#     prj-prod-dataplatform, so the loan master reference resolves to the same table).
#   * the loan master join is written as INNER JOIN, which is what the WHERE filters on
#     loanmaster columns already made it.
#
# NOTE(review): `parsed` is not de-duplicated per loan, so a loan scored more than once
# contributes one row per distinct prediction/start_time; drop_duplicates() below only
# removes rows identical across all selected columns. Confirm this is intended.
sq = r"""
WITH parsed AS (
  -- Stack-model scores; only the columns consumed downstream are selected.
  SELECT customerId, digitalLoanAccountId, start_time, prediction, trenchCategory
  FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
  WHERE modelDisplayName IN ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
    AND modelVersionId = 'v1'
),
model_run AS (
  -- Latest request payload per (customer, loan, model); fallback source for trenchCategory.
  SELECT customerId, digitalLoanAccountId, modelName, publish_time, requestPayload AS requestPayload_clean
  FROM `prj-prod-dataplatform.audit_balance.ml_request_details`
  WHERE modelName = 'Alpha-Cash-Model-response'
  QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId, modelName ORDER BY publish_time DESC) = 1
),
modelname AS (
  SELECT
    p.customerId,
    p.digitalLoanAccountId,
    p.start_time,
    p.prediction AS aStackScore,
    COALESCE(p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) AS trenchCategory
  FROM parsed p
  LEFT JOIN model_run m ON m.digitalLoanAccountId = p.digitalLoanAccountId
),
deliquency AS (
  -- Default flags (def*) and maturity flags (flg_mature_*) for every target;
  -- this cell reads deffpd10 and flg_mature_fpd10.
  SELECT
    loanAccountNumber,
    CASE WHEN obs_min_inst_def0 >= 1 AND min_inst_def0 = 1 THEN 1 ELSE 0 END AS deffpd0,
    CASE WHEN obs_min_inst_def10 >= 1 AND min_inst_def10 = 1 THEN 1 ELSE 0 END AS deffpd10,
    CASE WHEN obs_min_inst_def30 >= 1 AND min_inst_def30 = 1 THEN 1 ELSE 0 END AS deffpd30,
    CASE WHEN obs_min_inst_def30 >= 2 AND min_inst_def30 IN (1, 2) THEN 1 ELSE 0 END AS deffspd30,
    CASE WHEN obs_min_inst_def30 >= 3 AND min_inst_def30 IN (1, 2, 3) THEN 1 ELSE 0 END AS deffstpd30,
    CASE WHEN obs_min_inst_def0 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd0,
    CASE WHEN obs_min_inst_def10 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd10,
    CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END AS flg_mature_fpd30,
    CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END AS flg_mature_fspd_30,
    CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END AS flg_mature_fstpd_30
  FROM `prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data`
)
SELECT
  r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME)) AS appln_submit_datetime,
  DATE(loanmaster.disbursementDateTime) AS disbursementdate,
  FORMAT_DATE('%Y-%m', COALESCE(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime), CAST(r.start_time AS DATETIME))) AS Application_month,
  'Test' AS Data_selection,
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type
FROM modelname r
INNER JOIN `prj-prod-dataplatform.risk_credit_mis.loan_master_table` loanmaster
  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
INNER JOIN deliquency del
  ON del.loanAccountNumber = loanmaster.loanAccountNumber
WHERE loanmaster.flagDisbursement = 1
  AND loanmaster.disbursementDateTime IS NOT NULL
  AND r.aStackScore IS NOT NULL
  AND del.flg_mature_fpd10 = 1
  AND r.trenchCategory = 'Trench 2'
"""
# Run the query and drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3139917,a8252fd8-5637-4984-b3d4-0a77e16fa284,60831399170016,0.3402456034136671,Trench 2,2025-10-19 08:41:15,2025-10-23,2025-10,Test,0,1,Quick
1,3541483,091d7f32-8b0c-4d7f-86e1-049f942d29d2,60835414830018,0.2705871173807799,Trench 2,2025-10-19 08:30:59,2025-10-19,2025-10,Test,0,1,Quick
2,3656345,5f82fead-cc8f-4f8d-8020-84e544e03a78,60836563450011,0.4228749291969025,Trench 2,2025-10-19 08:23:40,2025-10-19,2025-10,Test,0,1,Quick
3,1058119,2c00067a-3bac-43c6-9f2b-d2151d01647e,60810581190052,0.0497492504764832,Trench 2,2025-10-19 10:58:26,2025-10-19,2025-10,Test,0,1,Quick
4,2381026,5fb61da0-d361-4bb2-9e15-8d0c1ca6138c,60823810260018,0.3594927043613093,Trench 2,2025-10-19 11:56:52,2025-10-19,2025-10,Test,0,1,Quick


In [1520]:
# Keep the FPD10 Test extract under its own name; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1521]:
# FPD10 / Train population.
# Same shape as the Test extract, but the scores are read from
# dap_ds_poweruser_playground.ml_training_model_run_details and trenchCategory is taken
# straight from that table. The final filter keeps disbursed 'Trench 2' loans with a
# non-null score and flg_mature_fpd10 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2419936,9197ef27-27bb-40d5-806a-387b829abbda,60824199360011,0.419266,Trench 2,2024-10-27 14:31:19,2024-10-27,2024-10,Train,1,1,Quick
1,1442401,a9741064-8147-48e9-974f-b4b5e2fe9a9f,60814424010022,0.437693,Trench 2,2024-10-10 13:51:29,2024-10-10,2024-10,Train,0,1,Quick
2,2562923,38507857-ce6c-46da-9805-d3577417771d,60825629230018,0.512086,Trench 2,2024-10-25 18:20:30,2024-10-25,2024-10,Train,0,1,Quick
3,2444139,8e07e1f3-7f4b-4c54-9f85-fc7159aa701f,60824441390014,0.665853,Trench 2,2024-10-16 14:16:08,2024-10-16,2024-10,Train,0,1,Quick
4,2600528,a7278798-dcc1-4e7b-b938-4c00bea1fe11,60826005280019,0.229512,Trench 2,2024-10-09 19:42:13,2024-10-09,2024-10,Train,0,1,Quick


In [1522]:
# Keep the FPD10 Train extract under its own name so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [1523]:
# Stack Train (df2) on top of Test (df1).
# Empty extracts are left out first: pandas deprecates letting empty frames take part in the
# result-dtype decision and emits a FutureWarning when one is passed to concat.
# If every extract is empty, fall back to the originals so the (empty) schema is still returned.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11114 entries, 0 to 11113
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11114 non-null  object        
 1   digitalLoanAccountId   11114 non-null  object        
 2   loanAccountNumber      11114 non-null  object        
 3   aStackScore            11114 non-null  object        
 4   trenchCategory         11114 non-null  object        
 5   appln_submit_datetime  11114 non-null  datetime64[us]
 6   disbursementdate       11114 non-null  dbdate        
 7   Application_month      11114 non-null  object        
 8   Data_selection         11114 non-null  object        
 9   deffpd10               11114 non-null  Int64         
 10  flg_mature_fpd10       11114 non-null  Int64         
 11  new_loan_type          11114 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1524]:
# info() above reports aStackScore as object dtype after the concat, so convert it to numeric.
# errors='coerce' replaces any value that cannot be parsed with NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [1525]:
# df_concat.to_csv(r"aStackScoretrench2fpd10.csv")

In [1526]:
# Variant without the product split, kept for reference:
#   calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd10', 'FPD10')
# Gini of aStackScore against deffpd10, with new_loan_type passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1527]:
# Snapshot the FPD10 Gini table as f1; `gini_results` is overwritten by the FPD30 section.
f1 = gini_results.copy()
f1.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.615146,Week,aStackScore,1.1.0,FPD10,Overall
1,2024-10-01,2024-10-31,0.547853,Month,aStackScore,1.1.0,FPD10,Overall
2,2024-10-07,2024-10-13,0.591411,Week,aStackScore,1.1.0,FPD10,Overall
3,2024-10-14,2024-10-20,0.555232,Week,aStackScore,1.1.0,FPD10,Overall
4,2024-10-21,2024-10-27,0.428945,Week,aStackScore,1.1.0,FPD10,Overall


## FPD30

## Test

In [1528]:
# FPD30 / Test population.
# Scores come from audit_balance.ml_model_run_details (model version 'v1') and are joined to
# loan_master_table and the delinquency flags. The final filter keeps disbursed 'Trench 2'
# loans with a non-null score and flg_mature_fpd30 = 1.
# Aligned with the FPD10 / FSPD30 / FSTPD30 Test extracts: when the run record carries no
# trenchCategory, it is regex-extracted from the most recent 'Alpha-Cash-Model-response'
# request payload. Without this fallback such rows were silently dropped by the
# trenchCategory filter in this section only, so the populations were not comparable.
# The literal is a raw string so the \" inside the REGEXP_EXTRACT pattern is passed to
# BigQuery exactly as written.
sq = r"""
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details`
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
,
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  'Test' Data_selection,
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2473116,70dedacb-577f-433e-bfbf-4d57973ff5ea,60824731160018,0.4871555473798478,Trench 2,2025-10-03 20:45:57,2025-10-03,2025-10,Test,0,1,Quick
1,3479077,628bb8f9-78bd-4a36-bd17-80d4f85852d9,60834790770019,0.4693781318585817,Trench 2,2025-10-03 19:44:09,2025-10-03,2025-10,Test,0,1,Quick
2,3190544,e5324fe5-34e5-4718-803c-84b1b2402b7d,60831905440011,0.2816498485031286,Trench 2,2025-10-05 12:24:42,2025-10-06,2025-10,Test,0,1,Quick
3,3230169,9ed41d23-400d-4654-b0a7-dbd7e514bdfa,60832301690022,0.3552711560248203,Trench 2,2025-10-05 09:38:47,2025-10-05,2025-10,Test,0,1,Quick
4,3090214,8b4e0f30-db10-4bff-ba1d-fd96ffdc34b1,60830902140011,0.489326181375378,Trench 2,2025-10-05 20:30:05,2025-10-05,2025-10,Test,0,1,Quick


In [1529]:
# Keep the FPD30 Test extract under its own name; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1530]:
# FPD30 / Train population.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details (model
# version 'v1') and joined to loan_master_table and the delinquency flags. The final filter
# keeps disbursed 'Trench 2' loans with a non-null score and flg_mature_fpd30 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2335032,a60dd0e9-7683-463c-8b6b-3356daa19ac3,60823350320017,0.622708,Trench 2,2024-10-24 21:05:54,2024-10-24,2024-10,Train,0,1,Quick
1,1232676,c2326a3c-8694-4ce0-b7a5-1f84871c39e6,60812326760046,0.362886,Trench 2,2024-10-22 01:13:53,2024-10-23,2024-10,Train,1,1,Quick
2,2372472,35a7a43e-4c6b-4ff8-8125-3973d46ab2f9,60823724720015,0.837196,Trench 2,2024-10-21 14:29:39,2024-10-21,2024-10,Train,0,1,Quick
3,1256339,957dbc83-6a8a-4364-b8b9-17d4886f6fd9,60812563390014,0.395238,Trench 2,2024-10-07 09:38:46,2024-10-07,2024-10,Train,1,1,Quick
4,2307413,60d41ca5-2de9-47e4-b1ee-a2ed3a981486,60823074130019,0.765518,Trench 2,2024-10-03 22:18:07,2024-10-06,2024-10,Train,1,1,Quick


In [1531]:
# Keep the FPD30 Train extract under its own name so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [1532]:
# Stack Train (df2) on top of Test (df1).
# Empty extracts are left out first: pandas deprecates letting empty frames take part in the
# result-dtype decision and emits a FutureWarning when one is passed to concat.
# If every extract is empty, fall back to the originals so the (empty) schema is still returned.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10735 entries, 0 to 10734
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10735 non-null  object        
 1   digitalLoanAccountId   10735 non-null  object        
 2   loanAccountNumber      10735 non-null  object        
 3   aStackScore            10735 non-null  object        
 4   trenchCategory         10735 non-null  object        
 5   appln_submit_datetime  10735 non-null  datetime64[us]
 6   disbursementdate       10735 non-null  dbdate        
 7   Application_month      10735 non-null  object        
 8   Data_selection         10735 non-null  object        
 9   deffpd30               10735 non-null  Int64         
 10  flg_mature_fpd30       10735 non-null  Int64         
 11  new_loan_type          10735 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1533]:
# info() above reports aStackScore as object dtype after the concat, so convert it to numeric.
# errors='coerce' replaces any value that cannot be parsed with NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [1534]:
# df_concat.to_csv(r"aStackScoretrench2fpd30.csv")

In [1535]:
# Variant without the product split, kept for reference:
#   calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd30', 'FPD30')
# Gini of aStackScore against deffpd30, with new_loan_type passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1536]:
# Snapshot the FPD30 Gini table as f2; `gini_results` is overwritten by the FSPD30 section.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.672161,Week,aStackScore,1.1.0,FPD30,Overall
1,2024-10-01,2024-10-31,0.559772,Month,aStackScore,1.1.0,FPD30,Overall
2,2024-10-07,2024-10-13,0.627584,Week,aStackScore,1.1.0,FPD30,Overall
3,2024-10-14,2024-10-20,0.529913,Week,aStackScore,1.1.0,FPD30,Overall
4,2024-10-21,2024-10-27,0.448268,Week,aStackScore,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1537]:
# FSPD30 / Test population.
# Scores come from audit_balance.ml_model_run_details (model version 'v1') and are joined to
# loan_master_table and the delinquency flags. The final filter keeps disbursed 'Trench 2'
# loans with a non-null score and flg_mature_fspd_30 = 1.
# When the run record carries no trenchCategory, it is regex-extracted from the most recent
# 'Alpha-Cash-Model-response' request payload (latest per customer / loan / model name).
# The literal is a raw string so the \" inside the REGEXP_EXTRACT pattern is passed to
# BigQuery exactly as written.
# NOTE(review): the saved output of this cell shows only the header row, i.e. no Test rows
# met the filter on that run — confirm this is expected.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1538]:
# Keep the FSPD30 Test extract under its own name; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1539]:
# FSPD30 / Train population.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details (model
# version 'v1') and joined to loan_master_table and the delinquency flags. The final filter
# keeps disbursed 'Trench 2' loans with a non-null score and flg_mature_fspd_30 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2858650,51043bbc-7303-4a5d-9c8c-7c592c48ac80,60828586500019,0.550407,Trench 2,2024-10-22 11:38:03,2024-10-22,2024-10,Train,1,1,Quick
1,2537031,3e288216-a1de-4a3d-80e2-65a405c94672,60825370310016,0.486758,Trench 2,2024-10-11 20:32:24,2024-10-11,2024-10,Train,1,1,Quick
2,1880481,0750523f-bea0-4db7-8737-51cf32dd9bff,60818804810024,0.727121,Trench 2,2024-10-23 07:06:21,2024-10-23,2024-10,Train,1,1,Quick
3,2650571,9fb8443f-0099-45a4-9581-3d0b6497ac1b,60826505710025,0.592614,Trench 2,2024-10-15 11:41:13,2024-10-15,2024-10,Train,1,1,Quick
4,2768574,6bea5b9b-fc9f-4c15-b72b-833a65492991,60827685740013,0.513767,Trench 2,2024-10-27 14:37:42,2024-10-27,2024-10,Train,0,1,Quick


In [1540]:
# Keep the FSPD30 Train extract under its own name so it can be stacked with the Test extract (df1).
df2 = dfd.copy()

In [1541]:
# Stack Train (df2) on top of Test (df1).
# The Test extract for this section came back with no rows, and passing an empty frame to
# concat triggers a pandas FutureWarning (empty entries will stop being ignored when the
# result dtypes are determined). Dropping empty frames first keeps today's result and
# silences the warning.
# If every extract is empty, fall back to the originals so the (empty) schema is still returned.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10234 entries, 0 to 10233
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10234 non-null  Int64         
 1   digitalLoanAccountId   10234 non-null  object        
 2   loanAccountNumber      10234 non-null  object        
 3   aStackScore            10234 non-null  float64       
 4   trenchCategory         10234 non-null  object        
 5   appln_submit_datetime  10234 non-null  datetime64[us]
 6   disbursementdate       10234 non-null  dbdate        
 7   Application_month      10234 non-null  object        
 8   Data_selection         10234 non-null  object        
 9   deffspd30              10234 non-null  Int64         
 10  flg_mature_fspd_30     10234 non-null  Int64         
 11  new_loan_type          10234 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1542]:
# info() above already reports aStackScore as float64 here, so this conversion is expected to
# leave the column unchanged; it mirrors the same step in the other sections.
# errors='coerce' replaces any value that cannot be parsed with NaN instead of raising.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [1543]:
# df_concat.to_csv(r"aStackScoretrench2fspd30.csv")

In [1544]:
# Variant without the product split, kept for reference:
#   calculate_periodic_gini(df_concat, 'aStackScore', 'deffspd30', 'FSPD30')
# Gini of aStackScore against deffspd30, with new_loan_type passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffspd30', 'FSPD30', product_column='new_loan_type'
)

In [1545]:
# Snapshot the FSPD30 Gini table as f3 so it does not depend on `gini_results` staying unchanged.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,aStackScore_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.583149,Week,aStackScore,1.1.0,FSPD30,Overall
1,2024-10-01,2024-10-31,0.578261,Month,aStackScore,1.1.0,FSPD30,Overall
2,2024-10-07,2024-10-13,0.604414,Week,aStackScore,1.1.0,FSPD30,Overall
3,2024-10-14,2024-10-20,0.615705,Week,aStackScore,1.1.0,FSPD30,Overall
4,2024-10-21,2024-10-27,0.454545,Week,aStackScore,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1546]:
# FSTPD30 / Test population.
# Scores come from audit_balance.ml_model_run_details (model version 'v1') and are joined to
# loan_master_table and the delinquency flags. The final filter keeps disbursed 'Trench 2'
# loans with a non-null score and flg_mature_fstpd_30 = 1.
# When the run record carries no trenchCategory, it is regex-extracted from the most recent
# 'Alpha-Cash-Model-response' request payload (latest per customer / loan / model name).
# The literal is a raw string so the \" inside the REGEXP_EXTRACT pattern is passed to
# BigQuery exactly as written.
# NOTE(review): the saved output of this cell shows only the header row, i.e. no Test rows
# met the filter on that run — confirm this is expected.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1547]:
# Keep the FSTPD30 Test extract under its own name; `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1548]:
# FSTPD30 / Train population.
# Scores are read from dap_ds_poweruser_playground.ml_training_model_run_details (model
# version 'v1') and joined to loan_master_table and the delinquency flags. The final filter
# keeps disbursed 'Trench 2' loans with a non-null score and flg_mature_fstpd_30 = 1.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2858650,51043bbc-7303-4a5d-9c8c-7c592c48ac80,60828586500019,0.550407,Trench 2,2024-10-22 11:38:03,2024-10-22,2024-10,Train,1,1,Quick
1,2537031,3e288216-a1de-4a3d-80e2-65a405c94672,60825370310016,0.486758,Trench 2,2024-10-11 20:32:24,2024-10-11,2024-10,Train,1,1,Quick
2,1880481,0750523f-bea0-4db7-8737-51cf32dd9bff,60818804810024,0.727121,Trench 2,2024-10-23 07:06:21,2024-10-23,2024-10,Train,1,1,Quick
3,2650571,9fb8443f-0099-45a4-9581-3d0b6497ac1b,60826505710025,0.592614,Trench 2,2024-10-15 11:41:13,2024-10-15,2024-10,Train,1,1,Quick
4,2768574,6bea5b9b-fc9f-4c15-b72b-833a65492991,60827685740013,0.513767,Trench 2,2024-10-27 14:37:42,2024-10-27,2024-10,Train,0,1,Quick


In [1549]:
# Keep an independent snapshot of the Train result under df2.
df2 = dfd.copy(deep=True)

In [1550]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out of the concat: recent pandas emits a FutureWarning when
# empty inputs take part in the concat, and a future release will let their dtypes
# influence the result. If both frames are empty, fall back to df2 so the result is
# still an empty frame carrying the expected columns instead of raising.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9544 entries, 0 to 9543
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9544 non-null   Int64         
 1   digitalLoanAccountId   9544 non-null   object        
 2   loanAccountNumber      9544 non-null   object        
 3   aStackScore            9544 non-null   float64       
 4   trenchCategory         9544 non-null   object        
 5   appln_submit_datetime  9544 non-null   datetime64[us]
 6   disbursementdate       9544 non-null   dbdate        
 7   Application_month      9544 non-null   object        
 8   Data_selection         9544 non-null   object        
 9   deffstpd30             9544 non-null   Int64         
 10  flg_mature_fstpd_30    9544 non-null   Int64         
 11  new_loan_type          9544 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1551]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['aStackScore'] = df_concat['aStackScore'].pipe(pd.to_numeric, errors='coerce')

In [1552]:
# df_concat.to_csv(r"aStackScoretrench2fstpd30.csv")

In [1553]:
# gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffstpd30', 'FSTPD30')
# Gini of aStackScore against the deffstpd30 flag, labelled 'FSTPD30',
# using new_loan_type as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffstpd30', 'FSTPD30', product_column='new_loan_type'
)

In [1554]:
# Park the FSTPD30 Gini table under f4; gini_results is reused for each target.
f4 = gini_results.copy(deep=True)
f4.head()

Unnamed: 0,start_date,end_date,aStackScore_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.505994,Week,aStackScore,1.1.0,FSTPD30,Overall
1,2024-10-01,2024-10-31,0.530044,Month,aStackScore,1.1.0,FSTPD30,Overall
2,2024-10-07,2024-10-13,0.566274,Week,aStackScore,1.1.0,FSTPD30,Overall
3,2024-10-14,2024-10-20,0.548177,Week,aStackScore,1.1.0,FSTPD30,Overall
4,2024-10-21,2024-10-27,0.449289,Week,aStackScore,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1555]:
import functools

# Per-target Gini frames (f0..f4) and the key columns they all share.
dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'loan_type', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on ``common_columns``.

    NOTE(review): 'bad_rate' is part of the key and each frame appears to carry
    its own bad_rate label, so rows from different frames do not line up; the
    result behaves like a stacked table with one populated gini column per row.
    Confirm this long/sparse layout is what the downstream table expects.
    """
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the frames left to right into a single table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aStackScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'aStackScore_FPD10_gini', 'aStackScore_FPD30_gini',
       'aStackScore_FSPD30_gini', 'aStackScore_FSTPD30_gini'],
      dtype=object)

In [1556]:
# Reorder to keys-first, tag the gini columns with the trench ("t2"),
# and attach the constant descriptor columns for this model/trench.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
        'aStackScore_FPD0_gini',
        'aStackScore_FPD10_gini',
        'aStackScore_FPD30_gini',
        'aStackScore_FSPD30_gini',
        'aStackScore_FSTPD30_gini',
    ]]
    .copy()
    .rename(columns={
        'aStackScore_FPD0_gini': 'aStackScore_t2_FPD0_gini',
        'aStackScore_FPD10_gini': 'aStackScore_t2_FPD10_gini',
        'aStackScore_FPD30_gini': 'aStackScore_t2_FPD30_gini',
        'aStackScore_FSPD30_gini': 'aStackScore_t2_FSPD30_gini',
        'aStackScore_FSTPD30_gini': 'aStackScore_t2_FSTPD30_gini',
    })
    .assign(
        Trench_category='Trench 2',
        Model_display_name='alpha_stack_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                     datetime64[ns]
end_date                       datetime64[ns]
period                                 object
Model_Name                             object
version                                object
loan_type                              object
bad_rate                               object
aStackScore_t2_FPD0_gini              float64
aStackScore_t2_FPD10_gini             float64
aStackScore_t2_FPD30_gini             float64
aStackScore_t2_FSPD30_gini            float64
aStackScore_t2_FSTPD30_gini           float64
Trench_category                        object
Model_display_name                     object
Product_type                           object
dtype: object

In [1557]:
# Preview the first five rows of the combined Trench 2 table.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,aStackScore_t2_FPD0_gini,aStackScore_t2_FPD10_gini,aStackScore_t2_FPD30_gini,aStackScore_t2_FSPD30_gini,aStackScore_t2_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,aStackScore,1.1.0,Overall,FPD0,0.465684,,,,,Trench 2,alpha_stack_model_cash,CASH
1,2024-10-01,2024-10-31,Month,aStackScore,1.1.0,Overall,FPD0,0.422023,,,,,Trench 2,alpha_stack_model_cash,CASH
2,2024-10-07,2024-10-13,Week,aStackScore,1.1.0,Overall,FPD0,0.472046,,,,,Trench 2,alpha_stack_model_cash,CASH
3,2024-10-14,2024-10-20,Week,aStackScore,1.1.0,Overall,FPD0,0.415392,,,,,Trench 2,alpha_stack_model_cash,CASH
4,2024-10-21,2024-10-27,Week,aStackScore,1.1.0,Overall,FPD0,0.342969,,,,,Trench 2,alpha_stack_model_cash,CASH


In [1558]:
# Load final_df into the Trench 2 Gini table in BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_stack_model_t2_v1_gini5"

# WRITE_TRUNCATE replaces the table contents on every run (use "WRITE_APPEND" to add rows instead).
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Block until the load job finishes



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=5984a3ee-91c9-451a-a2bc-240d8e5e4ea4>

# Trench 3

## FPD0

## Test

In [1559]:
# Test cohort for FPD0, Trench 3: alpha cash stack model v1 scores from
# audit_balance.ml_model_run_details. trenchCategory falls back to a value
# regex-extracted from the latest 'Alpha-Cash-Model-response' request payload.
# Keeps disbursed loans with flg_mature_fpd0 = 1 and a non-null score.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop exact duplicate rows, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,3212480,fa05f5de-6c42-41c9-9952-d4f15bcac184,60832124800032,0.508028370473362,Trench 3,2025-10-12 11:43:51,2025-10-12,2025-10,Test,1,1,Quick
1,2691227,874f7fbd-3802-4b8e-9f09-30e7e0cbafc1,60826912270034,0.3713260359628709,Trench 3,2025-10-12 09:32:17,2025-10-13,2025-10,Test,0,1,Quick
2,3307972,578e53fa-13fb-4e8b-9899-ac0098ee3940,60833079720021,0.432877412307843,Trench 3,2025-10-12 09:18:42,2025-10-12,2025-10,Test,0,1,Quick
3,3044250,d96241e1-64eb-438b-846d-a4724161329a,60830442500023,0.3297200597029336,Trench 3,2025-10-12 13:37:58,2025-10-12,2025-10,Test,0,1,Quick
4,1590309,4afcc385-7cbe-412f-a23c-3d0b54398d42,60815903090046,0.2693422999839771,Trench 3,2025-10-12 16:45:50,2025-10-12,2025-10,Test,0,1,Quick


In [1560]:
# Keep an independent snapshot of the Test result under df1.
df1 = dfd.copy(deep=True)

## Train

In [1561]:
# Train cohort for FPD0, Trench 3: alpha cash stack model v1 scores from the
# training run table, joined to the loan master and the delinquency flags.
# Keeps disbursed loans with flg_mature_fpd0 = 1 and a non-null score.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop exact duplicate rows, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2588018,6a647e1b-8c12-457f-a91b-8dd7028b6280,60825880180026,0.382413,Trench 3,2024-10-28 11:13:14,2024-10-28,2024-10,Train,0,1,Quick
1,2246184,2ed25edd-6c18-4190-8489-144fa5f8b4f5,60822461840023,0.349081,Trench 3,2024-10-18 19:34:44,2024-10-18,2024-10,Train,0,1,Quick
2,2316681,6eb1b3a3-1299-4a15-aa32-a089a0a2defc,60823166810034,0.434284,Trench 3,2024-10-29 18:27:25,2024-10-31,2024-10,Train,0,1,Quick
3,2380572,f1f091aa-06e9-405a-8bfb-03ed92fd3bfd,60823805720023,0.585701,Trench 3,2024-10-12 09:52:17,2024-10-12,2024-10,Train,0,1,Quick
4,1743906,50883fda-af8e-49a5-99d7-e51a901f184b,60817439060041,0.288597,Trench 3,2024-10-09 17:58:02,2024-10-09,2024-10,Train,0,1,Quick


In [1562]:
# Keep an independent snapshot of the Train result under df2.
df2 = dfd.copy(deep=True)

In [1563]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out of the concat: recent pandas emits a FutureWarning when
# empty inputs take part in the concat, and a future release will let their dtypes
# influence the result. If both frames are empty, fall back to df2 so the result is
# still an empty frame carrying the expected columns instead of raising.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11319 entries, 0 to 11318
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11319 non-null  object        
 1   digitalLoanAccountId   11319 non-null  object        
 2   loanAccountNumber      11319 non-null  object        
 3   aStackScore            11319 non-null  object        
 4   trenchCategory         11319 non-null  object        
 5   appln_submit_datetime  11319 non-null  datetime64[us]
 6   disbursementdate       11319 non-null  dbdate        
 7   Application_month      11319 non-null  object        
 8   Data_selection         11319 non-null  object        
 9   deffpd0                11319 non-null  Int64         
 10  flg_mature_fpd0        11319 non-null  Int64         
 11  new_loan_type          11319 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1564]:
# The concatenated score column is object-typed here; force it to numeric.
# Values that cannot be parsed become NaN.
df_concat['aStackScore'] = df_concat['aStackScore'].pipe(pd.to_numeric, errors='coerce')

In [1565]:
# df_concat.to_csv(r"aStackScoretrench3fpd0.csv")

In [1566]:
# gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd0', 'FPD0')
# Gini of aStackScore against the deffpd0 flag, labelled 'FPD0',
# using new_loan_type as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1567]:
# Park the FPD0 Gini table under f0; gini_results is reused for each target.
f0 = gini_results.copy(deep=True)
f0.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.380488,Week,aStackScore,1.1.0,FPD0,Overall
1,2024-10-01,2024-10-31,0.365971,Month,aStackScore,1.1.0,FPD0,Overall
2,2024-10-07,2024-10-13,0.357708,Week,aStackScore,1.1.0,FPD0,Overall
3,2024-10-14,2024-10-20,0.459742,Week,aStackScore,1.1.0,FPD0,Overall
4,2024-10-21,2024-10-27,0.378547,Week,aStackScore,1.1.0,FPD0,Overall


## FPD10

## Test

In [1568]:
# Test cohort for FPD10, Trench 3: alpha cash stack model v1 scores from
# audit_balance.ml_model_run_details. trenchCategory falls back to a value
# regex-extracted from the latest 'Alpha-Cash-Model-response' request payload.
# Keeps disbursed loans with flg_mature_fpd10 = 1 and a non-null score.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
    loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop exact duplicate rows, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2652688,879f37b7-c5e9-4fc5-a91e-14cb39917af4,60826526880025,0.4406135699278077,Trench 3,2025-09-25 10:12:16,2025-09-25,2025-09,Test,0,1,Quick
1,2904565,4d1b2318-42fb-429b-b833-aedae0f0c07e,60829045650026,0.612960308829296,Trench 3,2025-09-25 20:22:21,2025-09-25,2025-09,Test,0,1,Quick
2,3284580,5347c420-2fd6-486f-a896-d270a8862f62,60832845800038,0.4894251214589769,Trench 3,2025-10-04 09:06:38,2025-10-05,2025-10,Test,0,1,Quick
3,3097677,6353794c-2484-4d7b-8d7e-cd7663ae3b3a,60830976770031,0.413338381428348,Trench 3,2025-10-04 16:38:06,2025-10-05,2025-10,Test,1,1,Quick
4,2523031,e6b7c0e2-e96f-457e-875e-87eb3063bfdb,60825230310023,0.4433331348560717,Trench 3,2025-10-04 20:47:58,2025-10-04,2025-10,Test,0,1,Quick


In [1569]:
# Keep an independent snapshot of the Test result under df1.
df1 = dfd.copy(deep=True)

## Train

In [1570]:
# Train cohort for FPD10, Trench 3: alpha cash stack model v1 scores from the
# training run table, joined to the loan master and the delinquency flags.
# Keeps disbursed loans with flg_mature_fpd10 = 1 and a non-null score.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop exact duplicate rows, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,1926940,2c57465f-855d-4630-9552-911519e0123d,60819269400021,0.470328,Trench 3,2024-10-17 18:10:03,2024-10-20,2024-10,Train,1,1,Quick
1,2421206,a4df6ed1-f3df-4c51-8c53-43b84eab154a,60824212060024,0.313658,Trench 3,2024-10-06 13:43:50,2024-10-06,2024-10,Train,0,1,Quick
2,2562710,e2be84aa-e53d-44c5-bf6c-efe241d758c5,60825627100025,0.412173,Trench 3,2024-10-16 17:32:22,2024-10-16,2024-10,Train,0,1,Quick
3,2554752,d2a7e732-a9ba-4389-aaf8-568128433b45,60825547520036,0.768049,Trench 3,2024-10-10 15:35:21,2024-10-10,2024-10,Train,1,1,Quick
4,2427358,256b5c4e-a553-40d0-bef2-019c8a967392,60824273580023,0.389422,Trench 3,2024-10-16 23:25:02,2024-10-17,2024-10,Train,0,1,Quick


In [1571]:
# Keep an independent snapshot of the Train result under df2.
df2 = dfd.copy(deep=True)

In [1572]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out of the concat: recent pandas emits a FutureWarning when
# empty inputs take part in the concat, and a future release will let their dtypes
# influence the result. If both frames are empty, fall back to df2 so the result is
# still an empty frame carrying the expected columns instead of raising.
df_concat = pd.concat(
    [frame for frame in (df2, df1) if not frame.empty] or [df2],
    ignore_index=True,
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11069 entries, 0 to 11068
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11069 non-null  object        
 1   digitalLoanAccountId   11069 non-null  object        
 2   loanAccountNumber      11069 non-null  object        
 3   aStackScore            11069 non-null  object        
 4   trenchCategory         11069 non-null  object        
 5   appln_submit_datetime  11069 non-null  datetime64[us]
 6   disbursementdate       11069 non-null  dbdate        
 7   Application_month      11069 non-null  object        
 8   Data_selection         11069 non-null  object        
 9   deffpd10               11069 non-null  Int64         
 10  flg_mature_fpd10       11069 non-null  Int64         
 11  new_loan_type          11069 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1573]:
# The concatenated score column is object-typed here; force it to numeric.
# Values that cannot be parsed become NaN.
df_concat['aStackScore'] = df_concat['aStackScore'].pipe(pd.to_numeric, errors='coerce')

In [1574]:
# df_concat.to_csv(r"aStackScoretrench3fpd10.csv")

In [1575]:
# gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd10', 'FPD10')
# Gini of aStackScore against the deffpd10 flag, labelled 'FPD10',
# using new_loan_type as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'aStackScore', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1576]:
# Park the FPD10 Gini table under f1; gini_results is reused for each target.
f1 = gini_results.copy(deep=True)
f1.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.61039,Week,aStackScore,1.1.0,FPD10,Overall
1,2024-10-01,2024-10-31,0.595177,Month,aStackScore,1.1.0,FPD10,Overall
2,2024-10-07,2024-10-13,0.45,Week,aStackScore,1.1.0,FPD10,Overall
3,2024-10-14,2024-10-20,0.759259,Week,aStackScore,1.1.0,FPD10,Overall
4,2024-10-21,2024-10-27,0.489115,Week,aStackScore,1.1.0,FPD10,Overall


## FPD30

## Test

In [1577]:
# Test cohort for FPD30, Trench 3: alpha cash stack model v1 scores from
# audit_balance.ml_model_run_details. trenchCategory falls back to a value
# regex-extracted from the latest 'Alpha-Cash-Model-response' request payload.
# Keeps disbursed loans with flg_mature_fpd30 = 1 and a non-null score.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, drop exact duplicate rows, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2652688,879f37b7-c5e9-4fc5-a91e-14cb39917af4,60826526880025,0.4406135699278077,Trench 3,2025-09-25 10:12:16,2025-09-25,2025-09,Test,0,1,Quick
1,2904565,4d1b2318-42fb-429b-b833-aedae0f0c07e,60829045650026,0.612960308829296,Trench 3,2025-09-25 20:22:21,2025-09-25,2025-09,Test,0,1,Quick
2,3450487,3f85a529-ed37-4e8e-a494-487058987b51,60834504870025,0.4463746422926189,Trench 3,2025-09-30 11:23:16,2025-10-01,2025-09,Test,0,1,Quick
3,2335443,855135e0-dce2-4a34-aef9-8d5a798c1bb5,60823354430038,0.343920966316955,Trench 3,2025-09-30 15:21:22,2025-10-01,2025-09,Test,0,1,Quick
4,3066495,75da5a84-b7b6-4c21-9775-4d01a16a903c,60830664950038,0.5206735494497335,Trench 3,2025-09-30 18:59:01,2025-10-01,2025-09,Test,0,1,Quick


In [1578]:
# Keep an independent snapshot of the Test result under df1.
df1 = dfd.copy(deep=True)

## Train

In [1579]:
# Train cohort for FPD30, Trench 3: alpha cash stack model v1 scores from the
# training run table, joined to the loan master and the delinquency flags.
# Keeps disbursed loans with flg_mature_fpd30 = 1 and a non-null score.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, drop exact duplicate rows, then preview.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2581090,934220a2-85a4-43a5-b4bf-0bb74228de84,60825810900026,0.523035,Trench 3,2024-10-11 12:34:37,2024-10-11,2024-10,Train,0,1,Quick
1,2676076,b0f0477b-08b7-4c4d-be30-989de4691975,60826760760029,0.605938,Trench 3,2024-10-28 20:27:53,2024-10-28,2024-10,Train,0,1,Quick
2,2945954,6a7cd7a8-8ba8-4001-9133-a0755a4209f9,60829459540037,0.634157,Trench 3,2024-10-29 12:34:27,2024-10-29,2024-10,Train,0,1,Quick
3,2440607,9304617e-e065-499b-b692-5eba9f0a5af3,60824406070023,0.264538,Trench 3,2024-10-28 16:46:52,2024-10-28,2024-10,Train,0,1,Quick
4,2506352,dd6daa5c-41ad-43c6-bfa3-a9b2155bbac0,60825063520029,0.449431,Trench 3,2024-10-14 07:36:39,2024-10-14,2024-10,Train,0,1,Quick


In [1580]:
# Snapshot the Train-split rows so they can be stacked with the Test split (df1).
df2 = dfd.copy()

In [1581]:
# Stack Train (df2) on top of Test (df1). Empty splits are skipped before the
# concat: pandas has deprecated letting empty frames take part in dtype
# resolution, and sibling sections of this notebook do produce an empty Test split.
split_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both splits are empty, fall back to an (empty) copy of df2 so the columns survive.
df_concat = pd.concat(split_frames, ignore_index=True) if split_frames else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10729 entries, 0 to 10728
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10729 non-null  object        
 1   digitalLoanAccountId   10729 non-null  object        
 2   loanAccountNumber      10729 non-null  object        
 3   aStackScore            10729 non-null  object        
 4   trenchCategory         10729 non-null  object        
 5   appln_submit_datetime  10729 non-null  datetime64[us]
 6   disbursementdate       10729 non-null  dbdate        
 7   Application_month      10729 non-null  object        
 8   Data_selection         10729 non-null  object        
 9   deffpd30               10729 non-null  Int64         
 10  flg_mature_fpd30       10729 non-null  Int64         
 11  new_loan_type          10729 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1582]:
# aStackScore is object dtype after the concat (see the info() output above), so
# convert it to a numeric column; values that cannot be parsed become NaN.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [1583]:
# df_concat.to_csv(r"aStackScoretrench3fpd30.csv")

In [1584]:
# Gini of aStackScore against the deffpd30 flag, labelled 'FPD30'.
# The helper is defined elsewhere in the notebook; judging by the preview of its
# result it returns one row per week/month with a `aStackScore_FPD30_gini` column
# and a `loan_type` breakdown driven by `product_column` — TODO confirm against its definition.
# Earlier call without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 
    'aStackScore', 
    'deffpd30', 
    'FPD30',
    product_column='new_loan_type'
)

In [1585]:
# Keep the FPD30 gini table as f2: `gini_results` is overwritten by the next metric,
# and f2 is one of the frames merged in the "combining the dataframe" section.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,aStackScore_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.61039,Week,aStackScore,1.1.0,FPD30,Overall
1,2024-10-01,2024-10-31,0.627072,Month,aStackScore,1.1.0,FPD30,Overall
2,2024-10-07,2024-10-13,0.516509,Week,aStackScore,1.1.0,FPD30,Overall
3,2024-10-14,2024-10-20,0.801835,Week,aStackScore,1.1.0,FPD30,Overall
4,2024-10-21,2024-10-27,0.457207,Week,aStackScore,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1586]:
# Test split, FSPD30 / Trench 3.
# `parsed`    : v1 Alpha cash stack-model rows from audit_balance.ml_model_run_details.
# `model_run` : latest 'Alpha-Cash-Model-response' request per
#               (customerId, digitalLoanAccountId, modelName).
# `modelname` : score per run row; trenchCategory comes from the run row, else it is
#               regex-extracted from the request payload.
# The final select joins loan master + delinquency flags and keeps disbursed loans
# with a score, flg_mature_fspd_30 = 1 and trenchCategory = 'Trench 3'.
# The string is a Python raw string so the backslashes in the regex reach BigQuery as written.
# NOTE(review): `parsed` is not reduced to one row per loan here; only exact
# duplicate rows are removed after download — confirm that is sufficient.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1587]:
# Snapshot the Test-split rows (the preview above shows none were returned);
# `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1588]:
# Train split, FSPD30 / Trench 3.
# Reads v1 Alpha cash stack-model predictions from the training run-details table,
# joins them to the loan master table and to the delinquency flags, and keeps rows
# where the loan is disbursed, the score is present, flg_mature_fspd_30 = 1 and
# trenchCategory = 'Trench 3'. Rows are tagged Data_selection = 'Train'.
# The `modelVersionId = 'v1'` filter is applied in `parsed`, matching the other
# Train/Test queries of this model, so only v1 scores feed the v1 gini table.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2027516,3e0ddc65-69eb-400f-8b8d-f74316d4498b,60820275160091,0.31923,Trench 3,2024-10-20 14:54:50,2024-10-20,2024-10,Train,0,1,Quick
1,2508851,d5d38c7e-a4b0-4cd8-9b20-535d1cc914cc,60825088510039,0.500233,Trench 3,2024-10-05 02:30:12,2024-10-05,2024-10,Train,0,1,Quick
2,2307622,af3cb209-5890-4e2d-af95-eda630c2e082,60823076220024,0.511229,Trench 3,2024-10-23 15:25:02,2024-10-23,2024-10,Train,0,1,Quick
3,2911785,f2eae870-9de7-4268-a351-d443d8ba6d00,60829117850037,0.472549,Trench 3,2024-10-12 20:07:59,2024-10-12,2024-10,Train,0,1,Quick
4,2847149,7dea0767-589c-45ba-8ae4-b3823f6f22d5,60828471490038,0.544386,Trench 3,2024-10-31 11:51:03,2024-10-31,2024-10,Train,0,1,Quick


In [1589]:
# Snapshot the Train-split rows so they can be stacked with the Test split (df1).
df2 = dfd.copy()

In [1590]:
# Stack Train (df2) on top of Test (df1). The Test split can be empty for this
# metric, and concatenating an empty frame triggers a pandas deprecation warning
# (empty entries will start to influence result dtypes), so empty splits are
# skipped before the concat. The resulting rows and dtypes are unchanged.
split_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both splits are empty, fall back to an (empty) copy of df2 so the columns survive.
df_concat = pd.concat(split_frames, ignore_index=True) if split_frames else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9749 entries, 0 to 9748
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9749 non-null   Int64         
 1   digitalLoanAccountId   9749 non-null   object        
 2   loanAccountNumber      9749 non-null   object        
 3   aStackScore            9749 non-null   float64       
 4   trenchCategory         9749 non-null   object        
 5   appln_submit_datetime  9749 non-null   datetime64[us]
 6   disbursementdate       9749 non-null   dbdate        
 7   Application_month      9749 non-null   object        
 8   Data_selection         9749 non-null   object        
 9   deffspd30              9749 non-null   Int64         
 10  flg_mature_fspd_30     9749 non-null   Int64         
 11  new_loan_type          9749 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1591]:
# Ensure aStackScore is numeric; values that cannot be parsed become NaN.
# The info() output above already reports float64 for this run, so the conversion
# only has an effect when the column arrives as object dtype.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [1592]:
# df_concat.to_csv(r"aStackScoretrench3fspd30.csv")

In [1593]:
# Gini of aStackScore against the deffspd30 flag, labelled 'FSPD30'.
# The helper is defined elsewhere in the notebook; judging by the preview of its
# result it returns one row per week/month with a `aStackScore_FSPD30_gini` column
# and a `loan_type` breakdown driven by `product_column` — TODO confirm against its definition.
# Earlier call without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffspd30', 'FSPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 
    'aStackScore', 
    'deffspd30', 
    'FSPD30',
    product_column='new_loan_type'
)

In [1594]:
# Keep the FSPD30 gini table as f3: `gini_results` is overwritten by the next metric,
# and f3 is one of the frames merged in the "combining the dataframe" section.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,aStackScore_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.646802,Week,aStackScore,1.1.0,FSPD30,Overall
1,2024-10-01,2024-10-31,0.611353,Month,aStackScore,1.1.0,FSPD30,Overall
2,2024-10-07,2024-10-13,0.430312,Week,aStackScore,1.1.0,FSPD30,Overall
3,2024-10-14,2024-10-20,0.76335,Week,aStackScore,1.1.0,FSPD30,Overall
4,2024-10-21,2024-10-27,0.527891,Week,aStackScore,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1595]:
# Test split, FSTPD30 / Trench 3.
# `parsed`    : v1 Alpha cash stack-model rows from audit_balance.ml_model_run_details.
# `model_run` : latest 'Alpha-Cash-Model-response' request per
#               (customerId, digitalLoanAccountId, modelName).
# `modelname` : score per run row; trenchCategory comes from the run row, else it is
#               regex-extracted from the request payload.
# The final select joins loan master + delinquency flags and keeps disbursed loans
# with a score, flg_mature_fstpd_30 = 1 and trenchCategory = 'Trench 3'.
# The string is a Python raw string so the backslashes in the regex reach BigQuery as written.
# NOTE(review): `parsed` is not reduced to one row per loan here; only exact
# duplicate rows are removed after download — confirm that is sufficient.
sq = r""" 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,requestPayload as requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Alpha-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  p.start_time,
  p.prediction aStackScore,
  coalesce (p.trenchCategory, REGEXP_EXTRACT(m.requestPayload_clean, r"trenchCategory[:=]['\"]?([^'\"]+)['\"]?")) trenchCategory
  from parsed p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1596]:
# Snapshot the Test-split rows (the preview above shows none were returned);
# `dfd` is reassigned by the Train query that follows.
df1 = dfd.copy()

## Train

In [1597]:
# Train split, FSTPD30 / Trench 3.
# Reads v1 Alpha cash stack-model predictions from the training run-details table,
# joins them to the loan master table and to the delinquency flags, and keeps rows
# where the loan is disbursed, the score is present, flg_mature_fstpd_30 = 1 and
# trenchCategory = 'Trench 3'. Rows are tagged Data_selection = 'Train'.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Alpha-Cash-Stack-Model', 'alpha_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction aStackScore,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.aStackScore,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.aStackScore is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,aStackScore,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,1232351,dfd9f61c-0baf-4925-b7e3-dbfe7c47d7d5,60812323510105,0.436533,Trench 3,2024-10-03 03:26:45,2024-10-03,2024-10,Train,0,1,Quick
1,1869874,9f2c1577-422a-4178-8c3e-c222fba77ba7,60818698740057,0.424199,Trench 3,2024-10-25 09:32:10,2024-10-25,2024-10,Train,0,1,Quick
2,2343240,a532829e-518e-4788-b20e-3ccce9722266,60823432400028,0.462332,Trench 3,2024-10-31 10:01:37,2024-11-01,2024-10,Train,0,1,Quick
3,2235233,986f553d-ed0c-40f2-95bd-90c2ab5dd4fb,60822352330034,0.399549,Trench 3,2024-10-30 11:25:47,2024-10-30,2024-10,Train,0,1,Quick
4,2823157,ba0ab2ea-5af1-4ff5-af43-3ca22f847937,60828231570032,0.467665,Trench 3,2024-10-16 10:36:01,2024-10-16,2024-10,Train,0,1,Quick


In [1598]:
# Snapshot the Train-split rows so they can be stacked with the Test split (df1).
df2 = dfd.copy()

In [1599]:
# Stack Train (df2) on top of Test (df1). The Test split can be empty for this
# metric, and concatenating an empty frame triggers a pandas deprecation warning
# (empty entries will start to influence result dtypes), so empty splits are
# skipped before the concat. The resulting rows and dtypes are unchanged.
split_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both splits are empty, fall back to an (empty) copy of df2 so the columns survive.
df_concat = pd.concat(split_frames, ignore_index=True) if split_frames else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8536 entries, 0 to 8535
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8536 non-null   Int64         
 1   digitalLoanAccountId   8536 non-null   object        
 2   loanAccountNumber      8536 non-null   object        
 3   aStackScore            8536 non-null   float64       
 4   trenchCategory         8536 non-null   object        
 5   appln_submit_datetime  8536 non-null   datetime64[us]
 6   disbursementdate       8536 non-null   dbdate        
 7   Application_month      8536 non-null   object        
 8   Data_selection         8536 non-null   object        
 9   deffstpd30             8536 non-null   Int64         
 10  flg_mature_fstpd_30    8536 non-null   Int64         
 11  new_loan_type          8536 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1600]:
# Ensure aStackScore is numeric; values that cannot be parsed become NaN.
# The info() output above already reports float64 for this run, so the conversion
# only has an effect when the column arrives as object dtype.
df_concat['aStackScore'] = pd.to_numeric(df_concat['aStackScore'], errors='coerce')

In [1601]:
# df_concat.to_csv(r"aStackScoretrench3fstpd30.csv")

In [1602]:
# Gini of aStackScore against the deffstpd30 flag, labelled 'FSTPD30'.
# The helper is defined elsewhere in the notebook; judging by the preview of its
# result it returns one row per week/month with a `aStackScore_FSTPD30_gini` column
# and a `loan_type` breakdown driven by `product_column` — TODO confirm against its definition.
# Earlier call without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'aStackScore', 'deffstpd30', 'FSTPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'aStackScore', 
    'deffstpd30', 
    'FSTPD30',
    product_column='new_loan_type'
)

In [1603]:
# Keep the FSTPD30 gini table as f4; it is the last of the frames merged in the
# "combining the dataframe" section.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,aStackScore_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.543902,Week,aStackScore,1.1.0,FSTPD30,Overall
1,2024-10-01,2024-10-31,0.518087,Month,aStackScore,1.1.0,FSTPD30,Overall
2,2024-10-07,2024-10-13,0.30382,Week,aStackScore,1.1.0,FSTPD30,Overall
3,2024-10-14,2024-10-20,0.772277,Week,aStackScore,1.1.0,FSTPD30,Overall
4,2024-10-21,2024-10-27,0.523652,Week,aStackScore,1.1.0,FSTPD30,Overall


## combining the dataframe

In [1604]:
import functools

# Columns shared by every per-metric gini frame; they form the join key.
# NOTE(review): 'bad_rate' is part of the key and holds a different label in each
# frame (FPD0, FPD10, ...), so the outer merge stacks the frames rather than
# aligning them side by side — each output row has a single non-null gini column.
# Confirm this long-with-gaps layout is what the downstream table expects.
common_columns = [
    'start_date',
    'end_date',
    'period',
    'Model_Name',
    'version',
    'loan_type',
    'bad_rate',
]

# Per-metric gini frames, in the order FPD0, FPD10, FPD30, FSPD30, FSTPD30.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two gini frames on `common_columns`, keeping rows from both."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left to right into one frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'aStackScore_FPD0_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'loan_type',
       'aStackScore_FPD10_gini', 'aStackScore_FPD30_gini',
       'aStackScore_FSPD30_gini', 'aStackScore_FSTPD30_gini'],
      dtype=object)

In [1605]:
# Key columns first, then one gini column per delinquency metric.
ordered_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
    'aStackScore_FPD0_gini',
    'aStackScore_FPD10_gini',
    'aStackScore_FPD30_gini',
    'aStackScore_FSPD30_gini',
    'aStackScore_FSTPD30_gini',
]

# Tag every gini column with the trench ("t3") it was computed for.
t3_column_names = {
    'aStackScore_FPD0_gini': 'aStackScore_t3_FPD0_gini',
    'aStackScore_FPD10_gini': 'aStackScore_t3_FPD10_gini',
    'aStackScore_FPD30_gini': 'aStackScore_t3_FPD30_gini',
    'aStackScore_FSPD30_gini': 'aStackScore_t3_FSPD30_gini',
    'aStackScore_FSTPD30_gini': 'aStackScore_t3_FSTPD30_gini',
}

# Reorder, rename, and append the constant descriptor columns in a single chain.
final_df = (
    final_df[ordered_columns]
    .rename(columns=t3_column_names)
    .assign(
        Trench_category='Trench 3',
        Model_display_name='alpha_stack_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                     datetime64[ns]
end_date                       datetime64[ns]
period                                 object
Model_Name                             object
version                                object
loan_type                              object
bad_rate                               object
aStackScore_t3_FPD0_gini              float64
aStackScore_t3_FPD10_gini             float64
aStackScore_t3_FPD30_gini             float64
aStackScore_t3_FSPD30_gini            float64
aStackScore_t3_FSTPD30_gini           float64
Trench_category                        object
Model_display_name                     object
Product_type                           object
dtype: object

In [1606]:
# Preview the combined table. Each row carries a value in only one gini column;
# the others are NaN because the frames were outer-merged on a key that includes bad_rate.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,aStackScore_t3_FPD0_gini,aStackScore_t3_FPD10_gini,aStackScore_t3_FPD30_gini,aStackScore_t3_FSPD30_gini,aStackScore_t3_FSTPD30_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,aStackScore,1.1.0,Overall,FPD0,0.380488,,,,,Trench 3,alpha_stack_model_cash,CASH
1,2024-10-01,2024-10-31,Month,aStackScore,1.1.0,Overall,FPD0,0.365971,,,,,Trench 3,alpha_stack_model_cash,CASH
2,2024-10-07,2024-10-13,Week,aStackScore,1.1.0,Overall,FPD0,0.357708,,,,,Trench 3,alpha_stack_model_cash,CASH
3,2024-10-14,2024-10-20,Week,aStackScore,1.1.0,Overall,FPD0,0.459742,,,,,Trench 3,alpha_stack_model_cash,CASH
4,2024-10-21,2024-10-27,Week,aStackScore,1.1.0,Overall,FPD0,0.378547,,,,,Trench 3,alpha_stack_model_cash,CASH


In [1607]:
# Publish the combined Trench 3 gini table to BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.alpha_cash_stack_model_t3_v1_gini5"

# WRITE_TRUNCATE replaces the table's existing rows; use "WRITE_APPEND" to add to them instead.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = "WRITE_TRUNCATE"

job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
# Block until the load job finishes; the returned job is shown as the cell output.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a3747c92-1753-452e-b114-bc6f47b3ed98>

# Beta-Cash-Demo-Model

## Trench 1

## Test

In [1608]:
# Test split, FPD0 / Trench 1, Beta cash demo model.
# `parsed`         : v1 Beta cash demo-model rows from audit_balance.ml_model_run_details.
# `latest_request` : most recent run (by start_time) per
#                    (customerId, digitalLoanAccountId, modelDisplayName).
# `model_run`      : latest 'Beta-Cash-Model-response' request per
#                    (customerId, digitalLoanAccountId, modelName), with single quotes
#                    and None rewritten so the payload can be read with JSON_VALUE.
# `modelname`      : score per loan; trenchCategory comes from the run row, else from
#                    $.predictions.trenchCategory in the request payload.
# The final select joins loan master + delinquency flags and keeps disbursed loans
# with a score, flg_mature_fpd0 = 1 and trenchCategory = 'Trench 1'.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,3738710,81b27dfe-2a80-4a09-bb33-5c73b7c6368f,60837387100015,0.5578658426908933,Trench 1,2025-10-12 12:04:17,2025-10-12,2025-10,Test,0,1,Quick
1,3778520,f71a290a-dded-4e52-b157-1cf6ab3cd3e0,60837785200011,0.6130025312162396,Trench 1,2025-10-31 04:33:50,2025-10-31,2025-10,Test,1,1,Quick
2,3762366,ca9617df-52ea-4944-8929-87b6baba9bca,60837623660011,0.4772959931293181,Trench 1,2025-10-23 12:46:15,2025-10-25,2025-10,Test,0,1,Quick
3,3785374,c0329528-d745-4a77-b680-a5e69e97c1ca,60837853740012,0.4855432005125022,Trench 1,2025-11-02 00:26:11,2025-11-02,2025-11,Test,0,1,Quick
4,3770042,d0148cbe-dbce-4304-bb3a-a973f6f28c22,60837700420011,0.4856541046869797,Trench 1,2025-10-26 12:27:51,2025-10-26,2025-10,Test,1,1,Quick


In [1609]:
# Snapshot the Test-split rows under their own name; `dfd` is reassigned by the
# Train query that follows.
df1 = dfd.copy()

## Train

In [1610]:
# Train split, FPD0 / Trench 1, Beta cash demo model.
# Reads v1 Beta cash demo-model predictions from the training run-details table,
# joins them to the loan master table and to the delinquency flags, and keeps rows
# where the loan is disbursed, the score is present, flg_mature_fpd0 = 1 and
# trenchCategory = 'Trench 1'. Rows are tagged Data_selection = 'Train'.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
 and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2977356,82dd2652-2649-4550-a47a-b2a45079b13c,60829773560018,0.550282,Trench 1,2024-10-27 15:36:13,2024-10-27,2024-10,Train,1,1,Quick
1,2988674,a1084c2f-78fc-4312-9b9e-ac9c1345e110,60829886740018,0.485192,Trench 1,2024-10-31 20:42:11,2024-10-31,2024-10,Train,0,1,Quick
2,2943134,bab6fc4e-aa6d-4b73-80bd-45caa4a305fa,60829431340015,0.560137,Trench 1,2024-10-22 15:17:21,2024-10-22,2024-10,Train,0,1,Quick
3,2873926,d9b726aa-321c-49ae-a457-af82c56062a7,60828739260018,0.559776,Trench 1,2024-10-02 08:03:50,2024-10-02,2024-10,Train,0,1,Quick
4,2956693,802891c0-7b7a-49b6-80ba-5ae3a87a21c2,60829566930018,0.488072,Trench 1,2024-10-20 20:53:55,2024-10-20,2024-10,Train,0,1,Quick


In [1611]:
# Snapshot the Train-sample result under its own name for the concat step.
df2 = dfd.copy(deep=True)

In [1612]:
# Stack the Train rows (df2) on top of the Test rows (df1) with a fresh 0..n-1
# index, then print the schema to check row counts and dtypes.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18368 entries, 0 to 18367
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             18368 non-null  object        
 1   digitalLoanAccountId   18368 non-null  object        
 2   loanAccountNumber      18368 non-null  object        
 3   Beta_Cash_Demo_Score   18368 non-null  object        
 4   trenchCategory         18368 non-null  object        
 5   appln_submit_datetime  18368 non-null  datetime64[us]
 6   disbursementdate       18368 non-null  dbdate        
 7   Application_month      18368 non-null  object        
 8   Data_selection         18368 non-null  object        
 9   deffpd0                18368 non-null  Int64         
 10  flg_mature_fpd0        18368 non-null  Int64         
 11  new_loan_type          18368 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1613]:
# df_concat.info() reports Beta_Cash_Demo_Score as dtype object, so convert it to
# a numeric dtype before it is used for the Gini calculation.
_scores_before = df_concat['Beta_Cash_Demo_Score']
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(_scores_before, errors='coerce')

# errors='coerce' replaces unparseable values with NaN without any message.
# Report how many non-null scores were lost so silent data loss is visible.
_n_unparsed = int((df_concat['Beta_Cash_Demo_Score'].isna() & _scores_before.notna()).sum())
if _n_unparsed:
    print(f"WARNING: {_n_unparsed} Beta_Cash_Demo_Score value(s) could not be parsed and were set to NaN")

In [1614]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd0.csv")

In [1615]:
# Periodic Gini of Beta_Cash_Demo_Score against the FPD0 flag. The helper is defined
# elsewhere in this notebook; presumably it also breaks results down by
# `new_loan_type` (confirm against its definition).
# Earlier variant kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd0', 'FPD0')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1616]:
# Keep the FPD0 Gini table under its own name (`gini_results` is reassigned in the
# FPD10 section) and preview the first five rows.
f0 = gini_results.copy(deep=True)
f0.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.162108,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
1,2024-10-01,2024-10-31,0.23941,Month,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
2,2024-10-07,2024-10-13,0.260402,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
3,2024-10-14,2024-10-20,0.270544,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
4,2024-10-21,2024-10-27,0.261032,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall


## FPD10

## Test

In [1617]:
# Test sample for the FPD10 Gini.
# The query takes the latest 'Beta-Cash-Demo-Model' / 'beta_demo_model_cash' v1 run per
# (customerId, digitalLoanAccountId, modelDisplayName) from
# audit_balance.ml_model_run_details. When trenchCategory is missing on the run row
# it falls back to $.predictions.trenchCategory from the latest
# 'Beta-Cash-Model-response' request payload. The result is joined to
# loan_master_table and the delinquency flags, keeping disbursed loans with a
# non-null score that are FPD10-mature (obs_min_inst_def10 >= 1) and in 'Trench 1'.
# Notes:
#   * `model_run` is de-duplicated per (customerId, digitalLoanAccountId, modelName)
#     but joined on digitalLoanAccountId only;
#   * the WHERE filters on loanmaster.* make the LEFT JOIN behave like an inner join;
#   * `calcFeatures` is built in `parsed` but is not used by the final SELECT.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query with the notebook-level BigQuery `client`, drop exact duplicate
# rows (first occurrence kept), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3742978,1d1043cf-ab80-4979-9776-b2f946a5c7bd,60837429780012,0.496096216190645,Trench 1,2025-10-14 04:05:27,2025-10-14,2025-10,Test,0,1,Quick
1,3713545,3d4efb65-52d5-4ff2-8740-8f499b6491c7,60837135450011,0.4691657070991971,Trench 1,2025-09-29 19:36:21,2025-09-30,2025-09,Test,0,1,Quick
2,3753271,74606b94-9f3f-402f-8a2f-407d8ace9475,60837532710013,0.5245488221282029,Trench 1,2025-10-19 23:19:17,2025-10-20,2025-10,Test,0,1,Quick
3,3755325,1ea7f965-2c0b-4333-826f-15aff87e13f2,60837553250018,0.5564467879754976,Trench 1,2025-10-19 20:13:34,2025-10-20,2025-10,Test,0,1,Quick
4,3730764,9822c3ea-1dff-4e7c-b6c4-177f3ee9fe8b,60837307640025,0.4406072349068134,Trench 1,2025-10-09 14:02:06,2025-10-10,2025-10,Test,0,1,Quick


In [1618]:
# Snapshot the FPD10 Test-sample result under its own name: `dfd` is reassigned by
# the Train query in the next cell.
df1 = dfd.copy(deep=True)

## Train

In [1619]:
# Train sample for the FPD10 Gini.
# The query reads 'Beta-Cash-Demo-Model' / 'beta_demo_model_cash' v1 predictions from
# the training run table, joins them to loan_master_table and to the delinquency
# flags, and keeps disbursed loans with a non-null score that are FPD10-mature
# (obs_min_inst_def10 >= 1) and belong to 'Trench 1'.
# Notes:
#   * the WHERE filters on loanmaster.* make the LEFT JOIN behave like an inner join;
#   * `calcFeatures` is built in `parsed` but is not used by the final SELECT;
#   * only deffpd10 and flg_mature_fpd10 are taken from the `deliquency` CTE.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query with the notebook-level BigQuery `client`, drop exact duplicate
# rows (first occurrence kept), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2936481,9c0bf28d-7cee-41d9-82fd-8c4bc266b0d0,60829364810018,0.468704,Trench 1,2024-10-13 14:16:57,2024-10-21,2024-10,Train,0,1,Quick
1,2960777,bb05a5cf-6c7e-402e-8019-5c115b184fd6,60829607770012,0.347366,Trench 1,2024-10-27 13:48:40,2024-10-29,2024-10,Train,0,1,Quick
2,2984994,849ee467-5b02-4c49-9374-113afe3a884d,60829849940015,0.569559,Trench 1,2024-10-30 14:57:24,2024-10-30,2024-10,Train,1,1,Quick
3,2966492,2a29d59f-c738-4e78-a7b9-9ccb54da5ee1,60829664920016,0.50099,Trench 1,2024-10-24 10:39:01,2024-10-24,2024-10,Train,0,1,Quick
4,2912972,94b2332e-fe95-470e-a8a1-8f043e439cea,60829129720016,0.575479,Trench 1,2024-10-05 16:04:50,2024-10-06,2024-10,Train,1,1,Quick


In [1620]:
# Snapshot the FPD10 Train-sample result under its own name for the concat step.
df2 = dfd.copy(deep=True)

In [1621]:
# Stack the FPD10 Train rows (df2) on top of the Test rows (df1) with a fresh
# 0..n-1 index, then print the schema to check row counts and dtypes.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16860 entries, 0 to 16859
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16860 non-null  object        
 1   digitalLoanAccountId   16860 non-null  object        
 2   loanAccountNumber      16860 non-null  object        
 3   Beta_Cash_Demo_Score   16860 non-null  object        
 4   trenchCategory         16860 non-null  object        
 5   appln_submit_datetime  16860 non-null  datetime64[us]
 6   disbursementdate       16860 non-null  dbdate        
 7   Application_month      16860 non-null  object        
 8   Data_selection         16860 non-null  object        
 9   deffpd10               16860 non-null  Int64         
 10  flg_mature_fpd10       16860 non-null  Int64         
 11  new_loan_type          16860 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1622]:
# df_concat.info() reports Beta_Cash_Demo_Score as dtype object, so convert it to
# a numeric dtype before it is used for the Gini calculation.
_scores_before = df_concat['Beta_Cash_Demo_Score']
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(_scores_before, errors='coerce')

# errors='coerce' replaces unparseable values with NaN without any message.
# Report how many non-null scores were lost so silent data loss is visible.
_n_unparsed = int((df_concat['Beta_Cash_Demo_Score'].isna() & _scores_before.notna()).sum())
if _n_unparsed:
    print(f"WARNING: {_n_unparsed} Beta_Cash_Demo_Score value(s) could not be parsed and were set to NaN")

In [1623]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd10.csv")

In [1624]:
# Periodic Gini of Beta_Cash_Demo_Score against the FPD10 flag. The helper is defined
# elsewhere in this notebook; presumably it also breaks results down by
# `new_loan_type` (confirm against its definition).
# Earlier variant kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1625]:
# Keep the FPD10 Gini table under its own name (`gini_results` is reassigned in the
# FPD30 section) and preview the first five rows.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.235812,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
1,2024-10-01,2024-10-31,0.347381,Month,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
2,2024-10-07,2024-10-13,0.379712,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
3,2024-10-14,2024-10-20,0.319219,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
4,2024-10-21,2024-10-27,0.44878,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall


## FPD30

## Test

In [1626]:
# Test sample for the FPD30 Gini.
# The query takes the latest 'Beta-Cash-Demo-Model' / 'beta_demo_model_cash' v1 run per
# (customerId, digitalLoanAccountId, modelDisplayName) from
# audit_balance.ml_model_run_details. When trenchCategory is missing on the run row
# it falls back to $.predictions.trenchCategory from the latest
# 'Beta-Cash-Model-response' request payload. The result is joined to
# loan_master_table and the delinquency flags, keeping disbursed loans with a
# non-null score that are FPD30-mature (obs_min_inst_def30 >= 1) and in 'Trench 1'.
# Notes:
#   * `model_run` is de-duplicated per (customerId, digitalLoanAccountId, modelName)
#     but joined on digitalLoanAccountId only;
#   * the WHERE filters on loanmaster.* make the LEFT JOIN behave like an inner join;
#   * `calcFeatures` is built in `parsed` but is not used by the final SELECT.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query with the notebook-level BigQuery `client`, drop exact duplicate
# rows (first occurrence kept), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,3719077,70b0e190-3125-4e0a-acab-ce37fa7ea02a,60837190770017,0.458302414858237,Trench 1,2025-10-02 16:31:08,2025-10-02,2025-10,Test,0,1,Quick
1,3718881,7c1dead3-748f-4299-a825-7e76c7d10c81,60837188810013,0.3114746517368309,Trench 1,2025-10-02 15:31:34,2025-10-02,2025-10,Test,0,1,Quick
2,3732908,c06b6d3a-5c1c-4159-b032-eb5734e961e7,60837329080013,0.4810610744741784,Trench 1,2025-10-09 11:13:44,2025-10-09,2025-10,Test,0,1,Quick
3,3733252,034a1064-7f3b-4087-8337-fe0f2eb91e6f,60837332520011,0.4694423056536225,Trench 1,2025-10-09 13:52:08,2025-10-09,2025-10,Test,0,1,Quick
4,3701246,9c698895-ba1e-4a18-a058-c4cae1311e82,60837012460019,0.4829765131860002,Trench 1,2025-10-03 20:32:40,2025-10-03,2025-10,Test,0,1,Quick


In [1627]:
# Snapshot the FPD30 Test-sample result under its own name: `dfd` is reassigned by
# the Train query in the next cell.
df1 = dfd.copy(deep=True)

## Train

In [1628]:
# Train sample for the FPD30 Gini.
# The query reads 'Beta-Cash-Demo-Model' / 'beta_demo_model_cash' v1 predictions from
# the training run table, joins them to loan_master_table and to the delinquency
# flags, and keeps disbursed loans with a non-null score that are FPD30-mature
# (obs_min_inst_def30 >= 1) and belong to 'Trench 1'.
# Notes:
#   * the WHERE filters on loanmaster.* make the LEFT JOIN behave like an inner join;
#   * `calcFeatures` is built in `parsed` but is not used by the final SELECT;
#   * only deffpd30 and flg_mature_fpd30 are taken from the `deliquency` CTE.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query with the notebook-level BigQuery `client`, drop exact duplicate
# rows (first occurrence kept), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2953294,afcccea6-37c8-47cb-80da-42be6bf4774f,60829532940015,0.599705,Trench 1,2024-10-19 06:34:13,2024-10-22,2024-10,Train,1,1,Quick
1,2961381,40add69d-7aff-40c1-b5b1-6e36f96ec0eb,60829613810011,0.613852,Trench 1,2024-10-21 17:35:53,2024-10-21,2024-10,Train,1,1,Quick
2,2977595,8f2242f2-0551-428e-b811-7ba5909aed5d,60829775950011,0.521642,Trench 1,2024-10-27 16:39:01,2024-10-28,2024-10,Train,1,1,Quick
3,2947252,666ed811-ddeb-4f7e-bc86-98a4063f19ec,60829472520016,0.526669,Trench 1,2024-10-17 06:56:39,2024-10-17,2024-10,Train,1,1,Quick
4,2943754,a3a18b37-34ee-4986-bd27-958ff316af47,60829437540017,0.549953,Trench 1,2024-10-15 22:07:57,2024-10-16,2024-10,Train,1,1,Quick


In [1629]:
# Snapshot the FPD30 Train-sample result under its own name for the concat step.
df2 = dfd.copy(deep=True)

In [1630]:
# Stack the FPD30 Train rows (df2) on top of the Test rows (df1) with a fresh
# 0..n-1 index, then print the schema to check row counts and dtypes.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15851 entries, 0 to 15850
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15851 non-null  object        
 1   digitalLoanAccountId   15851 non-null  object        
 2   loanAccountNumber      15851 non-null  object        
 3   Beta_Cash_Demo_Score   15851 non-null  object        
 4   trenchCategory         15851 non-null  object        
 5   appln_submit_datetime  15851 non-null  datetime64[us]
 6   disbursementdate       15851 non-null  dbdate        
 7   Application_month      15851 non-null  object        
 8   Data_selection         15851 non-null  object        
 9   deffpd30               15851 non-null  Int64         
 10  flg_mature_fpd30       15851 non-null  Int64         
 11  new_loan_type          15851 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1631]:
# df_concat.info() reports Beta_Cash_Demo_Score as dtype object, so convert it to
# a numeric dtype before it is used for the Gini calculation.
_scores_before = df_concat['Beta_Cash_Demo_Score']
df_concat['Beta_Cash_Demo_Score'] = pd.to_numeric(_scores_before, errors='coerce')

# errors='coerce' replaces unparseable values with NaN without any message.
# Report how many non-null scores were lost so silent data loss is visible.
_n_unparsed = int((df_concat['Beta_Cash_Demo_Score'].isna() & _scores_before.notna()).sum())
if _n_unparsed:
    print(f"WARNING: {_n_unparsed} Beta_Cash_Demo_Score value(s) could not be parsed and were set to NaN")

In [1632]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd30.csv")

In [1633]:
# Periodic Gini of Beta_Cash_Demo_Score against the FPD30 flag. The helper is defined
# elsewhere in this notebook; presumably it also breaks results down by
# `new_loan_type` (confirm against its definition).
# Earlier variant kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1634]:
# Keep the FPD30 Gini table under its own name and preview the first five rows.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.285632,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
1,2024-10-01,2024-10-31,0.355098,Month,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
2,2024-10-07,2024-10-13,0.323583,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
3,2024-10-14,2024-10-20,0.327721,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
4,2024-10-21,2024-10-27,0.464095,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1635]:
# Test sample for the FSPD30 Gini.
# The query takes the latest 'Beta-Cash-Demo-Model' / 'beta_demo_model_cash' v1 run per
# (customerId, digitalLoanAccountId, modelDisplayName) from
# audit_balance.ml_model_run_details. When trenchCategory is missing on the run row
# it falls back to $.predictions.trenchCategory from the latest
# 'Beta-Cash-Model-response' request payload. The result is joined to
# loan_master_table and the delinquency flags, keeping disbursed loans with a
# non-null score that are FSPD30-mature (obs_min_inst_def30 >= 2) and in 'Trench 1'.
# Notes:
#   * `model_run` is de-duplicated per (customerId, digitalLoanAccountId, modelName)
#     but joined on digitalLoanAccountId only;
#   * the WHERE filters on loanmaster.* make the LEFT JOIN behave like an inner join;
#   * `calcFeatures` is built in `parsed` but is not used by the final SELECT.
# NOTE(review): the captured output of this cell shows a header with no rows, so
# the Test sample appears to have been empty in that run -- confirm downstream
# steps handle an empty frame.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query with the notebook-level BigQuery `client`, drop exact duplicate
# rows (first occurrence kept), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1636]:
# Snapshot the FSPD30 Test-sample result under its own name: `dfd` is reassigned by
# the Train query in the next cell.
df1 = dfd.copy(deep=True)

## Train

In [1637]:
# Train sample for the FSPD30 Gini.
# The query reads 'Beta-Cash-Demo-Model' / 'beta_demo_model_cash' v1 predictions from
# the training run table, joins them to loan_master_table and to the delinquency
# flags, and keeps disbursed loans with a non-null score that are FSPD30-mature
# (obs_min_inst_def30 >= 2) and belong to 'Trench 1'.
# Notes:
#   * the WHERE filters on loanmaster.* make the LEFT JOIN behave like an inner join;
#   * `calcFeatures` is built in `parsed` but is not used by the final SELECT;
#   * only deffspd30 and flg_mature_fspd_30 are taken from the `deliquency` CTE.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query with the notebook-level BigQuery `client`, drop exact duplicate
# rows (first occurrence kept), and preview the result.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2973109,c8e9412b-8f02-4c18-bf67-260481887ca2,60829731090019,0.42917,Trench 1,2024-10-26 09:41:38,2024-10-27,2024-10,Train,0,1,Quick
1,2907528,3e9ad64f-fa23-4418-9f4c-414738eca555,60829075280015,0.71762,Trench 1,2024-10-03 19:04:26,2024-10-03,2024-10,Train,1,1,Quick
2,2925374,629296c0-7417-45a8-946d-8ea315851b8e,60829253740017,0.62217,Trench 1,2024-10-09 16:14:52,2024-10-09,2024-10,Train,1,1,Quick
3,2936747,cb7d42d6-f7a5-4589-ac8b-5c81feda5859,60829367470015,0.455255,Trench 1,2024-10-13 15:01:25,2024-10-13,2024-10,Train,1,1,Quick
4,2933360,057b42a3-5852-45fe-a2d9-fc6a6c45b79b,60829333600018,0.654058,Trench 1,2024-10-12 13:54:22,2024-10-12,2024-10,Train,1,1,Quick


In [1638]:
# Snapshot the Train result as df2 so it can be concatenated with df1.
df2 = dfd.copy()

In [1639]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out: pandas deprecates concatenating empty/all-NA frames
# (the FutureWarning recorded in this cell's output) and will later let their
# dtypes influence the result.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both queries returned nothing, fall back to df2 so the column layout survives.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14839 entries, 0 to 14838
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14839 non-null  Int64         
 1   digitalLoanAccountId   14839 non-null  object        
 2   loanAccountNumber      14839 non-null  object        
 3   Beta_Cash_Demo_Score   14839 non-null  float64       
 4   trenchCategory         14839 non-null  object        
 5   appln_submit_datetime  14839 non-null  datetime64[us]
 6   disbursementdate       14839 non-null  dbdate        
 7   Application_month      14839 non-null  object        
 8   Data_selection         14839 non-null  object        
 9   deffspd30              14839 non-null  Int64         
 10  flg_mature_fspd_30     14839 non-null  Int64         
 11  new_loan_type          14839 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1640]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
# The info() output above already reports float64 here, so this acts as a safety net.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [1641]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfspd30.csv")

In [1642]:
# Gini of Beta_Cash_Demo_Score against the FSPD30 default flag, computed per period
# and split by loan type via `new_loan_type`.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffspd30', 'FSPD30', product_column='new_loan_type'
)

In [1643]:
# Keep the FSPD30 Gini table as f3: `gini_results` is overwritten by the FSTPD30 run
# below, and f3 is one of the frames merged into final_df further down.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.319646,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
1,2024-10-01,2024-10-31,0.318281,Month,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
2,2024-10-07,2024-10-13,0.332984,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
3,2024-10-14,2024-10-20,0.30673,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
4,2024-10-21,2024-10-27,0.339559,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1644]:
# Test cohort, FSTPD30 target, Trench 1.
# The query reads v1 scores from audit_balance.ml_model_run_details and keeps the most
# recent run per (customerId, digitalLoanAccountId, modelDisplayName). When the run row
# has no trenchCategory it falls back to $.predictions.trenchCategory from the latest
# 'Beta-Cash-Model-response' request payload. It then joins the loan master and the
# delinquency flags and keeps disbursed loans that are mature for FSTPD30
# (flg_mature_fstpd_30 = 1, i.e. obs_min_inst_def30 >= 3) with trenchCategory 'Trench 1'.
# NOTE(review): the WHERE filters on loanmaster make the `left join` behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1645]:
# Snapshot the Test result as df1 before the Train query below reassigns `dfd`.
df1 = dfd.copy()

## Train

In [1646]:
# Train cohort, FSTPD30 target, Trench 1.
# The query reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details
# (no latest-run filter is applied here), joins the loan master and the delinquency
# flags, and keeps disbursed loans that are mature for FSTPD30
# (flg_mature_fstpd_30 = 1, i.e. obs_min_inst_def30 >= 3) with trenchCategory 'Trench 1'.
# NOTE(review): the WHERE filters on loanmaster make the `left join` behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2980988,29535d8d-9a89-4020-843b-421afed64136,60829809880011,0.278381,Trench 1,2024-10-28 22:18:31,2024-10-29,2024-10,Train,0,1,Quick
1,2943878,278db654-8c5d-4c9c-b3ad-353a5d89aa51,60829438780018,0.608458,Trench 1,2024-10-15 23:20:40,2024-10-16,2024-10,Train,0,1,Quick
2,2987187,b0269a94-4d0b-4eda-a2d4-059f69f497bd,60829871870014,0.549761,Trench 1,2024-10-31 12:29:17,2024-10-31,2024-10,Train,1,1,Quick
3,2903302,51587c7d-10e4-4f98-bea6-236f17b2f4e7,60829033020013,0.50437,Trench 1,2024-10-02 13:25:28,2024-10-02,2024-10,Train,0,1,Quick
4,2939736,6ca47aa4-3bfe-49ba-bd39-a4934123fa31,60829397360014,0.582907,Trench 1,2024-10-15 07:39:36,2024-10-15,2024-10,Train,1,1,Quick


In [1647]:
# Snapshot the Train result as df2 so it can be concatenated with df1.
df2 = dfd.copy()

In [1648]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out: pandas deprecates concatenating empty/all-NA frames
# (the FutureWarning recorded in this cell's output) and will later let their
# dtypes influence the result.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both queries returned nothing, fall back to df2 so the column layout survives.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13607 entries, 0 to 13606
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13607 non-null  Int64         
 1   digitalLoanAccountId   13607 non-null  object        
 2   loanAccountNumber      13607 non-null  object        
 3   Beta_Cash_Demo_Score   13607 non-null  float64       
 4   trenchCategory         13607 non-null  object        
 5   appln_submit_datetime  13607 non-null  datetime64[us]
 6   disbursementdate       13607 non-null  dbdate        
 7   Application_month      13607 non-null  object        
 8   Data_selection         13607 non-null  object        
 9   deffstpd30             13607 non-null  Int64         
 10  flg_mature_fstpd_30    13607 non-null  Int64         
 11  new_loan_type          13607 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1649]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
# The info() output above already reports float64 here, so this acts as a safety net.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [1650]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfstpd30.csv")

In [1651]:
# Gini of Beta_Cash_Demo_Score against the FSTPD30 default flag, computed per period
# and split by loan type via `new_loan_type`.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffstpd30', 'FSTPD30', product_column='new_loan_type'
)

In [1652]:
# Keep the FSTPD30 Gini table as f4; it is the last of the frames merged into final_df below.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.274895,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
1,2024-10-01,2024-10-31,0.293095,Month,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
2,2024-10-07,2024-10-13,0.326968,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
3,2024-10-14,2024-10-20,0.231325,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
4,2024-10-21,2024-10-27,0.333658,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1653]:
import functools

# Per-target Gini tables (FPD0, FPD10, FPD30, FSPD30, FSTPD30) built earlier in the notebook.
dataframes = [f0, f1, f2, f3, f4]
# Columns every Gini table shares; together they form the join key.
# NOTE(review): `bad_rate` holds a different constant in each table (e.g. 'FPD0',
# 'FSPD30'), so rows from different targets never match and the outer merge leaves
# NaN in the other targets' Gini columns (visible in final_df.head() below).
# Confirm this long/wide hybrid layout is what the downstream table expects.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on ``common_columns`` and return the result."""
    return df1.merge(df2, how='outer', on=common_columns)

# Fold the list left to right into a single table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Beta_Cash_Demo_Score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Beta_Cash_Demo_Score_FPD10_gini',
       'Beta_Cash_Demo_Score_FPD30_gini',
       'Beta_Cash_Demo_Score_FSPD30_gini',
       'Beta_Cash_Demo_Score_FSTPD30_gini'], dtype=object)

In [1654]:
# Old -> new names for the five Gini metrics; the `_t1` tag marks them as Trench 1.
gini_t1_names = {
    'Beta_Cash_Demo_Score_FPD0_gini': 'Beta_Cash_Demo_Score_FPD0_t1_gini',
    'Beta_Cash_Demo_Score_FPD10_gini': 'Beta_Cash_Demo_Score_FPD10_t1_gini',
    'Beta_Cash_Demo_Score_FPD30_gini': 'Beta_Cash_Demo_Score_FPD30_t1_gini',
    'Beta_Cash_Demo_Score_FSPD30_gini': 'Beta_Cash_Demo_Score_FSPD30_t1_gini',
    'Beta_Cash_Demo_Score_FSTPD30_gini': 'Beta_Cash_Demo_Score_FSTPD30_t1_gini',
}
# Key columns first, then the metrics in dict order; rename them and append the
# constant descriptor columns. Each step returns a new frame, so no inplace edits.
final_df = (
    final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
              *gini_t1_names]]
    .rename(columns=gini_t1_names)
    .assign(
        Trench_category='Trench 1',
        Model_display_name='beta_demo_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                              datetime64[ns]
end_date                                datetime64[ns]
period                                          object
Model_Name                                      object
version                                         object
loan_type                                       object
bad_rate                                        object
Beta_Cash_Demo_Score_FPD0_t1_gini              float64
Beta_Cash_Demo_Score_FPD10_t1_gini             float64
Beta_Cash_Demo_Score_FPD30_t1_gini             float64
Beta_Cash_Demo_Score_FSPD30_t1_gini            float64
Beta_Cash_Demo_Score_FSTPD30_t1_gini           float64
Trench_category                                 object
Model_display_name                              object
Product_type                                    object
dtype: object

In [1655]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Beta_Cash_Demo_Score_FPD0_t1_gini,Beta_Cash_Demo_Score_FPD10_t1_gini,Beta_Cash_Demo_Score_FPD30_t1_gini,Beta_Cash_Demo_Score_FSPD30_t1_gini,Beta_Cash_Demo_Score_FSTPD30_t1_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.162108,,,,,Trench 1,beta_demo_model_cash,CASH
1,2024-10-01,2024-10-31,Month,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.23941,,,,,Trench 1,beta_demo_model_cash,CASH
2,2024-10-07,2024-10-13,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.260402,,,,,Trench 1,beta_demo_model_cash,CASH
3,2024-10-14,2024-10-20,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.270544,,,,,Trench 1,beta_demo_model_cash,CASH
4,2024-10-21,2024-10-27,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.261032,,,,,Trench 1,beta_demo_model_cash,CASH


In [1656]:
# Load the Trench 1 Gini table into BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_cash_t1_v1_gini5"
# WRITE_TRUNCATE replaces whatever the destination table already holds;
# "WRITE_APPEND" would add the rows to it instead.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(
    dataframe=final_df,
    destination=table_id,
    job_config=job_config,
)
job.result()  # block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=458e7d74-3eea-437c-933b-8a34acc81533>

## Trench 2

## FPD0

## Test

In [1657]:
# Test cohort, FPD0 target, Trench 2.
# The query reads v1 scores from audit_balance.ml_model_run_details and keeps the most
# recent run per (customerId, digitalLoanAccountId, modelDisplayName). When the run row
# has no trenchCategory it falls back to $.predictions.trenchCategory from the latest
# 'Beta-Cash-Model-response' request payload. It then joins the loan master and the
# delinquency flags and keeps disbursed loans that are mature for FPD0
# (flg_mature_fpd0 = 1, i.e. obs_min_inst_def0 >= 1) with trenchCategory 'Trench 2'.
# NOTE(review): the WHERE filters on loanmaster make the `left join` behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2047990,4d128d7a-3eaa-4c3a-bec4-b6ee5f1a5cac,60820479900019,0.4498272379777833,Trench 2,2025-10-20 00:19:21,2025-10-22,2025-10,Test,0,1,Quick
1,3195110,28254d29-89bd-4a7b-9023-02fc786b1e95,60831951100019,0.574160646299637,Trench 2,2025-10-22 12:51:45,2025-10-22,2025-10,Test,1,1,Quick
2,3553423,bd39c50f-eda3-41c1-aea4-b54c8ab30866,60835534230019,0.5731354897980833,Trench 2,2025-10-14 14:00:57,2025-10-14,2025-10,Test,0,1,Quick
3,2330436,980fdc8b-acee-405d-8f7e-48906a4305e1,60823304360013,0.5442023683559348,Trench 2,2025-11-03 14:20:45,2025-11-03,2025-11,Test,1,1,Quick
4,1812295,db853d3c-61ee-40c6-9b5a-6af6eca40e88,60818122950019,0.5069165921141096,Trench 2,2025-10-14 21:31:57,2025-10-15,2025-10,Test,0,1,Quick


In [1658]:
# Snapshot the Test result as df1 before the Train query below reassigns `dfd`.
df1 = dfd.copy()

## Train

In [1659]:
# Train cohort, FPD0 target, Trench 2.
# The query reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details
# (no latest-run filter is applied here), joins the loan master and the delinquency
# flags, and keeps disbursed loans that are mature for FPD0
# (flg_mature_fpd0 = 1, i.e. obs_min_inst_def0 >= 1) with trenchCategory 'Trench 2'.
# NOTE(review): the WHERE filters on loanmaster make the `left join` behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
 and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,1135712,e29cca60-8010-460a-bb48-1e6a7079dcec,60811357120015,0.536509,Trench 2,2024-10-18 08:27:09,2024-10-18,2024-10,Train,0,1,Quick
1,1966932,f9eeb94c-98de-4aa6-8130-d9c3547d15a3,60819669320012,0.490764,Trench 2,2024-10-26 01:25:58,2024-10-26,2024-10,Train,1,1,Quick
2,1809371,0a9b699a-e4c4-45c7-aa39-3f4575fddaa7,60818093710025,0.598838,Trench 2,2024-10-17 12:08:14,2024-10-17,2024-10,Train,1,1,Quick
3,2140381,d8bbf7f7-6e6e-431f-b764-093183b5f2c0,60821403810018,0.612205,Trench 2,2024-10-29 12:51:34,2024-10-29,2024-10,Train,1,1,Quick
4,1350714,e3ea3e9c-c5a8-4fce-a8fe-2ca09afe5170,60813507140019,0.554457,Trench 2,2024-10-26 03:13:48,2024-10-26,2024-10,Train,0,1,Quick


In [1660]:
# Snapshot the Train result as df2 so it can be concatenated with df1.
df2 = dfd.copy()

In [1661]:
# Stack the Train rows (df2) on top of the Test rows (df1).
# Empty frames are left out so an empty cohort does not hit pandas' deprecated
# empty-frame concatenation path (FutureWarning today, dtype changes later).
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both queries returned nothing, fall back to df2 so the column layout survives.
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
# NOTE(review): info() below reports customerId and Beta_Cash_Demo_Score as `object`,
# which suggests the Train and Test frames disagree on those dtypes - confirm.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12784 entries, 0 to 12783
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12784 non-null  object        
 1   digitalLoanAccountId   12784 non-null  object        
 2   loanAccountNumber      12784 non-null  object        
 3   Beta_Cash_Demo_Score   12784 non-null  object        
 4   trenchCategory         12784 non-null  object        
 5   appln_submit_datetime  12784 non-null  datetime64[us]
 6   disbursementdate       12784 non-null  dbdate        
 7   Application_month      12784 non-null  object        
 8   Data_selection         12784 non-null  object        
 9   deffpd0                12784 non-null  Int64         
 10  flg_mature_fpd0        12784 non-null  Int64         
 11  new_loan_type          12784 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1662]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
# The info() output above reports this column as `object`, so the conversion is
# needed before the Gini calculation.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [1663]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd0.csv")

In [1664]:
# Gini of Beta_Cash_Demo_Score against the FPD0 default flag, computed per period
# and split by loan type via `new_loan_type`.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1665]:
# Keep the Trench 2 FPD0 Gini table as f0. This reuses the name f0 from the Trench 1
# section, so the Trench 1 frame is no longer reachable after this cell runs.
f0 = gini_results.copy()
f0.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.256061,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
1,2024-10-01,2024-10-31,0.217154,Month,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
2,2024-10-07,2024-10-13,0.272374,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
3,2024-10-14,2024-10-20,0.252783,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
4,2024-10-21,2024-10-27,0.12816,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall


## FPD10

## Test

In [1666]:
# Test cohort, FPD10 target, Trench 2.
# The query reads v1 scores from audit_balance.ml_model_run_details and keeps the most
# recent run per (customerId, digitalLoanAccountId, modelDisplayName). When the run row
# has no trenchCategory it falls back to $.predictions.trenchCategory from the latest
# 'Beta-Cash-Model-response' request payload. It then joins the loan master and the
# delinquency flags and keeps disbursed loans that are mature for FPD10
# (flg_mature_fpd10 = 1, i.e. obs_min_inst_def10 >= 1) with trenchCategory 'Trench 2'.
# NOTE(review): the WHERE filters on loanmaster make the `left join` behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2950005,a6731aca-79ac-49f0-9a63-46ff8f94a791,60829500050011,0.4911882547199866,Trench 2,2025-10-20 15:21:37,2025-10-20,2025-10,Test,0,1,Quick
1,3559952,e09b35d8-81b2-457f-8b66-e48b0ee1cdeb,60835599520021,0.5098734970761051,Trench 2,2025-10-16 10:41:38,2025-10-16,2025-10,Test,1,1,Quick
2,3566656,e3953604-581d-4ce9-b351-5a7adf98e803,60835666560011,0.4917677520844921,Trench 2,2025-10-18 21:20:25,2025-10-18,2025-10,Test,0,1,Quick
3,3487251,9757da0b-da63-48c7-8d7b-cc91fc7efbfa,60834872510012,0.5548085119982442,Trench 2,2025-10-22 19:15:21,2025-10-22,2025-10,Test,0,1,Quick
4,3469628,422201f2-6e10-49b9-b170-54d20e026dc3,60834696280013,0.4132054716434478,Trench 2,2025-10-19 12:57:34,2025-10-19,2025-10,Test,0,1,Quick


In [1667]:
# Snapshot the Test result as df1 before the Train query below reassigns `dfd`.
df1 = dfd.copy()

## Train

In [1668]:
# Train cohort, FPD10 target, Trench 2.
# The query reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details
# (no latest-run filter is applied here), joins the loan master and the delinquency
# flags, and keeps disbursed loans that are mature for FPD10
# (flg_mature_fpd10 = 1, i.e. obs_min_inst_def10 >= 1) with trenchCategory 'Trench 2'.
# NOTE(review): the WHERE filters on loanmaster make the `left join` behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query, then drop rows that are exact duplicates across all columns.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,1079749,62529ec5-16b5-49d6-a4cc-556ce99a7e39,60810797490034,0.33307,Trench 2,2024-10-07 17:41:11,2024-10-08,2024-10,Train,0,1,Quick
1,1697762,7ef1d324-90c2-46e6-a8ad-f6a845f77b73,60816977620013,0.610678,Trench 2,2024-10-16 12:37:25,2024-10-16,2024-10,Train,0,1,Quick
2,1468362,53f77296-0844-46fb-ac2e-b4c54b51b657,60814683620018,0.540928,Trench 2,2024-10-23 11:25:54,2024-10-25,2024-10,Train,1,1,Quick
3,1671544,963e365a-120d-417f-87ee-e2734d6afc4f,60816715440011,0.511591,Trench 2,2024-10-21 17:39:50,2024-10-21,2024-10,Train,0,1,Quick
4,2303538,b5ec318f-16a3-498a-a410-b575ff46c2d7,60823035380015,0.580372,Trench 2,2024-10-20 19:40:50,2024-10-20,2024-10,Train,0,1,Quick


In [1669]:
# Snapshot the Train-cohort result so it can be stacked with the Test snapshot (`df1`).
df2 = dfd.copy(deep=True)

In [1670]:
# Stack Train (df2) on top of Test (df1) with a fresh RangeIndex.
# Empty frames are dropped first: a cohort query can legitimately return zero
# rows, and concatenating an empty frame raises pandas' FutureWarning about
# empty / all-NA entries (future versions will let the empty frame's dtypes
# influence the result). Filtering keeps the current result without the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both cohorts are empty, keep an empty frame so the column schema survives.
df_concat = pd.concat(non_empty_frames, ignore_index=True) if non_empty_frames else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12220 entries, 0 to 12219
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12220 non-null  object        
 1   digitalLoanAccountId   12220 non-null  object        
 2   loanAccountNumber      12220 non-null  object        
 3   Beta_Cash_Demo_Score   12220 non-null  object        
 4   trenchCategory         12220 non-null  object        
 5   appln_submit_datetime  12220 non-null  datetime64[us]
 6   disbursementdate       12220 non-null  dbdate        
 7   Application_month      12220 non-null  object        
 8   Data_selection         12220 non-null  object        
 9   deffpd10               12220 non-null  Int64         
 10  flg_mature_fpd10       12220 non-null  Int64         
 11  new_loan_type          12220 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1671]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Beta_Cash_Demo_Score'] = df_concat['Beta_Cash_Demo_Score'].pipe(pd.to_numeric, errors='coerce')

In [1672]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd10.csv")

In [1673]:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd10', 'FPD10')
# Build the periodic Gini table for the FPD10 flag using the product-type-aware
# helper (defined elsewhere in this notebook), with `new_loan_type` as product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1674]:
# Keep the FPD10 Gini table under its own name; `gini_results` is rebound for the next target.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.453619,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
1,2024-10-01,2024-10-31,0.28765,Month,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
2,2024-10-07,2024-10-13,0.274212,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
3,2024-10-14,2024-10-20,0.334702,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
4,2024-10-21,2024-10-27,0.11477,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall


## FPD30

## Test

In [1675]:
# Test cohort, FPD30 target.
# CTEs:
#   parsed         - Beta-Cash-Demo v1 rows from audit_balance.ml_model_run_details
#   latest_request - latest row (by start_time) per customerId / digitalLoanAccountId / modelDisplayName
#   model_run      - latest 'Beta-Cash-Model-response' request (by publish_time) per
#                    customerId / digitalLoanAccountId / modelName, with the payload made JSON-parsable
#   modelname      - score plus trenchCategory, falling back to the value stored in the request payload
#   deliquency     - default flags (deffpd*, deffspd30, deffstpd30) and the matching
#                    maturity flags per loan account
# The final select keeps disbursed, FPD30-mature, 'Trench 2' loans with a non-null score.
# NOTE(review): the WHERE filters on `loanmaster` columns (and the inner join on
# loanmaster.loanAccountNumber) make the LEFT JOIN behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30, 
    loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query; this rebinds `dfd`, so any earlier result must already be copied.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,3569649,53a8cef3-7e62-4726-850f-d616880df6e2,60835696490014,0.4614560110124293,Trench 2,2025-09-29 21:25:24,2025-09-30,2025-09,Test,1,1,Quick
1,2473116,70dedacb-577f-433e-bfbf-4d57973ff5ea,60824731160018,0.5784879945509713,Trench 2,2025-10-03 20:45:57,2025-10-03,2025-10,Test,0,1,Quick
2,3177894,940a265b-e43b-4985-905f-675f8ea3787b,60831778940018,0.4820533013172368,Trench 2,2025-10-01 18:05:45,2025-10-03,2025-10,Test,0,1,Quick
3,2972716,f3ddbb3e-afea-4f96-861b-768c1abb7e5b,60829727160012,0.5450310640829273,Trench 2,2025-09-29 09:20:58,2025-09-29,2025-09,Test,0,1,Quick
4,3230169,9ed41d23-400d-4654-b0a7-dbd7e514bdfa,60832301690022,0.5116923749333839,Trench 2,2025-10-05 09:38:47,2025-10-05,2025-10,Test,0,1,Quick


In [1676]:
# Snapshot the Test-cohort result before `dfd` is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1677]:
# Train cohort, FPD30 target.
# CTEs:
#   parsed     - Beta-Cash-Demo v1 rows from the training run-details table
#   modelname  - customer / loan ids, start_time, score and trenchCategory
#   deliquency - default flags (deffpd*, deffspd30, deffstpd30) and the matching
#                maturity flags per loan account
# The final select joins the scores to loan_master_table and the delinquency flags
# and keeps disbursed, FPD30-mature, 'Trench 2' loans with a non-null score.
# NOTE(review): the WHERE filters on `loanmaster` columns (and the inner join on
# loanmaster.loanAccountNumber) make the LEFT JOIN behave like an inner join.
# NOTE(review): `calcFeatures` is built in `parsed` but never selected downstream.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
    loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query; this rebinds `dfd`, so any earlier result must already be copied.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,1887248,64808748-8990-4a5c-b472-e4b843b98418,60818872480017,0.59246,Trench 2,2024-10-01 18:46:15,2024-10-01,2024-10,Train,1,1,Quick
1,1025654,8e25f1c7-b28f-41cf-893b-5f6a7608cabc,60810256540053,0.479509,Trench 2,2024-10-06 01:14:17,2024-10-06,2024-10,Train,0,1,Quick
2,2184377,16755597-6692-4efd-83d3-b11eb430ccdf,60821843770018,0.560762,Trench 2,2024-10-13 10:47:11,2024-10-13,2024-10,Train,0,1,Quick
3,1666197,f4a7b5af-0622-4646-9649-9efd9668b4e1,60816661970017,0.515683,Trench 2,2024-10-15 21:30:29,2024-10-15,2024-10,Train,0,1,Quick
4,2799079,bdb191a6-bca6-49e0-8a43-320015402669,60827990790012,0.469311,Trench 2,2024-10-18 19:45:42,2024-10-18,2024-10,Train,0,1,Quick


In [1678]:
# Snapshot the Train-cohort result so it can be stacked with the Test snapshot (`df1`).
df2 = dfd.copy(deep=True)

In [1679]:
# Stack Train (df2) on top of Test (df1) with a fresh RangeIndex.
# Empty frames are dropped first: a cohort query can legitimately return zero
# rows, and concatenating an empty frame raises pandas' FutureWarning about
# empty / all-NA entries (future versions will let the empty frame's dtypes
# influence the result). Filtering keeps the current result without the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both cohorts are empty, keep an empty frame so the column schema survives.
df_concat = pd.concat(non_empty_frames, ignore_index=True) if non_empty_frames else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11832 entries, 0 to 11831
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11832 non-null  object        
 1   digitalLoanAccountId   11832 non-null  object        
 2   loanAccountNumber      11832 non-null  object        
 3   Beta_Cash_Demo_Score   11832 non-null  object        
 4   trenchCategory         11832 non-null  object        
 5   appln_submit_datetime  11832 non-null  datetime64[us]
 6   disbursementdate       11832 non-null  dbdate        
 7   Application_month      11832 non-null  object        
 8   Data_selection         11832 non-null  object        
 9   deffpd30               11832 non-null  Int64         
 10  flg_mature_fpd30       11832 non-null  Int64         
 11  new_loan_type          11832 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1680]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Beta_Cash_Demo_Score'] = df_concat['Beta_Cash_Demo_Score'].pipe(pd.to_numeric, errors='coerce')

In [1681]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd30.csv")

In [1682]:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd30', 'FPD30')
# Build the periodic Gini table for the FPD30 flag using the product-type-aware
# helper (defined elsewhere in this notebook), with `new_loan_type` as product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1683]:
# Keep the FPD30 Gini table under its own name; `gini_results` is rebound for the next target.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.435796,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
1,2024-10-01,2024-10-31,0.282906,Month,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
2,2024-10-07,2024-10-13,0.286411,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
3,2024-10-14,2024-10-20,0.30303,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
4,2024-10-21,2024-10-27,0.135635,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1684]:
# Test cohort, FSPD30 target.
# CTEs:
#   parsed         - Beta-Cash-Demo v1 rows from audit_balance.ml_model_run_details
#   latest_request - latest row (by start_time) per customerId / digitalLoanAccountId / modelDisplayName
#   model_run      - latest 'Beta-Cash-Model-response' request (by publish_time) per
#                    customerId / digitalLoanAccountId / modelName, with the payload made JSON-parsable
#   modelname      - score plus trenchCategory, falling back to the value stored in the request payload
#   deliquency     - default flags (deffpd*, deffspd30, deffstpd30) and the matching
#                    maturity flags per loan account
# The final select keeps disbursed, FSPD30-mature, 'Trench 2' loans with a non-null score.
# NOTE(review): the WHERE filters on `loanmaster` columns (and the inner join on
# loanmaster.loanAccountNumber) make the LEFT JOIN behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query; this rebinds `dfd`, so any earlier result must already be copied.
# NOTE(review): the recorded output of this cell is header-only, i.e. zero rows.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1685]:
# Snapshot the Test-cohort result before `dfd` is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1686]:
# Train cohort, FSPD30 target.
# CTEs:
#   parsed     - Beta-Cash-Demo v1 rows from the training run-details table
#   modelname  - customer / loan ids, start_time, score and trenchCategory
#   deliquency - default flags (deffpd*, deffspd30, deffstpd30) and the matching
#                maturity flags per loan account
# The final select joins the scores to loan_master_table and the delinquency flags
# and keeps disbursed, FSPD30-mature, 'Trench 2' loans with a non-null score.
# NOTE(review): the WHERE filters on `loanmaster` columns (and the inner join on
# loanmaster.loanAccountNumber) make the LEFT JOIN behave like an inner join.
# NOTE(review): `calcFeatures` is built in `parsed` but never selected downstream.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query; this rebinds `dfd`, so any earlier result must already be copied.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2832150,4b85f2fb-eaf6-4dcb-abaa-7015ab857aee,60828321500011,0.43451,Trench 2,2024-10-13 08:26:19,2024-10-13,2024-10,Train,0,1,Quick
1,1438185,c916458c-e7a1-4027-b779-ed873d9497fa,60814381850014,0.598995,Trench 2,2024-10-24 09:37:36,2024-10-24,2024-10,Train,0,1,Quick
2,2627372,1621d7c0-6433-4189-9dc8-c3f989354603,60826273720031,0.516458,Trench 2,2024-10-13 07:06:29,2024-10-13,2024-10,Train,1,1,Quick
3,2706525,b10d994a-a956-473a-82ff-11a5baeafbe7,60827065250016,0.544974,Trench 2,2024-10-17 12:17:49,2024-10-18,2024-10,Train,1,1,Quick
4,2565558,80145ae9-b99b-4bcd-b13d-5810308595b9,60825655580019,0.585182,Trench 2,2024-10-05 01:01:32,2024-10-09,2024-10,Train,0,1,Quick


In [1687]:
# Snapshot the Train-cohort result so it can be stacked with the Test snapshot (`df1`).
df2 = dfd.copy(deep=True)

In [1688]:
# Stack Train (df2) on top of Test (df1) with a fresh RangeIndex.
# Empty frames are dropped first: the Test query for this target returned zero
# rows, and concatenating an empty frame raises pandas' FutureWarning about
# empty / all-NA entries (future versions will let the empty frame's dtypes
# influence the result). Filtering keeps the current result without the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both cohorts are empty, keep an empty frame so the column schema survives.
df_concat = pd.concat(non_empty_frames, ignore_index=True) if non_empty_frames else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11296 entries, 0 to 11295
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11296 non-null  Int64         
 1   digitalLoanAccountId   11296 non-null  object        
 2   loanAccountNumber      11296 non-null  object        
 3   Beta_Cash_Demo_Score   11296 non-null  float64       
 4   trenchCategory         11296 non-null  object        
 5   appln_submit_datetime  11296 non-null  datetime64[us]
 6   disbursementdate       11296 non-null  dbdate        
 7   Application_month      11296 non-null  object        
 8   Data_selection         11296 non-null  object        
 9   deffspd30              11296 non-null  Int64         
 10  flg_mature_fspd_30     11296 non-null  Int64         
 11  new_loan_type          11296 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1689]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Beta_Cash_Demo_Score'] = df_concat['Beta_Cash_Demo_Score'].pipe(pd.to_numeric, errors='coerce')

In [1690]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfspd30.csv")

In [1691]:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffspd30', 'FSPD30')
# Build the periodic Gini table for the FSPD30 flag using the product-type-aware
# helper (defined elsewhere in this notebook), with `new_loan_type` as product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffspd30', 'FSPD30', product_column='new_loan_type'
)

In [1692]:
# Keep the FSPD30 Gini table under its own name; `gini_results` is rebound for the next target.
f3 = gini_results.copy(deep=True)
f3.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.427312,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
1,2024-10-01,2024-10-31,0.309332,Month,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
2,2024-10-07,2024-10-13,0.358267,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
3,2024-10-14,2024-10-20,0.363044,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
4,2024-10-21,2024-10-27,0.163012,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1693]:
# Test cohort, FSTPD30 target.
# CTEs:
#   parsed         - Beta-Cash-Demo v1 rows from audit_balance.ml_model_run_details
#   latest_request - latest row (by start_time) per customerId / digitalLoanAccountId / modelDisplayName
#   model_run      - latest 'Beta-Cash-Model-response' request (by publish_time) per
#                    customerId / digitalLoanAccountId / modelName, with the payload made JSON-parsable
#   modelname      - score plus trenchCategory, falling back to the value stored in the request payload
#   deliquency     - default flags (deffpd*, deffspd30, deffstpd30) and the matching
#                    maturity flags per loan account
# The final select keeps disbursed, FSTPD30-mature, 'Trench 2' loans with a non-null score.
# NOTE(review): the WHERE filters on `loanmaster` columns (and the inner join on
# loanmaster.loanAccountNumber) make the LEFT JOIN behave like an inner join.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

# Run the query; this rebinds `dfd`, so any earlier result must already be copied.
# NOTE(review): the recorded output of this cell is header-only, i.e. zero rows.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1694]:
# Snapshot the Test-cohort result before `dfd` is reassigned by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1695]:
# Train cohort, FSTPD30 target.
# CTEs:
#   parsed     - Beta-Cash-Demo v1 rows from the training run-details table
#   modelname  - customer / loan ids, start_time, score and trenchCategory
#   deliquency - default flags (deffpd*, deffspd30, deffstpd30) and the matching
#                maturity flags per loan account
# The final select joins the scores to loan_master_table and the delinquency flags
# and keeps disbursed, FSTPD30-mature, 'Trench 2' loans with a non-null score.
# NOTE(review): the WHERE filters on `loanmaster` columns (and the inner join on
# loanmaster.loanAccountNumber) make the LEFT JOIN behave like an inner join.
# NOTE(review): `calcFeatures` is built in `parsed` but never selected downstream.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""
# Run the query; this rebinds `dfd`, so any earlier result must already be copied.
dfd = client.query(sq).to_dataframe()
# Drop fully identical rows, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2981294,cf5d5eda-b811-4763-81a5-111bc066c98e,60829812940012,0.539049,Trench 2,2024-10-29 05:26:59,2024-10-30,2024-10,Train,1,1,Quick
1,2568670,18cce9e1-a4ab-4f9c-b765-004c827187f8,60825686700011,0.49817,Trench 2,2024-10-06 00:12:38,2024-10-06,2024-10,Train,0,1,Quick
2,1028842,3f9c4268-9675-42b2-843a-56c65477c83e,60810288420015,0.409858,Trench 2,2024-10-10 23:40:37,2024-10-11,2024-10,Train,1,1,Quick
3,2654000,539d0f95-f016-42f7-8bb4-802d6e84c403,60826540000021,0.563358,Trench 2,2024-10-14 04:58:41,2024-10-14,2024-10,Train,1,1,Quick
4,2362089,e9dca40f-72b6-4b49-b3c3-09dd7cbdb795,60823620890017,0.404933,Trench 2,2024-10-27 23:03:57,2024-10-28,2024-10,Train,1,1,Quick


In [1696]:
# Snapshot the Train-cohort result so it can be stacked with the Test snapshot (`df1`).
df2 = dfd.copy(deep=True)

In [1697]:
# Stack Train (df2) on top of Test (df1) with a fresh RangeIndex.
# Empty frames are dropped first: the Test query for this target returned zero
# rows, and concatenating an empty frame raises pandas' FutureWarning about
# empty / all-NA entries (future versions will let the empty frame's dtypes
# influence the result). Filtering keeps the current result without the warning.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both cohorts are empty, keep an empty frame so the column schema survives.
df_concat = pd.concat(non_empty_frames, ignore_index=True) if non_empty_frames else df2.copy()
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10575 entries, 0 to 10574
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10575 non-null  Int64         
 1   digitalLoanAccountId   10575 non-null  object        
 2   loanAccountNumber      10575 non-null  object        
 3   Beta_Cash_Demo_Score   10575 non-null  float64       
 4   trenchCategory         10575 non-null  object        
 5   appln_submit_datetime  10575 non-null  datetime64[us]
 6   disbursementdate       10575 non-null  dbdate        
 7   Application_month      10575 non-null  object        
 8   Data_selection         10575 non-null  object        
 9   deffstpd30             10575 non-null  Int64         
 10  flg_mature_fstpd_30    10575 non-null  Int64         
 11  new_loan_type          10575 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1698]:
# Coerce the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['Beta_Cash_Demo_Score'] = df_concat['Beta_Cash_Demo_Score'].pipe(pd.to_numeric, errors='coerce')

In [1699]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfstpd30.csv")

In [1700]:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffstpd30', 'FSTPD30')
# Build the periodic Gini table for the FSTPD30 flag using the product-type-aware
# helper (defined elsewhere in this notebook), with `new_loan_type` as product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffstpd30', 'FSTPD30', product_column='new_loan_type'
)

In [1701]:
# Keep the FSTPD30 Gini table under its own name for the combined table built from f0..f4.
f4 = gini_results.copy(deep=True)
f4.head(n=5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.3585,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
1,2024-10-01,2024-10-31,0.315782,Month,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
2,2024-10-07,2024-10-13,0.369074,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
3,2024-10-14,2024-10-20,0.3735,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
4,2024-10-21,2024-10-27,0.203213,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1702]:
import functools

# One Gini frame per bad-rate definition, produced by the preceding sections.
dataframes = [f0, f1, f2, f3, f4]

# Join keys shared by every frame.
# NOTE(review): 'bad_rate' is part of the key and differs between frames, so the
# outer join stacks the frames instead of aligning them: each output row carries
# a value in only one *_gini column. Confirm this long layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'loan_type', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on `common_columns` and return the result."""
    merged = df1.merge(df2, how='outer', on=common_columns)
    return merged


# Fold the list left to right into a single frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Beta_Cash_Demo_Score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Beta_Cash_Demo_Score_FPD10_gini',
       'Beta_Cash_Demo_Score_FPD30_gini',
       'Beta_Cash_Demo_Score_FSPD30_gini',
       'Beta_Cash_Demo_Score_FSTPD30_gini'], dtype=object)

In [1703]:
# Order the columns (keys first, then the five Gini metrics), tag each metric
# with the Trench 2 suffix, and attach the constant labels stored with the table.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
        'Beta_Cash_Demo_Score_FPD0_gini',
        'Beta_Cash_Demo_Score_FPD10_gini',
        'Beta_Cash_Demo_Score_FPD30_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini',
    ]]
    .rename(columns={
        'Beta_Cash_Demo_Score_FPD0_gini': 'Beta_Cash_Demo_Score_FPD0_t2_gini',
        'Beta_Cash_Demo_Score_FPD10_gini': 'Beta_Cash_Demo_Score_FPD10_t2_gini',
        'Beta_Cash_Demo_Score_FPD30_gini': 'Beta_Cash_Demo_Score_FPD30_t2_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini': 'Beta_Cash_Demo_Score_FSPD30_t2_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini': 'Beta_Cash_Demo_Score_FSTPD30_t2_gini',
    })
    .assign(
        Trench_category='Trench 2',
        Model_display_name='beta_demo_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                              datetime64[ns]
end_date                                datetime64[ns]
period                                          object
Model_Name                                      object
version                                         object
loan_type                                       object
bad_rate                                        object
Beta_Cash_Demo_Score_FPD0_t2_gini              float64
Beta_Cash_Demo_Score_FPD10_t2_gini             float64
Beta_Cash_Demo_Score_FPD30_t2_gini             float64
Beta_Cash_Demo_Score_FSPD30_t2_gini            float64
Beta_Cash_Demo_Score_FSTPD30_t2_gini           float64
Trench_category                                 object
Model_display_name                              object
Product_type                                    object
dtype: object

In [1704]:
# Preview the first rows of the table that is about to be uploaded.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Beta_Cash_Demo_Score_FPD0_t2_gini,Beta_Cash_Demo_Score_FPD10_t2_gini,Beta_Cash_Demo_Score_FPD30_t2_gini,Beta_Cash_Demo_Score_FSPD30_t2_gini,Beta_Cash_Demo_Score_FSTPD30_t2_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.256061,,,,,Trench 2,beta_demo_model_cash,CASH
1,2024-10-01,2024-10-31,Month,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.217154,,,,,Trench 2,beta_demo_model_cash,CASH
2,2024-10-07,2024-10-13,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.272374,,,,,Trench 2,beta_demo_model_cash,CASH
3,2024-10-14,2024-10-20,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.252783,,,,,Trench 2,beta_demo_model_cash,CASH
4,2024-10-21,2024-10-27,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.12816,,,,,Trench 2,beta_demo_model_cash,CASH


In [1705]:
# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_cash_t2_v1_gini5"

# WRITE_TRUNCATE replaces the table contents on every run; WRITE_APPEND would add rows instead.
job_config = bigquery.LoadJobConfig()
job_config.write_disposition = bigquery.WriteDisposition.WRITE_TRUNCATE

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a6f62ba0-3c04-4243-aec6-067e83a236f0>

## Trench 3

## FPD0

## Test

In [1706]:
# Trench 3 / FPD0 / Test sample.
# Takes the latest v1 run per customer, loan and display name from
# audit_balance.ml_model_run_details, falls back to the request payload when
# trenchCategory is missing, and keeps disbursed Trench 3 loans with a scored,
# matured FPD0 observation.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2779937,e9655051-8a0d-48b7-b6eb-a20b4bc17faa,60827799370035,0.2520803024716169,Trench 3,2025-10-14 16:56:02,2025-10-14,2025-10,Test,0,1,Quick
1,3282914,4e0e125d-cd31-4544-8bfb-1163aa453707,60832829140024,0.4710114228689109,Trench 3,2025-10-08 22:03:57,2025-10-09,2025-10,Test,0,1,Quick
2,3349533,fbfc3ac9-430b-4177-90c7-b64d7251f0eb,60833495330029,0.4076415874470205,Trench 3,2025-10-15 11:26:59,2025-10-15,2025-10,Test,0,1,Quick
3,2121522,8b090b20-c65c-411f-b26e-42670bddb9ab,60821215220025,0.2971189822982126,Trench 3,2025-11-04 07:29:39,2025-11-04,2025-11,Test,1,1,Quick
4,1107525,8781a0c4-d8db-411b-b8af-acb0bd05ff86,60811075250041,0.3989644483866831,Trench 3,2025-10-23 19:09:13,2025-10-27,2025-10,Test,0,1,Quick


In [1707]:
# Keep the Test sample under its own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1708]:
# Trench 3 / FPD0 / Train sample.
# Reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details
# (no latest-run filter is applied here) and keeps disbursed Trench 3 loans with
# a scored, matured FPD0 observation.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd0 = 1
 and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2517106,ddc34bee-489c-412d-b6fe-1d40658f6d03,60825171060026,0.44141,Trench 3,2024-10-29 18:20:31,2024-10-30,2024-10,Train,1,1,Quick
1,2469891,8a744bd8-d47f-451a-be45-fc1d77b578d0,60824698910101,0.411971,Trench 3,2024-10-30 07:20:30,2024-10-30,2024-10,Train,0,1,Quick
2,2693583,c180fecd-3210-4fbd-8b25-11540828a257,60826935830021,0.382831,Trench 3,2024-10-26 21:15:05,2024-10-27,2024-10,Train,0,1,Quick
3,1801183,7e0a2cff-240f-4385-8618-9fffeea626a8,60818011830047,0.350114,Trench 3,2024-10-09 18:37:59,2024-10-09,2024-10,Train,0,1,Quick
4,2861367,069cd22a-738b-4c1f-9007-b7e2a4e3d1b0,60828613670058,0.414152,Trench 3,2024-10-18 14:27:11,2024-10-18,2024-10,Train,0,1,Quick


In [1709]:
# Keep the Train sample under its own name, separate from the reusable `dfd`.
df2 = dfd.copy(deep=True)

In [1710]:
# Stack Train rows first, then Test rows, with a fresh 0..n-1 index.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11706 entries, 0 to 11705
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11706 non-null  object        
 1   digitalLoanAccountId   11706 non-null  object        
 2   loanAccountNumber      11706 non-null  object        
 3   Beta_Cash_Demo_Score   11706 non-null  object        
 4   trenchCategory         11706 non-null  object        
 5   appln_submit_datetime  11706 non-null  datetime64[us]
 6   disbursementdate       11706 non-null  dbdate        
 7   Application_month      11706 non-null  object        
 8   Data_selection         11706 non-null  object        
 9   deffpd0                11706 non-null  Int64         
 10  flg_mature_fpd0        11706 non-null  Int64         
 11  new_loan_type          11706 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1711]:
# The score column is `object` after the concat; force it to numeric (unparseable -> NaN).
df_concat['Beta_Cash_Demo_Score'] = df_concat['Beta_Cash_Demo_Score'].pipe(
    pd.to_numeric, errors='coerce'
)

In [1712]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd0.csv")

In [1713]:
# Gini of Beta_Cash_Demo_Score against the FPD0 flag, split by `new_loan_type`.
# Variant without the product split (kept for reference):
#   calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd0', 'FPD0')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1714]:
# Keep the FPD0 results under their own name; `gini_results` is reassigned by the next section.
f0 = gini_results.copy(deep=True)
f0.head(5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.061224,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
1,2024-10-01,2024-10-31,0.123607,Month,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
2,2024-10-07,2024-10-13,-0.001855,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
3,2024-10-14,2024-10-20,0.195829,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall
4,2024-10-21,2024-10-27,0.309463,Week,Beta_Cash_Demo_Score,1.1.0,FPD0,Overall


## FPD10

## Test

In [1715]:
# Trench 3 / FPD10 / Test sample.
# Takes the latest v1 run per customer, loan and display name from
# audit_balance.ml_model_run_details, falls back to the request payload when
# trenchCategory is missing, and keeps disbursed Trench 3 loans with a scored,
# matured FPD10 observation.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2064618,394fe099-95ea-4c52-8ef4-43bf8f3f1fc4,60820646180028,0.4252731430511041,Trench 3,2025-09-28 04:48:13,2025-10-04,2025-09,Test,0,1,Quick
1,3230204,a62723e4-ea6f-44a0-a245-b9cfe69450f5,60832302040036,0.4313662758550411,Trench 3,2025-10-18 12:02:23,2025-10-19,2025-10,Test,1,1,Quick
2,2448700,1e2ebc1e-93dc-4e0e-8bc7-c0077788306c,60824487000039,0.4843724981839519,Trench 3,2025-10-19 16:12:37,2025-10-19,2025-10,Test,0,1,Quick
3,1886335,883ef202-4aa4-41d7-99ff-fd6be2ef69db,60818863350053,0.3754082371818635,Trench 3,2025-09-28 18:26:57,2025-09-28,2025-09,Test,0,1,Quick
4,1251240,e95830c6-c829-4ae9-ab9e-58ce476fdf54,60812512400048,0.4478126386295299,Trench 3,2025-10-20 16:04:43,2025-10-21,2025-10,Test,0,1,Quick


In [1716]:
# Keep the Test sample under its own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1717]:
# Trench 3 / FPD10 / Train sample.
# Reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details
# (no latest-run filter is applied here) and keeps disbursed Trench 3 loans with
# a scored, matured FPD10 observation.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2517106,ddc34bee-489c-412d-b6fe-1d40658f6d03,60825171060026,0.44141,Trench 3,2024-10-29 18:20:31,2024-10-30,2024-10,Train,0,1,Quick
1,2469891,8a744bd8-d47f-451a-be45-fc1d77b578d0,60824698910101,0.411971,Trench 3,2024-10-30 07:20:30,2024-10-30,2024-10,Train,0,1,Quick
2,2693583,c180fecd-3210-4fbd-8b25-11540828a257,60826935830021,0.382831,Trench 3,2024-10-26 21:15:05,2024-10-27,2024-10,Train,0,1,Quick
3,1801183,7e0a2cff-240f-4385-8618-9fffeea626a8,60818011830047,0.350114,Trench 3,2024-10-09 18:37:59,2024-10-09,2024-10,Train,0,1,Quick
4,2861367,069cd22a-738b-4c1f-9007-b7e2a4e3d1b0,60828613670058,0.414152,Trench 3,2024-10-18 14:27:11,2024-10-18,2024-10,Train,0,1,Quick


In [1718]:
# Keep the Train sample under its own name, separate from the reusable `dfd`.
df2 = dfd.copy(deep=True)

In [1719]:
# Stack Train rows first, then Test rows, with a fresh 0..n-1 index.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11455 entries, 0 to 11454
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11455 non-null  object        
 1   digitalLoanAccountId   11455 non-null  object        
 2   loanAccountNumber      11455 non-null  object        
 3   Beta_Cash_Demo_Score   11455 non-null  object        
 4   trenchCategory         11455 non-null  object        
 5   appln_submit_datetime  11455 non-null  datetime64[us]
 6   disbursementdate       11455 non-null  dbdate        
 7   Application_month      11455 non-null  object        
 8   Data_selection         11455 non-null  object        
 9   deffpd10               11455 non-null  Int64         
 10  flg_mature_fpd10       11455 non-null  Int64         
 11  new_loan_type          11455 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1720]:
# The score column is `object` after the concat; force it to numeric (unparseable -> NaN).
df_concat['Beta_Cash_Demo_Score'] = df_concat['Beta_Cash_Demo_Score'].pipe(
    pd.to_numeric, errors='coerce'
)

In [1721]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd10.csv")

In [1722]:
# Gini of Beta_Cash_Demo_Score against the FPD10 flag, split by `new_loan_type`.
# Variant without the product split (kept for reference):
#   calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1723]:
# Keep the FPD10 results under their own name, separate from the reusable `gini_results`.
f1 = gini_results.copy(deep=True)
f1.head(5)

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.211931,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
1,2024-10-01,2024-10-31,0.235389,Month,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
2,2024-10-07,2024-10-13,0.145455,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
3,2024-10-14,2024-10-20,0.281073,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall
4,2024-10-21,2024-10-27,0.323349,Week,Beta_Cash_Demo_Score,1.1.0,FPD10,Overall


## FPD30

## Test

In [1724]:
# Trench 3 / FPD30 / Test sample.
# Takes the latest v1 run per customer, loan and display name from
# audit_balance.ml_model_run_details, falls back to the request payload when
# trenchCategory is missing, and keeps disbursed Trench 3 loans with a scored,
# matured FPD30 observation.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2370824,d59b58cb-5093-4cac-95ba-90397c2bb670,60823708240045,0.525362386293769,Trench 3,2025-09-28 15:24:26,2025-09-28,2025-09,Test,0,1,Quick
1,2904565,4d1b2318-42fb-429b-b833-aedae0f0c07e,60829045650026,0.3797452878925412,Trench 3,2025-09-25 20:22:21,2025-09-25,2025-09,Test,0,1,Quick
2,2523031,e6b7c0e2-e96f-457e-875e-87eb3063bfdb,60825230310023,0.4754322931445026,Trench 3,2025-10-04 20:47:58,2025-10-04,2025-10,Test,0,1,Quick
3,3140719,f755aeb4-fbc4-4d90-975d-8980f8da5a1f,60831407190029,0.4136465182333435,Trench 3,2025-09-27 20:07:11,2025-09-27,2025-09,Test,0,1,Quick
4,2098110,a6161ce3-1a71-4da0-b435-59a575c40038,60820981100034,0.2481512303174753,Trench 3,2025-09-30 23:03:59,2025-10-01,2025-09,Test,0,1,Quick


In [1725]:
# Keep the Test sample under its own name; `dfd` is reused by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1726]:
# Trench 3 / FPD30 / Train sample.
# Reads v1 scores from dap_ds_poweruser_playground.ml_training_model_run_details
# (no latest-run filter is applied here) and keeps disbursed Trench 3 loans with
# a scored, matured FPD30 observation.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run the query and drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2517106,ddc34bee-489c-412d-b6fe-1d40658f6d03,60825171060026,0.44141,Trench 3,2024-10-29 18:20:31,2024-10-30,2024-10,Train,0,1,Quick
1,2469891,8a744bd8-d47f-451a-be45-fc1d77b578d0,60824698910101,0.411971,Trench 3,2024-10-30 07:20:30,2024-10-30,2024-10,Train,0,1,Quick
2,2693583,c180fecd-3210-4fbd-8b25-11540828a257,60826935830021,0.382831,Trench 3,2024-10-26 21:15:05,2024-10-27,2024-10,Train,0,1,Quick
3,1801183,7e0a2cff-240f-4385-8618-9fffeea626a8,60818011830047,0.350114,Trench 3,2024-10-09 18:37:59,2024-10-09,2024-10,Train,0,1,Quick
4,2861367,069cd22a-738b-4c1f-9007-b7e2a4e3d1b0,60828613670058,0.414152,Trench 3,2024-10-18 14:27:11,2024-10-18,2024-10,Train,0,1,Quick


In [1727]:
# Keep the Train-period rows as df2 so they can be stacked with the Test rows (df1).
df2 = dfd.copy()

In [1728]:
# Stack the Train rows (df2) ahead of the Test rows (df1) under a fresh 0..n-1 index,
# then print the schema summary to check dtypes and null counts.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11110 entries, 0 to 11109
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11110 non-null  object        
 1   digitalLoanAccountId   11110 non-null  object        
 2   loanAccountNumber      11110 non-null  object        
 3   Beta_Cash_Demo_Score   11110 non-null  object        
 4   trenchCategory         11110 non-null  object        
 5   appln_submit_datetime  11110 non-null  datetime64[us]
 6   disbursementdate       11110 non-null  dbdate        
 7   Application_month      11110 non-null  object        
 8   Data_selection         11110 non-null  object        
 9   deffpd30               11110 non-null  Int64         
 10  flg_mature_fpd30       11110 non-null  Int64         
 11  new_loan_type          11110 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

In [1729]:
# The score column is object dtype after the concat (see the .info() output above);
# cast it to numeric. errors='coerce' turns anything unparseable into NaN rather than raising.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [1730]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfpd30.csv")

In [1731]:
# Gini of Beta_Cash_Demo_Score against the deffpd30 label, tagged 'FPD30', with
# new_loan_type passed as the product column. The helper is defined earlier in the notebook.
# Earlier variant without the product split, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1732]:
# Keep the FPD30 gini table as f2; it is one of the frames merged in the combining cell further down.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.211931,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
1,2024-10-01,2024-10-31,0.243771,Month,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
2,2024-10-07,2024-10-13,0.169643,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
3,2024-10-14,2024-10-20,0.236058,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall
4,2024-10-21,2024-10-27,0.415254,Week,Beta_Cash_Demo_Score,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1733]:
# FSPD30 / Test: scores for the Beta Cash Demo model (modelVersionId 'v1') from the
# model-run audit table.
#   * latest_request keeps the most recent run per (customerId, digitalLoanAccountId, modelDisplayName).
#   * model_run keeps the most recent 'Beta-Cash-Model-response' request per loan; its payload
#     supplies trenchCategory when the run record has none.
# Rows kept: disbursed loans in Trench 3 with a non-null score whose FSPD30 observation
# window has matured (flg_mature_fspd_30 = 1). The label column is deffspd30.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run on BigQuery, then discard rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1734]:
# Keep the Test-period result as df1 (the rendered output above is header-only, i.e. no rows
# in this run); `dfd` is reassigned by the Train query in the next cell.
df1 = dfd.copy()

## Train

In [1735]:
# FSPD30 / Train: scores for the Beta Cash Demo model (modelVersionId 'v1') read from the
# training run table, joined to the loan master and to the delinquency flags.
# Rows kept: disbursed loans in Trench 3 with a non-null score whose FSPD30 observation
# window has matured (flg_mature_fspd_30 = 1). The label column is deffspd30.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
# Run on BigQuery, then discard rows that are exact duplicates across every column
# (this query has no de-duplication step of its own).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2280412,35941f72-330f-4b17-b33d-1de94cad37fe,60822804120022,0.419987,Trench 3,2024-10-09 12:11:56,2024-10-09,2024-10,Train,0,1,Quick
1,2882177,7c64d0ae-4c10-41ab-b3e0-a13496b94fae,60828821770024,0.522058,Trench 3,2024-10-05 08:28:02,2024-10-05,2024-10,Train,0,1,Quick
2,1693732,24a4e605-3b10-4ff6-8407-3fdb58d280be,60816937320037,0.28656,Trench 3,2024-10-30 20:11:30,2024-10-30,2024-10,Train,0,1,Quick
3,2333583,81375c7a-9033-44c3-be5b-308b77e58227,60823335830023,0.377926,Trench 3,2024-10-21 11:41:04,2024-10-21,2024-10,Train,0,1,Quick
4,2120994,cbc200bb-5e9f-48db-bfa6-905ed12649ff,60821209940029,0.440321,Trench 3,2024-10-12 18:25:59,2024-10-12,2024-10,Train,1,1,Quick


In [1736]:
# Keep the Train-period rows as df2 so they can be stacked with the Test rows (df1).
df2 = dfd.copy()

In [1737]:
# Stack the Train rows (df2) ahead of the Test rows (df1) under a fresh 0..n-1 index.
# The Test query can legitimately return no rows (it did in this run), and pandas warns
# when an empty frame is passed to concat because the way empty entries influence the
# result dtypes is changing. Filtering empty frames out first is the remedy pandas
# recommends: it keeps the current result and removes the warning.
_non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if _non_empty_frames:
    df_concat = pd.concat(_non_empty_frames, ignore_index=True)
else:
    # Both periods are empty: keep the column layout so later column lookups still work.
    df_concat = df2.iloc[0:0].reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10099 entries, 0 to 10098
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10099 non-null  Int64         
 1   digitalLoanAccountId   10099 non-null  object        
 2   loanAccountNumber      10099 non-null  object        
 3   Beta_Cash_Demo_Score   10099 non-null  float64       
 4   trenchCategory         10099 non-null  object        
 5   appln_submit_datetime  10099 non-null  datetime64[us]
 6   disbursementdate       10099 non-null  dbdate        
 7   Application_month      10099 non-null  object        
 8   Data_selection         10099 non-null  object        
 9   deffspd30              10099 non-null  Int64         
 10  flg_mature_fspd_30     10099 non-null  Int64         
 11  new_loan_type          10099 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1738]:
# Defensive cast of the score column to numeric (it is already float64 in the .info()
# output above). errors='coerce' turns anything unparseable into NaN rather than raising.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [1739]:
# df_concat.to_csv(r"Beta_Cash_Demo_Scoretrenchnunfspd30.csv")

In [1740]:
# Gini of Beta_Cash_Demo_Score against the deffspd30 label, tagged 'FSPD30', with
# new_loan_type passed as the product column. The helper is defined earlier in the notebook.
# Earlier variant without the product split, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffspd30', 'FSPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffspd30', 'FSPD30', product_column='new_loan_type'
)

In [1741]:
# Keep the FSPD30 gini table as f3; it is one of the frames merged in the combining cell further down.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.259831,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
1,2024-10-01,2024-10-31,0.283025,Month,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
2,2024-10-07,2024-10-13,0.002157,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
3,2024-10-14,2024-10-20,0.363873,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall
4,2024-10-21,2024-10-27,0.404337,Week,Beta_Cash_Demo_Score,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1742]:
# FSTPD30 / Test: scores for the Beta Cash Demo model (modelVersionId 'v1') from the
# model-run audit table.
#   * latest_request keeps the most recent run per (customerId, digitalLoanAccountId, modelDisplayName).
#   * model_run keeps the most recent 'Beta-Cash-Model-response' request per loan; its payload
#     supplies trenchCategory when the run record has none.
# Rows kept: disbursed loans in Trench 3 with a non-null score whose FSTPD30 observation
# window has matured (flg_mature_fstpd_30 = 1). The label column is deffstpd30.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),
model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory
  from latest_request p 
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
   ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run on BigQuery, then discard rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1743]:
# Keep the Test-period result as df1 (the rendered output above is header-only, i.e. no rows
# in this run); `dfd` is reassigned by the Train query in the next cell.
df1 = dfd.copy()

## Train

In [1744]:
# FSTPD30 / Train: scores for the Beta Cash Demo model read from the training run table,
# joined to the loan master and to the delinquency flags.
# Rows kept: disbursed loans in Trench 3 with a non-null score whose FSTPD30 observation
# window has matured (flg_mature_fstpd_30 = 1). The label column is deffstpd30.
#
# The population filters must agree with the FSTPD30 Test query and with the FPD30 / FSPD30
# Train queries of this section, because the resulting gini is published as the Trench 3 / v1
# table: modelVersionId = 'v1' and trenchCategory = 'Trench 3'. Filtering on another trench
# here would compute the FSTPD30 gini on a different population than every other column.
sq = """ 
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in ('Beta-Cash-Demo-Model', 'beta_demo_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_Cash_Demo_Score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_Cash_Demo_Score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_Cash_Demo_Score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""
dfd = client.query(sq).to_dataframe()
# Discard rows that are exact duplicates across every column
# (this query has no de-duplication step of its own).
dfd = dfd.drop_duplicates(keep='first')
# Guard against the trench filter drifting again; vacuously true for an empty result.
assert dfd['trenchCategory'].eq('Trench 3').all(), "Train frame contains rows outside Trench 3"
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,Beta_Cash_Demo_Score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2858650,51043bbc-7303-4a5d-9c8c-7c592c48ac80,60828586500019,0.444915,Trench 2,2024-10-22 11:38:03,2024-10-22,2024-10,Train,1,1,Quick
1,2537031,3e288216-a1de-4a3d-80e2-65a405c94672,60825370310016,0.451184,Trench 2,2024-10-11 20:32:24,2024-10-11,2024-10,Train,1,1,Quick
2,1880481,0750523f-bea0-4db7-8737-51cf32dd9bff,60818804810024,0.5983,Trench 2,2024-10-23 07:06:21,2024-10-23,2024-10,Train,1,1,Quick
3,2650571,9fb8443f-0099-45a4-9581-3d0b6497ac1b,60826505710025,0.474646,Trench 2,2024-10-15 11:41:13,2024-10-15,2024-10,Train,1,1,Quick
4,2768574,6bea5b9b-fc9f-4c15-b72b-833a65492991,60827685740013,0.558451,Trench 2,2024-10-27 14:37:42,2024-10-27,2024-10,Train,0,1,Quick


In [1745]:
# Keep the Train-period rows as df2 so they can be stacked with the Test rows (df1).
df2 = dfd.copy()

In [1746]:
# Stack the Train rows (df2) ahead of the Test rows (df1) under a fresh 0..n-1 index.
# The Test query can legitimately return no rows (it did in this run), and pandas warns
# when an empty frame is passed to concat because the way empty entries influence the
# result dtypes is changing. Filtering empty frames out first is the remedy pandas
# recommends: it keeps the current result and removes the warning.
_non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
if _non_empty_frames:
    df_concat = pd.concat(_non_empty_frames, ignore_index=True)
else:
    # Both periods are empty: keep the column layout so later column lookups still work.
    df_concat = df2.iloc[0:0].reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10575 entries, 0 to 10574
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10575 non-null  Int64         
 1   digitalLoanAccountId   10575 non-null  object        
 2   loanAccountNumber      10575 non-null  object        
 3   Beta_Cash_Demo_Score   10575 non-null  float64       
 4   trenchCategory         10575 non-null  object        
 5   appln_submit_datetime  10575 non-null  datetime64[us]
 6   disbursementdate       10575 non-null  dbdate        
 7   Application_month      10575 non-null  object        
 8   Data_selection         10575 non-null  object        
 9   deffstpd30             10575 non-null  Int64         
 10  flg_mature_fstpd_30    10575 non-null  Int64         
 11  new_loan_type          10575 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1747]:
# Defensive cast of the score column to numeric (it is already float64 in the .info()
# output above). errors='coerce' turns anything unparseable into NaN rather than raising.
df_concat = df_concat.assign(
    Beta_Cash_Demo_Score=pd.to_numeric(df_concat['Beta_Cash_Demo_Score'], errors='coerce')
)

In [1748]:
# Gini of Beta_Cash_Demo_Score against the deffstpd30 label, tagged 'FSTPD30', with
# new_loan_type passed as the product column. The helper is defined earlier in the notebook.
# Earlier variant without the product split, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'Beta_Cash_Demo_Score', 'deffstpd30', 'FSTPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'Beta_Cash_Demo_Score', 'deffstpd30', 'FSTPD30', product_column='new_loan_type'
)

In [1749]:
# Keep the FSTPD30 gini table as f4; it is the last of the frames merged in the combining cell below.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,Beta_Cash_Demo_Score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-09-30,2024-10-06,0.3585,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
1,2024-10-01,2024-10-31,0.315782,Month,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
2,2024-10-07,2024-10-13,0.369074,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
3,2024-10-14,2024-10-20,0.3735,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall
4,2024-10-21,2024-10-27,0.203213,Week,Beta_Cash_Demo_Score,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1750]:
import functools

# One gini table per bad-rate definition (FPD0, FPD10, FPD30, FSPD30, FSTPD30),
# each produced in its own section above.
dataframes = [f0, f1, f2, f3, f4]
# Descriptor columns present in every gini table; used as the join key.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'loan_type', 'bad_rate']

def merge_dataframes(df1, df2):
    """Outer-join two gini tables on ``common_columns`` and return the combined frame."""
    return df1.merge(df2, on=common_columns, how='outer')

# Fold the tables left to right into one frame.
# NOTE(review): 'bad_rate' is part of the join key and holds a different value in each
# table, so rows from different tables never match; the outer join therefore stacks the
# tables and each row carries a single populated *_gini column (visible as NaN in
# final_df.head()). Confirm this long layout is what consumers of the table expect.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'Beta_Cash_Demo_Score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'Beta_Cash_Demo_Score_FPD10_gini',
       'Beta_Cash_Demo_Score_FPD30_gini',
       'Beta_Cash_Demo_Score_FSPD30_gini',
       'Beta_Cash_Demo_Score_FSTPD30_gini'], dtype=object)

In [1751]:
# Shape the combined gini table for publishing:
#   1. put the descriptor columns first, followed by the five gini columns;
#   2. tag each gini column with the trench suffix (_t3);
#   3. attach the constant trench / model / product descriptor columns.
final_df = (
    final_df[[
        'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
        'Beta_Cash_Demo_Score_FPD0_gini',
        'Beta_Cash_Demo_Score_FPD10_gini',
        'Beta_Cash_Demo_Score_FPD30_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini',
    ]]
    .rename(columns={
        'Beta_Cash_Demo_Score_FPD0_gini': 'Beta_Cash_Demo_Score_FPD0_t3_gini',
        'Beta_Cash_Demo_Score_FPD10_gini': 'Beta_Cash_Demo_Score_FPD10_t3_gini',
        'Beta_Cash_Demo_Score_FPD30_gini': 'Beta_Cash_Demo_Score_FPD30_t3_gini',
        'Beta_Cash_Demo_Score_FSPD30_gini': 'Beta_Cash_Demo_Score_FSPD30_t3_gini',
        'Beta_Cash_Demo_Score_FSTPD30_gini': 'Beta_Cash_Demo_Score_FSTPD30_t3_gini',
    })
    .assign(
        Trench_category='Trench 3',
        Model_display_name='beta_demo_model_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                              datetime64[ns]
end_date                                datetime64[ns]
period                                          object
Model_Name                                      object
version                                         object
loan_type                                       object
bad_rate                                        object
Beta_Cash_Demo_Score_FPD0_t3_gini              float64
Beta_Cash_Demo_Score_FPD10_t3_gini             float64
Beta_Cash_Demo_Score_FPD30_t3_gini             float64
Beta_Cash_Demo_Score_FSPD30_t3_gini            float64
Beta_Cash_Demo_Score_FSTPD30_t3_gini           float64
Trench_category                                 object
Model_display_name                              object
Product_type                                    object
dtype: object

In [1752]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,Beta_Cash_Demo_Score_FPD0_t3_gini,Beta_Cash_Demo_Score_FPD10_t3_gini,Beta_Cash_Demo_Score_FPD30_t3_gini,Beta_Cash_Demo_Score_FSPD30_t3_gini,Beta_Cash_Demo_Score_FSTPD30_t3_gini,Trench_category,Model_display_name,Product_type
0,2024-09-30,2024-10-06,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.061224,,,,,Trench 3,beta_demo_model_cash,CASH
1,2024-10-01,2024-10-31,Month,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.123607,,,,,Trench 3,beta_demo_model_cash,CASH
2,2024-10-07,2024-10-13,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,-0.001855,,,,,Trench 3,beta_demo_model_cash,CASH
3,2024-10-14,2024-10-20,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.195829,,,,,Trench 3,beta_demo_model_cash,CASH
4,2024-10-21,2024-10-27,Week,Beta_Cash_Demo_Score,1.1.0,Overall,FPD0,0.309463,,,,,Trench 3,beta_demo_model_cash,CASH


In [1753]:
# Upload to BigQuery: publish final_df as the Trench 3 gini table for this model.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_demo_model_cash_t3_v1_gini5"
# WRITE_TRUNCATE replaces whatever the table already holds; "WRITE_APPEND" would add rows instead.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Block until the load job has completed



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=538551a3-5a0c-4984-b986-3495ddc90c90>

# Beta-Cash-AppScore-Model

## Trench 1

## FPD0

## Test

In [1754]:
# FPD0 / Test, Trench 1: scores for the Beta Cash AppScore model (modelVersionId 'v1') from
# the model-run audit table.
#   * prediction is cleaned into JSON text and the score is read from its "$.combined_score"
#     field; SAFE_CAST yields NULL when the value is not a valid FLOAT64.
#   * latest_request keeps the most recent run per (customerId, digitalLoanAccountId, modelDisplayName).
#   * model_run keeps the most recent 'Beta-Cash-Model-response' request per loan; its payload
#     supplies trenchCategory when the run record has none.
# Rows kept: disbursed loans in Trench 1 with a non-null score whose FPD0 observation
# window has matured (flg_mature_fpd0 = 1). The label column is deffpd0.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run on BigQuery, then discard rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,3741409,2affef55-defa-4941-801d-25e0ecac81ff,60837414090011,0.439414,Trench 1,2025-10-13 15:24:48,2025-10-16,2025-10,Test,0,1,Quick
1,3737866,494e4ca0-aebb-4bfe-a5cf-f1ea2684569e,60837378660011,0.440007,Trench 1,2025-10-11 18:48:39,2025-10-11,2025-10,Test,0,1,Quick
2,3783526,c1f668a4-956f-479c-9191-7cb576be5995,60837835260012,0.514597,Trench 1,2025-11-01 08:53:37,2025-11-01,2025-11,Test,0,1,Quick
3,3785883,e18bb606-aa0a-47f3-9377-09cd03cb6c70,60837858830018,0.406104,Trench 1,2025-11-02 11:03:52,2025-11-02,2025-11,Test,0,1,Quick
4,3749658,4f798eee-1283-4845-93d5-f68abd7b3145,60837496580017,0.485882,Trench 1,2025-10-17 11:48:34,2025-10-17,2025-10,Test,1,1,Quick


In [1755]:
# Keep the Test-period rows aside as df1; `dfd` is reassigned by the Train query in the next cell.
df1 = dfd.copy()

## Train

In [1756]:
# FPD0 / Train, Trench 1: scores for the Beta Cash AppScore model (modelVersionId 'v1') read
# from the training run table, where `prediction` is used directly as the score.
# Rows kept: disbursed loans in Trench 1 with a non-null score whose FPD0 observation
# window has matured (flg_mature_fpd0 = 1). The label column is deffpd0.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Run on BigQuery, then discard rows that are exact duplicates across every column
# (this query has no de-duplication step of its own).
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2381377,1b7bf03b-db87-4ff0-abb9-0351a0ec5f04,60823813770018,0.427711,Trench 1,2024-01-27 14:11:24,2024-01-27,2024-01,Train,0,1,Flex
1,2370574,890bb0f2-8a9d-45fc-bd14-e850d00883fa,60823705740012,0.414841,Trench 1,2024-01-21 23:49:35,2024-02-01,2024-01,Train,0,1,Flex
2,2368914,786eadce-badf-480a-999d-6a62c1eb9fec,60823689140019,0.629461,Trench 1,2024-01-14 23:37:26,2024-01-15,2024-01,Train,1,1,Quick
3,2372767,802a5a31-636c-4984-92cf-3ce5e373ee1d,60823727670017,0.485654,Trench 1,2024-01-22 18:30:45,2024-01-26,2024-01,Train,1,1,Quick
4,2381658,74102446-ccc9-41f5-89d3-0915382db68e,60823816580014,0.362734,Trench 1,2024-01-27 17:15:16,2024-01-27,2024-01,Train,1,1,Quick


In [1757]:
# Keep the Train-window rows (FPD0 target) under df2, independent of later changes to dfd.
df2 = dfd.copy(deep=True)

In [1758]:
# Stack the Train frame (df2) on top of the Test frame (df1) and renumber the rows.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
# Print dtypes and non-null counts of the combined frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11369 entries, 0 to 11368
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11369 non-null  object        
 1   digitalLoanAccountId   11369 non-null  object        
 2   loanAccountNumber      11369 non-null  object        
 3   beta_cash_app_score    11369 non-null  float64       
 4   trenchCategory         11369 non-null  object        
 5   appln_submit_datetime  11369 non-null  datetime64[us]
 6   disbursementdate       11369 non-null  dbdate        
 7   Application_month      11369 non-null  object        
 8   Data_selection         11369 non-null  object        
 9   deffpd0                11369 non-null  Int64         
 10  flg_mature_fpd0        11369 non-null  Int64         
 11  new_loan_type          11369 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), objec

In [1759]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = pd.to_numeric(
    arg=df_concat['beta_cash_app_score'],
    errors='coerce',
)

In [1760]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fpd0.csv")

In [1761]:
# Build the Gini table for beta_cash_app_score against deffpd0 (labelled 'FPD0'),
# passing new_loan_type as the product column.
# Variant without the product column:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd0', 'FPD0')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1762]:
# Keep the FPD0 Gini table under its own name; gini_results is reassigned for the next target.
f0 = gini_results.copy(deep=True)
f0.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,-0.076923,Week,beta_cash_app_score,1.1.0,FPD0,Overall
1,2024-01-01,2024-01-31,0.255906,Month,beta_cash_app_score,1.1.0,FPD0,Overall
2,2024-01-08,2024-01-14,0.875,Week,beta_cash_app_score,1.1.0,FPD0,Overall
3,2024-01-15,2024-01-21,0.430233,Week,beta_cash_app_score,1.1.0,FPD0,Overall
4,2024-01-22,2024-01-28,-0.051429,Week,beta_cash_app_score,1.1.0,FPD0,Overall


## FPD10

## Test

In [1763]:
# Test-window pull for the FPD10 target.
# - parsed/latest_request: rows from audit_balance.ml_model_run_details for the two
#   listed display names at modelVersionId 'v1', keeping the most recent start_time
#   per (customerId, digitalLoanAccountId, modelDisplayName).
# - model_run: most recent 'Beta-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName) from ml_request_details.
# - modelname: score is $.combined_score parsed from the cleaned prediction JSON;
#   trenchCategory falls back to $.predictions.trenchCategory of the request payload.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd10 = 1 and
#   trenchCategory 'Trench 1', labelled Data_selection = 'Test'.
# NOTE(review): latest_request partitions by modelDisplayName as well, so a loan
# scored under both display names would yield two rows — confirm this is intended.
# NOTE(review): model_run is deduplicated per customerId + digitalLoanAccountId but
# joined on digitalLoanAccountId only.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join because
# the WHERE clause and the following INNER JOIN both require loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# drop_duplicates only removes rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3740617,3d6810c3-1e9f-4b6b-8aee-db10b45e9fd0,60837406170012,0.503979,Trench 1,2025-10-12 21:21:54,2025-10-12,2025-10,Test,0,1,Quick
1,3754945,5bdacd23-a64d-496e-b60f-e007430f5324,60837549450016,0.454347,Trench 1,2025-10-19 17:28:51,2025-10-19,2025-10,Test,0,1,Quick
2,3733540,173ac36b-5365-4565-b7be-d09a229b92c7,60837335400015,0.337855,Trench 1,2025-10-10 07:29:04,2025-10-10,2025-10,Test,0,1,Quick
3,3764857,f3d07f02-4f3a-47b8-ac17-e196e7129740,60837648570012,0.466066,Trench 1,2025-10-24 11:33:28,2025-10-24,2025-10,Test,0,1,Quick
4,3740698,62e6d728-dc2d-43bb-abb9-76090f007f11,60837406980019,0.472274,Trench 1,2025-10-12 22:54:38,2025-10-13,2025-10,Test,1,1,Quick


In [1764]:
# Keep the Test-window rows (FPD10 target) under df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1765]:
# Train-window pull for the FPD10 target.
# - parsed/modelname: rows from dap_ds_poweruser_playground.ml_training_model_run_details
#   for the two listed display names at modelVersionId 'v1'; `prediction` is used
#   directly as beta_cash_app_score.
# - deliquency: derives default flags (deffpd0/10/30, deffspd30, deffstpd30) and the
#   matching flg_mature_* flags from loan_deliquency_data.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd10 = 1 and
#   trenchCategory 'Trench 1', labelled Data_selection = 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join because
# the WHERE clause and the following INNER JOIN both require loanmaster columns.
# NOTE(review): there is no per-loan dedup in this SQL — confirm the training table
# holds one row per loan.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# drop_duplicates only removes rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2379347,e1f4494e-207c-4eba-a53a-f49cb8693cd9,60823793470019,0.461975,Trench 1,2024-01-25 13:40:38,2024-01-25,2024-01,Train,0,1,Quick
1,2382039,6f54addf-63fa-4371-9e0b-855828ecae08,60823820390017,0.498382,Trench 1,2024-01-28 08:22:12,2024-01-28,2024-01,Train,0,1,Flex
2,2375304,3e17511a-ae6b-4ad0-a1be-c0e88b51d23d,60823753040012,0.682634,Trench 1,2024-01-21 14:01:25,2024-01-24,2024-01,Train,0,1,Quick
3,2362062,9f958c39-9d7a-4357-9709-f0f9ef893cc4,60823620620016,0.588591,Trench 1,2024-01-07 22:39:06,2024-01-08,2024-01,Train,1,1,Quick
4,2371598,a8548a1e-ed9d-427c-b309-2bf04f3586a9,60823715980014,0.512296,Trench 1,2024-01-17 16:13:44,2024-01-18,2024-01,Train,0,1,Quick


In [1766]:
# Keep the Train-window rows (FPD10 target) under df2, independent of later changes to dfd.
df2 = dfd.copy(deep=True)

In [1767]:
# Stack the Train frame (df2) on top of the Test frame (df1) and renumber the rows.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
# Print dtypes and non-null counts of the combined frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10404 entries, 0 to 10403
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10404 non-null  object        
 1   digitalLoanAccountId   10404 non-null  object        
 2   loanAccountNumber      10404 non-null  object        
 3   beta_cash_app_score    10404 non-null  float64       
 4   trenchCategory         10404 non-null  object        
 5   appln_submit_datetime  10404 non-null  datetime64[us]
 6   disbursementdate       10404 non-null  dbdate        
 7   Application_month      10404 non-null  object        
 8   Data_selection         10404 non-null  object        
 9   deffpd10               10404 non-null  Int64         
 10  flg_mature_fpd10       10404 non-null  Int64         
 11  new_loan_type          10404 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), objec

In [1768]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = pd.to_numeric(
    arg=df_concat['beta_cash_app_score'],
    errors='coerce',
)

In [1769]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fpd10.csv")

In [1770]:
# Build the Gini table for beta_cash_app_score against deffpd10 (labelled 'FPD10'),
# passing new_loan_type as the product column.
# Variant without the product column:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1771]:
# Keep the FPD10 Gini table under its own name; gini_results is reassigned for the next target.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,0.071429,Week,beta_cash_app_score,1.1.0,FPD10,Overall
1,2024-01-01,2024-01-31,0.289216,Month,beta_cash_app_score,1.1.0,FPD10,Overall
2,2024-01-08,2024-01-14,0.69697,Week,beta_cash_app_score,1.1.0,FPD10,Overall
3,2024-01-15,2024-01-21,0.691489,Week,beta_cash_app_score,1.1.0,FPD10,Overall
4,2024-01-22,2024-01-28,-0.081481,Week,beta_cash_app_score,1.1.0,FPD10,Overall


## FPD30

## Test

In [1772]:
# Test-window pull for the FPD30 target.
# - parsed/latest_request: rows from audit_balance.ml_model_run_details for the two
#   listed display names at modelVersionId 'v1', keeping the most recent start_time
#   per (customerId, digitalLoanAccountId, modelDisplayName).
# - model_run: most recent 'Beta-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName) from ml_request_details.
# - modelname: score is $.combined_score parsed from the cleaned prediction JSON;
#   trenchCategory falls back to $.predictions.trenchCategory of the request payload.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd30 = 1 and
#   trenchCategory 'Trench 1', labelled Data_selection = 'Test'.
# NOTE(review): latest_request partitions by modelDisplayName as well, so a loan
# scored under both display names would yield two rows — confirm this is intended.
# NOTE(review): model_run is deduplicated per customerId + digitalLoanAccountId but
# joined on digitalLoanAccountId only.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join because
# the WHERE clause and the following INNER JOIN both require loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# drop_duplicates only removes rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,3710758,abe5f29c-f306-47f0-b43e-4a2cc3d02656,60837107580019,0.39821,Trench 1,2025-09-28 17:24:26,2025-09-29,2025-09,Test,1,1,Quick
1,3714015,577e2c33-6a05-4f5d-ac37-de55ae801b98,60837140150013,0.407089,Trench 1,2025-09-30 09:03:02,2025-09-30,2025-09,Test,0,1,Quick
2,3706096,98118b0f-5bae-4544-8a77-f77404a58b50,60837060960013,0.512896,Trench 1,2025-09-26 17:13:14,2025-09-27,2025-09,Test,0,1,Quick
3,3716231,64b86ec3-a037-4e33-bf27-c0f2603ab1b8,60837162310017,0.440487,Trench 1,2025-10-01 10:49:31,2025-10-01,2025-10,Test,0,1,Quick
4,3716782,1616f4ec-1811-4744-8ec4-9ec17cf862bb,60837167820013,0.507565,Trench 1,2025-10-01 14:33:37,2025-10-06,2025-10,Test,0,1,Quick


In [1773]:
# Keep the Test-window rows (FPD30 target) under df1; dfd is reassigned by the Train query that follows.
df1 = dfd.copy(deep=True)

## Train

In [1774]:
# Train-window pull for the FPD30 target.
# - parsed/modelname: rows from dap_ds_poweruser_playground.ml_training_model_run_details
#   for the two listed display names at modelVersionId 'v1'; `prediction` is used
#   directly as beta_cash_app_score.
# - deliquency: derives default flags (deffpd0/10/30, deffspd30, deffstpd30) and the
#   matching flg_mature_* flags from loan_deliquency_data.
# - Final select: disbursed loans with a non-null score, flg_mature_fpd30 = 1 and
#   trenchCategory 'Trench 1', labelled Data_selection = 'Train'.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join because
# the WHERE clause and the following INNER JOIN both require loanmaster columns.
# NOTE(review): there is no per-loan dedup in this SQL — confirm the training table
# holds one row per loan.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# drop_duplicates only removes rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2371202,bebfb712-e0c5-43f7-9a8d-c4736a8f9700,60823712020011,0.481233,Trench 1,2024-01-17 17:17:07,2024-01-17,2024-01,Train,0,1,Quick
1,2369816,e54c1e64-616f-4a4a-89e3-199f712a2bf3,60823698160012,0.514735,Trench 1,2024-01-15 19:44:37,2024-01-16,2024-01,Train,0,1,Quick
2,2367480,514338de-da61-4fb8-a4fe-c7881fb85c7c,60823674800011,0.54715,Trench 1,2024-01-13 16:22:15,2024-02-05,2024-01,Train,0,1,Quick
3,2384419,5627daeb-edd1-41fe-b70e-80c387c8a2a3,60823844190013,0.510946,Trench 1,2024-01-30 16:17:55,2024-01-30,2024-01,Train,1,1,Quick
4,2385016,8f9d2965-137e-4671-9703-42f496ca5d76,60823850160012,0.588631,Trench 1,2024-01-31 00:36:32,2024-02-03,2024-01,Train,0,1,Quick


In [1775]:
# Keep the Train-window rows (FPD30 target) under df2, independent of later changes to dfd.
df2 = dfd.copy(deep=True)

In [1776]:
# Stack the Train frame (df2) on top of the Test frame (df1) and renumber the rows.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
# Print dtypes and non-null counts of the combined frame.
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9762 entries, 0 to 9761
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9762 non-null   object        
 1   digitalLoanAccountId   9762 non-null   object        
 2   loanAccountNumber      9762 non-null   object        
 3   beta_cash_app_score    9762 non-null   float64       
 4   trenchCategory         9762 non-null   object        
 5   appln_submit_datetime  9762 non-null   datetime64[us]
 6   disbursementdate       9762 non-null   dbdate        
 7   Application_month      9762 non-null   object        
 8   Data_selection         9762 non-null   object        
 9   deffpd30               9762 non-null   Int64         
 10  flg_mature_fpd30       9762 non-null   Int64         
 11  new_loan_type          9762 non-null   object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(

In [1777]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = pd.to_numeric(
    arg=df_concat['beta_cash_app_score'],
    errors='coerce',
)

In [1778]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fpd30.csv")

In [1779]:
# Build the Gini table for beta_cash_app_score against deffpd30 (labelled 'FPD30'),
# passing new_loan_type as the product column.
# Variant without the product column:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1780]:
# Keep a separate copy of the FPD30 Gini table under f2.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,0.071429,Week,beta_cash_app_score,1.1.0,FPD30,Overall
1,2024-01-01,2024-01-31,0.210702,Month,beta_cash_app_score,1.1.0,FPD30,Overall
2,2024-01-08,2024-01-14,0.69697,Week,beta_cash_app_score,1.1.0,FPD30,Overall
3,2024-01-15,2024-01-21,0.652778,Week,beta_cash_app_score,1.1.0,FPD30,Overall
4,2024-01-22,2024-01-28,-0.303571,Week,beta_cash_app_score,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1781]:
# Test-window pull for the FSPD30 target.
# - parsed/latest_request: rows from audit_balance.ml_model_run_details for the two
#   listed display names at modelVersionId 'v1', keeping the most recent start_time
#   per (customerId, digitalLoanAccountId, modelDisplayName).
# - model_run: most recent 'Beta-Cash-Model-response' request per
#   (customerId, digitalLoanAccountId, modelName) from ml_request_details.
# - modelname: score is $.combined_score parsed from the cleaned prediction JSON;
#   trenchCategory falls back to $.predictions.trenchCategory of the request payload.
# - Final select: disbursed loans with a non-null score, flg_mature_fspd_30 = 1 and
#   trenchCategory 'Trench 1', labelled Data_selection = 'Test'.
# NOTE(review): latest_request partitions by modelDisplayName as well, so a loan
# scored under both display names would yield two rows — confirm this is intended.
# NOTE(review): model_run is deduplicated per customerId + digitalLoanAccountId but
# joined on digitalLoanAccountId only.
# NOTE(review): the LEFT JOIN to loan_master_table behaves as an inner join because
# the WHERE clause and the following INNER JOIN both require loanmaster columns.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Run the query on BigQuery and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# drop_duplicates only removes rows that are identical in every column.
dfd = dfd.drop_duplicates(keep='first')
# NOTE(review): the saved output of this cell shows the column header with no rows.
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1782]:
# Keep the Test-window rows (FSPD30 target) under df1; dfd is reassigned by the Train query that follows.
# NOTE(review): the preceding Test query's saved output shows a header with no rows,
# so df1 may be empty here — confirm before relying on Test-period results.
df1 = dfd.copy(deep=True)

## Train

In [1783]:
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Execute the Train query (FSPD30, Trench 1) and drop rows that are exact
# duplicates across all columns, keeping the first occurrence.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2371202,bebfb712-e0c5-43f7-9a8d-c4736a8f9700,60823712020011,0.481233,Trench 1,2024-01-17 17:17:07,2024-01-17,2024-01,Train,0,1,Quick
1,2350845,09725d6f-80e0-4dda-a726-710c5e7e30c0,60823508450016,0.555075,Trench 1,2024-01-11 06:41:27,2024-01-12,2024-01,Train,0,1,Quick
2,2355281,bc130ce0-3bff-4fa9-b03b-e6451f093a4c,60823552810012,0.511867,Trench 1,2024-01-01 11:55:20,2024-01-01,2024-01,Train,0,1,Quick
3,2382892,6bfe8674-f9e5-4560-a387-09a31a9f113c,60823828920016,0.40277,Trench 1,2024-01-28 21:35:21,2024-01-29,2024-01,Train,0,1,Flex
4,2382612,559a4c86-05e1-442b-ba67-1d7741b4bace,60823826120014,0.440113,Trench 1,2024-01-28 16:18:30,2024-02-03,2024-01,Train,0,1,Quick


In [1784]:
# Park the Train extract under its own name so it can be stacked with `df1`.
df2 = dfd.copy(deep=True)

In [1785]:
# Stack the Train extract (df2) on top of the preceding extract (df1).
# Empty extracts are excluded before concatenating: handing pandas an empty
# frame raises a FutureWarning, and in a future pandas release the empty frame
# would start to influence the dtypes of the result.
non_empty_extracts = [extract for extract in (df2, df1) if not extract.empty]
# If every extract is empty, fall back to the original inputs so the (empty)
# result still carries the expected columns.
df_concat = pd.concat(non_empty_extracts or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9351 entries, 0 to 9350
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             9351 non-null   Int64         
 1   digitalLoanAccountId   9351 non-null   object        
 2   loanAccountNumber      9351 non-null   object        
 3   beta_cash_app_score    9351 non-null   float64       
 4   trenchCategory         9351 non-null   object        
 5   appln_submit_datetime  9351 non-null   datetime64[us]
 6   disbursementdate       9351 non-null   dbdate        
 7   Application_month      9351 non-null   object        
 8   Data_selection         9351 non-null   object        
 9   deffspd30              9351 non-null   Int64         
 10  flg_mature_fspd_30     9351 non-null   Int64         
 11  new_loan_type          9351 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1786]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN (errors='coerce').
numeric_scores = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = numeric_scores

In [1787]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fspd30.csv")

In [1788]:
# Periodic Gini of `beta_cash_app_score` against the FSPD30 default flag
# (`deffspd30`), with `new_loan_type` supplied as the product column.
# The product-type helper is used here in place of `calculate_periodic_gini`.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
)

In [1789]:
# Keep the FSPD30 result as `f3`; it is one of the frames merged in the
# "combining" cell further down.
f3 = gini_results.copy(deep=True)
f3.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,-0.090909,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
1,2024-01-01,2024-01-31,0.116168,Month,beta_cash_app_score,1.1.0,FSPD30,Overall
2,2024-01-08,2024-01-14,0.875,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
3,2024-01-15,2024-01-21,0.185185,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
4,2024-01-22,2024-01-28,-0.081481,Week,beta_cash_app_score,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1790]:
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Execute the Test query (FSTPD30, Trench 1) and drop rows that are exact
# duplicates across all columns, keeping the first occurrence.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1791]:
# Park the Test extract (FSTPD30, Trench 1) under its own name before `dfd`
# is overwritten by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1792]:
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;"""

# Execute the Train query (FSTPD30, Trench 1) and drop rows that are exact
# duplicates across all columns, keeping the first occurrence.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2383905,397557b6-ce08-4bf4-958a-5c4453261132,60823839050018,0.451541,Trench 1,2024-01-29 21:35:10,2024-01-29,2024-01,Train,0,1,Quick
1,2374572,6726cebb-4e1f-44f7-9ba0-3fc224517747,60823745720016,0.579561,Trench 1,2024-01-20 18:16:56,2024-01-20,2024-01,Train,0,1,Quick
2,2383735,c7288aff-bb74-4ddd-b47f-984f36e6d44b,60823837350018,0.542962,Trench 1,2024-01-29 18:07:51,2024-01-29,2024-01,Train,0,1,Quick
3,2360987,dcbeecb0-738f-4f8f-b3a5-77b062733fb4,60823609870014,0.441135,Trench 1,2024-01-07 06:31:10,2024-01-12,2024-01,Train,0,1,Quick
4,2384419,5627daeb-edd1-41fe-b70e-80c387c8a2a3,60823844190013,0.510946,Trench 1,2024-01-30 16:17:55,2024-01-30,2024-01,Train,1,1,Quick


In [1793]:
# Park the Train extract under its own name so it can be stacked with `df1`.
df2 = dfd.copy(deep=True)

In [1794]:
# Stack the Train extract (df2) on top of the Test extract (df1).
# Empty extracts are excluded before concatenating: handing pandas an empty
# frame raises a FutureWarning, and in a future pandas release the empty frame
# would start to influence the dtypes of the result.
non_empty_extracts = [extract for extract in (df2, df1) if not extract.empty]
# If every extract is empty, fall back to the original inputs so the (empty)
# result still carries the expected columns.
df_concat = pd.concat(non_empty_extracts or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8943 entries, 0 to 8942
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8943 non-null   Int64         
 1   digitalLoanAccountId   8943 non-null   object        
 2   loanAccountNumber      8943 non-null   object        
 3   beta_cash_app_score    8943 non-null   float64       
 4   trenchCategory         8943 non-null   object        
 5   appln_submit_datetime  8943 non-null   datetime64[us]
 6   disbursementdate       8943 non-null   dbdate        
 7   Application_month      8943 non-null   object        
 8   Data_selection         8943 non-null   object        
 9   deffstpd30             8943 non-null   Int64         
 10  flg_mature_fstpd_30    8943 non-null   Int64         
 11  new_loan_type          8943 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1795]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN (errors='coerce').
numeric_scores = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = numeric_scores

In [1796]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fstpd30.csv")

In [1797]:
# Periodic Gini of `beta_cash_app_score` against the FSTPD30 default flag
# (`deffstpd30`), with `new_loan_type` supplied as the product column.
# The product-type helper is used here in place of `calculate_periodic_gini`.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffstpd30', 'FSTPD30',
    product_column='new_loan_type',
)

In [1798]:
# Keep the FSTPD30 result as `f4`; it is one of the frames merged in the
# "combining" cell below.
f4 = gini_results.copy(deep=True)
f4.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,-0.135802,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
1,2024-01-01,2024-01-31,0.13273,Month,beta_cash_app_score,1.1.0,FSTPD30,Overall
2,2024-01-08,2024-01-14,0.569892,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
3,2024-01-15,2024-01-21,0.086364,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
4,2024-01-22,2024-01-28,0.005714,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1799]:
import functools

# Columns every per-metric Gini frame has in common; used as the join key.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'loan_type', 'bad_rate']
# Per-metric Gini frames produced by the sections above.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on ``common_columns``.

    NOTE(review): ``bad_rate`` is one of the join keys and the displayed frames
    carry a different label in it (e.g. 'FSPD30' in f3, 'FSTPD30' in f4), so
    rows from different metrics do not match each other. The outer join then
    keeps them as separate rows with NaN in the other metrics' Gini columns.
    Confirm this stacked layout is what the target table expects.
    """
    return pd.merge(left=df1, right=df2, how='outer', on=common_columns)


# Fold the list pairwise, left to right, into a single frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_cash_app_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'beta_cash_app_score_FPD10_gini', 'beta_cash_app_score_FPD30_gini',
       'beta_cash_app_score_FSPD30_gini',
       'beta_cash_app_score_FSTPD30_gini'], dtype=object)

In [1800]:
# Output column order: key columns first, then one Gini column per metric.
ordered_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
    'beta_cash_app_score_FPD0_gini',
    'beta_cash_app_score_FPD10_gini',
    'beta_cash_app_score_FPD30_gini',
    'beta_cash_app_score_FSPD30_gini',
    'beta_cash_app_score_FSTPD30_gini',
]
# Tag each Gini column with the trench suffix (`_t1`).
gini_rename_map = {
    'beta_cash_app_score_FPD0_gini': 'beta_cash_app_score_FPD0_t1_gini',
    'beta_cash_app_score_FPD10_gini': 'beta_cash_app_score_FPD10_t1_gini',
    'beta_cash_app_score_FPD30_gini': 'beta_cash_app_score_FPD30_t1_gini',
    'beta_cash_app_score_FSPD30_gini': 'beta_cash_app_score_FSPD30_t1_gini',
    'beta_cash_app_score_FSTPD30_gini': 'beta_cash_app_score_FSTPD30_t1_gini',
}
# Reorder, rename, then append the constant descriptor columns in one chain.
final_df = (
    final_df[ordered_columns]
    .rename(columns=gini_rename_map)
    .assign(
        Trench_category='Trench 1',
        Model_display_name='apps_score_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                             datetime64[ns]
end_date                               datetime64[ns]
period                                         object
Model_Name                                     object
version                                        object
loan_type                                      object
bad_rate                                       object
beta_cash_app_score_FPD0_t1_gini              float64
beta_cash_app_score_FPD10_t1_gini             float64
beta_cash_app_score_FPD30_t1_gini             float64
beta_cash_app_score_FSPD30_t1_gini            float64
beta_cash_app_score_FSTPD30_t1_gini           float64
Trench_category                                object
Model_display_name                             object
Product_type                                   object
dtype: object

In [1801]:
# Preview the first five rows of the combined table before uploading it.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,beta_cash_app_score_FPD0_t1_gini,beta_cash_app_score_FPD10_t1_gini,beta_cash_app_score_FPD30_t1_gini,beta_cash_app_score_FSPD30_t1_gini,beta_cash_app_score_FSTPD30_t1_gini,Trench_category,Model_display_name,Product_type
0,2024-01-01,2024-01-07,Week,beta_cash_app_score,1.1.0,Overall,FPD0,-0.076923,,,,,Trench 1,apps_score_cash,CASH
1,2024-01-01,2024-01-31,Month,beta_cash_app_score,1.1.0,Overall,FPD0,0.255906,,,,,Trench 1,apps_score_cash,CASH
2,2024-01-08,2024-01-14,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.875,,,,,Trench 1,apps_score_cash,CASH
3,2024-01-15,2024-01-21,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.430233,,,,,Trench 1,apps_score_cash,CASH
4,2024-01-22,2024-01-28,Week,beta_cash_app_score,1.1.0,Overall,FPD0,-0.051429,,,,,Trench 1,apps_score_cash,CASH


In [1802]:
# Load the combined Trench 1 Gini table into BigQuery.
# WRITE_TRUNCATE replaces the table's existing contents on every run;
# use "WRITE_APPEND" instead to add rows.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.app_score_cash_t1_v1_gini5"
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
job.result()  # block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=115a52d4-f713-4b24-9e59-dd4c33b26a86>

## Trench 2

## FPD0

## Test

In [1803]:
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the Test query (FPD0, Trench 2) and drop rows that are exact
# duplicates across all columns, keeping the first occurrence.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,3583896,b8ade8c7-acfe-4c72-98ff-c385b82aad84,60835838960013,0.479492,Trench 2,2025-10-28 12:17:06,2025-10-28,2025-10,Test,0,1,Quick
1,2196972,823f8efe-eca0-4063-be08-ee76fc0cbb87,60821969720014,0.482375,Trench 2,2025-11-03 13:13:27,2025-11-03,2025-11,Test,1,1,Quick
2,2884064,16c48447-55f1-4dff-8ba6-1df0bca251a5,60828840640024,0.457656,Trench 2,2025-11-07 17:54:44,2025-11-08,2025-11,Test,1,1,Quick
3,2245137,ea96ebc8-11fd-4213-bc3c-bf5ed20153e0,60822451370014,0.39698,Trench 2,2025-10-12 11:47:52,2025-10-12,2025-10,Test,0,1,Quick
4,3266695,99bdaba1-8d62-4d61-a061-974f5ebd33ae,60832666950011,0.299456,Trench 2,2025-10-22 18:01:34,2025-10-23,2025-10,Test,0,1,Quick


In [1804]:
# Park the Test extract (FPD0, Trench 2) under its own name before `dfd`
# is overwritten by the Train query.
df1 = dfd.copy(deep=True)

## Train

In [1805]:
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Execute the Train query (FPD0, Trench 2) and drop rows that are exact
# duplicates across all columns, keeping the first occurrence.
dfd = (
    client.query(sq)
    .to_dataframe()
    .drop_duplicates(keep='first')
)
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,2264238,01874f72-ca85-4e11-956e-c9fa43fd5324,60822642380022,0.533911,Trench 2,2024-01-14 07:30:14,2024-01-14,2024-01,Train,0,1,Quick
1,2322078,36722b8d-a4f3-49b2-8615-e0fdf272ec90,60823220780014,0.48121,Trench 2,2024-01-03 07:56:56,2024-01-17,2024-01,Train,0,1,Quick
2,2204532,7d92fc6b-f04a-484a-bbbd-1176a9d631b7,60822045320017,0.566178,Trench 2,2024-01-25 12:03:09,2024-01-25,2024-01,Train,0,1,Quick
3,2264194,5f56693b-eaac-4cd0-83a9-28b8f1e46b7c,60822641940043,0.438526,Trench 2,2024-01-17 13:18:47,2024-01-17,2024-01,Train,0,1,Quick
4,2153045,3df227f9-ef87-4d7b-8f59-0cde62159295,60821530450012,0.531817,Trench 2,2024-01-28 12:41:44,2024-01-28,2024-01,Train,0,1,Quick


In [1806]:
# Park the Train extract under its own name so it can be stacked with `df1`.
df2 = dfd.copy(deep=True)

In [1807]:
# Stack the Train extract (df2) on top of the Test extract (df1).
# Empty extracts are excluded before concatenating: handing pandas an empty
# frame raises a FutureWarning, and in a future pandas release the empty frame
# would start to influence the dtypes of the result.
non_empty_extracts = [extract for extract in (df2, df1) if not extract.empty]
# If every extract is empty, fall back to the original inputs so the (empty)
# result still carries the expected columns.
df_concat = pd.concat(non_empty_extracts or [df2, df1], ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8826 entries, 0 to 8825
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8826 non-null   object        
 1   digitalLoanAccountId   8826 non-null   object        
 2   loanAccountNumber      8826 non-null   object        
 3   beta_cash_app_score    8826 non-null   float64       
 4   trenchCategory         8826 non-null   object        
 5   appln_submit_datetime  8826 non-null   datetime64[us]
 6   disbursementdate       8826 non-null   dbdate        
 7   Application_month      8826 non-null   object        
 8   Data_selection         8826 non-null   object        
 9   deffpd0                8826 non-null   Int64         
 10  flg_mature_fpd0        8826 non-null   Int64         
 11  new_loan_type          8826 non-null   object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(

In [1808]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN (errors='coerce').
numeric_scores = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = numeric_scores

In [1809]:
# df_concat.to_csv(r"beta_cash_app_scoretrench2fpd0.csv")

In [1810]:
# Periodic Gini of `beta_cash_app_score` against the FPD0 default flag
# (`deffpd0`), with `new_loan_type` supplied as the product column.
# The product-type helper is used here in place of `calculate_periodic_gini`.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd0', 'FPD0',
    product_column='new_loan_type',
)

In [1811]:
# Keep the Trench 2 FPD0 result under its own name (`f0`) so the next
# metric's run can reuse `gini_results`.
f0 = gini_results.copy(deep=True)
f0.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FPD0,Overall
1,2024-01-01,2024-01-31,0.448521,Month,beta_cash_app_score,1.1.0,FPD0,Overall
2,2024-01-08,2024-01-14,0.59375,Week,beta_cash_app_score,1.1.0,FPD0,Overall
3,2024-01-15,2024-01-21,0.404762,Week,beta_cash_app_score,1.1.0,FPD0,Overall
4,2024-01-22,2024-01-28,0.769231,Week,beta_cash_app_score,1.1.0,FPD0,Overall


## FPD10

## Test

In [1812]:
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,3128911,f9bc722f-61ef-4277-9c8d-f458f001f803,60831289110011,0.364072,Trench 2,2025-10-18 16:53:46,2025-10-18,2025-10,Test,0,1,Quick
1,3515863,e61f273d-b50f-431f-a28a-9fe0132e7ed9,60835158630017,0.513107,Trench 2,2025-10-11 16:02:18,2025-10-13,2025-10,Test,0,1,Quick
2,3527362,81a70f9b-33d3-43a7-9b18-c33753cb5824,60835273620022,0.39563,Trench 2,2025-10-08 11:36:47,2025-10-08,2025-10,Test,0,1,Quick
3,2723832,f34abf51-5e73-42e1-87d7-a3d87570b8b0,60827238320012,0.456899,Trench 2,2025-10-25 16:52:35,2025-10-25,2025-10,Test,0,1,Quick
4,1517673,bbbae45c-5960-4508-91eb-c145d2f1402e,60815176730017,0.396158,Trench 2,2025-10-13 03:16:37,2025-10-14,2025-10,Test,0,1,Quick


In [1813]:
# Snapshot the 'Test' query result under its own name, because `dfd` is
# overwritten by the next query cell.
df1 = dfd.copy(deep=True)

## Train

In [1814]:
# Train sample for the FPD10 target, restricted to trenchCategory 'Trench 2'.
# Structure of the SQL below:
#   * parsed     - rows of ml_training_model_run_details for the model names
#                  'Beta-Cash-AppScore-Model' / 'apps_score_cash' at modelVersionId 'v1'.
#   * modelname  - exposes `prediction` as beta_cash_app_score.
#   * deliquency - derives the default flags (deffpd0/10/30, deffspd30, deffstpd30)
#                  and the maturity flags (flg_mature_*) from obs_min_inst_def* /
#                  min_inst_def* in loan_deliquency_data.
#   * final select - joins the loan master and the delinquency flags; keeps disbursed
#                  loans with a non-null score and flg_mature_fpd10 = 1, and labels
#                  every row Data_selection = 'Train'.
# appln_submit_datetime uses startApplyDateTime for 'Flex-up' loans and
# termsAndConditionsSubmitDateTime otherwise, falling back to the model start_time.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN
# to loan_master_table effectively behaves like an INNER JOIN.
# NOTE(review): calcFeatures and end_time are selected in `parsed` but are not
# read by any later CTE in this query.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query through the BigQuery client and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,2245925,e0a86e08-a36a-4058-8dd6-b0820fb6558e,60822459250014,0.456634,Trench 2,2024-01-11 18:26:16,2024-01-12,2024-01,Train,0,1,Quick
1,1904439,0b9a0b6c-9f29-4050-b3f0-e09726136f7b,60819044390016,0.422564,Trench 2,2024-01-26 20:38:24,2024-01-26,2024-01,Train,0,1,Quick
2,1097709,0172b3fc-8c5e-4d87-b2ba-e054f35fcc2f,60810977090012,0.370655,Trench 2,2024-01-22 10:48:02,2024-01-25,2024-01,Train,0,1,Flex
3,1334303,633480ab-01ea-4568-80e0-9af0d12aa1ad,60813343030011,0.413218,Trench 2,2024-01-28 13:38:00,2024-01-28,2024-01,Train,0,1,Quick
4,2031254,6a387c81-3b01-4c4d-b958-8817ac36eebd,60820312540015,0.552753,Trench 2,2024-01-14 13:38:07,2024-01-14,2024-01,Train,0,1,Quick


In [1815]:
# Snapshot the 'Train' query result under its own name so it survives later
# reassignments of `dfd`.
df2 = dfd.copy(deep=True)

In [1816]:
# Stack the Train (df2) and Test (df1) samples into one frame for the Gini run.
# Empty frames are left out of the concatenation: a query that returns no rows
# still contributes its column dtypes, and pandas has deprecated concatenating
# such empty entries, so excluding them keeps the result dtypes stable.
populated_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both samples are empty, fall back to an empty frame with the expected
# columns instead of letting pd.concat raise on an empty list.
df_concat = (
    pd.concat(populated_frames, ignore_index=True)
    if populated_frames
    else df2.iloc[0:0].copy()
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8501 entries, 0 to 8500
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8501 non-null   object        
 1   digitalLoanAccountId   8501 non-null   object        
 2   loanAccountNumber      8501 non-null   object        
 3   beta_cash_app_score    8501 non-null   float64       
 4   trenchCategory         8501 non-null   object        
 5   appln_submit_datetime  8501 non-null   datetime64[us]
 6   disbursementdate       8501 non-null   dbdate        
 7   Application_month      8501 non-null   object        
 8   Data_selection         8501 non-null   object        
 9   deffpd10               8501 non-null   Int64         
 10  flg_mature_fpd10       8501 non-null   Int64         
 11  new_loan_type          8501 non-null   object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(

In [1817]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
score_numeric = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = score_numeric

In [1818]:
# df_concat.to_csv(r"beta_cash_app_scoretrench2fpd10.csv")

In [1819]:
# Periodic Gini of beta_cash_app_score against the deffpd10 flag, labelled
# 'FPD10', with new_loan_type passed as the product column.
# Earlier call without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd10', 'FPD10',
    product_column='new_loan_type',
)

In [1820]:
# Keep the FPD10 Gini table under its own name (gini_results is reassigned by
# the later target sections) and preview the first rows.
f1 = gini_results.copy(deep=True)
f1.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FPD10,Overall
1,2024-01-01,2024-01-31,0.429412,Month,beta_cash_app_score,1.1.0,FPD10,Overall
2,2024-01-08,2024-01-14,0.59375,Week,beta_cash_app_score,1.1.0,FPD10,Overall
3,2024-01-15,2024-01-21,0.4,Week,beta_cash_app_score,1.1.0,FPD10,Overall
4,2024-01-22,2024-01-28,,Week,beta_cash_app_score,1.1.0,FPD10,Overall


## FPD30

## Test

In [1821]:
# Test sample for the FPD30 target, restricted to trenchCategory 'Trench 2'.
# Structure of the SQL below:
#   * parsed         - rows of audit_balance.ml_model_run_details for the model names
#                      'Beta-Cash-AppScore-Model' / 'apps_score_cash' at version 'v1';
#                      prediction / calcFeature text has single quotes replaced by
#                      double quotes and None by null (presumably so the JSON
#                      functions can parse it - TODO confirm).
#   * latest_request - latest row by start_time per customerId /
#                      digitalLoanAccountId / modelDisplayName.
#   * model_run      - latest 'Beta-Cash-Model-response' request by publish_time per
#                      customerId / digitalLoanAccountId / modelName.
#   * modelname      - reads $.combined_score from the cleaned prediction as
#                      beta_cash_app_score; trenchCategory falls back to
#                      $.predictions.trenchCategory of the request payload.
#   * deliquency     - derives the default flags and maturity flags from
#                      obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
#   * final select   - joins the loan master and the delinquency flags; keeps
#                      disbursed loans with a non-null score and
#                      flg_mature_fpd30 = 1, labelled Data_selection = 'Test'.
# NOTE(review): COALESCE around SAFE_CAST(...) has a single argument, so it has
# no effect on the value.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN
# to loan_master_table effectively behaves like an INNER JOIN.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query through the BigQuery client and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,3292066,07bf6d35-430c-4dfc-b7d3-7177507a8e08,60832920660016,0.506265,Trench 2,2025-10-01 23:18:17,2025-10-02,2025-10,Test,0,1,Quick
1,3569649,53a8cef3-7e62-4726-850f-d616880df6e2,60835696490014,0.475635,Trench 2,2025-09-29 21:25:24,2025-09-30,2025-09,Test,1,1,Quick
2,3169977,cc4d2ac6-b418-4121-bcd7-7ba549863169,60831699770013,0.455404,Trench 2,2025-09-26 20:29:59,2025-09-26,2025-09,Test,0,1,Quick
3,3174284,006766c8-3f5f-4d30-8257-52f9bfc3a110,60831742840016,0.453937,Trench 2,2025-09-29 17:57:23,2025-09-30,2025-09,Test,1,1,Quick
4,2473116,70dedacb-577f-433e-bfbf-4d57973ff5ea,60824731160018,0.466156,Trench 2,2025-10-03 20:45:57,2025-10-03,2025-10,Test,0,1,Quick


In [1822]:
# Snapshot the 'Test' query result under its own name, because `dfd` is
# overwritten by the next query cell.
df1 = dfd.copy(deep=True)

## Train

In [1823]:
# Train sample for the FPD30 target, restricted to trenchCategory 'Trench 2'.
# Structure of the SQL below:
#   * parsed     - rows of ml_training_model_run_details for the model names
#                  'Beta-Cash-AppScore-Model' / 'apps_score_cash' at modelVersionId 'v1'.
#   * modelname  - exposes `prediction` as beta_cash_app_score.
#   * deliquency - derives the default flags (deffpd0/10/30, deffspd30, deffstpd30)
#                  and the maturity flags (flg_mature_*) from obs_min_inst_def* /
#                  min_inst_def* in loan_deliquency_data.
#   * final select - joins the loan master and the delinquency flags; keeps disbursed
#                  loans with a non-null score and flg_mature_fpd30 = 1, and labels
#                  every row Data_selection = 'Train'.
# appln_submit_datetime uses startApplyDateTime for 'Flex-up' loans and
# termsAndConditionsSubmitDateTime otherwise, falling back to the model start_time.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN
# to loan_master_table effectively behaves like an INNER JOIN.
# NOTE(review): calcFeatures and end_time are selected in `parsed` but are not
# read by any later CTE in this query.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query through the BigQuery client and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2132253,3e9e9aa5-8007-46ce-bd4e-8358b62d046f,60821322530014,0.409959,Trench 2,2024-01-30 21:08:23,2024-02-03,2024-01,Train,0,1,Quick
1,2062899,ca64263d-9d61-4ea5-b551-851f30b931e7,60820628990015,0.521132,Trench 2,2024-01-25 23:41:20,2024-01-26,2024-01,Train,0,1,Quick
2,1490321,17467efb-ee68-4a88-837d-567d1e129780,60814903210012,0.531591,Trench 2,2024-01-09 11:03:12,2024-01-09,2024-01,Train,0,1,Quick
3,1447030,07d21b9c-b91c-4ae1-bafa-00745a5890dc,60814470300017,0.474935,Trench 2,2024-01-26 22:58:32,2024-01-27,2024-01,Train,0,1,Quick
4,1763640,4e1dd5be-cfc4-4851-96a7-4f5c88cf3752,60817636400019,0.51999,Trench 2,2024-01-31 05:56:42,2024-02-01,2024-01,Train,0,1,Quick


In [1824]:
# Snapshot the 'Train' query result under its own name so it survives later
# reassignments of `dfd`.
df2 = dfd.copy(deep=True)

In [1825]:
# Stack the Train (df2) and Test (df1) samples into one frame for the Gini run.
# Empty frames are left out of the concatenation: a query that returns no rows
# still contributes its column dtypes, and pandas has deprecated concatenating
# such empty entries, so excluding them keeps the result dtypes stable.
populated_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both samples are empty, fall back to an empty frame with the expected
# columns instead of letting pd.concat raise on an empty list.
df_concat = (
    pd.concat(populated_frames, ignore_index=True)
    if populated_frames
    else df2.iloc[0:0].copy()
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8266 entries, 0 to 8265
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8266 non-null   object        
 1   digitalLoanAccountId   8266 non-null   object        
 2   loanAccountNumber      8266 non-null   object        
 3   beta_cash_app_score    8266 non-null   float64       
 4   trenchCategory         8266 non-null   object        
 5   appln_submit_datetime  8266 non-null   datetime64[us]
 6   disbursementdate       8266 non-null   dbdate        
 7   Application_month      8266 non-null   object        
 8   Data_selection         8266 non-null   object        
 9   deffpd30               8266 non-null   Int64         
 10  flg_mature_fpd30       8266 non-null   Int64         
 11  new_loan_type          8266 non-null   object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), object(

In [1826]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
score_numeric = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = score_numeric

In [1827]:
# df_concat.to_csv(r"beta_cash_app_scoretrench2fpd30.csv")

In [1828]:
# Periodic Gini of beta_cash_app_score against the deffpd30 flag, labelled
# 'FPD30', with new_loan_type passed as the product column.
# Earlier call without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd30', 'FPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd30', 'FPD30',
    product_column='new_loan_type',
)

In [1829]:
# Keep the FPD30 Gini table under its own name (gini_results is reassigned by
# the other target sections) and preview the first rows.
f2 = gini_results.copy(deep=True)
f2.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FPD30,Overall
1,2024-01-01,2024-01-31,0.429412,Month,beta_cash_app_score,1.1.0,FPD30,Overall
2,2024-01-08,2024-01-14,0.59375,Week,beta_cash_app_score,1.1.0,FPD30,Overall
3,2024-01-15,2024-01-21,0.4,Week,beta_cash_app_score,1.1.0,FPD30,Overall
4,2024-01-22,2024-01-28,,Week,beta_cash_app_score,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1830]:
# Test sample for the FSPD30 target, restricted to trenchCategory 'Trench 2'.
# Structure of the SQL below:
#   * parsed         - rows of audit_balance.ml_model_run_details for the model names
#                      'Beta-Cash-AppScore-Model' / 'apps_score_cash' at version 'v1';
#                      prediction / calcFeature text has single quotes replaced by
#                      double quotes and None by null (presumably so the JSON
#                      functions can parse it - TODO confirm).
#   * latest_request - latest row by start_time per customerId /
#                      digitalLoanAccountId / modelDisplayName.
#   * model_run      - latest 'Beta-Cash-Model-response' request by publish_time per
#                      customerId / digitalLoanAccountId / modelName.
#   * modelname      - reads $.combined_score from the cleaned prediction as
#                      beta_cash_app_score; trenchCategory falls back to
#                      $.predictions.trenchCategory of the request payload.
#   * deliquency     - derives the default flags and maturity flags from
#                      obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
#   * final select   - joins the loan master and the delinquency flags; keeps
#                      disbursed loans with a non-null score and
#                      flg_mature_fspd_30 = 1, labelled Data_selection = 'Test'.
# NOTE(review): COALESCE around SAFE_CAST(...) has a single argument, so it has
# no effect on the value.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN
# to loan_master_table effectively behaves like an INNER JOIN.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query through the BigQuery client and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1831]:
# Snapshot the 'Test' query result under its own name, because `dfd` is
# overwritten by the next query cell.
df1 = dfd.copy(deep=True)

## Train

In [1832]:
# Train sample for the FSPD30 target, restricted to trenchCategory 'Trench 2'.
# Structure of the SQL below:
#   * parsed     - rows of ml_training_model_run_details for the model names
#                  'Beta-Cash-AppScore-Model' / 'apps_score_cash' at modelVersionId 'v1'.
#   * modelname  - exposes `prediction` as beta_cash_app_score.
#   * deliquency - derives the default flags (deffpd0/10/30, deffspd30, deffstpd30)
#                  and the maturity flags (flg_mature_*) from obs_min_inst_def* /
#                  min_inst_def* in loan_deliquency_data.
#   * final select - joins the loan master and the delinquency flags; keeps disbursed
#                  loans with a non-null score and flg_mature_fspd_30 = 1, and labels
#                  every row Data_selection = 'Train'.
# appln_submit_datetime uses startApplyDateTime for 'Flex-up' loans and
# termsAndConditionsSubmitDateTime otherwise, falling back to the model start_time.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN
# to loan_master_table effectively behaves like an INNER JOIN.
# NOTE(review): calcFeatures and end_time are selected in `parsed` but are not
# read by any later CTE in this query.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query through the BigQuery client and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,2132253,3e9e9aa5-8007-46ce-bd4e-8358b62d046f,60821322530014,0.409959,Trench 2,2024-01-30 21:08:23,2024-02-03,2024-01,Train,0,1,Quick
1,2062899,ca64263d-9d61-4ea5-b551-851f30b931e7,60820628990015,0.521132,Trench 2,2024-01-25 23:41:20,2024-01-26,2024-01,Train,0,1,Quick
2,2272742,3a6299c6-9945-48f1-9c45-9d9cfa3c41f5,60822727420012,0.449135,Trench 2,2024-01-28 18:40:42,2024-01-30,2024-01,Train,0,1,Quick
3,1719998,de75f9ac-4ef6-4170-88c9-a4934e6d164d,60817199980015,0.653952,Trench 2,2024-01-11 11:22:34,2024-01-11,2024-01,Train,1,1,Quick
4,2196083,11080669-32a0-434f-980a-72911d331e43,60821960830011,0.564028,Trench 2,2024-01-17 07:50:51,2024-01-23,2024-01,Train,0,1,Quick


In [1833]:
# Snapshot the 'Train' query result under its own name so it survives later
# reassignments of `dfd`.
df2 = dfd.copy(deep=True)

In [1834]:
# Stack the Train (df2) and Test (df1) samples into one frame for the Gini run.
# Empty frames are left out of the concatenation: a query that returns no rows
# still contributes its column dtypes, and pandas has deprecated concatenating
# such empty entries, so excluding them keeps the result dtypes stable and
# avoids the warning this cell otherwise emits when the Test sample is empty.
populated_frames = [frame for frame in (df2, df1) if not frame.empty]
# If both samples are empty, fall back to an empty frame with the expected
# columns instead of letting pd.concat raise on an empty list.
df_concat = (
    pd.concat(populated_frames, ignore_index=True)
    if populated_frames
    else df2.iloc[0:0].copy()
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7992 entries, 0 to 7991
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7992 non-null   Int64         
 1   digitalLoanAccountId   7992 non-null   object        
 2   loanAccountNumber      7992 non-null   object        
 3   beta_cash_app_score    7992 non-null   float64       
 4   trenchCategory         7992 non-null   object        
 5   appln_submit_datetime  7992 non-null   datetime64[us]
 6   disbursementdate       7992 non-null   dbdate        
 7   Application_month      7992 non-null   object        
 8   Data_selection         7992 non-null   object        
 9   deffspd30              7992 non-null   Int64         
 10  flg_mature_fspd_30     7992 non-null   Int64         
 11  new_loan_type          7992 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1835]:
# Force the score column to a numeric dtype; values that cannot be parsed
# become NaN because of errors='coerce'.
score_numeric = pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
df_concat['beta_cash_app_score'] = score_numeric

In [1836]:
# df_concat.to_csv(r"beta_cash_app_scoretrench2fspd30.csv")

In [1837]:
# Periodic Gini of beta_cash_app_score against the deffspd30 flag, labelled
# 'FSPD30', with new_loan_type passed as the product column.
# Earlier call without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffspd30', 'FSPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffspd30', 'FSPD30',
    product_column='new_loan_type',
)

In [1838]:
# Keep the FSPD30 Gini table under its own name (gini_results is reassigned by
# the other target sections) and preview the first rows.
f3 = gini_results.copy(deep=True)
f3.head(n=5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
1,2024-01-01,2024-01-31,0.371429,Month,beta_cash_app_score,1.1.0,FSPD30,Overall
2,2024-01-08,2024-01-14,0.261905,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
3,2024-01-15,2024-01-21,0.52381,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
4,2024-01-22,2024-01-28,0.230769,Week,beta_cash_app_score,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1839]:
# Test sample for the FSTPD30 target, restricted to trenchCategory 'Trench 2'.
# Structure of the SQL below:
#   * parsed         - rows of audit_balance.ml_model_run_details for the model names
#                      'Beta-Cash-AppScore-Model' / 'apps_score_cash' at version 'v1';
#                      prediction / calcFeature text has single quotes replaced by
#                      double quotes and None by null (presumably so the JSON
#                      functions can parse it - TODO confirm).
#   * latest_request - latest row by start_time per customerId /
#                      digitalLoanAccountId / modelDisplayName.
#   * model_run      - latest 'Beta-Cash-Model-response' request by publish_time per
#                      customerId / digitalLoanAccountId / modelName.
#   * modelname      - reads $.combined_score from the cleaned prediction as
#                      beta_cash_app_score; trenchCategory falls back to
#                      $.predictions.trenchCategory of the request payload.
#   * deliquency     - derives the default flags and maturity flags from
#                      obs_min_inst_def* / min_inst_def* in loan_deliquency_data.
#   * final select   - joins the loan master and the delinquency flags; keeps
#                      disbursed loans with a non-null score and
#                      flg_mature_fstpd_30 = 1, labelled Data_selection = 'Test'.
# NOTE(review): COALESCE around SAFE_CAST(...) has a single argument, so it has
# no effect on the value.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN
# to loan_master_table effectively behaves like an INNER JOIN.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query through the BigQuery client and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1840]:
# Snapshot the 'Test' query result under its own name, because `dfd` is
# overwritten by the next query cell.
df1 = dfd.copy(deep=True)

## Train

In [1841]:
# Train sample for the FSTPD30 target, restricted to trenchCategory 'Trench 2'.
# Structure of the SQL below:
#   * parsed     - rows of ml_training_model_run_details for the model names
#                  'Beta-Cash-AppScore-Model' / 'apps_score_cash' at modelVersionId 'v1'.
#   * modelname  - exposes `prediction` as beta_cash_app_score.
#   * deliquency - derives the default flags (deffpd0/10/30, deffspd30, deffstpd30)
#                  and the maturity flags (flg_mature_*) from obs_min_inst_def* /
#                  min_inst_def* in loan_deliquency_data.
#   * final select - joins the loan master and the delinquency flags; keeps disbursed
#                  loans with a non-null score and flg_mature_fstpd_30 = 1, and labels
#                  every row Data_selection = 'Train'.
# appln_submit_datetime uses startApplyDateTime for 'Flex-up' loans and
# termsAndConditionsSubmitDateTime otherwise, falling back to the model start_time.
# NOTE(review): the WHERE clause filters on loanmaster columns, so the LEFT JOIN
# to loan_master_table effectively behaves like an INNER JOIN.
# NOTE(review): calcFeatures and end_time are selected in `parsed` but are not
# read by any later CTE in this query.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;"""

# Run the query through the BigQuery client and load the result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Remove rows that are identical in every column, keeping the first occurrence.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,2260033,89f9e0f9-e063-4ce5-840a-aacd691b4716,60822600330017,0.329634,Trench 2,2024-01-27 17:21:51,2024-01-29,2024-01,Train,0,1,Quick
1,2322078,36722b8d-a4f3-49b2-8615-e0fdf272ec90,60823220780014,0.48121,Trench 2,2024-01-03 07:56:56,2024-01-17,2024-01,Train,1,1,Quick
2,2132253,3e9e9aa5-8007-46ce-bd4e-8358b62d046f,60821322530014,0.409959,Trench 2,2024-01-30 21:08:23,2024-02-03,2024-01,Train,0,1,Quick
3,2062899,ca64263d-9d61-4ea5-b551-851f30b931e7,60820628990015,0.521132,Trench 2,2024-01-25 23:41:20,2024-01-26,2024-01,Train,0,1,Quick
4,1715387,b5f03db3-d6e7-4baf-b19c-bf2632c27435,60817153870026,0.517559,Trench 2,2024-01-18 15:52:26,2024-01-18,2024-01,Train,1,1,Quick


In [1842]:
# Independent copy of the Train rows fetched above ('Train' Data_selection).
df2 = dfd.copy(deep=True)

In [1843]:
# Stack the Train rows (df2) on top of the Test rows (df1) for the periodic Gini.
#
# Empty frames are left out of the concat. For this label the Test query returned no
# rows, and pandas (>= 2.1) raises a FutureWarning when an empty or all-NA frame takes
# part in pd.concat, because a future release will let such frames influence the result
# dtypes. Skipping them keeps today's dtypes and silences the warning.
# Both queries select the same column list, so no column is lost by skipping a frame.
_non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = (
    pd.concat(_non_empty_frames, ignore_index=True)
    if _non_empty_frames
    else df2.iloc[0:0].copy()  # both splits empty: zero rows, df2's columns and dtypes
)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7679 entries, 0 to 7678
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             7679 non-null   Int64         
 1   digitalLoanAccountId   7679 non-null   object        
 2   loanAccountNumber      7679 non-null   object        
 3   beta_cash_app_score    7679 non-null   float64       
 4   trenchCategory         7679 non-null   object        
 5   appln_submit_datetime  7679 non-null   datetime64[us]
 6   disbursementdate       7679 non-null   dbdate        
 7   Application_month      7679 non-null   object        
 8   Data_selection         7679 non-null   object        
 9   deffstpd30             7679 non-null   Int64         
 10  flg_mature_fstpd_30    7679 non-null   Int64         
 11  new_loan_type          7679 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1844]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    beta_cash_app_score=pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
)

In [1845]:
# df_concat.to_csv(r"beta_cash_app_scoretrench2fstpd30.csv")

In [1846]:
# Periodic Gini of beta_cash_app_score against the FSTPD30 default flag, broken down by
# the new_loan_type column.
# Earlier variant without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffstpd30', 'FSTPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffstpd30', 'FSTPD30', product_column='new_loan_type'
)

In [1847]:
# f4 holds the FSTPD30 Gini table; it is merged with f0..f3 in the combining step below.
f4 = gini_results.copy(deep=True)
f4.head(5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2024-01-01,2024-01-07,,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
1,2024-01-01,2024-01-31,0.283333,Month,beta_cash_app_score,1.1.0,FSTPD30,Overall
2,2024-01-08,2024-01-14,0.274725,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
3,2024-01-15,2024-01-21,0.340909,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
4,2024-01-22,2024-01-28,0.04,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1848]:
import functools

# Key columns that every per-label Gini frame carries.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'loan_type', 'bad_rate']
# One Gini frame per label; judging by the merged column order printed below these are
# FPD0, FPD10, FPD30, FSPD30 and FSTPD30 respectively.
dataframes = [f0, f1, f2, f3, f4]


def merge_dataframes(df1, df2):
    """Outer-join two Gini frames on ``common_columns`` and return the merged frame.

    NOTE(review): ``bad_rate`` is part of the join key, and in the previews shown in this
    notebook each frame carries a different ``bad_rate`` value. Rows from different
    frames therefore do not line up, and the result looks like a row-wise stack with NaN
    in the other labels' Gini columns. Confirm this is the shape the output table expects.
    """
    merged = df1.merge(df2, how='outer', on=common_columns)
    return merged


# Fold the list left-to-right into a single wide frame.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_cash_app_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'beta_cash_app_score_FPD10_gini', 'beta_cash_app_score_FPD30_gini',
       'beta_cash_app_score_FSPD30_gini',
       'beta_cash_app_score_FSTPD30_gini'], dtype=object)

In [1849]:
# Shape the merged Gini table for upload:
#   1. order the columns as keys first, Gini metrics second;
#   2. tag every Gini column with the trench suffix (_t2);
#   3. append the constant descriptor columns.
final_df = (
    final_df[
        [
            'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
            'beta_cash_app_score_FPD0_gini',
            'beta_cash_app_score_FPD10_gini',
            'beta_cash_app_score_FPD30_gini',
            'beta_cash_app_score_FSPD30_gini',
            'beta_cash_app_score_FSTPD30_gini',
        ]
    ]
    .rename(
        columns={
            'beta_cash_app_score_FPD0_gini': 'beta_cash_app_score_FPD0_t2_gini',
            'beta_cash_app_score_FPD10_gini': 'beta_cash_app_score_FPD10_t2_gini',
            'beta_cash_app_score_FPD30_gini': 'beta_cash_app_score_FPD30_t2_gini',
            'beta_cash_app_score_FSPD30_gini': 'beta_cash_app_score_FSPD30_t2_gini',
            'beta_cash_app_score_FSTPD30_gini': 'beta_cash_app_score_FSTPD30_t2_gini',
        }
    )
    .assign(
        Trench_category='Trench 2',
        Model_display_name='apps_score_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                             datetime64[ns]
end_date                               datetime64[ns]
period                                         object
Model_Name                                     object
version                                        object
loan_type                                      object
bad_rate                                       object
beta_cash_app_score_FPD0_t2_gini              float64
beta_cash_app_score_FPD10_t2_gini             float64
beta_cash_app_score_FPD30_t2_gini             float64
beta_cash_app_score_FSPD30_t2_gini            float64
beta_cash_app_score_FSTPD30_t2_gini           float64
Trench_category                                object
Model_display_name                             object
Product_type                                   object
dtype: object

In [1850]:
# Preview the first rows of the table that is uploaded in the next cell.
final_df.head(5)

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,beta_cash_app_score_FPD0_t2_gini,beta_cash_app_score_FPD10_t2_gini,beta_cash_app_score_FPD30_t2_gini,beta_cash_app_score_FSPD30_t2_gini,beta_cash_app_score_FSTPD30_t2_gini,Trench_category,Model_display_name,Product_type
0,2024-01-01,2024-01-07,Week,beta_cash_app_score,1.1.0,Overall,FPD0,,,,,,Trench 2,apps_score_cash,CASH
1,2024-01-01,2024-01-31,Month,beta_cash_app_score,1.1.0,Overall,FPD0,0.448521,,,,,Trench 2,apps_score_cash,CASH
2,2024-01-08,2024-01-14,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.59375,,,,,Trench 2,apps_score_cash,CASH
3,2024-01-15,2024-01-21,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.404762,,,,,Trench 2,apps_score_cash,CASH
4,2024-01-22,2024-01-28,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.769231,,,,,Trench 2,apps_score_cash,CASH


In [1851]:
# Upload the Trench 2 Gini table to BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.app_score_cash_t2_v1_gini5"
# WRITE_TRUNCATE replaces the table contents on every run; "WRITE_APPEND" would add rows.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")
job = client.load_table_from_dataframe(
    dataframe=final_df,
    destination=table_id,
    job_config=job_config,
)
job.result()  # block until the load job has finished



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=02189939-a8fe-4d6a-be12-a9bf67098e73>

## Trench 3

## FPD0

## Test

In [1852]:
# Test split -- FPD0 label, Trench 3.
#
# Query outline:
#   parsed         : rows of audit_balance.ml_model_run_details for the two cash
#                    app-score model display names, version 'v1'. `prediction_clean`
#                    swaps single quotes for double quotes and None for null; it is read
#                    with JSON_VALUE in `modelname`. `calcFeatures` is not read by any
#                    later CTE.
#   latest_request : newest row (by start_time) per customerId, digitalLoanAccountId and
#                    modelDisplayName.
#   model_run      : newest 'Beta-Cash-Model-response' request (by publish_time) per
#                    customerId, digitalLoanAccountId and modelName; only used as a
#                    fallback source for trenchCategory.
#   modelname      : score taken from $.combined_score. The single-argument COALESCE
#                    around the SAFE_CAST has no effect.
#   deliquency     : default flags (deffpd0 ... deffstpd30) and maturity flags
#                    (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
#   final          : disbursed loans with a non-null score, flg_mature_fpd0 = 1 and
#                    trenchCategory = 'Trench 3'.
#
# The WHERE conditions on `loanmaster` columns discard unmatched rows, so the LEFT JOIN
# to loan_master_table behaves as an inner join.
# NOTE(review): model_run is de-duplicated per (customerId, digitalLoanAccountId,
# modelName) but joined on digitalLoanAccountId only -- verify this cannot fan out rows.
# NOTE(review): the matching Train query excludes new_loan_type values matching '%sil%' /
# '%big%'; this Test query does not -- confirm that the difference is intentional.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
    loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,3292833,d514eded-344f-4685-8f74-7c0db8a0b11b,60832928330029,0.460579,Trench 3,2025-10-28 13:03:54,2025-10-28,2025-10,Test,0,1,Quick
1,2743518,6495e5e6-f43d-431a-bd4d-c69fcb7316f1,60827435180023,0.500459,Trench 3,2025-10-24 20:19:46,2025-10-24,2025-10,Test,0,1,Quick
2,2005787,a4c02487-62fa-49e3-8c50-3812dc948b3d,60820057870036,0.53643,Trench 3,2025-10-19 17:42:29,2025-10-20,2025-10,Test,0,1,Quick
3,2311054,f70db2ce-13de-48e0-8ea3-0ef3279fd38e,60823110540033,0.515071,Trench 3,2025-10-09 10:48:02,2025-10-09,2025-10,Test,0,1,Quick
4,3401581,0a74f7be-2376-4c51-ba6d-f29b1ed54b95,60834015810021,0.473266,Trench 3,2025-10-16 11:42:29,2025-10-16,2025-10,Test,0,1,Quick


In [1853]:
# Class balance of the FPD0 default flag in the Test rows.
dfd.loc[:, 'deffpd0'].value_counts()

deffpd0
0    296
1     61
Name: count, dtype: Int64

In [1854]:
# Independent copy of the Test rows fetched above; `dfd` is reused by the Train query next.
df1 = dfd.copy(deep=True)

## Train

In [1855]:
# Train split -- FPD0 label, Trench 3.
#
# Query outline:
#   parsed     : rows of ml_training_model_run_details for the two cash app-score model
#                display names, version 'v1'. `calcFeatures` is selected here but is not
#                read by any later CTE.
#   modelname  : renames `prediction` to `beta_cash_app_score`.
#   deliquency : default flags (deffpd0 ... deffstpd30) and maturity flags
#                (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
#   final      : disbursed loans with a non-null score, flg_mature_fpd0 = 1 and
#                trenchCategory = 'Trench 3', excluding loans whose new_loan_type
#                contains 'sil' or 'big' (case-insensitive).
#
# The WHERE conditions on `loanmaster` columns discard unmatched rows, so the LEFT JOIN
# to loan_master_table behaves as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
    loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd0,flg_mature_fpd0,new_loan_type
0,1510740,d50ef042-dfba-408f-b960-aa6db9b0197e,60815107400021,0.474985,Trench 3,2023-03-11 16:09:47,2023-03-11,2023-03,Train,0,1,Flex-up
1,1556018,377b77e4-d235-4879-8b50-2f9c3035e8d0,60815560180026,0.49372,Trench 3,2023-03-14 21:40:58,2023-03-16,2023-03,Train,0,1,Quick
2,1742440,91dc6922-16ba-4809-baff-d4d6d93a316d,60817424400027,0.493704,Trench 3,2023-01-07 18:32:44,2023-01-26,2023-01,Train,0,1,Flex
3,1883971,f7c5c8ed-07d0-429d-a025-dd8497fe770c,60818839710029,0.478896,Trench 3,2023-03-03 12:22:35,2023-03-03,2023-03,Train,0,1,Quick
4,1321234,0926a34f-3540-4216-aa3b-9f55ef0a5feb,60813212340025,0.446036,Trench 3,2023-03-21 18:48:09,2023-03-26,2023-03,Train,0,1,Flex


In [1856]:
# Number of Train rows per loan product.
dfd.loc[:, 'new_loan_type'].value_counts()

new_loan_type
Flex-up    24282
Quick       5547
Flex         205
Name: count, dtype: int64

In [1857]:
# Independent copy of the Train rows fetched above.
df2 = dfd.copy(deep=True)

In [1858]:
# Stack the Train rows (df2) on top of the Test rows (df1) with a fresh 0..n-1 index,
# then print the column/dtype summary.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30391 entries, 0 to 30390
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             30391 non-null  object        
 1   digitalLoanAccountId   30391 non-null  object        
 2   loanAccountNumber      30391 non-null  object        
 3   beta_cash_app_score    30391 non-null  float64       
 4   trenchCategory         30391 non-null  object        
 5   appln_submit_datetime  30391 non-null  datetime64[us]
 6   disbursementdate       30391 non-null  dbdate        
 7   Application_month      30391 non-null  object        
 8   Data_selection         30391 non-null  object        
 9   deffpd0                30391 non-null  Int64         
 10  flg_mature_fpd0        30391 non-null  Int64         
 11  new_loan_type          30391 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), objec

In [1859]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    beta_cash_app_score=pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
)

In [1860]:
# df_concat.to_csv(r"beta_cash_app_scoretrench3fpd0.csv")

In [1861]:
# Periodic Gini of beta_cash_app_score against the FPD0 default flag, broken down by the
# new_loan_type column.
# Earlier variant without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd0', 'FPD0')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd0', 'FPD0', product_column='new_loan_type'
)

In [1862]:
# f0 holds the FPD0 Gini table for Trench 3.
f0 = gini_results.copy(deep=True)
# NOTE(review): head() runs before the sort, so this shows the first five rows in
# descending start_date order, not the five most recent periods -- confirm the intent.
f0.head(5).sort_values(by='start_date', ascending=False)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD0_gini,period,Model_Name,version,bad_rate,loan_type
4,2023-01-23,2023-01-29,0.222222,Week,beta_cash_app_score,1.1.0,FPD0,Overall
3,2023-01-16,2023-01-22,0.121212,Week,beta_cash_app_score,1.1.0,FPD0,Overall
2,2023-01-09,2023-01-15,0.0,Week,beta_cash_app_score,1.1.0,FPD0,Overall
1,2023-01-02,2023-01-08,0.05,Week,beta_cash_app_score,1.1.0,FPD0,Overall
0,2023-01-01,2023-01-31,0.085346,Month,beta_cash_app_score,1.1.0,FPD0,Overall


## FPD10

## Test

In [1863]:
# Test split -- FPD10 label, Trench 3.
#
# Query outline:
#   parsed         : rows of audit_balance.ml_model_run_details for the two cash
#                    app-score model display names, version 'v1'. `prediction_clean`
#                    swaps single quotes for double quotes and None for null; it is read
#                    with JSON_VALUE in `modelname`. `calcFeatures` is not read by any
#                    later CTE.
#   latest_request : newest row (by start_time) per customerId, digitalLoanAccountId and
#                    modelDisplayName.
#   model_run      : newest 'Beta-Cash-Model-response' request (by publish_time) per
#                    customerId, digitalLoanAccountId and modelName; only used as a
#                    fallback source for trenchCategory.
#   modelname      : score taken from $.combined_score. The single-argument COALESCE
#                    around the SAFE_CAST has no effect.
#   deliquency     : default flags (deffpd0 ... deffstpd30) and maturity flags
#                    (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
#   final          : disbursed loans with a non-null score, flg_mature_fpd10 = 1 and
#                    trenchCategory = 'Trench 3'.
#
# The WHERE conditions on `loanmaster` columns discard unmatched rows, so the LEFT JOIN
# to loan_master_table behaves as an inner join.
# NOTE(review): model_run is de-duplicated per (customerId, digitalLoanAccountId,
# modelName) but joined on digitalLoanAccountId only -- verify this cannot fan out rows.
# NOTE(review): the matching Train query excludes new_loan_type values matching '%sil%' /
# '%big%'; this Test query does not -- confirm that the difference is intentional.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,1886335,883ef202-4aa4-41d7-99ff-fd6be2ef69db,60818863350053,0.530778,Trench 3,2025-09-28 18:26:57,2025-09-28,2025-09,Test,0,1,Quick
1,2784096,9361a69b-9ccd-49dc-ba56-1007c7ac21f3,60827840960051,0.535099,Trench 3,2025-10-18 15:15:50,2025-10-19,2025-10,Test,0,1,Quick
2,2863259,78fce405-fd35-4832-ab50-eb15cbf54b89,60828632590029,0.515185,Trench 3,2025-10-02 10:17:03,2025-10-02,2025-10,Test,0,1,Quick
3,1479140,fa1679ba-d55a-46fe-a32f-921b658adeae,60814791400031,0.471591,Trench 3,2025-10-09 17:39:24,2025-10-09,2025-10,Test,0,1,Quick
4,3013833,56a168f7-10d4-4e39-b4cc-84cd936fd98c,60830138330021,0.500529,Trench 3,2025-09-28 18:37:35,2025-09-28,2025-09,Test,0,1,Quick


In [1864]:
# Independent copy of the Test rows fetched above; `dfd` is reused by the Train query next.
df1 = dfd.copy(deep=True)

## Train

In [1865]:
# Train split -- FPD10 label, Trench 3.
#
# Query outline:
#   parsed     : rows of ml_training_model_run_details for the two cash app-score model
#                display names, version 'v1'. `calcFeatures` is selected here but is not
#                read by any later CTE.
#   modelname  : renames `prediction` to `beta_cash_app_score`.
#   deliquency : default flags (deffpd0 ... deffstpd30) and maturity flags
#                (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
#   final      : disbursed loans with a non-null score, flg_mature_fpd10 = 1 and
#                trenchCategory = 'Trench 3', excluding loans whose new_loan_type
#                contains 'sil' or 'big' (case-insensitive).
#
# The WHERE conditions on `loanmaster` columns discard unmatched rows, so the LEFT JOIN
# to loan_master_table behaves as an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd10,flg_mature_fpd10,new_loan_type
0,1321234,0926a34f-3540-4216-aa3b-9f55ef0a5feb,60813212340025,0.446036,Trench 3,2023-03-21 18:48:09,2023-03-26,2023-03,Train,0,1,Flex
1,1419074,5ad907e9-5038-4a16-9e94-a33080ce35ea,60814190740021,0.504198,Trench 3,2023-01-31 00:36:06,2023-01-31,2023-01,Train,0,1,Quick
2,1465620,9f080f7e-2247-48f7-990d-fbee7da968a4,60814656200023,0.431507,Trench 3,2023-01-23 14:00:28,2023-01-23,2023-01,Train,0,1,Flex-up
3,1880465,67c19277-0b91-48a3-944d-c302a187f7e8,60818804650024,0.428775,Trench 3,2023-02-13 18:38:13,2023-02-14,2023-02,Train,0,1,Flex
4,1589704,493a51b0-63d3-444a-89a9-de0b2997b1d1,60815897040028,0.478667,Trench 3,2023-03-22 20:24:07,2023-03-22,2023-03,Train,0,1,Flex-up


In [1866]:
# Independent copy of the Train rows fetched above.
df2 = dfd.copy(deep=True)

In [1867]:
# Stack the Train rows (df2) on top of the Test rows (df1) with a fresh 0..n-1 index,
# then print the column/dtype summary.
df_concat = pd.concat(objs=[df2, df1], axis=0, ignore_index=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30241 entries, 0 to 30240
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             30241 non-null  object        
 1   digitalLoanAccountId   30241 non-null  object        
 2   loanAccountNumber      30241 non-null  object        
 3   beta_cash_app_score    30241 non-null  float64       
 4   trenchCategory         30241 non-null  object        
 5   appln_submit_datetime  30241 non-null  datetime64[us]
 6   disbursementdate       30241 non-null  dbdate        
 7   Application_month      30241 non-null  object        
 8   Data_selection         30241 non-null  object        
 9   deffpd10               30241 non-null  Int64         
 10  flg_mature_fpd10       30241 non-null  Int64         
 11  new_loan_type          30241 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), objec

In [1868]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat = df_concat.assign(
    beta_cash_app_score=pd.to_numeric(df_concat['beta_cash_app_score'], errors='coerce')
)

In [1869]:
# df_concat.to_csv(r"beta_cash_app_scoretrench3fpd10.csv")

In [1870]:
# Periodic Gini of beta_cash_app_score against the FPD10 default flag, broken down by the
# new_loan_type column.
# Earlier variant without the product breakdown, kept for reference:
# gini_results = calculate_periodic_gini(df_concat, 'beta_cash_app_score', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd10', 'FPD10', product_column='new_loan_type'
)

In [1871]:
# f1 holds the FPD10 Gini table for Trench 3.
f1 = gini_results.copy(deep=True)
f1.head(5)

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD10_gini,period,Model_Name,version,bad_rate,loan_type
0,2023-01-01,2023-01-31,-0.212418,Month,beta_cash_app_score,1.1.0,FPD10,Overall
1,2023-01-02,2023-01-08,0.083333,Week,beta_cash_app_score,1.1.0,FPD10,Overall
2,2023-01-09,2023-01-15,-0.444444,Week,beta_cash_app_score,1.1.0,FPD10,Overall
3,2023-01-16,2023-01-22,0.266667,Week,beta_cash_app_score,1.1.0,FPD10,Overall
4,2023-01-23,2023-01-29,-0.575758,Week,beta_cash_app_score,1.1.0,FPD10,Overall


## FPD30

## Test

In [1872]:
# Test split -- FPD30 label, Trench 3.
#
# Query outline:
#   parsed         : rows of audit_balance.ml_model_run_details for the two cash
#                    app-score model display names, version 'v1'. `prediction_clean`
#                    swaps single quotes for double quotes and None for null; it is read
#                    with JSON_VALUE in `modelname`. `calcFeatures` is not read by any
#                    later CTE.
#   latest_request : newest row (by start_time) per customerId, digitalLoanAccountId and
#                    modelDisplayName.
#   model_run      : newest 'Beta-Cash-Model-response' request (by publish_time) per
#                    customerId, digitalLoanAccountId and modelName; only used as a
#                    fallback source for trenchCategory.
#   modelname      : score taken from $.combined_score. The single-argument COALESCE
#                    around the SAFE_CAST has no effect.
#   deliquency     : default flags (deffpd0 ... deffstpd30) and maturity flags
#                    (flg_mature_*) derived from obs_min_inst_def* / min_inst_def*.
#   final          : disbursed loans with a non-null score, flg_mature_fpd30 = 1 and
#                    trenchCategory = 'Trench 3', excluding loans whose new_loan_type
#                    contains 'sil' or 'big' (case-insensitive).
#
# The WHERE conditions on `loanmaster` columns discard unmatched rows, so the LEFT JOIN
# to loan_master_table behaves as an inner join.
# NOTE(review): model_run is de-duplicated per (customerId, digitalLoanAccountId,
# modelName) but joined on digitalLoanAccountId only -- verify this cannot fan out rows.
# NOTE(review): the FPD0 and FPD10 Test queries above do not carry the '%sil%' / '%big%'
# exclusion that this query has -- confirm which variant is intended.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query, then drop rows that are exact duplicates across every column.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,2673021,03a03e48-c03a-40b4-a80b-5fb4f425b61a,60826730210022,0.505751,Trench 3,2025-09-30 15:27:13,2025-09-30,2025-09,Test,0,1,Quick
1,3201341,35066458-9a4d-4d64-855c-b21ebf440fe8,60832013410025,0.561418,Trench 3,2025-09-30 16:54:39,2025-09-30,2025-09,Test,0,1,Quick
2,1886335,883ef202-4aa4-41d7-99ff-fd6be2ef69db,60818863350053,0.530778,Trench 3,2025-09-28 18:26:57,2025-09-28,2025-09,Test,0,1,Quick
3,3307096,e8d69ec8-866a-47c8-a944-067cb83ac800,60833070960021,0.448546,Trench 3,2025-09-28 17:18:59,2025-09-29,2025-09,Test,0,1,Quick
4,2370824,d59b58cb-5093-4cac-95ba-90397c2bb670,60823708240045,0.471498,Trench 3,2025-09-28 15:24:26,2025-09-28,2025-09,Test,0,1,Quick


In [1873]:
# Snapshot of the Test-period rows; `dfd` is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [1874]:
# Train-period sample for the FPD30 Gini, Trench 3 only.
#
# Query outline:
#   * parsed / modelname - scores (`prediction`) read from the training run table
#     `ml_training_model_run_details` for model version 'v1'.
#   * deliquency         - 0/1 default flags and maturity flags derived from
#     `loan_deliquency_data`; here `deffpd30` (obs_min_inst_def30 >= 1 and
#     min_inst_def30 = 1) and `flg_mature_fpd30` (obs_min_inst_def30 >= 1) are used.
#   * final select       - joins scores to `loan_master_table` and the flags, keeps
#     disbursed loans whose `new_loan_type` contains neither 'sil' nor 'big'
#     (case-insensitive), with a non-null score and a mature FPD30 window.
#     The application timestamp is `startApplyDateTime` for 'Flex-up' loans,
#     `termsAndConditionsSubmitDateTime` otherwise, falling back to the run `start_time`.
#
# NOTE(review): `calcFeatures` is built in `parsed` but not selected downstream.
# NOTE(review): the WHERE filters on `loanmaster` columns and the inner join on
# `deliquency` make the `left join` behave like an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all selected columns.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffpd30,flg_mature_fpd30,new_loan_type
0,1850912,c820abdd-1520-48a7-9346-d32517e0816b,60818509120021,0.508452,Trench 3,2023-01-21 07:07:51,2023-01-21,2023-01,Train,0,1,Flex
1,1433945,96a778a2-0ba0-46e3-8a8c-33dc4675ae30,60814339450044,0.48785,Trench 3,2023-02-24 11:43:28,2023-02-24,2023-02,Train,0,1,Flex
2,1432477,c028397e-a8fe-40a3-9ef9-94fab6a5f83f,60814324770027,0.476592,Trench 3,2023-03-26 11:10:39,2023-03-26,2023-03,Train,0,1,Flex-up
3,1475618,2831a630-6548-4663-b2a3-f41eb6c6fc51,60814756180024,0.502481,Trench 3,2023-01-04 18:15:29,2023-01-04,2023-01,Train,0,1,Quick
4,1290862,e220cb4b-f28a-4cee-ab4e-230c7127ad2a,60812908620022,0.516493,Trench 3,2023-03-26 06:07:34,2023-03-28,2023-03,Train,0,1,Quick


In [1875]:
# Snapshot of the Train-period rows, kept separate from the Test snapshot held in `df1`.
df2 = dfd.copy()

In [1876]:
# Stack the Train rows (df2) on top of the Test rows (df1) under a fresh 0..n-1 index.
# Empty frames are left out of the concat: the Test query can return no rows, and
# pandas deprecates concatenating empty entries (FutureWarning about result dtypes).
# Excluding them keeps the current result and avoids the warning.
_non_empty_parts = [part for part in (df2, df1) if not part.empty]
if _non_empty_parts:
    df_concat = pd.concat(_non_empty_parts, ignore_index=True)
else:
    # Both samples are empty: keep the column layout so later cells still find their columns.
    df_concat = df2.head(0).reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29688 entries, 0 to 29687
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             29688 non-null  object        
 1   digitalLoanAccountId   29688 non-null  object        
 2   loanAccountNumber      29688 non-null  object        
 3   beta_cash_app_score    29688 non-null  float64       
 4   trenchCategory         29688 non-null  object        
 5   appln_submit_datetime  29688 non-null  datetime64[us]
 6   disbursementdate       29688 non-null  dbdate        
 7   Application_month      29688 non-null  object        
 8   Data_selection         29688 non-null  object        
 9   deffpd30               29688 non-null  Int64         
 10  flg_mature_fpd30       29688 non-null  Int64         
 11  new_loan_type          29688 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), float64(1), objec

In [1877]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = df_concat['beta_cash_app_score'].pipe(pd.to_numeric, errors='coerce')

In [1878]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fpd30.csv")

In [1879]:
# Gini of `beta_cash_app_score` against the `deffpd30` flag, labelled 'FPD30',
# with `new_loan_type` passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffpd30', 'FPD30', product_column='new_loan_type'
)

In [1880]:
# Keep the FPD30 result under its own name: `gini_results` is reassigned by the
# FSPD30 / FSTPD30 sections, and `f2` is one of the frames merged into `final_df`.
f2 = gini_results.copy()
f2.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2023-01-01,2023-01-31,-0.25,Month,beta_cash_app_score,1.1.0,FPD30,Overall
1,2023-01-02,2023-01-08,0.083333,Week,beta_cash_app_score,1.1.0,FPD30,Overall
2,2023-01-09,2023-01-15,-0.444444,Week,beta_cash_app_score,1.1.0,FPD30,Overall
3,2023-01-16,2023-01-22,0.125,Week,beta_cash_app_score,1.1.0,FPD30,Overall
4,2023-01-23,2023-01-29,-0.575758,Week,beta_cash_app_score,1.1.0,FPD30,Overall


## FSPD30

## Test

In [1881]:
# Test-period sample for the FSPD30 Gini, Trench 3 only.
#
# Query outline:
#   * parsed / latest_request - latest run per (customerId, digitalLoanAccountId,
#     modelDisplayName) from `ml_model_run_details`, model version 'v1'. The
#     `prediction` text is rewritten (single -> double quotes, None -> null) so it
#     can be read with JSON_VALUE.
#   * model_run               - latest 'Beta-Cash-Model-response' request per loan,
#     used only as a fallback source for `trenchCategory`.
#   * modelname               - score taken from `$.combined_score` of the prediction.
#   * deliquency              - 0/1 default and maturity flags from `loan_deliquency_data`;
#     here `deffspd30` (obs_min_inst_def30 >= 2 and min_inst_def30 in (1,2)) and
#     `flg_mature_fspd_30` (obs_min_inst_def30 >= 2) are used.
#   * final select            - disbursed loans whose `new_loan_type` contains neither
#     'sil' nor 'big' (case-insensitive), non-null score, mature FSPD30 window.
#
# NOTE(review): `coalesce(...)` around the score has a single argument, so it adds nothing.
# NOTE(review): `latest_request` partitions by modelDisplayName as well, so a loan
# scored under both display names would keep one row per name - confirm intended.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all selected columns.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type


In [1882]:
# Snapshot of the Test-period rows; `dfd` is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [1883]:
# Train-period sample for the FSPD30 Gini, Trench 3 only.
#
# Query outline:
#   * parsed / modelname - scores (`prediction`) read from the training run table
#     `ml_training_model_run_details` for model version 'v1'.
#   * deliquency         - 0/1 default flags and maturity flags derived from
#     `loan_deliquency_data`; here `deffspd30` (obs_min_inst_def30 >= 2 and
#     min_inst_def30 in (1,2)) and `flg_mature_fspd_30` (obs_min_inst_def30 >= 2) are used.
#   * final select       - joins scores to `loan_master_table` and the flags, keeps
#     disbursed loans whose `new_loan_type` contains neither 'sil' nor 'big'
#     (case-insensitive), with a non-null score and a mature FSPD30 window.
#
# NOTE(review): `calcFeatures` is built in `parsed` but not selected downstream.
# NOTE(review): the WHERE filters on `loanmaster` columns and the inner join on
# `deliquency` make the `left join` behave like an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all selected columns.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffspd30,flg_mature_fspd_30,new_loan_type
0,1684126,a0dac849-b96a-4fdf-bc25-51b46e66787f,60816841260022,0.481489,Trench 3,2023-03-03 19:57:46,2023-03-03,2023-03,Train,0,1,Flex
1,1458635,9cb4e619-8d70-4d40-9d36-27d58a148df5,60814586350027,0.493451,Trench 3,2023-03-23 17:59:32,2023-03-24,2023-03,Train,0,1,Quick
2,1510740,d50ef042-dfba-408f-b960-aa6db9b0197e,60815107400021,0.474985,Trench 3,2023-03-11 16:09:47,2023-03-11,2023-03,Train,0,1,Flex-up
3,1556018,377b77e4-d235-4879-8b50-2f9c3035e8d0,60815560180026,0.49372,Trench 3,2023-03-14 21:40:58,2023-03-16,2023-03,Train,0,1,Quick
4,1363991,8702e36e-e905-49a0-b762-47f86692f7f2,60813639910023,0.465292,Trench 3,2023-02-27 21:35:15,2023-02-28,2023-02,Train,1,1,Quick


In [1884]:
# Snapshot of the Train-period rows, kept separate from the Test snapshot held in `df1`.
df2 = dfd.copy()

In [1885]:
# Stack the Train rows (df2) on top of the Test rows (df1) under a fresh 0..n-1 index.
# Empty frames are left out of the concat: the Test query can return no rows, and
# pandas deprecates concatenating empty entries (FutureWarning about result dtypes).
# Excluding them keeps the current result and avoids the warning.
_non_empty_parts = [part for part in (df2, df1) if not part.empty]
if _non_empty_parts:
    df_concat = pd.concat(_non_empty_parts, ignore_index=True)
else:
    # Both samples are empty: keep the column layout so later cells still find their columns.
    df_concat = df2.head(0).reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27257 entries, 0 to 27256
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             27257 non-null  Int64         
 1   digitalLoanAccountId   27257 non-null  object        
 2   loanAccountNumber      27257 non-null  object        
 3   beta_cash_app_score    27257 non-null  float64       
 4   trenchCategory         27257 non-null  object        
 5   appln_submit_datetime  27257 non-null  datetime64[us]
 6   disbursementdate       27257 non-null  dbdate        
 7   Application_month      27257 non-null  object        
 8   Data_selection         27257 non-null  object        
 9   deffspd30              27257 non-null  Int64         
 10  flg_mature_fspd_30     27257 non-null  Int64         
 11  new_loan_type          27257 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1886]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = df_concat['beta_cash_app_score'].pipe(pd.to_numeric, errors='coerce')

In [1887]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fspd30.csv")

In [1888]:
# Gini of `beta_cash_app_score` against the `deffspd30` flag, labelled 'FSPD30',
# with `new_loan_type` passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffspd30', 'FSPD30', product_column='new_loan_type'
)

In [1889]:
# Keep the FSPD30 result under its own name: `gini_results` is reassigned by the
# FSTPD30 section, and `f3` is one of the frames merged into `final_df`.
f3 = gini_results.copy()
f3.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2023-01-01,2023-01-31,-0.045082,Month,beta_cash_app_score,1.1.0,FSPD30,Overall
1,2023-01-02,2023-01-08,0.272727,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
2,2023-01-09,2023-01-15,0.083333,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
3,2023-01-16,2023-01-22,0.153846,Week,beta_cash_app_score,1.1.0,FSPD30,Overall
4,2023-01-23,2023-01-29,-0.18,Week,beta_cash_app_score,1.1.0,FSPD30,Overall


## FSTPD30

## Test

In [1890]:
# Test-period sample for the FSTPD30 Gini, Trench 3 only.
#
# Query outline:
#   * parsed / latest_request - latest run per (customerId, digitalLoanAccountId,
#     modelDisplayName) from `ml_model_run_details`, model version 'v1'. The
#     `prediction` text is rewritten (single -> double quotes, None -> null) so it
#     can be read with JSON_VALUE.
#   * model_run               - latest 'Beta-Cash-Model-response' request per loan,
#     used only as a fallback source for `trenchCategory`.
#   * modelname               - score taken from `$.combined_score` of the prediction.
#   * deliquency              - 0/1 default and maturity flags from `loan_deliquency_data`;
#     here `deffstpd30` (obs_min_inst_def30 >= 3 and min_inst_def30 in (1,2,3)) and
#     `flg_mature_fstpd_30` (obs_min_inst_def30 >= 3) are used.
#   * final select            - disbursed loans whose `new_loan_type` contains neither
#     'sil' nor 'big' (case-insensitive), non-null score, mature FSTPD30 window.
#
# NOTE(review): `coalesce(...)` around the score has a single argument, so it adds nothing.
# NOTE(review): `latest_request` partitions by modelDisplayName as well, so a loan
# scored under both display names would keep one row per name - confirm intended.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  coalesce(SAFE_CAST(JSON_VALUE(p.prediction_clean, "$.combined_score") AS Float64)) AS beta_cash_app_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory"))  trenchCategory
  from latest_request p
  left join model_run m on p.digitalLoanAccountId = m.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all selected columns.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type


In [1891]:
# Snapshot of the Test-period rows; `dfd` is reassigned by the Train query cell that follows.
df1 = dfd.copy()

## Train

In [1892]:
# Train-period sample for the FSTPD30 Gini, Trench 3 only.
#
# Query outline:
#   * parsed / modelname - scores (`prediction`) read from the training run table
#     `ml_training_model_run_details` for model version 'v1'.
#   * deliquency         - 0/1 default flags and maturity flags derived from
#     `loan_deliquency_data`; here `deffstpd30` (obs_min_inst_def30 >= 3 and
#     min_inst_def30 in (1,2,3)) and `flg_mature_fstpd_30` (obs_min_inst_def30 >= 3) are used.
#   * final select       - joins scores to `loan_master_table` and the flags, keeps
#     disbursed loans whose `new_loan_type` contains neither 'sil' nor 'big'
#     (case-insensitive), with a non-null score and a mature FSTPD30 window.
#
# NOTE(review): `calcFeatures` is built in `parsed` but not selected downstream.
# NOTE(review): the WHERE filters on `loanmaster` columns and the inner join on
# `deliquency` make the `left join` behave like an inner join.
sq = """WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-AppScore-Model', 'apps_score_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction beta_cash_app_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.beta_cash_app_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and (lower(loanmaster.new_loan_type) not like '%sil%' and lower(loanmaster.new_loan_type) not like '%big%')
  and r.beta_cash_app_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;"""

# Run the query and load the full result into a DataFrame.
dfd = client.query(sq).to_dataframe()
# Drop rows that are exact duplicates across all selected columns.
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

Unnamed: 0,customerId,digitalLoanAccountId,loanAccountNumber,beta_cash_app_score,trenchCategory,appln_submit_datetime,disbursementdate,Application_month,Data_selection,deffstpd30,flg_mature_fstpd_30,new_loan_type
0,1321234,0926a34f-3540-4216-aa3b-9f55ef0a5feb,60813212340025,0.446036,Trench 3,2023-03-21 18:48:09,2023-03-26,2023-03,Train,0,1,Flex
1,1419074,5ad907e9-5038-4a16-9e94-a33080ce35ea,60814190740021,0.504198,Trench 3,2023-01-31 00:36:06,2023-01-31,2023-01,Train,1,1,Quick
2,1101445,198f8407-0084-4c59-bdac-52fc2671e110,60811014450022,0.509409,Trench 3,2023-03-01 08:19:43,2023-03-01,2023-03,Train,0,1,Flex-up
3,1689305,bc237894-82fd-426e-a45d-8229d9e52f07,60816893050023,0.499124,Trench 3,2023-01-10 11:15:04,2023-01-10,2023-01,Train,0,1,Quick
4,1363908,d9dee1e3-62e0-43f5-a4f6-f74ebd968d19,60813639080022,0.469166,Trench 3,2023-03-18 15:49:17,2023-03-18,2023-03,Train,0,1,Flex


In [1893]:
# Snapshot of the Train-period rows, kept separate from the Test snapshot held in `df1`.
df2 = dfd.copy()

In [1894]:
# Stack the Train rows (df2) on top of the Test rows (df1) under a fresh 0..n-1 index.
# Empty frames are left out of the concat: the Test query can return no rows, and
# pandas deprecates concatenating empty entries (FutureWarning about result dtypes).
# Excluding them keeps the current result and avoids the warning.
_non_empty_parts = [part for part in (df2, df1) if not part.empty]
if _non_empty_parts:
    df_concat = pd.concat(_non_empty_parts, ignore_index=True)
else:
    # Both samples are empty: keep the column layout so later cells still find their columns.
    df_concat = df2.head(0).reset_index(drop=True)
df_concat.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23901 entries, 0 to 23900
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             23901 non-null  Int64         
 1   digitalLoanAccountId   23901 non-null  object        
 2   loanAccountNumber      23901 non-null  object        
 3   beta_cash_app_score    23901 non-null  float64       
 4   trenchCategory         23901 non-null  object        
 5   appln_submit_datetime  23901 non-null  datetime64[us]
 6   disbursementdate       23901 non-null  dbdate        
 7   Application_month      23901 non-null  object        
 8   Data_selection         23901 non-null  object        
 9   deffstpd30             23901 non-null  Int64         
 10  flg_mature_fstpd_30    23901 non-null  Int64         
 11  new_loan_type          23901 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


In [1895]:
# Force the score column to a numeric dtype; values that cannot be parsed become NaN.
df_concat['beta_cash_app_score'] = df_concat['beta_cash_app_score'].pipe(pd.to_numeric, errors='coerce')

In [1896]:
# df_concat.to_csv(r"beta_cash_app_scoretrench1fstpd30.csv")

In [1897]:
# Gini of `beta_cash_app_score` against the `deffstpd30` flag, labelled 'FSTPD30',
# with `new_loan_type` passed as the product column.
gini_results = calculate_periodic_gini_producttype(
    df_concat, 'beta_cash_app_score', 'deffstpd30', 'FSTPD30', product_column='new_loan_type'
)

In [1898]:
# Keep the FSTPD30 result under its own name; `f4` is the last of the frames
# merged into `final_df` in the combining step.
f4 = gini_results.copy()
f4.head()

Unnamed: 0,start_date,end_date,beta_cash_app_score_FSTPD30_gini,period,Model_Name,version,bad_rate,loan_type
0,2023-01-01,2023-01-31,0.074074,Month,beta_cash_app_score,1.1.0,FSTPD30,Overall
1,2023-01-02,2023-01-08,0.3,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
2,2023-01-09,2023-01-15,0.357143,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
3,2023-01-16,2023-01-22,0.151515,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall
4,2023-01-23,2023-01-29,0.031746,Week,beta_cash_app_score,1.1.0,FSTPD30,Overall


## Combining the dataframes

In [1899]:
import functools

# Per-target Gini tables (f0..f4), in the order their metric columns should appear.
dataframes = [f0, f1, f2, f3, f4]
# Identifier columns shared by every Gini table; used as the join keys.
# NOTE(review): 'bad_rate' holds the target label (e.g. 'FPD0', 'FPD30'), so rows from
# different targets never share a key and the outer join leaves the other targets'
# Gini columns empty on each row - confirm this long/sparse layout is intended.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two Gini tables on `common_columns`, keeping rows found in either."""
    return df1.merge(df2, how='outer', on=common_columns)


# Fold the list left to right into a single wide table.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_cash_app_score_FPD0_gini',
       'period', 'Model_Name', 'version', 'bad_rate', 'loan_type',
       'beta_cash_app_score_FPD10_gini', 'beta_cash_app_score_FPD30_gini',
       'beta_cash_app_score_FSPD30_gini',
       'beta_cash_app_score_FSTPD30_gini'], dtype=object)

In [1900]:
# Identifier columns first, then the five Gini metrics renamed with a `_t3_` infix
# to mark them as Trench 3 figures.
_id_columns = ['start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate']
_gini_renames = {
    'beta_cash_app_score_FPD0_gini': 'beta_cash_app_score_FPD0_t3_gini',
    'beta_cash_app_score_FPD10_gini': 'beta_cash_app_score_FPD10_t3_gini',
    'beta_cash_app_score_FPD30_gini': 'beta_cash_app_score_FPD30_t3_gini',
    'beta_cash_app_score_FSPD30_gini': 'beta_cash_app_score_FSPD30_t3_gini',
    'beta_cash_app_score_FSTPD30_gini': 'beta_cash_app_score_FSTPD30_t3_gini',
}
final_df = (
    final_df[_id_columns + list(_gini_renames)]
    .rename(columns=_gini_renames)
    # Constant tags identifying this slice of results.
    .assign(
        Trench_category='Trench 3',
        Model_display_name='apps_score_cash',
        Product_type='CASH',
    )
)
final_df.dtypes

start_date                             datetime64[ns]
end_date                               datetime64[ns]
period                                         object
Model_Name                                     object
version                                        object
loan_type                                      object
bad_rate                                       object
beta_cash_app_score_FPD0_t3_gini              float64
beta_cash_app_score_FPD10_t3_gini             float64
beta_cash_app_score_FPD30_t3_gini             float64
beta_cash_app_score_FSPD30_t3_gini            float64
beta_cash_app_score_FSTPD30_t3_gini           float64
Trench_category                                object
Model_display_name                             object
Product_type                                   object
dtype: object

In [1901]:
# Preview the merged table before it is loaded to BigQuery in the next cell.
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,loan_type,bad_rate,beta_cash_app_score_FPD0_t3_gini,beta_cash_app_score_FPD10_t3_gini,beta_cash_app_score_FPD30_t3_gini,beta_cash_app_score_FSPD30_t3_gini,beta_cash_app_score_FSTPD30_t3_gini,Trench_category,Model_display_name,Product_type
0,2023-01-01,2023-01-31,Month,beta_cash_app_score,1.1.0,Overall,FPD0,0.085346,,,,,Trench 3,apps_score_cash,CASH
1,2023-01-02,2023-01-08,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.05,,,,,Trench 3,apps_score_cash,CASH
2,2023-01-09,2023-01-15,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.0,,,,,Trench 3,apps_score_cash,CASH
3,2023-01-16,2023-01-22,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.121212,,,,,Trench 3,apps_score_cash,CASH
4,2023-01-23,2023-01-29,Week,beta_cash_app_score,1.1.0,Overall,FPD0,0.222222,,,,,Trench 3,apps_score_cash,CASH


In [1902]:
# Load `final_df` into the playground table that holds the Trench 3 Gini results.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.app_score_cash_t3_v1_gini5"

# WRITE_TRUNCATE replaces whatever the table currently holds; use "WRITE_APPEND" to add rows instead.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
# Block until the load job finishes.
job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=4acd94ba-b83b-4f31-b65a-957b4ec1936d>

# Beta-Cash-Stack-Model

## Trench 1

## FPD0

In [1903]:
## Trench 1
## FPD0: periodic Gini of Beta_cash_stack_score on the Test and Train populations.

# Outcome column and maturity flag that define the target for this cell.
stack_target_col = 'deffpd0'
stack_maturity_flag = 'flg_mature_fpd0'

## Test
# Score CTEs for the Test population (audit_balance.ml_model_run_details):
# keeps the latest v1 run per customer / loan / model display name and falls
# back to the trenchCategory inside the request payload when the run has none.
# NOTE(review): calcFeatures and prediction_clean are not referenced by the
# later CTEs -- confirm whether they can be dropped to reduce scanned data.
stack_test_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details`
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
,
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
"""

## Train
# Score CTEs for the Train population (playground training-run table); no
# de-duplication or payload fallback is applied here.
stack_train_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
"""

# Shared tail of both queries: derives the delinquency targets / maturity
# flags, joins the scores to the loan master table and keeps disbursed
# 'Trench 1' loans whose maturity flag is 1. The {placeholders} are filled
# with str.format below (the SQL itself contains no literal braces).
stack_outcome_sql = """
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  '{data_selection}' Data_selection,
  del.{target_col},
  del.{maturity_flag},
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{maturity_flag} = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run Test first, then Train, so that `sq` and `dfd` end the cell holding the
# Train query / frame exactly as before.
stack_frames = {}
for stack_label, stack_ctes in (('Test', stack_test_ctes), ('Train', stack_train_ctes)):
    sq = stack_ctes + stack_outcome_sql.format(
        data_selection=stack_label,
        target_col=stack_target_col,
        maturity_flag=stack_maturity_flag,
    )
    dfd = client.query(sq).to_dataframe()
    dfd = dfd.drop_duplicates(keep='first')
    stack_frames[stack_label] = dfd.copy()

df1 = stack_frames['Test']
df2 = stack_frames['Train']

# pandas (2.1+) emits a FutureWarning when concat() receives an empty frame,
# because empty inputs will stop being ignored when result dtypes are chosen.
# Dropping empty frames up front keeps today's result and silences the
# warning; if every frame is empty, fall back to the full list so concat()
# still has objects to combine.
stack_non_empty = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(stack_non_empty or [df2, df1], ignore_index=True)
df_concat.info()

# Scores can arrive as strings; values that cannot be parsed become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    stack_target_col,
    'FPD0',
    product_column='new_loan_type'
)

f0 = gini_results.copy()
print(f0.head())



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18365 entries, 0 to 18364
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             18365 non-null  object        
 1   digitalLoanAccountId   18365 non-null  object        
 2   loanAccountNumber      18365 non-null  object        
 3   Beta_cash_stack_score  18365 non-null  object        
 4   trenchCategory         18365 non-null  object        
 5   appln_submit_datetime  18365 non-null  datetime64[us]
 6   disbursementdate       18365 non-null  dbdate        
 7   Application_month      18365 non-null  object        
 8   Data_selection         18365 non-null  object        
 9   deffpd0                18365 non-null  Int64         
 10  flg_mature_fpd0        18365 non-null  Int64         
 11  new_loan_type          18365 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

## FPD10

In [1904]:
## Trench 1
## FPD10: periodic Gini of Beta_cash_stack_score on the Test and Train populations.

# Outcome column and maturity flag that define the target for this cell.
stack_target_col = 'deffpd10'
stack_maturity_flag = 'flg_mature_fpd10'

## Test
# Score CTEs for the Test population (audit_balance.ml_model_run_details):
# keeps the latest v1 run per customer / loan / model display name and falls
# back to the trenchCategory inside the request payload when the run has none.
# NOTE(review): calcFeatures and prediction_clean are not referenced by the
# later CTEs -- confirm whether they can be dropped to reduce scanned data.
stack_test_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details`
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
,
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
"""

## Train
# Score CTEs for the Train population (playground training-run table); no
# de-duplication or payload fallback is applied here.
stack_train_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
"""

# Shared tail of both queries: derives the delinquency targets / maturity
# flags, joins the scores to the loan master table and keeps disbursed
# 'Trench 1' loans whose maturity flag is 1. The {placeholders} are filled
# with str.format below (the SQL itself contains no literal braces).
stack_outcome_sql = """
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  '{data_selection}' Data_selection,
  del.{target_col},
  del.{maturity_flag},
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{maturity_flag} = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run Test first, then Train, so that `sq` and `dfd` end the cell holding the
# Train query / frame exactly as before.
stack_frames = {}
for stack_label, stack_ctes in (('Test', stack_test_ctes), ('Train', stack_train_ctes)):
    sq = stack_ctes + stack_outcome_sql.format(
        data_selection=stack_label,
        target_col=stack_target_col,
        maturity_flag=stack_maturity_flag,
    )
    dfd = client.query(sq).to_dataframe()
    dfd = dfd.drop_duplicates(keep='first')
    stack_frames[stack_label] = dfd.copy()

df1 = stack_frames['Test']
df2 = stack_frames['Train']

# pandas (2.1+) emits a FutureWarning when concat() receives an empty frame,
# because empty inputs will stop being ignored when result dtypes are chosen.
# Dropping empty frames up front keeps today's result and silences the
# warning; if every frame is empty, fall back to the full list so concat()
# still has objects to combine.
stack_non_empty = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(stack_non_empty or [df2, df1], ignore_index=True)
df_concat.info()

# Scores can arrive as strings; values that cannot be parsed become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    stack_target_col,
    'FPD10',
    product_column='new_loan_type'
)

f1 = gini_results.copy()
print(f1.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16857 entries, 0 to 16856
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             16857 non-null  object        
 1   digitalLoanAccountId   16857 non-null  object        
 2   loanAccountNumber      16857 non-null  object        
 3   Beta_cash_stack_score  16857 non-null  object        
 4   trenchCategory         16857 non-null  object        
 5   appln_submit_datetime  16857 non-null  datetime64[us]
 6   disbursementdate       16857 non-null  dbdate        
 7   Application_month      16857 non-null  object        
 8   Data_selection         16857 non-null  object        
 9   deffpd10               16857 non-null  Int64         
 10  flg_mature_fpd10       16857 non-null  Int64         
 11  new_loan_type          16857 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

## FPD30

In [1905]:
## Trench 1
## FPD30: periodic Gini of Beta_cash_stack_score on the Test and Train populations.

# Outcome column and maturity flag that define the target for this cell.
stack_target_col = 'deffpd30'
stack_maturity_flag = 'flg_mature_fpd30'

## Test
# Score CTEs for the Test population (audit_balance.ml_model_run_details):
# keeps the latest v1 run per customer / loan / model display name and falls
# back to the trenchCategory inside the request payload when the run has none.
# NOTE(review): calcFeatures and prediction_clean are not referenced by the
# later CTEs -- confirm whether they can be dropped to reduce scanned data.
stack_test_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details`
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
,
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
"""

## Train
# Score CTEs for the Train population (playground training-run table); no
# de-duplication or payload fallback is applied here.
stack_train_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
"""

# Shared tail of both queries: derives the delinquency targets / maturity
# flags, joins the scores to the loan master table and keeps disbursed
# 'Trench 1' loans whose maturity flag is 1. The {placeholders} are filled
# with str.format below (the SQL itself contains no literal braces).
stack_outcome_sql = """
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  '{data_selection}' Data_selection,
  del.{target_col},
  del.{maturity_flag},
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{maturity_flag} = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run Test first, then Train, so that `sq` and `dfd` end the cell holding the
# Train query / frame exactly as before.
stack_frames = {}
for stack_label, stack_ctes in (('Test', stack_test_ctes), ('Train', stack_train_ctes)):
    sq = stack_ctes + stack_outcome_sql.format(
        data_selection=stack_label,
        target_col=stack_target_col,
        maturity_flag=stack_maturity_flag,
    )
    dfd = client.query(sq).to_dataframe()
    dfd = dfd.drop_duplicates(keep='first')
    stack_frames[stack_label] = dfd.copy()

df1 = stack_frames['Test']
df2 = stack_frames['Train']

# pandas (2.1+) emits a FutureWarning when concat() receives an empty frame,
# because empty inputs will stop being ignored when result dtypes are chosen.
# Dropping empty frames up front keeps today's result and silences the
# warning; if every frame is empty, fall back to the full list so concat()
# still has objects to combine.
stack_non_empty = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(stack_non_empty or [df2, df1], ignore_index=True)
df_concat.info()

# Scores can arrive as strings; values that cannot be parsed become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    stack_target_col,
    'FPD30',
    product_column='new_loan_type'
)

f2 = gini_results.copy()
print(f2.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15848 entries, 0 to 15847
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             15848 non-null  object        
 1   digitalLoanAccountId   15848 non-null  object        
 2   loanAccountNumber      15848 non-null  object        
 3   Beta_cash_stack_score  15848 non-null  object        
 4   trenchCategory         15848 non-null  object        
 5   appln_submit_datetime  15848 non-null  datetime64[us]
 6   disbursementdate       15848 non-null  dbdate        
 7   Application_month      15848 non-null  object        
 8   Data_selection         15848 non-null  object        
 9   deffpd30               15848 non-null  Int64         
 10  flg_mature_fpd30       15848 non-null  Int64         
 11  new_loan_type          15848 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

## FSPD30

In [1906]:
## Trench 1
## FSPD30: periodic Gini of Beta_cash_stack_score on the Test and Train populations.

# Outcome column and maturity flag that define the target for this cell.
stack_target_col = 'deffspd30'
stack_maturity_flag = 'flg_mature_fspd_30'

## Test
# Score CTEs for the Test population (audit_balance.ml_model_run_details):
# keeps the latest v1 run per customer / loan / model display name and falls
# back to the trenchCategory inside the request payload when the run has none.
# NOTE(review): calcFeatures and prediction_clean are not referenced by the
# later CTEs -- confirm whether they can be dropped to reduce scanned data.
stack_test_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details`
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
,
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
"""

## Train
# Score CTEs for the Train population (playground training-run table); no
# de-duplication or payload fallback is applied here.
stack_train_ctes = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
"""

# Shared tail of both queries: derives the delinquency targets / maturity
# flags, joins the scores to the loan master table and keeps disbursed
# 'Trench 1' loans whose maturity flag is 1. The {placeholders} are filled
# with str.format below (the SQL itself contains no literal braces).
stack_outcome_sql = """
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month,
  '{data_selection}' Data_selection,
  del.{target_col},
  del.{maturity_flag},
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{maturity_flag} = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Run Test first, then Train, so that `sq` and `dfd` end the cell holding the
# Train query / frame exactly as before.
stack_frames = {}
for stack_label, stack_ctes in (('Test', stack_test_ctes), ('Train', stack_train_ctes)):
    sq = stack_ctes + stack_outcome_sql.format(
        data_selection=stack_label,
        target_col=stack_target_col,
        maturity_flag=stack_maturity_flag,
    )
    dfd = client.query(sq).to_dataframe()
    dfd = dfd.drop_duplicates(keep='first')
    stack_frames[stack_label] = dfd.copy()

df1 = stack_frames['Test']
df2 = stack_frames['Train']

# pandas (2.1+) emits a FutureWarning when concat() receives an empty frame,
# because empty inputs will stop being ignored when result dtypes are chosen.
# Dropping empty frames up front keeps today's result and silences the
# warning; if every frame is empty, fall back to the full list so concat()
# still has objects to combine.
stack_non_empty = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(stack_non_empty or [df2, df1], ignore_index=True)
df_concat.info()

# Scores can arrive as strings; values that cannot be parsed become NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    stack_target_col,
    'FSPD30',
    product_column='new_loan_type'
)

f3 = gini_results.copy()
print(f3.head())

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14837 entries, 0 to 14836
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             14837 non-null  Int64         
 1   digitalLoanAccountId   14837 non-null  object        
 2   loanAccountNumber      14837 non-null  object        
 3   Beta_cash_stack_score  14837 non-null  float64       
 4   trenchCategory         14837 non-null  object        
 5   appln_submit_datetime  14837 non-null  datetime64[us]
 6   disbursementdate       14837 non-null  dbdate        
 7   Application_month      14837 non-null  object        
 8   Data_selection         14837 non-null  object        
 9   deffspd30              14837 non-null  Int64         
 10  flg_mature_fspd_30     14837 non-null  Int64         
 11  new_loan_type          14837 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

## FSTPD30

In [1907]:
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
"""

# Materialise the Trench 1 FSTPD30 Test rows from the query string assembled
# just above and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df1 = dfd.copy()

## Train
# The Train query now carries the same `modelVersionId = 'v1'` filter as the
# Test query above and as the Trench 2 Train queries further down; without it
# training rows of any modelVersionId were included in the v1 Gini figures.
# NOTE(review): confirm the earlier Trench 1 Train queries use the same filter.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 1'
  ;
  """

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')

df2 = dfd.copy()

# Stack Train rows first, then Test rows; the Data_selection column keeps
# track of which query each row came from.
df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

# Any score that cannot be parsed as a number becomes NaN (errors='coerce').
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Gini of the score against the FSTPD30 flag, computed per new_loan_type.
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    'deffstpd30',
    'FSTPD30',
    product_column='new_loan_type'
)

f4 = gini_results.copy()
print(f4.head())

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13605 entries, 0 to 13604
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             13605 non-null  Int64         
 1   digitalLoanAccountId   13605 non-null  object        
 2   loanAccountNumber      13605 non-null  object        
 3   Beta_cash_stack_score  13605 non-null  float64       
 4   trenchCategory         13605 non-null  object        
 5   appln_submit_datetime  13605 non-null  datetime64[us]
 6   disbursementdate       13605 non-null  dbdate        
 7   Application_month      13605 non-null  object        
 8   Data_selection         13605 non-null  object        
 9   deffstpd30             13605 non-null  Int64         
 10  flg_mature_fstpd_30    13605 non-null  Int64         
 11  new_loan_type          13605 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

## Combining the dataframes

In [1908]:
import functools

# Per-target Gini tables to combine. f4 is the FSTPD30 table built in the
# section directly above; f0-f3 are presumably the FPD0/FPD10/FPD30/FSPD30
# tables from earlier Trench 1 cells -- verify on a fresh kernel.
dataframes = [f0, f1, f2, f3, f4]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version','loan_type', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-merge two Gini tables on the shared ``common_columns`` keys."""
    return pd.merge(df1, df2, how='outer', on=common_columns)


# NOTE(review): 'bad_rate' is part of the join key, so rows whose bad_rate
# differs between two tables will not line up in the outer merge -- confirm
# that this is intended.
final_df = functools.reduce(merge_dataframes, dataframes)

# Source Gini column -> Trench 1 ("_t1_") output column, one pair per target.
_t1_targets = ['FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30']
_t1_gini_renames = {
    f'Beta_cash_stack_score_{target}_gini': f'Beta_cash_stack_score_{target}_t1_gini'
    for target in _t1_targets
}

# Keep the key columns followed by the five Gini columns, rename the latter,
# then tag every row with the segment this table describes.
final_df = (
    final_df[common_columns + list(_t1_gini_renames)]
    .copy()
    .rename(columns=_t1_gini_renames)
    .assign(
        Trench_category='Trench 1',
        Model_display_name='beta_stack_model_cash',
        Product_type='CASH',
    )
)

# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_cash_t1_v1_gini_v5"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # or "WRITE_APPEND"
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=9d1b7f2c-77f1-4f27-844f-f2e448abdb83>

## Trench 2

In [1909]:
## Trench 2
# The FPD0 / FPD10 / FPD30 / FSPD30 sections were four copies of the same Test
# and Train queries that differed only in the delinquency target column, its
# maturity flag and the label passed to the Gini helper. They are generated
# from the shared SQL pieces below so the copies cannot drift apart.

# Score CTEs for the Test population: latest v1 model run per
# (customerId, digitalLoanAccountId, modelDisplayName); trenchCategory falls
# back to the value found in the latest request payload when it is NULL.
_T2_TEST_SCORE_CTES = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
"""

# Score CTEs for the Train population: v1 rows of the training run table.
_T2_TRAIN_SCORE_CTES = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
"""

_T2_SCORE_CTES = {'Test': _T2_TEST_SCORE_CTES, 'Train': _T2_TRAIN_SCORE_CTES}

# Shared tail of every query: delinquency flags plus the final select.
# Filled in with str.format; the placeholders are {data_selection},
# {target_col} and {mature_flag_col}. This piece contains no other braces.
_T2_OUTCOME_SQL = """
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  '{data_selection}' Data_selection,  
  del.{target_col},
  del.{mature_flag_col},
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.{mature_flag_col} = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""


def _t2_fetch_scored_loans(data_selection, target_col, mature_flag_col):
    """Query the Trench 2 score rows for one population and one target.

    Parameters
    ----------
    data_selection : str
        'Test' or 'Train'; picks the score CTEs and is written to the
        Data_selection column of the result.
    target_col : str
        Delinquency flag column of the `deliquency` CTE to return,
        e.g. 'deffpd0'.
    mature_flag_col : str
        Maturity flag column of the `deliquency` CTE; only rows where it
        equals 1 are returned, e.g. 'flg_mature_fpd0'.

    Returns
    -------
    pandas.DataFrame
        Query result with exact duplicate rows removed.

    Raises
    ------
    KeyError
        If data_selection is neither 'Test' nor 'Train'.
    """
    # The column names are fixed constants from this notebook, not user
    # input, so plain string formatting is adequate here.
    query = _T2_SCORE_CTES[data_selection] + _T2_OUTCOME_SQL.format(
        data_selection=data_selection,
        target_col=target_col,
        mature_flag_col=mature_flag_col,
    )
    return client.query(query).to_dataframe().drop_duplicates(keep='first')


# (target column, maturity flag column, label handed to the Gini helper)
_T2_TARGETS = [
    ('deffpd0', 'flg_mature_fpd0', 'FPD0'),
    ('deffpd10', 'flg_mature_fpd10', 'FPD10'),
    ('deffpd30', 'flg_mature_fpd30', 'FPD30'),
    ('deffspd30', 'flg_mature_fspd_30', 'FSPD30'),
]

_t2_gini_by_label = {}
for _target_col, _mature_flag_col, _label in _T2_TARGETS:
    ## Test
    df1 = _t2_fetch_scored_loans('Test', _target_col, _mature_flag_col)
    ## Train
    df2 = _t2_fetch_scored_loans('Train', _target_col, _mature_flag_col)

    # Train rows first, then Test rows, as in the original sections.
    df_concat = pd.concat([df2, df1], ignore_index=True)
    df_concat.info()

    # Any score that cannot be parsed as a number becomes NaN.
    df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

    gini_results = calculate_periodic_gini_producttype(
        df_concat,
        'Beta_cash_stack_score',
        _target_col,
        _label,
        product_column='new_loan_type'
    )

    _t2_gini_by_label[_label] = gini_results.copy()
    print(_t2_gini_by_label[_label].head())

# Names the later "combining" step expects.
f0 = _t2_gini_by_label['FPD0']
f1 = _t2_gini_by_label['FPD10']
f2 = _t2_gini_by_label['FPD30']
f3 = _t2_gini_by_label['FSPD30']

## FSTPD30

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
"""

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df1 = dfd.copy()

## Train

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 2'
  ;
  """
  
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy()

df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffstpd30', 'FSTPD30')
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    'deffstpd30',
    'FSTPD30',
    product_column='new_loan_type'
)

f4 = gini_results.copy()
print(f4.head())

import functools

# Per-metric Gini frames built earlier in this cell, in merge order
# (FPD0, FPD10, FPD30, FSPD30, FSTPD30).
dataframes = [f0, f1, f2, f3, f4]
# Key columns shared by every per-metric frame; also the leading columns of the final table.
# NOTE(review): `bad_rate` is part of the join key. If it is computed per default
# metric it will differ between frames and the outer join will not line rows up -
# confirm against calculate_periodic_gini_producttype.
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'loan_type', 'bad_rate']


def merge_dataframes(df1, df2):
    """Outer-join two per-metric Gini frames on ``common_columns``.

    Parameters:
    df1, df2: DataFrames that both contain every column in ``common_columns``.

    Returns:
    pandas.DataFrame: the outer merge of the two inputs.
    """
    return pd.merge(df1, df2, on=common_columns, how='outer')


merged_gini = functools.reduce(merge_dataframes, dataframes)

# Gini column per metric -> the same name tagged with the trench ("_t2").
# One metric list drives both the column selection and the rename, so the two cannot drift apart.
gini_renames = {
    f'Beta_cash_stack_score_{metric}_gini': f'Beta_cash_stack_score_{metric}_t2_gini'
    for metric in ['FPD0', 'FPD10', 'FPD30', 'FSPD30', 'FSTPD30']
}

final_df = (
    merged_gini[common_columns + list(gini_renames)]
    .rename(columns=gini_renames)
    .assign(
        Trench_category='Trench 2',
        Model_display_name='beta_stack_model_cash',
        Product_type='CASH',
    )
)

# Upload to BigQuery
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_cash_t2_v1_gini_v5"
job_config = bigquery.LoadJobConfig(
    write_disposition="WRITE_TRUNCATE",  # replaces the table contents; "WRITE_APPEND" would add rows instead
)
job = client.load_table_from_dataframe(final_df, table_id, job_config=job_config)
job.result()  # Wait for the job to complete

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12185 entries, 0 to 12184
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             12185 non-null  object        
 1   digitalLoanAccountId   12185 non-null  object        
 2   loanAccountNumber      12185 non-null  object        
 3   Beta_cash_stack_score  12185 non-null  object        
 4   trenchCategory         12185 non-null  object        
 5   appln_submit_datetime  12185 non-null  datetime64[us]
 6   disbursementdate       12185 non-null  dbdate        
 7   Application_month      12185 non-null  object        
 8   Data_selection         12185 non-null  object        
 9   deffpd0                12185 non-null  Int64         
 10  flg_mature_fpd0        12185 non-null  Int64         
 11  new_loan_type          12185 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10720 entries, 0 to 10719
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10720 non-null  Int64         
 1   digitalLoanAccountId   10720 non-null  object        
 2   loanAccountNumber      10720 non-null  object        
 3   Beta_cash_stack_score  10720 non-null  float64       
 4   trenchCategory         10720 non-null  object        
 5   appln_submit_datetime  10720 non-null  datetime64[us]
 6   disbursementdate       10720 non-null  dbdate        
 7   Application_month      10720 non-null  object        
 8   Data_selection         10720 non-null  object        
 9   deffspd30              10720 non-null  Int64         
 10  flg_mature_fspd_30     10720 non-null  Int64         
 11  new_loan_type          10720 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10020 entries, 0 to 10019
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10020 non-null  Int64         
 1   digitalLoanAccountId   10020 non-null  object        
 2   loanAccountNumber      10020 non-null  object        
 3   Beta_cash_stack_score  10020 non-null  float64       
 4   trenchCategory         10020 non-null  object        
 5   appln_submit_datetime  10020 non-null  datetime64[us]
 6   disbursementdate       10020 non-null  dbdate        
 7   Application_month      10020 non-null  object        
 8   Data_selection         10020 non-null  object        
 9   deffstpd30             10020 non-null  Int64         
 10  flg_mature_fstpd_30    10020 non-null  Int64         
 11  new_loan_type          10020 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=b6da2630-365c-44ee-8be1-72d83a49ed48>

## Trench 3

In [1910]:
## Trench 3
## Test

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df1 = dfd.copy()

## Train

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd0,
  del.flg_mature_fpd0,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd0 = 1
  and r.trenchCategory = 'Trench 3'
  ;
  """
  
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy()

df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffpd0', 'FPD0')
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    'deffpd0',
    'FPD0',
    product_column='new_loan_type'
)

f0 = gini_results.copy()
print(f0.head())

## FPD10

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df1 = dfd.copy()

## Train

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd10,
  del.flg_mature_fpd10,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd10 = 1
  and r.trenchCategory = 'Trench 3'
  ;
  """
  
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy()

df_concat = pd.concat([df2, df1], ignore_index=True)
df_concat.info()

df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# gini_results = calculate_periodic_gini(df_concat, 'Beta_cash_stack_score', 'deffpd10', 'FPD10')
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    'deffpd10',
    'FPD10',
    product_column='new_loan_type'
)

f1 = gini_results.copy()
print(f1.head())

## FPD30

sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df1 = dfd.copy()

## Train

# Train-side FPD30 query for Trench 3 (the string is sent to BigQuery as-is).
# - parsed / modelname: rows of ml_training_model_run_details for the two
#   Beta cash stack display names at modelVersionId 'v1'; `prediction` is
#   exposed as Beta_cash_stack_score. `calcFeatures` is computed in `parsed`
#   but not selected by any later CTE.
# - deliquency: derives the default flags (deffpd0 ... deffstpd30) and the
#   matching maturity flags from obs_min_inst_def* / min_inst_def*.
# - Final select: joins loan_master_table and deliquency, keeps disbursed
#   loans with a non-null score, flg_mature_fpd30 = 1 and
#   trenchCategory = 'Trench 3', and tags every row as 'Train'.
#   The WHERE conditions on loanmaster discard rows the LEFT JOIN left unmatched.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffpd30,
  del.flg_mature_fpd30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fpd30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
  """
  
# Run the FPD30 'Train' query held in `sq` and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy()

# Stack Train (df2) on top of Test (df1). Empty frames are left out of the
# concat: pandas >= 2.1 emits a FutureWarning when an empty frame takes part
# in a concat, because a future version will stop ignoring such entries when
# it determines the result dtypes. (The saved output of this notebook appears
# to show that warning for this statement.) If both frames are empty, fall
# back to df2 so the result is still an empty frame with the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

# errors='coerce' converts any value that cannot be parsed as a number to NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Gini of the score against the FPD30 default flag, split by loan type.
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    'deffpd30',
    'FPD30',
    product_column='new_loan_type'
)

# Keep an independent copy; f2 is one of the frames merged into final_df later.
f2 = gini_results.copy()
print(f2.head())

## FSPD30

# Test-side FSPD30 query for Trench 3 (the string is sent to BigQuery as-is).
# - parsed / latest_request: ml_model_run_details rows for the two Beta cash
#   stack display names at modelVersionId 'v1', reduced to the most recent row
#   (by start_time) per customerId, digitalLoanAccountId, modelDisplayName.
#   `calcFeatures` and `prediction_clean` are computed but not referenced by
#   any later CTE.
# - model_run: most recent 'Beta-Cash-Model-response' row of ml_request_details
#   per customerId, digitalLoanAccountId, modelName; it is only read as a
#   fallback source for trenchCategory (via JSON_VALUE on the request payload).
# - deliquency: derives the default flags (deffpd0 ... deffstpd30) and the
#   matching maturity flags from obs_min_inst_def* / min_inst_def*.
# - Final select: joins loan_master_table and deliquency, keeps disbursed
#   loans with a non-null score, flg_mature_fspd_30 = 1 and
#   trenchCategory = 'Trench 3', and tags every row as 'Test'.
#   The WHERE conditions on loanmaster discard rows the LEFT JOIN left unmatched.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
    loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the FSPD30 'Test' query held in `sq`, drop exact duplicate rows,
# preview the result, and keep an independent copy as df1 for the later concat.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

df1 = dfd.copy()

## Train

# Train-side FSPD30 query for Trench 3 (the string is sent to BigQuery as-is).
# - parsed / modelname: rows of ml_training_model_run_details for the two
#   Beta cash stack display names at modelVersionId 'v1'; `prediction` is
#   exposed as Beta_cash_stack_score. `calcFeatures` is computed in `parsed`
#   but not selected by any later CTE.
# - deliquency: derives the default flags (deffpd0 ... deffstpd30) and the
#   matching maturity flags from obs_min_inst_def* / min_inst_def*.
# - Final select: joins loan_master_table and deliquency, keeps disbursed
#   loans with a non-null score, flg_mature_fspd_30 = 1 and
#   trenchCategory = 'Trench 3', and tags every row as 'Train'.
#   The WHERE conditions on loanmaster discard rows the LEFT JOIN left unmatched.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffspd30,
  del.flg_mature_fspd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fspd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
  """
  
# Run the FSPD30 'Train' query held in `sq` and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy()

# Stack Train (df2) on top of Test (df1). Empty frames are left out of the
# concat: pandas >= 2.1 emits a FutureWarning when an empty frame takes part
# in a concat, because a future version will stop ignoring such entries when
# it determines the result dtypes. (The saved output of this notebook appears
# to show that warning for this statement.) If both frames are empty, fall
# back to df2 so the result is still an empty frame with the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

# errors='coerce' converts any value that cannot be parsed as a number to NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Gini of the score against the FSPD30 default flag, split by loan type.
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    'deffspd30',
    'FSPD30',
    product_column='new_loan_type'
)

# Keep an independent copy; f3 is one of the frames merged into final_df later.
f3 = gini_results.copy()
print(f3.head())

## FSTPD30

# Test-side FSTPD30 query for Trench 3 (the string is sent to BigQuery as-is).
# - parsed / latest_request: ml_model_run_details rows for the two Beta cash
#   stack display names at modelVersionId 'v1', reduced to the most recent row
#   (by start_time) per customerId, digitalLoanAccountId, modelDisplayName.
#   `calcFeatures` and `prediction_clean` are computed but not referenced by
#   any later CTE.
# - model_run: most recent 'Beta-Cash-Model-response' row of ml_request_details
#   per customerId, digitalLoanAccountId, modelName; it is only read as a
#   fallback source for trenchCategory (via JSON_VALUE on the request payload).
# - deliquency: derives the default flags (deffpd0 ... deffstpd30) and the
#   matching maturity flags from obs_min_inst_def* / min_inst_def*.
# - Final select: joins loan_master_table and deliquency, keeps disbursed
#   loans with a non-null score, flg_mature_fstpd_30 = 1 and
#   trenchCategory = 'Trench 3', and tags every row as 'Test'.
#   The WHERE conditions on loanmaster discard rows the LEFT JOIN left unmatched.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
REPLACE(REPLACE(prediction, "'", '"'), "None", "null") AS prediction_clean
FROM `prj-prod-dataplatform.audit_balance.ml_model_run_details`
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
latest_request as (
select * from parsed
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelDisplayName ORDER BY start_time DESC ) = 1),

model_run as (
select customerId,digitalLoanAccountId,modelName, publish_time,
REPLACE(REPLACE(requestPayload, "'", '"'), "None", "null") AS requestPayload_clean
from `prj-prod-dataplatform.audit_balance.ml_request_details` 
WHERE modelName = 'Beta-Cash-Model-response'
QUALIFY ROW_NUMBER() OVER (PARTITION BY customerId, digitalLoanAccountId,modelName ORDER BY publish_time DESC ) = 1)
, 
  modelname as (
  select p.customerId,
  p.digitalLoanAccountId,
  start_time,
  prediction AS Beta_cash_stack_score,
  coalesce(p.trenchCategory, JSON_VALUE(m.requestPayload_clean, "$.predictions.trenchCategory")) AS trenchCategory,
  from latest_request p
  left join model_run m on m.digitalLoanAccountId = p.digitalLoanAccountId
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Test' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
"""

# Run the FSTPD30 'Test' query held in `sq`, drop exact duplicate rows,
# preview the result, and keep an independent copy as df1 for the later concat.
dfd = client.query(sq).to_dataframe().drop_duplicates(keep='first')
dfd.head()

df1 = dfd.copy()

## Train

# Train-side FSTPD30 query for Trench 3 (the string is sent to BigQuery as-is).
# - parsed / modelname: rows of ml_training_model_run_details for the two
#   Beta cash stack display names at modelVersionId 'v1'; `prediction` is
#   exposed as Beta_cash_stack_score. `calcFeatures` is computed in `parsed`
#   but not selected by any later CTE.
# - deliquency: derives the default flags (deffpd0 ... deffstpd30) and the
#   matching maturity flags from obs_min_inst_def* / min_inst_def*.
# - Final select: joins loan_master_table and deliquency, keeps disbursed
#   loans with a non-null score, flg_mature_fstpd_30 = 1 and
#   trenchCategory = 'Trench 3', and tags every row as 'Train'.
#   The WHERE conditions on loanmaster discard rows the LEFT JOIN left unmatched.
sq = """
WITH parsed as (
select customerId, digitalLoanAccountId,modelDisplayName,modelVersionId,start_time,end_time,prediction,trenchCategory,
REPLACE(REPLACE(calcFeature, "'", '"'), "None", "null") AS calcFeatures,
FROM prj-prod-dataplatform.dap_ds_poweruser_playground.ml_training_model_run_details
where modelDisplayName in  ('Beta-Cash-Stack-Model', 'beta_stack_model_cash')
and modelVersionId = 'v1'
),
  modelname as (
  select customerId,
  digitalLoanAccountId,
  start_time,
  prediction Beta_cash_stack_score,
  trenchCategory
  from parsed
  )
  ,
deliquency as
(select loanAccountNumber,
case when obs_min_inst_def0 >= 1 and min_inst_def0 = 1 then 1 else 0 end deffpd0,
case when obs_min_inst_def10 >=1 and min_inst_def10 =1 then 1 else 0 end deffpd10,
case when obs_min_inst_def30 >=1 and min_inst_def30 =1 then 1 else 0 end deffpd30,
case when obs_min_inst_def30 >=2 and min_inst_def30 in (1,2) then 1 else 0 end deffspd30,
case when obs_min_inst_def30 >=3 and min_inst_def30 in (1,2,3) then 1 else 0 end deffstpd30,
case when obs_min_inst_def0 >= 1 then 1 else 0 end flg_mature_fpd0,
case when obs_min_inst_def10 >=1 then 1 else 0 end flg_mature_fpd10,
case when obs_min_inst_def30 >=1 then 1 else 0 end flg_mature_fpd30,
case when obs_min_inst_def30 >=2 then 1 else 0 end flg_mature_fspd_30,
case when obs_min_inst_def30 >=3 then 1 else 0 end flg_mature_fstpd_30
from prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data)
  select r.customerId,
  r.digitalLoanAccountId,
  loanmaster.loanAccountNumber,
  r.Beta_cash_stack_score,
  r.trenchCategory,
  coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime)) AS appln_submit_datetime,
  date(loanmaster.disbursementDateTime) disbursementdate,
  format_date('%Y-%m', coalesce(IF(loanmaster.new_loan_type = 'Flex-up', loanmaster.startApplyDateTime, loanmaster.termsAndConditionsSubmitDateTime),  cast(r.start_time as datetime))) as Application_month, 
  'Train' Data_selection,  
  del.deffstpd30,
  del.flg_mature_fstpd_30,
  loanmaster.new_loan_type,
  from modelname r
  left join risk_credit_mis.loan_master_table loanmaster  ON loanmaster.digitalLoanAccountId = r.digitalLoanAccountId
  inner join deliquency del on del.loanAccountNumber = loanmaster.loanAccountNumber
  where loanmaster.flagDisbursement = 1
  and loanmaster.disbursementDateTime is not null
  and r.Beta_cash_stack_score is not null
  and del.flg_mature_fstpd_30 = 1
  and r.trenchCategory = 'Trench 3'
  ;
  """
  
# Run the FSTPD30 'Train' query held in `sq` and drop exact duplicate rows.
dfd = client.query(sq).to_dataframe()
dfd = dfd.drop_duplicates(keep='first')
dfd.head()

df2 = dfd.copy()

# Stack Train (df2) on top of Test (df1). Empty frames are left out of the
# concat: pandas >= 2.1 emits a FutureWarning when an empty frame takes part
# in a concat, because a future version will stop ignoring such entries when
# it determines the result dtypes. (The saved output of this notebook appears
# to show that warning for this statement.) If both frames are empty, fall
# back to df2 so the result is still an empty frame with the expected columns.
non_empty_frames = [frame for frame in (df2, df1) if not frame.empty]
df_concat = pd.concat(non_empty_frames or [df2], ignore_index=True)
df_concat.info()

# errors='coerce' converts any value that cannot be parsed as a number to NaN.
df_concat['Beta_cash_stack_score'] = pd.to_numeric(df_concat['Beta_cash_stack_score'], errors='coerce')

# Gini of the score against the FSTPD30 default flag, split by loan type.
gini_results = calculate_periodic_gini_producttype(
    df_concat,
    'Beta_cash_stack_score',
    'deffstpd30',
    'FSTPD30',
    product_column='new_loan_type'
)

# Keep an independent copy; f4 is one of the frames merged into final_df later.
f4 = gini_results.copy()
print(f4.head())

import functools

# Per-target Gini result frames to combine. f2, f3 and f4 hold the FPD30,
# FSPD30 and FSTPD30 results; f0 and f1 come from earlier cells
# (presumably FPD0 and FPD10 - confirm).
dataframes = [f0, f1, f2, f3, f4]

# Key columns shared by every result frame and used as the merge keys.
# NOTE(review): 'bad_rate' is part of the key. If it is computed per target,
# rows from different targets will not line up in the outer merge - confirm
# against calculate_periodic_gini_producttype.
common_columns = [
    'start_date',
    'end_date',
    'period',
    'Model_Name',
    'version',
    'loan_type',
    'bad_rate',
]

def merge_dataframes(df1, df2):
    """Outer-join two result frames on the module-level ``common_columns`` keys.

    Serves as the reducer passed to ``functools.reduce``. Rows that exist in
    only one of the two frames are kept, with missing values in the columns
    that come from the other frame.
    """
    merged = pd.merge(left=df1, right=df2, how='outer', on=common_columns)
    return merged

# Fold the per-target result frames into one wide frame via successive outer merges.
final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

# Gini columns to keep, mapped to their Trench 3 ("_t3_") output names.
gini_column_renames = {
    'Beta_cash_stack_score_FPD0_gini': 'Beta_cash_stack_score_FPD0_t3_gini',
    'Beta_cash_stack_score_FPD10_gini': 'Beta_cash_stack_score_FPD10_t3_gini',
    'Beta_cash_stack_score_FPD30_gini': 'Beta_cash_stack_score_FPD30_t3_gini',
    'Beta_cash_stack_score_FSPD30_gini': 'Beta_cash_stack_score_FSPD30_t3_gini',
    'Beta_cash_stack_score_FSTPD30_gini': 'Beta_cash_stack_score_FSTPD30_t3_gini',
}

# Keep the key columns followed by the Gini columns, then apply the renames.
selected_columns = [
    'start_date', 'end_date', 'period', 'Model_Name', 'version', 'loan_type', 'bad_rate',
] + list(gini_column_renames)
final_df = final_df[selected_columns].copy()
final_df = final_df.rename(columns=gini_column_renames)

# Constant descriptor columns identifying the segment and model of this table.
final_df['Trench_category'] = 'Trench 3'
final_df['Model_display_name'] = 'beta_stack_model_cash'
final_df['Product_type'] = 'CASH'
final_df.dtypes

final_df.head()

# Upload final_df to BigQuery.
table_id = "prj-prod-dataplatform.dap_ds_poweruser_playground.beta_stack_model_cash_t3_v1_gini_v5"

# WRITE_TRUNCATE is the configured write disposition; "WRITE_APPEND" is the alternative.
job_config = bigquery.LoadJobConfig(write_disposition="WRITE_TRUNCATE")

job = client.load_table_from_dataframe(
    final_df,
    table_id,
    job_config=job_config,
)
# Block until the load job has completed.
job.result()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11690 entries, 0 to 11689
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             11690 non-null  object        
 1   digitalLoanAccountId   11690 non-null  object        
 2   loanAccountNumber      11690 non-null  object        
 3   Beta_cash_stack_score  11690 non-null  object        
 4   trenchCategory         11690 non-null  object        
 5   appln_submit_datetime  11690 non-null  datetime64[us]
 6   disbursementdate       11690 non-null  dbdate        
 7   Application_month      11690 non-null  object        
 8   Data_selection         11690 non-null  object        
 9   deffpd0                11690 non-null  Int64         
 10  flg_mature_fpd0        11690 non-null  Int64         
 11  new_loan_type          11690 non-null  object        
dtypes: Int64(2), datetime64[us](1), dbdate(1), object(8)
memory 

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10091 entries, 0 to 10090
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             10091 non-null  Int64         
 1   digitalLoanAccountId   10091 non-null  object        
 2   loanAccountNumber      10091 non-null  object        
 3   Beta_cash_stack_score  10091 non-null  float64       
 4   trenchCategory         10091 non-null  object        
 5   appln_submit_datetime  10091 non-null  datetime64[us]
 6   disbursementdate       10091 non-null  dbdate        
 7   Application_month      10091 non-null  object        
 8   Data_selection         10091 non-null  object        
 9   deffspd30              10091 non-null  Int64         
 10  flg_mature_fspd_30     10091 non-null  Int64         
 11  new_loan_type          10091 non-null  object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), objec

  df_concat = pd.concat([df2, df1], ignore_index=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8874 entries, 0 to 8873
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   customerId             8874 non-null   Int64         
 1   digitalLoanAccountId   8874 non-null   object        
 2   loanAccountNumber      8874 non-null   object        
 3   Beta_cash_stack_score  8874 non-null   float64       
 4   trenchCategory         8874 non-null   object        
 5   appln_submit_datetime  8874 non-null   datetime64[us]
 6   disbursementdate       8874 non-null   dbdate        
 7   Application_month      8874 non-null   object        
 8   Data_selection         8874 non-null   object        
 9   deffstpd30             8874 non-null   Int64         
 10  flg_mature_fstpd_30    8874 non-null   Int64         
 11  new_loan_type          8874 non-null   object        
dtypes: Int64(3), datetime64[us](1), dbdate(1), float64(1), object(



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ae4f513c-24ac-40bb-b646-7a3cb1e1f0d1>

# End