# <center> Model Gini Calculation </center>

In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

from sklearn.metrics import roc_auc_score
from datetime import datetime, timedelta
# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

# Function

## calculate_gini_for_threedigitscore

In [2]:
# def calculate_gini_for_threedigitscore(scores, labels):
#     """
#     Calculate Gini coefficient for three-digit scores and binary labels
    
#     Parameters:
#     scores: array-like, three-digit scores (higher is better)
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Combine scores and labels into a DataFrame
#     df = pd.DataFrame({'score': scores, 'label': labels})
    
#     # Sort by score in descending order (assuming higher score is better)
#     df = df.sort_values('score', ascending=False)
    
#     # Calculate cumulative values
#     total_pos = df['label'].sum()
#     total_neg = len(df) - total_pos
    
#     if total_pos == 0 or total_neg == 0:
#         return 0
    
#     # Calculate cumulative proportions
#     cum_pos = df['label'].cumsum()
#     cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
#     # Convert to proportions
#     cum_pos_prop = cum_pos / total_pos
#     cum_neg_prop = cum_neg / total_neg
    
#     # Calculate Gini
#     gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
    
#     return gini


## Modified one

def calculate_gini_for_threedigitscore(scores, labels):
    """
    Calculate Gini coefficient for three-digit scores and binary labels
    
    Parameters:
    scores: array-like, three-digit scores (higher is better)
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Combine scores and labels into a DataFrame
    df = pd.DataFrame({'score': scores, 'label': labels})
    
    # Sort by score in descending order (assuming higher score means lower risk)
    # For default prediction, we want to sort scores in ascending order 
    # since higher default probability should correspond to higher risk
    df = df.sort_values('score', ascending=True)  # Changed to ascending=True
    
    # Calculate cumulative values
    total_pos = df['label'].sum()
    total_neg = len(df) - total_pos
    
    if total_pos == 0 or total_neg == 0:
        return 0
    
    # Calculate cumulative proportions
    cum_pos = df['label'].cumsum()
    cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
    # Convert to proportions
    cum_pos_prop = cum_pos / total_pos
    cum_neg_prop = cum_neg / total_neg
    
    # Calculate area under curve
    auc = np.trapz(cum_pos_prop, cum_neg_prop)
    
    # Calculate Gini
    gini = 2 * auc - 1
    
    return gini

## calculate_gini

In [3]:
def calculate_gini(pd_scores, bad_indicators):
    """
    Calculate Gini coefficient from scores and binary indicators
    
    Parameters:
    pd_scores: array-like of scores/probabilities
    bad_indicators: array-like of binary outcomes (0/1)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays and ensure they're numeric
    pd_scores = np.array(pd_scores, dtype=float)
    bad_indicators = np.array(bad_indicators, dtype=int)
    
    # Check for valid input data
    if len(pd_scores) == 0 or len(bad_indicators) == 0:
        return np.nan
    
    # Check if we have both good and bad cases (needed for ROC AUC)
    if len(np.unique(bad_indicators)) < 2:
        return np.nan
    
    # Calculate AUC using sklearn
    try:
        auc = roc_auc_score(bad_indicators, pd_scores)
        # Calculate Gini from AUC
        gini = 2 * auc - 1
        return gini
    except ValueError:
        return np.nan

## calculate_hybrid_gini

In [4]:
# def calculate_hybrid_gini(scores, labels):
#     """
#     Calculate Gini coefficient handling both PD values and three-digit scores
    
#     Parameters:
#     scores: array-like, contains either PD values (0-1) or three-digit scores
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Convert inputs to numpy arrays
#     scores = np.array(scores, dtype=float)
#     labels = np.array(labels, dtype=int)
    
#     # Basic validation
#     if len(scores) == 0 or len(labels) == 0:
#         return np.nan
    
#     if len(np.unique(labels)) < 2:
#         return np.nan
        
#     # Determine if scores are PD values or three-digit scores
#     # PD values are between 0 and 1
#     is_pd = np.all((scores >= 0) & (scores <= 1))
    
#     if is_pd:
#         try:
#             auc = roc_auc_score(labels, scores)
#             gini = 2 * auc - 1
#             return gini
#         except ValueError:
#             return np.nan
#     else:
#         # Handle as three-digit score
#         df = pd.DataFrame({'score': scores, 'label': labels})
#         df = df.sort_values('score', ascending=False)
        
#         total_pos = df['label'].sum()
#         total_neg = len(df) - total_pos
        
#         if total_pos == 0 or total_neg == 0:
#             return np.nan
        
#         cum_pos = df['label'].cumsum()
#         cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
#         cum_pos_prop = cum_pos / total_pos
#         cum_neg_prop = cum_neg / total_neg
        
#         gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
#         return gini

## Modified one

def calculate_hybrid_gini(scores, labels):
    """
    Calculate Gini coefficient handling both PD values and three-digit scores
    
    Parameters:
    scores: array-like, contains either PD values (0-1) or three-digit scores
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays
    scores = np.array(scores, dtype=float)
    labels = np.array(labels, dtype=int)
    
    # Basic validation
    if len(scores) == 0 or len(labels) == 0:
        return np.nan
    
    if len(np.unique(labels)) < 2:
        return np.nan
        
    # Determine if scores are PD values or three-digit scores
    # PD values are between 0 and 1
    is_pd = np.all((scores >= 0) & (scores <= 1))
    
    if is_pd:
        try:
            auc = roc_auc_score(labels, scores)
            gini = 2 * auc - 1
            return gini
        except ValueError:
            return np.nan
    else:
        # Handle as three-digit score
        df = pd.DataFrame({'score': scores, 'label': labels})
        # Sort by score in ascending order since higher score means higher risk
        df = df.sort_values('score', ascending=True)
        
        total_pos = df['label'].sum()
        total_neg = len(df) - total_pos
        
        if total_pos == 0 or total_neg == 0:
            return np.nan
        
        cum_pos = df['label'].cumsum()
        cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
        cum_pos_prop = cum_pos / total_pos
        cum_neg_prop = cum_neg / total_neg
        
        # Calculate area under curve
        auc = np.trapz(cum_pos_prop, cum_neg_prop)
        
        # Calculate Gini using the same formula as PD values
        gini = 2 * auc - 1
        return gini

## calculate_periodic_gini_threedigit

In [5]:
# Main processing code
def calculate_periodic_gini_threedigit(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_gini

In [6]:
def calculate_periodic_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_hybrid_gini

In [7]:
def calculate_periodic_hybrid_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients for mixed score types
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + pd.Timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

# App Score FPD10

In [8]:
sq = """with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from appscore;"""

dfappscorefpd10 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 3e9e00d9-cc95-4399-881e-bde7805b916b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [9]:
dfappscorefpd10.sample(5)

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_apps_score,ln_fpd10_flag,ln_mature_fpd10_flag
72992,2024-11-02 15:07:32,4dc657f1-32c7-4f20-a688-ef84341c0f18,0.501227,0,1
60489,2024-09-10 12:42:28,590f2667-abbe-44da-8e35-2698deece937,0.466507,0,1
86805,2024-05-30 18:42:16,8c973664-b630-429f-835b-243a39586fbf,0.564591,0,1
97127,2024-11-15 13:05:18,11201cd3-8a41-43ca-9270-428d65c0a831,0.540353,0,1
8327,2024-05-12 17:35:16,d6818a61-9b8a-4f91-b694-a65a9fc0cab1,0.615861,0,1


In [10]:
gini_results = calculate_periodic_gini(dfappscorefpd10, 's_apps_score', 'ln_fpd10_flag', 'FPD10')

In [11]:
gini_results.head()

Unnamed: 0,start_date,end_date,s_apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,s_apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,s_apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,s_apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,s_apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,s_apps_score,1.1.0,FPD10


In [12]:
appscoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD10.shape}")
appscoreFPD10.columns.values

The shape of dataframe after copy is:	(105, 7)


array(['start_date', 'end_date', 's_apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [13]:
gini_results.head()

Unnamed: 0,start_date,end_date,s_apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,s_apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,s_apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,s_apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,s_apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,s_apps_score,1.1.0,FPD10


# App Score FPD30

In [14]:
sq = """
with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from appscore;
"""

dfappscorefpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 4ae9a45a-3821-4b5e-af33-90d194deaac4 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [15]:
dfappscorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_apps_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-06-01 11:02:18,7e7e7678-b36d-4b26-ad01-272565b54780,0.483115,0,1
1,2023-06-01 12:36:51,855bccd7-f3d4-461f-9748-bb50b07e0327,0.541669,0,1
2,2023-06-01 12:56:23,e585b35d-bc13-494d-970d-2272d976991b,0.411007,0,1
3,2023-06-01 15:52:56,b11e0b23-1b34-4d8c-984d-1239a7b389c1,0.385116,0,1
4,2023-06-01 18:40:13,ab22972c-29cf-4209-967c-3af7b19f382d,0.355267,0,1


In [16]:
gini_results = calculate_periodic_gini(dfappscorefpd30, 's_apps_score', 'ln_fpd30_flag', 'FPD30')
# gini_results['bad_rate'] = 'FPD30'
appscoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD30.shape}")
appscoreFPD30.columns.values

The shape of dataframe after copy is:	(101, 7)


array(['start_date', 'end_date', 's_apps_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [17]:
gini_results.sample(5)

Unnamed: 0,start_date,end_date,s_apps_score_FPD30_gini,period,Model_Name,version,bad_rate
66,2024-06-03,2024-06-09,0.451336,Week,s_apps_score,1.1.0,FPD30
72,2024-07-08,2024-07-14,0.318586,Week,s_apps_score,1.1.0,FPD30
37,2023-12-25,2023-12-31,0.417435,Week,s_apps_score,1.1.0,FPD30
50,2024-03-04,2024-03-10,0.435098,Week,s_apps_score,1.1.0,FPD30
98,2024-12-02,2024-12-08,0.329144,Week,s_apps_score,1.1.0,FPD30


# App Score FSPD30

In [18]:
sq = """with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from appscore;
"""

dfappscorefspd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID d945cba3-bfaa-450d-bce7-5d851afb2dfc successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [19]:
gini_results = calculate_periodic_gini(dfappscorefspd30, 's_apps_score', 'ln_fspd30_flag', 'FSPD30')
# gini_results['bad_rate'] = 'FSPD30'
appscoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSPD30.shape}")
appscoreFSPD30.columns.values

The shape of dataframe after copy is:	(96, 7)


array(['start_date', 'end_date', 's_apps_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [20]:
gini_results.tail()

Unnamed: 0,start_date,end_date,s_apps_score_FSPD30_gini,period,Model_Name,version,bad_rate
91,2024-10-28,2024-11-03,0.285661,Week,s_apps_score,1.1.0,FSPD30
92,2024-11-01,2024-11-30,0.312137,Month,s_apps_score,1.1.0,FSPD30
93,2024-11-04,2024-11-10,0.295698,Week,s_apps_score,1.1.0,FSPD30
94,2024-11-11,2024-11-17,0.318956,Week,s_apps_score,1.1.0,FSPD30
95,2024-11-18,2024-11-24,0.317062,Week,s_apps_score,1.1.0,FSPD30


# App Score FSTPD30

In [21]:
sq = """with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from appscore;
"""

dfappscorefstpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 4b543b4f-6753-40cb-96e0-598450aff9f6 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [22]:
gini_results = calculate_periodic_gini(dfappscorefstpd30, 's_apps_score', 'ln_fstpd30_flag', 'FSTPD30')
# gini_results['bad_rate'] = 'FSTPD30'
appscoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSTPD30.shape}")
appscoreFSTPD30.columns.values

The shape of dataframe after copy is:	(90, 7)


array(['start_date', 'end_date', 's_apps_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [23]:
appscoreFSTPD30.head()

Unnamed: 0,start_date,end_date,s_apps_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.465098,Week,s_apps_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.33742,Month,s_apps_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.4133,Week,s_apps_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.395676,Week,s_apps_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.314497,Week,s_apps_score,1.1.0,FSTPD30


# Combining App Score

In [24]:
import functools

dataframes = [appscoreFPD10, appscoreFPD30, appscoreFSPD30, appscoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 's_apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 's_apps_score_FPD30_gini',
       's_apps_score_FSPD30_gini', 's_apps_score_FSTPD30_gini'],
      dtype=object)

In [25]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','s_apps_score_FSTPD30_gini','s_apps_score_FSPD30_gini',
       's_apps_score_FPD30_gini', 's_apps_score_FPD10_gini']].copy()
final_df.dtypes

start_date                   datetime64[ns]
end_date                     datetime64[ns]
period                               object
Model_Name                           object
version                              object
bad_rate                             object
s_apps_score_FSTPD30_gini           float64
s_apps_score_FSPD30_gini            float64
s_apps_score_FPD30_gini             float64
s_apps_score_FPD10_gini             float64
dtype: object

## Creating app score table 

In [26]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=0e826f4d-a8d1-45be-98c5-96f24d077086>

In [27]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('s_apps_score_FSTPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_apps_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_apps_score_FPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_apps_score_fpd10_gini', 'FLOAT')
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=1a5e2d32-7e8d-4b92-b526-36ce27191630>

# sb_demo_score

## FPD10

In [28]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 9d2b23ad-399f-4be3-9132-b8f04de49f3b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [29]:
df_sb_demo_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_demo_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.177283,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.095033,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.159133,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.154295,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.130198,0,1


In [30]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd10, 'sb_demo_score', 'ln_fpd10_flag', 'FPD10')
sb_demo_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD10.shape}")
sb_demo_scoreFPD10.columns.values

The shape of dataframe after copy is:	(105, 7)


array(['start_date', 'end_date', 'sb_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [31]:
sb_demo_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,sb_demo_score_FPD10_gini,period,Model_Name,version,bad_rate
100,2024-12-16,2024-12-22,0.241678,Week,sb_demo_score,1.1.0,FPD10
101,2024-12-23,2024-12-29,0.236387,Week,sb_demo_score,1.1.0,FPD10
102,2024-12-30,2025-01-05,0.23222,Week,sb_demo_score,1.1.0,FPD10
103,2025-01-01,2025-01-31,0.232223,Month,sb_demo_score,1.1.0,FPD10
104,2025-01-06,2025-01-12,0.210641,Week,sb_demo_score,1.1.0,FPD10


## FPD30

In [32]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 81999897-7964-432c-a8fa-2b5be9b7cfc6 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [33]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd30, 'sb_demo_score', 'ln_fpd30_flag', 'FPD30')
sb_demo_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD30.shape}")
sb_demo_scoreFPD30.columns.values

The shape of dataframe after copy is:	(101, 7)


array(['start_date', 'end_date', 'sb_demo_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [34]:
sb_demo_scoreFPD30.head() 

Unnamed: 0,start_date,end_date,sb_demo_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.330909,Week,sb_demo_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.220452,Month,sb_demo_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.15037,Week,sb_demo_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.478421,Week,sb_demo_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,-0.064257,Week,sb_demo_score,1.1.0,FPD30


## FSPD30

In [35]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fspd30_flag,   -- fspd30
	ln_mature_fspd30_flag,	--- fspd30 observation
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID ae618c1a-08e8-4323-9db7-64bc30a5299a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [36]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefspd30, 'sb_demo_score', 'ln_fspd30_flag', 'FSPD30')
sb_demo_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSPD30.shape}")
sb_demo_scoreFSPD30.columns.values

The shape of dataframe after copy is:	(96, 7)


array(['start_date', 'end_date', 'sb_demo_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [37]:
sb_demo_scoreFSPD30.head()

Unnamed: 0,start_date,end_date,sb_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.126685,Week,sb_demo_score,1.1.0,FSPD30
1,2023-06-01,2023-06-30,0.209807,Month,sb_demo_score,1.1.0,FSPD30
2,2023-06-05,2023-06-11,0.239192,Week,sb_demo_score,1.1.0,FSPD30
3,2023-06-12,2023-06-18,0.419355,Week,sb_demo_score,1.1.0,FSPD30
4,2023-06-19,2023-06-25,-0.008772,Week,sb_demo_score,1.1.0,FSPD30


## FSTPD30

In [38]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fstpd30_flag,   -- fstpd30
	ln_mature_fstpd30_flag,	--- fstpd30 observation
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 100980ef-c348-4be9-b097-b77030f7165c successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [39]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefstpd30, 'sb_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
sb_demo_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSTPD30.shape}")
sb_demo_scoreFSTPD30.columns.values

The shape of dataframe after copy is:	(90, 7)


array(['start_date', 'end_date', 'sb_demo_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [40]:
sb_demo_scoreFSTPD30.head()

Unnamed: 0,start_date,end_date,sb_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.24183,Week,sb_demo_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.18841,Month,sb_demo_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.272886,Week,sb_demo_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.23054,Week,sb_demo_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.074133,Week,sb_demo_score,1.1.0,FSTPD30


# Combining sb demo score

In [41]:
import functools

dataframes = [sb_demo_scoreFPD10, sb_demo_scoreFPD30, sb_demo_scoreFSPD30, sb_demo_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sb_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'sb_demo_score_FPD30_gini',
       'sb_demo_score_FSPD30_gini', 'sb_demo_score_FSTPD30_gini'],
      dtype=object)

In [42]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','sb_demo_score_FPD10_gini','sb_demo_score_FPD30_gini',
       'sb_demo_score_FSPD30_gini', 'sb_demo_score_FSTPD30_gini']].copy()

## creating sb demo score table 

In [43]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ceb6a62f-47cd-4923-ae74-432bc4c27059>

In [44]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('sb_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('sb_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('sb_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('sb_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=7f9cc3fc-5ac9-4753-b019-140256e45b3a>

# s_cic_score

## FPD10

In [45]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID b79613e6-f539-433c-a27c-800c591e446d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [46]:
df_s_cic_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_cic_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2023-06-01 11:02:18,7e7e7678-b36d-4b26-ad01-272565b54780,0.08686,0,1
1,2023-06-02 13:49:38,69f07f7a-77ce-4f3d-87da-cf27b346ac52,0.086317,0,1
2,2023-06-02 14:41:10,832f93ae-46bb-4fdf-94ff-66db94b716c9,0.120927,1,1
3,2023-06-02 16:25:26,5dae7da0-f426-4402-b552-f84ab33d38bf,0.083796,1,1
4,2023-06-02 17:11:42,1d7abbe4-a62a-4f4b-91b2-bb360b9e61c2,0.142315,0,1


In [47]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd10, 's_cic_score', 'ln_fpd10_flag', 'FPD10')
s_cic_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD10.shape}")
s_cic_scoreFPD10.columns.values

The shape of dataframe after copy is:	(104, 7)


array(['start_date', 'end_date', 's_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [48]:
s_cic_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,s_cic_score_FPD10_gini,period,Model_Name,version,bad_rate
99,2024-12-16,2024-12-22,0.246156,Week,s_cic_score,1.1.0,FPD10
100,2024-12-23,2024-12-29,0.156457,Week,s_cic_score,1.1.0,FPD10
101,2024-12-30,2025-01-05,0.228576,Week,s_cic_score,1.1.0,FPD10
102,2025-01-01,2025-01-31,0.237186,Month,s_cic_score,1.1.0,FPD10
103,2025-01-06,2025-01-12,0.154881,Week,s_cic_score,1.1.0,FPD10


## FPD30

In [49]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID b451b89a-a9b3-4ea2-9aa0-39bde865d724 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [50]:
df_s_cic_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_cic_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-06-01 11:02:18,7e7e7678-b36d-4b26-ad01-272565b54780,0.08686,0,1
1,2023-06-02 13:49:38,69f07f7a-77ce-4f3d-87da-cf27b346ac52,0.086317,0,1
2,2023-06-02 14:41:10,832f93ae-46bb-4fdf-94ff-66db94b716c9,0.120927,1,1
3,2023-06-02 16:25:26,5dae7da0-f426-4402-b552-f84ab33d38bf,0.083796,1,1
4,2023-06-02 17:11:42,1d7abbe4-a62a-4f4b-91b2-bb360b9e61c2,0.142315,0,1


In [51]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd30, 's_cic_score', 'ln_fpd30_flag', 'FPD30')
s_cic_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD30.shape}")
s_cic_scoreFPD30.columns.values

The shape of dataframe after copy is:	(100, 7)


array(['start_date', 'end_date', 's_cic_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [52]:
s_cic_scoreFPD30.head()

Unnamed: 0,start_date,end_date,s_cic_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.523077,Week,s_cic_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.297076,Month,s_cic_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.437229,Week,s_cic_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.611765,Week,s_cic_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,0.035484,Week,s_cic_score,1.1.0,FPD30


In [53]:
s_cic_scoreFPD30.describe()

Unnamed: 0,start_date,end_date,s_cic_score_FPD30_gini
count,100,100,100.0
mean,2024-03-08 15:36:00,2024-03-19 02:52:48,0.329673
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.035484
25%,2023-10-21 06:00:00,2023-10-30 12:00:00,0.273933
50%,2024-03-07 12:00:00,2024-03-20 12:00:00,0.319217
75%,2024-07-29 18:00:00,2024-08-05 18:00:00,0.380221
max,2024-12-16 00:00:00,2024-12-31 00:00:00,0.940476
std,,,0.116252


## FSPD30

In [54]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID dc435351-fe0d-49f1-86ab-56a35dfbbe8d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [55]:
gini_results = calculate_periodic_gini(df_s_cic_scorefspd30, 's_cic_score', 'ln_fspd30_flag', 'FSPD30')
s_cic_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSPD30.shape}")
s_cic_scoreFSPD30.columns.values

The shape of dataframe after copy is:	(95, 7)


array(['start_date', 'end_date', 's_cic_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [56]:
s_cic_scoreFSPD30.describe()

Unnamed: 0,start_date,end_date,s_cic_score_FSPD30_gini
count,95,95,95.0
mean,2024-02-23 09:20:50.526315776,2024-03-04 20:12:37.894736896,0.324255
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.066869
25%,2023-10-12 12:00:00,2023-10-25 12:00:00,0.279003
50%,2024-02-26 00:00:00,2024-03-03 00:00:00,0.319474
75%,2024-07-04 12:00:00,2024-07-17 12:00:00,0.360417
max,2024-11-18 00:00:00,2024-11-30 00:00:00,0.680714
std,,,0.083293


In [57]:
s_cic_scoreFSPD30.tail()

Unnamed: 0,start_date,end_date,s_cic_score_FSPD30_gini,period,Model_Name,version,bad_rate
90,2024-10-28,2024-11-03,0.258879,Week,s_cic_score,1.1.0,FSPD30
91,2024-11-01,2024-11-30,0.276323,Month,s_cic_score,1.1.0,FSPD30
92,2024-11-04,2024-11-10,0.342195,Week,s_cic_score,1.1.0,FSPD30
93,2024-11-11,2024-11-17,0.215753,Week,s_cic_score,1.1.0,FSPD30
94,2024-11-18,2024-11-24,0.30727,Week,s_cic_score,1.1.0,FSPD30


## FSTPD30

In [58]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID c4331c60-f0de-4262-b57f-b2359669948a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [59]:
gini_results = calculate_periodic_gini(df_s_cic_scorefstpd30, 's_cic_score', 'ln_fstpd30_flag', 'FSTPD30')
s_cic_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSTPD30.shape}")
s_cic_scoreFSTPD30.columns.values

The shape of dataframe after copy is:	(89, 7)


array(['start_date', 'end_date', 's_cic_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [60]:
s_cic_scoreFSTPD30.describe()

Unnamed: 0,start_date,end_date,s_cic_score_FSTPD30_gini
count,89,89,89.0
mean,2024-02-06 06:12:08.089887744,2024-02-16 18:04:02.696629248,0.307951
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.154422
25%,2023-10-02 00:00:00,2023-10-15 00:00:00,0.259056
50%,2024-02-05 00:00:00,2024-02-18 00:00:00,0.301309
75%,2024-06-10 00:00:00,2024-06-23 00:00:00,0.325651
max,2024-10-14 00:00:00,2024-10-31 00:00:00,0.571813
std,,,0.072841


In [61]:
s_cic_scoreFSTPD30.tail()

Unnamed: 0,start_date,end_date,s_cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
84,2024-09-23,2024-09-29,0.315568,Week,s_cic_score,1.1.0,FSTPD30
85,2024-09-30,2024-10-06,0.313062,Week,s_cic_score,1.1.0,FSTPD30
86,2024-10-01,2024-10-31,0.306804,Month,s_cic_score,1.1.0,FSTPD30
87,2024-10-07,2024-10-13,0.321041,Week,s_cic_score,1.1.0,FSTPD30
88,2024-10-14,2024-10-20,0.273456,Week,s_cic_score,1.1.0,FSTPD30


# Combining s_cic_score

In [62]:
import functools

dataframes = [s_cic_scoreFPD10, s_cic_scoreFPD30, s_cic_scoreFSPD30, s_cic_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 's_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 's_cic_score_FPD30_gini',
       's_cic_score_FSPD30_gini', 's_cic_score_FSTPD30_gini'],
      dtype=object)

In [63]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','s_cic_score_FPD10_gini','s_cic_score_FPD30_gini', 's_cic_score_FSPD30_gini', 's_cic_score_FSTPD30_gini']].copy()

## Creating the table

In [64]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=64f4c456-adf2-4874-b23a-a960d89cbf77>

In [65]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('s_cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('s_cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('s_cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=6d03ecf1-821c-4404-917d-e96ccf1ef7a6>

# sb_stack_score

## FPD10

In [66]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd10.head()

Job ID 7d9d4d42-9cb3-4236-a732-64c12286d2f0 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2023-06-01 11:02:18,7e7e7678-b36d-4b26-ad01-272565b54780,0.055517,0,1
1,2023-06-01 12:36:51,855bccd7-f3d4-461f-9748-bb50b07e0327,0.143019,0,1
2,2023-06-01 12:56:23,e585b35d-bc13-494d-970d-2272d976991b,0.043838,0,1
3,2023-06-01 14:57:43,ac518432-9362-4014-86d4-565c0125b27b,0.078666,0,1
4,2023-06-01 15:52:56,b11e0b23-1b34-4d8c-984d-1239a7b389c1,0.02396,0,1


In [67]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd10, 'sb_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(105, 7)


array(['start_date', 'end_date', 'sb_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [68]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD10_gini
count,105,105,105.0
mean,2024-03-17 18:44:34.285714176,2024-03-28 06:24:00,0.347361
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.072727
25%,2023-10-23 00:00:00,2023-10-31 00:00:00,0.299216
50%,2024-03-18 00:00:00,2024-03-31 00:00:00,0.336151
75%,2024-08-12 00:00:00,2024-08-25 00:00:00,0.406833
max,2025-01-06 00:00:00,2025-01-31 00:00:00,0.675973
std,,,0.097998


In [69]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.487907,Week,sb_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.328648,Month,sb_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.412426,Week,sb_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.675973,Week,sb_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,-0.072727,Week,sb_stack_score,1.1.0,FPD10


## FPD30

In [70]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd30.head()

Job ID 1815ee65-cf65-453e-b190-0675272dbcc7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.133378,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.02009,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.04893,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.084972,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.085976,0,1


In [71]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd30, 'sb_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(101, 7)


array(['start_date', 'end_date', 'sb_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [72]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD30_gini
count,101,101,101.0
mean,2024-03-06 09:16:02.376237568,2024-03-16 19:29:06.534653440,0.370313
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.018742
25%,2023-10-16 00:00:00,2023-10-29 00:00:00,0.32734
50%,2024-03-04 00:00:00,2024-03-17 00:00:00,0.362048
75%,2024-07-29 00:00:00,2024-08-04 00:00:00,0.42735
max,2024-12-16 00:00:00,2024-12-31 00:00:00,0.653811
std,,,0.096459


In [73]:
M2FPD30.tail()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
96,2024-11-25,2024-12-01,0.333881,Week,sb_stack_score,1.1.0,FPD30
97,2024-12-01,2024-12-31,0.337118,Month,sb_stack_score,1.1.0,FPD30
98,2024-12-02,2024-12-08,0.372214,Week,sb_stack_score,1.1.0,FPD30
99,2024-12-09,2024-12-15,0.305134,Week,sb_stack_score,1.1.0,FPD30
100,2024-12-16,2024-12-22,0.332189,Week,sb_stack_score,1.1.0,FPD30


## FSPD30

In [74]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefspd30.head()

Job ID d67344ec-e3d1-4a0a-940e-6575a602c08f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.133378,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.02009,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.04893,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.084972,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.085976,0,1


In [75]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefspd30, 'sb_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

The shape of dataframe after copy is:	(96, 7)


array(['start_date', 'end_date', 'sb_stack_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [76]:
M3FSPD30.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FSPD30_gini
count,96,96,96.0
mean,2024-02-21 03:45:00,2024-03-02 13:30:00,0.372534
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.12888
25%,2023-10-07 06:00:00,2023-10-20 06:00:00,0.325477
50%,2024-02-22 12:00:00,2024-03-01 12:00:00,0.361073
75%,2024-07-02 18:00:00,2024-07-15 18:00:00,0.422907
max,2024-11-18 00:00:00,2024-11-30 00:00:00,0.580645
std,,,0.077547


In [77]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,sb_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
91,2024-10-28,2024-11-03,0.332579,Week,sb_stack_score,1.1.0,FSPD30
92,2024-11-01,2024-11-30,0.340977,Month,sb_stack_score,1.1.0,FSPD30
93,2024-11-04,2024-11-10,0.309879,Week,sb_stack_score,1.1.0,FSPD30
94,2024-11-11,2024-11-17,0.360012,Week,sb_stack_score,1.1.0,FSPD30
95,2024-11-18,2024-11-24,0.302233,Week,sb_stack_score,1.1.0,FSPD30


## FSTPD30

In [78]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefstpd30.head()

Job ID ef1be34d-c730-4951-a9f3-e9d63a3ae395 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.133378,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.02009,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.04893,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.084972,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.085976,0,1


In [79]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefstpd30, 'sb_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(90, 7)


array(['start_date', 'end_date', 'sb_stack_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [80]:
M4FSTPD30.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FSTPD30_gini
count,90,90,90.0
mean,2024-02-04 01:36:00,2024-02-14 12:16:00,0.347587
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.149705
25%,2023-10-01 06:00:00,2023-10-09 18:00:00,0.310736
50%,2024-02-03 00:00:00,2024-02-14 12:00:00,0.338725
75%,2024-06-08 06:00:00,2024-06-21 06:00:00,0.387055
max,2024-10-14 00:00:00,2024-10-31 00:00:00,0.501592
std,,,0.061746


In [81]:
M4FSTPD30.tail()

Unnamed: 0,start_date,end_date,sb_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate
85,2024-09-23,2024-09-29,0.330863,Week,sb_stack_score,1.1.0,FSTPD30
86,2024-09-30,2024-10-06,0.317665,Week,sb_stack_score,1.1.0,FSTPD30
87,2024-10-01,2024-10-31,0.310409,Month,sb_stack_score,1.1.0,FSTPD30
88,2024-10-07,2024-10-13,0.329735,Week,sb_stack_score,1.1.0,FSTPD30
89,2024-10-14,2024-10-20,0.293159,Week,sb_stack_score,1.1.0,FSTPD30


## Combining the dataframes

In [82]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sb_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'sb_stack_score_FPD30_gini',
       'sb_stack_score_FSPD30_gini', 'sb_stack_score_FSTPD30_gini'],
      dtype=object)

In [83]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'sb_stack_score_FPD10_gini', 'sb_stack_score_FPD30_gini',  'sb_stack_score_FSPD30_gini', 'sb_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [84]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=8e199684-d760-4848-9e5a-2752eb248fea>

In [85]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('sb_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('sb_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('sb_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('sb_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d2c38b40-9079-4ecd-8efa-5120e4657d56>

# sa_stack_score

## FPD10

In [86]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd10.head()

Job ID 2f473982-5c97-4b49-a701-986ca9c469c7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.122612,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.020992,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.043172,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.076677,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.074644,0,1


In [87]:
sa_stack_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.122612,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.020992,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.043172,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.076677,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.074644,0,1


In [88]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd10, 'sa_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(105, 7)


array(['start_date', 'end_date', 'sa_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [89]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD10_gini
count,105,105,105.0
mean,2024-03-17 18:44:34.285714176,2024-03-28 06:24:00,0.371531
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.070303
25%,2023-10-23 00:00:00,2023-10-31 00:00:00,0.328902
50%,2024-03-18 00:00:00,2024-03-31 00:00:00,0.357051
75%,2024-08-12 00:00:00,2024-08-25 00:00:00,0.433874
max,2025-01-06 00:00:00,2025-01-31 00:00:00,0.664129
std,,,0.096958


In [90]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.581318,Week,sa_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.360251,Month,sa_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.466623,Week,sa_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.664129,Week,sa_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,-0.070303,Week,sa_stack_score,1.1.0,FPD10


## FPD30

In [91]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd30.head()

Job ID 7b866dd3-6d4c-40d0-877b-14abf70a20e9 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.122612,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.020992,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.043172,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.076677,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.074644,0,1


In [92]:
sa_stack_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.122612,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.020992,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.043172,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.076677,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.074644,0,1


In [93]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd30, 'sa_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(101, 7)


array(['start_date', 'end_date', 'sa_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [94]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD30_gini
count,101,101,101.0
mean,2024-03-06 09:16:02.376237568,2024-03-16 19:29:06.534653440,0.396827
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.012048
25%,2023-10-16 00:00:00,2023-10-29 00:00:00,0.358263
50%,2024-03-04 00:00:00,2024-03-17 00:00:00,0.386505
75%,2024-07-29 00:00:00,2024-08-04 00:00:00,0.454364
max,2024-12-16 00:00:00,2024-12-31 00:00:00,0.633609
std,,,0.09377


In [95]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.549091,Week,sa_stack_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.353838,Month,sa_stack_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.424444,Week,sa_stack_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.633609,Week,sa_stack_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,-0.012048,Week,sa_stack_score,1.1.0,FPD30


## FSTPD30

In [96]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefspd30.head()

Job ID ef317cdd-6a6e-44cd-bda0-00352ad4e818 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2023-06-01 10:40:18,ac002041-b0be-446e-b173-65effe340651,0.122612,0,1
1,2023-06-01 10:42:34,f91cba31-7630-477a-8cdd-d010b63c4900,0.020992,0,1
2,2023-06-01 12:17:20,4fddb4fa-6e60-42b0-adec-295658bd1093,0.043172,0,1
3,2023-06-01 14:43:46,4e78ebf3-256e-426c-8840-a7f9e1403a97,0.076677,0,1
4,2023-06-01 15:36:13,a3dc185d-0118-4ebc-bf03-117fdd8f9c64,0.074644,0,1


In [97]:
gini_results = calculate_periodic_gini(sa_stack_scorefspd30, 'sa_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

The shape of dataframe after copy is:	(96, 7)


array(['start_date', 'end_date', 'sa_stack_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [98]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,sa_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
91,2024-10-28,2024-11-03,0.365627,Week,sa_stack_score,1.1.0,FSPD30
92,2024-11-01,2024-11-30,0.368638,Month,sa_stack_score,1.1.0,FSPD30
93,2024-11-04,2024-11-10,0.345465,Week,sa_stack_score,1.1.0,FSPD30
94,2024-11-11,2024-11-17,0.373365,Week,sa_stack_score,1.1.0,FSPD30
95,2024-11-18,2024-11-24,0.355703,Week,sa_stack_score,1.1.0,FSPD30


## FSTPD30

In [99]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefstpd30.head()

Job ID acf162c4-c9d0-46a6-a6ec-3f84ad96230a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2023-06-01 11:02:18,7e7e7678-b36d-4b26-ad01-272565b54780,0.046504,0,1
1,2023-06-01 12:36:51,855bccd7-f3d4-461f-9748-bb50b07e0327,0.126367,0,1
2,2023-06-01 12:56:23,e585b35d-bc13-494d-970d-2272d976991b,0.038083,0,1
3,2023-06-01 14:57:43,ac518432-9362-4014-86d4-565c0125b27b,0.072716,0,1
4,2023-06-01 15:52:56,b11e0b23-1b34-4d8c-984d-1239a7b389c1,0.023263,0,1


In [100]:
sa_stack_scorefstpd30.tail()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
122234,2024-09-10 14:12:34,56658481-bec0-4b00-8cb2-ee74f8bb8264,0.064016,0,1
122235,2024-06-16 12:44:25,12b10be4-53ce-4351-891e-4ec475e5048e,0.070799,0,1
122236,2024-02-25 12:50:09,c9e4ddc8-66b2-41f6-b2e4-8b2b745b9dad,0.01483,0,1
122237,2023-08-01 19:11:47,631755c2-727c-4b21-af6a-c081a668920e,0.102613,0,1
122238,2023-11-02 13:47:25,f86e4525-0762-454a-a929-84a2cc9d09a6,0.022795,0,1


In [101]:
gini_results = calculate_periodic_gini(sa_stack_scorefstpd30, 'sa_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(90, 7)


array(['start_date', 'end_date', 'sa_stack_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

## Combining the dataframes

In [102]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sa_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'sa_stack_score_FPD30_gini',
       'sa_stack_score_FSPD30_gini', 'sa_stack_score_FSTPD30_gini'],
      dtype=object)

In [103]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'sa_stack_score_FPD10_gini', 'sa_stack_score_FPD30_gini',  'sa_stack_score_FSPD30_gini', 'sa_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [104]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ab529c8f-998f-4624-994e-aeb28bd203f5>

In [105]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('sa_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('sa_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('sa_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('sa_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=76a97f2e-f499-4ba4-8fc4-f859abc769fc>

# gen_credo_score

## FPD10

In [106]:


sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd10.head()

Job ID 76ea0e6b-ca54-4528-9ed5-ae86e04a2bb7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,gen_credo_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2023-01-02 10:19:54,73902306-ce07-4625-9d1e-a611eeb83166,0.175299,0,1
1,2023-01-02 10:56:20,ac41223e-0d42-4569-b21f-789bce021291,0.072991,0,1
2,2023-01-02 11:39:36,d438dd4f-4c6c-44b5-ba83-58585ba8530a,0.150079,0,1
3,2023-01-02 12:04:44,44247f51-ac7a-4035-8d34-af7af2d9548c,0.099274,1,1
4,2023-01-02 12:44:11,f0a9e5c7-560d-46a3-9c4c-16d518d1a605,0.187076,0,1


In [107]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd10, 'gen_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(131, 7)


array(['start_date', 'end_date', 'gen_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [108]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,gen_credo_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.246725,Month,gen_credo_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.301125,Week,gen_credo_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,0.2734,Week,gen_credo_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.667532,Week,gen_credo_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,0.086505,Week,gen_credo_score,1.1.0,FPD10


## FPD30

In [109]:
sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd30.head()

Job ID 1fd580da-630f-4fa1-9dc7-465ae4f2a795 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,gen_credo_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-01-02 12:13:00,5e85ce9b-fb16-48a4-9f62-c543782b54e1,0.152459,0,1
1,2023-01-02 12:28:35,92a04afd-ec2e-4941-a1be-0e3af52aab1f,0.074451,0,1
2,2023-01-02 12:53:05,6c53ccd6-5de3-4b2a-8216-caa1b1146cd6,0.100128,1,1
3,2023-01-02 13:19:37,ea076112-bc60-4a12-9463-37a9be7da3e2,0.169417,0,1
4,2023-01-02 13:46:09,cfb70c1e-bd39-4202-a376-fc003a6b231d,0.047018,0,1


In [110]:
gen_credo_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,gen_credo_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-01-02 12:13:00,5e85ce9b-fb16-48a4-9f62-c543782b54e1,0.152459,0,1
1,2023-01-02 12:28:35,92a04afd-ec2e-4941-a1be-0e3af52aab1f,0.074451,0,1
2,2023-01-02 12:53:05,6c53ccd6-5de3-4b2a-8216-caa1b1146cd6,0.100128,1,1
3,2023-01-02 13:19:37,ea076112-bc60-4a12-9463-37a9be7da3e2,0.169417,0,1
4,2023-01-02 13:46:09,cfb70c1e-bd39-4202-a376-fc003a6b231d,0.047018,0,1


In [111]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd30, 'gen_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(127, 7)


array(['start_date', 'end_date', 'gen_credo_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [112]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,gen_credo_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.253005,Month,gen_credo_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.342149,Week,gen_credo_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,0.407163,Week,gen_credo_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.667532,Week,gen_credo_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,0.025547,Week,gen_credo_score,1.1.0,FPD30


## FSPD30

In [113]:
sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefspd30, 'gen_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

Job ID 0ccce1d3-f031-40ba-bbb5-ad6575857140 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  gen_credo_score  \
0 2023-01-02 10:19:54  73902306-ce07-4625-9d1e-a611eeb83166         0.175299   
1 2023-01-02 10:56:20  ac41223e-0d42-4569-b21f-789bce021291         0.072991   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(122, 7)
['start_date' 'end_date' 'gen_credo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,gen_credo_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.172055,Month,gen_credo_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.262803,Week,gen_credo_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.189744,Week,gen_credo_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.410738,Week,gen_credo_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,0.073579,Week,gen_credo_score,1.1.0,FSPD30


## FSTPD30

In [114]:
sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefstpd30, 'gen_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()

Job ID 235addc4-0b68-4279-8dfc-38033a38e394 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  gen_credo_score  \
0 2023-01-02 10:19:54  73902306-ce07-4625-9d1e-a611eeb83166         0.175299   
1 2023-01-02 10:56:20  ac41223e-0d42-4569-b21f-789bce021291         0.072991   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(116, 7)
['start_date' 'end_date' 'gen_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,gen_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.180837,Month,gen_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.282959,Week,gen_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.189908,Week,gen_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.19884,Week,gen_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.162845,Week,gen_credo_score,1.1.0,FSTPD30


## Combining tables 

In [115]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'gen_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'gen_credo_score_FPD30_gini',
       'gen_credo_score_FSPD30_gini', 'gen_credo_score_FSTPD30_gini'],
      dtype=object)

In [116]:
final_df = final_df[['start_date', 'end_date','period',
       'Model_Name', 'version', 'bad_rate', 'gen_credo_score_FPD10_gini','gen_credo_score_FPD30_gini',   'gen_credo_score_FSPD30_gini', 'gen_credo_score_FSTPD30_gini']].copy()

In [117]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=1e2e4552-7c95-46ac-b858-c58802a648e1>

In [118]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('gen_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('gen_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('gen_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('gen_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=e6933728-87a8-432d-a10f-bd1323896bc2>

# c_credo_score

In [119]:
sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd10, 'c_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd30, 'c_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefspd30, 'c_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefstpd30, 'c_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID d05b1820-85aa-428d-9e56-d5e7c7055ba1 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(131, 7)
Job ID d092f0c9-30aa-4181-bdcd-0c70511bc205 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(127, 7)
Job ID 3aa61840-7649-488b-b9bd-3159d1287129 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  c_credo_score  \
0 2023-01-02 12:13:00  5e85ce9b-fb16-48a4-9f62-c543782b54e1       0.219463   
1 2023-01-02 12:28:35  92a04afd-ec2e-4941-a1be-0e3af52aab1f       0.281711   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(122, 7)
['start_date' 'end_date' 'c_credo_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID e1c6ce22-071a-4699-bc7c-0b2d8bb22d9f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  c_credo_score  \
0 2023-01-02 10:19:54  73902306-ce07-4625-9d1e-a611eeb83166       0.198732   
1 2023-01-02 10:56:20  ac41223e-0d42-4569-b21f-789bce021291       0.205840   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(116, 7)
['start_date' 'end_date' 'c_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,c_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.035948,Month,c_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.036009,Week,c_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.062143,Week,c_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275891,Week,c_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,-0.050558,Week,c_credo_score,1.1.0,FSTPD30


In [120]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'c_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'c_credo_score_FPD30_gini',
       'c_credo_score_FSPD30_gini', 'c_credo_score_FSTPD30_gini'],
      dtype=object)

In [121]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate','c_credo_score_FPD10_gini', 'c_credo_score_FPD30_gini',    'c_credo_score_FSPD30_gini', 'c_credo_score_FSTPD30_gini']].copy()

In [122]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a0a288cc-e72b-4c82-9872-1ddeb115ce69>

In [123]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('c_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('c_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('c_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('c_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=320030b9-eaf7-4844-869b-5803603ebae8>

# s_credo_score

In [124]:


sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd10, 's_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd30, 's_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefspd30, 's_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefstpd30, 's_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID f2ae9f3f-aa46-4116-8d06-89f4ce68a901 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(131, 7)
Job ID e0de8efa-a6fa-4dc0-af7f-9d9a45677fbf successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(127, 7)
Job ID 4ab6e394-6cc7-4dcc-b2d9-1c3e4d4f49f1 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  s_credo_score  \
0 2023-01-02 10:19:54  73902306-ce07-4625-9d1e-a611eeb83166       0.151148   
1 2023-01-02 10:56:20  ac41223e-0d42-4569-b21f-789bce021291       0.093295   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(122, 7)
['start_date' 'end_date' 's_credo_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 3c2fba14-5d44-4179-b217-a146b1757902 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  s_credo_score  \
0 2023-01-02 12:13:00  5e85ce9b-fb16-48a4-9f62-c543782b54e1       0.113832   
1 2023-01-02 12:28:35  92a04afd-ec2e-4941-a1be-0e3af52aab1f       0.073284   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(116, 7)
['start_date' 'end_date' 's_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,s_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.219086,Month,s_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.261708,Week,s_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.251056,Week,s_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275062,Week,s_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.157554,Week,s_credo_score,1.1.0,FSTPD30


## Combining data

In [125]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 's_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 's_credo_score_FPD30_gini',
       's_credo_score_FSPD30_gini', 's_credo_score_FSTPD30_gini'],
      dtype=object)

In [126]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate','s_credo_score_FPD10_gini', 's_credo_score_FPD30_gini',    's_credo_score_FSPD30_gini', 's_credo_score_FSTPD30_gini']].copy()

In [127]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=e6953855-c9d3-4d8f-a28d-ca88f44bce2e>

In [128]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('s_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('s_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('s_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=01153609-3a9b-46a2-a5cd-3b699438c22a>

In [129]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,s_credo_score_FPD10_gini,s_credo_score_FPD30_gini,s_credo_score_FSPD30_gini,s_credo_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,s_credo_score,1.1.0,FPD10,0.230181,,,
1,2023-01-02,2023-01-08,Week,s_credo_score,1.1.0,FPD10,0.248978,,,
2,2023-01-09,2023-01-15,Week,s_credo_score,1.1.0,FPD10,0.296337,,,
3,2023-01-16,2023-01-22,Week,s_credo_score,1.1.0,FPD10,0.52987,,,
4,2023-01-23,2023-01-29,Week,s_credo_score,1.1.0,FPD10,0.079585,,,


# fu_credo_score

In [130]:


sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd10, 'fu_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd30, 'fu_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefspd30, 'fu_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefstpd30, 'fu_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 833dc06a-66ed-415c-9cf1-f27600c4d59f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(131, 7)
Job ID 62c1ef3b-1ac4-47d8-870e-621a275c6311 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(127, 7)
Job ID fa04f87c-2248-42e4-a0fa-4dd1bc8d2588 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  fu_credo_score  \
0 2023-01-02 10:19:54  73902306-ce07-4625-9d1e-a611eeb83166        0.096835   
1 2023-01-02 10:56:20  ac41223e-0d42-4569-b21f-789bce021291        0.070143   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(122, 7)
['start_date' 'end_date' 'fu_credo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 72f56254-3762-41a5-9cba-a454146c2d04 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  fu_credo_score  \
0 2023-01-02 10:19:54  73902306-ce07-4625-9d1e-a611eeb83166        0.096835   
1 2023-01-02 10:56:20  ac41223e-0d42-4569-b21f-789bce021291        0.070143   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(116, 7)
['start_date' 'end_date' 'fu_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,fu_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.07286,Month,fu_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.002165,Week,fu_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.056674,Week,fu_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.326429,Week,fu_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.109935,Week,fu_credo_score,1.1.0,FSTPD30


## Combining data

In [131]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'fu_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'fu_credo_score_FPD30_gini',
       'fu_credo_score_FSPD30_gini', 'fu_credo_score_FSTPD30_gini'],
      dtype=object)

In [132]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate','fu_credo_score_FPD10_gini', 'fu_credo_score_FPD30_gini',    'fu_credo_score_FSPD30_gini', 'fu_credo_score_FSTPD30_gini']].copy()

In [133]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=36a4586a-a75f-486e-bdac-4f094edd8555>

In [134]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('fu_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('fu_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('fu_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('fu_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=689ef1f4-acf6-4774-a52f-e1a3cbc6da1f>

In [135]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,fu_credo_score_FPD10_gini,fu_credo_score_FPD30_gini,fu_credo_score_FSPD30_gini,fu_credo_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,fu_credo_score,1.1.0,FPD10,0.023608,,,
1,2023-01-02,2023-01-08,Week,fu_credo_score,1.1.0,FPD10,-0.032464,,,
2,2023-01-09,2023-01-15,Week,fu_credo_score,1.1.0,FPD10,0.016278,,,
3,2023-01-16,2023-01-22,Week,fu_credo_score,1.1.0,FPD10,0.153247,,,
4,2023-01-23,2023-01-29,Week,fu_credo_score,1.1.0,FPD10,0.120242,,,


# r_credo_score

In [136]:


sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd10, 'r_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd30, 'r_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefspd30, 'r_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefstpd30, 'r_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID f6bbd0f5-8cf4-48e9-b505-0747290fa958 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(131, 7)
Job ID 7eb4fd75-9f60-4476-a5d8-24f08b006dbd successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(127, 7)
Job ID 371fe1b2-37fc-4ad3-956c-1c37f8195eee successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  r_credo_score  \
0 2023-01-02 12:13:00  5e85ce9b-fb16-48a4-9f62-c543782b54e1       0.344378   
1 2023-01-02 12:28:35  92a04afd-ec2e-4941-a1be-0e3af52aab1f       0.112691   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(122, 7)
['start_date' 'end_date' 'r_credo_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 4c860fa7-fbfb-4baf-92f2-a4ed9e19a73d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  r_credo_score  \
0 2023-01-02 10:19:54  73902306-ce07-4625-9d1e-a611eeb83166       0.276277   
1 2023-01-02 10:56:20  ac41223e-0d42-4569-b21f-789bce021291       0.328053   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(116, 7)
['start_date' 'end_date' 'r_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,r_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.048984,Month,r_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.014955,Week,r_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.193885,Week,r_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,-0.019056,Week,r_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.024691,Week,r_credo_score,1.1.0,FSTPD30


## Combining data

In [137]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'r_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'r_credo_score_FPD30_gini',
       'r_credo_score_FSPD30_gini', 'r_credo_score_FSTPD30_gini'],
      dtype=object)

In [138]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','r_credo_score_FPD10_gini','r_credo_score_FPD30_gini',    'r_credo_score_FSPD30_gini', 'r_credo_score_FSTPD30_gini']].copy()

In [139]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=b3e4f38a-5e2b-40f2-b42a-027c4501a5cd>

In [140]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('r_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('r_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('r_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('r_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=750fbf99-705c-454f-a925-fcd04572dc31>

# old_gen_credo_score

In [141]:


sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd10, 'old_gen_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd30, 'old_gen_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefspd30, 'old_gen_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefstpd30, 'old_gen_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 3db1853b-d77b-4082-8e3a-a1115c81c667 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(131, 7)
Job ID b9defdd9-74a5-40b5-9771-45a5b03f3d97 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(127, 7)
Job ID 0649cf0e-1888-4363-927c-c4a27870a4fc successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2023-01-02 12:53:05  6c53ccd6-5de3-4b2a-8216-caa1b1146cd6   
1 2023-01-02 13:19:37  ea076112-bc60-4a12-9463-37a9be7da3e2   

  old_gen_credo_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0               481.0               1                      1  
1               500.0               0                      1  
The shape of dataframe after copy is:	(122, 7)
['start_date' 'end_date' 'old_gen_credo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 57363b82-4e8c-47e5-911b-1629469db2eb successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2023-01-02 12:04:44  44247f51-ac7a-4035-8d34-af7af2d9548c   
1 2023-01-02 12:44:11  f0a9e5c7-560d-46a3-9c4c-16d518d1a605   

  old_gen_credo_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0               498.0                1                       1  
1               510.0                0                       1  
The shape of dataframe after copy is:	(116, 7)
['start_date' 'end_date' 'old_gen_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,old_gen_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.122039,Month,old_gen_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.230849,Week,old_gen_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.143413,Week,old_gen_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.132488,Week,old_gen_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,-0.00372,Week,old_gen_credo_score,1.1.0,FSTPD30


## Combining data

In [142]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_gen_credo_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'old_gen_credo_score_FPD30_gini',
       'old_gen_credo_score_FSPD30_gini',
       'old_gen_credo_score_FSTPD30_gini'], dtype=object)

In [143]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_gen_credo_score_FPD10_gini','old_gen_credo_score_FPD30_gini',    'old_gen_credo_score_FSPD30_gini', 'old_gen_credo_score_FSTPD30_gini']].copy()

In [144]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=802cc6c4-002d-4245-a9e8-0bdb6848c545>

In [145]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_gen_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_gen_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_gen_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_gen_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d4e1de2e-53c3-4d80-bfe7-7d93ea2bfab3>

In [146]:
sq = """select * from prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""

df = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID a1413798-06f2-4775-9b66-df58c8d26f70 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [147]:
df['bad_rate'].value_counts()

bad_rate
FPD10      131
FPD30      127
FSPD30     122
FSTPD30    116
Name: count, dtype: int64

# old_cic_score

In [148]:


sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd10, 'old_cic_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd30, 'old_cic_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefspd30, 'old_cic_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefstpd30, 'old_cic_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID c590b88c-8c4b-4860-8c34-bfb7d8c8fac0 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(91, 7)
Job ID cbcb7a1b-80f1-4817-ba77-281c86b0155a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(87, 7)
Job ID 17079e08-7de3-4145-820e-0b10f8005071 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2023-08-23 10:30:20  799382a3-b224-48b5-ad0b-ac075c638651       484.000   
1 2023-08-23 20:04:02  9c7f4086-33c8-4b8f-b1b1-f95cc0b104b2       480.000   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               1                      1  
1               1                      1  
The shape of dataframe after copy is:	(82, 7)
['start_date' 'end_date' 'old_cic_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 96264287-b4d1-42fa-94d2-c0afdb54e128 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2023-08-23 15:36:50  c8e6977b-a5ca-48b0-9ef0-e6e9581a7229       484.000   
1 2023-08-24 17:04:19  5c2c2e8b-7c9e-46d0-9216-990d9dc3aad7       484.000   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                1                       1  
1                0                       1  
The shape of dataframe after copy is:	(76, 7)
['start_date' 'end_date' 'old_cic_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,old_cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-08-01,2023-08-31,0.148362,Month,old_cic_score,1.1.0,FSTPD30
1,2023-08-21,2023-08-27,0.071724,Week,old_cic_score,1.1.0,FSTPD30
2,2023-08-28,2023-09-03,0.173757,Week,old_cic_score,1.1.0,FSTPD30
3,2023-09-01,2023-09-30,0.232097,Month,old_cic_score,1.1.0,FSTPD30
4,2023-09-04,2023-09-10,0.247475,Week,old_cic_score,1.1.0,FSTPD30


## Combining data

In [149]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_cic_score_FPD30_gini',
       'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini'],
      dtype=object)

In [150]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_cic_score_FPD10_gini','old_cic_score_FPD30_gini',    'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini']].copy()

In [151]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=972a1b0f-fbc5-4d16-abda-eaa1ad0329ad>

In [152]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=9975b9d6-7826-404e-a732-51d5b09951db>

# old_demo_score

In [153]:


sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd10, 'old_demo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd30, 'old_demo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefspd30, 'old_demo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefstpd30, 'old_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID cb094ed4-5cf9-4e37-9ff6-e44094a20828 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(131, 7)
Job ID 87c16b23-8b6c-46f6-9c80-35f75366efac successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(127, 7)
Job ID ce43cceb-6b75-473b-8f07-d45869ad025f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2023-01-02 12:13:00  5e85ce9b-fb16-48a4-9f62-c543782b54e1           406.0   
1 2023-01-02 12:28:35  92a04afd-ec2e-4941-a1be-0e3af52aab1f           411.0   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(122, 7)
['start_date' 'end_date' 'old_demo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 8d9af353-367b-46c7-ae2d-4cf02483b19b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2023-01-02 12:13:00  5e85ce9b-fb16-48a4-9f62-c543782b54e1           406.0   
1 2023-01-02 12:28:35  92a04afd-ec2e-4941-a1be-0e3af52aab1f           411.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(116, 7)
['start_date' 'end_date' 'old_demo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,old_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.12324,Month,old_demo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.182605,Week,old_demo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.039523,Week,old_demo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.135874,Week,old_demo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.126396,Week,old_demo_score,1.1.0,FSTPD30


## Combining data

In [154]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_demo_score_FPD30_gini',
       'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini'],
      dtype=object)

In [155]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_demo_score_FPD10_gini','old_demo_score_FPD30_gini',    'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini']].copy()

In [156]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=4ad7801f-1e8a-438e-82ca-4e2839eb05ee>

In [157]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c18acac4-c706-4d7f-bc89-a7fc461a9896>

# bu_bureau_score

In [158]:
sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd10_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd10, 'bu_bureau_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd30, 'bu_bureau_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fspd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefspd30, 'bu_bureau_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fstpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefstpd30, 'bu_bureau_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 49a25b27-b3b9-44e1-b5d2-db2a8c97aec9 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
Job ID ec30ad38-6a29-4995-8d84-5bc6ff5c97ab successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
Job ID 4e067dcc-4eaf-4cd6-acf3-83a6598ed65d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2023-01-10 13:04:21  a9a82293-c02a-44b9-a901-433ed8867b91            369.0   
1 2023-01-10 16:49:53  f7506ebf-ba0b-40a4-b0bd-8ec91ea0ab96            339.0   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               1                      1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 4c238682-a209-49ec-a624-a66b2394fc3f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2023-01-13 18:19:41  546fd78e-d4c0-4eb6-8667-00aa187b0ddd            401.0   
1 2023-01-14 14:49:17  483095b5-2399-444e-b547-aba3fe31a3e0            347.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                1                       1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,bu_bureau_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.029167,Month,bu_bureau_score,1.1.0,FSTPD30
1,2023-01-09,2023-01-15,-0.08,Week,bu_bureau_score,1.1.0,FSTPD30
2,2023-01-16,2023-01-22,-0.833333,Week,bu_bureau_score,1.1.0,FSTPD30
3,2023-01-23,2023-01-29,0.416667,Week,bu_bureau_score,1.1.0,FSTPD30
4,2023-01-30,2023-02-05,0.244444,Week,bu_bureau_score,1.1.0,FSTPD30


## Combining data

In [159]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'bu_bureau_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'bu_bureau_score_FPD30_gini',
       'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini'],
      dtype=object)

In [160]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','bu_bureau_score_FPD10_gini','bu_bureau_score_FPD30_gini',    'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini']].copy()

In [161]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c805060c-43d6-4682-8d21-0dba9bd18a5a>

In [162]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('bu_bureau_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('bu_bureau_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c0a4de31-5591-42ec-a995-b926f0eae46a>