# <center> Model Gini Calculation </center>

In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

from sklearn.metrics import roc_auc_score
from datetime import datetime, timedelta
# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

# Function

## calculate_gini_for_threedigitscore

In [2]:
# def calculate_gini_for_threedigitscore(scores, labels):
#     """
#     Calculate Gini coefficient for three-digit scores and binary labels
    
#     Parameters:
#     scores: array-like, three-digit scores (higher is better)
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Combine scores and labels into a DataFrame
#     df = pd.DataFrame({'score': scores, 'label': labels})
    
#     # Sort by score in descending order (assuming higher score is better)
#     df = df.sort_values('score', ascending=False)
    
#     # Calculate cumulative values
#     total_pos = df['label'].sum()
#     total_neg = len(df) - total_pos
    
#     if total_pos == 0 or total_neg == 0:
#         return 0
    
#     # Calculate cumulative proportions
#     cum_pos = df['label'].cumsum()
#     cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
#     # Convert to proportions
#     cum_pos_prop = cum_pos / total_pos
#     cum_neg_prop = cum_neg / total_neg
    
#     # Calculate Gini
#     gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
    
#     return gini


## Modified one

def calculate_gini_for_threedigitscore(scores, labels):
    """
    Calculate Gini coefficient for three-digit scores and binary labels
    
    Parameters:
    scores: array-like, three-digit scores (higher is better)
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Combine scores and labels into a DataFrame
    df = pd.DataFrame({'score': scores, 'label': labels})
    
    # Sort by score in descending order (assuming higher score means lower risk)
    # For default prediction, we want to sort scores in ascending order 
    # since higher default probability should correspond to higher risk
    df = df.sort_values('score', ascending=True)  # Changed to ascending=True
    
    # Calculate cumulative values
    total_pos = df['label'].sum()
    total_neg = len(df) - total_pos
    
    if total_pos == 0 or total_neg == 0:
        return 0
    
    # Calculate cumulative proportions
    cum_pos = df['label'].cumsum()
    cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
    # Convert to proportions
    cum_pos_prop = cum_pos / total_pos
    cum_neg_prop = cum_neg / total_neg
    
    # Calculate area under curve
    auc = np.trapz(cum_pos_prop, cum_neg_prop)
    
    # Calculate Gini
    gini = 2 * auc - 1
    
    return gini

## calculate_gini

In [3]:
def calculate_gini(pd_scores, bad_indicators):
    """
    Calculate Gini coefficient from scores and binary indicators
    
    Parameters:
    pd_scores: array-like of scores/probabilities
    bad_indicators: array-like of binary outcomes (0/1)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays and ensure they're numeric
    pd_scores = np.array(pd_scores, dtype=float)
    bad_indicators = np.array(bad_indicators, dtype=int)
    
    # Check for valid input data
    if len(pd_scores) == 0 or len(bad_indicators) == 0:
        return np.nan
    
    # Check if we have both good and bad cases (needed for ROC AUC)
    if len(np.unique(bad_indicators)) < 2:
        return np.nan
    
    # Calculate AUC using sklearn
    try:
        auc = roc_auc_score(bad_indicators, pd_scores)
        # Calculate Gini from AUC
        gini = 2 * auc - 1
        return gini
    except ValueError:
        return np.nan

## calculate_hybrid_gini

In [4]:
# def calculate_hybrid_gini(scores, labels):
#     """
#     Calculate Gini coefficient handling both PD values and three-digit scores
    
#     Parameters:
#     scores: array-like, contains either PD values (0-1) or three-digit scores
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Convert inputs to numpy arrays
#     scores = np.array(scores, dtype=float)
#     labels = np.array(labels, dtype=int)
    
#     # Basic validation
#     if len(scores) == 0 or len(labels) == 0:
#         return np.nan
    
#     if len(np.unique(labels)) < 2:
#         return np.nan
        
#     # Determine if scores are PD values or three-digit scores
#     # PD values are between 0 and 1
#     is_pd = np.all((scores >= 0) & (scores <= 1))
    
#     if is_pd:
#         try:
#             auc = roc_auc_score(labels, scores)
#             gini = 2 * auc - 1
#             return gini
#         except ValueError:
#             return np.nan
#     else:
#         # Handle as three-digit score
#         df = pd.DataFrame({'score': scores, 'label': labels})
#         df = df.sort_values('score', ascending=False)
        
#         total_pos = df['label'].sum()
#         total_neg = len(df) - total_pos
        
#         if total_pos == 0 or total_neg == 0:
#             return np.nan
        
#         cum_pos = df['label'].cumsum()
#         cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
#         cum_pos_prop = cum_pos / total_pos
#         cum_neg_prop = cum_neg / total_neg
        
#         gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
#         return gini

## Modified one

def calculate_hybrid_gini(scores, labels):
    """
    Calculate Gini coefficient handling both PD values and three-digit scores
    
    Parameters:
    scores: array-like, contains either PD values (0-1) or three-digit scores
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays
    scores = np.array(scores, dtype=float)
    labels = np.array(labels, dtype=int)
    
    # Basic validation
    if len(scores) == 0 or len(labels) == 0:
        return np.nan
    
    if len(np.unique(labels)) < 2:
        return np.nan
        
    # Determine if scores are PD values or three-digit scores
    # PD values are between 0 and 1
    is_pd = np.all((scores >= 0) & (scores <= 1))
    
    if is_pd:
        try:
            auc = roc_auc_score(labels, scores)
            gini = 2 * auc - 1
            return gini
        except ValueError:
            return np.nan
    else:
        # Handle as three-digit score
        df = pd.DataFrame({'score': scores, 'label': labels})
        # Sort by score in ascending order since higher score means higher risk
        df = df.sort_values('score', ascending=True)
        
        total_pos = df['label'].sum()
        total_neg = len(df) - total_pos
        
        if total_pos == 0 or total_neg == 0:
            return np.nan
        
        cum_pos = df['label'].cumsum()
        cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
        cum_pos_prop = cum_pos / total_pos
        cum_neg_prop = cum_neg / total_neg
        
        # Calculate area under curve
        auc = np.trapz(cum_pos_prop, cum_neg_prop)
        
        # Calculate Gini using the same formula as PD values
        gini = 2 * auc - 1
        return gini

## calculate_periodic_gini_threedigit

In [5]:
# Main processing code
def calculate_periodic_gini_threedigit(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_gini

In [6]:
def calculate_periodic_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_hybrid_gini

In [7]:
def calculate_periodic_hybrid_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients for mixed score types
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + pd.Timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

# App Score FPD10

In [8]:
sq = """with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from appscore;"""

dfappscorefpd10 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 81384a5d-d184-4fb9-a88a-1b17e08e84a2 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [9]:
dfappscorefpd10.sample(5)

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_apps_score,ln_fpd10_flag,ln_mature_fpd10_flag
12067,2024-10-27 11:11:11,39f8c6f4-30df-4740-a10a-efb6c4e6d97f,0.697507,1,1
115526,2024-05-12 15:44:41,d528bf4a-4a76-48ac-a3fd-d4cba0c88ef8,0.453645,0,1
19608,2024-12-24 13:00:59,d0086da3-d119-445a-9736-f0332745ed0b,0.654137,0,1
136578,2024-05-22 14:10:34,3e2675bd-e162-4047-a37e-3ceadc3b9319,0.418178,0,1
109052,2024-12-08 16:05:47,5053756d-f6fb-4105-9704-7e92f760e4ea,0.288394,0,1


In [10]:
gini_results = calculate_periodic_gini(dfappscorefpd10, 's_apps_score', 'ln_fpd10_flag', 'FPD10')

In [11]:
gini_results.head()

Unnamed: 0,start_date,end_date,s_apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,s_apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,s_apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,s_apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,s_apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,s_apps_score,1.1.0,FPD10


In [12]:
appscoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD10.shape}")
appscoreFPD10.columns.values

The shape of dataframe after copy is:	(106, 7)


array(['start_date', 'end_date', 's_apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [13]:
gini_results.head()

Unnamed: 0,start_date,end_date,s_apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,s_apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,s_apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,s_apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,s_apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,s_apps_score,1.1.0,FPD10


# App Score FPD30

In [14]:
sq = """
with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from appscore;
"""

dfappscorefpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID fa5a3381-86bd-4639-9ef9-83fb498e2036 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [15]:
dfappscorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_apps_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2023-06-22 14:25:06,12d13dfc-e307-4605-af97-361327cb3247,0.632675,0,1
1,2024-07-21 10:34:01,4238aea5-2cbb-4356-879b-71b4d3486fd2,0.497491,0,1
2,2023-12-31 10:00:00,ad89c5e8-d5bd-49a9-a10d-82139364463f,0.297527,0,1
3,2024-12-21 09:20:07,898c5779-730d-4755-8fb1-b18c254b30bd,0.552864,1,1
4,2024-12-16 17:44:58,a465017e-864c-4c77-84b2-1481fb8cfe9d,0.326016,0,1


In [16]:
gini_results = calculate_periodic_gini(dfappscorefpd30, 's_apps_score', 'ln_fpd30_flag', 'FPD30')
# gini_results['bad_rate'] = 'FPD30'
appscoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD30.shape}")
appscoreFPD30.columns.values

The shape of dataframe after copy is:	(102, 7)


array(['start_date', 'end_date', 's_apps_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [17]:
gini_results.sample(5)

Unnamed: 0,start_date,end_date,s_apps_score_FPD30_gini,period,Model_Name,version,bad_rate
1,2023-06-01,2023-06-30,0.351719,Month,s_apps_score,1.1.0,FPD30
10,2023-07-24,2023-07-30,0.291126,Week,s_apps_score,1.1.0,FPD30
12,2023-08-01,2023-08-31,0.3434,Month,s_apps_score,1.1.0,FPD30
25,2023-10-16,2023-10-22,0.279451,Week,s_apps_score,1.1.0,FPD30
96,2024-11-25,2024-12-01,0.277044,Week,s_apps_score,1.1.0,FPD30


# App Score FSPD30

In [18]:
sq = """with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from appscore;
"""

dfappscorefspd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 48438ad1-1de2-47bf-a492-95a8296bfff8 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [19]:
gini_results = calculate_periodic_gini(dfappscorefspd30, 's_apps_score', 'ln_fspd30_flag', 'FSPD30')
# gini_results['bad_rate'] = 'FSPD30'
appscoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSPD30.shape}")
appscoreFSPD30.columns.values

The shape of dataframe after copy is:	(97, 7)


array(['start_date', 'end_date', 's_apps_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [20]:
gini_results.tail()

Unnamed: 0,start_date,end_date,s_apps_score_FSPD30_gini,period,Model_Name,version,bad_rate
92,2024-11-01,2024-11-30,0.298595,Month,s_apps_score,1.1.0,FSPD30
93,2024-11-04,2024-11-10,0.295698,Week,s_apps_score,1.1.0,FSPD30
94,2024-11-11,2024-11-17,0.30673,Week,s_apps_score,1.1.0,FSPD30
95,2024-11-18,2024-11-24,0.277736,Week,s_apps_score,1.1.0,FSPD30
96,2024-11-25,2024-12-01,0.245972,Week,s_apps_score,1.1.0,FSPD30


# App Score FSTPD30

In [21]:
sq = """with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_apps_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    s_apps_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from appscore;
"""

dfappscorefstpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 50760ff6-0dd7-495c-b41a-85fa0048b03d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [22]:
gini_results = calculate_periodic_gini(dfappscorefstpd30, 's_apps_score', 'ln_fstpd30_flag', 'FSTPD30')
# gini_results['bad_rate'] = 'FSTPD30'
appscoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSTPD30.shape}")
appscoreFSTPD30.columns.values

The shape of dataframe after copy is:	(91, 7)


array(['start_date', 'end_date', 's_apps_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [23]:
appscoreFSTPD30.head()

Unnamed: 0,start_date,end_date,s_apps_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.465098,Week,s_apps_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.33742,Month,s_apps_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.4133,Week,s_apps_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.395676,Week,s_apps_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.314497,Week,s_apps_score,1.1.0,FSTPD30


# Combining App Score

In [24]:
import functools

dataframes = [appscoreFPD10, appscoreFPD30, appscoreFSPD30, appscoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 's_apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 's_apps_score_FPD30_gini',
       's_apps_score_FSPD30_gini', 's_apps_score_FSTPD30_gini'],
      dtype=object)

In [25]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','s_apps_score_FSTPD30_gini','s_apps_score_FSPD30_gini',
       's_apps_score_FPD30_gini', 's_apps_score_FPD10_gini']].copy()
final_df.dtypes

start_date                   datetime64[ns]
end_date                     datetime64[ns]
period                               object
Model_Name                           object
version                              object
bad_rate                             object
s_apps_score_FSTPD30_gini           float64
s_apps_score_FSPD30_gini            float64
s_apps_score_FPD30_gini             float64
s_apps_score_FPD10_gini             float64
dtype: object

## Creating app score table 

In [26]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=5f93b95b-f6df-4991-ad7a-ffb978d2fea3>

In [27]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('s_apps_score_FSTPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_apps_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_apps_score_FPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_apps_score_fpd10_gini', 'FLOAT')
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=efc3ca7a-3e63-451c-b4f9-903743024e7e>

# sb_demo_score

## FPD10

In [28]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 312d825a-4dee-4f86-a20c-a825eea7ab40 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [29]:
df_sb_demo_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_demo_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2023-06-22 14:25:06,12d13dfc-e307-4605-af97-361327cb3247,0.125463,0,1
1,2024-07-21 10:34:01,4238aea5-2cbb-4356-879b-71b4d3486fd2,0.194623,0,1
2,2023-12-31 10:00:00,ad89c5e8-d5bd-49a9-a10d-82139364463f,0.135019,0,1
3,2025-01-12 11:44:47,5d1c5ea0-6808-45b4-bb45-76ea9812d42f,0.188463,0,1
4,2024-12-21 09:20:07,898c5779-730d-4755-8fb1-b18c254b30bd,0.220743,1,1


In [30]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd10, 'sb_demo_score', 'ln_fpd10_flag', 'FPD10')
sb_demo_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD10.shape}")
sb_demo_scoreFPD10.columns.values

The shape of dataframe after copy is:	(106, 7)


array(['start_date', 'end_date', 'sb_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [31]:
sb_demo_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,sb_demo_score_FPD10_gini,period,Model_Name,version,bad_rate
101,2024-12-23,2024-12-29,0.235289,Week,sb_demo_score,1.1.0,FPD10
102,2024-12-30,2025-01-05,0.257868,Week,sb_demo_score,1.1.0,FPD10
103,2025-01-01,2025-01-31,0.23787,Month,sb_demo_score,1.1.0,FPD10
104,2025-01-06,2025-01-12,0.166198,Week,sb_demo_score,1.1.0,FPD10
105,2025-01-13,2025-01-19,0.338719,Week,sb_demo_score,1.1.0,FPD10


## FPD30

In [32]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 6f38beaa-6d18-4cc3-a5ab-c34f035377a8 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [33]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd30, 'sb_demo_score', 'ln_fpd30_flag', 'FPD30')
sb_demo_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD30.shape}")
sb_demo_scoreFPD30.columns.values

The shape of dataframe after copy is:	(102, 7)


array(['start_date', 'end_date', 'sb_demo_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [34]:
sb_demo_scoreFPD30.head() 

Unnamed: 0,start_date,end_date,sb_demo_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.330909,Week,sb_demo_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.189531,Month,sb_demo_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.13037,Week,sb_demo_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.371746,Week,sb_demo_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,-0.052209,Week,sb_demo_score,1.1.0,FPD30


## FSPD30

In [35]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fspd30_flag,   -- fspd30
	ln_mature_fspd30_flag,	--- fspd30 observation
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID bee68f97-a6b5-4908-a7e8-fc1ea9931a8d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [36]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefspd30, 'sb_demo_score', 'ln_fspd30_flag', 'FSPD30')
sb_demo_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSPD30.shape}")
sb_demo_scoreFSPD30.columns.values

The shape of dataframe after copy is:	(97, 7)


array(['start_date', 'end_date', 'sb_demo_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [37]:
sb_demo_scoreFSPD30.head()

Unnamed: 0,start_date,end_date,sb_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.098383,Week,sb_demo_score,1.1.0,FSPD30
1,2023-06-01,2023-06-30,0.184759,Month,sb_demo_score,1.1.0,FSPD30
2,2023-06-05,2023-06-11,0.239596,Week,sb_demo_score,1.1.0,FSPD30
3,2023-06-12,2023-06-18,0.312568,Week,sb_demo_score,1.1.0,FSPD30
4,2023-06-19,2023-06-25,0.010796,Week,sb_demo_score,1.1.0,FSPD30


## FSTPD30

In [38]:
sq = """
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_demo_score,
    ln_fstpd30_flag,   -- fstpd30
	ln_mature_fstpd30_flag,	--- fstpd30 observation
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    sb_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 4829142c-a97e-4cbb-85a0-6c4aeeefe669 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [39]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefstpd30, 'sb_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
sb_demo_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSTPD30.shape}")
sb_demo_scoreFSTPD30.columns.values

The shape of dataframe after copy is:	(91, 7)


array(['start_date', 'end_date', 'sb_demo_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [40]:
sb_demo_scoreFSTPD30.head()

Unnamed: 0,start_date,end_date,sb_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.215686,Week,sb_demo_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.165765,Month,sb_demo_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.281272,Week,sb_demo_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.142612,Week,sb_demo_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.096533,Week,sb_demo_score,1.1.0,FSTPD30


# Combining sb demo score

In [41]:
import functools

dataframes = [sb_demo_scoreFPD10, sb_demo_scoreFPD30, sb_demo_scoreFSPD30, sb_demo_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sb_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'sb_demo_score_FPD30_gini',
       'sb_demo_score_FSPD30_gini', 'sb_demo_score_FSTPD30_gini'],
      dtype=object)

In [42]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','sb_demo_score_FPD10_gini','sb_demo_score_FPD30_gini',
       'sb_demo_score_FSPD30_gini', 'sb_demo_score_FSTPD30_gini']].copy()

## creating sb demo score table 

In [43]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=b497bc9f-289b-4b50-8191-531de85ad66c>

In [44]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('sb_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('sb_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('sb_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('sb_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=0ba9ab6d-0ef5-427f-9a39-be69af3d25f3>

# s_cic_score

## FPD10

In [45]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 133b5563-b2dc-42a9-8b8f-b2f5354594a3 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [46]:
df_s_cic_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_cic_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-12-21 09:20:07,898c5779-730d-4755-8fb1-b18c254b30bd,0.106536,1,1
1,2024-11-20 12:36:49,c6e59c0c-754a-4db2-9f12-53348b1a3ccb,0.057237,0,1
2,2024-11-22 11:09:40,f3a9b689-93a8-4906-9fad-ef191d00bbd9,0.098077,0,1
3,2024-12-04 16:39:23,2f278926-7fe7-4713-b2fe-edbe9d967ec8,0.068144,0,1
4,2025-01-07 11:53:39,9c588647-0491-4a27-b1ab-75b7659ac05c,0.075922,0,1


In [47]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd10, 's_cic_score', 'ln_fpd10_flag', 'FPD10')
s_cic_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD10.shape}")
s_cic_scoreFPD10.columns.values

The shape of dataframe after copy is:	(105, 7)


array(['start_date', 'end_date', 's_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [48]:
s_cic_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,s_cic_score_FPD10_gini,period,Model_Name,version,bad_rate
100,2024-12-23,2024-12-29,0.156457,Week,s_cic_score,1.1.0,FPD10
101,2024-12-30,2025-01-05,0.225236,Week,s_cic_score,1.1.0,FPD10
102,2025-01-01,2025-01-31,0.180375,Month,s_cic_score,1.1.0,FPD10
103,2025-01-06,2025-01-12,0.166526,Week,s_cic_score,1.1.0,FPD10
104,2025-01-13,2025-01-19,0.030842,Week,s_cic_score,1.1.0,FPD10


## FPD30

In [49]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 62528cb3-63b0-40d4-b79b-0ea2e676204d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [50]:
df_s_cic_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,s_cic_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.046459,0,1
1,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.14187,0,1
2,2024-12-14 18:19:54,104972df-9b52-4d02-810a-c4dc4e6c292e,0.070071,0,1
3,2024-05-07 13:57:01,ce1a4a6a-f9a8-4353-a102-5c2874173c8e,0.22119,0,1
4,2024-11-20 19:30:09,59c7d18c-6a2e-49c9-80e4-5f43646cd1a6,0.066702,0,1


In [51]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd30, 's_cic_score', 'ln_fpd30_flag', 'FPD30')
s_cic_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD30.shape}")
s_cic_scoreFPD30.columns.values

The shape of dataframe after copy is:	(101, 7)


array(['start_date', 'end_date', 's_cic_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [52]:
s_cic_scoreFPD30.head()

Unnamed: 0,start_date,end_date,s_cic_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.523077,Week,s_cic_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.248378,Month,s_cic_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.437229,Week,s_cic_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.514851,Week,s_cic_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,0.035484,Week,s_cic_score,1.1.0,FPD30


In [53]:
s_cic_scoreFPD30.describe()

Unnamed: 0,start_date,end_date,s_cic_score_FPD30_gini
count,101,101,101.0
mean,2024-03-11 12:21:23.168316928,2024-03-21 22:34:27.326732800,0.327125
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.035484
25%,2023-10-23 00:00:00,2023-10-31 00:00:00,0.268895
50%,2024-03-11 00:00:00,2024-03-24 00:00:00,0.315266
75%,2024-08-01 00:00:00,2024-08-11 00:00:00,0.380208
max,2024-12-23 00:00:00,2024-12-31 00:00:00,0.940476
std,,,0.114556


## FSPD30

In [54]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 65c28be1-5d42-4f11-bb4e-7a69a0876733 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [55]:
gini_results = calculate_periodic_gini(df_s_cic_scorefspd30, 's_cic_score', 'ln_fspd30_flag', 'FSPD30')
s_cic_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSPD30.shape}")
s_cic_scoreFSPD30.columns.values

The shape of dataframe after copy is:	(96, 7)


array(['start_date', 'end_date', 's_cic_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [56]:
s_cic_scoreFSPD30.describe()

Unnamed: 0,start_date,end_date,s_cic_score_FSPD30_gini
count,96,96,96.0
mean,2024-02-26 06:15:00,2024-03-07 16:00:00,0.320292
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.066869
25%,2023-10-14 06:00:00,2023-10-27 06:00:00,0.270472
50%,2024-02-28 00:00:00,2024-03-06 12:00:00,0.319791
75%,2024-07-09 18:00:00,2024-07-22 18:00:00,0.359617
max,2024-11-25 00:00:00,2024-12-01 00:00:00,0.680714
std,,,0.087323


In [57]:
s_cic_scoreFSPD30.tail()

Unnamed: 0,start_date,end_date,s_cic_score_FSPD30_gini,period,Model_Name,version,bad_rate
91,2024-11-01,2024-11-30,0.263308,Month,s_cic_score,1.1.0,FSPD30
92,2024-11-04,2024-11-10,0.342195,Week,s_cic_score,1.1.0,FSPD30
93,2024-11-11,2024-11-17,0.202355,Week,s_cic_score,1.1.0,FSPD30
94,2024-11-18,2024-11-24,0.273222,Week,s_cic_score,1.1.0,FSPD30
95,2024-11-25,2024-12-01,0.105903,Week,s_cic_score,1.1.0,FSPD30


## FSTPD30

In [58]:
sq = """
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    s_cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 488491e4-a1e6-45d6-997a-a4e1e8e2065a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [59]:
gini_results = calculate_periodic_gini(df_s_cic_scorefstpd30, 's_cic_score', 'ln_fstpd30_flag', 'FSTPD30')
s_cic_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSTPD30.shape}")
s_cic_scoreFSTPD30.columns.values

The shape of dataframe after copy is:	(90, 7)


array(['start_date', 'end_date', 's_cic_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [60]:
s_cic_scoreFSTPD30.describe()

Unnamed: 0,start_date,end_date,s_cic_score_FSTPD30_gini
count,90,90,90.0
mean,2024-02-09 02:56:00,2024-02-19 13:36:00,0.306488
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.154422
25%,2023-10-03 18:00:00,2023-10-16 18:00:00,0.257354
50%,2024-02-08 12:00:00,2024-02-21 12:00:00,0.300352
75%,2024-06-15 06:00:00,2024-06-28 06:00:00,0.325598
max,2024-10-21 00:00:00,2024-10-31 00:00:00,0.571813
std,,,0.073241


In [61]:
s_cic_scoreFSTPD30.tail()

Unnamed: 0,start_date,end_date,s_cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
85,2024-09-30,2024-10-06,0.313062,Week,s_cic_score,1.1.0,FSTPD30
86,2024-10-01,2024-10-31,0.302723,Month,s_cic_score,1.1.0,FSTPD30
87,2024-10-07,2024-10-13,0.321294,Week,s_cic_score,1.1.0,FSTPD30
88,2024-10-14,2024-10-20,0.271744,Week,s_cic_score,1.1.0,FSTPD30
89,2024-10-21,2024-10-27,0.299608,Week,s_cic_score,1.1.0,FSTPD30


# Combining s_cic_score

In [62]:
import functools

dataframes = [s_cic_scoreFPD10, s_cic_scoreFPD30, s_cic_scoreFSPD30, s_cic_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 's_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 's_cic_score_FPD30_gini',
       's_cic_score_FSPD30_gini', 's_cic_score_FSTPD30_gini'],
      dtype=object)

In [63]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','s_cic_score_FPD10_gini','s_cic_score_FPD30_gini', 's_cic_score_FSPD30_gini', 's_cic_score_FSTPD30_gini']].copy()

## Creating the table

In [64]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=7b896d41-0381-4398-8f3a-9c0c7b9eff82>

In [65]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('s_cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('s_cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('s_cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=dd731b13-78a5-48c5-a70d-a98b87b23eed>

# sb_stack_score

## FPD10

In [66]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd10.head()

Job ID a83ce1db-504d-43c7-bd35-1dac3d62a849 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.147727,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.28019,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.032809,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.074894,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.242276,0,1


In [67]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd10, 'sb_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(106, 7)


array(['start_date', 'end_date', 'sb_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [68]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD10_gini
count,106,106,106.0
mean,2024-03-20 14:56:36.226415104,2024-03-31 01:35:05.660377344,0.349336
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.067879
25%,2023-10-24 18:00:00,2023-11-01 06:00:00,0.301373
50%,2024-03-21 12:00:00,2024-03-31 00:00:00,0.333806
75%,2024-08-17 06:00:00,2024-08-29 12:00:00,0.411849
max,2025-01-13 00:00:00,2025-01-31 00:00:00,0.659591
std,,,0.097183


In [69]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.482902,Week,sb_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.318382,Month,sb_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.417052,Week,sb_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.659591,Week,sb_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,-0.067879,Week,sb_stack_score,1.1.0,FPD10


## FPD30

In [70]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd30.head()

Job ID faf5ed6d-d6e9-42c7-a15a-56a5d1f55b83 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.147727,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.28019,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.032809,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.074894,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.242276,0,1


In [71]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd30, 'sb_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(102, 7)


array(['start_date', 'end_date', 'sb_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [72]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD30_gini
count,102,102,102.0
mean,2024-03-09 05:52:56.470588160,2024-03-19 15:03:31.764705792,0.370856
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.013387
25%,2023-10-17 18:00:00,2023-10-29 12:00:00,0.32419
50%,2024-03-07 12:00:00,2024-03-20 12:00:00,0.363071
75%,2024-07-31 06:00:00,2024-08-09 06:00:00,0.428887
max,2024-12-23 00:00:00,2024-12-31 00:00:00,0.636364
std,,,0.096995


In [73]:
M2FPD30.tail()

Unnamed: 0,start_date,end_date,sb_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
97,2024-12-01,2024-12-31,0.328661,Month,sb_stack_score,1.1.0,FPD30
98,2024-12-02,2024-12-08,0.370326,Week,sb_stack_score,1.1.0,FPD30
99,2024-12-09,2024-12-15,0.301145,Week,sb_stack_score,1.1.0,FPD30
100,2024-12-16,2024-12-22,0.325655,Week,sb_stack_score,1.1.0,FPD30
101,2024-12-23,2024-12-29,0.293112,Week,sb_stack_score,1.1.0,FPD30


## FSPD30

In [74]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefspd30.head()

Job ID 2663888b-d107-4a04-b0b5-8798c50102f7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2023-06-22 14:25:06,12d13dfc-e307-4605-af97-361327cb3247,0.156423,0,1
1,2024-07-21 10:34:01,4238aea5-2cbb-4356-879b-71b4d3486fd2,0.090595,0,1
2,2023-12-31 10:00:00,ad89c5e8-d5bd-49a9-a10d-82139364463f,0.016439,0,1
3,2024-11-20 12:36:49,c6e59c0c-754a-4db2-9f12-53348b1a3ccb,0.046784,0,1
4,2024-11-22 11:09:40,f3a9b689-93a8-4906-9fad-ef191d00bbd9,0.109773,0,1


In [75]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefspd30, 'sb_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

The shape of dataframe after copy is:	(97, 7)


array(['start_date', 'end_date', 'sb_stack_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [76]:
M3FSPD30.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FSPD30_gini
count,97,97,97.0
mean,2024-02-24 00:29:41.443299072,2024-03-05 09:09:16.701030912,0.375034
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.144399
25%,2023-10-09 00:00:00,2023-10-22 00:00:00,0.326019
50%,2024-02-26 00:00:00,2024-03-03 00:00:00,0.355523
75%,2024-07-08 00:00:00,2024-07-21 00:00:00,0.425277
max,2024-11-25 00:00:00,2024-12-01 00:00:00,0.552671
std,,,0.077535


In [77]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,sb_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
92,2024-11-01,2024-11-30,0.332131,Month,sb_stack_score,1.1.0,FSPD30
93,2024-11-04,2024-11-10,0.317366,Week,sb_stack_score,1.1.0,FSPD30
94,2024-11-11,2024-11-17,0.343547,Week,sb_stack_score,1.1.0,FSPD30
95,2024-11-18,2024-11-24,0.304006,Week,sb_stack_score,1.1.0,FSPD30
96,2024-11-25,2024-12-01,0.336791,Week,sb_stack_score,1.1.0,FSPD30


## FSTPD30

In [78]:
# sb_stack_score

sq = """
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sb_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    sb_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefstpd30.head()

Job ID 1c5dda88-41af-49e6-84e5-a4b8691117c0 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sb_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.147727,0,1
1,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.032809,1,1
2,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.242276,1,1
3,2024-05-07 13:57:01,ce1a4a6a-f9a8-4353-a102-5c2874173c8e,0.12231,0,1
4,2024-07-21 11:18:18,afdded1c-4c11-4661-bca2-35fc16462709,0.12092,1,1


In [79]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefstpd30, 'sb_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(91, 7)


array(['start_date', 'end_date', 'sb_stack_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [80]:
M4FSTPD30.describe()

Unnamed: 0,start_date,end_date,sb_stack_score_FSTPD30_gini
count,91,91,91.0
mean,2024-02-06 22:09:13.846153728,2024-02-17 07:38:54.065934080,0.34958
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.162606
25%,2023-10-01 12:00:00,2023-10-11 12:00:00,0.309198
50%,2024-02-05 00:00:00,2024-02-18 00:00:00,0.339706
75%,2024-06-13 12:00:00,2024-06-26 12:00:00,0.389561
max,2024-10-21 00:00:00,2024-10-31 00:00:00,0.506416
std,,,0.061146


In [81]:
M4FSTPD30.tail()

Unnamed: 0,start_date,end_date,sb_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate
86,2024-09-30,2024-10-06,0.319762,Week,sb_stack_score,1.1.0,FSTPD30
87,2024-10-01,2024-10-31,0.309712,Month,sb_stack_score,1.1.0,FSTPD30
88,2024-10-07,2024-10-13,0.327533,Week,sb_stack_score,1.1.0,FSTPD30
89,2024-10-14,2024-10-20,0.297031,Week,sb_stack_score,1.1.0,FSTPD30
90,2024-10-21,2024-10-27,0.320464,Week,sb_stack_score,1.1.0,FSTPD30


## Combining the dataframes

In [82]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sb_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'sb_stack_score_FPD30_gini',
       'sb_stack_score_FSPD30_gini', 'sb_stack_score_FSTPD30_gini'],
      dtype=object)

In [83]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'sb_stack_score_FPD10_gini', 'sb_stack_score_FPD30_gini',  'sb_stack_score_FSPD30_gini', 'sb_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [84]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c65884e8-9754-44d2-bfd2-02ba4487423a>

In [85]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('sb_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('sb_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('sb_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('sb_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=721b394e-6585-4a09-bd76-e71a624624c3>

# sa_stack_score

## FPD10

In [86]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd10.head()

Job ID 4ddb8269-c1e5-4967-afaf-6d9266340cd8 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.153215,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.301541,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.04587,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.055139,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.282324,0,1


In [87]:
sa_stack_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.153215,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.301541,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.04587,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.055139,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.282324,0,1


In [88]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd10, 'sa_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(106, 7)


array(['start_date', 'end_date', 'sa_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [89]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD10_gini
count,106,106,106.0
mean,2024-03-20 14:56:36.226415104,2024-03-31 01:35:05.660377344,0.371102
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.060606
25%,2023-10-24 18:00:00,2023-11-01 06:00:00,0.328973
50%,2024-03-21 12:00:00,2024-03-31 00:00:00,0.357341
75%,2024-08-17 06:00:00,2024-08-29 12:00:00,0.436444
max,2025-01-13 00:00:00,2025-01-31 00:00:00,0.663522
std,,,0.098484


In [90]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.546289,Week,sa_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.347113,Month,sa_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.469266,Week,sa_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.663522,Week,sa_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,-0.060606,Week,sa_stack_score,1.1.0,FPD10


## FPD30

In [91]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd30.head()

Job ID 7966f45f-7dcb-4b19-b9a7-daa56b3399fd successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.153215,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.301541,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.04587,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.055139,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.282324,0,1


In [92]:
sa_stack_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.153215,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.301541,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.04587,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.055139,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.282324,0,1


In [93]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd30, 'sa_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(102, 7)


array(['start_date', 'end_date', 'sa_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [94]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD30_gini
count,102,102,102.0
mean,2024-03-09 05:52:56.470588160,2024-03-19 15:03:31.764705792,0.394905
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.03079
25%,2023-10-17 18:00:00,2023-10-29 12:00:00,0.355277
50%,2024-03-07 12:00:00,2024-03-20 12:00:00,0.389325
75%,2024-07-31 06:00:00,2024-08-09 06:00:00,0.451029
max,2024-12-23 00:00:00,2024-12-31 00:00:00,0.633803
std,,,0.097723


In [95]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,sa_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.507273,Week,sa_stack_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.334271,Month,sa_stack_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.420741,Week,sa_stack_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.633803,Week,sa_stack_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,-0.03079,Week,sa_stack_score,1.1.0,FPD30


## FSTPD30

In [96]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefspd30.head()

Job ID e033f074-8281-401c-80b3-55123d1aa358 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2023-06-22 14:25:06,12d13dfc-e307-4605-af97-361327cb3247,0.169229,0,1
1,2024-07-21 10:34:01,4238aea5-2cbb-4356-879b-71b4d3486fd2,0.0956,0,1
2,2023-12-31 10:00:00,ad89c5e8-d5bd-49a9-a10d-82139364463f,0.022477,0,1
3,2024-11-20 12:36:49,c6e59c0c-754a-4db2-9f12-53348b1a3ccb,0.037582,0,1
4,2024-11-22 11:09:40,f3a9b689-93a8-4906-9fad-ef191d00bbd9,0.097,0,1


In [97]:
gini_results = calculate_periodic_gini(sa_stack_scorefspd30, 'sa_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

The shape of dataframe after copy is:	(97, 7)


array(['start_date', 'end_date', 'sa_stack_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [98]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,sa_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
92,2024-11-01,2024-11-30,0.355617,Month,sa_stack_score,1.1.0,FSPD30
93,2024-11-04,2024-11-10,0.355446,Week,sa_stack_score,1.1.0,FSPD30
94,2024-11-11,2024-11-17,0.355728,Week,sa_stack_score,1.1.0,FSPD30
95,2024-11-18,2024-11-24,0.334413,Week,sa_stack_score,1.1.0,FSPD30
96,2024-11-25,2024-12-01,0.320317,Week,sa_stack_score,1.1.0,FSPD30


## FSTPD30

In [99]:
# sa_stack_score

sq = """
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    sa_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    sa_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefstpd30.head()

Job ID 543368b6-a422-4658-aeeb-66b21bfe2871 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.153215,0,1
1,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.04587,1,1
2,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.282324,1,1
3,2024-05-07 13:57:01,ce1a4a6a-f9a8-4353-a102-5c2874173c8e,0.18535,0,1
4,2024-07-21 11:18:18,afdded1c-4c11-4661-bca2-35fc16462709,0.136095,1,1


In [100]:
sa_stack_scorefstpd30.tail()

Unnamed: 0,disbursementdate,digitalLoanAccountId,sa_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
124268,2024-02-11 13:09:16,ab6acfbe-e6c9-4be4-b1cb-95d6fb09b860,0.095912,1,1
124269,2024-10-06 14:06:34,9f9613b5-bb33-463b-aca1-09f1a376c72d,0.240745,1,1
124270,2024-09-24 11:31:46,52c4651e-76f5-45d1-867e-bb77dbc75ef4,0.236803,1,1
124271,2024-04-28 19:48:05,afb5e47d-1ceb-4e75-b540-0da4838c385b,0.076165,1,1
124272,2024-10-16 18:27:30,61e5730f-5a8e-4e8b-8b03-2ebdaa9266fb,0.026642,1,1


In [101]:
gini_results = calculate_periodic_gini(sa_stack_scorefstpd30, 'sa_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(91, 7)


array(['start_date', 'end_date', 'sa_stack_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

## Combining the dataframes

In [102]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'sa_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'sa_stack_score_FPD30_gini',
       'sa_stack_score_FSPD30_gini', 'sa_stack_score_FSTPD30_gini'],
      dtype=object)

In [103]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'sa_stack_score_FPD10_gini', 'sa_stack_score_FPD30_gini',  'sa_stack_score_FSPD30_gini', 'sa_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [104]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a64b244f-92af-4e94-89a7-38615a6dfc7c>

In [105]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('sa_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('sa_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('sa_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('sa_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=979beb16-2473-4e0e-9eba-4dc577ccbfaf>

# gen_credo_score

## FPD10

In [106]:


sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd10.head()

Job ID 33c0843d-508e-4f19-ba66-f4e0d33233d9 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,gen_credo_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.076897,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.292605,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.150332,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.184645,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.232588,0,1


In [107]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd10, 'gen_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(132, 7)


array(['start_date', 'end_date', 'gen_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [108]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,gen_credo_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.246725,Month,gen_credo_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.301125,Week,gen_credo_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,0.2734,Week,gen_credo_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.667532,Week,gen_credo_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,0.086505,Week,gen_credo_score,1.1.0,FPD10


## FPD30

In [109]:
sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd30.head()

Job ID 0cb97077-967c-4d8e-a645-fa03070e1bad successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,gen_credo_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.076897,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.292605,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.150332,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.184645,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.232588,0,1


In [110]:
gen_credo_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,gen_credo_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-04-29 17:13:42,ec0662e6-2722-41c4-b483-70f9ff80ca2c,0.076897,0,1
1,2024-11-30 11:10:23,5a00b591-2f4c-4aa5-97e5-a9a36f9f5b72,0.292605,0,1
2,2023-11-08 12:53:36,183b56bf-1764-4bf7-a348-a70c4f504803,0.150332,0,1
3,2024-11-26 18:25:21,e7dd1f77-2200-43a6-afd2-a3ffaa55ebf6,0.184645,0,1
4,2024-09-18 15:41:01,1000d052-5dc0-43fd-bebc-df33aac78547,0.232588,0,1


In [111]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd30, 'gen_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(128, 7)


array(['start_date', 'end_date', 'gen_credo_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [112]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,gen_credo_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.253005,Month,gen_credo_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.342149,Week,gen_credo_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,0.407163,Week,gen_credo_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.667532,Week,gen_credo_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,0.025547,Week,gen_credo_score,1.1.0,FPD30


## FSPD30

In [113]:
sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefspd30, 'gen_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

Job ID c1504b1f-1559-454b-8272-e2fcaa97f797 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  gen_credo_score  \
0 2023-06-22 14:25:06  12d13dfc-e307-4605-af97-361327cb3247         0.139659   
1 2024-07-21 10:34:01  4238aea5-2cbb-4356-879b-71b4d3486fd2         0.054078   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(123, 7)
['start_date' 'end_date' 'gen_credo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,gen_credo_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.172055,Month,gen_credo_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.262803,Week,gen_credo_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.189744,Week,gen_credo_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.410738,Week,gen_credo_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,0.073579,Week,gen_credo_score,1.1.0,FSPD30


## FSTPD30

In [114]:
sq = """
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    gen_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    gen_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefstpd30, 'gen_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()

Job ID 6c1b0ad7-4b8b-4a2b-95c9-8cabdd0f5c72 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  gen_credo_score  \
0 2024-04-29 17:13:42  ec0662e6-2722-41c4-b483-70f9ff80ca2c         0.076897   
1 2023-11-08 12:53:36  183b56bf-1764-4bf7-a348-a70c4f504803         0.150332   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                1                       1  
The shape of dataframe after copy is:	(117, 7)
['start_date' 'end_date' 'gen_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,gen_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.180837,Month,gen_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.282959,Week,gen_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.189908,Week,gen_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.19884,Week,gen_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.162845,Week,gen_credo_score,1.1.0,FSTPD30


## Combining tables 

In [115]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'gen_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'gen_credo_score_FPD30_gini',
       'gen_credo_score_FSPD30_gini', 'gen_credo_score_FSTPD30_gini'],
      dtype=object)

In [116]:
final_df = final_df[['start_date', 'end_date','period',
       'Model_Name', 'version', 'bad_rate', 'gen_credo_score_FPD10_gini','gen_credo_score_FPD30_gini',   'gen_credo_score_FSPD30_gini', 'gen_credo_score_FSTPD30_gini']].copy()

In [117]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=79e4f877-0c82-47e6-8f51-5de3040d61c1>

In [118]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('gen_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('gen_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('gen_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('gen_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a7649c18-4255-4ffa-b21f-7bcb9df39532>

# c_credo_score

In [119]:
sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd10, 'c_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd30, 'c_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefspd30, 'c_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    c_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    c_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefstpd30, 'c_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 6c262fec-e455-4344-a431-f984d27d18f2 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(132, 7)
Job ID 436196a2-5ef3-4b86-9485-a85727edc036 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(128, 7)
Job ID 3b1f5415-b0bf-4a77-9922-89eac0ec0ab5 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  c_credo_score  \
0 2023-06-22 14:25:06  12d13dfc-e307-4605-af97-361327cb3247       0.381071   
1 2024-07-21 10:34:01  4238aea5-2cbb-4356-879b-71b4d3486fd2       0.568046   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(123, 7)
['start_date' 'end_date' 'c_credo_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 0eeaf4bb-5be9-48fc-b324-d3cb8932a2d9 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  c_credo_score  \
0 2023-06-22 14:25:06  12d13dfc-e307-4605-af97-361327cb3247       0.381071   
1 2024-07-21 10:34:01  4238aea5-2cbb-4356-879b-71b4d3486fd2       0.568046   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(117, 7)
['start_date' 'end_date' 'c_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,c_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.035948,Month,c_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.036009,Week,c_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.062143,Week,c_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275891,Week,c_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,-0.050558,Week,c_credo_score,1.1.0,FSTPD30


In [120]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'c_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'c_credo_score_FPD30_gini',
       'c_credo_score_FSPD30_gini', 'c_credo_score_FSTPD30_gini'],
      dtype=object)

In [121]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate','c_credo_score_FPD10_gini', 'c_credo_score_FPD30_gini',    'c_credo_score_FSPD30_gini', 'c_credo_score_FSTPD30_gini']].copy()

In [122]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f3589aa9-e8d3-4570-b81f-039365d5944d>

In [123]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('c_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('c_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('c_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('c_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=aa72531d-b3d2-4b86-9260-1bd39ef4da88>

# s_credo_score

In [124]:


sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd10, 's_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd30, 's_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefspd30, 's_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    s_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    s_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefstpd30, 's_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 91ae9228-1053-4213-9484-c6c2290d700b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(132, 7)
Job ID 15f6583f-5b67-44a1-ade6-958fd06da1da successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(128, 7)
Job ID 05e36bba-5aaf-4b4c-8501-ae6a282df25b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  s_credo_score  \
0 2024-04-29 17:13:42  ec0662e6-2722-41c4-b483-70f9ff80ca2c       0.132600   
1 2023-11-08 12:53:36  183b56bf-1764-4bf7-a348-a70c4f504803       0.122895   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(123, 7)
['start_date' 'end_date' 's_credo_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 2d578685-babe-4733-b147-6f6bc0e4a640 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  s_credo_score  \
0 2023-06-22 14:25:06  12d13dfc-e307-4605-af97-361327cb3247       0.138661   
1 2024-07-21 10:34:01  4238aea5-2cbb-4356-879b-71b4d3486fd2       0.072748   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(117, 7)
['start_date' 'end_date' 's_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,s_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.219086,Month,s_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.261708,Week,s_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.251056,Week,s_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275062,Week,s_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.157554,Week,s_credo_score,1.1.0,FSTPD30


## Combining data

In [125]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 's_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 's_credo_score_FPD30_gini',
       's_credo_score_FSPD30_gini', 's_credo_score_FSTPD30_gini'],
      dtype=object)

In [126]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate','s_credo_score_FPD10_gini', 's_credo_score_FPD30_gini',    's_credo_score_FSPD30_gini', 's_credo_score_FSTPD30_gini']].copy()

In [127]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=8f3b2c64-18c8-4a65-b08a-fb34440c9c2a>

In [128]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('s_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('s_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('s_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('s_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=44e33e7e-e0bc-4a6e-813a-162cdb9ae379>

In [129]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,s_credo_score_FPD10_gini,s_credo_score_FPD30_gini,s_credo_score_FSPD30_gini,s_credo_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,s_credo_score,1.1.0,FPD10,0.230181,,,
1,2023-01-02,2023-01-08,Week,s_credo_score,1.1.0,FPD10,0.248978,,,
2,2023-01-09,2023-01-15,Week,s_credo_score,1.1.0,FPD10,0.296337,,,
3,2023-01-16,2023-01-22,Week,s_credo_score,1.1.0,FPD10,0.52987,,,
4,2023-01-23,2023-01-29,Week,s_credo_score,1.1.0,FPD10,0.079585,,,


# fu_credo_score

In [130]:


sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd10, 'fu_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd30, 'fu_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefspd30, 'fu_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    fu_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    fu_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefstpd30, 'fu_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID ef8222aa-cd29-4216-a0e1-cbf7a3b44371 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(132, 7)
Job ID 8515e362-42a5-4f29-a11a-25b581fbc367 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(128, 7)
Job ID ce311bf4-ff31-4392-9a5f-69ef55f7bfd4 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  fu_credo_score  \
0 2024-04-29 17:13:42  ec0662e6-2722-41c4-b483-70f9ff80ca2c        0.095045   
1 2023-11-08 12:53:36  183b56bf-1764-4bf7-a348-a70c4f504803        0.181235   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(123, 7)
['start_date' 'end_date' 'fu_credo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID debed2b2-7414-4989-b022-51df424bd85a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  fu_credo_score  \
0 2024-04-29 17:13:42  ec0662e6-2722-41c4-b483-70f9ff80ca2c        0.095045   
1 2023-11-08 12:53:36  183b56bf-1764-4bf7-a348-a70c4f504803        0.181235   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                1                       1  
The shape of dataframe after copy is:	(117, 7)
['start_date' 'end_date' 'fu_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,fu_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.07286,Month,fu_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.002165,Week,fu_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.056674,Week,fu_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.326429,Week,fu_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.109935,Week,fu_credo_score,1.1.0,FSTPD30


## Combining data

In [131]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'fu_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'fu_credo_score_FPD30_gini',
       'fu_credo_score_FSPD30_gini', 'fu_credo_score_FSTPD30_gini'],
      dtype=object)

In [132]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate','fu_credo_score_FPD10_gini', 'fu_credo_score_FPD30_gini',    'fu_credo_score_FSPD30_gini', 'fu_credo_score_FSTPD30_gini']].copy()

In [133]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=072afdc8-28ee-4fd9-a89e-04bdac5fc2d4>

In [134]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('fu_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('fu_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('fu_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('fu_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=cc052d2b-02e3-4c50-9217-36ee1aa6e6c8>

In [135]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,fu_credo_score_FPD10_gini,fu_credo_score_FPD30_gini,fu_credo_score_FSPD30_gini,fu_credo_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,fu_credo_score,1.1.0,FPD10,0.023608,,,
1,2023-01-02,2023-01-08,Week,fu_credo_score,1.1.0,FPD10,-0.032464,,,
2,2023-01-09,2023-01-15,Week,fu_credo_score,1.1.0,FPD10,0.016278,,,
3,2023-01-16,2023-01-22,Week,fu_credo_score,1.1.0,FPD10,0.153247,,,
4,2023-01-23,2023-01-29,Week,fu_credo_score,1.1.0,FPD10,0.120242,,,


# r_credo_score

In [136]:


sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd10, 'r_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd30, 'r_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefspd30, 'r_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    r_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    r_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefstpd30, 'r_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 1bce65e5-51a2-4849-9a77-e890b8925342 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(132, 7)
Job ID 92f6ccaf-8097-46e4-a487-34393c2b0546 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(128, 7)
Job ID 9f7712ec-9c0c-4e7a-a6c9-c83d48f8a760 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  r_credo_score  \
0 2024-04-29 17:13:42  ec0662e6-2722-41c4-b483-70f9ff80ca2c       0.345692   
1 2023-11-08 12:53:36  183b56bf-1764-4bf7-a348-a70c4f504803       0.150332   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(123, 7)
['start_date' 'end_date' 'r_credo_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 8a034567-41aa-4986-8db2-7d142493bb59 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  r_credo_score  \
0 2023-06-22 14:25:06  12d13dfc-e307-4605-af97-361327cb3247       0.212395   
1 2024-07-21 10:34:01  4238aea5-2cbb-4356-879b-71b4d3486fd2       0.128083   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(117, 7)
['start_date' 'end_date' 'r_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,r_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.048984,Month,r_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.014955,Week,r_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.193885,Week,r_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,-0.019056,Week,r_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.024691,Week,r_credo_score,1.1.0,FSTPD30


## Combining data

In [137]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'r_credo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'r_credo_score_FPD30_gini',
       'r_credo_score_FSPD30_gini', 'r_credo_score_FSTPD30_gini'],
      dtype=object)

In [138]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','r_credo_score_FPD10_gini','r_credo_score_FPD30_gini',    'r_credo_score_FSPD30_gini', 'r_credo_score_FSTPD30_gini']].copy()

In [139]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=2936b61a-486e-4676-b816-808f42ca2d67>

In [140]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('r_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('r_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('r_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('r_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=6effb36c-460b-46c9-baf2-05f3aae96c3d>

# old_gen_credo_score

In [141]:


sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd10, 'old_gen_credo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd30, 'old_gen_credo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefspd30, 'old_gen_credo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_gen_credo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_gen_credo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefstpd30, 'old_gen_credo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID fc5fa070-64b5-49a5-9d88-2e04e1bde160 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(132, 7)
Job ID d990cfc1-4309-4b43-b0ea-c4b15592548f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(128, 7)
Job ID 61ba3e88-d064-4871-9287-dcfb8ad97c5d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-04-29 17:13:42  ec0662e6-2722-41c4-b483-70f9ff80ca2c   
1 2024-05-07 13:57:01  ce1a4a6a-f9a8-4353-a102-5c2874173c8e   

  old_gen_credo_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0                 400               0                      1  
1                 440               0                      1  
The shape of dataframe after copy is:	(123, 7)
['start_date' 'end_date' 'old_gen_credo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 83d09d0b-c6f7-40ff-a3a9-b761c1854ce1 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-04-29 17:13:42  ec0662e6-2722-41c4-b483-70f9ff80ca2c   
1 2024-05-07 13:57:01  ce1a4a6a-f9a8-4353-a102-5c2874173c8e   

  old_gen_credo_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                 400                0                       1  
1                 440                0                       1  
The shape of dataframe after copy is:	(117, 7)
['start_date' 'end_date' 'old_gen_credo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,old_gen_credo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.120833,Month,old_gen_credo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.229296,Week,old_gen_credo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.138871,Week,old_gen_credo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.132488,Week,old_gen_credo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.000744,Week,old_gen_credo_score,1.1.0,FSTPD30


## Combining data

In [142]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_gen_credo_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'old_gen_credo_score_FPD30_gini',
       'old_gen_credo_score_FSPD30_gini',
       'old_gen_credo_score_FSTPD30_gini'], dtype=object)

In [143]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_gen_credo_score_FPD10_gini','old_gen_credo_score_FPD30_gini',    'old_gen_credo_score_FSPD30_gini', 'old_gen_credo_score_FSTPD30_gini']].copy()

In [144]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=1f65da3e-b8e5-4928-bbfc-d45c7b6533cf>

In [145]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_gen_credo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_gen_credo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_gen_credo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_gen_credo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=4ae934f3-bde1-4c38-99d8-28eec47a6ee3>

In [146]:
sq = """select * from prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""

df = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 77b02e65-5f12-4e45-b398-4b03aeffaf3b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [147]:
df['bad_rate'].value_counts()

bad_rate
FPD10      132
FPD30      128
FSPD30     123
FSTPD30    117
Name: count, dtype: int64

# old_cic_score

In [148]:


sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd10, 'old_cic_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd30, 'old_cic_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefspd30, 'old_cic_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefstpd30, 'old_cic_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 2be6e2f5-e24c-4283-8ccb-abe26180cf9e successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
Job ID 5616a980-b13f-42d7-8fea-b3974157c9e6 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(88, 7)
Job ID 126553f3-01f5-4250-9855-adf4036f138d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2024-11-22 19:40:30  d0e6cf6d-c167-4643-8f13-3097b15826b6         541.0   
1 2024-02-20 18:17:04  cba282c2-7020-49fa-a082-c3193150510d       537.000   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(83, 7)
['start_date' 'end_date' 'old_cic_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 8fce90fb-0507-4f4b-ba26-3abf3e98e11a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2024-02-20 18:17:04  cba282c2-7020-49fa-a082-c3193150510d       537.000   
1 2024-07-01 13:02:16  daa3bebc-3b11-4abe-970c-3c3005f29c06       584.000   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                1                       1  
1                1                       1  
The shape of dataframe after copy is:	(77, 7)
['start_date' 'end_date' 'old_cic_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,old_cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-08-01,2023-08-31,0.086705,Month,old_cic_score,1.1.0,FSTPD30
1,2023-08-21,2023-08-27,0.064138,Week,old_cic_score,1.1.0,FSTPD30
2,2023-08-28,2023-09-03,0.120423,Week,old_cic_score,1.1.0,FSTPD30
3,2023-09-01,2023-09-30,0.293879,Month,old_cic_score,1.1.0,FSTPD30
4,2023-09-04,2023-09-10,0.159596,Week,old_cic_score,1.1.0,FSTPD30


## Combining data

In [149]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_cic_score_FPD30_gini',
       'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini'],
      dtype=object)

In [150]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_cic_score_FPD10_gini','old_cic_score_FPD30_gini',    'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini']].copy()

In [151]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=10ab94f8-a9da-45a7-b854-dc35d66583b1>

In [152]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=3bc1faba-7848-4ad9-941d-b4497f3cd5f5>

# old_demo_score

In [153]:


sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd10, 'old_demo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd30, 'old_demo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefspd30, 'old_demo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefstpd30, 'old_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 7a1ea00f-f6c6-4766-b9c6-69a6bc456f4a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(132, 7)
Job ID af9dcd72-ea21-4b44-b6a1-5bf2770d68c5 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(128, 7)
Job ID 89accafc-654e-4168-9ccc-72a5a1fd423e successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2024-11-22 19:40:30  d0e6cf6d-c167-4643-8f13-3097b15826b6           485.0   
1 2023-12-09 14:27:42  9c92bd46-f2d0-4cd1-a3f6-f0186dea12e4           424.0   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(123, 7)
['start_date' 'end_date' 'old_demo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 8a3d3fff-5f1d-4926-8cbc-5fdd338dfcd4 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2023-12-09 14:27:42  9c92bd46-f2d0-4cd1-a3f6-f0186dea12e4           424.0   
1 2023-02-23 14:33:24  789147fe-481f-4914-83ee-7c98c3436510           402.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(117, 7)
['start_date' 'end_date' 'old_demo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,old_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.121696,Month,old_demo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.180638,Week,old_demo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.043003,Week,old_demo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.139188,Week,old_demo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.12816,Week,old_demo_score,1.1.0,FSTPD30


## Combining data

In [154]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_demo_score_FPD30_gini',
       'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini'],
      dtype=object)

In [155]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_demo_score_FPD10_gini','old_demo_score_FPD30_gini',    'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini']].copy()

In [156]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=825ed0ba-7a4a-4800-91db-960a7e3aca35>

In [157]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=b3a3e1b6-22c8-48d8-8e1d-1a741e5d433e>

# bu_bureau_score

In [158]:
sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd10_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd10, 'bu_bureau_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd30, 'bu_bureau_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fspd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefspd30, 'bu_bureau_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = """
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    prj-prod-dataplatform.risk_mart.sil_risk_ds_master_20230101_20250206
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fstpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefstpd30, 'bu_bureau_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 80baf5d0-1fc3-4f49-9f6c-de4fe1118269 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
Job ID 2d9422c7-6023-4128-8c41-b53175f9245e successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
Job ID bb39097c-5e2c-435c-92dc-acecd5c436e2 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2023-04-29 19:07:39  060c80e4-c373-4951-9eb8-022b5e10a242            151.0   
1 2024-05-02 17:17:31  64be9c19-7dec-4a44-a506-edbadcac6263            504.0   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 5d4237c0-08bf-498c-92f7-5cbc5046d11a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2024-04-27 16:27:30  3dd4e296-814b-4ce2-9af8-d38bbfb1518c            258.0   
1 2024-04-30 19:27:39  5601b9ed-e5bd-4eb5-8c11-9ebb55bc2c59            286.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


Unnamed: 0,start_date,end_date,bu_bureau_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.022917,Month,bu_bureau_score,1.1.0,FSTPD30
1,2023-01-09,2023-01-15,-0.106667,Week,bu_bureau_score,1.1.0,FSTPD30
2,2023-01-16,2023-01-22,-0.833333,Week,bu_bureau_score,1.1.0,FSTPD30
3,2023-01-23,2023-01-29,0.416667,Week,bu_bureau_score,1.1.0,FSTPD30
4,2023-01-30,2023-02-05,0.266667,Week,bu_bureau_score,1.1.0,FSTPD30


## Combining data

In [159]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'bu_bureau_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'bu_bureau_score_FPD30_gini',
       'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini'],
      dtype=object)

In [160]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','bu_bureau_score_FPD10_gini','bu_bureau_score_FPD30_gini',    'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini']].copy()

In [161]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=15dcdd74-ebf5-4387-9520-a083c1e3476c>

In [162]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('bu_bureau_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('bu_bureau_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()

LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d89c8927-7e2f-4a5f-9fcc-7a835391126c>