# <center> Model Gini Calculation </center>

In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os
path = r'C:\Users\Dwaipayan\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

from sklearn.metrics import roc_auc_score
from datetime import datetime, timedelta
# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

# Function

## calculate_gini_for_threedigitscore

In [2]:
# def calculate_gini_for_threedigitscore(scores, labels):
#     """
#     Calculate Gini coefficient for three-digit scores and binary labels
    
#     Parameters:
#     scores: array-like, three-digit scores (higher is better)
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Combine scores and labels into a DataFrame
#     df = pd.DataFrame({'score': scores, 'label': labels})
    
#     # Sort by score in descending order (assuming higher score is better)
#     df = df.sort_values('score', ascending=False)
    
#     # Calculate cumulative values
#     total_pos = df['label'].sum()
#     total_neg = len(df) - total_pos
    
#     if total_pos == 0 or total_neg == 0:
#         return 0
    
#     # Calculate cumulative proportions
#     cum_pos = df['label'].cumsum()
#     cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
#     # Convert to proportions
#     cum_pos_prop = cum_pos / total_pos
#     cum_neg_prop = cum_neg / total_neg
    
#     # Calculate Gini
#     gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
    
#     return gini


## Modified one

def calculate_gini_for_threedigitscore(scores, labels):
    """
    Calculate Gini coefficient for three-digit scores and binary labels
    
    Parameters:
    scores: array-like, three-digit scores (higher is better)
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Combine scores and labels into a DataFrame
    df = pd.DataFrame({'score': scores, 'label': labels})
    
    # Sort by score in descending order (assuming higher score means lower risk)
    # For default prediction, we want to sort scores in ascending order 
    # since higher default probability should correspond to higher risk
    df = df.sort_values('score', ascending=True)  # Changed to ascending=True
    
    # Calculate cumulative values
    total_pos = df['label'].sum()
    total_neg = len(df) - total_pos
    
    if total_pos == 0 or total_neg == 0:
        return 0
    
    # Calculate cumulative proportions
    cum_pos = df['label'].cumsum()
    cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
    # Convert to proportions
    cum_pos_prop = cum_pos / total_pos
    cum_neg_prop = cum_neg / total_neg
    
    # Calculate area under curve
    auc = np.trapz(cum_pos_prop, cum_neg_prop)
    
    # Calculate Gini
    gini = 2 * auc - 1
    
    return gini

## calculate_gini

In [3]:
def calculate_gini(pd_scores, bad_indicators):
    """
    Calculate Gini coefficient from scores and binary indicators
    
    Parameters:
    pd_scores: array-like of scores/probabilities
    bad_indicators: array-like of binary outcomes (0/1)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays and ensure they're numeric
    pd_scores = np.array(pd_scores, dtype=float)
    bad_indicators = np.array(bad_indicators, dtype=int)
    
    # Check for valid input data
    if len(pd_scores) == 0 or len(bad_indicators) == 0:
        return np.nan
    
    # Check if we have both good and bad cases (needed for ROC AUC)
    if len(np.unique(bad_indicators)) < 2:
        return np.nan
    
    # Calculate AUC using sklearn
    try:
        auc = roc_auc_score(bad_indicators, pd_scores)
        # Calculate Gini from AUC
        gini = 2 * auc - 1
        return gini
    except ValueError:
        return np.nan

## calculate_hybrid_gini

In [4]:
# def calculate_hybrid_gini(scores, labels):
#     """
#     Calculate Gini coefficient handling both PD values and three-digit scores
    
#     Parameters:
#     scores: array-like, contains either PD values (0-1) or three-digit scores
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Convert inputs to numpy arrays
#     scores = np.array(scores, dtype=float)
#     labels = np.array(labels, dtype=int)
    
#     # Basic validation
#     if len(scores) == 0 or len(labels) == 0:
#         return np.nan
    
#     if len(np.unique(labels)) < 2:
#         return np.nan
        
#     # Determine if scores are PD values or three-digit scores
#     # PD values are between 0 and 1
#     is_pd = np.all((scores >= 0) & (scores <= 1))
    
#     if is_pd:
#         try:
#             auc = roc_auc_score(labels, scores)
#             gini = 2 * auc - 1
#             return gini
#         except ValueError:
#             return np.nan
#     else:
#         # Handle as three-digit score
#         df = pd.DataFrame({'score': scores, 'label': labels})
#         df = df.sort_values('score', ascending=False)
        
#         total_pos = df['label'].sum()
#         total_neg = len(df) - total_pos
        
#         if total_pos == 0 or total_neg == 0:
#             return np.nan
        
#         cum_pos = df['label'].cumsum()
#         cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
#         cum_pos_prop = cum_pos / total_pos
#         cum_neg_prop = cum_neg / total_neg
        
#         gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
#         return gini

## Modified one

def calculate_hybrid_gini(scores, labels):
    """
    Calculate Gini coefficient handling both PD values and three-digit scores
    
    Parameters:
    scores: array-like, contains either PD values (0-1) or three-digit scores
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays
    scores = np.array(scores, dtype=float)
    labels = np.array(labels, dtype=int)
    
    # Basic validation
    if len(scores) == 0 or len(labels) == 0:
        return np.nan
    
    if len(np.unique(labels)) < 2:
        return np.nan
        
    # Determine if scores are PD values or three-digit scores
    # PD values are between 0 and 1
    is_pd = np.all((scores >= 0) & (scores <= 1))
    
    if is_pd:
        try:
            auc = roc_auc_score(labels, scores)
            gini = 2 * auc - 1
            return gini
        except ValueError:
            return np.nan
    else:
        # Handle as three-digit score
        df = pd.DataFrame({'score': scores, 'label': labels})
        # Sort by score in ascending order since higher score means higher risk
        df = df.sort_values('score', ascending=True)
        
        total_pos = df['label'].sum()
        total_neg = len(df) - total_pos
        
        if total_pos == 0 or total_neg == 0:
            return np.nan
        
        cum_pos = df['label'].cumsum()
        cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
        cum_pos_prop = cum_pos / total_pos
        cum_neg_prop = cum_neg / total_neg
        
        # Calculate area under curve
        auc = np.trapz(cum_pos_prop, cum_neg_prop)
        
        # Calculate Gini using the same formula as PD values
        gini = 2 * auc - 1
        return gini

## calculate_periodic_gini_threedigit

In [5]:
# Main processing code
def calculate_periodic_gini_threedigit(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_gini

In [6]:
def calculate_periodic_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_hybrid_gini

In [7]:
def calculate_periodic_hybrid_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients for mixed score types
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + pd.Timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

In [8]:
a = " `prj-prod-dataplatform.risk_credit_mis.application_score_master`"

# App Score FPD10

In [9]:
sq = f"""with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score ,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from appscore;"""

dfappscorefpd10 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 224822bd-9bc6-41a5-be5c-f942da40681d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [10]:
dfappscorefpd10.sample(5)

Unnamed: 0,disbursementdate,digitalLoanAccountId,apps_score,ln_fpd10_flag,ln_mature_fpd10_flag
110246,2025-01-28 19:51:27,94f65e53-e126-4b7b-93c2-1f2b9b2c36ea,0.6656492723520586,0,1
31983,2024-05-03 18:32:29,796d9b2d-e790-4eb0-b9ac-bb738cbbbb3c,0.6008270200660013,0,1
76274,2025-01-06 19:53:34,6d22b5cd-5aa7-4ca7-8c1d-0c43bd8a8ee3,0.425646368102502,0,1
7008,2024-07-07 18:25:09,47b6ea58-94b4-4d57-9cf3-6f46e2f54d75,0.5866267077922432,0,1
149295,2024-01-15 15:01:07,a0939d72-4a88-4bcd-80c2-dd602319684a,0.5167057203853798,0,1


In [11]:
gini_results = calculate_periodic_gini(dfappscorefpd10, 'apps_score', 'ln_fpd10_flag', 'FPD10')

  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


In [12]:
gini_results.head()

Unnamed: 0,start_date,end_date,apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,apps_score,1.1.0,FPD10


In [13]:
appscoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD10.shape}")
appscoreFPD10.columns.values

The shape of dataframe after copy is:	(119, 7)


array(['start_date', 'end_date', 'apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [14]:
gini_results.head()

Unnamed: 0,start_date,end_date,apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,apps_score,1.1.0,FPD10


# App Score FPD30

In [15]:
sq = f"""
with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from appscore;
"""

dfappscorefpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID b6266ec6-ec0f-4640-ad30-43257321018e successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [16]:
dfappscorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,apps_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.4981627319801684,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.657169949961698,0,1
2,2023-12-17 19:38:05,0c97d2b7-3157-4f68-a502-636f59339b6d,0.2742596363545263,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.2301509099399173,0,1
4,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.537695869634077,0,1


In [17]:
gini_results = calculate_periodic_gini(dfappscorefpd30, 'apps_score', 'ln_fpd30_flag', 'FPD30')
# gini_results['bad_rate'] = 'FPD30'
appscoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD30.shape}")
appscoreFPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(116, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'apps_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [18]:
gini_results.sample(5)

Unnamed: 0,start_date,end_date,apps_score_FPD30_gini,period,Model_Name,version,bad_rate
69,2024-06-24,2024-06-30,0.451433,Week,apps_score,1.1.0,FPD30
41,2024-01-15,2024-01-21,0.431345,Week,apps_score,1.1.0,FPD30
25,2023-10-16,2023-10-22,0.279451,Week,apps_score,1.1.0,FPD30
83,2024-09-09,2024-09-15,0.285449,Week,apps_score,1.1.0,FPD30
98,2024-12-02,2024-12-08,0.329726,Week,apps_score,1.1.0,FPD30


# App Score FSPD30

In [19]:
sq = f"""with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from appscore;
"""

dfappscorefspd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 1daa4085-b859-4e24-ba0e-51ebe4d31be7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [20]:
gini_results = calculate_periodic_gini(dfappscorefspd30, 'apps_score', 'ln_fspd30_flag', 'FSPD30')
# gini_results['bad_rate'] = 'FSPD30'
appscoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSPD30.shape}")
appscoreFSPD30.columns.values

The shape of dataframe after copy is:	(111, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'apps_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [21]:
gini_results.tail()

Unnamed: 0,start_date,end_date,apps_score_FSPD30_gini,period,Model_Name,version,bad_rate
106,2025-01-20,2025-01-26,0.271975,Week,apps_score,1.1.0,FSPD30
107,2025-01-27,2025-02-02,0.269205,Week,apps_score,1.1.0,FSPD30
108,2025-02-01,2025-02-28,0.313412,Month,apps_score,1.1.0,FSPD30
109,2025-02-03,2025-02-09,0.300222,Week,apps_score,1.1.0,FSPD30
110,2025-02-10,2025-02-16,0.377378,Week,apps_score,1.1.0,FSPD30


# App Score FSTPD30

In [22]:
sq = f"""with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from appscore;
"""

dfappscorefstpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 7920d7f5-4e81-426e-9a2b-b51fe8eedb80 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [23]:
gini_results = calculate_periodic_gini(dfappscorefstpd30, 'apps_score', 'ln_fstpd30_flag', 'FSTPD30')
# gini_results['bad_rate'] = 'FSTPD30'
appscoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSTPD30.shape}")
appscoreFSTPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(106, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'apps_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [24]:
appscoreFSTPD30.head()

Unnamed: 0,start_date,end_date,apps_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.465098,Week,apps_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.33742,Month,apps_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.4133,Week,apps_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.395676,Week,apps_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.314497,Week,apps_score,1.1.0,FSTPD30


# Combining App Score

In [25]:
import functools

dataframes = [appscoreFPD10, appscoreFPD30, appscoreFSPD30, appscoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'apps_score_FPD30_gini',
       'apps_score_FSPD30_gini', 'apps_score_FSTPD30_gini'], dtype=object)

In [26]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','apps_score_FSTPD30_gini','apps_score_FSPD30_gini',
       'apps_score_FPD30_gini', 'apps_score_FPD10_gini']].copy()
final_df.dtypes

start_date                 datetime64[ns]
end_date                   datetime64[ns]
period                             object
Model_Name                         object
version                            object
bad_rate                           object
apps_score_FSTPD30_gini           float64
apps_score_FSPD30_gini            float64
apps_score_FPD30_gini             float64
apps_score_FPD10_gini             float64
dtype: object

## Creating app score table 

In [27]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=0d3cbfe9-822c-43f2-bcd4-5f4e592b03aa>

In [28]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('apps_score_FSTPD30_gini', 'FLOAT'),
    bigquery.SchemaField('apps_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('apps_score_FPD30_gini', 'FLOAT'),
    bigquery.SchemaField('apps_score_fpd10_gini', 'FLOAT')
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=31feac1b-64e3-42d9-83c3-cad3efc949d3>

# sb_demo_score

## FPD10

In [29]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID db536116-14d1-4328-ba7a-fa2c385a7aff successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [30]:
df_sb_demo_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_demo_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.1960012463880533,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0652144820910423,0,1
2,2023-12-17 19:38:05,0c97d2b7-3157-4f68-a502-636f59339b6d,0.0550146636965219,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.1898496045269172,0,1
4,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0831317870245146,0,1


In [31]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd10, 'beta_demo_score', 'ln_fpd10_flag', 'FPD10')
sb_demo_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD10.shape}")
sb_demo_scoreFPD10.columns.values

  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(119, 7)


array(['start_date', 'end_date', 'beta_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [32]:
sb_demo_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,beta_demo_score_FPD10_gini,period,Model_Name,version,bad_rate
114,2025-03-03,2025-03-09,0.258517,Week,beta_demo_score,1.1.0,FPD10
115,2025-03-10,2025-03-16,-0.384615,Week,beta_demo_score,1.1.0,FPD10
116,2025-03-24,2025-03-30,0.12877,Week,beta_demo_score,1.1.0,FPD10
117,2025-03-31,2025-04-06,-0.063636,Week,beta_demo_score,1.1.0,FPD10
118,2025-04-01,2025-04-30,-0.491667,Month,beta_demo_score,1.1.0,FPD10


## FPD30

In [33]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 42bf37bc-925a-42ba-86e9-d04fe369553c successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [34]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd30, 'beta_demo_score', 'ln_fpd30_flag', 'FPD30')
sb_demo_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD30.shape}")
sb_demo_scoreFPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(116, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_demo_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [35]:
sb_demo_scoreFPD30.head() 

Unnamed: 0,start_date,end_date,beta_demo_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.289091,Week,beta_demo_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.183001,Month,beta_demo_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.207407,Week,beta_demo_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.404959,Week,beta_demo_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,-0.243641,Week,beta_demo_score,1.1.0,FPD30


## FSPD30

In [36]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score,
    ln_fspd30_flag,   -- fspd30
	ln_mature_fspd30_flag,	--- fspd30 observation
	FROM 
   {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 2cbb203c-11f5-45ef-90ed-4fbc13ca1557 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [37]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefspd30, 'beta_demo_score', 'ln_fspd30_flag', 'FSPD30')
sb_demo_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSPD30.shape}")
sb_demo_scoreFSPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(111, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_demo_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [38]:
sb_demo_scoreFSPD30.head()

Unnamed: 0,start_date,end_date,beta_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.087601,Week,beta_demo_score,1.1.0,FSPD30
1,2023-06-01,2023-06-30,0.212694,Month,beta_demo_score,1.1.0,FSPD30
2,2023-06-05,2023-06-11,0.239192,Week,beta_demo_score,1.1.0,FSPD30
3,2023-06-12,2023-06-18,0.365591,Week,beta_demo_score,1.1.0,FSPD30
4,2023-06-19,2023-06-25,0.077598,Week,beta_demo_score,1.1.0,FSPD30


## FSTPD30

In [39]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score ,
    ln_fstpd30_flag,   -- fstpd30
	ln_mature_fstpd30_flag,	--- fstpd30 observation
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID b594bfe4-9971-4e9b-8379-285c4c0091c5 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [40]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefstpd30, 'beta_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
sb_demo_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSTPD30.shape}")
sb_demo_scoreFSTPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(106, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_demo_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [41]:
sb_demo_scoreFSTPD30.head()

Unnamed: 0,start_date,end_date,beta_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.21024,Week,beta_demo_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.192045,Month,beta_demo_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.291754,Week,beta_demo_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.163873,Week,beta_demo_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.098133,Week,beta_demo_score,1.1.0,FSTPD30


# Combining sb demo score

In [42]:
import functools

dataframes = [sb_demo_scoreFPD10, sb_demo_scoreFPD30, sb_demo_scoreFSPD30, sb_demo_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'beta_demo_score_FPD30_gini',
       'beta_demo_score_FSPD30_gini', 'beta_demo_score_FSTPD30_gini'],
      dtype=object)

In [43]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','beta_demo_score_FPD10_gini','beta_demo_score_FPD30_gini',
       'beta_demo_score_FSPD30_gini', 'beta_demo_score_FSTPD30_gini']].copy()

## creating sb demo score table 

In [44]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=b2715a11-da5f-434f-be78-cb909670e083>

In [45]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('beta_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('beta_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('beta_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('beta_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=bca44872-8cfc-4d2c-8f90-c499dd2c8528>

# s_cic_score

## FPD10

In [46]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID a208e16a-95a7-4c57-96cc-025cb4ed688b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [47]:
df_s_cic_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,cic_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.2323093149610403,0,1
1,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.1536765617984415,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.125540777534828,0,1
3,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.122659908078722,0,1
4,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.1347365952237831,0,1


In [48]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd10, 'cic_score', 'ln_fpd10_flag', 'FPD10')
s_cic_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD10.shape}")
s_cic_scoreFPD10.columns.values

The shape of dataframe after copy is:	(118, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [49]:
s_cic_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,cic_score_FPD10_gini,period,Model_Name,version,bad_rate
113,2025-03-03,2025-03-09,0.223482,Week,cic_score,1.1.0,FPD10
114,2025-03-10,2025-03-16,-0.181818,Week,cic_score,1.1.0,FPD10
115,2025-03-24,2025-03-30,0.317551,Week,cic_score,1.1.0,FPD10
116,2025-03-31,2025-04-06,-0.068966,Week,cic_score,1.1.0,FPD10
117,2025-04-01,2025-04-30,-0.145833,Month,cic_score,1.1.0,FPD10


## FPD30

In [50]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
  {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 2f7d407e-6be9-4d86-8636-1c661d668462 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [51]:
df_s_cic_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,cic_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.2323093149610403,0,1
1,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.1536765617984415,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.125540777534828,0,1
3,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.122659908078722,0,1
4,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.1347365952237831,0,1


In [52]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd30, 'cic_score', 'ln_fpd30_flag', 'FPD30')
s_cic_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD30.shape}")
s_cic_scoreFPD30.columns.values

The shape of dataframe after copy is:	(115, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [53]:
s_cic_scoreFPD30.head()

Unnamed: 0,start_date,end_date,cic_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.261538,Week,cic_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.266733,Month,cic_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.19697,Week,cic_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,-0.052326,Week,cic_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,0.435484,Week,cic_score,1.1.0,FPD30


In [54]:
s_cic_scoreFPD30.describe()

Unnamed: 0,start_date,end_date,cic_score_FPD30_gini
count,115,115,115.0
mean,2024-04-20 10:13:33.913043456,2024-04-30 21:54:46.956521728,0.257123
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.125
25%,2023-11-09 12:00:00,2023-11-22 12:00:00,0.19906
50%,2024-04-22 00:00:00,2024-04-30 00:00:00,0.263294
75%,2024-09-30 12:00:00,2024-10-09 12:00:00,0.317139
max,2025-03-10 00:00:00,2025-03-31 00:00:00,0.74915
std,,,0.116137


## FSPD30

In [55]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID e5811c28-8e1b-4d1c-96b7-7d403faafd31 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [56]:
gini_results = calculate_periodic_gini(df_s_cic_scorefspd30, 'cic_score', 'ln_fspd30_flag', 'FSPD30')
s_cic_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSPD30.shape}")
s_cic_scoreFSPD30.columns.values

The shape of dataframe after copy is:	(110, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [57]:
s_cic_scoreFSPD30.describe()

Unnamed: 0,start_date,end_date,cic_score_FSPD30_gini
count,110,110,110.0
mean,2024-04-06 04:21:49.090909184,2024-04-16 15:42:32.727272704,0.260615
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.089886
25%,2023-11-02 06:00:00,2023-11-13 18:00:00,0.209298
50%,2024-04-04 12:00:00,2024-04-17 12:00:00,0.265784
75%,2024-09-07 06:00:00,2024-09-20 06:00:00,0.303817
max,2025-02-10 00:00:00,2025-02-28 00:00:00,0.545299
std,,,0.076227


In [58]:
s_cic_scoreFSPD30.tail()

Unnamed: 0,start_date,end_date,cic_score_FSPD30_gini,period,Model_Name,version,bad_rate
105,2025-01-20,2025-01-26,0.223633,Week,cic_score,1.1.0,FSPD30
106,2025-01-27,2025-02-02,0.210801,Week,cic_score,1.1.0,FSPD30
107,2025-02-01,2025-02-28,0.240118,Month,cic_score,1.1.0,FSPD30
108,2025-02-03,2025-02-09,0.245407,Week,cic_score,1.1.0,FSPD30
109,2025-02-10,2025-02-16,0.248281,Week,cic_score,1.1.0,FSPD30


## FSTPD30

In [59]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 0f62ce2c-23ca-478c-bea9-82d7439e349f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [60]:
gini_results = calculate_periodic_gini(df_s_cic_scorefstpd30, 'cic_score', 'ln_fstpd30_flag', 'FSTPD30')
s_cic_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSTPD30.shape}")
s_cic_scoreFSTPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(105, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [61]:
s_cic_scoreFSTPD30.describe()

Unnamed: 0,start_date,end_date,cic_score_FSTPD30_gini
count,105,105,105.0
mean,2024-03-22 21:56:34.285714176,2024-04-02 09:36:00,0.248332
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.065741
25%,2023-10-30 00:00:00,2023-11-05 00:00:00,0.208664
50%,2024-03-25 00:00:00,2024-03-31 00:00:00,0.251476
75%,2024-08-19 00:00:00,2024-08-31 00:00:00,0.292329
max,2025-01-13 00:00:00,2025-01-31 00:00:00,0.479082
std,,,0.072975


In [62]:
s_cic_scoreFSTPD30.tail()

Unnamed: 0,start_date,end_date,cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
100,2024-12-23,2024-12-29,0.208664,Week,cic_score,1.1.0,FSTPD30
101,2024-12-30,2025-01-05,0.190895,Week,cic_score,1.1.0,FSTPD30
102,2025-01-01,2025-01-31,0.17678,Month,cic_score,1.1.0,FSTPD30
103,2025-01-06,2025-01-12,0.138369,Week,cic_score,1.1.0,FSTPD30
104,2025-01-13,2025-01-19,0.191878,Week,cic_score,1.1.0,FSTPD30


# Combining s_cic_score

In [63]:
import functools

dataframes = [s_cic_scoreFPD10, s_cic_scoreFPD30, s_cic_scoreFSPD30, s_cic_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'cic_score_FPD30_gini',
       'cic_score_FSPD30_gini', 'cic_score_FSTPD30_gini'], dtype=object)

In [64]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','cic_score_FPD10_gini','cic_score_FPD30_gini', 'cic_score_FSPD30_gini', 'cic_score_FSTPD30_gini']].copy()

## Creating the table

In [65]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=804d5105-7881-48c6-9cd9-8afa027318b1>

In [66]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=587c009d-d720-4bcc-94ab-894178d60e21>

# sb_stack_score

## FPD10

In [67]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score beta_stack_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd10.head()

Job ID 41f5dde8-a5ad-4815-909e-eaf3da73cd6c successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.1079624429034287,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0976092748418532,0,1
2,2023-12-17 19:38:05,0c97d2b7-3157-4f68-a502-636f59339b6d,0.009398460947724,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.0234567998067551,0,1
4,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0757262596787547,0,1


In [68]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd10, 'beta_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(119, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [69]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD10_gini
count,119,119,119.0
mean,2024-04-26 18:45:22.689075712,2024-05-07 07:27:43.865546240,0.335722
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.030303
25%,2023-11-09 12:00:00,2023-11-22 12:00:00,0.292816
50%,2024-04-29 00:00:00,2024-05-05 00:00:00,0.321062
75%,2024-10-10 12:00:00,2024-10-23 12:00:00,0.409706
max,2025-04-01 00:00:00,2025-04-30 00:00:00,0.670897
std,,,0.101616


In [70]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.506255,Week,beta_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.345668,Month,beta_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.407799,Week,beta_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.670897,Week,beta_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,-0.030303,Week,beta_stack_score,1.1.0,FPD10


## FPD30

In [71]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd30.head()

Job ID bb0323a8-6d5f-424e-9e9a-164fcdae6079 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.0853637836806951,0,1
1,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.0591563899966423,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0114227896554983,0,1
3,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.0603778199617865,0,1
4,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0753455691896986,0,1


In [72]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd30, 'beta_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(116, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [73]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD30_gini
count,116,116,116.0
mean,2024-04-18 02:04:08.275862016,2024-04-28 12:49:39.310344704,0.357056
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.036145
25%,2023-11-04 18:00:00,2023-11-17 06:00:00,0.308708
50%,2024-04-18 12:00:00,2024-04-29 00:00:00,0.347357
75%,2024-09-30 06:00:00,2024-10-07 18:00:00,0.416462
max,2025-03-10 00:00:00,2025-03-31 00:00:00,0.654729
std,,,0.099064


In [74]:
M2FPD30.tail()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
111,2025-02-17,2025-02-23,0.334034,Week,beta_stack_score,1.1.0,FPD30
112,2025-02-24,2025-03-02,0.35131,Week,beta_stack_score,1.1.0,FPD30
113,2025-03-01,2025-03-31,0.304883,Month,beta_stack_score,1.1.0,FPD30
114,2025-03-03,2025-03-09,0.289876,Week,beta_stack_score,1.1.0,FPD30
115,2025-03-10,2025-03-16,0.0,Week,beta_stack_score,1.1.0,FPD30


## FSPD30

In [75]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefspd30.head()

Job ID 3bf94ae4-8b58-41e6-8f50-13231632e5ff successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.1079624429034287,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0976092748418532,0,1
2,2023-12-17 19:38:05,0c97d2b7-3157-4f68-a502-636f59339b6d,0.009398460947724,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.0234567998067551,0,1
4,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0757262596787547,0,1


In [76]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefspd30, 'beta_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(111, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [77]:
M3FSPD30.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FSPD30_gini
count,111,111,111.0
mean,2024-04-03 20:45:24.324324352,2024-04-14 07:08:06.486486528,0.362004
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.179581
25%,2023-10-31 00:00:00,2023-11-08 12:00:00,0.313103
50%,2024-04-01 00:00:00,2024-04-14 00:00:00,0.343793
75%,2024-09-05 12:00:00,2024-09-18 12:00:00,0.418011
max,2025-02-10 00:00:00,2025-02-28 00:00:00,0.551886
std,,,0.075841


In [78]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,beta_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
106,2025-01-20,2025-01-26,0.318566,Week,beta_stack_score,1.1.0,FSPD30
107,2025-01-27,2025-02-02,0.313046,Week,beta_stack_score,1.1.0,FSPD30
108,2025-02-01,2025-02-28,0.340677,Month,beta_stack_score,1.1.0,FSPD30
109,2025-02-03,2025-02-09,0.342838,Week,beta_stack_score,1.1.0,FSPD30
110,2025-02-10,2025-02-16,0.371273,Week,beta_stack_score,1.1.0,FSPD30


## FSTPD30

In [79]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefstpd30.head()

Job ID 56b476d5-c4f3-46fc-8fa1-2d847a49039d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.0853637836806951,0,1
1,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0114227896554983,0,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.0603778199617865,0,1
3,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0753455691896986,0,1
4,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.0505555827670769,0,1


In [80]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefstpd30, 'beta_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(106, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FSTPD30_gini',
       'period', 'Model_Name', 'version', 'bad_rate'], dtype=object)

In [81]:
M4FSTPD30.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FSTPD30_gini
count,106,106,106.0
mean,2024-03-20 14:56:36.226415104,2024-03-31 01:35:05.660377344,0.338658
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.160057
25%,2023-10-24 18:00:00,2023-11-01 06:00:00,0.298421
50%,2024-03-21 12:00:00,2024-03-31 00:00:00,0.327405
75%,2024-08-17 06:00:00,2024-08-29 12:00:00,0.383627
max,2025-01-13 00:00:00,2025-01-31 00:00:00,0.500133
std,,,0.05991


In [82]:
M4FSTPD30.tail()

Unnamed: 0,start_date,end_date,beta_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate
101,2024-12-23,2024-12-29,0.309834,Week,beta_stack_score,1.1.0,FSTPD30
102,2024-12-30,2025-01-05,0.309648,Week,beta_stack_score,1.1.0,FSTPD30
103,2025-01-01,2025-01-31,0.302942,Month,beta_stack_score,1.1.0,FSTPD30
104,2025-01-06,2025-01-12,0.300896,Week,beta_stack_score,1.1.0,FSTPD30
105,2025-01-13,2025-01-19,0.412716,Week,beta_stack_score,1.1.0,FSTPD30


## Combining the dataframes

In [83]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'beta_stack_score_FPD30_gini',
       'beta_stack_score_FSPD30_gini', 'beta_stack_score_FSTPD30_gini'],
      dtype=object)

In [84]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'beta_stack_score_FPD10_gini', 'beta_stack_score_FPD30_gini',
                     'beta_stack_score_FSPD30_gini', 'beta_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [85]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=613b1bab-6d47-431c-8f11-7a25990abe96>

In [86]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('beta_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('beta_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('beta_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('beta_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=386ccfc9-d452-43ea-8dc3-edb0980546eb>

# sa_stack_score

## FPD10

In [87]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score ,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    alpha_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd10.head()

Job ID 5a481ff4-18aa-4311-8106-c2fa30628cda successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.0699738064823699,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
3,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0570348930097323,0,1
4,2024-10-06 13:54:14,beba4758-2d8a-48b5-81df-7aaabc7e133e,0.0595457277638278,0,1


In [88]:
sa_stack_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.0699738064823699,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
3,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0570348930097323,0,1
4,2024-10-06 13:54:14,beba4758-2d8a-48b5-81df-7aaabc7e133e,0.0595457277638278,0,1


In [89]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd10, 'alpha_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.head()

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(118, 7)


  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,alpha_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.579487,Week,alpha_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.363135,Month,alpha_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.632035,Week,alpha_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.764706,Week,alpha_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.103226,Week,alpha_stack_score,1.1.0,FPD10


In [90]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD10_gini
count,118,118,118.0
mean,2024-04-29 03:15:15.254237184,2024-05-09 16:52:52.881356032,0.371542
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.21875
25%,2023-11-14 18:00:00,2023-11-27 00:00:00,0.334647
50%,2024-04-30 00:00:00,2024-05-08 12:00:00,0.374145
75%,2024-10-12 06:00:00,2024-10-25 06:00:00,0.451756
max,2025-04-01 00:00:00,2025-04-30 00:00:00,0.764706
std,,,0.146116


In [91]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.579487,Week,alpha_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.363135,Month,alpha_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.632035,Week,alpha_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.764706,Week,alpha_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.103226,Week,alpha_stack_score,1.1.0,FPD10


## FPD30

In [92]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    alpha_stack_score  is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd30.head()

Job ID c2349b2c-f9a0-430d-93d0-c6c09159f4bb successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.0699738064823699,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
3,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0570348930097323,0,1
4,2024-10-06 13:54:14,beba4758-2d8a-48b5-81df-7aaabc7e133e,0.0595457277638278,0,1


In [93]:
sa_stack_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.0699738064823699,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
3,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0570348930097323,0,1
4,2024-10-06 13:54:14,beba4758-2d8a-48b5-81df-7aaabc7e133e,0.0595457277638278,0,1


In [94]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd30, 'alpha_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(115, 7)


array(['start_date', 'end_date', 'alpha_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [95]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD30_gini
count,115,115,115.0
mean,2024-04-20 10:13:33.913043456,2024-04-30 21:54:46.956521728,0.404407
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.262626
25%,2023-11-09 12:00:00,2023-11-22 12:00:00,0.358653
50%,2024-04-22 00:00:00,2024-04-30 00:00:00,0.4
75%,2024-09-30 12:00:00,2024-10-09 12:00:00,0.478766
max,2025-03-10 00:00:00,2025-03-31 00:00:00,0.784014
std,,,0.142616


In [96]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.579487,Week,alpha_stack_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.358308,Month,alpha_stack_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.632035,Week,alpha_stack_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.732558,Week,alpha_stack_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,0.103226,Week,alpha_stack_score,1.1.0,FPD30


## FSTPD30

In [97]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    alpha_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefspd30.head()

Job ID 3e9c79aa-2663-418e-8621-9f621e48dd02 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.1159329009610408,0,1
1,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.0614016441193192,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.019327939422041,0,1
3,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0459328399438693,0,1
4,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.0886808251109808,0,1


In [98]:
gini_results = calculate_periodic_gini(sa_stack_scorefspd30, 'alpha_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

The shape of dataframe after copy is:	(110, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'alpha_stack_score_FSPD30_gini',
       'period', 'Model_Name', 'version', 'bad_rate'], dtype=object)

In [99]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,alpha_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
105,2025-01-20,2025-01-26,0.349004,Week,alpha_stack_score,1.1.0,FSPD30
106,2025-01-27,2025-02-02,0.359841,Week,alpha_stack_score,1.1.0,FSPD30
107,2025-02-01,2025-02-28,0.349173,Month,alpha_stack_score,1.1.0,FSPD30
108,2025-02-03,2025-02-09,0.339926,Week,alpha_stack_score,1.1.0,FSPD30
109,2025-02-10,2025-02-16,0.390851,Week,alpha_stack_score,1.1.0,FSPD30


## FSTPD30

In [100]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    alpha_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefstpd30.head()

Job ID a7394d9e-80d9-45aa-a558-68515c5e94c7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.0699738064823699,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
3,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.0570348930097323,0,1
4,2024-10-06 13:54:14,beba4758-2d8a-48b5-81df-7aaabc7e133e,0.0595457277638278,0,1


In [101]:
sa_stack_scorefstpd30.tail()

Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
113516,2024-11-06 18:16:04,4158dd38-80a4-47af-8e3a-3f8338ea68bc,0.1090251278898702,1,1
113517,2024-09-07 19:12:55,a351cd38-c308-402d-aad4-d8b450a829db,0.1047628678751238,1,1
113518,2024-09-24 17:14:50,4d135fe9-82fe-4a3e-808e-83af626c54ef,0.0554706222763464,1,1
113519,2024-11-20 18:35:51,7bf3cc80-e339-4a69-b862-b98634a5c9d5,0.1194238265430105,1,1
113520,2024-12-15 11:48:55,4dc30243-efda-464f-abf5-55d198cd53d1,0.103429661844832,1,1


In [102]:
gini_results = calculate_periodic_gini(sa_stack_scorefstpd30, 'alpha_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(105, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'alpha_stack_score_FSTPD30_gini',
       'period', 'Model_Name', 'version', 'bad_rate'], dtype=object)

## Combining the dataframes

In [103]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'alpha_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate',
       'alpha_stack_score_FPD30_gini', 'alpha_stack_score_FSPD30_gini',
       'alpha_stack_score_FSTPD30_gini'], dtype=object)

In [104]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'alpha_stack_score_FPD10_gini', 'alpha_stack_score_FPD30_gini',  'alpha_stack_score_FSPD30_gini', 'alpha_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [105]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=24dff5ea-c707-45fb-99e3-7d2814b27edc>

In [106]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('alpha_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('alpha_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('alpha_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('alpha_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c8c22a9e-9d66-422e-a65a-720cd5730306>

# gen_credo_score

## FPD10

In [107]:


sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
     {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd10.head()

Job ID 633e4e4c-192a-4d7e-a774-34f8ee7c8476 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,credo_gen_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.185555630270027,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.117975604455025,0,1
2,2023-12-17 19:38:05,0c97d2b7-3157-4f68-a502-636f59339b6d,0.0569014060520808,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.118863497623177,0,1
4,2024-10-26 15:36:19,4085569c-fc38-485b-9eb9-87e23466e91e,0.041753556447116,0,1


In [108]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd10, 'credo_gen_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(145, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'credo_gen_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [109]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,credo_gen_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.246725,Month,credo_gen_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.301125,Week,credo_gen_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,0.2734,Week,credo_gen_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.667532,Week,credo_gen_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,0.086505,Week,credo_gen_score,1.1.0,FPD10


## FPD30

In [110]:
sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd30.head()

Job ID 7512c7bb-d51c-46fc-89e9-1f5edfb51c2e successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,credo_gen_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.138506316979051,0,1
1,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.106291202938258,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0975612449375841,0,1
3,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.110976951075583,0,1
4,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.134107791698632,0,1


In [111]:
gen_credo_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,credo_gen_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.138506316979051,0,1
1,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.106291202938258,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0975612449375841,0,1
3,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.110976951075583,0,1
4,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.134107791698632,0,1


In [112]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd30, 'credo_gen_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(142, 7)


  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'credo_gen_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [113]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,credo_gen_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.253005,Month,credo_gen_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.342149,Week,credo_gen_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,0.407163,Week,credo_gen_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.667532,Week,credo_gen_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,0.025547,Week,credo_gen_score,1.1.0,FPD30


## FSPD30

In [114]:
sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefspd30, 'credo_gen_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

Job ID 812538b4-e14f-4e5e-baf2-940bd613012a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-08-11 13:28:11  d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1   
1 2025-01-25 19:05:49  e549776f-7a2d-4b6c-9edc-e3b59e8955f6   

     credo_gen_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.138506316979051               0                      1  
1  0.106291202938258               0                      1  


  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(137, 7)
['start_date' 'end_date' 'credo_gen_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_gen_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.172055,Month,credo_gen_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.262803,Week,credo_gen_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.189744,Week,credo_gen_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.410738,Week,credo_gen_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,0.073579,Week,credo_gen_score,1.1.0,FSPD30


## FSTPD30

In [115]:
sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefstpd30, 'credo_gen_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()

Job ID e4e894a9-54b6-4337-9615-d9f441f3db81 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-11-24 18:45:09  5be2f2c7-2401-48bc-a384-6f62c22866db   

     credo_gen_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.185555630270027                0                       1  
1  0.117975604455025                0                       1  


  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(132, 7)
['start_date' 'end_date' 'credo_gen_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_gen_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.180837,Month,credo_gen_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.282959,Week,credo_gen_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.189908,Week,credo_gen_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.19884,Week,credo_gen_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.162845,Week,credo_gen_score,1.1.0,FSTPD30


## Combining tables 

In [116]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_gen_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'credo_gen_score_FPD30_gini',
       'credo_gen_score_FSPD30_gini', 'credo_gen_score_FSTPD30_gini'],
      dtype=object)

In [117]:
final_df = final_df[['start_date', 'end_date','period',
       'Model_Name', 'version', 'bad_rate', 'credo_gen_score_FPD10_gini','credo_gen_score_FPD30_gini',   'credo_gen_score_FSPD30_gini', 'credo_gen_score_FSTPD30_gini']].copy()

In [118]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=84e99ee2-5c04-4c63-8457-a0c99ec5ced0>

In [119]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_gen_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_gen_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_gen_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_gen_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=77220f70-3c99-4943-a9e9-9e7fbefd7b04>

# c_credo_score

In [120]:
sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_quick_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd10, 'credo_cash_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_quick_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd30, 'credo_cash_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
   {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_quick_score  is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefspd30, 'credo_cash_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_quick_score  is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefstpd30, 'credo_cash_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID b09a310e-83be-460c-9e17-5cb05230ee7c successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(143, 7)
Job ID b58d4448-4ea0-4919-91a0-937d26c843ed successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID ba36d604-58d3-4bc5-9287-4b6ea283dc37 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-11-24 18:45:09  5be2f2c7-2401-48bc-a384-6f62c22866db   

    credo_cash_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.375585243842437               0                      1  
1  0.346790857604298               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(137, 7)
['start_date' 'end_date' 'credo_cash_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID b63d5858-1b11-41a2-bc83-99cac37d984a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-11-24 18:45:09  5be2f2c7-2401-48bc-a384-6f62c22866db   

    credo_cash_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.375585243842437                0                       1  
1  0.346790857604298                0                       1  
The shape of dataframe after copy is:	(132, 7)
['start_date' 'end_date' 'credo_cash_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_cash_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.035948,Month,credo_cash_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.036009,Week,credo_cash_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.062143,Week,credo_cash_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275891,Week,credo_cash_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,-0.050558,Week,credo_cash_score,1.1.0,FSTPD30


In [121]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_cash_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'credo_cash_score_FPD30_gini',
       'credo_cash_score_FSPD30_gini', 'credo_cash_score_FSTPD30_gini'],
      dtype=object)

In [122]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate',
                     'credo_cash_score_FPD10_gini', 
                     'credo_cash_score_FPD30_gini', 
                     'credo_cash_score_FSPD30_gini',
                     'credo_cash_score_FSTPD30_gini']].copy()

In [123]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=609bd857-a465-4670-8e6c-a21fd8aef3fb>

In [124]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_cash_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_cash_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_cash_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_cash_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=00c1eddf-996f-464b-8b1e-21d9aebfb3e6>

# s_credo_score

In [125]:


sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd10, 'credo_sil_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
   {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd30, 'credo_sil_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefspd30, 'credo_sil_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefstpd30, 'credo_sil_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 219fd09b-a3bf-4f7e-bc06-1c0e5be06661 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(143, 7)
Job ID a584fcf0-e63c-485a-80b1-63d34ace2b83 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID 5136ed91-2fd9-4b4f-bf48-e4fd8b4c0bd9 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-08-11 13:28:11  d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1   
1 2025-01-25 19:05:49  e549776f-7a2d-4b6c-9edc-e3b59e8955f6   

     credo_sil_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0   0.17085650025779               0                      1  
1  0.121637216776814               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(137, 7)
['start_date' 'end_date' 'credo_sil_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 65ea8a91-a292-42ee-a719-36dbf1d0b59a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-08-11 13:28:11  d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1   
1 2024-05-02 17:24:11  9b118965-74ea-4832-a640-8b5ea2cc140e   

      credo_sil_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0    0.17085650025779                0                       1  
1  0.0724266449334963                0                       1  
The shape of dataframe after copy is:	(132, 7)
['start_date' 'end_date' 'credo_sil_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_sil_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.219086,Month,credo_sil_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.261708,Week,credo_sil_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.251056,Week,credo_sil_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275062,Week,credo_sil_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.157554,Week,credo_sil_score,1.1.0,FSTPD30


## Combining data

In [126]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_sil_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'credo_sil_score_FPD30_gini',
       'credo_sil_score_FSPD30_gini', 'credo_sil_score_FSTPD30_gini'],
      dtype=object)

In [127]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate',
                     'credo_sil_score_FPD10_gini', 
                     'credo_sil_score_FPD30_gini',  
                     'credo_sil_score_FSPD30_gini',
                     'credo_sil_score_FSTPD30_gini']].copy()

In [128]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=8af12932-2abb-42f6-a705-10768468f46e>

In [129]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_sil_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_sil_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_sil_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_sil_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a2cdcc10-d729-429c-ad11-e3b9b3378b30>

In [130]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,credo_sil_score_FPD10_gini,credo_sil_score_FPD30_gini,credo_sil_score_FSPD30_gini,credo_sil_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FPD10,0.230181,,,
1,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FPD30,,0.25398,,
2,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FSPD30,,,0.19884,
3,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FSTPD30,,,,0.219086
4,2023-01-02,2023-01-08,Week,credo_sil_score,1.1.0,FPD10,0.248978,,,


# fu_credo_score

In [131]:


sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd10, 'credo_flexup_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd30, 'credo_flexup_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefspd30, 'credo_flexup_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefstpd30, 'credo_flexup_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 6f593096-6dfc-4abe-b3d9-0318e73c873d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(143, 7)
Job ID 842cfca0-bec4-4900-baa5-d0e790033d7d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID 89790e76-1ce9-430f-a53a-399bebe13ab5 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-11-24 18:45:09  5be2f2c7-2401-48bc-a384-6f62c22866db   

   credo_flexup_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.0571600303978391               0                      1  
1  0.0923064986121803               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(137, 7)
['start_date' 'end_date' 'credo_flexup_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 71cfe578-b3b3-4dc6-b408-ccfce0d1e2a3 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-11-24 18:45:09  5be2f2c7-2401-48bc-a384-6f62c22866db   

   credo_flexup_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.0571600303978391                0                       1  
1  0.0923064986121803                0                       1  
The shape of dataframe after copy is:	(132, 7)
['start_date' 'end_date' 'credo_flexup_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_flexup_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.07286,Month,credo_flexup_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.002165,Week,credo_flexup_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.056674,Week,credo_flexup_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.326429,Week,credo_flexup_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.109935,Week,credo_flexup_score,1.1.0,FSTPD30


## Combining data

In [132]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_flexup_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'credo_flexup_score_FPD30_gini', 'credo_flexup_score_FSPD30_gini',
       'credo_flexup_score_FSTPD30_gini'], dtype=object)

In [133]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate',
                     'credo_flexup_score_FPD10_gini', 'credo_flexup_score_FPD30_gini',    'credo_flexup_score_FSPD30_gini', 'credo_flexup_score_FSTPD30_gini']].copy()

In [134]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a343235e-c245-4e4c-ac0d-5b0eb6cb81bc>

In [135]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_flexup_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_flexup_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('v_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_flexup_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=3324e895-0335-468c-940f-3587aee8186f>

In [136]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,credo_flexup_score_FPD10_gini,credo_flexup_score_FPD30_gini,credo_flexup_score_FSPD30_gini,credo_flexup_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FPD10,0.023608,,,
1,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FPD30,,0.006985,,
2,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FSPD30,,,0.052302,
3,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FSTPD30,,,,0.07286
4,2023-01-02,2023-01-08,Week,credo_flexup_score,1.1.0,FPD10,-0.032464,,,


# r_credo_score

In [137]:


sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd10, 'credo_reloan_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd30, 'credo_reloan_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefspd30, 'credo_reloan_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score ,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefstpd30, 'credo_reloan_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID c4165bc1-65ca-4a4d-9ea2-b5812f93d9d4 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(143, 7)
Job ID 3a3a8582-ea48-4d67-b4f6-b39f05f5065d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID 4e372390-6e25-4d7b-b947-8a3d6111ad71 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-11-24 18:45:09  5be2f2c7-2401-48bc-a384-6f62c22866db   

  credo_reloan_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.366341998630943               0                      1  
1   0.33596981640975               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(137, 7)
['start_date' 'end_date' 'credo_reloan_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID fe99fa80-9e1a-4157-aba7-cb62534e5e81 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-11-24 18:45:09  5be2f2c7-2401-48bc-a384-6f62c22866db   

  credo_reloan_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.366341998630943                0                       1  
1   0.33596981640975                0                       1  


  weekly_gini = df.groupby('week').apply(


The shape of dataframe after copy is:	(132, 7)
['start_date' 'end_date' 'credo_reloan_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_reloan_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.048984,Month,credo_reloan_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.014955,Week,credo_reloan_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.193885,Week,credo_reloan_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,-0.019056,Week,credo_reloan_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.024691,Week,credo_reloan_score,1.1.0,FSTPD30


## Combining data

In [138]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_reloan_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'credo_reloan_score_FPD30_gini', 'credo_reloan_score_FSPD30_gini',
       'credo_reloan_score_FSTPD30_gini'], dtype=object)

In [139]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate',
                     'credo_reloan_score_FPD10_gini',
                     'credo_reloan_score_FPD30_gini',
                     'credo_reloan_score_FSPD30_gini',
                     'credo_reloan_score_FSTPD30_gini']].copy()

In [140]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=6c7bc6d5-e849-46a3-9d14-ead62af81b10>

In [141]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_reloan_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_reloan_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_reloan_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_reloan_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f6d9d42c-cfab-40cf-9498-d6d662d3b76b>

# old_gen_credo_score

In [142]:
sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score ,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd10, 'credo_old_gen_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd30, 'credo_old_gen_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefspd30, 'credo_old_gen_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefstpd30, 'credo_old_gen_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID faefcea6-6130-4a4f-b478-2797daa48cf4 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(145, 7)
['start_date' 'end_date' 'credo_old_gen_score_FPD10_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID cbbf3cbe-b200-4d00-a3b6-3a14494b918f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(142, 7)
['start_date' 'end_date' 'credo_old_gen_score_FPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID d02f3eaf-ad59-4d2d-be53-2a153c0b872e successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2025-01-25 19:05:49  e549776f-7a2d-4b6c-9edc-e3b59e8955f6   
1 2024-05-02 17:24:11  9b118965-74ea-4832-a640-8b5ea2cc140e   

  credo_old_gen_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0               350.0               0                      1  
1                 507               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(137, 7)
['start_date' 'end_date' 'credo_old_gen_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 123caa42-5393-419a-84dd-607e1a1dcae8 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-05-02 17:24:11  9b118965-74ea-4832-a640-8b5ea2cc140e   
1 2024-02-26 15:08:47  acebf8de-bd42-4b00-83bc-d7c0999a7488   

  credo_old_gen_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                 507                0                       1  
1                 468                0                       1  
The shape of dataframe after copy is:	(132, 7)
['start_date' 'end_date' 'credo_old_gen_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_old_gen_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.123128,Month,credo_old_gen_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.234472,Week,credo_old_gen_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.140169,Week,credo_old_gen_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.12788,Week,credo_old_gen_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,-0.00372,Week,credo_old_gen_score,1.1.0,FSTPD30


## Combining data

In [143]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_old_gen_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'credo_old_gen_score_FPD30_gini',
       'credo_old_gen_score_FSPD30_gini',
       'credo_old_gen_score_FSTPD30_gini'], dtype=object)

In [144]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate',
                     'credo_old_gen_score_FPD10_gini',
                     'credo_old_gen_score_FPD30_gini',
                     'credo_old_gen_score_FSPD30_gini', 
                     'credo_old_gen_score_FSTPD30_gini']].copy()

In [145]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=2de95249-6209-4aef-962d-9ed2f98964eb>

In [146]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_old_gen_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_old_gen_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_old_gen_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_old_gen_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=64586a2d-8b12-4b4a-8f47-acd2a0442f1f>

In [147]:
sq = """select * from prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""

df = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 87beb619-5210-4650-b2a7-ba21fbfde3d3 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [148]:
df['bad_rate'].value_counts()

bad_rate
FPD10      145
FPD30      142
FSPD30     137
FSTPD30    132
Name: count, dtype: int64

# old_cic_score

In [149]:


sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd10, 'old_cic_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd30, 'old_cic_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefspd30, 'old_cic_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefstpd30, 'old_cic_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 19e5bb23-5cd3-4170-bd45-5f365bfab4af successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(105, 7)
['start_date' 'end_date' 'old_cic_score_FPD10_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID cd57e5ef-6112-4122-a511-37eba534a2cd successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(102, 7)
['start_date' 'end_date' 'old_cic_score_FPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 6d1a7bb2-d281-4bb7-bb10-3aba0748c01d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2024-06-14 13:20:28  5f412bbb-9b17-4761-8023-1a08640ad81c       605.000   
1 2023-09-22 15:00:18  a4bfaa18-0189-4e1e-99ac-e45b036ea021       605.000   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(97, 7)
['start_date' 'end_date' 'old_cic_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID dc9a02c2-1d9e-4053-818d-5cd34aeac14a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2024-06-14 13:20:28  5f412bbb-9b17-4761-8023-1a08640ad81c       605.000   
1 2023-09-22 15:00:18  a4bfaa18-0189-4e1e-99ac-e45b036ea021       605.000   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'old_cic_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,old_cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-08-01,2023-08-31,0.098266,Month,old_cic_score,1.1.0,FSTPD30
1,2023-08-21,2023-08-27,0.152414,Week,old_cic_score,1.1.0,FSTPD30
2,2023-08-28,2023-09-03,0.14582,Week,old_cic_score,1.1.0,FSTPD30
3,2023-09-01,2023-09-30,0.255766,Month,old_cic_score,1.1.0,FSTPD30
4,2023-09-04,2023-09-10,0.150842,Week,old_cic_score,1.1.0,FSTPD30


## Combining data

In [150]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_cic_score_FPD30_gini',
       'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini'],
      dtype=object)

In [151]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_cic_score_FPD10_gini','old_cic_score_FPD30_gini',    'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini']].copy()

In [152]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=43862d4b-eddf-4b1f-8688-fa832ab1b58d>

In [153]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=3be0aad6-93b6-41d4-8b31-70cfe8145640>

# old_demo_score

In [154]:


sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd10, 'old_demo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd30, 'old_demo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefspd30, 'old_demo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefstpd30, 'old_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 1c25c641-9d27-449b-b166-d4c574dc6f57 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(145, 7)
['start_date' 'end_date' 'old_demo_score_FPD10_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 724f358f-ef1f-4537-ae73-bba972bd27f7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(142, 7)
['start_date' 'end_date' 'old_demo_score_FPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID f5b1c471-d37e-46f5-908b-751f790b2bd0 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2025-01-25 19:05:49  e549776f-7a2d-4b6c-9edc-e3b59e8955f6        0.138973   
1 2024-06-14 13:20:28  5f412bbb-9b17-4761-8023-1a08640ad81c      435.000000   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(137, 7)
['start_date' 'end_date' 'old_demo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID f713f54a-4b8b-4c20-bc9b-fbb0c65f7087 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2024-12-01 12:21:56  1eac1869-c3c5-4ed8-aa58-aeb90c730784           427.0   
1 2024-08-24 17:39:29  56058a72-c8e5-4400-92e1-34400a8b6286           484.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(132, 7)
['start_date' 'end_date' 'old_demo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,old_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.124696,Month,old_demo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.174341,Week,old_demo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.042506,Week,old_demo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.128418,Week,old_demo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.120517,Week,old_demo_score,1.1.0,FSTPD30


## Combining data

In [155]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_demo_score_FPD30_gini',
       'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini'],
      dtype=object)

In [156]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_demo_score_FPD10_gini','old_demo_score_FPD30_gini',    'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini']].copy()

In [157]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=730a47bd-f105-4f7b-8edf-63d6231ab0e4>

In [158]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ca2f1dbd-68f1-4348-84e5-1170c82436aa>

# bu_bureau_score

In [159]:
sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd10_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd10, 'bu_bureau_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd30, 'bu_bureau_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fspd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefspd30, 'bu_bureau_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fstpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefstpd30, 'bu_bureau_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 6381eaee-56c2-47c3-977f-6f16362616ac successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FPD10_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Job ID b91a7a22-1c63-468b-9a6b-13b522d94d0f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Job ID ca1358c4-0fbc-4049-bd5b-cbea728ede70 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2023-12-07 12:25:39  c464e02f-5b1a-460a-9686-9facb57c699f            418.0   
1 2023-12-26 18:05:40  0fc1e665-b7ba-4b10-89f2-e20b149035e0            365.0   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Job ID ab3487ad-3eb9-4f96-bf79-84949fbde7c5 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2023-12-07 12:25:39  c464e02f-5b1a-460a-9686-9facb57c699f            418.0   
1 2023-12-26 18:05:40  0fc1e665-b7ba-4b10-89f2-e20b149035e0            365.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,bu_bureau_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.027083,Month,bu_bureau_score,1.1.0,FSTPD30
1,2023-01-09,2023-01-15,-0.08,Week,bu_bureau_score,1.1.0,FSTPD30
2,2023-01-16,2023-01-22,-0.833333,Week,bu_bureau_score,1.1.0,FSTPD30
3,2023-01-23,2023-01-29,0.416667,Week,bu_bureau_score,1.1.0,FSTPD30
4,2023-01-30,2023-02-05,0.266667,Week,bu_bureau_score,1.1.0,FSTPD30


## Combining data

In [160]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'bu_bureau_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'bu_bureau_score_FPD30_gini',
       'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini'],
      dtype=object)

In [161]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','bu_bureau_score_FPD10_gini','bu_bureau_score_FPD30_gini',    'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini']].copy()

In [162]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=8b29ebef-453f-4202-970d-acd7386cbdcb>

In [163]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('bu_bureau_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('bu_bureau_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=55f38dc4-4583-4dc3-b879-ba9e342effa7>