# <center> Model Gini Calculation </center>

In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
from google.cloud import storage
import os
path = r'C:\Users\Dwaipayan\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

from sklearn.metrics import roc_auc_score
from datetime import datetime, timedelta
# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

# Function

## calculate_gini_for_threedigitscore

In [2]:
# def calculate_gini_for_threedigitscore(scores, labels):
#     """
#     Calculate Gini coefficient for three-digit scores and binary labels
    
#     Parameters:
#     scores: array-like, three-digit scores (higher is better)
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Combine scores and labels into a DataFrame
#     df = pd.DataFrame({'score': scores, 'label': labels})
    
#     # Sort by score in descending order (assuming higher score is better)
#     df = df.sort_values('score', ascending=False)
    
#     # Calculate cumulative values
#     total_pos = df['label'].sum()
#     total_neg = len(df) - total_pos
    
#     if total_pos == 0 or total_neg == 0:
#         return 0
    
#     # Calculate cumulative proportions
#     cum_pos = df['label'].cumsum()
#     cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
#     # Convert to proportions
#     cum_pos_prop = cum_pos / total_pos
#     cum_neg_prop = cum_neg / total_neg
    
#     # Calculate Gini
#     gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
    
#     return gini


## Modified one

def calculate_gini_for_threedigitscore(scores, labels):
    """
    Calculate Gini coefficient for three-digit scores and binary labels
    
    Parameters:
    scores: array-like, three-digit scores (higher is better)
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Combine scores and labels into a DataFrame
    df = pd.DataFrame({'score': scores, 'label': labels})
    
    # Sort by score in descending order (assuming higher score means lower risk)
    # For default prediction, we want to sort scores in ascending order 
    # since higher default probability should correspond to higher risk
    df = df.sort_values('score', ascending=True)  # Changed to ascending=True
    
    # Calculate cumulative values
    total_pos = df['label'].sum()
    total_neg = len(df) - total_pos
    
    if total_pos == 0 or total_neg == 0:
        return 0
    
    # Calculate cumulative proportions
    cum_pos = df['label'].cumsum()
    cum_neg = np.arange(1, len(df) + 1) - cum_pos
    
    # Convert to proportions
    cum_pos_prop = cum_pos / total_pos
    cum_neg_prop = cum_neg / total_neg
    
    # Calculate area under curve
    auc = np.trapz(cum_pos_prop, cum_neg_prop)
    
    # Calculate Gini
    gini = 2 * auc - 1
    
    return gini

## calculate_gini

In [3]:
def calculate_gini(pd_scores, bad_indicators):
    """
    Calculate Gini coefficient from scores and binary indicators
    
    Parameters:
    pd_scores: array-like of scores/probabilities
    bad_indicators: array-like of binary outcomes (0/1)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays and ensure they're numeric
    pd_scores = np.array(pd_scores, dtype=float)
    bad_indicators = np.array(bad_indicators, dtype=int)
    
    # Check for valid input data
    if len(pd_scores) == 0 or len(bad_indicators) == 0:
        return np.nan
    
    # Check if we have both good and bad cases (needed for ROC AUC)
    if len(np.unique(bad_indicators)) < 2:
        return np.nan
    
    # Calculate AUC using sklearn
    try:
        auc = roc_auc_score(bad_indicators, pd_scores)
        # Calculate Gini from AUC
        gini = 2 * auc - 1
        return gini
    except ValueError:
        return np.nan

## calculate_hybrid_gini

In [4]:
# def calculate_hybrid_gini(scores, labels):
#     """
#     Calculate Gini coefficient handling both PD values and three-digit scores
    
#     Parameters:
#     scores: array-like, contains either PD values (0-1) or three-digit scores
#     labels: array-like, binary values (0 or 1, where 1 indicates default)
    
#     Returns:
#     float: Gini coefficient
#     """
#     # Convert inputs to numpy arrays
#     scores = np.array(scores, dtype=float)
#     labels = np.array(labels, dtype=int)
    
#     # Basic validation
#     if len(scores) == 0 or len(labels) == 0:
#         return np.nan
    
#     if len(np.unique(labels)) < 2:
#         return np.nan
        
#     # Determine if scores are PD values or three-digit scores
#     # PD values are between 0 and 1
#     is_pd = np.all((scores >= 0) & (scores <= 1))
    
#     if is_pd:
#         try:
#             auc = roc_auc_score(labels, scores)
#             gini = 2 * auc - 1
#             return gini
#         except ValueError:
#             return np.nan
#     else:
#         # Handle as three-digit score
#         df = pd.DataFrame({'score': scores, 'label': labels})
#         df = df.sort_values('score', ascending=False)
        
#         total_pos = df['label'].sum()
#         total_neg = len(df) - total_pos
        
#         if total_pos == 0 or total_neg == 0:
#             return np.nan
        
#         cum_pos = df['label'].cumsum()
#         cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
#         cum_pos_prop = cum_pos / total_pos
#         cum_neg_prop = cum_neg / total_neg
        
#         gini = 1 - np.trapz(cum_pos_prop, cum_neg_prop)
#         return gini

## Modified one

def calculate_hybrid_gini(scores, labels):
    """
    Calculate Gini coefficient handling both PD values and three-digit scores
    
    Parameters:
    scores: array-like, contains either PD values (0-1) or three-digit scores
    labels: array-like, binary values (0 or 1, where 1 indicates default)
    
    Returns:
    float: Gini coefficient
    """
    # Convert inputs to numpy arrays
    scores = np.array(scores, dtype=float)
    labels = np.array(labels, dtype=int)
    
    # Basic validation
    if len(scores) == 0 or len(labels) == 0:
        return np.nan
    
    if len(np.unique(labels)) < 2:
        return np.nan
        
    # Determine if scores are PD values or three-digit scores
    # PD values are between 0 and 1
    is_pd = np.all((scores >= 0) & (scores <= 1))
    
    if is_pd:
        try:
            auc = roc_auc_score(labels, scores)
            gini = 2 * auc - 1
            return gini
        except ValueError:
            return np.nan
    else:
        # Handle as three-digit score
        df = pd.DataFrame({'score': scores, 'label': labels})
        # Sort by score in ascending order since higher score means higher risk
        df = df.sort_values('score', ascending=True)
        
        total_pos = df['label'].sum()
        total_neg = len(df) - total_pos
        
        if total_pos == 0 or total_neg == 0:
            return np.nan
        
        cum_pos = df['label'].cumsum()
        cum_neg = np.arange(1, len(df) + 1) - cum_pos
        
        cum_pos_prop = cum_pos / total_pos
        cum_neg_prop = cum_neg / total_neg
        
        # Calculate area under curve
        auc = np.trapz(cum_pos_prop, cum_neg_prop)
        
        # Calculate Gini using the same formula as PD values
        gini = 2 * auc - 1
        return gini

## calculate_periodic_gini_threedigit

In [5]:
# Main processing code
def calculate_periodic_gini_threedigit(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini_for_threedigitscore(x[score_column], x[label_column])
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_gini

In [6]:
def calculate_periodic_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

## calculate_periodic_hybrid_gini

In [7]:
def calculate_periodic_hybrid_gini(df, score_column, label_column, namecolumn):
    """
    Calculate periodic Gini coefficients for mixed score types
    
    Parameters:
    df: DataFrame with disbursement dates and score/label columns
    score_column: name of the score column
    label_column: name of the label column
    """
    # Input validation
    required_columns = ['disbursementdate', score_column, label_column]
    if not all(col in df.columns for col in required_columns):
        raise ValueError(f"Missing required columns. Need: {required_columns}")
        
    # Create a copy to avoid modifying original dataframe
    df = df.copy()
    
    # Ensure date is datetime type
    df['disbursementdate'] = pd.to_datetime(df['disbursementdate'])
    
    # Ensure score and label columns are numeric
    df[score_column] = pd.to_numeric(df[score_column], errors='coerce')
    df[label_column] = pd.to_numeric(df[label_column], errors='coerce')
    
    # Drop rows with invalid values
    df = df.dropna(subset=[score_column, label_column])
    
    # Calculate weekly Gini
    df['week'] = df['disbursementdate'].dt.to_period('W')
    weekly_gini = df.groupby('week').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 10 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    weekly_gini['period'] = 'Week'
    weekly_gini['start_date'] = weekly_gini['week'].apply(lambda x: x.to_timestamp())
    weekly_gini['end_date'] = weekly_gini['start_date'] + pd.Timedelta(days=6)
    weekly_gini = weekly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Calculate monthly Gini
    df['month'] = df['disbursementdate'].dt.to_period('M')
    monthly_gini = df.groupby('month').apply(
        lambda x: calculate_hybrid_gini(x[score_column], x[label_column])
        if len(x) >= 20 else np.nan  # Only calculate if we have enough samples
    ).reset_index(name='gini')
    monthly_gini['period'] = 'Month'
    monthly_gini['start_date'] = monthly_gini['month'].apply(lambda x: x.to_timestamp())
    monthly_gini['end_date'] = monthly_gini['start_date'] + pd.DateOffset(months=1) - pd.Timedelta(days=1)
    monthly_gini = monthly_gini[['start_date', 'end_date', 'gini', 'period']]
    
    # Combine and sort results
    gini_results = pd.concat([weekly_gini, monthly_gini])
    gini_results = gini_results.sort_values(by='start_date').reset_index(drop=True)
    
    # Add metadata columns
    gini_results['Model_Name'] = score_column
    gini_results['version'] = '1.1.0'
    gini_results['bad_rate'] = namecolumn
    gini_results.rename(columns={'gini': f'{score_column}_{namecolumn}_gini'}, inplace=True)
    
    return gini_results

In [8]:
a = " `prj-prod-dataplatform.risk_credit_mis.application_score_master`"

# App Score FPD10

In [9]:
sq = f"""with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score ,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from appscore;"""

dfappscorefpd10 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 4e5850ff-3507-467b-8e3a-8b1953f830a3 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [10]:
dfappscorefpd10.sample(5)

Unnamed: 0,disbursementdate,digitalLoanAccountId,apps_score,ln_fpd10_flag,ln_mature_fpd10_flag
88782,2024-05-19 14:09:22,ed7f8b0e-437a-4ea5-b410-de9ca7db48be,0.6975683921310817,1,1
117243,2024-03-17 18:46:16,0b4d7012-6e4b-4dcb-b162-5d6dccbf7db4,0.4875922856354744,0,1
44217,2023-12-23 10:22:31,96f8b311-1240-4683-8ef3-deb30def5501,0.5513070297353566,0,1
89815,2024-09-29 16:36:53,36edbe3e-1d9b-4823-8809-07901431a1bf,0.5261382968626116,0,1
78431,2024-09-27 18:18:18,83400ad0-b6fb-4b7d-a6d0-d313086f38cc,0.6177352728682435,1,1


In [11]:
gini_results = calculate_periodic_gini(dfappscorefpd10, 'apps_score', 'ln_fpd10_flag', 'FPD10')

  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


In [12]:
gini_results.head()

Unnamed: 0,start_date,end_date,apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,apps_score,1.1.0,FPD10


In [13]:
appscoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD10.shape}")
appscoreFPD10.columns.values

The shape of dataframe after copy is:	(116, 7)


array(['start_date', 'end_date', 'apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [14]:
gini_results.head()

Unnamed: 0,start_date,end_date,apps_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.54823,Week,apps_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.384648,Month,apps_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.5,Week,apps_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.555195,Week,apps_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.004831,Week,apps_score,1.1.0,FPD10


# App Score FPD30

In [15]:
sq = f"""
with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from appscore;
"""

dfappscorefpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 1d8da417-2ad0-4fd8-a22a-7325bf6bc393 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [16]:
dfappscorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,apps_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.4981627319801684,0,1
1,2024-08-08 19:11:28,574ba16b-8e38-4e6d-8b75-2eb87b28c65c,0.5205239959548125,0,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.5149792575653369,0,1
3,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.5913036946119814,0,1
4,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.2397503307098872,0,1


In [17]:
gini_results = calculate_periodic_gini(dfappscorefpd30, 'apps_score', 'ln_fpd30_flag', 'FPD30')
# gini_results['bad_rate'] = 'FPD30'
appscoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFPD30.shape}")
appscoreFPD30.columns.values

The shape of dataframe after copy is:	(115, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'apps_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [18]:
gini_results.sample(5)

Unnamed: 0,start_date,end_date,apps_score_FPD30_gini,period,Model_Name,version,bad_rate
14,2023-08-14,2023-08-20,0.290723,Week,apps_score,1.1.0,FPD30
33,2023-12-01,2023-12-31,0.440162,Month,apps_score,1.1.0,FPD30
32,2023-11-27,2023-12-03,0.382282,Week,apps_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.521893,Week,apps_score,1.1.0,FPD30
19,2023-09-11,2023-09-17,0.49655,Week,apps_score,1.1.0,FPD30


# App Score FSPD30

In [19]:
sq = f"""with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from appscore;
"""

dfappscorefspd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID 30f15878-25f8-4310-8b1e-58f29e33db6c successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [20]:
gini_results = calculate_periodic_gini(dfappscorefspd30, 'apps_score', 'ln_fspd30_flag', 'FSPD30')
# gini_results['bad_rate'] = 'FSPD30'
appscoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSPD30.shape}")
appscoreFSPD30.columns.values

The shape of dataframe after copy is:	(109, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'apps_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [21]:
gini_results.tail()

Unnamed: 0,start_date,end_date,apps_score_FSPD30_gini,period,Model_Name,version,bad_rate
104,2025-01-06,2025-01-12,0.271966,Week,apps_score,1.1.0,FSPD30
105,2025-01-13,2025-01-19,0.304202,Week,apps_score,1.1.0,FSPD30
106,2025-01-20,2025-01-26,0.271975,Week,apps_score,1.1.0,FSPD30
107,2025-01-27,2025-02-02,0.25901,Week,apps_score,1.1.0,FSPD30
108,2025-02-01,2025-02-28,0.295258,Month,apps_score,1.1.0,FSPD30


# App Score FSTPD30

In [22]:
sq = f"""with appscore as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_apps_score apps_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    beta_apps_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from appscore;
"""

dfappscorefstpd30 = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')

Job ID adfb7cea-941c-4480-9eaf-3d0461658983 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [23]:
gini_results = calculate_periodic_gini(dfappscorefstpd30, 'apps_score', 'ln_fstpd30_flag', 'FSTPD30')
# gini_results['bad_rate'] = 'FSTPD30'
appscoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{appscoreFSTPD30.shape}")
appscoreFSTPD30.columns.values

The shape of dataframe after copy is:	(104, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'apps_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [24]:
appscoreFSTPD30.head()

Unnamed: 0,start_date,end_date,apps_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.465098,Week,apps_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.33742,Month,apps_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.4133,Week,apps_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.395676,Week,apps_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.314497,Week,apps_score,1.1.0,FSTPD30


# Combining App Score

In [25]:
import functools

dataframes = [appscoreFPD10, appscoreFPD30, appscoreFSPD30, appscoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'apps_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'apps_score_FPD30_gini',
       'apps_score_FSPD30_gini', 'apps_score_FSTPD30_gini'], dtype=object)

In [26]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','apps_score_FSTPD30_gini','apps_score_FSPD30_gini',
       'apps_score_FPD30_gini', 'apps_score_FPD10_gini']].copy()
final_df.dtypes

start_date                 datetime64[ns]
end_date                   datetime64[ns]
period                             object
Model_Name                         object
version                            object
bad_rate                           object
apps_score_FSTPD30_gini           float64
apps_score_FSPD30_gini            float64
apps_score_FPD30_gini             float64
apps_score_FPD10_gini             float64
dtype: object

## Creating app score table 

In [27]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=45f11c76-c989-4914-9b6f-7550118ce211>

In [28]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('apps_score_FSTPD30_gini', 'FLOAT'),
    bigquery.SchemaField('apps_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('apps_score_FPD30_gini', 'FLOAT'),
    bigquery.SchemaField('apps_score_fpd10_gini', 'FLOAT')
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_apps_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=05ecffff-d28c-40ae-92c6-8bbae0b99f63>

# sb_demo_score

## FPD10

In [29]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 6ae793b4-45aa-4f4c-8056-ab7af5c416d5 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [30]:
df_sb_demo_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_demo_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.1960012463880533,0,1
1,2024-08-08 19:11:28,574ba16b-8e38-4e6d-8b75-2eb87b28c65c,0.111709086610038,0,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.0758802309114607,0,1
3,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.1020244351792594,0,1
4,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.088430295541893,0,1


In [31]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd10, 'beta_demo_score', 'ln_fpd10_flag', 'FPD10')
sb_demo_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD10.shape}")
sb_demo_scoreFPD10.columns.values

The shape of dataframe after copy is:	(116, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [32]:
sb_demo_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,beta_demo_score_FPD10_gini,period,Model_Name,version,bad_rate
111,2025-02-17,2025-02-23,0.241132,Week,beta_demo_score,1.1.0,FPD10
112,2025-02-24,2025-03-02,0.25044,Week,beta_demo_score,1.1.0,FPD10
113,2025-03-01,2025-03-31,0.237316,Month,beta_demo_score,1.1.0,FPD10
114,2025-03-03,2025-03-09,0.258517,Week,beta_demo_score,1.1.0,FPD10
115,2025-03-10,2025-03-16,-0.384615,Week,beta_demo_score,1.1.0,FPD10


## FPD30

In [33]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 8105a488-320b-401b-9652-8e4e692dc864 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [34]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefpd30, 'beta_demo_score', 'ln_fpd30_flag', 'FPD30')
sb_demo_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFPD30.shape}")
sb_demo_scoreFPD30.columns.values

The shape of dataframe after copy is:	(115, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_demo_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [35]:
sb_demo_scoreFPD30.head() 

Unnamed: 0,start_date,end_date,beta_demo_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.289091,Week,beta_demo_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.183001,Month,beta_demo_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.207407,Week,beta_demo_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.404959,Week,beta_demo_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,-0.243641,Week,beta_demo_score,1.1.0,FPD30


## FSPD30

In [36]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score,
    ln_fspd30_flag,   -- fspd30
	ln_mature_fspd30_flag,	--- fspd30 observation
	FROM 
   {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 52252e66-4cdc-4732-9cc1-1a471daef4ae successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [37]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefspd30, 'beta_demo_score', 'ln_fspd30_flag', 'FSPD30')
sb_demo_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSPD30.shape}")
sb_demo_scoreFSPD30.columns.values

The shape of dataframe after copy is:	(109, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_demo_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [38]:
sb_demo_scoreFSPD30.head()

Unnamed: 0,start_date,end_date,beta_demo_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.087601,Week,beta_demo_score,1.1.0,FSPD30
1,2023-06-01,2023-06-30,0.212694,Month,beta_demo_score,1.1.0,FSPD30
2,2023-06-05,2023-06-11,0.239192,Week,beta_demo_score,1.1.0,FSPD30
3,2023-06-12,2023-06-18,0.365591,Week,beta_demo_score,1.1.0,FSPD30
4,2023-06-19,2023-06-25,0.077598,Week,beta_demo_score,1.1.0,FSPD30


## FSTPD30

In [39]:
sq = f"""
with sb_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_demo_score ,
    ln_fstpd30_flag,   -- fstpd30
	ln_mature_fstpd30_flag,	--- fstpd30 observation
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    beta_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_demo_score;
"""

df_sb_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID fef721e1-a948-4b53-9717-71794cf2863b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [40]:
gini_results = calculate_periodic_gini(df_sb_demo_scorefstpd30, 'beta_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
sb_demo_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{sb_demo_scoreFSTPD30.shape}")
sb_demo_scoreFSTPD30.columns.values

The shape of dataframe after copy is:	(104, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_demo_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [41]:
sb_demo_scoreFSTPD30.head()

Unnamed: 0,start_date,end_date,beta_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.21024,Week,beta_demo_score,1.1.0,FSTPD30
1,2023-06-01,2023-06-30,0.192045,Month,beta_demo_score,1.1.0,FSTPD30
2,2023-06-05,2023-06-11,0.291754,Week,beta_demo_score,1.1.0,FSTPD30
3,2023-06-12,2023-06-18,0.163873,Week,beta_demo_score,1.1.0,FSTPD30
4,2023-06-19,2023-06-25,0.098133,Week,beta_demo_score,1.1.0,FSTPD30


# Combining sb demo score

In [42]:
import functools

dataframes = [sb_demo_scoreFPD10, sb_demo_scoreFPD30, sb_demo_scoreFSPD30, sb_demo_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'beta_demo_score_FPD30_gini',
       'beta_demo_score_FSPD30_gini', 'beta_demo_score_FSTPD30_gini'],
      dtype=object)

In [43]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','beta_demo_score_FPD10_gini','beta_demo_score_FPD30_gini',
       'beta_demo_score_FSPD30_gini', 'beta_demo_score_FSTPD30_gini']].copy()

## creating sb demo score table 

In [44]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d14007c8-c18a-4844-a1ee-6ab73ef43a53>

In [45]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('beta_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('beta_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('beta_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('beta_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=cf8d67af-c787-4b26-93a6-21f032dbc39b>

# s_cic_score

## FPD10

In [46]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID b6c72e8a-a751-46cb-9666-c083e8cc09df successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [47]:
df_s_cic_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,cic_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.122659908078722,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.1382686682555339,0,1
2,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.1347365952237831,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.1092633117793074,0,1
4,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.2323093149610403,0,1


In [48]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd10, 'cic_score', 'ln_fpd10_flag', 'FPD10')
s_cic_scoreFPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD10.shape}")
s_cic_scoreFPD10.columns.values

The shape of dataframe after copy is:	(115, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [49]:
s_cic_scoreFPD10.tail()

Unnamed: 0,start_date,end_date,cic_score_FPD10_gini,period,Model_Name,version,bad_rate
110,2025-02-17,2025-02-23,0.231247,Week,cic_score,1.1.0,FPD10
111,2025-02-24,2025-03-02,0.186812,Week,cic_score,1.1.0,FPD10
112,2025-03-01,2025-03-31,0.218695,Month,cic_score,1.1.0,FPD10
113,2025-03-03,2025-03-09,0.223482,Week,cic_score,1.1.0,FPD10
114,2025-03-10,2025-03-16,-0.181818,Week,cic_score,1.1.0,FPD10


## FPD30

In [50]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
  {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID c3f5143d-9cb2-40ab-a78c-bc44491dad68 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [51]:
df_s_cic_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,cic_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.122659908078722,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.1382686682555339,0,1
2,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.1347365952237831,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.1092633117793074,0,1
4,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.2323093149610403,0,1


In [52]:
gini_results = calculate_periodic_gini(df_s_cic_scorefpd30, 'cic_score', 'ln_fpd30_flag', 'FPD30')
s_cic_scoreFPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFPD30.shape}")
s_cic_scoreFPD30.columns.values

The shape of dataframe after copy is:	(114, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [53]:
s_cic_scoreFPD30.head()

Unnamed: 0,start_date,end_date,cic_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.261538,Week,cic_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.266733,Month,cic_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.19697,Week,cic_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,-0.052326,Week,cic_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,0.435484,Week,cic_score,1.1.0,FPD30


In [54]:
s_cic_scoreFPD30.describe()

Unnamed: 0,start_date,end_date,cic_score_FPD30_gini
count,114,114,114.0
mean,2024-04-17 14:06:18.947368448,2024-04-28 02:44:12.631578880,0.260761
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.103571
25%,2023-11-07 18:00:00,2023-11-20 18:00:00,0.201259
50%,2024-04-18 12:00:00,2024-04-29 00:00:00,0.264002
75%,2024-09-28 06:00:00,2024-10-04 12:00:00,0.31726
max,2025-03-03 00:00:00,2025-03-31 00:00:00,0.74915
std,,,0.110855


## FSPD30

In [55]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID c7e0d425-7366-4fa2-b3f2-ff0a3b89c8f3 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [56]:
gini_results = calculate_periodic_gini(df_s_cic_scorefspd30, 'cic_score', 'ln_fspd30_flag', 'FSPD30')
s_cic_scoreFSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSPD30.shape}")
s_cic_scoreFSPD30.columns.values

The shape of dataframe after copy is:	(108, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [57]:
s_cic_scoreFSPD30.describe()

Unnamed: 0,start_date,end_date,cic_score_FSPD30_gini
count,108,108,108.0
mean,2024-03-31 12:13:20,2024-04-11 01:33:20,0.261927
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.089886
25%,2023-10-31 12:00:00,2023-11-10 06:00:00,0.208582
50%,2024-04-01 00:00:00,2024-04-10 12:00:00,0.26866
75%,2024-09-01 06:00:00,2024-09-09 18:00:00,0.307823
max,2025-02-01 00:00:00,2025-02-28 00:00:00,0.545299
std,,,0.077158


In [58]:
s_cic_scoreFSPD30.tail()

Unnamed: 0,start_date,end_date,cic_score_FSPD30_gini,period,Model_Name,version,bad_rate
103,2025-01-06,2025-01-12,0.164699,Week,cic_score,1.1.0,FSPD30
104,2025-01-13,2025-01-19,0.171142,Week,cic_score,1.1.0,FSPD30
105,2025-01-20,2025-01-26,0.223633,Week,cic_score,1.1.0,FSPD30
106,2025-01-27,2025-02-02,0.227029,Week,cic_score,1.1.0,FSPD30
107,2025-02-01,2025-02-28,0.338045,Month,cic_score,1.1.0,FSPD30


## FSTPD30

In [59]:
sq = f"""
with s_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_cic_score;
"""

df_s_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID 951330cc-ab48-45cd-82b3-0456ba2fd0ff successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [60]:
gini_results = calculate_periodic_gini(df_s_cic_scorefstpd30, 'cic_score', 'ln_fstpd30_flag', 'FSTPD30')
s_cic_scoreFSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{s_cic_scoreFSTPD30.shape}")
s_cic_scoreFSTPD30.columns.values

The shape of dataframe after copy is:	(103, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'cic_score_FSTPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [61]:
s_cic_scoreFSTPD30.describe()

Unnamed: 0,start_date,end_date,cic_score_FSTPD30_gini
count,103,103,103.0
mean,2024-03-17 05:35:32.038834944,2024-03-27 19:20:23.300970752,0.250638
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.065741
25%,2023-10-26 12:00:00,2023-11-02 12:00:00,0.212329
50%,2024-03-18 00:00:00,2024-03-31 00:00:00,0.254645
75%,2024-08-08 12:00:00,2024-08-21 12:00:00,0.29292
max,2025-01-01 00:00:00,2025-01-31 00:00:00,0.479082
std,,,0.072338


In [62]:
s_cic_scoreFSTPD30.tail()

Unnamed: 0,start_date,end_date,cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
98,2024-12-09,2024-12-15,0.251476,Week,cic_score,1.1.0,FSTPD30
99,2024-12-16,2024-12-22,0.197458,Week,cic_score,1.1.0,FSTPD30
100,2024-12-23,2024-12-29,0.208664,Week,cic_score,1.1.0,FSTPD30
101,2024-12-30,2025-01-05,0.184186,Week,cic_score,1.1.0,FSTPD30
102,2025-01-01,2025-01-31,0.254645,Month,cic_score,1.1.0,FSTPD30


# Combining s_cic_score

In [63]:
import functools

dataframes = [s_cic_scoreFPD10, s_cic_scoreFPD30, s_cic_scoreFSPD30, s_cic_scoreFSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'cic_score_FPD30_gini',
       'cic_score_FSPD30_gini', 'cic_score_FSTPD30_gini'], dtype=object)

In [64]:
final_df = final_df[['start_date', 'end_date', 'period',
       'Model_Name', 'version', 'bad_rate','cic_score_FPD10_gini','cic_score_FPD30_gini', 'cic_score_FSPD30_gini', 'cic_score_FSTPD30_gini']].copy()

## Creating the table

In [65]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=a7506541-ecdb-4c74-9f72-6c3cda6ff192>

In [66]:


import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f797f361-1edb-4b33-b98e-8b5d540a09ce>

# sb_stack_score

## FPD10

In [67]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score beta_stack_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd10.head()

Job ID b41ebb86-f0c4-4c18-ac81-baf07d79a8c0 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.1079624429034287,0,1
1,2024-08-08 19:11:28,574ba16b-8e38-4e6d-8b75-2eb87b28c65c,0.0737160749246524,0,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.0603778199617865,0,1
3,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.1062656910526088,0,1
4,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0114227896554983,0,1


In [68]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd10, 'beta_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(116, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [69]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD10_gini
count,116,116,116.0
mean,2024-04-18 02:04:08.275862016,2024-04-28 12:49:39.310344704,0.339805
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.030303
25%,2023-11-04 18:00:00,2023-11-17 06:00:00,0.295132
50%,2024-04-18 12:00:00,2024-04-29 00:00:00,0.324847
75%,2024-09-30 06:00:00,2024-10-07 18:00:00,0.41034
max,2025-03-10 00:00:00,2025-03-31 00:00:00,0.670897
std,,,0.09875


In [70]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.506255,Week,beta_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.345668,Month,beta_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.407799,Week,beta_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.670897,Week,beta_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,-0.030303,Week,beta_stack_score,1.1.0,FPD10


## FPD30

In [71]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefpd30.head()

Job ID 6198eee1-1609-48b9-a972-4adb40653624 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.1079624429034287,0,1
1,2024-08-08 19:11:28,574ba16b-8e38-4e6d-8b75-2eb87b28c65c,0.0737160749246524,0,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.0603778199617865,0,1
3,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.1062656910526088,0,1
4,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0114227896554983,0,1


In [72]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefpd30, 'beta_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(115, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [73]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD30_gini
count,115,115,115.0
mean,2024-04-15 06:03:07.826086912,2024-04-25 17:44:20.869565184,0.359498
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.036145
25%,2023-11-03 12:00:00,2023-11-15 12:00:00,0.311527
50%,2024-04-15 00:00:00,2024-04-28 00:00:00,0.346408
75%,2024-09-26 12:00:00,2024-10-03 00:00:00,0.416932
max,2025-03-03 00:00:00,2025-03-31 00:00:00,0.654729
std,,,0.094191


In [74]:
M2FPD30.tail()

Unnamed: 0,start_date,end_date,beta_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
110,2025-02-10,2025-02-16,0.423046,Week,beta_stack_score,1.1.0,FPD30
111,2025-02-17,2025-02-23,0.334034,Week,beta_stack_score,1.1.0,FPD30
112,2025-02-24,2025-03-02,0.343103,Week,beta_stack_score,1.1.0,FPD30
113,2025-03-01,2025-03-31,0.279692,Month,beta_stack_score,1.1.0,FPD30
114,2025-03-03,2025-03-09,0.247036,Week,beta_stack_score,1.1.0,FPD30


## FSPD30

In [75]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefspd30.head()

Job ID 9d060a73-484d-42a0-8645-fc009ebb920a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2024-07-21 14:55:36,a6162692-9ae1-46da-a98a-b35fdd0e573a,0.0351827717979695,0,1
1,2024-10-22 14:49:26,48497aca-9337-48f6-b015-12be0787333d,0.024071980523393,0,1
2,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0753455691896986,0,1
3,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0976092748418532,0,1
4,2024-12-15 12:58:01,950353da-8475-4d52-964b-e334dcbd95dc,0.2325342908802298,0,1


In [76]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefspd30, 'beta_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

The shape of dataframe after copy is:	(109, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FSPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [77]:
M3FSPD30.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FSPD30_gini
count,109,109,109.0
mean,2024-03-29 04:50:38.532110080,2024-04-08 17:10:27.522935808,0.361575
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.179581
25%,2023-10-30 00:00:00,2023-11-05 00:00:00,0.311129
50%,2024-04-01 00:00:00,2024-04-07 00:00:00,0.343793
75%,2024-09-01 00:00:00,2024-09-08 00:00:00,0.419151
max,2025-02-01 00:00:00,2025-02-28 00:00:00,0.551886
std,,,0.076845


In [78]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,beta_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
104,2025-01-06,2025-01-12,0.310515,Week,beta_stack_score,1.1.0,FSPD30
105,2025-01-13,2025-01-19,0.32665,Week,beta_stack_score,1.1.0,FSPD30
106,2025-01-20,2025-01-26,0.318566,Week,beta_stack_score,1.1.0,FSPD30
107,2025-01-27,2025-02-02,0.311153,Week,beta_stack_score,1.1.0,FSPD30
108,2025-02-01,2025-02-28,0.285892,Month,beta_stack_score,1.1.0,FSPD30


## FSTPD30

In [79]:
# sb_stack_score

sq = f"""
with sb_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    beta_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    beta_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sb_stack_score;
"""

df_sb_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

df_sb_stack_scorefstpd30.head()

Job ID aaf92df6-ea32-402c-8937-c4c73627105f successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,beta_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.1079624429034287,0,1
1,2024-08-08 19:11:28,574ba16b-8e38-4e6d-8b75-2eb87b28c65c,0.0737160749246524,1,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.0603778199617865,0,1
3,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.1062656910526088,0,1
4,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0114227896554983,0,1


In [80]:
gini_results = calculate_periodic_gini(df_sb_stack_scorefstpd30, 'beta_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(104, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'beta_stack_score_FSTPD30_gini',
       'period', 'Model_Name', 'version', 'bad_rate'], dtype=object)

In [81]:
M4FSTPD30.describe()

Unnamed: 0,start_date,end_date,beta_stack_score_FSTPD30_gini
count,104,104,104.0
mean,2024-03-14 22:50:46.153846272,2024-03-25 11:32:18.461538560,0.338798
min,2023-05-29 00:00:00,2023-06-04 00:00:00,0.160057
25%,2023-10-21 06:00:00,2023-10-30 12:00:00,0.298363
50%,2024-03-14 12:00:00,2024-03-27 12:00:00,0.331005
75%,2024-08-06 18:00:00,2024-08-19 18:00:00,0.381961
max,2025-01-01 00:00:00,2025-01-31 00:00:00,0.500133
std,,,0.059793


In [82]:
M4FSTPD30.tail()

Unnamed: 0,start_date,end_date,beta_stack_score_FSTPD30_gini,period,Model_Name,version,bad_rate
99,2024-12-09,2024-12-15,0.319254,Week,beta_stack_score,1.1.0,FSTPD30
100,2024-12-16,2024-12-22,0.289116,Week,beta_stack_score,1.1.0,FSTPD30
101,2024-12-23,2024-12-29,0.309834,Week,beta_stack_score,1.1.0,FSTPD30
102,2024-12-30,2025-01-05,0.344271,Week,beta_stack_score,1.1.0,FSTPD30
103,2025-01-01,2025-01-31,0.31914,Month,beta_stack_score,1.1.0,FSTPD30


## Combining the dataframes

In [83]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'beta_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'beta_stack_score_FPD30_gini',
       'beta_stack_score_FSPD30_gini', 'beta_stack_score_FSTPD30_gini'],
      dtype=object)

In [84]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'beta_stack_score_FPD10_gini', 'beta_stack_score_FPD30_gini',
                     'beta_stack_score_FSPD30_gini', 'beta_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [85]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=68b32ebe-121b-4246-a816-346364fbf1ff>

In [86]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('beta_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('beta_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('beta_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('beta_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sb_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=cb871d1a-f16b-4e2a-9a4e-33c01acb2cbe>

# sa_stack_score

## FPD10

In [87]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score ,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    alpha_stack_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd10.head()

Job ID 808f72e1-d8d5-420a-ae00-1847094f528b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.0699738064823699,0,1
1,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.0390741384219851,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.019327939422041,0,1
3,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.0614016441193192,0,1
4,2023-12-07 12:25:39,c464e02f-5b1a-460a-9686-9facb57c699f,0.0273728856440862,0,1


In [88]:
sa_stack_scorefpd10.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.0699738064823699,0,1
1,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.0390741384219851,0,1
2,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.019327939422041,0,1
3,2025-01-25 19:05:49,e549776f-7a2d-4b6c-9edc-e3b59e8955f6,0.0614016441193192,0,1
4,2023-12-07 12:25:39,c464e02f-5b1a-460a-9686-9facb57c699f,0.0273728856440862,0,1


In [89]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd10, 'alpha_stack_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.head()

The shape of dataframe after copy is:	(115, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,alpha_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.579487,Week,alpha_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.363135,Month,alpha_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.632035,Week,alpha_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.764706,Week,alpha_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.103226,Week,alpha_stack_score,1.1.0,FPD10


In [90]:
M1FPD10.describe()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD10_gini
count,115,115,115.0
mean,2024-04-20 10:13:33.913043456,2024-04-30 21:54:46.956521728,0.381108
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.181818
25%,2023-11-09 12:00:00,2023-11-22 12:00:00,0.336383
50%,2024-04-22 00:00:00,2024-04-30 00:00:00,0.374921
75%,2024-09-30 12:00:00,2024-10-09 12:00:00,0.451053
max,2025-03-10 00:00:00,2025-03-31 00:00:00,0.764706
std,,,0.126634


In [91]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.579487,Week,alpha_stack_score,1.1.0,FPD10
1,2023-06-01,2023-06-30,0.363135,Month,alpha_stack_score,1.1.0,FPD10
2,2023-06-05,2023-06-11,0.632035,Week,alpha_stack_score,1.1.0,FPD10
3,2023-06-12,2023-06-18,0.764706,Week,alpha_stack_score,1.1.0,FPD10
4,2023-06-19,2023-06-25,0.103226,Week,alpha_stack_score,1.1.0,FPD10


## FPD30

In [92]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    alpha_stack_score  is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefpd30.head()

Job ID 15dc0061-1389-4714-8a9f-2a37f6f99696 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0459328399438693,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.0886808251109808,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
4,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.1159329009610408,0,1


In [93]:
sa_stack_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0459328399438693,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.0886808251109808,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
4,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.1159329009610408,0,1


In [94]:
gini_results = calculate_periodic_gini(sa_stack_scorefpd30, 'alpha_stack_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(114, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'alpha_stack_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [95]:
M2FPD30.describe()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD30_gini
count,114,114,114.0
mean,2024-04-17 14:06:18.947368448,2024-04-28 02:44:12.631578880,0.409705
min,2023-05-29 00:00:00,2023-06-04 00:00:00,-0.262626
25%,2023-11-07 18:00:00,2023-11-20 18:00:00,0.359126
50%,2024-04-18 12:00:00,2024-04-29 00:00:00,0.400573
75%,2024-09-28 06:00:00,2024-10-04 12:00:00,0.478903
max,2025-03-03 00:00:00,2025-03-31 00:00:00,0.784014
std,,,0.129604


In [96]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,alpha_stack_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-05-29,2023-06-04,0.579487,Week,alpha_stack_score,1.1.0,FPD30
1,2023-06-01,2023-06-30,0.358308,Month,alpha_stack_score,1.1.0,FPD30
2,2023-06-05,2023-06-11,0.632035,Week,alpha_stack_score,1.1.0,FPD30
3,2023-06-12,2023-06-18,0.732558,Week,alpha_stack_score,1.1.0,FPD30
4,2023-06-19,2023-06-25,0.103226,Week,alpha_stack_score,1.1.0,FPD30


## FSTPD30

In [97]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    alpha_stack_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefspd30.head()

Job ID a848f065-c14b-4ed8-86f9-a184de8631c1 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fspd30_flag,ln_mature_fspd30_flag
0,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0459328399438693,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.0886808251109808,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
4,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.1159329009610408,0,1


In [98]:
gini_results = calculate_periodic_gini(sa_stack_scorefspd30, 'alpha_stack_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
M3FSPD30.columns.values

The shape of dataframe after copy is:	(108, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'alpha_stack_score_FSPD30_gini',
       'period', 'Model_Name', 'version', 'bad_rate'], dtype=object)

In [99]:
M3FSPD30.tail()

Unnamed: 0,start_date,end_date,alpha_stack_score_FSPD30_gini,period,Model_Name,version,bad_rate
103,2025-01-06,2025-01-12,0.281455,Week,alpha_stack_score,1.1.0,FSPD30
104,2025-01-13,2025-01-19,0.332836,Week,alpha_stack_score,1.1.0,FSPD30
105,2025-01-20,2025-01-26,0.349004,Week,alpha_stack_score,1.1.0,FSPD30
106,2025-01-27,2025-02-02,0.394794,Week,alpha_stack_score,1.1.0,FSPD30
107,2025-02-01,2025-02-28,0.477943,Month,alpha_stack_score,1.1.0,FSPD30


## FSTPD30

In [100]:
# sa_stack_score

sq = f"""
with sa_stack_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    alpha_stack_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-06-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    alpha_stack_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from sa_stack_score;
"""

sa_stack_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

sa_stack_scorefstpd30.head()

Job ID 0884e5c9-b97f-49f4-8581-f9a5b05e5172 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
0,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.0459328399438693,0,1
1,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.0771153012056294,0,1
2,2024-06-14 13:20:28,5f412bbb-9b17-4761-8023-1a08640ad81c,0.0886808251109808,0,1
3,2024-12-01 12:21:56,1eac1869-c3c5-4ed8-aa58-aeb90c730784,0.026754988001112,0,1
4,2024-08-11 13:28:11,d41da9d2-9d65-4f4f-bfb6-398eaa9d10e1,0.1159329009610408,0,1


In [101]:
sa_stack_scorefstpd30.tail()

Unnamed: 0,disbursementdate,digitalLoanAccountId,alpha_stack_score,ln_fstpd30_flag,ln_mature_fstpd30_flag
109267,2024-12-15 18:07:43,bf74ac7f-b3e6-4cf5-bddc-4d5efbc55cb0,0.1932840031979862,1,1
109268,2024-09-07 15:22:21,d8037d32-0e24-4bcf-8052-84ff91070002,0.1020097878318281,1,1
109269,2024-11-20 18:35:51,7bf3cc80-e339-4a69-b862-b98634a5c9d5,0.1194238265430105,1,1
109270,2024-09-24 17:14:50,4d135fe9-82fe-4a3e-808e-83af626c54ef,0.0554706222763464,1,1
109271,2024-12-21 11:19:15,1c20727c-0156-4057-8b60-5bd9e280e18f,0.0486991592702407,1,1


In [102]:
gini_results = calculate_periodic_gini(sa_stack_scorefstpd30, 'alpha_stack_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
M4FSTPD30.columns.values

The shape of dataframe after copy is:	(103, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'alpha_stack_score_FSTPD30_gini',
       'period', 'Model_Name', 'version', 'bad_rate'], dtype=object)

## Combining the dataframes

In [103]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'alpha_stack_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate',
       'alpha_stack_score_FPD30_gini', 'alpha_stack_score_FSPD30_gini',
       'alpha_stack_score_FSTPD30_gini'], dtype=object)

In [104]:
final_df = final_df[['start_date', 'end_date', 'period', 'Model_Name', 'version', 'bad_rate', 'alpha_stack_score_FPD10_gini', 'alpha_stack_score_FPD30_gini',  'alpha_stack_score_FSPD30_gini', 'alpha_stack_score_FSTPD30_gini']].copy()

## Creating the table 

In [105]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=7c549ac2-61fe-4bbe-a313-39222506eef1>

In [106]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('alpha_stack_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('alpha_stack_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('alpha_stack_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('alpha_stack_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_sa_stack_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=87e50f9d-0923-4446-a27a-a9c572a9ef1f>

# gen_credo_score

## FPD10

In [107]:


sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
     {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd10.head()

Job ID 6e310586-9cab-495a-8a35-76db22bca631 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,credo_gen_score,ln_fpd10_flag,ln_mature_fpd10_flag
0,2024-07-21 14:55:36,a6162692-9ae1-46da-a98a-b35fdd0e573a,0.0468581067084864,0,1
1,2024-10-22 14:49:26,48497aca-9337-48f6-b015-12be0787333d,0.113452492288366,0,1
2,2024-09-15 09:21:50,2f8156e6-5c5b-454c-92d9-628291e6f99e,0.134107791698632,0,1
3,2024-11-24 18:45:09,5be2f2c7-2401-48bc-a384-6f62c22866db,0.117975604455025,0,1
4,2024-12-15 12:58:01,950353da-8475-4d52-964b-e334dcbd95dc,0.190540321426483,0,1


In [108]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd10, 'credo_gen_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

The shape of dataframe after copy is:	(142, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'credo_gen_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [109]:
M1FPD10.head()

Unnamed: 0,start_date,end_date,credo_gen_score_FPD10_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.246725,Month,credo_gen_score,1.1.0,FPD10
1,2023-01-02,2023-01-08,0.301125,Week,credo_gen_score,1.1.0,FPD10
2,2023-01-09,2023-01-15,0.2734,Week,credo_gen_score,1.1.0,FPD10
3,2023-01-16,2023-01-22,0.667532,Week,credo_gen_score,1.1.0,FPD10
4,2023-01-23,2023-01-29,0.086505,Week,credo_gen_score,1.1.0,FPD10


## FPD30

In [110]:
sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

gen_credo_scorefpd30.head()

Job ID 6a752c69-db5f-48bf-99e0-cf8a3f877a60 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


Unnamed: 0,disbursementdate,digitalLoanAccountId,credo_gen_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.185555630270027,0,1
1,2024-08-08 19:11:28,574ba16b-8e38-4e6d-8b75-2eb87b28c65c,0.0824577243349081,0,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.110976951075583,0,1
3,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.117558372458722,0,1
4,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0975612449375841,0,1


In [111]:
gen_credo_scorefpd30.head()

Unnamed: 0,disbursementdate,digitalLoanAccountId,credo_gen_score,ln_fpd30_flag,ln_mature_fpd30_flag
0,2024-07-16 19:09:27,a731f941-ed8a-4278-9ba4-1fd41132aa5e,0.185555630270027,0,1
1,2024-08-08 19:11:28,574ba16b-8e38-4e6d-8b75-2eb87b28c65c,0.0824577243349081,0,1
2,2024-02-26 15:08:47,acebf8de-bd42-4b00-83bc-d7c0999a7488,0.110976951075583,0,1
3,2023-09-22 15:00:18,a4bfaa18-0189-4e1e-99ac-e45b036ea021,0.117558372458722,0,1
4,2024-05-02 17:24:11,9b118965-74ea-4832-a640-8b5ea2cc140e,0.0975612449375841,0,1


In [112]:
gini_results = calculate_periodic_gini(gen_credo_scorefpd30, 'credo_gen_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

The shape of dataframe after copy is:	(141, 7)


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


array(['start_date', 'end_date', 'credo_gen_score_FPD30_gini', 'period',
       'Model_Name', 'version', 'bad_rate'], dtype=object)

In [113]:
M2FPD30.head()

Unnamed: 0,start_date,end_date,credo_gen_score_FPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.253005,Month,credo_gen_score,1.1.0,FPD30
1,2023-01-02,2023-01-08,0.342149,Week,credo_gen_score,1.1.0,FPD30
2,2023-01-09,2023-01-15,0.407163,Week,credo_gen_score,1.1.0,FPD30
3,2023-01-16,2023-01-22,0.667532,Week,credo_gen_score,1.1.0,FPD30
4,2023-01-23,2023-01-29,0.025547,Week,credo_gen_score,1.1.0,FPD30


## FSPD30

In [114]:
sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefspd30, 'credo_gen_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

Job ID 82139a2b-43ea-4893-8aff-4249e26e946c successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-21 14:55:36  a6162692-9ae1-46da-a98a-b35fdd0e573a   
1 2024-10-22 14:49:26  48497aca-9337-48f6-b015-12be0787333d   

      credo_gen_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.0468581067084864               0                      1  
1   0.113452492288366               0                      1  
The shape of dataframe after copy is:	(135, 7)
['start_date' 'end_date' 'credo_gen_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_gen_score_FSPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.172055,Month,credo_gen_score,1.1.0,FSPD30
1,2023-01-02,2023-01-08,0.262803,Week,credo_gen_score,1.1.0,FSPD30
2,2023-01-09,2023-01-15,0.189744,Week,credo_gen_score,1.1.0,FSPD30
3,2023-01-16,2023-01-22,0.410738,Week,credo_gen_score,1.1.0,FSPD30
4,2023-01-23,2023-01-29,0.073579,Week,credo_gen_score,1.1.0,FSPD30


## FSTPD30

In [115]:
sq = f"""
with gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_gen_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_gen_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from gen_credo_score;
"""

gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(gen_credo_scorefstpd30, 'credo_gen_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()

Job ID fe959047-a4cc-46ac-b81b-0fc1919eea14 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-21 14:55:36  a6162692-9ae1-46da-a98a-b35fdd0e573a   
1 2024-10-22 14:49:26  48497aca-9337-48f6-b015-12be0787333d   

      credo_gen_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.0468581067084864                0                       1  
1   0.113452492288366                0                       1  
The shape of dataframe after copy is:	(130, 7)
['start_date' 'end_date' 'credo_gen_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_gen_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.180837,Month,credo_gen_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.282959,Week,credo_gen_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.189908,Week,credo_gen_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.19884,Week,credo_gen_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.162845,Week,credo_gen_score,1.1.0,FSTPD30


## Combining tables 

In [116]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_gen_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'credo_gen_score_FPD30_gini',
       'credo_gen_score_FSPD30_gini', 'credo_gen_score_FSTPD30_gini'],
      dtype=object)

In [117]:
final_df = final_df[['start_date', 'end_date','period',
       'Model_Name', 'version', 'bad_rate', 'credo_gen_score_FPD10_gini','credo_gen_score_FPD30_gini',   'credo_gen_score_FSPD30_gini', 'credo_gen_score_FSTPD30_gini']].copy()

In [118]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=c65e3c09-e0ec-4f47-adbb-9495a8c44d66>

In [119]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_gen_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_gen_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_gen_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_gen_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=237549b8-a005-4052-addc-73ed1d9a1207>

# c_credo_score

In [120]:
sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_quick_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd10, 'credo_cash_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_quick_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

c_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(c_credo_scorefpd30, 'credo_cash_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
   {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_quick_score  is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefspd30, 'credo_cash_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with c_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_quick_score credo_cash_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_quick_score  is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from c_credo_score;
"""

c_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(c_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(c_credo_scorefstpd30, 'credo_cash_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 3a9bda04-3cbb-4236-b67a-7dc2ced839d8 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID 54a6e574-3ff7-4028-ae4c-b1eb8f2d2137 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID e420ce69-9aae-48ec-a8e6-fa2957ccdb71 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-21 14:55:36  a6162692-9ae1-46da-a98a-b35fdd0e573a   
1 2024-10-22 14:49:26  48497aca-9337-48f6-b015-12be0787333d   

    credo_cash_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.213719344482408               0                      1  
1  0.199193814318321               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(135, 7)
['start_date' 'end_date' 'credo_cash_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 23c4d9bd-f1da-40f8-b031-763e38364cd7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-08-08 19:11:28  574ba16b-8e38-4e6d-8b75-2eb87b28c65c   

    credo_cash_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.375585243842437                0                       1  
1  0.224559523467528                1                       1  
The shape of dataframe after copy is:	(130, 7)
['start_date' 'end_date' 'credo_cash_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_cash_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.035948,Month,credo_cash_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.036009,Week,credo_cash_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.062143,Week,credo_cash_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275891,Week,credo_cash_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,-0.050558,Week,credo_cash_score,1.1.0,FSTPD30


In [121]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_cash_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'credo_cash_score_FPD30_gini',
       'credo_cash_score_FSPD30_gini', 'credo_cash_score_FSTPD30_gini'],
      dtype=object)

In [122]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate',
                     'credo_cash_score_FPD10_gini', 
                     'credo_cash_score_FPD30_gini', 
                     'credo_cash_score_FSPD30_gini',
                     'credo_cash_score_FSTPD30_gini']].copy()

In [123]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=edb656e6-0bf9-4a17-873e-a503746e92be>

In [124]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_cash_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_cash_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_cash_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_cash_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_c_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=83be2245-baad-45d9-904c-3c2fbad86a49>

# s_credo_score

In [125]:


sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd10, 'credo_sil_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
   {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

s_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(s_credo_scorefpd30, 'credo_sil_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefspd30, 'credo_sil_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with s_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_sil_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_sil_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from s_credo_score;
"""

s_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(s_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(s_credo_scorefstpd30, 'credo_sil_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID ad80beb1-a7f9-49ca-ac39-fcfd8c760394 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID 0963248d-e064-4095-abbe-935ed1e2892a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID 05b65897-964c-4539-8c8e-1a052ba13691 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-21 14:55:36  a6162692-9ae1-46da-a98a-b35fdd0e573a   
1 2024-10-22 14:49:26  48497aca-9337-48f6-b015-12be0787333d   

      credo_sil_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.0783256742367736               0                      1  
1  0.0924230466600994               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(135, 7)
['start_date' 'end_date' 'credo_sil_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID ad11b7d3-8b75-4ab8-8e66-e73056cf0835 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-21 14:55:36  a6162692-9ae1-46da-a98a-b35fdd0e573a   
1 2024-10-22 14:49:26  48497aca-9337-48f6-b015-12be0787333d   

      credo_sil_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.0783256742367736                0                       1  
1  0.0924230466600994                0                       1  
The shape of dataframe after copy is:	(130, 7)
['start_date' 'end_date' 'credo_sil_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_sil_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.219086,Month,credo_sil_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.261708,Week,credo_sil_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.251056,Week,credo_sil_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.275062,Week,credo_sil_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.157554,Week,credo_sil_score,1.1.0,FSTPD30


## Combining data

In [126]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_sil_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'credo_sil_score_FPD30_gini',
       'credo_sil_score_FSPD30_gini', 'credo_sil_score_FSTPD30_gini'],
      dtype=object)

In [127]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate',
                     'credo_sil_score_FPD10_gini', 
                     'credo_sil_score_FPD30_gini',  
                     'credo_sil_score_FSPD30_gini',
                     'credo_sil_score_FSTPD30_gini']].copy()

In [128]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score;"""

client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=5cfbfd77-3f7d-4bcb-8cdf-24e0fa51882a>

In [129]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_sil_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_sil_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_sil_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_sil_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_s_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=27463085-b968-4b3c-b6e3-2b93d4b49bb2>

In [130]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,credo_sil_score_FPD10_gini,credo_sil_score_FPD30_gini,credo_sil_score_FSPD30_gini,credo_sil_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FPD10,0.230181,,,
1,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FPD30,,0.25398,,
2,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FSPD30,,,0.19884,
3,2023-01-01,2023-01-31,Month,credo_sil_score,1.1.0,FSTPD30,,,,0.219086
4,2023-01-02,2023-01-08,Week,credo_sil_score,1.1.0,FPD10,0.248978,,,


# fu_credo_score

In [131]:


sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd10, 'credo_flexup_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

fu_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(fu_credo_scorefpd30, 'credo_flexup_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefspd30, 'credo_flexup_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with fu_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_flex_score credo_flexup_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_flex_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from fu_credo_score;
"""

fu_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(fu_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(fu_credo_scorefstpd30, 'credo_flexup_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 6ffe8d02-7a5c-4254-9a5d-e8ca20dc5ad7 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID 42930752-8a0d-404c-a027-7740cf1b595b successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID ceede233-da1d-4534-8569-189396fd4630 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-08-08 19:11:28  574ba16b-8e38-4e6d-8b75-2eb87b28c65c   

   credo_flexup_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.0571600303978391               0                      1  
1  0.0861410593850208               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(135, 7)
['start_date' 'end_date' 'credo_flexup_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID b6d036e4-e283-4c35-beaf-0e30262024c0 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-21 14:55:36  a6162692-9ae1-46da-a98a-b35fdd0e573a   
1 2024-10-22 14:49:26  48497aca-9337-48f6-b015-12be0787333d   

   credo_flexup_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.0949243166819351                0                       1  
1   0.093346578126713                0                       1  
The shape of dataframe after copy is:	(130, 7)
['start_date' 'end_date' 'credo_flexup_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_flexup_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.07286,Month,credo_flexup_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.002165,Week,credo_flexup_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.056674,Week,credo_flexup_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.326429,Week,credo_flexup_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.109935,Week,credo_flexup_score,1.1.0,FSTPD30


## Combining data

In [132]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_flexup_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'credo_flexup_score_FPD30_gini', 'credo_flexup_score_FSPD30_gini',
       'credo_flexup_score_FSTPD30_gini'], dtype=object)

In [133]:
final_df = final_df[['start_date', 'end_date','period',    'Model_Name', 'version', 'bad_rate',
                     'credo_flexup_score_FPD10_gini', 'credo_flexup_score_FPD30_gini',    'credo_flexup_score_FSPD30_gini', 'credo_flexup_score_FSTPD30_gini']].copy()

In [134]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=649862d8-d814-47ff-b252-f0ac5e2fa065>

In [135]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_flexup_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_flexup_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('v_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_flexup_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_fu_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=de0ae7a5-73cf-491b-afc5-e7bd5cb63554>

In [136]:
final_df.head()

Unnamed: 0,start_date,end_date,period,Model_Name,version,bad_rate,credo_flexup_score_FPD10_gini,credo_flexup_score_FPD30_gini,credo_flexup_score_FSPD30_gini,credo_flexup_score_FSTPD30_gini
0,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FPD10,0.023608,,,
1,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FPD30,,0.006985,,
2,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FSPD30,,,0.052302,
3,2023-01-01,2023-01-31,Month,credo_flexup_score,1.1.0,FSTPD30,,,,0.07286
4,2023-01-02,2023-01-08,Week,credo_flexup_score,1.1.0,FPD10,-0.032464,,,


# r_credo_score

In [137]:


sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd10.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd10, 'credo_reloan_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
M1FPD10.columns.values

# FPD30

sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

r_credo_scorefpd30.head()

gini_results = calculate_periodic_gini(r_credo_scorefpd30, 'credo_reloan_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
M2FPD30.columns.values

sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefspd30, 'credo_reloan_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with r_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_reloan_score ,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_reloan_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from r_credo_score;
"""

r_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(r_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini(r_credo_scorefstpd30, 'credo_reloan_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 55602494-2adb-43ac-8164-e6f4cd03ffc0 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID c0a17dea-880d-4a09-a2dd-c2a1f9f7edae successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(139, 7)
Job ID fc340ec5-e466-4ecd-9370-7db63256b738 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-08-08 19:11:28  574ba16b-8e38-4e6d-8b75-2eb87b28c65c   

  credo_reloan_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0  0.366341998630943               0                      1  
1  0.410788304229105               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(135, 7)
['start_date' 'end_date' 'credo_reloan_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 1706e27e-cfe4-49f1-ab99-e1329d19d1bb successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-07-16 19:09:27  a731f941-ed8a-4278-9ba4-1fd41132aa5e   
1 2024-08-08 19:11:28  574ba16b-8e38-4e6d-8b75-2eb87b28c65c   

  credo_reloan_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0  0.366341998630943                0                       1  
1  0.410788304229105                1                       1  
The shape of dataframe after copy is:	(130, 7)
['start_date' 'end_date' 'credo_reloan_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_reloan_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.048984,Month,credo_reloan_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.014955,Week,credo_reloan_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.193885,Week,credo_reloan_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,-0.019056,Week,credo_reloan_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.024691,Week,credo_reloan_score,1.1.0,FSTPD30


## Combining data

In [138]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_reloan_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'credo_reloan_score_FPD30_gini', 'credo_reloan_score_FSPD30_gini',
       'credo_reloan_score_FSTPD30_gini'], dtype=object)

In [139]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate',
                     'credo_reloan_score_FPD10_gini',
                     'credo_reloan_score_FPD30_gini',
                     'credo_reloan_score_FSPD30_gini',
                     'credo_reloan_score_FSTPD30_gini']].copy()

In [140]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=becb86b7-6b01-41ef-ab9b-06faa712f8d1>

In [141]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_reloan_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_reloan_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_reloan_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_reloan_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_r_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f3cc95ee-a73f-4aab-ae3a-8e22bbbc46ee>

# old_gen_credo_score

In [142]:
sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score ,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd10, 'credo_old_gen_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_gen_credo_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefpd30, 'credo_old_gen_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefspd30, 'credo_old_gen_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with old_gen_credo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    credo_old_gen_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    credo_old_gen_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_gen_credo_score;
"""

old_gen_credo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_gen_credo_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(old_gen_credo_scorefstpd30, 'credo_old_gen_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 92218e51-bd91-4017-88c3-047de37e9784 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(142, 7)
['start_date' 'end_date' 'credo_old_gen_score_FPD10_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID ef86c258-831d-4390-8fa5-e6a6fcb5b9f8 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(141, 7)
['start_date' 'end_date' 'credo_old_gen_score_FPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID da1c7163-0819-4ffc-9221-a4147c098857 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-12-01 12:21:56  1eac1869-c3c5-4ed8-aa58-aeb90c730784   
1 2023-12-17 19:38:05  0c97d2b7-3157-4f68-a502-636f59339b6d   

  credo_old_gen_score  ln_fspd30_flag  ln_mature_fspd30_flag  
0               571.0               0                      1  
1                 497               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(135, 7)
['start_date' 'end_date' 'credo_old_gen_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID 0e5b754e-0a42-445c-8ec2-f0dc875ee932 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  \
0 2024-12-01 12:21:56  1eac1869-c3c5-4ed8-aa58-aeb90c730784   
1 2023-12-17 19:38:05  0c97d2b7-3157-4f68-a502-636f59339b6d   

  credo_old_gen_score  ln_fstpd30_flag  ln_mature_fstpd30_flag  
0               571.0                0                       1  
1                 497                0                       1  
The shape of dataframe after copy is:	(130, 7)
['start_date' 'end_date' 'credo_old_gen_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,credo_old_gen_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.122116,Month,credo_old_gen_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.228261,Week,credo_old_gen_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.144711,Week,credo_old_gen_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.125576,Week,credo_old_gen_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,-0.001488,Week,credo_old_gen_score,1.1.0,FSTPD30


## Combining data

In [143]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'credo_old_gen_score_FPD10_gini',
       'period', 'Model_Name', 'version', 'bad_rate',
       'credo_old_gen_score_FPD30_gini',
       'credo_old_gen_score_FSPD30_gini',
       'credo_old_gen_score_FSTPD30_gini'], dtype=object)

In [144]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate',
                     'credo_old_gen_score_FPD10_gini',
                     'credo_old_gen_score_FPD30_gini',
                     'credo_old_gen_score_FSPD30_gini', 
                     'credo_old_gen_score_FSTPD30_gini']].copy()

In [145]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=5e0c0b56-08db-4ff4-9304-25432cafbe12>

In [146]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('credo_old_gen_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('credo_old_gen_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('credo_old_gen_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('credo_old_gen_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ba264aed-650d-469f-a742-855f9b2bbeee>

In [147]:
sq = """select * from prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_gen_credo_score;"""

df = client.query(sq).to_dataframe(progress_bar_type='tqdm')

Job ID a3566736-9731-4cb2-869d-a4b8a057e42d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


In [148]:
df['bad_rate'].value_counts()

bad_rate
FPD10      142
FPD30      141
FSPD30     135
FSTPD30    130
Name: count, dtype: int64

# old_cic_score

In [149]:


sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd10, 'old_cic_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_cic_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefpd30, 'old_cic_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefspd30, 'old_cic_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with old_cic_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_cic_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_cic_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_cic_score;
"""

old_cic_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_cic_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_cic_scorefstpd30, 'old_cic_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID c1fdc39b-1d67-41e8-a7cb-1b447719faa6 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(102, 7)
['start_date' 'end_date' 'old_cic_score_FPD10_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 7cd758a4-cc8b-440c-a071-97f5458cc6ed successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(101, 7)
['start_date' 'end_date' 'old_cic_score_FPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 03bf4d58-e85a-4811-b643-7a21468ed2ab successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2024-06-14 13:20:28  5f412bbb-9b17-4761-8023-1a08640ad81c       605.000   
1 2024-12-01 12:21:56  1eac1869-c3c5-4ed8-aa58-aeb90c730784         584.0   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(95, 7)
['start_date' 'end_date' 'old_cic_score_FSPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 66a64d56-9531-4db7-a79c-7ac6106a846a successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId old_cic_score  \
0 2024-06-14 13:20:28  5f412bbb-9b17-4761-8023-1a08640ad81c       605.000   
1 2024-12-01 12:21:56  1eac1869-c3c5-4ed8-aa58-aeb90c730784         584.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(90, 7)
['start_date' 'end_date' 'old_cic_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,old_cic_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-08-01,2023-08-31,0.253594,Month,old_cic_score,1.1.0,FSTPD30
1,2023-08-21,2023-08-27,0.195172,Week,old_cic_score,1.1.0,FSTPD30
2,2023-08-28,2023-09-03,0.285079,Week,old_cic_score,1.1.0,FSTPD30
3,2023-09-01,2023-09-30,0.307897,Month,old_cic_score,1.1.0,FSTPD30
4,2023-09-04,2023-09-10,0.284848,Week,old_cic_score,1.1.0,FSTPD30


## Combining data

In [150]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_cic_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_cic_score_FPD30_gini',
       'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini'],
      dtype=object)

In [151]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_cic_score_FPD10_gini','old_cic_score_FPD30_gini',    'old_cic_score_FSPD30_gini', 'old_cic_score_FSTPD30_gini']].copy()

In [152]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d78a1df0-9da3-40cd-ad60-9cba2d9bc15b>

In [153]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_cic_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_cic_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_cic_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_cic_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=19a0a68b-f21c-4e67-b937-f548602a5985>

# old_demo_score

In [154]:


sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd10_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd10.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd10, 'old_demo_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

old_demo_scorefpd30.head()

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefpd30, 'old_demo_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fspd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefspd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefspd30, 'old_demo_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with old_demo_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    old_demo_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    old_demo_score is not null
  AND
    ln_mature_fstpd30_flag = 1
)
select * from old_demo_score;
"""

old_demo_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(old_demo_scorefstpd30.head(2))

gini_results = calculate_periodic_hybrid_gini(old_demo_scorefstpd30, 'old_demo_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 0a009653-1159-45e0-b5dc-2994ed03105d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(142, 7)
['start_date' 'end_date' 'old_demo_score_FPD10_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID 082d11c1-c1bc-41e0-a30b-53dc89647d53 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(141, 7)
['start_date' 'end_date' 'old_demo_score_FPD30_gini' 'period' 'Model_Name'
 'version' 'bad_rate']
Job ID eec82337-ba43-4d77-b883-8bddec637b11 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2025-01-25 19:05:49  e549776f-7a2d-4b6c-9edc-e3b59e8955f6        0.138973   
1 2024-09-07 19:35:30  ea94bed1-57e5-4344-a8a8-49ed935b7937      435.000000   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


The shape of dataframe after copy is:	(135, 7)
['start_date' 'end_date' 'old_demo_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']
Job ID f3b13831-cef8-4486-9143-3bc09aac4a4d successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  old_demo_score  \
0 2024-09-07 19:35:30  ea94bed1-57e5-4344-a8a8-49ed935b7937           435.0   
1 2024-06-28 16:01:01  0bac8daf-c442-4b6c-a8c9-fe6260603d9d           429.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(130, 7)
['start_date' 'end_date' 'old_demo_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,old_demo_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.123894,Month,old_demo_score,1.1.0,FSTPD30
1,2023-01-02,2023-01-08,0.167651,Week,old_demo_score,1.1.0,FSTPD30
2,2023-01-09,2023-01-15,0.042008,Week,old_demo_score,1.1.0,FSTPD30
3,2023-01-16,2023-01-22,0.134217,Week,old_demo_score,1.1.0,FSTPD30
4,2023-01-23,2023-01-29,0.129924,Week,old_demo_score,1.1.0,FSTPD30


## Combining data

In [155]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'old_demo_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'old_demo_score_FPD30_gini',
       'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini'],
      dtype=object)

In [156]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','old_demo_score_FPD10_gini','old_demo_score_FPD30_gini',    'old_demo_score_FSPD30_gini', 'old_demo_score_FSTPD30_gini']].copy()

In [157]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=abeee7f5-1949-4ffc-8b7c-f33fa9771126>

In [158]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('old_demo_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('old_demo_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('old_demo_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_old_demo_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f96a9c89-0e97-49e3-adc9-94fbd114c4f2>

# bu_bureau_score

In [159]:
sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd10_flag,
	ln_mature_fpd10_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd10_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd10_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd10 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd10.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd10, 'bu_bureau_score', 'ln_fpd10_flag', 'FPD10')
M1FPD10 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M1FPD10.shape}")
print(M1FPD10.columns.values)

# FPD30

sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fpd30_flag,
	ln_mature_fpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

bu_bureau_scorefpd30.head()

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefpd30, 'bu_bureau_score', 'ln_fpd30_flag', 'FPD30')
M2FPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M2FPD30.shape}")
print(M2FPD30.columns.values)

sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fspd30_flag,
	ln_mature_fspd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fspd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fspd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefspd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefspd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefspd30, 'bu_bureau_score', 'ln_fspd30_flag', 'FSPD30')
M3FSPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M3FSPD30.shape}")
print(M3FSPD30.columns.values)

M3FSPD30.head()

sq = f"""
with bu_bureau_score as 
(SELECT 
    ln_disb_dtime disbursementdate,
	digitalLoanAccountId,
    bu_bureau_score,
    ln_fstpd30_flag,
	ln_mature_fstpd30_flag,	
	FROM 
    {a}
  WHERE 
     -- ln_disb_dtime >= '2023-07-01'
    ln_appln_submit_datetime >= '2023-01-01'
  -- AND
  --   format_date('%Y-%m', ln_disb_dtime) = '2024-09'
  AND
    ln_fstpd30_flag is not null
  AND
    coalesce(bu_bureau_score, 0.0) > 0.0
  AND
    ln_mature_fstpd30_flag = 1
)
select * from bu_bureau_score;
"""

bu_bureau_scorefstpd30 = client.query(sq).to_dataframe(progress_bar_type='tqdm')

print(bu_bureau_scorefstpd30.head(2))

gini_results = calculate_periodic_gini_threedigit(bu_bureau_scorefstpd30, 'bu_bureau_score', 'ln_fstpd30_flag', 'FSTPD30')
M4FSTPD30 = gini_results.copy()
print(f"The shape of dataframe after copy is:\t{M4FSTPD30.shape}")
print(M4FSTPD30.columns.values)

M4FSTPD30.head()



Job ID 69b5bd49-a630-4da6-9306-6898f4afe747 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FPD10_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Job ID 622e7bab-773a-4efd-aff8-9272768a7168 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Job ID 1c048ba1-9b29-4193-9433-09aed62f2178 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2023-12-07 12:25:39  c464e02f-5b1a-460a-9686-9facb57c699f            418.0   
1 2023-08-21 14:19:38  1f8f08bd-5bff-4f5b-99fd-6b75528cb71e            191.0   

   ln_fspd30_flag  ln_mature_fspd30_flag  
0               0                      1  
1               0                      1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Job ID 1355890a-2a95-4f59-be09-622de53d7678 successfully executed: 100%|[32m██████████[0m|




Downloading: 100%|[32m██████████[0m|
     disbursementdate                  digitalLoanAccountId  bu_bureau_score  \
0 2023-12-07 12:25:39  c464e02f-5b1a-460a-9686-9facb57c699f            418.0   
1 2023-08-21 14:19:38  1f8f08bd-5bff-4f5b-99fd-6b75528cb71e            191.0   

   ln_fstpd30_flag  ln_mature_fstpd30_flag  
0                0                       1  
1                0                       1  
The shape of dataframe after copy is:	(92, 7)
['start_date' 'end_date' 'bu_bureau_score_FSTPD30_gini' 'period'
 'Model_Name' 'version' 'bad_rate']


  weekly_gini = df.groupby('week').apply(
  monthly_gini = df.groupby('month').apply(


Unnamed: 0,start_date,end_date,bu_bureau_score_FSTPD30_gini,period,Model_Name,version,bad_rate
0,2023-01-01,2023-01-31,0.025,Month,bu_bureau_score,1.1.0,FSTPD30
1,2023-01-09,2023-01-15,-0.093333,Week,bu_bureau_score,1.1.0,FSTPD30
2,2023-01-16,2023-01-22,-0.833333,Week,bu_bureau_score,1.1.0,FSTPD30
3,2023-01-23,2023-01-29,0.416667,Week,bu_bureau_score,1.1.0,FSTPD30
4,2023-01-30,2023-02-05,0.266667,Week,bu_bureau_score,1.1.0,FSTPD30


## Combining data

In [160]:
import functools

dataframes = [M1FPD10, M2FPD30, M3FSPD30, M4FSTPD30]
common_columns = ['start_date', 'end_date', 'period', 'Model_Name','version', 'bad_rate']

def merge_dataframes(df1, df2):
    return pd.merge(df1, df2, on=common_columns, how='outer')

final_df = functools.reduce(merge_dataframes, dataframes)

final_df.columns.values

array(['start_date', 'end_date', 'bu_bureau_score_FPD10_gini', 'period',
       'Model_Name', 'version', 'bad_rate', 'bu_bureau_score_FPD30_gini',
       'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini'],
      dtype=object)

In [161]:
final_df = final_df[['start_date', 'end_date', 'period',   'Model_Name', 'version', 'bad_rate','bu_bureau_score_FPD10_gini','bu_bureau_score_FPD30_gini',    'bu_bureau_score_FSPD30_gini', 'bu_bureau_score_FSTPD30_gini']].copy()

In [162]:
sq = """drop table if exists prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=f932e51c-c092-4fe1-959e-42ca85039f0f>

In [163]:
import pandas as pd
from google.cloud import bigquery

# Create a BigQuery client
client = bigquery.Client('prj-prod-dataplatform')

# Define your table schema
table_schema = [
    bigquery.SchemaField('start_date', 'TIMESTAMP'),
    bigquery.SchemaField('end_date', 'TIMESTAMP'),
    bigquery.SchemaField('period', 'STRING'),
    bigquery.SchemaField('Model_Name', 'STRING'),
    bigquery.SchemaField('version', 'STRING'),
    bigquery.SchemaField('Badrate', 'STRING'),
    bigquery.SchemaField('bu_bureau_score_FPD10_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FPD30_gini', 'FLOAT'),    
    bigquery.SchemaField('bu_bureau_score_FSPD30_gini', 'FLOAT'),
    bigquery.SchemaField('bu_bureau_score_FSTPD30_gini', 'FLOAT')
    
]

# Create your BigQuery table
table_id = 'prj-prod-dataplatform.dap_ds_poweruser_playground.Model_gini_bu_bureau_score'
table = bigquery.Table(table_id, schema=table_schema)
table = client.create_table(table)

# Load your DataFrame into BigQuery
job_config = bigquery.LoadJobConfig(
    write_disposition='WRITE_TRUNCATE'
)

load_job = client.load_table_from_dataframe(
    final_df, table_id, job_config=job_config
)

load_job.result()



LoadJob<project=prj-prod-dataplatform, location=asia-southeast1, id=dd0ae3d1-0ddf-4c4e-9dee-9116db905a7e>