In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
# Point the Google client libraries at a credentials file, then create the
# BigQuery client that the rest of the notebook queries through.
# NOTE(review): `path` is an absolute, user-specific location. It is now used
# only as a fallback when GOOGLE_APPLICATION_CREDENTIALS is unset or empty, so
# credentials configured outside the notebook are no longer overwritten.
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
if not os.environ.get('GOOGLE_APPLICATION_CREDENTIALS'):
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

# Run the queries that load the granted and not-granted contract tables

In [2]:
# Download the full granted_contracts and notgranted_contracts tables into
# pandas, showing a tqdm progress bar for each download.
sq = "Select *  from  prj-prod-dataplatform.risk_credit_cic_data.granted_contracts;"
dfgranted = (
    client
    .query(sq)
    .to_dataframe(progress_bar_type='tqdm')
)

sq = "Select *  from  prj-prod-dataplatform.risk_credit_cic_data.notgranted_contracts;"
dfnongranted = (
    client
    .query(sq)
    .to_dataframe(progress_bar_type='tqdm')
)


Job ID 7cd4d6c3-a0be-4220-a5ac-3b3381dce189 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Job ID 74d40569-5434-42aa-ae42-e812361f6fb4 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


# Check the columns in each dataframe

In [3]:
# Display the column labels of the granted-contracts frame.
dfgranted.columns

Index(['digitalLoanAccountId', 'crifApplicationId', 'customerId',
       'processEngineGuid', 'requestGuid', 'ContractHistoryType',
       'CBContractCode', 'ContractEndDate', 'ContractPhase',
       'ContractPhaseDesc', 'ContractStartDate', 'ContractStatus',
       'ContractStatusDesc', 'ContractType', 'ContractTypeDesc', 'Currency',
       'CurrencyDesc', 'LastUpdateDate', 'OriginalCurrency',
       'OriginalCurrencyDesc', 'ProviderCodeEncrypted', 'ProviderContractNo',
       'ReferenceNo', 'Role', 'RoleDesc', 'BilledAmount',
       'BoardResolutionFlag', 'BoardResolutionFlagDesc', 'CancellationDate',
       'CardReferenceCode', 'ChargedAmount', 'CreditLimit', 'CreditPurpose',
       'CreditPurposeDesc', 'FinancedAmount', 'FirstPaymentDate',
       'FlagCardUsed', 'HolderLiability', 'HolderLiabilityDesc',
       'InstallmentType', 'InstallmentTypeDesc', 'InstallmentsNumber',
       'LastChargeDate', 'LastPaymentAmount', 'LastPaymentDate',
       'MinPaymentIndicator', 'MinPaymentIndi

In [4]:
# Display the column labels of the not-granted-contracts frame.
dfnongranted.columns

Index(['digitalLoanAccountId', 'crifApplicationId', 'customerId',
       'processEngineGuid', 'requestGuid', 'CBContractCode',
       'ContractTypeDesc', 'ContractRequestDate', 'LastUpdateDate',
       'ProviderCodeEncrypted', 'Role', 'ReferenceNo', 'ContractPhase',
       'ContractType', 'ProviderContractNo', 'ContractPhaseDesc', 'RoleDesc',
       'CreditLimit', 'FinancedAmount', 'InstallmentsNumber',
       'MonthlyPaymentAmount', 'PaymentPeriodicity', 'PaymentPeriodicityDesc',
       'LinkedSubject_CBSubjectCode', 'LinkedSubject_Name',
       'LinkedSubject_Role', 'LinkedSubject_RoleDesc', 'Note_TypeDesc',
       'Note_Text', 'Note_Type', 'run_date'],
      dtype='object')

In [5]:
# Extend dfgranted in place: ContractRequestDate becomes an all-None column
# and every row is tagged with the table it came from.
for col, fill_value in (('ContractRequestDate', None), ('source', 'granted')):
    dfgranted[col] = fill_value

In [6]:
# Names of the columns that will be added to dfnongranted as all-None
# placeholders (the list is only defined here; it is consumed by a later cell).
null_columns = [
    'ContractHistoryType',
    'ContractEndDate', 'ContractStartDate',
    'ContractStatus', 'ContractStatusDesc',
    'Currency', 'CurrencyDesc',
    'OriginalCurrency', 'OriginalCurrencyDesc',
    'BilledAmount',
    'BoardResolutionFlag', 'BoardResolutionFlagDesc',
    'CancellationDate',
    'CardReferenceCode',
    'ChargedAmount',
    'CreditPurpose', 'CreditPurposeDesc',
    'FirstPaymentDate',
    'FlagCardUsed',
    'HolderLiability', 'HolderLiabilityDesc',
    'InstallmentType', 'InstallmentTypeDesc',
    'LastChargeDate',
    'LastPaymentAmount', 'LastPaymentDate',
    'MinPaymentIndicator', 'MinPaymentIndicatorDesc',
    'MinPaymentPercentage',
    'NextPayment', 'NextPaymentDate',
    'OutstandingBalance', 'OutstandingBalanceUnbilled',
    'OutstandingPaymentsNumber',
    'OverallCreditLimit',
    'OverdueDays', 'OverdueDaysDesc',
    'OverduePaymentsAmount', 'OverduePaymentsNumber',
    'PaymentMethod', 'PaymentMethodDesc',
    'PremiumCard', 'PremiumCardDesc',
    'ReorganizedCreditCode', 'ReorganizedCreditCodeDesc',
    'ServicesLinesNo',
    'TimesCardUsed',
    'TransactionType', 'TransactionTypeDesc',
    'Utilization',
]

In [7]:
# Add every name in null_columns to dfnongranted as an all-None column, then
# tag its rows with the table they came from.
for col in null_columns:
    dfnongranted[col] = None
dfnongranted['source'] = 'nongranted'

# Columns (and their order) to keep in the combined frame.
columns = [
    'digitalLoanAccountId', 'crifApplicationId', 'customerId', 'processEngineGuid', 'requestGuid',
    'ContractHistoryType', 'CBContractCode', 'ContractEndDate', 'ContractPhase', 'ContractPhaseDesc',
    'ContractStartDate', 'ContractStatus', 'ContractStatusDesc', 'ContractType', 'ContractTypeDesc',
    'Currency', 'CurrencyDesc', 'LastUpdateDate', 'OriginalCurrency', 'OriginalCurrencyDesc',
    'ProviderCodeEncrypted', 'ProviderContractNo', 'ReferenceNo', 'Role', 'RoleDesc',
    'BilledAmount', 'BoardResolutionFlag', 'BoardResolutionFlagDesc', 'CancellationDate',
    'CardReferenceCode', 'ChargedAmount', 'CreditLimit', 'CreditPurpose', 'CreditPurposeDesc',
    'FinancedAmount', 'FirstPaymentDate', 'FlagCardUsed', 'HolderLiability', 'HolderLiabilityDesc',
    'InstallmentType', 'InstallmentTypeDesc', 'InstallmentsNumber', 'LastChargeDate',
    'LastPaymentAmount', 'LastPaymentDate', 'MinPaymentIndicator', 'MinPaymentIndicatorDesc',
    'MinPaymentPercentage', 'MonthlyPaymentAmount', 'NextPayment', 'NextPaymentDate',
    'OutstandingBalance', 'OutstandingBalanceUnbilled', 'OutstandingPaymentsNumber',
    'OverallCreditLimit', 'OverdueDays', 'OverdueDaysDesc', 'OverduePaymentsAmount',
    'OverduePaymentsNumber', 'PaymentMethod', 'PaymentMethodDesc', 'PaymentPeriodicity',
    'PaymentPeriodicityDesc', 'PremiumCard', 'PremiumCardDesc', 'ReorganizedCreditCode',
    'ReorganizedCreditCodeDesc', 'ServicesLinesNo', 'TimesCardUsed', 'TransactionType',
    'TransactionTypeDesc', 'Utilization', 'LinkedSubject_CBSubjectCode', 'LinkedSubject_Name',
    'LinkedSubject_Role', 'LinkedSubject_RoleDesc', 'Note_TypeDesc', 'Note_Text', 'Note_Type',
    'run_date', 'ContractRequestDate', 'source'
]

# Stack the granted rows above the not-granted rows under a fresh 0..n-1
# index, then keep only the listed columns in the listed order.
base = pd.concat([dfgranted, dfnongranted], ignore_index=True)
base = base.loc[:, columns]

In [8]:
# Preview 10 randomly chosen rows of the combined frame. No random_state is
# passed, so the rows shown differ from run to run.
base.sample(10)

Unnamed: 0,digitalLoanAccountId,crifApplicationId,customerId,processEngineGuid,requestGuid,ContractHistoryType,CBContractCode,ContractEndDate,ContractPhase,ContractPhaseDesc,...,LinkedSubject_CBSubjectCode,LinkedSubject_Name,LinkedSubject_Role,LinkedSubject_RoleDesc,Note_TypeDesc,Note_Text,Note_Type,run_date,ContractRequestDate,source
51,550b153f-fa92-48b4-bd6e-14ab3587e52f,tonik-tul-298385,1992248,b79ef600-2417-11ee-b763-0242ace60006,b7810dc0-2417-11ee-8a6d-0242ace60010,Installments,P04191454,2018-10-12,AC,Active,...,,,,,,,,2023-07-17,,granted
196,9ded19d9-6258-44a5-96ad-16e79e3ba908,tonik-tul-545902,2556836,5ba6a5e0-2426-11ef-b07b-0242ace6000f,5b831850-2426-11ef-b6ea-0242ace60004,,S04600049,,RQ,Requested,...,,,,,,,,2024-06-07,2023-02-09,nongranted
107,86c7d2f6-eebd-49c4-b999-a26f30bc42f9,tonik-sbcl-296564,1954463,fac53180-221c-11ee-8088-0242ace60005,fa99b4b0-221c-11ee-b410-0242ace60015,,804608939,,RQ,Requested,...,,,,,,,,2023-07-14,2023-02-17,nongranted
62,ba1c2383-fcb2-4d11-9c6c-d637b3581959,tonik-sbcl-299012,2139895,951ce020-24a7-11ee-8088-0242ace60005,94f44980-24a7-11ee-b410-0242ace60015,Installments,C04351591,2022-12-01,CL,Closed,...,,,,,,,,2023-07-17,,granted
111,6a2b6322-c41a-4af3-b945-388795ec3166,tonik-tul-296505,2126300,cee49040-2214-11ee-b763-0242ace60006,cec680f0-2214-11ee-8a6d-0242ace60010,,B04012195,,RQ,Requested,...,,,,,,,,2023-07-14,2022-05-11,nongranted
25,db298806-189b-4c79-b423-2cbc9322bfd5,tonik-tul-299007,1863287,c0e57ba0-24a6-11ee-8088-0242ace60005,c0bae930-24a6-11ee-b410-0242ace60015,Installments,K00859360,2020-08-11,CL,Closed,...,,,,,,,,2023-07-17,,granted
135,b6e43bbd-790b-4f19-baee-8bbd80bdd98a,tonik-tul-296483,1782245,5f2fa350-2211-11ee-8088-0242ace60005,5f069780-2211-11ee-b410-0242ace60015,,704045793,,RQ,Requested,...,,,,,,,,2023-07-14,2022-05-23,nongranted
93,ae98ce71-2a66-4b10-add2-395e8fef2fc2,tonik-tul-298689,1377654,5e964fb0-2465-11ee-b763-0242ace60006,5e77a420-2465-11ee-8a6d-0242ace60010,Installments,901503093,2019-08-10,CL,Closed,...,,,,,,,,2023-07-17,,granted
100,8a6d47f9-ec9d-42c2-b068-20f448a95504,tonik-tul-296490,2136215,0e73a640-2212-11ee-b763-0242ace60006,0e5596f0-2212-11ee-8a6d-0242ace60010,,B04933876,,RQ,Requested,...,,,,,,,,2023-07-14,2023-07-12,nongranted
177,837bdf4b-cf9f-4213-b7a6-17cca9f8afc2,tonik-tul-547178,2559299,2aecd170-24d2-11ef-b07b-0242ace6000f,2ac99200-24d2-11ef-b6ea-0242ace60004,,906985082,,RQ,Requested,...,,,,,,,,2024-06-07,2024-06-07,nongranted


In [9]:
# Load the loan master table and the loan delinquency table in full, then
# report the (rows, columns) shape of each frame.
loan_master_table = (
    client
    .query("""select * from `risk_credit_mis.loan_master_table` ;""")
    .to_dataframe(progress_bar_type='tqdm')
)
loan_deliquency_data = (
    client
    .query("""select * from  prj-prod-dataplatform.risk_credit_mis.loan_deliquency_data;""")
    .to_dataframe(progress_bar_type='tqdm')
)

print(f"The shape of loan_master_table:\t{loan_master_table.shape}")
print(f"The shape of loan_delinquency_data:\t{loan_deliquency_data.shape}")

Job ID 796ea460-123f-49c5-8ab3-c36eaa32390e successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
Job ID 0d5d7fa7-d8fc-47f6-826d-e9027e5c1820 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of loan_master_table:	(1565281, 225)
The shape of loan_delinquency_data:	(148813, 13)


In [10]:
# Join the combined contracts to the loan master on digitalLoanAccountId, then
# join that result to the delinquency data on loanAccountNumber. Both merges
# use pandas' default inner join, so rows without a match on either key are
# dropped. loanAccountNumber is not among base's selected columns, so the
# second key has to be supplied by loan_master_table.
merged_df = (
    base
    .merge(loan_master_table, on='digitalLoanAccountId')
    .merge(loan_deliquency_data, on='loanAccountNumber')
)


In [11]:
# Save each source table to its own CSV file, without the index column.
# Fix: the loan master file previously received loan_deliquency_data, so the
# master table was never written and the delinquency data was written twice.
loan_master_table.to_csv("LoanMasterdata20240729.csv", index=False)
loan_deliquency_data.to_csv("loandelinquencydata.csv", index=False)

In [12]:
# Unbind the two source frames; merged_df already holds the joined data.
del loan_master_table, loan_deliquency_data

In [13]:
# Keep only rows whose flagDisbursement equals 1 and whose
# disbursementDateTime is present.
filtered_df = merged_df.loc[
    merged_df['flagDisbursement'].eq(1)
    & merged_df['disbursementDateTime'].notna()
]

In [14]:
# Group and aggregate.
#
# For each new_loan_type this produces the earliest and latest run_date plus
# ten counts. Every count is the number of distinct loanAccountNumber values
# in the group whose row satisfies one flag condition on filtered_df.
#
# The boolean masks are built once here instead of inside each lambda, where
# they were rebuilt over the whole frame for every group. Each mask spans all
# of filtered_df and is aligned to a group's rows by index label when it is
# used as an indexer.
def _distinct_accounts_where(mask):
    """Return an aggregator that counts the distinct values of a group selected by `mask`."""
    # A lambda is returned on purpose: pandas auto-numbers anonymous functions
    # (<lambda_0>, <lambda_1>, ...) but rejects several same-named functions
    # applied to one column.
    return lambda accounts: accounts[mask].nunique()


# Output column name -> row condition, in output order.
_metric_masks = {
    'defFPD15': filtered_df['defFPD15'] == 1,
    'obsFPD15': filtered_df['obsFPD15'] == 1,
    'defFPD30': filtered_df['min_inst_def30'] == 1,
    'obsFPD30': filtered_df['obs_min_inst_def30'] >= 1,
    'defFSPD15': (filtered_df['defFPD15'] == 1) | (filtered_df['defSPD15'] == 1),
    'obsFSPD15': filtered_df['obsSPD15'] == 1,
    'defFSPD30': filtered_df['min_inst_def30'].isin([1, 2]),
    # NOTE(review): 'obsFPS30' breaks the pattern of its neighbours
    # ('obsFSPD15', 'defFSPD30') and looks like a typo for 'obsFSPD30'. The
    # label is kept unchanged because other cells may select it by name.
    'obsFPS30': filtered_df['obs_min_inst_def30'] >= 2,
    'defFSTPD30': filtered_df['min_inst_def30'].isin([1, 2, 3]),
    'obsFSTPD30': filtered_df['obs_min_inst_def30'] >= 3,
}

result = filtered_df.groupby('new_loan_type').agg({
    'run_date': ['min', 'max'],
    'loanAccountNumber': [_distinct_accounts_where(mask) for mask in _metric_masks.values()],
})

# Flatten column names: the two run_date aggregates first, then one column per
# metric in the order the masks were declared.
result.columns = ['firstavailabledate', 'lastavailabledate', *_metric_masks]

# Reset index to make 'new_loan_type' a column
result = result.reset_index()



In [16]:
# Show the first rows of the per-loan-type summary.
result.head()

Unnamed: 0,new_loan_type,firstavailabledate,lastavailabledate,defFPD15,obsFPD15,defFPD30,obsFPD30,defFSPD15,obsFSPD15,defFSPD30,obsFPS30,defFSTPD30,obsFSTPD30
0,Flex,2023-07-14,2023-07-17,0,4,0,4,0,4,0,4,0,4
1,Quick,2023-07-14,2023-07-17,4,21,4,21,5,21,5,21,6,21
2,SIL-Instore,2023-07-14,2024-06-07,0,6,0,4,1,4,1,4,2,4
