In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

In [2]:
sq = """ 

WITH
  CICBaseTable AS ----To create combine CIC raw data combining Granted and Non Granted table
  ( -- Query FOR dfgranted
  SELECT
    digitalLoanAccountId,
    crifApplicationId,
    customerId,
    processEngineGuid,
    requestGuid,
    ContractHistoryType,
    CBContractCode,
    ContractEndDate,
    ContractPhase,
    ContractPhaseDesc,
    ContractStartDate,
    ContractStatus,
    ContractStatusDesc,
    ContractType,
    ContractTypeDesc,
    Currency,
    CurrencyDesc,
    LastUpdateDate,
    OriginalCurrency,
    OriginalCurrencyDesc,
    ProviderCodeEncrypted,
    ProviderContractNo,
    ReferenceNo,
    Role,
    RoleDesc,
    BilledAmount,
    BoardResolutionFlag,
    BoardResolutionFlagDesc,
    CancellationDate,
    CardReferenceCode,
    ChargedAmount,
    CreditLimit,
    CreditPurpose,
    CreditPurposeDesc,
    FinancedAmount,
    FirstPaymentDate,
    FlagCardUsed,
    HolderLiability,
    HolderLiabilityDesc,
    InstallmentType,
    InstallmentTypeDesc,
    InstallmentsNumber,
    LastChargeDate,
    LastPaymentAmount,
    LastPaymentDate,
    MinPaymentIndicator,
    MinPaymentIndicatorDesc,
    MinPaymentPercentage,
    MonthlyPaymentAmount,
    NextPayment,
    NextPaymentDate,
    OutstandingBalance,
    OutstandingBalanceUnbilled,
    OutstandingPaymentsNumber,
    OverallCreditLimit,
    OverdueDays,
    OverdueDaysDesc,
    OverduePaymentsAmount,
    OverduePaymentsNumber,
    PaymentMethod,
    PaymentMethodDesc,
    PaymentPeriodicity,
    PaymentPeriodicityDesc,
    PremiumCard,
    PremiumCardDesc,
    ReorganizedCreditCode,
    ReorganizedCreditCodeDesc,
    ServicesLinesNo,
    TimesCardUsed,
    TransactionType,
    TransactionTypeDesc,
    Utilization,
    LinkedSubject_CBSubjectCode,
    LinkedSubject_Name,
    LinkedSubject_Role,
    LinkedSubject_RoleDesc,
    Note_TypeDesc,
    Note_Text,
    Note_Type,
    run_date,
    NULL AS ContractRequestDate,
    'granted' AS SOURCE
  FROM
    prj-prod-dataplatform.risk_credit_cic_data.granted_contracts
  UNION ALL
    -- Query FOR dfnongranted
  SELECT
    digitalLoanAccountId,
    crifApplicationId,
    customerId,
    processEngineGuid,
    requestGuid,
    NULL AS ContractHistoryType,
    CBContractCode,
    NULL AS ContractEndDate,
    ContractPhase,
    ContractPhaseDesc,
    NULL AS ContractStartDate,
    NULL AS ContractStatus,
    NULL AS ContractStatusDesc,
    ContractType,
    ContractTypeDesc,
    NULL AS Currency,
    NULL AS CurrencyDesc,
    LastUpdateDate,
    NULL AS OriginalCurrency,
    NULL AS OriginalCurrencyDesc,
    ProviderCodeEncrypted,
    ProviderContractNo,
    ReferenceNo,
    Role,
    RoleDesc,
    NULL AS BilledAmount,
    NULL AS BoardResolutionFlag,
    NULL AS BoardResolutionFlagDesc,
    NULL AS CancellationDate,
    NULL AS CardReferenceCode,
    NULL AS ChargedAmount,
    CreditLimit,
    NULL AS CreditPurpose,
    NULL AS CreditPurposeDesc,
    FinancedAmount,
    NULL AS FirstPaymentDate,
    NULL AS FlagCardUsed,
    NULL AS HolderLiability,
    NULL AS HolderLiabilityDesc,
    NULL AS InstallmentType,
    NULL AS InstallmentTypeDesc,
    InstallmentsNumber,
    NULL AS LastChargeDate,
    NULL AS LastPaymentAmount,
    NULL AS LastPaymentDate,
    NULL AS MinPaymentIndicator,
    NULL AS MinPaymentIndicatorDesc,
    NULL AS MinPaymentPercentage,
    MonthlyPaymentAmount,
    NULL AS NextPayment,
    NULL AS NextPaymentDate,
    NULL AS OutstandingBalance,
    NULL AS OutstandingBalanceUnbilled,
    NULL AS OutstandingPaymentsNumber,
    NULL AS OverallCreditLimit,
    NULL AS OverdueDays,
    NULL AS OverdueDaysDesc,
    NULL AS OverduePaymentsAmount,
    NULL AS OverduePaymentsNumber,
    NULL AS PaymentMethod,
    NULL AS PaymentMethodDesc,
    PaymentPeriodicity,
    PaymentPeriodicityDesc,
    NULL AS PremiumCard,
    NULL AS PremiumCardDesc,
    NULL AS ReorganizedCreditCode,
    NULL AS ReorganizedCreditCodeDesc,
    NULL AS ServicesLinesNo,
    NULL AS TimesCardUsed,
    NULL AS TransactionType,
    NULL AS TransactionTypeDesc,
    NULL AS Utilization,
    LinkedSubject_CBSubjectCode,
    LinkedSubject_Name,
    LinkedSubject_Role,
    LinkedSubject_RoleDesc,
    Note_TypeDesc,
    Note_Text,
    Note_Type,
    run_date,
    ContractRequestDate,
    'nongranted' AS SOURCE
  FROM
    prj-prod-dataplatform.risk_credit_cic_data.notgranted_contracts )
    -- select * from CICBaseTable where digitalLoanAccountId = 'c7948327-e6a6-46ee-96fc-66c1b3b56f93'
-- SELECT  ContractHistoryType, RoleDesc, count(digitalLoanAccountId) cnt FROM  CICBaseTable where COALESCE(ContractHistoryType, 'NA') in ('Installments', 'CreditCards', 'NA') group by 1,2 order by 3 desc;
,
employementdata as
(SELECT distinct
  digitalLoanAccountId, 
  crifApplicationId, 
  customerId,   
  AnnualMonthlyIndicator, 
  Currency, 
  DateHiredFrom, 
  DateHiredTo, 
  GrossIncome,
  CAST(
    CASE 
      WHEN COALESCE(AnnualMonthlyIndicator, 'NA') LIKE 'M' THEN CAST(COALESCE(GrossIncome, '0') AS NUMERIC)
      WHEN COALESCE(AnnualMonthlyIndicator, 'NA') LIKE 'Y' THEN ROUND(CAST(COALESCE(GrossIncome, '0') AS NUMERIC)/12, 0)
      ELSE 0 
    END AS INT64
  ) AS MonthlyIncome,
  CAST(
    CASE 
      WHEN COALESCE(AnnualMonthlyIndicator, 'NA') LIKE 'M' THEN ROUND(CAST(COALESCE(GrossIncome, '0') AS NUMERIC)*12, 0)
      WHEN COALESCE(AnnualMonthlyIndicator, 'NA') LIKE 'Y' THEN CAST(COALESCE(GrossIncome, '0') AS NUMERIC)
      ELSE 0 
    END AS INT64
  ) AS AnnualIncome,    
  OccupationDesc,
  OccupationStatusDesc,
  PSIC, 
  REGEXP_REPLACE(PSICDesc, r'^\d+\s*-\s*', '') AS PSICDesc ,
  row_number() over (partition by digitalLoanAccountId order by digitalLoanAccountId ) as rnk
FROM prj-prod-dataplatform.risk_credit_cic_data.employment_data),
CICBase2Table as 
(SELECT digitalLoanAccountId, crifApplicationId, customerId,
       processEngineGuid, requestGuid, ContractHistoryType,
       CBContractCode, ContractEndDate, ContractPhase,
       ContractPhaseDesc, ContractStartDate, ContractStatus,
       ContractStatusDesc, ContractType, ContractTypeDesc,
       Currency, CurrencyDesc, LastUpdateDate, OriginalCurrency,
       OriginalCurrencyDesc, ProviderCodeEncrypted,
       ProviderContractNo, ReferenceNo, Role, RoleDesc,
       BilledAmount, BoardResolutionFlag, BoardResolutionFlagDesc,
       CancellationDate, CardReferenceCode, ChargedAmount,
       CreditLimit, CreditPurpose, CreditPurposeDesc,
       FinancedAmount, FirstPaymentDate, FlagCardUsed,
       HolderLiability, HolderLiabilityDesc, InstallmentType,
       InstallmentTypeDesc, InstallmentsNumber, LastChargeDate,
       LastPaymentAmount, LastPaymentDate, MinPaymentIndicator,
       MinPaymentIndicatorDesc, MinPaymentPercentage,
       MonthlyPaymentAmount, NextPayment, NextPaymentDate,
       OutstandingBalance, OutstandingBalanceUnbilled,
       OutstandingPaymentsNumber, OverallCreditLimit, OverdueDays,
       OverdueDaysDesc, OverduePaymentsAmount,
       OverduePaymentsNumber, PaymentMethod, PaymentMethodDesc,
       PaymentPeriodicity, PaymentPeriodicityDesc, PremiumCard,
       PremiumCardDesc, ReorganizedCreditCode,
       ReorganizedCreditCodeDesc, ServicesLinesNo, TimesCardUsed,
       TransactionType, TransactionTypeDesc, Utilization,
       LinkedSubject_CBSubjectCode, LinkedSubject_Name,
       LinkedSubject_Role, LinkedSubject_RoleDesc, Note_TypeDesc,
       Note_Text, Note_Type, run_date, ContractRequestDate,  SOURCE
,
  CASE
    WHEN ContractPhaseDesc = 'Active' AND ContractStatusDesc = '' THEN 'Neutral'
    WHEN ContractPhaseDesc = 'Active' AND ContractStatusDesc is null THEN 'Neutral'
    WHEN ContractPhaseDesc = 'Active' AND ContractStatusDesc = 'Pre-Activated' THEN 'Good'
    WHEN ContractPhaseDesc = 'Active' AND ContractStatusDesc = 'Foreclosure' THEN 'Good'
    WHEN ContractPhaseDesc = 'Closed' AND ContractStatusDesc = '' THEN 'Good'
    WHEN ContractPhaseDesc = 'Closed' AND ContractStatusDesc is null THEN 'Good'
    WHEN ContractPhaseDesc = 'Closed in advance' AND ContractStatusDesc = '' THEN 'Good'
    WHEN ContractPhaseDesc = 'Closed in advance' AND ContractStatusDesc is null THEN 'Good'
    WHEN ContractPhaseDesc = 'Closed in advance' AND ContractStatusDesc = 'Foreclosure' THEN 'Good'
    WHEN ContractStatusDesc IN ('Debt Assumption', 'Repossessed') THEN 'Neutral'
    WHEN ContractStatusDesc IN (
      'Write-off (BLW)', 'Past Due', 'Blocked by the Bank due to Credit Reasons',
      'Under dispute / non performing', 'Under litigation / Delinquent',
      'Blocked or Closed voluntary by the Customer', 'Blocked or Closed due to Restructuring',
      'There are unpaid amounts, Negotiated Settlement', 'Previous delinquency settled',
      'Write-off and Credit transferred to third party / Collection',
      'Write-off and Fully Settled', 'Blocked by the Bank due to card lost/stolen',
      'Blocked by the Bank due to fraud', 'Dispute / Litigation contested'
    ) THEN 'Bad'
    ELSE 'Unknown'
  END AS Repaymentcategory,
CASE
    WHEN ContractTypeDesc IN ('Salary loan', 'Personal Loan', 'Unsecured loan', 'Vehicle Loan', 'Mortgage/Real Estate', 'Time Loan', 'Short Term Loan', 'Benefit Loan', 'Home equity loan', 'Agricultural Loan', 'Student Loan', 'Vehicle leasing', 'Credit Card', 'Credit Card - Shared Limit', 'Credit Card - MultiCurrency', 'Revolving Credit', 'Trust Loan', 'Credit Line') 
      OR (ContractTypeDesc = 'Term Loan' AND CreditPurposeDesc NOT LIKE 'Small and Medium Enterprise Loans%')
      OR (ContractTypeDesc = 'Loan Line' AND CreditPurposeDesc NOT LIKE 'Small and Medium Enterprise Loans%')
      OR (CreditPurposeDesc LIKE 'Loans to Individual%' AND ContractTypeDesc != 'Business Loan')
      OR (CreditPurposeDesc LIKE 'Microfinance Loans' AND ContractTypeDesc != 'Business Loan')
      OR (CreditPurposeDesc LIKE 'Other Agricultural Credit' AND ContractTypeDesc != 'Business Loan')
      OR (ContractHistoryType LIKE 'Installments' AND ContractTypeDesc = 'Term Loan' and CreditPurposeDesc is null)
      OR (ContractHistoryType is null AND ContractTypeDesc = 'Term Loan' and CreditPurposeDesc is null)
      OR CreditPurposeDesc IN ('Agrarian Reform', 'Development Loan Incentives - Socialized Low Cost Housing (Loans to individuals for housing purposes )')
      OR ContractHistoryType = 'CreditCards'
    THEN 'B2C'
    
    WHEN ContractTypeDesc IN ('Business Loan', 'Real estate leasing', 'Equipment leasing')
      OR CreditPurposeDesc IN ('Development Loan Incentives - Cooperatives', 'Development Loan Incentives - Educational Inst.', 'Loan to Government - GOCCs (Other Financial)', 'Loan to Government - GOCCs (Social Security Institutions)', 'Loan to Government - LGUs', 'Loan to Government - National Government', 'Loans to Private Corporation (Financial)', 'Loans to Private Corporation (Non-Financial)', 'Small and Medium Enterprise Loans (Medium Scale Enterprise)', 'Small and Medium Enterprise Loans (Small Scale Enterprise)')
      OR (ContractTypeDesc = 'Vehicle Loan' AND CreditPurposeDesc NOT LIKE 'Loans to Individual%')
      OR (ContractTypeDesc = 'Loan Line' AND CreditPurposeDesc LIKE 'Small and Medium Enterprise Loans%')
      OR (ContractTypeDesc = 'Term Loan' AND CreditPurposeDesc LIKE 'Small and Medium Enterprise Loans%')
    THEN 'B2B'
    
    ELSE 'Unknown'
  END AS BusinessType,
 CASE
    WHEN ContractTypeDesc = 'Time Loan' THEN 'Time Loans'
    WHEN ContractTypeDesc IN ('Short Term Loan', 'Term Loan') THEN 'Short and Term Loans'
    WHEN ContractTypeDesc = 'Home equity loan' THEN 'Home Equity Loans'
    WHEN ContractTypeDesc IN ('Credit Card', 'Credit Card - MultiCurrency', 'Credit Card - Shared Limit') THEN 'Credit Cards'
    WHEN ContractTypeDesc IN ('Loan Line', 'Credit Line') THEN 'Credit Lines'
    WHEN ContractTypeDesc IN ('Mortgage/Real Estate', 'Real estate leasing') THEN 'Real Estate Loans'
    WHEN ContractTypeDesc = 'Trust Loan' THEN 'Trust Loans'
    WHEN ContractTypeDesc = 'Personal Loan' THEN 'Personal Loans'
    ELSE 'Other Loans'
  END AS loan_segment
from CICBaseTable 
  where COALESCE(ContractHistoryType, 'NA') in ('Installments', 'CreditCards', 'NA')
  and COALESCE(RoleDesc, 'NA') in ('Borrower', 'Co-Borrower', 'NA')
),
CICBase3Table as
(select distinct * FROM  CICBase2Table where BusinessType in ('B2C', 'Unknown')
)
-- select distinct * from CICBase3Table where (digitalLoanAccountid||crifApplicationId||run_date||CBContractCode) = 'c7948327-e6a6-46ee-96fc-66c1b3b56f93taran-2629742-80860572024-07-05302970230';
,
custtname as (SELECT distinct  cast(custId as numeric) custid, firstName, middleName, LastName FROM `prj-prod-dataplatform.dl_loans_db_raw.tdbk_loan_customer_details` 
),
stepAtablebase as
(
select 
(a.digitalLoanAccountid||b.crifApplicationId||b.run_date||b.CBContractCode) uniquekey,
a.digitalLoanAccountId,
a.customerId, cn.Firstname, cn.middleName, cn.LastName,
a.loanAccountNumber,
a.flagDisbursement,
a.disbursementDateTime,
a.termsAndConditionsSubmitDateTime,
a.natureofwork,
a.subIndustryDescription,
a.industryDescription,
case when a.reloan_flag = 1 and a.loantype not like 'FLEXUP'then 'Reloan'
      when a.loantype = 'FLEXUP' and a.new_loan_type = 'Flex-up' and a.reloan_flag = 0 and a.flagDisbursement = 1 then 'Flex-up' 
              else a.new_loan_type end as LoanProduct, b.crifApplicationId, 
       processEngineGuid, requestGuid, ContractHistoryType,
       CBContractCode, ContractEndDate, ContractPhase,
       ContractPhaseDesc, ContractStartDate, ContractStatus,
       ContractStatusDesc, ContractType, ContractTypeDesc,
       b.Currency, CurrencyDesc, LastUpdateDate, OriginalCurrency,
       OriginalCurrencyDesc, ProviderCodeEncrypted,
       ProviderContractNo, ReferenceNo, Role, RoleDesc,
       BilledAmount, BoardResolutionFlag, BoardResolutionFlagDesc,
       CancellationDate, CardReferenceCode, ChargedAmount,
       CreditLimit, CreditPurpose, CreditPurposeDesc,
       FinancedAmount, FirstPaymentDate, FlagCardUsed,
       HolderLiability, HolderLiabilityDesc, InstallmentType,
       InstallmentTypeDesc, InstallmentsNumber, LastChargeDate,
       LastPaymentAmount, LastPaymentDate, MinPaymentIndicator,
       MinPaymentIndicatorDesc, MinPaymentPercentage,
       MonthlyPaymentAmount, NextPayment, NextPaymentDate,
       b.OutstandingBalance, OutstandingBalanceUnbilled,
       OutstandingPaymentsNumber, OverallCreditLimit, OverdueDays,
       OverdueDaysDesc, OverduePaymentsAmount,
       OverduePaymentsNumber, PaymentMethod, PaymentMethodDesc,
       PaymentPeriodicity, PaymentPeriodicityDesc, PremiumCard,
       PremiumCardDesc, ReorganizedCreditCode,
       ReorganizedCreditCodeDesc, ServicesLinesNo, TimesCardUsed,
       TransactionType, TransactionTypeDesc, Utilization,
       LinkedSubject_CBSubjectCode, LinkedSubject_Name,
       LinkedSubject_Role, LinkedSubject_RoleDesc, Note_TypeDesc,
       Note_Text, Note_Type, run_date, ContractRequestDate,  SOURCE, Repaymentcategory, BusinessType, loan_segment
       , ed.AnnualMonthlyIndicator, ed.Currency, ed.DateHiredFrom, ed.DateHiredTo, ed.GrossIncome, ed.MonthlyIncome, ed.AnnualIncome, ed.OccupationDesc, ed.OccupationStatusDesc, ed.PSIC, ed.PSICDesc
FROM `risk_credit_mis.loan_master_table` a 
inner join CICBase3Table b
ON a.digitalLoanAccountId = b.digitalLoanAccountId
    AND a.crifApplicationId = b.crifApplicationId
left join (select * from employementdata where rnk = 1) ed on ed.digitalLoanAccountId = a.digitalLoanAccountId
Left join custtname cn on cn.custid = a.customerId
where a.disbursementDateTime is not null
and date_trunc(a.disbursementDateTime, day) >= '2023-01-10'
and date_trunc(a.disbursementDateTime, day) < current_date()
),
stepAtable2base as 
(select *, row_number() over(partition by uniquekey order by uniquekey) rnk from stepAtablebase)
,
-- select * from stepAtable2base where rnk > 1;
-- select * from stepAtablebase where  (digitalLoanAccountid||crifApplicationId||run_date||CBContractCode) = 'c7948327-e6a6-46ee-96fc-66c1b3b56f93taran-2629742-80860572024-07-05302970230'
-- select (digitalLoanAccountid||crifApplicationId||run_date||CBContractCode), count((digitalLoanAccountid||crifApplicationId||run_date||CBContractCode)) from stepAtable2base group by 1 having count((digitalLoanAccountid||crifApplicationId||run_date||CBContractCode)) > 1;
base as 
(select a.*, d.obsFSPD30, d.defFSPD30 
, case when date_trunc(a.disbursementDateTime, day) <= '2024-03-31' then 'Train_Validation' 
         when date_trunc(a.disbursementDateTime, day) >'2024-03-31' and date_trunc(a.disbursementDateTime, day) <= '2024-06-07' then 'Test' else 'Other' end targetdataselectiontype
from stepAtable2base a 
inner join 
(
    SELECT  
        loanAccountNumber
        , SUM(CASE WHEN obs_min_inst_def30 >= 1 THEN 1 ELSE 0 END) as obsFPD30
        , SUM(CASE WHEN min_inst_def30 = 1 THEN 1 else 0 END) as defFPD30
        , sum(case when obs_min_inst_def30>=1 then (select max(disbursedloanamount) from `risk_credit_mis.loan_master_table` where loanAccountNumber = a1.loanAccountNumber) else 0 end) as obs_fpd30_vol
        , sum(case when min_inst_def30=1 then (select max(disbursedloanamount) from `risk_credit_mis.loan_master_table` where loanAccountNumber = a1.loanAccountNumber) else 0 end) as def_fpd30_vol
        , SUM(CASE WHEN obs_min_inst_def30 >= 2 THEN 1 ELSE 0 END) as obsFSPD30
        , SUM(CASE WHEN obs_min_inst_def30 >= 2 AND (min_inst_def30 = 2 or min_inst_def30 = 1) THEN 1 else 0 END) as defFSPD30
        , SUM(CASE WHEN obs_min_inst_def30 >= 2 THEN (select max(disbursedloanamount) from `risk_credit_mis.loan_master_table` where loanAccountNumber = a1.loanAccountNumber) ELSE 0 END) as obsFSPD30_vol
        , SUM(CASE WHEN obs_min_inst_def30 >= 2 AND (min_inst_def30 = 2 or min_inst_def30 = 1) THEN (select max(disbursedloanamount) from `risk_credit_mis.loan_master_table` where loanAccountNumber = a1.loanAccountNumber) else 0 END) as defFSPD30_vol
        , SUM(CASE WHEN obs_min_inst_def30 >= 3 THEN 1 ELSE 0 END) as obsFSTPD30
        , SUM(CASE WHEN obs_min_inst_def30 >= 3 AND (min_inst_def30 = 3 or min_inst_def30 = 2 or min_inst_def30 = 1) THEN 1 else 0 END) as defFSTPD30
        , SUM(CASE WHEN obs_min_inst_def30 >= 3 THEN (select max(disbursedloanamount) from `risk_credit_mis.loan_master_table` where loanAccountNumber = a1.loanAccountNumber) ELSE 0 END) as obsFSTPD30_vol
        , SUM(CASE WHEN obs_min_inst_def30 >= 3 AND (min_inst_def30 = 3 or min_inst_def30 = 2 or min_inst_def30 = 1) THEN (select max(disbursedloanamount) from `risk_credit_mis.loan_master_table` where loanAccountNumber = a1.loanAccountNumber) else 0 END) as defFSTPD30_vol
    FROM `risk_credit_mis.loan_deliquency_data` a1 
    GROUP BY 1
  ) d
ON a.loanAccountNumber = d.loanAccountNumber
where 
a.rnk = 1
and a.LoanProduct in ('Quick', 'SIL-Instore')
)
select * from base 
--  where digitalLoanAccountId = '42c268b9-1fe7-445c-b459-8c66d0483884'
-- select digitalLoanAccountId, count(digitalLoanAccountId) cnt from base group by 1 having count(digitalLoanAccountId) > 1
-- select uniquekey, count(uniquekey) from base group by 1 having count(uniquekey) > 1
-- and 
-- uniquekey = 'c7948327-e6a6-46ee-96fc-66c1b3b56f93taran-2629742-80860572024-07-05302970230'
;
"""
df = client.query(sq).to_dataframe(progress_bar_type = 'tqdm')
df.to_csv(r"C:\Users\DwaipayanChakroborti\OneDrive - Tonik Financial Pte Ltd\MyStuff\Biswa\CIC Data Analysis\CICModel\DataPreparation\Data\cicfinaldataset20240807_v3.csv", index = False)
print(f"The rows and columns in cic final dataset are:\t {df.shape}")
pd.set_option("display.max_columns", None)
df.head(10)


Job ID 1a14c00f-1d37-4caf-9b27-035ac2386387 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns in cic final dataset are:	 (245037, 112)


Unnamed: 0,uniquekey,digitalLoanAccountId,customerId,Firstname,middleName,LastName,loanAccountNumber,flagDisbursement,disbursementDateTime,termsAndConditionsSubmitDateTime,natureofwork,subIndustryDescription,industryDescription,LoanProduct,crifApplicationId,processEngineGuid,requestGuid,ContractHistoryType,CBContractCode,ContractEndDate,ContractPhase,ContractPhaseDesc,ContractStartDate,ContractStatus,ContractStatusDesc,ContractType,ContractTypeDesc,Currency,CurrencyDesc,LastUpdateDate,OriginalCurrency,OriginalCurrencyDesc,ProviderCodeEncrypted,ProviderContractNo,ReferenceNo,Role,RoleDesc,BilledAmount,BoardResolutionFlag,BoardResolutionFlagDesc,CancellationDate,CardReferenceCode,ChargedAmount,CreditLimit,CreditPurpose,CreditPurposeDesc,FinancedAmount,FirstPaymentDate,FlagCardUsed,HolderLiability,HolderLiabilityDesc,InstallmentType,InstallmentTypeDesc,InstallmentsNumber,LastChargeDate,LastPaymentAmount,LastPaymentDate,MinPaymentIndicator,MinPaymentIndicatorDesc,MinPaymentPercentage,MonthlyPaymentAmount,NextPayment,NextPaymentDate,OutstandingBalance,OutstandingBalanceUnbilled,OutstandingPaymentsNumber,OverallCreditLimit,OverdueDays,OverdueDaysDesc,OverduePaymentsAmount,OverduePaymentsNumber,PaymentMethod,PaymentMethodDesc,PaymentPeriodicity,PaymentPeriodicityDesc,PremiumCard,PremiumCardDesc,ReorganizedCreditCode,ReorganizedCreditCodeDesc,ServicesLinesNo,TimesCardUsed,TransactionType,TransactionTypeDesc,Utilization,LinkedSubject_CBSubjectCode,LinkedSubject_Name,LinkedSubject_Role,LinkedSubject_RoleDesc,Note_TypeDesc,Note_Text,Note_Type,run_date,ContractRequestDate,SOURCE,Repaymentcategory,BusinessType,loan_segment,AnnualMonthlyIndicator,Currency_1,DateHiredFrom,DateHiredTo,GrossIncome,MonthlyIncome,AnnualIncome,OccupationDesc,OccupationStatusDesc,PSIC,PSICDesc,rnk,obsFSPD30,defFSPD30,targetdataselectiontype
0,000d5b99-ffd3-45ad-b650-32fe43a95dc7taran-2614...,000d5b99-ffd3-45ad-b650-32fe43a95dc7,2614354,NENITA,UMALI,CAIDIC,60826143540016,1,2024-06-27 16:59:48,2024-06-27 16:55:44,,,,SIL-Instore,taran-2614354-9950345,9be21cf75100ec9fe03b3f154c42f1fe0bfc7364,16b6e2be-fd2d-4b5f-87cb-01e72ffbb5ab,CreditCards,O04057747,,AC,Active,2018-06-23,,,31,Credit Card,PHP,Philippine peso,2022-07-31,PHP,Philippine peso,CC005,,1,B,Borrower,,,,,,15446.0,167000.0,,,,,,,,V,variable,,2022-05-11,103177.0,2022-07-29,,,,,850.0,2022-08-01,8655.0,7167.0,,,0,Paid as agreed / Current,0.0,0.0,CCR,Credit card payment,M,monthly installments-30 days,,,0.0,Credit is not re-organized,,0.0,PCC,PRIMARY CREDIT CARD,,,,,,,,,2024-06-27,,granted,Neutral,B2C,Credit Cards,Y,PHP,,,360000.0,30000.0,360000.0,,Self Employed,,,1,0,0,Other
1,0010fa50-dedd-471b-93b8-7e2cabf2b71ftonik-bnpl...,0010fa50-dedd-471b-93b8-7e2cabf2b71f,2383606,EDILBERTO,GARCIA,DELA CRUZ,60823836060011,1,2024-01-29 16:06:12,2024-01-29 16:03:47,Sales/Marketing Professional,Others,Services,SIL-Instore,tonik-bnpl-437300,effb4cb0-be7c-11ee-b07b-0242ace6000f,efdd6470-be7c-11ee-82ea-0242ace60004,Installments,K02698546,2018-08-23,CL,Closed,2017-11-15,,,12,Personal Loan,PHP,Philippine peso,2018-08-31,PHP,Philippine peso,PF002,,1,B,Borrower,,0.0,No,,,,,,,4029.0,,,,,,,9.0,,782.0,2018-08-22,,,,582.0,0.0,,0.0,,0.0,,0,Paid as agreed / Current,0.0,0.0,,,M,monthly installments-30 days,,,0.0,Credit is not re-organized,,,,NOT APPLICABLE,,,,,,,,,2024-01-29,,granted,Good,B2C,Personal Loans,,,,,,0.0,0.0,,,,,1,1,0,Train_Validation
2,00437977-13bc-4093-ae5d-a596cabe6c5btaran-2610...,00437977-13bc-4093-ae5d-a596cabe6c5b,2610898,BEVERLY JOY,ARCENAS,VARGAS,60826108980017,1,2024-06-26 13:19:02,2024-06-26 13:15:13,Other Non Professional Services,Hotel / Resort / Lodge,Services,SIL-Instore,taran-2610898-2682737,9be21cf75100ec9fe03b3f154c42f1fe0bfc7364,1b90657d-5e5f-4d4b-abe5-1fdef3a09d35,CreditCards,200967832,2020-11-21,AC,Active,2018-11-19,,,32,Credit Card - Shared Limit,PHP,Philippine peso,2020-11-30,PHP,Philippine peso,UB002,,1,B,Borrower,,,,,8003671.0,,20000.0,,,,,,,,V,variable,,,0.0,,,,,,0.0,,0.0,,,,0,Paid as agreed / Current,0.0,0.0,,,M,monthly installments-30 days,,,,,,,PCC,PRIMARY CREDIT CARD,,,,,,,,,2024-06-26,,granted,Neutral,B2C,Credit Cards,,,,,,0.0,0.0,,Permanent Job (Private sector),,,1,0,0,Other
3,009a4dd3-534c-4314-9ad2-4f0b2aedc3cetaran-2621...,009a4dd3-534c-4314-9ad2-4f0b2aedc3ce,2621550,JOSEPH,CANESO,GARSULA,60826215500011,1,2024-06-30 11:54:10,2024-06-30 11:40:15,Other Professional services,Hotel / Resort / Lodge,Services,SIL-Instore,taran-2621550-2754203,9be21cf75100ec9fe03b3f154c42f1fe0bfc7364,df6d1a82-1113-4acd-a802-02cd96d8311e,,N09032213,,RQ,Requested,,,,12,Personal Loan,,,2024-05-01,,,PF002,,1,B,Borrower,,,,,,,,,,9.0,,,,,,,9.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024-06-30,2024-05-01,nongranted,Unknown,B2C,Personal Loans,,,,,,0.0,0.0,,,,,1,0,0,Other
4,00bc1313-97ba-4525-a357-63f3e4f03fd5tonik-tul-...,00bc1313-97ba-4525-a357-63f3e4f03fd5,2475312,CRISTINE JOY,GODINEZ,TESALUNA,60824753120018,1,2024-04-21 19:33:09,2024-04-21 18:53:18,IT Professional,Call Centre / BPO,Services,Quick,tonik-tul-494838,5db67040-ffcd-11ee-a41d-0242ace60015,5d8cef40-ffcd-11ee-a4cc-0242ace60008,Installments,503557137,2021-10-05,AC,Active,2021-09-06,,,12,Personal Loan,PHP,Philippine peso,2021-09-30,PHP,Philippine peso,PF003,,1,B,Borrower,,,,,,,,31.0,Loans to Individual for Consumption Purposes -...,780.0,2021-10-05,,,,,,1.0,,0.0,,,,,811.0,,,811.0,,1.0,,N,Too new to be rated / Not Available,0.0,0.0,OTH,Other,M,monthly installments-30 days,,,,,,,,NOT APPLICABLE,,,,,,,,,2024-04-21,,granted,Neutral,B2C,Personal Loans,,,,,,0.0,0.0,,,,,1,1,0,Test
5,00da0f6e-06ad-4477-9404-7a0b7c5edd14tonik-tul-...,00da0f6e-06ad-4477-9404-7a0b7c5edd14,1538747,SHARON MYRNA,AGUSTIN,LOPEZ,60815387470021,1,2023-10-09 19:47:46,2023-10-09 19:11:53,Admin/Secretarial,Call Centre / BPO,Services,Quick,tonik-tul-362449,a7c96250-6694-11ee-b07b-0242ace6000f,a7a7a980-6694-11ee-8875-0242ace60004,Installments,K04899295,2022-01-05,CL,Closed,2021-12-19,,,12,Personal Loan,PHP,Philippine peso,2022-01-31,PHP,Philippine peso,PF004,,1,B,Borrower,,,,,,,,31.0,Loans to Individual for Consumption Purposes -...,623.0,2022-01-05,,,,,,1.0,,648.0,2022-01-05,,,,648.0,,,0.0,,0.0,,0,Paid as agreed / Current,0.0,0.0,OTH,Other,M,monthly installments-30 days,,,,,,,,NOT APPLICABLE,,,,,,,,,2023-10-09,,granted,Good,B2C,Personal Loans,,,,,,0.0,0.0,,Other,,,1,1,0,Train_Validation
6,01051743-8bc0-4d7c-9270-e928d0c15e40tonik-bnpl...,01051743-8bc0-4d7c-9270-e928d0c15e40,2259838,ZENAIDA,CAROSOS,CALIZO,60822598380017,1,2023-10-04 17:37:56,2023-10-04 17:24:15,,,,SIL-Instore,tonik-bnpl-358727,cac0f0d0-6297-11ee-a41d-0242ace60015,ca99b9c0-6297-11ee-be29-0242ace60008,,M05252504,,RQ,Requested,,,,12,Personal Loan,,,2023-10-04,,,PF002,,1,B,Borrower,,,,,,,,,,13214.0,,,,,,,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2023-10-04,2023-10-04,nongranted,Unknown,B2C,Personal Loans,,,,,,0.0,0.0,,,,,1,1,0,Train_Validation
7,01054836-b41f-444e-aac2-41e47dd3a425taran-2594...,01054836-b41f-444e-aac2-41e47dd3a425,2594438,CRISTIAN,GOROZA,DELA CRUZ,60825944380013,1,2024-06-20 12:21:59,2024-06-20 12:16:25,Govt Employee,Government Department / Institutions,Government Institution,SIL-Instore,taran-2594438-9992349,78688bf241a2619a5b43e9caf60995d610ea94fe,d02fb454-3f4e-4b15-8d0d-9529a13c50d4,,806987989,,RQ,Requested,,,,12,Personal Loan,,,2024-05-15,,,PF002,,1,B,Borrower,,,,,,,,,,1800.0,,,,,,,6.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024-06-20,2024-05-15,nongranted,Unknown,B2C,Personal Loans,,,,,,0.0,0.0,,,,,1,0,0,Other
8,01140bfa-041b-4896-ad7a-a02c682a77aatonik-tul-...,01140bfa-041b-4896-ad7a-a02c682a77aa,2181158,ARVIN JOHN,TAMAYO,LUCAS,60821811580017,1,2024-06-12 14:04:59,2024-06-07 08:20:05,Engineer,IT / Tech Consultancy / Projects / AI ML Solut...,Technology,Quick,tonik-tul-546058,b1972690-2463-11ef-a41d-0242ace60015,b16d0950-2463-11ef-a494-0242ace60008,,G01685345,,RQ,Requested,,,,20,Salary loan,,,2019-09-15,,,PF002,,1,B,Borrower,,,,,,,,,,,,,,,,,10.0,,,,,,,210000.0,,,,,,,,,,,,,D,Daily installments-1 day,,,,,,,,,,,,,,,,,2024-06-07,2019-09-15,nongranted,Unknown,B2C,Other Loans,,,,,,,,,,,,1,0,0,Other
9,01456da9-4435-4077-a199-15ba238dd709tonik-bnpl...,01456da9-4435-4077-a199-15ba238dd709,2543137,MYRNA,MENDOZA,MERGINIO,60825431370014,1,2024-05-31 19:37:43,2024-05-31 19:27:50,Owner,Others,Services,SIL-Instore,tonik-bnpl-537730,d1973390-1f40-11ef-a41d-0242ace60015,d16b4190-1f40-11ef-a494-0242ace60008,CreditCards,E03357822,,AC,Active,2021-01-14,,,31,Credit Card,PHP,Philippine peso,2024-03-31,PHP,Philippine peso,PF002,,1,B,Borrower,,,,,,,15000.0,,,,,,,,V,variable,,,988.0,,,,,,0.0,,0.0,,,,0,Paid as agreed / Current,0.0,0.0,,,M,monthly installments-30 days,,,,,,,PCC,PRIMARY CREDIT CARD,,,,,,,,,2024-05-31,,granted,Neutral,B2C,Credit Cards,,,,,,0.0,0.0,,,,,1,0,0,Test


In [3]:
df['disbursementDateTime'][df['obsFSPD30']==1].max()

Timestamp('2024-05-07 20:09:44')

In [4]:
import pandas as pd

# Assuming df is your original DataFrame

# Calculate duplicate counts
duplicate_counts = df.groupby('targetdataselectiontype')['digitalLoanAccountId'].apply(lambda x: x.duplicated().sum()).reset_index(name='duplicate_count')

# Count total unique digitalLoanAccountId
unique_counts = df.groupby('targetdataselectiontype')['digitalLoanAccountId'].nunique().reset_index(name='unique_count')

# Count where defFSPD30 = 1 and obsFSPD30 = 1
def_count = df[df['defFSPD30'] == 1].groupby('targetdataselectiontype')['digitalLoanAccountId'].nunique().reset_index(name='defFSPD30_count')
obs_count = df[df['obsFSPD30'] == 1].groupby('targetdataselectiontype')['digitalLoanAccountId'].nunique().reset_index(name='obsFSPD30_count')

# Sum defFSPD30 and obsFSPD30
sum_values = df.groupby('targetdataselectiontype')[['defFSPD30', 'obsFSPD30']].sum().reset_index()

# Merge all the dataframes
merged_df = pd.merge(unique_counts, duplicate_counts, on='targetdataselectiontype')
merged_df = pd.merge(merged_df, def_count, on='targetdataselectiontype', how='left')
merged_df = pd.merge(merged_df, obs_count, on='targetdataselectiontype', how='left')
merged_df = pd.merge(merged_df, sum_values, on='targetdataselectiontype', how='left')

# Fill NaN values with 0 for count columns
merged_df[['defFSPD30_count', 'obsFSPD30_count']] = merged_df[['defFSPD30_count', 'obsFSPD30_count']].fillna(0)

# Calculate ratios
merged_df['FSPD30_ratio'] = merged_df['defFSPD30'] / merged_df['obsFSPD30']
merged_df['FSPD30_percentage'] = merged_df['FSPD30_ratio'].apply(lambda x: f"{x:.2%}")

# Create the pivot table
pivot_table = pd.pivot_table(
    merged_df,
    values=['unique_count', 'duplicate_count', 'defFSPD30_count', 'obsFSPD30_count', 'defFSPD30', 'obsFSPD30', 'FSPD30_ratio'],
    index=['targetdataselectiontype'],
    aggfunc='sum',
    margins=True,
    margins_name='Total'
)

# Format the percentage column
pivot_table['FSPD30_percentage'] = pivot_table['FSPD30_ratio'].apply(lambda x: f"{x:.2%}")

# Reorder columns if desired
pivot_table = pivot_table[['unique_count', 'duplicate_count', 'defFSPD30_count', 'obsFSPD30_count', 'defFSPD30', 'obsFSPD30', 'FSPD30_ratio', 'FSPD30_percentage']]

# Display the pivot table
pivot_table

Unnamed: 0_level_0,unique_count,duplicate_count,defFSPD30_count,obsFSPD30_count,defFSPD30,obsFSPD30,FSPD30_ratio,FSPD30_percentage
targetdataselectiontype,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Other,8674,40863,0.0,0.0,0,0,,nan%
Test,11710,50371,583.0,5012.0,2453,25566,0.095948,9.59%
Train_Validation,25200,108219,3451.0,25200.0,17902,133419,0.134179,13.42%
Total,45584,199453,4034.0,30212.0,20355,158985,,nan%


In [5]:
sq = """
select * from prj-prod-dataplatform.risk_credit_cic_data.cic_summary
;"""

dfd = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns of cicsummary are:\t {dfd.shape}")

Job ID 2b1f4922-3e51-4d55-9ec0-69866f3be41a successfully executed: |[32m          [0m| 
Downloading: 100%|[32m██████████[0m|
The rows and columns of cicsummary are:	 (427136, 93)


In [6]:
# Merge both the data based on digitalLoanAccountId
dfd = dfd[['digitalLoanAccountId','DescisionValue', 'exclusionRuleLabelDescription', 'ScoreRaw', 'ScoreRange',
       'Accounts30Days', 'Accounts30DaysOther', 'Accounts60Days',
       'Accounts60DaysOther', 'Accounts90Days', 'Accounts90DaysOther',
       'AgeAtLoanTermination', 'AgeAtLoanTerminationMax',
       'CreditAvgCreditLimit', 'CreditHighestCreditLimit',
       'CreditMaxOutstandingBalance', 'CreditMaxOutstandingBalanceDate',
       'CreditMaxOverdueAmount', 'CreditMaxOverdueAmountDate',
       'CreditMaxOverdueDays', 'CreditMaxOverdueDaysDate',
       'CreditNumberOfContracts', 'CreditTotalCreditLimit',
       'CreditTotalOutstandingBalanceAmount', 'CreditTotalOverdueAmount', 'InstMaxOverdueAmount', 'InstMaxOverdueAmountDate',
       'InstMaxOverdueDays', 'InstMaxOverdueDaysDate', 'InstNumberOfContracts',
       'InstTotalMonthlyPaymentsAmount', 'InstTotalOutstandingBalanceAmount',
       'InstTotalOverdueAmount', 'LoanToValue', 'MonthsOfBooks',
       'MonthsOfBooksOther', 'NonInstAvgCreditLimit',
       'NonInstHighestCreditLimit', 'NonInstNumberOfContracts',
       'NonInstTotalCreditLimit', 'NonInstTotalOverdraftAmount',
       'NonInstTotalUtilization', 'NumberOfContract', 'NumberOfContractAC',
       'NumberOfContractACOther', 'NumberOfContractCLCA',
       'NumberOfContractCLCAOther', 'NumberOfContractOther',
       'NumberOfContractRF', 'NumberOfContractRFOther', 'NumberOfContractRN',
       'NumberOfContractRNOther', 'NumberOfContractRQ',
       'NumberOfContractRQOther', 'OverlimitFlag', 'OverlimitFlagOther',
       'ReportingProvidersNumber', 'ReportingProvidersNumberOther',
       'SubjectEventDate', 'SubjectInfoTypeCode', 'SubjectInfoTypeCodeDesc',
       'TotalOutstanding', 'TotalOutstandingOther', 'UtilityMaxBilledAmount',
       'UtilityMaxBilledAmountDate', 'UtilityMaxOutstandingBalance',
       'UtilityMaxOutstandingBalanceDate', 'UtilityMaxOverdueAmount',
       'UtilityMaxOverdueAmountDate', 'UtilityMaxOverdueDays',
       'UtilityMaxOverdueDaysDate', 'UtilityNumberOfContracts',
       'UtilityTotalBilledAmount', 'UtilityTotalOutstandingBalanceAmount',
       'UtilityTotalOverdueAmount', 'UtilizationRateCreditCard',
       'UtilizationRateCreditCardOther', 'UtilizationRateNonInstallment',
       'UtilizationRateNonInstallmentOther','scoreLabelDesc']].copy()



In [7]:
# Assuming you have a dataframe called 'df'
duplicate_rows = dfd[dfd.duplicated()]

# To see all duplicate rows, including the first occurrence:
all_duplicates = dfd[dfd.duplicated(keep=False)]

print(f"The number of duplicate rows are:\t{len(duplicate_rows)}")

The number of duplicate rows are:	68


In [8]:
# Remove duplicates, keeping the last occurrence
df_cleaned = dfd.drop_duplicates(keep='last')

print(f"Original dataframe shape: {dfd.shape}")
print(f"Cleaned dataframe shape: {df_cleaned.shape}")

Original dataframe shape: (427136, 80)
Cleaned dataframe shape: (427068, 80)


In [9]:
mergeddf = pd.merge(df, df_cleaned, left_on='digitalLoanAccountId', right_on='digitalLoanAccountId', how = 'left')
mergeddf.head()

Unnamed: 0,uniquekey,digitalLoanAccountId,customerId,Firstname,middleName,LastName,loanAccountNumber,flagDisbursement,disbursementDateTime,termsAndConditionsSubmitDateTime,natureofwork,subIndustryDescription,industryDescription,LoanProduct,crifApplicationId,processEngineGuid,requestGuid,ContractHistoryType,CBContractCode,ContractEndDate,ContractPhase,ContractPhaseDesc,ContractStartDate,ContractStatus,ContractStatusDesc,ContractType,ContractTypeDesc,Currency,CurrencyDesc,LastUpdateDate,OriginalCurrency,OriginalCurrencyDesc,ProviderCodeEncrypted,ProviderContractNo,ReferenceNo,Role,RoleDesc,BilledAmount,BoardResolutionFlag,BoardResolutionFlagDesc,CancellationDate,CardReferenceCode,ChargedAmount,CreditLimit,CreditPurpose,CreditPurposeDesc,FinancedAmount,FirstPaymentDate,FlagCardUsed,HolderLiability,HolderLiabilityDesc,InstallmentType,InstallmentTypeDesc,InstallmentsNumber,LastChargeDate,LastPaymentAmount,LastPaymentDate,MinPaymentIndicator,MinPaymentIndicatorDesc,MinPaymentPercentage,MonthlyPaymentAmount,NextPayment,NextPaymentDate,OutstandingBalance,OutstandingBalanceUnbilled,OutstandingPaymentsNumber,OverallCreditLimit,OverdueDays,OverdueDaysDesc,OverduePaymentsAmount,OverduePaymentsNumber,PaymentMethod,PaymentMethodDesc,PaymentPeriodicity,PaymentPeriodicityDesc,PremiumCard,PremiumCardDesc,ReorganizedCreditCode,ReorganizedCreditCodeDesc,ServicesLinesNo,TimesCardUsed,TransactionType,TransactionTypeDesc,Utilization,LinkedSubject_CBSubjectCode,LinkedSubject_Name,LinkedSubject_Role,LinkedSubject_RoleDesc,Note_TypeDesc,Note_Text,Note_Type,run_date,ContractRequestDate,SOURCE,Repaymentcategory,BusinessType,loan_segment,AnnualMonthlyIndicator,Currency_1,DateHiredFrom,DateHiredTo,GrossIncome,MonthlyIncome,AnnualIncome,OccupationDesc,OccupationStatusDesc,PSIC,PSICDesc,rnk,obsFSPD30,defFSPD30,targetdataselectiontype,DescisionValue,exclusionRuleLabelDescription,ScoreRaw,ScoreRange,Accounts30Days,Accounts30DaysOther,Accounts60Days,Accounts60DaysOther,Accounts90Days,Accounts90DaysOther,AgeAtLoanTermination,AgeAtLoanTerminationMax,CreditAvgCreditLimit,CreditHighestCreditLimit,CreditMaxOutstandingBalance,CreditMaxOutstandingBalanceDate,CreditMaxOverdueAmount,CreditMaxOverdueAmountDate,CreditMaxOverdueDays,CreditMaxOverdueDaysDate,CreditNumberOfContracts,CreditTotalCreditLimit,CreditTotalOutstandingBalanceAmount,CreditTotalOverdueAmount,InstMaxOverdueAmount,InstMaxOverdueAmountDate,InstMaxOverdueDays,InstMaxOverdueDaysDate,InstNumberOfContracts,InstTotalMonthlyPaymentsAmount,InstTotalOutstandingBalanceAmount,InstTotalOverdueAmount,LoanToValue,MonthsOfBooks,MonthsOfBooksOther,NonInstAvgCreditLimit,NonInstHighestCreditLimit,NonInstNumberOfContracts,NonInstTotalCreditLimit,NonInstTotalOverdraftAmount,NonInstTotalUtilization,NumberOfContract,NumberOfContractAC,NumberOfContractACOther,NumberOfContractCLCA,NumberOfContractCLCAOther,NumberOfContractOther,NumberOfContractRF,NumberOfContractRFOther,NumberOfContractRN,NumberOfContractRNOther,NumberOfContractRQ,NumberOfContractRQOther,OverlimitFlag,OverlimitFlagOther,ReportingProvidersNumber,ReportingProvidersNumberOther,SubjectEventDate,SubjectInfoTypeCode,SubjectInfoTypeCodeDesc,TotalOutstanding,TotalOutstandingOther,UtilityMaxBilledAmount,UtilityMaxBilledAmountDate,UtilityMaxOutstandingBalance,UtilityMaxOutstandingBalanceDate,UtilityMaxOverdueAmount,UtilityMaxOverdueAmountDate,UtilityMaxOverdueDays,UtilityMaxOverdueDaysDate,UtilityNumberOfContracts,UtilityTotalBilledAmount,UtilityTotalOutstandingBalanceAmount,UtilityTotalOverdueAmount,UtilizationRateCreditCard,UtilizationRateCreditCardOther,UtilizationRateNonInstallment,UtilizationRateNonInstallmentOther,scoreLabelDesc
0,000d5b99-ffd3-45ad-b650-32fe43a95dc7taran-2614...,000d5b99-ffd3-45ad-b650-32fe43a95dc7,2614354,NENITA,UMALI,CAIDIC,60826143540016,1,2024-06-27 16:59:48,2024-06-27 16:55:44,,,,SIL-Instore,taran-2614354-9950345,9be21cf75100ec9fe03b3f154c42f1fe0bfc7364,16b6e2be-fd2d-4b5f-87cb-01e72ffbb5ab,CreditCards,O04057747,,AC,Active,2018-06-23,,,31,Credit Card,PHP,Philippine peso,2022-07-31,PHP,Philippine peso,CC005,,1,B,Borrower,,,,,,15446.0,167000.0,,,,,,,,V,variable,,2022-05-11,103177.0,2022-07-29,,,,,850.0,2022-08-01,8655.0,7167.0,,,0,Paid as agreed / Current,0.0,0.0,CCR,Credit card payment,M,monthly installments-30 days,,,0.0,Credit is not re-organized,,0.0,PCC,PRIMARY CREDIT CARD,,,,,,,,,2024-06-27,,granted,Neutral,B2C,Credit Cards,Y,PHP,,,360000.0,30000,360000,,Self Employed,,,1,0,0,Other,,,440.0,Ei,0,0,0,0,0,0,0,52,69052,167000,108457,202401.0,2582,201903.0,1.0,201903.0,19,1312000,703979,0,0,202401.0,0.0,202401.0,1,2481,12405,0,0.0,89,89,7000,10000,2,14000,0,5239,27,22,22,5,5,27,0,0,0,0,0,0,False,False,10,10,2021-12-02,P,Frauds,721623,721623,0,,0,,0,,,,0,0,0,0,0,0,0,0,Medium Risk
1,0010fa50-dedd-471b-93b8-7e2cabf2b71ftonik-bnpl...,0010fa50-dedd-471b-93b8-7e2cabf2b71f,2383606,EDILBERTO,GARCIA,DELA CRUZ,60823836060011,1,2024-01-29 16:06:12,2024-01-29 16:03:47,Sales/Marketing Professional,Others,Services,SIL-Instore,tonik-bnpl-437300,effb4cb0-be7c-11ee-b07b-0242ace6000f,efdd6470-be7c-11ee-82ea-0242ace60004,Installments,K02698546,2018-08-23,CL,Closed,2017-11-15,,,12,Personal Loan,PHP,Philippine peso,2018-08-31,PHP,Philippine peso,PF002,,1,B,Borrower,,0.0,No,,,,,,,4029.0,,,,,,,9.0,,782.0,2018-08-22,,,,582.0,0.0,,0.0,,0.0,,0,Paid as agreed / Current,0.0,0.0,,,M,monthly installments-30 days,,,0.0,Credit is not re-organized,,,,NOT APPLICABLE,,,,,,,,,2024-01-29,,granted,Good,B2C,Personal Loans,,,,,,0,0,,,,,1,1,0,Train_Validation,Review,The customer has been inactive from last 18 mo...,,,0,0,0,0,0,0,47,47,0,0,0,,0,,,,0,0,0,0,1186,202007.0,1.0,202007.0,0,0,0,0,0.0,0,0,0,0,0,0,0,0,3,0,0,2,2,3,0,0,0,0,1,1,False,False,2,2,,,,0,0,0,,0,,0,,,,0,0,0,0,0,0,0,0,
2,00437977-13bc-4093-ae5d-a596cabe6c5btaran-2610...,00437977-13bc-4093-ae5d-a596cabe6c5b,2610898,BEVERLY JOY,ARCENAS,VARGAS,60826108980017,1,2024-06-26 13:19:02,2024-06-26 13:15:13,Other Non Professional Services,Hotel / Resort / Lodge,Services,SIL-Instore,taran-2610898-2682737,9be21cf75100ec9fe03b3f154c42f1fe0bfc7364,1b90657d-5e5f-4d4b-abe5-1fdef3a09d35,CreditCards,200967832,2020-11-21,AC,Active,2018-11-19,,,32,Credit Card - Shared Limit,PHP,Philippine peso,2020-11-30,PHP,Philippine peso,UB002,,1,B,Borrower,,,,,8003671.0,,20000.0,,,,,,,,V,variable,,,0.0,,,,,,0.0,,0.0,,,,0,Paid as agreed / Current,0.0,0.0,,,M,monthly installments-30 days,,,,,,,PCC,PRIMARY CREDIT CARD,,,,,,,,,2024-06-26,,granted,Neutral,B2C,Credit Cards,,,,,,0,0,,Permanent Job (Private sector),,,1,0,0,Other,,The customer has been inactive from last 18 mo...,,,0,0,1,1,0,0,0,34,20000,20000,7835,202201.0,1000,202012.0,2.0,202201.0,3,60000,10335,607,0,,,,0,0,0,0,0.0,67,67,0,0,0,0,0,0,3,3,3,0,0,3,0,0,0,0,0,0,False,False,2,2,,,,10335,10335,0,,0,,0,,,,0,0,0,0,0,0,0,0,
3,009a4dd3-534c-4314-9ad2-4f0b2aedc3cetaran-2621...,009a4dd3-534c-4314-9ad2-4f0b2aedc3ce,2621550,JOSEPH,CANESO,GARSULA,60826215500011,1,2024-06-30 11:54:10,2024-06-30 11:40:15,Other Professional services,Hotel / Resort / Lodge,Services,SIL-Instore,taran-2621550-2754203,9be21cf75100ec9fe03b3f154c42f1fe0bfc7364,df6d1a82-1113-4acd-a802-02cd96d8311e,,N09032213,,RQ,Requested,,,,12,Personal Loan,,,2024-05-01,,,PF002,,1,B,Borrower,,,,,,,,,,9.0,,,,,,,9.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2024-06-30,2024-05-01,nongranted,Unknown,B2C,Personal Loans,,,,,,0,0,,,,,1,0,0,Other,,The customer has no minimum scoring criteria,,,0,0,0,0,0,0,0,39,0,0,0,,0,,,,0,0,0,0,0,,,,0,0,0,0,0.0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,1,False,False,1,1,,,,0,0,0,,0,,0,,,,0,0,0,0,0,0,0,0,
4,00bc1313-97ba-4525-a357-63f3e4f03fd5tonik-tul-...,00bc1313-97ba-4525-a357-63f3e4f03fd5,2475312,CRISTINE JOY,GODINEZ,TESALUNA,60824753120018,1,2024-04-21 19:33:09,2024-04-21 18:53:18,IT Professional,Call Centre / BPO,Services,Quick,tonik-tul-494838,5db67040-ffcd-11ee-a41d-0242ace60015,5d8cef40-ffcd-11ee-a4cc-0242ace60008,Installments,503557137,2021-10-05,AC,Active,2021-09-06,,,12,Personal Loan,PHP,Philippine peso,2021-09-30,PHP,Philippine peso,PF003,,1,B,Borrower,,,,,,,,31.0,Loans to Individual for Consumption Purposes -...,780.0,2021-10-05,,,,,,1.0,,0.0,,,,,811.0,,,811.0,,1.0,,N,Too new to be rated / Not Available,0.0,0.0,OTH,Other,M,monthly installments-30 days,,,,,,,,NOT APPLICABLE,,,,,,,,,2024-04-21,,granted,Neutral,B2C,Personal Loans,,,,,,0,0,,,,,1,1,0,Test,Review,,440.0,Ei,0,0,0,0,0,0,40,41,0,0,0,,0,,,,0,0,0,0,0,202401.0,0.0,202401.0,11,25827,69567,0,0.0,50,50,20000,30000,2,40000,0,35334,48,13,13,33,33,48,0,0,0,0,2,2,False,False,4,4,,,,104901,104901,0,,0,,0,,,,0,0,0,0,0,0,0,0,Medium Risk


In [10]:
# 1. Check for duplicate rows
duplicates = mergeddf.duplicated()

# 2. Get the number of duplicate rows
num_duplicates = duplicates.sum()

print(f"Number of duplicate rows: {num_duplicates}")

# 3. View the duplicate rows
print("\nDuplicate rows:")
print(mergeddf[duplicates])

Number of duplicate rows: 0

Duplicate rows:
Empty DataFrame
Columns: [uniquekey, digitalLoanAccountId, customerId, Firstname, middleName, LastName, loanAccountNumber, flagDisbursement, disbursementDateTime, termsAndConditionsSubmitDateTime, natureofwork, subIndustryDescription, industryDescription, LoanProduct, crifApplicationId, processEngineGuid, requestGuid, ContractHistoryType, CBContractCode, ContractEndDate, ContractPhase, ContractPhaseDesc, ContractStartDate, ContractStatus, ContractStatusDesc, ContractType, ContractTypeDesc, Currency, CurrencyDesc, LastUpdateDate, OriginalCurrency, OriginalCurrencyDesc, ProviderCodeEncrypted, ProviderContractNo, ReferenceNo, Role, RoleDesc, BilledAmount, BoardResolutionFlag, BoardResolutionFlagDesc, CancellationDate, CardReferenceCode, ChargedAmount, CreditLimit, CreditPurpose, CreditPurposeDesc, FinancedAmount, FirstPaymentDate, FlagCardUsed, HolderLiability, HolderLiabilityDesc, InstallmentType, InstallmentTypeDesc, InstallmentsNumber, LastC

In [11]:
mergeddf.to_csv(r"C:\Users\DwaipayanChakroborti\OneDrive - Tonik Financial Pte Ltd\MyStuff\Biswa\CIC Data Analysis\CICModel\DataPreparation\Data\CICFinalDataSetWithCICSummaryData20240807_v3.csv", index = False)