# <div align="center" style="color: #ff5733;">CIC Raw Data Preparation</div>

In [2]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
# Local gcloud credential file used when nothing else is configured.
# NOTE(review): this is a user-specific absolute path; it only exists on the
# original author's workstation.
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'

# Only point GOOGLE_APPLICATION_CREDENTIALS at the file above when the variable
# is not already set AND the file is present. Overwriting unconditionally would
# clobber credentials configured by the environment, and pointing at a missing
# file makes credential discovery fail instead of falling back to the other
# Application Default Credentials sources.
if 'GOOGLE_APPLICATION_CREDENTIALS' not in os.environ and os.path.isfile(path):
    os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path

# BigQuery client bound to the project that the queries below are run against.
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

In [3]:
# BigQuery SQL text that stacks two contract tables into one result set with
# UNION ALL inside the CICBaseTable CTE, then selects everything from it:
#   * risk_credit_cic_data.granted_contracts    -> rows tagged SOURCE = 'granted'
#   * risk_credit_cic_data.notgranted_contracts -> rows tagged SOURCE = 'nongranted'
# Both SELECT lists carry the same 82 output columns in the same order, which
# UNION ALL requires because it matches columns by position. Where a branch
# does not read a column from its table, it emits a NULL placeholder instead
# (ContractRequestDate in the granted branch; every `NULL AS ...` entry in the
# not-granted branch).
# Note: the SOURCE tag is spelled 'nongranted' while the table name is
# notgranted_contracts.
sq = """
WITH
  CICBaseTable AS ( -- Query FOR dfgranted
  SELECT
    digitalLoanAccountId,
    crifApplicationId,
    customerId,
    processEngineGuid,
    requestGuid,
    ContractHistoryType,
    CBContractCode,
    ContractEndDate,
    ContractPhase,
    ContractPhaseDesc,
    ContractStartDate,
    ContractStatus,
    ContractStatusDesc,
    ContractType,
    ContractTypeDesc,
    Currency,
    CurrencyDesc,
    LastUpdateDate,
    OriginalCurrency,
    OriginalCurrencyDesc,
    ProviderCodeEncrypted,
    ProviderContractNo,
    ReferenceNo,
    Role,
    RoleDesc,
    BilledAmount,
    BoardResolutionFlag,
    BoardResolutionFlagDesc,
    CancellationDate,
    CardReferenceCode,
    ChargedAmount,
    CreditLimit,
    CreditPurpose,
    CreditPurposeDesc,
    FinancedAmount,
    FirstPaymentDate,
    FlagCardUsed,
    HolderLiability,
    HolderLiabilityDesc,
    InstallmentType,
    InstallmentTypeDesc,
    InstallmentsNumber,
    LastChargeDate,
    LastPaymentAmount,
    LastPaymentDate,
    MinPaymentIndicator,
    MinPaymentIndicatorDesc,
    MinPaymentPercentage,
    MonthlyPaymentAmount,
    NextPayment,
    NextPaymentDate,
    OutstandingBalance,
    OutstandingBalanceUnbilled,
    OutstandingPaymentsNumber,
    OverallCreditLimit,
    OverdueDays,
    OverdueDaysDesc,
    OverduePaymentsAmount,
    OverduePaymentsNumber,
    PaymentMethod,
    PaymentMethodDesc,
    PaymentPeriodicity,
    PaymentPeriodicityDesc,
    PremiumCard,
    PremiumCardDesc,
    ReorganizedCreditCode,
    ReorganizedCreditCodeDesc,
    ServicesLinesNo,
    TimesCardUsed,
    TransactionType,
    TransactionTypeDesc,
    Utilization,
    LinkedSubject_CBSubjectCode,
    LinkedSubject_Name,
    LinkedSubject_Role,
    LinkedSubject_RoleDesc,
    Note_TypeDesc,
    Note_Text,
    Note_Type,
    run_date,
    NULL AS ContractRequestDate,
    'granted' AS SOURCE
  FROM
    prj-prod-dataplatform.risk_credit_cic_data.granted_contracts
  UNION ALL
    -- Query FOR dfnongranted
  SELECT
    digitalLoanAccountId,
    crifApplicationId,
    customerId,
    processEngineGuid,
    requestGuid,
    NULL AS ContractHistoryType,
    CBContractCode,
    NULL AS ContractEndDate,
    ContractPhase,
    ContractPhaseDesc,
    NULL AS ContractStartDate,
    NULL AS ContractStatus,
    NULL AS ContractStatusDesc,
    ContractType,
    ContractTypeDesc,
    NULL AS Currency,
    NULL AS CurrencyDesc,
    LastUpdateDate,
    NULL AS OriginalCurrency,
    NULL AS OriginalCurrencyDesc,
    ProviderCodeEncrypted,
    ProviderContractNo,
    ReferenceNo,
    Role,
    RoleDesc,
    NULL AS BilledAmount,
    NULL AS BoardResolutionFlag,
    NULL AS BoardResolutionFlagDesc,
    NULL AS CancellationDate,
    NULL AS CardReferenceCode,
    NULL AS ChargedAmount,
    CreditLimit,
    NULL AS CreditPurpose,
    NULL AS CreditPurposeDesc,
    FinancedAmount,
    NULL AS FirstPaymentDate,
    NULL AS FlagCardUsed,
    NULL AS HolderLiability,
    NULL AS HolderLiabilityDesc,
    NULL AS InstallmentType,
    NULL AS InstallmentTypeDesc,
    InstallmentsNumber,
    NULL AS LastChargeDate,
    NULL AS LastPaymentAmount,
    NULL AS LastPaymentDate,
    NULL AS MinPaymentIndicator,
    NULL AS MinPaymentIndicatorDesc,
    NULL AS MinPaymentPercentage,
    MonthlyPaymentAmount,
    NULL AS NextPayment,
    NULL AS NextPaymentDate,
    NULL AS OutstandingBalance,
    NULL AS OutstandingBalanceUnbilled,
    NULL AS OutstandingPaymentsNumber,
    NULL AS OverallCreditLimit,
    NULL AS OverdueDays,
    NULL AS OverdueDaysDesc,
    NULL AS OverduePaymentsAmount,
    NULL AS OverduePaymentsNumber,
    NULL AS PaymentMethod,
    NULL AS PaymentMethodDesc,
    PaymentPeriodicity,
    PaymentPeriodicityDesc,
    NULL AS PremiumCard,
    NULL AS PremiumCardDesc,
    NULL AS ReorganizedCreditCode,
    NULL AS ReorganizedCreditCodeDesc,
    NULL AS ServicesLinesNo,
    NULL AS TimesCardUsed,
    NULL AS TransactionType,
    NULL AS TransactionTypeDesc,
    NULL AS Utilization,
    LinkedSubject_CBSubjectCode,
    LinkedSubject_Name,
    LinkedSubject_Role,
    LinkedSubject_RoleDesc,
    Note_TypeDesc,
    Note_Text,
    Note_Type,
    run_date,
    ContractRequestDate,
    'nongranted' AS SOURCE
  FROM
    prj-prod-dataplatform.risk_credit_cic_data.notgranted_contracts )
select * from CICBaseTable;
"""

# Submit the SQL to BigQuery, then download the full result into a pandas
# DataFrame, reporting progress through tqdm.
query_job = client.query(sq)
df = query_job.to_dataframe(progress_bar_type='tqdm')


Job ID caadfc91-47e9-4d8c-abce-6e0da8ea5706 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


In [4]:
# Display all column labels of the downloaded frame as a NumPy array.
df.columns.to_numpy()

array(['digitalLoanAccountId', 'crifApplicationId', 'customerId',
       'processEngineGuid', 'requestGuid', 'ContractHistoryType',
       'CBContractCode', 'ContractEndDate', 'ContractPhase',
       'ContractPhaseDesc', 'ContractStartDate', 'ContractStatus',
       'ContractStatusDesc', 'ContractType', 'ContractTypeDesc',
       'Currency', 'CurrencyDesc', 'LastUpdateDate', 'OriginalCurrency',
       'OriginalCurrencyDesc', 'ProviderCodeEncrypted',
       'ProviderContractNo', 'ReferenceNo', 'Role', 'RoleDesc',
       'BilledAmount', 'BoardResolutionFlag', 'BoardResolutionFlagDesc',
       'CancellationDate', 'CardReferenceCode', 'ChargedAmount',
       'CreditLimit', 'CreditPurpose', 'CreditPurposeDesc',
       'FinancedAmount', 'FirstPaymentDate', 'FlagCardUsed',
       'HolderLiability', 'HolderLiabilityDesc', 'InstallmentType',
       'InstallmentTypeDesc', 'InstallmentsNumber', 'LastChargeDate',
       'LastPaymentAmount', 'LastPaymentDate', 'MinPaymentIndicator',
       'MinPaym

In [5]:
# Print the frame summary: row count, and each column's non-null count and dtype.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1510245 entries, 0 to 1510244
Data columns (total 82 columns):
 #   Column                       Non-Null Count    Dtype 
---  ------                       --------------    ----- 
 0   digitalLoanAccountId         1510245 non-null  object
 1   crifApplicationId            1510245 non-null  object
 2   customerId                   1510245 non-null  object
 3   processEngineGuid            1510245 non-null  object
 4   requestGuid                  1510245 non-null  object
 5   ContractHistoryType          1291024 non-null  object
 6   CBContractCode               1510245 non-null  object
 7   ContractEndDate              1197890 non-null  object
 8   ContractPhase                1510245 non-null  object
 9   ContractPhaseDesc            1510245 non-null  object
 10  ContractStartDate            1291024 non-null  object
 11  ContractStatus               94006 non-null    object
 12  ContractStatusDesc           94006 non-null    object
 1

In [6]:
# Write the combined extract to CSV without the DataFrame index column.
# NOTE(review): the destination is a hardcoded, user-specific absolute path;
# the target folder must already exist on the machine running this cell.
raw_csv_path = r"C:\Users\DwaipayanChakroborti\OneDrive - Tonik Financial Pte Ltd\MyStuff\Biswa\CIC Data Analysis\CICModel\DataPreparation\Data\CICRawData.csv"
df.to_csv(raw_csv_path, index=False)