# <div align="center" style="color: #ff5733;">Data Requested By Annastasia for Monthly Reporting</div>

In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
# Credentials for BigQuery.
# Honour GOOGLE_APPLICATION_CREDENTIALS when the environment already provides a
# non-empty value, so the notebook also runs on machines other than the original
# author's. Otherwise fall back to the workstation-specific ADC file that this
# notebook has always used. `path` holds whichever credential file is in effect.
path = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS') or (
    r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
)
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path

# Single BigQuery client reused by every query cell in this notebook.
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

In [2]:
# Reporting period (calendar year, month number) that the extracts below are cut to.
data_year, data_month = 2024, 7

In [3]:
# Started applications per month and loan product.
#
# The query counts distinct digitalLoanAccountId values, bucketed by the month of
# startApplyDateTime and by a derived LoanProduct label:
#   * 'Reloan'  when reloan_flag = 1 and loantype is not 'FLEXUP'
#   * 'Flex-up' when loantype = 'FLEXUP', new_loan_type = 'Flex-up',
#               reloan_flag = 0 and flagDisbursement = 1
#   * otherwise the row's own new_loan_type
# There is no WHERE clause, so every month present in the table is returned;
# the reporting window is applied later in pandas.

sq = """
# loan applied
SELECT 
    DATE_TRUNC( startApplyDateTime,MONTH) as mm, 
    case when reloan_flag = 1  and loantype not like 'FLEXUP' then 'Reloan'
         when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then 'Flex-up' 
         
         else new_loan_type end as LoanProduct,
    count (distinct digitalLoanAccountId) as StartedApps
FROM 
    `prj-prod-dataplatform.risk_credit_mis.loan_master_table` 

group by 1,2
order by 1 desc,2;
"""

# Run the query, download the result as a DataFrame (with a tqdm progress bar)
# and report its shape as a quick sanity check.
StartedApps = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns in StartedApps are:\t {StartedApps.shape}")

Job ID 5120a6c6-d749-4de0-997d-118d9eb14a94 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns in StartedApps are:	 (196, 3)


In [4]:
# Submitted applications per month and loan product.
#
# The query counts distinct digitalLoanAccountId values per LoanProduct label
# (same 'Reloan' / 'Flex-up' / new_loan_type mapping as the other metric queries).
# The month bucket is taken from startApplyDateTime for disbursed Flex-up rows and
# from termsAndConditionsSubmitDateTime for everything else.
# NOTE(review): there is no WHERE clause, so rows whose bucket timestamp is NULL
# end up in a NULL/NaT month group; these are dropped later by the date-window
# filter on the merged frame — confirm that is the intended handling.
sq = """
# loan Submitted
SELECT 
    DATE_TRUNC((case when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then startApplyDateTime else termsAndConditionsSubmitDateTime end),MONTH) as mm,  
    case when reloan_flag = 1 and loantype not like 'FLEXUP'then 'Reloan'
         when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then 'Flex-up' 
                  else new_loan_type end as LoanProduct,
    count (distinct digitalLoanAccountId) as SubmittedApps
FROM 
    `prj-prod-dataplatform.risk_credit_mis.loan_master_table` 
group by 1,2
order by 1 desc,2;
"""

# Run the query, download the result as a DataFrame and report its shape.
SubmittedApps = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns of SubmittedApps are:\t {SubmittedApps.shape}")

# Preview a random handful of rows. The sample is seeded so that a
# Restart & Run All reproduces the same preview, and the size is capped at the
# frame length so a small or empty result does not raise ValueError.
SubmittedApps.sample(n=min(10, len(SubmittedApps)), random_state=42)

Job ID af1c8038-3c90-4dee-b671-ba9a04acbfc9 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns of SubmittedApps are:	 (196, 3)


Unnamed: 0,mm,LoanProduct,SubmittedApps
116,2023-01-01,Quick,12469
149,2022-06-01,Quick,13181
187,NaT,ACL Patner 711,10
177,2021-10-01,Flex,1986
48,2023-12-01,Big Loan,197
136,2022-09-01,Flex,4587
45,2024-01-01,Quick,15442
134,2022-10-01,SIL-Online(discontinued),23
117,2023-01-01,Reloan,198
16,2024-06-01,Reloan,232


In [5]:
# Approved loans per month and loan product.
#
# Month bucket (mm):
#   * disbursed Flex-up rows and Reloan rows -> month of startApplyDateTime
#   * every other row                         -> month of decision_date
# Approval criterion (WHERE clause):
#   * disbursed Flex-up rows and Reloan rows -> flagDisbursement = 1
#   * every other row                         -> flagApproval = 1
# The metric is the count of distinct digitalLoanAccountId per (mm, LoanProduct).

sq = """
# Approved loans
SELECT 
DATE_TRUNC((case when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then startApplyDateTime 
                  when reloan_flag = 1 and loantype not like 'FLEXUP' then startApplyDateTime else decision_date end),MONTH) as mm,
        case when reloan_flag = 1 and loantype not like 'FLEXUP'then 'Reloan'
         when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then 'Flex-up' 
                  else new_loan_type end as LoanProduct,
    count (distinct digitalLoanAccountId) as ApprovedApps
FROM 
    `prj-prod-dataplatform.risk_credit_mis.loan_master_table`
where 
(case when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then flagDisbursement 
      when reloan_flag = 1 and loantype not like 'FLEXUP' then flagDisbursement  else flagApproval end) = 1
group by 1,2
order by 1 desc,2
;
"""

# Run the query, report the result shape, then preview the first rows
# (the query orders by month descending, so these are the latest months).
ApprovedApps = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns in ApprovedApps are:\t {ApprovedApps.shape}")
ApprovedApps.head(10)

Job ID 3e2bb8ea-9349-47a0-a7ae-f4306552d683 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns in ApprovedApps are:	 (159, 3)


Unnamed: 0,mm,LoanProduct,ApprovedApps
0,2024-08-01,Flex-up,50
1,2024-08-01,Quick,283
2,2024-08-01,Reloan,58
3,2024-08-01,SIL ZERO,43
4,2024-08-01,SIL-Instore,2264
5,2024-07-01,Flex-up,398
6,2024-07-01,Quick,1627
7,2024-07-01,Reloan,441
8,2024-07-01,SIL-Instore,12135
9,2024-06-01,ACL TSA,3


In [6]:
# Booked (disbursed) loans per month and loan product.
#
# Only rows with flagDisbursement = 1 are considered. They are bucketed by the
# month of disbursementDateTime and by the derived LoanProduct label
# ('Reloan' / 'Flex-up' / new_loan_type), and the metric is the count of
# distinct digitalLoanAccountId in each bucket.

sq = """
#Booked loans
SELECT 
DATE_TRUNC( disbursementDateTime,MONTH) as mm, 
            case when reloan_flag = 1 and loantype not like 'FLEXUP' and flagDisbursement = 1 then 'Reloan'
         when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then 'Flex-up' 
                  else new_loan_type end as LoanProduct,
    count (distinct digitalLoanAccountId) as BookedApps
FROM 
    `prj-prod-dataplatform.risk_credit_mis.loan_master_table` 
    where flagDisbursement = 1

group by 1,2
order by 1 desc,2
;
"""

# Run the query, report the result shape, then preview the most recent months.
BookedApps = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns in BookedApps are:\t {BookedApps.shape}")
BookedApps.head(10)

Job ID 45717585-8231-4e4a-b1d9-47daa1a38168 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns in BookedApps are:	 (158, 3)


Unnamed: 0,mm,LoanProduct,BookedApps
0,2024-08-01,Flex-up,81
1,2024-08-01,Quick,286
2,2024-08-01,Reloan,58
3,2024-08-01,SIL ZERO,42
4,2024-08-01,SIL-Instore,2238
5,2024-07-01,Flex-up,494
6,2024-07-01,Quick,1577
7,2024-07-01,Reloan,441
8,2024-07-01,SIL-Instore,12121
9,2024-06-01,ACL TSA,3


In [7]:
# Total booked (disbursed) amount per month and loan product.
#
# Same population and grouping as the booked-loan count: rows with
# flagDisbursement = 1, bucketed by month of disbursementDateTime and by the
# derived LoanProduct label. The metric is SUM(disbursedLoanAmount).
# NOTE(review): unlike the booked count, the sum is taken over rows rather than
# over distinct accounts — this presumably relies on one row per loan account
# in loan_master_table; confirm.

sq = """
# Booked Amount
SELECT 
DATE_TRUNC( disbursementDateTime,MONTH) as mm, 
    case when reloan_flag = 1 and loantype not like 'FLEXUP' and flagDisbursement = 1 then 'Reloan'
         when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then 'Flex-up' 
                  else new_loan_type end as LoanProduct,
    sum (disbursedLoanAmount) as BookedAmt
FROM 
    `prj-prod-dataplatform.risk_credit_mis.loan_master_table` 
     where flagDisbursement = 1
group by 1,2
order by 1 desc,2
;
"""
# Run the query, report the result shape, then preview the most recent months.
BookedAmt = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns in BookedAmt are:\t {BookedAmt.shape}")
BookedAmt.head(10)

Job ID c89eaaf0-f53a-48b6-8ff7-ddf3c8a76db0 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns in BookedAmt are:	 (158, 3)


Unnamed: 0,mm,LoanProduct,BookedAmt
0,2024-08-01,Flex-up,3334296.0
1,2024-08-01,Quick,3780000.0
2,2024-08-01,Reloan,1284712.0
3,2024-08-01,SIL ZERO,655595.0
4,2024-08-01,SIL-Instore,21438140.0
5,2024-07-01,Flex-up,16821100.0
6,2024-07-01,Quick,20946470.0
7,2024-07-01,Reloan,10694090.0
8,2024-07-01,SIL-Instore,118242800.0
9,2024-06-01,ACL TSA,35000.0


In [8]:
# Check the column dtypes and non-null counts of the booked-amount frame
# before it is merged with the other monthly metric frames.
BookedAmt.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 158 entries, 0 to 157
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   mm           158 non-null    datetime64[us]
 1   LoanProduct  158 non-null    object        
 2   BookedAmt    158 non-null    float64       
dtypes: datetime64[us](1), float64(1), object(1)
memory usage: 3.8+ KB


In [9]:
# Combine the five monthly metric frames into one wide table with one row per
# (month, loan product) pair.
#
# The join keys are spelled out instead of relying on pandas' default of
# "every column the two frames share", so an extra shared column appearing in
# one of the query results cannot silently change how rows are matched.
# Outer joins keep pairs that are missing from some of the metrics; the
# missing metrics show up as NaN in those rows.
merge_keys = ['mm', 'LoanProduct']
raw_data = (
    StartedApps
    .merge(SubmittedApps, on=merge_keys, how='outer')
    .merge(ApprovedApps, on=merge_keys, how='outer')
    .merge(BookedApps, on=merge_keys, how='outer')
    .merge(BookedAmt, on=merge_keys, how='outer')
)

In [10]:
# Preview the last ten rows of the merged metric table.
raw_data.tail(10)

Unnamed: 0,mm,LoanProduct,StartedApps,SubmittedApps,ApprovedApps,BookedApps,BookedAmt
189,2024-07-01,Quick,55619.0,46472.0,1627.0,1577.0,20946470.0
190,2024-07-01,Reloan,480.0,463.0,441.0,441.0,10694090.0
191,2024-07-01,SIL-Instore,20640.0,20383.0,12135.0,12121.0,118242800.0
192,2024-08-01,ACL Patner 711,1.0,,,,
193,2024-08-01,Flex,,1.0,,,
194,2024-08-01,Flex-up,50.0,50.0,50.0,81.0,3334296.0
195,2024-08-01,Quick,8422.0,7168.0,283.0,286.0,3780000.0
196,2024-08-01,Reloan,67.0,64.0,58.0,58.0,1284712.0
197,2024-08-01,SIL ZERO,79.0,78.0,43.0,42.0,655595.0
198,2024-08-01,SIL-Instore,3606.0,3565.0,2264.0,2238.0,21438140.0


In [11]:
# Show the first-of-month date string built from the reporting period; the same
# string is used as the upper bound of the date-window filter on raw_data.
reporting_month_start = f'{data_year}-{data_month:02d}-01'
print(reporting_month_start)

2024-07-01


In [12]:
# Restrict the merged metrics to the reporting window. Despite the variable
# name, this keeps every month from 2022-08 up to and including the reporting
# month, not just the reporting month itself. Both bounds are inclusive.
in_reporting_window = raw_data['mm'].between('2022-08-01', f'{data_year}-{data_month:02d}-01')
raw_data_current_month = raw_data[in_reporting_window]

In [13]:
# Display the windowed metric table that will be written to the 'raw_data' sheet.
raw_data_current_month

Unnamed: 0,mm,LoanProduct,StartedApps,SubmittedApps,ApprovedApps,BookedApps,BookedAmt
54,2022-08-01,Flex,5558,4732,594,599,1.198500e+07
55,2022-08-01,Quick,43764,30660,1910,1926,3.300100e+07
56,2022-08-01,SIL-Instore,967,956,456,457,1.137941e+07
57,2022-08-01,SIL-Online(discontinued),1192,652,41,41,1.197499e+06
58,2022-09-01,Big Loan,13,12,,,
...,...,...,...,...,...,...,...
187,2024-07-01,Flex,18,5,,,
188,2024-07-01,Flex-up,398,398,398,494,1.682110e+07
189,2024-07-01,Quick,55619,46472,1627,1577,2.094647e+07
190,2024-07-01,Reloan,480,463,441,441,1.069409e+07


### Loan Account Tagging

In [14]:
# Loan-level tagging of every disbursed loan (flagDisbursement = 1).
#
# Columns returned per row:
#   * loanAccountNumber
#   * LoanProduct          - derived label ('Reloan' / 'Flex-up' / new_loan_type)
#   * disbursementDateTime
#   * OriginalLoanProduct  - only for rows whose new_loan_type is 'Flex-up': the
#                            new_loan_type of the same customer's previous row
#                            when ordered by disbursementDateTime (LAG); NULL for
#                            all other rows.
# Note that the LAG reads the raw new_loan_type column, not the derived
# LoanProduct label, so it never yields 'Reloan'.
sq="""
# Loan Tagging
SELECT
    loanAccountNumber,
    Case when reloan_flag = 1 and loantype not like 'FLEXUP' and flagDisbursement = 1 then 'Reloan'
         when loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then 'Flex-up' 
                  else new_loan_type end as LoanProduct,
    disbursementDateTime,
    (CASE WHEN new_loan_type = 'Flex-up' then
    LAG(new_loan_type) OVER (PARTITION BY customerId ORDER BY disbursementDateTime) END) AS OriginalLoanProduct
  FROM
    `prj-prod-dataplatform.risk_credit_mis.loan_master_table`
   where flagDisbursement=1
;
"""
# Run the query, report the result shape, then preview the last rows.
loan_type = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns in loan_type are: {loan_type.shape}")
loan_type.tail(10)

Job ID 33d3a586-e6c5-43b4-941f-d2b1abd1beed successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns in loan_type are: (169863, 4)


Unnamed: 0,loanAccountNumber,LoanProduct,disbursementDateTime,OriginalLoanProduct
169853,60827170500011,SIL-Instore,2024-08-03 12:29:25,
169854,60827173960014,SIL-Instore,2024-08-03 14:00:17,
169855,60827182610014,SIL-Instore,2024-08-03 17:40:45,
169856,60827197560016,SIL-Instore,2024-08-04 10:27:45,
169857,60827199610019,SIL-Instore,2024-08-04 11:27:00,
169858,60827205470018,SIL-Instore,2024-08-04 13:35:33,
169859,60827213080019,SIL-Instore,2024-08-04 15:45:34,
169860,60827216100012,SIL-Instore,2024-08-04 16:38:02,
169861,60827217800019,SIL-Instore,2024-08-04 17:18:16,
169862,60827224190019,SIL-Instore,2024-08-04 20:43:40,


In [15]:
# Spot check: look up the tagging row(s) for one specific loan account number.
is_checked_account = loan_type['loanAccountNumber'] == '60826104750018'
loan_type.loc[is_checked_account]

Unnamed: 0,loanAccountNumber,LoanProduct,disbursementDateTime,OriginalLoanProduct
121628,60826104750018,SIL-Instore,2024-06-26 11:11:06,


In [16]:
# Keep only the loans disbursed in the reporting year and month.
disbursed_at = loan_type['disbursementDateTime']
in_reporting_month = (disbursed_at.dt.month == data_month) & (disbursed_at.dt.year == data_year)
Loan_Acs_current_month = loan_type[in_reporting_month]

In [17]:
# Drop the disbursement timestamp: the export only needs the account number and
# the two product labels.
tagging_columns = ['loanAccountNumber', 'LoanProduct', 'OriginalLoanProduct']
Loan_Acs_current_month = Loan_Acs_current_month.loc[:, tagging_columns]

#### VAS

In [18]:
# VAS attach counts per disbursement year, month and loan product.
#
# The query inner-joins tdbk_digital_loan_application (a) to loan_master_table (b)
# on loanAccountNumber, keeps rows with flagDisbursement = 1, and returns:
#   * TotalSold   - distinct b.loanAccountNumber in the bucket
#   * SoldWithVAS - distinct b.loanAccountNumber whose vas_flag equals the
#                   string 'true'
# The product label is exposed under the column name new_loan_type here (not
# LoanProduct as in the other queries), using the same 'Reloan' / 'Flex-up' mapping.
# NOTE(review): vas_flag is unqualified and compared as a string — presumably a
# string column on the application table; confirm.
sq = """
# Vas 
SELECT 
    extract(year from disbursementDateTime) as year , 
    extract(month from disbursementDateTime) as month,
    Case when reloan_flag = 1 and b.loantype not like 'FLEXUP' and flagDisbursement = 1 then 'Reloan'
         when b.loantype = 'FLEXUP' and new_loan_type = 'Flex-up' and reloan_flag = 0 and flagDisbursement = 1 then 'Flex-up' 
                  else new_loan_type end as new_loan_type, 
    count(distinct  b.loanAccountNumber ) AS TotalSold,
   
   count(distinct CASE WHEN vas_flag = 'true' THEN b.loanAccountNumber END) AS SoldWithVAS
     FROM `prj-prod-dataplatform.dl_loans_db_raw.tdbk_digital_loan_application` a join
`prj-prod-dataplatform.risk_credit_mis.loan_master_table` b on a.loanAccountNumber = b.loanAccountNumber
where flagDisbursement = 1
group by 1,2 ,3 
order by 1 desc,2 desc,3
;
"""

# Run the query, report the result shape, then preview the most recent months.
vas_df = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The rows and columns in vas_df are:\t {vas_df.shape}")
vas_df.head(10)

Job ID d81f9f56-d691-42be-b498-1a28d37d54c4 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The rows and columns in vas_df are:	 (158, 5)


Unnamed: 0,year,month,new_loan_type,TotalSold,SoldWithVAS
0,2024,8,Flex-up,81,0
1,2024,8,Quick,286,183
2,2024,8,Reloan,58,0
3,2024,8,SIL ZERO,42,24
4,2024,8,SIL-Instore,2238,1503
5,2024,7,Flex-up,494,0
6,2024,7,Quick,1577,952
7,2024,7,Reloan,441,0
8,2024,7,SIL-Instore,12121,7918
9,2024,6,ACL TSA,3,2


In [19]:
# Keep only the VAS rows for the reporting year and month.
is_report_year = vas_df['year'] == data_year
is_report_month = vas_df['month'] == data_month
vas_df_current_month = vas_df[is_report_year & is_report_month]

In [20]:
# Export the three report frames to a single workbook named after the reporting
# period, one sheet per frame, without the pandas index column.
workbook_name = f'loan_accounts_tagging_{data_year}_{data_month:02d}_v1.xlsx'
report_sheets = {
    'raw_data': raw_data_current_month,
    'Loan Acs': Loan_Acs_current_month,
    'VAS': vas_df_current_month,
}
with pd.ExcelWriter(workbook_name) as writer:
    # Sheets are written in the order listed above.
    for sheet_name, sheet_frame in report_sheets.items():
        sheet_frame.to_excel(writer, sheet_name=sheet_name, index=False)