In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)



# Total TSA Distinct Customers in Customer Accounts Table

In [2]:
sq = """Select distinct OFCUSTOMERID from `core_raw.customer_accounts` where CRINTERDESC like 'Transactional Savings Account Inv_R';"""
d = client.query(sq).to_dataframe(progress_bar_type='tqdm')
d['Description'] = "Total TSA Distinct Customers in Customer Accounts Table"
d['Set'] = 'Overall'
d['Serialno'] = 1
d.head()

Job ID 94b42313-b477-48cb-8623-41350f8d8ac9 successfully executed: |[32m          [0m|   
Downloading: 100%|[32m██████████[0m|


Unnamed: 0,OFCUSTOMERID,Description,Set,Serialno
0,2436327,Total TSA Distinct Customers in Customer Accou...,Overall,1
1,1461289,Total TSA Distinct Customers in Customer Accou...,Overall,1
2,1460380,Total TSA Distinct Customers in Customer Accou...,Overall,1
3,1460011,Total TSA Distinct Customers in Customer Accou...,Overall,1
4,1460484,Total TSA Distinct Customers in Customer Accou...,Overall,1


In [3]:
df = d.copy()

# TSA Distinct Customer in Customer Accounts Which were closed

In [4]:
sq = """Select distinct OFCUSTOMERID from `core_raw.customer_accounts` where CRINTERDESC like 'Transactional Savings Account Inv_R' and OFISCLOSED = 'Y';"""
d = client.query(sq).to_dataframe(progress_bar_type='tqdm')
d['Description'] = "TSA Distinct Customer in Customer Accounts Which were closed"
d['Set'] = 'Subset of Set 1'
d['Serialno'] = 1.1
d.head()

Job ID 88634a57-a5e5-4062-94da-8c60ca769d19 successfully executed: |[32m          [0m|
Downloading: 100%|[32m██████████[0m|


Unnamed: 0,OFCUSTOMERID,Description,Set,Serialno
0,1087354,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.1
1,1391252,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.1
2,1390862,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.1
3,1390965,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.1
4,1391139,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.1


In [5]:
df1 = d.copy()
merged_df = pd.concat([df, df1], ignore_index=False)
print(f"Merged DataFrame Shape: {merged_df.shape}")
merged_df['Description'].value_counts()

Merged DataFrame Shape: (2118884, 4)


Description
Total TSA Distinct Customers in Customer Accounts Table         1328081
TSA Distinct Customer in Customer Accounts Which were closed     790803
Name: count, dtype: int64

# TSA Distinct Customer in Customer Accounts Which are Open

In [6]:
sq = """Select distinct OFCUSTOMERID from `core_raw.customer_accounts` where CRINTERDESC like 'Transactional Savings Account Inv_R' and OFISCLOSED = 'N';"""
d = client.query(sq).to_dataframe(progress_bar_type='tqdm')
d['Description'] = "TSA Distinct Customer in Customer Accounts Which are open"
d['Set'] = 'Subset of Set 1'
d['Serialno'] = 1.2
print(f"The shape of d is \t {d.shape}")
d.head()

Job ID 9031e437-2cf2-4317-b925-210bb099745a successfully executed: |[32m          [0m|   
Downloading: 100%|[32m██████████[0m|
The shape of d is 	 (537335, 4)


Unnamed: 0,OFCUSTOMERID,Description,Set,Serialno
0,1498746,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.2
1,1499044,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.2
2,1499923,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.2
3,1499491,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.2
4,1501356,TSA Distinct Customer in Customer Accounts Whi...,Subset of Set 1,1.2


In [7]:
df2 = d.copy()
merged_df = pd.concat([df, df1, df2], ignore_index=False)
print(f"Merged DataFrame Shape: {merged_df.shape}")
merged_df['Description'].value_counts()

Merged DataFrame Shape: (2656219, 4)


Description
Total TSA Distinct Customers in Customer Accounts Table         1328081
TSA Distinct Customer in Customer Accounts Which were closed     790803
TSA Distinct Customer in Customer Accounts Which are open        537335
Name: count, dtype: int64

# TSA Distinct customer who have both Closed and Open TSA Accounts

In [8]:
sq = """Select OFCUSTOMERID, count(OFCUSTOMERID) cnt 
, sum(case when OFISCLOSED = 'Y' then 1
           when OFISCLOSED = 'N' then 1
           else 0 end) check
from `core_raw.customer_accounts` where CRINTERDESC like 'Transactional Savings Account Inv_R'
and OFISCLOSED in ('Y', 'N')
 group by 1 having count(OFCUSTOMERID) > 1;"""
 
d1 = client.query(sq).to_dataframe(progress_bar_type='tqdm')
print(f"The shape of the d1 is \t {d1.shape}")
d = d1[['OFCUSTOMERID']].copy()



Job ID ffc424f2-6e46-4317-9f8b-c1bc5f1beac6 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|
The shape of the d1 is 	 (102, 3)


In [9]:
df3 = d.copy()

In [10]:
df3['Description'] = "TSA Distinct customer who have both Closed and Open TSA Accounts"
df3['Set'] = 'Subset of Set 1.1 and 1.2'
df3['Serialno'] = 1.3
print(f"The shape of df3 is \t {d.shape}")
df3.head()

The shape of df3 is 	 (102, 1)


Unnamed: 0,OFCUSTOMERID,Description,Set,Serialno
0,1000177,TSA Distinct customer who have both Closed and...,Subset of Set 1.1 and 1.2,1.3
1,2162871,TSA Distinct customer who have both Closed and...,Subset of Set 1.1 and 1.2,1.3
2,1042438,TSA Distinct customer who have both Closed and...,Subset of Set 1.1 and 1.2,1.3
3,2135598,TSA Distinct customer who have both Closed and...,Subset of Set 1.1 and 1.2,1.3
4,2209695,TSA Distinct customer who have both Closed and...,Subset of Set 1.1 and 1.2,1.3


In [11]:
merged_df = pd.concat([df, df1, df2, df3], ignore_index=False)
print(f"Merged DataFrame Shape: {merged_df.shape}")
merged_df['Description'].value_counts()

Merged DataFrame Shape: (2656321, 4)


Description
Total TSA Distinct Customers in Customer Accounts Table             1328081
TSA Distinct Customer in Customer Accounts Which were closed         790803
TSA Distinct Customer in Customer Accounts Which are open            537335
TSA Distinct customer who have both Closed and Open TSA Accounts        102
Name: count, dtype: int64

In [12]:
sq = """select OFCUSTOMERID, OFSTANDARDACCOUNTID,  OFPRODUCTCATEGORY , OFISCLOSED, CRINTERDESC , OFDATEOPENED, OFDATECLOSED , OFCLEAREDBALANCE from `core_raw.customer_accounts`  where OFCUSTOMERID in 
(select OFCUSTOMERID from (
Select OFCUSTOMERID, count(OFCUSTOMERID) cnt 
, sum(case when OFISCLOSED = 'Y' then 1
           when OFISCLOSED = 'N' then 1
           else 0 end) check
from `core_raw.customer_accounts` where CRINTERDESC like 'Transactional Savings Account Inv_R'
and OFISCLOSED in ('Y', 'N')
 group by 1 having count(OFCUSTOMERID) > 1
))
and CRINTERDESC like 'Transactional Savings Account Inv_R'
order by 1, 4;"""

d1 = client.query(sq).to_dataframe()
d1.to_csv(r"Customerwithopenandclosedaccounts.csv", index = False)

In [13]:
md = merged_df.sort_values(by='Serialno')
# Sort by 'Serialno' within each group
md = md.groupby(['Description', 'Set', 'Serialno'])['OFCUSTOMERID'].count().sort_values(ascending=False)
md

Description                                                       Set                        Serialno
Total TSA Distinct Customers in Customer Accounts Table           Overall                    1.0         1328081
TSA Distinct Customer in Customer Accounts Which were closed      Subset of Set 1            1.1          790803
TSA Distinct Customer in Customer Accounts Which are open         Subset of Set 1            1.2          537335
TSA Distinct customer who have both Closed and Open TSA Accounts  Subset of Set 1.1 and 1.2  1.3             102
Name: OFCUSTOMERID, dtype: int64