In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
# Location of the gcloud application-default credentials file on the author's machine.
# NOTE(review): this is a user-specific absolute path; it is used only as a fallback below.
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
# Respect credentials that are already configured in the environment (another
# machine, CI, a service account) and fall back to the local file only when the
# variable is unset, instead of unconditionally overwriting it.
os.environ.setdefault('GOOGLE_APPLICATION_CREDENTIALS', path)
# BigQuery client used by the queries and load jobs in the cells below.
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Show every column when a wide DataFrame is displayed (None removes the column cap).
# The option is addressed by its exact registered key rather than relying on
# pandas' case-insensitive pattern matching of option names.
pd.set_option("display.max_columns", None)

In [2]:
import datetime
from dateutil.relativedelta import relativedelta

def get_week_start_dates(start_date, end_date):
    """Return the Monday of every week that overlaps ``[start_date, end_date]``.

    The first entry is the Monday of the week containing ``start_date`` (which
    may be earlier than ``start_date`` itself); subsequent entries follow in
    7-day steps for as long as the Monday is not after ``end_date``.

    Args:
        start_date: First day of the window (``datetime.date`` or compatible).
        end_date: Last day of the window, inclusive.

    Returns:
        A list of week-start dates in ascending order; empty when
        ``start_date`` is after ``end_date``.
    """
    if start_date > end_date:
        return []

    one_week = datetime.timedelta(weeks=1)
    # Snap to Monday once, up front (weekday() is 0 on Mondays). Stepping from the
    # raw start_date instead would skip the final week whenever start_date falls
    # later in the week than end_date does.
    week_start = start_date - datetime.timedelta(days=start_date.weekday())

    week_start_dates = []
    while week_start <= end_date:
        week_start_dates.append(week_start)
        week_start += one_week
    return week_start_dates

# Bounds of the reporting window handed to get_week_start_dates.
start_date, end_date = datetime.date(2024, 1, 1), datetime.date(2024, 6, 2)

# Week-start dates for the window; the query loop below iterates over this list.
week_start_dates = get_week_start_dates(start_date=start_date, end_date=end_date)
print(week_start_dates)

[datetime.date(2024, 1, 1), datetime.date(2024, 1, 8), datetime.date(2024, 1, 15), datetime.date(2024, 1, 22), datetime.date(2024, 1, 29), datetime.date(2024, 2, 5), datetime.date(2024, 2, 12), datetime.date(2024, 2, 19), datetime.date(2024, 2, 26), datetime.date(2024, 3, 4), datetime.date(2024, 3, 11), datetime.date(2024, 3, 18), datetime.date(2024, 3, 25), datetime.date(2024, 4, 1), datetime.date(2024, 4, 8), datetime.date(2024, 4, 15), datetime.date(2024, 4, 22), datetime.date(2024, 4, 29), datetime.date(2024, 5, 6), datetime.date(2024, 5, 13), datetime.date(2024, 5, 20), datetime.date(2024, 5, 27)]


In [3]:
%%time
res =  pd.DataFrame()
for date in week_start_dates:
    query = f"""
                With 
                    loan_cust --list of customers having loan either in status Normal or Arrears at the cut off date
                            AS (
                                SELECT 
                                    distinct CAST(lmt.customerId AS STRING) customerId
                                FROM 
                                    `prj-prod-dataplatform.risk_credit_mis.loan_master_table` lmt
                                    inner join prj-prod-dataplatform.risk_credit_mis.loan_bucket_flow_report_core lbfrc on lbfrc.loanAccountNumber = lmt.loanAccountNumber
                                WHERE lbfrc.bucketDate = date_sub(date("{date}"), interval 1 day)  --- need to change the date
                                and lbfrc.loanStatus IN ('Normal', 'In Arrears')
                                    ) 
                    select "{date}" date
                    , round(sum(case when credit_debit_indicator = 'CREDIT' then coalesce(trx_amount, 0) else 0 end),2) IncomingAmount  
                    , round(sum(case when credit_debit_indicator = 'DEBIT' then coalesce(abs(trx_amount), 0) else 0 end),2) OutgoingAmount
                    from prj-prod-dataplatform.risk_mart.customer_transactions 
                    where customer_id not in (select customerId from loan_cust)
                            and account_type like 'Tonik Account'
                            and status = 'Success'
                    and transaction_date between date_sub("{date}", interval 7 day) and "{date}"
                    group by 1 order by 1
            """
    df = client.query(query).to_dataframe()
    res = pd.concat([res,df])

res
                                    

CPU times: total: 1.03 s
Wall time: 52 s


Unnamed: 0,date,IncomingAmount,OutgoingAmount
0,2024-01-01,324463400.0,501368000.0
0,2024-01-08,394705200.0,657776100.0
0,2024-01-15,358443000.0,544583200.0
0,2024-01-22,303238600.0,484688100.0
0,2024-01-29,298947700.0,462689600.0
0,2024-02-05,410494800.0,626595300.0
0,2024-02-12,282292000.0,434326500.0
0,2024-02-19,326481700.0,476644600.0
0,2024-02-26,267593500.0,414811500.0
0,2024-03-04,411679700.0,615607900.0


In [4]:
# Save the weekly totals to check.xlsx (relative path), omitting the index column.
res.to_excel("check.xlsx", index = False)

In [5]:
# Remove any previous backup table so the following cell can recreate it.
sq = """drop table if exists dap_ds_poweruser_playground.TSA_Report_Marketing_Template_v2_backup;"""
drop_backup_job = client.query(sq)
# client.query() only submits the job. Wait for it so that a failure is raised
# here and the table is really gone before CREATE TABLE is issued.
drop_backup_job.result()
drop_backup_job

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=9b2616b5-5fa4-45b2-81ac-22efad88d914>

In [6]:
# Snapshot the current v2 report table into the *_backup table.
sq = """create table dap_ds_poweruser_playground.TSA_Report_Marketing_Template_v2_backup as select * from dap_ds_poweruser_playground.TSA_Report_Marketing_Template_v2;"""
create_backup_job = client.query(sq)
# client.query() only submits the job. Wait for it so that a failure is raised
# here and the backup exists before it is read back.
create_backup_job.result()
create_backup_job


QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=ba6c31dc-e6bf-4fc3-8029-f6125a70f5b5>

In [7]:
# Read the whole backup table into a DataFrame, showing a tqdm progress bar
# while the query runs and the rows download.
backup_sql = """select * from dap_ds_poweruser_playground.TSA_Report_Marketing_Template_v2_backup"""
backup_job = client.query(backup_sql)
dfbackup = backup_job.to_dataframe(progress_bar_type='tqdm')
dfbackup

Job ID 68ab0676-ee94-4b9f-ac42-3774719dff46 successfully executed: 100%|[32m██████████[0m|
Downloading: 100%|[32m██████████[0m|


Unnamed: 0,Date,Total_registered_users,Total_registered_users_WoW_Change,Total_open_TSA_customers,Total_open_TSA_customers_WoW_Change,MAU,MAU_WoW_Change,MMAU,MMAU_WoW_Change,ACL_Applied_Loans,ACL_Applied_Loans_WoW_Change,ACL_Processed_Loans,ACL_Processed_Loans_WoW_Change,ACL_customers,ACL_customers_WoW_Change,MMAU_ACL_difference,MMAU_ACL_difference_WoW_Change,Offers_extended,Uptake,MAU_budget,ACLTSAbudget,IncomingAmount,OutgoingAmount
0,2024-01-01 00:00:00+00:00,1355336,,516362,,73572,,50617,,0,,0,,0,,50617,,0,,82693,150,466156300.0,644502400.0
1,2024-01-15 00:00:00+00:00,1369085,6793.0,523114,3275.0,74108,273.0,50845,67.0,0,0.0,0,0.0,0,0.0,50845,67.0,0,,82693,150,503609400.0,704358300.0
2,2024-01-08 00:00:00+00:00,1362292,6956.0,519839,3477.0,73835,263.0,50778,161.0,0,0.0,0,0.0,0,0.0,50778,161.0,0,,82693,150,548790300.0,853834500.0
3,2024-01-22 00:00:00+00:00,1376006,6921.0,516477,-6637.0,74288,180.0,51028,183.0,0,0.0,0,0.0,0,0.0,51028,183.0,0,,82693,150,420100800.0,622506500.0
4,2024-01-29 00:00:00+00:00,1383107,7101.0,519803,3326.0,74193,-95.0,50900,-128.0,0,0.0,0,0.0,0,0.0,50900,-128.0,0,,82693,150,413171700.0,600877000.0
5,2024-03-11 00:00:00+00:00,1425333,6945.0,527673,2663.0,75776,-42.0,50900,-87.0,0,0.0,0,0.0,0,0.0,50900,-87.0,0,,100000,150,472706100.0,629271100.0
6,2024-03-25 00:00:00+00:00,1439787,6925.0,533434,2688.0,75501,-181.0,50783,-180.0,0,0.0,0,0.0,0,0.0,50783,-180.0,0,,100000,150,415088200.0,537943600.0
7,2024-03-04 00:00:00+00:00,1418388,6748.0,525010,3027.0,75818,719.0,50987,147.0,0,0.0,0,0.0,0,0.0,50987,147.0,0,,100000,150,607551900.0,797835500.0
8,2024-03-18 00:00:00+00:00,1432862,7529.0,530746,3073.0,75682,-94.0,50963,63.0,0,0.0,0,0.0,0,0.0,50963,63.0,0,,100000,150,484223700.0,605665300.0
9,2024-02-12 00:00:00+00:00,1397598,7260.0,525724,2910.0,74490,203.0,50903,-82.0,0,0.0,0,0.0,0,0.0,50903,-82.0,0,,86193,150,397324200.0,577561900.0


In [8]:
# Inspect column dtypes and non-null counts of the backup table as loaded.
dfbackup.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 23 columns):
 #   Column                               Non-Null Count  Dtype              
---  ------                               --------------  -----              
 0   Date                                 22 non-null     datetime64[us, UTC]
 1   Total_registered_users               22 non-null     Int64              
 2   Total_registered_users_WoW_Change    21 non-null     Int64              
 3   Total_open_TSA_customers             22 non-null     Int64              
 4   Total_open_TSA_customers_WoW_Change  21 non-null     Int64              
 5   MAU                                  22 non-null     Int64              
 6   MAU_WoW_Change                       21 non-null     Int64              
 7   MMAU                                 22 non-null     Int64              
 8   MMAU_WoW_Change                      21 non-null     Int64              
 9   ACL_Applied_Loans                 

In [9]:
# Inspect column dtypes and non-null counts of the weekly totals before merging.
res.info()

<class 'pandas.core.frame.DataFrame'>
Index: 22 entries, 0 to 0
Data columns (total 3 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   date            22 non-null     object 
 1   IncomingAmount  22 non-null     float64
 2   OutgoingAmount  22 non-null     float64
dtypes: float64(2), object(1)
memory usage: 704.0+ bytes


In [10]:
# Parse the object-dtype 'date' column into timezone-aware (UTC) timestamps so it
# can be matched against dfbackup's 'Date' column.
# NOTE(review): the resulting time unit (ns vs us) depends on the pandas version -
# confirm it is compatible with dfbackup's datetime64[us, UTC] 'Date' before merging.
res['date'] = pd.to_datetime(res['date'], utc=True)

In [11]:
# Rename the key column in place so it carries the same name ('Date') as in dfbackup.
res.rename(columns = {'date':'Date'}, inplace = True)

In [14]:
# List dfbackup's current columns.
# NOTE(review): this cell's execution count (14) is higher than that of the cell
# below it (13); the captured output already lacks IncomingAmount/OutgoingAmount,
# i.e. it was produced after the column selection ran. Re-run top to bottom.
dfbackup.columns

Index(['Date', 'Total_registered_users', 'Total_registered_users_WoW_Change',
       'Total_open_TSA_customers', 'Total_open_TSA_customers_WoW_Change',
       'MAU', 'MAU_WoW_Change', 'MMAU', 'MMAU_WoW_Change', 'ACL_Applied_Loans',
       'ACL_Applied_Loans_WoW_Change', 'ACL_Processed_Loans',
       'ACL_Processed_Loans_WoW_Change', 'ACL_customers',
       'ACL_customers_WoW_Change', 'MMAU_ACL_difference',
       'MMAU_ACL_difference_WoW_Change', 'Offers_extended', 'Uptake',
       'MAU_budget', 'ACLTSAbudget'],
      dtype='object')

In [13]:
# Columns to keep from the backup table. IncomingAmount / OutgoingAmount are left
# out of this list, so they are dropped from dfbackup here.
backup_columns = [
    'Date',
    'Total_registered_users',
    'Total_registered_users_WoW_Change',
    'Total_open_TSA_customers',
    'Total_open_TSA_customers_WoW_Change',
    'MAU',
    'MAU_WoW_Change',
    'MMAU',
    'MMAU_WoW_Change',
    'ACL_Applied_Loans',
    'ACL_Applied_Loans_WoW_Change',
    'ACL_Processed_Loans',
    'ACL_Processed_Loans_WoW_Change',
    'ACL_customers',
    'ACL_customers_WoW_Change',
    'MMAU_ACL_difference',
    'MMAU_ACL_difference_WoW_Change',
    'Offers_extended',
    'Uptake',
    'MAU_budget',
    'ACLTSAbudget',
]
dfbackup = dfbackup.loc[:, backup_columns].copy()

In [15]:
# Left-join the weekly totals onto the backup rows by 'Date'; every dfbackup row
# is kept, and rows without a matching date get missing amounts.
dfbackup = pd.merge(dfbackup, res, how='left', on='Date')

In [16]:
# List dfbackup's columns after the merge.
dfbackup.columns

Index(['Date', 'Total_registered_users', 'Total_registered_users_WoW_Change',
       'Total_open_TSA_customers', 'Total_open_TSA_customers_WoW_Change',
       'MAU', 'MAU_WoW_Change', 'MMAU', 'MMAU_WoW_Change', 'ACL_Applied_Loans',
       'ACL_Applied_Loans_WoW_Change', 'ACL_Processed_Loans',
       'ACL_Processed_Loans_WoW_Change', 'ACL_customers',
       'ACL_customers_WoW_Change', 'MMAU_ACL_difference',
       'MMAU_ACL_difference_WoW_Change', 'Offers_extended', 'Uptake',
       'MAU_budget', 'ACLTSAbudget', 'IncomingAmount', 'OutgoingAmount'],
      dtype='object')

In [17]:
# Column order of the frame that is uploaded as the v3 table.
final_columns = [
    'Date',
    'Total_registered_users',
    'Total_registered_users_WoW_Change',
    'Total_open_TSA_customers',
    'Total_open_TSA_customers_WoW_Change',
    'MAU',
    'MAU_WoW_Change',
    'MMAU',
    'MMAU_WoW_Change',
    'ACL_Applied_Loans',
    'ACL_Applied_Loans_WoW_Change',
    'ACL_Processed_Loans',
    'ACL_Processed_Loans_WoW_Change',
    'ACL_customers',
    'ACL_customers_WoW_Change',
    'MMAU_ACL_difference',
    'MMAU_ACL_difference_WoW_Change',
    'Offers_extended',
    'Uptake',
    'MAU_budget',
    'ACLTSAbudget',
    'IncomingAmount',
    'OutgoingAmount',
]
# Independent copy, so later changes to `final` do not touch dfbackup.
final = dfbackup.loc[:, final_columns].copy()

In [18]:
# Inspect column dtypes and non-null counts of the frame that will be uploaded.
final.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22 entries, 0 to 21
Data columns (total 23 columns):
 #   Column                               Non-Null Count  Dtype              
---  ------                               --------------  -----              
 0   Date                                 22 non-null     datetime64[us, UTC]
 1   Total_registered_users               22 non-null     Int64              
 2   Total_registered_users_WoW_Change    21 non-null     Int64              
 3   Total_open_TSA_customers             22 non-null     Int64              
 4   Total_open_TSA_customers_WoW_Change  21 non-null     Int64              
 5   MAU                                  22 non-null     Int64              
 6   MAU_WoW_Change                       21 non-null     Int64              
 7   MMAU                                 22 non-null     Int64              
 8   MMAU_WoW_Change                      21 non-null     Int64              
 9   ACL_Applied_Loans                 

In [19]:
# Remove any existing v3 table before the upload in the following cell.
sq = """drop table if exists dap_ds_poweruser_playground.TSA_Report_Marketing_Template_v3;"""
drop_v3_job = client.query(sq)
# client.query() only submits the job. Wait for it so that a failure is raised
# here and the drop cannot still be pending when the load job starts.
drop_v3_job.result()
drop_v3_job

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=dc1dfeaf-aca2-46ea-92e5-cc9d80b3adcd>

In [20]:
# Define the dataset and table references
dataset_id = 'dap_ds_poweruser_playground'
table_id = 'TSA_Report_Marketing_Template_v3'
# Intended column types for the destination table.
# NOTE(review): this schema is NOT passed to the load job below, so the table's
# column types are derived from the DataFrame instead. It also does not match the
# frame as shown by final.info(): 'Date' is datetime64[us, UTC] there but DATE
# here. Reconcile the two before wiring it into LoadJobConfig(schema=...).
schema = [
    bigquery.SchemaField("Date", "DATE"),
    bigquery.SchemaField("Total_registered_users", "INT64"),
    bigquery.SchemaField("Total_registered_users_WoW_Change", "INT64"),
    bigquery.SchemaField("Total_open_TSA_customers", "INT64"),
    bigquery.SchemaField("Total_open_TSA_customers_WoW_Change", "INT64"),
    bigquery.SchemaField("MAU", "INT64"),
    bigquery.SchemaField("MAU_WoW_Change", "INT64"),
    bigquery.SchemaField("MMAU", "INT64"),
    bigquery.SchemaField("MMAU_WoW_Change", "INT64"),
    bigquery.SchemaField("ACL_Applied_Loans", "Int64"),
    bigquery.SchemaField("ACL_Applied_Loans_WoW_Change", "Int64"),
    bigquery.SchemaField("ACL_Processed_Loans", "Int64"),
    bigquery.SchemaField("ACL_Processed_Loans_WoW_Change", "Int64"),
    bigquery.SchemaField("ACL_customers", "INT64"),
    bigquery.SchemaField("ACL_customers_WoW_Change", "INT64"),
    bigquery.SchemaField("MMAU_ACL_difference", "INT64"),
    bigquery.SchemaField("MMAU_ACL_difference_WoW_Change", "INT64"),
    bigquery.SchemaField("Offers_extended", "INT64"),
    bigquery.SchemaField("Uptake", "INT64"),
    bigquery.SchemaField("MAU_budget", "INT64"),
    bigquery.SchemaField("ACLTSAbudget", "FLOAT64"),
    bigquery.SchemaField("IncomingAmount", "FLOAT64"),
    bigquery.SchemaField("OutgoingAmount", "FLOAT64"),
]
# Create the dataset reference in the client's project. Client.dataset() is
# deprecated in google-cloud-bigquery, so the reference is built directly.
dataset_ref = bigquery.DatasetReference(client.project, dataset_id)
# Define the table reference
table_ref = dataset_ref.table(table_id)
# Configure the job to overwrite the table if it already exists. Without an
# explicit write disposition a re-run of this cell would append a second copy
# of the rows to the existing table.
job_config = bigquery.LoadJobConfig(
    write_disposition=bigquery.WriteDisposition.WRITE_TRUNCATE,
)
# Load the DataFrame into BigQuery
job = client.load_table_from_dataframe(final, table_ref, job_config=job_config)
# Wait for the job to complete (raises if the load failed)
job.result()
print(f"Table {table_id} created in dataset {dataset_id}.")


Table TSA_Report_Marketing_Template_v3 created in dataset dap_ds_poweruser_playground.
