In [1]:
# %% [markdown]
# # Jupyter Notebook Loading Header
#
# This is a custom loading header for Jupyter Notebooks in Visual Studio Code.
# It includes common imports and settings to get you started quickly.

# %% [markdown]
## Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from google.cloud import bigquery
import os
path = r'C:\Users\DwaipayanChakroborti\AppData\Roaming\gcloud\legacy_credentials\dchakroborti@tonikbank.com\adc.json'
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = path
client = bigquery.Client(project='prj-prod-dataplatform')

# %% [markdown]
## Configure Settings
# Set options or configurations as needed
# Example: pd.set_option('display.max_columns', None)

In [2]:
import datetime
from dateutil.relativedelta import relativedelta

def get_week_start_dates(start_date, end_date):
    week_start_dates = []
    current_date = start_date
    while current_date <= end_date:
        week_start_date = current_date - datetime.timedelta(days=current_date.weekday())
        week_start_dates.append(week_start_date)
        current_date += relativedelta(weeks=1)
    return week_start_dates

start_date = datetime.date(2024, 1, 1)
end_date = datetime.date(2024, 5, 6)

week_start_dates = get_week_start_dates(start_date, end_date)
print(week_start_dates)

[datetime.date(2024, 1, 1), datetime.date(2024, 1, 8), datetime.date(2024, 1, 15), datetime.date(2024, 1, 22), datetime.date(2024, 1, 29), datetime.date(2024, 2, 5), datetime.date(2024, 2, 12), datetime.date(2024, 2, 19), datetime.date(2024, 2, 26), datetime.date(2024, 3, 4), datetime.date(2024, 3, 11), datetime.date(2024, 3, 18), datetime.date(2024, 3, 25), datetime.date(2024, 4, 1), datetime.date(2024, 4, 8), datetime.date(2024, 4, 15), datetime.date(2024, 4, 22), datetime.date(2024, 4, 29), datetime.date(2024, 5, 6)]


In [3]:
%%time
res =  pd.DataFrame()
for date in week_start_dates:
    # date = date.date()
    query = f"""with 
    all_cust 
    AS( 
        SELECT count(distinct cust_id) Total_registered_users
        from prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_mtb
        where created_dt <= "{date}" and cust_id is not null ),
    open_TSA_cust
    AS(
        SELECT count (distinct OFCUSTOMERID) Total_open_TSA_customers from `core_raw.customer_accounts` 
            where CRINTERDESC like 'Transactional Savings Account Inv_R'
            and OFCUSTOMERID in (select distinct cust_id 
            from prj-prod-dataplatform.dl_customers_db_raw.tdbk_customer_mtb where cust_id is not null)
            and OFISCLOSED = 'N'
            AND OFDATEOPENED <= "{date}"),
    loan_cust --list of customers having active loan
    AS (
        SELECT 
            distinct CAST(customerId AS STRING) customerId
        FROM 
            `prj-prod-dataplatform.risk_credit_mis.loan_master_table`
        WHERE 
            loanPaidStatus IN ('Normal', 'In Arrears')),
    non_ACL_cust --list of customers not having ACL loans
    AS (
        SELECT 
            distinct CAST(customerId AS STRING) customerId
        FROM 
            `prj-prod-dataplatform.risk_credit_mis.loan_master_table`
        WHERE 
            loanType != "TSBL" 
            AND date(approvedDateTime) = "{date}"),    
    MAU_with_trx 
    AS (
        SELECT DISTINCT customer_id 
        FROM `prj-prod-dataplatform.risk_mart.customer_transactions`
        WHERE transaction_date BETWEEN DATE_SUB('{date}', INTERVAL 30 DAY) AND '{date}'
        AND ABS(trx_amount) > 100
        AND account_type = "Tonik Account"
        AND customer_id NOT IN (SELECT customerId FROM loan_cust)),
    MAU_with_bal 
     AS (
        SELECT 
            DISTINCT client_id 
        FROM 
            `prj-prod-dataplatform.risk_mart.customer_balance`
        WHERE
            clearedbalance > 100
            AND account_type IN ("Tonik Account","Group Stash","Individual Stash")
            AND balanceDateAsOf = '{date}'
            AND client_id NOT IN (SELECT customerId FROM loan_cust)),
    MAU 
        AS (
        SELECT COUNT(customer_id) AS MAU 
        FROM 
            (SELECT customer_id 
            FROM 
                MAU_with_trx
            UNION DISTINCT
            SELECT 
                client_id AS customer_id
            FROM 
                MAU_with_bal)),
    transactional_cust --Mareks criteria
    AS (
        SELECT 
            DISTINCT customer_id
        FROM 
            `prj-prod-dataplatform.risk_mart.customer_transactions`
        WHERE
            transaction_date BETWEEN DATE_SUB('{date}', INTERVAL 3 MONTH) AND '{date}'
            AND customer_id NOT IN (SELECT customerId FROM non_ACL_cust)
        GROUP BY
            customer_id
        HAVING 
            COUNT(*) >= 5
    ),
    cust_with_balance --Mareks criteria
    AS (
        SELECT 
            DISTINCT client_id AS customer_id 
        FROM 
            `prj-prod-dataplatform.risk_mart.customer_balance`
        WHERE
            clearedbalance > 1000
            AND account_type IN ("Tonik Account","Group Stash","Individual Stash")
            AND balanceDateAsOf = '{date}'
            AND client_id NOT IN (SELECT customerId FROM non_ACL_cust)),
    MMAU
    AS (
        SELECT 
            COUNT(customer_id) AS MMAU 
        FROM 
            ( SELECT 
            customer_id 
        FROM 
            transactional_cust
        UNION DISTINCT
        SELECT 
            customer_id
        FROM 
            cust_with_balance)),
    ACL_customers 
    AS (
        SELECT 
      count(distinct customerId ) ACL_customers
        FROM 
            `prj-prod-dataplatform.risk_credit_mis.loan_master_table`
        WHERE 
            loanType = "TSBL" 
            AND date(approvedDateTime) = "{date}"),
    Offers_extended 
    AS (
        SELECT count(distinct cust_id) Offers_extended
    FROM 
    `prj-prod-dataplatform.dl_loans_db_raw.tdbk_loan_offers_trx` 
    where date(offer_start_date) <= "{date}"),
    b
    AS (
        SELECT 
            '{date}' AS Date,
            *
        FROM 
            all_cust  
        CROSS JOIN 
            open_TSA_cust
        CROSS JOIN 
            MAU 
        CROSS JOIN 
            MMAU
        CROSS JOIN
            ACL_customers
        CROSS JOIN 
            Offers_extended)
    select Date,
            Total_registered_users,
            Total_open_TSA_customers,
            MAU,
            MMAU,
            ACL_customers,
            MMAU - ACL_customers  MMAU_ACL_difference, 
            Offers_extended,
            CASE 
                WHEN Offers_extended<> 0 then ACL_customers/Offers_extended 
                ELSE NULL END AS Uptake
            
            from b;
            """
    df = client.query(query).to_dataframe()
    res = pd.concat([res,df])


CPU times: total: 1.36 s
Wall time: 1min 19s


In [4]:
res

Unnamed: 0,Date,Total_registered_users,Total_open_TSA_customers,MAU,MMAU,ACL_customers,MMAU_ACL_difference,Offers_extended,Uptake
0,2024-01-01,1355329,460796,78886,56015,0,56015,0,
0,2024-01-08,1362285,463658,78951,55441,4,55437,0,
0,2024-01-15,1369078,466267,79180,55546,0,55546,0,
0,2024-01-22,1375999,468787,79016,55797,2,55795,0,
0,2024-01-29,1383099,472476,78698,56717,2,56715,0,
0,2024-02-05,1390330,476669,78808,54921,0,54921,0,
0,2024-02-12,1397590,480949,78722,55797,0,55797,0,
0,2024-02-19,1404522,485155,78836,56572,0,56572,0,
0,2024-02-26,1411632,489315,78827,55649,0,55649,0,
0,2024-03-04,1418380,493570,79150,56157,0,56157,0,


In [5]:
df = res.copy()

In [6]:
# Assuming df contains your DataFrame with the data
# Convert the 'Date' column to datetime type
df['Date'] = pd.to_datetime(df['Date'])

# Sort the DataFrame by date
df = df.sort_values('Date')


# Calculate the week-on-week change for each column
for column in df.columns[1:]:  # Exclude 'Date' column
    df[f'{column}_WoW_Change'] = df[column].diff()
    # df[f'{column}_WoW_Change'] = df[column].shift(1) - df[column]

# Display the DataFrame with week-on-week changes
df
df['MAU_budget'] = 75000
df['perMAUbudget_Completion'] = round(df['MAU']/df['MAU_budget'], 2)
df['ACLTSAbudget'] = 150
df['UptakeACLTSAvsbudget'] = round(df['Uptake'] / df['ACLTSAbudget'], 2)
df['MAUas_percent_oftotalregisteredusers'] = round(df['MAU']/df['Total_registered_users'], 2 )
df['MMAU_of_MAU'] = round(df['MMAU']/df['MAU'], 2)

In [7]:
df

Unnamed: 0,Date,Total_registered_users,Total_open_TSA_customers,MAU,MMAU,ACL_customers,MMAU_ACL_difference,Offers_extended,Uptake,Total_registered_users_WoW_Change,...,ACL_customers_WoW_Change,MMAU_ACL_difference_WoW_Change,Offers_extended_WoW_Change,Uptake_WoW_Change,MAU_budget,perMAUbudget_Completion,ACLTSAbudget,UptakeACLTSAvsbudget,MAUas_percent_oftotalregisteredusers,MMAU_of_MAU
0,2024-01-01,1355329,460796,78886,56015,0,56015,0,,,...,,,,,75000,1.05,150,,0.06,0.71
0,2024-01-08,1362285,463658,78951,55441,4,55437,0,,6956.0,...,4.0,-578.0,0.0,,75000,1.05,150,,0.06,0.7
0,2024-01-15,1369078,466267,79180,55546,0,55546,0,,6793.0,...,-4.0,109.0,0.0,,75000,1.06,150,,0.06,0.7
0,2024-01-22,1375999,468787,79016,55797,2,55795,0,,6921.0,...,2.0,249.0,0.0,,75000,1.05,150,,0.06,0.71
0,2024-01-29,1383099,472476,78698,56717,2,56715,0,,7100.0,...,0.0,920.0,0.0,,75000,1.05,150,,0.06,0.72
0,2024-02-05,1390330,476669,78808,54921,0,54921,0,,7231.0,...,-2.0,-1794.0,0.0,,75000,1.05,150,,0.06,0.7
0,2024-02-12,1397590,480949,78722,55797,0,55797,0,,7260.0,...,0.0,876.0,0.0,,75000,1.05,150,,0.06,0.71
0,2024-02-19,1404522,485155,78836,56572,0,56572,0,,6932.0,...,0.0,775.0,0.0,,75000,1.05,150,,0.06,0.72
0,2024-02-26,1411632,489315,78827,55649,0,55649,0,,7110.0,...,0.0,-923.0,0.0,,75000,1.05,150,,0.06,0.71
0,2024-03-04,1418380,493570,79150,56157,0,56157,0,,6748.0,...,0.0,508.0,0.0,,75000,1.06,150,,0.06,0.71


In [8]:
df.columns

Index(['Date', 'Total_registered_users', 'Total_open_TSA_customers', 'MAU',
       'MMAU', 'ACL_customers', 'MMAU_ACL_difference', 'Offers_extended',
       'Uptake', 'Total_registered_users_WoW_Change',
       'Total_open_TSA_customers_WoW_Change', 'MAU_WoW_Change',
       'MMAU_WoW_Change', 'ACL_customers_WoW_Change',
       'MMAU_ACL_difference_WoW_Change', 'Offers_extended_WoW_Change',
       'Uptake_WoW_Change', 'MAU_budget', 'perMAUbudget_Completion',
       'ACLTSAbudget', 'UptakeACLTSAvsbudget',
       'MAUas_percent_oftotalregisteredusers', 'MMAU_of_MAU'],
      dtype='object')

In [9]:
final = df[['Date', 'Total_registered_users', 'Total_registered_users_WoW_Change', 'Total_open_TSA_customers','Total_open_TSA_customers_WoW_Change', 'MAU', 'MAU_WoW_Change'
    , 'MMAU', 'MMAU_WoW_Change',  'ACL_customers', 'ACL_customers_WoW_Change' , 'MMAU_ACL_difference','MMAU_ACL_difference_WoW_Change', 'Offers_extended', 'Uptake', 'MAU_budget'
    ,'perMAUbudget_Completion','ACLTSAbudget', 'UptakeACLTSAvsbudget', 'MAUas_percent_oftotalregisteredusers', 'MMAU_of_MAU']].copy()

In [10]:
final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 19 entries, 0 to 0
Data columns (total 21 columns):
 #   Column                                Non-Null Count  Dtype         
---  ------                                --------------  -----         
 0   Date                                  19 non-null     datetime64[ns]
 1   Total_registered_users                19 non-null     Int64         
 2   Total_registered_users_WoW_Change     18 non-null     Int64         
 3   Total_open_TSA_customers              19 non-null     Int64         
 4   Total_open_TSA_customers_WoW_Change   18 non-null     Int64         
 5   MAU                                   19 non-null     Int64         
 6   MAU_WoW_Change                        18 non-null     Int64         
 7   MMAU                                  19 non-null     Int64         
 8   MMAU_WoW_Change                       18 non-null     Int64         
 9   ACL_customers                         19 non-null     Int64         
 10  ACL_custom

In [11]:
final.head()

Unnamed: 0,Date,Total_registered_users,Total_registered_users_WoW_Change,Total_open_TSA_customers,Total_open_TSA_customers_WoW_Change,MAU,MAU_WoW_Change,MMAU,MMAU_WoW_Change,ACL_customers,...,MMAU_ACL_difference,MMAU_ACL_difference_WoW_Change,Offers_extended,Uptake,MAU_budget,perMAUbudget_Completion,ACLTSAbudget,UptakeACLTSAvsbudget,MAUas_percent_oftotalregisteredusers,MMAU_of_MAU
0,2024-01-01,1355329,,460796,,78886,,56015,,0,...,56015,,0,,75000,1.05,150,,0.06,0.71
0,2024-01-08,1362285,6956.0,463658,2862.0,78951,65.0,55441,-574.0,4,...,55437,-578.0,0,,75000,1.05,150,,0.06,0.7
0,2024-01-15,1369078,6793.0,466267,2609.0,79180,229.0,55546,105.0,0,...,55546,109.0,0,,75000,1.06,150,,0.06,0.7
0,2024-01-22,1375999,6921.0,468787,2520.0,79016,-164.0,55797,251.0,2,...,55795,249.0,0,,75000,1.05,150,,0.06,0.71
0,2024-01-29,1383099,7100.0,472476,3689.0,78698,-318.0,56717,920.0,2,...,56715,920.0,0,,75000,1.05,150,,0.06,0.72


In [12]:
sq = """drop table if exists dap_ds_poweruser_playground.TSA_Report_Marketing_Template;"""
client.query(sq)

QueryJob<project=prj-prod-dataplatform, location=asia-southeast1, id=d552e2be-e101-497d-84d5-c968f2d466a4>

In [13]:
# Define the dataset and table references
dataset_id = 'dap_ds_poweruser_playground'
table_id = 'TSA_Report_Marketing_Template'
# Define the table schema as per your DataFrame columns
schema = [
    bigquery.SchemaField("Date", "DATE"),
    bigquery.SchemaField("Total_registered_users", "INT64"),
    bigquery.SchemaField("Total_registered_users_WoW_Change", "INT64"),
    bigquery.SchemaField("Total_open_TSA_customers", "INT64"),
    bigquery.SchemaField("Total_open_TSA_customers_WoW_Change", "INT64"),
    bigquery.SchemaField("MAU", "INT64"),
    bigquery.SchemaField("MAU_WoW_Change", "INT64"),
    bigquery.SchemaField("MMAU", "INT64"),
    bigquery.SchemaField("MMAU_WoW_Change", "INT64"),
    bigquery.SchemaField("ACL_customers", "INT64"),
    bigquery.SchemaField("ACL_customers_WoW_Change", "INT64"),
    bigquery.SchemaField("MMAU_ACL_difference", "INT64"),
    bigquery.SchemaField("MMAU_ACL_difference_WoW_Change", "INT64"),
    bigquery.SchemaField("Offers_extended", "INT64"),
    bigquery.SchemaField("Uptake", "INT64"),
    bigquery.SchemaField("MAU_budget", "INT64"),
    bigquery.SchemaField("perMAUbudget_Completion", "FLOAT64"),
    bigquery.SchemaField("ACLTSAbudget", "INT64"),
    bigquery.SchemaField("UptakeACLTSAvsbudget", "FLOAT64"),
    bigquery.SchemaField("MAUas_percent_oftotalregisteredusers", "FLOAT64"),
    bigquery.SchemaField("MMAU_of_MAU", "FLOAT64"),
]
# Create the dataset reference
dataset_ref = client.dataset(dataset_id)
# Define the table reference
table_ref = dataset_ref.table(table_id)
# Configure the job to overwrite the table if it already exists
job_config = bigquery.LoadJobConfig()
# Load the DataFrame into BigQuery
job = client.load_table_from_dataframe(final, table_ref, job_config=job_config)
# Wait for the job to complete
job.result()
print(f"Table {table_id} created in dataset {dataset_id}.")


Table TSA_Report_Marketing_Template created in dataset dap_ds_poweruser_playground.
