# Client Demographics

In [0]:
from sqlalchemy import create_engine
import pandas as pd 
import numpy as np 

import json
from urllib.parse import quote_plus

# Load the Redshift connection settings from the workspace credentials file.
with open('/Workspace/Credentials/db_data.json', 'r') as fp:
    data = json.load(fp)


redshift_cfg = data['redshift']
host = redshift_cfg['host']
user = redshift_cfg['user']
passwd = redshift_cfg['passwd']
database = redshift_cfg['database']

# Percent-encode the user name and password before embedding them in the URL:
# characters such as '@', ':', '/' or '%' in a raw credential would otherwise
# be parsed as URL delimiters and produce a broken or wrong connection target.
# NOTE(review): assumes the credentials are stored as plain text in the JSON
# file (not already percent-encoded) -- confirm.
conn = create_engine(
    f"postgresql+psycopg2://{quote_plus(user)}:{quote_plus(passwd)}"
    f"@{host}:5439/{database}"
)


def _format_two_decimals(value):
    """Render a number as fixed-point text with two digits after the point."""
    return '%.2f' % value


# Use the two-decimal formatter whenever pandas displays floating-point values.
pd.set_option('display.float_format', _format_two_decimals)


from datetime import datetime, timedelta

# Read the clock once so every derived value below refers to the same instant.
# Calling datetime.today() separately for each value lets them disagree when
# the calls straddle a second or midnight boundary.
run_started = datetime.today()

today = run_started.strftime('%Y-%m-%d')
yesterday = (run_started - timedelta(days=1)).strftime('%Y-%m-%d')
print(today)
print(yesterday)


# Date 14 days before the run, formatted as YYYY-MM-DD.
last_2_wks = (run_started - timedelta(days=14)).strftime('%Y-%m-%d')
print('------------------------------------')
print(last_2_wks)

print('\n')
now = run_started.strftime('%Y-%m-%d %H:%M:%S')

# NOTE(review): despite the "30_mins" names, both values are offset by one
# full day, not 30 minutes. The offset is kept as-is; confirm which of the
# two (the name or the window) is the intended one.
window_start = run_started - timedelta(days=1)
last_30_mins = window_start.strftime('%Y-%m-%d %H:%M:%S')
trunc_last_30_mins = window_start.strftime('%Y-%m-%d %H:%M')
print(last_30_mins, 'to', now)


## Generation demographic attributes

In [0]:
%%time

# Build the client demographics frame: every non-closed client from
# dwh_all_clients, left-joined to its BVN record, with a generation label
# derived from the birth year and the earliest activation date as onboarding
# date.
#
# Changes relative to the previous version of this query:
#   * Birth year 1945 used to match no CASE branch (`< '1945'` followed by
#     `BETWEEN '1946' ...`) and produced a NULL generation; it is now included
#     in 'Silent Generation'.
#   * The query is a plain string rather than an f-string: it interpolates
#     nothing, and an f-string would misread any future literal brace.
#   * DISTINCT was dropped: every selected non-aggregate column is in the
#     GROUP BY, so the grouped rows are already unique.
#
# NOTE(review): the generation CASE compares the last four characters of
# bvn_dob as text, so it presumably expects dates that end in a 4-digit
# year -- confirm the stored format.
# NOTE(review): the result holds one row per distinct combination of the
# grouped columns, so a client with several differing BVN rows would appear
# more than once -- confirm against dwh_clients_bvn_data.
rcdem = pd.read_sql_query('''
-- CLIENT DEMOGRAPH

SELECT
    dac.client_id,
    dac.client_name,
    dac.mobile_number,
    dcbd.bvn_phone_no AS bvn_phone_no,
    dac.email_address,
    dac.state,
    dcbd.bvn_email bvn_email,
    dcbd.bvn,
    dcbd.bvn_gender AS gender,
    dcbd.bvn_dob AS date_of_birth,
    CASE
        WHEN dcbd.bvn_dob IS NULL THEN 'Not Found'
        WHEN RIGHT(dcbd.bvn_dob, 4) <= '1945' THEN 'Silent Generation'
        WHEN RIGHT(dcbd.bvn_dob, 4) BETWEEN '1946' AND '1964' THEN 'Baby Boomers'
        WHEN RIGHT(dcbd.bvn_dob, 4) BETWEEN '1965' AND '1979' THEN 'Generation X'
        WHEN RIGHT(dcbd.bvn_dob, 4) BETWEEN '1980' AND '1994' THEN 'Millennials'
        WHEN RIGHT(dcbd.bvn_dob, 4) BETWEEN '1995' AND '2012' THEN 'Generation Z'
        WHEN RIGHT(dcbd.bvn_dob, 4) > '2012' THEN 'Generation Alpha'
    END AS generation,
    dcbd.bvn_state_of_origin AS state_of_origin,
    dcbd.bvn_state_of_residence AS residence_state,
    dac.client_tier,
    dac.client_category,
   /* rb.client_id AS referral_id,
    rb.client_name AS referral_name,
    rb.referral_code,*/
    MIN(dac.activation_date) AS date_onboarded,
    CURRENT_DATE as run_date

FROM
    dwh_all_clients dac
-- LEFT JOIN referred_by rb ON rb.client_id = dac.referred_by_id
LEFT OUTER JOIN dwh_clients_bvn_data dcbd ON dac.client_id = dcbd.client_id
WHERE
    dac.client_status != 'closed'
GROUP BY
    dac.client_id,
    dac.client_name,
    dac.mobile_number,
    dac.email_address,
    dcbd.bvn_email,
    dac.state,
    dcbd.bvn,
    dcbd.bvn_phone_no,
    dcbd.bvn_gender,
    dcbd.bvn_dob,
    dcbd.bvn_state_of_origin,
    dcbd.bvn_state_of_residence,
    dac.client_tier,
    dac.client_category
/*    rb.client_id,
    rb.client_name,
    rb.referral_code*/
--  LIMIT 1000;
''', conn)


# Echo the frame so the notebook renders a preview of the result.
rcdem

In [0]:
%%time
# Persist the demographics frame to the dwh_clients_demograph table.
# if_exists='replace' swaps out any existing table of that name; rows are sent
# in batches of 30000 using multi-row INSERT statements (method='multi'), and
# the DataFrame index is not written.
rcdem.to_sql(
    name="dwh_clients_demograph",
    con=conn,
    if_exists='replace',
    index=False,
    method='multi',
    chunksize=30000,
)

In [0]:
# Disabled alternative, kept for reference only: the code below sits inside a
# bare string literal, so it is never executed (the notebook merely echoes the
# string as the cell's value).
# NOTE(review): if this is ever revived, passing if_exists='replace' on every
# batch would recreate the table on each iteration and leave only the final
# batch in it; later batches would need if_exists='append'.
'''

# Write the data in iterative batches into Redshift
batch_size = 10000
for i in range(0, len(rcdem), batch_size):
    rcdem_batch = rcdem[i:i+batch_size]
    rcdem_batch.to_sql("dwh_clients_demograph", conn, index=False, if_exists='replace')
'''



In [0]:
# Report completion. `now` is the timestamp string captured in the setup cell,
# so the message shows when the run started rather than when this cell ran.
completion_message = "run completed successfully on " + now
print(completion_message)

## Resulting tables in the Redshift public schema

* dwh_clients_demograph - client attributes and features
