
# SUP Eligibility Expansion Analysis

Determine the impact of expanding SUP merchant eligibility, so that all consumers can see all merchants.

## Impact of the decision

    Customers may make more purchases given the expanded merchant pool, increasing GPV

    Higher-risk customers will have higher losses at these newly eligible merchants

In [0]:
import json
# from jsonpath_ng import jsonpath, parse
# from jsonpath import jsonpath
import pandas as pd
import numpy as np

# from utils import policy_dict, policy_data_dict, find_non_zero_riskweight_rules, find_zero_riskweight_rules
from pandasql import sqldf
from tqdm import tqdm
import logging

from pysnowflake import Session

# Notebook display settings: show wide/long frames and print floats with
# three decimal places.
pd.set_option('display.max_rows', 300)
pd.set_option('display.max_columns', 100)
pd.set_option('display.float_format', '{:.3f}'.format)


def run_query(query):
    """Run a SQL query through pandasql against this module's globals."""
    return sqldf(query, globals())




In [0]:
# LDAP name of the person running the notebook - replace it with your own.
USER_NAME = 'jobyg'

# Open a Snowflake session; the default database is the user's personal one
# and authentication goes through the 'externalbrowser' authenticator.
sess = Session(
    connection_override_args=dict(
        autocommit=True,
        authenticator='externalbrowser',
        account='square',
        database='PERSONAL_' + USER_NAME.upper(),
        user=USER_NAME + '@squareup.com',
    )
)
conn = sess.open()



In [0]:
# Set the session context; unqualified table names used later in the notebook
# resolve against AP_CUR_FRDRISK_G.public.
conn.execute('use warehouse ADHOC__XLARGE')
conn.execute('use database AP_CUR_FRDRISK_G')
conn.execute('use schema public')

# Build the base table of SUP eligibility rule executions: one row per
# execution of rule 'us_fraud_online_sup_eligibility_model_risk_filter_v3'
# in the US with an event time on/after 2024-08-01, joined to D_CONSUMER for
# the consumer's first order date.
#   tenure_grp          - 'no orders' (no first order, or first order after
#                         par_process_date), 'new' (<= 14 days), 'tenured'.
#   no_of_merchants_new - merchant count estimated from the LENGTH of the
#                         actions text: round((length - 94) / 12).
#                         NOTE(review): presumably 94 chars of fixed payload
#                         and 12 chars per merchant id - confirm.
#   eligbility_tier     - tier derived from that count. The misspelt column
#                         name is referenced by later queries, so it is kept.
#                         NOTE(review): US counts below 83 or above 127 get a
#                         NULL tier - confirm that is intended.
# NOTE(review): tenure_grp is measured against par_process_date while
# `tenure` is measured against event_info_event_time - confirm.
query = '''create or replace  table ap_cur_Frdrisk_g.public.jobyg_US_karma as
(select
a.event_info_event_time
,a.event_info_event_time::date as event_date
, a.consumer_uuid
, c.first_order_date
, case
when c.first_order_date is null then 'no orders'
when datediff('day',c.first_order_date, a.par_process_date) < 0 then 'no orders'
when datediff('day',c.first_order_date, a.par_process_date) <= 14 then 'new'
when datediff('day',c.first_order_date, a.par_process_date) > 14 then 'tenured'
end as tenure_grp
, datediff('day',c.first_order_date, a.event_info_event_time) as tenure
, a.is_in_treatment
, actions
,a.par_region
, length(actions) as action_length
, case when (length(actions) - 94) <= 0 then 0
       else round((length(actions) - 94)/12)
       end as no_of_merchants_new,

case  when a.par_Region = 'GB' and no_of_merchants_new between 19 and 29 then 'gb_high'
      when a.par_Region = 'GB' and no_of_merchants_new between 29 and 42 then 'gb_med'
      when a.par_Region = 'GB' and no_of_merchants_new > 42 then 'gb_low'

      when a.par_Region = 'US' and no_of_merchants_new = 83 then 'us_high'
      when a.par_Region = 'US' and no_of_merchants_new between 83 and 126 then 'us_med'
      when a.par_Region = 'US' and no_of_merchants_new = 127 then 'us_low'
    end as eligbility_tier
from AP_CUR_R_FEATSCI.CURATED_FEATURE_SCIENCE_RED.TBL_RAW_R_E_REKARMA_RL_EXEC_RSLT_RULES__jobyg_DSL3_SV a
left join AP_RAW_GREEN.GREEN.D_CONSUMER c
on a.consumer_uuid = c.uuid
where a.rule_id in ('us_fraud_online_sup_eligibility_model_risk_filter_v3'
--,'gb_fraud_online_sup_eligibility_model_risk_filter_v3'
)
and a.event_info_event_time >= '2024-08-01' --and '2024-01-20'
and a.par_region in ('US')
);'''
conn.execute(query)

In [0]:
# Download every distinct `actions` payload recorded for non-tenured rows.
full_merchant_set = conn.download('''select distinct actions from ap_cur_Frdrisk_g.public.jobyg_US_karma where tenure_grp != 'tenured' ''')

In [0]:
import json

def extract_merchant_ids(json_str):
    """Return the allowed merchant ids listed in a rule ``actions`` payload.

    The payload is JSON holding either a single action object or a list of
    them.  The first action whose ``action_name`` is
    ``allowed_merchant_store_ids_assign`` supplies the result, read from
    ``output_map['allowed_merchant_ids']``.

    Args:
        json_str: JSON text of the payload.

    Returns:
        The ``allowed_merchant_ids`` value of the first matching action, or
        an empty list when the payload cannot be parsed, has an unexpected
        shape, or carries no such action / no ids.
    """
    try:
        actions = json.loads(json_str)
    except (TypeError, ValueError) as e:
        # TypeError: input is not text (e.g. None).  ValueError: malformed
        # JSON (json.JSONDecodeError subclasses ValueError).
        # str() keeps the diagnostic itself from failing on non-text input.
        print(f"Error processing: {str(json_str)[:100]}...")
        print(f"Error: {str(e)}")
        return []

    # A single action object is treated like a one-element list.
    if isinstance(actions, dict):
        actions = [actions]
    elif not isinstance(actions, list):
        print(f"Error processing: {str(json_str)[:100]}...")
        print("Error: expected a JSON object or a list of objects")
        return []

    for action in actions:
        if not isinstance(action, dict):
            # Skip stray scalars instead of discarding the whole payload.
            continue
        if action.get('action_name') == 'allowed_merchant_store_ids_assign':
            output_map = action.get('output_map')
            if not isinstance(output_map, dict):
                return []
            # `or []` also covers an explicit JSON null, which would
            # otherwise hand None to callers that extend a list with it.
            return output_map.get('allowed_merchant_ids') or []
    return []
    
# Flatten the merchant ids of every non-null actions payload into one list.
all_merchant_ids = [
    merchant_id
    for payload in full_merchant_set['actions'].dropna()
    for merchant_id in extract_merchant_ids(payload)
]

# De-duplicate and sort so the listing is stable from run to run.
unique_merchant_ids = sorted(set(all_merchant_ids))

print(f"Found {len(unique_merchant_ids)} unique merchant IDs:")
print(unique_merchant_ids)

In [0]:
# Merchant id -> merchant name pairs for SUP (sup_yn = 1) US rows with an
# order date on/after 2024-08-01.
merchant_mapping = conn.download('''select distinct merchant_id, merchant_name from ap_cur_riskbi_g.curated_risk_bi_green.dwm_order_loss_tagging where sup_yn = 1 and 
par_region in ('US')
and order_date >= '2024-08-01';''')

In [0]:
# merchant_mapping

In [0]:
# Map every merchant id found in the actions payloads to a merchant name.
# Ids with no row in merchant_mapping keep the raw id as their value.
merchant_dict = {}
for merchant in unique_merchant_ids:
    int_merchant = int(merchant)
    matching_names = merchant_mapping.loc[
        merchant_mapping['merchant_id'] == int_merchant, 'merchant_name'
    ]
    # "No matching row" is the only expected miss and is handled explicitly;
    # any other problem (e.g. a missing column) now surfaces instead of being
    # hidden by a bare `except`.
    if matching_names.empty:
        merchant_dict[merchant] = merchant
    else:
        merchant_dict[merchant] = str(matching_names.values[0])


In [0]:
# Spot check: show the name of the first mapping row for merchant id 100140976.
merchant_mapping.loc[merchant_mapping.merchant_id == 100140976]['merchant_name'].values[0]

In [0]:
# Collect the merchants whose merchant_dict value still parses as an integer,
# i.e. the ids that received no merchant name.
merch_list_to_fix = []
for v in merchant_dict.values():
    try:
        v = int(v)
    except (TypeError, ValueError):
        # The value is a non-numeric merchant name: nothing to fix.
        continue
    print(v)
    merch_list_to_fix.append(v)

In [0]:
    
# Pull the id -> name mapping again, this time also including the ids in
# merch_list_to_fix regardless of the SUP / region / date filter.
query = '''select distinct merchant_id, merchant_name from ap_cur_riskbi_g.curated_risk_bi_green.dwm_order_loss_tagging
where (sup_yn = 1 and 
par_region in ('US')
and order_date >= '2024-08-01')
'''
if merch_list_to_fix:
    # An empty `in ()` list is a SQL syntax error, so the clause is only
    # added when there is at least one id.  The ids are Python ints, so
    # interpolating them cannot inject SQL.
    id_list = ','.join(str(merchant_id) for merchant_id in merch_list_to_fix)
    query += f'''or merchant_id in (
{id_list}) 
'''
query += ';'
print(query)
merchant_mapping = conn.download(query)


In [0]:
# Rebuild the merchant id -> merchant name map from the current
# merchant_mapping.  Ids with no row keep the raw id as their value.
merchant_dict = {}
for merchant in unique_merchant_ids:
    int_merchant = int(merchant)
    matching_names = merchant_mapping.loc[
        merchant_mapping['merchant_id'] == int_merchant, 'merchant_name'
    ]
    # "No matching row" is the only expected miss and is handled explicitly;
    # any other problem (e.g. a missing column) now surfaces instead of being
    # hidden by a bare `except`.
    if matching_names.empty:
        merchant_dict[merchant] = merchant
    else:
        merchant_dict[merchant] = str(matching_names.values[0])


In [0]:
# Display the merchant mapping DataFrame for inspection.
merchant_mapping

In [0]:
# Display the id -> name map for inspection.
merchant_dict
# Names for ids that are assigned by hand in this notebook:
#   400510048 -- Harrods
#   106668596 -- Vitacost
#   653056    -- SSENSE



In [0]:
# Hand-assigned merchant names for three merchant ids.
merchant_dict.update({
    '400510048': 'Harrods',
    '106668596': 'Vitacost',
    '653056': 'SSENSE',
})
# Ids noted alongside these overrides: 100150231, 400475455, 653056


## Determine how often a merchant is present

In [0]:
from collections import defaultdict

def invert_dict_to_list(original_dict):
    """Group the keys of ``original_dict`` by their lower-cased value.

    Returns a plain dict mapping each lower-cased value to the list of keys
    that carried it, in iteration order.
    """
    keys_by_value = {}
    for key, value in original_dict.items():
        keys_by_value.setdefault(value.lower(), []).append(key)
    return keys_by_value

# Example usage
# Print every merchant name (case-insensitive) shared by more than one id.
dupe_dict = invert_dict_to_list(merchant_dict)
for name, ids_for_name in dupe_dict.items():
    if len(ids_for_name) <= 1:
        continue
    print(name, ids_for_name)

In [0]:
# Merchants that appear under more than one merchant id, as (label, ids).
# All ids of a group feed ONE shared flag column, emitted the first time any
# of the group's ids is met while walking merchant_dict; the column is named
# after the merchant name stored for that first id.
_MERCHANT_ID_GROUPS = (
    ('ebay', ('100152771', '400461110')),
    ('nike', ('100140981', '400474394')),
    ('ssense', ('100150231', '400475455', '653056')),
    ('coach', ('100152858', '400504709')),
    ('samsung', ('400507557', '100162744')),
    ('etsy', ('100152836', '400515891')),
    ('apple', ('100150199', '400514646')),
    ('uniqlo', ('100150219', '400575275')),
    ('body shop', ('101464464', '400510042')),
)


def _flag_column_name(merchant_name):
    """Reduce a merchant display name to text usable in a SQL column alias."""
    if merchant_name == '1 800 Flowers':
        # Spelled out by hand (presumably so the alias does not start with a
        # digit).
        return 'one_eight_hundred_flowers'
    for old, new in (('&', ''), ("'", ''), (',', ''), ('-', ''), (' ', '_'), ('.', 'dot')):
        merchant_name = merchant_name.replace(old, new)
    return merchant_name


def _merchant_flag_columns(names_by_id):
    """Build the ``,case when ... end as <name>_flag`` select-list fragment.

    Args:
        names_by_id: mapping of merchant id (str) to merchant name.

    Returns:
        One ``case`` expression per stand-alone merchant and one per id
        group, concatenated in the iteration order of ``names_by_id``.
    """
    group_of = {
        merchant_id: group
        for group in _MERCHANT_ID_GROUPS
        for merchant_id in group[1]
    }
    times_seen = {}
    columns = []
    for merchant_id, merchant_name in names_by_id.items():
        matched_ids = (merchant_id,)
        group = group_of.get(merchant_id)
        if group is not None:
            label, matched_ids = group
            times_seen[label] = times_seen.get(label, 0) + 1
            print(merchant_id, label, times_seen[label])
            if times_seen[label] > 1:
                # The group's column was already emitted for an earlier id;
                # a second one would duplicate the column name.
                continue
        # NOTE(review): `ilike '%<id>%'` is a substring match, so a short id
        # (e.g. 653056) also matches any longer id that contains it.
        condition = ' or '.join(f"actions ilike '%{mid}%'" for mid in matched_ids)
        alias = _flag_column_name(merchant_name)
        columns.append(f",case when {condition} then 1 else 0 end as {alias}_flag")
    return ''.join(columns)


# Add one 0/1 flag column per merchant to the base karma table.  The source
# is the fully qualified table created earlier in this notebook, which is
# also the table the merchant ids in merchant_dict were parsed from.
query = (
    '''create or replace  table ap_cur_Frdrisk_g.public.jobyg_US_karma_v2 as (
    select *
    '''
    + _merchant_flag_columns(merchant_dict)
    + ' from ap_cur_Frdrisk_g.public.jobyg_US_karma);'
)

print(query)
conn.execute(query)

In [0]:
# Sample five rows flagged for eBay whose merchant count is not 127.
# NOTE(review): this reads `karma_v2`, while the flag table created in this
# notebook is `jobyg_US_karma_v2` - confirm which table is intended.
b= conn.download('select * from karma_v2 where ebay_flag = 1 and no_of_merchants_new != 127 limit 5')
b

In [0]:
# Display the merchant mapping DataFrame for inspection.
merchant_mapping

In [0]:
# Merchants that appear under more than one merchant id, as (label, ids).
# The same grouping is used where the per-merchant flag columns are built, so
# each group owns a single `<name>_flag` column and gets a single count here.
_MERCHANT_ID_GROUPS = (
    ('ebay', ('100152771', '400461110')),
    ('nike', ('100140981', '400474394')),
    ('ssense', ('100150231', '400475455', '653056')),
    ('coach', ('100152858', '400504709')),
    ('samsung', ('400507557', '100162744')),
    ('etsy', ('100152836', '400515891')),
    ('apple', ('100150199', '400514646')),
    ('uniqlo', ('100150219', '400575275')),
    ('body shop', ('101464464', '400510042')),
)


def _count_column_name(merchant_name):
    """Reduce a merchant display name to text usable in a SQL column alias."""
    if merchant_name == '1 800 Flowers':
        # Spelled out by hand (presumably so the alias does not start with a
        # digit).
        return 'one_eight_hundred_flowers'
    for old, new in (('&', ''), ("'", ''), (',', ''), ('-', ''), (' ', '_'), ('.', 'dot')):
        merchant_name = merchant_name.replace(old, new)
    return merchant_name


def _merchant_count_columns(names_by_id):
    """Build the ``,count(case when <name>_flag=1 ...) as <name>_ct`` fragment.

    Args:
        names_by_id: mapping of merchant id (str) to merchant name.

    Returns:
        One count expression per stand-alone merchant and one per id group,
        concatenated in the iteration order of ``names_by_id``.
    """
    label_of = {
        merchant_id: label
        for label, merchant_ids in _MERCHANT_ID_GROUPS
        for merchant_id in merchant_ids
    }
    times_seen = {}
    columns = []
    for merchant_id, merchant_name in names_by_id.items():
        label = label_of.get(merchant_id)
        if label is not None:
            times_seen[label] = times_seen.get(label, 0) + 1
            print(merchant_id, label, times_seen[label])
            if times_seen[label] > 1:
                # The group's flag column is counted once, on its first id.
                continue
        name = _count_column_name(merchant_name)
        columns.append(
            f",count((case when {name}_flag=1 then consumer_uuid end)) as {name}_ct"
        )
    return ''.join(columns)


# Count, per merchant, the non-tenured US rows whose merchant count is not
# 127 and whose flag for that merchant is set.  The flag columns are derived
# from the current merchant_dict, so the query reads the flag table this
# notebook builds from that same dict.
query = (
    'select max(1) as temp'
    + _merchant_count_columns(merchant_dict)
    + ''' from ap_cur_Frdrisk_g.public.jobyg_US_karma_v2 where par_region = 'US' and tenure_grp !='tenured' and no_of_merchants_new!=127;'''
)

print(query)


In [0]:
# Run the query currently held in `query` and keep the result.
output = conn.download(query)

In [0]:
# Print the result transposed, so each column of `output` becomes a row.
print(output.T)

In [0]:
# Driver table: distinct (merchant_id, sup_yn, order_date, par_region) for
# SUP rows (sup_yn = 1) in GB/US between 2024-08-01 and 2025-02-28.
conn.execute('''create or replace temp table sup_merchant_driver as (
select distinct merchant_id, sup_yn, order_date, par_region
from ap_cur_riskbi_g.curated_risk_bi_green.dwm_order_loss_tagging 
where sup_yn = 1
and par_region in ('GB','US')
and order_date between '2024-08-01' and '2025-02-28'
--and checkpoint = 'CHECKOUT_CONFIRM'
);''')



# CHECKOUT_CONFIRM rows in US/GB for the same date range, limited to rows with
# coalesce(days_since_first_order_date, 0) < 15 and dedup = 1, with the loss
# tagging columns p2_overdue_d0_local / p2_due_local attached by order_token.
# NOTE(review): `c.sup_yn = 1` in the WHERE clause discards rows without a
# driver match, so the LEFT JOIN to sup_merchant_driver acts as an inner join.
# NOTE(review): the driver join uses merchant_id and date but not par_region;
# a merchant_id present in both regions on one date would duplicate rows -
# confirm this cannot happen.
conn.execute('''create or replace temp table sup_attempts as (
select a.*, b.p2_overdue_d0_local, b.p2_due_local, c.sup_yn
from ap_cur_r_frdrisk.curated_fraud_risk_red.unified_feature_datamart_base__jobyg_dsl3_sv a
left join ap_cur_riskbi_g.curated_risk_bi_green.dwm_order_loss_tagging  b
on a.order_token = b.order_token
left join sup_merchant_driver c
on a.merchant_id = c.merchant_id
and a.par_process_date = c.order_Date
where a.par_region in ('US','GB')
and a.checkpoint = 'CHECKOUT_CONFIRM'
and a.par_process_date between '2024-08-01' and '2025-02-28'
and coalesce(days_since_first_order_date, 0) <15
and c.sup_yn = 1
and dedup = 1
);''')

In [0]:
# Keep only the rows whose merchant count is not 127 (127 is the count that
# the base table maps to the 'us_low' tier).
# NOTE(review): reads `karma_v2`, while the flag table created in this
# notebook is `jobyg_US_karma_v2` - confirm which table is intended.
conn.execute('''create or replace temp table med_high_US_users as (
    select * from karma_v2 where no_of_merchants_new != 127);'''
)

In [0]:
# Preview five rows of the med_high_us_users temp table.
C = conn.download('select * from med_high_us_users limit 5')
C

In [0]:
# Per eligibility tier, for non-tenured rows: number of rows (login_ct) and
# number of distinct consumers (consumer_ct).
# NOTE(review): reads `US_karma`, while the base table created in this
# notebook is `jobyg_US_karma` - confirm which table is intended.
eligibility_percentage = conn.download('''select eligbility_tier, count(consumer_uuid) as login_ct, count(distinct(consumer_uuid)) as consumer_ct from US_karma where tenure_grp !='tenured'group by 1 order by 1''')
eligibility_percentage

In [0]:
# For every checkout attempt, attach the MOST RECENT eligibility row whose
# event time is at or before the checkout time (ORDER BY ... DESC, row 1).
# The window is partitioned on the attempt side (a.consumer_uuid): when no
# eligibility row precedes the checkout, the LEFT JOIN leaves b.* NULL, and
# partitioning on b.consumer_uuid would put every such attempt sharing a
# checkout time in one partition and keep only one of them.
# NOTE(review): reads `karma_v2`, while the flag table created in this
# notebook is `jobyg_US_karma_v2` - confirm which table is intended.
conn.execute('''create or replace  table ap_cur_frdrisk_g.public.sup_attempts_eligibility_joined as (
    select a.*, b.event_Date as sup_Risk_filter_date, b.tenure_grp, b.actions, b.no_of_merchants_new, b.eligbility_tier, b.vivid_seats_flag, b.lowes_flag, b.sams_club_flag, b.apple_flag, b.walmart_Flag, b.lululemon_flag, b.amazon_flag, b.best_buy_flag,
    case when vivid_seats_flag = 1 or lowes_flag = 1 or sams_club_flag =1 or apple_flag = 1 or walmart_Flag =1 or lululemon_flag =1 or amazon_flag =1 or best_buy_flag =1 then 1 else 0 end as hrm_enabled_flag
    from sup_attempts a 
    left join  karma_v2 b
    on a.consumer_uuid = b.consumer_uuid
    AND b.event_info_event_Time <= a.checkout_Time
    where a.consumer_uuid in (select consumer_uuid from karma_v2)
    qualify ROW_NUMBER() OVER (
            PARTITION BY a.consumer_uuid, a.checkout_Time
            ORDER BY b.event_info_event_Time DESC
        ) =1);'''
)

In [0]:
# Variant of the eligibility join that attaches the EARLIEST eligibility row
# whose event time is at or before the checkout time (ORDER BY ... asc).
# The window is partitioned on the attempt side (a.consumer_uuid): when no
# eligibility row precedes the checkout, the LEFT JOIN leaves b.* NULL, and
# partitioning on b.consumer_uuid would put every such attempt sharing a
# checkout time in one partition and keep only one of them.
# NOTE(review): reads `karma_v2`, while the flag table created in this
# notebook is `jobyg_US_karma_v2` - confirm which table is intended.
conn.execute('''create or replace  table ap_cur_frdrisk_g.public.sup_attempts_eligibility_joined_v2 as (
    select a.*, b.event_Date as sup_Risk_filter_date, b.tenure_grp, b.actions, b.no_of_merchants_new, b.eligbility_tier, b.vivid_seats_flag, b.lowes_flag, b.sams_club_flag, b.apple_flag, b.walmart_Flag, b.lululemon_flag, b.amazon_flag, b.best_buy_flag,
    case when vivid_seats_flag = 1 or lowes_flag = 1 or sams_club_flag =1 or apple_flag = 1 or walmart_Flag =1 or lululemon_flag =1 or amazon_flag =1 or best_buy_flag =1 then 1 else 0 end as hrm_enabled_flag
    from sup_attempts a 
    left join  karma_v2 b
    on a.consumer_uuid = b.consumer_uuid
    AND b.event_info_event_Time <= a.checkout_Time
    where a.consumer_uuid in (select consumer_uuid from karma_v2)
    qualify ROW_NUMBER() OVER (
            PARTITION BY a.consumer_uuid, a.checkout_Time
            ORDER BY b.event_info_event_Time asc
        ) =1);'''
)

In [0]:
# Same join as the eligibility tables, restricted to consumers present in
# med_high_us_users: each checkout attempt gets the MOST RECENT eligibility
# row at or before the checkout time.
# The window is partitioned on the attempt side (a.consumer_uuid): when no
# eligibility row precedes the checkout, the LEFT JOIN leaves b.* NULL, and
# partitioning on b.consumer_uuid would put every such attempt sharing a
# checkout time in one partition and keep only one of them.
conn.execute('''create or replace  table ap_cur_frdrisk_g.public.sup_attempts_med_high_risk_users as (
    select a.*, b.event_Date as sup_Risk_filter_date, b.tenure_grp, b.actions, b.no_of_merchants_new, b.eligbility_tier, b.vivid_seats_flag, b.lowes_flag, b.sams_club_flag, b.apple_flag, b.walmart_Flag, b.lululemon_flag, b.amazon_flag, b.best_buy_flag,
    case when vivid_seats_flag = 1 or lowes_flag = 1 or sams_club_flag =1 or apple_flag = 1 or walmart_Flag =1 or lululemon_flag =1 or amazon_flag =1 or best_buy_flag =1 then 1 else 0 end as hrm_enabled_flag
    from sup_attempts a 
    left join  med_high_us_users b
    on a.consumer_uuid = b.consumer_uuid
    AND b.event_info_event_Time <= a.checkout_Time
    where a.consumer_uuid in (select consumer_uuid from med_high_US_users)
    qualify ROW_NUMBER() OVER (
            PARTITION BY a.consumer_uuid, a.checkout_Time
            ORDER BY b.event_info_event_Time DESC
        ) =1);'''
)

In [0]:
# Consumers, order tokens and GPV by risk level and Amazon flag.
#   'H'    - batch model score >= 800
#   'M'    - score between 700 and 800, or score = -999 together with a
#            whitepages identity check score <= 350
#   'null' - everything else (a literal string, not SQL NULL)
# Only the two dimension columns are grouped: position 3 is an aggregate and
# cannot appear in GROUP BY.  The parentheses spell out the existing AND/OR
# precedence without changing it.
test = conn.download('''
             select 
                case when bp_c_batch_consumer_batch_model_v1 >= 800  then 'H'
                     when  bp_c_batch_consumer_batch_model_v1 between 700 and 800 or 
                     (bp_c_batch_consumer_batch_model_v1 = -999 and whitepages_identity_check_score <= 350) then 'M'
                     else 'null' end as risk_level,
                amazon_Flag,
                count(distinct(consumer_uuid)) as consumer_ct,
                count(distinct(order_token)) as token_ct,
                sum(order_amount_local) as gpv
                from sup_attempts_med_high_risk_users
                group by 1,2
                order by 1,2;
             ''')

In [0]:
# Pull every row for one consumer to inspect it by hand.
# NOTE(review): reads `US_karma`, while the base table created in this
# notebook is `jobyg_US_karma` - confirm which table is intended.
test = conn.download('''select * from US_karma where consumer_uuid = 'd5580f54-008e-4a03-becc-54f4af78e4a0'  ''')

In [0]:
# Show that consumer's rows ordered by event date.
test.sort_values(by='event_date')

In [0]:

## Find all medium- and high-risk consumers who had access to Amazon, Walmart, and Best Buy

In [0]:
import hashlib
def _track_key(consumer_uuid, salt):
    test_id = str(consumer_uuid) + str(salt)
    test_id_hashed =hashlib.md5(test_id.encode('ascii')).hexdigest()
    test_id_first_digits = test_id_hashed[:6]
    test_id_final_int = int(test_id_first_digits,16)
    test_key = (test_id_final_int/0xFFFFFF)
    return(test_key)

# Example: compute the track key for one consumer id and salt.
consumer_id = '733ff59c-a307-4c04-bf89-5ac3bc8c7d3e'
salt = 'eligibility_and_rule_sup'

a = _track_key(consumer_id, salt)

a