In [1]:
# Python libraries
import numpy as np
import pandas as pd
# import datetime

# 3rd party libraries
import ness

# Internal imports
from data_manager import to_numeric, create_agg_var, two_date_cols_diff, days_from_today, last_row, create_unpaid_cols

In [2]:
pd.options.display.max_columns = 200
# pd.options.display.max_rows = 10000

In [3]:
# Set the ness parameters
ness_parameters = {
    'bucket':"data.postpay.io",
    'key':"api",
    'profile':"default"
}
# Instantiate the data lake
dl = ness.dl(**ness_parameters)
# Read/Sync all the tables from the data lake
dl.sync()
# Create data frames from the tables
cart_df = dl.read("cart")
orders_df = dl.read("orders")
customers_df = dl.read("customers")
addresses_df = dl.read("addresses")
instalment_plans_df = dl.read("instalment-plans")
instalments_df = dl.read("instalments")
transactions_df = dl.read("transactions")
refunds_df = dl.read("refunds")

# Instalments_df & instalment_plans_df - PI3 & AE & 'paid' or 'unpaid'

In [4]:
# Convert the anount columns to numeric
to_numeric(df=instalments_df, columns_list=['refunded_amount', 'penalty_fee', 'amount', 'total'])
to_numeric(df=instalment_plans_df, columns_list=['downpayment_amount','total_downpayment', 'total_amount', 'shipping_amount', 'downpayment_refunded_amount'])

In [5]:
# Initial filters
pi3_bool = instalment_plans_df['num_instalments'] == 3
ae_bool = instalment_plans_df['payment_method_country'] == 'AE'

In [33]:
# Create instalment_plans_df for ONLY "pi3" and "AE"
pi3_ae_instalment_plans_df = instalment_plans_df[pi3_bool & ae_bool]

In [7]:
# Create ID's table for ONLY "pi3" and "AE"
pi3_ae_instalment_plans_id_df = instalment_plans_df[pi3_bool & ae_bool][['customer_id', 'instalment_plan_id', 'order_id']]

In [8]:
# Create instalments_df for ONLY "pi3" and "AE"
pi3_ae_instalments_df = pi3_ae_instalment_plans_id_df.merge(instalments_df, how='left', on='instalment_plan_id')

In [9]:
# Create filtered df by the last instalment per instalment_plan_id for ONLY "pi3" and "AE"
instalments_df_max_inst_number = last_row(
    df=pi3_ae_instalments_df,
    groupby_col='instalment_plan_id',
    by_last_col='order'
    )

In [10]:
# Get instalments_df with only last instalment
instalments_df_last_inst = instalments_df_max_inst_number.merge(pi3_ae_instalments_df, how='left', on=['instalment_plan_id', 'order'])

In [11]:
# Create colun with number of days between today and the scheduled date
instalments_df_last_inst_d = days_from_today(
    df = instalments_df_last_inst,
    new_col_name = 'days_since_scheduled',
    date_col = 'scheduled',
    drop_orig_col = False,
)

## Here we remove the immature data (orders not passed scheduled date)

In [12]:
# # Filter the df by mature data: passed the scheduled date and only ('paid' or 'unpaid')
# last_mature_pi3_ae_df = instalments_df_last_inst_d[(instalments_df_last_inst_d['days_since_scheduled'] >= 0) & (instalments_df_last_inst_d['status'].isin(['paid', 'unpaid']))]

In [13]:
# Filter the df by mature data: only ('paid' or 'unpaid')
last_mature_pi3_ae_df = instalments_df_last_inst_d[(instalments_df_last_inst_d['status'].isin(['paid', 'unpaid']))]

In [14]:
# unique instalment_plan_id for pi3 & AE & passed the scheduled date & only ('paid' or 'unpaid')
unique_id_pi3_ae_paid_unpaid = last_mature_pi3_ae_df['instalment_plan_id'].unique()

In [15]:
# Create the new instalments_df table with the entire instalments per instalment_plan_id
inst_pi3_ae_paid_unpaid_df = pi3_ae_instalments_df[pi3_ae_instalments_df['instalment_plan_id'].isin(unique_id_pi3_ae_paid_unpaid)]

# Continue with creating the unpaid variables

In [16]:
# Create unpaid variables per instalment_plan_id
unpaid_per_instalment_plan_id = create_agg_var(
    df=inst_pi3_ae_paid_unpaid_df,
    condition_column='status',
    condition_cat='unpaid',
    groupby_col='instalment_plan_id',
    orig_cols=['amount', 'total'],
    new_col_names=['total_unpaid_amount', 'total_unpaid_total'],
    agg_fnc='sum'
    )

In [17]:
# Create penalty variable per instalment_plan_id
penalty_fees_per_instalment_plan_id = create_agg_var(
    df=inst_pi3_ae_paid_unpaid_df,
    groupby_col='instalment_plan_id',
    orig_cols=['penalty_fee'],
    new_col_names=['total_penalty_fee'],
    agg_fnc='sum'
    )

In [18]:
# Create refunds variable per order_id
redunds_per_order_id = create_agg_var(
    df=refunds_df,
    groupby_col='order_id',
    orig_cols=['amount'],
    new_col_names=['total_refunds'],
    agg_fnc='sum'
    )

In [19]:
# Create new days diff columns

# Scheduled-Complete Diff
inst_pi3_ae_paid_unpaid_d1_df = two_date_cols_diff(
    df=inst_pi3_ae_paid_unpaid_df,
    new_col_name='days_scheduled_completed',
    first_col='scheduled',
    second_col='completed'
    )

# Scheduled-Today Diff
inst_pi3_ae_paid_unpaid_d2_df = days_from_today(
    df=inst_pi3_ae_paid_unpaid_d1_df,
    new_col_name='days_since_scheduled',
    date_col='scheduled'
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new[new_col_name] = (
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_new[new_col_name] = (


In [20]:
# Create filtered df by the last instalment per instalment_plan_id
instalments_df_max_inst_number = last_row(
    df=inst_pi3_ae_paid_unpaid_d2_df,
    groupby_col='instalment_plan_id',
    by_last_col='order'
    )

In [21]:
# Join instalments_df to instalments_df_max_inst_number
last_inst_df = pd.merge(instalments_df_max_inst_number, inst_pi3_ae_paid_unpaid_d2_df, how='left', on=['instalment_plan_id', 'order']).rename(columns={'completed':'inst_completed_date', 'status':'inst_status'})

In [22]:
# Join unpaid_per_instalment_plan_id to last_inst_df
last_inst_df_with_unpaid = last_inst_df.merge(unpaid_per_instalment_plan_id, how='left', on=['instalment_plan_id']).merge(penalty_fees_per_instalment_plan_id, how='left', on=['instalment_plan_id'])

In [23]:
col_to_keep = [
    'instalment_plan_id',
    'customer_id',
    'order_id',
    # 'inst_status',
    'days_scheduled_completed',
    'days_since_scheduled',
    'total_unpaid_amount',
    'total_unpaid_total',
    'total_penalty_fee'
]
last_inst_df_with_unpaid = last_inst_df_with_unpaid[col_to_keep]

In [44]:
first_joined_postpay_df = customers_df.groupby(by="customer_id")["created"].min().reset_index().rename(columns={"created": "first_postpay_order"})

In [45]:
first_joined_postpay_df

Unnamed: 0,customer_id,first_postpay_order
0,64,2020-04-23 16:01:56.534361
1,68,2020-04-26 11:22:25.708620
2,71,2020-04-26 12:25:32.492322
3,73,2020-04-30 01:28:47.737338
4,76,2020-04-30 21:40:59.613117
...,...,...
53518,53904,2021-11-30 00:07:33.159666
53519,53905,2021-11-30 00:08:09.744716
53520,53906,2021-11-30 00:11:45.214359
53521,53907,2021-11-30 00:11:49.542261


In [34]:
# first_pi3_order_df = pi3_ae_instalment_plans_df.groupby(by="customer_id")["created"].min().reset_index().rename(columns={"created": "first_pi3_order"})

In [46]:
pi3_ae_instalment_plans_df = pi3_ae_instalment_plans_df.merge(first_joined_postpay_df, how="left", on="customer_id")

In [47]:
pi3_ae_instalment_plans_df

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,completed,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,first_pi3_order,is_returning,first_postpay_order
0,2021-05-02 02:35:13.690649,a5ec3e6bec144d0eabfcf6ca454a8fe4,315.33,20790.0,073bf4f5cadf4f70851675931c6fd4e5,315.33,2021-05-02 02:34:15.699920,visa,funded,2025-10-31,20790.0,946.00,214,9338.0,AED,checkoutV2,784199853242727,2021-05-02 02:33:56.766680,NaT,AE,0.0,FLAEHDE0538711-608e0f7c295bf,Footlocker,2021-06-02 11:42:37.241234,14654,,2021-05-02 02:16:32,2021-05-02 02:35:22.805498,28266,default,thekingak47@live.com,captured,3,15773.0,NaT,debit,1998-03-01,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_4_2 like...,971544747417,2001:8f8:1727:7c10:186c:7355:bb08:2989,0.022,0.5,2021-05-02 02:35:13.690649,False,2021-05-02 02:33:56.754755
1,2021-10-01 13:59:30.149380,e5446f3c3d80493486c016e631847f43,72.53,52897.0,2f417b2266ba16b1f39112c62559ac9a,72.53,2021-10-01 13:59:25.672952,mastercard,funded,2026-07-31,52897.0,217.60,270,26316.0,AED,checkoutV2,784198049595360,2021-10-01 13:55:53.211809,NaT,AE,0.0,AEAEHDE0568976-6157133d1ca89,American Eagle,NaT,37867,,2021-10-01 11:57:48,2021-10-01 13:59:39.521883,73503,seamless,Keliru_sphinx@yahoo.com,captured,3,43801.0,NaT,debit,,0.0,Mozilla/5.0 (Linux; Android 11; SM-N986B Build...,971501631098,2001:8f8:1363:a330:a58b:dd7b:a59b:1c39,0.022,0.5,2021-10-01 13:59:30.149380,False,2021-10-01 13:55:53.190460
2,2021-07-01 07:35:38.289312,cfecd53243e94c81b15461947a0f1b89,111.67,30710.0,2d7fb18bc3eddff9df55cd961955177c,111.67,2021-07-01 07:34:36.891851,mastercard,funded,2024-05-31,30710.0,335.00,280,13871.0,AED,checkoutV2,784198764376921,2021-07-01 07:33:16.292938,NaT,AE,0.0,VSAEHDE1786492-60dd6facc9fc5,Victoria's Secret,2021-09-01 16:03:36.033232,22158,,2018-11-30 10:28:01,2021-07-01 07:35:47.522226,41746,default,franciaprincesskarmela@yahoo.com.ph,captured,3,23986.0,NaT,credit,1987-08-15,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like M...,971505948015,2001:8f8:172d:96f7:845f:317a:598f:e9f6,0.022,0.5,2021-07-01 07:35:38.289312,False,2021-07-01 07:33:16.284137
3,2021-11-17 19:20:47.656802,6bcf125249074995a3aac47d32a1c0f1,70.00,76529.0,27997d716aa7f8dcc47a0bd4e346bbf8,70.00,2021-11-17 19:20:43.790176,mastercard,funded,2026-11-30,76529.0,210.00,103,44137.0,AED,checkoutV2,784199003294867,2021-11-17 19:19:10.747879,NaT,AE,0.0,924455-6195559d8ebac,Squat Wolf,NaT,56994,,2021-08-19 19:11:11,2021-11-17 19:20:56.108753,109348,seamless,raana_90@hotmail.com,captured,3,65753.0,NaT,credit,,0.0,Mozilla/5.0 (Linux; Android 11; SM-N980F) Appl...,971552877706,2001:8f8:1125:4e66:d952:485e:b784:db99,0.022,0.5,2021-11-17 19:20:47.656802,False,2021-11-17 19:19:10.730467
4,2021-06-04 02:04:53.894458,e77a76105e6a438199d679d4c81f4937,66.67,26044.0,1f7214019e70affc376db3587c423589,66.67,2021-06-04 02:03:30.884191,visa,funded,2026-07-31,26044.0,200.00,280,11959.0,AED,checkoutV2,784198379652922,2021-06-04 02:01:46.235462,NaT,AE,0.0,VSAEHDE1772584-60b9896d5bff3,Victoria's Secret,2021-08-05 03:36:51.958389,18992,,2021-06-04 01:58:11,2021-06-04 02:05:03.258619,35922,default,aleliwee@gmail.com,captured,3,20267.0,NaT,debit,1983-11-18,0.0,Mozilla/5.0 (Linux; Android 11; SM-A515F) Appl...,971551250023,87.200.183.94,0.022,0.5,2021-06-04 02:04:53.894458,False,2021-06-04 02:01:46.224867
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38026,2021-02-13 08:47:26.868956,5e5ce4004e394696a2f07902d6cea06d,171.50,,896bbc47b9638cf05bc7bfa31d7ae392,171.50,2021-02-13 08:46:08.779851,visa,funded,2022-09-30,9251.0,514.50,161,4501.0,AED,checkout,784197296535947,2021-02-13 08:44:44.775681,NaT,AE,0.0,PPC-000002669,The Entertainer,2021-04-13 02:00:51.772486,7200,,NaT,2021-02-13 08:47:35.938060,13733,default,berish74@gmail.com,captured,3,6219.0,NaT,debit,1972-05-24,0.0,Mozilla/5.0 (Linux; Android 11; SM-G975F Build...,971525460572,5.31.169.92,0.026,0.5,2021-02-13 08:47:26.868956,False,2021-02-13 08:44:44.769413
38027,2021-10-30 09:20:25.235976,927bdc7e2eaa43d28c217fcb71c44454,128.70,51527.0,65d3d2b20b710daadd57072607b1aa3b,128.70,2021-10-30 09:17:19.198514,visa,funded,2023-05-31,51527.0,386.10,271,25355.0,AED,checkoutV2,784197841320738,2021-10-30 09:17:19.233227,NaT,AE,0.0,BWAEHDE5162396-617d0d8cd5242,Bath and Body Works,NaT,47480,,2020-07-27 10:57:05,2021-10-30 09:20:34.260524,92186,default,hurtada_leahme@yahoo.com,captured,3,42534.0,NaT,credit,,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_8 like M...,971565455729,91.74.255.82,0.022,0.5,2021-09-28 13:01:38.176137,True,2021-09-28 13:00:30.378439
38028,2021-10-06 06:12:19.800821,927bdc7e2eaa43d28c217fcb71c44454,105.00,51527.0,3568286f28a378692e4898fd037c671e,105.00,2021-10-06 06:11:43.408410,visa,funded,2023-05-31,51527.0,315.00,271,25355.0,AED,checkoutV2,784197841320738,2021-10-06 06:11:43.440479,NaT,AE,0.0,BWAEHDE5020764-615d3df95d4b2,Bath and Body Works,NaT,39467,,2020-07-27 10:57:05,2021-10-06 06:12:28.868434,76576,default,hurtada_leahme@yahoo.com,captured,3,42534.0,NaT,credit,,0.0,Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.3...,971565455729,91.74.255.82,0.022,0.5,2021-09-28 13:01:38.176137,True,2021-09-28 13:00:30.378439
38029,2021-08-25 07:42:41.148955,7b4b2fdb0d5e4d828cdd53e102d1487a,42.07,40824.0,f10930a52d4078baa3e5e3e234de95c4,42.07,2021-08-25 07:42:37.363473,visa,funded,2025-12-31,40820.0,126.22,273,18901.0,AED,checkoutV2,784197962791691,2021-08-25 07:41:40.189211,NaT,AE,0.0,HMAESSE2943669-6125f42039916,H&M,2021-10-25 07:44:32.823872,29368,,2019-11-30 10:01:18,2021-08-25 07:42:49.891018,55614,seamless,dunsten.sebastian@gmail.com,captured,3,32680.0,NaT,credit,,0.0,Mozilla/5.0 (Windows NT 6.1; Win64; x64) Apple...,971507705946,5.107.2.167,0.022,0.5,2021-08-25 07:42:41.148955,False,2021-08-25 07:37:56.561836


In [48]:
# Create is_returning variable
pi3_ae_instalment_plans_df['is_returning'] = np.where((pi3_ae_instalment_plans_df['created'] - pi3_ae_instalment_plans_df['first_postpay_order']).dt.days < 1, False, True)

In [49]:
pi3_ae_instalment_plans_df[pi3_ae_instalment_plans_df['customer_id'] == 868.0]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,completed,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,first_pi3_order,is_returning,first_postpay_order
14333,2021-07-28 07:02:53.016673,ca657bc840aa49509c0f17d7a3fe2423,113.33,35497.0,5f270cc0b49f94f2f15dd7fba9c5ea8e,113.33,2021-07-28 07:02:14.531153,mastercard,funded,2025-07-31,35496.0,340.0,273,868.0,AED,checkout,784198315497523,2021-07-28 07:00:57.961985,NaT,AE,0.0,HMAESSE2878922-6101005ed9ee9,H&M,2021-08-28 07:11:21.631681,25603,,2021-07-28 06:55:07,2021-07-28 07:03:02.517023,47979,seamless,gemmaacebey@gmail.com,captured,3,28156.0,NaT,credit,1983-05-26,0.0,Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKi...,971561796774,212.185.0.191,0.026,0.5,2021-01-22 09:54:06.283458,True,2020-09-24 06:16:56.709862
14334,2021-01-22 09:54:06.283458,ca657bc840aa49509c0f17d7a3fe2423,138.25,,b1ee3aea2e2fc79c5a85668338d880f0,138.25,2021-01-22 09:53:57.330704,mastercard,funded,2025-07-31,9013.0,414.75,161,868.0,AED,checkout,784198315497523,2021-01-22 09:52:46.537164,NaT,AE,0.0,PPC-000000862,The Entertainer,2021-03-22 02:00:44.389009,5303,,2017-05-30 08:14:33,2021-01-22 09:54:14.838481,10170,default,gemmaacebey@gmail.com,captured,3,6019.0,NaT,credit,1983-05-26,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like M...,971561796774,94.201.217.128,0.026,0.5,2021-01-22 09:54:06.283458,True,2020-09-24 06:16:56.709862
14335,2021-01-22 10:02:01.805665,ca657bc840aa49509c0f17d7a3fe2423,138.25,,b1ee3aea2e2fc79c5a85668338d880f0,138.25,2021-01-22 10:01:58.229500,mastercard,funded,2025-07-31,9013.0,414.75,161,868.0,AED,checkout,784198315497523,2021-01-22 10:01:58.341500,NaT,AE,0.0,PPC-000000866,The Entertainer,2021-03-22 02:00:46.600672,5305,,2017-05-30 08:14:33,2021-01-22 10:02:09.911269,10174,default,gemmaacebey@gmail.com,captured,3,6019.0,NaT,credit,1983-05-26,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like M...,971561796774,94.201.217.128,0.026,0.5,2021-01-22 09:54:06.283458,True,2020-09-24 06:16:56.709862


In [39]:
# Join the unpaid table to the instalment_plans_df
instalment_plans_unp_df = pd.merge(last_inst_df_with_unpaid, pi3_ae_instalment_plans_df[pi3_ae_instalment_plans_df.columns[~pi3_ae_instalment_plans_df.columns.isin(['order_id', 'customer_id', 'date_of_birth'])]], how='left', on=['instalment_plan_id']).rename(columns={"completed": "inst_plan_completed_date", "status": "inst_plan_status"})

In [40]:
# Join the refunds_df to the instalment_plans_unp_df
instalment_plans_unp_ref_df = pd.merge(instalment_plans_unp_df, redunds_per_order_id, how='left', on=['order_id'])

In [28]:
# Create nr_of_items variable per order_id
cart_total_order_df = create_agg_var(
    df=cart_df,
    groupby_col='order_id',
    orig_cols=['qty'],
    new_col_names=['nr_of_items'],
    agg_fnc='sum'
    )

In [29]:
# Join the cart_df to the instalment_plans_unp_df
instalment_plans_unp_ref_cart_df = pd.merge(instalment_plans_unp_ref_df, cart_total_order_df, how='left', on=['order_id'])

# Recreate the unpaid status variable

In [30]:
instalment_plans_unp_ref_cart_df['unpaid_status'] = np.where(instalment_plans_unp_ref_cart_df['total_unpaid_amount'] > 0, 'unpaid', 'paid')

# Create the Unpaids - For real

In [31]:
# Create the unpaid columns
create_unpaid_cols(
    df=instalment_plans_unp_ref_cart_df,
    unpaid_col_names=['unpaid_at_due', 'unpaid_at_5', 'unpaid_at_10', 'unpaid_at_20', 'unpaid_at_30', 'unpaid_at_60', 'unpaid_at_90'],
    condition_col='unpaid_status',
    col_to_check_paid='days_scheduled_completed',
    col_to_check_unpaid='days_since_scheduled',
    col_to_use_paid='total_amount',
    col_to_use_unpaid='total_unpaid_amount'
    )

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_due"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_5"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_10"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_20"][i] = 0
A value is trying to be set on a copy of a slice from a DataFram

In [47]:
# Keep only relevant variables (for my models)
columns_to_drop = [
    "payment_method_fingerprint",
    "downpayment_amount",
    "billing_address_id",
    "device_fingerprint",
    "total_downpayment",
    "checkout_completed",
    "merchant_id",
    "plan",
    "shipping_address_id",
    # "total_amount",
    "currency",
    "id_number",
    "checkout_verified",
    "cancelled",
    "shipping_amount",
    "reference",
    "inst_plan_completed_date",
    "payment_interval",
    "status_changed",
    "customer_email",
    "inst_plan_status",
    "shipping_id",
    "customer_blacklisted",
    "downpayment_refunded_amount",
    "phone",
    "ip_address",
    "transaction_cost_rate",
    "transaction_cost_amount",
    # "order",
    # "refunded_amount",
    # "penalty_fee",
    # "amount",
    # "scheduled",
    # "inst_completed_date",
    # "total",
    "days_scheduled_completed",
    "days_since_scheduled",
    "total_unpaid_amount",
    "total_unpaid_total",
    "gateway_name",
    "payment_method_country",
    "first_postpay_order",
    "num_instalments",
    "unpaid_status",
    "customer_date_joined",
]
df_mature_at_due = instalment_plans_unp_ref_cart_df.drop(columns=columns_to_drop)

# Calculate the unpaids again

In [32]:
# Unpaid rate as of today - pi3 & AE & passed the scheduled date & only ('paid' or 'unpaid')
(instalment_plans_unp_ref_cart_df['total_unpaid_amount'].sum() / instalment_plans_unp_ref_cart_df['total_amount'].sum()) * 100

2.8924672094506736

In [33]:
for u in ['unpaid_at_due', 'unpaid_at_5', 'unpaid_at_10', 'unpaid_at_20', 'unpaid_at_30', 'unpaid_at_60', 'unpaid_at_90']:
    mature_df = instalment_plans_unp_ref_cart_df[pd.notnull(instalment_plans_unp_ref_cart_df[u])]
    print(f"{u}: {(mature_df[u].sum() / mature_df['total_amount'].sum()) * 100}")

unpaid_at_due: 10.641644896903934
unpaid_at_5: 5.133833765260647
unpaid_at_10: 3.752161876531726
unpaid_at_20: 2.5702559173082196
unpaid_at_30: 2.125905974170692
unpaid_at_60: 1.5940417623275929
unpaid_at_90: 0.9788269282605024


In [61]:
instalment_plans_unp_ref_cart_df['checkout_type'].value_counts()

default         13769
seamless         2211
postpay-card       42
Name: checkout_type, dtype: int64

In [34]:
instalment_plans_unp_ref_cart_df['is_returning'].value_counts()

True     10204
False     5818
Name: is_returning, dtype: int64

In [38]:
returning_cst = instalment_plans_unp_ref_cart_df[instalment_plans_unp_ref_cart_df['is_returning'] == True]

In [39]:
for u in ['unpaid_at_due', 'unpaid_at_5', 'unpaid_at_10', 'unpaid_at_20', 'unpaid_at_30', 'unpaid_at_60', 'unpaid_at_90']:
    mature_df = returning_cst[pd.notnull(returning_cst[u])]
    print(f"{u}: {(mature_df[u].sum() / mature_df['total_amount'].sum()) * 100}")

unpaid_at_due: 10.062246818216119
unpaid_at_5: 4.937358153903105
unpaid_at_10: 3.6260255070518124
unpaid_at_20: 2.5576862874480955
unpaid_at_30: 2.1814665319849174
unpaid_at_60: 1.6384290546436249
unpaid_at_90: 0.9973724926406647


In [40]:
new_cst = instalment_plans_unp_ref_cart_df[instalment_plans_unp_ref_cart_df['is_returning'] == False]

In [41]:
for u in ['unpaid_at_due', 'unpaid_at_5', 'unpaid_at_10', 'unpaid_at_20', 'unpaid_at_30', 'unpaid_at_60', 'unpaid_at_90']:
    mature_df = new_cst[pd.notnull(new_cst[u])]
    print(f"{u}: {(mature_df[u].sum() / mature_df['total_amount'].sum()) * 100}")

unpaid_at_due: 11.614325522954994
unpaid_at_5: 5.465015897412039
unpaid_at_10: 3.96619620727623
unpaid_at_20: 2.591675628719696
unpaid_at_30: 2.03104390072919
unpaid_at_60: 1.5182379361997063
unpaid_at_90: 0.9471641669383312


In [45]:
instalment_plans_unp_ref_cart_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16022 entries, 0 to 16021
Data columns (total 59 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   instalment_plan_id           16022 non-null  int64         
 1   customer_id                  16022 non-null  float64       
 2   order_id                     16022 non-null  int32         
 3   days_scheduled_completed     15565 non-null  float64       
 4   days_since_scheduled         16022 non-null  int64         
 5   total_unpaid_amount          467 non-null    float64       
 6   total_unpaid_total           467 non-null    float64       
 7   total_penalty_fee            16022 non-null  float64       
 8   created                      16022 non-null  datetime64[ns]
 9   payment_method_fingerprint   16022 non-null  object        
 10  downpayment_amount           16022 non-null  float64       
 11  billing_address_id           10925 non-nu

In [46]:
instalment_plans_unp_ref_cart_df['device_fingerprint']

0        4dcbfee3bc994c2385f76cfa8afd720d
1        4f16ed23967b4fcfb0a15671f649af1f
2        8eb1eea41241481aafc471c6596274ae
3        f165d02e2c6240aa9f5e9f4af696d33d
4        43a329d05e274448ad5d5133a40faf3c
                       ...               
16017    b0c05242f1154a7dbab0dc7835de3417
16018    b0c05242f1154a7dbab0dc7835de3417
16019    b0c05242f1154a7dbab0dc7835de3417
16020    1af70be9b4504b02aa1eeb70091836ef
16021    4d35743d20b2457d97fbdad00ef636aa
Name: payment_method_fingerprint, Length: 16022, dtype: object

In [48]:
df_mature_at_due

Unnamed: 0,instalment_plan_id,customer_id,order_id,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,created,payment_method_brand,payment_method_expires,total_amount,gateway_name,payment_method_country,merchant_name,customer_date_joined,checkout_type,num_instalments,payment_method_type,date_of_birth,user_agent,is_returning,total_refunds,nr_of_items,unpaid_status,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
0,4413,2361.0,8436,259,,,0.0,2021-01-08 15:10:24.680792,mastercard,2024-09-30,519.75,checkout,AE,The Entertainer,NaT,default,3,credit,1990-10-16,Mozilla/5.0 (Linux; Android 10; SM-G960F Build...,False,,1.0,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4466,2391.0,8543,257,,,25.0,2021-01-10 12:09:57.871553,visa,2025-10-31,409.50,checkout,AE,The Entertainer,NaT,default,3,credit,1988-09-08,Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like M...,False,,1.0,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4467,2391.0,8547,257,,,25.0,2021-01-10 12:15:43.086929,visa,2024-04-30,309.75,checkout,AE,The Entertainer,NaT,default,3,debit,1988-09-08,Mozilla/5.0 (iPhone; CPU iPhone OS 14_3 like M...,False,,1.0,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4469,2393.0,8551,257,,,0.0,2021-01-10 12:33:42.173393,visa,2022-11-30,309.75,checkout,AE,The Entertainer,NaT,default,3,credit,1986-12-06,Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5...,False,,1.0,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4492,2408.0,8594,256,,,0.0,2021-01-11 08:42:29.724145,visa,2022-12-31,414.75,checkout,AE,The Entertainer,NaT,default,3,debit,1979-11-14,Mozilla/5.0 (Linux; Android 9; SM-N950F Build/...,False,,1.0,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16017,58334,25487.0,111682,-59,,,0.0,2021-11-20 18:58:46.319082,mastercard,2023-05-31,117.50,checkoutV2,AE,Mothercare,2021-11-13 22:20:22,default,3,debit,,Mozilla/5.0 (iPhone; CPU iPhone OS 14_1 like M...,True,,5.0,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16018,58371,25487.0,111731,-59,,,0.0,2021-11-20 19:56:29.973159,mastercard,2023-05-31,106.00,checkoutV2,AE,Bath and Body Works,2021-09-23 05:42:21,default,3,debit,,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,True,,8.0,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16019,58386,25487.0,111762,-59,,,0.0,2021-11-20 20:34:25.738560,mastercard,2023-05-31,143.00,checkoutV2,AE,LANDMARK RETAIL,NaT,postpay-card,3,debit,,,False,,,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16020,58553,29540.0,112043,-60,,,0.0,2021-11-21 08:16:02.644516,mastercard,2023-10-31,1330.10,checkoutV2,AE,Amazon,NaT,postpay-card,3,credit,1986-03-11,,False,,,paid,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [49]:
df_mature_at_due['gateway_name'].value_counts()

checkoutV2    13290
checkout       2732
Name: gateway_name, dtype: int64