In [1]:
# Python libraries
import numpy as np
import pandas as pd
# import datetime

# 3rd party libraries
import ness

# Internal imports
from data_manager import to_numeric, create_agg_var, two_date_cols_diff, days_from_today, last_row, create_unpaid_cols

In [2]:
pd.options.display.max_columns = 200
# pd.options.display.max_rows = 10000

In [3]:
# Set the ness parameters
ness_parameters = {
    'bucket':"data.postpay.io",
    'key':"api",
    'profile':"default"
}
# Instantiate the data lake
dl = ness.dl(**ness_parameters)
# Read/Sync all the tables from the data lake
dl.sync()
# Create data frames from the tables
cart_df = dl.read("cart")
orders_df = dl.read("orders")
customers_df = dl.read("customers")
addresses_df = dl.read("addresses")
instalment_plans_df = dl.read("instalment-plans")
instalments_df = dl.read("instalments")
transactions_df = dl.read("transactions")
refunds_df = dl.read("refunds")

In [4]:
# Convert the anount columns to numeric
to_numeric(df=instalments_df, columns_list=['refunded_amount', 'penalty_fee', 'amount', 'total'])

In [76]:
instalments_df[instalments_df['instalment_plan_id'] == 57284]

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
47457,0,0.0,0.0,570.67,57284,2021-12-18,2021-11-19 00:12:05.445246,paid,570.67,-29.0,-29
47458,1,0.0,0.0,570.67,57284,2022-01-18,NaT,due,570.67,,-60


In [5]:
# Create unpaid variables per instalment_plan_id
unpaid_per_instalment_plan_id = create_agg_var(
    df=instalments_df,
    condition_column='status',
    condition_cat='unpaid',
    groupby_col='instalment_plan_id',
    orig_cols=['amount', 'total'],
    new_col_names=['total_unpaid_amount', 'total_unpaid_total'],
    agg_fnc='sum'
    )

In [77]:
unpaid_per_instalment_plan_id[unpaid_per_instalment_plan_id['instalment_plan_id'] == 57284]

Unnamed: 0,instalment_plan_id,total_unpaid_amount,total_unpaid_total


In [6]:
# Create penalty variable per instalment_plan_id
penalty_fees_per_instalment_plan_id = create_agg_var(
    df=instalments_df,
    groupby_col='instalment_plan_id',
    orig_cols=['penalty_fee'],
    new_col_names=['total_penalty_fee'],
    agg_fnc='sum'
    )

In [78]:
penalty_fees_per_instalment_plan_id[penalty_fees_per_instalment_plan_id['instalment_plan_id'] == 57284]

Unnamed: 0,instalment_plan_id,total_penalty_fee
41837,57284,0.0


In [7]:
# Create new days diff columns

# Scheduled-Complete Diff
two_date_cols_diff(
    df=instalments_df,
    new_col_name='days_scheduled_completed',
    first_col='scheduled',
    second_col='completed'
    )

# Scheduled-Today Diff
days_from_today(
    df=instalments_df,
    new_col_name='days_since_scheduled',
    date_col='scheduled'
    )

In [79]:
instalments_df[instalments_df['instalment_plan_id'] == 57284]

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
47457,0,0.0,0.0,570.67,57284,2021-12-18,2021-11-19 00:12:05.445246,paid,570.67,-29.0,-29
47458,1,0.0,0.0,570.67,57284,2022-01-18,NaT,due,570.67,,-60


In [8]:
# Create filtered df by the last instalment per instalment_plan_id
instalments_df_max_inst_number = last_row(
    df=instalments_df,
    groupby_col='instalment_plan_id',
    by_last_col='order'
    )

In [80]:
instalments_df_max_inst_number[instalments_df_max_inst_number['instalment_plan_id'] == 57284]

Unnamed: 0,instalment_plan_id,order
41837,57284,1


In [9]:
# Join instalments_df to instalments_df_max_inst_number
last_inst_df = pd.merge(instalments_df_max_inst_number, instalments_df, how='left', on=['instalment_plan_id', 'order']).rename(columns={'completed':'inst_completed_date', 'status':'inst_status'})

In [81]:
last_inst_df[last_inst_df['instalment_plan_id'] == 57284]

Unnamed: 0,instalment_plan_id,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled
41837,57284,1,0.0,0.0,570.67,2022-01-18,NaT,due,570.67,,-60


In [10]:
# Join unpaid_per_instalment_plan_id to last_inst_df
last_inst_df_with_unpaid = last_inst_df.merge(unpaid_per_instalment_plan_id, how='left', on=['instalment_plan_id']).merge(penalty_fees_per_instalment_plan_id, how='left', on=['instalment_plan_id'])

In [83]:
last_inst_df_with_unpaid[last_inst_df_with_unpaid['instalment_plan_id'] == 57284]

Unnamed: 0,instalment_plan_id,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee
41837,57284,1,0.0,0.0,570.67,2022-01-18,NaT,due,570.67,,-60,,,0.0


In [11]:
# Join the unpaid table to the instalment_plans_df
instalment_plans_unp_df = pd.merge(instalment_plans_df[instalment_plans_df['num_instalments'] == 3], last_inst_df_with_unpaid, how='left', on=['instalment_plan_id']).rename(columns={"completed": "inst_plan_completed_date", "status": "inst_plan_status"})

In [84]:
instalment_plans_unp_df[instalment_plans_unp_df['instalment_plan_id'] == 57284]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,inst_plan_completed_date,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,inst_plan_status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
22794,2021-11-18 14:01:24.345699,1af70be9b4504b02aa1eeb70091836ef,570.67,,,570.67,NaT,mastercard,,2023-10-31,,1712.01,50ad32718bdd42ce87d6570dafef4e0b,23904.0,AED,checkoutV2,784199273763286,NaT,NaT,AE,,781062-1637244081,Sharaf DG,NaT,57284,,NaT,2021-11-18 14:01:22.951915,109838,postpay-card,,captured,3,,NaT,credit,1992-08-29,0.0,,971544702929,,0.022,0.5,1,0.0,0.0,570.67,2022-01-18,NaT,due,570.67,,-60,,,0.0,,,,,,,,,


In [12]:
# Join the refunds_df to the instalment_plans_unp_df
instalment_plans_unp_df = pd.merge(instalment_plans_unp_df, refunds_df[['order_id', 'amount']].rename(columns={'amount':'refund_amount'}), how='left', on=['order_id'])

In [85]:
instalment_plans_unp_df[instalment_plans_unp_df['instalment_plan_id'] == 57284]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,inst_plan_completed_date,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,inst_plan_status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
22794,2021-11-18 14:01:24.345699,1af70be9b4504b02aa1eeb70091836ef,570.67,,,570.67,NaT,mastercard,,2023-10-31,,1712.01,50ad32718bdd42ce87d6570dafef4e0b,23904.0,AED,checkoutV2,784199273763286,NaT,NaT,AE,,781062-1637244081,Sharaf DG,NaT,57284,,NaT,2021-11-18 14:01:22.951915,109838,postpay-card,,captured,3,,NaT,credit,1992-08-29,0.0,,971544702929,,0.022,0.5,1,0.0,0.0,570.67,2022-01-18,NaT,due,570.67,,-60,,,0.0,,,,,,,,,


In [13]:
# Create nr_of_items variable per order_id
cart_total_order_df = create_agg_var(
    df=cart_df,
    groupby_col='order_id',
    orig_cols=['qty'],
    new_col_names=['nr_of_items'],
    agg_fnc='sum'
    )

In [14]:
# Join the cart_df to the instalment_plans_unp_df
instalment_plans_unp_df = pd.merge(instalment_plans_unp_df, cart_total_order_df, how='left', on=['order_id'])

In [87]:
instalment_plans_unp_df[instalment_plans_unp_df['instalment_plan_id'] == 57284]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,inst_plan_completed_date,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,inst_plan_status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
22794,2021-11-18 14:01:24.345699,1af70be9b4504b02aa1eeb70091836ef,570.67,,,570.67,NaT,mastercard,,2023-10-31,,1712.01,50ad32718bdd42ce87d6570dafef4e0b,23904.0,AED,checkoutV2,784199273763286,NaT,NaT,AE,,781062-1637244081,Sharaf DG,NaT,57284,,NaT,2021-11-18 14:01:22.951915,109838,postpay-card,,captured,3,,NaT,credit,1992-08-29,0.0,,971544702929,,0.022,0.5,1,0.0,0.0,570.67,2022-01-18,NaT,due,570.67,,-60,,,0.0,,,,,,,,,


In [15]:
# Create the unpaid columns
create_unpaid_cols(
    df=instalment_plans_unp_df,
    unpaid_col_names=['unpaid_at_due', 'unpaid_at_5', 'unpaid_at_10', 'unpaid_at_20', 'unpaid_at_30', 'unpaid_at_60', 'unpaid_at_90'],
    condition_col='inst_status',
    col_to_check_paid='days_scheduled_completed',
    col_to_check_unpaid='days_since_scheduled',
    col_to_use_paid='total_amount',
    col_to_use_unpaid='total_unpaid_amount'
    )

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_due"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_5"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_10"][i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["unpaid_at_20"][i] = 0
A value is trying to be set on a copy of a slice from a DataFram

In [88]:
instalment_plans_unp_df[instalment_plans_unp_df['instalment_plan_id'] == 57284]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,inst_plan_completed_date,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,inst_plan_status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
22794,2021-11-18 14:01:24.345699,1af70be9b4504b02aa1eeb70091836ef,570.67,,,570.67,NaT,mastercard,,2023-10-31,,1712.01,50ad32718bdd42ce87d6570dafef4e0b,23904.0,AED,checkoutV2,784199273763286,NaT,NaT,AE,,781062-1637244081,Sharaf DG,NaT,57284,,NaT,2021-11-18 14:01:22.951915,109838,postpay-card,,captured,3,,NaT,credit,1992-08-29,0.0,,971544702929,,0.022,0.5,1,0.0,0.0,570.67,2022-01-18,NaT,due,570.67,,-60,,,0.0,,,,,,,,,


In [93]:
instalment_plans_unp_df.shape

(35358, 65)

In [92]:
instalment_plans_unp_df[instalment_plans_unp_df['num_instalments'] == 3]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,inst_plan_completed_date,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,inst_plan_status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
0,2021-11-11 13:40:22.453145,c2168ba8f48b4b728b1a8db201b7c7fe,541.67,72920.0,973ba9e918711784386cbe3b473bbb1c,541.67,2021-11-11 13:40:18.859538,visa,funded,2026-03-31,72920.0,1625.00,209,40500.0,AED,checkoutV2,784199157917255,2021-11-11 13:37:33.945995,NaT,AE,0.00,295832984214082296192877,Ounass,NaT,53500,,NaT,2021-11-11 13:40:33.557085,103444,seamless,sara.j.hamdan@gmail.com,captured,3,62319.0,NaT,debit,,0.00,Mozilla/5.0 (iPhone; CPU iPhone OS 14_4_1 like...,971581771646,91.75.215.152,0.022,0.5,1,0.00,0.0,541.67,2022-01-11,NaT,due,541.67,,-53,,,0.0,,2.0,,,,,,,
1,2021-07-24 12:24:25.953241,0bbd516d9d3841c78db2121a06a73f8c,91.67,34807.0,ba606ad5da0f6b0d9e722d38a7d00a13,91.67,2021-07-24 12:23:39.643465,visa,funded,2026-07-31,34807.0,275.00,214,15950.0,AED,checkoutV2,784197410865048,2021-07-24 12:22:37.474975,NaT,AE,0.00,FLAEHDE0605147-60fc05e3736b7,Footlocker,2021-09-24 12:33:47.692180,25161,,2021-07-23 22:12:36,2021-07-24 12:24:34.530518,47087,default,magnoelizer@gmail.com,captured,3,27563.0,NaT,credit,1974-01-30,0.00,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,971566519794,5.30.141.67,0.022,0.5,1,0.00,0.0,91.67,2021-09-24,2021-09-24 12:33:47.655182,paid,91.67,0.0,56,,,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2021-09-11 11:58:44.413922,72280bb4d5784f789e11e65c82ebd24d,130.05,45861.0,425db8d63ca25389de6338f964b168b8,130.05,2021-09-11 11:58:39.611180,visa,funded,2026-02-28,45861.0,390.15,279,21820.0,AED,checkoutV2,784199058321326,2021-09-11 11:57:07.823328,NaT,AE,0.00,PKAEHDE0088425-613c998139e6e,Pottery Barn Kids,2021-11-11 12:00:31.743115,32819,,2021-09-04 13:41:24,2021-09-11 11:58:53.932230,63045,seamless,ahmed.mohmd.a@gmail.com,captured,3,37273.0,NaT,debit,,0.00,Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like...,971568385050,2001:8f8:132f:df93:1c59:7961:24d0:63f1,0.022,0.5,1,0.00,0.0,130.05,2021-11-11,2021-11-11 12:00:31.708700,paid,130.05,0.0,8,,,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2021-09-14 18:44:39.868046,49b9bd9843944f79aee3ff028dc426bb,186.33,46224.0,97f1edfa706c6d9feb95875c7582fe76,0.00,2021-09-14 18:44:39.182747,visa,funded,2021-11-30,46748.0,559.00,214,22023.0,AED,checkoutV2,784200110646864,2021-09-14 18:44:39.217521,2021-09-19 11:29:35.911652,AE,0.00,FLAESSE0621354-6140ed8c420cb,Footlocker,NaT,33489,,2021-09-12 16:59:16,2021-09-14 18:44:56.463800,64455,default,myasser3132@gmail.com,captured,3,38044.0,NaT,debit,2001-03-30,186.33,Mozilla/5.0 (iPhone; CPU iPhone OS 14_8 like M...,971544337022,2001:8f8:1335:84be:e000:adc5:f27a:fd8e,0.022,0.5,1,186.33,0.0,186.33,2021-11-14,NaT,cancelled,0.00,,5,,,0.0,559.00,1.0,,,,,,,
4,2021-11-06 15:20:45.653042,501c440d38084292bee8de8fff30a366,1658.33,10157.0,55005a574ebd1fc30a942c3a688a0324,1658.33,2021-11-06 15:19:50.955347,visa,funded,2026-07-31,10157.0,4975.00,133,3800.0,AED,checkoutV2,784198593653623,2021-11-06 15:19:33.758840,NaT,AE,0.00,4150845374649,The Loom Collection,NaT,51225,,2021-01-30 15:57:50,2021-11-06 15:20:47.760120,99138,default,caitlinjennawhite@gmail.com,captured,3,59684.0,NaT,credit,1985-03-21,0.00,Mozilla/5.0 (iPhone; CPU iPhone OS 15_1 like M...,971525967797,5.31.196.241,0.022,0.5,1,0.00,0.0,1658.33,2022-01-06,NaT,due,1658.33,,-48,,,0.0,,1.0,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35353,2021-11-03 18:06:27.324776,119f9e3a68aa43939e4c1691488e9bf2,16.67,68077.0,57c501996a4b6bf4a3b02d2f73699bb4,16.67,2021-11-03 18:05:56.348014,visa,funded,2025-01-31,68076.0,50.00,280,10573.0,AED,checkoutV2,784199459427524,2021-11-03 18:05:56.390167,NaT,AE,0.00,VSAESSE1931374-6182cf702b2cc,Victoria's Secret,NaT,49495,,2021-02-10 14:41:35,2021-11-03 18:06:36.696888,95856,default,landichosharmaine@gmail.com,captured,3,57713.0,NaT,debit,1994-03-07,0.00,Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like M...,971563800639,2001:8f8:1a65:7d5a:7dcd:6fc6:5888:6001,0.022,0.5,1,0.00,0.0,16.67,2022-01-03,NaT,due,16.67,,-45,,,0.0,,2.0,,,,,,,
35354,2021-10-09 06:42:39.747006,d6661d6cce4b4c7da507f1a686e2d048,215.00,,a99b80775b23403c628e2599e43852a3,215.00,2021-10-09 06:42:35.337720,visa,funded,2022-09-30,56464.0,645.00,161,28923.0,AED,checkoutV2,784197987371610,2021-10-09 06:41:16.176748,NaT,AE,0.00,PPC-000012866,The Entertainer,NaT,40515,,NaT,2021-10-09 06:42:48.875275,78581,seamless,moe_atieh@yahoo.com,captured,3,47103.0,NaT,credit,,0.00,Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like...,971556904509,87.200.128.76,0.022,0.5,1,0.00,0.0,215.00,2021-12-09,NaT,due,215.00,,-20,,,0.0,,1.0,,,,,,,
35355,2021-10-06 21:27:04.111685,25f89ccdd4af4994be4654e40a539e76,558.33,55466.0,425db8d63ca25389de6338f964b168b8,558.33,2021-10-06 21:25:51.340890,visa,funded,2023-11-30,55466.0,1675.00,215,28164.0,AED,checkoutV2,784199083297079,2021-10-06 21:25:51.371413,NaT,SA,100.00,WEAEHDE0327588-615e1450e6143,West Elm,NaT,39778,,NaT,2021-10-06 21:27:13.893787,77137,default,leenalsudairy6@gmail.com,captured,3,46174.0,NaT,debit,1990-02-15,0.00,Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like...,971545959077,91.73.157.47,0.022,0.5,1,0.00,0.0,558.33,2021-12-06,NaT,due,558.33,,-17,,,0.0,,1.0,,,,,,,
35356,2021-11-09 14:46:31.620758,d5fa858f1c52434b8067f931719a143d,250.00,71631.0,1874f54e2166f7569df8105f7d8765db,250.00,2021-11-09 14:46:03.165212,mastercard,funded,2022-03-31,71631.0,750.00,209,39287.0,SAR,checkoutV3,1045470778,2021-11-09 14:44:00.598406,NaT,SA,0.00,606198589477852133246560,Ounass,NaT,52345,,NaT,2021-11-09 14:46:38.697366,101385,seamless,miss.shgawa@gmail.com,captured,3,61040.0,NaT,debit,,0.00,Mozilla/5.0 (iPhone; CPU iPhone OS 15_0_2 like...,966540014611,31.167.38.180,0.013,0.5,1,0.00,0.0,250.00,2022-01-09,NaT,due,250.00,,-51,,,0.0,,1.0,,,,,,,


In [17]:
# Save a new df with mature (unpaid_at_60 is not null) orders
df_mature_at_due = instalment_plans_unp_df[pd.notnull(instalment_plans_unp_df['unpaid_at_due'])]

In [89]:
df_mature_at_due[df_mature_at_due['instalment_plan_id'] == 57284]

Unnamed: 0,created,payment_method_brand,payment_method_expires,customer_id,gateway_name,payment_method_country,merchant_name,instalment_plan_id,customer_date_joined,order_id,checkout_type,num_instalments,payment_method_type,date_of_birth,user_agent,inst_status,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90


In [18]:
# Keep only relevant variables (for my models)
columns_to_drop = [
    'payment_method_fingerprint',
    'downpayment_amount',
    'billing_address_id',
    'device_fingerprint',
    'total_downpayment',
    'checkout_completed',
    'merchant_id',
    'plan',
    'shipping_address_id',
    'total_amount',
    'currency',
    'id_number',
    'checkout_verified',
    'cancelled',
    'shipping_amount',
    'reference',
    'inst_plan_completed_date',
    'payment_interval',
    'status_changed',
    'customer_email',
    'inst_plan_status',
    'shipping_id',
    'customer_blacklisted',
    'downpayment_refunded_amount',
    'phone',
    'ip_address',
    'transaction_cost_rate',
    'transaction_cost_amount',
    'order',
    'refunded_amount',
    'penalty_fee',
    'amount',
    'scheduled',
    'inst_completed_date',
    'total',
    'days_scheduled_completed',
    ]
df_mature_at_due.drop(columns=columns_to_drop, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [19]:
df_mature_at_due.head()

Unnamed: 0,created,payment_method_brand,payment_method_expires,customer_id,gateway_name,payment_method_country,merchant_name,instalment_plan_id,customer_date_joined,order_id,checkout_type,num_instalments,payment_method_type,date_of_birth,user_agent,inst_status,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
1,2021-07-24 12:24:25.953241,visa,2026-07-31,15950.0,checkoutV2,AE,Footlocker,25161,2021-07-23 22:12:36,47087,default,3,credit,1974-01-30,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,paid,56,,,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2021-09-11 11:58:44.413922,visa,2026-02-28,21820.0,checkoutV2,AE,Pottery Barn Kids,32819,2021-09-04 13:41:24,63045,seamless,3,debit,,Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like...,paid,8,,,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,2021-08-28 12:26:06.183760,visa,2024-08-31,19395.0,checkoutV2,AE,Bath and Body Works,29967,2021-08-28 07:25:00,56849,default,3,debit,1992-02-02,Mozilla/5.0 (Windows NT 10.0; Win64; x64) Appl...,paid,22,,,0.0,,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2021-05-03 07:20:10.133428,visa,2024-10-31,9464.0,checkoutV2,AE,The Entertainer,14855,NaT,28605,default,3,debit,1992-11-03,Mozilla/5.0 (iPhone; CPU iPhone OS 14_5 like M...,paid,139,,,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,2021-09-04 20:00:10.501235,visa,2024-06-30,20634.0,checkoutV2,AE,Bath and Body Works,31484,2021-07-31 02:08:10,60344,seamless,3,debit,,Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like...,paid,15,,,0.0,,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
df_mature_at_due.shape

(16536, 29)

In [45]:
df_mature_at_due.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 16536 entries, 1 to 35357
Data columns (total 29 columns):
 #   Column                  Non-Null Count  Dtype         
---  ------                  --------------  -----         
 0   created                 16536 non-null  datetime64[ns]
 1   payment_method_brand    16536 non-null  object        
 2   payment_method_expires  16536 non-null  object        
 3   customer_id             16536 non-null  float64       
 4   gateway_name            16536 non-null  object        
 5   payment_method_country  16536 non-null  object        
 6   merchant_name           16536 non-null  object        
 7   instalment_plan_id      16536 non-null  int32         
 8   customer_date_joined    10313 non-null  datetime64[ns]
 9   order_id                16536 non-null  int32         
 10  checkout_type           16536 non-null  object        
 11  num_instalments         16536 non-null  int32         
 12  payment_method_type     16536 non-null  object

# More than just unpaid

In [29]:
# Calculate the unpaid rate
print((df_mature_60[df_mature_60['unpaid_at_60'] > 0]['unpaid_at_60'].count() / df_mature_60['unpaid_at_60'].count())*100)


1.461933965186149


In [22]:
instalments_df[instalments_df['instalment_plan_id'] == 194]

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
85212,2,0.0,57.75,314.74,194,2020-08-04,2020-08-27 08:14:56.671049,paid,372.49,23.0,469
85213,1,0.0,0.0,314.74,194,2020-07-04,2020-07-04 02:00:09.905535,paid,314.74,0.0,500
85214,0,0.0,0.0,314.74,194,2020-06-04,2020-06-04 02:00:45.132116,paid,314.74,0.0,530


In [28]:
df_mature_60.head()

Unnamed: 0,created,payment_method_brand,payment_method_expires,customer_id,gateway_name,payment_method_country,merchant_name,instalment_plan_id,customer_date_joined,order_id,checkout_type,num_instalments,payment_method_type,date_of_birth,user_agent,inst_status,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
0,2021-05-14 07:42:50.738431,visa,2022-09-30,10813.0,checkoutV2,AE,Footlocker,16825,2021-04-01 18:29:05,32144,default,3,credit,1985-07-25,Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-N98...,paid,127,,,0.0,,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2021-08-05 17:13:07.203306,visa,2022-12-31,17161.0,checkoutV2,AE,Bath and Body Works,26959,2019-10-14 13:44:45,50495,default,3,debit,1991-12-31,Mozilla/5.0 (iPhone; CPU iPhone OS 12_5_4 like...,paid,44,,,0.0,,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,2021-07-15 19:03:03.040587,visa,2024-09-30,14679.0,checkoutV2,AE,Pottery Barn,24142,2021-05-25 03:30:41,45219,default,3,debit,1997-01-02,Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like M...,paid,64,,,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,2021-09-07 10:16:56.037200,visa,2026-03-31,10561.0,checkoutV2,AE,Bath and Body Works,32041,2021-06-20 06:06:08,61407,default,3,debit,1986-09-22,Mozilla/5.0 (Linux; Android 11; SM-G975F) Appl...,paid,11,,,0.0,,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
10,2021-08-25 13:06:40.725385,visa,2026-03-31,10561.0,checkoutV2,AE,Bath and Body Works,29432,2021-08-25 09:40:32,55747,default,3,debit,1986-09-22,Mozilla/5.0 (Linux; Android 11; SM-G975F) Appl...,paid,24,,,0.0,,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
df_mature_60['inst_status'].value_counts()

paid      15923
unpaid      220
Name: inst_status, dtype: int64

In [31]:
df_mature_60[df_mature_60['customer_id'] == 14042]

Unnamed: 0,created,payment_method_brand,payment_method_expires,customer_id,gateway_name,payment_method_country,merchant_name,instalment_plan_id,customer_date_joined,order_id,checkout_type,num_instalments,payment_method_type,date_of_birth,user_agent,inst_status,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90
11552,2021-07-02 09:57:58.861706,mastercard,2026-01-31,14042.0,checkoutV2,AE,Muji,22383,2020-12-04 14:50:41,42170,default,3,credit,1971-06-24,Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N960...,paid,77,,,0.0,,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [32]:
cart_df[cart_df['order_id'] == 42170].sort_values(by='reference', ascending=False)

Unnamed: 0,image_url,unit_price,item_id,merchant_id,url,description,name,reference,qty,order_id
179745,,122.5,29149,277,,,Degree-Packed Tenjiku Knitting Dress One Size,3104235,1,42170
302882,,122.5,29148,277,,,Degree-Packed Tenjiku Knitting Dress One Size,3104223,1,42170
145679,,70.0,29147,277,,,Complete Seamless Slip Dress,3103857,1,42170
16696,,52.5,29146,277,,,Complete Seamless Petti Pants,3103839,1,42170
209081,,157.5,29145,277,,,Organic Linen Washed Tunic,3103809,1,42170
193102,,157.5,29144,277,,,Adjustable Pants,3103743,1,42170
94094,,175.0,29143,277,,,French Linen Straight Pants Ankle Length,3103599,1,42170
83335,,14.0,29142,277,,,Foaming Net Ball L,3103359,1,42170
287817,,28.0,29141,277,,,Double Fastener Case / M,3103197,1,42170
120294,,31.5,29140,277,,,Double Fastener Case / L,3103173,1,42170


In [33]:
cart_df[cart_df['order_id'] == 42170]['unit_price'].sum()

Decimal('1877.30')

In [34]:
orders_df[orders_df['order_id'] == 42170]

Unnamed: 0,order_id,shipping_id,merchant_id,total_amount,status,status_changed,billing_address_id,created,reference,currency,customer_email,customer_date_joined,merchant_name,shipping_amount,shipping_address_id,customer_id,checkout_type,num_instalments,payment_interval,ip_address,user_agent,rules,device_fingerprint,checkout_verified,checkout_completed
10869,42170,24264.0,277,1720.44,captured,2021-07-02 09:58:08.696653,31045.0,2021-07-02 09:54:56.397326,MUAEHDE0057435-60dee26f86eca,AED,jltalan@yahoo.com,2020-12-04 14:50:41,Muji,0.0,31045.0,14042,default,3.0,,2.48.67.51,Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N960...,,c18ae0a8d7c27e5939bd339805ba6477,2021-07-02 09:55:40.219513,2021-07-02 09:56:59.980946


In [35]:
instalment_plans_df[instalment_plans_df['order_id'] == 42170]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,completed,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount
16459,2021-07-02 09:57:58.861706,f1e4a85f56e848bf8754fe2af1c909eb,573.48,31045.0,c18ae0a8d7c27e5939bd339805ba6477,573.48,2021-07-02 09:56:59.980946,mastercard,funded,2026-01-31,31045.0,1720.44,277,14042.0,AED,checkoutV2,784197131907509,2021-07-02 09:55:40.219513,NaT,AE,0.0,MUAEHDE0057435-60dee26f86eca,Muji,2021-09-02 10:00:49.079225,22383,,2020-12-04 14:50:41,2021-07-02 09:58:08.696653,42170,default,jltalan@yahoo.com,captured,3,24264.0,NaT,credit,1971-06-24,0.0,Mozilla/5.0 (Linux; Android 9; SAMSUNG SM-N960...,971565054341,2.48.67.51,0.022,0.5


In [36]:
instalments_df[instalments_df['instalment_plan_id'] == 22383]

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
89668,1,0.0,0.0,573.48,22383,2021-09-02,2021-09-02 10:00:49.047170,paid,573.48,0.0,77
89669,0,0.0,0.0,573.48,22383,2021-08-02,2021-08-02 10:00:12.148276,paid,573.48,0.0,108


# For later - Make a column for device type

In [71]:
def get_substring(string_value:str, strings_list:list):
    if string_value is None:
        return np.nan
    else:
        for s in strings_list:
            if s in string_value:
                return s

In [None]:
df_mature_60['user_agent'].apply(lambda row: get_substring(row, strings_list = ['iPhone', 'Windows NT', 'Macintosh', 'Android']))

In [None]:
df_mature_60['user_agent'].apply(get_substring, strings_list = ['iPhone', 'Windows NT', 'Macintosh', 'Android'])

In [37]:
instalments_df

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
0,1,0.0,0.0,209.70,22887,2021-09-05,2021-09-05 17:11:47.638448,paid,209.70,0.0,74
1,0,0.0,0.0,209.70,22887,2021-08-05,2021-08-05 17:11:35.015848,paid,209.70,0.0,105
2,1,0.0,0.0,68.25,23165,2021-09-08,2021-09-08 06:22:48.133229,paid,68.25,0.0,71
3,0,0.0,0.0,68.25,23165,2021-08-08,2021-08-08 06:22:35.305137,paid,68.25,0.0,102
4,1,0.0,0.0,65.67,16370,2021-07-12,2021-07-12 02:22:45.617066,paid,65.67,0.0,129
...,...,...,...,...,...,...,...,...,...,...,...
101930,0,0.0,0.0,52.67,56619,2021-12-17,NaT,due,52.67,,-29
101931,0,0.0,0.0,198.33,43370,2021-11-19,NaT,due,198.33,,-1
101932,1,0.0,0.0,198.33,43370,2021-12-19,NaT,due,198.33,,-31
101933,1,0.0,0.0,3112.50,37133,2021-11-30,NaT,due,3112.50,,-12


In [38]:
instalments_df['status'].value_counts()

paid         61519
due          34096
cancelled     4711
unpaid        1357
refunded       252
Name: status, dtype: int64

In [65]:
instalments_df[(instalments_df['status'] == 'paid') | (instalments_df['status'] == 'unpaid')]

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
0,1,0.0,0.0,209.70,22887,2021-09-05,2021-09-05 17:11:47.638448,paid,209.70,0.0,74
1,0,0.0,0.0,209.70,22887,2021-08-05,2021-08-05 17:11:35.015848,paid,209.70,0.0,105
2,1,0.0,0.0,68.25,23165,2021-09-08,2021-09-08 06:22:48.133229,paid,68.25,0.0,71
3,0,0.0,0.0,68.25,23165,2021-08-08,2021-08-08 06:22:35.305137,paid,68.25,0.0,102
4,1,0.0,0.0,65.67,16370,2021-07-12,2021-07-12 02:22:45.617066,paid,65.67,0.0,129
...,...,...,...,...,...,...,...,...,...,...,...
101925,2,0.0,0.0,994.50,11195,2021-05-17,2021-05-17 11:22:26.198012,paid,994.50,0.0,185
101926,0,0.0,0.0,994.50,11195,2021-04-19,2021-04-19 02:00:30.789025,paid,994.50,0.0,213
101927,0,0.0,0.0,138.25,5343,2021-02-22,2021-02-22 02:00:17.311812,paid,138.25,0.0,269
101928,1,0.0,0.0,138.25,5343,2021-03-22,2021-03-22 02:00:59.713595,paid,138.25,0.0,241


In [54]:
instalments_df[(instalments_df['status'] == 'paid') | (instalments_df['status'] == 'unpaid')]['amount'].sum()

22621918.099999998

In [56]:
instalments_df[(instalments_df['status'] == 'unpaid')]['amount'].sum()

721928.1799999999

In [58]:
(721928.1799999999/22621918.099999998)*100

3.1912774894185474

In [60]:
instalment_plans_df.head()

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,completed,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount
0,2021-05-14 07:42:50.738431,7228d4f3aff94188b3e00b52afe0f0fd,224.67,23316.0,e594f49a8ee50212c7acfeab895cd7fa,224.67,2021-05-14 07:41:44.172260,visa,funded,2022-09-30,23315.0,674.0,214,10813.0,AED,checkoutV2,784198515728362,2021-05-14 07:41:09.299151,NaT,AE,0.0,FLAESSE0569978-609e29816cb48,Footlocker,2021-07-14 07:44:45.065457,16825,,2021-04-01 18:29:05,2021-05-14 07:42:59.367529,32144,default,sandytlegario@gmail.com,captured,3,18049.0,NaT,credit,1985-07-25,0.0,Mozilla/5.0 (Linux; Android 11; SAMSUNG SM-N98...,971585023349,91.73.13.225,0.022,0.5
1,2021-08-05 17:13:07.203306,a19502a644f94e4faa9359932136f353,67.33,37147.0,f72bd5340276de52b0371411a4d7119b,67.33,2021-08-05 17:12:29.728856,visa,funded,2022-12-31,37147.0,202.0,271,17161.0,AED,checkoutV2,784199148043625,2021-08-05 17:10:35.818419,NaT,AE,0.0,BWAEHDE4655497-610c1b7037b8e,Bath and Body Works,2021-10-04 12:32:27.967838,26959,,2019-10-14 13:44:45,2021-08-05 17:13:18.239926,50495,default,Miarosa.fernandes@hotmail.com,captured,3,29575.0,NaT,debit,1991-12-31,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 12_5_4 like...,971551230977,2.49.57.33,0.022,0.5
2,2021-10-26 16:33:34.827041,556d364afabb4963aa46d70a78164ad2,39.0,63443.0,8616dfed435dffac97fb1e9bd0ef18f1,39.0,2021-10-26 16:33:30.408296,visa,funded,2022-12-31,63443.0,117.0,271,33677.0,AED,checkoutV2,784198715851378,2021-10-26 16:32:07.486339,NaT,AE,0.0,BWAEHDE5118337-61782d789357f,Bath and Body Works,NaT,45710,,NaT,2021-10-26 16:33:43.629770,88762,seamless,cdungca@edgehotels.com,captured,3,53430.0,NaT,debit,,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_4_2 like...,971502031523,2.51.12.133,0.022,0.5
3,2021-11-12 12:51:14.797158,ce302048d85f48a09b94cad6df4202fa,123.33,73717.0,3e4aa738b3647b0b8e1383c4aae57cdd,123.33,2021-11-12 12:50:40.624293,mastercard,funded,2024-12-31,73717.0,370.0,270,36030.0,AED,checkoutV2,784198938749490,2021-11-12 12:50:40.670523,NaT,AE,0.0,AEAEHDE0610037-618e6300ac249,American Eagle,NaT,54182,,2021-11-11 20:28:45,2021-11-12 12:51:24.487050,104648,default,rpa_mine@yahoo.com,captured,3,63105.0,NaT,credit,1989-09-09,0.0,Mozilla/5.0 (Linux; Android 11; ASUS_I003D Bui...,971543086992,5.31.137.87,0.022,0.5
4,2021-09-20 15:30:44.948013,50fc457bb8c946b98e972d87f87445d8,849.5,48294.0,b59a77acddb16bfaf5ef94bf18c5e7b0,849.5,2021-09-20 15:30:12.102783,visa,funded,2024-12-31,,1699.0,238,20980.0,AED,checkoutV2,784199651824767,2021-09-20 15:30:12.138029,NaT,AE,,7644-6148a8ea2abda,CRYO,NaT,34440,,NaT,2021-09-20 15:30:55.472781,66587,default,almurrkajoor@gmail.com,captured,2,,NaT,credit,,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like...,971557110173,91.73.34.11,0.022,0.5


In [62]:
instalment_plans_df[instalment_plans_df['num_instalments'] == 3][['total_amount', 'customer_id', 'instalment_plan_id']]

Unnamed: 0,total_amount,customer_id,instalment_plan_id
0,674.00,10813.0,16825
1,202.00,17161.0,26959
2,117.00,33677.0,45710
3,370.00,36030.0,54182
5,214.20,35022.0,47341
...,...,...,...
49986,220.00,1788.0,54335
49987,166.00,1788.0,34910
49990,200.00,16111.0,25337
49991,899.00,16111.0,30506


In [63]:
instalments_df[instalments_df['instalment_plan_id'] == 16825]

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
29401,1,0.0,0.0,224.67,16825,2021-07-14,2021-07-14 07:44:45.022681,paid,224.67,0.0,127
29402,0,0.0,0.0,224.67,16825,2021-06-14,2021-06-14 07:44:17.277625,paid,224.67,0.0,157


In [66]:
instalments_df[instalments_df['instalment_plan_id'] == 11401]

Unnamed: 0,order,refunded_amount,penalty_fee,amount,instalment_plan_id,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
101507,0,0.0,55.0,1513.0,11401,2021-05-07,NaT,unpaid,1568.0,,195
101508,1,0.0,55.0,1513.0,11401,2021-06-07,NaT,unpaid,1568.0,,164


In [67]:
instalment_plans_df[instalment_plans_df['instalment_plan_id'] == 11401]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,completed,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount
46495,2021-04-07 15:03:27.197242,a2635d30ccbb492d89701942cc0fcc7c,1513.0,14978.0,5812eace2f527d54b9c93b9c61201aba,1513.0,2021-04-07 15:03:24.241251,visa,funded,2025-12-31,14978.0,4539.0,214,6171.0,AED,checkout,784199743509277,2021-04-07 15:03:24.278310,NaT,AE,0.0,FLAEHDE0453403-606dc9a02c806,Footlocker,NaT,11401,,2019-10-25 13:59:35,2021-04-07 15:03:36.986788,22206,default,saberw764@gmail.com,captured,3,10960.0,2021-04-08 05:23:50,debit,1997-11-07,0.0,Mozilla/5.0 (Linux; Android 9; JKM-LX1) AppleW...,971562290350,2001:8f8:146d:585f:bdbc:9c2b:717a:b73,0.026,0.5


In [22]:
pi3_df = instalment_plans_df[instalment_plans_df['num_instalments'] == 3][['total_amount', 'customer_id', 'instalment_plan_id']]

In [23]:
pi3_df.shape

(35240, 3)

In [24]:
len(pi3_df)

35240

In [25]:
pd.to_numeric(pi3_df['total_amount'].sum())

31009320.82

In [6]:
pi3_df.head()

Unnamed: 0,total_amount,customer_id,instalment_plan_id
0,1625.0,40500.0,53500
1,275.0,15950.0,25161
2,390.15,21820.0,32819
5,559.0,22023.0,33489
8,4975.0,3800.0,51225


In [25]:
pi3_inst_df = pi3_df.merge(instalments_df, how='left', on='instalment_plan_id')

In [26]:
pi3_inst_df.shape

(70480, 13)

In [62]:
paid_unpaid_df = (pi3_inst_df[(pi3_inst_df['status'] == 'paid') | (pi3_inst_df['status'] == 'unpaid')][['instalment_plan_id', 'status']].groupby('instalment_plan_id').max().reset_index())

In [63]:
paid_unpaid_df

Unnamed: 0,instalment_plan_id,status
0,4413,paid
1,4466,paid
2,4467,paid
3,4469,paid
4,4492,paid
...,...,...
22893,57010,paid
22894,57056,paid
22895,57213,paid
22896,57284,paid


In [38]:
len(paid_unpaid_df)

22898

In [95]:
with_diff_df2 = instalment_plans_unp_df.merge(paid_unpaid_df, how='left', on='instalment_plan_id')

In [96]:
diff_df2 = with_diff_df2[pd.isnull(with_diff_df2['status'])]

In [97]:
diff_df2.shape

(12422, 66)

In [99]:
diff_df2.head()

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,inst_plan_completed_date,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,inst_plan_status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount,order,refunded_amount,penalty_fee,amount,scheduled,inst_completed_date,inst_status,total,days_scheduled_completed,days_since_scheduled,total_unpaid_amount,total_unpaid_total,total_penalty_fee,refund_amount,nr_of_items,unpaid_at_due,unpaid_at_5,unpaid_at_10,unpaid_at_20,unpaid_at_30,unpaid_at_60,unpaid_at_90,status
0,2021-11-11 13:40:22.453145,c2168ba8f48b4b728b1a8db201b7c7fe,541.67,72920.0,973ba9e918711784386cbe3b473bbb1c,541.67,2021-11-11 13:40:18.859538,visa,funded,2026-03-31,72920.0,1625.0,209,40500.0,AED,checkoutV2,784199157917255,2021-11-11 13:37:33.945995,NaT,AE,0.0,295832984214082296192877,Ounass,NaT,53500,,NaT,2021-11-11 13:40:33.557085,103444,seamless,sara.j.hamdan@gmail.com,captured,3,62319.0,NaT,debit,,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_4_1 like...,971581771646,91.75.215.152,0.022,0.5,1,0.0,0.0,541.67,2022-01-11,NaT,due,541.67,,-53,,,0.0,,2.0,,,,,,,,
3,2021-09-14 18:44:39.868046,49b9bd9843944f79aee3ff028dc426bb,186.33,46224.0,97f1edfa706c6d9feb95875c7582fe76,0.0,2021-09-14 18:44:39.182747,visa,funded,2021-11-30,46748.0,559.0,214,22023.0,AED,checkoutV2,784200110646864,2021-09-14 18:44:39.217521,2021-09-19 11:29:35.911652,AE,0.0,FLAESSE0621354-6140ed8c420cb,Footlocker,NaT,33489,,2021-09-12 16:59:16,2021-09-14 18:44:56.463800,64455,default,myasser3132@gmail.com,captured,3,38044.0,NaT,debit,2001-03-30,186.33,Mozilla/5.0 (iPhone; CPU iPhone OS 14_8 like M...,971544337022,2001:8f8:1335:84be:e000:adc5:f27a:fd8e,0.022,0.5,1,186.33,0.0,186.33,2021-11-14,NaT,cancelled,0.0,,5,,,0.0,559.0,1.0,,,,,,,,
4,2021-11-06 15:20:45.653042,501c440d38084292bee8de8fff30a366,1658.33,10157.0,55005a574ebd1fc30a942c3a688a0324,1658.33,2021-11-06 15:19:50.955347,visa,funded,2026-07-31,10157.0,4975.0,133,3800.0,AED,checkoutV2,784198593653623,2021-11-06 15:19:33.758840,NaT,AE,0.0,4150845374649,The Loom Collection,NaT,51225,,2021-01-30 15:57:50,2021-11-06 15:20:47.760120,99138,default,caitlinjennawhite@gmail.com,captured,3,59684.0,NaT,credit,1985-03-21,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 15_1 like M...,971525967797,5.31.196.241,0.022,0.5,1,0.0,0.0,1658.33,2022-01-06,NaT,due,1658.33,,-48,,,0.0,,1.0,,,,,,,,
8,2021-11-09 15:31:20.522137,9058043edb004300adc021b82cc7e29b,503.33,71644.0,51e0fc05b0b4be282126c3e58cdf94a9,503.33,2021-11-09 15:31:16.472149,visa,funded,2023-01-31,71644.0,1510.0,215,39303.0,AED,checkoutV2,784198886362718,2021-11-09 15:29:37.852209,NaT,AE,0.0,WEAEHDE0351729-618a93c535116,West Elm,NaT,52355,,NaT,2021-11-09 15:31:32.136591,101403,seamless,ryanzaki88@gmail.com,captured,3,61052.0,NaT,credit,,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_8 like M...,971552383971,94.204.69.158,0.022,0.5,1,0.0,0.0,503.33,2022-01-09,NaT,due,503.33,,-51,,,0.0,,1.0,,,,,,,,
14,2021-11-08 09:01:15.213267,6d7f32f9aba34a25a2fe91a8313db302,300.0,69828.0,b6f8845a4b521aab8d452a3e964b21d4,300.0,2021-11-08 08:59:58.368382,mastercard,funded,2023-12-31,69828.0,900.0,209,37895.0,SAR,checkoutV3,1107690354,2021-11-08 08:59:39.968358,NaT,SA,0.0,712839744377698347470041,Ounass,NaT,51874,,NaT,2021-11-08 09:01:22.348220,100418,default,6w8yf5ms5d@privaterelay.appleid.com,captured,3,59287.0,NaT,debit,1997-05-12,0.0,Mozilla/5.0 (iPhone; CPU iPhone OS 14_8_1 like...,966582183294,2a02:cb80:404a:1bd4:7826:18d8:cad9:3b8,0.013,0.5,1,0.0,0.0,300.0,2022-01-08,NaT,due,300.0,,-50,,,0.0,,1.0,,,,,,,,


In [104]:
instalment_plans_df['status'].value_counts()

captured    50344
Name: status, dtype: int64

In [53]:
with_diff_df = paid_unpaid_df.merge(df_mature_at_due[['instalment_plan_id', 'inst_status']], how='left', on='instalment_plan_id')

In [57]:
diff_df = with_diff_df[pd.isnull(with_diff_df['inst_status'])][['instalment_plan_id', 'status']]

In [75]:
diff_df.merge(pi3_inst_df, how='left', on='instalment_plan_id')

Unnamed: 0,instalment_plan_id,status_x,total_amount,customer_id,order,refunded_amount,penalty_fee,amount,scheduled,completed,status_y,total,days_scheduled_completed,days_since_scheduled
0,5373,paid,1249.50,3118.0,1,416.50,0.0,416.50,2021-03-23,2021-03-23 02:01:14.000000,refunded,0.00,0.0,241
1,5373,paid,1249.50,3118.0,0,208.25,0.0,416.50,2021-02-23,2021-02-23 02:00:13.000000,paid,208.25,0.0,269
2,9162,paid,829.50,4903.0,0,138.25,25.0,276.50,2021-04-07,2021-04-10 08:14:38.623301,paid,163.25,3.0,226
3,9162,paid,829.50,4903.0,1,276.50,0.0,276.50,2021-05-07,NaT,cancelled,0.00,,196
4,10117,paid,764.00,6168.0,1,254.67,0.0,254.67,2021-05-23,NaT,cancelled,0.00,,180
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12731,56452,paid,142.50,40095.0,0,0.00,0.0,47.50,2021-12-16,2021-11-18 13:37:09.906813,paid,47.50,-28.0,-27
12732,57213,paid,636.00,13860.0,1,0.00,0.0,212.00,2022-01-18,NaT,due,212.00,,-60
12733,57213,paid,636.00,13860.0,0,0.00,0.0,212.00,2021-12-18,2021-11-18 21:02:16.596282,paid,212.00,-30.0,-29
12734,57284,paid,1712.01,23904.0,0,0.00,0.0,570.67,2021-12-18,2021-11-19 00:12:05.445246,paid,570.67,-29.0,-29


In [27]:
pi3_inst_df.head()

Unnamed: 0,total_amount,customer_id,instalment_plan_id,order,refunded_amount,penalty_fee,amount,scheduled,completed,status,total,days_scheduled_completed,days_since_scheduled
0,1625.0,40500.0,53500,1,0.0,0.0,541.67,2022-01-11,NaT,due,541.67,,-53
1,1625.0,40500.0,53500,0,0.0,0.0,541.67,2021-12-11,NaT,due,541.67,,-22
2,275.0,15950.0,25161,1,0.0,0.0,91.67,2021-09-24,2021-09-24 12:33:47.655182,paid,91.67,0.0,56
3,275.0,15950.0,25161,0,0.0,0.0,91.67,2021-08-24,2021-08-24 12:33:26.426251,paid,91.67,0.0,87
4,390.15,21820.0,32819,0,0.0,0.0,130.05,2021-10-11,2021-10-11 12:00:48.590549,paid,130.05,0.0,39


In [26]:
pi3_inst_df[pi3_inst_df['status'] == 'cancelled']

Unnamed: 0,total_amount,customer_id,instalment_plan_id,order,refunded_amount,penalty_fee,amount,scheduled,completed,status,total
6,559.00,22023.0,33489,0,186.33,0.0,186.33,2021-10-14,NaT,cancelled,0.0
7,559.00,22023.0,33489,1,186.33,0.0,186.33,2021-11-14,NaT,cancelled,0.0
144,245.00,10818.0,16831,0,81.67,0.0,81.67,2021-06-14,NaT,cancelled,0.0
145,245.00,10818.0,16831,1,81.67,0.0,81.67,2021-07-14,NaT,cancelled,0.0
172,57.00,37858.0,50828,1,19.00,0.0,19.00,2022-01-05,NaT,cancelled,0.0
...,...,...,...,...,...,...,...,...,...,...,...
70415,204.75,16792.0,31825,0,68.25,0.0,68.25,2021-10-06,NaT,cancelled,0.0
70416,304.50,16792.0,31820,0,101.50,0.0,101.50,2021-10-06,NaT,cancelled,0.0
70417,304.50,16792.0,31820,1,101.50,0.0,101.50,2021-11-06,NaT,cancelled,0.0
70451,250.00,13414.0,21576,1,83.33,0.0,83.33,2021-08-25,NaT,cancelled,0.0


In [107]:
refunds_df[refunds_df['order_id'] == 18261]

Unnamed: 0,created,amount,order_id,full,id
2342,2021-03-08 10:50:52.351925,414.75,18261,False,741


In [106]:
instalment_plans_df[instalment_plans_df['instalment_plan_id'] == 9162]

Unnamed: 0,created,payment_method_fingerprint,downpayment_amount,billing_address_id,device_fingerprint,total_downpayment,checkout_completed,payment_method_brand,plan,payment_method_expires,shipping_address_id,total_amount,merchant_id,customer_id,currency,gateway_name,id_number,checkout_verified,cancelled,payment_method_country,shipping_amount,reference,merchant_name,completed,instalment_plan_id,payment_interval,customer_date_joined,status_changed,order_id,checkout_type,customer_email,status,num_instalments,shipping_id,customer_blacklisted,payment_method_type,date_of_birth,downpayment_refunded_amount,user_agent,phone,ip_address,transaction_cost_rate,transaction_cost_amount
33806,2021-03-07 04:10:39.881272,a11247ae389f40749cf8be4bd8ce6688,276.5,,d9184d7c67554cdfc7be56d67a987de8,276.5,2021-03-07 04:10:20.121037,visa,funded,2025-08-31,12300.0,829.5,161,4903.0,AED,checkout,784197319157489,2021-03-07 04:10:20.154657,NaT,AE,0.0,PPC-000004172,The Entertainer,2021-04-10 08:14:38.626526,9162,,NaT,2021-03-07 04:10:49.680480,18261,default,amal@powerhouseae.com,captured,3,8829.0,NaT,debit,1973-08-04,0.0,Mozilla/5.0 (Linux; Android 9; SM-N950F) Apple...,971521354178,94.206.202.18,0.026,0.5


In [9]:
pi3_inst_df[pi3_inst_df['status'] == 'unpaid']['total'].sum()

625364.1699999999

In [10]:
pi3_inst_df[pi3_inst_df['status'] == 'paid']['total'].sum()

9620888.34

In [11]:
pi3_inst_df[pi3_inst_df['status'] == 'unpaid']['total'].sum() + pi3_inst_df[pi3_inst_df['status'] == 'paid']['total'].sum()

10246252.51

In [12]:
pi3_inst_df[(pi3_inst_df['status'] == 'paid') | (pi3_inst_df['status'] == 'unpaid')]['total'].sum()

10246252.51

In [13]:
((pi3_inst_df[pi3_inst_df['status'] == 'unpaid']['total'].sum()) / (pi3_inst_df[(pi3_inst_df['status'] == 'paid') | (pi3_inst_df['status'] == 'unpaid')]['total'].sum())) * 100

6.103345290287013

In [None]:
pi3_inst_df = pi3_df.merge(instalments_df, how='left', on='instalment_plan_id')
pi3_inst_df = pi3_df.merge(instalments_df, how='left', on='instalment_plan_id')

In [84]:
((pi3_inst_df[pi3_inst_df['status'] == 'unpaid']['amount'].sum()) / (pi3_inst_df[(pi3_inst_df['status'] == 'paid') | (pi3_inst_df['status'] == 'unpaid')]['amount'].sum())) * 100

5.736627928810326

In [93]:
pi3_inst_df_filtered = pi3_inst_df[pi3_inst_df['scheduled'] > pd.to_datetime('2021-06-01')]

  result = libops.scalar_compare(x.ravel(), y, op)


In [91]:
import datetime
pd.to_datetime('2021-06-01')

Timestamp('2021-06-01 00:00:00')

In [94]:
((pi3_inst_df_filtered[pi3_inst_df_filtered['status'] == 'unpaid']['amount'].sum()) / (pi3_inst_df_filtered[(pi3_inst_df_filtered['status'] == 'paid') | (pi3_inst_df_filtered['status'] == 'unpaid')]['amount'].sum())) * 100

6.259748724619331