In [148]:
import pandas as pd
import numpy as np
import sys, importlib
from pathlib import Path

In [None]:
#sys.path.append(str(Path("..") / "03_src"))

In [155]:
import utils

# Render every column of wide DataFrames instead of truncating the display.
pd.options.display.max_columns = None

In [13]:
# Resolve the data root once and build paths with pathlib, so the notebook is
# not tied to Windows-style "\\" separators.
data_dir = Path(utils.path())

# Reference tables from the 01_raw folder.
customers = pd.read_csv(data_dir / "01_raw" / "olist_customers_dataset.csv")
sellers = pd.read_csv(data_dir / "01_raw" / "olist_sellers_dataset.csv")

# Order-level tables from the 02_interim folder.
orders = pd.read_csv(data_dir / "02_interim" / "orders_kpi_clean.csv")
order_items = pd.read_csv(data_dir / "02_interim" / "order_items.csv")

In [14]:
orders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96455 entries, 0 to 96454
Data columns (total 8 columns):
 #   Column                         Non-Null Count  Dtype 
---  ------                         --------------  ----- 
 0   order_id                       96455 non-null  object
 1   customer_id                    96455 non-null  object
 2   order_status                   96455 non-null  object
 3   order_purchase_timestamp       96455 non-null  object
 4   order_approved_at              96455 non-null  object
 5   order_delivered_carrier_date   96455 non-null  object
 6   order_delivered_customer_date  96455 non-null  object
 7   order_estimated_delivery_date  96455 non-null  object
dtypes: object(8)
memory usage: 5.9+ MB


### Tasks

- Processing time
- Shipping time
- Delivery time

In [15]:
# Timestamp columns of the orders table that are stored as text and must be
# converted to datetime before durations can be computed.
order_date_cols = [
    'order_purchase_timestamp',
    'order_approved_at',
    'order_delivered_carrier_date',
    'order_delivered_customer_date',
    'order_estimated_delivery_date',
]

In [16]:
# Parse all timestamp columns column-by-column; values that cannot be parsed
# become NaT instead of raising.
orders[order_date_cols] = orders[order_date_cols].apply(
    pd.to_datetime, errors='coerce'
)

In [17]:
# Whole days elapsed between purchase and order approval.
orders['processing_time'] = (
    orders['order_approved_at']
    .sub(orders['order_purchase_timestamp'])
    .dt.days
)

In [19]:
orders['processing_time'].describe()

count    96455.000000
mean         0.264362
std          0.729955
min          0.000000
25%          0.000000
50%          0.000000
75%          0.000000
max         30.000000
Name: processing_time, dtype: float64

In [32]:
# Flag orders whose approval took more than 7 whole days.
orders['IsSlowApproval'] = orders['processing_time'].gt(7)

In [20]:
# Whole days between order approval and hand-over to the carrier.
orders['shipping_time'] = (
    orders['order_delivered_carrier_date']
    .sub(orders['order_approved_at'])
    .dt.days
)

In [21]:
orders['shipping_time'].describe()

count    96455.000000
mean         2.295672
std          3.548012
min       -172.000000
25%          0.000000
50%          1.000000
75%          3.000000
max        125.000000
Name: shipping_time, dtype: float64

In [35]:
# Flag orders that needed more than 30 days to reach the carrier.
orders['extreme_shipping_flag'] = orders['shipping_time'].gt(30)

In [26]:
# Orders with a negative shipping time (carrier date before approval) or a
# shipping time above 30 days.
shipping_outliers = orders.loc[
    orders['shipping_time'].lt(0) | orders['shipping_time'].gt(30)
]
shipping_outliers.shape

(1514, 11)

In [24]:
# Whole days between hand-over to the carrier and delivery to the customer.
orders['delivery_time'] = (
    orders['order_delivered_customer_date']
    .sub(orders['order_delivered_carrier_date'])
    .dt.days
)

In [25]:
orders['delivery_time'].describe()

count    96455.000000
mean         8.878140
std          8.746466
min        -17.000000
25%          4.000000
50%          7.000000
75%         12.000000
max        205.000000
Name: delivery_time, dtype: float64

In [36]:
# Flag orders whose carrier-to-customer leg took more than 30 days.
orders['extreme_delivery_flag'] = orders['delivery_time'].gt(30)

In [27]:
# Orders with a negative delivery time (delivered before the carrier date) or
# a delivery time above 30 days.
delivery_outliers = orders.loc[
    orders['delivery_time'].lt(0) | orders['delivery_time'].gt(30)
]
delivery_outliers.shape

(2477, 11)

In [50]:
# Keep only orders with a consistent timeline: drop every row whose shipping
# or delivery duration is negative. The copy detaches the result from `orders`.
orders_clean = orders.loc[
    ~(orders['shipping_time'].lt(0) | orders['delivery_time'].lt(0))
].copy()


In [56]:
orders_clean.shape, orders.shape

((95082, 14), (96455, 14))

In [61]:
orders.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96455 entries, 0 to 96454
Data columns (total 14 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   order_id                       96455 non-null  object        
 1   customer_id                    96455 non-null  object        
 2   order_status                   96455 non-null  object        
 3   order_purchase_timestamp       96455 non-null  datetime64[ns]
 4   order_approved_at              96455 non-null  datetime64[ns]
 5   order_delivered_carrier_date   96455 non-null  datetime64[ns]
 6   order_delivered_customer_date  96455 non-null  datetime64[ns]
 7   order_estimated_delivery_date  96455 non-null  datetime64[ns]
 8   processing_time                96455 non-null  int64         
 9   shipping_time                  96455 non-null  int64         
 10  delivery_time                  96455 non-null  int64         
 11  IsSlowApproval 

In [66]:
orders_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95082 entries, 0 to 96454
Data columns (total 15 columns):
 #   Column                         Non-Null Count  Dtype          
---  ------                         --------------  -----          
 0   order_id                       95082 non-null  object         
 1   customer_id                    95082 non-null  object         
 2   order_status                   95082 non-null  object         
 3   order_purchase_timestamp       95082 non-null  datetime64[ns] 
 4   order_approved_at              95082 non-null  datetime64[ns] 
 5   order_delivered_carrier_date   95082 non-null  datetime64[ns] 
 6   order_delivered_customer_date  95082 non-null  datetime64[ns] 
 7   order_estimated_delivery_date  95082 non-null  datetime64[ns] 
 8   processing_time                95082 non-null  int64          
 9   shipping_time                  95082 non-null  int64          
 10  delivery_time                  95082 non-null  int64          
 11  IsSlowA

In [69]:
# Whole days between actual and estimated delivery; a positive value means
# the order arrived after the estimate.
orders_clean['delivery_delay'] = (
    orders_clean['order_delivered_customer_date']
    .sub(orders_clean['order_estimated_delivery_date'])
    .dt.days
    .astype(int)
)

# The two flags are exact complements of each other.
orders_clean['late_delivery_flag'] = orders_clean['delivery_delay'].gt(0)
orders_clean['on_time_delivery'] = ~orders_clean['late_delivery_flag']


In [72]:
orders_clean.info()

<class 'pandas.core.frame.DataFrame'>
Index: 95082 entries, 0 to 96454
Data columns (total 17 columns):
 #   Column                         Non-Null Count  Dtype         
---  ------                         --------------  -----         
 0   order_id                       95082 non-null  object        
 1   customer_id                    95082 non-null  object        
 2   order_status                   95082 non-null  object        
 3   order_purchase_timestamp       95082 non-null  datetime64[ns]
 4   order_approved_at              95082 non-null  datetime64[ns]
 5   order_delivered_carrier_date   95082 non-null  datetime64[ns]
 6   order_delivered_customer_date  95082 non-null  datetime64[ns]
 7   order_estimated_delivery_date  95082 non-null  datetime64[ns]
 8   processing_time                95082 non-null  int64         
 9   shipping_time                  95082 non-null  int64         
 10  delivery_time                  95082 non-null  int64         
 11  IsSlowApproval      

In [63]:
orders_clean[['processing_time','shipping_time','delivery_time']].describe()

Unnamed: 0,processing_time,shipping_time,delivery_time
count,95082.0,95082.0,95082.0
mean,0.241139,2.351581,8.91032
std,0.666343,3.49063,8.755815
min,0.0,0.0,0.0
25%,0.0,0.0,4.0
50%,0.0,1.0,7.0
75%,0.0,3.0,12.0
max,30.0,125.0,205.0


In [74]:
# Use the seller of the first line item (order_item_id == 1) as the order's
# seller; only the join key and the seller id are kept.
order_items_primary = order_items.loc[
    order_items['order_item_id'].eq(1),
    ['order_id', 'seller_id'],
]


In [88]:
orders_clean.shape, order_items_primary.shape

((95082, 17), (98666, 2))

In [85]:
orders_clean.columns


Index(['order_id', 'customer_id', 'order_status', 'order_purchase_timestamp',
       'order_approved_at', 'order_delivered_carrier_date',
       'order_delivered_customer_date', 'order_estimated_delivery_date',
       'processing_time', 'shipping_time', 'delivery_time', 'IsSlowApproval',
       'extreme_shipping_flag', 'extreme_delivery_flag', 'delivery_delay',
       'late_delivery_flag', 'on_time_delivery'],
      dtype='object')

In [86]:
# Attach the primary seller to every cleaned order. `validate` makes pandas
# raise if order_id is duplicated on either side of the join.
orders_enriched = pd.merge(
    orders_clean,
    order_items_primary,
    how='left',
    on='order_id',
    validate='one_to_one',
)


In [87]:
orders_enriched.shape

(95082, 18)

In [89]:
orders_enriched.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date,processing_time,shipping_time,delivery_time,IsSlowApproval,extreme_shipping_flag,extreme_delivery_flag,delivery_delay,late_delivery_flag,on_time_delivery,seller_id
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18,0,2,6,False,False,False,-8,False,True,3504c0cb71d7fa48d967e0e4c94d59d9
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13,1,0,12,False,False,False,-6,False,True,289cdb325fb7e7f891c38608bf9e0962
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04,0,0,9,False,False,False,-18,False,True,4869f7a5dfa277a7dca6462dcf3b52b2
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15,0,3,9,False,False,False,-13,False,True,66922902710d126a0e7d26b0e3805106
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26,0,0,1,False,False,False,-10,False,True,2c9e548be18521d1c43cde1c582c6de8


In [90]:
# Add the customer's city and state. orders_enriched is overwritten by this
# cell, so the target columns are dropped first: re-running the cell then
# yields the same columns instead of customer_city_x / customer_city_y
# duplicates. `validate` raises if customer_id is not unique in `customers`.
orders_enriched = (
    orders_enriched
    .drop(columns=['customer_city', 'customer_state'], errors='ignore')
    .merge(
        customers[['customer_id', 'customer_city', 'customer_state']],
        on='customer_id',
        how='left',
        validate='many_to_one',
    )
)


In [92]:
orders_enriched.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date,processing_time,shipping_time,delivery_time,IsSlowApproval,extreme_shipping_flag,extreme_delivery_flag,delivery_delay,late_delivery_flag,on_time_delivery,seller_id,customer_city,customer_state
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18,0,2,6,False,False,False,-8,False,True,3504c0cb71d7fa48d967e0e4c94d59d9,sao paulo,SP
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13,1,0,12,False,False,False,-6,False,True,289cdb325fb7e7f891c38608bf9e0962,barreiras,BA
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04,0,0,9,False,False,False,-18,False,True,4869f7a5dfa277a7dca6462dcf3b52b2,vianopolis,GO
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15,0,3,9,False,False,False,-13,False,True,66922902710d126a0e7d26b0e3805106,sao goncalo do amarante,RN
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26,0,0,1,False,False,False,-10,False,True,2c9e548be18521d1c43cde1c582c6de8,santo andre,SP


In [93]:
# Add the seller's city and state. orders_enriched is overwritten by this
# cell, so the target columns are dropped first: re-running the cell then
# yields the same columns instead of seller_city_x / seller_city_y
# duplicates. `validate` raises if seller_id is not unique in `sellers`.
orders_enriched = (
    orders_enriched
    .drop(columns=['seller_city', 'seller_state'], errors='ignore')
    .merge(
        sellers[['seller_id', 'seller_city', 'seller_state']],
        on='seller_id',
        how='left',
        validate='many_to_one',
    )
)


In [94]:
orders_enriched.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date,processing_time,shipping_time,...,extreme_shipping_flag,extreme_delivery_flag,delivery_delay,late_delivery_flag,on_time_delivery,seller_id,customer_city,customer_state,seller_city,seller_state
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18,0,2,...,False,False,-8,False,True,3504c0cb71d7fa48d967e0e4c94d59d9,sao paulo,SP,maua,SP
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13,1,0,...,False,False,-6,False,True,289cdb325fb7e7f891c38608bf9e0962,barreiras,BA,belo horizonte,SP
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04,0,0,...,False,False,-18,False,True,4869f7a5dfa277a7dca6462dcf3b52b2,vianopolis,GO,guariba,SP
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15,0,3,...,False,False,-13,False,True,66922902710d126a0e7d26b0e3805106,sao goncalo do amarante,RN,belo horizonte,MG
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26,0,0,...,False,False,-10,False,True,2c9e548be18521d1c43cde1c582c6de8,santo andre,SP,mogi das cruzes,SP


In [96]:
orders_enriched[['seller_city', 'seller_state','customer_city', 'customer_state', 'seller_id']].describe()

Unnamed: 0,seller_city,seller_state,customer_city,customer_state,seller_id
count,95082,95082,95082,95082,95082
unique,594,22,4067,27,2951
top,sao paulo,SP,sao paulo,SP,6560211a19b47992c3666cc44a7e94c0
freq,23475,67434,14817,39902,1758


In [97]:
orders_enriched[['seller_city', 'seller_state','customer_city', 'customer_state', 'seller_id']].isna().sum()

seller_city       0
seller_state      0
customer_city     0
customer_state    0
seller_id         0
dtype: int64

In [98]:
utils.path()

'C:\\Users\\acer\\Documents\\Data Analyst - Field Training Program\\Portfolio\\3. E-commerce\\olist_ops_project_dap3\\01_data'

In [245]:
# Flag orders handed to the carrier more than 7 days after approval.
orders_enriched['late_shipping_flag'] = orders_enriched['shipping_time'].gt(7)

In [247]:
# Persist the order-level table. The path is built with pathlib so it does
# not depend on Windows-style "\\" separators.
orders_enriched.to_csv(
    Path(utils.path()) / "03_curated" / "orders_enriched.csv",
    index=False,
)

In [156]:
orders_enriched.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date,processing_time,shipping_time,delivery_time,IsSlowApproval,extreme_shipping_flag,extreme_delivery_flag,delivery_delay,late_delivery_flag,on_time_delivery,seller_id,customer_city,customer_state,seller_city,seller_state
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18,0,2,6,False,False,False,-8,False,True,3504c0cb71d7fa48d967e0e4c94d59d9,sao paulo,SP,maua,SP
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13,1,0,12,False,False,False,-6,False,True,289cdb325fb7e7f891c38608bf9e0962,barreiras,BA,belo horizonte,SP
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04,0,0,9,False,False,False,-18,False,True,4869f7a5dfa277a7dca6462dcf3b52b2,vianopolis,GO,guariba,SP
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15,0,3,9,False,False,False,-13,False,True,66922902710d126a0e7d26b0e3805106,sao goncalo do amarante,RN,belo horizonte,MG
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26,0,0,1,False,False,False,-10,False,True,2c9e548be18521d1c43cde1c582c6de8,santo andre,SP,mogi das cruzes,SP


In [150]:
orders_enriched['shipping_time'].describe()

count    95082.000000
mean         2.351581
std          3.490630
min          0.000000
25%          0.000000
50%          1.000000
75%          3.000000
max        125.000000
Name: shipping_time, dtype: float64

In [153]:
# Distribution of shipping time among orders that took more than 7 days.
orders_enriched.loc[orders_enriched['shipping_time'].gt(7), 'shipping_time'].describe()

count    4937.000000
mean       13.321855
std         6.973375
min         8.000000
25%         9.000000
50%        11.000000
75%        15.000000
max       125.000000
Name: shipping_time, dtype: float64

### Defining seller KPIs

In [205]:
# Per-seller operational KPIs, one row per seller_id.
seller_kpis = (
    orders_enriched
    .groupby('seller_id')
    .agg(
        order_volume=('order_id', 'count'),
        avg_processing_time=('processing_time', 'mean'),
        avg_shipping_time=('shipping_time', 'mean'),
        slow_processing_rate=('IsSlowApproval', 'mean'),
        # Share of the seller's orders shipped after more than 7 days
        # (7 days is used as an operational SLA proxy).
        late_shipping_rate=('shipping_time', lambda days: days.gt(7).mean()),
    )
    .reset_index()
)

In [207]:
seller_kpis.describe()

Unnamed: 0,order_volume,avg_processing_time,avg_shipping_time,slow_processing_rate,late_shipping_rate
count,2951.0,2951.0,2951.0,2951.0,2951.0
mean,32.220264,0.228875,2.773285,0.000241,0.072325
std,103.172731,0.343263,3.977812,0.00656,0.199136
min,1.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,1.0,0.0,0.0
50%,7.0,0.153846,1.714286,0.0,0.0
75%,22.0,0.307692,3.121324,0.0,0.014816
max,1758.0,5.0,61.0,0.333333,1.0


### Differentiating between active and non-active sellers

In [208]:
# "Active" sellers are those with at least 10 orders; copy so that columns
# added later do not touch seller_kpis.
active_sellers = seller_kpis.loc[seller_kpis['order_volume'].ge(10)].copy()

In [209]:
active_sellers.shape, seller_kpis.shape

((1212, 6), (2951, 6))

In [211]:
# 75th-percentile cut-offs computed over active sellers only.
# Sellers are evaluated relative to their peers, not against arbitrary SLAs.
shipping_p75, late_shipping_p75, processing_p75 = (
    active_sellers[metric].quantile(0.75)
    for metric in ("avg_shipping_time", "late_shipping_rate", "avg_processing_time")
)

In [212]:
shipping_p75,late_shipping_p75,processing_p75

(np.float64(2.904690378915325),
 np.float64(0.05517329255861366),
 np.float64(0.3))

In [213]:
# Flag sellers that sit at or above the active-seller 75th percentile on each metric.
active_sellers["flag_slow_shipping"] = active_sellers["avg_shipping_time"].ge(shipping_p75)
active_sellers["flag_late_shipping"] = active_sellers["late_shipping_rate"].ge(late_shipping_p75)
active_sellers["flag_slow_processing"] = active_sellers["avg_processing_time"].ge(processing_p75)


In [None]:
# Score = number of percentile flags raised for the seller (0 to 3).
active_sellers["underperformance_score"] = sum(
    active_sellers[flag].astype(int)
    for flag in ("flag_slow_shipping", "flag_late_shipping", "flag_slow_processing")
)

In [None]:
# Map the underperformance score to a status label:
#   0   -> Healthy
#   1   -> Watchlist
#   2-3 -> Operationally underperforming
# pd.cut intervals are right-inclusive by default, so the bins (-1, 0],
# (0, 1] and (1, 3] capture exactly the scores 0, 1 and 2-3.

active_sellers["seller_status"] = pd.cut(
    active_sellers["underperformance_score"],
    bins=[-1, 0, 1, 3],
    labels=["Healthy", "Watchlist", "Underperforming"]
)

# Number of active sellers in each status.
active_sellers["seller_status"].value_counts()


seller_status
Healthy            652
Watchlist          283
Underperforming    277
Name: count, dtype: int64

In [216]:
# Profile each status tier. seller_status is a categorical (built with
# pd.cut), so `observed` is passed explicitly: observed=False keeps the
# current behaviour (one row per category) and avoids the pandas
# FutureWarning about the changing default.
active_sellers.groupby("seller_status", observed=False).agg(
    sellers=("seller_id", "count"),
    avg_orders=("order_volume", "mean"),
    avg_shipping_time=("avg_shipping_time", "mean"),
    avg_late_shipping_rate=("late_shipping_rate", "mean")
).reset_index()


  active_sellers.groupby("seller_status").agg(


Unnamed: 0,seller_status,sellers,avg_orders,avg_shipping_time,avg_late_shipping_rate
0,Healthy,652,85.406442,1.373825,0.006418
1,Watchlist,283,49.374558,1.858652,0.020091
2,Underperforming,277,70.263538,5.184536,0.208063


In [219]:
# Absolute late shipments per seller.
# late_shipping_rate is a float mean (late orders / order_volume), so the
# product can carry floating-point residue (e.g. 0.9999999999999999 instead
# of 1). Rounding restores the exact whole-number count, which keeps ties in
# the later sort and the cumulative sums exact.
active_sellers["late_shipments"] = (
    active_sellers["late_shipping_rate"] * active_sellers["order_volume"]
).round()

active_sellers[["seller_id", "order_volume", "late_shipping_rate", "late_shipments"]].head()


Unnamed: 0,seller_id,order_volume,late_shipping_rate,late_shipments
1,001cca7ae9ae17fb1caed9dfb1094831,194,0.010309,2.0
2,002100f778ceb8431b7a1020ff7ab48f,49,0.020408,1.0
4,004c9cd9d87a3c30c522c48c4fc07416,154,0.0,0.0
5,00720abe85ba0859807595bbf045a33b,13,0.0,0.0
8,00ee68308b45bc5e2660cd833c3f81cc,128,0.0,0.0


In [220]:
# Rank active sellers from most to fewest late shipments and renumber the
# rows 0..n-1 so that the index reflects the rank.
seller_late_impact = active_sellers.sort_values(
    by="late_shipments",
    ascending=False,
    ignore_index=True,
)


In [221]:
# Cumulative view over the ranked sellers (rows are ordered by late shipments).
total_late_shipments = seller_late_impact["late_shipments"].sum()

# Running total of late shipments and its share of the overall total.
seller_late_impact["cum_late_shipments"] = seller_late_impact["late_shipments"].cumsum()
seller_late_impact["cum_late_share"] = seller_late_impact["cum_late_shipments"].div(
    total_late_shipments
)

# Fraction of sellers covered up to and including each row; this uses the
# row index, so it relies on the index being 0..n-1.
seller_late_impact["seller_share"] = (seller_late_impact.index + 1) / len(seller_late_impact)


In [240]:
# Rows from the point where the cumulative late-shipment share reaches 25%.
seller_late_impact.loc[seller_late_impact["cum_late_share"].ge(0.25)]


Unnamed: 0,seller_id,order_volume,avg_processing_time,avg_shipping_time,slow_processing_rate,late_shipping_rate,flag_slow_shipping,flag_late_shipping,flag_slow_processing,underperformance_score,seller_status,late_shipments,cum_late_shipments,cum_late_share,seller_share
4,1025f0e2d44d7041d6cf58b6550e0bfa,886,0.260722,3.321670,0.000000,0.090293,True,True,False,2,Underperforming,80.0,1141.0,0.252993,0.004125
5,213b25e6f54661939f11710a6fddb871,136,0.220588,7.669118,0.000000,0.588235,True,True,False,2,Underperforming,80.0,1221.0,0.270732,0.004950
6,06a2c3af7b3aee5d69171b0e14f0ee87,369,0.211382,4.184282,0.002710,0.205962,True,True,False,2,Underperforming,76.0,1297.0,0.287583,0.005776
7,855668e0971d4dfd7bef1b6a4133b41b,296,0.304054,5.405405,0.003378,0.233108,True,True,True,3,Underperforming,69.0,1366.0,0.302882,0.006601
8,8160255418d5aaa7dbdc9f4c64ebda44,365,0.178082,3.161644,0.000000,0.145205,True,True,False,2,Underperforming,53.0,1419.0,0.314634,0.007426
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1207,02c988090b766852e088c69d7fb3b551,11,0.181818,1.363636,0.000000,0.000000,False,False,False,0,Healthy,0.0,4510.0,1.000000,0.996700
1208,f615fe7efbef0f4f08fd3086bc7a3e60,20,0.150000,0.400000,0.000000,0.000000,False,False,False,0,Healthy,0.0,4510.0,1.000000,0.997525
1209,0241d4d5d36f10f80c644447315af0bd,225,0.235556,1.253333,0.000000,0.000000,False,False,False,0,Healthy,0.0,4510.0,1.000000,0.998350
1210,01fdefa7697d26ad920e9e0346d4bd1b,127,0.196850,0.968504,0.000000,0.000000,False,False,False,0,Healthy,0.0,4510.0,1.000000,0.999175


In [235]:
# Number of top-ranked sellers needed to reach 25% of late shipments: the
# index label of the first row at or above the threshold, plus one because
# the index starts at 0.
seller_pct_25 = (
    seller_late_impact
    .loc[seller_late_impact["cum_late_share"].ge(0.25)]
    .index[0]
    + 1
)

# Express that count as a share of all rows in seller_late_impact.
seller_pct_25 / len(seller_late_impact)


np.float64(0.004125412541254125)

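Late shipments are highly concentrated among a small subset of sellers. The top 0.4% of active sellers (5 of 1,212) account for approximately 25% of the late handovers to logistics made by active sellers, indicating that operational delays are driven by a very limited number of high-impact sellers rather than by widespread underperformance. Note that the denominator covers active sellers (at least 10 orders) only; late shipments from sellers below that threshold are not included.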

In [226]:
seller_kpis.describe()

Unnamed: 0,order_volume,avg_processing_time,avg_shipping_time,slow_processing_rate,late_shipping_rate
count,2951.0,2951.0,2951.0,2951.0,2951.0
mean,32.220264,0.228875,2.773285,0.000241,0.072325
std,103.172731,0.343263,3.977812,0.00656,0.199136
min,1.0,0.0,0.0,0.0,0.0
25%,2.0,0.0,1.0,0.0,0.0
50%,7.0,0.153846,1.714286,0.0,0.0
75%,22.0,0.307692,3.121324,0.0,0.014816
max,1758.0,5.0,61.0,0.333333,1.0


In [241]:
utils.path()

'C:\\Users\\acer\\Documents\\Data Analyst - Field Training Program\\Portfolio\\3. E-commerce\\olist_ops_project_dap3\\01_data'

In [248]:
# Persist the seller-level KPI table. The path is built with pathlib so it
# does not depend on Windows-style "\\" separators.
seller_kpis.to_csv(
    Path(utils.path()) / "03_curated" / "seller_kpis.csv",
    index=False,
)

In [243]:
orders_enriched.head()

Unnamed: 0,order_id,customer_id,order_status,order_purchase_timestamp,order_approved_at,order_delivered_carrier_date,order_delivered_customer_date,order_estimated_delivery_date,processing_time,shipping_time,delivery_time,IsSlowApproval,extreme_shipping_flag,extreme_delivery_flag,delivery_delay,late_delivery_flag,on_time_delivery,seller_id,customer_city,customer_state,seller_city,seller_state
0,e481f51cbdc54678b7cc49136f2d6af7,9ef432eb6251297304e76186b10a928d,delivered,2017-10-02 10:56:33,2017-10-02 11:07:15,2017-10-04 19:55:00,2017-10-10 21:25:13,2017-10-18,0,2,6,False,False,False,-8,False,True,3504c0cb71d7fa48d967e0e4c94d59d9,sao paulo,SP,maua,SP
1,53cdb2fc8bc7dce0b6741e2150273451,b0830fb4747a6c6d20dea0b8c802d7ef,delivered,2018-07-24 20:41:37,2018-07-26 03:24:27,2018-07-26 14:31:00,2018-08-07 15:27:45,2018-08-13,1,0,12,False,False,False,-6,False,True,289cdb325fb7e7f891c38608bf9e0962,barreiras,BA,belo horizonte,SP
2,47770eb9100c2d0c44946d9cf07ec65d,41ce2a54c0b03bf3443c3d931a367089,delivered,2018-08-08 08:38:49,2018-08-08 08:55:23,2018-08-08 13:50:00,2018-08-17 18:06:29,2018-09-04,0,0,9,False,False,False,-18,False,True,4869f7a5dfa277a7dca6462dcf3b52b2,vianopolis,GO,guariba,SP
3,949d5b44dbf5de918fe9c16f97b45f8a,f88197465ea7920adcdbec7375364d82,delivered,2017-11-18 19:28:06,2017-11-18 19:45:59,2017-11-22 13:39:59,2017-12-02 00:28:42,2017-12-15,0,3,9,False,False,False,-13,False,True,66922902710d126a0e7d26b0e3805106,sao goncalo do amarante,RN,belo horizonte,MG
4,ad21c59c0840e6cb83a9ceb5573f8159,8ab97904e6daea8866dbdbc4fb7aad2c,delivered,2018-02-13 21:18:39,2018-02-13 22:20:29,2018-02-14 19:46:34,2018-02-16 18:17:02,2018-02-26,0,0,1,False,False,False,-10,False,True,2c9e548be18521d1c43cde1c582c6de8,santo andre,SP,mogi das cruzes,SP
