In [2]:
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Silence all warnings so library notices do not clutter the cell outputs.
warnings.filterwarnings(action="ignore")

# Plot styling: reset matplotlib to its default style, then apply the seaborn theme.
plt.style.use("default")
sns.set_theme(style="whitegrid", context="notebook")

In [3]:
# Location of the cleaned sales-orders CSV. The default is the original local
# path; set the SALES_ORDERS_CSV environment variable to run this notebook on
# another machine without editing the cell.
SALES_ORDERS_CSV = os.environ.get(
    "SALES_ORDERS_CSV",
    "C:/Users/aemil/OneDrive/Desktop/Portfolio/sales-orders-analytics/data/processed/sales_orders_clean.csv",
)

df = pd.read_csv(SALES_ORDERS_CSV)

In [4]:
# Collapse the line-level frame into one row per "Order".
#
# The three boolean flags are computed once as vectorised helper columns and
# reduced with the built-in "any" aggregation rather than per-group Python
# lambdas; the resulting values are the same.
#
# NOTE(review): in the rendered outputs balance_net equals order_quantity_net on
# every visible row — confirm that "Balance" is a monetary amount and not a
# quantity balance before reading it as financial exposure.
order_features = (
    df
    .assign(
        _qty_negative=lambda lines: lines["Quantity"] < 0,
        _balance_positive=lambda lines: lines["Balance"] > 0,
        _balance_negative=lambda lines: lines["Balance"] < 0,
    )
    .groupby("Order")
    .agg(
        order_total_value=("Total Price ($)", "sum"),
        # "count" only counts lines whose Part Number is not missing.
        order_line_count=("Part Number", "count"),

        # Quantity = fulfillment state
        order_quantity_net=("Quantity", "sum"),
        # True when at least one line of the order has a negative quantity.
        has_unshipped_items=("_qty_negative", "any"),

        # Balance = financial exposure
        balance_net=("Balance", "sum"),
        # True when at least one line has a positive / negative balance.
        customer_owes=("_balance_positive", "any"),
        company_owes=("_balance_negative", "any"),

        # Execution & identity
        is_executed=("is_executed", "max"),
        customer_number=("Customer Number", "first"),
        customer_name=("Customer Name", "first"),
        sales_rep=("Sales Rep Name", "first"),
        order_status=("Order Status", "first")
    )
    .reset_index()
)

In [5]:
# Preview the balance-derived columns for the first few orders.
order_features.loc[:, ["balance_net", "customer_owes", "company_owes"]].head(n=5)

Unnamed: 0,balance_net,customer_owes,company_owes
0,1,True,False
1,1,True,False
2,2,True,False
3,1,True,False
4,1,True,False


In [6]:
# Preview the quantity-derived columns for the first few orders.
order_features.loc[:, ["order_quantity_net", "has_unshipped_items"]].head(n=5)

Unnamed: 0,order_quantity_net,has_unshipped_items
0,1,False
1,1,False
2,2,False
3,1,False
4,1,False


In [7]:
# Display the order-level feature table (one row per "Order"); pandas truncates the rendering.
order_features

Unnamed: 0,Order,order_total_value,order_line_count,order_quantity_net,has_unshipped_items,balance_net,customer_owes,company_owes,is_executed,customer_number,customer_name,sales_rep,order_status
0,SO24000002,0.00,1,1,False,1,True,False,True,101625,LA Medical Aesthetics Ontario Inc.,Michelle Boudreau,Paid
1,SO24000003,0.00,1,1,False,1,True,False,True,101812,Phace Medical Aesthetics,Cynthia Boyer,Paid
2,SO24000006,2004.46,1,2,False,2,True,False,True,100459,Beauty Marx Aesthetic Medspa,General,Confirmed
3,SO24000015,0.00,1,1,False,1,True,False,True,101813,Salon Bronzage et Esthetique Bali's,Kim Benisty,Paid
4,SO24000021,0.00,1,1,False,1,True,False,True,101807,L. Coiffe Group JC Inc.,Kim Benisty,Paid
...,...,...,...,...,...,...,...,...,...,...,...,...,...
457,SO25000572,154900.00,13,17,False,17,True,False,False,102111,Vegreville Family Clinic,Kathie Abou-Mechrek,Draft
458,SO25000574,-890.00,1,-1,True,-1,False,True,True,100414,Trims Salon & Spa (Kilborn Healing),General,Confirmed
459,SO25000575,1775.00,3,9,False,9,True,False,True,102026,Delmar College (Red Deer Location),General,Confirmed
460,SO25000576,9.79,2,4,False,4,True,False,True,101190,Before & After Aesthetics,General,Confirmed


In [8]:
# Roll the order-level table up to one row per customer_number.
# Means taken over the boolean order flags give the share of that customer's
# orders for which the flag is True.
customer_features = (
    order_features
    .groupby("customer_number")
    .agg(
        customer_name=pd.NamedAgg(column="customer_name", aggfunc="first"),

        # Volume & value
        customer_total_revenue=pd.NamedAgg(column="order_total_value", aggfunc="sum"),
        customer_avg_order_value=pd.NamedAgg(column="order_total_value", aggfunc="mean"),
        customer_order_count=pd.NamedAgg(column="Order", aggfunc="nunique"),

        # Execution behavior
        customer_execution_rate=pd.NamedAgg(column="is_executed", aggfunc="mean"),

        # Financial exposure
        customer_balance_net=pd.NamedAgg(column="balance_net", aggfunc="sum"),
        customer_owes_ratio=pd.NamedAgg(column="customer_owes", aggfunc="mean"),
        company_owes_ratio=pd.NamedAgg(column="company_owes", aggfunc="mean"),

        # Fulfillment behavior
        unshipped_rate=pd.NamedAgg(column="has_unshipped_items", aggfunc="mean"),
        avg_order_line_count=pd.NamedAgg(column="order_line_count", aggfunc="mean"),
    )
    .reset_index()
)


In [9]:
# Summary statistics of the per-customer unshipped rate.
customer_features.loc[:, ["unshipped_rate"]].describe()

Unnamed: 0,unshipped_rate
count,385.0
mean,0.277489
std,0.426047
min,0.0
25%,0.0
50%,0.0
75%,0.5
max,1.0


In [10]:
# Distribution of the per-customer unshipped rate across five equal-width bins.
customer_features.loc[:, "unshipped_rate"].value_counts(bins=5)

(-0.002, 0.2]    262
(0.8, 1.0]        92
(0.4, 0.6]        24
(0.2, 0.4]         5
(0.6, 0.8]         2
Name: count, dtype: int64

In [11]:
# Display the customer-level feature table (one row per customer_number); pandas truncates the rendering.
customer_features

Unnamed: 0,customer_number,customer_name,customer_total_revenue,customer_avg_order_value,customer_order_count,customer_execution_rate,customer_balance_net,customer_owes_ratio,company_owes_ratio,unshipped_rate,avg_order_line_count
0,100044,Raluca's Spa,-801.00,-801.00,1,1.0,-1,0.00,1.00,1.00,1.0
1,100079,Gee Beauty,3574.00,893.50,4,1.0,5,0.75,0.25,0.25,1.0
2,100101,Elite Laser Lounge,-801.00,-801.00,1,1.0,-1,0.00,1.00,1.00,1.0
3,100107,Serenity Skin Care Studio Inc.,30.00,30.00,1,1.0,1,1.00,0.00,0.00,1.0
4,100135,Dolce Bella Spa,4100.00,4100.00,1,1.0,2,1.00,0.00,0.00,2.0
...,...,...,...,...,...,...,...,...,...,...,...
380,102105,Era Massage & Beauty,0.00,0.00,1,1.0,2,1.00,0.00,0.00,1.0
381,102108,Eye Candy By Jenn,37345.79,37345.79,1,1.0,20,1.00,0.00,0.00,12.0
382,102110,Bloom & Bliss Studio 6,49065.00,49065.00,1,0.0,18,1.00,1.00,1.00,13.0
383,102111,Vegreville Family Clinic,154900.00,154900.00,1,0.0,17,1.00,0.00,0.00,13.0


In [12]:
# Roll the order-level table up to one row per sales rep.
# Means taken over the boolean order flags give the share of that rep's orders
# for which the flag is True.
rep_features = (
    order_features
    .groupby("sales_rep")
    .agg(
        rep_total_revenue=pd.NamedAgg(column="order_total_value", aggfunc="sum"),
        rep_avg_order_value=pd.NamedAgg(column="order_total_value", aggfunc="mean"),
        rep_order_count=pd.NamedAgg(column="Order", aggfunc="nunique"),

        # Execution performance
        rep_execution_rate=pd.NamedAgg(column="is_executed", aggfunc="mean"),

        # Pipeline / financial exposure
        rep_balance_net=pd.NamedAgg(column="balance_net", aggfunc="sum"),
        rep_customer_owes_ratio=pd.NamedAgg(column="customer_owes", aggfunc="mean"),
        rep_company_owes_ratio=pd.NamedAgg(column="company_owes", aggfunc="mean"),

        # Operational signal (fulfillment pressure)
        rep_unshipped_rate=pd.NamedAgg(column="has_unshipped_items", aggfunc="mean"),

        # Complexity proxy
        rep_avg_line_count=pd.NamedAgg(column="order_line_count", aggfunc="mean"),
    )
    .reset_index()
)

In [13]:
# Only the last expression of a cell is rendered automatically, so the
# top-revenue table is passed to display() explicitly; as a bare expression its
# result was computed and then discarded.
display(rep_features.sort_values("rep_total_revenue", ascending=False).head())

# Summary statistics of every numeric rep-level feature.
rep_features.describe()

Unnamed: 0,rep_total_revenue,rep_avg_order_value,rep_order_count,rep_execution_rate,rep_balance_net,rep_customer_owes_ratio,rep_company_owes_ratio,rep_unshipped_rate,rep_avg_line_count
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,89094.059333,13471.090864,15.4,0.948128,92.166667,0.969204,0.085186,0.118519,2.400319
std,185095.326275,45365.032239,40.564636,0.183145,266.976698,0.100604,0.208218,0.266107,3.300659
min,-13076.66,-8118.5,1.0,0.0,0.0,0.490991,0.0,0.0,1.0
25%,0.0,0.0,1.0,0.977445,2.25,1.0,0.0,0.0,1.0
50%,5961.48,2224.66,3.0,1.0,7.0,1.0,0.0,0.0,1.433333
75%,95538.01,7665.923974,13.75,1.0,30.25,1.0,0.064286,0.074359,2.0
max,759316.0,249398.57,222.0,1.0,1402.0,1.0,1.0,1.0,19.0


In [28]:
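# Disabled snippet (kept for reference): execution rate vs. unshipped rate, ordered by execution rate.
# rep_features[["rep_execution_rate", "rep_unshipped_rate"]].sort_values("rep_execution_rate").reset_index(drop=True)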

In [15]:
# Sanity check 1: inspect the rep row with index label 0 to eyeball the aggregated fields.
rep_features.loc[0]

sales_rep                  Adina Zak
rep_total_revenue          249398.57
rep_avg_order_value        249398.57
rep_order_count                    1
rep_execution_rate               0.0
rep_balance_net                   23
rep_customer_owes_ratio          1.0
rep_company_owes_ratio           0.0
rep_unshipped_rate               0.0
rep_avg_line_count              19.0
Name: 0, dtype: object

In [16]:
# Sanity check 2: the five reps with the lowest execution rate, all columns.
rep_features.sort_values(by="rep_execution_rate", ascending=True).head(n=5)

Unnamed: 0,sales_rep,rep_total_revenue,rep_avg_order_value,rep_order_count,rep_execution_rate,rep_balance_net,rep_customer_owes_ratio,rep_company_owes_ratio,rep_unshipped_rate,rep_avg_line_count
0,Adina Zak,249398.57,249398.57,1,0.0,23,1.0,0.0,0.0,19.0
23,Petherick Jennifer,237440.88,16960.062857,14,0.857143,95,1.0,0.142857,0.142857,4.714286
26,Sabah Shaikh,759316.0,17658.511628,43,0.883721,419,0.976744,0.023256,0.023256,3.790698
2,Assaf Sinai,137674.53,11472.8775,12,0.916667,22,1.0,0.333333,0.333333,3.333333
29,Tara Jordan,100873.41,7759.493077,13,0.923077,38,1.0,0.076923,0.076923,2.230769


In [19]:
# Sanity check 3: the ten lowest execution rates next to each rep's order
# count, to see how many orders stand behind each rate.
rep_features.sort_values(by="rep_execution_rate").loc[
    :, ["sales_rep", "rep_order_count", "rep_execution_rate", "rep_unshipped_rate"]
].head(n=10)

Unnamed: 0,sales_rep,rep_order_count,rep_execution_rate,rep_unshipped_rate
0,Adina Zak,1,0.0,0.0
23,Petherick Jennifer,14,0.857143,0.142857
26,Sabah Shaikh,43,0.883721,0.023256
2,Assaf Sinai,12,0.916667,0.333333
29,Tara Jordan,13,0.923077,0.076923
11,Jackie Kaufman,15,0.933333,0.0
14,Kim Benisty,27,0.962963,0.111111
12,Kathie Abou-Mechrek,35,0.971429,0.057143
10,General,222,0.995495,0.513514
7,Cynthia Boyer,1,1.0,0.0


In [20]:
# Distribution of the number of orders per rep.
rep_features.loc[:, "rep_order_count"].describe()

count     30.000000
mean      15.400000
std       40.564636
min        1.000000
25%        1.000000
50%        3.000000
75%       13.750000
max      222.000000
Name: rep_order_count, dtype: float64

In [21]:
# Minimum number of orders a rep must have to be kept in the filtered table.
# Adjust this single value to tighten or relax the filter.
MIN_REP_ORDERS = 5

# .copy() makes the result an independent frame, so later column assignments on
# it cannot be mistaken for writes into a slice of rep_features (a situation
# pandas normally reports through a warning, and warnings are silenced in this
# notebook).
rep_features_filtered = rep_features.loc[
    rep_features["rep_order_count"] >= MIN_REP_ORDERS
].copy()

In [22]:
# The order-count threshold behind this filter can be adjusted; the filtered
# table can then be saved as CSV and used for downstream business purposes.
rep_features_filtered

Unnamed: 0,sales_rep,rep_total_revenue,rep_avg_order_value,rep_order_count,rep_execution_rate,rep_balance_net,rep_customer_owes_ratio,rep_company_owes_ratio,rep_unshipped_rate,rep_avg_line_count
2,Assaf Sinai,137674.53,11472.8775,12,0.916667,22,1.0,0.333333,0.333333,3.333333
6,Ciarah Gonzalez,40675.0,2711.666667,15,1.0,24,0.933333,0.066667,0.066667,1.2
10,General,79531.81,358.251396,222,0.995495,1402,0.490991,0.513514,0.513514,1.193694
11,Jackie Kaufman,0.0,0.0,15,0.933333,25,1.0,0.0,0.0,1.466667
12,Kathie Abou-Mechrek,162371.26,4639.178857,35,0.971429,94,0.942857,0.057143,0.057143,1.971429
14,Kim Benisty,672710.24,24915.194074,27,0.962963,154,0.962963,0.111111,0.111111,4.407407
20,Michelle Boudreau,-13076.66,-502.948462,26,1.0,366,0.769231,0.230769,0.230769,1.384615
22,Nellie Mahravan,4425.0,885.0,5,1.0,8,1.0,0.0,0.0,1.4
23,Petherick Jennifer,237440.88,16960.062857,14,0.857143,95,1.0,0.142857,0.142857,4.714286
26,Sabah Shaikh,759316.0,17658.511628,43,0.883721,419,0.976744,0.023256,0.023256,3.790698
