In [1]:
# load required libraries
library(tidyverse)
library(janitor)
library(dplyr)
library(ggplot2)
library(skimr)
library(purrr)
library(lubridate)

# Load the project-local helper scripts. apply_factors() is called below and
# add_time_period() later in the notebook; both are presumably defined in
# these files — confirm.
source("../../R/apply_factors.R")
source("../../R/analysis_helpers.R")
source("../../R/temporal_helpers.R")

# Read the three processed CSV extracts into a named list and hand that list
# straight to apply_factors(); the list names are what the lookups below use.
tables <- apply_factors(
  list(
    Orders  = readr::read_csv("../../data/processed/Orders.csv"),
    Returns = readr::read_csv("../../data/processed/Returns.csv"),
    People  = readr::read_csv("../../data/processed/People.csv")
  )
)

# Short handles for the individual tables used by the analysis cells.
orders  <- tables[["Orders"]]
returns <- tables[["Returns"]]
people  <- tables[["People"]]

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.2.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test


[1mRows: [22m[34m51290[39m [1mColumns: [22m[

# General Segment Performance

### Q1. How do total sales and order volume differ across segments?

In [2]:
# Q1: total sales and row volume per customer segment, largest revenue first.
# NOTE(review): n() counts rows of `orders`. If the table holds one row per
# order line rather than one per order, `n_orders` is really a line-item
# count — confirm against the data dictionary.
segment_overview <- orders %>%
  group_by(segment) %>%
  summarise(total_sales = sum(sales), n_orders = n(), .groups = "drop") %>%
  arrange(desc(total_sales))

segment_overview

segment,total_sales,n_orders
<fct>,<dbl>,<int>
Consumer,6507949,26518
Corporate,3824698,15429
Home Office,2309855,9343


### Q2. How do average order values vary across segments?

In [3]:
# Q2: average value per row of `orders` for each segment, highest first.
# NOTE(review): the denominator is n(), i.e. a row count. If `orders` is at
# order-line granularity this is an average line value, not an average order
# value — confirm.
segment_aov <- orders %>%
  group_by(segment) %>%
  summarise(
    total_sales = sum(sales),
    n_orders = n(),
    # summarise() evaluates its expressions in order, so the two columns
    # created above can be reused here.
    avg_order_value = total_sales / n_orders,
    .groups = "drop"
  ) %>%
  arrange(desc(avg_order_value))

segment_aov

segment,total_sales,n_orders,avg_order_value
<fct>,<dbl>,<int>,<dbl>
Corporate,3824698,15429,247.8902
Home Office,2309855,9343,247.2284
Consumer,6507949,26518,245.4163


# Category Preferences by Segment

### Q3. Which product categories dominate sales within each segment?

In [4]:
# Q3: sales per (segment, category) and each category's share of its
# segment's total sales, ordered by share within segment.
segment_category_sales <- orders %>%
  group_by(segment, category) %>%
  summarise(total_sales = sum(sales), .groups = "drop") %>%
  # `.by` scopes the grouping to this mutate() only. The previous
  # group_by(segment) %>% mutate() left the stored tibble grouped by segment,
  # so any later verb applied to it would silently run per segment.
  mutate(
    category_share = total_sales / sum(total_sales),
    .by = segment
  ) %>%
  arrange(segment, desc(category_share))

segment_category_sales

segment,category,total_sales,category_share
<fct>,<fct>,<dbl>,<dbl>
Consumer,Technology,2427040.0,0.3729347
Consumer,Furniture,2128395.8,0.3270455
Consumer,Office Supplies,1952513.6,0.3000198
Corporate,Technology,1417791.4,0.3706937
Corporate,Furniture,1264519.8,0.3306196
Corporate,Office Supplies,1142386.4,0.2986867
Home Office,Technology,899726.1,0.3895163
Home Office,Furniture,717958.6,0.3108241
Home Office,Office Supplies,692170.2,0.2996596


### Q4. Are certain sub-categories important for specific segments?

In [5]:
# Q4: sales per (segment, category, sub_category) and each sub-category's
# share of its segment's total sales, ordered by share within segment.
segment_subcategory_sales <- orders %>%
  group_by(segment, category, sub_category) %>%
  summarise(total_sales = sum(sales), .groups = "drop") %>%
  # Per-operation grouping: the share is computed within each segment, but
  # the result is returned ungrouped. The previous
  # group_by(segment) %>% mutate() kept the tibble grouped, which changes
  # how later verbs behave on it.
  mutate(
    subcategory_share = total_sales / sum(total_sales),
    .by = segment
  ) %>%
  arrange(segment, desc(subcategory_share))

segment_subcategory_sales

segment,category,sub_category,total_sales,subcategory_share
<fct>,<fct>,<fct>,<dbl>,<dbl>
Consumer,Technology,Phones,905422.28,0.139125587
Consumer,Furniture,Chairs,778362.53,0.11960181
Consumer,Furniture,Bookcases,765111.14,0.117565626
Consumer,Technology,Copiers,757081.42,0.116331792
Consumer,Office Supplies,Storage,575506.16,0.088431258
Consumer,Office Supplies,Appliances,510230.28,0.078401083
Consumer,Technology,Machines,382373.05,0.058754766
Consumer,Technology,Accessories,382163.27,0.058722532
Consumer,Furniture,Tables,381726.98,0.058655492
Consumer,Office Supplies,Binders,253745.11,0.038990025


# Volume vs Value

### Q5. Are some segments volume-driven while others are value-driven?

In [6]:
# Q5: volume (row count) next to value (sales per row) for each segment.
# No arrange() here, so rows come back in grouping-key order.
# NOTE(review): `n_orders` is n(), a row count — see whether `orders` is one
# row per order or per order line before reading this as order volume.
segment_volume_value <- orders %>%
  group_by(segment) %>%
  summarise(
    total_sales = sum(sales),
    n_orders = n(),
    avg_order_value = total_sales / n_orders,
    .groups = "drop"
  )

segment_volume_value

segment,total_sales,n_orders,avg_order_value
<fct>,<dbl>,<int>,<dbl>
Consumer,6507949,26518,245.4163
Corporate,3824698,15429,247.8902
Home Office,2309855,9343,247.2284


### Q6. Do segments differ in order frequency per customer?

In [7]:
# Q6: how many rows each customer contributes, summarised per segment.
# Step 1 counts rows per (segment, customer_name); step 2 takes the mean and
# median of those per-customer counts within each segment.
# NOTE(review): customers are identified by name only, so two customers
# sharing a name would be merged, and the count is of rows, which may be
# order lines rather than orders — confirm.
segment_order_frequency <- orders %>%
  count(segment, customer_name, name = "customer_orders") %>%
  group_by(segment) %>%
  summarise(
    avg_orders_per_customer = mean(customer_orders),
    median_orders_per_customer = median(customer_orders),
    .groups = "drop"
  )

segment_order_frequency

segment,avg_orders_per_customer,median_orders_per_customer
<fct>,<dbl>,<dbl>
Consumer,64.83619,65.0
Corporate,64.82773,63.0
Home Office,63.12838,62.5


# Geographic Interaction

### Q7. Do segments exhibit different purchasing patterns across regions?

In [8]:
# Q7: sales per (region, segment) and each segment's share of its region's
# total sales, ordered by share within region.
segment_region_sales <- orders %>%
  group_by(region, segment) %>%
  summarise(total_sales = sum(sales), .groups = "drop") %>%
  # Compute the share within each region via `.by` so the stored result is
  # ungrouped. The previous group_by(region) %>% mutate() left it grouped by
  # region, which silently affects any later verb applied to it.
  mutate(
    segment_share = total_sales / sum(total_sales),
    .by = region
  ) %>%
  arrange(region, desc(segment_share))

segment_region_sales

region,segment,total_sales,segment_share
<fct>,<fct>,<dbl>,<dbl>
Africa,Consumer,423766.81,0.5406753
Africa,Corporate,204938.95,0.2614774
Africa,Home Office,155067.45,0.1978473
Canada,Consumer,35719.11,0.5336932
Canada,Corporate,19313.73,0.288574
Canada,Home Office,11895.33,0.1777328
Caribbean,Consumer,162349.21,0.5006438
Caribbean,Corporate,104537.86,0.3223683
Caribbean,Home Office,57393.8,0.1769879
Central,Consumer,1479981.17,0.5243879


# Stability & Underperformance

### Q8. Is any segment consistently underperforming across regions or categories?

In [9]:
# Q8 (regions): how evenly each segment's sales are spread across regions.
# First total the sales per (segment, region), then describe the spread of
# those regional totals within each segment.
segment_region_performance <- orders %>%
  group_by(segment, region) %>%
  summarise(total_sales = sum(sales), .groups = "drop") %>%
  group_by(segment) %>%
  summarise(
    mean_sales = mean(total_sales),
    sd_sales = sd(total_sales),
    # Coefficient of variation: spread relative to the mean, so segments of
    # different size can be compared.
    cv_sales = sd_sales / mean_sales,
    .groups = "drop"
  )

segment_region_performance

segment,mean_sales,sd_sales,cv_sales
<fct>,<dbl>,<dbl>,<dbl>
Consumer,500611.5,354180.8,0.7074964
Corporate,294207.5,204447.1,0.6949078
Home Office,177681.2,115154.9,0.6480983


In [10]:
# Q8 (sub-categories): how evenly each segment's sales are spread across
# product sub-categories. Totals per (segment, sub_category) are computed
# first; the second summarise() describes their spread within each segment.
segment_subcategory_performance <- orders %>%
  group_by(segment, sub_category) %>%
  summarise(total_sales = sum(sales), .groups = "drop") %>%
  group_by(segment) %>%
  summarise(
    mean_sales = mean(total_sales),
    sd_sales = sd(total_sales),
    # Coefficient of variation (sd relative to mean).
    cv_sales = sd_sales / mean_sales,
    .groups = "drop"
  )

segment_subcategory_performance

segment,mean_sales,sd_sales,cv_sales
<fct>,<dbl>,<dbl>,<dbl>
Consumer,382820.6,286026.0,0.7471541
Corporate,224982.2,167126.3,0.7428425
Home Office,135873.8,100721.4,0.7412865


# Regional Accountability Overview

### Q9. How does sales performance vary across regions managed by different individuals?

In [11]:
# Q9: attach the responsible manager (`person`) to every row of `orders` by
# region, then total sales, profit and row count per manager/region.
#
# `relationship = "many-to-one"` makes the join fail loudly if People ever
# lists the same region twice. Without it, such a duplicate would silently
# duplicate order rows and inflate every total computed from this table.
orders_with_managers <- orders %>%
  left_join(people, by = "region", relationship = "many-to-one")

# A left join keeps rows whose region has no match in People, leaving
# `person` missing. Report those regions instead of letting them appear as
# an unnamed manager in the tables below.
regions_without_manager <- orders_with_managers %>%
  filter(is.na(person) | person == "") %>%
  distinct(region) %>%
  pull(region) %>%
  as.character()

if (length(regions_without_manager) > 0) {
  warning(
    "Orders without a manager name after joining People, region(s): ",
    paste(regions_without_manager, collapse = ", "),
    call. = FALSE
  )
}

manager_sales_performance <- orders_with_managers %>%
  group_by(person, region) %>%
  summarise(
    total_sales = sum(sales),
    total_profit = sum(profit),
    # Row count of `orders` per manager/region.
    n_orders = n(),
    .groups = "drop"
  ) %>%
  arrange(desc(total_sales))

manager_sales_performance

person,region,total_sales,total_profit,n_orders
<chr>,<fct>,<dbl>,<dbl>,<int>
Anna Andreadi,Central,2822302.52,311403.98,11117
Chuck Magee,South,1600907.04,140355.77,6645
Jack Lebron,North,1248165.6,194597.95,4785
Anthony Jacobs,Oceania,1100184.61,121666.64,3487
Alejandro Ballentine,Southeast Asia,884423.17,17852.33,3129
Shirley Daniels,North Asia,848309.78,165578.42,2338
,EMEA,806161.31,43897.97,5029
Deborah Brumfield,Africa,783773.21,88871.63,4587
Nora Preis,Central Asia,752826.57,132480.19,2048
Matt Collister,West,725457.82,108418.45,3203


### Q10. Are differences in regional performance driven by order volume or order value?

In [12]:
# Q10: split each manager/region's sales into volume (row count) and value
# (sales per row), largest total sales first.
# NOTE(review): `n_orders` is n(), i.e. rows of the joined table; whether
# that equals the number of orders depends on the table's granularity —
# confirm.
manager_volume_value <- orders_with_managers %>%
  group_by(person, region) %>%
  summarise(
    total_sales = sum(sales),
    n_orders = n(),
    avg_order_value = total_sales / n_orders,
    .groups = "drop"
  ) %>%
  arrange(desc(total_sales))

manager_volume_value

person,region,total_sales,n_orders,avg_order_value
<chr>,<fct>,<dbl>,<int>,<dbl>
Anna Andreadi,Central,2822302.52,11117,253.8727
Chuck Magee,South,1600907.04,6645,240.919
Jack Lebron,North,1248165.6,4785,260.8497
Anthony Jacobs,Oceania,1100184.61,3487,315.5104
Alejandro Ballentine,Southeast Asia,884423.17,3129,282.6536
Shirley Daniels,North Asia,848309.78,2338,362.8357
,EMEA,806161.31,5029,160.3025
Deborah Brumfield,Africa,783773.21,4587,170.8684
Nora Preis,Central Asia,752826.57,2048,367.5911
Matt Collister,West,725457.82,3203,226.4932


# Performance Consistency & Risk

### Q11. Do regions managed by different individuals show similar stability over time?

In [13]:
# Q11, step 1: sales per manager/region and time period.
# add_time_period() is a project helper; it presumably derives the `period`
# column grouped on below from `order_date` at yearly resolution — confirm
# in temporal_helpers.R.
manager_sales_over_time <- orders_with_managers %>%
  add_time_period("order_date", period = "year") %>%
  group_by(person, region, period) %>%
  summarise(total_sales = sum(sales), .groups = "drop")

# Q11, step 2: spread of the per-period totals for each manager/region,
# most volatile first.
manager_stability <- manager_sales_over_time %>%
  group_by(person, region) %>%
  summarise(
    mean_sales = mean(total_sales),
    sd_sales = sd(total_sales),
    # Coefficient of variation: period-to-period spread relative to the mean.
    cv_sales = sd_sales / mean_sales,
    .groups = "drop"
  ) %>%
  arrange(desc(cv_sales))

manager_stability


person,region,mean_sales,sd_sales,cv_sales
<chr>,<fct>,<dbl>,<dbl>,<dbl>
Deborah Brumfield,Africa,195943.3,73162.611,0.3733866
Nicole Hansen,Canada,16732.04,6198.215,0.3704399
,EMEA,201540.33,72417.679,0.359321
Alejandro Ballentine,Southeast Asia,221105.79,74567.09,0.3372462
Anthony Jacobs,Oceania,275046.15,83062.357,0.3019943
Nora Preis,Central Asia,188206.64,56321.395,0.299253
Giulietta Dortch,Caribbean,81070.22,24036.372,0.2964883
Chuck Magee,South,400226.76,117787.467,0.2943018
Jack Lebron,North,312041.4,88914.168,0.2849435
Matt Collister,West,181364.46,50544.938,0.2786926


### Q12. Are underperforming regions associated with specific managers or structural factors?

In [14]:
# Q12: flag the regions whose total sales fall in the bottom quartile.
region_sales <- orders_with_managers %>%
  group_by(region, person) %>%
  summarise(total_sales = sum(sales), .groups = "drop")

# First-quartile cut-off of the regional totals. Because the threshold is
# relative, roughly a quarter of the regions are flagged by construction.
sales_threshold <- quantile(region_sales[["total_sales"]], probs = 0.25)

underperforming_regions <- filter(region_sales, total_sales <= sales_threshold)

underperforming_regions

region,person,total_sales
<fct>,<chr>,<dbl>
Canada,Nicole Hansen,66928.17
Caribbean,Giulietta Dortch,324280.86
East,Kelly Williams,678781.24
West,Matt Collister,725457.82


In [15]:
# Q12 follow-up: discount, shipping-cost and margin profile of the regions
# flagged as underperforming.
underperforming_diagnostics <- orders_with_managers %>%
  filter(region %in% underperforming_regions[["region"]]) %>%
  group_by(region, person) %>%
  summarise(
    avg_discount = mean(discount),
    avg_shipping_cost = mean(shipping_cost),
    # Margin on pooled totals (sum of profit over sum of sales), not the
    # mean of per-row margins.
    avg_profit_margin = sum(profit) / sum(sales),
    .groups = "drop"
  )

underperforming_diagnostics

region,person,avg_discount,avg_shipping_cost,avg_profit_margin
<fct>,<chr>,<dbl>,<dbl>,<dbl>
Canada,Nicole Hansen,0.0,19.28549,0.2662166
Caribbean,Giulietta Dortch,0.1357515,21.0588,0.1066092
East,Kelly Williams,0.1453652,25.542,0.134834
West,Matt Collister,0.109335,23.88769,0.1494483


# Product & Segment Exposure

### Q13. Do managers oversee regions with significantly different product or segment mixes?

In [16]:
# Q13 (products): sales per (manager, category) and each category's share of
# that manager's total sales.
manager_product_mix <- orders_with_managers %>%
  group_by(person, category) %>%
  summarise(total_sales = sum(sales), .groups = "drop") %>%
  # Share within each manager, computed with per-operation grouping so the
  # stored tibble is ungrouped. The previous group_by(person) %>% mutate()
  # left it grouped by person, which changes how later verbs behave on it.
  # Rows with a missing `person` form their own group.
  mutate(
    category_share = total_sales / sum(total_sales),
    .by = person
  )

manager_product_mix

person,category,total_sales,category_share
<chr>,<fct>,<dbl>,<dbl>
Alejandro Ballentine,Furniture,313386.7,0.3543402
Alejandro Ballentine,Office Supplies,241285.08,0.2728163
Alejandro Ballentine,Technology,329751.38,0.3728434
Anna Andreadi,Furniture,860417.58,0.3048637
Anna Andreadi,Office Supplies,923435.28,0.3271922
Anna Andreadi,Technology,1038449.66,0.3679441
Anthony Jacobs,Furniture,410468.0,0.3730901
Anthony Jacobs,Office Supplies,281713.63,0.2560603
Anthony Jacobs,Technology,408002.98,0.3708496
Chuck Magee,Furniture,515749.62,0.3221609


In [17]:
# Q13 (segments): sales per (manager, segment) and each segment's share of
# that manager's total sales.
manager_segment_mix <- orders_with_managers %>%
  group_by(person, segment) %>%
  summarise(total_sales = sum(sales), .groups = "drop") %>%
  # Share within each manager via `.by`, so the result is returned ungrouped.
  # The previous group_by(person) %>% mutate() kept the tibble grouped by
  # person, which silently affects any later verb applied to it.
  # Rows with a missing `person` form their own group.
  mutate(
    segment_share = total_sales / sum(total_sales),
    .by = person
  )

manager_segment_mix

person,segment,total_sales,segment_share
<chr>,<fct>,<dbl>,<dbl>
Alejandro Ballentine,Consumer,460752.82,0.5209642
Alejandro Ballentine,Corporate,254351.71,0.2875905
Alejandro Ballentine,Home Office,169318.65,0.1914453
Anna Andreadi,Consumer,1479981.17,0.5243879
Anna Andreadi,Corporate,850363.9,0.3013015
Anna Andreadi,Home Office,491957.45,0.1743107
Anthony Jacobs,Consumer,579550.47,0.5267757
Anthony Jacobs,Corporate,322826.91,0.2934298
Anthony Jacobs,Home Office,197807.23,0.1797946
Chuck Magee,Consumer,824890.16,0.5152642
