In [1]:
# load required libraries
library(tidyverse)
library(janitor)
library(dplyr)
library(ggplot2)
library(skimr)
library(purrr)
library(lubridate)

# Load the project's helper scripts before touching any data.
source("../../R/apply_factors.R")
source("../../R/analysis_helpers.R")
source("../../R/temporal_helpers.R")

# Read the three processed CSV files into a named list and pass the list
# through apply_factors().
# NOTE(review): apply_factors() is defined in ../../R/apply_factors.R and is
# presumably what turns columns such as `segment` into factors -- confirm.
tables <- apply_factors(
  list(
    Orders   = readr::read_csv("../../data/processed/Orders.csv"),
    Returns  = readr::read_csv("../../data/processed/Returns.csv"),
    People   = readr::read_csv("../../data/processed/People.csv")
  )
)

# Unpack each table into its own variable for the analysis cells below.
orders <- tables[["Orders"]]
returns <- tables[["Returns"]]
people <- tables[["People"]]

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.6
✔ forcats   1.0.1     ✔ stringr   1.6.0
✔ ggplot2   4.0.1     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.2.0     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test


[1mRows: [22m[34m51290[39m [1mColumns: [22m[

# General Segment Performance

### Q1. How do total sales and order volume differ across segments?

In [None]:
# Q1: one row per segment with its summed sales and its row count, ordered
# from the highest-selling segment to the lowest.
# NOTE(review): n() counts rows of `orders`. If the table holds one row per
# order line rather than one row per order, `n_orders` is a line-item count;
# confirm, and consider n_distinct() on the order key if so.
segment_overview <- orders |>
  group_by(segment) |>
  summarise(
    total_sales = sum(sales),
    n_orders = n(),
    .groups = "drop"
  ) |>
  arrange(desc(total_sales))

segment_overview

segment,total_sales,n_orders
<fct>,<dbl>,<int>
Consumer,6507949,26518
Corporate,3824698,15429
Home Office,2309855,9343


### Q2. How do average order values vary across segments?

In [None]:
# Q2: per-segment sales divided by per-segment row count, sorted so the
# segment with the largest average comes first.
# NOTE(review): the denominator is n(), i.e. the number of rows. If `orders`
# is stored at line-item level this is an average per line, not per order --
# confirm against the table's grain.
segment_aov <- orders |>
  group_by(segment) |>
  summarise(
    total_sales = sum(sales),
    n_orders = n(),
    .groups = "drop"
  ) |>
  # Derived after aggregation; same value as computing it inside summarise().
  mutate(avg_order_value = total_sales / n_orders) |>
  arrange(desc(avg_order_value))

segment_aov

segment,total_sales,n_orders,avg_order_value
<fct>,<dbl>,<int>,<dbl>
Corporate,3824698,15429,247.8902
Home Office,2309855,9343,247.2284
Consumer,6507949,26518,245.4163


# Category Preferences by Segment

### Q3. Which product categories dominate sales within each segment?

In [None]:
# Q3: sales per (segment, category) pair plus each category's share of its
# segment's total sales, listed within each segment from largest share down.
segment_category_sales <- orders %>%
    group_by(segment, category) %>%
    summarise(
        total_sales = sum(sales),
        .groups = "drop"
    ) %>%
    # Regroup by segment so sum(total_sales) is the segment total.
    group_by(segment) %>%
    mutate(
        category_share = total_sales / sum(total_sales)
    ) %>%
    # Drop the grouping so the stored result is a plain tibble; otherwise any
    # later mutate/summarise/slice on it would silently run per segment.
    ungroup() %>%
    arrange(segment, desc(category_share))

segment_category_sales

segment,category,total_sales,category_share
<fct>,<fct>,<dbl>,<dbl>
Consumer,Technology,2427040.0,0.3729347
Consumer,Furniture,2128395.8,0.3270455
Consumer,Office Supplies,1952513.6,0.3000198
Corporate,Technology,1417791.4,0.3706937
Corporate,Furniture,1264519.8,0.3306196
Corporate,Office Supplies,1142386.4,0.2986867
Home Office,Technology,899726.1,0.3895163
Home Office,Furniture,717958.6,0.3108241
Home Office,Office Supplies,692170.2,0.2996596


### Q4. Are certain sub-categories important for specific segments?

In [None]:
# Q4: sales per (segment, category, sub_category) and each sub-category's
# share of its segment's total sales, listed within each segment from the
# largest share down.
segment_subcategory_sales <- orders %>%
    group_by(segment, category, sub_category) %>%
    summarise(
        total_sales = sum(sales),
        .groups = "drop"
    ) %>%
    # Regroup by segment only: shares are relative to the whole segment, not
    # to the parent category.
    group_by(segment) %>%
    mutate(
        subcategory_share = total_sales / sum(total_sales)
    ) %>%
    # Drop the grouping so the stored result is a plain tibble; otherwise any
    # later mutate/summarise/slice on it would silently run per segment.
    ungroup() %>%
    arrange(segment, desc(subcategory_share))

segment_subcategory_sales

segment,category,sub_category,total_sales,subcategory_share
<fct>,<fct>,<fct>,<dbl>,<dbl>
Consumer,Technology,Phones,905422.28,0.139125587
Consumer,Furniture,Chairs,778362.53,0.11960181
Consumer,Furniture,Bookcases,765111.14,0.117565626
Consumer,Technology,Copiers,757081.42,0.116331792
Consumer,Office Supplies,Storage,575506.16,0.088431258
Consumer,Office Supplies,Appliances,510230.28,0.078401083
Consumer,Technology,Machines,382373.05,0.058754766
Consumer,Technology,Accessories,382163.27,0.058722532
Consumer,Furniture,Tables,381726.98,0.058655492
Consumer,Office Supplies,Binders,253745.11,0.038990025


# Volume vs Value

### Q5. Are some segments volume-driven while others are value-driven?

In [None]:
# Q5: volume (row count) and value (sales per row) side by side for each
# segment. Rows come back in group_by() key order; no extra sorting is applied.
# NOTE(review): n() counts rows of `orders`, which may be order lines rather
# than distinct orders -- confirm the table's grain.
segment_volume_value <- orders |>
  group_by(segment) |>
  summarise(
    total_sales = sum(sales),
    n_orders = n(),
    .groups = "drop"
  ) |>
  # Derived after aggregation; same value as computing it inside summarise().
  mutate(avg_order_value = total_sales / n_orders)

segment_volume_value

segment,total_sales,n_orders,avg_order_value
<fct>,<dbl>,<int>,<dbl>
Consumer,6507949,26518,245.4163
Corporate,3824698,15429,247.8902
Home Office,2309855,9343,247.2284


### Q6. Do segments differ in order frequency per customer?

In [8]:
# Q6: count rows per (segment, customer_name), then take the mean and median
# of those per-customer counts within each segment.
# NOTE(review): the per-customer figure is a row count. If `orders` has one
# row per order line it overstates the number of orders -- confirm. Customers
# are also identified by name here; verify that names are unique.
segment_order_frequency <- orders |>
  count(segment, customer_name, name = "customer_orders") |>
  group_by(segment) |>
  summarise(
    avg_orders_per_customer = mean(customer_orders),
    median_orders_per_customer = median(customer_orders),
    .groups = "drop"
  )

segment_order_frequency

segment,avg_orders_per_customer,median_orders_per_customer
<fct>,<dbl>,<dbl>
Consumer,64.83619,65.0
Corporate,64.82773,63.0
Home Office,63.12838,62.5


# Geographic Interaction

### Q7. Do segments exhibit different purchasing patterns across regions?

In [9]:
# Q7: sales per (region, segment) and each segment's share of its region's
# total sales, listed within each region from the largest share down.
segment_region_sales <- orders %>%
    group_by(region, segment) %>%
    summarise(
        # Missing sales values are ignored when summing.
        total_sales = sum(sales, na.rm = TRUE),
        .groups = "drop"
    ) %>%
    # Regroup by region so sum(total_sales) is the regional total.
    group_by(region) %>%
    mutate(
        segment_share = total_sales / sum(total_sales)
    ) %>%
    # Drop the grouping so the stored result is a plain tibble; otherwise any
    # later mutate/summarise/slice on it would silently run per region.
    ungroup() %>%
    arrange(region, desc(segment_share))

segment_region_sales

region,segment,total_sales,segment_share
<fct>,<fct>,<dbl>,<dbl>
Africa,Consumer,423766.81,0.5406753
Africa,Corporate,204938.95,0.2614774
Africa,Home Office,155067.45,0.1978473
Canada,Consumer,35719.11,0.5336932
Canada,Corporate,19313.73,0.288574
Canada,Home Office,11895.33,0.1777328
Caribbean,Consumer,162349.21,0.5006438
Caribbean,Corporate,104537.86,0.3223683
Caribbean,Home Office,57393.8,0.1769879
Central,Consumer,1479981.17,0.5243879


# Stability & Underperformance

### Q8. Is any segment consistently underperforming across regions or categories?

In [None]:
# Q8 (regions): total sales per (segment, region), then for each segment the
# mean and standard deviation of those regional totals and their ratio
# (coefficient of variation).
segment_region_performance <- orders |>
  group_by(segment, region) |>
  summarise(total_sales = sum(sales), .groups = "drop") |>
  group_by(segment) |>
  summarise(
    mean_sales = mean(total_sales),
    sd_sales = sd(total_sales),
    .groups = "drop"
  ) |>
  # Relative spread: sd scaled by the mean, so segments of different size
  # can be compared.
  mutate(cv_sales = sd_sales / mean_sales)

segment_region_performance

segment,mean_sales,sd_sales,cv_sales
<fct>,<dbl>,<dbl>,<dbl>
Consumer,500611.5,354180.8,0.7074964
Corporate,294207.5,204447.1,0.6949078
Home Office,177681.2,115154.9,0.6480983


In [None]:
# Q8 (categories): total sales per (segment, category), ignoring missing sales
# values, then for each segment the mean and standard deviation of those
# category totals and their ratio (coefficient of variation).
segment_category_performance <- orders |>
  group_by(segment, category) |>
  summarise(total_sales = sum(sales, na.rm = TRUE), .groups = "drop") |>
  group_by(segment) |>
  summarise(
    mean_sales = mean(total_sales),
    sd_sales = sd(total_sales),
    .groups = "drop"
  ) |>
  # Relative spread: sd scaled by the mean, so segments of different size
  # can be compared.
  mutate(cv_sales = sd_sales / mean_sales)

segment_category_performance
