In [1]:
# Load required libraries
library(tidyverse)
library(janitor)
library(dplyr)
library(ggplot2)
library(skimr)
library(purrr)
library(lubridate)

# Source helper scripts
source("../../R/apply_factors.R")
source("../../R/analysis_helpers.R")
source("../../R/temporal_helpers.R")

# Load data
tables <- list(
  Orders  = readr::read_csv("../../data/processed/Orders.csv"),
  Returns = readr::read_csv("../../data/processed/Returns.csv"),
  People  = readr::read_csv("../../data/processed/People.csv")
)

# Apply factor transformations
tables <- apply_factors(tables)

# Extract tables
orders  <- tables$Orders
returns <- tables$Returns
people  <- tables$People


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.6
[32m✔[39m [34mforcats  [39m 1.0.1     [32m✔[39m [34mstringr  [39m 1.6.0
[32m✔[39m [34mggplot2  [39m 4.0.1     [32m✔[39m [34mtibble   [39m 3.3.0
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.2.0     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test


[1mRows: [22m[34m51290[39m [1mColumns: [22m[

# General Product Performance

### Which products generate the highest/lowest total sales?

In [None]:
product_sales <- orders |>  
    group_by(product_name, category, sub_category) |>  
    summarize(    
        total_sales = sum(sales),
        n_orders = n()  
        ) |>  
        ungroup() |>  
        arrange(desc(total_sales))

product_sales |> slice_head(n = 10)

[1m[22m`summarise()` has grouped output by 'product_name', 'category'. You can
override using the `.groups` argument.


product_name,category,sub_category,total_sales,n_orders
<chr>,<fct>,<fct>,<dbl>,<int>
"Apple Smart Phone, Full Size",Technology,Phones,86935.78,51
"Cisco Smart Phone, Full Size",Technology,Phones,76441.53,38
"Motorola Smart Phone, Full Size",Technology,Phones,73156.3,38
"Nokia Smart Phone, Full Size",Technology,Phones,71904.56,47
Canon imageCLASS 2200 Advanced Copier,Technology,Copiers,61599.82,5
"Hon Executive Leather Armchair, Adjustable",Furniture,Chairs,58193.48,49
"Office Star Executive Leather Armchair, Adjustable",Furniture,Chairs,50661.68,45
"Harbour Creations Executive Leather Armchair, Adjustable",Furniture,Chairs,50121.52,39
"Samsung Smart Phone, Cordless",Technology,Phones,48653.46,26
"Nokia Smart Phone, with Caller ID",Technology,Phones,47877.79,24


In [None]:
product_sales |> slice_tail(n = 10)

product_name,category,sub_category,total_sales,n_orders
<chr>,<fct>,<fct>,<dbl>,<int>
Newell 308,Office Supplies,Art,8.4,2
Stockwell Gold Paper Clips,Office Supplies,Fasteners,8.096,2
4009 Highlighters,Office Supplies,Art,8.04,1
Xerox 1989,Office Supplies,Paper,7.968,1
"Avery Hi-Liter Comfort Grip Fluorescent Highlighter, Yellow Ink",Office Supplies,Art,7.8,2
Avery Hi-Liter Pen Style Six-Color Fluorescent Set,Office Supplies,Art,7.7,1
Grip Seal Envelopes,Office Supplies,Envelopes,7.072,1
Xerox 20,Office Supplies,Paper,6.48,1
Avery 5,Office Supplies,Labels,5.76,1
Eureka Disposable Bags for Sanitaire Vibra Groomer I Upright Vac,Office Supplies,Appliances,1.624,1


### Which product categories perform best/worst?

In [None]:
category_sales <- orders |>  
    group_by(category) |>  
    summarize(    
        total_sales = sum(sales),    
        n_orders = n()  
        ) |>  
        arrange(desc(total_sales))

category_sales

category,total_sales,n_orders
<fct>,<dbl>,<int>
Technology,4744557,10141
Furniture,4110874,9876
Office Supplies,3787070,31273


### Which product sub-categories perform best/worst?

In [8]:
subcategory_sales <- orders |>
    group_by(category, sub_category) |>
    summarize(    
        total_sales = sum(sales),    
        n_orders = n()  
    ) |>  
    arrange(desc(total_sales))
subcategory_sales

[1m[22m`summarise()` has grouped output by 'category'. You can override using the
`.groups` argument.


category,sub_category,total_sales,n_orders
<fct>,<fct>,<dbl>,<int>
Technology,Phones,1706824.14,3357
Technology,Copiers,1509436.27,2223
Furniture,Chairs,1501681.76,3434
Furniture,Bookcases,1466572.24,2411
Office Supplies,Storage,1127085.86,5059
Office Supplies,Appliances,1011064.3,1755
Technology,Machines,779060.07,1486
Furniture,Tables,757041.92,861
Technology,Accessories,749237.02,3075
Office Supplies,Binders,461911.51,6152


# Revenue Structure & Concentration

### How concentrated are sales among top products/sub-categories?

In [10]:
product_sales <- product_sales |>  arrange(desc(total_sales)) |>  
    mutate(    
        cumulative_sales = cumsum(total_sales),    
        cumulative_share = cumulative_sales / sum(total_sales),    
        product_rank = row_number(),    
        product_share = product_rank / n()  
        )

product_sales |>  
    select(product_rank, product_name, total_sales, cumulative_share, product_share) |>
    slice_head(n = 20)

product_rank,product_name,total_sales,cumulative_share,product_share
<int>,<chr>,<dbl>,<dbl>,<dbl>
1,"Apple Smart Phone, Full Size",86935.78,0.006876469,0.0002633658
2,"Cisco Smart Phone, Full Size",76441.53,0.012922862,0.0005267316
3,"Motorola Smart Phone, Full Size",73156.3,0.018709399,0.0007900974
4,"Nokia Smart Phone, Full Size",71904.56,0.024396925,0.0010534633
5,Canon imageCLASS 2200 Advanced Copier,61599.82,0.029269364,0.0013168291
6,"Hon Executive Leather Armchair, Adjustable",58193.48,0.033872368,0.0015801949
7,"Office Star Executive Leather Armchair, Adjustable",50661.68,0.037879619,0.0018435607
8,"Harbour Creations Executive Leather Armchair, Adjustable",50121.52,0.041844144,0.0021069265
9,"Samsung Smart Phone, Cordless",48653.46,0.045692549,0.0023702923
10,"Nokia Smart Phone, with Caller ID",47877.79,0.049479599,0.0026336582


In [11]:
subcategory_sales <- subcategory_sales |> 
    ungroup() |>
    arrange(desc(total_sales)) |>  
    mutate(    
        cumulative_sales = cumsum(total_sales), 
        cumulative_share = cumulative_sales / sum(total_sales),    
        subcategory_rank = row_number(),    
        subcategory_share = subcategory_rank / n()  
    )
    
subcategory_sales |> select(subcategory_rank, sub_category, total_sales, cumulative_share, subcategory_share)

subcategory_rank,sub_category,total_sales,cumulative_share,subcategory_share
<int>,<fct>,<dbl>,<dbl>,<dbl>
1,Phones,1706824.14,0.1350068,0.05882353
2,Copiers,1509436.27,0.2544006,0.11764706
3,Chairs,1501681.76,0.3731811,0.17647059
4,Bookcases,1466572.24,0.4891844,0.23529412
5,Storage,1127085.86,0.5783349,0.29411765
6,Appliances,1011064.3,0.6583084,0.35294118
7,Machines,779060.07,0.7199307,0.41176471
8,Tables,757041.92,0.7798114,0.47058824
9,Accessories,749237.02,0.8390747,0.52941176
10,Binders,461911.51,0.8756111,0.58823529


### What share of products generate little or no revenue?

In [12]:
low_share <- 0.005

low_revenue_share_summary <- product_sales |>  
    arrange(total_sales) |>  
    mutate(    
        total_sales_all = sum(total_sales),    
        cum_sales = cumsum(total_sales),    
        cum_share = cum_sales / total_sales_all  
    ) |>  
    summarise(    
        total_products = n(),    
        low_revenue_products = sum(cum_share <= low_share),    
        share_products = low_revenue_products / total_products  
    )

low_revenue_share_summary

total_products,low_revenue_products,share_products
<int>,<int>,<dbl>
3797,735,0.1935739


In [13]:
bottom_share <- 0.10

bottom_products_summary <- product_sales |>  
    arrange(total_sales) |>  
    mutate(    
        total_revenue = sum(total_sales),    
        cum_revenue = cumsum(total_sales),    
        cum_share = cum_revenue / total_revenue  
    ) |>  
    summarise(    
        total_products = n(),    
        products_for_10pct_revenue = sum(cum_share <= bottom_share),    
        share_of_products = products_for_10pct_revenue / total_products  
    )

bottom_products_summary

total_products,products_for_10pct_revenue,share_of_products
<int>,<int>,<dbl>
3797,2209,0.5817751


# Volume vs Value

### Are top-selling products high-volume or high-value?

In [14]:
product_volume_value <- orders |>  
    group_by(product_name) |>  
    summarise(    
        total_sales = sum(sales),    
        total_quantity = sum(quantity),    
        avg_price_per_unit = total_sales / total_quantity,    
        n_orders = n(),    
        .groups = "drop"  
    )

top_products <- product_volume_value |>  
    arrange(desc(total_sales)) |>  
    slice_head(n = 20)

top_products

product_name,total_sales,total_quantity,avg_price_per_unit,n_orders
<chr>,<dbl>,<dbl>,<dbl>,<int>
"Apple Smart Phone, Full Size",86935.78,171,508.3964,51
"Cisco Smart Phone, Full Size",76441.53,139,549.9391,38
"Motorola Smart Phone, Full Size",73156.3,134,545.9426,38
"Nokia Smart Phone, Full Size",71904.56,147,489.1466,47
Canon imageCLASS 2200 Advanced Copier,61599.82,20,3079.9912,5
"Hon Executive Leather Armchair, Adjustable",58193.48,169,344.3401,49
"Office Star Executive Leather Armchair, Adjustable",50661.68,141,359.3027,45
"Harbour Creations Executive Leather Armchair, Adjustable",50121.52,142,352.9684,39
"Samsung Smart Phone, Cordless",48653.46,108,450.495,26
"Nokia Smart Phone, with Caller ID",47877.79,96,498.7269,24


### Which products have high order frequency but low total sales?

In [None]:
frequency_threshold <- quantile(product_volume_value$n_orders, 0.75)sales_threshold <- quantile(product_volume_value$total_sales, 0.25)high_freq_low_sales <- product_volume_value |>  filter(    n_orders >= frequency_threshold,    total_sales <= sales_threshold  ) |>  arrange(desc(n_orders))high_freq_low_sales


product_name,total_sales,total_quantity,avg_price_per_unit,n_orders
<chr>,<dbl>,<dbl>,<dbl>,<int>
"Novimex Round Labels, Adjustable",210.9464,67,3.148454,22
Avery Non-Stick Binders,217.316,71,3.060789,20
"Avery Round Labels, Adjustable",258.0144,65,3.969452,20
"Hon Round Labels, Adjustable",250.7088,68,3.686894,20
"Novimex Round Labels, Alphabetical",250.502,63,3.976222,19


# Category Structure

### How diverse is revenue within each category?

In [15]:
frequency_threshold <- quantile(product_volume_value$n_orders, 0.75)

sales_threshold <- quantile(product_volume_value$total_sales, 0.25)

high_freq_low_sales <- product_volume_value |>  
    filter(    
        n_orders >= frequency_threshold,    
        total_sales <= sales_threshold  
    ) |>  
    arrange(desc(n_orders))

high_freq_low_sales

product_name,total_sales,total_quantity,avg_price_per_unit,n_orders
<chr>,<dbl>,<dbl>,<dbl>,<int>
"Novimex Round Labels, Adjustable",210.9464,67,3.148454,22
Avery Non-Stick Binders,217.316,71,3.060789,20
"Avery Round Labels, Adjustable",258.0144,65,3.969452,20
"Hon Round Labels, Adjustable",250.7088,68,3.686894,20
"Novimex Round Labels, Alphabetical",250.502,63,3.976222,19


### Are sub-categories performing consistently within categories?

In [16]:
subcategory_sales <- orders |>  
    group_by(category, sub_category) |>  
    summarise(    
        total_sales = sum(sales),    
        n_products = n_distinct(product_name),    
        .groups = "drop"  
    )

subcategory_consistency <- subcategory_sales |>  
    group_by(category) |>  
    summarise(    
        n_subcategories = n(),    
        mean_sales = mean(total_sales),    
        sd_sales = sd(total_sales),    
        cv_sales = sd_sales / mean_sales,    
        max_to_min_ratio = max(total_sales) / min(total_sales),    
        .groups = "drop"  
    ) |>  
    arrange(desc(cv_sales))

subcategory_consistency

category,n_subcategories,mean_sales,sd_sales,cv_sales,max_to_min_ratio
<fct>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
Office Supplies,9,420785.6,389090.8,0.924677,15.35455
Furniture,4,1027718.5,548587.2,0.5337913,3.894623
Technology,4,1186139.4,494041.4,0.4165121,2.278083


# Stability & Consistency

### Are top products consistently strong across regions?

In [17]:
top_products <- product_sales |>  
    arrange(desc(total_sales)) |>  
    slice_head(n = 20) |>  
    pull(product_name)

product_region_sales <- orders |>  
    filter(product_name %in% top_products) |>  
    group_by(product_name, region) |>  
    summarise(    
        regional_sales = sum(sales),    
        .groups = "drop"  
    )

product_region_consistency <- product_region_sales |>  
    group_by(product_name) |>  
    summarise(    
        n_regions = n(),    
        mean_sales = mean(regional_sales),    
        sd_sales = sd(regional_sales),    
        cv_sales = sd_sales / mean_sales,    
        max_to_min_ratio = max(regional_sales) / min(regional_sales),    
        .groups = "drop"  
    ) |>  
    arrange(cv_sales)

product_region_consistency

product_name,n_regions,mean_sales,sd_sales,cv_sales,max_to_min_ratio
<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
Canon imageCLASS 2200 Advanced Copier,3,20533.275,8467.758,0.412392,2.15
"Harbour Creations Executive Leather Armchair, Adjustable",9,5569.057,2455.356,0.4408925,4.032839
"Motorola Smart Phone, Cordless",9,4325.671,2208.118,0.5104682,9.833333
"Hoover Stove, White",9,3649.178,1962.395,0.5377635,5.666667
"Cisco Smart Phone, Full Size",10,7644.153,4162.284,0.5445055,9.440052
"Apple Smart Phone, Full Size",10,8693.578,4766.375,0.5482639,5.701215
"Sauder Classic Bookcase, Traditional",10,3910.83,2451.227,0.6267793,13.846154
"Eldon File Cart, Single Width",9,3820.859,2420.586,0.6335189,33.369358
"Motorola Smart Phone, Full Size",11,6650.573,4358.91,0.6554187,18.215084
"Cisco Smart Phone, Cordless",7,5860.218,4034.921,0.6885274,8.883333


### Do products perform consistently across customer segments?

In [18]:
product_segment_sales <- orders |>  
    filter(product_name %in% top_products) |>  
    group_by(product_name, segment) |>  
    summarise(    
        segment_sales = sum(sales),    
        .groups = "drop"  
    )

product_segment_consistency <- product_segment_sales |>  
    group_by(product_name) |>  
    summarise(    
        n_segments = n(),    
        mean_sales = mean(segment_sales),    
        sd_sales = sd(segment_sales),    
        cv_sales = sd_sales / mean_sales,    
        max_to_min_ratio = max(segment_sales) / min(segment_sales),    
        .groups = "drop"  
    ) |>  
    arrange(cv_sales)

product_segment_consistency

product_name,n_segments,mean_sales,sd_sales,cv_sales,max_to_min_ratio
<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
"Sauder Classic Bookcase, Traditional",3,13036.1,3244.376,0.2488763,1.671254
"Apple Smart Phone, Full Size",3,28978.59,8941.616,0.3085594,1.754631
"Office Star Executive Leather Armchair, Adjustable",3,16887.23,5988.987,0.354646,1.839752
"Novimex Executive Leather Armchair, Adjustable",3,13528.38,4891.398,0.3615658,2.167203
"Hoover Stove, White",3,10947.53,4140.998,0.3782585,1.977825
"Motorola Smart Phone, Full Size",3,24385.43,10536.642,0.4320875,2.607857
"Motorola Smart Phone, Cordless",3,12977.01,5855.518,0.4512223,2.42
"Eldon File Cart, Single Width",3,11462.58,5198.821,0.4535473,2.245288
Canon imageCLASS 2200 Advanced Copier,3,20533.27,11163.45,0.5436761,2.9375
"Harbour Creations Executive Leather Armchair, Adjustable",3,16707.17,9450.878,0.5656779,3.840565


# Underperformance Diagnostics

### Are underperforming products concentrated in specific categories/sub-categories?

In [19]:
sales_threshold <- quantile(product_sales$total_sales, 0.1)

underperforming_products <- product_sales |>  
    filter(total_sales <= sales_threshold)

underperforming_by_category <- orders |>  
    filter(product_name %in% underperforming_products$product_name) |>  
    group_by(category) |>  
    summarise(    
        total_sales = sum(sales),    
        .groups = "drop"  
    )

underperforming_by_category

underperforming_by_subcategory <- orders |>  
    filter(product_name %in% underperforming_products$product_name) |>  
    group_by(category, sub_category) |>  
    summarise(    
        total_sales = sum(sales),    
        .groups = "drop"  
    ) |>  
    arrange(desc(total_sales))

underperforming_by_subcategory

category,total_sales
<fct>,<dbl>
Furniture,1896.55
Office Supplies,21463.592
Technology,1298.158


category,sub_category,total_sales
<fct>,<fct>,<dbl>
Office Supplies,Paper,6608.604
Office Supplies,Art,3999.076
Office Supplies,Binders,2773.146
Office Supplies,Envelopes,2491.514
Furniture,Furnishings,1806.624
Office Supplies,Fasteners,1718.434
Office Supplies,Labels,1536.926
Office Supplies,Appliances,953.172
Office Supplies,Supplies,764.432
Office Supplies,Storage,618.288


### Are there products with high sales but unusually high return rates?

In [20]:
orders_with_returns <- orders |>  
    inner_join(returns, by = "order_id", relationship = "many-to-many") |>  
    select(order_id, product_name, category, sub_category, sales)

product_returns <- orders_with_returns |>  
    group_by(product_name) |>  
    summarise(    
        total_returned_sales = sum(sales),    
        n_returns = n(),    
        .groups = "drop"  
    ) |>  
    arrange(desc(total_returned_sales))

high_sales_threshold <- quantile(product_returns$total_returned_sales, 0.75)

high_return_threshold <- quantile(product_returns$n_returns, 0.75)

high_sales_high_returns <- product_returns |>  
    filter(    
        total_returned_sales >= high_sales_threshold,    
        n_returns >= high_return_threshold  
    ) |>  
    arrange(desc(n_returns))

high_sales_high_returns

product_name,total_returned_sales,n_returns
<chr>,<dbl>,<int>
Staples,684.6100,17
"HP Copy Machine, Color",6980.3586,8
"Samsung Smart Phone, VoIP",17220.8960,6
"KitchenAid Refrigerator, White",5765.8368,5
"Samsung Audio Dock, Cordless",2613.0720,5
"Nokia Signal Booster, Cordless",2536.6345,5
"Hon Rocking Chair, Red",1369.3330,5
"Novimex Steel Folding Chair, Set of Two",1161.6000,5
"Office Star Steel Folding Chair, Red",1135.0641,5
"SanDisk Numeric Keypad, Bluetooth",812.8500,5


# Profitability Overview

### Which products generate the most/least profit?

In [21]:
product_profit <- orders |>  
    group_by(product_name) |>  
    summarise(    
        total_profit = sum(profit),    
        total_sales = sum(sales),    
        n_orders = n(),    
        .groups = "drop"  
    ) |>  
    arrange(desc(total_profit))

product_profit |> 
    slice_head(n = 10)

product_profit |> 
    slice_tail(n = 10)

product_name,total_profit,total_sales,n_orders
<chr>,<dbl>,<dbl>,<int>
Canon imageCLASS 2200 Advanced Copier,25199.928,61599.82,5
"Cisco Smart Phone, Full Size",17238.521,76441.53,38
"Motorola Smart Phone, Full Size",17027.113,73156.3,38
"Hoover Stove, Red",11807.969,31663.78,15
"Sauder Classic Bookcase, Traditional",10672.073,39108.3,29
"Harbour Creations Executive Leather Armchair, Adjustable",10427.326,50121.52,39
"Nokia Smart Phone, Full Size",9938.195,71904.56,47
"Cisco Smart Phone, with Caller ID",9786.641,43127.5,27
"Nokia Smart Phone, with Caller ID",9465.326,47877.79,24
"Belkin Router, USB",8955.018,23470.41,32


product_name,total_profit,total_sales,n_orders
<chr>,<dbl>,<dbl>,<int>
"Lesro Training Table, Rectangular",-2581.283,2711.647,5
"Bevis Wood Table, with Bottom Storage",-2782.588,11134.662,10
Chromcraft Bull-Nose Wood Oval Conference Tables & Bases,-2876.116,9917.64,5
"Rogers Lockers, Blue",-2893.491,28214.589,42
"Bevis Computer Table, Fully Assembled",-3509.564,11177.896,13
"Bevis Round Table, Adjustable Height",-3649.894,5654.796,5
Cubify CubeX 3D Printer Triple Head Print,-3839.99,7999.98,1
"Motorola Smart Phone, Cordless",-4447.038,38931.042,23
Lexmark MX611dhe Monochrome Laser Printer,-4589.973,16829.901,4
Cubify CubeX 3D Printer Double Head Print,-8879.97,11099.963,3


### Which products generate negative profit?

In [23]:
negative_product_profit <- orders |>  
    group_by(product_name, sub_category, category) |>  
    summarise(    
        total_profit = sum(profit),    
        total_sales = sum(sales),    
        n_orders = n(),    
        .groups = "drop"  
    ) |>  
    filter(    
        total_profit <= 0  
    ) |>  
    arrange(desc(total_profit))

negative_product_profit

product_name,sub_category,category,total_profit,total_sales,n_orders
<chr>,<fct>,<fct>,<dbl>,<dbl>,<int>
"Alliance Big Bands Rubber Bands, 12/Pack",Fasteners,Office Supplies,0.00000e+00,29.7000,4
Belkin OmniView SE Rackmount Kit,Storage,Office Supplies,0.00000e+00,248.3600,2
Global Super Steno Chair,Chairs,Furniture,-1.98952e-13,2389.9020,7
Premier Electric Letter Opener,Supplies,Office Supplies,-1.98952e-13,2641.6080,8
"Stiletto Ruler, Serrated",Supplies,Office Supplies,-1.37400e-01,607.6026,14
"Acco PRESSTEX Data Binder with Storage Hooks, Dark Blue, 9 1/2"" X 11""",Binders,Office Supplies,-1.61400e-01,62.9460,4
Rubber Band Ball,Fasteners,Office Supplies,-2.99200e-01,58.3440,4
"Brites Rubber Bands, 1 1/2 oz. Box",Fasteners,Office Supplies,-5.14800e-01,13.0680,3
Eldon Gobal File Keepers,Storage,Office Supplies,-6.05600e-01,348.2200,6
"Hon Color Coded Labels, Adjustable",Labels,Office Supplies,-6.14700e-01,479.4153,20


In [24]:
negative_subcategory_profit <- negative_product_profit |>
    group_by(sub_category) |>
    summarise(
        subcategory_profit = sum(total_profit),
        subcategory_sales = sum(total_sales),
        total_orders = sum(n_orders),
        n_products = n(),
        .groups = "drop"
    ) |>
    arrange(desc(subcategory_profit))

negative_subcategory_profit

sub_category,subcategory_profit,subcategory_sales,total_orders,n_products
<fct>,<dbl>,<dbl>,<int>,<int>
Labels,-369.8314,5662.369,241,13
Fasteners,-806.0523,14325.482,404,25
Paper,-1163.7703,14396.59,217,12
Envelopes,-1296.9348,19005.685,295,16
Art,-3418.5826,39255.227,417,13
Supplies,-3886.2656,81808.482,489,34
Binders,-5148.3845,53532.806,673,59
Accessories,-6236.6255,57933.327,343,24
Copiers,-7355.9894,159641.041,262,14
Furnishings,-8129.9257,84674.65,572,48


### How does profitability compare across product categories and sub-categories?

In [25]:
category_profit <- orders |>  
    group_by(category) |>  
    summarise(    
        total_profit = sum(profit),    
        total_sales = sum(sales),    
        profit_margin = total_profit / total_sales,    
        .groups = "drop"  
    ) |>  
    arrange(desc(total_profit))

category_profit

category,total_profit,total_sales,profit_margin
<fct>,<dbl>,<dbl>,<dbl>
Technology,663778.7,4744557,0.13990319
Office Supplies,518473.8,3787070,0.13690632
Furniture,286782.3,4110874,0.06976187


In [26]:
sub_category_sales_profit <- orders |>  
    group_by(sub_category) |>  
    summarise(    
        total_sales = sum(sales),    
        total_profit = sum(profit),    
        profit_margin = total_profit / total_sales,    
        .groups = "drop"  
    ) |>  
    arrange(profit_margin)

sub_category_sales_profit

sub_category,total_sales,total_profit,profit_margin
<fct>,<dbl>,<dbl>,<dbl>
Tables,757041.92,-64083.39,-0.08464972
Machines,779060.07,58867.87,0.07556269
Supplies,243074.22,22583.26,0.09290686
Chairs,1501681.76,141973.8,0.0945432
Storage,1127085.86,108461.49,0.09623179
Bookcases,1466572.24,161924.42,0.11041012
Furnishings,385578.26,46967.43,0.12181036
Phones,1706824.14,216717.01,0.12697091
Fasteners,83242.32,11525.42,0.13845631
Appliances,1011064.3,141680.59,0.14013015


# Lifecycle & Time

### Do top products show declining or improving performance over time?

In [27]:
orders_with_period <- orders |>  
    add_time_period("order_date", period = "year")

top_products <- product_sales |>  
    arrange(desc(total_sales)) |>  
    slice_head(n = 10) |>  
    pull(product_name)

product_time_sales <- orders_with_period |>  
    filter(product_name %in% top_products) |>  
    group_by(product_name, period) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        .groups = "drop"  
    )

product_time_sales

product_name,period,total_sales
<chr>,<date>,<dbl>
"Apple Smart Phone, Full Size",2011-01-01,14986.246
"Apple Smart Phone, Full Size",2012-01-01,19541.873
"Apple Smart Phone, Full Size",2013-01-01,21120.408
"Apple Smart Phone, Full Size",2014-01-01,31287.252
Canon imageCLASS 2200 Advanced Copier,2013-01-01,25899.926
Canon imageCLASS 2200 Advanced Copier,2014-01-01,35699.898
"Cisco Smart Phone, Full Size",2011-01-01,6662.65
"Cisco Smart Phone, Full Size",2012-01-01,11793.17
"Cisco Smart Phone, Full Size",2013-01-01,26737.133
"Cisco Smart Phone, Full Size",2014-01-01,31248.578
