In [1]:
# Load required libraries
library(tidyverse)
library(janitor)
library(dplyr)
library(ggplot2)
library(skimr)
library(purrr)
library(lubridate)

# Source helper scripts
source("../../R/apply_factors.R")
source("../../R/analysis_helpers.R")
source("../../R/temporal_helpers.R")

# Load data
tables <- list(
  Orders  = readr::read_csv("../../data/processed/Orders.csv"),
  Returns = readr::read_csv("../../data/processed/Returns.csv"),
  People  = readr::read_csv("../../data/processed/People.csv")
)

# Apply factor transformations
tables <- apply_factors(tables)

# Extract tables
orders  <- tables$Orders
returns <- tables$Returns
people  <- tables$People

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.6
[32m✔[39m [34mforcats  [39m 1.0.1     [32m✔[39m [34mstringr  [39m 1.6.0
[32m✔[39m [34mggplot2  [39m 4.0.1     [32m✔[39m [34mtibble   [39m 3.3.0
[32m✔[39m [34mlubridate[39m 1.9.4     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.2.0     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors

Attaching package: ‘janitor’


The following objects are masked from ‘package:stats’:

    chisq.test, fisher.test


[1mRows: [22m[34m51290[39m [1mColumns: [22m[

# General Geographic Performance

### Which regions generate the highest and lowest total sales?

In [2]:
region_sales <- orders |>  
    group_by(region) |>  
    summarise(    
        total_sales = sum(sales),    
        n_orders = n()  
    ) |>  
    arrange(desc(total_sales))

region_sales

region,total_sales,n_orders
<fct>,<dbl>,<int>
Central,2822302.52,11117
South,1600907.04,6645
North,1248165.6,4785
Oceania,1100184.61,3487
Southeast Asia,884423.17,3129
North Asia,848309.78,2338
EMEA,806161.31,5029
Africa,783773.21,4587
Central Asia,752826.57,2048
West,725457.82,3203


### How does performance differ across markets?

In [3]:
market_sales <- orders |>  
    group_by(market) |>  
    summarise(    
        total_sales = sum(sales),    
        mean_order_value = mean(sales),    
        n_orders = n()  
    ) |>  
    arrange(desc(total_sales))

market_sales

market,total_sales,mean_order_value,n_orders
<fct>,<dbl>,<dbl>,<int>
APAC,3585744.13,325.9175,11002
EU,2938089.06,293.8089,10000
US,2297200.86,229.858,9994
LATAM,2164605.17,210.2783,10294
EMEA,806161.31,160.3025,5029
Africa,783773.21,170.8684,4587
Canada,66928.17,174.2921,384


### Which countries contribute the most within each market?

In [4]:
country_market_sales <- orders |>  
    group_by(market, country) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        n_orders = n(),    
        .groups = "drop"  
    ) |>  
    filter(market == "US") |>  
    # "APAC" "LATAM" "EU" "EMEA" "Africa" "Canada" are the other options
    arrange(desc(total_sales))

country_market_sales |> 
    slice_head(n = 10)

country_market_sales |> 
    slice_tail(n = 10)

market,country,total_sales,n_orders
<fct>,<fct>,<dbl>,<int>
US,United States,2297201,9994


market,country,total_sales,n_orders
<fct>,<fct>,<dbl>,<int>
US,United States,2297201,9994


# Geographic Revenue Structure & Concentration

### How is revenue concentrated within regions?

In [5]:
region_country_sales <- orders |>  
    group_by(region, country) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        .groups = "drop"  
    )

region_concentration <- region_country_sales |>  
    group_by(region) |>  
    arrange(desc(total_sales), .by_group = TRUE) |>  
    mutate(    
        cumulative_sales = cumsum(total_sales),    
        cumulative_share = cumulative_sales / sum(total_sales),    
        country_rank = row_number()  
    ) |>  
    ungroup()

In [6]:
region_country_tables <- region_concentration |>  
    arrange(country_rank) |>  
    group_split(region)

names(region_country_tables) <- region_concentration |>  
    distinct(region) |>  
    pull(region)

region_country_tables

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
Africa,South Africa,95292.27,95292.27,0.1215814,1
Africa,Democratic Republic of the Congo,87416.58,182708.85,0.2331144,2
Africa,Morocco,87077.94,269786.79,0.3442154,3
Africa,Egypt,84139.32,353926.11,0.451567,4
Africa,Nigeria,54350.352,408276.46,0.5209115,5
Africa,Algeria,36091.59,444368.05,0.56696,6
Africa,Senegal,28848.9,473216.95,0.6037677,7
Africa,Tanzania,26816.139,500033.09,0.6379819,8
Africa,Zambia,26035.5,526068.59,0.6712,9
Africa,Cote d'Ivoire,25618.17,551686.76,0.7038857,10

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
Canada,Canada,66928.17,66928.17,1,1

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
Caribbean,Cuba,158854.935,158854.9,0.4898684,1
Caribbean,Dominican Republic,126140.581,284995.5,0.878854,2
Caribbean,Haiti,11038.329,296033.8,0.9128934,3
Caribbean,Barbados,7174.274,303208.1,0.935017,4
Caribbean,Trinidad and Tobago,6881.164,310089.3,0.9562368,5
Caribbean,Jamaica,6761.786,316851.1,0.9770884,6
Caribbean,Martinique,5968.151,322819.2,0.9954927,7
Caribbean,Guadeloupe,1461.64,324280.9,1.0,8

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
Central,France,858931.08,858931.1,0.304337,1
Central,Germany,628840.03,1487771.1,0.527148,2
Central,United States,501239.89,1989011.0,0.7047476,3
Central,El Salvador,177554.9,2166565.9,0.767659,4
Central,Nicaragua,149687.06,2316253.0,0.8206962,5
Central,Guatemala,131602.47,2447855.4,0.8673257,6
Central,Honduras,90125.65,2537981.1,0.8992591,7
Central,Austria,81162.0,2619143.1,0.9280164,8
Central,Netherlands,77514.95,2696658.0,0.9554816,9
Central,Panama,51539.93,2748198.0,0.9737432,10

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
Central Asia,India,589650.1,589650.1,0.7832483,1
Central Asia,Bangladesh,78256.47,667906.6,0.8871985,2
Central Asia,Pakistan,58872.61,726779.2,0.9654006,3
Central Asia,Afghanistan,21673.32,748452.5,0.9941898,4
Central Asia,Nepal,3522.24,751974.7,0.9988685,5
Central Asia,Sri Lanka,851.82,752826.6,1.0,6

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
East,United States,678781.2,678781.2,1,1

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
EMEA,Iran,113746.11,113746.1,0.141096,1
EMEA,Turkey,108507.948,222254.1,0.2756943,2
EMEA,Ukraine,86857.17,309111.2,0.3834359,3
EMEA,Russia,82913.88,392025.1,0.4862862,4
EMEA,Saudi Arabia,82012.2,474037.3,0.5880179,5
EMEA,Iraq,70714.8,544752.1,0.6757359,6
EMEA,Poland,44228.85,588981.0,0.7305994,7
EMEA,Romania,37256.58,626237.5,0.7768142,8
EMEA,Israel,19294.08,645531.6,0.8007475,9
EMEA,Bulgaria,15557.64,661089.3,0.8200459,10

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
North,Mexico,622590.618,622590.6,0.4988045,1
North,United Kingdom,528576.3,1151166.9,0.922287,2
North,Sweden,30491.403,1181658.3,0.946716,3
North,Finland,20704.35,1202362.7,0.9633038,4
North,Norway,20525.37,1222888.0,0.9797482,5
North,Ireland,16639.509,1239527.5,0.9930794,6
North,Denmark,8638.053,1248165.6,1.0,7

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
North Asia,China,700562.03,700562.0,0.8258328,1
North Asia,Japan,100787.52,801349.5,0.9446426,2
North Asia,South Korea,33125.38,834474.9,0.9836913,3
North Asia,Taiwan,7647.63,842122.6,0.9927064,4
North Asia,Hong Kong,6147.0,848269.6,0.9999526,5
North Asia,Mongolia,40.23,848309.8,1.0,6

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
Oceania,Australia,925235.853,925235.9,0.8409824,1
Oceania,New Zealand,172020.624,1097256.5,0.9973385,2
Oceania,Papua New Guinea,2928.135,1100184.6,1.0,3

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
South,United States,391721.905,391721.9,0.2446875,1
South,Brazil,361106.419,752828.3,0.4702511,2
South,Italy,289709.658,1042538.0,0.6512171,3
South,Spain,287146.68,1329684.7,0.8305821,4
South,Colombia,81502.53,1411187.2,0.8814923,5
South,Argentina,57511.783,1468699.0,0.9174168,6
South,Chile,35447.071,1504146.0,0.9395586,7
South,Venezuela,26587.83,1530733.9,0.9561666,8
South,Peru,17833.184,1548567.1,0.967306,9
South,Portugal,15105.12,1563672.2,0.9767414,10

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
Southeast Asia,Indonesia,404887.5,404887.5,0.4577984,1
Southeast Asia,Philippines,183420.16,588307.7,0.665188,2
Southeast Asia,Thailand,77051.96,665359.6,0.7523091,3
Southeast Asia,Vietnam,65800.2,731159.8,0.8267081,4
Southeast Asia,Malaysia,61362.21,792522.0,0.8960892,5
Southeast Asia,Singapore,40286.25,832808.3,0.9416401,6
Southeast Asia,Myanmar (Burma),34138.87,866947.1,0.9802402,7
Southeast Asia,Cambodia,17476.02,884423.2,1.0,8

region,country,total_sales,cumulative_sales,cumulative_share,country_rank
<fct>,<fct>,<dbl>,<dbl>,<dbl>,<int>
West,United States,725457.8,725457.8,1,1


### What is the diversity in spread across countries in each region?

In [7]:
region_diversity <- region_country_sales |>  
    group_by(region) |>  
    summarise(    
        n_countries = n(),    
        regional_sales = sum(total_sales),    
        mean_country_sales = mean(total_sales),    
        sd_country_sales = sd(total_sales),    
        cv_country_sales = sd_country_sales / mean_country_sales,    
        .groups = "drop"  
    ) |>  
    arrange(cv_country_sales)

region_diversity

region,n_countries,regional_sales,mean_country_sales,sd_country_sales,cv_country_sales
<fct>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
Central,12,2822302.52,235191.88,272751.86,1.159699
Southeast Asia,8,884423.17,110552.9,129243.48,1.169065
South,14,1600907.04,114350.5,146769.71,1.283507
Oceania,3,1100184.61,366728.2,491015.44,1.338908
Africa,45,783773.21,17417.18,25337.04,1.454715
North,7,1248165.6,178309.37,272819.7,1.530036
Caribbean,8,324280.86,40535.11,63589.89,1.568761
EMEA,40,806161.31,20154.03,31893.25,1.582475
Central Asia,6,752826.57,125471.09,229478.63,1.828936
North Asia,6,848309.78,141384.96,276478.06,1.955498


# Geographic Volume vs Value

### Do regions differ in average order value versus order volume?

In [8]:
region_volume_value <- orders |>  
    group_by(region) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        n_orders = n(),    
        avg_order_value = mean(sales, na.rm = TRUE),    
        .groups = "drop"  
    ) |>  
    arrange(desc(total_sales))

region_volume_value

region,total_sales,n_orders,avg_order_value
<fct>,<dbl>,<int>,<dbl>
Central,2822302.52,11117,253.8727
South,1600907.04,6645,240.919
North,1248165.6,4785,260.8497
Oceania,1100184.61,3487,315.5104
Southeast Asia,884423.17,3129,282.6536
North Asia,848309.78,2338,362.8357
EMEA,806161.31,5029,160.3025
Africa,783773.21,4587,170.8684
Central Asia,752826.57,2048,367.5911
West,725457.82,3203,226.4932


### Are there regions with many orders but low total sales?

In [9]:
region_volume_value |> arrange(n_orders, avg_order_value)

region,total_sales,n_orders,avg_order_value
<fct>,<dbl>,<int>,<dbl>
Canada,66928.17,384,174.2921
Caribbean,324280.86,1690,191.8822
Central Asia,752826.57,2048,367.5911
North Asia,848309.78,2338,362.8357
East,678781.24,2848,238.3361
Southeast Asia,884423.17,3129,282.6536
West,725457.82,3203,226.4932
Oceania,1100184.61,3487,315.5104
Africa,783773.21,4587,170.8684
North,1248165.6,4785,260.8497


In [10]:
region_volume_value |>  
    mutate(    
        high_volume_low_value = n_orders > median(n_orders) &      
            avg_order_value < median(avg_order_value)  
    ) |>  
    filter(high_volume_low_value)

region,total_sales,n_orders,avg_order_value,high_volume_low_value
<fct>,<dbl>,<int>,<dbl>,<lgl>
EMEA,806161.3,5029,160.3025,True
Africa,783773.2,4587,170.8684,True


# Category Composition

### How does product category composition vary by region?

In [11]:
region_category_sales <- orders |>  
    group_by(region, category) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        .groups = "drop"  
    )

region_category_share <- region_category_sales |>  
    group_by(region) |>  
    mutate(    
        category_share = total_sales / sum(total_sales)  
    ) |>  
    # filter(category == "Furniture") |>  
    ungroup()

region_category_share

region,category,total_sales,category_share
<fct>,<fct>,<dbl>,<dbl>
Africa,Furniture,194650.64,0.2483507
Africa,Office Supplies,266755.53,0.3403479
Africa,Technology,322367.04,0.4113014
Canada,Furniture,10595.28,0.1583082
Canada,Office Supplies,30034.08,0.448751
Canada,Technology,26298.81,0.3929408
Caribbean,Furniture,118372.4,0.3650305
Caribbean,Office Supplies,89575.42,0.2762279
Caribbean,Technology,116333.05,0.3587416
Central,Furniture,860417.58,0.3048637


### Are certain regions specialized in specific product categories?

In [12]:
region_specialization <- region_category_share |>  
    arrange(region, desc(category_share))

region_specialization |>  
    group_by(region) |>  
    slice_max(category_share, n = 1)

region,category,total_sales,category_share
<fct>,<fct>,<dbl>,<dbl>
Africa,Technology,322367.04,0.4113014
Canada,Office Supplies,30034.08,0.448751
Caribbean,Furniture,118372.4,0.3650305
Central,Technology,1038449.66,0.3679441
Central Asia,Technology,305697.32,0.4060661
East,Technology,264973.98,0.3903673
EMEA,Technology,300854.58,0.373194
North,Technology,495802.23,0.3972247
North Asia,Furniture,335716.46,0.3957475
Oceania,Furniture,410468.0,0.3730901


In [13]:
region_specialization |>  
    group_by(region) |>  
    summarise(    
        top_category_share = max(category_share),    
        .groups = "drop"  
    ) |>  
    arrange(desc(top_category_share))

region,top_category_share
<fct>,<dbl>
Canada,0.448751
Africa,0.4113014
Central Asia,0.4060661
North,0.3972247
North Asia,0.3957475
East,0.3903673
EMEA,0.373194
Oceania,0.3730901
Southeast Asia,0.3728434
Central,0.3679441


# Stability & Consistency

### Are regional sales patterns consistent across customer segments?

In [14]:
region_segment_sales <- orders |>  
    group_by(region, segment) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        n_orders = n(),    
        .groups = "drop"  
    )

region_segment_share <- region_segment_sales |>  
    group_by(region) |>  
    mutate(    
        segment_share = total_sales / sum(total_sales)  
    ) |>  
    # filter (segment == "Home Office") |>  
    ungroup()

region_segment_share

region,segment,total_sales,n_orders,segment_share
<fct>,<fct>,<dbl>,<int>,<dbl>
Africa,Consumer,423766.81,2381,0.5406753
Africa,Corporate,204938.95,1312,0.2614774
Africa,Home Office,155067.45,894,0.1978473
Canada,Consumer,35719.11,202,0.5336932
Canada,Corporate,19313.73,110,0.288574
Canada,Home Office,11895.33,72,0.1777328
Caribbean,Consumer,162349.21,828,0.5006438
Caribbean,Corporate,104537.86,507,0.3223683
Caribbean,Home Office,57393.8,355,0.1769879
Central,Consumer,1479981.17,5782,0.5243879


### Do regions show stable performance across time, or high volatility?

In [15]:
orders_yearly <- orders |>  
    add_time_period("order_date", period = "year")

region_time_sales <- orders_yearly |>  
    group_by(region, period) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        .groups = "drop"  
    )

region_volatility <- region_time_sales |>  
    group_by(region) |>  
    summarise(    
        mean_sales = mean(total_sales),    
        sd_sales = sd(total_sales),    
        cv_sales = sd_sales / mean_sales,    
        .groups = "drop"  
    ) |>  
    arrange(cv_sales)

region_volatility

region,mean_sales,sd_sales,cv_sales
<fct>,<dbl>,<dbl>,<dbl>
East,169695.31,35936.33,0.2117697
North Asia,212077.45,49137.293,0.231695
Central,705575.63,186168.595,0.2638535
West,181364.46,50544.938,0.2786926
North,312041.4,88914.168,0.2849435
South,400226.76,117787.467,0.2943018
Caribbean,81070.22,24036.372,0.2964883
Central Asia,188206.64,56321.395,0.299253
Oceania,275046.15,83062.357,0.3019943
Southeast Asia,221105.79,74567.09,0.3372462


# Underperformance Diagnostics

### Are underperforming regions associated with markets/countries?

In [16]:
region_sales <- orders |>  
    group_by(region) |>  
    summarise(    
        total_sales = sum(sales),    
        n_orders = n(),    
        avd_order_value = mean(sales),    
        .groups = "drop"  
    )

sales_threshold <- quantile(region_sales$total_sales, 0.25)

underperforming_regions <- region_sales |>  
    filter(total_sales < sales_threshold)

underperforming_regions

region,total_sales,n_orders,avd_order_value
<fct>,<dbl>,<int>,<dbl>
Canada,66928.17,384,174.2921
Caribbean,324280.86,1690,191.8822
East,678781.24,2848,238.3361


In [17]:
orders |>  
    filter(region %in% underperforming_regions$region) |>  
    group_by(region, market, country) |>  
    summarise(    
        total_sales = sum(sales),    
        n_orders = n(),    
        avd_order_value = mean(sales),    
        .groups = "drop"  
    ) |>  
    arrange(region, desc(total_sales))

region,market,country,total_sales,n_orders,avd_order_value
<fct>,<fct>,<fct>,<dbl>,<int>,<dbl>
Canada,Canada,Canada,66928.17,384,174.2921
Caribbean,LATAM,Cuba,158854.935,724,219.4129
Caribbean,LATAM,Dominican Republic,126140.581,742,170.0008
Caribbean,LATAM,Haiti,11038.329,104,106.1378
Caribbean,LATAM,Barbados,7174.274,26,275.9336
Caribbean,LATAM,Trinidad and Tobago,6881.164,30,229.3721
Caribbean,LATAM,Jamaica,6761.786,31,218.1221
Caribbean,LATAM,Martinique,5968.151,25,238.726
Caribbean,LATAM,Guadeloupe,1461.64,8,182.705
East,US,United States,678781.24,2848,238.3361


### Do regions with lower sales show lower order counts or values?

In [18]:
region_volume_value <- orders |>  
    group_by(region) |>  
    summarise(    
        total_sales = sum(sales, na.rm = TRUE),    
        n_orders = n(),    
        avg_order_value = mean(sales, na.rm = TRUE),    
        .groups = "drop"  
    )

In [19]:
region_volume_value |>  
    mutate(    
        sales_per_order = total_sales / n_orders,    
        performance_group = ifelse(      
            total_sales <= sales_threshold,      
            "Underperforming",      
            "Other"    
        )  
    ) |>  
    arrange(performance_group, sales_per_order)

region,total_sales,n_orders,avg_order_value,sales_per_order,performance_group
<fct>,<dbl>,<int>,<dbl>,<dbl>,<chr>
EMEA,806161.31,5029,160.3025,160.3025,Other
Africa,783773.21,4587,170.8684,170.8684,Other
South,1600907.04,6645,240.919,240.919,Other
Central,2822302.52,11117,253.8727,253.8727,Other
North,1248165.6,4785,260.8497,260.8497,Other
Southeast Asia,884423.17,3129,282.6536,282.6536,Other
Oceania,1100184.61,3487,315.5104,315.5104,Other
North Asia,848309.78,2338,362.8357,362.8357,Other
Central Asia,752826.57,2048,367.5911,367.5911,Other
Canada,66928.17,384,174.2921,174.2921,Underperforming


# Geographic Inequality & Distribution

### How unequal is revenue distribution across regions and markets?

In [20]:
region_inequality <- region_sales |>  
    summarise(    
        mean_sales = mean(total_sales),    
        sd_sales = sd(total_sales),    
        cv_sales = sd_sales / mean_sales  
    )

region_inequality

mean_sales,sd_sales,cv_sales
<dbl>,<dbl>,<dbl>
972500.1,672757.5,0.6917814


In [21]:
country_sales <- orders |>  
    group_by(market) |>  
    summarise(    
        total_sales = sum(sales),    
        .groups = "drop"  
    )

market_inequality <- market_sales |>  
    summarise(    
        mean_sales = mean(total_sales),    
        sd_sales = sd(total_sales),    
        cv_sales = sd_sales / mean_sales  
    )

market_inequality

mean_sales,sd_sales,cv_sales
<dbl>,<dbl>,<dbl>
1806072,1283744,0.7107933
