# Appendix I: Calculations for sales in the East and West Regions

For additional context see Chapter 16.4 of [The Virus of Variation](https://www.brokenquality.com/book). Questions? Email James.Lehner@gmail.com or QualityIsBroken@gmail.com.

In [1]:
# Import libraries
from matplotlib import pyplot as plt
from matplotlib.ticker import FuncFormatter
from mpl_toolkits.mplot3d import Axes3D
import numpy as np
import seaborn as sns
import pandas as pd
from process_improvement import xmr_charts as xmr

%matplotlib inline

## Get the data

The dataset used in this appendix is called:

`sales-by-regions.csv`

The dataset can be found at https://github.com/jimlehner/the-virus-of-variation/tree/main/data.

In [2]:
# Get data function
def get_data(dataset_url) -> pd.DataFrame:
    """Load a CSV file into a DataFrame.

    Parameters
    ----------
    dataset_url
        Anything `pd.read_csv` accepts as its first argument; it is passed
        through untouched.

    Returns
    -------
    pd.DataFrame
        The parsed contents, read with `pd.read_csv` defaults.
    """
    frame = pd.read_csv(dataset_url)
    return frame

In [3]:
# Raw CSV location for the sales dataset
sales_url = r'https://raw.githubusercontent.com/jimlehner/the-virus-of-variation/refs/heads/main/data/sales-by-regions.csv'

# Fetch the data and store 'Month' with the pandas string dtype in one step
sales_df = get_data(sales_url).astype({'Month': 'string'})

# Preview the first rows
sales_df.head()

Unnamed: 0,Month,East Region,West Region
0,Jan,10.7,6.9
1,Feb,13.0,11.3
2,Mar,11.4,9.3
3,Apr,11.5,9.8
4,May,12.5,18.7


In [4]:
# Build one frame per region: keep 'Month', rename the region's column to
# 'Sales', and add the absolute month-to-month change as 'Moving Ranges'
df_east = (
    sales_df[['Month', 'East Region']]
    .rename(columns={'East Region': 'Sales'})
    .assign(**{'Moving Ranges': lambda frame: frame['Sales'].diff().abs()})
)
df_west = (
    sales_df[['Month', 'West Region']]
    .rename(columns={'West Region': 'Sales'})
    .assign(**{'Moving Ranges': lambda frame: frame['Sales'].diff().abs()})
)

# Region frames and their matching plot labels, in the same order
plot_label_names = ['East Region','West Region']
df_list = [df_east, df_west]

## Calculate process statistics using different measures of dispersion

In [5]:
# Create helper function for calculating process statistics
def region_stats(df, name, print_summary=True, round_value=2):
    """Compute summary statistics and two sets of process limits for a region.

    Limits are calculated from the unrounded mean, standard deviation and
    average moving range; rounding is applied only to the reported values so
    that rounding error in the inputs is not carried into the limits.

    Parameters
    ----------
    df : pd.DataFrame
        Frame with a 'Sales' column and a 'Moving Ranges' column.
    name : str
        Label used in the heading of the printed summary.
    print_summary : bool, default True
        When truthy, print the statistics.
    round_value : int, default 2
        Number of decimal places used for every reported value.

    Returns
    -------
    dict
        Rounded values keyed by 'Mean', 'Std Dev', 'Avg mR', 'UPL_E2',
        'LPL_E2', 'UPL_3sigma' and 'LPL_3sigma'.
    """
    # Bias correction factor that scales the average moving range into limits
    E2 = 2.660

    # Unrounded statistics -- every limit below is derived from these
    raw_mean = df['Sales'].mean()
    # ddof is delta degrees of freedom: 0 divides by n, 1 divides by n-1
    raw_sigma = df['Sales'].std(ddof=1)
    raw_amr = df['Moving Ranges'].mean()

    # Reported statistics
    mean = round(raw_mean, round_value)
    sigma = round(raw_sigma, round_value)
    amr = round(raw_amr, round_value)

    # E2-based process limits
    UPL = round(raw_mean + (E2 * raw_amr), round_value)
    LPL = round(raw_mean - (E2 * raw_amr), round_value)

    # 3-sigma process limits
    UPL_sigma = round(raw_mean + (3 * raw_sigma), round_value)
    LPL_sigma = round(raw_mean - (3 * raw_sigma), round_value)

    if print_summary:
        # Print summary
        print(f"\n{name} region statistics:")
        print(f"  Mean: {mean}")
        print(f"  Std Dev: {sigma}")
        print(f"  Avg mR: {amr}")
        print(f"  UPL (E2): {UPL} | LPL (E2): {LPL}")
        print(f"  UPL (3σ): {UPL_sigma} | LPL (3σ): {LPL_sigma}")

    # Return as a dictionary
    return {
        'Mean': mean,
        'Std Dev': sigma,
        'Avg mR': amr,
        'UPL_E2': UPL,
        'LPL_E2': LPL,
        'UPL_3sigma': UPL_sigma,
        'LPL_3sigma': LPL_sigma
    }

In [6]:
# Summarise East first, then West; each call prints its own report
east_stats, west_stats = [
    region_stats(frame, label)
    for frame, label in [(df_east, "East"), (df_west, "West")]
]


East region statistics:
  Mean: 12.75
  Std Dev: 1.7
  Avg mR: 1.66
  UPL (E2): 17.17 | LPL (E2): 8.33
  UPL (3σ): 17.85 | LPL (3σ): 7.65

West region statistics:
  Mean: 10.96
  Std Dev: 2.87
  Avg mR: 2.91
  UPL (E2): 18.7 | LPL (E2): 3.22
  UPL (3σ): 19.57 | LPL (3σ): 2.35


## Appendix I.1 East Region

In [14]:
# Present the East statistics as a one-column table indexed by statistic name
east_df = pd.Series(east_stats, name='Statistic').to_frame()
east_df

Unnamed: 0,Statistic
Mean,12.75
Std Dev,1.7
Avg mR,1.66
UPL_E2,17.17
LPL_E2,8.33
UPL_3sigma,17.85
LPL_3sigma,7.65


## Appendix I.2 West Region

In [13]:
# Present the West statistics as a one-column table indexed by statistic name
west_df = pd.Series(west_stats, name='Statistic').to_frame()
west_df

Unnamed: 0,Statistic
Mean,10.96
Std Dev,2.87
Avg mR,2.91
UPL_E2,18.7
LPL_E2,3.22
UPL_3sigma,19.57
LPL_3sigma,2.35
