# Exercise Chapter 5 
Industrial Statistics: A Computer Based Approach with Python<br>
by Ron Kenett, Shelemyahu Zacks, Peter Gedeck

Publisher: Springer International Publishing; 1st edition (2023) <br>
<!-- ISBN-13: 978-3031075650 -->

(c) 2022 Ron Kenett, Shelemyahu Zacks, Peter Gedeck

The code needs to be executed in sequence.

In [None]:
import os
# Set before `outdated` is imported below.
# NOTE(review): presumably this tells the `outdated` package to skip its
# version check - confirm against the package documentation.
os.environ['OUTDATED_IGNORE'] = '1'
import warnings
from outdated import OutdatedPackageWarning
# Do not show FutureWarning or OutdatedPackageWarning messages.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=OutdatedPackageWarning)

In [None]:
import numpy as np
import pandas as pd
from scipy import stats
import statsmodels.formula.api as smf
from statsmodels.stats import anova
import matplotlib.pyplot as plt

import mistat

# Exercise 6

In [None]:
# Load the SOCELL dataset shipped with mistat
ISC = mistat.load_data('SOCELL')

# t-test for two related samples: column t2 against column t1
paired_ttest = stats.ttest_rel(ISC['t2'], ISC['t1'])
paired_ttest

# Exercise 7

In [None]:
def statistic(x, y):
    """Return the mean of the element-wise differences x - y."""
    differences = x - y
    return np.mean(differences)

In [None]:
def statistic(x, y):
    """Return the difference between the mean of x and the mean of y."""
    mean_x = np.mean(x)
    mean_y = np.mean(y)
    return mean_x - mean_y

# Permutation test on the columns t2 and t3, using `statistic` as the
# test statistic and 1000 random resamples of type 'samples'.
paired_samples = (ISC['t2'], ISC['t3'])
res = stats.permutation_test(
    paired_samples,
    statistic,
    permutation_type='samples',
    n_resamples=1000,
)
res.pvalue.round(5)

In [None]:
# p-value of the related-samples t-test on the same two columns, for comparison
paired_ttest = stats.ttest_rel(ISC['t2'], ISC['t3'])
paired_ttest.pvalue.round(5)

In [None]:
# Histogram of the permutation null distribution with the statistic of the
# actual data marked by a vertical line.
observed = statistic(ISC['t2'], ISC['t3'])

fig, ax = plt.subplots()
ax.hist(res.null_distribution, bins=20, color='lightgrey')
ax.axvline(observed, color='black')
plt.show()

# Exercise 8

In [None]:
# Results per blend, listed in the order of treatments A, B, C, D
treatments = ['A', 'B', 'C', 'D']
results_by_blend = {
    'B1': [89, 88, 97, 94],
    'B2': [84, 77, 92, 79],
    'B3': [81, 87, 87, 85],
    'B4': [87, 92, 89, 84],
    'B5': [79, 81, 80, 88],
}

# Long format: one row per (blend, treatment) combination
df = pd.DataFrame(
    [
        (blend, treatment, result)
        for blend, results in results_by_blend.items()
        for treatment, result in zip(treatments, results)
    ],
    columns=['blend', 'treatment', 'result'],
)

In [None]:
# Linear model with blend and treatment as categorical factors,
# followed by its ANOVA table.
blend_treatment_ols = smf.ols(formula='result ~ C(blend) + C(treatment)', data=df)
model = blend_treatment_ols.fit()
anova.anova_lm(model)

In [None]:
# One panel per factor: boxplots of result grouped by blend and by treatment
fig, axes = plt.subplots(ncols=2)
for factor, panel in zip(['blend', 'treatment'], axes):
    df.groupby(factor).boxplot(column='result', subplots=False,
                               rot=90, grid=False, ax=panel)
plt.tight_layout()
plt.show()

# Exercise 9

In [None]:
# Each entry holds the two (treatment, result) observations of one block;
# blocks are numbered 1..28 in the order listed.
block_pairs = [
    (('A', 38), ('B', 30)), (('C', 50), ('D', 27)),
    (('E', 33), ('F', 28)), (('G', 62), ('H', 30)),
    (('A', 37), ('C', 25)), (('B', 38), ('H', 52)),
    (('D', 89), ('E', 89)), (('F', 27), ('G', 75)),
    (('A', 17), ('D', 25)), (('B', 47), ('G', 63)),
    (('C', 32), ('F', 39)), (('E', 20), ('H', 18)),
    (('A', 5), ('E', 15)), (('B', 45), ('C', 38)),
    (('D', 11), ('G', 24)), (('F', 37), ('H', 39)),
    (('A', 23), ('F', 40)), (('B', 20), ('D', 14)),
    (('C', 18), ('H', 10)), (('E', 22), ('G', 52)),
    (('A', 66), ('G', 67)), (('B', 23), ('F', 46)),
    (('C', 22), ('E', 28)), (('D', 20), ('H', 40)),
    (('A', 27), ('H', 32)), (('B', 10), ('E', 40)),
    (('C', 32), ('G', 33)), (('D', 18), ('F', 23)),
]

# Long format: one row per observation
df = pd.DataFrame(
    [
        (block, treatment, result)
        for block, pair in enumerate(block_pairs, start=1)
        for treatment, result in pair
    ],
    columns=['block', 'treatment', 'result'],
)

In [None]:
# Linear model with block and treatment as categorical factors,
# followed by its ANOVA table.
block_treatment_ols = smf.ols(formula='result ~ C(block) + C(treatment)', data=df)
model = block_treatment_ols.fit()
anova.anova_lm(model)

In [None]:
# Label treatment G as 'G1' and every other treatment as 'G2'
df['group'] = np.where(df['treatment'] == 'G', 'G1', 'G2')

# One panel each for result by block, by treatment and by the G1/G2 grouping
fig, axes = plt.subplots(ncols=3)
for factor, panel in zip(['block', 'treatment', 'group'], axes):
    df.groupby(factor).boxplot(column='result', subplots=False,
                               rot=90, grid=False, ax=panel)
plt.tight_layout()
plt.show()

# Exercise 10

In [None]:
# Rows are days 1..4, columns are batches 1..4; each cell is (mixture, result)
day_by_batch = [
    [('A', 312), ('B', 299), ('C', 315), ('D', 290)],
    [('C', 295), ('A', 317), ('D', 313), ('B', 300)],
    [('B', 295), ('D', 298), ('A', 312), ('C', 315)],
    [('D', 313), ('C', 314), ('B', 299), ('A', 300)],
]

# Long format: one row per (day, batch) cell
df = pd.DataFrame(
    [
        (day, batch, mixture, result)
        for day, cells in enumerate(day_by_batch, start=1)
        for batch, (mixture, result) in enumerate(cells, start=1)
    ],
    columns=['day', 'batch', 'mixture', 'result'],
)

In [None]:
# Linear model with day, batch and mixture as categorical factors,
# followed by its ANOVA table.
latin_square_ols = smf.ols(formula='result ~ C(day) + C(batch) + C(mixture)', data=df)
model = latin_square_ols.fit()
anova.anova_lm(model)

# Exercise 14

In [None]:
# Three replicate results for every combination of factor a and factor b
replicates = {
    ('A1', 'B1'): [18.3, 17.9, 18.5],
    ('A2', 'B1'): [17.9, 17.6, 16.2],
    ('A3', 'B1'): [19.1, 19.0, 18.9],
    ('A1', 'B2'): [20.5, 21.1, 20.7],
    ('A2', 'B2'): [18.2, 19.5, 18.9],
    ('A3', 'B2'): [22.1, 23.5, 22.9],
    ('A1', 'B3'): [21.5, 21.7, 21.9],
    ('A2', 'B3'): [20.1, 19.5, 18.9],
    ('A3', 'B3'): [22.3, 23.5, 23.3],
}

# Long format: one row per replicate
df = pd.DataFrame(
    [
        (a, b, value)
        for (a, b), values in replicates.items()
        for value in values
    ],
    columns=['a', 'b', 'result'],
)

In [None]:
# Linear model with both categorical factors and their interaction,
# followed by its ANOVA table.
two_factor_ols = smf.ols(formula='result ~ C(a) + C(b) + C(a):C(b)', data=df)
model = two_factor_ols.fit()
anova.anova_lm(model)

In [None]:
# Combined label "<a>:<b>" for every row
df['a:b'] = [f'{a}:{b}' for a, b in df[['a', 'b']].itertuples(index=False)]

# One panel each for result by a, by b and by the combined a:b label
fig, axes = plt.subplots(ncols=3)
for factor, panel in zip(['a', 'b', 'a:b'], axes):
    df.groupby(factor).boxplot(column='result', subplots=False,
                               rot=90, grid=False, ax=panel)
plt.tight_layout()
plt.show()

# Exercise 17

In [None]:
# Observed Y values for every (X1, X2) setting: two per corner point,
# four at the center (0, 0)
observations = [
    ((-1, -1), [55.8, 54.4]),
    ((1, -1), [60.3, 60.9]),
    ((-1, 1), [63.9, 64.4]),
    ((1, 1), [67.9, 68.5]),
    ((0, 0), [61.5, 62.0, 61.9, 62.4]),
]

# Long format: one row per observation
df = pd.DataFrame(
    [(x1, x2, y) for (x1, x2), ys in observations for y in ys],
    columns=['X1', 'X2', 'Y'],
)

In [None]:
# Fit Y on X1, X2 and their interaction and print the regression summary.
#
# UserWarnings raised while fitting or summarising are suppressed only inside
# the `with` block: catch_warnings() saves the active warning filters on entry
# and restores them on exit, so filters installed earlier stay in effect.
import warnings

formula = ('Y ~ X1 + X2 + X1:X2')
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=UserWarning)
    model = smf.ols(formula, data=df).fit()
    print(model.summary2())

In [None]:
def plotResponseSurface(model, ncontours=20):
    """Draw labelled contour lines of the model predictions on [-1, 1] x [-1, 1].

    model: object whose predict(exog=...) accepts a DataFrame with the
        columns 'X1' and 'X2' and returns an object exposing `.values`
    ncontours: forwarded to plt.contour as its levels argument

    Returns the current matplotlib axes, with both axis labels set.
    """
    grid_x1 = np.linspace(-1, 1)
    grid_x2 = np.linspace(-1, 1)
    mesh_x1, mesh_x2 = np.meshgrid(grid_x1, grid_x2)

    # Predict the response at every grid point
    grid_points = pd.DataFrame({'X1': mesh_x1.ravel(), 'X2': mesh_x2.ravel()})
    predicted = model.predict(exog=grid_points)

    # Back to the (len(grid_x2), len(grid_x1)) layout of the mesh
    surface = predicted.values.reshape(mesh_x1.shape)
    contours = plt.contour(grid_x1, grid_x2, surface, ncontours, colors='gray')

    ax = plt.gca()
    ax.clabel(contours, inline=True, fontsize=10)
    ax.set_xlabel('X1')
    ax.set_ylabel('X2')
    return ax


plotResponseSurface(model)
plt.show()

In [None]:
# ANOVA table of the fitted model; the 'mean_sq' entry of its 'Residual' row
# is the variance around the regression. Kept in `res` and displayed.
res = anova.anova_lm(model)
res

In [None]:
# F-test comparing the variance around the regression with the variance of
# the replicated center points. Uses `df` (the Exercise 17 design) and `res`
# (the ANOVA table of the fitted model) from the preceding cells, so the
# variances and degrees of freedom are derived from the data instead of being
# typed in by hand.

# Sample variance of Y at the center point (0, 0): 0.13667 on 3 degrees of freedom
center_y = df.loc[(df['X1'] == 0) & (df['X2'] == 0), 'Y']
sigma2 = center_y.var()
df_sigma2 = len(center_y) - 1

# ['Residual', 'mean_sq'] gives variance around regression: 0.235677 on 8
# degrees of freedom
var_residuals = res.loc['Residual', 'mean_sq']
df_residuals = res.loc['Residual', 'df']

F = var_residuals / sigma2
# sf() is the upper-tail probability, i.e. 1 - cdf()
p = stats.f(df_residuals, df_sigma2).sf(F)
print(f'F-ratio: {F:.4f}; p-value: {p:.2f}')

# Exercise 18

In [None]:
# Build the Exercise 18 data, fit a full second-order model in X1 and X2 and
# print the regression summary.
#
# UserWarnings raised while fitting or summarising are suppressed only inside
# the `with` block: catch_warnings() saves the active warning filters on entry
# and restores them on exit, so filters installed earlier stay in effect.
import warnings

df = pd.DataFrame([
    [1, 0, 95.6], [0.5, 0.866, 77.9], [-0.5, 0.866, 76.2],
    [-1, 0, 54.5], [-0.5, -0.866, 63.9], [0.5, -0.866, 79.1],
    [0, 0, 96.8], [0, 0, 94.8], [0, 0, 94.4],
], columns=['X1', 'X2', 'Y'])

formula = ('Y ~ X1 + X2 + X1*X2 + I(X1**2) + I(X2**2)')
with warnings.catch_warnings():
    warnings.simplefilter('ignore', category=UserWarning)
    model = smf.ols(formula, data=df).fit()
    print(model.summary2())

In [None]:
# Response-surface helper from mistat for the fitted model and its two factors
factors = ['X1', 'X2']
rsm = mistat.ResponseSurfaceMethod(model, factors)

# Keep the stationary point for the plot in the next cell and display it
stationary = rsm.stationary_point()
stationary

In [None]:
# Contour plot of the fitted model with the stationary point drawn on top.
ax = plotResponseSurface(model)
# `stationary` is unpacked into the positional arguments of scatter
# NOTE(review): assumes it holds exactly the X1 and X2 coordinates - confirm
ax.scatter(*stationary, color='black')
plt.show()

In [None]:
# ANOVA table of the second-order model
anova_table = anova.anova_lm(model)
anova_table