# Exercises for Chapter 6
Industrial Statistics: A Computer Based Approach with Python<br>
by Ron Kenett, Shelemyahu Zacks, Peter Gedeck

Publisher: Springer International Publishing; 1st edition (2023) <br>
<!-- ISBN-13: 978-3031075650 -->

(c) 2022 Ron Kenett, Shelemyahu Zacks, Peter Gedeck

The code needs to be executed in sequence.

In [1]:
import os
# Set before `outdated` is imported below; presumably this tells the package
# to skip its version check -- TODO confirm against the `outdated` docs.
os.environ['OUTDATED_IGNORE'] = '1'
import warnings
from outdated import OutdatedPackageWarning
# From here on, silence FutureWarning and OutdatedPackageWarning messages.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=OutdatedPackageWarning)

In [2]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from pyDOE2 import fracfact

import mistat

# Exercise 1

In [3]:
from mistat.design import doe
np.random.seed(1)

# Full factorial over two settings of each of the five factors,
# then shuffled into a random run order with a fresh 0..n-1 index.
FacDesign = (
    doe.full_fact(dict(
        m=[30, 60],
        s=[0.005, 0.02],
        v0=[0.002, 0.01],
        k=[1500, 4500],
        t=[290, 296],
    ))
    .sample(frac=1)
    .reset_index(drop=True)
)

In [4]:
# Piston simulator: 100 replicates per design row. The design columns are
# passed as keyword arguments; p0 and t0 are held fixed for all runs.
simulator = mistat.PistonSimulator(
    n_replicate=100,
    **FacDesign,
    p0=100_000,
    t0=350,
)
result = simulator.simulate()

In [5]:
factors = ['m', 's', 'v0', 'k', 't']
# Collapse the replicates of each factor combination into the mean and the
# standard deviation of the simulated 'seconds' column.
result = result.groupby(factors, as_index=False).agg(
    mean=('seconds', 'mean'),
    std=('seconds', 'std'),
)

In [6]:
# MSE = squared deviation of the mean from 0.02 plus the variance;
# rank the factor combinations from smallest to largest MSE.
result = result.assign(
    MSE=(result['mean'] - 0.02)**2 + result['std']**2,
).sort_values('MSE')
best_MSE = result.iloc[0]
result.head()

Unnamed: 0,m,s,v0,k,t,mean,std,MSE
26,60,0.02,0.002,4500,290,0.00958,0.002223,0.000114
27,60,0.02,0.002,4500,296,0.009659,0.002783,0.000115
24,60,0.02,0.002,1500,290,0.009201,0.002364,0.000122
25,60,0.02,0.002,1500,296,0.008888,0.002322,0.000129
16,60,0.005,0.002,1500,290,0.023836,0.010829,0.000132


In [7]:
# Factor combination with the smallest MSE (first row after sorting by MSE)
best_MSE

m         60.000000
s          0.020000
v0         0.002000
k       4500.000000
t        290.000000
mean       0.009580
std        0.002223
MSE        0.000114
Name: 26, dtype: float64

In [8]:
# Signal-to-noise ratio in decibels for every factor combination. The 1/100
# term corresponds to the 100 replicates simulated per combination.
result['SN'] = 10 * np.log10(result['mean']**2 / result['std']**2 - 1/100)
# Rank all combinations by SN, largest first. The complete table is kept
# (as in the MSE ranking); only the display below is limited to five rows.
result = result.sort_values('SN', ascending=False)
best_SN = result.iloc[0,:]
result.head()

Unnamed: 0,m,s,v0,k,t,mean,std,MSE,SN
29,60,0.02,0.01,1500,296,0.049291,0.003361,0.000869,23.325647
13,30,0.02,0.01,1500,296,0.04237,0.00301,0.000509,22.970572
30,60,0.02,0.01,4500,290,0.056468,0.004261,0.001348,22.44585
28,60,0.02,0.01,1500,290,0.049085,0.00382,0.000861,22.177197
31,60,0.02,0.01,4500,296,0.057546,0.00448,0.00143,22.173743


In [9]:
# Factor combination with the largest SN (first row after sorting by SN, descending)
best_SN

m         60.000000
s          0.020000
v0         0.010000
k       1500.000000
t        296.000000
mean       0.049291
std        0.003361
MSE        0.000869
SN        23.325647
Name: 29, dtype: float64

# Exercise 2

In [10]:
np.random.seed(1)

# Build design from factors
FacDesign = doe.full_fact({
    'm': [30, 60],
    's': [0.005, 0.02],
    'v0': [0.002, 0.01],
    'k': [1500, 4500],
    't': [290, 296],
    'p0': [90_000, 110_000],
    't0': [340, 360],
})

# Randomize design
FacDesign = FacDesign.sample(frac=1).reset_index(drop=True)

# Setup and run simulator with n_replicate (= 100) replicates
# for each combination of factors
n_replicate = 100
simulator = mistat.PistonSimulator(n_replicate=n_replicate, **FacDesign)
result = simulator.simulate()

# Collapse the replicates of each factor combination to mean and std
factors = ['m', 's', 'v0', 'k', 't', 'p0', 't0']
result = result.groupby(factors, as_index=False).agg({'seconds': ['mean', 'std']})
result.columns = [*factors, 'mean', 'std']

# Signal-to-noise ratio in decibels; the 1/n term uses the same replicate
# count that was passed to the simulator, so the two cannot drift apart.
result['SN'] = 10 * np.log10(result['mean']**2 / result['std']**2 - 1/n_replicate)

In [11]:
# Ordinary least squares fit of SN on all seven factors (main effects only)
model = (
    smf.ols(formula='SN ~ m + s + k + t + v0 + p0 + t0', data=result)
    .fit()
)
print(model.summary2())

                 Results: Ordinary least squares
Model:              OLS              Adj. R-squared:     0.899   
Dependent Variable: SN               AIC:                522.9904
Date:               2022-10-25 17:11 BIC:                545.8067
No. Observations:   128              Log-Likelihood:     -253.50 
Df Model:           7                F-statistic:        163.3   
Df Residuals:       120              Prob (F-statistic): 3.28e-58
R-squared:          0.905            Scale:              3.2790  
-----------------------------------------------------------------
               Coef.   Std.Err.    t    P>|t|   [0.025    0.975] 
-----------------------------------------------------------------
Intercept       5.0686  16.6968  0.3036 0.7620 -27.9899   38.1271
m              -0.0094   0.0107 -0.8845 0.3782  -0.0306    0.0117
s             484.4714  21.3403 22.7021 0.0000 442.2191  526.7238
k               0.0001   0.0001  0.5709 0.5691  -0.0002    0.0003
t               0.0012   0.

In [12]:
# Reduced model: the fitted terms are s, v0 and their interaction s:v0
# (in the formula language `s*v0` already expands to s + v0 + s:v0).
model = (
    smf.ols(formula='SN ~ s + v0 + s*v0', data=result)
    .fit()
)
print(model.summary2())

                  Results: Ordinary least squares
Model:                OLS              Adj. R-squared:     0.964   
Dependent Variable:   SN               AIC:                388.4606
Date:                 2022-10-25 17:11 BIC:                399.8687
No. Observations:     128              Log-Likelihood:     -190.23 
Df Model:             3                F-statistic:        1128.   
Df Residuals:         124              Prob (F-statistic): 8.90e-90
R-squared:            0.965            Scale:              1.1808  
-------------------------------------------------------------------
            Coef.     Std.Err.    t    P>|t|    [0.025     0.975]  
-------------------------------------------------------------------
Intercept     5.0013    0.3365 14.8604 0.0000     4.3351     5.6674
s           203.8500   23.0871  8.8296 0.0000   158.1541   249.5459
v0          416.5600   46.6710  8.9254 0.0000   324.1849   508.9350
s:v0      46770.2389 3201.6068 14.6084 0.0000 40433.3622 53107.115

# Exercise 6

In [13]:
# Experiment table, one run per row: eight integer-coded factor columns
# (F1 takes values 1-2, F2..F8 take values 1-3) followed by Xbar and S
# (presumably the per-run response mean and standard deviation -- confirm
# against the exercise statement).
df = pd.DataFrame(
    [
        (1, 1, 1, 1, 1, 1, 1, 1, 2.5, 0.0827),
        (1, 1, 2, 2, 2, 2, 2, 2, 2.684, 0.1196),
        (1, 1, 3, 3, 3, 3, 3, 3, 2.66, 0.1722),
        (1, 2, 1, 1, 2, 2, 3, 3, 1.962, 0.1696),
        (1, 2, 2, 2, 3, 3, 1, 1, 1.87, 0.1168),
        (1, 2, 3, 3, 1, 1, 2, 2, 2.584, 0.1106),
        (1, 3, 1, 2, 1, 3, 2, 3, 2.032, 0.0718),
        (1, 3, 2, 3, 2, 1, 3, 1, 3.267, 0.2101),
        (1, 3, 3, 1, 3, 2, 1, 2, 2.829, 0.1516),
        (2, 1, 1, 3, 3, 2, 2, 1, 2.66, 0.1912),
        (2, 1, 2, 1, 1, 3, 3, 2, 3.166, 0.0674),
        (2, 1, 3, 2, 2, 1, 1, 3, 3.323, 0.1274),
        (2, 2, 1, 2, 3, 1, 3, 2, 2.576, 0.085),
        (2, 2, 2, 3, 1, 2, 1, 3, 2.308, 0.0964),
        (2, 2, 3, 1, 2, 3, 2, 1, 2.464, 0.0385),
        (2, 3, 1, 3, 2, 3, 1, 2, 2.667, 0.0706),
        (2, 3, 2, 1, 3, 1, 2, 3, 3.156, 0.1569),
        (2, 3, 3, 2, 1, 2, 3, 1, 3.494, 0.0473),
    ],
    columns=[
        'F1', 'F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8',
        'Xbar', 'S',
    ],
)

In [14]:
# Two-level factor F1: recode 1 -> -1 and 2 -> +1
df['F1'] = 2 * df['F1'] - 3
# Three-level factors: shift the codes 1, 2, 3 to -1, 0, +1
for column in ['F2', 'F3', 'F4', 'F5', 'F6', 'F7', 'F8']:
    df[column] = df[column].sub(2)
# Response used in the regression: natural log of Xbar / S
df['SNR'] = np.log(df['Xbar'].div(df['S']))

In [15]:
# Main-effects OLS model of SNR on the eight coded factors
model = (
    smf.ols(formula='SNR ~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8', data=df)
    .fit()
)

In [16]:
# Suppress UserWarning while the model summary in the next cell is printed
warnings.simplefilter('ignore', category=UserWarning)

In [17]:
# Coefficient table only (second table of the summary), rounded to 5 decimals
print(model.summary2().tables[1].round(decimals=5))

             Coef.  Std.Err.         t    P>|t|   [0.025   0.975]
Intercept  3.22601   0.07518  42.90997  0.00000  3.05594  3.39608
F1         0.26565   0.07518   3.53348  0.00638  0.09558  0.43572
F2         0.07900   0.09208   0.85797  0.41317 -0.12929  0.28729
F3         0.13848   0.09208   1.50392  0.16686 -0.06982  0.34677
F4        -0.14339   0.09208  -1.55733  0.15382 -0.35169  0.06490
F5        -0.31231   0.09208  -3.39185  0.00798 -0.52061 -0.10402
F6         0.12630   0.09208   1.37164  0.20340 -0.08200  0.33459
F7         0.02632   0.09208   0.28587  0.78145 -0.18197  0.23462
F8        -0.17109   0.09208  -1.85810  0.09610 -0.37938  0.03720


In [18]:
# Report UserWarning again by putting a 'default' filter in front of the
# earlier 'ignore' filter
warnings.simplefilter('default', category=UserWarning)

# Exercise 7

In [19]:
# Generator strings handed to fracfact, keyed by design name. Every design
# lists the seven base columns A..G; the fractional designs add one, two or
# three multi-letter columns that are used afterwards to select the fraction.
generators = dict([
    ('2_7', 'A B C D E F G'),
    ('2_7_1', 'A B C D E F G ABCDEFG'),
    ('2_7_2', 'A B C D E F G ABCDF ABDEG'),
    ('2_7_3', 'A B C D E F G ABCE BCDF ACDG'),
])

In [20]:
# One coded design per generator string; the columns are named after the
# space-separated terms of the generator.
designs = {
    name: pd.DataFrame(fracfact(generator), columns=generator.split())
    for name, generator in generators.items()
}

# reduce the fractional factorial designs to a single block:
# keep only the runs in which every generated column equals 1
designs['2_7_1'] = designs['2_7_1'].query('ABCDEFG == 1')
designs['2_7_2'] = designs['2_7_2'].query('ABCDF == 1 & ABDEG == 1')
designs['2_7_3'] = designs['2_7_3'].query('ABCE == 1 & BCDF == 1 & ACDG == 1')

In [21]:
# Two settings for each simulator factor; position 0 is used for coded
# value -1 and position 1 for coded value +1.
FacLevels = dict(
    m=[30, 60],
    s=[0.005, 0.02],
    v0=[0.002, 0.01],
    k=[1500, 4500],
    t=[290, 296],
    p0=[90_000, 110_000],
    t0=[340, 360],
)
# Design column letter -> simulator factor name
FacMap = dict(A='m', B='s', C='v0', D='k', E='t', F='p0', G='t0')

for name, design in designs.items():
    # Keep only the base-factor columns (generator columns are not in FacMap)
    # and replace the coded values (-1, 1) with the actual factor levels.
    designs[name] = pd.DataFrame({
        FacMap[colname]: [FacLevels[FacMap[colname]][max(0, int(v))]
                          for v in design[colname]]
        for colname in design
        if colname in FacMap
    })

In [22]:
# Simulate every design and summarise the replicates per factor combination.
results = {}
factors = list(FacLevels)  # grouping columns; identical for every design
n_replicate = 5            # replicates simulated per design row
for name, design in designs.items():
    # Re-seed so that each design is simulated from the same random state
    np.random.seed(1)
    # Setup and run simulator
    simulator = mistat.PistonSimulator(n_replicate=n_replicate, **design)
    result = simulator.simulate()
    result = result.groupby(factors, as_index=False).agg({'seconds': ['mean', 'std']})
    result.columns = [*factors, 'mean', 'std']
    # Signal-to-noise ratio in decibels, using the same definition as in
    # Exercises 1 and 2: 10 * log10(mean^2 / std^2 - 1/n), n = replicate count.
    result['SN'] = 10 * np.log10(result['mean']**2 / result['std']**2 - 1/n_replicate)
    results[name] = result

In [23]:
# Fit the same main-effects model of the mean response for every design
models = {
    name: smf.ols('mean ~ m + s + k + t + v0 + p0 + t0', data=result).fit()
    for name, result in results.items()
}

In [24]:
# R-squared and adjusted R-squared of the fitted model for each design
for name in models:
    model = models[name]
    print(f'{name:10s}: r2={model.rsquared:.3f}, r2_adj={model.rsquared_adj:.3f}')

2_7       : r2=0.769, r2_adj=0.755
2_7_1     : r2=0.777, r2_adj=0.749
2_7_2     : r2=0.765, r2_adj=0.697
2_7_3     : r2=0.788, r2_adj=0.602


In [25]:
# Coefficient table (second table of the summary) for each design,
# rounded to 4 decimals and preceded by the design name
for name in models:
    model = models[name]
    print(name)
    print(model.summary2().tables[1].round(decimals=4))

2_7
             Coef.  Std.Err.        t   P>|t|   [0.025   0.975]
Intercept   0.2183    0.4116   0.5304  0.5968  -0.5967   1.0333
m           0.0005    0.0003   2.0359  0.0440   0.0000   0.0011
s          -6.8075    0.5261 -12.9393  0.0000  -7.8492  -5.7658
k           0.0000    0.0000   1.6946  0.0927  -0.0000   0.0000
t          -0.0004    0.0013  -0.3135  0.7544  -0.0030   0.0022
v0         14.7097    0.9865  14.9116  0.0000  12.7566  16.6628
p0         -0.0000    0.0000  -1.3802  0.1701  -0.0000   0.0000
t0         -0.0000    0.0004  -0.0287  0.9771  -0.0008   0.0008
2_7_1
             Coef.  Std.Err.        t   P>|t|   [0.025   0.975]
Intercept  -0.1876    0.5760  -0.3257  0.7458  -1.3416   0.9663
m           0.0006    0.0004   1.6551  0.1035  -0.0001   0.0013
s          -6.5822    0.7362  -8.9401  0.0000  -8.0571  -5.1073
k           0.0000    0.0000   1.0515  0.2975  -0.0000   0.0000
t           0.0006    0.0018   0.3090  0.7585  -0.0031   0.0043
v0         14.4861    1.3805  

             Coef.  Std.Err.       t   P>|t|   [0.025   0.975]
Intercept   0.3676    0.9370  0.3923  0.6983  -1.5663   2.3014
m           0.0005    0.0006  0.7528  0.4589  -0.0008   0.0017
s          -6.8111    1.1976 -5.6874  0.0000  -9.2828  -4.3394
k           0.0000    0.0000  0.4662  0.6453  -0.0000   0.0000
t          -0.0014    0.0030 -0.4713  0.6417  -0.0076   0.0048
v0         14.9546    2.2454  6.6600  0.0000  10.3202  19.5890
p0         -0.0000    0.0000 -0.5629  0.5787  -0.0000   0.0000
t0          0.0004    0.0009  0.4529  0.6547  -0.0014   0.0023
2_7_3
             Coef.  Std.Err.       t   P>|t|   [0.025   0.975]
Intercept   0.1662    1.4378  0.1156  0.9108  -3.1494   3.4818
m           0.0003    0.0009  0.3306  0.7494  -0.0018   0.0024
s          -6.3784    1.8377 -3.4709  0.0084 -10.6161  -2.1407
k           0.0000    0.0000  0.4130  0.6905  -0.0000   0.0000
t          -0.0005    0.0046 -0.1158  0.9107  -0.0111   0.0101
v0         14.3326    3.4457  4.1596  0.0032   6.



# Exercise 9

In [26]:
# Thirteen tolerance factors tlA..tlM, each with the two levels 1 and 2
tolerances = [f'tl{c}' for c in 'ABCDEFGHIJKLM']
factors = dict((tl, [1, 2]) for tl in tolerances)
nrepeat = 100

# Fractional factorial design from doe.frac_fact_res (second argument 4,
# presumably the design resolution -- confirm), shuffled into random order
Design = (
    doe.frac_fact_res(factors, 4)
    .sample(frac=1)
    .reset_index(drop=True)
)
# Create replicates: repeat every design row nrepeat times
Design = Design.loc[Design.index.repeat(nrepeat)].reset_index(drop=True)

# Run simulation: each design column is passed as a list keyword argument
simulator = mistat.PowerCircuitSimulation(**{k: list(Design[k]) for k in Design})
result = simulator.simulate()
result = mistat.simulationGroup(result, nrepeat)

# Combine results with the Design matrix
Design = Design.assign(response=result['volts'], group=result['group'])

def groupAggregation(g):
    """Summarise the 'response' column of one group.

    Returns a dict with the keys 'mean', 'std' (pandas default, ddof=1)
    and 'MSE' (variance computed with ddof=0).
    """
    response = g['response']
    return dict(
        mean=response.mean(),
        std=response.std(),
        MSE=response.var(ddof=0),
    )
# One row of summary statistics per replicate group
results = pd.DataFrame(
    [stats for stats in Design.groupby('group').apply(groupAggregation)]
)
results

Unnamed: 0,mean,std,MSE
0,229.95266,1.239031,1.519846
1,230.061268,1.247408,1.540467
2,229.956442,1.175165,1.367202
3,230.002957,1.071569,1.136778
4,230.189922,1.238505,1.518555
5,229.811038,1.378896,1.882342
6,230.020092,1.276255,1.612537
7,230.069781,1.341914,1.782726
8,229.896648,1.132489,1.269705
9,230.07889,1.23151,1.50145
