In [1]:
#default_exp ch1

The file wire.csv contains data relating the pull strength (pstren) of a wire bond (which we’ll treat as a response) to six characteristics which we shall treat as design variables: die height (dieh), post height (posth), loop height (looph), wire length (wlen), bond width on the die (diew), and bond width on the post (postw). (Derived from exercise 2.3 in Myers, Montgomery, and Anderson–Cook (2016) using data from Table E2.1.)

* Write code that converts natural variables in the file to coded variables in the unit hypercube. Also, normalize responses to have a mean of zero and a range of 1.
* Use model selection techniques to select a parsimonious linear model for the coded data including, potentially, second-order and interaction effects.
* Use the fitted model to make a prediction for pull strength, when the explanatory variables take on the values c(6, 20, 30, 90, 2, 2), in the order above, with a full accounting of uncertainty. Make sure the predictive quantities are on the original scale of the data.

In [2]:
%load_ext autoreload
%autoreload 2
%config Completer.use_jedi = False

In [15]:
#hide
from nbdev.showdoc import *
import pandas as pd
import numpy as np
from typing import List

In [77]:
def length(x: pd.Series):
    return np.sqrt(np.sum(np.square(x)))

def _predictor_to_unit_length(x: pd.Series) -> pd.Series:
    return x / length(x)


class DataManager:
    def __init__(self, response="postw", datafile="http://bobby.gramacy.com/surrogates/wire.csv"):
        self.data = pd.read_csv(datafile)
        self.data.columns = [s.strip() for s in self.data.columns]
        self.response: str = response
        self.predictors: List[str] = self._get_predictors()
        self.normalized = False

    
    def _get_predictors(self) -> List[str]:
        return [colname for colname in self.data.columns if colname != self.response]

    
    def predictors_to_hypercube(self):
        for colname in self.predictors:
            self.data[colname] = _predictor_to_unit_length(self.data[colname])
            
    def normalize_response(self):
        if self.normalized:
            return
        self.orig_mean = np.mean(self.data[self.response])
        self.orig_std = np.std(self.data[self.response])
        self.data[self.response] -= self.orig_mean
        self.data[self.response] /= self.orig_std
        self.normalized = True


In [78]:
dm = DataManager()
dm.data.head()

Unnamed: 0,pstren,dieh,posth,looph,wlen,diew,postw
0,8.0,5.2,19.6,29.6,94.9,2.1,2.3
1,8.3,5.2,19.8,32.4,89.7,2.1,1.8
2,8.5,5.8,19.6,31.0,96.2,2.0,2.0
3,8.8,6.4,19.4,32.4,95.6,2.2,2.1
4,9.0,5.8,18.6,28.6,86.5,2.0,1.8


---------
### Write code that converts natural variables in the file to coded variables in the unit hypercube. Also, normalize responses to have a mean of zero and a range of 1.

In [79]:
dm.predictors_to_hypercube()
dm.normalize_response()
dm.data.head()

Unnamed: 0,pstren,dieh,posth,looph,wlen,diew,postw
0,0.179626,0.205221,0.231706,0.210582,0.245585,0.230895,2.11661
1,0.186362,0.205221,0.234071,0.230502,0.232129,0.230895,-0.884554
2,0.190852,0.228901,0.231706,0.220542,0.24895,0.2199,0.315912
3,0.197588,0.25258,0.229342,0.230502,0.247397,0.24189,0.916145
4,0.202079,0.228901,0.219884,0.203468,0.223848,0.2199,-0.884554


In [88]:
# Test data standardization.
for colname in dm.predictors:
    x = dm.data[colname]
    assert x.max() <= 1    
    assert x.min() >= 0
    1 - length(x) <= 10e-7

---------
### Use model selection techniques to select a parsimonious linear model for the coded data including, potentially, second-order and interaction effects.

In [108]:
import pyDOE as designs

In [118]:
domain = [0.000, 1.000]
shape = 100
ranges = np.array([domain, domain])
grid = designs.lhs(shape)

In [101]:
?LHS

[0;31mInit signature:[0m [0mLHS[0m[0;34m([0m[0;34m**[0m[0mkwargs[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m      <no docstring>
[0;31mInit docstring:[0m
Constructor where values of options can be passed in.

For the list of options, see the documentation for the problem being used.

Parameters
----------
**kwargs : named arguments
    Set of options that can be optionally set; each option must have been declared.

Examples
--------
>>> import numpy as np
>>> from smt.sampling_methods import Random
>>> sampling = Random(xlimits=np.arange(2).reshape((1, 2)))
[0;31mFile:[0m           /opt/conda/lib/python3.8/site-packages/smt/sampling_methods/lhs.py
[0;31mType:[0m           type
[0;31mSubclasses:[0m     


In [106]:
sampling(10)

array([[0.35, 0.35],
       [0.85, 0.55],
       [0.15, 0.25],
       [0.05, 0.95],
       [0.65, 0.75],
       [0.75, 0.65],
       [0.45, 0.15],
       [0.95, 0.85],
       [0.25, 0.45],
       [0.55, 0.05]])