# Notebook #1: Introduction to the Definitive Screening Design

In [1]:
import pandas as pd
import definitive_screening_design as dsd

In [2]:
# Generate a DSD (Definitive Screening Design) with 5 numerical factors.
# The result is bound to `df` and shown as the cell output; in the output
# below the factor columns are named X01..X05.
df = dsd.generate(n_num=5)
df

Generating a Definitive Screening Design with 5 numerical and 0 categorical factors.


Unnamed: 0,X01,X02,X03,X04,X05
1,0.0,1.0,1.0,1.0,1.0
2,0.0,-1.0,-1.0,-1.0,-1.0
3,1.0,0.0,-1.0,1.0,1.0
4,-1.0,0.0,1.0,-1.0,-1.0
5,1.0,-1.0,0.0,-1.0,1.0
6,-1.0,1.0,0.0,1.0,-1.0
7,1.0,1.0,-1.0,0.0,-1.0
8,-1.0,-1.0,1.0,0.0,1.0
9,1.0,1.0,1.0,-1.0,0.0
10,-1.0,-1.0,-1.0,1.0,0.0


In [3]:
# Generate a DSD with 5 numerical factors and 2 categorical factors.
# In the output below the categorical columns (C01, C02) take the levels A and B.
df = dsd.generate(n_num=5, n_cat=2)
df

Generating a Definitive Screening Design with 5 numerical and 2 categorical factors.


Unnamed: 0,X01,X02,X03,X04,X05,C01,C02
1,0.0,1.0,1.0,1.0,1.0,B,B
2,0.0,-1.0,-1.0,-1.0,-1.0,A,A
3,1.0,0.0,-1.0,-1.0,1.0,A,B
4,-1.0,0.0,1.0,1.0,-1.0,B,A
5,1.0,1.0,0.0,-1.0,-1.0,B,A
6,-1.0,-1.0,0.0,1.0,1.0,A,B
7,1.0,1.0,1.0,0.0,-1.0,A,B
8,-1.0,-1.0,-1.0,0.0,1.0,B,A
9,1.0,-1.0,1.0,1.0,0.0,A,A
10,-1.0,1.0,-1.0,-1.0,0.0,B,B


In [4]:
# Same 5 numerical + 2 categorical factors, now requesting 2 fake factors
# via `n_fake_factors` to augment the design with 4 trials.
# The fake factors do not appear as extra columns in the output below.
df = dsd.generate(n_num=5, n_cat=2, n_fake_factors=2)
df

Generating a Definitive Screening Design with 5 numerical and 2 categorical factors.


Unnamed: 0,X01,X02,X03,X04,X05,C01,C02
1,0.0,1.0,1.0,1.0,1.0,B,B
2,0.0,-1.0,-1.0,-1.0,-1.0,A,A
3,1.0,0.0,-1.0,-1.0,-1.0,B,B
4,-1.0,0.0,1.0,1.0,1.0,A,A
5,1.0,-1.0,0.0,-1.0,1.0,A,B
6,-1.0,1.0,0.0,1.0,-1.0,B,A
7,1.0,-1.0,-1.0,0.0,1.0,B,A
8,-1.0,1.0,1.0,0.0,-1.0,A,B
9,1.0,-1.0,1.0,1.0,0.0,B,A
10,-1.0,1.0,-1.0,-1.0,0.0,A,B


> NOTE: 2 fake factors will augment the DSD with 4 trials, 4 fake factors will add 8 trials, 6 will add 12 trials, and so on.

In [5]:
# Use a factors_dict to generate the design.
# Each key is a factor name and each value is a two-element list: a pair of
# numbers declares a numerical factor, a pair of strings declares a 2-level
# categorical factor. In the output below, numerical columns also contain the
# midpoint of the two given values (e.g. 45.0 for Temperature).
factors_dict = {
    "Temperature": [30, 60],          # Numerical factor
    "Pressure": [1, 10],              # Numerical factor
    "Solvent": ["Solv-A", "Solv-B"],  # Categorical 2-level factor
    "Vessel#": ["1", "2"],            # Numbers written as strings are treated as a 2-level categorical factor
    "Stirring_speed": [500, 700]      # Numerical factor
}
df = dsd.generate(factors_dict=factors_dict)
df

Generating a Definitive Screening Design from factors dictionary: 3 numerical and 2 categorical.


Unnamed: 0,Temperature,Pressure,Solvent,Vessel#,Stirring_speed
1,45.0,10.0,Solv-B,2,700.0
2,45.0,1.0,Solv-A,1,500.0
3,60.0,5.5,Solv-B,1,500.0
4,30.0,5.5,Solv-A,2,700.0
5,60.0,1.0,Solv-B,2,600.0
6,30.0,10.0,Solv-A,1,600.0
7,60.0,10.0,Solv-A,2,500.0
8,30.0,1.0,Solv-B,1,700.0
9,60.0,10.0,Solv-B,1,700.0
10,30.0,1.0,Solv-A,2,500.0


## Check the size of the DSD when using a different combination of `n_num` and `n_cat`
The number of trials in a definitive screening design (typically about 2N+1, where N is the total number of factors) is compared with that of a full-factorial DOE having 3 levels for each numerical factor and 2 levels for each categorical factor.

In [6]:
# Tabulate the DSD size for every combination of 0-11 numerical and 0-11
# categorical factors, next to the 2*N+1 rule of thumb and the number of runs
# of the corresponding full-factorial design (3 levels per numerical factor,
# 2 levels per categorical factor).
df_list = []
for n_cat in range(12):
    for n_num in range(12):
        # Skip the combination with no factors at all.
        if n_num == 0 and n_cat == 0:
            continue
        dsd_df = dsd.generate(
            n_num=n_num,
            n_cat=n_cat,
            factors_dict=None,  # No need to input a factors_dict in this example
            min_13=False,       # Default: True. `True` adds fake factors to small designs so that they always have at least 13 trials.
            n_fake_factors=0,   # No need for fake factors in this example
            verbose=False,      # Default: True. `False` suppresses the verbose messages
        )
        # One record per (n_num, n_cat) combination; the DataFrame is built
        # once from the list of records after the loop.
        df_list.append({
            "n_num": n_num,
            "n_cat": n_cat,
            "n_tot": n_num + n_cat,
            "DSd length": len(dsd_df),
            "2*n_tot+1": 2 * (n_num + n_cat) + 1,
            "3^n_num * 2^n_cat": 3**n_num * 2**n_cat,
        })

# Raise the row display limit so that the whole comparison table is rendered.
pd.set_option('display.max_rows', 500)
pd.DataFrame(df_list)

Unnamed: 0,n_num,n_cat,n_tot,DSd length,2*n_tot+1,3^n_num * 2^n_cat
0,1,0,1,9,3,3
1,2,0,2,9,5,9
2,3,0,3,9,7,27
3,4,0,4,9,9,81
4,5,0,5,13,11,243
5,6,0,6,13,13,729
6,7,0,7,17,15,2187
7,8,0,8,17,17,6561
8,9,0,9,21,19,19683
9,10,0,10,21,21,59049
