In [1]:
import pandas as pd
import numpy as np
from synthetic import SyntheticData

In [2]:
# Instantiate SyntheticData with a 10,000-point seed distribution.
# NOTE(review): the meaning of the positional argument is taken from the
# original comment — confirm against synthetic.py.
sdata = SyntheticData(10000)

## Simulated Experiment - Level 3/synthetic.py Version
This mimics the functionality of the previous section but uses the `SyntheticData` class from the `synthetic.py` module.

#### Define Degrees of Freedom for Measurement Variability

In [12]:
# Mean value and rounding precision handed to SyntheticData in the next cell
# (as `xbarbar=` and `digits=` respectively).
xbarbar = 1.10
sig_digits = 4

# Degrees of freedom, listed in the order used to pair them with the
# per-degree-of-freedom settings below.
names = ['batch-to-batch', 'within batch', 'lab']

# Number of levels for each degree of freedom.
n_levels = {name: count for name, count in zip(names, (2, 3, 2))}

# Variability of each degree of freedom, expressed as a fraction of the mean.
var_fracs = {name: frac for name, frac in zip(names, (0.05, 0.03, 0.02))}

# Echo the configuration as the cell output.
names, n_levels, var_fracs

(['batch-to-batch', 'within batch', 'lab'],
 {'batch-to-batch': 2, 'within batch': 3, 'lab': 2},
 {'batch-to-batch': 0.05, 'within batch': 0.03, 'lab': 0.02})

In [13]:
# Configure the experiment from the settings defined in the previous cell.
# No level names or level effects are supplied here.
expt = SyntheticData(
    10000,
    xbarbar=xbarbar,
    names=names,
    n_levels=n_levels,
    var_fracs=var_fracs,
    digits=sig_digits,
)

# Generate the experiment, then show the resulting table as the cell output.
# NOTE(review): presumably create_experiment_procedure() is what populates
# df_expt — confirm in synthetic.py.
expt.create_experiment_procedure()
expt.df_expt

Unnamed: 0,level_batch-to-batch,devns_batch-to-batch,lvl_effects_batch-to-batch,level_within batch,devns_within batch,lvl_effects_within batch,level_lab,devns_lab,lvl_effects_lab,sim_meas
0,1,-0.0062,0.0,1,0.0262,0.0,1,-0.0174,0.0,1.1026
1,1,-0.0062,0.0,1,0.0262,0.0,2,-0.0062,0.0,1.1138
2,1,-0.0062,0.0,2,0.0176,0.0,1,0.0009,0.0,1.1123
3,1,-0.0062,0.0,2,0.0176,0.0,2,0.0089,0.0,1.1203
4,1,-0.0062,0.0,3,0.0378,0.0,1,0.0109,0.0,1.1425
5,1,-0.0062,0.0,3,0.0378,0.0,2,-0.0212,0.0,1.1104
6,2,-0.0485,0.0,1,-0.0239,0.0,1,0.0231,0.0,1.0507
7,2,-0.0485,0.0,1,-0.0239,0.0,2,-0.025,0.0,1.0026
8,2,-0.0485,0.0,2,0.0242,0.0,1,-0.0061,0.0,1.0696
9,2,-0.0485,0.0,2,0.0242,0.0,2,-0.0011,0.0,1.0746


## Simulated Experiment - With Mix of Specified Effects and Random Deviations


#### Define Degrees of Freedom for Measurement Variability

In [14]:
# Mean value and rounding precision handed to SyntheticData in the next cell
# (as `xbarbar=` and `digits=` respectively).
xbarbar = 1.10
sig_digits = 4

# Degrees of freedom, listed in the order used to pair them with the
# per-degree-of-freedom settings below.
names = ['batch-to-batch', 'within batch', 'lab']

# Number of levels for each degree of freedom.
n_levels = {name: count for name, count in zip(names, (2, 3, 2))}

# Random variability of each degree of freedom, as a fraction of the mean.
var_fracs = {name: frac for name, frac in zip(names, (0.01, 0.005, 0.02))}

# User-specified names for the levels of selected degrees of freedom.
# 'lab' is intentionally absent: it has no named levels.
lvl_val_names = {
    'batch-to-batch': ['Batch A', 'Batch B'],
    'within batch': ['Top', 'Middle', 'Bottom'],
}

# User-specified deviations/effects for those same levels, as a fraction of
# the mean; one entry per level, in the same order as the level names.
lvl_val_effects = {
    'batch-to-batch': [-0.08, 0.08],
    'within batch': [-0.04, 0.0, 0.03],
}

# Echo the configuration as the cell output.
names, n_levels, var_fracs, lvl_val_names, lvl_val_effects

(['batch-to-batch', 'within batch', 'lab'],
 {'batch-to-batch': 2, 'within batch': 3, 'lab': 2},
 {'batch-to-batch': 0.01, 'within batch': 0.005, 'lab': 0.02},
 {'batch-to-batch': ['Batch A', 'Batch B'],
  'within batch': ['Top', 'Middle', 'Bottom']},
 {'batch-to-batch': [-0.08, 0.08], 'within batch': [-0.04, 0.0, 0.03]})

In [15]:
# Configure an experiment that mixes user-specified level names/effects
# with the random variability fractions defined in the previous cell.
qualn = SyntheticData(
    10000,
    xbarbar=xbarbar,
    names=names,
    n_levels=n_levels,
    var_fracs=var_fracs,
    lvl_val_names=lvl_val_names,
    lvl_val_effects=lvl_val_effects,
    digits=sig_digits,
)

In [16]:
# Run the experiment-generation step, then show the resulting table as the
# cell output.
# NOTE(review): presumably create_experiment_procedure() is what populates
# df_expt — confirm in synthetic.py.
qualn.create_experiment_procedure()
qualn.df_expt

Unnamed: 0,level_batch-to-batch,devns_batch-to-batch,lvl_names_batch-to-batch,lvl_effects_batch-to-batch,level_within batch,devns_within batch,lvl_names_within batch,lvl_effects_within batch,level_lab,devns_lab,lvl_effects_lab,sim_meas
0,1,-0.0022,Batch A,-0.088,1,-0.0073,Top,-0.044,1,-0.0061,0.0,0.9524
1,1,-0.0022,Batch A,-0.088,1,-0.0073,Top,-0.044,2,-0.0208,0.0,0.9377
2,1,-0.0022,Batch A,-0.088,2,0.0057,Middle,0.0,1,-0.032,0.0,0.9835
3,1,-0.0022,Batch A,-0.088,2,0.0057,Middle,0.0,2,0.0087,0.0,1.0242
4,1,-0.0022,Batch A,-0.088,3,-0.0036,Bottom,0.033,1,-0.0154,0.0,1.0238
5,1,-0.0022,Batch A,-0.088,3,-0.0036,Bottom,0.033,2,-0.0062,0.0,1.033
6,2,0.0184,Batch B,0.088,1,0.0014,Top,-0.044,1,-0.0288,0.0,1.135
7,2,0.0184,Batch B,0.088,1,0.0014,Top,-0.044,2,-0.0378,0.0,1.126
8,2,0.0184,Batch B,0.088,2,0.0029,Middle,0.0,1,-0.0032,0.0,1.2061
9,2,0.0184,Batch B,0.088,2,0.0029,Middle,0.0,2,0.0425,0.0,1.2518


In [17]:
# Export only the named-level columns and the simulated measurement to an
# Excel workbook (relative path, so it lands in the current working
# directory); the row index is not written.
keep_cols = [
    'lvl_names_batch-to-batch',
    'lvl_names_within batch',
    'sim_meas',
]
qualn.df_expt.loc[:, keep_cols].to_excel('df_expt.xlsx', index=False)

* **TODO:** Add a `meas_name` attribute and use it as the measurement column name instead of the hard-coded 'sim_meas'
* **TODO:** Instead of using the `lvl_names_` prefix (e.g. `lvl_names_batch-to-batch`), name those columns with just the degree-of-freedom name (e.g. 'batch-to-batch')