## Baysian optimzation for Valve

In [1]:
from ax import Client, RangeParameterConfig, ChoiceParameterConfig

from ax.analysis.analysis import (
    Analysis,   
    AnalysisCard,
    AnalysisCardCategory,
    AnalysisCardLevel,
)

from ax.generation_strategy.model_spec import GeneratorSpec
from ax.modelbridge.registry import Generators

from gpytorch.kernels import MaternKernel
from botorch.models import SingleTaskGP
from botorch.models.transforms.input import Warp
from botorch.models.map_saas import AdditiveMapSaasSingleTaskGP
from ax.utils.stats.model_fit_stats import MSE
from ax.models.torch.botorch_modular.surrogate import SurrogateSpec, ModelConfig

from ax.analysis.plotly.parallel_coordinates import ParallelCoordinatesPlot
from ax.analysis.plotly.surface.slice import SlicePlot

from botorch.acquisition.logei import qLogNoisyExpectedImprovement, qLogExpectedImprovement

import pandas as pd
import numpy as np
import random
import pickle

from utils import score_vals, param_to_list, list_to_param, combine_meta_test, df_to_sv, df_to_sv_valve, \
                add_trials_valve, df_for_new_trials_valve, validate_parameters, construct_generation_strategy, \
                extract_crack_pressure, calculate_squared_error

# Mute warnings:
import warnings
warnings.filterwarnings('ignore')



## Generation Strategy - set up to go directly to BayZop
#### https://ax.dev/docs/tutorials/modular_botorch/

In [2]:
GENERATION_SET = False

In [3]:
surrogate_spec = SurrogateSpec(
    model_configs=[
        # Select between two models:
        # An additive mixture of relatively strong SAAS priors with input Warping.
        # A relatively vanilla GP with a Matern kernel.
        ModelConfig(
            botorch_model_class=AdditiveMapSaasSingleTaskGP, # maximum-a-posteriori, sparse axis-aligned subspace 
            input_transform_classes=[Warp],
        ),
        ModelConfig(
            botorch_model_class=SingleTaskGP, # maximize a single score
            covar_module_class=MaternKernel, # Matern kernel
            covar_module_options={"nu": 2.5}, # 'The smoothness parameter for the Matern kernel: either 1/2, 3/2, or 5/2.'
        ),
    ],
    eval_criterion=MSE,  # Select the model to use as the one that minimizes mean squared error.
    allow_batched_models=False,  # Forces each metric to be modeled with an independent BoTorch model.
)

In [4]:
# setup generation strategy

generator_spec = GeneratorSpec(
    model_enum=Generators.BOTORCH_MODULAR,
    model_kwargs={
        "surrogate_spec": surrogate_spec, # set up above (MAP SAAS Single GP)
        "botorch_acqf_class": qLogNoisyExpectedImprovement, # Expected Improvement - based acquisition (https://arxiv.org/abs/2310.20708)
        # "botorch_acqf_class": qLogExpectedImprovement # see if this works for categorical inputs
    },
    # We can specify various options for the optimizer here - LEFT AS DEFAULT
    model_gen_kwargs = {
        "model_gen_options": {
            "optimizer_kwargs": {
                "num_restarts": 20,
                # "sequential": False,
                "sequential": True, # to work with acquisition function on categorical
                "options": {
                    "batch_limit": 5,
                    # "maxiter": 200,
                    "maxiter_init": 200, # to work with sequential
                },
            },
        },
    }
)

generation_strategy = construct_generation_strategy(
    generator_spec=generator_spec, # set up above (surrogate w/ EI & standard kwargs)
    node_name="BoTorch w/ Model Selection",
)
generation_strategy

GenerationStrategy(name='Sobol+BoTorch w/ Model Selection', nodes=[GenerationNode(node_name='Sobol', model_specs=[GeneratorSpec(model_enum=Sobol, model_key_override=None)], transition_criteria=[MinTrials(transition_to='BoTorch w/ Model Selection')]), GenerationNode(node_name='BoTorch w/ Model Selection', model_specs=[GeneratorSpec(model_enum=BoTorch, model_key_override=None)], transition_criteria=[])])

## Initialize our Ax Client

In [5]:
Tag_client = Client()

In [6]:
Tag_client._random_seed = 237
# provide repeatability

In [7]:
# Define bounds for each parameter type
int_bounds = {
}
# NO INTEGER PARAMETERS

choice_bounds = {
    "material": ['dragon', 'eco'], # DragonSkin 20 , EcoFlex
    "cut": ['triple_center', 
            'double_dashed_x', 
            'two_parallel_lines', 
            'triangle_hole'],
}

float_bounds = {
    "thickness": [0.2, 1.0], # mm
    "dome_height": [0.65, 2.25], # mm
}

# Generate parameters dynamically
parameters = []

# Add integer parameters
for name, bounds in int_bounds.items():
    lower, upper = bounds
    this_param = RangeParameterConfig(
        name = name,
        bounds = (lower, upper),  # adjust bounds to deal with order
        parameter_type = "int",
        )
    parameters.append(this_param)


# Add float parameters
for name, bounds in float_bounds.items():
    lower, upper = bounds
    this_param = RangeParameterConfig(
        name = name,
        bounds = (lower, upper),  # adjust bounds to deal with order
        parameter_type = "float",
        )
    parameters.append(this_param)

# Add choice parameters
for name, vals in choice_bounds.items():
    this_param = ChoiceParameterConfig(
        name = name,
        values = vals,
        parameter_type= "str",
    )
    parameters.append(this_param)

# Create the experimenttrial = Tag_client.get_next_trials(max_trials=2)
Tag_client.configure_experiment(
    parameters=parameters,
)

In [8]:
# Configure optimization to minimize squared error of crack pressure
# NOTE: minimize=True, since we aren't doing the -1 in the squared error func...
Tag_client.configure_optimization(objective="squared_error", minimize=True, outcome_constraints=["crack_pressure >= 0"] )

TypeError: Client.configure_optimization() got an unexpected keyword argument 'minimize'

In [None]:
# Update the generation strategy (see section above - not necessary)
Tag_client.set_generation_strategy(
    generation_strategy=generation_strategy,
)
GENERATION_SET = True

## Grab data from Taguchi array trials - initialize client

In [None]:
# Load valve test data
Tag_tests = pd.read_pickle('valve_dict2.pkl')
Tag_meta = pd.read_csv('Valve_d_params.csv')
test_metrics = {}
for key in Tag_tests:
    crack_pressure = extract_crack_pressure(Tag_tests[key])
    test_metrics[key] = {}
    test_metrics[key]['crack_pressure'] = crack_pressure
    test_metrics[key]['squared_error'] = calculate_squared_error(crack_pressure)

# Assuming format 'TESTING_X' where X is the design number
design_mapping = {}
for key in test_metrics:
    try:
        if '_-' in key:
            design_num = -int(key.split('_-')[1])
        else:
            design_num = int(key.split('_')[1])
        design_mapping[key] = design_num
    except:
        # If parsing fails, skip this key
        print(f"Couldn't parse design number from key: {key}")
        continue

# Add design numbers to Tag_meta if needed
design_metrics = {}
for test_key, design_num in design_mapping.items():
    if design_num in design_metrics:
        # If we already have metrics for this design, keep the higher crack pressure
        if test_metrics[test_key]['crack_pressure'] > design_metrics[design_num]['crack_pressure']:
            design_metrics[design_num] = test_metrics[test_key]
    else:
        design_metrics[design_num] = test_metrics[test_key]

# Add the metrics to the metadata DataFrame
for i, row in Tag_meta.iterrows():
    design = row['Design']
    if design in design_metrics:
        Tag_meta.at[i, 'crack_pressure'] = design_metrics[design]['crack_pressure']
        Tag_meta.at[i, 'squared_error'] = design_metrics[design]['squared_error']

# Now use the original code with the updated metadata
tested_df = Tag_meta.copy()
# Remove any incomplete tests
tested_df = tested_df.dropna(subset=['crack_pressure'])
sv_out = df_to_sv_valve(tested_df)  # Use the new valve-specific function

Couldn't parse design number from key: TESTING_0.1
Couldn't parse design number from key: TESTING_0.2


In [None]:
seed = 116
# Get the actual number of available designs with data
available_designs = len(tested_df)
# Don't try to sample more than available
Number_of_initialization_trials = min(10, available_designs)
print(f"Sampling {Number_of_initialization_trials} designs from {available_designs} available designs")

random.seed(seed)
# Choose random numbers only from the available indices
choices = random.sample(range(0, available_designs), Number_of_initialization_trials)

Sampling 10 designs from 11 available designs


In [None]:
init_df = tested_df.iloc[choices]
# arrange by design number
init_df = init_df.sort_values(by='Design')
init_df

Unnamed: 0,Design,material,cut,thickness,dome_height,crack_pressure,squared_error
0,0,dragon,triple_center,0.2,0.65,72.3795,-5238.792
1,1,dragon,triple_center,0.4,1.2,1515.7596,-2297527.0
2,2,dragon,double_dashed_x,0.6,1.6,1111.0517,-1234436.0
3,3,dragon,double_dashed_x,1.0,2.25,62.7107,-3932.632
4,4,dragon,two_parallel_lines,0.2,1.6,6.7438,-45.47884
5,5,dragon,two_parallel_lines,0.4,2.25,9.8617,-97.25313
6,6,dragon,triangle_hole,0.6,0.65,3.0007,-9.0042
7,7,dragon,triangle_hole,1.0,1.2,664.4773,-441530.1
8,8,eco,triple_center,0.2,2.25,55.296,-3057.648
9,9,eco,triple_center,0.4,1.6,91.6897,-8407.001


In [None]:
param_names = [parameters[x].name for x in range(len(parameters))]
# Add initial trials
add_trials_valve(Tag_client, tested_df, param_names, choice_bounds, int_bounds, inds=choices)

In [None]:
# Summary of initialization
Tag_client.summarize()

Unnamed: 0,trial_index,arm_name,trial_status,crack_pressure,squared_error,thickness,dome_height,material,cut
0,0,0_0,COMPLETED,91.6897,-8407.001,0.4,1.6,eco,triple_center
1,1,1_0,COMPLETED,55.296,-3057.648,0.2,2.25,eco,triple_center
2,2,2_0,COMPLETED,6.7438,-45.47884,0.2,1.6,dragon,two_parallel_lines
3,3,3_0,COMPLETED,72.3795,-5238.792,0.2,0.65,dragon,triple_center
4,4,4_0,COMPLETED,3.0007,-9.0042,0.6,0.65,dragon,triangle_hole
5,5,5_0,COMPLETED,9.8617,-97.25313,0.4,2.25,dragon,two_parallel_lines
6,6,6_0,COMPLETED,664.4773,-441530.1,1.0,1.2,dragon,triangle_hole
7,7,7_0,COMPLETED,1515.7596,-2297527.0,0.4,1.2,dragon,triple_center
8,8,8_0,COMPLETED,62.7107,-3932.632,1.0,2.25,dragon,double_dashed_x
9,9,9_0,COMPLETED,1111.0517,-1234436.0,0.6,1.6,dragon,double_dashed_x


## Begin Bayesian Optimization Trials

### Load previously completed BO trials - add them to the Client

In [None]:
# load meta and test data for BO designs
BO_meta_file = 'Valve_d_params.csv' #.csv
BO_test_file = 'valve_dict2.pkl' #.pkl
# BO_test_file = 'badfile'

# Define the structure for valve experiment
design_order = ['index', 'material', 'cut', 'thickness', 'dome_height'] 
meta_order = ['Design', 'material', 'cut', 'thickness', 'dome_height', 'crack_pressure'] 

BO_meta_exist = True
try:
    BO_d_params = pd.read_csv(BO_meta_file) # all BO designs
except:
    BO_d_params = pd.DataFrame(columns=meta_order)
    BO_meta_exist = False
    print('No existing design file found. Creating new one.')
    
# grab the experimental output for the new designs
BO_tests_exist = True
try:
    BO_tests = pd.read_pickle(BO_test_file)
except:
    BO_tests = {}
    BO_tests_exist = False
    print('No existing test file found. Creating new one.')

In [None]:
Incomplete = False
if BO_tests_exist and BO_meta_exist:
    prev_BO_df = combine_meta_test(BO_d_params, BO_tests) # combine meta and test data
    
    # Check for and add required columns for valve optimization
    if 'crack_pressure' not in prev_BO_df.columns:
        # Try to extract crack pressure from test data if available
        print('Adding crack_pressure column to previous trials data')
        prev_BO_df['crack_pressure'] = np.nan
        for i, row in prev_BO_df.iterrows():
            design = row['Design']
            key = f'TESTING_{design}'
            if key in test_metrics:
                prev_BO_df.at[i, 'crack_pressure'] = test_metrics[key]['crack_pressure']
    
    # Add squared_error column if missing
    if 'squared_error' not in prev_BO_df.columns:
        print('Adding squared_error column to previous trials data')
        prev_BO_df['squared_error'] = prev_BO_df['crack_pressure'].apply(
            lambda x: -1 * (x ** 2) if not pd.isna(x) else np.nan
        )
    
    if np.isnan(prev_BO_df['crack_pressure']).any():
        print('Incomplete tests found.')
        Incomplete = True

Adding squared_error column to previous trials data
Incomplete tests found.


In [None]:
# add previoius BO trials to this Ax run (if they exist)
if 'prev_BO_df' in locals():
    add_trials_valve(Tag_client, prev_BO_df, param_names, choice_bounds, int_bounds)

Skipping incomplete trial (index 0) - no pressure data.
Skipping incomplete trial (index 1) - no pressure data.
Skipping incomplete trial (index 2) - no pressure data.
Skipping incomplete trial (index 3) - no pressure data.
Skipping incomplete trial (index 4) - no pressure data.
Skipping incomplete trial (index 5) - no pressure data.
Skipping incomplete trial (index 6) - no pressure data.
Skipping incomplete trial (index 7) - no pressure data.
Skipping incomplete trial (index 8) - no pressure data.
Skipping incomplete trial (index 9) - no pressure data.
Skipping incomplete trial (index 10) - no pressure data.
Skipping incomplete trial (index 11) - no pressure data.
Skipping incomplete trial (index 12) - no pressure data.
Skipping incomplete trial (index 13) - no pressure data.
Skipping incomplete trial (index 14) - no pressure data.
Skipping incomplete trial (index 15) - no pressure data.


In [None]:
Tag_client.summarize()

Unnamed: 0,trial_index,arm_name,trial_status,crack_pressure,squared_error,thickness,dome_height,material,cut
0,0,0_0,COMPLETED,91.6897,-8407.001,0.4,1.6,eco,triple_center
1,1,1_0,COMPLETED,55.296,-3057.648,0.2,2.25,eco,triple_center
2,2,2_0,COMPLETED,6.7438,-45.47884,0.2,1.6,dragon,two_parallel_lines
3,3,3_0,COMPLETED,72.3795,-5238.792,0.2,0.65,dragon,triple_center
4,4,4_0,COMPLETED,3.0007,-9.0042,0.6,0.65,dragon,triangle_hole
5,5,5_0,COMPLETED,9.8617,-97.25313,0.4,2.25,dragon,two_parallel_lines
6,6,6_0,COMPLETED,664.4773,-441530.1,1.0,1.2,dragon,triangle_hole
7,7,7_0,COMPLETED,1515.7596,-2297527.0,0.4,1.2,dragon,triple_center
8,8,8_0,COMPLETED,62.7107,-3932.632,1.0,2.25,dragon,double_dashed_x
9,9,9_0,COMPLETED,1111.0517,-1234436.0,0.6,1.6,dragon,double_dashed_x


In [None]:
if Incomplete:
    raise RuntimeError('Incomplete tests found. Please complete all tests before proceeding.')

RuntimeError: Incomplete tests found. Please complete all tests before proceeding.

### Find next 2 BO tests - add design values to BO_d_params

In [None]:
trials = Tag_client.get_next_trials(max_trials=2)
for key in trials:
    trial = trials[key]
    meta_order = ['Design', 'material', 'cut', 'thickness', 'dome_height']
    # start pd with meta_order as columns
    new_trial = pd.DataFrame(columns=meta_order)
    for column in meta_order:
        if column in trial.keys():
            new_trial[column] = trial[column]
    # add design column
    new_trial['Design'] = key
    
trial

TypeError: optimize_acqf_mixed() got an unexpected keyword argument 'sequential'

In [None]:
new_design_vals_df, new_meta_vals_df = df_for_new_trials_valve(trials, meta_order)

In [None]:
Tag_client.summarize()

In [None]:
# Look at latest surrogate
cards = Tag_client.compute_analyses(display=True)

## Adding next-test details to dataframes still under construction

In [None]:
# Update d_params with meta data
test_BO = pd.concat([BO_d_params, new_meta_vals_df], axis=0)

# check if any Design numbers repeat
designs = test_BO['Design'].unique()
if len(designs) != len(test_BO):
    raise RuntimeError('Design numbers repeat. Do not overwrite trials!')
else:
    BO_meta = pd.concat([BO_d_params, new_meta_vals_df], axis=0)

In [None]:
BO_meta.to_csv(BO_meta_file, index=False) # save to csv

In [None]:
# BO_test_overwrite = {}
# for i in new_meta_vals_df['Design']:
#     key = 'Taguchi_Design_' + str(i)
#     BO_test_overwrite[key] = BO_tests[key]

In [None]:
BO_experimental = combine_meta_test(BO_meta, BO_tests)

In [None]:
BO_experimental

In [None]:
Tag_client.summarize()

In [None]:
# Create Valve_designs folder if it doesn't exist
import os
os.makedirs('Valve_designs', exist_ok=True)

# Save the trial data in the Valve_designs folder
new_keys = [x for x in trials.keys()]
new_design_vals_df.to_csv(f'Valve_designs/valve_trial_{new_keys}.csv', index=False)

In [None]:
# This function has been moved to the top of the notebook

In [None]:
# This function has been moved to the top of the notebook