### Data Prep

In [1]:
import pandas as pd
import numpy as np

# Load the multi-batch dataset
df_multi = pd.read_csv("datasets/multi_batch_channels_dataset.csv")
df_context = pd.read_csv("datasets/sla_spc_flowrate_channels_13batches.csv")

# add resin temp, resin age, ambient temp into df_multi
df_multi["resin_temp"] = df_context["resin_temp"]
df_multi["resin_age"] = df_context["resin_age"]
df_multi["ambient_temp"] = df_context["ambient_temp"]

# drop unnessesary columns
df_multi.drop(columns="channel_diameter_mm")

# Compute per-batch mean, std, CV
batch_summary = df_multi.groupby("batch_id")["measured_flow_mL_per_min"].agg(["mean", "std"])
batch_summary["cv"] = batch_summary["std"] / batch_summary["mean"]

print("Batch-level summary:")
print(batch_summary.head())

Batch-level summary:
               mean       std        cv
batch_id                               
BATCH_001  0.373844  0.045291  0.121149
BATCH_002  0.063912  0.003787  0.059255
BATCH_003  6.468637  0.602202  0.093096
BATCH_004  0.273139  0.022820  0.083546
BATCH_005  1.438789  0.159296  0.110715


In [2]:
# Select features (fixed per batch)
knobs = ["resin_type", "layer_thickness_um", "orientation_deg", "support_mode", "fit_adjustment_pct"] # tunable knobs
context = ["resin_age", "resin_temp", "ambient_temp"]  # drift context
print_output = ["channel_length_mm", "channel_width_mm"] # output data from 3d print

features = knobs + context

# Merge batch-level CV back with batch parameters
df_batches = df_multi.groupby("batch_id").first()[features].reset_index()
df_batches = df_batches.merge(batch_summary["cv"].reset_index(), on="batch_id")

print("Prepared dataset:", df_batches.shape)
df_batches.head()

Prepared dataset: (20, 10)


Unnamed: 0,batch_id,resin_type,layer_thickness_um,orientation_deg,support_mode,fit_adjustment_pct,resin_age,resin_temp,ambient_temp,cv
0,BATCH_001,Resin_C,20,45,manual,0.395,0,76.67,73.85,0.121149
1,BATCH_002,Resin_B,100,45,manual,0.057,0,77.33,70.53,0.059255
2,BATCH_003,Resin_B,50,30,manual,-1.261,0,77.84,72.2,0.093096
3,BATCH_004,Resin_C,20,0,auto,1.948,1,76.51,71.81,0.083546
4,BATCH_005,Resin_A,100,60,manual,-0.901,1,77.06,72.52,0.110715


In [3]:
# Use mean historical CV as baseline
baseline_cv = df_batches["cv"].mean()
print(f"Baseline CV = {baseline_cv:.4f}")

Baseline CV = 0.0907


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

X = df_batches[features] # inputs
y = df_batches["cv"] # targets

categorical = ["resin_type", "support_mode"]
numerical = [f for f in features if f not in categorical]

preprocess = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), categorical),
    ("num", StandardScaler(), numerical)
])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Train Surrogate Model

In [5]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor

pipe = Pipeline([
    ("preprocess", preprocess),
    ("rf", RandomForestRegressor(random_state=42))
])

param_grid = {
    "rf__n_estimators": [100, 200, 300, 400, 500, 600, 700, 800],
    "rf__max_depth": [5, 10, 15, 20],
    "rf__max_features": ["sqrt", "log2", None],
}

grid_search = GridSearchCV(
    estimator=pipe,
    param_grid=param_grid,
    cv=5, # num cross-validation folds
    scoring='r2', # metric 
    n_jobs=-1,
    verbose=1 # display progress during fitting
)

grid_search.fit(X, y)
print("Best parameters found:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)

best_model = grid_search.best_estimator_

Fitting 5 folds for each of 96 candidates, totalling 480 fits
Best parameters found: {'rf__max_depth': 10, 'rf__max_features': None, 'rf__n_estimators': 500}
Best cross-validation score: -0.2997358853810546


In [6]:
from sklearn.ensemble import RandomForestRegressor

# RF surrogate model
model = best_model
model.fit(X_train, y_train)

print("Surrogate model R² on test set:", model.score(X_test, y_test))

Surrogate model R² on test set: -0.12697402894948806


### Calibrate uncertainty using mean and std from Surrogate Model

In [18]:
pre = model.named_steps["preprocess"]
rf  = model.named_steps["rf"]
EXPECTED = list(model.named_steps["preprocess"].feature_names_in_)

def mu_sigma(df):
    """
    Inputs dataframe of candidates with knobs + context 
    Compute y_pred per tree in forest
    Outputs arrays for mean and std, per row
    """
    Xt = pre.transform(df)
    preds = np.vstack([t.predict(Xt) for t in rf.estimators_])  # [n_trees, n_samples]
    return preds.mean(axis=0), preds.std(axis=0, ddof=1)

### BO Search Space and Current context (c_t)

In [23]:
# BO search space (knobs)
pbounds = {
    "layer_thickness_um": (50, 100), # limited to 50 and 100 per dr. ava's comment
    "orientation_deg": (0, 90),
    "fit_adjustment_pct": (-2.0, 2.0),
    "resin_type": ("Resin_A", "Resin_B", "Resin_C"),
    "support_mode": ("auto", "manual")
}

# current context snapshot BEFORE a print (experimental)
c_t = {
    "ambient_temp": 72,    # °F, lab measurement
    "resin_temp": 76,      # °F, sensor reading
    "resin_age": 12.0,        # days since resin was opened
} 