### Data Prep

In [1]:
import pandas as pd

# Load the multi-batch dataset and the per-row context measurements
df_multi = pd.read_csv("datasets/multi_batch_channels_dataset.csv")
df_context = pd.read_csv("datasets/sla_spc_flowrate_channels_13batches.csv")

# add resin temp, resin age, ambient temp into df_multi
# NOTE(review): these assignments align on the row index, so any df_multi row
# without a matching index label in df_context becomes NaN -- confirm both
# files share the same row order and length.
df_multi["resin_temp"] = df_context["resin_temp"]
df_multi["resin_age"] = df_context["resin_age"]
df_multi["ambient_temp"] = df_context["ambient_temp"]

# drop unnecessary columns
# DataFrame.drop returns a new frame; the result must be assigned back,
# otherwise the column silently stays in df_multi.
df_multi = df_multi.drop(columns="channel_diameter_mm")

# Compute per-batch mean, std, CV (coefficient of variation = std / mean)
batch_summary = df_multi.groupby("batch_id")["measured_flow_mL_per_min"].agg(["mean", "std"])
batch_summary["cv"] = batch_summary["std"] / batch_summary["mean"]

print("Batch-level summary:")
print(batch_summary.head())

Batch-level summary:
               mean       std        cv
batch_id                               
BATCH_001  0.373844  0.045291  0.121149
BATCH_002  0.063912  0.003787  0.059255
BATCH_003  6.468637  0.602202  0.093096
BATCH_004  0.273139  0.022820  0.083546
BATCH_005  1.438789  0.159296  0.110715


In [14]:
# Feature groups; one value per batch is taken from the batch's first row below
knobs = [
    "resin_type",
    "layer_thickness_um",
    "orientation_deg",
    "support_mode",
    "fit_adjustment_pct",
]  # tunable knobs
print_output = ["channel_length_mm", "channel_width_mm"]  # output data from 3d print
context = ["resin_age", "resin_temp", "ambient_temp"]  # drift context

features = knobs + context

# One row per batch: first-row feature values joined with that batch's CV
df_batches = (
    df_multi.groupby("batch_id")
    .first()[features]
    .reset_index()
    .merge(batch_summary["cv"].reset_index(), on="batch_id")
)

print("Prepared dataset:", df_batches.shape)
df_batches.head()

Prepared dataset: (20, 10)


Unnamed: 0,batch_id,resin_type,layer_thickness_um,orientation_deg,support_mode,fit_adjustment_pct,resin_age,resin_temp,ambient_temp,cv
0,BATCH_001,Resin_C,20,45,manual,0.395,0,76.67,73.85,0.121149
1,BATCH_002,Resin_B,100,45,manual,0.057,0,77.33,70.53,0.059255
2,BATCH_003,Resin_B,50,30,manual,-1.261,0,77.84,72.2,0.093096
3,BATCH_004,Resin_C,20,0,auto,1.948,1,76.51,71.81,0.083546
4,BATCH_005,Resin_A,100,60,manual,-0.901,1,77.06,72.52,0.110715


In [15]:
# Baseline = average of the per-batch CV values in df_batches
baseline_cv = df_batches.loc[:, "cv"].mean()
print(f"Baseline CV = {baseline_cv:.4f}")

Baseline CV = 0.0907


In [16]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Model inputs and regression target
X = df_batches[features]  # inputs
y = df_batches["cv"]  # targets

# Two columns are one-hot encoded; every other feature is standardized
categorical = ["resin_type", "support_mode"]
numerical = [name for name in features if name not in categorical]

preprocess = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), categorical),
        ("num", StandardScaler(), numerical),
    ]
)

# Hold out 20% of the batches, with a fixed seed for a repeatable split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

### Train Surrogate Model

In [17]:
import numpy as np
from sklearn.gaussian_process import GaussianProcessRegressor 
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, ConstantKernel as C

# Silence ConvergenceWarning from the GP hyperparameter optimizer.
# The filter is installed globally and stays active for the rest of the session.
import warnings
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=ConvergenceWarning)

# GP surrogate model
# Size the anisotropic RBF (one length scale per transformed column) from the
# TRAINING split: the Pipeline below refits `preprocess` on X_train, so this is
# the width the GP actually receives. Sizing it from the full X can give a
# different one-hot width when a category is absent from the training split,
# which makes the GP fit fail with a dimension mismatch.
Xt_sample = preprocess.fit_transform(X_train)
L = Xt_sample.shape[1]
kernel = C(1.0, (1e-6, 1e6)) * RBF(
    length_scale=np.ones(L),
    length_scale_bounds=(1e-4, 1e6)
) + WhiteKernel(noise_level=1e-3, noise_level_bounds=(1e-8, 1e1))
gp = GaussianProcessRegressor(
    kernel=kernel,
    normalize_y=True,
    n_restarts_optimizer=20,
    alpha=0.0,  # no extra diagonal jitter; noise is carried by the WhiteKernel term
    random_state=42,  # optimizer restarts are random; seed them so the score is reproducible
)

model = Pipeline([
    ("preprocess", preprocess),
    ("gp", gp),
])

model.fit(X_train, y_train)

# NOTE(review): the held-out split is 20% of the batches, so this R² rests on
# very few points -- treat it as a rough indication only.
print("Surrogate model R² on test set:", model.score(X_test, y_test))

Surrogate model R² on test set: -0.17171679478056046
