In [58]:
import numpy as np
import pandas as pd

# models
from sklearn.gaussian_process import GaussianProcessClassifier

# preprocessing
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline

# Creating Dummy Dataset for Prototyping
---
Pass if:
- flow_metric is between 9 and 11 µL/min
- pressure drop is between 85 and 115 Pascals

In [59]:
# Read the microfluidic simulation dataset (path is relative to the notebook's working directory).
data = pd.read_csv(filepath_or_buffer='datasets/AI_Simulation_Microfluidic_Dataset.csv')

In [60]:
# NOTE: disabled scratch code for one-hot encoding `resin_type` by hand and splicing the
# result back into `data`. Nothing here runs; the ColumnTransformer built in the
# preprocessing cell below one-hot encodes `resin_type` instead.
# enc = OneHotEncoder(handle_unknown='ignore', sparse_output=False)  # or sparse=False for older sklearn
# X = enc.fit_transform(data[['resin_type']])  # note the double brackets -> 2D
# oh = pd.DataFrame(X, columns=enc.get_feature_names_out(['resin_type']), index=data.index)

# # replace the original column
# data = pd.concat([data.drop(columns=['resin_type']), oh], axis=1)

# Data Preprocessing
---
- one-hot encode the categorical `resin_type` column
- standardize the numeric features
- split into X and y
- split into train and test data

In [64]:
# Target is the pass/fail label; predictors are every other column except the model_id identifier.
y_all = data["pass_fail"]
X_all = data.drop(["model_id", "pass_fail"], axis=1)

# Column groups for the transformer: resin_type is treated as categorical,
# and every remaining predictor column is treated as numeric.
cat_cols = ["resin_type"]
num_cols = list(filter(lambda name: name not in cat_cols, X_all.columns))

# Hold out 20% of the rows as a test set, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_all,
    y_all,
    test_size=0.2,
    stratify=y_all,
    random_state=42,
)

# Preprocessor: one-hot encode the categorical columns and standardize the numeric ones.
# It is only constructed here; fitting happens when the pipeline that wraps it is fit.
pre = ColumnTransformer(
    [
        ("cat", OneHotEncoder(sparse_output=False, handle_unknown="ignore"), cat_cols),
        ("num", StandardScaler(), num_cols),
    ],
    remainder="drop",  # any column not listed in either group is discarded
)

Train acc: 1.0
Test  acc: 0.7


# Surrogate Model 
---

In [65]:
# Preprocessing and classifier chained into one estimator, so the encoder and scaler
# are fit on the training split only and then reused unchanged on the test split.
clf = Pipeline([
    ("pre", pre),
    ("model", GaussianProcessClassifier()),
]).fit(X_train, y_train)  # fit() returns the fitted pipeline itself

# Mean accuracy on each split; a large train/test gap indicates overfitting.
print("Train acc:", clf.score(X_train, y_train))
print("Test  acc:", clf.score(X_test, y_test))

Train acc: 1.0
Test  acc: 0.7
