# Propeller Bayesian Optimization — **Multi‑Task GP Only** (Objective + RPM + Vibration)

This Colab‑ready notebook assumes you have **measured side metrics** `rpm` and `vibration` in your CSV and will run **only** a Multi‑Task GP (ICM) that jointly models:

- Objective (e.g., `ld_ratio`)
- `rpm` (continuous)
- `vibration` (binary 0/1 treated as numeric)

**We optimize acquisition only on the objective task.** You only provide bounds for **design variables** (geometry etc.).


## 0) Setup

Install Libraries

In [2]:
%pip install -q ax-platform botorch gpytorch torch torchvision torchaudio


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.7/72.7 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.4/30.4 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m779.9/779.9 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m277.7/277.7 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.3/176.3 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m42.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m55.9/55.9 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.4/4.4 MB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Imports + Setup

In [3]:
import warnings, os, json
from typing import List, Dict
import numpy as np
import pandas as pd
import torch

from botorch.models.multitask import MultiTaskGP
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.optim import optimize_acqf
from botorch.models.transforms.outcome import Standardize
from gpytorch.mlls import ExactMarginalLogLikelihood

warnings.filterwarnings("ignore")
torch.set_default_dtype(torch.double)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cpu


## 1) Upload your CSV (must include: objective, rpm, vibration)

In [4]:
from google.colab import files

# Upload CSV file
up = files.upload()  # choose your CSV

# Ensure exactly one file was uploaded
assert len(up) == 1, 'Please upload exactly one CSV.'

# Extract the filename
DATA_CSV = list(up.keys())[0]

# Read into pandas
import pandas as pd
df = pd.read_csv(DATA_CSV)

# Print info
print('Loaded:', DATA_CSV, 'shape=', df.shape)
df.head()


Saving dummy_propeller_data.csv to dummy_propeller_data.csv
Loaded: dummy_propeller_data.csv shape= (20, 10)


Unnamed: 0,symmetric,camber,root_chord,trip_chord,corner_radius,angle_of_attack,lift,drag,rpm,vibration
0,yes,0.102922,4.430179,3.908754,0.79502,-2.608115,0.934627,0.154353,19082.65886,0
1,no,4.849549,2.682096,3.325398,0.278844,9.264896,0.741832,0.105527,12395.618907,1
2,yes,4.162213,2.260206,3.818497,0.10497,10.215701,1.894546,0.079966,11448.948721,0
3,yes,1.061696,5.795542,3.684482,0.833915,6.225544,1.712181,0.134404,14894.527603,1
4,yes,0.909125,5.862528,2.7937,0.736172,10.419344,1.450106,0.285727,19856.504541,0


## 2) Configure columns and auto‑detect design variables

- If you have `lift`/`drag` and want L/D, we’ll create `ld_ratio = lift / drag`.
- Objective defaults to `'ld_ratio'` if present; otherwise pick any numeric you prefer.
- Design variables are auto‑detected as numeric columns excluding metadata/objectives/side metrics.

In [5]:
# Optional derived objective: lift/drag ratio
if {'lift', 'drag'}.issubset(df.columns):
    with np.errstate(divide='ignore', invalid='ignore'):
        df['ld_ratio'] = df['lift'] / df['drag']

# Choose the objective column (edit if desired)
OBJECTIVE_COL = 'ld_ratio' if 'ld_ratio' in df.columns else None

if OBJECTIVE_COL is None:
    # Fallback: pick the first numeric column that is not rpm/vibration
    numeric_cols = [c for c in df.columns if pd.api.types.is_numeric_dtype(df[c])]
    for c in numeric_cols:
        if c.lower() not in {'rpm', 'vibration'}:
            OBJECTIVE_COL = c
            break

print('Objective column:', OBJECTIVE_COL)

# Ensure side tasks exist
missing = [c for c in ['rpm', 'vibration'] if c not in df.columns]
assert not missing, f"CSV must include side metrics: missing {missing}."

# Auto-detect design variables (numeric, excluding objective & side metrics & metadata)
meta_tokens = [
    'id','index','trial','run','timestamp','time','date',
    'note','comment','operator','label','tag','name'
]

all_cols = df.columns.tolist()
numeric_cols = [c for c in all_cols if pd.api.types.is_numeric_dtype(df[c])]

block = set(['rpm', 'vibration', OBJECTIVE_COL, 'lift', 'drag', 'neg_drag', 'ld_ratio'])
block.update([c for c in all_cols if any(t in c.lower() for t in meta_tokens)])

DESIGN_VARS = [c for c in numeric_cols if c not in block]
print('Design variables (auto):', DESIGN_VARS)
assert len(DESIGN_VARS) > 0, 'No design variables detected. Please add numeric design columns.'

# Bounds from data min/max (override here if needed)
BOUNDS: Dict[str, tuple] = {
    k: (float(np.nanmin(df[k])), float(np.nanmax(df[k]))) for k in DESIGN_VARS
}
print('Bounds:')
for k, v in BOUNDS.items():
    print(' ', k, v)

# Final task list (objective must be first; rpm and vibration follow)
TASK_LIST = [OBJECTIVE_COL, 'rpm', 'vibration']
print('Tasks:', TASK_LIST)

# Suggestions to propose
N_CANDIDATES = 5
STANDARDIZE_Y = True


Objective column: ld_ratio
Design variables (auto): ['camber', 'root_chord', 'trip_chord', 'corner_radius', 'angle_of_attack']
Bounds:
  camber (0.1029224714790122, 4.849549260809972)
  root_chord (2.1375540844608736, 5.862528132298237)
  trip_chord (1.1356818667316142, 3.960660809801552)
  corner_radius (0.1049699054112421, 0.8984914683186939)
  angle_of_attack (-4.491617465, 13.15132947852186)
Tasks: ['ld_ratio', 'rpm', 'vibration']


## 3) Build Multi‑Task tensors (task feature as last column)

In [6]:
# Bounds tensor for optimization (min/max per design variable)
bounds = torch.tensor(
    [
        [BOUNDS[k][0] for k in DESIGN_VARS],   # lower bounds
        [BOUNDS[k][1] for k in DESIGN_VARS]    # upper bounds
    ],
    dtype=torch.double,
    device=device
)

# Map each task name to an integer index (for multi-task modeling)
task_to_idx = {t: i for i, t in enumerate(TASK_LIST)}
print('Task indices:', task_to_idx)

def build_multitask_tensors(df: pd.DataFrame, xcols: List[str], task_cols: List[str]):
    """
    Build concatenated tensors for multi-task GP training.
    Each row of X: [design_vars..., task_index]
    Each row of Y: corresponding scalar target for that task.
    """
    Xs, Ys = [], []
    for t in task_cols:
        sub = df[xcols + [t]].dropna()
        if sub.empty:
            continue

        X = torch.tensor(sub[xcols].to_numpy(), dtype=torch.double)
        tfeat = torch.full((X.shape[0], 1), float(task_to_idx[t]), dtype=torch.double)
        X_mt = torch.cat([X, tfeat], dim=1)

        y = torch.tensor(sub[t].to_numpy(), dtype=torch.double).unsqueeze(-1)
        Xs.append(X_mt)
        Ys.append(y)

    X_all = torch.cat(Xs, dim=0).to(device)
    Y_all = torch.cat(Ys, dim=0).to(device)
    return X_all, Y_all

# Build tensors
X_all, Y_all = build_multitask_tensors(df, DESIGN_VARS, TASK_LIST)
print('X_all:', X_all.shape, 'Y_all:', Y_all.shape)


Task indices: {'ld_ratio': 0, 'rpm': 1, 'vibration': 2}
X_all: torch.Size([60, 6]) Y_all: torch.Size([60, 1])


## 4) Fit Multi‑Task GP (ICM)

In [7]:
# The task index is appended as the last column in X_all
task_feature = len(DESIGN_VARS)  # index of the task indicator column

# Optional output standardization (normalizes Y values)
outcome_tf = Standardize(m=1) if STANDARDIZE_Y else None

# Initialize the Multi-Task GP model
mtgp = MultiTaskGP(
    train_X=X_all,
    train_Y=Y_all,
    task_feature=task_feature,
    outcome_transform=outcome_tf
).to(device)

# Define the Marginal Log-Likelihood for optimization
mll = ExactMarginalLogLikelihood(mtgp.likelihood, mtgp)

# Fit the model hyperparameters
from botorch.fit import fit_gpytorch_mll  # updated import
fit_gpytorch_mll(mll)

# Switch model to evaluation mode
mtgp.eval()

print('✅ Fitted MultiTaskGP over tasks:', TASK_LIST)


✅ Fitted MultiTaskGP over tasks: ['ld_ratio', 'rpm', 'vibration']


## 5) Optimize acquisition (qEI) **on objective task only**

In [8]:
from botorch.acquisition.monte_carlo import qExpectedImprovement
from botorch.acquisition.objective import LinearMCObjective
from botorch.optim import optimize_acqf
from botorch.models.model import Model as BoModel
import pandas as pd, numpy as np, torch

def propose_candidates_multitask(
    model: BoModel,
    bounds: torch.Tensor,          # shape (2, d) for design vars only
    X_all: torch.Tensor,
    Y_all: torch.Tensor,
    n: int = 5,
    raw_samples: int = 256,
    q: int = 1,
) -> pd.DataFrame:
    """
    Propose candidate designs for the objective task (task index 0).
    Uses qExpectedImprovement with a linear objective and fixes the task feature to 0.0.
    """
    model.eval()

    # d = number of design variables; task feature is appended as last column in X_all
    d = bounds.shape[1]
    task_feature = d  # index of the task column in model input

    # Best observed objective value so far (rows where task == 0)
    obj_mask = (X_all[:, -1] == 0)
    Y_obj = Y_all[obj_mask]
    assert Y_obj.numel() > 0, "No objective observations found after NA-drop."
    best_f = Y_obj.max()  # tensor is fine for BoTorch

    # qEI needs an objective for multi-output models like MultiTaskGP
    objective = LinearMCObjective(
        weights=torch.tensor([1.0], dtype=torch.double, device=bounds.device)
    )

    # Extend bounds with a fixed column for the task feature (both rows = 0.0)
    task_bounds = torch.zeros(2, 1, dtype=bounds.dtype, device=bounds.device)
    bounds_full = torch.cat([bounds, task_bounds], dim=1)  # shape (2, d+1)

    # Acquisition function on the original model
    acqf = qExpectedImprovement(model=model, best_f=best_f, objective=objective)

    # Optimize EI to generate n candidates (each of size q; q=1 by default)
    cands = []
    for _ in range(n):
        cand, _ = optimize_acqf(
            acq_function=acqf,
            bounds=bounds_full,
            q=q,
            num_restarts=10,
            raw_samples=raw_samples,
            fixed_features={task_feature: 0.0},  # freeze to objective task
        )
        # Drop the appended task column before returning to user space
        cand_np = cand[..., :d].detach().cpu().numpy()
        cands.append(cand_np)

    C = np.vstack(cands)  # (n x d) when q == 1
    return pd.DataFrame(C, columns=DESIGN_VARS)


# ---- Run candidate generation ----
N_CANDIDATES = 5  # adjust as needed
candidates_df = propose_candidates_multitask(mtgp, bounds, X_all, Y_all, n=N_CANDIDATES)
display(candidates_df.head())

Unnamed: 0,camber,root_chord,trip_chord,corner_radius,angle_of_attack
0,4.239411,3.191459,3.137175,0.857,-1.288555
1,1.262807,5.826813,1.52939,0.21033,6.3372
2,1.854726,5.628378,2.18253,0.332931,1.624849
3,3.516076,3.741031,2.018296,0.766346,12.054967
4,1.642808,4.481623,2.050486,0.572206,7.622328


## 6) Save & download new designs

In [9]:
# File name for saving candidate designs
out_name = 'next_experiments_multitask.csv'

# Save the proposed candidates to CSV
candidates_df.to_csv(out_name, index=False)
print(f"✅ Saved candidate designs to {out_name}")

# Download the file locally (for Google Colab)
from google.colab import files
files.download(out_name)


✅ Saved candidate designs to next_experiments_multitask.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

### Notes
- This notebook **requires** `rpm` and `vibration` columns (vibration as 0/1; string labels auto‑mapped).- Only **design variables** receive bounds (auto min/max); you do **not** give bounds for rpm/vibration.- We treat `vibration` as numeric in regression for simplicity; if you need a Bernoulli head, you can extend this later.- To handle noisy measurements or batched suggestions, replace `qExpectedImprovement` with `qNoisyExpectedImprovement`.

In [None]:

import botorch
print(botorch.__version__)

0.15.1
