In [2]:
from LLM_agent_HPT import * # main experiment function for hyperparameter tuning task
from helper_func import * 
# Seed the `random` module (presumably the standard-library module, brought into
# scope by the star imports above) so that draws from it are repeatable.
# NOTE(review): if this is the stdlib module, the call does not seed NumPy,
# SciPy's QMC samplers, or torch, all of which the data-simulation cells use —
# confirm those are seeded where reproducibility matters.
random.seed(20250414)

## Physical model data simulation

### Robot arm

In [3]:
from scipy.stats import qmc              # For Latin Hypercube Sampling (LHS)
from sklearn.preprocessing import MinMaxScaler  # For normalization
# Use float64 as the default dtype for floating-point torch tensors created from here on.
torch.set_default_dtype(torch.float64)

# Robot Arm Function
def robot_arm(x):
    """Distance from the origin to the tip of a four-link planar arm.

    Parameters
    ----------
    x : 2-D array with at least 8 columns
        Columns 0-3 hold the joint angles theta1..theta4 (passed to
        ``np.cos`` / ``np.sin``), columns 4-7 hold the link lengths L1..L4.

    Returns
    -------
    1-D array, one value per row of ``x``
        ``sqrt(u**2 + v**2)``, where ``u`` and ``v`` are the sums over links
        of ``L_i * cos(.)`` and ``L_i * sin(.)`` evaluated at the cumulative
        joint angles.
    """
    n_links = 4

    # First link: its heading is theta1 itself.
    heading = x[:, 0]
    u = x[:, n_links] * np.cos(heading)
    v = x[:, n_links] * np.sin(heading)

    # Remaining links: the heading is the running sum of the joint angles.
    for joint in range(1, n_links):
        heading = heading + x[:, joint]
        reach = x[:, n_links + joint]
        u = u + reach * np.cos(heading)
        v = v + reach * np.sin(heading)

    # Euclidean distance of the end point (u, v) from the origin.
    return np.sqrt(u**2 + v**2)

# Input bounds: one [lower, upper] row per input column read by robot_arm.
bounds_robot = np.array([
    [0, 2 * np.pi],  # theta1
    [0, 2 * np.pi],  # theta2
    [0, 2 * np.pi],  # theta3
    [0, 2 * np.pi],  # theta4
    [0, 1],          # L1
    [0, 1],          # L2
    [0, 1],          # L3
    [0, 1],          # L4
])

# Latin Hypercube Sampling.
# The sampler is seeded explicitly: seeding Python's `random` module does not
# reach SciPy's QMC engines, so an unseeded sampler would produce a different
# design (and therefore a different dataset) on every run.
sampler = qmc.LatinHypercube(d=bounds_robot.shape[0], seed=20250414)
X_sample = sampler.random(n=1000)
X_ro = qmc.scale(X_sample, bounds_robot[:, 0], bounds_robot[:, 1])
y_robot = robot_arm(X_ro).reshape(-1, 1)

# Normalize input to [0, 1] per column
X_scaled_robot = MinMaxScaler().fit_transform(X_ro)

# Add zero-mean Gaussian noise whose variance is 10% of the signal variance.
# A dedicated, seeded generator keeps the noise repeatable without touching
# torch's global RNG state.
y_tensor = torch.tensor(y_robot)
signal_var = torch.var(y_tensor)
noise_rng = torch.Generator().manual_seed(20250414)
noise = torch.normal(
    mean=0.0,
    std=torch.sqrt(0.1 * signal_var).item(),
    size=y_tensor.shape,
    generator=noise_rng,
)
y_ro = (y_tensor + noise).numpy().ravel()


### Piston data

In [4]:

# Re-seed `random` and re-apply the float64 torch default at the top of this cell
# (presumably so the cell can be re-run independently of the earlier ones).
random.seed(20250414)
torch.set_default_dtype(torch.float64)

# Piston Simulation Function
def piston_sim(x):
    """Evaluate the piston cycle-time formula used in this notebook.

    Parameters
    ----------
    x : 2-D array with at least 7 columns
        Columns, in order: M, S, V0, k, P0, Ta, T0.

    Returns
    -------
    1-D array, one value per row of ``x``
        ``2 * pi * sqrt(M / (k + A))`` with
        ``A = P0 * S**2 * V0 / T0 * (1 - T0 / Ta)``.

    NOTE(review): the piston benchmark usually cited in the literature has
    additional terms; confirm this reduced form is intentional.
    """
    M, S, V0 = x[:, 0], x[:, 1], x[:, 2]
    k, P0 = x[:, 3], x[:, 4]
    Ta, T0 = x[:, 5], x[:, 6]

    # Same left-to-right evaluation as the one-line expression for A.
    gas_term = P0 * S**2 * V0 / T0
    A = gas_term * (1 - T0 / Ta)

    effective_stiffness = k + A
    return 2 * np.pi * np.sqrt(M / effective_stiffness)

# Input bounds: one [lower, upper] row per input column read by piston_sim.
bounds_piston = np.array([
    [30, 60],          # M
    [0.005, 0.020],    # S
    [0.002, 0.010],    # V0
    [1000, 5000],      # k
    [90000, 110000],   # P0
    [290, 296],        # Ta
    [340, 360]         # T0
])

# Latin Hypercube Sampling.
# The sampler is seeded explicitly: seeding Python's `random` module does not
# reach SciPy's QMC engines, so an unseeded sampler would produce a different
# design (and therefore a different dataset) on every run.
sampler = qmc.LatinHypercube(d=bounds_piston.shape[0], seed=20250414)
X_sample = sampler.random(n=1000)
X_piston = qmc.scale(X_sample, bounds_piston[:, 0], bounds_piston[:, 1])
y_piston = piston_sim(X_piston).reshape(-1, 1)
# X_pi is the raw (unscaled) design; the min-max scaled copy is kept separately.
X_pi = X_piston
# Normalize to [0, 1] per column
X_scaled_piston = MinMaxScaler().fit_transform(X_piston)

# Add zero-mean Gaussian noise whose variance is 10% of the signal variance.
# A dedicated, seeded generator keeps the noise repeatable without touching
# torch's global RNG state.
y_tensor = torch.tensor(y_piston)
signal_var = torch.var(y_tensor)
noise_rng = torch.Generator().manual_seed(20250414)
noise = torch.normal(
    mean=0.0,
    std=torch.sqrt(0.1 * signal_var).item(),
    size=y_tensor.shape,
    generator=noise_rng,
)
y_pi = (y_tensor + noise).numpy().ravel()


### Self-defined training functions

In [5]:
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

def clip(val, min_val, max_val):
    """Limit ``val`` to the closed interval ``[min_val, max_val]``.

    Returns ``max_val`` when ``val`` exceeds it, ``min_val`` when the result
    is not strictly greater than ``min_val``, and ``val`` otherwise.
    """
    # Cap from above first, then raise to the floor.
    capped = max_val if max_val < val else val
    return capped if capped > min_val else min_val

def rf_cv_mse(param_list, data_X, data_y):
    """Return the 10-fold cross-validated MSE of a random forest regressor.

    Parameters
    ----------
    param_list : sequence or torch.Tensor with at least 4 numbers
        ``[max_depth, min_samples_split, min_samples_leaf, max_features]``.
        Values may be floats. The first three are rounded to the nearest
        integer, and every value is clipped to its supported range:
        max_depth to [1, 50], min_samples_split to [2, 20],
        min_samples_leaf to [1, 20], max_features to [0.1, 1.0].
        ``n_estimators`` is not tuned; it is fixed at 500.
    data_X, data_y
        Features and targets, forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    float
        Mean squared error averaged over the 10 folds (lower is better).
    """
    if isinstance(param_list, torch.Tensor):
        param_list = param_list.tolist()

    # Round rather than truncate so a continuous proposal maps to the nearest
    # depth, matching how the other integer hyperparameters are handled.
    # Values below 1 (e.g. a -1 lower search bound) are clipped to 1.
    max_depth = clip(int(round(param_list[0])), 1, 50)
    min_samples_split = clip(int(round(param_list[1])), 2, 20)
    min_samples_leaf = clip(int(round(param_list[2])), 1, 20)
    max_features = clip(float(param_list[3]), 0.1, 1.0)

    model = RandomForestRegressor(
        n_estimators=500,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        random_state=42,
        n_jobs=-1
    )

    # The scorer returns negated MSE per fold; flip the sign of the mean.
    scores = cross_val_score(
        model, data_X, data_y,
        scoring='neg_mean_squared_error',
        cv=10
    )

    return -np.mean(scores)



def svr_cv_mse(param_list, data_X, data_y):
    """Return the 10-fold cross-validated MSE of a support vector regressor.

    Parameters
    ----------
    param_list : sequence or torch.Tensor with at least 3 numbers
        ``[C, epsilon, gamma]``; each entry is converted with ``float``.
    data_X, data_y
        Features and targets, forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    float
        Mean squared error averaged over the 10 folds (lower is better).
    """
    values = param_list.tolist() if isinstance(param_list, torch.Tensor) else param_list
    C, epsilon, gamma = (float(values[i]) for i in range(3))

    regressor = SVR(C=C, epsilon=epsilon, gamma=gamma)

    # The scorer returns negated MSE per fold; flip the sign of the mean.
    neg_mse_per_fold = cross_val_score(
        regressor,
        data_X,
        data_y,
        scoring='neg_mean_squared_error',
        cv=10,
    )
    return -np.mean(neg_mse_per_fold)


def xgb_cv_mse(param_list, data_X, data_y):
    """Return the 10-fold cross-validated MSE of an XGBoost regressor.

    Parameters
    ----------
    param_list : sequence or torch.Tensor with at least 4 numbers
        ``[max_depth, learning_rate, subsample, colsample_bytree]``.
        ``max_depth`` is rounded to the nearest integer; the rest are
        converted with ``float``. No clipping is applied here.
        ``n_estimators`` is fixed at 500.
    data_X, data_y
        Features and targets, forwarded unchanged to ``cross_val_score``.

    Returns
    -------
    float
        Mean squared error averaged over the 10 folds (lower is better).
    """
    values = param_list.tolist() if isinstance(param_list, torch.Tensor) else param_list

    booster_settings = {
        'n_estimators': 500,
        'max_depth': int(round(values[0])),
        'learning_rate': float(values[1]),
        'subsample': float(values[2]),
        'colsample_bytree': float(values[3]),
        'random_state': 42,
        'n_jobs': -1,
    }
    model = XGBRegressor(**booster_settings)

    # The scorer returns negated MSE per fold; flip the sign of the mean.
    neg_mse_per_fold = cross_val_score(
        model, data_X, data_y, scoring='neg_mean_squared_error', cv=10
    )
    return -np.mean(neg_mse_per_fold)

### Bounds of the hyperparameters for each machine learning model

In [6]:
# Search-space bounds for each model, as 2 x d tensors:
# row 0 holds the lower bounds, row 1 the upper bounds, one column per hyperparameter.
# -------------------------------
# Random Forest
# Columns follow the order read by rf_cv_mse:
# [max_depth, min_samples_split, min_samples_leaf, max_features].
# Note: rf_cv_mse clips max_depth to at least 1, so proposals below 1 evaluate as depth 1.
bounds_rf = torch.tensor([
    [ -1,     2,     1,   0.1],   # Lower bounds
    [ 50,    20,    20,   1.0]    # Upper bounds
])
# NOTE(review): 5 entries vs. 4 columns in bounds_rf; the leading 100 looks like a
# leftover n_estimators value — confirm before passing this list to rf_cv_mse.
rf_params = [100, 10, 5, 2, 0.5]

# -------------------------------
# Support Vector Regressor (SVR)
bounds_svr = torch.tensor([
    [1e-2,  1e-4,   1e-4],     # Lower bounds [C, epsilon, gamma]
    [1e3,   1.0,    1.0]       # Upper bounds
])
svr_params = [10.0, 0.1, 0.01]  # [C, epsilon, gamma]

# -------------------------------
# XGBoost Regressor
# Columns follow the order read by xgb_cv_mse:
# [max_depth, learning_rate, subsample, colsample_bytree].
bounds_xgb = torch.tensor([
    [ 1,    0.01,  0.5,  0.5],   # Lower bounds
    [ 10,    0.3,   1.0,  1.0]    # Upper bounds
])
# NOTE(review): 5 entries vs. 4 columns in bounds_xgb; the leading 150 looks like a
# leftover n_estimators value — confirm before passing this list to xgb_cv_mse.
xgb_params = [150, 6, 0.1, 0.8, 0.8]
# -------------------------------
# MLP (going by the name bounds_mlp)
# NOTE(review): no MLP objective is visible in this part of the notebook, so the
# meaning of the three columns is not established here — document them at the point of use.
bounds_mlp = torch.tensor([
    [10,     1e-6,   1e-4],   # Lower bounds
    [500,    1e-1,   1e-1]    # Upper bounds
])
# Presumably the MLP counterpart of rf_params / svr_params / xgb_params — TODO confirm.
params = [100, 0.001, 0.01]

### Description cards for the physical simulation data

In [7]:
# Description cards: one dict per (dataset, model) pair; `ro_*` are the robot-arm
# cards and `pi_*` the piston cards. Each card carries:
#   md_name       - model name
#   md_ndim       - number of tuned hyperparameters (matches the lines in md_param)
#   md_param      - one "name: [low, high] (type)" line per hyperparameter
#   data_nsamp    - number of samples in the dataset
#   data_nfeature - number of input features
#   data_desc     - free-text description of the dataset
# These strings are runtime text (pi_XGB, for example, is passed as `desc=` to the
# tuning runner), so their wording is part of the experiment.
# NOTE(review): the three robot `data_desc` strings start with a space while the
# piston ones do not — confirm whether that is intentional.
ro_SVR = {
    'md_name': 'Support Vector Regression',
    'md_ndim': 3,
    'md_param': 'C: [0.01, 1000.0] (float)\n'
                'epsilon: [0.0001, 1.0] (float)\n'
                'gamma: [0.0001, 1.0] (float)',
    'data_nsamp': 1000,
    'data_nfeature': 8,
    'data_desc': ' The dataset models the position of a planar robotic arm consisting of four rotating joints and link lengths, computing the Euclidean distance of the arm’s endpoint from the origin.'}


# NOTE(review): the card advertises max_depth down to -1, but rf_cv_mse clips
# max_depth to at least 1 — confirm which range should be communicated.
ro_RF = {
    'md_name': 'Random Forest',
    'md_ndim': 4,
    'md_param': 'max_depth: [-1, 50] (int)\n'
                'min_samples_split: [2, 20] (int)\n'
                'min_samples_leaf: [1, 20] (int)\n'
                'max_features: [0.1, 1.0] (float)',
    'data_nsamp': 1000,
    'data_nfeature': 8,
    'data_desc': ' The dataset models the position of a planar robotic arm consisting of four rotating joints and link lengths, computing the Euclidean distance of the arm’s endpoint from the origin.'}


ro_XGB = {
    'md_name': 'XGBoost',
    'md_ndim': 4,
    'md_param': 'max_depth: [1, 10] (int)\n'
                'learning_rate: [0.01, 0.3] (float)\n'
                'subsample: [0.5, 1.0] (float)\n'
                'colsample_bytree: [0.5, 1.0] (float)',
    'data_nsamp': 1000,
    'data_nfeature': 8,
    'data_desc': ' The dataset models the position of a planar robotic arm consisting of four rotating joints and link lengths, computing the Euclidean distance of the arm’s endpoint from the origin.'}

##############################################
# Piston cards: same hyperparameter ranges as above, with 7 input features.

pi_SVR = {
    'md_name': 'Support Vector Regression',
    'md_ndim': 3,
    'md_param': 'C: [0.01, 1000.0] (float)\n'
                'epsilon: [0.0001, 1.0] (float)\n'
                'gamma: [0.0001, 1.0] (float)',
    'data_nsamp': 1000,
    'data_nfeature': 7,
    'data_desc': 'The dataset models the cycle time of a piston moving within a cylinder, based on seven physical input variables including mass, surface area, pressure, and temperature.'}


# NOTE(review): same max_depth caveat as ro_RF (-1 advertised, clipped to 1 in rf_cv_mse).
pi_RF = {
    'md_name': 'Random Forest',
    'md_ndim': 4,
    'md_param': 'max_depth: [-1, 50] (int)\n'
                'min_samples_split: [2, 20] (int)\n'
                'min_samples_leaf: [1, 20] (int)\n'
                'max_features: [0.1, 1.0] (float)',
    'data_nsamp': 1000,
    'data_nfeature': 7,
    'data_desc': 'The dataset models the cycle time of a piston moving within a cylinder, based on seven physical input variables including mass, surface area, pressure, and temperature.'}


pi_XGB = {
    'md_name': 'XGBoost',
    'md_ndim': 4,
    'md_param': 'max_depth: [1, 10] (int)\n'
                'learning_rate: [0.01, 0.3] (float)\n'
                'subsample: [0.5, 1.0] (float)\n'
                'colsample_bytree: [0.5, 1.0] (float)',
    'data_nsamp': 1000,
    'data_nfeature': 7,
    'data_desc': 'The dataset models the cycle time of a piston moving within a cylinder, based on seven physical input variables including mass, surface area, pressure, and temperature.'}



## Experiments: robot arm (ro) and piston (pi) data with SVR, RF, and XGB

In [8]:
import pickle
def loss(xx):
    """Objective handed to the tuner: 10-fold CV MSE of XGBoost on the piston data.

    ``xx`` is forwarded unchanged to ``xgb_cv_mse`` as its ``param_list``.
    """
    return xgb_cv_mse(xx, X_pi, y_pi)


# Settings for one 'transient' run on the piston / XGBoost task, gathered in one
# place and unpacked into the runner.
hpt_settings = dict(
    method='transient',
    bounds=bounds_xgb,
    objective=loss,
    dim=4,
    desc=pi_XGB,
    T=20,
    T_ini=4,
    T_rep=1,
)
runner = LLMIBO_HPT(**hpt_settings)

histories, regrets = runner.run()

# Saving the results is currently disabled.
# NOTE(review): the paths below are absolute and machine-specific; switch to a
# relative or configurable output directory before re-enabling.
#raw_path = "/Users/chih-yuchang/Desktop/research/LLMIBO4AM/HPO_simulation_phy/XGB_pi_llambo_history.pkl"
#with open(raw_path, "wb") as f:
#    pickle.dump(histories, f)
#raw_path = "/Users/chih-yuchang/Desktop/research/LLMIBO4AM/HPO_simulation_phy/XGB_pi_llambo_regret.pkl"
#with open(raw_path, "wb") as f:
#    pickle.dump(regrets, f)

TRANSIENT:   0%|          | 0/1 [00:00<?, ?it/s]

LLM warmstarting response could not be parsed! Retrying...
LLM warmstarting response could not be parsed! Retrying...
LLM warmstarting response could not be parsed! Retrying...
LLM warmstarting response could not be parsed! Retrying...
LLM warmstarting response could not be parsed! Retrying...


TRANSIENT: 100%|██████████| 1/1 [00:44<00:00, 44.89s/it]
