In [1]:
import sys
import os
import torch
import matplotlib.pyplot as plt
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import torch
from botorch.models import MultiTaskGP, KroneckerMultiTaskGP,  SingleTaskGP, ModelListGP
from botorch.fit import fit_gpytorch_mll
from gpytorch.mlls import ExactMarginalLogLikelihood, SumMarginalLogLikelihood
from botorch.optim import optimize_acqf
from botorch.sampling.normal import SobolQMCNormalSampler
from botorch.utils.multi_objective.hypervolume import Hypervolume
from botorch.utils.multi_objective.pareto import is_non_dominated
from botorch.acquisition.multi_objective import qExpectedHypervolumeImprovement
from botorch.acquisition.multi_objective.logei import qLogNoisyExpectedHypervolumeImprovement
from botorch.utils.multi_objective.box_decompositions.non_dominated import NondominatedPartitioning
from botorch.utils.multi_objective.box_decompositions.dominated import DominatedPartitioning
from botorch.utils.transforms import unnormalize, standardize
from pprint import pprint
from typing import Optional
from contextlib import redirect_stdout
import joblib

# Use double precision everywhere and run on the GPU when one is available.
dtype = torch.double
torch.set_default_dtype(dtype)

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

def format_multitask_data(X, Y):
    """Flatten multi-output data into the long layout used by ``MultiTaskGP``.

    Args:
        X: Tensor of shape (N, D) holding the inputs.
        Y: Tensor of shape (N, M) holding one column per task.

    Returns:
        Tuple ``(full_X, full_Y)``. ``full_X`` has shape (N*M, D+1): the inputs
        tiled once per task with the task index appended as the last column.
        ``full_Y`` has shape (N*M, 1) and is ordered task by task, so row k of
        ``full_Y`` belongs to row k of ``full_X``.
    """
    num_points, _ = X.shape
    num_tasks = Y.shape[1]

    # Task-major index column: N zeros, then N ones, ..., then N copies of M-1.
    task_column = (
        torch.arange(num_tasks, device=X.device, dtype=X.dtype)
        .repeat_interleave(num_points)
        .unsqueeze(-1)
    )

    # Stack the whole input block once per task and append the index column.
    tiled_inputs = X.repeat(num_tasks, 1)
    full_X = torch.cat([tiled_inputs, task_column], dim=-1)

    # Transpose before flattening so the targets are task-major as well;
    # flattening Y directly would interleave tasks and break the alignment.
    full_Y = Y.t().reshape(-1, 1)

    return full_X, full_Y

def initialize_independent_gp(train_x, train_y):
    """Build one independent ``SingleTaskGP`` per objective and bundle them.

    Args:
        train_x: (N, D) training inputs shared by every objective.
        train_y: (N, M) training targets, one column per objective.

    Returns:
        Tuple ``(mll, model_list)``: a ``SumMarginalLogLikelihood`` over the
        bundled likelihoods, and the ``ModelListGP`` wrapping the M models so
        that a multi-objective acquisition function sees all objectives at once.
        Nothing is fitted here; the caller is responsible for optimizing
        ``mll`` before relying on the models' predictions.
    """
    # Slice with i:i+1 (not i) so every target keeps its trailing size-1 dimension.
    per_objective_models = [
        SingleTaskGP(train_x, train_y[:, i : i + 1])
        for i in range(train_y.shape[-1])
    ]

    model_list = ModelListGP(*per_objective_models)
    mll = SumMarginalLogLikelihood(model_list.likelihood, model_list)

    return mll, model_list

# Hypervolume of the observed Pareto front
def get_current_hv(train_Y, ref_point):
    """Return the hypervolume spanned by the non-dominated rows of ``train_Y``.

    Args:
        train_Y: Objective values, one row per evaluated point.
        ref_point: Reference point handed to ``Hypervolume``.

    Returns:
        The value produced by ``Hypervolume.compute`` for the Pareto front.
    """
    non_dominated_mask = is_non_dominated(train_Y)
    front = train_Y[non_dominated_mask]
    return Hypervolume(ref_point=ref_point).compute(front)



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Directory that receives the result pickles written by the experiment loop;
# it is created on first run and reused if it already exists.
# NOTE(review): absolute, machine-specific path -- consider deriving it from a configurable base dir.
result_save_dir = '/workspaces/BO_EXPERIMENTS/src/results/20260204/mobo_lasso_generater'
os.makedirs(result_save_dir, exist_ok=True)

In [3]:
# Number of Bayesian-optimisation iterations run for each data split.
n_iter = 300

# Bundle holding the initial dataset and the oracle model used to label new candidates.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code;
# only point this at trusted data.
data_info_path = '/workspaces/BO_EXPERIMENTS/src/datasets/mt_lasso_dataset/interactive_term/data.pkl'
data_info = joblib.load(data_info_path)

# Full pool of inputs / objectives; it is split into train and test sets later.
total_x, total_y = data_info['initial_data']['X'], data_info['initial_data']['Y']


In [4]:
# Column-wise minimum of the objectives over the whole pool (one value per objective).
total_y.min(axis=0)

array([ -88.28661902, -310.14389428])

In [5]:
# Run one multi-objective BO campaign per train/test split seed and save its
# hypervolume trace.
random_state_ls = [1000, 523, 4456, 21]

# Raw inputs are divided by X_SCALE before modelling, so the optimiser works in
# a [0, 1] box and candidates are multiplied by X_SCALE on the way back out.
X_SCALE = 100
# Std-dev of the tiny jitter added to the scaled inputs; it keeps near-duplicate
# points from producing "matrix not positive definite" numerical warnings.
JITTER_STD = 1e-6

for rd_idx, split_seed in enumerate(random_state_ls):
    # Seed torch's global RNG so the input jitter (and any other draws that
    # come from it) are repeatable for a given split. The split itself is
    # already seeded through `random_state`.
    torch.manual_seed(split_seed)

    # Train / test split: only 10% of the pool seeds the surrogate models.
    X_train, X_test, y_train, y_test = train_test_split(
        total_x, total_y, test_size=0.9, random_state=split_seed
    )

    # Explicit dtype so integer-typed source data cannot yield integer tensors.
    train_x = torch.tensor(np.array(X_train), dtype=dtype, device=device)
    train_y = torch.tensor(np.array(y_train), dtype=dtype, device=device)
    test_x = torch.tensor(np.array(X_test), dtype=dtype, device=device)
    test_y = torch.tensor(np.array(y_test), dtype=dtype, device=device)

    print(train_x.shape)
    print(train_y.shape)

    # Fixed reference point for the hypervolume, on the raw (unstandardised) Y scale.
    ref_point = torch.tensor([-15, -15], dtype=dtype, device=device)

    # ---- Search-space definition (identical for every iteration) ----
    D = train_x.shape[-1]

    # Box bounds in the scaled space: every coordinate lies in [0, 1].
    bounds = torch.zeros(2, D, device=device, dtype=dtype)
    bounds[1] = 1.

    # Equality constraint: the scaled coordinates must sum to 1.
    constraints = [
        (
            torch.arange(D, device=device),  # indices: which dimensions take part
            torch.ones(D, dtype=dtype, device=device),  # coefficients of those dimensions
            torch.tensor([1.0], device=device, dtype=dtype)  # rhs: required sum
        )
    ]

    # The held-out inputs never change, so scale them once.
    std_test_x = test_x / X_SCALE

    current_hvs = []
    t = tqdm(range(n_iter), ncols=80)
    for i in t:
        # Scale the inputs and add a little jitter (see JITTER_STD).
        std_train_x = train_x / X_SCALE
        noise = torch.randn_like(std_train_x) * JITTER_STD
        std_noise_train_x = std_train_x + noise

        # One independent GP per objective.
        mll, model = initialize_independent_gp(std_noise_train_x, train_y)

        # Optimise the GP hyperparameters. Without this step the models keep
        # their initial hyperparameters, and both the MSE below and the
        # acquisition function would be based on untrained surrogates.
        fit_gpytorch_mll(mll)

        # Held-out MSE of the posterior mean (monitoring only).
        with torch.no_grad():
            posterior = model.posterior(std_test_x)
            mean = posterior.mean
            mse = torch.mean((test_y - mean)**2)

        # Sampler and acquisition function.
        sampler = SobolQMCNormalSampler(sample_shape=torch.Size([128]))

        # qLogNEHVI needs no manual box decomposition of the Pareto front.
        acq_fun = qLogNoisyExpectedHypervolumeImprovement(
            model=model,
            ref_point=ref_point,
            X_baseline=std_noise_train_x,  # already-evaluated points act as the baseline
            prune_baseline=True,  # drop baseline points that cannot matter
            sampler=sampler
        )

        # Optimise the acquisition function under the box + sum constraint.
        std_candidate, _ = optimize_acqf(
            acq_function=acq_fun,
            bounds=bounds,
            equality_constraints=constraints,
            q=1,
            num_restarts=20,
            raw_samples=50
        )

        # Map the candidate back to the raw input scale and label it with the oracle.
        candidate = std_candidate * X_SCALE
        new_x = pd.DataFrame(candidate.cpu().numpy(), columns=data_info['oracle_model']['data_cols'])
        new_y = data_info['oracle_model']['model_info']['PIPE'].predict(new_x)  # raw-scale objectives
        new_y = torch.tensor(new_y, dtype=dtype, device=device)

        # Append the new observation to the training set.
        train_x = torch.concat([train_x, candidate])
        train_y = torch.concat([train_y, new_y])

        # Hypervolume of everything observed so far (raw Y scale).
        current_hv = get_current_hv(train_y, ref_point)
        current_hvs.append(current_hv)

        message = {'MSE': mse.cpu().item(), 'HV': current_hv}

        t.set_postfix(**message)

    # Persist the hypervolume trace for this split.
    save_data = {
        'hv_ls': current_hvs,
        'datasplit_seed': split_seed
    }
    result_save_path = os.path.join(result_save_dir, 'HVs_with_dataset_split_seed_{}.pkl'.format(split_seed))
    joblib.dump(save_data, result_save_path)

torch.Size([100, 20])
torch.Size([100, 2])


Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  retur

torch.Size([100, 20])
torch.Size([100, 2])


Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying ag

torch.Size([100, 20])
torch.Size([100, 2])


Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  retur

torch.Size([100, 20])
torch.Size([100, 2])


Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
  return _optimize_acqf_batch(opt_inputs=opt_inputs)
Trying again with a new set of initial conditions.
  return 

In [7]:
# # Plot the recorded hypervolume trace
# plt.figure(figsize=(10, 5))
# plt.plot(current_hvs, marker='o', linestyle='-', color='b', label='Hyper V')

# # Add the title and axis labels
# plt.title('MOBO Hyper Volume')
# plt.xlabel('Iter')
# plt.ylabel('Volume')
# plt.grid(True, linestyle='--', alpha=0.7)
# plt.legend()

# # Show the figure
# plt.show()