# Modules and Function

In [None]:
'''
Period

train: 2023-01-01 ~ 2023-12-31
predict: 2024-01-01 ~ 2024-01-31
'''
def objective(trial):
    import pandas as pd
    import numpy as np
    import torch
    from dateutil.relativedelta import relativedelta
    from DataPipeline.Dataloader import PortfolioDataset
    from torch.utils.data import DataLoader
    from DataPipeline.DataBuilder import build_dataset
    from models.PortfolioModel import PortfolioModel
    from pyepo.func.surrogate import SPOPlus
    from models.LinearInferencer import LinearPredictorTorch
    from torch.optim import Adam

    # ===== 超参数建议 =====
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    num_epochs = trial.suggest_int("num_epochs", 10, 50)

    tickers = ["EEM","EFA","JPXN","SPY","XLK",'VTI','AGG','DBC']
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    num_assets = len(tickers)

    # ===== 固定测试窗口（2024年1月） =====
    infer_start = pd.to_datetime("2024-01-01")
    train_start = infer_start - relativedelta(years=1)
    train_end = infer_start - pd.Timedelta(days=1)

    features_df, labels_df = build_dataset(
    tickers=tickers,
    data_dir="data/FeatureData",  # ✅ 添加这行
    start_date=str(train_start.date()),
    end_date=str(train_end.date())
)
    oracle_df = pd.read_csv("data/DailyOracle/oracle_weights_with_fee.csv", index_col=0)
    oracle_df.index = pd.to_datetime(oracle_df.index).normalize()
    features_df.index = pd.to_datetime(features_df.index).normalize()
    labels_df = oracle_df.loc[features_df.index]

    dataset = PortfolioDataset(features_df, labels_df, num_assets)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    # ===== 初始化模型、loss、优化器 =====
    input_dim = features_df.shape[1] // num_assets
    predictor = LinearPredictorTorch(input_dim * num_assets, num_assets).to(device)
    optmodel = PortfolioModel(n_assets=num_assets, budget=1.0)
    spo_loss_fn = SPOPlus(optmodel, processes=1, solve_ratio=1.0, reduction="mean")
    optimizer = Adam(predictor.parameters(), lr=lr)

    # ===== 训练函数（复用你的） =====
    def train_one_epoch(predictor, train_loader, optimizer, spo_loss_fn, optmodel, device):
        predictor.train()
        total_loss = 0.0
        for x_batch, c_true_batch in train_loader:
            x_batch = x_batch.to(device)
            c_true_batch = c_true_batch.to(device)
            for i in range(x_batch.size(0)):
                x_sample = x_batch[i].unsqueeze(0)
                c_true = c_true_batch[i]
                optimizer.zero_grad()
                c_hat = predictor(x_sample).squeeze(0)
                optmodel.setObj(c_true.detach().cpu().numpy())
                z_star_np, obj_val = optmodel.solve()
                z_star = torch.tensor(z_star_np, dtype=torch.float32, device=device).unsqueeze(0)
                true_obj = torch.tensor(obj_val, dtype=torch.float32, device=device).unsqueeze(0)
                c_hat = c_hat.unsqueeze(0)
                c_true = c_true.unsqueeze(0)
                loss = spo_loss_fn(c_hat, c_true, z_star, true_obj)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
        return total_loss / len(train_loader.dataset)

    # ===== 多轮训练 =====
    for epoch in range(num_epochs):
        loss = train_one_epoch(predictor, train_loader, optimizer, spo_loss_fn, optmodel, device)

    return loss  # 作为目标值，越低越好


# Optuna Train

In [5]:
import optuna

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=10)

print("🎯 最佳参数：", study.best_params)

[I 2025-07-03 14:36:35,298] A new study created in memory with name: no-name-cb124f43-dd74-4cdc-89e4-dee46299f786


Num of cores: 1


[I 2025-07-03 14:36:40,455] Trial 0 finished with value: 4.1018861127665724 and parameters: {'lr': 0.0001999302950273606, 'batch_size': 128, 'num_epochs': 26}. Best is trial 0 with value: 4.1018861127665724.


Num of cores: 1


[I 2025-07-03 14:36:42,521] Trial 1 finished with value: 5.172201700478673 and parameters: {'lr': 0.0006099901066302167, 'batch_size': 128, 'num_epochs': 10}. Best is trial 0 with value: 4.1018861127665724.


Num of cores: 1


[I 2025-07-03 14:36:47,925] Trial 2 finished with value: 4.230513791003859 and parameters: {'lr': 0.0009298456932283319, 'batch_size': 128, 'num_epochs': 27}. Best is trial 0 with value: 4.1018861127665724.


Num of cores: 1


[I 2025-07-03 14:36:52,404] Trial 3 finished with value: 12.654983046064416 and parameters: {'lr': 0.0025032358712121475, 'batch_size': 32, 'num_epochs': 23}. Best is trial 0 with value: 4.1018861127665724.


Num of cores: 1


[I 2025-07-03 14:36:59,119] Trial 4 finished with value: 11.575815285066046 and parameters: {'lr': 0.002492868185194127, 'batch_size': 128, 'num_epochs': 35}. Best is trial 0 with value: 4.1018861127665724.


Num of cores: 1


[I 2025-07-03 14:37:06,314] Trial 5 finished with value: 3.130215089483912 and parameters: {'lr': 0.0006436301119994878, 'batch_size': 128, 'num_epochs': 38}. Best is trial 5 with value: 3.130215089483912.


Num of cores: 1


[I 2025-07-03 14:37:12,755] Trial 6 finished with value: 2.9549067183191995 and parameters: {'lr': 0.0001601444250269215, 'batch_size': 128, 'num_epochs': 33}. Best is trial 6 with value: 2.9549067183191995.


Num of cores: 1


[I 2025-07-03 14:37:16,602] Trial 7 finished with value: 12.2600032699156 and parameters: {'lr': 0.0033931760382936234, 'batch_size': 32, 'num_epochs': 19}. Best is trial 6 with value: 2.9549067183191995.


Num of cores: 1


[I 2025-07-03 14:37:22,119] Trial 8 finished with value: 6.109838998940097 and parameters: {'lr': 0.00011342021187501563, 'batch_size': 32, 'num_epochs': 27}. Best is trial 6 with value: 2.9549067183191995.


Num of cores: 1


[I 2025-07-03 14:37:28,989] Trial 9 finished with value: 4.768986621534968 and parameters: {'lr': 0.0011661201853356928, 'batch_size': 128, 'num_epochs': 34}. Best is trial 6 with value: 2.9549067183191995.


🎯 最佳参数： {'lr': 0.0001601444250269215, 'batch_size': 128, 'num_epochs': 33}


# Optuna Visualization

In [6]:
import optuna.visualization as vis
import plotly
# 1️⃣ 优化过程中的 loss 趋势
fig1 = vis.plot_optimization_history(study)
fig1.show()

# 2️⃣ 超参数的重要性分析
fig2 = vis.plot_param_importances(study)
fig2.show()

# 3️⃣ 平行坐标图（超参联合影响）
fig3 = vis.plot_parallel_coordinate(study)
fig3.show()

# 4️⃣ 轮廓图（两个超参 vs loss）
fig4 = vis.plot_contour(study)
fig4.show()