# Experimental setup

In [None]:
import pandas as pd
import numpy as np
import torch
import random

from dateutil.relativedelta import relativedelta
from DataPipeline.Dataloader import PortfolioDataset
from torch.utils.data import DataLoader
from DataPipeline.DataBuilder import build_dataset

from models.LinearInferencer import LinearPredictorTorch
from torch.optim import Adam

# --- Universe -------------------------------------------------------------
# ETF tickers used throughout the notebook.
tickers = ["EEM", "EFA", "JPXN", "SPY", "XLK", "VTI", "AGG", "DBC"]

# --- Model hyperparameters ------------------------------------------------
input_dim = 7         # number of features per asset
num_assets = 8        # number of ETFs
hidden_dim = 32       # hidden-layer width of the allocator
epochs = 30           # number of training epochs

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# --- Reproducibility ------------------------------------------------------
# Seed Python's built-in RNG, NumPy's global RNG and PyTorch with one value.
seed = 123
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# --- Display options ------------------------------------------------------
# Show floats with six decimals in both pandas and NumPy output, and keep
# NumPy from switching to scientific notation for small values.
pd.set_option("display.float_format", '{:.6f}'.format)
np.set_printoptions(suppress=True, precision=6)

# 读取数据

In [32]:
# Load each ticker's daily log-return series and align them by date into a
# single wide frame with one column per ticker.
return_series = []
for ticker in tickers:
    file_path = f"data/FeatureData/{ticker}.csv"  # plain relative path, no os.path needed
    ticker_df = pd.read_csv(file_path, parse_dates=["Date"])
    # Drop any time-of-day component so all tickers share midnight-normalized dates.
    ticker_df["Date"] = pd.to_datetime(ticker_df["Date"]).dt.normalize()
    return_series.append(ticker_df.set_index("Date")["log_return"].rename(ticker))

# Concatenate once after the loop: growing a DataFrame with pd.concat inside
# the loop re-copies all previously loaded columns on every iteration and
# starts from an empty frame. An empty ticker list still yields an empty frame.
return_df = pd.concat(return_series, axis=1) if return_series else pd.DataFrame()
return_df

Unnamed: 0,EEM,EFA,JPXN,SPY,XLK,VTI,AGG,DBC
2023-01-03,0.029645,0.013201,-0.012040,0.007690,0.002593,0.008836,0.005520,-0.018034
2023-01-04,0.029645,0.013201,-0.012040,0.007690,0.002593,0.008836,0.005520,-0.018034
2023-01-05,-0.003053,-0.010035,-0.013764,-0.011479,-0.019612,-0.011782,-0.000816,-0.005091
2023-01-06,0.020677,0.025269,0.023408,0.022673,0.028874,0.021830,0.010857,0.005514
2023-01-09,0.007459,0.004247,0.001712,-0.000567,0.011558,0.000412,0.002520,0.011775
...,...,...,...,...,...,...,...,...
2024-12-24,0.003053,0.003566,-0.002558,0.011054,0.010280,0.010556,0.001137,0.005720
2024-12-26,-0.003524,0.004734,0.010050,0.000067,0.000665,0.000572,0.000723,-0.002856
2024-12-27,-0.004482,-0.001444,0.005898,-0.010582,-0.013384,-0.010890,-0.002067,0.004755
2024-12-30,-0.008070,-0.006591,-0.006462,-0.011477,-0.012798,-0.011113,0.003924,0.008973


# 训练

In [None]:
# Walk-forward setup: for each month starting at `base_month`, use the
# preceding year as the training window and the month itself as the
# inference window.
base_month = pd.to_datetime("2024-01-01")

# The oracle weights file does not depend on the loop index, so read it and
# normalize its date index once instead of re-reading the CSV every iteration.
oracle_weights_all = pd.read_csv("data/DailyOracle/oracle_weights_with_fee.csv", index_col=0)
oracle_weights_all.index = pd.to_datetime(oracle_weights_all.index).normalize()

for i in range(12):
    # Date range of the current month (inference window) and of the
    # one-year window that ends the day before it (training window).
    infer_start = base_month + relativedelta(months=i)
    infer_end = (infer_start + relativedelta(months=1)) - pd.Timedelta(days=1)
    train_start = infer_start - relativedelta(years=1)
    train_end = infer_start - pd.Timedelta(days=1)

    print(f"\n📅 第 {i+1} 次迭代：训练 {train_start.date()} ~ {train_end.date()}，推断 {infer_start.date()} ~ {infer_end.date()}")

    # 1. Training data.
    # The labels returned by build_dataset are replaced below by the oracle
    # weights for the same dates.
    features_df, labels_df = build_dataset(
        tickers=tickers,
        start_date=str(train_start.date()),
        end_date=str(train_end.date())
    )
    features_df.index = pd.to_datetime(features_df.index).normalize()
    # Select the oracle rows matching the feature dates; .loc raises KeyError
    # if a feature date has no oracle row.
    oracle_df = oracle_weights_all.loc[features_df.index]
    labels_df = oracle_df.copy()
    # Use the shared `num_assets` constant rather than a hard-coded 8 so the
    # dataset stays consistent with the model dimensions below.
    dataset = PortfolioDataset(features_df, labels_df, num_assets=num_assets)
    # NOTE(review): batch_size=63 is presumably about one quarter of trading
    # days — confirm and consider moving it to the config cell.
    train_loader = DataLoader(dataset, batch_size=63, shuffle=True)

    # 2. Model initialization: a fresh predictor/allocator pair per window,
    # optimized jointly by a single Adam optimizer.
    predictor = LinearPredictorTorch(input_dim * num_assets, num_assets).to(device)
    # NOTE(review): FNNSoftmaxAllocator is not imported in the visible import
    # cell; make sure it is imported before this cell runs on a fresh kernel.
    allocator = FNNSoftmaxAllocator(num_assets, hidden_dim, num_assets).to(device)
    optimizer = Adam(list(predictor.parameters()) + list(allocator.parameters()), lr=1e-3)

    # NOTE(review): the loop stops after the first window and no training or
    # inference step is visible here; remove this `break` once the loop body
    # is complete.
    break


📅 第 1 次迭代：训练 2023-01-01 ~ 2023-12-31，推断 2024-01-01 ~ 2024-01-31
