# 加载模块

In [1]:
import sys
from pathlib import Path

sys.path.append("..")  # add the parent directory to the module search path before the local import below

import numpy as np
import pandas as pd
from tqdm import tqdm

from models.PortfolioModel import PortfolioModel

# 读取tickers列表 - 8tickers

In [27]:
# Load the ticker universe: read the name table and keep only its 'Ticker' column as a plain list.
tickers = pd.read_csv('data/RawData/misc/test_tickers_Name.csv')['Ticker'].to_list()

# 计算算术收益率

In [28]:
# Daily arithmetic (simple) returns per ticker: {ticker: one-column DataFrame "<ticker>_return"}.
arith_ret_dict = {}

for ticker in tickers:
    # Each ticker has its own feature file; the parsed "Date" column becomes the index.
    df = pd.read_csv(
        f"data/FeatureData/{ticker}.csv", parse_dates=["Date"], index_col="Date"
    )

    # Fail fast with an explicit message when the price column is absent.
    if "Close" not in df.columns:
        raise ValueError(f"{ticker}.csv 中缺少 Close 列")

    # r_t = Close_t / Close_{t-1} - 1.
    # fill_method=None switches off pct_change's implicit forward-fill of missing
    # closes (the deprecated 'pad' default). With the forward-fill, a gap in Close
    # produces a fabricated 0% return followed by a multi-day return booked on a
    # single date; without it, those rows are NaN and get dropped below.
    ticker_returns = df["Close"].pct_change(fill_method=None)

    # Drop undefined returns (the first row is always NaN) and store the result
    # as a one-column frame without mutating the loaded price frame.
    arith_ret_dict[ticker] = (
        ticker_returns.rename(f"{ticker}_return").to_frame().dropna()
    )


In [29]:
# Align all tickers on the dates they have in common (inner join on the Date index).
merged_returns = pd.concat(arith_ret_dict.values(), axis=1, join="inner")

# Show the merged frame as a visual sanity check
# (pandas abbreviates a long frame to its first and last rows).
print(merged_returns)

# Persist the aligned returns. to_csv does not create missing directories,
# so make sure the output folder exists before writing.
daily_return_path = Path("data/DailyReturn/DailyReturn_8tickers.csv")
daily_return_path.parent.mkdir(parents=True, exist_ok=True)
merged_returns.to_csv(daily_return_path, index=True)

# The following cells refer to the merged returns as `return_df`.
return_df = merged_returns


                     EEM_return  EFA_return  JPXN_return  SPY_return  \
Date                                                                   
2023-01-04 05:00:00    0.030089    0.013289    -0.011968    0.007720   
2023-01-05 05:00:00   -0.003048   -0.009985    -0.013670   -0.011413   
2023-01-06 05:00:00    0.020892    0.025591     0.023684    0.022932   
2023-01-09 05:00:00    0.007487    0.004256     0.001714   -0.000567   
2023-01-10 05:00:00    0.007184    0.002923    -0.001027    0.007013   
...                         ...         ...          ...         ...   
2024-12-24 05:00:00    0.003058    0.003572    -0.002554    0.011115   
2024-12-26 05:00:00   -0.003518    0.004746     0.010101    0.000067   
2024-12-27 05:00:00   -0.004472   -0.001443     0.005915   -0.010527   
2024-12-30 05:00:00   -0.008038   -0.006569    -0.006441   -0.011412   
2024-12-31 05:00:00   -0.003336    0.000000    -0.002114   -0.003638   

                     VTI_return  XLK_return  AGG_return  DBC_re

# 生成 Oracle

In [None]:
from models.PortfolioModel import PortfolioModelWithFee
from tqdm import tqdm

returns = return_df.to_numpy()  # one row per date, one column per asset (arithmetic returns)
dates = return_df.index
num_assets = returns.shape[1]

# Optimizer that models transaction fees; gamma is the fee rate.
opt_model = PortfolioModelWithFee(n_assets=num_assets, gamma=0.003)

oracle_weights = []
oracle_objectives = []

# Start from an all-zero portfolio (i.e. fully in cash).
prev_weight = [0.0] * num_assets

print("Solving for oracle weights with transaction cost...")
for r in tqdm(returns):
    # NOTE(review): optimize() is defined outside this notebook; presumably it
    # maximizes the objective recomputed below given the day's realized returns
    # and the previous holdings -- confirm against models/PortfolioModel.py.
    w_star = opt_model.optimize(r, prev_weight)

    # Convert once to float arrays. np.array copies, so the stored history cannot
    # change afterwards should the optimizer reuse its output buffer.
    w_now = np.array(w_star, dtype=float)
    w_prev = np.asarray(prev_weight, dtype=float)

    # Objective = portfolio return minus fee rate times turnover (L1 change in weights).
    turnover = np.abs(w_now - w_prev).sum()
    obj = float(np.dot(r, w_now) - opt_model.gamma * turnover)

    oracle_weights.append(w_now)
    oracle_objectives.append(obj)
    prev_weight = w_star  # today's weights become the next day's starting holdings

# Save as CSV. to_csv does not create missing directories,
# so make sure the output folder exists before writing.
oracle_dir = Path("data/DailyOracle")
oracle_dir.mkdir(parents=True, exist_ok=True)

oracle_df = pd.DataFrame(oracle_weights, index=dates, columns=return_df.columns)
oracle_df.to_csv(oracle_dir / "oracle_weights_with_fee.csv")

pd.DataFrame(
    oracle_objectives, index=dates, columns=["oracle_objective_with_fee"]
).to_csv(oracle_dir / "oracle_objectives_with_fee.csv")


Solving for oracle weights with transaction cost...


100%|██████████| 501/501 [00:00<00:00, 2956.92it/s]
