#### 讀檔

In [16]:
import os
import pandas as pd

def load_all_stock_data(folder_path):
    """Load every ``.csv`` file in a folder into one long DataFrame.

    The ticker of each file is derived from its file name by removing the
    text ``converted_`` and ``.csv``; it is stored in a new ``tic`` column.

    Args:
        folder_path: Directory that contains the per-stock CSV files.

    Returns:
        pandas.DataFrame: all files stacked row-wise with a fresh integer index.

    Raises:
        ValueError: if the folder contains no ``.csv`` file.
    """
    all_data = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row and
    # ticker order reproducible across machines and runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".csv"):
            continue
        path = os.path.join(folder_path, filename)
        tic = filename.replace("converted_", "").replace(".csv", "")
        df = pd.read_csv(path)
        df['tic'] = tic  # tag every row with its ticker
        all_data.append(df)

    if not all_data:
        # pd.concat([]) would raise a much less helpful message.
        raise ValueError(f"No .csv files found in {folder_path!r}")
    return pd.concat(all_data, ignore_index=True)

# 📂 資料夾路徑
folder_path = r"E:\python_project\class\Reinforce_Learning\RL\code\converted_stock"  # raw string: backslashes stay literal instead of being parsed as escape sequences

# Load all stock data
raw_df = load_all_stock_data(folder_path)

# # 🔍 查看資料概況
# print(raw_df.shape)
# print(raw_df[['date', 'tic']].drop_duplicates().head())


#### 補值

In [17]:
import pandas as pd
import numpy as np

def interpolate_stock_data(df, start_date="2020-01-01", end_date="2024-10-30"):
    """Put every ticker on the same business-day calendar and fill the gaps.

    For each distinct ``tic`` the rows are re-indexed onto all business days
    between ``start_date`` and ``end_date``. The numeric columns that exist
    among open/high/low/close/volume are filled by linear interpolation, then
    backward fill, then forward fill, so rows for missing days contain
    synthetic values.

    Args:
        df: Long DataFrame with at least ``date`` and ``tic`` columns.
        start_date: First day of the calendar (inclusive).
        end_date: Last day of the calendar (inclusive).

    Returns:
        pandas.DataFrame: one row per (business day, ticker) with a ``date``
        column; the input frame is not modified.
    """
    # Full calendar of business days ('B' = Monday to Friday)
    full_dates = pd.date_range(start=start_date, end=end_date, freq='B')
    num_cols = ['open', 'high', 'low', 'close', 'volume']

    result_list = []

    for tic in df['tic'].unique():
        sub_df = df[df['tic'] == tic].copy()
        sub_df['date'] = pd.to_datetime(sub_df['date'])

        # Index by date, then align to the full calendar (missing days become NaN)
        sub_df = sub_df.set_index('date').reindex(full_dates)
        sub_df['tic'] = tic  # rows created by reindex have no ticker yet
        sub_df.index.name = 'date'

        for col in num_cols:
            if col in sub_df.columns:
                # .bfill()/.ffill() replace the deprecated fillna(method=...),
                # which emitted a FutureWarning on every call.
                sub_df[col] = sub_df[col].interpolate(method='linear').bfill().ffill()

        result_list.append(sub_df.reset_index())

    # Stack all tickers back into one long frame
    full_df = pd.concat(result_list, ignore_index=True)
    return full_df

# 📈 執行補值
interpolated_df = interpolate_stock_data(raw_df)

# Inspect the result: the shape first, then the first rows
print(interpolated_df.shape, interpolated_df.head(), sep="\n")


(12610, 7)
        date       open       high        low      close     volume      tic
0 2020-01-01  82.237405  83.042405  82.237405  82.745827  4882015.0  0050.TW
1 2020-01-02  82.237405  83.042405  82.237405  82.745827  4882015.0  0050.TW
2 2020-01-03  83.296619  83.635563  82.195034  82.745827  6813547.0  0050.TW
3 2020-01-06  82.237408  82.279772  81.686615  81.686615  9321768.0  0050.TW
4 2020-01-07  81.728982  81.940825  80.839246  81.432404  6328602.0  0050.TW


  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='b

#### DTW分群

In [18]:
from sklearn.preprocessing import MinMaxScaler
from tslearn.metrics import cdist_dtw
from pyclustering.cluster.kmedoids import kmedoids
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [19]:
def calculate_log_return(df):
    """Build a wide table of log returns with one column per ticker.

    For each distinct ``tic`` the ``close`` series is ordered by date and the
    log return ``log(close_t / close_{t-1})`` is computed; the first (undefined)
    value of each series is dropped.

    Args:
        df: Long DataFrame with ``date``, ``tic`` and ``close`` columns.
            It is left untouched.

    Returns:
        pandas.DataFrame: indexed by date, one column per ticker.
    """
    # assign() returns a new frame, so the caller's 'date' column is not
    # overwritten as a side effect of computing returns.
    prices = df.assign(date=pd.to_datetime(df['date']))
    result = {}

    for tic in prices['tic'].unique():
        close = prices.loc[prices['tic'] == tic].set_index('date')['close'].sort_index()
        result[tic] = np.log(close / close.shift(1)).dropna()

    return pd.DataFrame(result)


In [20]:
def calculate_log_return(df):
    """Build a wide table of log returns with one column per ticker.

    NOTE(review): this cell redefines the function already defined in the
    previous cell; one of the two definitions is redundant and can be deleted.

    For each distinct ``tic`` the ``close`` series is ordered by date and the
    log return ``log(close_t / close_{t-1})`` is computed; the first (undefined)
    value of each series is dropped.

    Args:
        df: Long DataFrame with ``date``, ``tic`` and ``close`` columns.
            It is left untouched.

    Returns:
        pandas.DataFrame: indexed by date, one column per ticker.
    """
    # assign() returns a new frame, so the caller's 'date' column is not
    # overwritten as a side effect of computing returns.
    prices = df.assign(date=pd.to_datetime(df['date']))
    result = {}

    for tic in prices['tic'].unique():
        close = prices.loc[prices['tic'] == tic].set_index('date')['close'].sort_index()
        result[tic] = np.log(close / close.shift(1)).dropna()

    return pd.DataFrame(result)


In [21]:

def dtw_cluster(log_return_df, k=3):
    """Cluster the columns of a wide return table with k-medoids on DTW distances.

    Args:
        log_return_df: DataFrame with one column per series (ticker) and one
            row per time step.
        k: Number of clusters. The first ``k`` columns are used as the initial
            medoids.

    Returns:
        dict: column name -> cluster index, where the index is the position of
        the cluster in the list returned by ``get_clusters()``.

    Raises:
        ValueError: if ``k`` is smaller than 1 or larger than the number of
            columns (the initial medoid indices would not exist).
    """
    stock_list = list(log_return_df.columns)
    if not 1 <= k <= len(stock_list):
        raise ValueError(
            f"k must be between 1 and the number of series ({len(stock_list)}), got {k}"
        )

    # One row per series, with a trailing feature axis of size 1 for cdist_dtw.
    series_array = log_return_df.T.values[..., np.newaxis]
    dist_matrix = cdist_dtw(series_array)
    initial_medoids = list(range(k))

    kmedoids_instance = kmedoids(dist_matrix, initial_medoids, data_type='distance_matrix', ccore=False)
    kmedoids_instance.process()
    clusters = kmedoids_instance.get_clusters()

    # Each cluster is a list of row indices into dist_matrix; map them back to names.
    return {
        stock_list[idx]: label
        for label, members in enumerate(clusters)
        for idx in members
    }


In [22]:
def plot_clusters(log_return_df, cluster_labels):
    """Draw one line chart per cluster containing every member series.

    Args:
        log_return_df: Wide DataFrame; its index is the x axis and its columns
            are looked up by ticker.
        cluster_labels: Mapping of ticker -> cluster id.
    """
    # Invert ticker -> cluster into cluster -> [tickers], keeping first-seen order.
    members_by_group = {}
    for ticker, gid in cluster_labels.items():
        if gid not in members_by_group:
            members_by_group[gid] = []
        members_by_group[gid].append(ticker)

    x_values = log_return_df.index
    for gid in members_by_group:
        tickers = members_by_group[gid]
        plt.figure(figsize=(12, 4))
        for ticker in tickers:
            plt.plot(x_values, log_return_df[ticker], label=ticker)
        plt.title(f"group {gid}:no.{len(tickers)} ")
        plt.xlabel("date")
        plt.ylabel("normalization Log Return")
        plt.legend(loc='upper right')
        plt.grid(True)
        plt.tight_layout()
        plt.show()


In [23]:
# 讀取資料夾中所有股票 CSV
def load_all_stock_data(folder_path):
    """Load every ``.csv`` file in a folder into one long DataFrame.

    NOTE(review): this cell redefines the function already defined in the
    first cell of the notebook; one of the two definitions is redundant.

    The ticker of each file is derived from its file name by removing the
    text ``converted_`` and ``.csv``; it is stored in a new ``tic`` column.

    Args:
        folder_path: Directory that contains the per-stock CSV files.

    Returns:
        pandas.DataFrame: all files stacked row-wise with a fresh integer index.

    Raises:
        ValueError: if the folder contains no ``.csv`` file.
    """
    all_data = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row and
    # ticker order reproducible across machines and runs.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(".csv"):
            continue
        path = os.path.join(folder_path, filename)
        tic = filename.replace("converted_", "").replace(".csv", "")
        df = pd.read_csv(path)
        df['tic'] = tic
        all_data.append(df)

    if not all_data:
        # pd.concat([]) would raise a much less helpful message.
        raise ValueError(f"No .csv files found in {folder_path!r}")
    return pd.concat(all_data, ignore_index=True)

# 讀取 + 補值
folder_path = r"E:\python_project\class\Reinforce_Learning\RL\code\converted_stock"  # raw string: backslashes stay literal instead of being parsed as escape sequences
raw_df = load_all_stock_data(folder_path)
interpolated_df = interpolate_stock_data(raw_df)

# Compute log returns and keep only the training window (up to 2023-12-31)
log_return_df = calculate_log_return(interpolated_df)
train_log_return_df = log_return_df[log_return_df.index <= '2023-12-31']

# Cluster the training-window series.
# NOTE(review): no normalisation is applied before clustering; the raw log
# returns are passed to dtw_cluster as-is.
cluster_labels = dtw_cluster(train_log_return_df, k=6)

# Plot the members of every cluster
plot_clusters(train_log_return_df, cluster_labels)


  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='bfill').fillna(method='ffill')
  sub_df[col] = sub_df[col].interpolate(method='linear').fillna(method='b

### train model

In [24]:
import os
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO
from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv
import numpy as np

#### 訓練

##### 環境

In [25]:
import gym
import numpy as np
from gym import spaces

class CustomStockTradingEnv(gym.Env):
    """Multi-stock trading environment driven by pre-built NumPy arrays.

    Observation (float32 vector):
        ``[cash, holdings per stock, prices of the current day,
        flattened technical features of the current day]``.

    Action:
        One value in ``[-1, 1]`` per stock. It is multiplied by ``max_stock``
        and rounded to a multiple of ``min_trade_unit``; positive values buy,
        negative values sell.

    Episode termination (``done``):
        * the second-to-last row of ``price_array`` has been traded,
        * a single-step return above +100 %, or
        * a drawdown of more than 30 % from the highest asset value seen so far.

    Config keys:
        price_array: 2-D array, rows are days and columns are stocks (required).
        tech_array: per-day technical features; all axes after the first are
            flattened into the observation (required).
        turbulence_array: required key; stored but not read by this class.
        initial_amount: starting cash, default 1e6.
        buy_cost_pct / sell_cost_pct: proportional fees, default 0.001 each.
        reward_scaling: multiplier applied to the asset change, default 1e-4.
        if_train: when true, each trade price gets random slippage; default True.
        max_stock: cap on the position per stock and action scale, default 1e4.
        min_trade_unit: actions are rounded to multiples of this, default 1.
        slippage_pct: standard deviation of the slippage noise, default 0.005.
    Any other key in ``config`` is ignored.
    """

    def __init__(self, config):
        super(CustomStockTradingEnv, self).__init__()

        self.price_array = config['price_array']
        self.tech_array = config['tech_array']
        self.turbulence_array = config['turbulence_array']
        self.initial_amount = config.get('initial_amount', 1e6)
        self.buy_cost_pct = config.get('buy_cost_pct', 0.001)
        self.sell_cost_pct = config.get('sell_cost_pct', 0.001)
        self.reward_scaling = config.get('reward_scaling', 1e-4)
        self.if_train = config.get('if_train', True)
        self.max_stock = config.get('max_stock', 1e4)
        self.min_trade_unit = config.get('min_trade_unit', 1)
        # Previously hard-coded as 0.005 at both trade sites.
        self.slippage_pct = config.get('slippage_pct', 0.005)

        self.day = 0
        self.data_length, self.stock_dim = self.price_array.shape

        self.action_space = spaces.Box(low=-1, high=1, shape=(self.stock_dim,), dtype=np.float32)
        # _get_observation flattens one day of tech_array, so count every axis
        # after the first (works for both 2-D and 3-D feature arrays).
        tech_dim = int(np.prod(self.tech_array.shape[1:]))
        obs_dim = 1 + self.stock_dim + self.price_array.shape[1] + tech_dim
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(obs_dim,), dtype=np.float32)

        self.reset()

    def reset(self):
        """Return to day 0 with full cash and no holdings; returns the first observation."""
        self.day = 0
        self.amount = self.initial_amount
        self.stocks = np.zeros(self.stock_dim, dtype=np.float32)
        self.total_asset = self.amount
        self.asset_memory = [self.total_asset]
        # Running maximum of asset_memory, kept so step() does not rescan the list.
        self._peak_asset = self.total_asset
        return self._get_observation()

    def _sample_slippage(self):
        """Relative price noise for one trade: random while training, zero otherwise."""
        return np.random.normal(0, self.slippage_pct) if self.if_train else 0.0

    def step(self, actions):
        """Trade at the current day's prices, advance one day and score the result.

        Returns:
            tuple: ``(observation, reward, done, info)`` where ``reward`` is the
            change in total asset value times ``reward_scaling`` and ``info``
            is an empty dict.
        """
        actions = actions * self.max_stock
        actions = np.round(actions / self.min_trade_unit) * self.min_trade_unit

        begin_total_asset = self._calculate_total_asset()
        prices = self.price_array[self.day]

        # Stop-loss: liquidate any held stock that fell more than 10 % since the previous day.
        if self.day > 0:
            prev_prices = self.price_array[self.day - 1]
            price_change = (prices - prev_prices) / prev_prices
            for i in range(self.stock_dim):
                if price_change[i] < -0.10 and self.stocks[i] > 0:
                    traded_price = prices[i] * (1 + self._sample_slippage())
                    proceeds = self.stocks[i] * traded_price * (1 - self.sell_cost_pct)
                    self.amount += proceeds
                    self.stocks[i] = 0

        for i in range(self.stock_dim):
            action = actions[i]
            price = prices[i]
            # Skip stocks without a usable price on this day.
            if np.isnan(price) or price < 1e-6:
                continue

            traded_price = price * (1 + self._sample_slippage())

            # Buy: limited by available cash (fees included) and by the position cap.
            if action > 0:
                max_buyable = min(
                    (self.amount // (traded_price * (1 + self.buy_cost_pct))),
                    self.max_stock - self.stocks[i]
                )
                # Never let a negative limit turn a buy into an implicit sale.
                trade_amount = max(min(action, max_buyable), 0)
                cost = trade_amount * traded_price * (1 + self.buy_cost_pct)
                self.amount -= cost
                self.stocks[i] += trade_amount

            # Sell: limited by the current holding (no short selling).
            elif action < 0:
                trade_amount = min(-action, self.stocks[i])
                proceeds = trade_amount * traded_price * (1 - self.sell_cost_pct)
                self.amount += proceeds
                self.stocks[i] -= trade_amount

        self.day += 1
        done = self.day >= self.data_length - 1

        # Holdings are re-valued at the new day's prices.
        end_total_asset = self._calculate_total_asset()
        reward = (end_total_asset - begin_total_asset) * self.reward_scaling
        self.total_asset = end_total_asset
        self.asset_memory.append(self.total_asset)

        # Force the episode to end when the one-step gain is implausibly large.
        if begin_total_asset > 0:
            daily_return = (end_total_asset - begin_total_asset) / begin_total_asset
        else:
            daily_return = 0.0  # avoid dividing by zero when nothing is left
        if daily_return > 1.0:  # more than +100 %
            print("⚠️ Daily return too high, forcing stop.")
            done = True

        # Maximum drawdown limit (-30 %) measured against the best value before this step.
        peak = self._peak_asset
        drawdown = (end_total_asset - peak) / peak
        if drawdown < -0.3:
            print("⚠️ Max drawdown triggered, forcing stop.")
            done = True
        self._peak_asset = max(peak, end_total_asset)

        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        """Assemble the observation vector for the current day."""
        obs = np.concatenate([
            [self.amount],
            self.stocks,
            self.price_array[self.day],
            self.tech_array[self.day].flatten()
        ])
        # Match the dtype declared in observation_space.
        return obs.astype(np.float32)

    def _calculate_total_asset(self):
        """Cash plus holdings valued at the current day's prices."""
        return self.amount + np.sum(self.stocks * self.price_array[self.day])

    def render(self, mode='human'):
        """Print the current day and total asset value."""
        print(f"day {self.day}, total_asset: {self.total_asset}")

###### version 1

In [26]:
# # 建立強化學習環境，支援單支或多支股票
# def create_env_for_stock_np(df, stock_tic, indicators, initial_amount=1e6, if_train=True):
#     if isinstance(stock_tic, str):
#         stock_list = [stock_tic]
#     else:
#         stock_list = stock_tic

#     df = df[df['tic'].isin(stock_list)].copy()
#     df = df.sort_values(['date', 'tic']).reset_index(drop=True)
#     df[indicators] = df[indicators].fillna(0)

#     unique_dates = sorted(df['date'].unique())
#     price_array = df.pivot(index='date', columns='tic', values='close').reindex(unique_dates).values

#     # 技術指標 flatten
#     tech_features = []
#     for date in unique_dates:
#         date_data = df[df['date'] == date]
#         row = []
#         for tic in stock_list:
#             sub = date_data[date_data['tic'] == tic]
#             if not sub.empty:
#                 row.extend(sub[indicators].values.flatten().tolist())
#             else:
#                 row.extend([0.0] * len(indicators))
#         tech_features.append(row)
#     tech_array = np.array(tech_features)

#     turbulence_array = np.zeros(len(price_array))

#     env = CustomStockTradingEnv(
#         config={
#             "price_array": price_array,
#             "tech_array": tech_array,
#             "turbulence_array": turbulence_array,
#             "if_add_price": True,
#             "if_add_tech": True,
#             "if_add_turbulence": False,
#             "risk_indicator_col": "turbulence",
#             "initial_amount": initial_amount,
#             "buy_cost_pct": 0.001,
#             "sell_cost_pct": 0.001,
#             "reward_scaling": 1e-4,
#             "if_train": if_train
#         }
#     )
#     return env

###### version 2

In [27]:
def create_env_for_stock_np(
    df,
    stock_tic,
    indicators,
    initial_amount=1e6,
    if_train=True,
    max_stock=1e4,
    slippage_pct=0.005,  # slippage standard deviation (0.5 %)
    min_trade_unit=1     # smallest tradable quantity
):
    """Build a ``CustomStockTradingEnv`` for one ticker or a group of tickers.

    Args:
        df: Long DataFrame with ``date``, ``tic``, ``close`` and the indicator columns.
        stock_tic: A single ticker string or an iterable of tickers.
        indicators: Names of the technical-indicator columns to expose.
        initial_amount: Starting cash.
        if_train: When true, random noise is applied to the price array and
            the flag is forwarded to the environment.
        max_stock: Position cap per stock, forwarded to the environment.
        slippage_pct: Standard deviation of the multiplicative price noise.
        min_trade_unit: Trade rounding unit, forwarded to the environment.

    Returns:
        CustomStockTradingEnv: environment whose stock axis follows the column
        order of ``DataFrame.pivot`` (tickers in sorted order).

    Raises:
        ValueError: if ``df`` has no rows for the requested tickers.
    """
    # Accept a bare ticker string as well as a list (isin() rejects a str).
    stock_list = [stock_tic] if isinstance(stock_tic, str) else list(stock_tic)

    df = df[df['tic'].isin(stock_list)].copy()
    if df.empty:
        raise ValueError(f"No rows found for tickers {stock_list}")
    df = df.sort_values(['date', 'tic']).reset_index(drop=True)

    # Missing indicator values are treated as 0
    df[indicators] = df[indicators].fillna(0)

    unique_dates = sorted(df['date'].unique())

    # NumPy inputs for the environment: rows are dates, columns are tickers.
    price_array = df.pivot(index='date', columns='tic', values='close').reindex(unique_dates).values
    # The pivot yields one column per (indicator, ticker) pair; a ticker that is
    # missing on some date would reintroduce NaN, so fill again after pivoting.
    tech_array = (
        df.pivot(index='date', columns='tic', values=indicators)
        .reindex(unique_dates)
        .fillna(0)
        .values
    )
    turbulence_array = np.zeros(len(unique_dates))  # no turbulence data: all zeros

    # Training only: perturb the whole price array once with random noise.
    # NOTE(review): CustomStockTradingEnv also applies per-trade slippage while
    # training, so training prices are perturbed twice — confirm this is intended.
    if if_train:
        rng = np.random.default_rng()
        price_array = price_array * (1 + rng.normal(loc=0.0, scale=slippage_pct, size=price_array.shape))

    # Clamp zero or negative prices to a small positive floor
    price_array = np.clip(price_array, a_min=1e-3, a_max=None)

    env = CustomStockTradingEnv(
        config={
            "price_array": price_array,
            "tech_array": tech_array,
            "turbulence_array": turbulence_array,
            "if_add_price": True,
            "if_add_tech": True,
            "if_add_turbulence": False,
            "risk_indicator_col": "turbulence",
            "initial_amount": initial_amount,
            "buy_cost_pct": 0.001,
            "sell_cost_pct": 0.001,
            "reward_scaling": 1e-4,
            "if_train": if_train,
            "max_stock": max_stock,             # position cap per stock
            "min_trade_unit": min_trade_unit,   # trade rounding unit
            "slippage_pct": slippage_pct        # extra key; harmless if the env does not read it
        }
    )

    return env


##### 分群訓練

In [28]:
# 訓練特定群組的模型（該群所有股票）
def train_model_for_cluster(cluster_id, cluster_labels, df, indicators, model_dir,
                            train_start='2020-01-01', train_end='2023-12-31',
                            total_timesteps=300_000):
    """Train and save one PPO model on all stocks of a cluster.

    Args:
        cluster_id: Cluster whose members are traded together.
        cluster_labels: Mapping of ticker -> cluster id.
        df: Long DataFrame with ``date``, ``tic``, ``close`` and indicator columns.
        indicators: Names of the technical-indicator columns.
        model_dir: Directory for the saved model; created when missing.
        train_start: First date of the training window (inclusive).
        train_end: Last date of the training window (inclusive).
        total_timesteps: Number of environment steps to train for.

    Returns:
        str: path of the saved model, ``<model_dir>/ppo_cluster_<cluster_id>.zip``.

    Raises:
        ValueError: if no ticker belongs to ``cluster_id``.
    """
    group_stocks = [tic for tic, g in cluster_labels.items() if g == cluster_id]
    if not group_stocks:
        raise ValueError(f"No stocks assigned to cluster {cluster_id}")
    print(f"🧠 訓練群 {cluster_id}：{group_stocks}")

    # Create the output folder up front so a missing directory fails before
    # training instead of after it.
    os.makedirs(model_dir, exist_ok=True)

    # Training window (defaults: 2020-01-01 to 2023-12-31)
    train_data = df[(df['date'] >= train_start) & (df['date'] <= train_end)]

    env = create_env_for_stock_np(train_data, stock_tic=group_stocks, indicators=indicators, if_train=True)
    agent = DRLAgent(env=env)
    model = agent.get_model("ppo")
    trained_model = agent.train_model(model=model, tb_log_name=f"ppo_cluster_{cluster_id}", total_timesteps=total_timesteps)

    model_path = os.path.join(model_dir, f"ppo_cluster_{cluster_id}.zip")
    trained_model.save(model_path)
    print(f"✅ 模型儲存於：{model_path}")
    return model_path

# 批次訓練所有群組模型
def train_all_models(cluster_labels, df, indicators, model_dir):
    """Train one model per distinct cluster id, in ascending id order."""
    shared_kwargs = dict(
        cluster_labels=cluster_labels,
        df=df,
        indicators=indicators,
        model_dir=model_dir,
    )
    distinct_ids = set(cluster_labels.values())
    for current_id in sorted(distinct_ids):
        train_model_for_cluster(cluster_id=current_id, **shared_kwargs)

In [29]:
from stockstats import StockDataFrame

def add_technical_indicators(df, indicators=None):
    """Append technical-indicator columns to every ticker's rows.

    Args:
        df: Long DataFrame with ``date`` and ``tic`` columns plus the price
            columns the indicators are computed from.
        indicators: stockstats indicator names. Defaults to
            ``["macd", "rsi_30", "cci_30", "wr_14"]``.

    Returns:
        pandas.DataFrame: all tickers stacked with a fresh integer index and
        one extra column per indicator that could be computed. An indicator
        that fails for a ticker is reported with ``print`` and skipped.
    """
    # A None default avoids sharing one mutable list between calls.
    if indicators is None:
        indicators = ["macd", "rsi_30", "cci_30", "wr_14"]

    # Nothing to compute; pd.concat would raise on an empty list below.
    if df.empty:
        return df.copy()

    df_list = []
    for tic in df['tic'].unique():
        sub_df = df[df['tic'] == tic].copy()
        sub_df = sub_df.sort_values("date")
        stock = StockDataFrame.retype(sub_df)

        for indicator in indicators:
            try:
                sub_df[indicator] = stock[indicator]
            except Exception as e:
                # Best effort: keep going with the remaining indicators.
                print(f"⚠️ {tic} 無法計算 {indicator}：{e}")

        # presumably retype() moves 'date' into the index — restore it as a
        # column when it is no longer present (TODO confirm against stockstats).
        if 'date' not in sub_df.columns:
            sub_df['date'] = sub_df.index

        df_list.append(sub_df)

    return pd.concat(df_list).reset_index(drop=True)


In [30]:
# 參數
model_dir = r"E:\python_project\class\Reinforce_Learning\RL\code\model"  # raw string: backslashes stay literal instead of being parsed as escape sequences
indicators = ["macd", "rsi_30", "cci_30", "wr_14"]

interpolated_df = add_technical_indicators(interpolated_df, indicators=indicators)

# Train every model (one per cluster)
train_all_models(cluster_labels=cluster_labels, df=interpolated_df, indicators=indicators, model_dir=model_dir)

# 測試指定股票（如 1101.TW）



🧠 訓練群 0：['1402.TW']
{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 64}
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




------------------------------------
| rollout/           |             |
|    ep_len_mean     | 1.04e+03    |
|    ep_rew_mean     | -3.49       |
| time/              |             |
|    fps             | 3146        |
|    iterations      | 1           |
|    time_elapsed    | 0           |
|    total_timesteps | 2048        |
| train/             |             |
|    reward          | -0.07114006 |
------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.04e+03     |
|    ep_rew_mean          | -7.3         |
| time/                   |              |
|    fps                  | 1948         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0024732542 |
|    clip_fraction        | 0.0187       |
|    clip_range           | 0.2         



----------------------------------
| rollout/           |           |
|    ep_len_mean     | 1.04e+03  |
|    ep_rew_mean     | 41.5      |
| time/              |           |
|    fps             | 3052      |
|    iterations      | 1         |
|    time_elapsed    | 0         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | -0.778896 |
----------------------------------
⚠️ Max drawdown triggered, forcing stop.
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 1.03e+03    |
|    ep_rew_mean          | 10.7        |
| time/                   |             |
|    fps                  | 1897        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.001979369 |
|    clip_fraction        | 0.00947     |
|    clip_range           | 0.2    



⚠️ Max drawdown triggered, forcing stop.
------------------------------------
| rollout/           |             |
|    ep_len_mean     | 984         |
|    ep_rew_mean     | -14.8       |
| time/              |             |
|    fps             | 2806        |
|    iterations      | 1           |
|    time_elapsed    | 0           |
|    total_timesteps | 2048        |
| train/             |             |
|    reward          | -0.16409145 |
------------------------------------
⚠️ Max drawdown triggered, forcing stop.
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 936         |
|    ep_rew_mean          | -17.8       |
| time/                   |             |
|    fps                  | 1851        |
|    iterations           | 2           |
|    time_elapsed         | 2           |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.005134033 |
|    clip_fr



---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.04e+03 |
|    ep_rew_mean     | -13.4    |
| time/              |          |
|    fps             | 2999     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 2048     |
| train/             |          |
|    reward          | 0.0      |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.04e+03     |
|    ep_rew_mean          | -10.3        |
| time/                   |              |
|    fps                  | 1785         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0050875493 |
|    clip_fraction        | 0.0176       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.42



-------------------------------------
| rollout/           |              |
|    ep_len_mean     | 1.04e+03     |
|    ep_rew_mean     | -4.3         |
| time/              |              |
|    fps             | 2727         |
|    iterations      | 1            |
|    time_elapsed    | 0            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | 0.0011791363 |
-------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 1.04e+03     |
|    ep_rew_mean          | -4.69        |
| time/                   |              |
|    fps                  | 1677         |
|    iterations           | 2            |
|    time_elapsed         | 2            |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0063019954 |
|    clip_fraction        | 0.0241       |
|    clip_range           | 



⚠️ Max drawdown triggered, forcing stop.
⚠️ Max drawdown triggered, forcing stop.
⚠️ Max drawdown triggered, forcing stop.
⚠️ Max drawdown triggered, forcing stop.
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 355       |
|    ep_rew_mean     | -30.7     |
| time/              |           |
|    fps             | 3000      |
|    iterations      | 1         |
|    time_elapsed    | 0         |
|    total_timesteps | 2048      |
| train/             |           |
|    reward          | 0.7979868 |
----------------------------------
⚠️ Max drawdown triggered, forcing stop.
⚠️ Max drawdown triggered, forcing stop.
⚠️ Max drawdown triggered, forcing stop.
⚠️ Max drawdown triggered, forcing stop.
⚠️ Max drawdown triggered, forcing stop.
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 455         |
|    ep_rew_mean          | -28         |
| time/                   |             |
|

#### 測試

In [50]:
def test_model_by_tic(cluster_labels, test_tic, df, indicators, model_dir, initial_amount=1e6):
    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import DummyVecEnv
    import matplotlib.pyplot as plt
    import numpy as np
    import pandas as pd
    import os

    if test_tic not in cluster_labels:
        raise ValueError(f"{test_tic} not found in cluster_labels.")

    group_id = cluster_labels[test_tic]
    model_path = os.path.join(model_dir, f"ppo_cluster_{group_id}.zip")
    print(f"📁 Using model for cluster {group_id}: {model_path}")

    group_stocks = [tic for tic, g in cluster_labels.items() if g == group_id]
    test_data = df[
        (df['date'] >= '2024-01-01') &
        (df['date'] <= '2024-10-30') &
        (df['tic'].isin(group_stocks))
    ].copy()

    def make_env():
        return create_env_for_stock_np(test_data, stock_tic=group_stocks, indicators=indicators, if_train=False)

    vec_env = DummyVecEnv([make_env])
    raw_env = vec_env.envs[0]

    model = PPO.load(model_path, device="auto")
    model.set_env(vec_env)

    obs = vec_env.reset()
    asset_history = []
    date_memory = sorted(test_data['date'].unique())
    step = 0

    # 為每個股票準備 log list
    stock_logs = {tic: [] for tic in group_stocks}

    while True:
        action, _ = model.predict(obs)
        obs, reward, done, info = vec_env.step(action)

        valid_day = max(raw_env.day - 1, 0)
        valid_day = min(valid_day, len(raw_env.price_array) - 1)
        prices = raw_env.price_array[valid_day]
        date = date_memory[valid_day]
        total_asset = raw_env.amount + (prices * raw_env.stocks).sum()
        asset_history.append(total_asset)

        for i, tic in enumerate(group_stocks):
            stock_logs[tic].append({
                "date": date,
                "stock": tic,
                "price": prices[i],
                "action": float(action[0][i]),
                "stock_holding": float(raw_env.stocks[i]),
                "amount": float(raw_env.amount),
                "total_asset": float(total_asset),
                "reward": float(reward[0]),
                "done": bool(done[0]),
                **{
                    f"indicator_{ind}": float(raw_env.tech_array[valid_day, j] if raw_env.tech_array.ndim == 2 else raw_env.tech_array[valid_day, i, j])
                    for j, ind in enumerate(indicators)
                }

            })

        if done:
            print("🧪 Final step info:")
            print(f"  step: {step}")
            print(f"  amount: {raw_env.amount}")
            print(f"  price_array: {prices}")
            print(f"  stocks: {raw_env.stocks}")
            break

        step += 1

    # ✅ 儲存 log：每支股票一檔 CSV
    log_dir = "E:/python_project/class/Reinforce_Learning/RL/code/log"
    os.makedirs(log_dir, exist_ok=True)
    for tic, logs in stock_logs.items():
        log_df = pd.DataFrame(logs)
        log_path = os.path.join(log_dir, f"{tic}_log.csv")
        log_df.to_csv(log_path, index=False)
        print(f"📝 Log saved to: {log_path}")

    # 繪圖與績效分析
    asset_memory = np.array(asset_history)
    date_memory = date_memory[:len(asset_memory)]

    if len(asset_memory) >= 2 and asset_memory[-1] < asset_memory[-2] * 0.5:
        print("⚠️ Final asset drop detected, removing last point.")
        asset_memory = asset_memory[:-1]
        date_memory = date_memory[:-1]

    final_value = asset_memory[-1]
    returns = asset_memory / initial_amount - 1
    peak = np.maximum.accumulate(asset_memory)
    drawdown = (asset_memory - peak) / peak
    max_drawdown = drawdown.min()
    daily_returns = np.diff(asset_memory) / asset_memory[:-1]
    sharpe_ratio = np.mean(daily_returns) / np.std(daily_returns) * np.sqrt(252) if np.std(daily_returns) > 0 else np.nan

    plt.figure(figsize=(10, 4))
    plt.plot(date_memory, asset_memory, label="Portfolio Value")
    plt.xlabel("Date")
    plt.ylabel("Asset Value")
    plt.title(f"Backtest of cluster {group_id} (target: {test_tic})")
    plt.grid(True)
    plt.tight_layout()
    plt.legend()
    plt.xticks(rotation=45)

    output_path = f"E:/python_project/class/Reinforce_Learning/RL/code/result_image/{test_tic}.png"
    plt.savefig(output_path)
    print(f"🖼️ Backtest chart saved to: {output_path}")
    plt.show()

    print(f"📌 Final Portfolio Value: ${final_value:,.2f}")
    print(f"📈 Cumulative Return: {returns[-1]*100:.2f}%")
    print(f"📉 Max Drawdown: {max_drawdown*100:.2f}%")
    print(f"📊 Sharpe Ratio: {sharpe_ratio:.2f}")
    print("Asset history (tail):", asset_memory[-5:])


In [51]:
# Tickers to backtest, one per line
my_stocks = [
    "0050.TW",
    "1101.TW",
    "1102.TW",
    "1301.TW",
    "1303.TW",
    "1326.TW",
    "1402.TW",
    "1722.TW",
    "2002.TW",
    "2301.TW",
]

def test_multiple_stocks_by_tic_list(tic_list, cluster_labels, df, indicators, model_dir, initial_amount=1e6):
    """Run ``test_model_by_tic`` once for each ticker in ``tic_list``.

    A banner naming the ticker is printed before each run. If a run raises,
    the error message and full traceback are printed and the loop continues
    with the next ticker, so one failing symbol does not abort the batch.

    Args:
        tic_list: Iterable of ticker symbols, processed in order.
        cluster_labels: Passed through unchanged to ``test_model_by_tic``.
        df: Passed through unchanged to ``test_model_by_tic``.
        indicators: Passed through unchanged to ``test_model_by_tic``.
        model_dir: Passed through unchanged to ``test_model_by_tic``.
        initial_amount: Passed through unchanged to ``test_model_by_tic``
            (defaults to 1e6).

    Returns:
        None.
    """
    import traceback

    rule = "======================"
    # Everything except the ticker itself is the same for every run.
    common_kwargs = {
        "cluster_labels": cluster_labels,
        "df": df,
        "indicators": indicators,
        "model_dir": model_dir,
        "initial_amount": initial_amount,
    }

    for test_tic in tic_list:
        # Blank line, rule, ticker line, rule -- one item per output line.
        print("\n" + rule, f"📊 Testing {test_tic}", rule, sep="\n")
        try:
            test_model_by_tic(test_tic=test_tic, **common_kwargs)
        except Exception as exc:
            # Best-effort batch: report the failure and keep going.
            print(f"❌ Error testing {test_tic}: {exc}")
            traceback.print_exc()

# Backtest every ticker in my_stocks; initial_amount is left at the function's default.
test_multiple_stocks_by_tic_list(my_stocks, cluster_labels, interpolated_df, indicators, model_dir)



📊 Testing 0050.TW
📁 Using model for cluster 5: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_5.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [129.63144684  77.88438416  65.64662552  60.69433212]
  stocks: [0. 0. 0. 0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\0050.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1301.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1303.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1326.TW_log.csv




🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/0050.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -34.81%
📊 Sharpe Ratio: 0.27
Asset history (tail): [1502663.194841   1493463.85604208 1505729.283816   1503049.72405495
 1000000.        ]

📊 Testing 1101.TW
📁 Using model for cluster 2: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_2.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [33.75818634 39.33778191]
  stocks: [0. 0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1101.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1102.TW_log.csv


  plt.show()


🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/1101.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -7.10%
📊 Sharpe Ratio: 0.05
Asset history (tail): [1059529.34982137 1065029.34219197 1064029.35745076 1064029.35745076
 1000000.        ]

📊 Testing 1102.TW
📁 Using model for cluster 2: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_2.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [33.75818634 39.33778191]
  stocks: [0. 0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1101.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1102.TW_log.csv


  plt.show()


🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/1102.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -6.29%
📊 Sharpe Ratio: 0.05
Asset history (tail): [1052857.69725835 1058357.68962896 1057357.70488775 1057357.70488775
 1000000.        ]

📊 Testing 1301.TW
📁 Using model for cluster 5: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_5.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [129.63144684  77.88438416  65.64662552  60.69433212]
  stocks: [0. 0. 0. 0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\0050.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1301.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1303.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1326.TW_log.csv


  plt.show()


🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/1301.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -33.81%
📊 Sharpe Ratio: 0.26
Asset history (tail): [1474790.91974736 1465762.07889023 1477800.88022084 1475167.53284271
 1000000.        ]

📊 Testing 1303.TW
📁 Using model for cluster 5: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_5.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [129.63144684  77.88438416  65.64662552  60.69433212]
  stocks: [0. 0. 0. 0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\0050.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1301.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1303.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1326.TW_log.csv


  plt.show()


🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/1303.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -34.78%
📊 Sharpe Ratio: 0.26
Asset history (tail): [1487175.81852989 1478072.08049459 1490211.27640641 1487559.18672981
 1000000.        ]

📊 Testing 1326.TW
📁 Using model for cluster 5: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_5.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [129.63144684  77.88438416  65.64662552  60.69433212]
  stocks: [0. 0. 0. 0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\0050.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1301.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1303.TW_log.csv
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1326.TW_log.csv


  plt.show()


🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/1326.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -34.78%
📊 Sharpe Ratio: 0.25
Asset history (tail): [1462665.60141053 1453711.22979356 1465650.09171678 1463038.41452382
 1000000.        ]

📊 Testing 1402.TW
📁 Using model for cluster 0: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_0.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [30.23944378]
  stocks: [0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1402.TW_log.csv
🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/1402.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -8.09%
📊 Sharpe Ratio: 0.06
Asset history (tail): [1065974.16968245 1067974.17731184 1076474.16205305 1073974.16205305
 1000000.        ]

📊 Testing 1722.TW
📁 Using model for cluster 3

  plt.show()
  plt.show()


🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [65.33745956]
  stocks: [0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\1722.TW_log.csv
🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/1722.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -10.98%
📊 Sharpe Ratio: 0.07
Asset history (tail): [ 919163.98661101  917163.97898162  920164.0094992   924163.98661101
 1000000.        ]

📊 Testing 2002.TW
📁 Using model for cluster 4: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_4.zip
🧪 Final step info:
  step: 216
  amount: 1000000.0
  price_array: [26.59179306]
  stocks: [0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\2002.TW_log.csv


  plt.show()
  plt.show()


🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/2002.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -6.39%
📊 Sharpe Ratio: 0.04
Asset history (tail): [ 956880.84006152  956380.84769092  959380.84006152  961380.84769092
 1000000.        ]

📊 Testing 2301.TW
📁 Using model for cluster 1: E:\python_project\class\Reinforce_Learning\RL\code\model\ppo_cluster_1.zip
⚠️ Max drawdown triggered, forcing stop.
🧪 Final step info:
  step: 153
  amount: 1000000.0
  price_array: [108.83529282]
  stocks: [0.]
📝 Log saved to: E:/python_project/class/Reinforce_Learning/RL/code/log\2301.TW_log.csv
🖼️ Backtest chart saved to: E:/python_project/class/Reinforce_Learning/RL/code/result_image/2301.TW.png
📌 Final Portfolio Value: $1,000,000.00
📈 Cumulative Return: 0.00%
📉 Max Drawdown: -28.78%
📊 Sharpe Ratio: 0.25
Asset history (tail): [ 737326.77218733  721424.35263167  721424.35263167  715648.08653761
 1000000.        ]


  plt.show()
