In [17]:
# 📦 載入資料與基本處理
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from tslearn.metrics import cdist_dtw
from sklearn.metrics import silhouette_score
from stockstats import StockDataFrame
import matplotlib.pyplot as plt
import warnings
import sys
sys.path.append("/Users/gary/Documents/project/RL/code/FinRL")

warnings.filterwarnings("ignore")
%matplotlib inline



In [18]:
# 📂 讀入所有股票資料
def load_all_stock_data(folder_path):
    """Load every ``.csv`` file in ``folder_path`` into one long DataFrame.

    Each file is read with ``pd.read_csv`` and tagged with a ``tic`` column
    derived from its file name (``converted_`` and ``.csv`` are stripped).

    Args:
        folder_path: Directory that contains the per-ticker CSV files.

    Returns:
        One DataFrame with a fresh integer index holding the rows of all
        files, concatenated in sorted file-name order.

    Raises:
        ValueError: If ``folder_path`` contains no ``.csv`` file.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and any ticker order derived from it) identical between runs.
    csv_files = sorted(name for name in os.listdir(folder_path) if name.endswith(".csv"))
    if not csv_files:
        raise ValueError(f"No .csv files found in {folder_path!r}")

    frames = []
    for filename in csv_files:
        tic = filename.replace("converted_", "").replace(".csv", "")
        frame = pd.read_csv(os.path.join(folder_path, filename))
        frame["tic"] = tic
        frames.append(frame)
    return pd.concat(frames, ignore_index=True)

# Directory holding the per-ticker CSV files consumed by load_all_stock_data.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
folder_path = "/Users/gary/Documents/project/RL/code/converted_stock"
# Long-format frame with the rows of every CSV plus a 'tic' column.
raw_df = load_all_stock_data(folder_path)


In [19]:
# 📈 log return 計算與標準化
def preprocess_stock_data(df):
    """Build a min-max scaled matrix of daily log returns, one column per ticker.

    For every ticker the ``close`` series is re-indexed onto a continuous
    calendar-day range spanning the whole data set, gaps are filled (linear
    interpolation, then back-fill and forward-fill for the edges) and the log
    return ``log(close_t / close_{t-1})`` is computed. Each resulting column
    is then scaled independently with ``MinMaxScaler``.

    Args:
        df: Long-format frame with at least ``date``, ``tic`` and ``close``
            columns. The frame is not modified.

    Returns:
        DataFrame indexed by date with one scaled log-return column per ticker.
    """
    # Work on a copy with parsed dates so the caller's frame is left untouched.
    data = df.assign(date=pd.to_datetime(df['date']))
    full_dates = pd.date_range(data['date'].min(), data['date'].max())

    processed = {}
    for tic in data['tic'].unique():
        close = data.loc[data['tic'] == tic].set_index('date')['close'].reindex(full_dates)
        # .bfill()/.ffill() replace the deprecated fillna(method=...) spelling.
        close = close.interpolate(method='linear').bfill().ffill()
        log_return = np.log(close / close.shift(1))
        # The first row has no previous close, so its return is NaN and dropped.
        processed[tic] = log_return.dropna()

    result_df = pd.DataFrame(processed)
    scaler = MinMaxScaler()
    scaled = pd.DataFrame(
        scaler.fit_transform(result_df),
        columns=result_df.columns,
        index=result_df.index,
    )

    return scaled


In [20]:
from tslearn.metrics import cdist_dtw
from pyclustering.cluster.kmedoids import kmedoids
import numpy as np

def dtw_clustering_pyclustering(log_return_df, k=3):
    """Cluster tickers with k-medoids on a pairwise DTW distance matrix.

    Args:
        log_return_df: DataFrame with one column per ticker and one row per
            time step.
        k: Number of clusters; must be between 1 and the number of columns.

    Returns:
        A tuple ``(label_map, k)`` where ``label_map`` maps each ticker to the
        position of its cluster in ``get_clusters()`` and ``k`` is echoed back.

    Raises:
        ValueError: If ``k`` is outside ``1..number of columns``.
    """
    stock_list = list(log_return_df.columns)
    # The initial medoids are the first k series, so k must not exceed the
    # number of series; fail early instead of deep inside the clustering code.
    if not 1 <= k <= len(stock_list):
        raise ValueError(
            f"k must be between 1 and the number of series ({len(stock_list)}), got {k}"
        )

    # Transpose to one series per ticker and add a trailing feature axis:
    # shape (n_tickers, n_rows, 1).
    series_array = log_return_df.T.values[..., np.newaxis]
    distance_matrix = cdist_dtw(series_array)

    # Arbitrary initial medoids: simply the first k indices.
    initial_medoids = list(range(k))

    kmedoids_instance = kmedoids(distance_matrix, initial_medoids, data_type='distance_matrix', ccore=False)
    kmedoids_instance.process()
    clusters = kmedoids_instance.get_clusters()

    label_map = {}
    for i, cluster in enumerate(clusters):
        for idx in cluster:
            label_map[stock_list[idx]] = i
    return label_map, k


In [21]:
# 🧮 技術指標計算與資料分割
from stockstats import StockDataFrame

def add_technical_indicators(df, indicators=("macd", "rsi_30", "cci_30", "wr_14")):
    """Append technical-indicator columns to every ticker's rows.

    Each ticker is processed separately: its rows are sorted by ``date``,
    wrapped with ``StockDataFrame.retype`` and every requested indicator is
    copied back as a new column. An indicator that cannot be computed is
    reported with ``print`` and skipped, so its column may be absent.

    Args:
        df: Long-format frame with at least ``tic`` and ``date`` columns plus
            whatever price columns the indicators need.
        indicators: Names of the indicator columns to request from the
            ``StockDataFrame``. The default is an immutable tuple so it can
            never be altered between calls.

    Returns:
        A new DataFrame with a fresh integer index containing all tickers,
        with ``date`` as a regular column.
    """
    df_list = []
    for tic in df['tic'].unique():
        sub_df = df[df['tic'] == tic].copy()
        sub_df = sub_df.sort_values("date")  # requires a 'date' column to sort on

        # Per the original author's note, retype moves 'date' into the index.
        stock = StockDataFrame.retype(sub_df)

        for indicator in indicators:
            try:
                sub_df[indicator] = stock[indicator]
            except Exception as e:
                # Best effort: report the failure and continue with the rest.
                print(f"⚠️ 無法計算指標 {indicator}: {e}")

        # Make sure 'date' is a column again (not only the index).
        if 'date' not in sub_df.columns:
            sub_df['date'] = sub_df.index

        df_list.append(sub_df)

    # Drop the per-ticker indexes to avoid confusion downstream.
    result = pd.concat(df_list).reset_index(drop=True)
    return result




def split_train_test(df, split_date="2024-01-01"):
    """Split a frame chronologically into train and test parts.

    Args:
        df: Frame with a ``date`` column that ``pd.to_datetime`` can parse.
            The frame is not modified.
        split_date: First date that belongs to the test set.

    Returns:
        A tuple ``(train, test)`` of independent DataFrames whose ``date``
        column is datetime-typed: ``train`` holds rows strictly before
        ``split_date``, ``test`` holds rows on or after it.
    """
    cutoff = pd.to_datetime(split_date)
    # Convert on a copy so the caller's 'date' column keeps its original dtype.
    data = df.assign(date=pd.to_datetime(df['date']))
    train = data[data['date'] < cutoff].copy()
    test = data[data['date'] >= cutoff].copy()
    return train, test


In [22]:
# ⚙️ 建立訓練環境與模型訓練
from finrl.meta.env_stock_trading.env_stocktrading_np import StockTradingEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from stable_baselines3 import PPO

def create_env_for_stock_np(df, stock_tic, indicators, initial_amount=1e6, if_train=True):
    """Build a single-ticker ``StockTradingEnv`` from a long-format frame.

    Args:
        df: Frame with ``tic``, ``close`` and one column per indicator.
        stock_tic: Ticker whose rows are used.
        indicators: Names of the indicator columns fed to the environment.
        initial_amount: Value stored under ``"initial_amount"`` in the config.
        if_train: Value stored under ``"if_train"`` in the config.

    Returns:
        The constructed ``StockTradingEnv``.

    Raises:
        KeyError: If a required column is missing from ``df``.
        ValueError: If no complete row is left for ``stock_tic``.
    """
    indicator_cols = list(indicators)  # a tuple would be read as one column key
    missing = [col for col in ['tic', 'close', *indicator_cols] if col not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Rows containing any NaN are discarded before the arrays are built.
    rows = df[df['tic'] == stock_tic].copy().dropna().reset_index(drop=True)
    if rows.empty:
        raise ValueError(f"No usable rows for ticker {stock_tic!r} after dropping NaN values")

    price_array = rows[['close']].values
    tech_array = rows[indicator_cols].values
    turbulence_array = np.zeros(len(rows))  # no turbulence data: use all zeros

    # NOTE(review): confirm which of these keys the installed StockTradingEnv
    # actually reads from `config`.
    env = StockTradingEnv(
        config={
            "price_array": price_array,
            "tech_array": tech_array,
            "turbulence_array": turbulence_array,
            "if_add_price": True,
            "if_add_tech": True,
            "if_add_turbulence": False,
            "risk_indicator_col": "turbulence",
            "initial_amount": initial_amount,
            "buy_cost_pct": 0.001,
            "sell_cost_pct": 0.001,
            "reward_scaling": 1e-4,
            "if_train": if_train  # this entry had been left out previously
        }
    )
    return env


def train_ppo_model(env, model_path, total_timesteps=300000, tb_log_name="ppo"):
    """Train a PPO model on ``env`` through ``DRLAgent`` and save it.

    Args:
        env: Environment handed to ``DRLAgent``.
        model_path: Destination passed to the trained model's ``save``.
        total_timesteps: Training budget forwarded to ``train_model``
            (defaults to the previously hard-coded 300000).
        tb_log_name: Log name forwarded to ``train_model``.

    Returns:
        The trained model returned by ``DRLAgent.train_model``.
    """
    agent = DRLAgent(env=env)
    model = agent.get_model("ppo")
    trained_model = agent.train_model(
        model=model,
        tb_log_name=tb_log_name,
        total_timesteps=total_timesteps,
    )
    trained_model.save(model_path)
    return trained_model


In [23]:
def evaluate_model(trained_model, env, initial_amount):
    """Run one episode of ``trained_model`` on ``env`` and plot the outcome.

    The episode is stepped until the environment reports ``terminated`` or
    ``truncated``. The cumulative reward and the first state entry are plotted
    side by side, saved to ``result_plot.png`` and shown.

    Args:
        trained_model: Object with a ``predict(state, deterministic=...)``
            method returning ``(action, extra)``.
        env: Environment whose ``reset()`` returns ``(state, info)`` and whose
            ``step(action)`` returns a five-element tuple.
        initial_amount: Currently unused; kept so existing calls keep working.

    Returns:
        None.
    """
    state, _ = env.reset()
    done = False
    total_rewards = []
    # NOTE(review): state[0] is recorded as the portfolio value — confirm it is
    # the total asset value in this env's observation layout (and not, for
    # example, a scaled cash balance).
    portfolio_values = [state[0]]

    step_count = 0
    while not done:
        # Greedy actions make the back-test repeatable instead of sampling
        # from the policy distribution on every run.
        action, _ = trained_model.predict(state, deterministic=True)
        state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        total_rewards.append(reward)
        portfolio_values.append(state[0])
        step_count += 1

    print(f"✅ 模型測試完成，共執行 {step_count} 步。")
    print(f"📈 總 reward 長度：{len(total_rewards)}")
    print(f"💰 Portfolio value 長度：{len(portfolio_values)}")

    plt.figure(figsize=(10, 4))

    # Left panel: cumulative reward over the episode.
    plt.subplot(1, 2, 1)
    plt.plot(np.cumsum(total_rewards))
    plt.title("累積 Reward")

    # Right panel: recorded first state entry per step.
    plt.subplot(1, 2, 2)
    plt.plot(portfolio_values)
    plt.title("Portfolio Value")

    plt.tight_layout()
    plt.savefig("result_plot.png")  # keep a copy of the figure on disk
    plt.show()


In [24]:
# Feature engineering, clustering and chronological split.
indicators = ["macd", "rsi_30", "cci_30", "wr_14"]
raw_df = add_technical_indicators(raw_df, indicators=indicators)
log_return_df = preprocess_stock_data(raw_df)
cluster_labels, best_k = dtw_clustering_pyclustering(log_return_df, k=5)

train_data, test_data = split_train_test(raw_df)

model_dir = "/Users/gary/Documents/project/RL/code/model"
os.makedirs(model_dir, exist_ok=True)

# Collect the tickers of every cluster once, keeping cluster_labels' order.
members_by_cluster = {}
for ticker, cluster_id in cluster_labels.items():
    members_by_cluster.setdefault(cluster_id, []).append(ticker)

# Train one model per cluster on the first ticker listed for that cluster.
for group_id in sorted(members_by_cluster):
    group_stocks = members_by_cluster[group_id]
    representative_tic = group_stocks[0]
    print(f"🧠 訓練第 {group_id} 群：股票 {representative_tic}")

    env = create_env_for_stock_np(
        train_data,
        stock_tic=representative_tic,
        indicators=indicators,
        if_train=True,
    )
    model_path = f"{model_dir}/ppo_cluster_{group_id}.zip"

    model = train_ppo_model(env, model_path)


🧠 訓練第 0 群：股票 1722.TW
{'n_steps': 2048, 'ent_coef': 0.01, 'learning_rate': 0.00025, 'batch_size': 64}
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
-------------------------------------
| rollout/           |              |
|    ep_len_mean     | 726          |
|    ep_rew_mean     | 8.74         |
| time/              |              |
|    fps             | 6271         |
|    iterations      | 1            |
|    time_elapsed    | 0            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | -0.090232246 |
-------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 726          |
|    ep_rew_mean          | 16.9         |
| time/                   |              |
|    fps                  | 4418         |
|    iterations           | 2            |
|    time_elapsed         | 0            |
|    

KeyboardInterrupt: 

In [9]:
# 🧪 測試區塊：載入已訓練模型並進行回測與視覺化

from stable_baselines3 import PPO

# Parameters reused from the training cells.
indicators = ["macd", "rsi_30", "cci_30", "wr_14"]
target_tic = "1402.TW"
initial_amount = 1e6

# Locate the saved model of the cluster that target_tic belongs to.
model_dir = "/Users/gary/Documents/project/RL/code/model"
target_group = cluster_labels[target_tic]  # in a fresh kernel, recompute/reload cluster_labels first
model_path = f"{model_dir}/ppo_cluster_{target_group}.zip"

# Build the test environment from the held-out split (if_train=False).
test_env = create_env_for_stock_np(test_data, stock_tic=target_tic, indicators=indicators, if_train=False)

# Load the saved model and run the back-test / plots.
model = PPO.load(model_path, env=test_env)
evaluate_model(model, test_env, initial_amount=initial_amount)


Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
✅ 模型測試完成，共執行 97 步。
📈 總 reward 長度：97
💰 Portfolio value 長度：98
