In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=False)
%cd /content

# 切到你的專案資料夾
%cd /content/drive/MyDrive/LSTM_PROGRAM

Mounted at /content/drive
/content
/content/drive/MyDrive/LSTM_PROGRAM


In [3]:
!mkdir -p ~/.ssh
!cp /content/drive/MyDrive/.ssh/id_ed25519* ~/.ssh/
!chmod 700 ~/.ssh
!chmod 600 ~/.ssh/id_ed25519

!eval "$(ssh-agent -s)" && ssh-add ~/.ssh/id_ed25519
!ssh-keyscan github.com >> ~/.ssh/known_hosts
!chmod 644 ~/.ssh/known_hosts

!ssh -T git@github.com


Agent pid 821
Identity added: /root/.ssh/id_ed25519 (joemi7878@gmail.com)
# github.com:22 SSH-2.0-230b7ac
# github.com:22 SSH-2.0-230b7ac
# github.com:22 SSH-2.0-230b7ac
# github.com:22 SSH-2.0-230b7ac
# github.com:22 SSH-2.0-230b7ac
Hi joemi78! You've successfully authenticated, but GitHub does not provide shell access.


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
import os

import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers
from tensorflow.keras import mixed_precision

from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import precision_score, recall_score, f1_score

from functools import cached_property
from typing import Optional


try:
    from arch import arch_model
    HAS_ARCH = True
except Exception:
    HAS_ARCH = False


#### SET GPU
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs:", len(gpus), gpus)

if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print("✓ memory growth set")
    except RuntimeError as e:
        print("⚠️ GPU 已初始化，無法再設定 memory growth：", e)

# （建議）再開啟 XLA 與混合精度
tf.config.optimizer.set_jit(True)
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy('mixed_float16')

print("是否可用GPU:", tf.test.is_gpu_available())
print("使用中的裝置:", tf.config.list_physical_devices('GPU'))

print("✓ GPU 初始化流程完成")




# 1. 定義檔案路徑
file_paths = {
    "bonds_day": "./filtered_output/bonds_day_clean_period.csv",
    "bonds_hour": "./filtered_output/bonds_hour_clean_period.csv",
    "crypto_day": "./filtered_output/crypto_day_clean_period.csv",
    "crypto_hour": "./filtered_output/crypto_hour_clean_period.csv",
    "others_day": "./filtered_output/others_day_clean_period.csv",
    "others_hour": "./filtered_output/others_hour_clean_period.csv",
    "stock_day": "./filtered_output/stock_day_clean_period.csv",
    "stock_hour": "./filtered_output/stock_hour_clean_period.csv"
}


def find_date_col(df):
    for col in df.columns:
        if 'date' in col.lower():
            return col
    return df.columns[0]

def read_and_clean(file):
    # ① 正确地读 CSV，不要写 (index=True)
    df = pd.read_csv(file)

    # ② 找到原始时间列名
    date_col = find_date_col(df)

    # ③ 依次尝试各种格式去解析
    parsed = False
    for fmt in [
        '%Y-%m-%d %H:%M:%S',
        '%Y/%m/%d %H:%M:%S',
        '%Y-%m-%d %H:%M',
        '%Y/%m/%d %H:%M',
    ]:
        try:
            df[date_col] = pd.to_datetime(
                df[date_col],
                format=fmt,     # 严格匹配
                errors='raise'  # 抛错就切换下一个 fmt
            )
            parsed = True
            break
        except Exception:
            continue

    # ④ 如果上面都没能解析，再宽松一把
    if not parsed:
        df[date_col] = pd.to_datetime(df[date_col], errors='coerce')

    # ⑤ 把分钟/秒都砍掉，保留到「小时」粒度
    df[date_col] = df[date_col].dt.floor('h').dt.tz_localize(None)

    # ⑥ 把这一列重命名为 DATE
    df = df.rename(columns={date_col: 'DATE'})

    # （可选）如果你想让 DATE 作为索引：
    # df = df.set_index('DATE')

    return df



raw_dfs = {}
for name, path in file_paths.items():
    raw_dfs[name] = read_and_clean(path)
    # print(raw_dfs[name].columns)

# ===== 資料結構：取代 all_data =====
class AssetGroupLite:
    def __init__(self, name, df):
        self.name = name
        df = df.copy()
        df['DATE'] = pd.to_datetime(df['DATE'])
        self.raw = df.set_index('DATE').sort_index()

    @cached_property
    def _close_cols(self):
        return [c for c in self.raw.columns if c.endswith('_CLOSE')]

    @cached_property
    def _vol_cols(self):
        return [c for c in self.raw.columns if c.endswith('_VOLUME')]

    @cached_property
    def close_ln(self):
        if not self._close_cols:
            return self.raw.iloc[[]]
        return np.log(self.raw[self._close_cols]).rename(
            columns=lambda x: x.replace('_CLOSE', '_CLOSE_ln')
        )

    @cached_property
    def close_ln_ret(self):
        if not self._close_cols:
            return self.raw.iloc[[]]
        logp = np.log(self.raw[self._close_cols])
        return (
            logp.diff()
               .rename(columns=lambda x: x.replace('_CLOSE', '_CLOSE_ln_ret'))
               .dropna(how='all')
        )

    @cached_property
    def close_arith_ret(self):
        if not self._close_cols:
            return self.raw.iloc[[]]
        return (
            self.raw[self._close_cols].pct_change()
                .rename(columns=lambda x: x.replace('_CLOSE', '_CLOSE_arith_ret'))
                .dropna(how='all')
        )

class DataRepository:
    REQUIRED_INDEX = "DATE"

    def __init__(self, raw_dfs: dict, check_schema: bool = True):
        self.groups = {}
        for name, df in raw_dfs.items():
            if check_schema:
                assert 'DATE' in df.columns, f"{name}: 缺少 DATE 欄"
            self.groups[name] = AssetGroupLite(name, df)

    # 補上 group()，方便外部與內部呼叫
    def group(self, name: str) -> AssetGroupLite:
        if name not in self.groups:
            raise KeyError(f"Group '{name}' 不存在。可用群組：{list(self.groups.keys())}")
        return self.groups[name]

    def series(self, group: str, series_name: str) -> pd.Series:
        g = self.group(group)
        # 加上 raw → 能抓 OHLCV、IS_TRADING 等原始欄
        search_order = ['close_ln_ret', 'close_arith_ret', 'close_ln', 'raw']

        # 1) 直接命中
        for key in search_order:
            tbl = getattr(g, key)
            if series_name in tbl.columns:
                return tbl[series_name]

        # 2) 容錯：只給 base symbol，自動補候選
        base = (series_name
                .replace('_CLOSE', '')
                .replace('_OPEN', '')
                .replace('_HIGH', '')
                .replace('_LOW', '')
                .replace('_VOLUME', '')
                .replace('_IS_TRADING', '')
                .replace('_CLOSE_ln_ret', '')
                .replace('_CLOSE_arith_ret', '')
                .replace('_CLOSE_ln', ''))
        candidates = [
            f'{base}_CLOSE_ln_ret',
            f'{base}_CLOSE_arith_ret',
            f'{base}_CLOSE_ln',
            f'{base}_OPEN',
            f'{base}_HIGH',
            f'{base}_LOW',
            f'{base}_CLOSE',
            f'{base}_VOLUME',
            f'{base}_IS_TRADING'
        ]
        for cand in candidates:
            for key in search_order:
                tbl = getattr(g, key)
                if cand in tbl.columns:
                    return tbl[cand]

        raise KeyError(f"{group}: 找不到 {series_name} 或候選 {candidates}")

    # 取整張表
    def table(self, group: str, table_name: str) -> pd.DataFrame:
        g = self.group(group)
        if not hasattr(g, table_name):
            raise KeyError(f"{group}: 無表 '{table_name}'。可用表：['close_ln_ret','close_arith_ret','close_ln']")
        return getattr(g, table_name)

    # 若要直接拿 raw 的原始價/量欄位（例如 *_CLOSE 或 *_VOLUME）
    def raw_series(self, group: str, raw_col: str) -> pd.Series:
        g = self.group(group)
        if raw_col not in g.raw.columns:
            raise KeyError(f"{group}: raw 中沒有欄位 {raw_col}")
        return g.raw[raw_col]

repo = DataRepository(raw_dfs)

# # 1) 取整張 log-return 表
# df_lnret = repo.table('crypto_hour', 'close_ln_ret')
# print(type(df_lnret))
# print(df_lnret['BTCUSDT_CLOSE_ln_ret'])
# print('crypto_hour close_ln_ret columns (head):', df_lnret.columns[:5])

# # 2) 直接取單一序列（完整名）
# s1 = repo.series('crypto_hour', 'BTCUSDT_CLOSE_ln_ret')
# print('BTCUSDT_CLOSE_ln_ret len:', len(s1))
#
# # 3) 容錯：只給 base symbol，會幫你補 _CLOSE_ln_ret
# s2 = repo.series('crypto_hour', 'BTCUSDT')
# print('BTCUSDT (auto-suffixed) len:', len(s2))
#
# # 4) 原始價（若需要）
# p = repo.raw_series('stock_day', 'ES1_CLOSE')
# print('ES1_CLOSE raw len:', len(p))




####################################
#########     LSTM     #############
####################################
# ------------------ Features ------------------
def build_feature_df(repo: DataRepository, group: str, symbol: str) -> pd.DataFrame:
    s_open  = repo.raw_series(group, f'{symbol}_OPEN').asfreq('h')
    s_high  = repo.raw_series(group, f'{symbol}_HIGH').asfreq('h')
    s_low   = repo.raw_series(group, f'{symbol}_LOW').asfreq('h')
    s_close = repo.raw_series(group, f'{symbol}_CLOSE').asfreq('h')
    s_vol   = repo.raw_series(group, f'{symbol}_VOLUME').asfreq('h')
    s_flag = repo.raw_series(group, f'{symbol}_IS_TRADING').asfreq('h')
    s_lnrt  = repo.series(group, f'{symbol}_CLOSE_ln_ret').asfreq('h')
    s_ln  = repo.series(group, f'{symbol}_CLOSE_ln').asfreq('h')
    df = pd.concat([
        s_lnrt.rename('LN_RET'),
        s_open.rename('OPEN'), s_high.rename('HIGH'), s_low.rename('LOW'),
        s_close.rename('CLOSE'), s_vol.rename('VOLUME'),
        s_flag.rename('IS_TRADING'),
        s_ln.rename('CLOSE_LN')
    ], axis=1)
    df = df.apply(pd.to_numeric, errors='coerce')
    return df.dropna(how='any')

# ------------------ Model ------------------
###############  LSTM
def build_small_lstm(input_len: int, n_features: int) -> tf.keras.Model:
    inp = layers.Input(shape=(input_len, n_features))
    x = layers.LSTM(LSTM_UNITS, return_sequences=False)(inp)
    x = layers.Dropout(DROPOUT)(x)
    out = layers.Dense(1, activation='linear')(x)
    m = models.Model(inp, out)
    # loss_fn = 'mse'
    m.compile(optimizer=optimizers.Adam(learning_rate=LR), loss='mse')  # ← 改用 MSE（或 Huber(delta≈0.05)）
    return m

############### mcHARCH
def fit_vol_per_window(ret_window, mode='garch'):
    y = ret_window.dropna().astype(float)
    if len(y) < 30:
        lam = 0.94
        ewma_var = y.pow(2).ewm(alpha=1-lam, adjust=False).mean()
        log_sigma_series = 0.5 * np.log(np.maximum(ewma_var.values, 1e-12))
        log_sigma_series = pd.Series(log_sigma_series, index=ewma_var.index)\
                              .reindex(ret_window.index).ffill().bfill()
        sigma_next = np.sqrt(lam * ewma_var.iloc[-1] + (1-lam) * y.iloc[-1]**2)
        return log_sigma_series, float(sigma_next)

    vol = 'HARCH' if mode.lower() == 'harch' else 'GARCH'
    p, q = (3, 0) if vol == 'HARCH' else (1, 1)

    scale = 100.0
    am = arch_model(y.values * scale, mean='Zero', vol=vol, p=p, q=q, dist='t')
    res = am.fit(disp='off')

    # 視窗內「過濾」波動：先除回 scale，再做下限保護
    sigma_series = res.conditional_volatility          # ndarray
    sigma_series = np.maximum(sigma_series / scale, 1e-12)
    log_sigma_series = np.log(sigma_series)
    log_sigma_series = pd.Series(log_sigma_series, index=y.index)\
                          .reindex(ret_window.index).ffill().bfill()

    # 一步前瞻：variance → sqrt → 除回 scale
    fvar = res.forecast(horizon=1, reindex=False).variance.values[-1, 0]
    sigma_next = float(np.sqrt(fvar) / scale)

    return log_sigma_series, sigma_next

# ------------------ Config ------------------
ASSET_SYMBOL = 'BTCUSDT'
GROUP_HOUR   = 'crypto_hour'
TARGET_START_STR = '2022-08-01 00:00:00'
TARGET_END_STR   = '2023-08-31 23:00:00'

WINDOW_HOURS = 24 * 10  # 10 days
MODE = 'warm'           # 'warm' or 'refit'
EPOCHS_INIT = 8
EPOCHS_STEP = 1
BATCH_SIZE = 32
LR = 5e-4
LSTM_UNITS = 64
DROPOUT = 0.2
USE_HUBER = True
NORMALIZE_FEATURES = True  # z-score inputs per window

# ------------------ Features ------------------
feature_names = ['LN_RET','OPEN','HIGH','LOW','CLOSE','VOLUME','CLOSE_LN']

feat_df = build_feature_df(repo, GROUP_HOUR, ASSET_SYMBOL)
close_series = feat_df['CLOSE']

PRED_START = pd.to_datetime(TARGET_START_STR)
PRED_END   = pd.to_datetime(TARGET_END_STR)
first_needed = PRED_START - pd.Timedelta(hours=WINDOW_HOURS)
if close_series.index.min() > first_needed:
    raise RuntimeError(f"Insufficient history: need <= {first_needed}, have from {close_series.index.min()}")

# ------------------ Model ------------------

model: Optional[tf.keras.Model] = None
rows = []
cur_time = PRED_START
N_FEATURES = len(feature_names)

while cur_time <= PRED_END:
    print(f"start {cur_time}")
    win_start = cur_time - pd.Timedelta(hours=WINDOW_HOURS)
    win_end   = cur_time - pd.Timedelta(hours=1)

    if cur_time not in feat_df.index:
        cur_time += pd.Timedelta(hours=1)
        continue

    # Window features
    Xw = feat_df.loc[win_start:win_end, feature_names]
    if len(Xw) != WINDOW_HOURS or Xw.isna().any().any():
        cur_time += pd.Timedelta(hours=1)
        continue

    # ##########################################  garch  ##########################
    # # ---- 每個視窗擬合（mc）GARCH，產生波動特徵 ----
    # log_sigma_series, sigma_next = fit_vol_per_window(
    #     ret_window=feat_df.loc[win_start:win_end, 'LN_RET'],  # 僅用到 t-1
    #     mode='garch'  # 或 'harch' / 'ewma'
    # )
    # Xw_ext = Xw.copy()
    # Xw_ext['LOG_SIGMA'] = log_sigma_series  # 新特徵（只到 t-1，不洩漏）


    if NORMALIZE_FEATURES:
        cols_to_norm = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME','CLOSE_LN']  # LN_RET 可視需求排除
        mu = Xw[cols_to_norm].mean(axis=0)
        sd = Xw[cols_to_norm].std(axis=0).replace(0.0, np.nan)
        Xw[cols_to_norm] = (Xw[cols_to_norm] - mu) / sd
        Xw = Xw.fillna(0.0)

    X_t = Xw[feature_names].values.reshape(1, WINDOW_HOURS, N_FEATURES).astype(np.float32)


    # --- 目標：下一小時 log-price ---
    tm1     = cur_time - pd.Timedelta(hours=1)
    ln_p_tm1  = float(feat_df.loc[tm1, 'CLOSE_LN'])      # ln(price_{t-1})
    ln_p_t   = float(feat_df.loc[cur_time, 'CLOSE_LN']) # ln(price_t)


    # --- 先 predict，再 fit（避免樂觀偏差）---
    if model is not None:
        # ŷ_z：標準化 ln_price
        z_hat      = float(model.predict(X_t, verbose=0).ravel()[0])
        yhat_ln_t  = z_hat * sig_ln + mu_ln   # inverse transform
        price_pred = float(np.exp(yhat_ln_t))
        r_hat      = yhat_ln_t - ln_p_tm1     # 預測的 log-return
    else:
        yhat_ln_t = np.nan
        price_pred = np.nan
        r_hat = np.nan


    # --- 建模時要用新的特徵維度 ---
    if (model is None) or (MODE == 'refit'):
        model = build_small_lstm(WINDOW_HOURS, N_FEATURES)  # ← 改這裡
        epochs_here = EPOCHS_INIT
    else:
        epochs_here = EPOCHS_STEP

    # 在每個滑窗用 ≤ t-1 的 ln 價建立 scaler（避免洩漏）
    mu_ln  = float(feat_df.loc[win_start:win_end, 'CLOSE_LN'].mean())
    sig_ln = float(feat_df.loc[win_start:win_end, 'CLOSE_LN'].std(ddof=0) + 1e-12)

    # y 標準化
    y_z_t  = np.array([(ln_p_t - mu_ln)/sig_ln], dtype=np.float32)

    # 建模時用 bias_init = mu_ln（可選，效果更穩）
    hist = model.fit(X_t, y_z_t, epochs=epochs_here, batch_size=1, shuffle=False, verbose=0)

    last_loss = float(hist.history['loss'][-1])

    # 真實價格與報酬（用來對照）
    p_tm1 = float(np.exp(ln_p_tm1))
    p_t  = float(np.exp(ln_p_t))
    y_ret = float(ln_p_t - ln_p_tm1)   # 真實的 log-return

    # 記錄
    rows.append({
        'timestamp': cur_time,
        'ln_true' : ln_p_t,
        'ln_pred' : yhat_ln_t,
        'ret_true' : y_ret,
        'ret_pred' : r_hat,
        'price_true': p_t,
        'price_pred': price_pred,
        'train_loss': last_loss,
    })

    cur_time += pd.Timedelta(hours=1)


# ------------------ Evaluation ------------------
df_eval = pd.DataFrame(rows).set_index('timestamp').sort_index()

# 1) 濾掉第一筆（PRED_START 當下模型尚未建立），僅保留 PRED_START+1h ~ PRED_END
mask_eval = (df_eval.index > PRED_START) & (df_eval.index <= PRED_END)
df_pred_sw = df_eval.loc[mask_eval].copy()

# 2) 安全遮罩：僅評估有限實數
is_finite_ret  = np.isfinite(df_pred_sw['ret_true'])  & np.isfinite(df_pred_sw['ret_pred'])
is_finite_price= np.isfinite(df_pred_sw['price_true'])& np.isfinite(df_pred_sw['price_pred'])

# 3) Returns 指標
if is_finite_ret.any():
    mae_val  = float(mean_absolute_error(df_pred_sw.loc[is_finite_ret, 'ret_true'],
                                         df_pred_sw.loc[is_finite_ret, 'ret_pred']))
    rmse_val = float(np.sqrt(mean_squared_error(df_pred_sw.loc[is_finite_ret, 'ret_true'],
                                                df_pred_sw.loc[is_finite_ret, 'ret_pred'])))
    r2_ret   = (r2_score(df_pred_sw.loc[is_finite_ret, 'ret_true'],
                         df_pred_sw.loc[is_finite_ret, 'ret_pred'])
                if is_finite_ret.sum() > 1 else np.nan)

    mae_price  = float(mean_absolute_error(df_pred_sw.loc[is_finite_price, 'price_true'],
                                           df_pred_sw.loc[is_finite_price, 'price_pred']))
    rmse_price = float(np.sqrt(mean_squared_error(df_pred_sw.loc[is_finite_price, 'price_true'],
                                                  df_pred_sw.loc[is_finite_price, 'price_pred'])))
    r2_price   = (r2_score(df_pred_sw.loc[is_finite_price, 'price_true'],
                           df_pred_sw.loc[is_finite_price, 'price_pred'])
                  if is_finite_price.sum() > 1 else np.nan)

    # 方向性
    y_true_dir = (df_pred_sw.loc[is_finite_ret, 'ret_true'].values > 0).astype(int)
    y_pred_dir = (df_pred_sw.loc[is_finite_ret, 'ret_pred'].values > 0).astype(int)
    precision  = precision_score(y_true_dir, y_pred_dir, zero_division=0)
    recall     = recall_score(y_true_dir, y_pred_dir, zero_division=0)
    f1         = f1_score(y_true_dir, y_pred_dir, zero_division=0)

    # 簽策略（單純符號乘上真實報酬）
    strat = np.sign(df_pred_sw.loc[is_finite_ret, 'ret_pred']) * df_pred_sw.loc[is_finite_ret, 'ret_true']
    if len(strat) > 0:
        ann_factor  = 252 * 24  # 小時資料
        total_return = float((strat + 1).prod() - 1)
        ann_return   = float((1 + total_return) ** (ann_factor / len(strat)) - 1)
        equity       = (1 + strat).cumprod()
        roll_max     = equity.cummax()
        max_dd       = float((equity / roll_max - 1).min())
    else:
        total_return = ann_return = max_dd = np.nan
else:
    mae_val = rmse_val = r2_ret = precision = recall = f1 = total_return = ann_return = max_dd = np.nan
    mae_price = rmse_price = r2_price = np.nan

# 4) Price 指標
if is_finite_price.sum() > 1:
    r2_price = r2_score(df_pred_sw.loc[is_finite_price, 'price_true'],
                        df_pred_sw.loc[is_finite_price, 'price_pred'])
else:
    r2_price = np.nan

# 5) 平均訓練損失
avg_loss = float(np.nanmean(df_pred_sw['train_loss'])) if len(df_pred_sw) else np.nan

# 6) 匯總表
metrics_df = pd.DataFrame({
    'asset':         [ASSET_SYMBOL],
    'group':         [GROUP_HOUR],
    'start':         [TARGET_START_STR],
    'end':           [TARGET_END_STR],
    'mae':           [mae_val],
    'rmse':          [rmse_val],
    'R2_ret':        [r2_ret],
    'R2_price':      [r2_price],
    'mae_price': [mae_price],
    'rmse_price': [rmse_price],
    'R2_price': [r2_price],
    'DA':            [float(np.mean((df_pred_sw.loc[is_finite_ret,'ret_pred'] >= 0) ==
                                     (df_pred_sw.loc[is_finite_ret,'ret_true'] >= 0))) if is_finite_ret.any() else np.nan],
    'F1_Score':      [f1],
    'Precision':     [precision],
    'Recall':        [recall],
    'avg_loss':      [avg_loss],
    'total_return':  [total_return],
    'ann_return':    [ann_return],
    'max_drawdown':  [max_dd],
    'R2_ret':        [r2_ret],
    'R2_price':      [r2_price],
})

# ------------------ Save CSVs ------------------
stem = f"{ASSET_SYMBOL.lower()}_lstm_hour_sliding_multifeat_nogarch"
os.makedirs("./LSTM_diagnostics", exist_ok=True)

out_csv = f"./LSTM_diagnostics/{stem}_{pd.to_datetime(TARGET_START_STR):%Y%m%d}_{pd.to_datetime(TARGET_END_STR):%Y%m%d}.csv"
df_pred_sw.to_csv(out_csv, float_format='%.10f')

metrics_vertical =  metrics_df.T.reset_index()
metrics_vertical.columns = ["metric", "value"]
metrics_csv = f"./LSTM_diagnostics/{stem}_metrics_{pd.to_datetime(TARGET_START_STR):%Y%m%d}_{pd.to_datetime(TARGET_END_STR):%Y%m%d}.csv"
metrics_vertical.to_csv(metrics_csv, index=False)

print("Saved:", out_csv)
print("Metrics:", metrics_csv)

# ------------------ Figures (3+1) ------------------
fig1 = f"./LSTM_diagnostics/{stem}_price_{pd.to_datetime(TARGET_START_STR):%Y%m%d}_{pd.to_datetime(TARGET_END_STR):%Y%m%d}.png"
fig2 = f"./LSTM_diagnostics/{stem}_returns_{pd.to_datetime(TARGET_START_STR):%Y%m%d}_{pd.to_datetime(TARGET_END_STR):%Y%m%d}.png"
fig3 = f"./LSTM_diagnostics/{stem}_scatter_{pd.to_datetime(TARGET_START_STR):%Y%m%d}_{pd.to_datetime(TARGET_END_STR):%Y%m%d}.png"
fig4 = f"./LSTM_diagnostics/{stem}_trainloss_price_{pd.to_datetime(TARGET_START_STR):%Y%m%d}_{pd.to_datetime(TARGET_END_STR):%Y%m%d}.png"

plt.figure(figsize=(12,6))
plt.plot(df_pred_sw.index, df_pred_sw['price_true'], label='True Price')
plt.plot(df_pred_sw.index, df_pred_sw['price_pred'], label='Predicted Price', linestyle='--')
plt.xlabel('Time'); plt.ylabel('Price'); plt.title(f"{ASSET_SYMBOL} Price"); plt.legend(); plt.grid(True)
plt.savefig(fig1, dpi=300); plt.close()

plt.figure(figsize=(12,6))
plt.plot(df_pred_sw.index, df_pred_sw['ret_true'], label='True Return', alpha=0.7)
plt.plot(df_pred_sw.index, df_pred_sw['ret_pred'], label='Predicted Return', alpha=0.7)
plt.axhline(0, color='black', linewidth=1)
plt.xlabel('Time'); plt.ylabel('Log Return'); plt.title(f"{ASSET_SYMBOL} Returns"); plt.legend(); plt.grid(True)
plt.savefig(fig2, dpi=300); plt.close()

plt.figure(figsize=(6,6))
plt.scatter(df_pred_sw['ret_true'], df_pred_sw['ret_pred'], alpha=0.5)
plt.axhline(0, color='black', linewidth=1); plt.axvline(0, color='black', linewidth=1)
plt.xlabel('True Return'); plt.ylabel('Predicted Return'); plt.title(f"{ASSET_SYMBOL} True vs Predicted Returns"); plt.grid(True)
plt.savefig(fig3, dpi=300); plt.close()

# Price + Train Loss
fig, ax1 = plt.subplots(figsize=(12,6))
ax1.plot(df_pred_sw.index, df_pred_sw['price_true'], label='True Price')
ax1.plot(df_pred_sw.index, df_pred_sw['price_pred'], label='Predicted Price', linestyle='--', alpha=0.9)
ax1.set_xlabel("Time"); ax1.set_ylabel("Price")
ax2 = ax1.twinx()
ax2.plot(df_pred_sw.index, df_pred_sw['train_loss'], label='Train Loss', alpha=0.6)
ax2.set_ylabel("Training Loss")
ax1.grid(True, which='both', axis='both', alpha=0.2)
lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, loc='upper left')
plt.tight_layout(); plt.savefig(fig4, dpi=300); plt.close()

print('Figures:', fig1, fig2, fig3, fig4)


[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
start 2023-01-13 03:00:00
start 2023-01-13 04:00:00
start 2023-01-13 05:00:00
start 2023-01-13 06:00:00
start 2023-01-13 07:00:00
start 2023-01-13 08:00:00
start 2023-01-13 09:00:00
start 2023-01-13 10:00:00
start 2023-01-13 11:00:00
start 2023-01-13 12:00:00
start 2023-01-13 13:00:00
start 2023-01-13 14:00:00
start 2023-01-13 15:00:00
start 2023-01-13 16:00:00
start 2023-01-13 17:00:00
start 2023-01-13 18:00:00
start 2023-01-13 19:00:00
start 2023-01-13 20:00:00
start 2023-01-13 21:00:00
start 2023-01-13 22:00:00
start 2023-01-13 23:00:00
start 2023-01-14 00:00:00
start 2023-01-14 01:00:00
start 2023-01-14 02:00:00
start 2023-01-14 03:00:00
start 2023-01-14 04:00:00
start 2023-01-14 05:00:00
start 2023-01-14 06:00:00
start 2023-01-14 07:00:00
start 2023-01-14 08:00:00
start 2023-01-14 09:00:00
start 2023-01-14 10:00:00
start 2023-01-14 11:00:00
start 2023-01-14 12:00:00
start 2023-01-14 13:00:00
start 2023-01-14 14:00:00
start 2023-01-14 15:00:00
sta