
# トヨタ株 (7203.T) — 翌日終値予測 + 売買判断（Buy/Sell）
**目的**: 2020/09〜2025/06 の約5年間の株価データ（下記コードでは CSV に合わせて NYSE 上場の `TM`、2020-09-01〜2025-06-27 を使用）を用い、  
1) 翌日の終値を**回帰**で予測し、  
2) その予測に基づいて**売買判断（Buy/Sell）**の**分類**を行う。

**ポイント**  
- クリプト向けチュートリアル（15分足 / GMO Fetcher）を **日次株価 / Yahoo Finance 取得**に置換。  
- `GmoFetcher` 相当の薄いラッパー (`YfFetcher`) を実装し、**キャッシュ（joblib.Memory）**で再取得を抑制。  
- **LSTM** を用いた時系列ウィンドウ学習（回帰）＋ 予測結果を使った **2値分類**。  
- ベースライン（線形回帰）と比較、さらに**指標可視化**、**単純バックテスト**を実装。

> 環境にネットアクセスが無い場合は、Kaggle等からダウンロードした `7203.T.csv` を読み込むパスも提供します。


In [24]:

# === Imports ===
import os
import sys
import math
import gc
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import joblib
from joblib import Memory
from pathlib import Path
from datetime import datetime

# Plot
import matplotlib.pyplot as plt

# ML/DL
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.model_selection import TimeSeriesSplit

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Optional: yfinance for data download (requires internet)
# HAS_YF records whether the import succeeded so later code can fall back to a local CSV.
try:
    import yfinance as yf
    HAS_YF = True
except Exception:
    # NOTE(review): any exception raised while importing yfinance is treated as "not available".
    HAS_YF = False

# Report the environment this notebook is running in
print('TensorFlow:', tf.__version__)
print('HAS_YF:', HAS_YF)


TensorFlow: 2.20.0
HAS_YF: True


In [None]:
# === Parameters ===
TICKER = "TM"      # Toyota Motor (NYSE: TM) - chosen to match the CSV file
START_DATE = "2020-09-01"
END_DATE   = "2025-06-27"  # matches the last day of the dataset

# On-disk cache for fetched data (joblib.Memory); the directory is created on the spot.
CACHE_DIR = Path('/tmp/yf_cache_v2')
CACHE_DIR.mkdir(parents=True, exist_ok=True)
memory = Memory(location=str(CACHE_DIR), verbose=0)

# Training hyperparameters
WINDOW_SIZE = 30     # number of days of history used for each prediction
BATCH_SIZE  = 64
EPOCHS      = 50
VAL_SPLIT   = 0.0     # 0 because the time-series split is done explicitly

# Time-series split (fixed boundaries)
SPLIT_TRAIN_END = "2024-03-31"
SPLIT_VAL_END   = "2025-03-31"  # val: 2024-04-01 to 2025-03-31
# test: 2025-04-01 to 2025-06-27

# Random seeds (NumPy and TensorFlow)
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [None]:
# === YfFetcher: GmoFetcher 相当の簡易ラッパ ===
def _download_ohlcv_v2(ticker, start, end, interval='1d', csv_path=None):
    """Return raw price data from a local CSV if it exists, otherwise from yfinance.

    Args:
        ticker: Symbol handed to ``yf.download``.
        start: Start date handed to ``yf.download``.
        end: End date handed to ``yf.download``.
        interval: Bar interval handed to ``yf.download``.
        csv_path: Optional path to a local CSV. When the file exists it is
            loaded as-is; ``ticker``, ``start``, ``end`` and ``interval`` are
            not applied to it.

    Returns:
        A DataFrame indexed by parsed dates. For the CSV branch every column
        is coerced to numeric and rows that are entirely NaN are dropped.

    Raises:
        RuntimeError: No usable CSV was given and yfinance is unavailable.
    """
    has_csv = csv_path is not None and Path(csv_path).exists()
    if not has_csv:
        if not HAS_YF:
            raise RuntimeError("yfinance が利用できません。csv_path を指定してください。")
        return yf.download(ticker, start=start, end=end, interval=interval, progress=False)

    frame = pd.read_csv(csv_path)

    # Discard a leading row that holds no values at all.
    if len(frame) > 0 and frame.iloc[0].isna().all():
        frame = frame.iloc[1:].reset_index(drop=True)

    # Prefer a column literally named date/datetime; otherwise assume the
    # first column holds the dates.
    named_date_cols = [
        c for c in frame.columns
        if str(c).strip().lower() in {'date', 'datetime'}
    ]
    date_col = named_date_cols[0] if named_date_cols else frame.columns[0]

    # Keep only rows whose date cell can be parsed, then index by that date.
    parseable = pd.to_datetime(frame[date_col], errors='coerce').notna()
    frame = frame[parseable]
    frame[date_col] = pd.to_datetime(frame[date_col])
    frame = frame.set_index(date_col)

    # Coerce every remaining column to numeric (unparseable cells become NaN).
    for name in frame.columns:
        frame[name] = pd.to_numeric(frame[name], errors='coerce')

    return frame.dropna(how='all')


class YfFetcher:
    """Thin fetcher that loads OHLCV data and normalizes its layout.

    When a ``joblib.Memory`` is supplied, the raw download function is wrapped
    with ``memory.cache``; otherwise it is called directly.
    """

    def __init__(self, memory=None):
        self.memory = memory
        # The cached function carries a version suffix in its name because the
        # memory cache breaks when the function definition changes.
        self._fetch_fn = (
            _download_ohlcv_v2 if memory is None else memory.cache(_download_ohlcv_v2)
        )

    @staticmethod
    def _normalize_df(df):
        """Return ``df`` with a sorted DatetimeIndex and canonical column names.

        Column names are lower-cased with spaces and underscores removed, so
        ``Adj Close`` becomes ``adjclose``. A missing ``adjclose`` is filled
        from ``close``.

        Returns:
            A frame with columns open/high/low/close/adjclose/volume, or an
            empty DataFrame when ``df`` is ``None`` or has no rows.

        Raises:
            ValueError: A required column (other than a derivable
                ``adjclose``) is missing.
        """
        if df is None or len(df) == 0:
            return pd.DataFrame()

        out = df.copy()
        if not isinstance(out.index, pd.DatetimeIndex):
            out.index = pd.to_datetime(out.index)
        # Drop rows whose index is missing, then order chronologically.
        out = out[out.index.notna()]
        out = out.sort_index()

        # Flatten multi-level columns by keeping the first level.
        if hasattr(out.columns, 'nlevels') and out.columns.nlevels > 1:
            out.columns = [c if isinstance(c, str) else c[0] for c in out.columns]

        renamed = {
            c: str(c).lower().replace(' ', '').replace('_', '')
            for c in out.columns
        }
        out = out.rename(columns=renamed)

        required = ['open', 'high', 'low', 'close', 'adjclose', 'volume']
        for name in required:
            if name in out.columns:
                continue
            # adjclose may be absent; fall back to a copy of close.
            if name == 'adjclose' and 'close' in out.columns:
                out['adjclose'] = out['close']
            else:
                raise ValueError(f"Missing column: {name}")
        return out[['open', 'high', 'low', 'close', 'adjclose', 'volume']]

    def fetch_ohlcv(self, ticker, start, end, interval='1d', csv_path=None):
        """Fetch OHLCV data and return it in normalized form.

        Args:
            ticker: e.g. '7203.T' or 'TM'.
            start: Start date as 'YYYY-MM-DD'.
            end: End date as 'YYYY-MM-DD'.
            interval: Bar interval; defaults to '1d'.
            csv_path: Path to a local CSV (for use without network access).
        """
        raw = self._fetch_fn(ticker, start, end, interval, csv_path)
        return self._normalize_df(raw)


fetcher = YfFetcher(memory=memory)

In [None]:
# === Data acquisition ===
# Without network access, set csv_path to a local file
CSV_PATH = "data/TM_1980-01-01_2025-06-27.csv"

df = fetcher.fetch_ohlcv(
    ticker=TICKER,
    start=START_DATE,
    end=END_DATE,
    interval='1d',
    csv_path=CSV_PATH  # specifying this removes the dependency on Yahoo
)

# Quick sanity report on what was loaded
print('Data loaded successfully!')
print(f'Shape: {df.shape}')
print(f'Date range: {df.index.min()} to {df.index.max()}')
print('\nFirst few rows:')
print(df.head())
print('\nLast few rows:')
print(df.tail())
print('\nData statistics:')
print(df.describe())

# Save (for compatibility with the tutorial)
df.to_pickle('df_ohlcv_7203T.pkl')
print('\nData saved to df_ohlcv_7203T.pkl')

In [None]:
# === Detailed visualization of the loaded data ===
# 3x2 grid: price, volume, OHLC distribution, return histogram, monthly mean, text summary.
fig, axes = plt.subplots(3, 2, figsize=(15, 12))

# 1. Close price time series
axes[0, 0].plot(df.index, df['close'], linewidth=1, color='steelblue')
axes[0, 0].set_title('Close Price Over Time', fontsize=12, fontweight='bold')
axes[0, 0].set_xlabel('Date')
axes[0, 0].set_ylabel('Close Price ($)')
axes[0, 0].grid(True, alpha=0.3)

# 2. Volume time series
axes[0, 1].bar(df.index, df['volume'], width=1, color='coral', alpha=0.6)
axes[0, 1].set_title('Trading Volume Over Time', fontsize=12, fontweight='bold')
axes[0, 1].set_xlabel('Date')
axes[0, 1].set_ylabel('Volume')
axes[0, 1].grid(True, alpha=0.3)

# 3. OHLC distribution (box plot)
# NOTE(review): newer Matplotlib releases rename boxplot's `labels` to `tick_labels` — confirm against the installed version.
ohlc_data = [df['open'], df['high'], df['low'], df['close']]
axes[1, 0].boxplot(ohlc_data, labels=['Open', 'High', 'Low', 'Close'])
axes[1, 0].set_title('OHLC Price Distribution', fontsize=12, fontweight='bold')
axes[1, 0].set_ylabel('Price ($)')
axes[1, 0].grid(True, alpha=0.3)

# 4. Distribution of daily returns
daily_returns = df['close'].pct_change().dropna()
axes[1, 1].hist(daily_returns, bins=50, color='green', alpha=0.7, edgecolor='black')
axes[1, 1].axvline(daily_returns.mean(), color='red', linestyle='--', linewidth=2, label=f'Mean: {daily_returns.mean():.4f}')
axes[1, 1].set_title('Daily Returns Distribution', fontsize=12, fontweight='bold')
axes[1, 1].set_xlabel('Daily Return')
axes[1, 1].set_ylabel('Frequency')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)

# 5. Monthly average close
# Work on a copy so the helper 'year_month' column does not leak into df.
df_temp = df.copy()
df_temp['year_month'] = df_temp.index.to_period('M')
monthly_avg = df_temp.groupby('year_month')['close'].mean()
axes[2, 0].plot(monthly_avg.index.to_timestamp(), monthly_avg.values, marker='o', linewidth=2, markersize=3, color='purple')
axes[2, 0].set_title('Monthly Average Close Price', fontsize=12, fontweight='bold')
axes[2, 0].set_xlabel('Date')
axes[2, 0].set_ylabel('Average Close Price ($)')
axes[2, 0].grid(True, alpha=0.3)

# 6. Data statistics summary (text panel)
# The "Sharpe (approx)" line is mean/std of daily returns scaled by sqrt(252).
axes[2, 1].axis('off')
stats_text = f"""
Data Statistics Summary
{'='*40}
Period: {df.index.min().date()} to {df.index.max().date()}
Total Days: {len(df)}

Price Statistics:
  Mean Close: ${df['close'].mean():.2f}
  Std Close: ${df['close'].std():.2f}
  Min Close: ${df['close'].min():.2f}
  Max Close: ${df['close'].max():.2f}

Daily Returns:
  Mean: {daily_returns.mean():.4f} ({daily_returns.mean()*100:.2f}%)
  Std: {daily_returns.std():.4f} ({daily_returns.std()*100:.2f}%)
  Sharpe (approx): {(daily_returns.mean()/daily_returns.std())*np.sqrt(252):.2f}

Volume Statistics:
  Mean Volume: {df['volume'].mean():.0f}
  Total Volume: {df['volume'].sum():.0f}
"""
axes[2, 1].text(0.1, 0.5, stats_text, fontsize=10, family='monospace', 
                verticalalignment='center', bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))

plt.tight_layout()
plt.show()

print("Data visualization complete!")

In [None]:
# === Date-range filter (as a safeguard) ===
# Make sure df exists; if not, fall back to the pickle written by the data-acquisition cell
if 'df' not in dir():
    fallback_path = Path('df_ohlcv_7203T.pkl')
    if fallback_path.exists():
        # NOTE(review): read_pickle executes pickle data; only load files produced by this notebook.
        df = pd.read_pickle(fallback_path)
        print(f"Loaded cached OHLCV data from {fallback_path}")
    else:
        raise RuntimeError('価格データ(df)が存在しません。先にデータ取得セルを実行するか df_ohlcv_7203T.pkl を用意してください。')

# Keep only rows inside [START_DATE, END_DATE] (both ends inclusive)
df = df[(df.index >= pd.to_datetime(START_DATE)) & (df.index <= pd.to_datetime(END_DATE))].copy()
print(f'Filtered data shape: {df.shape}')

# === テクニカル指標・派生特徴量 ===
def rsi(series, period=14):
    """Return an RSI series built from simple rolling means of gains and losses.

    Args:
        series: Price series.
        period: Rolling window length; a full window is required, so the
            leading values are NaN.

    Returns:
        A series of ``100 - 100 / (1 + RS)`` where RS is the mean gain divided
        by the mean loss (plus 1e-9 to avoid division by zero).
    """
    change = series.diff()
    gains = change.clip(lower=0)
    losses = -1 * change.clip(upper=0)

    def _full_window_mean(values):
        return values.rolling(window=period, min_periods=period).mean()

    strength = _full_window_mean(gains) / (_full_window_mean(losses) + 1e-9)
    return 100 - (100 / (1 + strength))

def macd(series, fast=12, slow=26, signal=9):
    """Return the MACD line, its signal line and their difference (histogram).

    All averages are exponential moving averages computed with
    ``ewm(span=..., adjust=False)``.

    Args:
        series: Price series.
        fast: Span of the fast EMA.
        slow: Span of the slow EMA.
        signal: Span of the EMA applied to the MACD line.

    Returns:
        Tuple ``(macd_line, signal_line, histogram)``.
    """
    def _ema(values, span):
        return values.ewm(span=span, adjust=False).mean()

    line = _ema(series, fast) - _ema(series, slow)
    trigger = _ema(line, signal)
    return line, trigger, line - trigger

def bollinger(series, window=20, num_std=2):
    """Return Bollinger Band components for ``series``.

    Args:
        series: Price series.
        window: Rolling window length; a full window is required, so the
            leading values are NaN.
        num_std: Number of rolling standard deviations between the middle
            line and each band.

    Returns:
        Tuple ``(middle, upper, lower, width)`` where ``width`` is
        ``(upper - lower) / (middle + 1e-9)``.
    """
    rolling = series.rolling(window=window, min_periods=window)
    middle = rolling.mean()
    offset = num_std * rolling.std()
    upper = middle + offset
    lower = middle - offset
    relative_width = (upper - lower) / (middle + 1e-9)
    return middle, upper, lower, relative_width

# Everything below is derived from the closing price
close = df['close']

# Returns and moving averages (simple and exponential)
df['ret_1d'] = close.pct_change()
df['ma_7']   = close.rolling(7).mean()
df['ma_30']  = close.rolling(30).mean()
df['ema_7']  = close.ewm(span=7, adjust=False).mean()
df['ema_30'] = close.ewm(span=30, adjust=False).mean()
df['rsi_14'] = rsi(close, 14)

# MACD line, signal line and histogram
macd_line, signal_line, macd_hist = macd(close)
df['macd']   = macd_line
df['macd_s'] = signal_line
df['macd_h'] = macd_hist

# Bollinger Bands (20-day window, 2 standard deviations)
bb_ma, bb_up, bb_lo, bb_w = bollinger(close, 20, 2)
df['bb_ma'] = bb_ma
df['bb_up'] = bb_up
df['bb_lo'] = bb_lo
df['bb_w']  = bb_w

# Volume-based feature
df['vol_chg'] = df['volume'].pct_change()

# Next day's close (regression target)
df['target_close_t1'] = df['close'].shift(-1)

# 1 (Buy) if the next day's close is higher than today's, otherwise 0 (Sell)
df['target_buy'] = (df['target_close_t1'] > df['close']).astype(float)

# pct_change produces +/-inf when the previous value is 0 (e.g. a zero-volume
# day). dropna() does not remove inf, so convert it to NaN first; otherwise the
# value would survive into the feature matrix handed to the scaler.
df = df.replace([np.inf, -np.inf], np.nan)

# Remove rows with missing values (indicator warm-up rows and the final row,
# whose next-day target does not exist)
df = df.dropna().copy()
print('Final shape with features:', df.shape)
print('\nFirst few rows with features:')
print(df.head(3))

In [None]:
# === Detailed visualization of the technical indicators ===
# 4x2 grid; the first two rows each span both columns.
fig = plt.figure(figsize=(16, 12))
gs = fig.add_gridspec(4, 2, hspace=0.3, wspace=0.3)

# 1. Close price + moving averages
ax1 = fig.add_subplot(gs[0, :])
ax1.plot(df.index, df['close'], label='Close', linewidth=1.5, color='black', alpha=0.7)
ax1.plot(df.index, df['ma_7'], label='MA(7)', linewidth=1, linestyle='--', color='blue')
ax1.plot(df.index, df['ma_30'], label='MA(30)', linewidth=1, linestyle='--', color='red')
ax1.plot(df.index, df['ema_7'], label='EMA(7)', linewidth=1, linestyle=':', color='cyan')
ax1.plot(df.index, df['ema_30'], label='EMA(30)', linewidth=1, linestyle=':', color='magenta')
ax1.set_title('Toyota (TM) Close Price with Moving Averages', fontsize=14, fontweight='bold')
ax1.set_xlabel('Date')
ax1.set_ylabel('Price ($)')
ax1.legend(loc='best')
ax1.grid(True, alpha=0.3)

# 2. Bollinger Bands
ax2 = fig.add_subplot(gs[1, :])
ax2.plot(df.index, df['close'], label='Close', linewidth=1.5, color='black')
ax2.plot(df.index, df['bb_ma'], label='BB MA(20)', linewidth=1, linestyle='--', color='blue')
ax2.fill_between(df.index, df['bb_up'], df['bb_lo'], alpha=0.2, color='gray', label='BB Bands')
ax2.plot(df.index, df['bb_up'], linewidth=0.8, linestyle=':', color='green', label='Upper Band')
ax2.plot(df.index, df['bb_lo'], linewidth=0.8, linestyle=':', color='red', label='Lower Band')
ax2.set_title('Bollinger Bands', fontsize=14, fontweight='bold')
ax2.set_xlabel('Date')
ax2.set_ylabel('Price ($)')
ax2.legend(loc='best')
ax2.grid(True, alpha=0.3)

# 3. RSI with the 70 / 30 reference levels and the band between them shaded
ax3 = fig.add_subplot(gs[2, 0])
ax3.plot(df.index, df['rsi_14'], linewidth=1, color='purple')
ax3.axhline(70, linestyle='--', color='red', linewidth=1, label='Overbought (70)')
ax3.axhline(30, linestyle='--', color='green', linewidth=1, label='Oversold (30)')
ax3.axhline(50, linestyle='-', color='gray', linewidth=0.5, alpha=0.5)
ax3.fill_between(df.index, 30, 70, alpha=0.1, color='yellow')
ax3.set_title('RSI (14)', fontsize=12, fontweight='bold')
ax3.set_xlabel('Date')
ax3.set_ylabel('RSI')
ax3.legend(loc='best')
ax3.grid(True, alpha=0.3)
ax3.set_ylim(0, 100)

# 4. MACD (line, signal line, histogram)
ax4 = fig.add_subplot(gs[2, 1])
ax4.plot(df.index, df['macd'], label='MACD Line', linewidth=1, color='blue')
ax4.plot(df.index, df['macd_s'], label='Signal Line', linewidth=1, color='red')
ax4.bar(df.index, df['macd_h'], label='Histogram', alpha=0.3, color='gray', width=1)
ax4.axhline(0, linestyle='-', color='black', linewidth=0.5)
ax4.set_title('MACD', fontsize=12, fontweight='bold')
ax4.set_xlabel('Date')
ax4.set_ylabel('MACD Value')
ax4.legend(loc='best')
ax4.grid(True, alpha=0.3)

# 5. Daily returns (green for positive days, red otherwise)
ax5 = fig.add_subplot(gs[3, 0])
colors = ['green' if x > 0 else 'red' for x in df['ret_1d']]
ax5.bar(df.index, df['ret_1d'], color=colors, alpha=0.6, width=1)
ax5.axhline(0, linestyle='-', color='black', linewidth=0.5)
ax5.set_title('Daily Returns', fontsize=12, fontweight='bold')
ax5.set_xlabel('Date')
ax5.set_ylabel('Return')
ax5.grid(True, alpha=0.3)

# 6. Volume change rate
ax6 = fig.add_subplot(gs[3, 1])
ax6.plot(df.index, df['vol_chg'], linewidth=0.8, color='coral')
ax6.axhline(0, linestyle='-', color='black', linewidth=0.5)
ax6.set_title('Volume Change Rate', fontsize=12, fontweight='bold')
ax6.set_xlabel('Date')
ax6.set_ylabel('Volume Change')
ax6.grid(True, alpha=0.3)

plt.suptitle('Technical Indicators Overview', fontsize=16, fontweight='bold', y=0.995)
plt.show()

print("Technical indicators visualization complete!")

In [None]:

# === Time-series split ===
# Chronological split at the fixed boundary dates; no shuffling.
train = df[df.index <= SPLIT_TRAIN_END].copy()
val   = df[(df.index > SPLIT_TRAIN_END) & (df.index <= SPLIT_VAL_END)].copy()
test  = df[df.index > SPLIT_VAL_END].copy()

print('train:', train.index.min(), '->', train.index.max(), len(train))
print('val  :', val.index.min(),   '->', val.index.max(),   len(val))
print('test :', test.index.min(),  '->', test.index.max(),  len(test))

# Feature columns (the target columns are deliberately not included)
FEATURE_COLS = [
    'open','high','low','close','adjclose','volume',
    'ret_1d','ma_7','ma_30','ema_7','ema_30','rsi_14',
    'macd','macd_s','macd_h','bb_ma','bb_up','bb_lo','bb_w','vol_chg'
]

def make_window_dataset(df_part, feature_cols, target_col, window):
    """Build sliding-window samples and their labels from one data split.

    Sample ``i`` holds the feature rows ``i .. i + window - 1``. Its label is
    the value of ``target_col`` on the LAST row of that window
    (``i + window - 1``). The target columns in this notebook are already
    shifted one day ahead (``close.shift(-1)``), so the last row's target is
    the day right after the window. Reading the label from row ``i + window``
    would point two days past the window end instead.

    Args:
        df_part: DataFrame containing ``feature_cols`` and ``target_col``.
        feature_cols: Names of the feature columns.
        target_col: Name of the (already forward-shifted) target column.
        window: Number of consecutive rows per sample; must be >= 1.

    Returns:
        Tuple ``(X, y)`` with ``X`` of shape ``(n, window, n_features)`` and
        ``y`` of shape ``(n,)``, where ``n = max(len(df_part) - window, 0)``.

    Raises:
        ValueError: ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError(f"window must be >= 1, got {window}")

    feats = df_part[feature_cols].values
    target = df_part[target_col].values

    # The sample count stays at len - window so the output length is unchanged
    # for code that builds per-window vectors with range(len(df_part) - window).
    n_samples = max(len(df_part) - window, 0)
    if n_samples == 0:
        # Keep the rank/shape well-defined even when the split is too short.
        return (
            np.empty((0, window, feats.shape[1]), dtype=feats.dtype),
            np.empty((0,), dtype=target.dtype),
        )

    X = np.array([feats[i:i + window] for i in range(n_samples)])
    # Label of sample i lives on row i + window - 1 (the last row of the window).
    y = np.array(target[window - 1:window - 1 + n_samples])
    return X, y

# Fit the scaler on the training split only; every split is transformed with it
scaler = StandardScaler().fit(train[FEATURE_COLS].values)

def scale_df(df_part):
    """Return a copy of ``df_part`` whose FEATURE_COLS are standardized.

    Columns outside FEATURE_COLS (e.g. the targets) are left untouched.
    """
    scaled = df_part.copy()
    standardized = scaler.transform(scaled[FEATURE_COLS].values)
    scaled[FEATURE_COLS] = standardized
    return scaled

# Standardized copies of each split (targets stay in their original units)
train_s = scale_df(train)
val_s   = scale_df(val)
test_s  = scale_df(test)

# Regression datasets (next-day close)
X_train_reg, y_train_reg = make_window_dataset(train_s, FEATURE_COLS, 'target_close_t1', WINDOW_SIZE)
X_val_reg,   y_val_reg   = make_window_dataset(val_s,   FEATURE_COLS, 'target_close_t1', WINDOW_SIZE)
X_test_reg,  y_test_reg  = make_window_dataset(test_s,  FEATURE_COLS, 'target_close_t1', WINDOW_SIZE)

# Classification datasets (Buy/Sell); same windows, different label column
X_train_cls, y_train_cls = make_window_dataset(train_s, FEATURE_COLS, 'target_buy', WINDOW_SIZE)
X_val_cls,   y_val_cls   = make_window_dataset(val_s,   FEATURE_COLS, 'target_buy', WINDOW_SIZE)
X_test_cls,  y_test_cls  = make_window_dataset(test_s,  FEATURE_COLS, 'target_buy', WINDOW_SIZE)

# Notebook cell output: shapes of the three regression inputs
X_train_reg.shape, X_val_reg.shape, X_test_reg.shape


In [None]:
# === Visualization of the data split ===
fig, axes = plt.subplots(2, 1, figsize=(15, 8))

# 1. Close price over the whole period, coloured by split
ax1 = axes[0]
ax1.plot(df.index, df['close'], linewidth=1, color='gray', alpha=0.5, label='All Data')
ax1.plot(train.index, train['close'], linewidth=1.5, color='blue', label=f'Train ({len(train)} days)')
ax1.plot(val.index, val['close'], linewidth=1.5, color='orange', label=f'Val ({len(val)} days)')
ax1.plot(test.index, test['close'], linewidth=1.5, color='green', label=f'Test ({len(test)} days)')

# Split boundary lines
ax1.axvline(pd.to_datetime(SPLIT_TRAIN_END), color='red', linestyle='--', linewidth=2, alpha=0.7, label='Train/Val Split')
ax1.axvline(pd.to_datetime(SPLIT_VAL_END), color='purple', linestyle='--', linewidth=2, alpha=0.7, label='Val/Test Split')

ax1.set_title('Data Split Visualization (Train/Val/Test)', fontsize=14, fontweight='bold')
ax1.set_xlabel('Date')
ax1.set_ylabel('Close Price ($)')
ax1.legend(loc='best')
ax1.grid(True, alpha=0.3)

# 2. Distribution of the target variable (Buy/Sell label) per split
# target_buy is 0/1, so its mean is the share of Buy labels.
ax2 = axes[1]
train_buy_ratio = train['target_buy'].mean()
val_buy_ratio = val['target_buy'].mean()
test_buy_ratio = test['target_buy'].mean()

splits = ['Train', 'Val', 'Test']
buy_ratios = [train_buy_ratio, val_buy_ratio, test_buy_ratio]
sell_ratios = [1-train_buy_ratio, 1-val_buy_ratio, 1-test_buy_ratio]

x = np.arange(len(splits))
width = 0.35

bars1 = ax2.bar(x - width/2, buy_ratios, width, label='Buy (1)', color='green', alpha=0.7)
bars2 = ax2.bar(x + width/2, sell_ratios, width, label='Sell (0)', color='red', alpha=0.7)

# Print the ratio above each bar
for bars in [bars1, bars2]:
    for bar in bars:
        height = bar.get_height()
        ax2.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.1%}', ha='center', va='bottom', fontsize=10)

ax2.set_ylabel('Ratio')
ax2.set_title('Target Variable Distribution (Buy/Sell) by Split', fontsize=14, fontweight='bold')
ax2.set_xticks(x)
ax2.set_xticklabels(splits)
ax2.legend()
ax2.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# Text summary of split sizes, label balance and window-dataset shapes
print(f"\nDataset Split Summary:")
print(f"  Train: {len(train):4d} days ({len(train)/len(df)*100:5.1f}%) - Buy: {train_buy_ratio:.1%}, Sell: {1-train_buy_ratio:.1%}")
print(f"  Val:   {len(val):4d} days ({len(val)/len(df)*100:5.1f}%) - Buy: {val_buy_ratio:.1%}, Sell: {1-val_buy_ratio:.1%}")
print(f"  Test:  {len(test):4d} days ({len(test)/len(df)*100:5.1f}%) - Buy: {test_buy_ratio:.1%}, Sell: {1-test_buy_ratio:.1%}")
print(f"\nWindow Dataset Shapes:")
print(f"  X_train_reg: {X_train_reg.shape}, X_val_reg: {X_val_reg.shape}, X_test_reg: {X_test_reg.shape}")

In [None]:

# === ベースライン：線形回帰（回帰） ===
# ウィンドウを平均で潰して単純特徴に落とす簡易ベースライン
def collapse_window_mean(X):
    """Average each sample over its window axis: (N, window, F) -> (N, F)."""
    per_feature_mean = X.mean(axis=1)
    return per_feature_mean

# Window-averaged features for each split
Xtr_bl = collapse_window_mean(X_train_reg)
Xv_bl  = collapse_window_mean(X_val_reg)
Xte_bl = collapse_window_mean(X_test_reg)

# Fit ordinary least squares on the training split only
linr = LinearRegression()
linr.fit(Xtr_bl, y_train_reg)

# Baseline predictions for every split
pred_tr_bl = linr.predict(Xtr_bl)
pred_v_bl  = linr.predict(Xv_bl)
pred_te_bl = linr.predict(Xte_bl)

def rmse(y, p):
    """Return the root mean squared error between targets ``y`` and predictions ``p``."""
    mse = mean_squared_error(y, p)
    return math.sqrt(mse)

# Report baseline errors per split, plus R^2 on the test split
print('Baseline Linear Regression')
print('  Train RMSE:', rmse(y_train_reg, pred_tr_bl))
print('  Val   RMSE:', rmse(y_val_reg,   pred_v_bl))
print('  Test  RMSE:', rmse(y_test_reg,  pred_te_bl))

print('  Test R2  :', r2_score(y_test_reg, pred_te_bl))


In [None]:
# === LSTM (regression: next-day close) ===
# Reset Keras global state before building a new model
tf.keras.backend.clear_session()

# Two stacked LSTM layers with dropout in between, followed by a single linear output.
# Input shape is (window length, number of features), taken from the training array.
model_reg = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train_reg.shape[1], X_train_reg.shape[2])),
    Dropout(0.2),
    LSTM(64),
    Dense(1, activation='linear')
])
model_reg.compile(optimizer='adam', loss='mse')

print("Model Architecture:")
model_reg.summary()

# All callbacks monitor val_loss: stop early and restore the best weights, halve the
# learning rate on plateaus, and save the best model to 'best_regression.keras'.
callbacks = [
    EarlyStopping(patience=10, restore_best_weights=True, monitor='val_loss'),
    ReduceLROnPlateau(patience=5, factor=0.5, monitor='val_loss', verbose=1),
    ModelCheckpoint('best_regression.keras', monitor='val_loss', save_best_only=True, verbose=0)
]

# NOTE(review): the regression targets are not standardized (only FEATURE_COLS are scaled),
# so the MSE loss is in squared price units.
hist = model_reg.fit(
    X_train_reg, y_train_reg,
    validation_data=(X_val_reg, y_val_reg),
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)

# Inference on every split (flattened to 1-D)
pred_tr = model_reg.predict(X_train_reg, verbose=0).ravel()
pred_v  = model_reg.predict(X_val_reg, verbose=0).ravel()
pred_te = model_reg.predict(X_test_reg, verbose=0).ravel()

print('\n' + '='*60)
print('LSTM Regression Results')
print('='*60)
print(f'  Train RMSE: {rmse(y_train_reg, pred_tr):.4f}')
print(f'  Val   RMSE: {rmse(y_val_reg, pred_v):.4f}')
print(f'  Test  RMSE: {rmse(y_test_reg, pred_te):.4f}')
print(f'  Test R2:    {r2_score(y_test_reg, pred_te):.4f}')
print('='*60)

# === Detailed visualization of the learning curves ===
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# 1. Loss curves (linear scale)
ax1 = axes[0, 0]
ax1.plot(hist.history['loss'], label='Train Loss', linewidth=2, color='blue')
ax1.plot(hist.history['val_loss'], label='Val Loss', linewidth=2, color='orange')
ax1.set_title('Training and Validation Loss', fontsize=12, fontweight='bold')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Loss (MSE)')
ax1.grid(True, alpha=0.3)

# Mark the epoch with the lowest validation loss
min_val_loss_epoch = np.argmin(hist.history['val_loss'])
min_val_loss = hist.history['val_loss'][min_val_loss_epoch]
ax1.plot(min_val_loss_epoch, min_val_loss, 'r*', markersize=15, 
         label=f'Best Val Loss: {min_val_loss:.4f} @ epoch {min_val_loss_epoch+1}')
# Build the legend once, after every labelled artist has been added
ax1.legend()

# 2. Loss curves (log scale)
ax2 = axes[0, 1]
ax2.plot(hist.history['loss'], label='Train Loss', linewidth=2, color='blue')
ax2.plot(hist.history['val_loss'], label='Val Loss', linewidth=2, color='orange')
ax2.set_title('Training and Validation Loss (Log Scale)', fontsize=12, fontweight='bold')
ax2.set_xlabel('Epoch')
ax2.set_ylabel('Loss (MSE)')
ax2.set_yscale('log')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Learning-rate schedule (effect of ReduceLROnPlateau)
# Keras 3 records the rate under 'learning_rate', older tf.keras under 'lr';
# accept either so this panel is not silently replaced by the fallback.
lr_history = hist.history.get('learning_rate', hist.history.get('lr'))
ax3 = axes[1, 0]
if lr_history is not None:
    ax3.plot(lr_history, linewidth=2, color='green')
    ax3.set_title('Learning Rate Schedule', fontsize=12, fontweight='bold')
    ax3.set_xlabel('Epoch')
    ax3.set_ylabel('Learning Rate')
    ax3.set_yscale('log')
    ax3.grid(True, alpha=0.3)
else:
    # No learning-rate history available: show the validation/training loss gap instead
    loss_diff = np.array(hist.history['val_loss']) - np.array(hist.history['loss'])
    ax3.plot(loss_diff, linewidth=2, color='red')
    ax3.axhline(0, linestyle='--', color='black', linewidth=1)
    ax3.set_title('Validation - Training Loss Gap', fontsize=12, fontweight='bold')
    ax3.set_xlabel('Epoch')
    ax3.set_ylabel('Loss Difference')
    ax3.grid(True, alpha=0.3)

# 4. Epoch-to-epoch change of the validation loss (negative = improvement)
ax4 = axes[1, 1]
val_loss_improvement = np.diff(hist.history['val_loss'])
ax4.plot(range(1, len(val_loss_improvement)+1), val_loss_improvement, 
         marker='o', linewidth=1, markersize=4, color='purple')
ax4.axhline(0, linestyle='--', color='black', linewidth=1)
ax4.set_title('Validation Loss Improvement per Epoch', fontsize=12, fontweight='bold')
ax4.set_xlabel('Epoch')
ax4.set_ylabel('Loss Change')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nTraining visualization complete!")

In [None]:
# === 分類：予測終値に基づく Buy(1)/Sell(0) 判定 ===
# 判定ロジック： pred_close_{t+1} > actual_close_{t} ? 1 : 0
# 時系列整合のため、各セットの基準 day_t の close 実値を準備する

def get_last_close_vector(df_part, window):
    """Return the 'close' value on the last row of every sliding window.

    For each window start ``i`` in ``range(len(df_part) - window)`` the value
    at row ``i + window - 1`` is collected. Pass an unscaled frame when the
    result is to be compared against prices in their original units.

    Args:
        df_part: DataFrame with a 'close' column.
        window: Window length in rows.

    Returns:
        1-D NumPy array with ``len(df_part) - window`` entries (empty when the
        frame is not longer than the window).
    """
    closes = df_part['close'].values
    n_windows = len(df_part) - window
    # Row (start + window - 1) is the most recent day inside window `start`.
    return np.array([closes[start + window - 1] for start in range(n_windows)])

# Reference closes are taken from the unscaled train/val/test frames
close_train_tail = get_last_close_vector(train, WINDOW_SIZE)
close_val_tail   = get_last_close_vector(val,   WINDOW_SIZE)
close_test_tail  = get_last_close_vector(test,  WINDOW_SIZE)

# Predict Buy (1) when the predicted close exceeds the reference close, else Sell (0)
buy_pred_tr = (pred_tr > close_train_tail).astype(int)
buy_pred_v  = (pred_v  > close_val_tail).astype(int)
buy_pred_te = (pred_te > close_test_tail).astype(int)

print('='*60)
print('Classification Metrics (Buy=1 / Sell=0)')
print('='*60)

def cls_metrics(y_true, y_pred):
    """Return accuracy, precision, recall and F1 as a dict.

    Keys are 'acc', 'prec', 'rec' and 'f1'. Precision, recall and F1 are
    computed with ``zero_division=0``.
    """
    scorers = {'prec': precision_score, 'rec': recall_score, 'f1': f1_score}
    scores = {'acc': accuracy_score(y_true, y_pred)}
    for key, scorer in scorers.items():
        scores[key] = scorer(y_true, y_pred, zero_division=0)
    return scores

# Score the derived Buy/Sell predictions against the true labels of each split
metrics_tr = cls_metrics(y_train_cls, buy_pred_tr)
metrics_v = cls_metrics(y_val_cls, buy_pred_v)
metrics_te = cls_metrics(y_test_cls, buy_pred_te)

print(f'Train: Acc={metrics_tr["acc"]:.4f}, Prec={metrics_tr["prec"]:.4f}, Rec={metrics_tr["rec"]:.4f}, F1={metrics_tr["f1"]:.4f}')
print(f'Val:   Acc={metrics_v["acc"]:.4f}, Prec={metrics_v["prec"]:.4f}, Rec={metrics_v["rec"]:.4f}, F1={metrics_v["f1"]:.4f}')
print(f'Test:  Acc={metrics_te["acc"]:.4f}, Prec={metrics_te["prec"]:.4f}, Rec={metrics_te["rec"]:.4f}, F1={metrics_te["f1"]:.4f}')
print('='*60)

# Confusion Matrix (Test)
cm = confusion_matrix(y_test_cls, buy_pred_te)
print('\nConfusion Matrix (Test)')
print(cm)
print('\nClassification Report (Test)')
print(classification_report(y_test_cls, buy_pred_te, zero_division=0, target_names=['Sell (0)', 'Buy (1)']))

# === Detailed visualization of classification performance ===
# 2x4 grid: three confusion matrices on the top row (fourth cell unused) and
# one panel per metric on the bottom row. A 2x3 grid has no room for the
# fourth metric (F1), which would otherwise be silently skipped.
fig, axes = plt.subplots(2, 4, figsize=(24, 10))

# 1. Confusion Matrix (Train)
from sklearn.metrics import ConfusionMatrixDisplay
cm_train = confusion_matrix(y_train_cls, buy_pred_tr)
disp_train = ConfusionMatrixDisplay(confusion_matrix=cm_train, display_labels=['Sell', 'Buy'])
disp_train.plot(ax=axes[0, 0], cmap='Blues', values_format='d')
axes[0, 0].set_title('Train: Confusion Matrix', fontsize=12, fontweight='bold')

# 2. Confusion Matrix (Val)
cm_val = confusion_matrix(y_val_cls, buy_pred_v)
disp_val = ConfusionMatrixDisplay(confusion_matrix=cm_val, display_labels=['Sell', 'Buy'])
disp_val.plot(ax=axes[0, 1], cmap='Oranges', values_format='d')
axes[0, 1].set_title('Val: Confusion Matrix', fontsize=12, fontweight='bold')

# 3. Confusion Matrix (Test)
cm_test = confusion_matrix(y_test_cls, buy_pred_te)
disp_test = ConfusionMatrixDisplay(confusion_matrix=cm_test, display_labels=['Sell', 'Buy'])
disp_test.plot(ax=axes[0, 2], cmap='Greens', values_format='d')
axes[0, 2].set_title('Test: Confusion Matrix', fontsize=12, fontweight='bold')

# The fourth cell of the top row has nothing to show; hide it
axes[0, 3].axis('off')

# 4. Metric comparison (Train/Val/Test), one panel per metric
splits = ['Train', 'Val', 'Test']
metrics_list = [metrics_tr, metrics_v, metrics_te]

metric_names = ['acc', 'prec', 'rec', 'f1']
metric_labels = ['Accuracy', 'Precision', 'Recall', 'F1-Score']

for idx, (metric_name, metric_label) in enumerate(zip(metric_names, metric_labels)):
    ax = axes[1, idx]

    values = [m[metric_name] for m in metrics_list]
    bars = ax.bar(splits, values, color=['blue', 'orange', 'green'], alpha=0.7)

    # Print the value above each bar
    for bar in bars:
        height = bar.get_height()
        ax.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}', ha='center', va='bottom', fontsize=10)

    ax.set_ylabel(metric_label)
    ax.set_title(f'{metric_label} by Split', fontsize=12, fontweight='bold')
    ax.set_ylim(0, 1.1)
    ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# === Prediction confidence analysis ===
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
ax1, ax2 = axes[0], axes[1]

# Predicted move on the test split: predicted next close minus reference close.
pred_diff = pred_te - close_test_tail

# Panel 1: distribution of the predicted move, split by emitted signal.
buy_mask = buy_pred_te == 1
sell_mask = buy_pred_te == 0

for signal_mask, legend_text, face_color in (
    (buy_mask, 'Buy Predictions', 'green'),
    (sell_mask, 'Sell Predictions', 'red'),
):
    ax1.hist(pred_diff[signal_mask], bins=30, alpha=0.6, label=legend_text,
             color=face_color, edgecolor='black')
ax1.axvline(0, color='black', linestyle='--', linewidth=2, label='No Change')
ax1.set_title('Prediction Confidence Distribution', fontsize=12, fontweight='bold')
ax1.set_xlabel('Predicted Price Change')
ax1.set_ylabel('Frequency')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Panel 2: size of the predicted move, split by whether the signal was right.
correct_mask = (buy_pred_te == y_test_cls).astype(bool)
incorrect_mask = ~correct_mask

for outcome_mask, legend_text, face_color in (
    (correct_mask, 'Correct Predictions', 'blue'),
    (incorrect_mask, 'Incorrect Predictions', 'orange'),
):
    ax2.hist(np.abs(pred_diff[outcome_mask]), bins=30, alpha=0.6,
             label=legend_text, color=face_color, edgecolor='black')
ax2.set_title('Prediction Magnitude: Correct vs Incorrect', fontsize=12, fontweight='bold')
ax2.set_xlabel('|Predicted Price Change|')
ax2.set_ylabel('Frequency')
ax2.legend()
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

print("\nClassification visualization complete!")

In [None]:
# === Detailed visualisation of the regression predictions ===
fig, axes = plt.subplots(3, 2, figsize=(16, 12))


def _actual_vs_pred_panel(ax, actual, predicted, split_name, *, color, alpha, size):
    """Draw an actual-vs-predicted scatter with a y = x guide and a score box.

    Returns the R² score of ``predicted`` against ``actual``.
    """
    ax.scatter(actual, predicted, alpha=alpha, s=size, color=color)
    # Diagonal spanning the range of the actual values = perfect prediction.
    ax.plot([actual.min(), actual.max()],
            [actual.min(), actual.max()],
            'r--', linewidth=2, label='Perfect Prediction')
    ax.set_title(f'{split_name}: Actual vs Predicted', fontsize=12, fontweight='bold')
    ax.set_xlabel('Actual Close Price')
    ax.set_ylabel('Predicted Close Price')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Annotate the panel with R² and RMSE in the top-left corner.
    r2_value = r2_score(actual, predicted)
    ax.text(0.05, 0.95,
            f'R² = {r2_value:.4f}\nRMSE = {rmse(actual, predicted):.4f}',
            transform=ax.transAxes, fontsize=10, verticalalignment='top',
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    return r2_value


# Panels 1-3: actual vs predicted for Train / Val / Test.
r2_train = _actual_vs_pred_panel(axes[0, 0], y_train_reg, pred_tr, 'Train',
                                 color='blue', alpha=0.3, size=10)
r2_val = _actual_vs_pred_panel(axes[0, 1], y_val_reg, pred_v, 'Val',
                               color='orange', alpha=0.5, size=20)
r2_test = _actual_vs_pred_panel(axes[1, 0], y_test_reg, pred_te, 'Test',
                                color='green', alpha=0.5, size=20)

# Panel 4: distribution of the test prediction error (actual - predicted).
ax4 = axes[1, 1]
errors = y_test_reg - pred_te
ax4.hist(errors, bins=30, color='purple', alpha=0.7, edgecolor='black')
ax4.axvline(0, color='red', linestyle='--', linewidth=2, label='Zero Error')
ax4.axvline(errors.mean(), color='green', linestyle='--', linewidth=2,
            label=f'Mean Error: {errors.mean():.4f}')
ax4.set_title('Test: Prediction Error Distribution', fontsize=12, fontweight='bold')
ax4.set_xlabel('Error (Actual - Predicted)')
ax4.set_ylabel('Frequency')
ax4.legend()
ax4.grid(True, alpha=0.3)

# Panel 5: actual vs predicted over time for the first (up to) 100 test samples.
ax5 = axes[2, 0]
n_show = min(100, len(y_test_reg))
indices = range(n_show)
for series, legend_text, line_color, marker_style in (
    (y_test_reg, 'Actual', 'blue', 'o'),
    (pred_te, 'Predicted', 'red', 'x'),
):
    ax5.plot(indices, series[:n_show], label=legend_text, linewidth=2,
             color=line_color, marker=marker_style, markersize=3)
ax5.set_title(f'Test: Time Series Prediction (First {n_show} days)', fontsize=12, fontweight='bold')
ax5.set_xlabel('Sample Index')
ax5.set_ylabel('Close Price')
ax5.legend()
ax5.grid(True, alpha=0.3)

# Panel 6: RMSE of the linear-regression baseline vs the LSTM, per split.
ax6 = axes[2, 1]
models = ['Baseline\n(Linear Reg)', 'LSTM']
train_rmses = [rmse(y_train_reg, pred_tr_bl), rmse(y_train_reg, pred_tr)]
val_rmses = [rmse(y_val_reg, pred_v_bl), rmse(y_val_reg, pred_v)]
test_rmses = [rmse(y_test_reg, pred_te_bl), rmse(y_test_reg, pred_te)]

positions = np.arange(len(models))
width = 0.25

# Draw all three bar groups first, then label them.
bar_groups = [
    ax6.bar(positions - width, train_rmses, width, label='Train', color='blue', alpha=0.7),
    ax6.bar(positions, val_rmses, width, label='Val', color='orange', alpha=0.7),
    ax6.bar(positions + width, test_rmses, width, label='Test', color='green', alpha=0.7),
]

# Print the value on top of each bar.
for group in bar_groups:
    for rect in group:
        rect_height = rect.get_height()
        ax6.text(rect.get_x() + rect.get_width()/2., rect_height,
                 f'{rect_height:.3f}', ha='center', va='bottom', fontsize=9)

ax6.set_ylabel('RMSE')
ax6.set_title('Model Comparison: RMSE', fontsize=12, fontweight='bold')
ax6.set_xticks(positions)
ax6.set_xticklabels(models)
ax6.legend()
ax6.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

print("\nPrediction visualization complete!")

In [None]:
# === Simple backtest (test split only) ===
# Rules:
#   Buy (1)  -> buy at the next day's open and exit at the next day's close;
#               the trade return is assumed to track the close(t) -> close(t+1)
#               change, which is what is actually computed below.
#   Sell (0) -> do nothing (simple version: no short selling).
#
# NOTE(review): assumes test['close'] holds raw (unscaled) prices; if the column
# was standardised upstream, the percentage returns below are meaningless —
# confirm against the preprocessing cell.

test_close = test['close'].values
# Trim to the windowed samples so both vectors have one entry per prediction.
test_close_tail = test_close[WINDOW_SIZE-1: -1]  # close on day t
test_close_next = test_close[WINDOW_SIZE:]       # close on day t+1

# Daily strategy return: the close-to-close change on Buy days, 0 otherwise.
ret = np.zeros_like(buy_pred_te, dtype=float)
price_diff = (test_close_next - test_close_tail) / (test_close_tail + 1e-9)  # daily pct change
is_buy = buy_pred_te == 1
ret[is_buy] = price_diff[is_buy]

cum_ret = (1 + ret).cumprod() - 1

# Buy & Hold benchmark.
# Full-window curve (relative to the first test close), kept under its
# original name.
buy_hold_ret = (test_close - test_close[0]) / test_close[0]
# The strategy's first possible entry is the close of day WINDOW_SIZE-1, so the
# comparable benchmark is measured from that same close; basing it on
# test_close[0] would credit Buy & Hold with the warm-up days the strategy
# cannot trade.
buy_hold_ret_aligned = test_close_next / test_close_tail[0] - 1

# Per-trade statistics, guarded for the case of no Buy signal at all.
n_trades = int(is_buy.sum())
trade_rets = ret[is_buy]
win_rate = (trade_rets > 0).sum() / max(n_trades, 1)
avg_trade_ret = trade_rets.mean() if n_trades else 0.0

print('='*60)
print('Backtest Results (Test Period)')
print('='*60)
print(f'Final Cumulative Return (Strategy): {cum_ret[-1]:.2%}')
print(f'Final Cumulative Return (Buy & Hold): {buy_hold_ret_aligned[-1]:.2%}')
print(f'Number of Trades (Buy signals): {n_trades}')
print(f'Win Rate: {win_rate:.2%}')
print(f'Average Return per Trade: {avg_trade_ret:.4f} ({avg_trade_ret*100:.2f}%)')
# Annualised with 252 trading days; the epsilon avoids division by zero when
# every daily return is identical.
print(f'Sharpe Ratio (approx): {(ret.mean() / (ret.std() + 1e-9)) * np.sqrt(252):.2f}')
print('='*60)

# === Detailed visualisation of the backtest ===
fig, axes = plt.subplots(3, 2, figsize=(16, 12))

# 1. Cumulative return: strategy vs Buy & Hold.
ax1 = axes[0, 0]
ax1.plot(cum_ret, label='Strategy (Buy on predicted up)', linewidth=2, color='blue')
ax1.plot(buy_hold_ret_aligned, label='Buy & Hold', linewidth=2, color='gray', linestyle='--')
ax1.axhline(0, color='k', linestyle='-', linewidth=0.5, alpha=0.5)
ax1.set_title('Cumulative Return Comparison', fontsize=12, fontweight='bold')
ax1.set_xlabel('Trading Days')
ax1.set_ylabel('Cumulative Return')
ax1.legend()
ax1.grid(True, alpha=0.3)

# Show the final returns in the top-left corner.
ax1.text(0.02, 0.98, f'Strategy: {cum_ret[-1]:.2%}\nBuy & Hold: {buy_hold_ret_aligned[-1]:.2%}',
         transform=ax1.transAxes, fontsize=10, verticalalignment='top',
         bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

# 2. Distribution of daily strategy returns.
ax2 = axes[0, 1]
ax2.hist(ret, bins=50, color='purple', alpha=0.7, edgecolor='black')
ax2.axvline(ret.mean(), color='red', linestyle='--', linewidth=2, label=f'Mean: {ret.mean():.4f}')
ax2.axvline(0, color='black', linestyle='-', linewidth=1)
ax2.set_title('Daily Returns Distribution', fontsize=12, fontweight='bold')
ax2.set_xlabel('Daily Return')
ax2.set_ylabel('Frequency')
ax2.legend()
ax2.grid(True, alpha=0.3)

# 3. Win / loss counts (Buy signals only).
ax3 = axes[1, 0]
trade_returns = ret[buy_pred_te == 1]
wins = (trade_returns > 0).sum()
losses = (trade_returns <= 0).sum()
total_trades = wins + losses
ax3.bar(['Wins', 'Losses'], [wins, losses], color=['green', 'red'], alpha=0.7)
ax3.set_title('Trade Outcome (Buy Signals Only)', fontsize=12, fontweight='bold')
ax3.set_ylabel('Count')
# The loop variable is deliberately not called `val`: at notebook top level
# that would overwrite the validation DataFrame of the same name.
for i, outcome_count in enumerate((wins, losses)):
    # Guard the share so a run without any Buy signal shows 0.0%.
    outcome_share = outcome_count / total_trades * 100 if total_trades else 0.0
    ax3.text(i, outcome_count, f'{outcome_count}\n({outcome_share:.1f}%)',
             ha='center', va='bottom', fontsize=10)
ax3.grid(True, alpha=0.3, axis='y')

# 4. Average return of winning vs losing trades.
ax4 = axes[1, 1]
win_returns = trade_returns[trade_returns > 0]
loss_returns = trade_returns[trade_returns <= 0]
avg_win = win_returns.mean() if len(win_returns) > 0 else 0
avg_loss = loss_returns.mean() if len(loss_returns) > 0 else 0
ax4.bar(['Avg Win', 'Avg Loss'], [avg_win, avg_loss], color=['green', 'red'], alpha=0.7)
ax4.axhline(0, color='black', linestyle='-', linewidth=0.5)
ax4.set_title('Average Return per Trade Type', fontsize=12, fontweight='bold')
ax4.set_ylabel('Average Return')
for i, avg_value in enumerate((avg_win, avg_loss)):
    # Put the label above positive bars and below non-positive ones.
    ax4.text(i, avg_value, f'{avg_value:.4f}\n({avg_value*100:.2f}%)',
             ha='center', va='bottom' if avg_value > 0 else 'top', fontsize=10)
ax4.grid(True, alpha=0.3, axis='y')

# 5. Drawdown analysis.
ax5 = axes[2, 0]
# Drawdown is the relative decline from the running peak of the equity curve
# (1 + cumulative return), not the difference of two cumulative returns.
equity_curve = pd.Series(1.0 + cum_ret)
running_max = equity_curve.cummax()
drawdown = equity_curve / running_max - 1.0
ax5.fill_between(range(len(drawdown)), 0, drawdown, color='red', alpha=0.3, label='Drawdown')
ax5.plot(drawdown, color='red', linewidth=1)
ax5.set_title('Drawdown Analysis', fontsize=12, fontweight='bold')
ax5.set_xlabel('Trading Days')
ax5.set_ylabel('Drawdown')
ax5.legend()
ax5.grid(True, alpha=0.3)

max_dd = drawdown.min()
ax5.text(0.02, 0.02, f'Max Drawdown: {max_dd:.2%}',
         transform=ax5.transAxes, fontsize=10, verticalalignment='bottom',
         bbox=dict(boxstyle='round', facecolor='pink', alpha=0.5))

# 6. Monthly return analysis.
ax6 = axes[2, 1]
# Index labels of the days on which each return is realised (day t+1).
test_indices = test.index[WINDOW_SIZE:]
if len(test_indices) == len(ret):
    ret_series = pd.Series(ret, index=test_indices)
    # Group by calendar month via periods: the 'M' *offset* alias used by
    # resample() is deprecated in recent pandas, while the 'M' *period* alias
    # is not. NOTE(review): requires a DatetimeIndex, as resample() did.
    monthly_ret = ret_series.groupby(ret_series.index.to_period('M')).sum()
    colors = ['green' if x > 0 else 'red' for x in monthly_ret]
    ax6.bar(range(len(monthly_ret)), monthly_ret.values, color=colors, alpha=0.7)
    ax6.axhline(0, color='black', linestyle='-', linewidth=0.5)
    ax6.set_title('Monthly Returns', fontsize=12, fontweight='bold')
    ax6.set_xlabel('Month Index')
    ax6.set_ylabel('Monthly Return')
    ax6.grid(True, alpha=0.3, axis='y')
else:
    ax6.text(0.5, 0.5, 'Monthly returns\nnot available\n(index mismatch)',
             transform=ax6.transAxes, fontsize=12, ha='center', va='center')
    ax6.axis('off')

plt.tight_layout()
plt.show()

print("\nBacktest visualization complete!")

In [None]:

# === Saved artifacts ===
# Persist the fitted scaler and the regression model.
joblib.dump(scaler, 'scaler_7203T.joblib')
model_reg.save('model_regression_7203T.keras')

# Record the run parameters next to the artifacts as plain text.
with open('params_7203T.txt', 'w', encoding='utf-8') as f:
    f.writelines([
        f'Ticker: {TICKER}\n',
        f'Period: {START_DATE}..{END_DATE}\n',
        f'Window: {WINDOW_SIZE}\n',
        f'Train end: {SPLIT_TRAIN_END}\nVal end: {SPLIT_VAL_END}\n',
    ])
print('Artifacts saved.')



## メモ：クリプト向けチュートリアルからの置換ポイント
- **GmoFetcher → YfFetcher**  
  - 15分足 / `interval_sec` は株の**日次**に読み替え（`interval='1d'`）。  
  - `market='BTC_JPY'` → `ticker='7203.T'`（ただし本ノートブックのパラメータでは、CSV ファイルに合わせて `TICKER = "TM"`（NYSE: TM）を使用）。  
  - キャッシュは `joblib.Memory('/tmp/yf_cache_v2')` を用意。

- **データ期間の限定**  
  - `df = df[df.index < '2021-04-01']` のような扱いを、`START_DATE`〜`END_DATE` で制御。

- **目的変数**  
  - 回帰：翌日終値 `target_close_t1`。  
  - 分類：`pred_close_{t+1} > close_t` を Buy=1 / Sell=0。

- **評価**  
  - 回帰：RMSE / R²。  
  - 分類：Accuracy / Precision / Recall / F1、Confusion Matrix。  
  - 簡易バックテストで実務的な感触も確認。

- **ネットワーク非依存**  
  - ネット不可の環境では Kaggle 等で `7203.T.csv` を持ち込み、`csv_path` に指定。
