In [2]:
import pandas as pd
import numpy as np
import yfinance as yf
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
# [FIX] 修改引用方式，避免 NameError
import xgboost as xgb
from torch.utils.data import DataLoader, TensorDataset
import warnings

# 忽略警告
warnings.filterwarnings('ignore')

# ==========================================
# 1. 資料獲取與前處理
# ==========================================
STOCK_ID = "2454.TW"
START_DATE = '2023-07-01'
END_DATE = '2023-12-31'

print(f"1. 下載 {STOCK_ID} 資料 ({START_DATE} ~ {END_DATE})...")
df = yf.download(STOCK_ID, start=START_DATE, end=END_DATE)

# 修正 yfinance 多層索引問題
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.droplevel(1)
df.reset_index(inplace=True)

# 確保只有交易日資料
df = df[['Date', 'Open', 'High', 'Low', 'Close', 'Volume']].copy()

# ==========================================
# 2. 特徵工程：四大技術指標
# ==========================================
print("2. 計算四大類技術指標...")

# (1) 趨勢: MA
df['MA_5'] = df['Close'].rolling(window=5).mean()
df['MA_20'] = df['Close'].rolling(window=20).mean()

# (2) 動能: RSI
def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    return 100 - (100 / (1 + rs))
df['RSI'] = calculate_rsi(df['Close'])

# (3) 波動: Bollinger Bands
std = df['Close'].rolling(window=20).std()
df['Upper_Band'] = df['MA_20'] + (std * 2)
df['Lower_Band'] = df['MA_20'] - (std * 2)

# (4) 成交量: OBV
df['OBV'] = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0).cumsum()

# 清除 NaN
df.dropna(inplace=True)
print(f"   特徵工程完成，剩餘資料筆數: {len(df)}")

# ==========================================
# 3. 資料集製作
# ==========================================
features = ['Open', 'High', 'Low', 'Close', 'Volume', 'MA_5', 'RSI', 'Upper_Band', 'OBV']
target = 'Close'

train_size = int(len(df) * 0.8)
train_df, test_df = df.iloc[:train_size], df.iloc[train_size:]

scaler = StandardScaler()
X_train = scaler.fit_transform(train_df[features])
y_train = train_df[target].values
X_test = scaler.transform(test_df[features])
y_test = test_df[target].values

# 轉換為 Tensor (給 LSTM)
def create_dataset(X, y, time_steps=5):
    Xs, ys = [], []
    for i in range(len(X) - time_steps):
        Xs.append(X[i:(i + time_steps)])
        ys.append(y[i + time_steps])
    return torch.FloatTensor(np.array(Xs)), torch.FloatTensor(np.array(ys))

TIME_STEPS = 5
X_train_t, y_train_t = create_dataset(X_train, y_train, TIME_STEPS)
X_test_t, y_test_t = create_dataset(X_test, y_test, TIME_STEPS)

# ==========================================
# 4. 模型訓練與評估
# ==========================================
print("3. 開始訓練模型 (XGBoost vs LSTM)...")
results = []

# --- 模型 A: XGBoost (Machine Learning) ---
# [FIX] 使用 xgb.XGBRegressor 確保變數被定義
xgb_model = xgb.XGBRegressor(n_estimators=100, learning_rate=0.05, random_state=42)
xgb_model.fit(X_train[TIME_STEPS:], y_train[TIME_STEPS:])
xgb_pred = xgb_model.predict(X_test[TIME_STEPS:])

rmse_xgb = np.sqrt(mean_squared_error(y_test[TIME_STEPS:], xgb_pred))
results.append({'Model': 'XGBoost', 'RMSE': rmse_xgb})

# --- 模型 B: LSTM (Deep Learning) ---
class SimpleLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim=1):
        super(SimpleLSTM, self).__init__()
        self.lstm = nn.LSTM(input_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
lstm_model = SimpleLSTM(input_dim=len(features), hidden_dim=32).to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(lstm_model.parameters(), lr=0.01)

loader = DataLoader(TensorDataset(X_train_t.to(device), y_train_t.to(device)), batch_size=16, shuffle=True)
lstm_model.train()
for epoch in range(50):
    for batch_X, batch_y in loader:
        optimizer.zero_grad()
        outputs = lstm_model(batch_X)
        loss = criterion(outputs.squeeze(), batch_y)
        loss.backward()
        optimizer.step()

lstm_model.eval()
with torch.no_grad():
    lstm_pred_t = lstm_model(X_test_t.to(device)).cpu().numpy().flatten()

rmse_lstm = np.sqrt(mean_squared_error(y_test[TIME_STEPS:], lstm_pred_t))
results.append({'Model': 'LSTM (Deep Learning)', 'RMSE': rmse_lstm})

# ==========================================
# 5. 結果輸出
# ==========================================
print("\n" + "="*40)
print(f"第八週作業結果報告 - 標的: {STOCK_ID}")
print("="*40)
results_df = pd.DataFrame(results)
print(results_df)
print("-" * 40)
# 顯示 RMSE
print(f"XGBoost RMSE: {rmse_xgb:.4f}")
print(f"LSTM    RMSE: {rmse_lstm:.4f}")

[*********************100%***********************]  1 of 1 completed

1. 下載 2454.TW 資料 (2023-07-01 ~ 2023-12-31)...
2. 計算四大類技術指標...
   特徵工程完成，剩餘資料筆數: 107
3. 開始訓練模型 (XGBoost vs LSTM)...






第八週作業結果報告 - 標的: 2454.TW
                  Model        RMSE
0               XGBoost   45.648805
1  LSTM (Deep Learning)  810.448839
----------------------------------------
XGBoost RMSE: 45.6488
LSTM    RMSE: 810.4488
