# CPB 簡化版 - 直接 Colab 訓練
## 不依賴 GitHub，直接在 Colab 訓練並上傳 HuggingFace
✓ 無複雜特徵工程  
✓ 無地區限制 (Binance US)  
✓ 直接上傳 HF  
✓ 5-10 分鐘完成

In [None]:
# Step 0: 安裝依賴!pip install -q ccxt torch scikit-learn pandas numpy!pip install -q huggingface-hub  # 上傳到 HF
import ccxtimport torchimport pandas as pdimport numpy as npfrom sklearn.preprocessing import MinMaxScalerimport subprocessfrom datetime import datetimeimport jsonimport os
print('✓ 所有依賴安裝完成')print(f'PyTorch 版本: {torch.__version__}')print(f'CUDA 可用: {torch.cuda.is_available()}')if torch.cuda.is_available():    print(f'GPU 型號: {torch.cuda.get_device_name(0)}')

In [None]:
# Step 1: Mount Google Drive.
from google.colab import drive

drive.mount('/content/drive')

# Create the working directory under the mounted drive (a no-op if it
# already exists) and make it the current directory, so the relative
# paths used by later steps resolve inside it.
work_dir = '/content/drive/MyDrive/cpb_simple'
os.makedirs(work_dir, exist_ok=True)
os.chdir(work_dir)

print(f'✓ 工作目錄: {os.getcwd()}')

In [None]:
# Step 2: Collect Binance US candle data.
print('\n' + '=' * 60)
print('Step 2: 採集 Binance US 數據')
print('=' * 60)

exchange = ccxt.binanceus()

coins = [
    'BTC/USDT', 'ETH/USDT', 'SOL/USDT', 'BNB/USDT', 'ADA/USDT',
    'DOGE/USDT', 'XRP/USDT', 'DOT/USDT', 'LINK/USDT', 'UNI/USDT',
]

# Maps each trading pair to its DataFrame of candles; pairs whose download
# fails are simply absent.
data_dict = {}

for coin in coins:
    try:
        print(f'正在採集 {coin}...')
        # Ask for up to 500 candles on the 1-hour timeframe.
        klines = exchange.fetch_ohlcv(coin, '1h', limit=500)
        df = pd.DataFrame(
            klines,
            columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'],
        )
        # The raw timestamp column is interpreted as epoch milliseconds.
        df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
        df['coin'] = coin
        data_dict[coin] = df
        print(f'  ✓ {coin}: {len(df)} K 線')
    except Exception as e:
        # Best effort: show a truncated error and move on to the next pair.
        print(f'  ✗ {coin}: {str(e)[:50]}')

print(f'\n✓ 成功採集 {len(data_dict)} 個幣種')

In [None]:
# Step 3: Simple feature engineering (no complex indicators).
print('\n' + '=' * 60)
print('Step 3: 特徵工程')
print('=' * 60)
def _calc_rsi(prices, period=14):
    """Return a rolling-mean RSI of ``prices`` computed over ``period`` rows."""
    delta = prices.diff()
    gain = delta.where(delta > 0, 0).rolling(window=period).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=period).mean()
    # The small epsilon keeps the ratio finite when the window has no losses.
    rs = gain / (loss + 1e-10)
    return 100 - (100 / (1 + rs))


def engineer_features(df, horizon=7):
    """Build a small feature table plus a forward-looking target.

    The input frame must provide ``timestamp``, ``close`` and ``volume``
    columns. Rows are sorted by ``timestamp`` and re-indexed before any
    rolling statistic is computed; the input frame itself is not modified.

    Added columns:
        price_change, volume_change: row-over-row percentage change.
        sma_5, sma_10, sma_20: simple moving averages of ``close``.
        rsi: 14-row relative strength index of ``close``.
        volatility: 20-row rolling standard deviation of ``close``.
        target: percentage change of ``close`` from the current row to the
            row ``horizon`` steps *ahead*.

    Args:
        df: Candle data, one row per time step.
        horizon: Number of rows ahead used for the target. The unit is
            whatever spacing the input rows have.

    Returns:
        A new DataFrame with every row containing a NaN removed, i.e. without
        the warm-up rows of the rolling windows and without the final
        ``horizon`` rows, whose future price is unknown.

    Raises:
        ValueError: If ``horizon`` is smaller than 1.
    """
    if horizon < 1:
        raise ValueError('horizon must be a positive number of rows')

    df = df.copy()
    df = df.sort_values('timestamp').reset_index(drop=True)

    # 1. Percentage changes.
    df['price_change'] = df['close'].pct_change() * 100
    df['volume_change'] = df['volume'].pct_change() * 100

    # 2. Simple moving averages.
    for period in [5, 10, 20]:
        df[f'sma_{period}'] = df['close'].rolling(window=period).mean()

    # 3. Relative strength index (RSI).
    df['rsi'] = _calc_rsi(df['close'])

    # 4. Volatility.
    df['volatility'] = df['close'].rolling(window=20).std()

    # 5. Target: return from this row to `horizon` rows later. The previous
    #    `pct_change(7)` measured the change over the *past* 7 rows, which is
    #    already known at prediction time and is not a forecast label.
    future_close = df['close'].shift(-horizon)
    df['target'] = (future_close / df['close'] - 1) * 100

    # Drop rows with NaN from the rolling warm-up or the missing future price.
    return df.dropna()
# Run feature engineering for every downloaded pair and report how many
# usable rows each one produced.
processed_data = {}
for coin, df in data_dict.items():
    features = engineer_features(df)
    processed_data[coin] = features
    print(f'✓ {coin}: {len(features)} 筆訓練數據')

In [None]:
# Step 4: Train the LSTM models.
print('\n' + '=' * 60)
print('Step 4: 訓練 LSTM 模型')
print('=' * 60)

from torch import nn
from torch import optim
class SimpleLSTM(nn.Module):
    """Two-layer LSTM followed by a linear layer on the last time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of each LSTM layer.
        output_size: Number of values produced per sequence.
    """

    def __init__(self, input_size=8, hidden_size=64, output_size=1):
        super().__init__()
        # batch_first=True: inputs are laid out as (batch, time, features).
        self.lstm = nn.LSTM(
            input_size,
            hidden_size,
            num_layers=2,
            batch_first=True,
        )
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return the linear projection of the final time step's LSTM output."""
        sequence_output, _ = self.lstm(x)
        final_step = sequence_output[:, -1, :]
        return self.fc(final_step)
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'使用設備: {device}')

# Filled by the training loop: trained model and error metrics per coin.
models_dict = dict()
results = dict()
def create_sequences(X, y, lookback=10):
    """Slice aligned feature/target arrays into overlapping windows.

    Args:
        X: Feature array with one row per time step.
        y: Target array aligned with the rows of ``X``.
        lookback: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X_seq, y_seq)`` where ``X_seq[i]`` is ``X[i:i + lookback]``
        and ``y_seq[i]`` is ``y[i + lookback]``, the target that follows
        that window.
    """
    starts = range(len(X) - lookback)
    X_seq = np.array([X[i:i + lookback] for i in starts])
    y_seq = np.array([y[i + lookback] for i in starts])
    return X_seq, y_seq


# Settings shared by every coin.
feature_cols = ['price_change', 'volume_change', 'sma_5', 'sma_10', 'sma_20', 'rsi', 'volatility']
lookback = 10
num_epochs = 30
batch_size = 16

for coin, df in processed_data.items():
    print(f'\n訓練 {coin}...')

    X = df[feature_cols].values
    y = df['target'].values

    # Chronological 80/20 train/test split.
    split_idx = int(len(X) * 0.8)

    # Each side of the split needs more than `lookback` rows to yield at
    # least one window; otherwise the tensors below would be empty.
    if split_idx <= lookback or len(X) - split_idx <= lookback:
        print(f'  ✗ {coin}: 數據不足 ({len(X)} 筆)，跳過')
        continue

    # Fit the scaler on the training rows only, so the min/max of the test
    # period does not leak into training.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X[:split_idx])
    X_test = scaler.transform(X[split_idx:])
    y_train, y_test = y[:split_idx], y[split_idx:]

    # Convert to fixed-length sequences.
    X_train_seq, y_train_seq = create_sequences(X_train, y_train, lookback)
    X_test_seq, y_test_seq = create_sequences(X_test, y_test, lookback)

    # Convert to tensors on the selected device.
    X_train_tensor = torch.FloatTensor(X_train_seq).to(device)
    y_train_tensor = torch.FloatTensor(y_train_seq).reshape(-1, 1).to(device)
    X_test_tensor = torch.FloatTensor(X_test_seq).to(device)
    y_test_tensor = torch.FloatTensor(y_test_seq).reshape(-1, 1).to(device)

    # Build the model.
    model = SimpleLSTM(input_size=X_train_seq.shape[2], hidden_size=64, output_size=1).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.MSELoss()

    # Train.
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0
        for i in range(0, len(X_train_tensor), batch_size):
            X_batch = X_train_tensor[i:i + batch_size]
            y_batch = y_train_tensor[i:i + batch_size]

            optimizer.zero_grad()
            y_pred = model(X_batch)
            loss = criterion(y_pred, y_batch)
            loss.backward()
            optimizer.step()
            # `loss` is the batch mean; weight it by the batch size so the
            # division below gives a true per-sample average.
            total_loss += loss.item() * len(X_batch)

        if (epoch + 1) % 10 == 0:
            print(f'  Epoch {epoch+1}/{num_epochs}, Loss: {total_loss/len(X_train_tensor):.6f}')

    # Evaluate on the held-out sequences.
    model.eval()
    with torch.no_grad():
        y_pred_test = model(X_test_tensor).cpu().numpy()

    errors = y_pred_test - y_test_seq.reshape(-1, 1)
    mse = np.mean(errors ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(errors))

    print(f'✓ {coin} 訓練完成')
    print(f'  RMSE: {rmse:.6f}')
    print(f'  MAE: {mae:.6f}')

    models_dict[coin] = model
    results[coin] = {'rmse': float(rmse), 'mae': float(mae)}

In [None]:
# Step 5: Save the models.
print('\n' + '=' * 60)
print('Step 5: 保存模型')
print('=' * 60)

model_dir = 'trained_models'
os.makedirs(model_dir, exist_ok=True)

# One state-dict file per coin; "/" in the pair name is replaced so the
# name is usable as a file name.
for coin, model in models_dict.items():
    file_name = f'{coin.replace("/", "_")}_lstm.pt'
    model_path = os.path.join(model_dir, file_name)
    torch.save(model.state_dict(), model_path)
    print(f'✓ 已保存: {model_path}')

# Save the metrics alongside the models.
results_path = os.path.join(model_dir, 'training_results.json')
with open(results_path, 'w') as f:
    summary = {
        'timestamp': datetime.now().isoformat(),
        'source': 'Binance US',
        'coins_trained': len(results),
        'results': results,
    }
    json.dump(summary, f, indent=2)

print(f'✓ 訓練結果已保存: {results_path}')

In [None]:
# Step 6: Upload to the Hugging Face Hub.
print('\n' + '=' * 60)
print('Step 6: 上傳到 HuggingFace')
print('=' * 60)

from huggingface_hub import HfApi, create_repo

# Log in to the Hub (requires a token); shows a login prompt in the notebook.
from huggingface_hub import notebook_login
notebook_login()

# Create the repository.
repo_name = 'cpb-lstm-models'
print(f'正在建立 repo: {repo_name}...')

# exist_ok=True already turns "repo exists" into a no-op, so any exception
# raised here is a real failure (bad token, network, ...) and is allowed to
# propagate instead of being reported as "already exists".
repo_url = create_repo(repo_name, exist_ok=True, private=False)
print(f'✓ Repo 已建立/存在: {repo_name}')

# Use the fully-qualified "<namespace>/<name>" id reported by the Hub rather
# than a hand-edited username placeholder, which is not a valid repo id.
repo_id = repo_url.repo_id

# Upload the model files.
api = HfApi()
for coin in models_dict:
    model_path = os.path.join(model_dir, f'{coin.replace("/", "_")}_lstm.pt')
    api.upload_file(
        path_or_fileobj=model_path,
        path_in_repo=f'models/{os.path.basename(model_path)}',
        repo_id=repo_id,
        repo_type='model',
    )
    print(f'✓ 已上傳: {coin}')

# Upload the training metrics.
api.upload_file(
    path_or_fileobj=os.path.join(model_dir, 'training_results.json'),
    path_in_repo='training_results.json',
    repo_id=repo_id,
    repo_type='model',
)

print(f'\n✓ 所有模型已上傳到 HuggingFace!')
print(f'Repo: https://huggingface.co/{repo_id}')

In [None]:
# Show the final statistics.
print('\n' + '=' * 60)
print('訓練完成總結')
print('=' * 60)

# `results` is keyed by coin; transposing gives one row per coin.
results_df = pd.DataFrame(results).transpose()
print(results_df)

mean_rmse = results_df["rmse"].mean()
print(f'\n平均 RMSE: {mean_rmse:.6f}')
mean_mae = results_df["mae"].mean()
print(f'平均 MAE: {mean_mae:.6f}')
print(f'\n✓ 訓練 {len(results)} 個幣種')