# 04 · 新資料推論管線（TIC → MAST → BLS/TLS → 機率）

## 工作流程
1. **單目標推論**：輸入單個 TIC → 下載光曲線 → 預測機率
2. **批次處理**：輸入 TIC 列表 → 批次預測 → 排序輸出
3. **視覺化**：摺疊光曲線、BLS 功率譜、預測分數
4. **GPU 優化**：偵測 L4 GPU 並示範 bfloat16 autocast

---

## 1. 環境設定與依賴安裝

In [None]:
# Step 1: install packages (a manual runtime restart is required afterwards).
# ⚠️ Important: after running this cell, restart the runtime by hand
# (Runtime → Restart runtime); the next cell checks the NumPy version.

# NumPy is pinned to 1.26.4 and SciPy to < 1.13; the remaining packages are unpinned.
!pip install -q numpy==1.26.4 pandas astropy scipy'<1.13' matplotlib scikit-learn
!pip install -q lightkurve astroquery xgboost joblib seaborn

print("✅ 套件安裝完成!")
print("⚠️ 請現在手動重啟 Runtime: Runtime → Restart runtime")
print("   然後繼續執行下一個 cell")

In [None]:
# 步驟 2: 驗證環境 (Runtime 重啟後執行)
import numpy as np
import sys
import warnings
warnings.filterwarnings('ignore')

# 檢查 NumPy 版本
print(f"NumPy 版本: {np.__version__}")
print(f"Python 版本: {sys.version}")

if np.__version__.startswith('2.'):
    print("❌ NumPy 2.0 檢測到！請確認已執行步驟 1 並重啟 Runtime")
    raise RuntimeError("請先修復 NumPy 版本問題")
else:
    print("✅ NumPy 版本正確 (< 2.0)")
    
# 檢查是否在 Colab 環境
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("📍 在 Google Colab 環境執行")
    # Clone repository if needed
    import os
    if not os.path.exists('/content/exoplanet-starter'):
        !git clone https://github.com/exoplanet-spaceapps/exoplanet-starter.git /content/exoplanet-starter
        os.chdir('/content/exoplanet-starter')
    sys.path.append('/content/exoplanet-starter')
else:
    print("💻 在本地環境執行")
    import os
    os.chdir(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
    sys.path.append(os.getcwd())

print("\n✅ 環境設定完成！可以繼續執行後續 cells")

## 2. GPU 偵測與優化設定

In [None]:
# GPU detection and optimisation settings.
#
# Fills `gpu_info`, which the GPU demo cell and the summary cell read later.
# PyTorch is used when it can be imported; otherwise `nvidia-smi` is queried
# as a best-effort fallback.
import re
import subprocess

gpu_info = {
    'available': False,
    'device_name': None,
    'is_l4': False,
    'supports_bfloat16': False
}


def is_l4_gpu(device_name):
    """Return True when *device_name* names an L4 GPU.

    A whole-word match is used so that names such as "L40" or "L40S" are not
    mistaken for an L4, which a plain substring test would do.
    """
    return re.search(r'\bL4\b', device_name) is not None


try:
    import torch

    if torch.cuda.is_available():
        gpu_info['available'] = True
        gpu_info['device_name'] = torch.cuda.get_device_name(0)
        gpu_props = torch.cuda.get_device_properties(0)

        print("🖥️ GPU 偵測結果:")
        print(f"   型號: {gpu_info['device_name']}")
        print(f"   記憶體: {gpu_props.total_memory / 1024**3:.2f} GB")
        print(f"   CUDA 運算能力: {gpu_props.major}.{gpu_props.minor}")

        # Check whether this is an L4 GPU
        if is_l4_gpu(gpu_info['device_name']):
            gpu_info['is_l4'] = True
            gpu_info['supports_bfloat16'] = True
            print("\n💡 偵測到 NVIDIA L4 GPU！")
            print("   • 支援高效能 BF16 推論")
            print("   • 建議使用 autocast 進行加速")

        # Let PyTorch have the final word on bfloat16 support when it can tell
        if hasattr(torch.cuda, 'is_bf16_supported'):
            gpu_info['supports_bfloat16'] = torch.cuda.is_bf16_supported()

    else:
        print("⚠️ 未偵測到 CUDA GPU")

except ImportError:
    print("⚠️ PyTorch 未安裝，無法使用 GPU 加速")
    # Fall back to nvidia-smi
    try:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader'],
            capture_output=True, text=True, check=False
        )
    except OSError:
        # nvidia-smi is missing or cannot be executed
        result = None

    # One CSV row is printed per GPU; only the first one is reported.
    gpu_rows = []
    if result is not None and result.returncode == 0:
        gpu_rows = result.stdout.strip().splitlines()

    if gpu_rows:
        # partition() never raises, even if the row has no ", " separator
        gpu_name, _, gpu_memory = gpu_rows[0].partition(', ')
        print("\n🖥️ 通過 nvidia-smi 偵測到 GPU:")
        print(f"   型號: {gpu_name}")
        print(f"   記憶體: {gpu_memory}")
        if is_l4_gpu(gpu_name):
            gpu_info['is_l4'] = True
            print("   💡 L4 GPU 支援 BF16 加速")
    else:
        print("   將使用 CPU 進行推論")

print("\n" + "="*60)

## 3. 載入訓練好的模型

In [None]:
# 載入模型和相關檔案
import joblib
import json
from pathlib import Path
import numpy as np
import pandas as pd

# Model directory, relative to the current working directory
model_dir = Path("model")

# Define every artefact up front so that later code can test `is None`
# instead of hitting a NameError when the directory or a file is missing.
model = None
scaler = None
schema = None
feature_order = None

# Check that the model directory exists
if not model_dir.exists():
    print("⚠️ 找不到模型目錄，請先執行 03_injection_train.ipynb 訓練模型")
    print("   或下載預訓練模型至 model/ 目錄")
else:
    model_path = model_dir / "ranker.joblib"
    scaler_path = model_dir / "scaler.joblib"
    schema_path = model_dir / "feature_schema.json"

    # NOTE(security): joblib.load unpickles arbitrary Python objects; only
    # load model files that come from a trusted source.
    if model_path.exists():
        model = joblib.load(model_path)
        print(f"✅ 載入模型: {model_path}")
    else:
        print(f"❌ 找不到模型檔案: {model_path}")

    if scaler_path.exists():
        scaler = joblib.load(scaler_path)
        print(f"✅ 載入標準化器: {scaler_path}")
    else:
        print(f"❌ 找不到標準化器: {scaler_path}")

    if schema_path.exists():
        # Read as UTF-8 explicitly rather than relying on the platform default
        with open(schema_path, 'r', encoding='utf-8') as f:
            schema = json.load(f)
        feature_order = schema['feature_order']
        print(f"✅ 載入特徵架構: {len(feature_order)} 個特徵")
    else:
        print(f"❌ 找不到特徵架構: {schema_path}")

# Check whether a supervised model is also available
supervised_model_path = model_dir / "supervised" / "ranker_supervised.joblib"
if supervised_model_path.exists():
    print(f"\n📦 發現監督式模型: {supervised_model_path}")
    print("   可選擇使用監督式模型進行推論")

print("\n" + "="*60)

## 4. 導入推論模組

In [None]:
# Import the project's inference helpers.
# These imports need the project root on sys.path (the environment cell adds it).
from app.infer import (
    predict_from_tic,
    predict_batch,
    create_folded_lightcurve_plot,
    save_inference_results,
    check_gpu_availability
)

from app.bls_features import run_bls, extract_features

# Import the plotting libraries and set the global plot style and palette
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette('husl')

print("📚 模組載入完成")

## 5. 單目標推論示範

In [None]:
# Single-target inference demo: score one TIC and print a short report.
print("🎯 單目標推論示範\n")
print("=" * 60)

# Target to analyse (replace with any other TIC ID).
# TOI-431 is a known multi-planet system.
tic_id = "TIC 25155310"

# Run the inference helper against the trained model artefacts on disk
result = predict_from_tic(
    tic_id,
    model_path="model/ranker.joblib",
    scaler_path="model/scaler.joblib",
    feature_schema_path="model/feature_schema.json",
    mission="TESS",
    verbose=True,
)

# Report header: which target was processed and whether it succeeded
print("\n" + "=" * 60)
print("📊 推論結果:")
print(f"   目標: {result['tic_id']}")
print(f"   成功: {result['success']}")

if not result['success']:
    print(f"\n❌ 錯誤: {result['error']}")
else:
    print(f"\n🎯 預測機率: {result['probability']:.3f}")
    print(f"\n📈 BLS 結果:")
    print(f"   週期: {result['bls_period']:.3f} 天")
    print(f"   深度: {result['bls_depth']*1e6:.0f} ppm")
    print(f"   SNR: {result['bls_snr']:.1f}")

    # Bucket the probability into high (> 0.8), medium (> 0.5) or low confidence
    print(
        "\n✨ 高信心行星候選！" if result['probability'] > 0.8
        else "\n📍 中等信心候選" if result['probability'] > 0.5
        else "\n❓ 低信心候選"
    )

## 6. 視覺化光曲線

In [None]:
# Visualise the light curve and the BLS result of the single-target run.
# Draws a 3x2 grid: light curve, BLS periodogram, folded curve, transit zoom,
# probability bar and a bar chart of selected feature values.
if result['success'] and result['lightcurve'] is not None:
    import lightkurve as lk
    
    # Pull the light-curve arrays and the best period out of the result
    time = np.array(result['lightcurve']['time'])
    flux = np.array(result['lightcurve']['flux'])
    period = result['bls_period']
    
    # Create the figure
    fig = plt.figure(figsize=(15, 12))
    
    # 1. Light curve (the panel title labels it as detrended)
    ax1 = plt.subplot(3, 2, 1)
    ax1.plot(time, flux, 'k.', alpha=0.3, markersize=1)
    ax1.set_xlabel('時間 (天)')
    ax1.set_ylabel('相對流量')
    ax1.set_title(f'{result["tic_id"]} - 去趨勢後光曲線')
    ax1.grid(True, alpha=0.3)
    
    # 2. BLS power spectrum
    ax2 = plt.subplot(3, 2, 2)
    # Recompute BLS here to obtain the full periodogram (periods 0.5 to 20)
    lc_obj = lk.LightCurve(time=time, flux=flux)
    bls = lc_obj.to_periodogram(method="bls", minimum_period=0.5, maximum_period=20)
    bls.plot(ax=ax2)
    # Mark the period reported by the inference result
    ax2.axvline(period, color='red', linestyle='--', alpha=0.7, label=f'最佳週期: {period:.3f} 天')
    ax2.set_title('BLS 功率譜')
    ax2.legend()
    
    # 3. Folded light curve
    ax3 = plt.subplot(3, 2, 3)
    # The helper's result is indexed by 'phase', 'flux', 'binned_phase' and 'binned_flux'
    folded_data = create_folded_lightcurve_plot(time, flux, period)
    phase = np.array(folded_data['phase'])
    flux_folded = np.array(folded_data['flux'])
    
    # Scatter of every folded point
    ax3.plot(phase, flux_folded, 'k.', alpha=0.2, markersize=1)
    
    # Overlay the binned average when the helper returned any bins
    # NOTE(review): the truthiness test presumably expects a list here — confirm
    if folded_data['binned_phase']:
        ax3.plot(folded_data['binned_phase'], folded_data['binned_flux'], 
                'ro-', markersize=4, linewidth=1.5, label='分箱平均')
    
    ax3.set_xlabel('相位')
    ax3.set_ylabel('相對流量')
    ax3.set_title(f'摺疊光曲線 (P = {period:.3f} 天)')
    ax3.grid(True, alpha=0.3)
    ax3.legend()
    
    # 4. Zoom on the transit region
    ax4 = plt.subplot(3, 2, 4)
    # Keep only points within phase ±0.1
    # NOTE(review): assumes the transit is centred at phase 0 — confirm against the helper
    transit_mask = np.abs(phase) < 0.1
    ax4.plot(phase[transit_mask], flux_folded[transit_mask], 'k.', alpha=0.3, markersize=2)
    ax4.set_xlabel('相位')
    ax4.set_ylabel('相對流量')
    ax4.set_title('凌日區域放大')
    ax4.grid(True, alpha=0.3)
    ax4.set_xlim(-0.1, 0.1)
    
    # 5. Predicted-probability bar
    ax5 = plt.subplot(3, 2, 5)
    prob = result['probability']
    # Green above 0.8, orange above 0.5, red otherwise
    color = 'green' if prob > 0.8 else 'orange' if prob > 0.5 else 'red'
    bars = ax5.bar(['行星候選機率'], [prob], color=color, alpha=0.7)
    ax5.set_ylim(0, 1)
    ax5.set_ylabel('機率')
    ax5.set_title(f'預測機率: {prob:.3f}')
    ax5.grid(True, alpha=0.3, axis='y')
    
    # Write the numeric value on top of the bar
    for bar in bars:
        height = bar.get_height()
        ax5.text(bar.get_x() + bar.get_width()/2., height,
                f'{height:.3f}',
                ha='center', va='bottom', fontsize=14, fontweight='bold')
    
    # 6. Key feature values (values from result['features'], not model importances)
    ax6 = plt.subplot(3, 2, 6)
    # Missing features fall back to 0; depth-like values are multiplied by 1e6 (ppm)
    features = result['features']
    key_features = {
        'BLS SNR': features.get('bls_snr', 0),
        '週期': features.get('bls_period', 0),
        '深度 (ppm)': features.get('bls_depth', 0) * 1e6,
        '奇偶差異': features.get('odd_even_depth_diff', 0) * 1e6,
        '對稱性': features.get('transit_symmetry', 0)
    }
    
    y_pos = np.arange(len(key_features))
    values = list(key_features.values())
    labels = list(key_features.keys())
    
    # All values share one x-axis although they are on different scales
    ax6.barh(y_pos, values, color='skyblue', alpha=0.7)
    ax6.set_yticks(y_pos)
    ax6.set_yticklabels(labels)
    ax6.set_xlabel('數值')
    ax6.set_title('關鍵特徵值')
    ax6.grid(True, alpha=0.3, axis='x')
    
    plt.suptitle(f'{result["tic_id"]} 推論結果視覺化', fontsize=16, fontweight='bold')
    plt.tight_layout()
    plt.show()
    
    print("\n📈 視覺化完成")
else:
    print("⚠️ 無法視覺化（推論失敗或無光曲線資料）")

## 7. 批次推論多個目標

In [None]:
# Batch inference demo: score a list of TIC targets in one call.
print("🎯 批次推論示範\n")
print("=" * 60)

# Targets to process (edit or extend as needed)
tic_list = [
    "TIC 25155310",   # TOI-431 (known multi-planet system)
    "TIC 307210830",  # TOI-270 (known three-planet system)
    "TIC 260004324",  # TOI-178 (known six-planet system)
    "TIC 55652896",   # TOI-125 (known three-planet system)
    "TIC 441462736",  # possible false positive
]

# Echo the numbered target list before starting
print(f"📋 準備處理 {len(tic_list)} 個目標:\n")
for position, target in enumerate(tic_list, start=1):
    print(f"   {position}. {target}")

print("\n開始批次推論...\n")
print("=" * 60)

# Run the batch helper; its return value is kept in `results_df` for later cells
batch_options = {
    "model_path": "model/ranker.joblib",
    "scaler_path": "model/scaler.joblib",
    "feature_schema_path": "model/feature_schema.json",
    "mission": "TESS",
    "verbose": True,
}
results_df = predict_batch(tic_list, **batch_options)

print("\n" + "=" * 60)

## 8. 結果表格與排序

In [None]:
# Show the batch results as a table sorted by probability, print summary
# counts and save the results to disk.
if len(results_df) > 0:
    print("\n📊 批次推論結果（按機率排序）:\n")

    # Widen pandas' display limits so the table is not truncated
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

    # Show only the key columns, skipping any that the batch result lacks,
    # so a missing optional column does not raise KeyError.
    display_columns = [
        column
        for column in ('tic_id', 'probability', 'bls_period', 'bls_snr', 'bls_depth', 'success')
        if column in results_df.columns
    ]
    display_df = results_df[display_columns].copy()

    # The header promises probability ordering, so enforce it here instead of
    # relying on the order in which the batch helper returned the rows.
    # A stable sort keeps the existing order among ties; NaN rows go last.
    if 'probability' in display_df.columns:
        display_df = display_df.sort_values(
            'probability', ascending=False, na_position='last', kind='mergesort'
        )

    def _format_or_na(value, spec, scale=1.0):
        """Format *value* (times *scale*) with *spec*, or return "N/A" for missing values."""
        return format(value * scale, spec) if pd.notna(value) else "N/A"

    # column -> (format spec, scale); the depth is shown in ppm
    column_formats = {
        'probability': ('.3f', 1.0),
        'bls_period': ('.3f', 1.0),
        'bls_snr': ('.1f', 1.0),
        'bls_depth': ('.0f', 1e6),
    }
    for column, (spec, scale) in column_formats.items():
        if column in display_df.columns:
            display_df[column] = display_df[column].apply(
                lambda value, spec=spec, scale=scale: _format_or_na(value, spec, scale)
            )
    # rename() is a no-op when the column is absent
    display_df = display_df.rename(columns={'bls_depth': 'bls_depth_ppm'})

    print(display_df.to_string(index=False))

    # Summary statistics (comparisons against NaN probabilities are False)
    succeeded = results_df['success']
    probability = results_df['probability']
    success_count = succeeded.sum()
    high_conf = len(results_df[succeeded & (probability > 0.8)])
    med_conf = len(results_df[succeeded & (probability > 0.5) & (probability <= 0.8)])

    print("\n📈 統計摘要:")
    print(f"   成功處理: {success_count}/{len(results_df)}")
    print(f"   高信心候選 (>0.8): {high_conf}")
    print(f"   中信心候選 (0.5-0.8): {med_conf}")

    # Save the unformatted results
    output_path = save_inference_results(
        results_df,
        output_path="results/batch_inference.csv",
        include_metadata=True
    )
    print(f"\n💾 結果已儲存至: {output_path}")
else:
    print("⚠️ 無推論結果")

## 9. 批次結果視覺化

In [None]:
# Visualise the batch results: a probability bar chart plus three scatter
# plots of probability against BLS period, SNR and transit depth.
if len(results_df) > 0 and results_df['success'].any():
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Keep only the successful rows
    success_df = results_df[results_df['success']].copy()
    has_probability = 'probability' in success_df.columns

    # 1. Probability per target
    ax1 = axes[0, 0]
    if has_probability:
        scored = success_df['probability'].notna()
        probs = success_df.loc[scored, 'probability']
        bars = ax1.bar(range(len(probs)), probs.values, color='skyblue', alpha=0.7)

        # Colour by confidence: green above 0.8, orange above 0.5, red otherwise
        for bar, prob in zip(bars, probs.values):
            if prob > 0.8:
                bar.set_color('green')
            elif prob > 0.5:
                bar.set_color('orange')
            else:
                bar.set_color('red')

        # Take the labels from the same rows as the bars; using every row of
        # success_df would mismatch the tick count whenever a probability is NaN.
        tick_labels = [
            str(tid).replace('TIC ', '')
            for tid in success_df.loc[scored, 'tic_id'].values
        ]
        ax1.set_xticks(range(len(probs)))
        ax1.set_xticklabels(tick_labels, rotation=45)
        ax1.set_ylabel('機率')
        ax1.set_title('預測機率分布')
        ax1.axhline(y=0.5, color='gray', linestyle='--', alpha=0.5)
        ax1.axhline(y=0.8, color='gray', linestyle='--', alpha=0.5)
        ax1.grid(True, alpha=0.3, axis='y')

    def _scatter_vs_probability(ax, column, xlabel, title, scale=None):
        """Scatter *column* (optionally times *scale*) against probability on *ax*.

        Returns the scatter artist, or None when there is nothing to draw
        (a column is missing or *column* holds no non-null value).
        """
        if not has_probability or column not in success_df.columns:
            return None
        valid = success_df[column].notna()
        if not valid.any():
            return None
        x_values = success_df.loc[valid, column]
        if scale is not None:
            x_values = x_values * scale
        prob_values = success_df.loc[valid, 'probability'].values
        points = ax.scatter(x_values.values, prob_values,
                            s=100, alpha=0.7, c=prob_values,
                            cmap='RdYlGn', vmin=0, vmax=1)
        ax.set_xlabel(xlabel)
        ax.set_ylabel('預測機率')
        ax.set_title(title)
        ax.grid(True, alpha=0.3)
        return points

    # 2. Period vs probability (the only panel with a colour bar)
    period_points = _scatter_vs_probability(
        axes[0, 1], 'bls_period', 'BLS 週期 (天)', '週期 vs 機率')
    if period_points is not None:
        cbar = plt.colorbar(period_points, ax=axes[0, 1])
        cbar.set_label('機率')

    # 3. SNR vs probability
    _scatter_vs_probability(axes[1, 0], 'bls_snr', 'BLS SNR', 'SNR vs 機率')

    # 4. Depth vs probability (depth converted to ppm)
    _scatter_vs_probability(
        axes[1, 1], 'bls_depth', '凌日深度 (ppm)', '深度 vs 機率', scale=1e6)

    plt.suptitle('批次推論結果分析', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

    print("\n📈 批次視覺化完成")
else:
    print("⚠️ 無成功的推論結果可視覺化")

## 10. GPU 加速示範（如有 L4）

In [None]:
# GPU acceleration demo: compare FP32 with BFloat16 autocast inference on a
# small example network. Runs only when the detection cell flagged an L4 GPU
# with bfloat16 support.
if gpu_info['is_l4'] and gpu_info['supports_bfloat16']:
    print("🚀 L4 GPU BFloat16 加速示範\n")
    print("="*60)

    try:
        import torch
        import torch.nn as nn
        # Import only the timer so the global name `time` is not rebound
        from time import perf_counter

        n_features = 14
        n_iterations = 1000

        # Small example network: n_features -> 64 -> 32 -> 1 with a sigmoid output
        class ExoplanetNet(nn.Module):
            def __init__(self, input_dim=n_features):
                super().__init__()
                self.fc1 = nn.Linear(input_dim, 64)
                self.fc2 = nn.Linear(64, 32)
                self.fc3 = nn.Linear(32, 1)
                self.relu = nn.ReLU()
                self.sigmoid = nn.Sigmoid()

            def forward(self, x):
                x = self.relu(self.fc1(x))
                x = self.relu(self.fc2(x))
                x = self.sigmoid(self.fc3(x))
                return x

        # Build the demo network under its own name so that the `model`
        # loaded from disk by an earlier cell is not overwritten.
        device = torch.device('cuda')
        demo_net = ExoplanetNet().to(device)
        demo_net.eval()

        # Random demo inputs
        batch_size = 100
        input_features = torch.randn(batch_size, n_features).to(device)

        # Warm up both code paths so that one-off CUDA start-up work is not
        # charged to whichever variant happens to be timed first.
        with torch.no_grad():
            for _ in range(10):
                demo_net(input_features)
            with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
                for _ in range(10):
                    demo_net(input_features)

        # 1. Standard FP32 inference
        print("⏱️ FP32 推論:")
        # CUDA kernels run asynchronously: synchronise before starting and
        # before stopping the clock.
        torch.cuda.synchronize()
        start_time = perf_counter()
        with torch.no_grad():
            for _ in range(n_iterations):
                output_fp32 = demo_net(input_features)
        torch.cuda.synchronize()
        fp32_time = perf_counter() - start_time
        print(f"   耗時: {fp32_time:.3f} 秒")

        # 2. BFloat16 inference under autocast
        print("\n⚡ BFloat16 推論 (with autocast):")
        torch.cuda.synchronize()
        start_time = perf_counter()
        with torch.no_grad():
            with torch.autocast(device_type='cuda', dtype=torch.bfloat16):
                for _ in range(n_iterations):
                    output_bf16 = demo_net(input_features)
        torch.cuda.synchronize()
        bf16_time = perf_counter() - start_time
        print(f"   耗時: {bf16_time:.3f} 秒")

        # Speed-up ratio (guard against a zero-length measurement)
        speedup = fp32_time / bf16_time if bf16_time > 0 else float('inf')
        print(f"\n🏆 BFloat16 加速比: {speedup:.2f}x")

        # Mean absolute difference between the two outputs
        diff = torch.abs(output_fp32 - output_bf16.float()).mean().item()
        print(f"   平均絕對誤差: {diff:.6f}")

        # Only claim a speed-up when one was actually measured
        print("\n💡 結論:")
        if speedup > 1.0:
            print("   • L4 GPU 的 BFloat16 可顯著加速推論")
            print("   • 數值精度損失極小，適合生產部署")
            print("   • 建議在大規模批次推論時使用")
        else:
            print("   • 此小型示範模型未從 BFloat16 獲得加速")
            print("   • 請以實際模型與批次大小重新量測後再決定是否啟用")

    except ImportError:
        print("⚠️ 需要安裝 PyTorch 才能執行 GPU 加速示範")
        print("   執行: pip install torch")
else:
    print("ℹ️ GPU 加速示範")
    print("   • 未偵測到 L4 GPU 或不支援 BFloat16")
    print("   • 當前使用標準 CPU/GPU 推論")
    print("   • 若需要加速，建議使用 Google Colab L4 執行環境")

print("\n" + "="*60)

## 11. 總結與下一步

In [None]:
# Final summary: run statistics, compute environment, output files, next steps.
summary_rule = "=" * 70

print(summary_rule)
print("📊 推論管線執行總結")
print(summary_rule)

# Batch statistics fall back to 0 when the batch cell was never run
if 'results_df' in locals():
    n_targets = len(results_df)
    n_success = results_df['success'].sum()
    n_high_conf = len(results_df[(results_df['success']) & (results_df['probability'] > 0.8)])
else:
    n_targets = 0
    n_success = 0
    n_high_conf = 0

# Human-readable labels for the GPU flags collected by the detection cell
if gpu_info['available']:
    gpu_label = '可用 - ' + gpu_info['device_name']
else:
    gpu_label = '不可用'
l4_label = '支援' if gpu_info['is_l4'] else '不支援'
bf16_label = '支援' if gpu_info['supports_bfloat16'] else '不支援'

print(f"""
🎯 執行統計:
   • 處理目標數: {n_targets}
   • 成功推論: {n_success}
   • 高信心候選: {n_high_conf}

🖥️ 運算環境:
   • GPU: {gpu_label}
   • L4 優化: {l4_label}
   • BFloat16: {bf16_label}

📦 輸出檔案:
   • 批次結果: results/batch_inference.csv
   • 元資料: results/batch_inference_metadata.json

🚀 下一步建議:
   1. 對高信心候選進行人工審查
   2. 查詢 NASA Exoplanet Archive 確認已知行星
   3. 使用更多 TESS 扇區資料進行驗證
   4. 生成候選判讀卡（執行 app/report.py）
   5. 部署為 Web 應用（執行 web/app.py）

📚 相關資源:
   • NASA Exoplanet Archive: https://exoplanetarchive.ipac.caltech.edu/
   • TESS 資料入口: https://mast.stsci.edu/portal/Mashup/Clients/Mast/Portal.html
   • Lightkurve 文件: https://docs.lightkurve.org/
""")

print(summary_rule)
print("✅ 推論管線完成！")
print(summary_rule)