# Step 1: Setup และ Import Libraries

อะไร:

นำเข้า libraries ที่จำเป็นทั้งหมด
ตั้งค่า logging สำหรับ Jupyter Notebook
ใช้ nest_asyncio เพื่อให้ asyncio ทำงานใน Jupyter ได้

ทำไม:

Jupyter Notebook มีปัญหากับ asyncio loop ปกติ
ต้องการ tracking การทำงานผ่าน logging

ผลลัพธ์:
✅ Libraries imported successfully

In [None]:
# Cell 1: Import necessary libraries
import asyncio
import time
import logging
import json
import numpy as np
import pandas as pd
from pathlib import Path
from collections import deque, defaultdict
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple

# Configure root logging so INFO-and-above records from every logger are
# emitted with a timestamp, logger name and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
# Module-level logger for the notebook's own messages.
logger = logging.getLogger(__name__)

# Jupyter already runs an event loop; nest_asyncio patches asyncio so that
# nested calls (e.g. asyncio.run inside a cell) work within it.
import nest_asyncio
nest_asyncio.apply()

# Visible confirmation that the setup cell ran to completion.
print("✅ Libraries imported successfully")

# Step 2: Mock MarketTick Class และ Helper Functions

อะไร:

สร้าง class MarketTick ที่เป็นโครงสร้างข้อมูลหลักสำหรับเก็บข้อมูลราคา
มี fields: symbol, price, volume, timestamp, bid, ask, exchange

ทำไม:

ระบบจริงใช้ MarketTick จาก market_data_input.py
แต่เราสร้าง mock version เพื่อใช้แยกต่างหากใน notebook

ตัวอย่างข้อมูล:
MarketTick(
    symbol='BTCUSDT',
    price=50000.0,
    volume=1.5,
    timestamp=1699999999.0,
    bid=49999.0,
    ask=50001.0,
    exchange='binance'
)


In [None]:
# Cell 2: Define MarketTick class (mock for standalone use)
from dataclasses import dataclass, field

@dataclass
class MarketTick:
    """Standalone stand-in for the production MarketTick record.

    Holds a single market data point (price, volume, quotes) so the notebook
    can run without importing the real market data module.
    """
    symbol: str
    price: float
    volume: float
    timestamp: float
    bid: float
    ask: float
    exchange: str
    # Free-form extras; default_factory gives every instance its own dict.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the tick as a plain dict keyed by field name.

        The ``metadata`` value is the instance's own dict (not a copy).
        """
        exported = (
            'symbol', 'price', 'volume', 'timestamp',
            'bid', 'ask', 'exchange', 'metadata',
        )
        return {name: getattr(self, name) for name in exported}

print("✅ MarketTick class defined")

# Step 3: Data Loading Functions

อะไร:
prepare_historical_data() - อ่านข้อมูลจาก CSV และแปลงเป็น MarketTick objects
ทำไม:

ข้อมูล 36M records ใหญ่เกินกว่าจะโหลดทีเดียว
ใช้ chunk reading เพื่อประหยัด memory
แปลง format ให้ตรงกับที่ระบบต้องการ

การทำงาน:

นับจำนวน rows ทั้งหมด
อ่านทีละ chunk (100,000 rows)
แปลงแต่ละ row เป็น MarketTick
แสดง progress ระหว่างทำงาน

ผลลัพธ์:
Loading data from: btcusdt_data.csv
Total rows in file: 36,000,000
  Processed 100,000 / 1,000,000 rows
✅ Loaded 1,000,000 ticks

In [None]:
# Cell 3: Function to load and prepare historical data
async def prepare_historical_data(raw_data_path, sample_size=None):
    """
    Load a CSV of historical prices and convert every row to a MarketTick.

    The file is read in chunks of 100,000 rows. For each row the price is
    taken from the 'close' column, falling back to 'price', then 0. Missing
    'bid' / 'ask' columns are synthesised as price - 1 / price + 1, and a
    missing 'timestamp' is replaced by the running row number.

    Args:
        raw_data_path: Path to CSV file
        sample_size: Number of leading rows to load (None = all)

    Returns:
        List of MarketTick objects in file order. Each tick's metadata holds
        'historical': True and its zero-based 'row_num'.
    """
    print(f"Loading data from: {raw_data_path}")

    # Count rows up front for the progress display. The context manager
    # closes the handle; binary mode avoids decoding the whole file.
    with open(raw_data_path, 'rb') as raw_file:
        total_rows = max(0, sum(1 for _ in raw_file) - 1)  # -1 for header
    print(f"Total rows in file: {total_rows:,}")

    # Never report more expected rows than the file actually contains.
    expected_rows = min(sample_size, total_rows) if sample_size else total_rows

    # Read only the header to find out whether a 'timestamp' column exists.
    header_columns = pd.read_csv(raw_data_path, nrows=0).columns

    # Read in chunks for memory efficiency
    chunk_size = 100000
    df_iterator = pd.read_csv(
        raw_data_path,
        chunksize=chunk_size,
        parse_dates=['timestamp'] if 'timestamp' in header_columns else None,
        nrows=sample_size
    )

    market_ticks = []
    processed_rows = 0

    for chunk_num, chunk in enumerate(df_iterator):
        # Plain dict records are much cheaper than DataFrame.iterrows(),
        # which builds a Series object for every row.
        for row in chunk.to_dict('records'):
            # Adjust based on your CSV columns
            price = float(row.get('close', row.get('price', 0)))
            tick = MarketTick(
                symbol='BTCUSDT',
                price=price,
                volume=float(row.get('volume', 0)),
                # NOTE(review): when the column is parsed as dates this is a
                # pandas Timestamp rather than a float - confirm consumers
                # accept that.
                timestamp=row.get('timestamp', processed_rows),  # Use row number if no timestamp
                # Derive missing quotes from the resolved price so files that
                # only provide 'price' do not end up with bid=-1 / ask=+1.
                bid=float(row.get('bid', price - 1)),
                ask=float(row.get('ask', price + 1)),
                exchange='binance',
                metadata={'historical': True, 'row_num': processed_rows}
            )
            market_ticks.append(tick)
            processed_rows += 1

        # Progress update every 10th chunk
        if chunk_num % 10 == 0:
            print(f"  Processed {processed_rows:,} / {expected_rows:,} rows")

        if sample_size and processed_rows >= sample_size:
            break

    print(f"✅ Loaded {len(market_ticks):,} ticks")
    return market_ticks

# Test the function with a small sample
# sample_ticks = await prepare_historical_data("your_data.csv", sample_size=1000)

# Step 4: Smart Sampling Strategy

ทำไม:

ไม่จำเป็นต้องใช้ทั้ง 36M records
ข้อมูลบางช่วงมีค่ามากกว่าช่วงอื่น
ประหยัดเวลาและ resources

กลยุทธ์การ Sampling:
1. Recent Data (60%)

ข้อมูล 3 เดือนล่าสุดสำคัญที่สุด
ตลาดเปลี่ยนแปลงตลอดเวลา patterns เก่าอาจไม่ relevant

2. High Volatility Periods (25%)

ช่วงที่ราคาผันผวนมาก = ช่วงสำคัญ
ระบบต้องเรียนรู้การรับมือกับ volatility
คำนวณ standard deviation หารด้วยค่าเฉลี่ยของราคา (coefficient of variation) ในแต่ละ window

3. Uniform Sampling (15%)

กระจายทั่วทั้ง dataset
ป้องกัน bias และให้เห็นภาพรวม

ผลลัพธ์:
Starting smart sampling: 36,000,000 -> 1,000,000 ticks
  Sampling 600,000 from recent 7,776,000 ticks
  Calculating volatility for 28,224,000 older ticks...
  Sampling from 100 high volatility periods
  Uniform sampling 150,000 ticks
✅ Sampled 1,000,000 unique ticks

In [None]:
# Cell 4: Implement smart sampling functions
def calculate_volatility_windows(ticks, window_size=300):
    """Compute relative price volatility for consecutive, non-overlapping windows.

    Args:
        ticks: Sequence of objects exposing a ``price`` attribute.
        window_size: Number of ticks per window; must be positive.

    Returns:
        One dict per full window with keys 'start_idx', 'end_idx' (exclusive),
        'volatility' (std / mean of the window's prices, 0.0 when the mean is
        zero), 'avg_price' and 'price_range'. A trailing partial window is
        ignored, and windows with fewer than two prices are skipped.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    volatilities = []

    # "+ 1" keeps the last full window when len(ticks) is an exact multiple
    # of window_size (previously that window was silently dropped).
    for start in range(0, len(ticks) - window_size + 1, window_size):
        prices = [t.price for t in ticks[start:start + window_size]]

        # A single price has no spread to measure.
        if len(prices) < 2:
            continue

        avg_price = np.mean(prices)
        # Avoid dividing by a zero mean (would yield nan/inf).
        volatility = np.std(prices) / avg_price if avg_price != 0 else 0.0
        volatilities.append({
            'start_idx': start,
            'end_idx': start + window_size,
            'volatility': volatility,
            'avg_price': avg_price,
            'price_range': max(prices) - min(prices)
        })

    return volatilities

def smart_sampling(all_ticks, target_samples=1000000):
    """
    Reduce a tick list to at most ``target_samples`` ticks using three strategies.

    1. Recent data (60%): evenly strided picks from the last 7,776,000 ticks.
    2. High volatility (25%): picks from the most volatile 300-tick windows of
       the older data (at most 100 windows).
    3. Uniform (15%): evenly strided picks across the whole list.

    A strategy whose share of ``target_samples`` truncates to zero is skipped
    instead of raising ZeroDivisionError, as is the volatility strategy when
    the older data is too short to form a window.

    Args:
        all_ticks: List of ticks exposing ``price`` and ``timestamp``
            (presumably in chronological order - the "recent" slice is simply
            the tail of the list).
        target_samples: Desired upper bound on the number of returned ticks.

    Returns:
        Ticks de-duplicated by timestamp and sorted by timestamp.
    """
    print(f"Starting smart sampling: {len(all_ticks):,} -> {target_samples:,} ticks")

    # Configuration
    recent_weight = 0.6        # 60% from recent data
    volatility_weight = 0.25   # 25% from high volatility periods
    uniform_weight = 0.15      # 15% uniform sampling
    max_volatility_windows = 100

    sampled_ticks = []

    # 1. Recent data sampling (last 3 months)
    # 90 days expressed in seconds, used as a tick count (assumes roughly one
    # tick per second).
    three_months_ticks = 90 * 24 * 60 * 60
    recent_start_idx = max(0, len(all_ticks) - three_months_ticks)
    recent_ticks = all_ticks[recent_start_idx:]

    recent_samples = int(target_samples * recent_weight)
    print(f"  Sampling {recent_samples:,} from recent {len(recent_ticks):,} ticks")
    if recent_samples > 0:
        recent_step = max(1, len(recent_ticks) // recent_samples)
        sampled_ticks.extend(recent_ticks[::recent_step][:recent_samples])

    # 2. High volatility period sampling
    older_ticks = all_ticks[:recent_start_idx]
    if older_ticks:
        volatility_samples = int(target_samples * volatility_weight)
        print(f"  Calculating volatility for {len(older_ticks):,} older ticks...")

        # Calculate volatility windows
        volatility_windows = calculate_volatility_windows(older_ticks, window_size=300)

        # Sort by volatility and sample from top windows. Never use more
        # windows than there are samples to hand out, so every chosen window
        # contributes at least one tick.
        volatility_windows.sort(key=lambda x: x['volatility'], reverse=True)
        window_limit = max(0, min(max_volatility_windows, volatility_samples))
        top_windows = volatility_windows[:window_limit]

        print(f"  Sampling from {len(top_windows)} high volatility periods")
        if top_windows:
            samples_per_window = volatility_samples // len(top_windows)
            for window in top_windows:
                window_ticks = older_ticks[window['start_idx']:window['end_idx']]
                step = max(1, len(window_ticks) // samples_per_window)
                sampled_ticks.extend(window_ticks[::step][:samples_per_window])

    # 3. Uniform sampling from entire dataset
    uniform_samples = int(target_samples * uniform_weight)
    print(f"  Uniform sampling {uniform_samples:,} ticks")
    if uniform_samples > 0:
        uniform_step = max(1, len(all_ticks) // uniform_samples)
        sampled_ticks.extend(all_ticks[::uniform_step][:uniform_samples])

    # Remove duplicates (keyed by timestamp, last occurrence wins) and sort
    sampled_ticks = list({tick.timestamp: tick for tick in sampled_ticks}.values())
    sampled_ticks.sort(key=lambda x: x.timestamp)

    # Trim to target size
    sampled_ticks = sampled_ticks[:target_samples]

    print(f"✅ Sampled {len(sampled_ticks):,} unique ticks")
    return sampled_ticks

# Step 5: Mock System Components

อะไร:
สร้าง mock versions ของ components หลัก:
1. MockAttentionLayer

จำลองระบบ Attention ที่เรียนรู้ความสำคัญของ features
มี 3 phases: LEARNING → SHADOW → ACTIVE
เก็บ feature importance scores

2. MockFeatureExtractor

จำลองการสกัด features จากข้อมูลดิบ
คำนวณ: price change, volatility, RSI, volume ratio, trend

3. MockRegimeDetector

จำลองการตรวจจับสภาวะตลาด
4 regimes: RANGING, TRENDING, VOLATILE, DORMANT

ทำไม:

ไม่ต้อง load ระบบทั้งหมด (ซึ่งซับซ้อนมาก)
ทดสอบ warm-up process ได้เร็วขึ้น
Mock ให้ behavior คล้ายของจริง

In [None]:
# Cell 5: Create mock system components for warm-up
class MockAttentionLayer:
    """Mock AttentionLayer for warm-up simulation.

    Counts observations, accumulates a per-feature importance score and moves
    through the phases LEARNING -> SHADOW -> ACTIVE as observations grow.

    Args:
        shadow_threshold: Observation count at which LEARNING becomes SHADOW.
        active_threshold: Observation count at which SHADOW becomes ACTIVE.

    Raises:
        ValueError: If the thresholds are not 0 < shadow_threshold < active_threshold.
    """
    def __init__(self, shadow_threshold=100000, active_threshold=150000):
        if not 0 < shadow_threshold < active_threshold:
            raise ValueError("thresholds must satisfy 0 < shadow_threshold < active_threshold")
        self.observations = 0
        self.phase = "LEARNING"
        self.feature_importance = defaultdict(float)
        self.shadow_threshold = shadow_threshold
        self.active_threshold = active_threshold

    async def process(self, features, regime, context):
        """Record one observation.

        Args:
            features: Mapping of feature name to numeric value.
            regime: Unused by the mock.
            context: Unused by the mock.
        """
        self.observations += 1

        # Update feature importance: magnitude-weighted running sum
        for feature, value in features.items():
            self.feature_importance[feature] += abs(value) * 0.001

        # Phase transitions (at most one per call)
        if self.observations >= self.shadow_threshold and self.phase == "LEARNING":
            self.phase = "SHADOW"
        elif self.observations >= self.active_threshold and self.phase == "SHADOW":
            self.phase = "ACTIVE"

    async def get_attention_state(self):
        """Return a snapshot dict: total_observations, phase, feature_importance."""
        return {
            'total_observations': self.observations,
            'phase': self.phase,
            'feature_importance': dict(self.feature_importance)
        }

    def get_learning_progress(self):
        """Return progress: 0-0.5 across LEARNING, 0.5-1.0 across SHADOW, 1.0 when ACTIVE."""
        if self.phase == "ACTIVE":
            return 1.0
        if self.phase == "SHADOW":
            # Interpolate over the actual width of the SHADOW phase so the
            # value approaches 1.0 at the ACTIVE threshold instead of
            # stalling at 0.75 and then jumping.
            shadow_span = self.active_threshold - self.shadow_threshold
            return 0.5 + (self.observations - self.shadow_threshold) / shadow_span * 0.5
        return self.observations / self.shadow_threshold * 0.5

class MockFeatureExtractor:
    """Rolling-buffer feature generator standing in for the real extractor."""
    def __init__(self):
        # Only the 100 most recent ticks are retained.
        self.buffer = deque(maxlen=100)

    async def update_buffer(self, tick):
        """Append one tick to the rolling buffer."""
        self.buffer.append(tick)

    async def extract_features(self):
        """Build a feature set from the 20 most recent ticks.

        Returns None until at least 20 ticks are buffered. Otherwise returns
        an object whose ``features`` attribute is a dict; price change and
        volatility come from the buffered prices, the remaining entries are
        random placeholders.
        """
        lookback = 20
        if len(self.buffer) < lookback:
            return None

        # Negative indices walk the newest `lookback` ticks, oldest first.
        prices = [self.buffer[pos].price for pos in range(-lookback, 0)]
        oldest, newest = prices[0], prices[-1]

        # Entries are filled in a fixed order so the random draws happen in
        # the same sequence on every call.
        features = {}
        features['price_change_5m'] = (newest - oldest) / oldest
        features['volatility_5m'] = np.std(prices) / np.mean(prices)
        features['rsi_14'] = 0.5 + np.random.normal(0, 0.1)  # Mock RSI
        features['volume_ratio'] = 1.0 + np.random.normal(0, 0.2)
        features['trend_strength'] = np.random.uniform(-0.5, 0.5)
        features['spread_bps'] = np.random.uniform(1, 5)

        # Lightweight ad-hoc container exposing `.features`.
        feature_set_type = type('FeatureSet', (), {'features': features})
        return feature_set_type()

class MockRegimeDetector:
    """Threshold-based stand-in for the real market regime detector."""
    def __init__(self):
        self.regimes = ['RANGING', 'TRENDING', 'VOLATILE', 'DORMANT']

    @staticmethod
    def _classify(features):
        """Map a feature dict to a regime label; volatility is checked first."""
        vol = features.get('volatility_5m', 0.001)
        if vol > 0.003:
            return 'VOLATILE'
        if abs(features.get('trend_strength', 0)) > 0.3:
            return 'TRENDING'
        if vol < 0.0005:
            return 'DORMANT'
        return 'RANGING'

    async def detect_regime(self, features):
        """Return ``(regime, confidence)`` for the given features.

        ``regime`` is an object exposing the label as ``.value``;
        ``confidence`` is a random number drawn from [0.5, 0.9).
        """
        label = self._classify(features)
        confidence = 0.5 + np.random.random() * 0.4

        regime_type = type('Regime', (), {'value': label})
        return regime_type(), confidence

print("✅ Mock components created")

# Step 6: Warm-up Process Functions

อะไร:
1. simulate_trade_performance()

จำลองผลการเทรด (กำไร/ขาดทุน)
ใช้ indicators ในการคำนวณ win probability
เช่น RSI ต่ำ + uptrend = โอกาสชนะสูง

2. process_tick_batch()

ประมวลผล ticks เป็นกลุ่ม
Extract features ทุก 5 ticks
ส่งผ่าน attention system

ทำไม:

Attention system ต้องการ feedback จากผลการเทรด
จำลองสถานการณ์เหมือนเทรดจริง
เรียนรู้ว่า features ไหนส่งผลต่อ performance

In [None]:
# Cell 6: Define warm-up process functions
def simulate_trade_performance(tick, features):
    """Produce a randomised trade outcome from a feature dict.

    Args:
        tick: Accepted for interface symmetry; not read.
        features: Mapping that may contain 'rsi_14', 'trend_strength' and
            'volatility_5m' (defaults 0.5, 0 and 0.001).

    Returns:
        Dict with 'win_rate' (the win probability used), 'profit' (random
        P&L), 'is_winner' and 'volatility'.
    """
    rsi = features.get('rsi_14', 0.5)
    trend = features.get('trend_strength', 0)
    volatility = features.get('volatility_5m', 0.001)

    # Favourable setups: oversold in an uptrend, or overbought in a downtrend.
    oversold_uptrend = rsi < 0.3 and trend > 0
    overbought_downtrend = rsi > 0.7 and trend < 0

    if oversold_uptrend or overbought_downtrend:
        win_probability = 0.65
    elif volatility > 0.003:
        # High volatility lowers the win rate.
        win_probability = 0.45
    else:
        win_probability = 0.5

    # First random draw decides the outcome...
    is_winner = np.random.random() < win_probability

    # ...second draw sizes the P&L; its spread widens with volatility.
    spread_scale = 1 + volatility * 100
    if is_winner:
        mean_pnl, base_sigma = 10, 5
    else:
        mean_pnl, base_sigma = -8, 3
    pnl = np.random.normal(mean_pnl, base_sigma * spread_scale)

    outcome = {}
    outcome['win_rate'] = win_probability
    outcome['profit'] = pnl
    outcome['is_winner'] = is_winner
    outcome['volatility'] = volatility
    return outcome

async def process_tick_batch(components, ticks, batch_name=""):
    """Feed ticks through the component pipeline and count observations.

    Every tick is pushed into the feature extractor's buffer. On every 5th
    tick (indices 0, 5, 10, ...) features are extracted; if a feature set is
    available, the regime is detected, a trade outcome is simulated and the
    result is passed to the attention component.

    Args:
        components: Dict with 'feature_extractor', 'regime_detector' and
            'attention' entries.
        ticks: Sequence of ticks exposing ``timestamp``.
        batch_name: Label used in the progress output.

    Returns:
        Number of observations handed to the attention component.
    """
    started_at = time.time()

    async def _observe(tick):
        """Run one extraction -> regime -> attention pass; True if it produced an observation."""
        feature_set = await components['feature_extractor'].extract_features()
        if not feature_set:
            return False

        regime, confidence = await components['regime_detector'].detect_regime(
            feature_set.features
        )
        performance = simulate_trade_performance(tick, feature_set.features)
        context = {
            'timestamp': tick.timestamp,
            'performance': performance,
            'regime_confidence': confidence
        }
        await components['attention'].process(
            feature_set.features,
            regime.value,
            context
        )
        return True

    observation_count = 0

    for index, tick in enumerate(ticks):
        await components['feature_extractor'].update_buffer(tick)

        # Only sample the pipeline on every 5th tick.
        if index % 5 == 0 and await _observe(tick):
            observation_count += 1

        # Periodic throughput report (skipped for the very first tick).
        if index > 0 and index % 10000 == 0:
            elapsed = time.time() - started_at
            rate = index / elapsed
            print(f"  {batch_name} Progress: {index:,}/{len(ticks):,} "
                  f"({index/len(ticks)*100:.1f}%) - {rate:.0f} ticks/sec")

    return observation_count

# Step 7: Main Warm-up Function

อะไร:
warmup_system() - ควบคุมกระบวนการ warm-up ทั้งหมด
3 Phases ของการ Warm-up:
Phase 1: Initial Learning (20%)

จุดประสงค์: เรียนรู้ patterns พื้นฐาน
ข้อมูล: 20% แรกของ dataset
เป้าหมาย: ให้ระบบคุ้นเคยกับข้อมูล

Phase 2: Regime Diversity (30%)

จุดประสงค์: เรียนรู้สภาวะตลาดที่หลากหลาย
ข้อมูล: แบ่งตาม volatility (low/medium/high)
เป้าหมาย: ให้ระบบเห็นทุก market conditions

Phase 3: Recent Market Conditions (50%)

จุดประสงค์: Focus ที่ตลาดปัจจุบัน
ข้อมูล: ข้อมูลล่าสุด 50%
เทคนิค: ลด skip rate (ข้อมูลหนาแน่นขึ้นเรื่อยๆ)

ผลลัพธ์:
🚀 Starting system warm-up with 1,000,000 ticks

📚 Phase 1: Initial Learning (200,000 ticks)
  Phase 1 Progress: 50,000/200,000 (25.0%) - 5,234 ticks/sec
  Observations: 40,000
  Phase: LEARNING
  Progress: 40.0%

🔄 Phase 2: Regime Diversity (300,000 ticks)
  Processing low_vol: 120,000 ticks
  Processing medium_vol: 100,000 ticks
  Processing high_vol: 80,000 ticks

📈 Phase 3: Recent Market Conditions (500,000 ticks)
  Chunk 1/4 (skip rate: 10): 12,500 ticks
  Chunk 2/4 (skip rate: 5): 25,000 ticks
  Chunk 3/4 (skip rate: 2): 62,500 ticks
  Chunk 4/4 (skip rate: 1): 125,000 ticks

✅ Warm-up completed!
  Total observations: 165,000
  Final phase: ACTIVE
  Learning progress: 100.0%

In [None]:
# Cell 7: Main warm-up orchestration
async def warmup_system(historical_ticks, target_observations=150000):
    """
    Run the three-stage warm-up over fresh mock components.

    Stage 1 processes the first 20% of the ticks, stage 2 groups the next 30%
    into volatility buckets (1000-tick windows) and processes up to 50,000
    ticks per bucket, and stage 3 processes the remainder in four chunks with
    skip rates 10, 5, 2 and 1.

    Args:
        historical_ticks: List of ticks exposing ``price``.
        target_observations: Only echoed in the log output.

    Returns:
        Tuple ``(components, final_state)``: the component dict and the final
        attention state snapshot.
    """
    total_ticks = len(historical_ticks)
    print(f"\n🚀 Starting system warm-up with {total_ticks:,} ticks")
    print(f"Target observations: {target_observations:,}")

    # Initialize mock components
    components = {
        'attention': MockAttentionLayer(),
        'feature_extractor': MockFeatureExtractor(),
        'regime_detector': MockRegimeDetector()
    }
    attention = components['attention']

    # ---- Phase 1: Initial Learning (20% of data) ----
    phase1_end = total_ticks // 5
    phase1_ticks = historical_ticks[:phase1_end]

    print(f"\n📚 Phase 1: Initial Learning ({len(phase1_ticks):,} ticks)")
    await process_tick_batch(components, phase1_ticks, "Phase 1")

    # Intermediate progress report
    state = await attention.get_attention_state()
    print(f"  Observations: {state['total_observations']:,}")
    print(f"  Phase: {state['phase']}")
    print(f"  Progress: {attention.get_learning_progress():.1%}")

    # ---- Phase 2: Regime Diversity (30% of data) ----
    phase2_end = phase1_end + (total_ticks * 3 // 10)
    phase2_ticks = historical_ticks[phase1_end:phase2_end]

    print(f"\n🔄 Phase 2: Regime Diversity ({len(phase2_ticks):,} ticks)")

    # Bucket windows by relative volatility; dict order is first-seen order,
    # which is also the order the buckets are processed in.
    volatility_groups = {}
    window_size = 1000

    for offset in range(0, len(phase2_ticks), window_size):
        window = phase2_ticks[offset:offset + window_size]
        if len(window) <= 100:
            # Too short to classify.
            continue

        window_prices = [t.price for t in window]
        vol = np.std(window_prices) / np.mean(window_prices)

        group = 'low_vol' if vol < 0.001 else ('medium_vol' if vol < 0.002 else 'high_vol')
        volatility_groups.setdefault(group, []).extend(window)

    # Process each volatility group (capped at 50,000 ticks per group)
    for group_name, group_ticks in volatility_groups.items():
        print(f"\n  Processing {group_name}: {len(group_ticks):,} ticks")
        await process_tick_batch(components, group_ticks[:50000], f"Phase 2 - {group_name}")

    # ---- Phase 3: Recent Market Conditions (remaining data) ----
    phase3_ticks = historical_ticks[phase2_end:]

    print(f"\n📈 Phase 3: Recent Market Conditions ({len(phase3_ticks):,} ticks)")

    # Later chunks use a smaller skip rate, i.e. denser sampling.
    skip_rates = [10, 5, 2, 1]
    chunk_total = len(skip_rates)
    chunk_size = len(phase3_ticks) // chunk_total

    for chunk_no, skip_rate in enumerate(skip_rates, start=1):
        start_idx = (chunk_no - 1) * chunk_size
        # The last chunk absorbs any remainder.
        end_idx = chunk_no * chunk_size if chunk_no < chunk_total else len(phase3_ticks)
        chunk = phase3_ticks[start_idx:end_idx:skip_rate]

        print(f"\n  Chunk {chunk_no}/{chunk_total} (skip rate: {skip_rate}): {len(chunk):,} ticks")
        await process_tick_batch(components, chunk, f"Phase 3 - Chunk {chunk_no}")

    # ---- Final state ----
    final_state = await attention.get_attention_state()

    print("\n✅ Warm-up completed!")
    print(f"  Total observations: {final_state['total_observations']:,}")
    print(f"  Final phase: {final_state['phase']}")
    print(f"  Learning progress: {attention.get_learning_progress():.1%}")

    # Feature importance, highest score first
    ranked = sorted(
        final_state['feature_importance'].items(),
        key=lambda pair: pair[1],
        reverse=True,
    )

    print("\n📊 Top Features by Importance:")
    for feature, score in ranked[:5]:
        print(f"  {feature}: {score:.4f}")

    return components, final_state

# Step 8: Visualization and Analysis

อะไร:
analyze_warmup_results() - สร้าง 4 กราฟเพื่อวิเคราะห์ผล
กราฟที่แสดง:

Price Distribution: การกระจายตัวของราคา
Volume Over Time: ปริมาณการซื้อขายตามเวลา
Feature Importance: features ไหนสำคัญที่สุด
Learning Progress: ความก้าวหน้าการเรียนรู้

ทำไม:

ตรวจสอบว่าข้อมูลครอบคลุม price ranges ต่างๆ
ดู feature importance เพื่อเข้าใจว่าระบบให้ความสำคัญกับอะไร
Validate ว่า learning progress เป็นไปตามคาด

In [None]:
# Cell 8: Visualization functions
import matplotlib.pyplot as plt

def analyze_warmup_results(components, historical_ticks):
    """Plot a 2x2 overview of the warm-up run and print summary statistics.

    Panels: price histogram, volume series (both using every 1000th tick),
    feature importance bars, and a simulated learning-progress curve.

    Args:
        components: Dict whose 'attention' entry provides the async
            ``get_attention_state()``.
        historical_ticks: List of ticks exposing ``price`` and ``volume``.

    Returns:
        None. Shows the figure and prints the summary.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # 1. Price distribution
    prices = [t.price for t in historical_ticks[::1000]]  # Sample every 1000th
    axes[0, 0].hist(prices, bins=50, alpha=0.7, color='blue')
    axes[0, 0].set_title('Price Distribution (Sampled)')
    axes[0, 0].set_xlabel('Price')
    axes[0, 0].set_ylabel('Frequency')

    # 2. Volume over time
    volumes = [t.volume for t in historical_ticks[::1000]]
    axes[0, 1].plot(volumes, alpha=0.7, color='green')
    axes[0, 1].set_title('Volume Over Time (Sampled)')
    axes[0, 1].set_xlabel('Time Index')
    axes[0, 1].set_ylabel('Volume')

    # 3. Feature importance
    # NOTE: calling asyncio.run from inside a running event loop relies on
    # the nest_asyncio patch applied in the notebook's setup cell.
    state = asyncio.run(components['attention'].get_attention_state())
    features = list(state['feature_importance'].keys())
    importance = list(state['feature_importance'].values())

    axes[1, 0].bar(features, importance, color='orange')
    axes[1, 0].set_title('Feature Importance')
    axes[1, 0].set_xlabel('Features')
    axes[1, 0].set_ylabel('Importance Score')
    axes[1, 0].tick_params(axis='x', rotation=45)

    # 4. Learning progress simulation
    observations = state['total_observations']
    # Aim for about 20 points. The step is clamped to 1 because fewer than 20
    # observations would otherwise give range() a step of 0 (ValueError).
    progress_step = max(1, observations // 20)
    progress_points = list(range(0, observations, progress_step))
    progress_values = [min(i/100000, 1.0) for i in progress_points]

    axes[1, 1].plot(progress_points, progress_values, 'r-', linewidth=2)
    axes[1, 1].set_title('Learning Progress')
    axes[1, 1].set_xlabel('Observations')
    axes[1, 1].set_ylabel('Progress')
    axes[1, 1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    # Summary statistics (price range and volume use the sampled values)
    print("\n📊 Warm-up Summary Statistics:")
    print(f"  Total ticks processed: {len(historical_ticks):,}")
    print(f"  Price range: ${min(prices):.2f} - ${max(prices):.2f}")
    print(f"  Average volume: {np.mean(volumes):.2f}")
    print(f"  Observations generated: {observations:,}")

# Usage: analyze_warmup_results(components, sampled_ticks)

# Step 9: Save Warm-up State

อะไร:
save_warmup_state() - บันทึกผลการ warm-up
สิ่งที่บันทึก:

Attention State: จำนวน observations, phase, feature importance
Learning Progress: ความก้าวหน้าการเรียนรู้
Feature Importance CSV: สำหรับวิเคราะห์เพิ่มเติม

ทำไม:

ไม่ต้อง warm-up ใหม่ทุกครั้ง
นำไปใช้กับระบบจริงได้
Track การเปลี่ยนแปลงของ feature importance

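ผลลัพธ์:
✅ Main warmup state saved to: attention_warmup_state.json
✅ Backup saved to: warmup_results/warmup_state_20240112_143052.json
✅ Feature importance saved to: warmup_results/feature_importance.csv
✅ Summary report saved to: warmup_results/warmup_summary.json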

In [ ]:
# Cell 9: Save warm-up results - Compatible with AttentionLearningLayer
async def save_warmup_state(components, output_dir="warmup_results"):
    """Save warm-up state for later use by AttentionLearningLayer.

    Writes four files:
      * ``attention_warmup_state.json`` in the current working directory
        (fixed name),
      * a timestamped copy of the same data in ``output_dir``,
      * ``feature_importance.csv`` in ``output_dir``,
      * ``warmup_summary.json`` in ``output_dir``.

    Args:
        components: Dict whose 'attention' entry provides the async
            ``get_attention_state()`` and ``get_learning_progress()``.
        output_dir: Directory for the backup, CSV and summary; created
            (including missing parents) if it does not exist.

    Returns:
        Path of the main state file.
    """
    # Create output directory; parents=True lets nested paths work.
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Get final states once so every file reports the same numbers.
    attention = components['attention']
    attention_state = await attention.get_attention_state()
    progress = attention.get_learning_progress()
    saved_at = datetime.now()

    # Prepare data compatible with AttentionLearningLayer
    warmup_data = {
        'timestamp': saved_at.isoformat(),
        'attention_state': {
            'observations': attention_state['total_observations'],
            'phase': attention_state['phase'],
        },
        'feature_importance': attention_state['feature_importance'],
        'learning_progress': progress,
        'config': {
            'target_observations': 150000,
            'phases': ['initial_learning', 'regime_diversity', 'recent_conditions'],
            'warmup_version': '1.0.0'
        },
        'metadata': {
            # NOTE(review): nothing in this notebook stores 'processed_ticks'
            # in `components`, so this is 0 unless a caller adds it.
            'total_ticks_processed': len(components.get('processed_ticks', [])),
            'warmup_duration_minutes': 0,  # Will be calculated
            # Placeholder constants, not measured values.
            'data_quality_score': 0.95,
            'regime_coverage': {
                'ranging': 0.25,
                'trending': 0.30,
                'volatile': 0.25,
                'dormant': 0.20
            }
        }
    }

    # Save main state file for AttentionLearningLayer
    main_state_file = Path("attention_warmup_state.json")  # Fixed filename for auto-detection
    with open(main_state_file, 'w', encoding='utf-8') as f:
        json.dump(warmup_data, f, indent=2)

    print(f"✅ Main warmup state saved to: {main_state_file}")

    # Save timestamped backup
    backup_file = output_path / f"warmup_state_{saved_at.strftime('%Y%m%d_%H%M%S')}.json"
    with open(backup_file, 'w', encoding='utf-8') as f:
        json.dump(warmup_data, f, indent=2)

    print(f"✅ Backup saved to: {backup_file}")

    # Save feature importance as CSV for analysis
    importance_df = pd.DataFrame(
        list(attention_state['feature_importance'].items()),
        columns=['feature', 'importance']
    ).sort_values('importance', ascending=False)

    importance_file = output_path / "feature_importance.csv"
    importance_df.to_csv(importance_file, index=False)
    print(f"✅ Feature importance saved to: {importance_file}")

    # Create summary report
    summary = {
        'warmup_summary': {
            'status': 'completed',
            'final_phase': attention_state['phase'],
            'total_observations': attention_state['total_observations'],
            'learning_progress': f"{progress:.1%}",
            'ready_for_production': progress > 0.8
        },
        'expected_acceleration': {
            'learning_phase_reduction': f"~{int(progress * 90)}%",
            'estimated_time_to_active': '2-7 days (vs 4-6 weeks fresh)',
            # Each threshold shrinks by up to 90% with progress, bounded
            # below by a fixed floor.
            'threshold_reductions': {
                'learning': f"2000 → ~{max(200, int(2000 * (1 - progress * 0.9)))}",
                'shadow': f"500 → ~{max(100, int(500 * (1 - progress * 0.9)))}",
                'active': f"200 → ~{max(50, int(200 * (1 - progress * 0.9)))}"
            }
        }
    }

    summary_file = output_path / "warmup_summary.json"
    with open(summary_file, 'w', encoding='utf-8') as f:
        json.dump(summary, f, indent=2)

    print(f"✅ Summary report saved to: {summary_file}")
    print(f"\n🎯 Next Steps:")
    print(f"   1. Copy 'attention_warmup_state.json' to your GridAttention root directory")
    print(f"   2. Start your GridAttention system normally")
    print(f"   3. System will auto-detect and load warmup state")
    print(f"   4. Enjoy {summary['expected_acceleration']['learning_phase_reduction']} faster learning!")

    return main_state_file

# Step 10: Complete Warm-up Workflow

อะไร:
run_complete_warmup() - รวมทุก steps เข้าด้วยกัน
ขั้นตอน:

Load ข้อมูลทั้งหมด
Smart sampling ลดเหลือ 1M
Run 3-phase warm-up
Analyze และ visualize
Save results

Timeline ทั่วไป:

Loading: 2-5 นาที (ขึ้นกับ disk speed)
Sampling: 1-2 นาที
Warm-up: 10-20 นาที
Total: ~30 นาที

In [None]:
# Cell 10: Execute complete warm-up workflow
async def run_complete_warmup(data_file_path,
                            sample_size=1000000,
                            target_observations=150000):
    """
    Run the full pipeline: load, sample, warm up, analyze, save.

    Args:
        data_file_path: CSV file passed to ``prepare_historical_data``.
        sample_size: Target tick count handed to ``smart_sampling``.
        target_observations: Forwarded to ``warmup_system``.

    Returns:
        Tuple ``(components, final_state)`` as returned by ``warmup_system``.

    Raises:
        Exception: Any error from a step is printed and then re-raised.
    """
    banner = "=" * 60
    print(banner)
    print("🚀 GRID TRADING SYSTEM WARM-UP")
    print(banner)

    started_at = time.time()

    try:
        # Step 1: Load data (the whole file; sampling happens in step 2)
        print("\n📁 Step 1: Loading historical data...")
        loaded_ticks = await prepare_historical_data(data_file_path, sample_size=None)

        # Step 2: Smart sampling
        print("\n🎯 Step 2: Smart sampling...")
        sampled_ticks = smart_sampling(loaded_ticks, target_samples=sample_size)

        # Step 3: Run warm-up
        print("\n🔥 Step 3: Running warm-up process...")
        components, final_state = await warmup_system(sampled_ticks, target_observations)

        # Step 4: Analyze results
        print("\n📊 Step 4: Analyzing results...")
        analyze_warmup_results(components, sampled_ticks)

        # Step 5: Save state
        print("\n💾 Step 5: Saving warm-up state...")
        state_path = await save_warmup_state(components)

        # Summary
        minutes_taken = (time.time() - started_at) / 60
        progress = components['attention'].get_learning_progress()
        print("\n" + banner)
        print("✅ WARM-UP COMPLETE!")
        print(f"  Total time: {minutes_taken:.1f} minutes")
        print(f"  Ticks processed: {len(sampled_ticks):,}")
        print(f"  Final observations: {final_state['total_observations']:,}")
        print(f"  Learning progress: {progress:.1%}")
        print(f"  State saved to: {state_path}")
        print(banner)

        return components, final_state

    except Exception as e:
        # Report the failure for the notebook user, then let it propagate.
        print(f"\n❌ Error during warm-up: {e}")
        raise

# Execute warm-up
# components, state = await run_complete_warmup("your_btcusdt_data.csv")

# Step 11: Quick Test Cell

อะไร:
test_warmup_with_synthetic_data() - ทดสอบด้วยข้อมูลจำลอง
ทำไม:

ทดสอบว่า code ทำงานได้ก่อนใช้ข้อมูลจริง
Debug ได้เร็วกว่า (10K ticks vs 36M)
ตรวจสอบ logic ของ warm-up process

การสร้างข้อมูลจำลอง:

Base price: $50,000
Trend: sine wave (จำลองการขึ้นลงเป็นรอบ)
Noise: random normal (จำลองความผันผวน)

In [None]:
# Cell 11: Quick test with synthetic data
async def test_warmup_with_synthetic_data(n_ticks=10000):
    """Smoke-test the warm-up on generated ticks.

    Prices follow a sine wave (amplitude 1000, around a 50,000 base) plus
    Gaussian noise; volume, bid and ask are randomised per tick.

    Args:
        n_ticks: Number of synthetic ticks to generate.

    Returns:
        Tuple ``(components, final_state)`` from ``warmup_system``.
    """
    print("🧪 Testing warm-up with synthetic data...")

    base_price = 50000

    def _make_tick(index):
        """Build one synthetic tick; random draws happen in a fixed order."""
        trend = np.sin(index / 1000) * 1000
        noise = np.random.normal(0, 50)
        price = base_price + trend + noise

        volume = 1000 + np.random.exponential(500)
        # One second per tick, ending at roughly "now".
        stamp = time.time() - (n_ticks - index)
        bid = price - np.random.uniform(1, 5)
        ask = price + np.random.uniform(1, 5)

        return MarketTick(
            symbol='BTCUSDT',
            price=price,
            volume=volume,
            timestamp=stamp,
            bid=bid,
            ask=ask,
            exchange='binance',
            metadata={'synthetic': True}
        )

    synthetic_ticks = [_make_tick(index) for index in range(n_ticks)]

    # Run warm-up
    components, final_state = await warmup_system(synthetic_ticks, target_observations=5000)

    print("\n✅ Test completed successfully!")
    return components, final_state

# Run test
# test_components, test_state = await test_warmup_with_synthetic_data()