# 📂 Load Test Data from One Fold - ⚡ Fast Loading

This notebook demonstrates **ultra-fast loading** of test data from one cross-validation fold using optimized pickle format. Perfect for quick data inspection and understanding the structure of windowed time series data.

**🚀 Performance Features:**
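- **Pickle format**: ~10-50x faster than parquet loading (applies only when `config.SAVE_FORMAT` is set to pickle; otherwise the file in the configured format, e.g. parquet, is loaded)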
- **Optimized access**: Direct file loading without full pipeline
- **Immediate feedback**: Progress indicators at every step

In [None]:
# ============================================================
# LOAD TEST DATA FROM ONE FOLD - ⚡ ULTRA-FAST LOADING
# ============================================================
# Purpose: find the first `fold_*` directory (lexicographic order) under the
# windowed CV-splits folder, load its test windows, and print a short
# inspection report (shape, class, head, numeric statistics, metadata).
#
# On success the cell leaves these names in the notebook namespace:
#   test_dfs, test_classes      - everything loaded from the fold's test file
#   first_test_window           - test_dfs[0]
#   first_test_class            - test_classes[0]
#
# The file tried first is `test_windowed.<config.SAVE_FORMAT>`; if it does not
# exist, `test_windowed.parquet` is used as a fallback.
import time
start_time = time.time()  # taken before the imports below so they count toward the total time

print("⚡ Loading Test Data from One Fold - Fast Mode")
print("=" * 55)

# Import data loading utilities
import json
import os
import sys
import traceback
sys.path.append('src')

print("📦 Importing modules...", end=" ")
from src.data_persistence import DataPersistence
from src import config
import pandas as pd
import numpy as np
print("✅")

try:
    print("📂 Initializing data persistence...", end=" ")
    persistence = DataPersistence(base_dir=config.PROCESSED_DATA_DIR, verbose=False)
    print("✅")

    print(f"⚡ Using format: {config.SAVE_FORMAT} for maximum speed")

    # Check if windowed directory exists
    windowed_dir = os.path.join(persistence.cv_splits_dir, 'windowed')
    print(f"📁 Checking windowed directory: {windowed_dir}...", end=" ")

    if not os.path.exists(windowed_dir):
        print("❌")
        print("❌ No windowed data directory found. Please run Data Treatment notebook first.")
    else:
        print("✅")

        # Look for fold directories
        print("🔍 Looking for fold directories...", end=" ")
        fold_dirs = [
            d for d in os.listdir(windowed_dir)
            if d.startswith('fold_') and os.path.isdir(os.path.join(windowed_dir, d))
        ]
        fold_dirs.sort()
        print(f"✅ Found {len(fold_dirs)} folds")

        if not fold_dirs:
            print("❌ No fold directories found in windowed data.")
        else:
            # Load data from the first fold only
            first_fold_dir = fold_dirs[0]
            fold_path = os.path.join(windowed_dir, first_fold_dir)
            fold_num = first_fold_dir.replace('fold_', '')

            print(f"📊 Loading data from {first_fold_dir}...")

            # Candidate files: the configured format first, parquet as fallback.
            # When SAVE_FORMAT is itself 'parquet' both paths are the same file.
            configured_file = os.path.join(fold_path, f'test_windowed.{config.SAVE_FORMAT}')
            parquet_file = os.path.join(fold_path, 'test_windowed.parquet')

            # Load test data file
            load_start = time.time()
            loaded_format = None  # format of the file that was actually read

            if os.path.exists(configured_file):
                loaded_format = config.SAVE_FORMAT
                # Report the real format instead of a hard-coded "pickle" label.
                print(f"🧪 Loading test windows ({loaded_format} format)...", end=" ")
                # NOTE(review): if SAVE_FORMAT is pickle this presumably unpickles
                # the file - only load files produced by your own pipeline.
                test_dfs, test_classes = persistence._load_dataframes(configured_file, config.SAVE_FORMAT)
            elif os.path.exists(parquet_file):
                loaded_format = 'parquet'
                print("🧪 Loading test windows (parquet format)...", end=" ")
                test_dfs, test_classes = persistence._load_from_parquet(parquet_file)
            else:
                print("❌")
                print(f"❌ No test data file found in: {fold_path}")
                test_dfs = None
                test_classes = None

            if test_dfs is not None:
                load_time = time.time() - load_start
                print(f"✅ ({load_time:.3f}s)")

                print("✅ Successfully loaded data!")
                print(f"📊 Selected Fold: {fold_num}")
                print(f"🧪 Test windows available: {len(test_dfs)}")
                print(f"⚡ Loading time: {load_time:.3f} seconds")

                if test_dfs:
                    # Get the first test window
                    print("📋 Processing first test window...", end=" ")
                    first_test_window = test_dfs[0]
                    first_test_class = test_classes[0]
                    print("✅")

                    print("\n🪟 Sample Test Window (Window #1):")
                    print(f"   • Shape: {first_test_window.shape}")
                    print(f"   • Class: {first_test_class}")
                    print(f"   • Features: {list(first_test_window.columns)}")

                    # Display the head of the first window
                    print("\n📋 Head of First Test Window (first 5 rows):")
                    print("=" * 80)
                    print(first_test_window.head())

                    # Show some basic statistics for numeric columns only
                    print("\n📈 Basic Statistics for First Test Window:")
                    print("=" * 50)
                    numeric_cols = first_test_window.select_dtypes(include=[np.number]).columns
                    if len(numeric_cols) > 0:
                        print(first_test_window[numeric_cols].describe())
                    else:
                        print("No numeric columns found for statistics.")

                    # Load configuration if available
                    metadata_file = os.path.join(fold_path, 'windowing_metadata.json')
                    if os.path.exists(metadata_file):
                        print("⚙️ Loading configuration...", end=" ")
                        # Explicit encoding so the read does not depend on the OS locale.
                        with open(metadata_file, 'r', encoding='utf-8') as f:
                            metadata = json.load(f)
                        print("✅")

                        print("\n⚙️ Processing Configuration Used:")
                        print(f"   • Fold number: {metadata.get('fold_number', 'Unknown')}")
                        print(f"   • Train windows: {metadata.get('train_windows_count', 'Unknown')}")
                        print(f"   • Test windows: {metadata.get('test_windows_count', 'Unknown')}")

                    total_time = time.time() - start_time
                    print("\n⚡ Performance Summary:")
                    print(f"   • Total execution time: {total_time:.3f} seconds")
                    print(f"   • Data loading time: {load_time:.3f} seconds")
                    # Report the format that was really loaded (may be the parquet fallback).
                    print(f"   • File format: {loaded_format}")
                    if loaded_format != 'parquet':
                        # The speed-up claim is relative to parquet, so it is
                        # only shown when something other than parquet was read.
                        print("   • Speed benefit: ~10-50x faster than parquet")

                    print("\n🎯 Quick Access Summary:")
                    print("   • First test window: first_test_window")
                    print(f"   • First test class: first_test_class (value: {first_test_class})")
                    print(f"   • All test windows: {len(test_dfs)} total")
                    print(f"   • Window shape: {first_test_window.shape}")

                else:
                    print("⚠️ No test windows found in the selected fold")

except Exception as e:
    # Top-level notebook boundary: report the failure with troubleshooting
    # hints and a traceback instead of letting the cell die with a raw error.
    print(f"❌ Error loading data: {e}")
    print("\n💡 Troubleshooting:")
    print("   1. Make sure 'Data Treatment.ipynb' ran completely")
    print("   2. Check if windowed data was saved successfully")
    print("   3. Verify the processed_data directory exists")

    # Show directory status
    expected_dir = config.PROCESSED_DATA_DIR
    print(f"\n📁 Directory check: {expected_dir}")
    if os.path.exists(expected_dir):
        print("✅ Base directory exists")
        windowed_path = os.path.join(expected_dir, 'cv_splits', 'windowed')
        if os.path.exists(windowed_path):
            print("✅ Windowed directory exists")
            try:
                contents = os.listdir(windowed_path)
                print(f"📄 Contents: {contents}")
            except OSError:
                # Only filesystem errors are expected here; a bare `except`
                # would also swallow KeyboardInterrupt / SystemExit.
                print("❌ Cannot list directory contents")
        else:
            print(f"❌ Windowed directory missing: {windowed_path}")
    else:
        print("❌ Base directory does not exist")

    # Show the error traceback for debugging
    print("\n🔍 Detailed error:")
    traceback.print_exc()

⚡ Loading Test Data from One Fold - Fast Mode
📦 Importing modules... ✅
📂 Initializing data persistence... ✅
⚡ Using format: parquet for maximum speed
📁 Checking windowed directory: processed_data\cv_splits\windowed... ✅
🔍 Looking for fold directories... ✅ Found 3 folds
📊 Loading data from fold_1...
🧪 Loading test windows (pickle format)... 