# Optimized Data Loading Demo

This notebook demonstrates the optimized data loading capabilities using TensorFlow and PyTorch backends.

Features:
- Parallel file reading
- Prefetching for overlapping I/O and computation
- GPU-ready tensor outputs
- Automatic backend selection
- Performance comparison with original loader

In [None]:
import sys
from pathlib import Path
import time
import pandas as pd

# Put the parent of the current working directory first on sys.path so the
# project's `src` package can be imported below.
# NOTE(review): this assumes the notebook is started from a directory one
# level below the project root — confirm for your launch setup.
project_root = Path.cwd().parent
sys.path.insert(0, str(project_root))

# Project-local imports. Keep these after the sys.path change above; they are
# resolved through that entry unless `src` is importable some other way.
from src.data.storage.parquet_storage import ParquetStorage
from src.data.loaders.historical_loader import HistoricalDataLoader
from src.data.loaders.optimized_historical_loader import OptimizedHistoricalDataLoader

## Configuration

In [None]:
# Date window (YYYYMMDD strings) passed as start_date / end_date to every
# loader benchmarked in this notebook.
START_DATE, END_DATE = '20250101', '20250131'

# Single storage instance shared by all loaders below. The path is relative
# to the directory the notebook kernel runs in.
storage = ParquetStorage(base_dir='../data/inputs')

## 1. Original Loader (Sequential)

In [None]:
print("Testing Original HistoricalDataLoader (Sequential)...")

# Baseline loader; its timing is the reference for the speedups reported later.
original_loader = HistoricalDataLoader(storage)

# Only the load call is timed; constructing the loader is excluded.
load_began = time.perf_counter()
df_original = original_loader.load_historical_player_logs(start_date=START_DATE, end_date=END_DATE)
original_time = time.perf_counter() - load_began

original_rows = len(df_original)
print(
    "\nOriginal Loader:\n"
    f"  Rows loaded: {original_rows:,}\n"
    f"  Time: {original_time:.2f}s\n"
    f"  Throughput: {original_rows / original_time:,.0f} rows/second"
)

## 2. TensorFlow Backend (Parallel + Prefetch)

In [None]:
print("Testing OptimizedHistoricalDataLoader with TensorFlow backend...")

# An ImportError raised anywhere in this block is reported and the cell moves
# on; in that case tf_loader / df_tf / tf_time may be left undefined.
try:
    tf_options = dict(
        loader_type='tensorflow',
        num_workers=8,
        enable_prefetch=True,
        enable_cache=True,
    )
    tf_loader = OptimizedHistoricalDataLoader(storage, **tf_options)

    # Only the load call is timed; constructing the loader is excluded.
    load_began = time.perf_counter()
    df_tf = tf_loader.load_historical_player_logs(start_date=START_DATE, end_date=END_DATE)
    tf_time = time.perf_counter() - load_began

    tf_rows = len(df_tf)
    print(
        "\nTensorFlow Loader:\n"
        f"  Rows loaded: {tf_rows:,}\n"
        f"  Time: {tf_time:.2f}s\n"
        f"  Throughput: {tf_rows / tf_time:,.0f} rows/second\n"
        f"  Speedup: {original_time / tf_time:.2f}x"
    )

except ImportError as exc:
    print(f"TensorFlow not available: {exc}")

## 3. PyTorch Backend (Multi-worker DataLoader)

In [None]:
print("Testing OptimizedHistoricalDataLoader with PyTorch backend...")

# An ImportError raised anywhere in this block is reported and the cell moves
# on; in that case pytorch_loader / df_pytorch / pytorch_time may be left
# undefined.
try:
    pytorch_options = dict(
        loader_type='pytorch',
        num_workers=8,
        enable_prefetch=True,
        enable_cache=True,
    )
    pytorch_loader = OptimizedHistoricalDataLoader(storage, **pytorch_options)

    # Only the load call is timed; constructing the loader is excluded.
    load_began = time.perf_counter()
    df_pytorch = pytorch_loader.load_historical_player_logs(start_date=START_DATE, end_date=END_DATE)
    pytorch_time = time.perf_counter() - load_began

    pytorch_rows = len(df_pytorch)
    print(
        "\nPyTorch Loader:\n"
        f"  Rows loaded: {pytorch_rows:,}\n"
        f"  Time: {pytorch_time:.2f}s\n"
        f"  Throughput: {pytorch_rows / pytorch_time:,.0f} rows/second\n"
        f"  Speedup: {original_time / pytorch_time:.2f}x"
    )

except ImportError as exc:
    print(f"PyTorch not available: {exc}")

## 4. Auto Backend (Automatic Selection)

In [None]:
print("Testing OptimizedHistoricalDataLoader with auto backend selection...")

# Same settings as the explicit-backend cells, but the loader picks the backend.
auto_options = dict(
    loader_type='auto',
    num_workers=8,
    enable_prefetch=True,
    enable_cache=True,
)
auto_loader = OptimizedHistoricalDataLoader(storage, **auto_options)

# Show which backend the loader reports as active.
print(f"\nSelected backend: {auto_loader.active_backend}")

# Only the load call is timed; constructing the loader is excluded.
load_began = time.perf_counter()
df_auto = auto_loader.load_historical_player_logs(start_date=START_DATE, end_date=END_DATE)
auto_time = time.perf_counter() - load_began

auto_rows = len(df_auto)
print(
    "\nAuto Loader:\n"
    f"  Rows loaded: {auto_rows:,}\n"
    f"  Time: {auto_time:.2f}s\n"
    f"  Throughput: {auto_rows / auto_time:,.0f} rows/second\n"
    f"  Speedup: {original_time / auto_time:.2f}x"
)

## 5. Performance Summary

In [None]:
import matplotlib.pyplot as plt

# The TensorFlow and PyTorch benchmark cells catch ImportError, so their
# result variables may not exist on a machine without that backend. Look each
# result up by name and keep only the loaders that actually ran, instead of
# failing with NameError.
summary_candidates = [
    # (label shown in the chart, name of elapsed-time variable, name of DataFrame variable)
    ('Original', 'original_time', 'df_original'),
    ('TensorFlow', 'tf_time', 'df_tf'),
    ('PyTorch', 'pytorch_time', 'df_pytorch'),
]

notebook_vars = globals()
summary_rows = []
for label, time_name, frame_name in summary_candidates:
    elapsed = notebook_vars.get(time_name)
    frame = notebook_vars.get(frame_name)
    if elapsed is None or frame is None:
        print(f"Skipping {label}: no benchmark result available")
        continue
    summary_rows.append({
        'Loader': label,
        'Time (s)': elapsed,
        'Throughput (rows/s)': len(frame) / elapsed,
    })

# Passing the columns explicitly keeps the frame well-formed even with no rows.
results = pd.DataFrame(summary_rows, columns=['Loader', 'Time (s)', 'Throughput (rows/s)'])

if results.empty:
    print("\nNo loader results available. Run the benchmark cells above first.")
else:
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

    # One bar chart per metric: (axis, column, title, y-axis label, bar colour).
    panels = [
        (ax1, 'Time (s)', 'Load Time Comparison', 'Time (seconds)', 'steelblue'),
        (ax2, 'Throughput (rows/s)', 'Throughput Comparison', 'Rows per second', 'coral'),
    ]
    for axis, column, title, y_label, colour in panels:
        results.plot(x='Loader', y=column, kind='bar', ax=axis, legend=False, color=colour)
        axis.set_title(title, fontsize=14, fontweight='bold')
        axis.set_ylabel(y_label)
        axis.set_xlabel('')
        axis.grid(axis='y', alpha=0.3)

    plt.tight_layout()
    plt.show()

    print("\nPerformance Summary:")
    print(results.to_string(index=False))

## 6. Using Optimized Loader in Walk-Forward Backtest

In [None]:
# Printed verbatim, one entry per output line; an empty string yields a blank line.
example_lines = (
    "Example: Replace loader in WalkForwardBacktest",
    "",
    "Original code:",
    "  storage = SQLiteStorage(db_path)",
    "  loader = HistoricalDataLoader(storage)",
    "",
    "Optimized code:",
    "  storage = ParquetStorage(base_dir='data/inputs')",
    "  loader = OptimizedHistoricalDataLoader(",
    "      storage,",
    "      loader_type='auto',  # or 'tensorflow' / 'pytorch'",
    "      num_workers=8,",
    "      enable_prefetch=True,",
    "      enable_cache=True",
    "  )",
    "",
    "The OptimizedHistoricalDataLoader is a drop-in replacement.",
    "All existing methods work identically but with improved performance.",
)

for example_line in example_lines:
    print(example_line)

## 7. Advanced: TensorFlow Dataset for Training

In [None]:
# Demo sizing. The same values drive both the calls and the printed summary,
# so the two cannot drift apart.
TF_DEMO_MAX_ROWS = 10000
TF_DEMO_MAX_FEATURES = 50
TF_DEMO_BATCH_SIZE = 32

try:
    from src.data.loaders.tensorflow_loader import TensorFlowDataLoader

    print("Creating optimized TensorFlow dataset for model training...")

    tf_data_loader = TensorFlowDataLoader(
        prefetch_buffer_size='AUTOTUNE',
        num_parallel_reads=8,
        cache=True
    )

    # df_tf is only defined when the TensorFlow backend cell (section 2) ran
    # successfully. Check for it explicitly so a missing frame produces a
    # clear message instead of a NameError that the except clause below would
    # not catch.
    tf_frame = globals().get('df_tf')

    if tf_frame is None:
        print("df_tf is not defined. Run the TensorFlow backend cell (section 2) first.")
    else:
        # Feature columns are selected by name prefix.
        feature_cols = [col for col in tf_frame.columns if col.startswith(('rolling_', 'ewma_'))]
        selected_features = feature_cols[:TF_DEMO_MAX_FEATURES]

        if selected_features:
            dataset = tf_data_loader.create_cached_dataset(
                data=tf_frame.head(TF_DEMO_MAX_ROWS),
                feature_columns=selected_features,
                target_column='fpts',
                batch_size=TF_DEMO_BATCH_SIZE,
                shuffle=True
            )

            print("\nDataset created:")
            print(f"  Features: {len(selected_features)}")
            print(f"  Batch size: {TF_DEMO_BATCH_SIZE}")
            print("  Prefetching: AUTOTUNE")
            print("  Caching: Enabled")
            print("\nDataset is ready for model.fit() with optimized I/O")
        else:
            print("No feature columns found. Run feature engineering first.")

except ImportError:
    print("TensorFlow not available")

## 8. Advanced: PyTorch DataLoader for Training

In [None]:
# Demo sizing and DataLoader settings. The same values drive both the calls
# and the printed summary, so the two cannot drift apart.
PT_DEMO_MAX_ROWS = 10000
PT_DEMO_MAX_FEATURES = 50
PT_DEMO_BATCH_SIZE = 32
PT_DEMO_NUM_WORKERS = 4
PT_DEMO_PIN_MEMORY = True

try:
    from src.data.loaders.pytorch_loader import PyTorchDataLoader, ParquetDataset

    print("Creating optimized PyTorch DataLoader for model training...")

    pytorch_data_loader = PyTorchDataLoader(
        num_workers=PT_DEMO_NUM_WORKERS,
        pin_memory=PT_DEMO_PIN_MEMORY,
        persistent_workers=True,
        prefetch_factor=2
    )

    # df_pytorch is only defined when the PyTorch backend cell (section 3) ran
    # successfully. Check for it explicitly so a missing frame produces a
    # clear message instead of a NameError that the except clause below would
    # not catch.
    pytorch_frame = globals().get('df_pytorch')

    if pytorch_frame is None:
        print("df_pytorch is not defined. Run the PyTorch backend cell (section 3) first.")
    else:
        # Feature columns are selected by name prefix.
        feature_cols = [col for col in pytorch_frame.columns if col.startswith(('rolling_', 'ewma_'))]
        selected_features = feature_cols[:PT_DEMO_MAX_FEATURES]

        if selected_features:
            dataset = pytorch_data_loader.create_dataset_from_dataframe(
                data=pytorch_frame.head(PT_DEMO_MAX_ROWS),
                feature_columns=selected_features,
                target_column='fpts',
                cache_in_memory=True
            )

            dataloader = pytorch_data_loader.create_dataloader(
                dataset,
                batch_size=PT_DEMO_BATCH_SIZE,
                shuffle=True
            )

            print("\nDataLoader created:")
            print(f"  Features: {len(selected_features)}")
            print(f"  Batch size: {PT_DEMO_BATCH_SIZE}")
            print(f"  Workers: {PT_DEMO_NUM_WORKERS}")
            print(f"  Pin memory: {PT_DEMO_PIN_MEMORY}")
            print("\nDataLoader is ready for training loop with optimized multi-worker loading")
        else:
            print("No feature columns found. Run feature engineering first.")

except ImportError:
    print("PyTorch not available")

## Summary

Key takeaways:

1. **Drop-in replacement**: OptimizedHistoricalDataLoader works identically to HistoricalDataLoader
2. **Automatic backend selection**: Use `loader_type='auto'` for convenience
3. **Parallel file reading**: Multiple files loaded concurrently
4. **Prefetching**: I/O and computation overlap for better pipeline utilization
5. **GPU-ready**: The PyTorch DataLoader is created with `pin_memory=True` for faster host-to-GPU transfer
6. **Advanced usage**: TensorFlow/PyTorch datasets for model training with optimized batching

Expected speedup: 2-5x depending on file count and system I/O capabilities.