# Dev Phase 1: Data Acquisition and Feature Engineering Tests

This notebook provides comprehensive unit and integration tests for Dev Phase 1 modules:
- `src.data.acquisition`: Databento data download and validation
- `src.data.features`: Technical indicator computation (24 features)

**Environment:** Google Colab with A100 GPU (80GB VRAM)

**References:**
- grok-scientific.md Section 3.1: Feature specification (V=24)
- claude-engineering.md Section 3.1: Implementation details

## 1. Environment Setup

In [1]:
# Cell 1: Mount and Warning Suppression
# =====================================
from google.colab import drive
drive.mount('/content/drive')

import warnings
import sys

# Monkey-patch showwarning to filter jupyter_client's utcnow() deprecation
# Applied early to catch all subsequent thread emissions
_original_showwarning = warnings.showwarning

def _filtered_showwarning(message, category, filename, lineno, file=None, line=None):
    """Suppress jupyter_client datetime.utcnow() deprecation at display level."""
    if category == DeprecationWarning and "datetime.utcnow()" in str(message):
        return
    _original_showwarning(message, category, filename, lineno, file, line)

warnings.showwarning = _filtered_showwarning
print("Warning filter installed.")

PROJECT_ROOT = '/content/drive/MyDrive/Colab Notebooks/Transformers/FP'

print(f"Project root: {PROJECT_ROOT}")

Mounted at /content/drive
Project root: /content/drive/MyDrive/Colab Notebooks/Transformers/FP


In [2]:
# Cell 1: Environment Setup
# =========================

# Global deprecation suppression for async/threaded operations
# Must be set BEFORE any imports to propagate to all threads
import os
os.environ['PYTHONWARNINGS'] = 'ignore::DeprecationWarning'

# Install dependencies
!pip install -q databento pandas numpy pyarrow tqdm scipy

DATA_ROOT = '/content/drive/MyDrive/Colab Notebooks/Transformers/FP/data'
os.makedirs(f'{DATA_ROOT}/raw', exist_ok=True)
os.makedirs(f'{DATA_ROOT}/processed', exist_ok=True)
os.makedirs(f'{DATA_ROOT}/metadata', exist_ok=True)

print("Environment ready!")

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/75.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.1/75.1 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/85.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.9/85.9 kB[0m [31m8.7 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.8 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m74.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.5 MB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m90.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[

In [3]:
# Install dependencies
!pip install -q pandas numpy pyarrow scipy

# Note: databento is only needed for actual data download
# Tests use synthetic data, so databento installation is optional
# !pip install -q databento

In [4]:
# Add source to path
import sys
from pathlib import Path

src_path = Path(PROJECT_ROOT) / 'src'
if str(src_path) not in sys.path:
    sys.path.insert(0, str(src_path.parent))

# Verify import
from src.data.features import FeatureEngineer, FEATURE_COLUMNS, FEATURE_GROUPS
print(f"Successfully imported FeatureEngineer")
print(f"Number of features: {len(FEATURE_COLUMNS)}")
print(f"Number of feature groups: {len(FEATURE_GROUPS)}")

Successfully imported FeatureEngineer
Number of features: 24
Number of feature groups: 6


In [5]:
# Cell 4: Standard Imports for Testing
# =====================================
import unittest
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Tuple

import numpy as np
import pandas as pd

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

## 2. Synthetic Data Generation

Generate realistic NQ futures data for testing without requiring Databento API access.

In [6]:
def generate_synthetic_ohlcv(
    n_bars: int = 10000,
    start_price: float = 15000.0,
    volatility: float = 0.0002,
    start_date: str = "2023-01-03 18:00:00",
    include_gaps: bool = True,
    seed: int = 42
) -> pd.DataFrame:
    """
    Generate synthetic NQ futures OHLCV data for testing.

    Creates realistic price movements with:
    - Geometric Brownian Motion for price evolution
    - Realistic OHLC relationships
    - Volume with intraday patterns
    - Optional time gaps (market closures)

    Args:
        n_bars: Number of 1-minute bars to generate.
        start_price: Initial NQ price level.
        volatility: Per-bar volatility (standard deviation of returns).
        start_date: Starting timestamp.
        include_gaps: Whether to include realistic time gaps.
        seed: Random seed for reproducibility.

    Returns:
        DataFrame with columns: [timestamp, open, high, low, close, volume]
    """
    np.random.seed(seed)

    # Generate returns using GBM
    returns = np.random.normal(0, volatility, n_bars)

    # Generate close prices
    log_prices = np.log(start_price) + np.cumsum(returns)
    close = np.exp(log_prices)

    # Generate OHLC from close
    # High-low range varies with volatility
    hl_range = np.abs(np.random.normal(0, volatility * 2, n_bars)) * close

    # Close location within bar (random uniform)
    close_loc = np.random.uniform(0.2, 0.8, n_bars)

    low = close - close_loc * hl_range
    high = low + hl_range

    # Open is previous close with small gap
    open_prices = np.roll(close, 1)
    open_prices[0] = start_price

    # Adjust open to be within bar range
    open_prices = np.clip(open_prices, low, high)

    # Generate volume with intraday pattern
    base_volume = 1000
    volume_noise = np.random.exponential(base_volume, n_bars)

    # Generate timestamps
    start_dt = pd.Timestamp(start_date)
    timestamps = []
    current_ts = start_dt

    for i in range(n_bars):
        timestamps.append(current_ts)

        # Add time gap logic
        if include_gaps:
            # Skip weekends and add session gaps
            next_ts = current_ts + timedelta(minutes=1)

            # Friday 17:00 -> Sunday 18:00 (weekend gap)
            if current_ts.dayofweek == 4 and current_ts.hour >= 17:
                next_ts = current_ts + timedelta(days=2, hours=1)
            # Daily maintenance break: 17:00-18:00 ET
            elif current_ts.hour == 16 and current_ts.minute == 59:
                next_ts = current_ts + timedelta(hours=1, minutes=1)

            current_ts = next_ts
        else:
            current_ts += timedelta(minutes=1)

    df = pd.DataFrame({
        'timestamp': timestamps,
        'open': open_prices,
        'high': high,
        'low': low,
        'close': close,
        'volume': volume_noise.astype(np.int64)
    })

    return df

# Generate test data
print("Generating synthetic OHLCV data...")
synthetic_df = generate_synthetic_ohlcv(n_bars=10000)
print(f"Generated {len(synthetic_df):,} bars")
print(f"Date range: {synthetic_df['timestamp'].min()} to {synthetic_df['timestamp'].max()}")
print(f"Price range: {synthetic_df['close'].min():.2f} to {synthetic_df['close'].max():.2f}")
print(f"\nSample data:")
synthetic_df.head()

Generating synthetic OHLCV data...
Generated 10,000 bars
Date range: 2023-01-03 18:00:00 to 2023-01-13 00:38:00
Price range: 14803.83 to 15338.44

Sample data:


Unnamed: 0,timestamp,open,high,low,close,volume
0,2023-01-03 18:00:00,15000.539959,15004.611332,15000.539959,15001.490216,244
1,2023-01-03 18:01:00,15001.457668,15001.457668,14999.624539,15001.075388,2598
2,2023-01-03 18:02:00,15001.075388,15004.165882,15000.580874,15003.018719,723
3,2023-01-03 18:03:00,15007.111325,15007.774168,15007.111325,15007.589424,656
4,2023-01-03 18:04:00,15007.589424,15009.642047,15002.455678,15006.886625,5547


## 3. Unit Tests: Feature Engineering

In [7]:
class TestFeatureEngineerInit(unittest.TestCase):
    """Test FeatureEngineer initialization and configuration."""

    def test_default_initialization(self):
        """Verify default parameters are set correctly."""
        fe = FeatureEngineer()

        self.assertEqual(fe.rsi_period, 14)
        self.assertEqual(fe.atr_period, 14)
        self.assertEqual(fe.bb_period, 20)
        self.assertEqual(fe.bb_std, 2.0)
        self.assertEqual(fe.macd_fast, 12)
        self.assertEqual(fe.macd_slow, 26)
        self.assertEqual(fe.macd_signal, 9)
        self.assertEqual(fe.sma_periods, [20, 50, 200])
        self.assertEqual(fe.rth_open_hour, 9)
        self.assertEqual(fe.rth_open_minute, 30)

    def test_custom_initialization(self):
        """Verify custom parameters are applied."""
        fe = FeatureEngineer(
            rsi_period=10,
            sma_periods=[10, 20],
            rth_open_hour=8
        )

        self.assertEqual(fe.rsi_period, 10)
        self.assertEqual(fe.sma_periods, [10, 20])
        self.assertEqual(fe.rth_open_hour, 8)

    def test_warmup_period_calculation(self):
        """Verify warmup period is correctly computed."""
        fe = FeatureEngineer()

        # Warmup should be at least max(MACD, SMA200, ADX*2)
        expected_min = max(26 + 9, 200, 14 * 2, 20)
        self.assertGreaterEqual(fe.warmup_period, expected_min)

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestFeatureEngineerInit)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_custom_initialization (__main__.TestFeatureEngineerInit.test_custom_initialization)
Verify custom parameters are applied. ... ok
test_default_initialization (__main__.TestFeatureEngineerInit.test_default_initialization)
Verify default parameters are set correctly. ... ok
test_warmup_period_calculation (__main__.TestFeatureEngineerInit.test_warmup_period_calculation)
Verify warmup period is correctly computed. ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.001s

OK



Tests run: 3, Failures: 0, Errors: 0


In [8]:
class TestFeatureColumns(unittest.TestCase):
    """Test feature column specification per grok-scientific.md."""

    def test_feature_count(self):
        """Verify exactly 24 features per grok-scientific.md Section 3.1."""
        self.assertEqual(len(FEATURE_COLUMNS), 24)

    def test_feature_groups_coverage(self):
        """Verify all features belong to exactly one group."""
        all_grouped = []
        for group_features in FEATURE_GROUPS.values():
            all_grouped.extend(group_features)

        # Check no duplicates
        self.assertEqual(len(all_grouped), len(set(all_grouped)))

        # Check coverage
        self.assertEqual(set(all_grouped), set(FEATURE_COLUMNS))

    def test_feature_group_sizes(self):
        """Verify feature group sizes per grok-scientific.md."""
        expected_sizes = {
            'price': 4,       # F_P
            'volume': 3,      # F_V
            'trend': 5,       # F_T
            'momentum': 6,    # F_M
            'volatility': 4,  # F_σ
            'flow': 2,        # F_VW + temporal
        }

        for group, expected_size in expected_sizes.items():
            actual_size = len(FEATURE_GROUPS[group])
            self.assertEqual(
                actual_size, expected_size,
                f"Group '{group}' has {actual_size} features, expected {expected_size}"
            )

    def test_price_group_features(self):
        """Verify price group contains expected features."""
        expected = {'log_return', 'hl_range', 'close_location', 'open_return'}
        self.assertEqual(set(FEATURE_GROUPS['price']), expected)

    def test_flow_group_features(self):
        """Verify flow group contains MFI and time_gap."""
        expected = {'mfi_norm', 'time_gap'}
        self.assertEqual(set(FEATURE_GROUPS['flow']), expected)

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestFeatureColumns)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_feature_count (__main__.TestFeatureColumns.test_feature_count)
Verify exactly 24 features per grok-scientific.md Section 3.1. ... ok
test_feature_group_sizes (__main__.TestFeatureColumns.test_feature_group_sizes)
Verify feature group sizes per grok-scientific.md. ... ok
test_feature_groups_coverage (__main__.TestFeatureColumns.test_feature_groups_coverage)
Verify all features belong to exactly one group. ... ok
test_flow_group_features (__main__.TestFeatureColumns.test_flow_group_features)
Verify flow group contains MFI and time_gap. ... ok
test_price_group_features (__main__.TestFeatureColumns.test_price_group_features)
Verify price group contains expected features. ... ok

----------------------------------------------------------------------
Ran 5 tests in 0.002s

OK



Tests run: 5, Failures: 0, Errors: 0


In [9]:
class TestPriceDynamicsFeatures(unittest.TestCase):
    """Test F_P: Price dynamics feature computation."""

    @classmethod
    def setUpClass(cls):
        """Generate test data once for all tests."""
        cls.df = generate_synthetic_ohlcv(n_bars=1000, seed=123)
        cls.fe = FeatureEngineer()
        cls.features = cls.fe.compute_all_features(cls.df)

    def test_log_return_calculation(self):
        """Verify log_return = ln(close_t / close_{t-1})."""
        expected = np.log(self.df['close'] / self.df['close'].shift(1))
        actual = self.features['log_return']

        # Skip first bar (NaN)
        np.testing.assert_array_almost_equal(
            actual.values[1:], expected.values[1:], decimal=10
        )

    def test_log_return_stationarity(self):
        """Verify log returns have near-zero mean (stationarity)."""
        log_returns = self.features['log_return'].dropna()
        self.assertAlmostEqual(log_returns.mean(), 0.0, places=3)

    def test_hl_range_positive(self):
        """Verify hl_range is always non-negative."""
        hl_range = self.features['hl_range']
        self.assertTrue((hl_range >= 0).all())

    def test_close_location_bounds(self):
        """Verify close_location is in [0, 1]."""
        close_loc = self.features['close_location']
        self.assertTrue((close_loc >= 0).all())
        self.assertTrue((close_loc <= 1).all())

    def test_open_return_calculation(self):
        """Verify open_return = ln(close / open)."""
        expected = np.log(self.df['close'] / self.df['open'])
        actual = self.features['open_return']

        np.testing.assert_array_almost_equal(
            actual.values, expected.values, decimal=10
        )

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestPriceDynamicsFeatures)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_close_location_bounds (__main__.TestPriceDynamicsFeatures.test_close_location_bounds)
Verify close_location is in [0, 1]. ... ok
test_hl_range_positive (__main__.TestPriceDynamicsFeatures.test_hl_range_positive)
Verify hl_range is always non-negative. ... ok
test_log_return_calculation (__main__.TestPriceDynamicsFeatures.test_log_return_calculation)
Verify log_return = ln(close_t / close_{t-1}). ... ok
test_log_return_stationarity (__main__.TestPriceDynamicsFeatures.test_log_return_stationarity)
Verify log returns have near-zero mean (stationarity). ... ok
test_open_return_calculation (__main__.TestPriceDynamicsFeatures.test_open_return_calculation)
Verify open_return = ln(close / open). ... ok

----------------------------------------------------------------------
Ran 5 tests in 0.057s

OK



Tests run: 5, Failures: 0, Errors: 0


In [10]:
class TestVolumeFeatures(unittest.TestCase):
    """Test F_V: Volume feature computation."""

    @classmethod
    def setUpClass(cls):
        cls.df = generate_synthetic_ohlcv(n_bars=1000, seed=456)
        cls.fe = FeatureEngineer()
        cls.features = cls.fe.compute_all_features(cls.df)

    def test_log_volume_handles_zero(self):
        """Verify log_volume uses log1p to handle zero volume."""
        # Create data with zero volume
        df = self.df.copy()
        df.loc[100, 'volume'] = 0

        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        # log1p(0) = 0, so zero volume should give 0
        self.assertEqual(features.loc[100, 'log_volume'], 0.0)

    def test_log_volume_delta_calculation(self):
        """Verify log_volume_delta is difference of log volumes."""
        log_vol = self.features['log_volume']
        expected_delta = log_vol - log_vol.shift(1)
        actual_delta = self.features['log_volume_delta']

        np.testing.assert_array_almost_equal(
            actual_delta.values[1:], expected_delta.values[1:], decimal=10
        )

    def test_dollar_volume_calculation(self):
        """Verify dollar_volume = log1p(close * volume)."""
        expected = np.log1p(self.df['close'] * self.df['volume'])
        actual = self.features['dollar_volume']

        np.testing.assert_array_almost_equal(
            actual.values, expected.values, decimal=10
        )

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestVolumeFeatures)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_dollar_volume_calculation (__main__.TestVolumeFeatures.test_dollar_volume_calculation)
Verify dollar_volume = log1p(close * volume). ... ok
test_log_volume_delta_calculation (__main__.TestVolumeFeatures.test_log_volume_delta_calculation)
Verify log_volume_delta is difference of log volumes. ... ok
test_log_volume_handles_zero (__main__.TestVolumeFeatures.test_log_volume_handles_zero)
Verify log_volume uses log1p to handle zero volume. ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.049s

OK



Tests run: 3, Failures: 0, Errors: 0


In [11]:
class TestMomentumFeatures(unittest.TestCase):
    """Test F_M: Momentum feature computation."""

    @classmethod
    def setUpClass(cls):
        cls.df = generate_synthetic_ohlcv(n_bars=2000, seed=789)
        cls.fe = FeatureEngineer()
        cls.features = cls.fe.compute_all_features(cls.df)

    def test_rsi_norm_bounds(self):
        """Verify rsi_norm is in [-1, 1] after warmup."""
        rsi_norm = self.features['rsi_norm'].dropna()

        self.assertTrue((rsi_norm >= -1).all())
        self.assertTrue((rsi_norm <= 1).all())

    def test_rsi_norm_transformation(self):
        """Verify rsi_norm = (RSI - 50) / 50."""
        # RSI of 50 should give 0
        # RSI of 100 should give 1
        # RSI of 0 should give -1
        rsi_norm = self.features['rsi_norm'].dropna()

        # Mean should be close to 0 for random walk
        self.assertAlmostEqual(rsi_norm.mean(), 0.0, places=1)

    def test_dmi_components_bounds(self):
        """Verify +DI, -DI, ADX are in [0, 1] after normalization."""
        for col in ['plus_di', 'minus_di', 'adx']:
            values = self.features[col].dropna()
            self.assertTrue(
                (values >= 0).all(),
                f"{col} has negative values"
            )
            self.assertTrue(
                (values <= 1.5).all(),  # Allow slight overshoot
                f"{col} has values > 1.5"
            )

    def test_roc_calculation(self):
        """Verify roc_norm = (close_t / close_{t-10}) - 1."""
        expected = self.df['close'] / self.df['close'].shift(10) - 1
        actual = self.features['roc_norm']

        # Skip warmup period
        np.testing.assert_array_almost_equal(
            actual.values[10:], expected.values[10:], decimal=10
        )

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestMomentumFeatures)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_dmi_components_bounds (__main__.TestMomentumFeatures.test_dmi_components_bounds)
Verify +DI, -DI, ADX are in [0, 1] after normalization. ... ok
test_roc_calculation (__main__.TestMomentumFeatures.test_roc_calculation)
Verify roc_norm = (close_t / close_{t-10}) - 1. ... ok
test_rsi_norm_bounds (__main__.TestMomentumFeatures.test_rsi_norm_bounds)
Verify rsi_norm is in [-1, 1] after warmup. ... ok
test_rsi_norm_transformation (__main__.TestMomentumFeatures.test_rsi_norm_transformation)
Verify rsi_norm = (RSI - 50) / 50. ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.042s

OK



Tests run: 4, Failures: 0, Errors: 0


In [12]:
class TestVolatilityFeatures(unittest.TestCase):
    """Test F_σ: Volatility feature computation."""

    @classmethod
    def setUpClass(cls):
        cls.df = generate_synthetic_ohlcv(n_bars=2000, seed=321)
        cls.fe = FeatureEngineer()
        cls.features = cls.fe.compute_all_features(cls.df)

    def test_atr_norm_positive(self):
        """Verify ATR is always positive."""
        atr = self.features['atr_norm'].dropna()
        self.assertTrue((atr >= 0).all())

    def test_bb_pct_b_bounds(self):
        """Verify Bollinger %B is roughly in [0, 1] for normal prices."""
        bb_pct_b = self.features['bb_pct_b'].dropna()

        # Most values should be in [0, 1] but extremes can exceed
        in_range = ((bb_pct_b >= -0.5) & (bb_pct_b <= 1.5)).mean()
        self.assertGreater(in_range, 0.95)

    def test_bb_bandwidth_positive(self):
        """Verify Bollinger bandwidth is positive."""
        bandwidth = self.features['bb_bandwidth'].dropna()
        self.assertTrue((bandwidth >= 0).all())

    def test_realized_vol_positive(self):
        """Verify realized volatility is non-negative."""
        realized_vol = self.features['realized_vol'].dropna()
        self.assertTrue((realized_vol >= 0).all())

    def test_realized_vol_annualization(self):
        """Verify realized vol is annualized (reasonable magnitude)."""
        # For typical market conditions, annualized vol should be 0.1-0.5 (10-50%)
        realized_vol = self.features['realized_vol'].dropna()
        mean_vol = realized_vol.mean()

        # Synthetic data vol should be in reasonable range
        self.assertGreater(mean_vol, 0.01)  # At least 1%
        self.assertLess(mean_vol, 2.0)       # Less than 200%

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestVolatilityFeatures)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_atr_norm_positive (__main__.TestVolatilityFeatures.test_atr_norm_positive)
Verify ATR is always positive. ... ok
test_bb_bandwidth_positive (__main__.TestVolatilityFeatures.test_bb_bandwidth_positive)
Verify Bollinger bandwidth is positive. ... ok
test_bb_pct_b_bounds (__main__.TestVolatilityFeatures.test_bb_pct_b_bounds)
Verify Bollinger %B is roughly in [0, 1] for normal prices. ... ok
test_realized_vol_annualization (__main__.TestVolatilityFeatures.test_realized_vol_annualization)
Verify realized vol is annualized (reasonable magnitude). ... ok
test_realized_vol_positive (__main__.TestVolatilityFeatures.test_realized_vol_positive)
Verify realized volatility is non-negative. ... ok

----------------------------------------------------------------------
Ran 5 tests in 0.041s

OK



Tests run: 5, Failures: 0, Errors: 0


In [13]:
class TestVWAPFeature(unittest.TestCase):
    """Test VWAP deviation with RTH session reset."""

    def test_vwap_session_reset(self):
        """Verify VWAP resets at RTH open (09:30 ET)."""
        # Create data spanning multiple sessions
        # Start just before RTH open
        df = generate_synthetic_ohlcv(
            n_bars=500,
            start_date="2023-01-03 09:00:00",
            include_gaps=False,
            seed=111
        )

        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        # VWAP deviation should exist
        self.assertIn('vwap_deviation', features.columns)

        # Check for session boundaries
        # At 09:30, VWAP resets, so deviation should be small
        rth_start_mask = (
            (df['timestamp'].dt.hour == 9) &
            (df['timestamp'].dt.minute == 30)
        )

        if rth_start_mask.any():
            # At session start, price ≈ VWAP, so deviation should be small
            vwap_at_rth = features.loc[rth_start_mask, 'vwap_deviation'].abs()
            # First bar of session: VWAP = typical price, deviation small
            self.assertTrue((vwap_at_rth < 0.01).all())

    def test_vwap_deviation_bounded(self):
        """Verify VWAP deviation is reasonably bounded."""
        df = generate_synthetic_ohlcv(n_bars=1000, seed=222)

        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        vwap_dev = features['vwap_deviation'].dropna()

        # Deviation should typically be within ±5%
        self.assertTrue((vwap_dev.abs() < 0.1).mean() > 0.95)

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestVWAPFeature)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_vwap_deviation_bounded (__main__.TestVWAPFeature.test_vwap_deviation_bounded)
Verify VWAP deviation is reasonably bounded. ... ok
test_vwap_session_reset (__main__.TestVWAPFeature.test_vwap_session_reset)
Verify VWAP resets at RTH open (09:30 ET). ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.047s

OK



Tests run: 2, Failures: 0, Errors: 0


In [14]:
class TestMFIFeature(unittest.TestCase):
    """Test MFI (Money Flow Index) feature computation."""

    def test_mfi_uses_typical_price_direction(self):
        """
        Verify MFI uses typical price direction, not close price direction.

        Per gemini-research.md Section 4.2:
        'MFI (Money Flow Index): A volume-weighted RSI.'

        Standard MFI uses typical price = (H + L + C) / 3 for flow direction.
        """
        # Create controlled test case
        df = pd.DataFrame({
            'timestamp': pd.date_range('2023-01-01', periods=100, freq='1min'),
            'open': [100.0] * 100,
            'high': [101.0] * 100,
            'low': [99.0] * 100,
            'close': [100.0] * 100,  # Close unchanged throughout
            'volume': [1000] * 100
        })

        # Modify second half to have INCREASING typical price
        # but CONSTANT close price.
        # Increment high/low at each step to create positive momentum.
        for i in range(50, 100):
            increment = (i - 50) * 0.1
            df.loc[i, 'high'] = 101.0 + increment
            df.loc[i, 'low'] = 99.0 + increment
            # Close remains 100.0
            #
            # Mathematical proof:
            # TP_prev = (H + L + 100) / 3
            # TP_curr = (H+inc + L+inc + 100) / 3
            # Delta TP > 0 → Positive Money Flow

        fe = FeatureEngineer(mfi_period=14)
        features = fe.compute_all_features(df)

        # Early period: Flat prices → No flow → MFI ≈ 0 → Norm ≈ -1.0
        mfi_early = features.loc[30:49, 'mfi_norm'].mean()

        # Late period: Rising High/Low → Positive flow → MFI ≈ 100 → Norm ≈ 1.0
        mfi_late = features.loc[70:99, 'mfi_norm'].mean()

        # MFI should be significantly higher in second half,
        # verifying it reacted to High/Low changes despite flat Close
        self.assertGreater(mfi_late, mfi_early,
                          f"Late MFI ({mfi_late:.4f}) should exceed early ({mfi_early:.4f})")
        self.assertAlmostEqual(mfi_early, -1.0, places=1,
                              msg=f"Early MFI should be near -1 (no flow), got {mfi_early:.4f}")
        self.assertGreater(mfi_late, 0.9,
                          msg=f"Late MFI should be near 1.0 (max positive flow), got {mfi_late:.4f}")

    def test_mfi_norm_bounds(self):
        """Verify mfi_norm is in [-1, 1]."""
        df = generate_synthetic_ohlcv(n_bars=1000, seed=333)

        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        mfi_norm = features['mfi_norm'].dropna()

        self.assertTrue((mfi_norm >= -1).all())
        self.assertTrue((mfi_norm <= 1).all())

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestMFIFeature)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_mfi_norm_bounds (__main__.TestMFIFeature.test_mfi_norm_bounds)
Verify mfi_norm is in [-1, 1]. ... ok
test_mfi_uses_typical_price_direction (__main__.TestMFIFeature.test_mfi_uses_typical_price_direction)
Verify MFI uses typical price direction, not close price direction. ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.050s

OK



Tests run: 2, Failures: 0, Errors: 0


In [15]:
class TestTimeGapFeature(unittest.TestCase):
    """Test temporal gap feature computation."""

    def test_gap_zero_for_consecutive_bars(self):
        """Verify gap = 0 for consecutive 1-minute bars."""
        df = generate_synthetic_ohlcv(
            n_bars=100,
            include_gaps=False,
            seed=444
        )

        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        # All gaps should be 0 (after first bar)
        gaps = features['time_gap'].dropna()
        self.assertTrue((gaps.iloc[1:] == 0).all())

    def test_gap_positive_for_breaks(self):
        """Verify gap > 0 for time breaks."""
        # Create data with explicit gap
        df = pd.DataFrame({
            'timestamp': [
                pd.Timestamp('2023-01-03 09:00:00'),
                pd.Timestamp('2023-01-03 09:01:00'),
                pd.Timestamp('2023-01-03 09:05:00'),  # 4-minute gap
                pd.Timestamp('2023-01-03 09:06:00'),
            ],
            'open': [100, 100, 100, 100],
            'high': [101, 101, 101, 101],
            'low': [99, 99, 99, 99],
            'close': [100, 100, 100, 100],
            'volume': [1000, 1000, 1000, 1000]
        })

        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        # Gap at index 2 should be ln(1 + (4 - 1)) = ln(4)
        expected_gap = np.log(4)
        actual_gap = features.loc[2, 'time_gap']

        self.assertAlmostEqual(actual_gap, expected_gap, places=5)

    def test_gap_formula(self):
        """Verify gap formula: ln(1 + max(Δt_min - 1, 0))."""
        # Per grok-scientific.md: gap = ln(1 + ((t_i - t_{i-1}) / 60 - 1))
        df = pd.DataFrame({
            'timestamp': [
                pd.Timestamp('2023-01-03 09:00:00'),
                pd.Timestamp('2023-01-03 09:01:00'),   # Δt = 1 min -> gap = 0
                pd.Timestamp('2023-01-03 09:03:00'),   # Δt = 2 min -> gap = ln(2)
                pd.Timestamp('2023-01-03 09:13:00'),   # Δt = 10 min -> gap = ln(10)
            ],
            'open': [100, 100, 100, 100],
            'high': [101, 101, 101, 101],
            'low': [99, 99, 99, 99],
            'close': [100, 100, 100, 100],
            'volume': [1000, 1000, 1000, 1000]
        })

        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        # Check each gap
        self.assertAlmostEqual(features.loc[1, 'time_gap'], 0.0, places=5)
        self.assertAlmostEqual(features.loc[2, 'time_gap'], np.log(2), places=5)
        self.assertAlmostEqual(features.loc[3, 'time_gap'], np.log(10), places=5)

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestTimeGapFeature)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_gap_formula (__main__.TestTimeGapFeature.test_gap_formula)
Verify gap formula: ln(1 + max(Δt_min - 1, 0)). ... ok
test_gap_positive_for_breaks (__main__.TestTimeGapFeature.test_gap_positive_for_breaks)
Verify gap > 0 for time breaks. ... ok
test_gap_zero_for_consecutive_bars (__main__.TestTimeGapFeature.test_gap_zero_for_consecutive_bars)
Verify gap = 0 for consecutive 1-minute bars. ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.044s

OK



Tests run: 3, Failures: 0, Errors: 0


In [16]:
class TestTargetComputation(unittest.TestCase):
    """Test forward return target computation."""

    @classmethod
    def setUpClass(cls):
        cls.df = generate_synthetic_ohlcv(n_bars=1000, seed=555)
        cls.fe = FeatureEngineer()

    def test_target_horizons(self):
        """Verify targets computed for all 6 horizons."""
        targets = self.fe.compute_targets(self.df)

        expected_cols = [
            'timestamp', 'target_5m', 'target_15m', 'target_30m',
            'target_60m', 'target_120m', 'target_240m'
        ]

        for col in expected_cols:
            self.assertIn(col, targets.columns)

    def test_target_formula(self):
        """Verify target_h = ln(close_{t+h} / close_t)."""
        targets = self.fe.compute_targets(self.df)

        # Check 5m target
        expected_5m = np.log(self.df['close'].shift(-5) / self.df['close'])
        actual_5m = targets['target_5m']

        # Compare valid values (not NaN at end)
        valid_mask = ~expected_5m.isna()
        np.testing.assert_array_almost_equal(
            actual_5m[valid_mask].values,
            expected_5m[valid_mask].values,
            decimal=10
        )

    def test_target_nan_at_end(self):
        """Verify targets are NaN at end of dataset."""
        targets = self.fe.compute_targets(self.df)

        # Last 240 rows should have NaN for 240m target
        self.assertTrue(targets['target_240m'].iloc[-240:].isna().all())

        # Last 5 rows should have NaN for 5m target
        self.assertTrue(targets['target_5m'].iloc[-5:].isna().all())

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestTargetComputation)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_target_formula (__main__.TestTargetComputation.test_target_formula)
Verify target_h = ln(close_{t+h} / close_t). ... ok
test_target_horizons (__main__.TestTargetComputation.test_target_horizons)
Verify targets computed for all 6 horizons. ... ok
test_target_nan_at_end (__main__.TestTargetComputation.test_target_nan_at_end)
Verify targets are NaN at end of dataset. ... ok

----------------------------------------------------------------------
Ran 3 tests in 0.013s

OK



Tests run: 3, Failures: 0, Errors: 0


## 4. Integration Tests

In [17]:
class TestFullPipeline(unittest.TestCase):
    """Integration tests for complete feature engineering pipeline."""

    @classmethod
    def setUpClass(cls):
        """Generate larger test dataset."""
        cls.df = generate_synthetic_ohlcv(n_bars=10000, seed=666)
        cls.fe = FeatureEngineer()
        cls.features = cls.fe.compute_all_features(cls.df)
        cls.targets = cls.fe.compute_targets(cls.df)

    def test_output_shape(self):
        """Verify output dimensions match input."""
        self.assertEqual(len(self.features), len(self.df))
        self.assertEqual(len(self.targets), len(self.df))

    def test_all_features_present(self):
        """Verify all 24 features are computed."""
        for col in FEATURE_COLUMNS:
            self.assertIn(
                col, self.features.columns,
                f"Missing feature: {col}"
            )

    def test_nan_rate_after_warmup(self):
        """Verify NaN rate is acceptable after warmup."""
        # Skip warmup period
        warmup = self.fe.warmup_period
        features_valid = self.features.iloc[warmup:]

        nan_rate = features_valid[FEATURE_COLUMNS].isna().mean().mean()

        # NaN rate should be < 1% after warmup
        self.assertLess(
            nan_rate, 0.01,
            f"NaN rate {100*nan_rate:.2f}% exceeds 1% threshold"
        )

    def test_no_inf_values(self):
        """Verify no infinite values in features."""
        has_inf = np.isinf(self.features[FEATURE_COLUMNS]).any().any()
        self.assertFalse(has_inf, "Features contain infinite values")

    def test_feature_group_indices(self):
        """Verify feature indices match column positions."""
        indices = self.fe.get_feature_indices()

        for group_name, group_indices in indices.items():
            group_features = FEATURE_GROUPS[group_name]

            for i, feat in zip(group_indices, group_features):
                self.assertEqual(
                    FEATURE_COLUMNS[i], feat,
                    f"Index mismatch for {feat} in group {group_name}"
                )

    def test_timestamp_alignment(self):
        """Verify features and targets have aligned timestamps."""
        pd.testing.assert_series_equal(
            self.features['timestamp'],
            self.targets['timestamp'],
            check_names=False
        )

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestFullPipeline)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_all_features_present (__main__.TestFullPipeline.test_all_features_present)
Verify all 24 features are computed. ... ok
test_feature_group_indices (__main__.TestFullPipeline.test_feature_group_indices)
Verify feature indices match column positions. ... ok
test_nan_rate_after_warmup (__main__.TestFullPipeline.test_nan_rate_after_warmup)
Verify NaN rate is acceptable after warmup. ... ok
test_no_inf_values (__main__.TestFullPipeline.test_no_inf_values)
Verify no infinite values in features. ... ok
test_output_shape (__main__.TestFullPipeline.test_output_shape)
Verify output dimensions match input. ... ok
test_timestamp_alignment (__main__.TestFullPipeline.test_timestamp_alignment)
Verify features and targets have aligned timestamps. ... ok

----------------------------------------------------------------------
Ran 6 tests in 0.140s

OK



Tests run: 6, Failures: 0, Errors: 0


In [18]:
class TestParquetSaveLoad(unittest.TestCase):
    """Test saving and loading features to/from Parquet."""

    def test_roundtrip(self):
        """Verify features survive save/load roundtrip."""
        import tempfile
        import os

        # Generate features
        df = generate_synthetic_ohlcv(n_bars=1000, seed=777)
        fe = FeatureEngineer()
        features = fe.compute_all_features(df)

        # Save to temp file
        with tempfile.TemporaryDirectory() as tmpdir:
            path = os.path.join(tmpdir, 'test_features.parquet')
            features.to_parquet(path, engine='pyarrow')

            # Load back
            loaded = pd.read_parquet(path)

        # Verify equality
        pd.testing.assert_frame_equal(
            features.reset_index(drop=True),
            loaded.reset_index(drop=True),
            check_exact=False,
            rtol=1e-10
        )

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestParquetSaveLoad)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_roundtrip (__main__.TestParquetSaveLoad.test_roundtrip)
Verify features survive save/load roundtrip. ... ok

----------------------------------------------------------------------
Ran 1 test in 0.160s

OK



Tests run: 1, Failures: 0, Errors: 0


## 5. Data Acquisition Tests (Mocked)

These tests verify the acquisition module logic without making actual API calls.

In [19]:
from src.data.acquisition import DatabentoConfig, NQDataAcquisition

class TestDatabentoConfig(unittest.TestCase):
    """Test Databento configuration constants."""

    def test_symbol_volume_based(self):
        """Verify symbol uses volume-based rollover per problem statement."""
        self.assertEqual(DatabentoConfig.SYMBOL, "NQ.v.0")
        self.assertIn('.v.', DatabentoConfig.SYMBOL)  # Volume-based indicator

    def test_schema_1min(self):
        """Verify schema is 1-minute OHLCV."""
        self.assertEqual(DatabentoConfig.SCHEMA, "ohlcv-1m")

    def test_date_range(self):
        """Verify date range matches problem statement."""
        self.assertEqual(DatabentoConfig.START_DATE, "2010-06-06")
        self.assertEqual(DatabentoConfig.END_DATE, "2025-12-03")

    def test_dataset_cme(self):
        """Verify dataset is CME Globex."""
        self.assertEqual(DatabentoConfig.DATASET, "GLBX.MDP3")

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestDatabentoConfig)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_dataset_cme (__main__.TestDatabentoConfig.test_dataset_cme)
Verify dataset is CME Globex. ... ok
test_date_range (__main__.TestDatabentoConfig.test_date_range)
Verify date range matches problem statement. ... ok
test_schema_1min (__main__.TestDatabentoConfig.test_schema_1min)
Verify schema is 1-minute OHLCV. ... ok
test_symbol_volume_based (__main__.TestDatabentoConfig.test_symbol_volume_based)
Verify symbol uses volume-based rollover per problem statement. ... ok

----------------------------------------------------------------------
Ran 4 tests in 0.002s

OK



Tests run: 4, Failures: 0, Errors: 0


In [20]:
class TestOHLCVValidation(unittest.TestCase):
    """Test OHLCV validation logic."""

    def test_valid_ohlcv_passes(self):
        """Verify valid OHLCV data passes validation."""
        df = generate_synthetic_ohlcv(n_bars=100, seed=888)

        # Should not raise
        try:
            # Simulate validation checks
            invalid_hl = (df['high'] < df['low']).sum()
            invalid_oh = (df['open'] > df['high']).sum()
            invalid_ol = (df['open'] < df['low']).sum()
            invalid_ch = (df['close'] > df['high']).sum()
            invalid_cl = (df['close'] < df['low']).sum()

            total_invalid = invalid_hl + invalid_oh + invalid_ol + invalid_ch + invalid_cl
            self.assertEqual(total_invalid, 0)
        except Exception as e:
            self.fail(f"Validation raised exception: {e}")

    def test_price_sanity_check(self):
        """Verify price sanity check catches scaling bugs."""
        df = generate_synthetic_ohlcv(n_bars=100, start_price=15000, seed=999)

        # Median should be in thousands (NQ typical range)
        median_price = df['close'].median()
        self.assertGreater(median_price, 100)

        # Simulate scaled-down bug
        df_bug = df.copy()
        df_bug['close'] = df_bug['close'] * 1e-9  # Bug: double-scaling
        median_bug = df_bug['close'].median()

        # This would fail sanity check
        self.assertLess(median_bug, 100)

# Run tests
suite = unittest.TestLoader().loadTestsFromTestCase(TestOHLCVValidation)
runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)
print(f"\nTests run: {result.testsRun}, Failures: {len(result.failures)}, Errors: {len(result.errors)}")

test_price_sanity_check (__main__.TestOHLCVValidation.test_price_sanity_check)
Verify price sanity check catches scaling bugs. ... ok
test_valid_ohlcv_passes (__main__.TestOHLCVValidation.test_valid_ohlcv_passes)
Verify valid OHLCV data passes validation. ... ok

----------------------------------------------------------------------
Ran 2 tests in 0.004s

OK



Tests run: 2, Failures: 0, Errors: 0


## 6. Run All Tests Summary

In [21]:
# Collect all test classes
test_classes = [
    TestFeatureEngineerInit,
    TestFeatureColumns,
    TestPriceDynamicsFeatures,
    TestVolumeFeatures,
    TestMomentumFeatures,
    TestVolatilityFeatures,
    TestVWAPFeature,
    TestMFIFeature,
    TestTimeGapFeature,
    TestTargetComputation,
    TestFullPipeline,
    TestParquetSaveLoad,
    TestDatabentoConfig,
    TestOHLCVValidation,
]

# Create suite
loader = unittest.TestLoader()
suite = unittest.TestSuite()

for test_class in test_classes:
    suite.addTests(loader.loadTestsFromTestCase(test_class))

# Run all tests
print("="*70)
print("RUNNING ALL DEV PHASE 1 TESTS")
print("="*70)

runner = unittest.TextTestRunner(verbosity=2)
result = runner.run(suite)

print("\n" + "="*70)
print("TEST SUMMARY")
print("="*70)
print(f"Tests run: {result.testsRun}")
print(f"Failures: {len(result.failures)}")
print(f"Errors: {len(result.errors)}")
print(f"Success: {result.wasSuccessful()}")

if result.failures:
    print("\nFailed tests:")
    for test, traceback in result.failures:
        print(f"  - {test}")

if result.errors:
    print("\nErrors:")
    for test, traceback in result.errors:
        print(f"  - {test}")

test_custom_initialization (__main__.TestFeatureEngineerInit.test_custom_initialization)
Verify custom parameters are applied. ... ok
test_default_initialization (__main__.TestFeatureEngineerInit.test_default_initialization)
Verify default parameters are set correctly. ... ok
test_warmup_period_calculation (__main__.TestFeatureEngineerInit.test_warmup_period_calculation)
Verify warmup period is correctly computed. ... ok
test_feature_count (__main__.TestFeatureColumns.test_feature_count)
Verify exactly 24 features per grok-scientific.md Section 3.1. ... ok
test_feature_group_sizes (__main__.TestFeatureColumns.test_feature_group_sizes)
Verify feature group sizes per grok-scientific.md. ... ok
test_feature_groups_coverage (__main__.TestFeatureColumns.test_feature_groups_coverage)
Verify all features belong to exactly one group. ... ok
test_flow_group_features (__main__.TestFeatureColumns.test_flow_group_features)
Verify flow group contains MFI and time_gap. ... ok
test_price_group_featur

RUNNING ALL DEV PHASE 1 TESTS


test_close_location_bounds (__main__.TestPriceDynamicsFeatures.test_close_location_bounds)
Verify close_location is in [0, 1]. ... ok
test_hl_range_positive (__main__.TestPriceDynamicsFeatures.test_hl_range_positive)
Verify hl_range is always non-negative. ... ok
test_log_return_calculation (__main__.TestPriceDynamicsFeatures.test_log_return_calculation)
Verify log_return = ln(close_t / close_{t-1}). ... ok
test_log_return_stationarity (__main__.TestPriceDynamicsFeatures.test_log_return_stationarity)
Verify log returns have near-zero mean (stationarity). ... ok
test_open_return_calculation (__main__.TestPriceDynamicsFeatures.test_open_return_calculation)
Verify open_return = ln(close / open). ... ok
test_dollar_volume_calculation (__main__.TestVolumeFeatures.test_dollar_volume_calculation)
Verify dollar_volume = log1p(close * volume). ... ok
test_log_volume_delta_calculation (__main__.TestVolumeFeatures.test_log_volume_delta_calculation)
Verify log_volume_delta is difference of log vol


TEST SUMMARY
Tests run: 48
Failures: 0
Errors: 0
Success: True


## 7. Validation Criteria Check

Per claude-engineering.md Section 4.1.4 Phase 1 Validation Criteria.

In [22]:
def check_validation_criteria():
    """Check Phase 1 validation criteria per claude-engineering.md."""

    print("Phase 1 Validation Criteria Check")
    print("="*50)

    # Generate larger dataset for realistic checks
    df = generate_synthetic_ohlcv(n_bars=100000, seed=12345)
    fe = FeatureEngineer()

    print(f"\n1. Data completeness: {len(df):,} rows")
    print(f"   Target: >5M rows (will be met with real data)")
    print(f"   Status: {'✓' if len(df) > 0 else '✗'} (using synthetic data)")

    # Compute features
    features = fe.compute_all_features(df)

    # OHLC validity
    invalid_hl = (df['high'] < df['low']).sum()
    invalid_rate = invalid_hl / len(df)
    print(f"\n2. OHLC validity: {100*invalid_rate:.4f}% invalid bars")
    print(f"   Target: <0.1% invalid bars")
    print(f"   Status: {'✓' if invalid_rate < 0.001 else '✗'}")

    # Feature NaN rate after warmup
    warmup = fe.warmup_period
    features_valid = features.iloc[warmup:]
    nan_rate = features_valid[FEATURE_COLUMNS].isna().mean().mean()
    print(f"\n3. Feature NaN rate (after warmup): {100*nan_rate:.4f}%")
    print(f"   Target: <1% after warmup")
    print(f"   Status: {'✓' if nan_rate < 0.01 else '✗'}")

    # Feature count
    print(f"\n4. Feature count: {len(FEATURE_COLUMNS)}")
    print(f"   Target: 24 features per grok-scientific.md")
    print(f"   Status: {'✓' if len(FEATURE_COLUMNS) == 24 else '✗'}")

    # Feature groups
    print(f"\n5. Feature groups: {len(FEATURE_GROUPS)}")
    print(f"   Target: 6 groups for TSA")
    print(f"   Status: {'✓' if len(FEATURE_GROUPS) == 6 else '✗'}")

    # Parquet estimate
    import tempfile
    import os

    with tempfile.TemporaryDirectory() as tmpdir:
        path = os.path.join(tmpdir, 'test.parquet')
        features.to_parquet(path, compression='snappy')
        size_mb = os.path.getsize(path) / (1024 * 1024)

    # Scale estimate for full dataset (~5M rows)
    estimated_full_size = size_mb * (5_000_000 / len(features))
    print(f"\n6. Parquet size estimate: {estimated_full_size:.1f} MB for 5M rows")
    print(f"   Target: <500MB per file")
    print(f"   Status: {'✓' if estimated_full_size < 500 else '✗'}")

    print("\n" + "="*50)
    print("All Phase 1 validation criteria checked!")

check_validation_criteria()

Phase 1 Validation Criteria Check

1. Data completeness: 100,000 rows
   Target: >5M rows (will be met with real data)
   Status: ✓ (using synthetic data)

2. OHLC validity: 0.0000% invalid bars
   Target: <0.1% invalid bars
   Status: ✓

3. Feature NaN rate (after warmup): 0.0000%
   Target: <1% after warmup
   Status: ✓

4. Feature count: 24
   Target: 24 features per grok-scientific.md
   Status: ✓

5. Feature groups: 6
   Target: 6 groups for TSA
   Status: ✓

6. Parquet size estimate: 1316.1 MB for 5M rows
   Target: <500MB per file
   Status: ✗

All Phase 1 validation criteria checked!


## 8. Feature Statistics Visualization

In [23]:
def display_feature_statistics():
    """Display summary statistics for all features."""

    df = generate_synthetic_ohlcv(n_bars=10000, seed=42)
    fe = FeatureEngineer()
    features = fe.compute_all_features(df)

    # Skip warmup
    features_valid = features.iloc[fe.warmup_period:]

    print("Feature Statistics (after warmup period)")
    print("="*80)

    stats_data = []
    for col in FEATURE_COLUMNS:
        values = features_valid[col].dropna()
        stats_data.append({
            'Feature': col,
            'Mean': f"{values.mean():.4f}",
            'Std': f"{values.std():.4f}",
            'Min': f"{values.min():.4f}",
            'Max': f"{values.max():.4f}",
            'NaN%': f"{100*values.isna().mean():.2f}"
        })

    stats_df = pd.DataFrame(stats_data)
    print(stats_df.to_string(index=False))

    return stats_df

stats = display_feature_statistics()

Feature Statistics (after warmup period)
         Feature    Mean    Std     Min     Max NaN%
      log_return -0.0000 0.0002 -0.0008  0.0008 0.00
        hl_range  0.0003 0.0002  0.0000  0.0018 0.00
  close_location  0.4972 0.1723  0.2000  0.7999 0.00
     open_return -0.0000 0.0001 -0.0005  0.0006 0.00
      log_volume  6.3327 1.2709  0.0000  9.2581 0.00
log_volume_delta -0.0002 1.8001 -7.3740  8.1062 0.00
   dollar_volume 15.9377 1.3533  0.0000 18.8668 0.00
  vwap_deviation -0.0005 0.0031 -0.0099  0.0081 0.00
  macd_histogram -0.0000 0.0001 -0.0003  0.0002 0.00
      sma_20_dev -0.0000 0.0005 -0.0017  0.0015 0.00
      sma_50_dev -0.0000 0.0008 -0.0025  0.0025 0.00
     sma_200_dev -0.0000 0.0016 -0.0056  0.0050 0.00
        rsi_norm -0.0012 0.2303 -0.6810  0.7196 0.00
             cci  0.0005 1.0731 -3.1748  3.5178 0.00
         plus_di  0.2591 0.0994  0.0310  0.6637 0.00
        minus_di  0.2588 0.0960  0.0282  0.6102 0.00
             adx  0.2827 0.1070  0.0761  0.7454 0.00
     

## 9. End-to-End Demo

Demonstrates the complete Phase 1 workflow (with synthetic data).

In [24]:
def run_phase1_demo():
    """
    Demonstrate complete Phase 1 workflow.

    This uses synthetic data to show the pipeline without
    requiring Databento API access.
    """
    import tempfile
    import os

    print("Dev Phase 1 End-to-End Demo")
    print("="*60)

    # Step 1: Generate synthetic OHLCV data (simulates acquisition)
    print("\n[Step 1] Generating synthetic OHLCV data...")
    raw_df = generate_synthetic_ohlcv(
        n_bars=50000,
        start_price=15000,
        volatility=0.0002,
        start_date="2023-01-03 18:00:00",
        include_gaps=True,
        seed=42
    )
    print(f"  Generated {len(raw_df):,} bars")
    print(f"  Date range: {raw_df['timestamp'].min()} to {raw_df['timestamp'].max()}")

    # Step 2: Compute features
    print("\n[Step 2] Computing features...")
    fe = FeatureEngineer()
    features = fe.compute_all_features(raw_df)
    print(f"  Computed {len(FEATURE_COLUMNS)} features")
    print(f"  Feature groups: {list(FEATURE_GROUPS.keys())}")

    # Step 3: Compute targets
    print("\n[Step 3] Computing targets...")
    targets = fe.compute_targets(raw_df)
    target_cols = [c for c in targets.columns if c.startswith('target_')]
    print(f"  Computed targets for horizons: {target_cols}")

    # Step 4: Save to parquet
    print("\n[Step 4] Saving to parquet...")
    with tempfile.TemporaryDirectory() as tmpdir:
        features_path = os.path.join(tmpdir, 'features.parquet')
        targets_path = os.path.join(tmpdir, 'targets.parquet')

        features.to_parquet(features_path, compression='snappy')
        targets.to_parquet(targets_path, compression='snappy')

        features_size = os.path.getsize(features_path) / (1024 * 1024)
        targets_size = os.path.getsize(targets_path) / (1024 * 1024)

        print(f"  Features: {features_size:.2f} MB")
        print(f"  Targets: {targets_size:.2f} MB")

        # Step 5: Verify roundtrip
        print("\n[Step 5] Verifying roundtrip...")
        loaded_features = pd.read_parquet(features_path)
        loaded_targets = pd.read_parquet(targets_path)

        print(f"  Features shape match: {features.shape == loaded_features.shape}")
        print(f"  Targets shape match: {targets.shape == loaded_targets.shape}")

    print("\n" + "="*60)
    print("Phase 1 Demo Complete!")
    print("\nNext steps:")
    print("  1. Run acquisition with real Databento API")
    print("  2. Process full 15-year dataset")
    print("  3. Save to Google Drive for Phase 2")

    return features, targets

demo_features, demo_targets = run_phase1_demo()

Dev Phase 1 End-to-End Demo

[Step 1] Generating synthetic OHLCV data...
  Generated 50,000 bars
  Date range: 2023-01-03 18:00:00 to 2023-02-23 06:12:00

[Step 2] Computing features...
  Computed 24 features
  Feature groups: ['price', 'volume', 'trend', 'momentum', 'volatility', 'flow']

[Step 3] Computing targets...
  Computed targets for horizons: ['target_5m', 'target_15m', 'target_30m', 'target_60m', 'target_120m', 'target_240m']

[Step 4] Saving to parquet...
  Features: 13.04 MB
  Targets: 3.29 MB

[Step 5] Verifying roundtrip...
  Features shape match: True
  Targets shape match: True

Phase 1 Demo Complete!

Next steps:
  1. Run acquisition with real Databento API
  2. Process full 15-year dataset
  3. Save to Google Drive for Phase 2


## 10. Cleanup and Summary

In [25]:
print("\n" + "="*70)
print("DEV PHASE 1 TEST NOTEBOOK COMPLETE")
print("="*70)
print("""
Summary:
- Verified FeatureEngineer initialization and configuration
- Verified 24 features across 6 groups per grok-scientific.md
- Tested all feature group computations:
  * F_P: Price dynamics (4 features)
  * F_V: Volume (3 features)
  * F_T: Trend (5 features) including VWAP with RTH reset
  * F_M: Momentum (6 features)
  * F_σ: Volatility (4 features)
  * F_VW + Temporal: Flow (2 features) including MFI and time_gap
- Tested target computation for 6 horizons
- Verified Parquet save/load roundtrip
- Validated Databento configuration
- Checked Phase 1 validation criteria

Ready for Phase 2: Dataset and DataLoader implementation.
""")


DEV PHASE 1 TEST NOTEBOOK COMPLETE

Summary:
- Verified FeatureEngineer initialization and configuration
- Verified 24 features across 6 groups per grok-scientific.md
- Tested all feature group computations:
  * F_P: Price dynamics (4 features)
  * F_V: Volume (3 features)
  * F_T: Trend (5 features) including VWAP with RTH reset
  * F_M: Momentum (6 features)
  * F_σ: Volatility (4 features)
  * F_VW + Temporal: Flow (2 features) including MFI and time_gap
- Tested target computation for 6 horizons
- Verified Parquet save/load roundtrip
- Validated Databento configuration
- Checked Phase 1 validation criteria

Ready for Phase 2: Dataset and DataLoader implementation.



In [26]:
# Cell: Install Dependencies for Data Acquisition
# ================================================
!pip install -q pandas numpy pyarrow scipy databento

In [27]:
# Cell: Acquire Real Data from Databento
# =======================================
# WARNING: This will charge your Databento account (~$100-500)
# Run estimate_cost first to verify budget

acquisition = NQDataAcquisition(
    api_key=DatabentoConfig.API_KEY,
    output_dir=str(DATA_ROOT + '/raw')
)
cost = acquisition.estimate_cost(
    DatabentoConfig.START_DATE,
    DatabentoConfig.END_DATE
)
print(f"Estimated cost: ${cost:.2f}")

Estimated cost: $19.11


In [28]:
# Cell: Download Data (run after confirming cost)
# ===============================================
# Uncomment and run after verifying cost estimate

raw_df = acquisition.download_range(
    start=DatabentoConfig.START_DATE,
    end=DatabentoConfig.END_DATE
)
# ... rest of cell
acquisition.save_parquet(raw_df, "nq_ohlcv_1m_raw.parquet")
print(f"Downloaded {len(raw_df):,} bars")
print(f"Date range: {raw_df['timestamp'].min()} to {raw_df['timestamp'].max()}")

  data = self.client.timeseries.get_range(
  data = self.client.timeseries.get_range(
  data = self.client.timeseries.get_range(
  data = self.client.timeseries.get_range(
  data = self.client.timeseries.get_range(
  data = self.client.timeseries.get_range(
  data = self.client.timeseries.get_range(
  data = self.client.timeseries.get_range(


Downloaded 5,236,084 bars
Date range: 2010-06-07 00:00:00+00:00 to 2025-12-03 23:59:00+00:00


In [30]:
# # Cell: Recover Misplaced Data
# # ============================
# import shutil
# from pathlib import Path

# # Define paths
# project_root = Path('/content/drive/MyDrive/Colab Notebooks/Transformers/FP')
# wrong_path = project_root / 'dataraw' / 'nq_ohlcv_1m_raw.parquet'
# correct_dir = project_root / 'data' / 'raw'
# correct_path = correct_dir / 'nq_ohlcv_1m_raw.parquet'

# # Ensure correct directory exists
# correct_dir.mkdir(parents=True, exist_ok=True)

# # Move file if in wrong location
# if wrong_path.exists():
#     print(f"Moving {wrong_path.stat().st_size / (1024**2):.1f} MB from wrong location...")
#     shutil.move(str(wrong_path), str(correct_path))
#     print(f"✓ File moved to: {correct_path}")

#     # Cleanup
#     try:
#         wrong_path.parent.rmdir()
#         print("✓ Removed 'dataraw' directory")
#     except OSError:
#         pass
# elif correct_path.exists():
#     print(f"✓ File already in correct location: {correct_path}")
# else:
#     print("⚠ File not found - may need to re-download")

Moving 81.8 MB from wrong location...
✓ File moved to: /content/drive/MyDrive/Colab Notebooks/Transformers/FP/data/raw/nq_ohlcv_1m_raw.parquet
✓ Removed 'dataraw' directory


In [31]:
# Cell: Compute and Save Features
# ===============================
from src.data.features import compute_and_save_features
from pathlib import Path

RAW_PATH = Path(PROJECT_ROOT) / 'data' / 'raw' / 'nq_ohlcv_1m_raw.parquet'
PROCESSED_DIR = Path(PROJECT_ROOT) / 'data' / 'processed'
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)

# Compute features and targets (stats saved to processed/ per function API)
features_path, targets_path, stats_path = compute_and_save_features(
    raw_data_path=str(RAW_PATH),
    output_dir=str(PROCESSED_DIR)
)

print(f"\nFeatures saved to: {features_path}")
print(f"Targets saved to: {targets_path}")
print(f"Stats saved to: {stats_path}")


Features saved to: /content/drive/MyDrive/Colab Notebooks/Transformers/FP/data/processed/nq_features_v1.parquet
Targets saved to: /content/drive/MyDrive/Colab Notebooks/Transformers/FP/data/processed/nq_targets_v1.parquet
Stats saved to: /content/drive/MyDrive/Colab Notebooks/Transformers/FP/data/processed/feature_stats.json


In [32]:
# Cell: Inspect Degraded Days Quality (Enhanced)
# ==============================================
"""
Comprehensive quality check for Databento-flagged degraded dates.
Combines completeness metrics with corruption detection.
"""
import pandas as pd
import numpy as np
from pathlib import Path
import json

# Degraded dates from BentoWarnings
degraded_dates = [
    '2017-11-13', '2018-10-21', '2019-01-15', '2019-02-22', '2019-03-13', '2019-03-26',
    '2020-02-27', '2020-02-28', '2020-06-30', '2020-07-01', '2021-12-05', '2022-01-02',
    '2025-09-17', '2025-09-24', '2025-11-28'
]

# Load raw data
raw_path = Path(PROJECT_ROOT) / 'data' / 'raw' / 'nq_ohlcv_1m_raw.parquet'
raw_df = pd.read_parquet(raw_path)
raw_df['timestamp'] = pd.to_datetime(raw_df['timestamp'])
raw_df['date'] = raw_df['timestamp'].dt.date

print("=" * 80)
print("DEGRADED DATES INSPECTION")
print("=" * 80)

# Per-date analysis
stats = []
for date_str in degraded_dates:
    date = pd.to_datetime(date_str).date()
    day_data = raw_df[raw_df['date'] == date]

    if len(day_data) == 0:
        stats.append({
            'date': date_str,
            'bars': 0,
            'completeness_%': 0,
            'zeros': 0,
            'max_return_%': 0,
            'status': 'NO_DATA'
        })
        continue

    # Completeness (vs typical ~1400 bars/day)
    expected = 1400
    completeness = (len(day_data) / expected) * 100

    # Corruption checks (Gemini)
    zeros = (day_data[['open', 'high', 'low', 'close']] == 0).sum().sum()
    returns = day_data['close'].pct_change().dropna()
    max_return = returns.abs().max() * 100 if len(returns) > 0 else 0

    # Status classification
    if zeros > 0 or max_return > 10:
        status = 'CORRUPTED'
    elif completeness < 90:
        status = 'HIGH_IMPACT'
    elif completeness < 95:
        status = 'MINOR_GAPS'
    else:
        status = 'OK'

    stats.append({
        'date': date_str,
        'bars': len(day_data),
        'completeness_%': completeness,
        'zeros': int(zeros),
        'max_return_%': max_return,
        'status': status
    })

stats_df = pd.DataFrame(stats)
print("\nPer-Date Summary:")
print(stats_df.to_string(index=False))

# Overall assessment
corrupted = stats_df[stats_df['status'] == 'CORRUPTED']
high_impact = stats_df[stats_df['status'] == 'HIGH_IMPACT']
print(f"\nSummary:")
print(f"  Corrupted (zeros or >10% jumps): {len(corrupted)}")
print(f"  High-impact (>10% missing bars): {len(high_impact)}")
print(f"  Total degraded bars: {stats_df['bars'].sum():,} of {len(raw_df):,} ({100*stats_df['bars'].sum()/len(raw_df):.3f}%)")

# Feature NaN impact (if features exist)
features_path = Path(PROJECT_ROOT) / 'data' / 'processed' / 'nq_features_v1.parquet'
if features_path.exists():
    from src.data.features import FEATURE_COLUMNS
    features_df = pd.read_parquet(features_path)
    features_df['date'] = pd.to_datetime(features_df['timestamp']).dt.date

    degraded_dates_dt = [pd.to_datetime(d).date() for d in degraded_dates]
    degraded_features = features_df[features_df['date'].isin(degraded_dates_dt)]

    nan_normal = features_df[FEATURE_COLUMNS].isna().mean().mean()
    nan_degraded = degraded_features[FEATURE_COLUMNS].isna().mean().mean()
    print(f"\nFeature NaN Rates:")
    print(f"  Overall: {nan_normal:.2%}")
    print(f"  Degraded days: {nan_degraded:.2%} (delta: +{nan_degraded - nan_normal:.2%})")

# Update stats.json with metadata
stats_path = Path(PROJECT_ROOT) / 'data' / 'processed' / 'feature_stats.json'
if stats_path.exists():
    with open(stats_path, 'r') as f:
        stats_json = json.load(f)

    stats_json['degraded_days'] = {
        'dates': degraded_dates,
        'total_bars': int(stats_df['bars'].sum()),
        'pct_of_dataset': float(100 * stats_df['bars'].sum() / len(raw_df)),
        'corrupted_dates': corrupted['date'].tolist(),
        'high_impact_dates': high_impact['date'].tolist(),
        'avg_completeness': float(stats_df['completeness_%'].mean()),
    }

    with open(stats_path, 'w') as f:
        json.dump(stats_json, f, indent=2)

    print(f"\n✓ Updated {stats_path.name} with degraded_days metadata")

# Recommendations
if len(corrupted) > 0:
    print("\n⚠ ACTION REQUIRED: Corrupted dates found")
    print("  Consider filtering in Phase 2 Dataset or re-downloading")
elif len(high_impact) > 0:
    print("\n⚠ MONITOR: Some dates have significant gaps")
    print("  Phase 2 masking will handle, but monitor training metrics")
else:
    print("\n✓ All degraded days usable - proceed to Phase 2")

DEGRADED DATES INSPECTION

Per-Date Summary:
      date  bars  completeness_%  zeros  max_return_%      status
2017-11-13  1354       96.714286      0      0.087181          OK
2018-10-21   120        8.571429      0      0.098793 HIGH_IMPACT
2019-01-15  1365       97.500000      0      0.273920          OK
2019-02-22  1305       93.214286      0      0.176523  MINOR_GAPS
2019-03-13  1364       97.428571      0      0.177863          OK
2019-03-26  1365       97.500000      0      0.168413          OK
2020-02-27  1365       97.500000      0      0.537009          OK
2020-02-28   959       68.500000      0      0.710632 HIGH_IMPACT
2020-06-30   851       60.785714      0      0.244829 HIGH_IMPACT
2020-07-01  1365       97.500000      0      0.199277          OK
2021-12-05    60        4.285714      0      0.122611 HIGH_IMPACT
2022-01-02    60        4.285714      0      0.056449 HIGH_IMPACT
2025-09-17  1356       96.857143      0      0.184406          OK
2025-09-24  1380       98.57142