# 01 - Data Exploration

Load all instruments, check data quality, verify resampling, and display charts.

In [None]:
import sys
sys.path.insert(0, '..')

from data.loader import load_instrument, validate_dataframe, detect_gaps, get_data_stats
from data.resampler import resample, resample_all
from visualization.chart import candlestick_chart, multi_timeframe_chart
from config import load_config

# Read the project configuration; the path is relative to the working directory.
config_path = '../config.yaml'
cfg = load_config(config_path)

# Echo the key data settings so a wrong config file is noticed immediately.
data_cfg = cfg.data
print(f'Config loaded: symbol={data_cfg.symbol}, timeframes={data_cfg.timeframes}')

## Load All Instruments

In [None]:
import os

# Loaded frames keyed by symbol; later cells read from this dict.
instruments = {}
for symbol, info in cfg.data.instruments.items():
    # Best-effort loading: a failure for one instrument is reported and the
    # loop continues with the next one.
    try:
        # os.path.join (instead of '../' + path) keeps an absolute
        # optimized_path intact and also accepts path-like objects.
        data_dir = os.path.join('..', cfg.data.optimized_path)
        df = load_instrument(symbol, data_dir, info.file)

        # Register the frame before computing stats, so it stays available
        # to later cells even if the summary below raises.
        instruments[symbol] = df
        stats = get_data_stats(df)
        issues = validate_dataframe(df)

        print(f'\n{symbol} ({info.source}):')
        print(f'  Rows: {stats["rows"]:,}')
        print(f'  Range: {stats["start"]} to {stats["end"]}')
        print(f'  Days: {stats["duration_days"]}')
        print(f'  Price: {stats["price_min"]:.2f} - {stats["price_max"]:.2f}')
        print(f'  Issues: {issues if issues else "None"}')
    except Exception as e:
        print(f'{symbol}: FAILED - {e}')

## Gap Analysis

In [None]:
# Report missing-data gaps for every instrument that loaded successfully.
for symbol, frame in instruments.items():
    gaps = detect_gaps(frame)
    gap_count = len(gaps)

    if gap_count == 0:
        print(f'{symbol}: No gaps detected')
        continue

    print(f'\n{symbol}: {gap_count} gaps found')
    gap_minutes = gaps["gap_minutes"]
    print(f'  Largest gap: {gap_minutes.max():.0f} minutes')
    # 1440 minutes == 24 hours
    print(f'  Gaps > 1 day: {(gap_minutes > 1440).sum()}')
    # The five longest gaps, printed without the index column
    print(gaps.nlargest(5, 'gap_minutes').to_string(index=False))

## NAS100 Candlestick Chart (Primary Instrument)

In [None]:
# NAS100 is absent from `instruments` when its load failed; .get() then
# yields None and the chart is skipped.
nas = instruments.get('NAS100')
if nas is not None:
    # Fixed display window (2025-01-20 through 2025-02-01) for the 1m chart
    chart_window = {'range_start': '2025-01-20', 'range_end': '2025-02-01'}
    fig = candlestick_chart(nas, title='NAS100 1m', **chart_window)
    fig.show()

## Resampling Verification

In [None]:
# `nas` is set by the NAS100 chart cell; nothing to verify if it did not load.
if nas is not None:
    # Produce every higher timeframe in one call and report the candle counts
    target_timeframes = ['5m', '15m', '30m', '1H', '4H', '1D']
    tfs = resample_all(nas, target_timeframes)
    for tf, candles in tfs.items():
        print(f'{tf}: {len(candles):,} candles')

In [None]:
# Relies on `tfs` from the resampling cell, which exists only when `nas` loaded.
if nas is not None:
    # Same date window (both bounds inclusive) applied to 1H, 4H and 1D
    window_start, window_end = '2025-01-01', '2025-02-01'

    mtf_data = {}
    for tf in ('1H', '4H', '1D'):
        frame = tfs[tf]
        times = frame['time']
        in_window = (times >= window_start) & (times <= window_end)
        mtf_data[tf] = frame[in_window]

    fig = multi_timeframe_chart(mtf_data, symbol='NAS100')
    fig.show()

## All Instruments: 1D Charts

In [None]:
# One daily candlestick chart per loaded instrument, volume panel hidden.
for symbol, frame in instruments.items():
    daily_chart = candlestick_chart(
        resample(frame, '1D'),
        title=f'{symbol} Daily',
        show_volume=False,
        height=400,
    )
    daily_chart.show()