# Data Exploration Notebook

This notebook is for exploring tick data and understanding its characteristics.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data_prep.tick_loader import load_tick_data
from src.data_prep.bar_aggregator import ticks_to_bars
from src.utils.paths import get_data_dir

# Render matplotlib figures inline, directly below the cell that creates them.
%matplotlib inline
# Use seaborn's 'whitegrid' style for every plot drawn after this point.
sns.set_style('whitegrid')

## 1. Load Tick Data

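By default the cell below generates synthetic ticks for the demo. To use real data instead, uncomment the `load_tick_data` call and replace 'XAUUSD' with your actual symbol name.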

In [None]:
# Load tick data
# ticks = load_tick_data('XAUUSD', start_date='2024-01-01', end_date='2024-01-31')

# For demo, create synthetic data.
# A seeded generator keeps the synthetic series identical on every
# "Restart Kernel -> Run All", so the statistics and charts further down
# are reproducible. Change SEED to get a different (but still repeatable) walk.
SEED = 42
n_ticks = 10000
rng = np.random.default_rng(SEED)

ticks = pd.DataFrame({
    # One tick per second starting at 09:30 on 2024-01-01.
    'timestamp': pd.date_range('2024-01-01 09:30', periods=n_ticks, freq='1s'),
    # Gaussian random walk offset by 1850.
    'price': rng.standard_normal(n_ticks).cumsum() + 1850,
    # Integer sizes from 1 to 99 (the upper bound 100 is exclusive).
    'volume': rng.integers(1, 100, n_ticks),
    # Trade direction drawn uniformly from the two labels.
    'side': rng.choice(['buy', 'sell'], n_ticks),
})

print(f"Loaded {len(ticks):,} ticks")
ticks.head()

## 2. Basic Statistics

In [None]:
# Summary statistics from DataFrame.describe(), printed under a heading.
print("Tick Data Summary:", ticks.describe(), sep="\n")

# Earliest and latest tick timestamps, one per line under their own heading.
print(
    "\nTime Range:",
    f"Start: {ticks['timestamp'].min()}",
    f"End: {ticks['timestamp'].max()}",
    sep="\n",
)

## 3. Visualize Price and Volume

In [None]:
# Two stacked panels sharing the time axis: price on top, volume underneath.
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(14, 8), sharex=True)

ax1.plot(ticks['timestamp'], ticks['price'], linewidth=0.5)
ax1.set_ylabel('Price')
ax1.set_title('Tick Price Over Time')
ax1.grid(True, alpha=0.3)

# Draw one thin vertical line per tick rather than ax.bar(): on a date axis a
# numeric bar width is measured in days (0.0001 d is roughly 8.6 s), so with
# ticks one second apart every bar overlapped its neighbours. vlines has no
# width to tune and renders all ticks as a single collection.
ax2.vlines(ticks['timestamp'], 0, ticks['volume'], linewidth=0.5, alpha=0.6)
# Anchor the volume axis at zero so line heights read as absolute volume.
ax2.set_ylim(bottom=0)
ax2.set_ylabel('Volume')
ax2.set_xlabel('Time')
ax2.set_title('Tick Volume Over Time')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 4. Aggregate to Bars

In [None]:
# Roll the tick stream up into '1min' bars and ask the aggregator to compute
# its features in the same pass (compute_features=True).
bars = ticks_to_bars(
    ticks,
    timeframe='1min',
    compute_features=True,
)

# Report how many bars came back, then preview the first rows.
print(f"Created {len(bars):,} bars")
bars.head()

## 5. Candlestick Chart (Simple)

In [None]:
# Plot first 100 bars as candlesticks
sample_bars = bars.head(100)

# Place each candle at its ordinal position (0, 1, 2, ...) rather than at its
# DataFrame index label, so the x axis really is the "Bar Index" it is
# labelled as, whatever kind of index `bars` carries.
bar_pos = np.arange(len(sample_bars))
opens = sample_bars['open'].to_numpy()
highs = sample_bars['high'].to_numpy()
lows = sample_bars['low'].to_numpy()
closes = sample_bars['close'].to_numpy()

# Green when the bar closed at or above its open, red otherwise.
body_colors = np.where(closes >= opens, 'green', 'red').tolist()

fig, ax = plt.subplots(figsize=(14, 6))

# Two vectorised calls replace a per-row loop: thin black wicks spanning
# low..high, then thicker coloured bodies spanning open..close.
ax.vlines(bar_pos, lows, highs, colors='black', linewidth=0.5)
ax.vlines(bar_pos, opens, closes, colors=body_colors, linewidth=3)

ax.set_xlabel('Bar Index')
ax.set_ylabel('Price')
ax.set_title('Candlestick Chart (First 100 Bars)')
ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 6. Next Steps

- Explore orderbook proxy features
- Compute manipulation scores
- Analyze anomalies

See `demo_simulation.ipynb` for more examples.