# Data Exploration - Crypto Markets

This notebook explores real crypto market data to help you understand:
- Market characteristics
- Price distributions
- Volume patterns
- Symbol differences

In [None]:
# Setup
import sys
sys.path.append('../bot/src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from data import get_db_session, CryptoCandle

%matplotlib inline
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)

# Load real OHLCV candles from DB
MAX_CANDLES = 5000  # upper bound on the number of rows pulled into memory
CANDLE_FIELDS = ("symbol", "timestamp", "open", "high", "low", "close", "volume")

session = get_db_session()
try:
    # NOTE(review): the query has no ORDER BY, so which MAX_CANDLES rows come
    # back is left to the database -- confirm whether a deterministic ordering
    # (e.g. by timestamp) is wanted for reproducible runs.
    rows = session.query(CryptoCandle).limit(MAX_CANDLES).all()
finally:
    # Release the session even when the query itself raises.
    session.close()

if not rows:
    raise RuntimeError("No real crypto candles in DB. Run collect-data first.")

# One record per candle; the columns follow CANDLE_FIELDS in order.
df = pd.DataFrame(
    [{field: getattr(row, field) for field in CANDLE_FIELDS} for row in rows]
)

# Normalise the timestamp column to pandas datetimes for sorting and plotting.
df['timestamp'] = pd.to_datetime(df['timestamp'])

## 1. Preview Loaded Candles

In [None]:
# Report how many candle rows were loaded, then preview the first few
candle_count = len(df)
print(f"Loaded {candle_count} candle rows")
df.head()

## 2. Market Statistics

In [None]:
# Summary statistics over every loaded candle (all symbols pooled together)
symbols = df['symbol']
closes = df['close']
volumes = df['volume']
divider = "=" * 50

print("\nCandle Statistics:")
print(divider)
print(f"Total candles: {len(df)}")

# Symbol coverage: distinct count plus the five most frequent symbols
print(f"\nSymbols: {symbols.nunique()}")
print(f"Top symbols: {symbols.value_counts().head(5).to_dict()}")

# Closing-price extremes and mean
print(f"\nPrice range: {closes.min():.3f} - {closes.max():.3f}")
print(f"Average price: {closes.mean():.3f}")

# Volume extremes and mean
print(f"\nVolume range: ${volumes.min():.0f} - ${volumes.max():.0f}")
print(f"Average volume: ${volumes.mean():.0f}")

## 3. Price Distribution

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
hist_ax, box_ax = axes

# Left panel: histogram of closing prices across all loaded candles
hist_ax.hist(df['close'], bins=30, edgecolor='black')
hist_ax.set(xlabel='Price', ylabel='Frequency', title='Price Distribution')

# Right panel: closing-price box plot for the five most frequent symbols
top_symbols = df['symbol'].value_counts().head(5).index
top_symbol_rows = df[df['symbol'].isin(top_symbols)]
top_symbol_rows.boxplot(column='close', by='symbol', ax=box_ax)
box_ax.set(xlabel='Symbol', ylabel='Price', title='Price by Top Symbols')
plt.suptitle('')  # blank the figure-level super-title

plt.tight_layout()
plt.show()

## 4. Volume Analysis

In [None]:
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(14, 5))
log_ax, bar_ax = axes

# Left panel: histogram of log10(volume). Volumes below 1 are raised to 1
# first, so every value passed to log10 is positive.
log_volume = np.log10(df['volume'].clip(lower=1))
log_ax.hist(log_volume, bins=30, edgecolor='black')
log_ax.set(
    xlabel='Log10(Volume)',
    ylabel='Frequency',
    title='Volume Distribution (Log Scale)',
)

# Right panel: total volume for the ten symbols with the largest totals
# (ascending sort + tail keeps the largest bar last)
symbol_volume = df.groupby('symbol')['volume'].sum().sort_values().tail(10)
symbol_volume.plot.barh(ax=bar_ax)
bar_ax.set(xlabel='Total Volume', title='Volume by Top Symbols')

plt.tight_layout()
plt.show()

## 5. Liquidity Analysis

In [None]:
# Volume vs Price scatter: one point per candle, volume on a log axis
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(df['close'], df['volume'], alpha=0.6)
scatter_ax.set(xlabel='Price', ylabel='Volume', title='Volume vs Price')
scatter_ax.set_yscale('log')
scatter_ax.grid(True, alpha=0.3)
plt.show()

# Correlation between closing price and volume over all loaded candles
price_volume = df.loc[:, ['close', 'volume']]
corr = price_volume.corr()
print("\nCorrelation Matrix:")
print(corr)

## 6. Sample Symbol Deep Dive

In [None]:
# Pick one candle row at random; its symbol is the one examined here and in
# the price-history cell that follows.
SAMPLE_SEED = 42  # fixed seed so re-running the cell selects the same row of df
sample = df.sample(1, random_state=SAMPLE_SEED).iloc[0]
symbol = sample['symbol']

# Print the sampled candle's fields
print(f"Symbol: {symbol}")
print(f"Timestamp: {sample['timestamp']}")
print(f"Open: {sample['open']:.4f}")
print(f"High: {sample['high']:.4f}")
print(f"Low: {sample['low']:.4f}")
print(f"Close: {sample['close']:.4f}")
print(f"Volume: {sample['volume']:.2f}")

## 7. Price History (Sample)

In [None]:
# Price history for sample symbol: its candles in chronological order
symbol_rows = df[df['symbol'] == symbol]
history_df = symbol_rows.sort_values('timestamp')

# Plot closing price over time
history_fig, history_ax = plt.subplots(figsize=(12, 6))
history_ax.plot(history_df['timestamp'], history_df['close'], linewidth=2)
history_ax.set(xlabel='Time', ylabel='Price', title=f'Price History - {symbol}')
history_ax.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Volatility: standard deviation of candle-to-candle percentage returns
history_df = history_df.assign(returns=history_df['close'].pct_change())
volatility = history_df['returns'].std()
print(f"\nVolatility: {volatility:.4f}")

## Key Insights

**From this exploration, we learned:**

1. **Price Distribution**: Crypto prices span a wide range across symbols
2. **Volume Varies**: Some symbols have much higher activity than others
3. **Symbol Differences**: Each symbol has a distinct volatility profile
4. **Volume vs Price**: Higher prices do not always imply higher volume

**Implications for Trading:**
- Focus on liquid symbols with consistent volume
- Monitor volatility for risk management
- Compare behavior across symbols to find stable patterns