# Data Exploration - Polymarket Markets

This notebook explores the market data and helps understand:
- Market characteristics
- Price distributions
- Volume patterns
- Category differences

In [None]:
# Setup
import sys
sys.path.append('../bot/src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from data.collectors import get_polymarket_client, HistoricalDataLoader
from data import get_db_session, Market

%matplotlib inline
sns.set_style('darkgrid')
plt.rcParams['figure.figsize'] = (12, 6)

## 1. Load Sample Markets

In [None]:
# Connect to the Polymarket API and pull a sample of currently active markets
client = get_polymarket_client()
markets = client.get_active_markets(limit=50)

# Tabulate the API response; every later cell reads its columns from `df`
df = pd.DataFrame(markets)
print(f"Loaded {len(df)} markets")

# Preview the first rows (rich display, since this is the cell's last expression)
df.head()

## 2. Market Statistics

In [None]:
# Headline numbers for the loaded sample: size, category mix, price range, 24h volume range
rule_width = 50
print("\nMarket Statistics:")
print("=" * rule_width)
print(f"Total markets: {len(df)}")

# Number of markets per category, as a plain dict for compact printing
category_counts = df['category'].value_counts().to_dict()
print(f"\nCategories: {category_counts}")

price_col = df['current_price']
print(f"\nPrice range: {price_col.min():.3f} - {price_col.max():.3f}")
print(f"Average price: {price_col.mean():.3f}")

volume_col = df['volume_24h']
print(f"\nVolume range: ${volume_col.min():.0f} - ${volume_col.max():.0f}")
print(f"Average volume: ${volume_col.mean():.0f}")

## 3. Price Distribution

In [None]:
# Two panels side by side: overall price histogram (left), price spread per category (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))
hist_ax, box_ax = axes

# Left: histogram of current prices with a reference line at 0.5
hist_ax.hist(df['current_price'], bins=20, edgecolor='black')
hist_ax.set(xlabel='Price', ylabel='Frequency', title='Price Distribution')
hist_ax.axvline(0.5, color='red', linestyle='--', label='50% (coin flip)')
hist_ax.legend()

# Right: one box per category
df.boxplot(column='current_price', by='category', ax=box_ax)
box_ax.set(xlabel='Category', ylabel='Price', title='Price by Category')
# pandas adds its own "Boxplot grouped by ..." figure title; blank it out
fig.suptitle('')

fig.tight_layout()
plt.show()

## 4. Volume Analysis

In [None]:
# Two panels: distribution of 24h volume on a log scale (left), total volume per category (right)
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# log10 is -inf at zero volume and NaN for missing/negative values; a non-finite
# value makes hist() fail when it computes the bin range. Keep only strictly
# positive volumes for the histogram and report how many rows were left out.
positive_volume = df.loc[df['volume_24h'] > 0, 'volume_24h']
excluded_count = len(df) - len(positive_volume)

# Volume distribution (log scale)
axes[0].hist(np.log10(positive_volume), bins=20, edgecolor='black')
axes[0].set_xlabel('Log10(Volume)')
axes[0].set_ylabel('Frequency')
axes[0].set_title('Volume Distribution (Log Scale)')

# Volume by category (uses all rows; zero-volume markets simply add nothing to the sum)
category_volume = df.groupby('category')['volume_24h'].sum().sort_values()
category_volume.plot(kind='barh', ax=axes[1])
axes[1].set_xlabel('Total 24h Volume ($)')
axes[1].set_title('Volume by Category')

plt.tight_layout()
plt.show()

if excluded_count:
    print(f"Excluded {excluded_count} market(s) with zero or missing 24h volume from the log-scale histogram")

## 5. Liquidity Analysis

In [None]:
# Scatter of 24h volume against liquidity, both axes on a log scale
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['liquidity'], df['volume_24h'], alpha=0.6)
ax.set_xlabel('Liquidity ($)')
ax.set_ylabel('24h Volume ($)')
ax.set_title('Volume vs Liquidity')
ax.set_xscale('log')
ax.set_yscale('log')
ax.grid(True, alpha=0.3)
plt.show()

# Pairwise correlation between price, volume and liquidity
numeric_cols = ['current_price', 'volume_24h', 'liquidity']
corr = df[numeric_cols].corr()
print("\nCorrelation Matrix:")
print(corr)

## 6. Sample Market Deep Dive

In [None]:
# Pick one market to inspect in detail.
# A fixed random_state makes the pick reproducible: for the same loaded `df`,
# Restart & Run All selects the same row. Change the seed to look at another market.
sample_market = df.sample(1, random_state=42).iloc[0]
market_id = sample_market['id']

# Key fields for the chosen market
print(f"Market ID: {market_id}")
print(f"Question: {sample_market['question']}")
print(f"Category: {sample_market['category']}")
print(f"Current Price: {sample_market['current_price']:.3f}")
print(f"Volume: ${sample_market['volume_24h']:.0f}")
print(f"Liquidity: ${sample_market['liquidity']:.0f}")

## 7. Price History (Sample)

In [None]:
from datetime import datetime, timedelta

# Capture "now" once so the requested window is exactly 7 days wide
# (two separate datetime.now() calls would differ slightly).
window_end = datetime.now()
window_start = window_end - timedelta(days=7)

# Get price history for sample market
history = client.get_market_history(
    market_id=market_id,
    start_time=window_start,
    end_time=window_end
)

# Convert to DataFrame
history_df = pd.DataFrame(history)

if history_df.empty:
    # No data points: skip the plot rather than fail on a missing 'timestamp' column
    print(f"No price history returned for market {market_id} in the last 7 days")
    volatility = np.nan
else:
    history_df['timestamp'] = pd.to_datetime(history_df['timestamp'])
    # The line plot and pct_change both need chronological order; the API's
    # ordering is not visible here, so sort explicitly.
    history_df = history_df.sort_values('timestamp').reset_index(drop=True)

    # Only add an ellipsis when the question was actually truncated
    question = sample_market['question']
    title_question = question if len(question) <= 50 else f'{question[:50]}...'

    # Plot
    plt.figure(figsize=(12, 6))
    plt.plot(history_df['timestamp'], history_df['price'], linewidth=2)
    plt.xlabel('Time')
    plt.ylabel('Price')
    plt.title(f'Price History - {title_question}')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

    # Volatility: standard deviation of per-observation returns (not annualised).
    # A price of 0 makes the next return +/-inf; treat those as missing so one
    # bad point does not turn the whole statistic into inf/NaN.
    history_df['returns'] = (
        history_df['price'].pct_change().replace([np.inf, -np.inf], np.nan)
    )
    volatility = history_df['returns'].std()
    print(f"\n7-day volatility: {volatility:.4f}")

## Key Insights

**From this exploration, we learned:**

1. **Price Distribution**: Most markets trade between 0.3 and 0.7, away from extreme probabilities
2. **Volume Varies**: Daily volume spans a wide range, from about $1k to over $100k
3. **Category Differences**: Some categories are more active than others
4. **Liquidity Matters**: Higher-liquidity markets tend to have more volume
5. **Price Volatility**: Markets show varying levels of price movement

**Implications for Trading:**
- Focus on liquid, actively traded markets (>$10k 24h volume)
- Avoid extreme prices (close to 0 or 1)
- Consider category-specific strategies
- Monitor volatility for risk management