# Poolfish Data Exploration

This notebook explores what data Poolfish API provides and how far back historical data is available.

**Poolfish** is a proxy to The Graph's Uniswap V3 subgraphs.

## Data Types to Explore
1. **Pool Day Data** - Daily aggregated pool metrics (TVL, volume, fees, OHLCV)
2. **Pool Hour Data** - Hourly aggregated pool metrics
3. **Tick Data** - Current liquidity distribution across ticks
4. **Pool Info** - Basic pool configuration and current state
5. **Token Data** - Token metadata (decimals, symbol)

In [1]:
import requests
import pandas as pd
import json
import time
from datetime import datetime, timedelta
from typing import Dict, List, Optional

# Poolfish API Configuration
PROXY_URL = "https://poolfish.xyz/api/subgraph"

# Browser-like headers (required for Poolfish)
HEADERS = {
    'accept': 'application/json, text/plain, */*',
    'accept-language': 'en-US,en;q=0.9',
    'content-type': 'application/json',
    'origin': 'https://poolfish.xyz',
    'referer': 'https://poolfish.xyz/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
}

# Test Pool: WETH/USDC 0.05% on Arbitrum
POOL_ADDRESS = "0xc6962004f452be9203591991d15f6b388e09e8d0"
# POOL_ADDRESS = "0x641c00a822e8b671738d32a431a4fb6074e5c79d"
CHAIN_ID = 42161  # Arbitrum
DEX_KEY = "uniswap"

def query_poolfish(query: str, chain_id: int = CHAIN_ID) -> Dict:
    """Query Poolfish API."""
    payload = {
        "query": query,
        "dexKey": DEX_KEY,
        "chainId": chain_id
    }
    
    time.sleep(0.5)  # Rate limiting
    
    try:
        response = requests.post(PROXY_URL, json=payload, headers=HEADERS, timeout=30)
        response.raise_for_status()
        data = response.json()
        
        if "errors" in data:
            print(f"GraphQL errors: {data['errors']}")
            return {}
            
        return data.get("data", {})
    except Exception as e:
        print(f"Error: {e}")
        return {}

print("‚úÖ Poolfish query function ready")
print(f"Target Pool: {POOL_ADDRESS}")
print(f"Chain: Arbitrum (ID: {CHAIN_ID})")

‚úÖ Poolfish query function ready
Target Pool: 0xc6962004f452be9203591991d15f6b388e09e8d0
Chain: Arbitrum (ID: 42161)


## 1. Pool Basic Info

Get current pool state and configuration.

In [2]:
# Query pool basic info
query = f"""
{{
    pool(id: "{POOL_ADDRESS.lower()}") {{
        id
        feeTier
        liquidity
        sqrtPrice
        tick
        token0 {{
            id
            symbol
            decimals
        }}
        token1 {{
            id
            symbol
            decimals
        }}
        totalValueLockedUSD
        volumeUSD
        feesUSD
        txCount
        createdAtTimestamp
        createdAtBlockNumber
    }}
}}
"""

pool_info = query_poolfish(query)

if pool_info and "pool" in pool_info:
    pool = pool_info["pool"]
    print("=" * 60)
    print("POOL BASIC INFO")
    print("=" * 60)
    print(f"Pool ID: {pool['id']}")
    print(f"Pair: {pool['token0']['symbol']}/{pool['token1']['symbol']}")
    print(f"Fee Tier: {int(pool['feeTier'])/10000}%")
    print(f"Current Tick: {pool['tick']}")
    print(f"Total Value Locked: ${float(pool['totalValueLockedUSD']):,.2f}")
    print(f"Total Volume: ${float(pool['volumeUSD']):,.2f}")
    print(f"Total Fees: ${float(pool['feesUSD']):,.2f}")
    print(f"Transaction Count: {pool['txCount']}")
    
    # Pool creation date
    created_ts = int(pool['createdAtTimestamp'])
    created_date = datetime.utcfromtimestamp(created_ts)
    print(f"\nPool Created: {created_date.strftime('%Y-%m-%d %H:%M:%S')} UTC")
    print(f"Pool Age: {(datetime.utcnow() - created_date).days} days")
else:
    print("‚ùå Failed to fetch pool info")

POOL BASIC INFO
Pool ID: 0xc6962004f452be9203591991d15f6b388e09e8d0
Pair: WETH/USDC
Fee Tier: 0.05%
Current Tick: -195569
Total Value Locked: $149,971,541.10
Total Volume: $130,443,814,956.61
Total Fees: $65,221,907.48
Transaction Count: 32364519

Pool Created: 2023-06-08 21:28:24 UTC
Pool Age: 955 days


## 2. Pool Day Data - Historical Availability

Check how far back daily pool data is available.

In [3]:
def fetch_pool_day_data(pool_address: str, num_days: int = 1000, skip: int = 0) -> List[Dict]:
    """Fetch pool day data with pagination."""
    query = f"""
    {{
        poolDayDatas(
            first: {num_days},
            skip: {skip},
            orderBy: date,
            orderDirection: asc,
            where: {{pool: "{pool_address.lower()}"}}
        ) {{
            date
            volumeUSD
            tvlUSD
            feesUSD
            open
            high
            low
            close
            liquidity
            sqrtPrice
            tick
            # feeGrowthGlobal0X128
            # feeGrowthGlobal1X128
        }}
    }}
    """
    result = query_poolfish(query)
    return result.get("poolDayDatas", [])

# Fetch ALL available pool day data
print("Fetching all available pool day data...")
all_day_data = []
skip = 0
batch_size = 1000

while True:
    batch = fetch_pool_day_data(POOL_ADDRESS, num_days=batch_size, skip=skip)
    if not batch:
        break
    all_day_data.extend(batch)
    print(f"  Fetched {len(all_day_data)} days so far...")
    if len(batch) < batch_size:
        break
    skip += batch_size
    time.sleep(0.5)

print(f"\n‚úÖ Total pool day data records: {len(all_day_data)}")

Fetching all available pool day data...
  Fetched 957 days so far...

‚úÖ Total pool day data records: 957


In [4]:
# Analyze pool day data
if all_day_data:
    df_day = pd.DataFrame(all_day_data)
    df_day['date_dt'] = pd.to_datetime(df_day['date'].astype(int), unit='s')
    df_day = df_day.sort_values('date_dt')
    
    print("=" * 60)
    print("POOL DAY DATA AVAILABILITY")
    print("=" * 60)
    print(f"Total Records: {len(df_day)}")
    print(f"Earliest Date: {df_day['date_dt'].min().strftime('%Y-%m-%d')}")
    print(f"Latest Date: {df_day['date_dt'].max().strftime('%Y-%m-%d')}")
    
    date_range = (df_day['date_dt'].max() - df_day['date_dt'].min()).days
    print(f"Date Range: {date_range} days")
    
    # Check for gaps
    df_day['date_only'] = df_day['date_dt'].dt.date
    expected_dates = pd.date_range(df_day['date_dt'].min(), df_day['date_dt'].max(), freq='D')
    missing_dates = set(expected_dates.date) - set(df_day['date_only'])
    print(f"Missing Days: {len(missing_dates)}")
    
    print("\n" + "=" * 60)
    print("AVAILABLE FIELDS IN POOL DAY DATA")
    print("=" * 60)
    for col in df_day.columns:
        if col not in ['date_dt', 'date_only']:
            print(f"  ‚Ä¢ {col}")
    
    # Show sample
    print("\n" + "=" * 60)
    print("SAMPLE DATA (First 5 rows)")
    print("=" * 60)
    display(df_day[['date_dt', 'volumeUSD', 'tvlUSD', 'feesUSD', 'open', 'close']].head())
else:
    print("‚ùå No pool day data available")

POOL DAY DATA AVAILABILITY
Total Records: 957
Earliest Date: 2023-06-08
Latest Date: 2026-01-19
Date Range: 956 days
Missing Days: 0

AVAILABLE FIELDS IN POOL DAY DATA
  ‚Ä¢ close
  ‚Ä¢ date
  ‚Ä¢ feesUSD
  ‚Ä¢ high
  ‚Ä¢ liquidity
  ‚Ä¢ low
  ‚Ä¢ open
  ‚Ä¢ sqrtPrice
  ‚Ä¢ tick
  ‚Ä¢ tvlUSD
  ‚Ä¢ volumeUSD

SAMPLE DATA (First 5 rows)


Unnamed: 0,date_dt,volumeUSD,tvlUSD,feesUSD,open,close
0,2023-06-08,0.0,0.0,0.0,0.0,0.0
1,2023-06-09,1263302.0488517396,252196.4656610237,631.6510244258698,0.0,0.0
2,2023-06-10,6869485.622820175,2734375.1516384063,3434.7428114100876,0.0005443342293808,0.0005443342293808
3,2023-06-11,4288410.50498877,3642804.572333049,2144.205252494385,0.0005710979306765,0.0005710979306765
4,2023-06-12,10299397.81642859,4776767.765053861,5149.698908214296,0.0005708114618404,0.0005708114618404


## 3. Pool Hour Data - Historical Availability

Check if hourly data is available and how far back.

In [5]:
def fetch_pool_hour_data(pool_address: str, num_hours: int = 1000, skip: int = 0) -> List[Dict]:
    """Fetch pool hour data with pagination."""
    query = f"""
    {{
        poolHourDatas(
            first: {num_hours},
            skip: {skip},
            orderBy: periodStartUnix,
            orderDirection: asc,
            where: {{pool: "{pool_address.lower()}"}}
        ) {{
            periodStartUnix
            volumeUSD
            tvlUSD
            feesUSD
            open
            high
            low
            close
            liquidity
            sqrtPrice
            tick
        }}
    }}
    """
    result = query_poolfish(query)
    return result.get("poolHourDatas", [])

# Fetch sample of hour data
print("Fetching pool hour data (sample)...")
hour_data_sample = fetch_pool_hour_data(POOL_ADDRESS, num_hours=100)

if hour_data_sample:
    print(f"‚úÖ Pool hour data IS available! (got {len(hour_data_sample)} records)")
    
    # Now fetch all to check range
    print("\nFetching all available pool hour data...")
    all_hour_data = []
    skip = 0
    batch_size = 1000
    max_records = 10000  # Limit to avoid too many requests
    
    while len(all_hour_data) < max_records:
        batch = fetch_pool_hour_data(POOL_ADDRESS, num_hours=batch_size, skip=skip)
        if not batch:
            break
        all_hour_data.extend(batch)
        print(f"  Fetched {len(all_hour_data)} hours so far...")
        if len(batch) < batch_size:
            break
        skip += batch_size
        time.sleep(0.5)
    
    print(f"\n‚úÖ Total pool hour data records: {len(all_hour_data)}")
else:
    print("‚ùå Pool hour data NOT available")
    all_hour_data = []

Fetching pool hour data (sample)...
‚úÖ Pool hour data IS available! (got 100 records)

Fetching all available pool hour data...
  Fetched 1000 hours so far...
  Fetched 2000 hours so far...
  Fetched 3000 hours so far...
  Fetched 4000 hours so far...
  Fetched 5000 hours so far...
  Fetched 6000 hours so far...
  Fetched 7000 hours so far...
  Fetched 8000 hours so far...
  Fetched 9000 hours so far...
  Fetched 10000 hours so far...

‚úÖ Total pool hour data records: 10000


In [6]:
# Analyze pool hour data
if all_hour_data:
    df_hour = pd.DataFrame(all_hour_data)
    df_hour['datetime'] = pd.to_datetime(df_hour['periodStartUnix'].astype(int), unit='s')
    df_hour = df_hour.sort_values('datetime')
    
    print("=" * 60)
    print("POOL HOUR DATA AVAILABILITY")
    print("=" * 60)
    print(f"Total Records: {len(df_hour)}")
    print(f"Earliest: {df_hour['datetime'].min().strftime('%Y-%m-%d %H:%M')}")
    print(f"Latest: {df_hour['datetime'].max().strftime('%Y-%m-%d %H:%M')}")
    
    hour_range = (df_hour['datetime'].max() - df_hour['datetime'].min())
    print(f"Time Range: {hour_range.days} days, {hour_range.seconds//3600} hours")
    
    print("\n" + "=" * 60)
    print("AVAILABLE FIELDS IN POOL HOUR DATA")
    print("=" * 60)
    for col in df_hour.columns:
        if col != 'datetime':
            print(f"  ‚Ä¢ {col}")
    
    # Show sample
    print("\n" + "=" * 60)
    print("SAMPLE DATA (First 5 rows)")
    print("=" * 60)
    display(df_hour[['datetime', 'volumeUSD', 'tvlUSD', 'feesUSD', 'open', 'close']].head())

POOL HOUR DATA AVAILABILITY
Total Records: 10000
Earliest: 2023-06-08 21:00
Latest: 2024-07-29 15:00
Time Range: 416 days, 18 hours

AVAILABLE FIELDS IN POOL HOUR DATA
  ‚Ä¢ close
  ‚Ä¢ feesUSD
  ‚Ä¢ high
  ‚Ä¢ liquidity
  ‚Ä¢ low
  ‚Ä¢ open
  ‚Ä¢ periodStartUnix
  ‚Ä¢ sqrtPrice
  ‚Ä¢ tick
  ‚Ä¢ tvlUSD
  ‚Ä¢ volumeUSD

SAMPLE DATA (First 5 rows)


Unnamed: 0,datetime,volumeUSD,tvlUSD,feesUSD,open,close
0,2023-06-08 21:00:00,0,0.0,0,0.0,0.0
1,2023-06-09 00:00:00,0,185.37384028795745,0,0.0,0.0005455121823431
2,2023-06-09 01:00:00,0,396.1501087164663,0,0.0005455121823431,0.0005427202814682
3,2023-06-09 02:00:00,0,780.9011180550573,0,0.0005427202814682,0.0005464995540763
4,2023-06-09 03:00:00,0,913.7334295794537,0,0.0005464995540763,0.0005469796898695


## 4. Tick Data

Check tick liquidity distribution data.

In [7]:
def fetch_ticks(pool_address: str, num_ticks: int = 1000, skip: int = 0) -> List[Dict]:
    """Fetch tick data."""
    query = f"""
    {{
        ticks(
            first: {num_ticks},
            skip: {skip},
            where: {{poolAddress: "{pool_address.lower()}"}},
            orderBy: tickIdx
        ) {{
            tickIdx
            liquidityNet
            liquidityGross
            price0
            price1
            # feeGrowthOutside0X128
            # feeGrowthOutside1X128
        }}
    }}
    """
    result = query_poolfish(query)
    return result.get("ticks", [])

# Fetch all ticks
print("Fetching tick data...")
all_ticks = []
skip = 0
batch_size = 1000

while True:
    batch = fetch_ticks(POOL_ADDRESS, num_ticks=batch_size, skip=skip)
    if not batch:
        break
    all_ticks.extend(batch)
    print(f"  Fetched {len(all_ticks)} ticks so far...")
    if len(batch) < batch_size:
        break
    skip += batch_size
    time.sleep(0.3)

print(f"\n‚úÖ Total tick records: {len(all_ticks)}")

Fetching tick data...
  Fetched 1000 ticks so far...
  Fetched 2000 ticks so far...
  Fetched 3000 ticks so far...
  Fetched 4000 ticks so far...
  Fetched 5000 ticks so far...
  Fetched 5022 ticks so far...

‚úÖ Total tick records: 5022


In [8]:
# Analyze tick data
if all_ticks:
    df_ticks = pd.DataFrame(all_ticks)
    df_ticks['tickIdx'] = df_ticks['tickIdx'].astype(int)
    df_ticks['liquidityNet'] = df_ticks['liquidityNet'].astype(float)
    df_ticks = df_ticks.sort_values('tickIdx')
    
    print("=" * 60)
    print("TICK DATA INFO")
    print("=" * 60)
    print(f"Total Initialized Ticks: {len(df_ticks)}")
    print(f"Tick Range: {df_ticks['tickIdx'].min()} to {df_ticks['tickIdx'].max()}")
    
    # Ticks with positive/negative liquidity
    positive_liq = (df_ticks['liquidityNet'] > 0).sum()
    negative_liq = (df_ticks['liquidityNet'] < 0).sum()
    print(f"Ticks with positive liquidityNet: {positive_liq}")
    print(f"Ticks with negative liquidityNet: {negative_liq}")
    
    print("\n" + "=" * 60)
    print("AVAILABLE FIELDS IN TICK DATA")
    print("=" * 60)
    for col in df_ticks.columns:
        print(f"  ‚Ä¢ {col}")
    
    print("\n" + "=" * 60)
    print("NOTE: Tick data is a SNAPSHOT of current state")
    print("Historical tick data is NOT available via this API")
    print("=" * 60)
else:
    print("‚ùå No tick data available")

TICK DATA INFO
Total Initialized Ticks: 5022
Tick Range: -887270 to 887270
Ticks with positive liquidityNet: 969
Ticks with negative liquidityNet: 1028

AVAILABLE FIELDS IN TICK DATA
  ‚Ä¢ liquidityGross
  ‚Ä¢ liquidityNet
  ‚Ä¢ price0
  ‚Ä¢ price1
  ‚Ä¢ tickIdx

NOTE: Tick data is a SNAPSHOT of current state
Historical tick data is NOT available via this API


## 5. Other Available Entities

Explore other data entities available in the subgraph.

In [9]:
# Check for swap events
query = f"""
{{
    swaps(
        first: 10,
        orderBy: timestamp,
        orderDirection: desc,
        where: {{pool: "{POOL_ADDRESS.lower()}"}}
    ) {{
        id
        timestamp
        amount0
        amount1
        amountUSD
        sqrtPriceX96
        tick
    }}
}}
"""

swaps = query_poolfish(query)

print("=" * 60)
print("SWAP EVENTS")
print("=" * 60)

if swaps and "swaps" in swaps and swaps["swaps"]:
    print(f"‚úÖ Swap events ARE available!")
    print(f"Sample: {len(swaps['swaps'])} recent swaps")
    
    # Show sample
    df_swaps = pd.DataFrame(swaps['swaps'])
    df_swaps['datetime'] = pd.to_datetime(df_swaps['timestamp'].astype(int), unit='s')
    display(df_swaps[['datetime', 'amount0', 'amount1', 'amountUSD']].head())
else:
    print("‚ùå Swap events NOT available or error occurred")

SWAP EVENTS
‚úÖ Swap events ARE available!
Sample: 10 recent swaps


Unnamed: 0,datetime,amount0,amount1,amountUSD
0,2026-01-19 16:15:23,-0.0841277899014825,270.482422,270.3414026562332
1,2026-01-19 16:15:22,0.1260226151783691,-404.775074,404.9688051192538
2,2026-01-19 16:15:05,-0.1555143273177752,500.0,499.7392826967281
3,2026-01-19 16:14:54,-0.0021772026865175,7.0,6.9963567190974985
4,2026-01-19 16:14:17,-1.3130458107254952,4221.585179,4219.421984567857


In [10]:
# Check for mint/burn events
query_mints = f"""
{{
    mints(
        first: 5,
        orderBy: timestamp,
        orderDirection: desc,
        where: {{pool: "{POOL_ADDRESS.lower()}"}}
    ) {{
        id
        timestamp
        tickLower
        tickUpper
        amount
        amount0
        amount1
        amountUSD
    }}
}}
"""

query_burns = f"""
{{
    burns(
        first: 5,
        orderBy: timestamp,
        orderDirection: desc,
        where: {{pool: "{POOL_ADDRESS.lower()}"}}
    ) {{
        id
        timestamp
        tickLower
        tickUpper
        amount
        amount0
        amount1
        amountUSD
    }}
}}
"""

mints = query_poolfish(query_mints)
burns = query_poolfish(query_burns)

print("=" * 60)
print("MINT/BURN EVENTS")
print("=" * 60)

if mints and "mints" in mints and mints["mints"]:
    print(f"‚úÖ Mint events ARE available!")
else:
    print("‚ùå Mint events NOT available")

if burns and "burns" in burns and burns["burns"]:
    print(f"‚úÖ Burn events ARE available!")
else:
    print("‚ùå Burn events NOT available")

MINT/BURN EVENTS
‚úÖ Mint events ARE available!
‚úÖ Burn events ARE available!


## 6. Summary: What Poolfish Provides

In [11]:
print("\n" + "=" * 70)
print("                    POOLFISH DATA AVAILABILITY SUMMARY")
print("=" * 70)

# Pool Day Data summary
if all_day_data:
    df_day = pd.DataFrame(all_day_data)
    df_day['date_dt'] = pd.to_datetime(df_day['date'].astype(int), unit='s')
    earliest_day = df_day['date_dt'].min()
    latest_day = df_day['date_dt'].max()
    day_range = (latest_day - earliest_day).days
    
    print(f"\nüìÖ POOL DAY DATA:")
    print(f"   ‚Ä¢ Available: ‚úÖ YES")
    print(f"   ‚Ä¢ Records: {len(df_day)}")
    print(f"   ‚Ä¢ Date Range: {earliest_day.strftime('%Y-%m-%d')} to {latest_day.strftime('%Y-%m-%d')}")
    print(f"   ‚Ä¢ History: ~{day_range} days ({day_range/30:.1f} months)")
else:
    print(f"\nüìÖ POOL DAY DATA: ‚ùå NOT AVAILABLE")

# Pool Hour Data summary
if all_hour_data:
    df_hour = pd.DataFrame(all_hour_data)
    df_hour['datetime'] = pd.to_datetime(df_hour['periodStartUnix'].astype(int), unit='s')
    earliest_hour = df_hour['datetime'].min()
    latest_hour = df_hour['datetime'].max()
    hour_range_days = (latest_hour - earliest_hour).days
    
    print(f"\n‚è∞ POOL HOUR DATA:")
    print(f"   ‚Ä¢ Available: ‚úÖ YES")
    print(f"   ‚Ä¢ Records: {len(df_hour)}")
    print(f"   ‚Ä¢ Date Range: {earliest_hour.strftime('%Y-%m-%d %H:%M')} to {latest_hour.strftime('%Y-%m-%d %H:%M')}")
    print(f"   ‚Ä¢ History: ~{hour_range_days} days ({hour_range_days/30:.1f} months)")
else:
    print(f"\n‚è∞ POOL HOUR DATA: ‚ùå NOT AVAILABLE (or limited)")

# Tick Data summary
print(f"\nüìä TICK DATA:")
if all_ticks:
    print(f"   ‚Ä¢ Available: ‚úÖ YES (current snapshot only)")
    print(f"   ‚Ä¢ Records: {len(all_ticks)} initialized ticks")
    print(f"   ‚Ä¢ Note: This is CURRENT state, no historical tick data")
else:
    print(f"   ‚Ä¢ Available: ‚ùå NOT AVAILABLE")

# Events summary
print(f"\nüîÑ SWAP EVENTS:")
if swaps and "swaps" in swaps and swaps["swaps"]:
    print(f"   ‚Ä¢ Available: ‚úÖ YES")
    print(f"   ‚Ä¢ Note: Individual swap transactions available")
else:
    print(f"   ‚Ä¢ Available: ‚ùå NOT AVAILABLE")

print(f"\nüè¶ MINT/BURN EVENTS:")
if (mints and "mints" in mints and mints["mints"]) or (burns and "burns" in burns and burns["burns"]):
    print(f"   ‚Ä¢ Available: ‚úÖ YES")
    print(f"   ‚Ä¢ Note: LP position mint/burn transactions available")
else:
    print(f"   ‚Ä¢ Available: ‚ùå NOT AVAILABLE")

print("\n" + "=" * 70)
print("                         KEY TAKEAWAYS")
print("=" * 70)
print("""
1. Pool Day Data: Best for training - ~1 year of daily aggregates
   Fields: TVL, volume, fees, OHLCV prices, liquidity, tick

2. Pool Hour Data: Higher granularity if available
   Same fields as daily but hourly resolution

3. Tick Data: Current liquidity distribution SNAPSHOT only
   No historical tick data available

4. Swap/Mint/Burn Events: Individual transactions available
   Similar to Dune data but via GraphQL

5. For RL Training: Pool Day Data + Binance hourly prices is sufficient
   - Binance: Accurate hourly OHLCV prices
   - Poolfish: Daily TVL, volume, fees for reward calculation
""")


                    POOLFISH DATA AVAILABILITY SUMMARY

üìÖ POOL DAY DATA:
   ‚Ä¢ Available: ‚úÖ YES
   ‚Ä¢ Records: 957
   ‚Ä¢ Date Range: 2023-06-08 to 2026-01-19
   ‚Ä¢ History: ~956 days (31.9 months)

‚è∞ POOL HOUR DATA:
   ‚Ä¢ Available: ‚úÖ YES
   ‚Ä¢ Records: 10000
   ‚Ä¢ Date Range: 2023-06-08 21:00 to 2024-07-29 15:00
   ‚Ä¢ History: ~416 days (13.9 months)

üìä TICK DATA:
   ‚Ä¢ Available: ‚úÖ YES (current snapshot only)
   ‚Ä¢ Records: 5022 initialized ticks
   ‚Ä¢ Note: This is CURRENT state, no historical tick data

üîÑ SWAP EVENTS:
   ‚Ä¢ Available: ‚úÖ YES
   ‚Ä¢ Note: Individual swap transactions available

üè¶ MINT/BURN EVENTS:
   ‚Ä¢ Available: ‚úÖ YES
   ‚Ä¢ Note: LP position mint/burn transactions available

                         KEY TAKEAWAYS

1. Pool Day Data: Best for training - ~1 year of daily aggregates
   Fields: TVL, volume, fees, OHLCV prices, liquidity, tick

2. Pool Hour Data: Higher granularity if available
   Same fields as daily but hourly res

In [12]:
# Final comparison with how far back we can go
print("\n" + "=" * 70)
print("                    DATA AVAILABILITY COMPARISON")
print("=" * 70)

now = datetime.utcnow()

if all_day_data:
    df_day = pd.DataFrame(all_day_data)
    df_day['date_dt'] = pd.to_datetime(df_day['date'].astype(int), unit='s')
    earliest = df_day['date_dt'].min()
    days_back = (now - earliest).days
    print(f"\nPoolfish Pool Day Data goes back: {days_back} days (~{days_back/365:.1f} years)")
    print(f"Earliest available: {earliest.strftime('%Y-%m-%d')}")

if all_hour_data:
    df_hour = pd.DataFrame(all_hour_data)
    df_hour['datetime'] = pd.to_datetime(df_hour['periodStartUnix'].astype(int), unit='s')
    earliest_hour = df_hour['datetime'].min()
    hours_back = int((now - earliest_hour).total_seconds() / 3600)
    days_back_hour = hours_back / 24
    print(f"\nPoolfish Pool Hour Data goes back: {hours_back} hours (~{days_back_hour:.0f} days)")
    print(f"Earliest available: {earliest_hour.strftime('%Y-%m-%d %H:%M')}")

print("\n" + "=" * 70)


                    DATA AVAILABILITY COMPARISON

Poolfish Pool Day Data goes back: 956 days (~2.6 years)
Earliest available: 2023-06-08

Poolfish Pool Hour Data goes back: 22939 hours (~956 days)
Earliest available: 2023-06-08 21:00

