In [None]:
# Imports
# pandas loads the CSV reports, matplotlib draws the charts, and pathlib
# builds the report file paths.
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
# Select the 'seaborn-v0_8' matplotlib style sheet for the figures in this notebook.
# NOTE(review): this style name presumably depends on the installed matplotlib
# version — confirm against the environment this notebook is run in.
plt.style.use('seaborn-v0_8')

: 

: 

: 

: 

: 

In [None]:
# Paths
# Robustly locate repo root containing the 'reports' folder
from pathlib import Path


def _find_reports_root(start: Path) -> Path:
    # Walk up; prefer a direct 'reports' folder, otherwise look for 'polymarket-research/reports'
    search_points = [start] + list(start.parents)
    for p in search_points:
        if (p / 'reports').exists():
            return p
        if (p / 'polymarket-research' / 'reports').exists():
            return p / 'polymarket-research'
    return start

# Make the working directory absolute, then search upward from it for the
# folder that contains 'reports'.
cwd = Path('.').resolve()
base = _find_reports_root(cwd)
reports = base.joinpath('reports')

# 'latest_*' files sit directly inside reports/
latest_top = reports.joinpath('latest_top_markets_24h.csv.gz')
latest_all = reports.joinpath('latest_all_active_markets.csv.gz')

# '*_rolling' files sit in the reports/rolling/ subfolder
roll_top = reports.joinpath('rolling', 'top_markets_24h_rolling.csv.gz')
roll_all = reports.joinpath('rolling', 'all_active_markets_rolling.csv.gz')

# Echo the four paths as the cell output
(latest_top, latest_all, roll_top, roll_all)

(PosixPath('reports/latest_top_markets_24h.csv.gz'),
 PosixPath('reports/latest_all_active_markets.csv.gz'),
 PosixPath('reports/rolling/top_markets_24h_rolling.csv.gz'),
 PosixPath('reports/rolling/all_active_markets_rolling.csv.gz'))

In [None]:
# Load datasets (gzip compressed)
# All four report files must be present; if any is missing, list what is absent
# and how to generate it instead of attempting a partial load.
files = [latest_top, latest_all, roll_top, roll_all]
missing = [p for p in files if not p.exists()]
if missing:
    print("Missing datasets:")
    for p in missing:
        print(f"- {p}")
    print("Generate them by running reports. Example:")
    print("  python run_reports.py --limit 50")
    # NOTE: the plotting cells below use top_df / roll_top_df / roll_all_df,
    # so they cannot run until the files exist and this cell is re-run.
else:
    top_df = pd.read_csv(latest_top, compression='gzip')
    all_df = pd.read_csv(latest_all, compression='gzip')
    roll_top_df = pd.read_csv(roll_top, compression='gzip')
    roll_all_df = pd.read_csv(roll_all, compression='gzip')
    # Parse datetimes; unparseable values become NaT (errors='coerce').
    # Columns are optional, so each frame is only touched where they exist.
    for df in (top_df, all_df, roll_top_df, roll_all_df):
        for col in ('snapshot_time', 'endDate'):
            if col in df.columns:
                df[col] = pd.to_datetime(df[col], errors='coerce')
    # A bare expression nested inside if/else is not rendered by the notebook
    # (only the last top-level expression is), so show the previews explicitly.
    # `display` is provided by the IPython kernel.
    display(top_df.head(3), all_df.head(3))

FileNotFoundError: [Errno 2] No such file or directory: 'reports/latest_top_markets_24h.csv.gz'

## Top 20 Markets by 24h Volume
A quick bar chart from the latest Top 24h dataset.

In [None]:
# Order markets by 24h volume (largest first) and keep the first 20 rows
top_plot = top_df.sort_values(by='volume24hr', ascending=False).iloc[:20]

fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(top_plot['slug'], top_plot['volume24hr'])
# Flip the y-axis so the first (largest) row is drawn at the top
ax.invert_yaxis()
ax.set_title('Top 20 by 24h Volume')
ax.set_xlabel('24h Volume')
ax.set_ylabel('Market (slug)')
fig.tight_layout()
plt.show()

## Rolling Trends
Aggregate the rolling datasets by date to get an overview of daily activity.

In [None]:
# Per-day total of volume24hr across the rolling Top dataset
# NOTE(review): if the rolling file holds several snapshots for the same day,
# this sum adds a market's volume24hr once per snapshot — confirm that is the
# intended reading of the chart.
rt = roll_top_df.assign(date=roll_top_df['snapshot_time'].dt.date)
daily_top = rt.groupby('date', as_index=False)['volume24hr'].sum()

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(daily_top['date'], daily_top['volume24hr'], marker='o')
ax.set_title('Daily Sum of 24h Volume (Top markets)')
ax.set_ylabel('Sum of 24h Volume')
ax.set_xlabel('Date')
# Tilt the date labels so neighbouring ticks do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

# Cell output: the five most recent daily totals
daily_top.tail(5)

In [None]:
# Per-day number of distinct market_id values in the rolling All dataset
ra = roll_all_df.assign(date=roll_all_df['snapshot_time'].dt.date)
daily_count = ra.groupby('date', as_index=False)['market_id'].nunique()

fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(
    daily_count['date'],
    daily_count['market_id'],
    marker='o',
    color='tab:orange',
)
ax.set_title('Daily Count of Unique Markets (All active snapshot)')
ax.set_ylabel('Unique markets')
ax.set_xlabel('Date')
# Tilt the date labels so neighbouring ticks do not overlap
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()

# Cell output: the five most recent daily counts
daily_count.tail(5)

## Notes
- Data comes from Polymarket Gamma API via our scripts.
- `outcomes` and `outcomePrices` are lists; for outcome-level views, consider exploding them into long format.
- Rolling files append per run — schedule frequency affects granularity of these charts.