In [2]:
# Backend setup: select the matplotlib backend before pyplot is imported, and
# import matplotlib before polars.
# NOTE(review): the original note attributes a display problem to pyarrow being
# pulled in alongside polars; that cannot be verified from this notebook, so
# confirm the import ordering is still required.
import matplotlib
# NOTE(review): TkAgg is a GUI backend, so figures presumably open in a separate
# window instead of inline and need a desktop session - confirm this is what the
# kernel in use should do.
matplotlib.use('TkAgg')  # Use interactive backend (alternatives: 'QtAgg', 'Qt5Agg')
import matplotlib.pyplot as plt

# Polars is imported only after the backend has been selected (see note above).
import polars as pl

In [3]:
# Load the trade export into a Polars DataFrame. The path is relative, so the
# file is resolved against the kernel's current working directory.
# Later cells read the created_time, ticker, yes_price and count columns.
csv_trades = pl.read_csv("2-5mill_trades.csv")

In [4]:
# Organize trades by ticker_name for line graph analysis

# Parse created_time into a timezone-aware datetime.
# The dtype guard keeps this cell idempotent: the parsed frame is stored back
# under the same name, so an unguarded `.str.to_datetime` would raise on a
# second run because the column is no longer a string.
if csv_trades.schema["created_time"] == pl.Utf8:
    csv_trades = csv_trades.with_columns(
        pl.col("created_time").str.to_datetime(time_zone="America/New_York")
    )

# Order rows by market and then chronologically, so each ticker's trades form
# one contiguous, time-ordered run for the line plots in later cells.
csv_trades = csv_trades.sort(["ticker", "created_time"])

# One row per ticker: trade count, yes_price mean/min/max and summed `count`,
# ordered with the most traded ticker first.
ticker_summary = csv_trades.group_by("ticker").agg([
    pl.len().alias("trade_count"),
    pl.col("yes_price").mean().alias("avg_yes_price"),
    pl.col("yes_price").min().alias("min_yes_price"),
    pl.col("yes_price").max().alias("max_yes_price"),
    pl.col("count").sum().alias("total_volume")
]).sort("trade_count", descending=True)

print(f"Total unique tickers: {ticker_summary.height}")
print("\nTop 20 most traded tickers:")
print(ticker_summary.head(20))

Total unique tickers: 7852

Top 20 most traded tickers:
shape: (20, 6)
┌─────────────────────┬─────────────┬───────────────┬───────────────┬───────────────┬──────────────┐
│ ticker              ┆ trade_count ┆ avg_yes_price ┆ min_yes_price ┆ max_yes_price ┆ total_volume │
│ ---                 ┆ ---         ┆ ---           ┆ ---           ┆ ---           ┆ ---          │
│ str                 ┆ u32         ┆ f64           ┆ i64           ┆ i64           ┆ i64          │
╞═════════════════════╪═════════════╪═══════════════╪═══════════════╪═══════════════╪══════════════╡
│ KXNBAGAME-26FEB02NO ┆ 6487        ┆ 55.529058     ┆ 28            ┆ 80            ┆ 2083893      │
│ PCHA-CHA            ┆             ┆               ┆               ┆               ┆              │
│ KXNBAGAME-26FEB02NO ┆ 4922        ┆ 46.563795     ┆ 21            ┆ 72            ┆ 1046644      │
│ PCHA-NOP            ┆             ┆               ┆               ┆               ┆              │
│ KXWTAMATCH-26FEB02

In [5]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

# Plot the yes-price history of the single most traded ticker and save it as a PNG.

# ticker_summary is sorted by trade_count descending, so row 0 is the most
# traded ticker.
most_popular_ticker = ticker_summary[0, "ticker"]
trade_count = ticker_summary[0, "trade_count"]

# Filter trades for this ticker
ticker_trades = csv_trades.filter(pl.col("ticker") == most_popular_ticker)

# Convert to pandas and prepare price data
df = ticker_trades.select([
    "created_time",
    (pl.col("yes_price") / 100).alias("yes_price_dollars"),  # Convert cents to dollars
    "count"
]).to_pandas()

# Create the graph with the explicit figure/axes interface so nothing depends
# on pyplot's "current figure" state left behind by other cells.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df["created_time"], df["yes_price_dollars"],
        marker='o', linestyle='-', markersize=2, alpha=0.6, color='#2E86AB')
ax.set_title(f"Price Over Time: {most_popular_ticker}\n({trade_count:,} trades)",
             fontsize=14, fontweight='bold')
ax.set_xlabel("Time", fontsize=12)
ax.set_ylabel("Yes Price (USD)", fontsize=12)
ax.grid(True, alpha=0.3)

# DateFormatter falls back to rcParams["timezone"] (UTC by default) when no tz
# is given, which would label the axis in UTC even though created_time was
# converted to a local zone. Pass the column's own tz so the tick labels match
# the data; a column without a tz yields None, i.e. the previous default.
time_axis_tz = getattr(df["created_time"].dtype, "tz", None)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M', tz=time_axis_tz))
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()

# Save as PNG
fig.savefig("most_popular_market.png", dpi=300, bbox_inches='tight')
print("Graph saved as 'most_popular_market.png'")

plt.show()

Graph saved as 'most_popular_market.png'


In [6]:
# Keep only the ten most traded tickers so the interactive plot stays readable.
# ticker_summary is already ordered by trade_count (descending), so the first
# ten ticker values are the ten busiest markets.
top_10_tickers = ticker_summary.get_column("ticker").head(10).to_list()
in_top_10 = pl.col("ticker").is_in(top_10_tickers)
csv_trades_top10 = csv_trades.filter(in_top_10)

In [7]:
# Text preview of the top-10 subset (shape, column dtypes and sample rows) as a
# quick sanity check before plotting.
print(csv_trades_top10)

shape: (23_849, 11)
┌───────┬──────────┬─────────────┬──────────┬───┬────────────┬────────────┬───────────┬────────────┐
│ count ┆ count_fp ┆ created_tim ┆ no_price ┆ … ┆ ticker     ┆ trade_id   ┆ yes_price ┆ yes_price_ │
│ ---   ┆ ---      ┆ e           ┆ ---      ┆   ┆ ---        ┆ ---        ┆ ---       ┆ dollars    │
│ i64   ┆ f64      ┆ ---         ┆ i64      ┆   ┆ str        ┆ str        ┆ i64       ┆ ---        │
│       ┆          ┆ datetime[μs ┆          ┆   ┆            ┆            ┆           ┆ f64        │
│       ┆          ┆ , America/N ┆          ┆   ┆            ┆            ┆           ┆            │
│       ┆          ┆ ew_York]    ┆          ┆   ┆            ┆            ┆           ┆            │
╞═══════╪══════════╪═════════════╪══════════╪═══╪════════════╪════════════╪═══════════╪════════════╡
│ 47    ┆ 47.0     ┆ 2026-02-02  ┆ 38       ┆ … ┆ KXATPMATCH ┆ 4a2a3cf7-c ┆ 62        ┆ 0.62       │
│       ┆          ┆ 14:34:30.06 ┆          ┆   ┆ -26FEB01BA ┆ d55-7892

In [8]:
import plotly.express as px

# Interactive line chart of yes_price over time, one coloured trace per ticker,
# for the top-10 subset.
top10_pandas = csv_trades_top10.to_pandas()
top10_axis_labels = {"created_time": "Time", "yes_price": "Yes Price (cents)"}

fig = px.line(
    top10_pandas,
    x="created_time",
    y="yes_price",
    color="ticker",
    title="Trade Prices Over Time by Ticker",
    labels=top10_axis_labels,
)

fig.show()

In [9]:
# Summary rows for every market whose ticker contains "BTC".
# The rows keep ticker_summary's ordering (trade_count, descending).
is_btc_ticker = pl.col("ticker").str.contains("BTC")
btc_tickers = ticker_summary.filter(is_btc_ticker)

print(f"Total tickers containing 'BTC': {btc_tickers.height}")
print("\nAll BTC tickers sorted by trade count:")
print(btc_tickers)

# Plain Python list of the matching ticker names, printed one per line
btc_ticker_list = btc_tickers.get_column("ticker").to_list()
print(f"\nTicker names only ({len(btc_ticker_list)} total):")
for ticker in btc_ticker_list:
    print(f"  - {ticker}")

Total tickers containing 'BTC': 151

All BTC tickers sorted by trade count:
shape: (151, 6)
┌─────────────────────┬─────────────┬───────────────┬───────────────┬───────────────┬──────────────┐
│ ticker              ┆ trade_count ┆ avg_yes_price ┆ min_yes_price ┆ max_yes_price ┆ total_volume │
│ ---                 ┆ ---         ┆ ---           ┆ ---           ┆ ---           ┆ ---          │
│ str                 ┆ u32         ┆ f64           ┆ i64           ┆ i64           ┆ i64          │
╞═════════════════════╪═════════════╪═══════════════╪═══════════════╪═══════════════╪══════════════╡
│ KXBTC15M-26FEB02151 ┆ 1260        ┆ 50.793651     ┆ 1             ┆ 98            ┆ 46372        │
│ 5-15                ┆             ┆               ┆               ┆               ┆              │
│ KXBTC15M-26FEB02153 ┆ 1167        ┆ 60.610111     ┆ 1             ┆ 99            ┆ 45549        │
│ 0-30                ┆             ┆               ┆               ┆               ┆              │

In [None]:
# Graph all BTC trades
import plotly.express as px

# Filter trades to only include BTC tickers
btc_trades = csv_trades.filter(pl.col("ticker").str.contains("BTC"))

print(f"Total BTC trades: {btc_trades.height:,}")
print(f"Unique BTC tickers: {btc_trades['ticker'].n_unique()}")
print(f"Date range: {btc_trades['created_time'].min()} to {btc_trades['created_time'].max()}")

# Create interactive plot with all BTC trades
fig = px.line(
    btc_trades.to_pandas(),
    x="created_time",
    y="yes_price",
    color="ticker",
    title="All BTC Market Trades Over Time",
    labels={
        "created_time": "Time",
        "yes_price": "Yes Price (cents)",
        "ticker": "Market"
    },
    hover_data=["count", "no_price"]
)

fig.update_layout(
    height=600,
    hovermode='closest',
    legend=dict(
        orientation="v",
        yanchor="top",
        y=1,
        xanchor="left",
        x=1.01
    )
)

fig.show()

# Also create a static matplotlib version

# Split the frame into one sub-frame per ticker in a single pass instead of
# re-filtering the whole frame once per ticker. maintain_order=True keeps the
# groups in first-appearance order, so line colours and legend order are the
# same on every run (a bare `unique()` gives no ordering guarantee).
btc_trades_by_ticker = [
    part
    for part in btc_trades.partition_by("ticker", maintain_order=True)
    if part.height > 0
]
unique_btc_tickers = [part["ticker"][0] for part in btc_trades_by_ticker]

# A single-column legend with one entry per market is taller than the figure
# once there are more than a few dozen markets, which makes tight_layout give
# up with a "Tight layout not applied" warning. Wrap the legend into columns of
# at most `legend_max_rows` entries and widen the figure by roughly one legend
# column (about 2 inches - a heuristic for 8 pt ticker labels) per extra column
# so the plotting area keeps its width.
legend_max_rows = 40
legend_cols = max(1, (len(unique_btc_tickers) + legend_max_rows - 1) // legend_max_rows)
static_fig, ax = plt.subplots(figsize=(14 + 2 * (legend_cols - 1), 8))

# Plot each ticker
for ticker, part in zip(unique_btc_tickers, btc_trades_by_ticker):
    ticker_data = part.to_pandas()
    ax.plot(ticker_data["created_time"],
            ticker_data["yes_price"],
            marker='o',
            markersize=3,
            alpha=0.6,
            label=ticker,
            linestyle='-')

ax.set_title(f"All BTC Market Trades Over Time\n({btc_trades.height:,} total trades across {len(unique_btc_tickers)} markets)",
             fontsize=14, fontweight='bold')
ax.set_xlabel("Time", fontsize=12)
ax.set_ylabel("Yes Price (cents)", fontsize=12)
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=8, ncol=legend_cols)
ax.grid(True, alpha=0.3)
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
static_fig.tight_layout()

# Save the plot
static_fig.savefig("btc_trades_all.png", dpi=300, bbox_inches='tight')
print("\nGraph saved as 'btc_trades_all.png'")

plt.show()

Total BTC trades: 13,937
Unique BTC tickers: 151
Date range: 2026-02-02T19:33:55.770539+0000 to 2026-02-02T20:51:21.551839+0000



Tight layout not applied. The bottom and top margins cannot be made large enough to accommodate all Axes decorations.




Graph saved as 'btc_trades_all.png'
