In [1]:
!pip install colorama



In [2]:
# Import dependencies
import dotenv
import os
import pickle
import py_clob_client
import requests
import prediction_pipeline
from datetime import datetime, timedelta, timezone
from py_clob_client.constants import POLYGON
from py_clob_client.client import ClobClient
from py_clob_client.clob_types import OrderArgs
from py_clob_client.order_builder.constants import BUY
import disk_cache
import pretty_print_data

In [3]:
# Load settings from the local .env file; indexing with env["PK"] below raises
# KeyError if the file has no "PK" entry.
env = dotenv.dotenv_values(".env")
HOST = "https://clob.polymarket.com"
POLYMARKET_KEY = env["PK"]  # presumably the wallet private key -- keep it out of any cell output
CHAIN_ID = POLYGON

# Variables
# RISK_TOLERANCE is read by calculate_edge_metrics, which scales EV by
# (1 - (1 - RISK_TOLERANCE) * uncertainty_range); at 1.0 the uncertainty width has no effect.
RISK_TOLERANCE = 0.7  # Represents how much to adjust EV calculations based on the width of the model confidence bounds.

# Create the client
client = ClobClient(HOST, key=POLYMARKET_KEY, chain_id=CHAIN_ID)
# Attach the API credentials returned by create_or_derive_api_creds() to the client.
client.set_api_creds(client.create_or_derive_api_creds())

# Setup cache
# Disk cache handed to prediction_pipeline.create_prediction; configured with a 24 hour expiry.
cache = disk_cache.DiskCache(cache_dir="api_cache", expiry_hours=24)

In [4]:
def fetch_all_events_from_gamma():
  """
  Fetch all open, active events from the Gamma API.

  Pages through the /events endpoint `limit` results at a time (ordered by
  createdAt, descending) until an empty page is returned.

  Returns:
    A list with the decoded JSON items of every page that was fetched. If a
    page comes back with a non-200 status, the error is printed and the
    events collected so far are returned.

  Raises:
    requests.RequestException: on connection errors or when a single request
      exceeds the timeout.
  """
  url = "https://gamma-api.polymarket.com/events"
  limit = 100  # Page size requested from the API
  # Without a timeout, requests waits indefinitely on a stalled connection.
  timeout_seconds = 30

  events = []
  offset = 0

  while True:
    params = {
      "closed": "false",
      "active": "true",
      "limit": limit,
      "offset": offset,
      "order": "createdAt",
      "ascending": "false"
    }

    response = requests.get(url, params=params, timeout=timeout_seconds)
    if response.status_code != 200:
      print(f"Error fetching events: {response.status_code}")
      break

    batch = response.json()
    if not batch:  # No more results
      break

    events.extend(batch)
    offset += limit

  return events

def fetch_all_markets(client):
  """
  Download every market from the CLOB and index the result by condition id.

  Pages are requested with client.get_markets(next_cursor=...), starting from
  an empty cursor. Paging stops when a response is empty, has no 'data' key,
  or reports a next cursor that is empty or equal to 'LTE='.

  Returns:
    dict mapping each market's 'condition_id' to the market dict. When the
    same condition id appears more than once, the last occurrence wins.
  """
  collected = []
  cursor = ""

  while True:
    page = client.get_markets(next_cursor=cursor)
    if not (page and 'data' in page):
      break
    collected += page['data']

    cursor = page.get('next_cursor', '')
    # 'LTE=' is the base64 encoding of "-1"; it is treated as the end-of-list cursor.
    if not cursor or cursor == 'LTE=':
      break

  return dict((entry['condition_id'], entry) for entry in collected)

def filter_markets(condition_id_to_market, days=1, now=None):
  """
  Narrow the full market map down to near-term, non-sports markets.

  Filters are applied in sequence, printing the surviving count after each:
    1. active and not closed,
    2. 'end_date_iso' later than `now`,
    3. 'end_date_iso' no later than `now + days`,
    4. a non-empty 'tags' list that does not contain 'Sports'
       (markets without tags are dropped as well).

  Args:
    condition_id_to_market: dict of condition id -> market dict. Every market
      must have 'active' and 'closed'; 'end_date_iso' and 'tags' may be
      missing or empty.
    days: length of the look-ahead window in days (default 1).
    now: timezone-aware datetime used as the reference time; defaults to the
      current UTC time. Mainly useful for reproducible runs and tests.

  Returns:
    dict containing only the markets that pass every filter.
  """
  if now is None:
    now = datetime.now(timezone.utc)
  # Compute both bounds once so every market is compared against the same instant.
  # NOTE: the comparison is lexicographic on ISO-8601 strings, as before; this
  # assumes 'end_date_iso' values are UTC timestamps in the same layout -- TODO confirm.
  window_start = now.isoformat()
  window_end = (now + timedelta(days=days)).isoformat()

  # Filter for active, non-closed markets.
  current_markets = {k: v for k, v in condition_id_to_market.items() if v['active'] and not v['closed']}
  print(f"Found {len(current_markets)} current markets")

  # Filter for markets ending in the future. Some markets are kept open past their end date due to a dispute in the resolution.
  future_markets = {k: v for k, v in current_markets.items() if v.get('end_date_iso') and v['end_date_iso'] > window_start}
  print(f"Found {len(future_markets)} future markets")

  # Filter for markets ending within the look-ahead window.
  near_term_markets = {k: v for k, v in future_markets.items() if v['end_date_iso'] <= window_end}
  print(f"Found {len(near_term_markets)} near term markets")

  near_non_sports_markets = {k: v for k, v in near_term_markets.items() if v.get('tags') and 'Sports' not in v['tags']}
  print(f"Found {len(near_non_sports_markets)} near term non-sports markets")
  return near_non_sports_markets

def fetch_all_orderbooks(client, markets):
  """
  Collect the order book of every token listed on the given markets.

  Args:
    client: object exposing get_order_book(token_id).
    markets: dict of condition id -> market dict; tokens are read from each
      market's optional 'tokens' list.

  Returns:
    dict mapping token id -> order book. Tokens without a token id are
    skipped, and tokens whose fetch raises PolyApiException are reported
    and left out of the result.
  """
  print(f"Fetching order books for {len(markets)} markets.")
  books_by_token = {}
  for market in markets.values():
    for token in market.get('tokens', []):
      token_id = token.get('token_id', None)
      if not token_id:
        continue
      try:
        print("Fetching order book")
        book = client.get_order_book(token_id)
      except py_clob_client.exceptions.PolyApiException as err:
        print(f"Error fetching order book for token {token_id}: {err}")
      else:
        books_by_token[token_id] = book
  return books_by_token

In [5]:
# Load today's market snapshot if one exists; otherwise fetch all markets from
# the CLOB and write the snapshot so later runs on the same UTC day reuse it.
today = datetime.now(timezone.utc).strftime('%Y-%m-%d')
markets_pickle_path = f"snapshots/markets_snapshot_{today}.pkl"

# Check if markets pickle exists
if os.path.exists(markets_pickle_path):
  # NOTE(security): pickle.load can execute arbitrary code; only load snapshot
  # files that were written by this notebook.
  with open(markets_pickle_path, 'rb') as f:
    condition_id_to_market = pickle.load(f)
  print(f"Loaded {len(condition_id_to_market)} markets from {markets_pickle_path}")
else:
  # Fetch Markets via CLOB
  condition_id_to_market = fetch_all_markets(client)
  # The snapshots directory may not exist on a fresh checkout; open(..., 'wb')
  # would fail without it.
  os.makedirs(os.path.dirname(markets_pickle_path), exist_ok=True)
  with open(markets_pickle_path, 'wb') as f:
    pickle.dump(condition_id_to_market, f)
    print(f"Saved {len(condition_id_to_market)} markets to {markets_pickle_path}")

condition_id_to_filtered_market = filter_markets(condition_id_to_market)

Loaded 30435 markets from snapshots/markets_snapshot_2025-03-05.pkl
Found 2905 current markets
Found 2725 future markets
Found 38 near term markets
Found 20 near term non-sports markets


In [14]:
# Run the LLM prediction pipeline over every filtered market and keep the results
# keyed by condition id. The pipeline is handed the disk cache so repeated runs
# during development do not hammer the APIs; cached entries live for 24 hours by
# default, so watch out for stale predictions.
# Markets with no question or no description, and markets for which the pipeline
# returns nothing, get no entry in condition_id_to_prediction.

condition_id_to_prediction = {}
for condition_id, market in condition_id_to_filtered_market.items():
  market_title = market.get('question', None)
  market_description = market.get('description', None)
  if market_title and market_description:
    full_market_description = f'Question: {market_title}\nDescription and Rules: {market_description}'
    prediction_json = prediction_pipeline.create_prediction(full_market_description, cache=cache)
    if prediction_json:
      condition_id_to_prediction[condition_id] = prediction_json

Checking for cached report...
Cached report found!
Checking for cached prediction...
Cached prediction found!
{'model_confidence': 0.75, 'probability': 0.68, 'reasoning': '## Superforecasting Analysis of ECB Rate Cut Prediction Market\n\n**1. Base Rate Analysis:**\n\n* **Reference Class:**  ECB interest rate decisions in periods following initial rate hikes and during periods of moderate inflation and economic uncertainty in the Eurozone.\n* **Historical Frequency:**  Historically, central banks, including the ECB, tend to cut interest rates in sequences once they begin easing cycles, especially when inflation is trending towards target and economic growth is sluggish.  Looking back at past ECB easing cycles (e.g., post-2008 financial crisis, Eurozone sovereign debt crisis, early 2010s), once the ECB started cutting, they often implemented multiple cuts within a relatively short timeframe.  However, the pace and magnitude vary significantly depending on the specific economic context.  

In [7]:
# Fetch the active order books for the filtered markets from Polymarket via CLOB,
# reusing today's snapshot file when one already exists.

today = datetime.now(timezone.utc).strftime('%Y-%m-%d')
active_order_books_pickle_path = f"snapshots/active_order_books_snapshot_{today}.pkl"

token_id_to_book = {}
# Check if order book pickle exists
if os.path.exists(active_order_books_pickle_path):
  # NOTE(security): pickle.load can execute arbitrary code; only load snapshot
  # files that were written by this notebook.
  with open(active_order_books_pickle_path, 'rb') as f:
    token_id_to_book = pickle.load(f)
  print(f"Loaded {len(token_id_to_book)} order books from {active_order_books_pickle_path}")
else:
  print("No order book pickle found.")
  # Fetch order books using CLOB client
  token_id_to_book = fetch_all_orderbooks(client, condition_id_to_filtered_market)
  # The snapshots directory may not exist on a fresh checkout; open(..., 'wb')
  # would fail without it.
  os.makedirs(os.path.dirname(active_order_books_pickle_path), exist_ok=True)
  with open(active_order_books_pickle_path, 'wb') as f:
    pickle.dump(token_id_to_book, f)
    # The saved objects are order books, not events.
    print(f"Saved {len(token_id_to_book)} order books to {active_order_books_pickle_path}")

No order book pickle found.
Fetching order books for 20 markets.
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Fetching order book
Saved 40 events to snapshots/active_order_books_snapshot_2025-03-06.pkl


In [8]:
# Save a Polymarket snapshot of filtered markets, predictions and current order books.

today = datetime.now(timezone.utc).strftime('%Y-%m-%d')
polymarket_snapshot_pickle_path = f"snapshots/polymarket_snapshot_{today}.pkl"

polymarket_snapshot = {
    "order_books": token_id_to_book,
    "markets": condition_id_to_filtered_market,
    "predictions": condition_id_to_prediction,
}
# Check if market snapshot pickle exists
if os.path.exists(polymarket_snapshot_pickle_path):
    # An existing file is neither overwritten nor loaded: later cells keep using
    # the in-memory polymarket_snapshot built above.
    print("already saved snapshot pickle")
else:
  print("No snapshot pickle found.")
  # The snapshots directory may not exist on a fresh checkout; open(..., 'wb')
  # would fail without it.
  os.makedirs(os.path.dirname(polymarket_snapshot_pickle_path), exist_ok=True)
  # Write the combined snapshot to disk
  with open(polymarket_snapshot_pickle_path, 'wb') as f:
    pickle.dump(polymarket_snapshot, f)
    print(f"Saved market snapshot pickle to {polymarket_snapshot_pickle_path}")

No snapshot pickle found.
Saved market snapshot pickle to snapshots/polymarket_snapshot_2025-03-06.pkl


In [9]:
# Unpack the in-memory snapshot into the names used by the analysis cells below.
# NOTE(review): possible_positions is not referenced by any other cell visible here.
possible_positions = []
# NOTE: this rebinds condition_id_to_market from the full market map loaded earlier
# to the filtered markets stored under polymarket_snapshot["markets"].
condition_id_to_market = polymarket_snapshot["markets"]
token_id_to_order_book = polymarket_snapshot["order_books"]
condition_id_to_prediction = polymarket_snapshot["predictions"]

In [25]:
def calculate_edge_metrics(prediction, order, risk_tolerance=None):
  """
  Score a single order against a model prediction.

  Args:
    prediction: dict with 'probability', 'model_confidence' and an
      'uncertainty' dict holding 'upper_bound' and 'lower_bound'.
    order: object whose `price` attribute can be converted with float().
    risk_tolerance: weight in the uncertainty penalty; when None (default)
      the module-level RISK_TOLERANCE is used. At 1.0 the width of the
      uncertainty range has no effect on the result.

  Returns:
    Tuple (edge, ev, risk_adjusted_ev) where
      edge = probability - price
      ev = edge * model_confidence
      risk_adjusted_ev = ev * (1 - (1 - risk_tolerance) * (upper_bound - lower_bound))
  """
  if risk_tolerance is None:
    # Looked up at call time so changes to the notebook-level constant take effect.
    risk_tolerance = RISK_TOLERANCE
  probability = prediction['probability']
  confidence = prediction['model_confidence']
  uncertainty = prediction['uncertainty']
  uncertainty_range = uncertainty['upper_bound'] - uncertainty['lower_bound']
  edge = probability - float(order.price)
  ev = edge * confidence
  risk_adjusted_ev = ev * (1 - (1 - risk_tolerance) * uncertainty_range)
  return (edge, ev, risk_adjusted_ev)

def invert_prediction_probability(pred):
  """
  Build the complementary prediction for the opposite outcome.

  The probability becomes 1 - probability, and the uncertainty bounds are
  mirrored: the new upper bound is 1 - old lower bound and the new lower
  bound is 1 - old upper bound. 'model_confidence' and the uncertainty's
  'confidence_level' are carried over unchanged. Any other keys of `pred`
  are not copied, and `pred` itself is not modified.
  """
  flipped_probability = 1 - pred["probability"]
  confidence = pred["model_confidence"]
  mirrored_bounds = dict(
      upper_bound=1 - pred["uncertainty"]["lower_bound"],
      lower_bound=1 - pred["uncertainty"]["upper_bound"],
      confidence_level=pred["uncertainty"]["confidence_level"],
  )
  return dict(
      probability=flipped_probability,
      model_confidence=confidence,
      uncertainty=mirrored_bounds,
  )

In [26]:
# Create possible position list: one summary per (market, outcome token) that has
# a prediction, a fetched order book and at least one ask.
market_summaries = []

for cond_id, market in condition_id_to_market.items():
  # The prediction cell skips markets without a question/description or without a
  # pipeline result, so not every filtered market has a prediction.
  prediction = condition_id_to_prediction.get(cond_id)
  if prediction is None:
    continue
  for token in market['tokens']:
    # fetch_all_orderbooks leaves out tokens whose fetch failed.
    order_book = token_id_to_order_book.get(token['token_id'])
    if order_book is None or not order_book.asks:
      continue
    # The best ask is the lowest-priced ask; pick it explicitly instead of
    # relying on the ordering of the asks list.
    best_ask = min(order_book.asks, key=lambda ask: float(ask.price))
    # The prediction describes the 'Yes' side; mirror it when scoring a 'No' token.
    if token['outcome'] == 'No':
      final_prediction = invert_prediction_probability(prediction)
    else:
      final_prediction = prediction
    edge, ev, adjusted_ev = calculate_edge_metrics(final_prediction, best_ask)
    market_summary = {
        'title': market['question'],
        'outcome': token['outcome'],
        'condition_id': cond_id,
        'probability': final_prediction['probability'],
        'model_confidence': final_prediction['model_confidence'],
        'uncertainty': final_prediction['uncertainty'],
        'best_ask_price': float(best_ask.price),
        'best_ask_size': float(best_ask.size),
        'asks': order_book.asks,
        'edge': edge,
        'adjusted_ev': adjusted_ev
    }
    market_summaries.append(market_summary)

In [27]:
# Order the summaries in place from highest to lowest adjusted EV (stable for ties).
market_summaries.sort(key=lambda summary: summary['adjusted_ev'], reverse=True)

In [28]:
# Render the market summaries, in their current list order, with the project's pretty printer.
pretty_print_data.pretty_print_markets(market_summaries)

MARKET: Will Trump issue an executive order on March 6?
Condition ID: 0x8750878941d591e755011c90045318fe459ebd5e95e8a27716439f2e6a8547f0

Outcome: No
  Probability: 0.92 (Model Confidence: 0.75)
  Edge: 0.440
  Adjusted EV: 0.318 🟢
  Best Ask: 0.48 (Size: 114.04)
  Uncertainty Range: [0.85-0.97] (CL: 0.90)



MARKET: Will the highest temperature in London be between 55-56°F on March 6?
Condition ID: 0x76b63e28fd148b5e657cf8100da4c1c21d77dcd6e77a652509585274434b5e34

Outcome: Yes
  Probability: 0.58 (Model Confidence: 0.65)
  Edge: 0.481
  Adjusted EV: 0.285 🟢
  Best Ask: 0.099 (Size: 20.0)
  Uncertainty Range: [0.43-0.73] (CL: 0.90)



MARKET: Will the highest temperature in London be between 61-62°F on March 6?
Condition ID: 0x3f7359138d565203fd73612a36194162c650c6218de83e8dab03126bbae1a56f

Outcome: No
  Probability: 0.92 (Model Confidence: 0.75)
  Edge: 0.290
  Adjusted EV: 0.210 🟢
  Best Ask: 0.63 (Size: 10.0)
  Uncertainty Range: [0.85-0.97] (CL: 0.90)



MARKET: 25 bps decrease i