# Kalshi and Polymarket API Test
### Kalshi and Polymarket Connection and API Basics
- Connect to the API
- See balance and active positions
- Look at active market data
- Place limit order (not executed)
### NBA use case
- Find and compare today's NBA games on Kalshi and Polymarket

#### 0 - Imports

In [1]:
import requests
import datetime
import base64
import sys
from pathlib import Path
from cryptography.hazmat.primitives import hashes, serialization
from cryptography.hazmat.primitives.asymmetric import padding
from cryptography.hazmat.backends import default_backend

# Polymarket SDK
from py_clob_client.client import ClobClient
from py_clob_client.clob_types import OrderArgs, OrderType
from py_clob_client.order_builder.constants import BUY, SELL

# Add parent directory to path to import config
sys.path.insert(0, '..')

import importlib
import config
importlib.reload(config)

from config import (
    KALSHI_API_KEY, KALSHI_PRIVATE_KEY_PATH, KALSHI_BASE_URL,
    POLYMARKET_PRIVATE_KEY, POLYMARKET_PROXY_ADDRESS, POLYMARKET_BASE_URL,
    POLYMARKET_SIGNATURE_TYPE, POLYMARKET_CHAIN_ID, validate_config
)

## 1 - Kalshi

#### 1.1 - Connect to Kalshi

In [2]:
# Adjust paths for subdirectory before validation
import os
# NOTE(review): absolute, machine-specific path; this cell fails on any other
# machine until PROJECT_ROOT is edited.
PROJECT_ROOT = '/Users/jackduncan/Desktop/Projects/prediction-market-arbitrage'
# Change the working directory first: the validation and key loading below run
# from the project root (presumably so relative config paths resolve; confirm).
os.chdir(PROJECT_ROOT)

validate_config()

# Load private key
# The PEM file is read as bytes and parsed without a passphrase (password=None).
# The resulting key object is used by connect_kalshi() to sign requests.
with open(Path(KALSHI_PRIVATE_KEY_PATH), 'rb') as f:
    kalshi_private_key = serialization.load_pem_private_key(
        f.read(), password=None, backend=default_backend()
    )

def connect_kalshi():
    """Initialize Kalshi connection and return authenticated request function.

    Returns:
        A ``kalshi_get(path)`` callable. ``path`` is relative to the API root
        (e.g. ``"/portfolio/balance"``) and may carry a query string; the
        callable returns the ``requests.Response`` of a signed GET request.
    """
    def kalshi_get(path):
        """Make authenticated GET request to Kalshi API.

        Args:
            path: Endpoint path relative to the API root, optionally followed
                by ``?query=...``.

        Returns:
            The ``requests.Response`` for ``KALSHI_BASE_URL + path``.
        """
        import datetime
        # Millisecond Unix timestamp; the same string is signed and sent as a header.
        timestamp = str(int(datetime.datetime.now().timestamp() * 1000))

        # Sign the path only. The query string is sent with the request but is
        # left out of the signed message, so parameterised calls such as
        # "/portfolio/positions?settlement_status=settled" sign the same path
        # as the unparameterised endpoint.
        signing_path = f"/trade-api/v2{path.split('?', 1)[0]}"

        message = f"{timestamp}GET{signing_path}".encode('utf-8')
        signature = kalshi_private_key.sign(
            message,
            padding.PSS(mgf=padding.MGF1(hashes.SHA256()), salt_length=padding.PSS.DIGEST_LENGTH),
            hashes.SHA256()
        )

        headers = {
            'KALSHI-ACCESS-KEY': KALSHI_API_KEY,
            'KALSHI-ACCESS-SIGNATURE': base64.b64encode(signature).decode('utf-8'),
            'KALSHI-ACCESS-TIMESTAMP': timestamp
        }

        return requests.get(KALSHI_BASE_URL + path, headers=headers)
    return kalshi_get

# Test connection
# Build the signed GET helper once; later cells reuse the module-level
# `kalshi_get` name.
kalshi_get = connect_kalshi()
# The balance endpoint doubles as an authentication check: HTTP 200 is
# reported as SUCCESS, any other status as FAILED.
response = kalshi_get("/portfolio/balance")
status = "SUCCESS" if response.status_code == 200 else "FAILED"
print(f"[{status}] Kalshi status: {response.status_code}")

[SUCCESS] Kalshi status: 200


#### 1.2 List Holding Details for Kalshi

In [None]:
def get_kalshi_balance():
    """Fetch the account balance and return it divided by 100 (cents -> dollars)."""
    payload = kalshi_get("/portfolio/balance").json()
    cents = payload['balance']
    return cents / 100

def get_kalshi_positions():
    """Fetch portfolio positions.

    Returns:
        Dict with keys 'market_positions' and 'event_positions'; each maps to
        the list found in the API response, or [] when the key is absent.
    """
    payload = kalshi_get("/portfolio/positions").json()
    return {
        key: payload.get(key, [])
        for key in ('market_positions', 'event_positions')
    }

def get_kalshi_holdings():
    """Print cash, per-market exposure and their sum for the Kalshi account."""
    cash = get_kalshi_balance()
    held = get_kalshi_positions()['market_positions']

    # Exposure is reported in cents; convert every position up front so the
    # total is known before anything is printed.
    exposures = [entry['market_exposure'] / 100 for entry in held]
    exposure_total = sum(exposures)

    print("=== KALSHI ===")
    print(f"Cash: ${cash:.2f}")
    print(f"\nPositions ({len(held)}):")
    for entry, dollars in zip(held, exposures):
        print(f"  {entry['ticker']}: {entry['position']} contracts @ ${dollars:.2f}")

    print(f"\nTotal: ${cash + exposure_total:.2f}")
    print("=" * 40)

# Print the holdings summary (requires `kalshi_get` from the connection cell).
get_kalshi_holdings()

In [None]:
# Debug: Check raw API response for positions
import json


def _show_positions_response(heading, path):
    """Print status, top-level keys and the full JSON body for one positions query.

    Returns the (response, parsed_body) pair so the caller can keep both.
    """
    print(heading)
    resp = kalshi_get(path)
    print(f"   Status: {resp.status_code}")
    body = resp.json()
    print(f"   Keys: {list(body.keys())}")
    print(f"   Full response: {json.dumps(body, indent=2)}\n")
    return resp, body


# Try different endpoints
print("=== Testing Different Position Endpoints ===\n")

# 1. Unsettled positions
response1, data1 = _show_positions_response(
    "1. Unsettled positions:",
    "/portfolio/positions?settlement_status=unsettled",
)

# 2. All positions (no filter)
response2, data2 = _show_positions_response(
    "2. All positions (no filter):",
    "/portfolio/positions",
)

# 3. Settled positions
response3, data3 = _show_positions_response(
    "3. Settled positions:",
    "/portfolio/positions?settlement_status=settled",
)

#### 1.3 Kalshi Market Data

In [None]:
# Debug: Check if orderbook has more structure
def _level_price(level):
    """Sort/max key: the price of a [price, size] orderbook level as a float."""
    return float(level[0])


test_ticker = "KXNBAGAME-26JAN13ATLLAL-LAL"
ob_response = kalshi_get(f"/markets/{test_ticker}/orderbook")

if ob_response.status_code != 200:
    print(f"Error: {ob_response.status_code}")
else:
    full_ob = ob_response.json()

    print(f"Full orderbook response keys: {list(full_ob.keys())}")
    print("\nOrderbook structure:")
    ob = full_ob.get('orderbook', {})
    print(f"  Orderbook keys: {list(ob.keys())}")

    # Check if there are separate ask/bid structures
    # (a missing key and an explicit null are both treated as an empty side)
    yes_data = ob.get('yes', []) or []
    no_data = ob.get('no', []) or []

    if not yes_data:
        print("\nYes data: Empty or None")
    else:
        print("\nYes data (all levels, sorted by price):")
        yes_sorted = sorted(yes_data, key=_level_price)
        for rank, level in enumerate(yes_sorted[:10], start=1):
            print(f"  {rank}. Price: {level[0]}¢, Size: {level[1]}")

    if not no_data:
        print("\nNo data: Empty or None")
    else:
        print("\nNo data (all levels, sorted by price):")
        no_sorted = sorted(no_data, key=_level_price)
        for rank, level in enumerate(no_sorted[:10], start=1):
            print(f"  {rank}. Price: {level[0]}¢, Size: {level[1]}, (Yes equivalent: {100-float(level[0])}¢)")

        # Check what "best ask" means for buying Yes
        best_no_bid = max(no_data, key=_level_price)
        print("\nTo BUY Yes, I need to SELL No or cross the spread:")
        print(f"Highest No bid: {best_no_bid}")
        print(f"This converts to Yes ask: {100 - best_no_bid[0]}¢")

In [None]:
def get_avg_price_from_orderbook(bids, shares=10, buy_yes=True):
    """Calculate average price to buy 'shares' from Kalshi orderbook.

    Kalshi orderbook returns BIDS (buy orders). To calculate cost to BUY:
    - To BUY Yes: Use No bids, convert to Yes asks via (100 - no_bid)
    - To BUY No: Use Yes bids, convert to No asks via (100 - yes_bid)

    The caller chooses which side's bids to pass in; the conversion applied
    here (100 - bid) is the same for both directions.

    Args:
        bids: List of [price_cents, size] bid levels from opposite side
        shares: Number of shares to buy
        buy_yes: Describes the intended direction only. It does not affect the
            calculation and is kept for call-site readability.

    Returns:
        Average price in dollars, or None if there are no bids, if `shares`
        is not positive, or if the book cannot fill `shares`.
    """
    # A non-positive quantity has no meaningful average (and would otherwise
    # divide by zero below).
    if not bids or shares <= 0:
        return None

    # Sort bids descending (highest bid first) - these give best ask prices
    sorted_bids = sorted(bids, key=lambda x: float(x[0]), reverse=True)

    remaining = shares
    total_cost = 0

    for level in sorted_bids:
        bid_price_cents = float(level[0])
        size = float(level[1])

        # Convert bid to ask price: 100 - bid
        ask_price_cents = 100 - bid_price_cents

        # Take as much as this level offers, up to what is still needed.
        fill = min(remaining, size)
        total_cost += fill * ask_price_cents
        remaining -= fill

        if remaining <= 0:
            break

    if remaining > 0:
        return None  # Insufficient liquidity

    return (total_cost / shares) / 100  # Return in dollars
    
def get_kalshi_markets(limit=5):
    """Get active Kalshi markets with average cost for 10 contracts.

    Prints a summary per market (title, ticker, dates, volume, and the average
    price to buy 10 Yes / 10 No contracts from the current orderbook).

    Args:
        limit: Maximum number of open markets to request.

    Returns:
        The list of market dicts from the API, or None when the client is not
        initialized or the markets request does not return HTTP 200.
    """
    if not kalshi_get:
        print("WARNING: Kalshi client not initialized")
        return None

    response = kalshi_get(f"/markets?limit={limit}&status=open")
    if response.status_code != 200:
        print(f"ERROR: Failed to fetch markets ({response.status_code})")
        return None

    markets = response.json().get('markets', [])

    print(f"=== Kalshi Markets (Top {limit}) ===\n")

    for i, market in enumerate(markets, 1):
        # `or` fallbacks (not .get defaults) also cover keys that are present
        # but null, which would otherwise crash len(), slicing or float().
        title = market.get('title') or 'N/A'
        ticker = market.get('ticker', 'N/A')
        volume = market.get('volume') or 0

        # Use expected_expiration_time for event date, close_time for market close
        # Only the leading YYYY-MM-DD part of each timestamp is shown.
        event_time = (market.get('expected_expiration_time') or '')[:10]
        close_time = (market.get('close_time') or '')[:10]

        # Truncate long parlay titles
        display_title = title if len(title) <= 80 else title[:77] + "..."

        print(f"{i}. {display_title}")
        print(f"   Ticker: {ticker}")
        print(f"   Event: {event_time} | Market Closes: {close_time}")
        # NOTE(review): volume is printed with a "$" prefix; confirm whether the
        # API reports dollars or a contract count.
        print(f"   Volume: ${float(volume):,.2f}")

        # Get orderbook to calculate average price for 10 contracts
        # Best effort: any failure here is reported on one line and the loop
        # moves on to the next market.
        try:
            ob_response = kalshi_get(f"/markets/{ticker}/orderbook")
            if ob_response.status_code == 200:
                orderbook = ob_response.json().get('orderbook', {})

                yes_bids = orderbook.get('yes', [])  # Bids to buy Yes
                no_bids = orderbook.get('no', [])    # Bids to buy No

                # To BUY Yes, use No bids (100 - no_bid = yes_ask)
                # To BUY No, use Yes bids (100 - yes_bid = no_ask)
                yes_avg = get_avg_price_from_orderbook(no_bids, 10, buy_yes=True)
                no_avg = get_avg_price_from_orderbook(yes_bids, 10, buy_yes=False)

                if yes_avg is not None:
                    print(f"   Yes (avg 10): ${yes_avg:.4f}")
                else:
                    print("   Yes (avg 10): Insufficient liquidity")

                if no_avg is not None:
                    print(f"   No (avg 10): ${no_avg:.4f}")
                else:
                    print("   No (avg 10): Insufficient liquidity")
            else:
                print("   Prices: Unable to fetch orderbook")
        except Exception as e:
            print(f"   Prices: Error fetching orderbook ({e})")

        print()

    return markets

# Print the default top-5 summary; `kalshi_markets` is None if the request failed.
kalshi_markets = get_kalshi_markets()

#### 1.4 Kalshi Place Limit Order (Not Executed)

In [None]:
def place_kalshi_limit_order_example(ticker, side, price, size, execute=False):
    """
    Function to place a limit order on Kalshi.

    With execute=False (the default) the order is only described on stdout and
    nothing is sent to the API.

    Args:
        ticker: Market ticker to trade
        side: 'yes' or 'no'
        price: Price per contract (0.01 to 0.99)
        size: Number of contracts to buy/sell
        execute: If False, only shows what would be done (default: False)

    Returns:
        The parsed JSON response when the request returns HTTP 200; None for a
        dry run, a non-200 response, or any exception while sending.
    """
    print(f"=== {'EXECUTING' if execute else 'EXAMPLE'} Limit Order ===")
    print(f"Market: {ticker}")
    print(f"Side: {side}")
    print(f"Price: ${price:.4f}")
    print(f"Size: {size} contracts")
    print(f"Total Cost: ${price * size:.2f}")
    print()

    if not execute:
        print("WARNING: Order NOT executed (execute=False)")
        print("Set execute=True to place real order")
        return None

    try:
        # Kalshi order payload
        order_data = {
            "ticker": ticker,
            "side": side,
            "type": "limit",
            # Convert to cents with round(), not int(): 0.29 * 100 evaluates to
            # 28.999999999999996, which int() would truncate to 28.
            "price": round(price * 100),
            "size": size
        }

        # NOTE(review): kalshi_get sends a signed GET with the payload in the
        # query string, which reads data and cannot create an order. Placing a
        # real order presumably needs a signed POST with a JSON body (confirm
        # the endpoint and field names against the Kalshi API reference) and a
        # POST-capable helper next to kalshi_get. Until then execute=True is
        # expected to fall into the error branch below.
        response = kalshi_get(f"/orders?{requests.compat.urlencode(order_data)}")

        if response.status_code == 200:
            result = response.json()
            print("[SUCCESS] Order placed successfully!")
            print(f"Order ID: {result.get('order_id', 'N/A')}")
            return result
        else:
            print(f"ERROR: Order failed ({response.status_code})")
            print(f"Response: {response.text}")
            return None

    except Exception as e:
        print(f"ERROR: Error placing order: {e}")
        return None

# Example: Buy 1 contract at $0.50 (NOT executed)
# With execute=False the function only prints the order summary and returns
# None, so `example_order` is None after this cell.
example_order = place_kalshi_limit_order_example(
    ticker="KXTEST-EXAMPLE",
    side="yes",
    price=0.50,
    size=1,
    execute=False  # Set to True to actually place the order
)

## 2 - Polymarket

#### 2.1 - Connect to Polymarket

In [None]:
def connect_polymarket():
    """Build an authenticated py-clob-client ``ClobClient``.

    Returns:
        The client with API credentials set, or None when a required setting
        is missing or client construction / credential derivation raises.
    """
    required_settings = (
        (POLYMARKET_PRIVATE_KEY, "WARNING: POLYMARKET_PRIVATE_KEY not set in .env"),
        (POLYMARKET_PROXY_ADDRESS, "WARNING: POLYMARKET_PROXY_ADDRESS not set in .env"),
    )
    # Stop at the first missing setting, in the order listed above.
    for value, warning in required_settings:
        if not value:
            print(warning)
            return None

    try:
        clob = ClobClient(
            POLYMARKET_BASE_URL,
            key=POLYMARKET_PRIVATE_KEY,
            chain_id=POLYMARKET_CHAIN_ID,
            signature_type=POLYMARKET_SIGNATURE_TYPE,
            funder=POLYMARKET_PROXY_ADDRESS
        )
        credentials = clob.create_or_derive_api_creds()
        clob.set_api_creds(credentials)
    except Exception as err:
        print(f"[FAILED] Failed to initialize: {err}")
        return None
    return clob

# Test connection
# `polymarket_client` is None when connect_polymarket() could not build a client.
polymarket_client = connect_polymarket()
status = "SUCCESS" if polymarket_client else "FAILED"
# The 200 / 0 shown here is derived from whether a client object exists; it is
# not an HTTP status returned by Polymarket.
print(f"[{status}] Polymarket status: {200 if polymarket_client else 0}")

#### 2.2 List Holding Details for Polymarket

In [None]:
def get_polymarket_balance():
    """Return the collateral balance reported by the CLOB client as a float.

    NOTE(review): the value is returned exactly as reported (0 when the
    'balance' key is missing). Confirm whether the API reports whole dollars
    or base units before treating it as dollars.
    """
    from py_clob_client.clob_types import AssetType, BalanceAllowanceParams

    query = BalanceAllowanceParams(asset_type=AssetType.COLLATERAL)
    reply = polymarket_client.get_balance_allowance(params=query)
    return float(reply.get('balance', 0))

def get_polymarket_positions():
    """Return the account's open orders from the CLOB client ([] when none).

    Despite the name, this queries open orders (``get_orders``), not filled
    positions.
    """
    from py_clob_client.clob_types import OpenOrderParams

    open_orders = polymarket_client.get_orders(OpenOrderParams())
    if not open_orders:
        return []
    return open_orders

def get_polymarket_holdings():
    """Print cash, each open order's notional value, and their sum."""
    cash = get_polymarket_balance()
    open_orders = get_polymarket_positions()

    # Notional of every open order (original size x limit price), totalled
    # before anything is printed.
    notionals = [
        float(order.get('original_size', 0)) * float(order.get('price', 0))
        for order in open_orders
    ]
    exposure_total = sum(notionals)

    print("=== POLYMARKET ===")
    print(f"Cash: ${cash:.2f}")
    print(f"\nPositions ({len(open_orders)}):")
    for order in open_orders:
        label = order.get('market', 'N/A')[:50]
        direction = order.get('side', 'N/A')
        quantity = float(order.get('original_size', 0))
        limit_price = float(order.get('price', 0))
        print(f"  {label}: {direction} {quantity:.0f} @ ${limit_price:.3f} = ${quantity * limit_price:.2f}")

    print(f"\nTotal: ${cash + exposure_total:.2f}")
    print("=" * 40)

# Print the holdings summary (requires `polymarket_client` from the connection cell).
get_polymarket_holdings()

#### 2.3 Polymarket Market Data

In [None]:
def get_avg_price_from_clob(asks, shares=10):
    """Calculate average price to buy 'shares' from CLOB orderbook.

    Args:
        asks: List of {'price': str, 'size': str} levels (will be sorted ascending)
        shares: Number of shares to buy

    Returns:
        Average price, or None if there are no asks, if `shares` is not
        positive, or if the book cannot fill `shares`.
    """
    # A non-positive quantity has no meaningful average (and would otherwise
    # divide by zero below).
    if not asks or shares <= 0:
        return None

    # Sort asks by price ascending (buy from lowest first)
    sorted_asks = sorted(asks, key=lambda x: float(x['price']))

    remaining = shares
    total_cost = 0

    for level in sorted_asks:
        price = float(level['price'])
        size = float(level['size'])

        # Take as much as this level offers, up to what is still needed.
        fill = min(remaining, size)
        total_cost += fill * price
        remaining -= fill

        if remaining <= 0:
            break

    if remaining > 0:
        return None  # Insufficient liquidity

    return total_cost / shares

def get_polymarket_markets(limit=5):
    """Get active Polymarket markets with orderbook-based average prices.

    Requests open markets from the Gamma API ordered by volume (descending)
    and prints, per outcome, the average price to buy 10 shares from the CLOB
    orderbook. When the orderbook is unavailable or too thin, the market's
    listed outcome price is printed instead.

    Args:
        limit: Maximum number of markets to request and print.

    Returns:
        The decoded JSON market list, or None when the Gamma request does not
        return HTTP 200.
    """
    import json

    def _as_list(raw):
        """Decode a field that may arrive as a JSON-encoded string or as a list."""
        return json.loads(raw) if isinstance(raw, str) else raw

    gamma_url = "https://gamma-api.polymarket.com/markets"
    params = {
        'limit': limit,
        'closed': 'false',
        'order': 'volume',
        'ascending': 'false'
    }

    # Bounded like the per-token orderbook requests below, so a stalled
    # connection cannot hang the cell indefinitely.
    response = requests.get(gamma_url, params=params, timeout=10)
    if response.status_code != 200:
        print(f"ERROR: Failed to fetch markets ({response.status_code})")
        return None

    markets_data = response.json()

    print(f"=== Polymarket Markets (Top {limit}) ===\n")
    for i, market in enumerate(markets_data[:limit], 1):
        question = market.get('question', 'N/A')
        slug = market.get('slug', 'N/A')
        volume = market.get('volume', 0)

        # Get event date fields (leading YYYY-MM-DD of the timestamp)
        end_date = market.get('endDate', '')[:10] if market.get('endDate') else 'N/A'

        print(f"{i}. {question}")
        print(f"   Slug: {slug}")
        print(f"   Event: {end_date}")
        print(f"   Volume: ${float(volume):,.2f}")

        # Token IDs (for orderbook lookup), outcome labels and listed prices
        # are parallel lists indexed by outcome position.
        token_ids = _as_list(market.get('clobTokenIds', '[]'))
        outcomes = _as_list(market.get('outcomes', '[]'))
        prices = _as_list(market.get('outcomePrices', '[]'))

        # Get orderbook for each outcome and calculate avg price for 10 shares
        for j, outcome in enumerate(outcomes):
            token_id = token_ids[j] if j < len(token_ids) else None
            fallback_price = prices[j] if j < len(prices) else None

            avg_price = None
            if token_id:
                try:
                    clob_url = f"https://clob.polymarket.com/book?token_id={token_id}"
                    ob_response = requests.get(clob_url, timeout=5)
                    if ob_response.status_code == 200:
                        asks = ob_response.json().get('asks', [])
                        if asks:
                            avg_price = get_avg_price_from_clob(asks, 10)
                except Exception:
                    # Deliberate best effort: on any orderbook problem fall
                    # back to the listed outcome price printed below.
                    pass

            if avg_price is not None:
                print(f"   {outcome} (avg 10): ${avg_price:.4f}")
            elif fallback_price:
                print(f"   {outcome}: ${float(fallback_price):.4f}")
            else:
                print(f"   {outcome}: N/A")
        print()

    return markets_data

# Print the default top-5 summary; `markets` is None if the Gamma request failed.
markets = get_polymarket_markets()

#### 2.4 Polymarket Place Limit Order (Not Executed)

In [None]:
def place_limit_order_example(token_id, price, size, side=BUY, execute=False):
    """
    Describe, and optionally submit, a Polymarket limit order.

    Args:
        token_id: Token ID to trade
        price: Price per token (0.01 to 0.99)
        size: Number of tokens to buy/sell
        side: BUY or SELL
        execute: When False (default) the order is printed but never sent

    Returns:
        The response from ``post_order`` when the order is submitted; None
        when the client is missing, on a dry run, or if submission raises.
    """
    if not polymarket_client:
        print("WARNING: Polymarket client not initialized")
        return None

    mode = 'EXECUTING' if execute else 'EXAMPLE'
    print(f"=== {mode} Limit Order ===")
    print(f"Side: {side}")
    print(f"Token ID: {token_id}")
    print(f"Price: ${price:.4f}")
    print(f"Size: {size} tokens")
    print(f"Total Cost: ${price * size:.2f} USDC")
    print()

    # Dry run: stop after the summary.
    if not execute:
        print("WARNING: Order NOT executed (execute=False)")
        print("Set execute=True to place real order")
        return None

    try:
        # Build and sign the order, then post it as GTC (Good-Till-Cancelled).
        signed = polymarket_client.create_order(
            OrderArgs(price=price, size=size, side=side, token_id=token_id)
        )
        response = polymarket_client.post_order(signed, OrderType.GTC)

        print("[SUCCESS] Order placed successfully!")
        print(f"Order ID: {response.get('orderID', 'N/A')}")
        print(f"Status: {response.get('status', 'N/A')}")

        return response

    except Exception as err:
        print(f"ERROR: Error placing order: {err}")
        return None

# Example: Buy 5 tokens at $0.01 each (NOT executed)
# With execute=False the function only prints the order summary and returns
# None, so `example_order` is None after this cell.
example_order = place_limit_order_example(
    token_id="21742633143463906290569050155826241533067272736897614950488156847949938836455",
    price=0.01,
    size=5.0,
    side=BUY,
    execute=False  # Set to True to actually place the order
)

## 3 - NBA use case 

#### 3.1 Scan Kalshi for NBA Games

Sports --> Basketball --> Pro Basketball (M) --> Games

Series: KXNBAGAME

Event: {series}-{event_date - 1}{matchup}, e.g. "KXNBAGAME-26JAN13CHIHOU"

Market: {event}-{winner}, e.g. "KXNBAGAME-26JAN13CHIHOU-HOU"

In [None]:
def scan_kalshi_nba_games():
    """Scan Kalshi for NBA game markets and return as DataFrame.

    Lists open markets in the KXNBAGAME series, parses date / matchup / team
    from each ticker, and looks up each market's current asks to derive a
    fee-inclusive price.

    Returns:
        DataFrame with columns ticker, matchup, yes_team, date, volume,
        yes_price, no_price, sorted by date (ascending) then volume
        (descending). An empty DataFrame when no markets are listed; None
        when the listing request does not return HTTP 200.
    """
    import pandas as pd

    def _price_with_fee(ask_cents):
        """Ask price plus the per-contract fee, in dollars; None when there is no ask."""
        if ask_cents is None:
            return None
        # Kalshi fee: 0.07 * C * P * (1-P), where C=10, P=price as decimal
        p = ask_cents / 100
        fee_cents = 0.07 * 10 * p * (1 - p) * 100
        # fee_cents covers 10 contracts; add one contract's share to the ask.
        return round((ask_cents + fee_cents / 10) / 100, 4)

    response = kalshi_get("/markets?series_ticker=KXNBAGAME&status=open&limit=100")
    if response.status_code != 200:
        print(f"ERROR: Failed to fetch NBA markets ({response.status_code})")
        return None

    markets = response.json().get('markets', [])
    if not markets:
        print("No NBA markets found")
        return pd.DataFrame()

    month_map = {
        'JAN': '01', 'FEB': '02', 'MAR': '03', 'APR': '04',
        'MAY': '05', 'JUN': '06', 'JUL': '07', 'AUG': '08',
        'SEP': '09', 'OCT': '10', 'NOV': '11', 'DEC': '12'
    }

    data = []
    for market in markets:
        ticker = market.get('ticker', '')
        # `or` fallbacks also cover keys that are present but null.
        volume = market.get('volume') or 0
        parts = ticker.split('-')

        # Parse date and matchup from ticker
        # Ticker layout: {series}-{YYMMMDD}{AWAYHOME}-{TEAM}
        event_date = None
        matchup_str = (market.get('title') or '')[:30]
        yes_team = parts[-1] if len(parts) >= 3 else None  # Last part of ticker is winner (yes team)

        if len(parts) >= 3 and len(parts[1]) >= 7:
            event_part = parts[1]
            year = f"20{event_part[:2]}"
            month = month_map.get(event_part[2:5].upper())
            day = event_part[5:7]
            # An unrecognised month code leaves the date unset. Defaulting to
            # January would invent a date and could mis-join with Polymarket.
            if month is not None:
                event_date = f"{year}-{month}-{day}"

            if len(event_part) >= 13:
                # Extract matchup from event_part (last 6 chars)
                matchup = event_part[-6:]
                matchup_str = f"{matchup[:3].upper()} vs {matchup[3:].upper()}"

        # Calculate pricing with fees
        # Best effort: a failed lookup leaves whichever price was not yet
        # computed as None; the row is kept.
        yes_avg = no_avg = None
        try:
            market_response = kalshi_get(f"/markets/{ticker}")
            if market_response.status_code == 200:
                market_data = market_response.json().get('market', {})
                yes_avg = _price_with_fee(market_data.get('yes_ask'))
                no_avg = _price_with_fee(market_data.get('no_ask'))
        except Exception:
            pass

        data.append({
            'ticker': ticker,
            'matchup': matchup_str,
            'yes_team': yes_team,
            'date': event_date,
            'volume': float(volume),
            'yes_price': yes_avg,
            'no_price': no_avg
        })

    df = pd.DataFrame(data).sort_values(['date', 'volume'], ascending=[True, False])
    print(f"Found {len(df)} NBA markets on Kalshi\n")
    return df

# scan_kalshi_nba_games() returns None when the listing request fails, in which
# case the .head() call below raises AttributeError.
nba_kalshi = scan_kalshi_nba_games()
nba_kalshi.head(10)

#### 3.2 Scan Polymarket for NBA Games
Sports --> NBA
Slug: /slug/nba-{team1}-{team2}-{date of game}, e.g. "/slug/nba-phx-mia-2026-01-13"

In [None]:
# Debug: Check moneyline market details
import json
import re

gamma_url = "https://gamma-api.polymarket.com/events"
params = {
    'series_id': 10345,
    'active': 'true',
    'closed': 'false',
    'limit': 1
}

# Match "over" / "under" only at the start of a word, so a team name such as
# "Thunder" is not mistaken for an over/under market.
_over_under = re.compile(r"\b(?:over|under)")

response = requests.get(gamma_url, params=params)
if response.status_code == 200:
    events = response.json()
    if not events:
        # Nothing listed right now; avoid indexing into an empty list.
        print("No NBA events found")
    else:
        event = events[0]
        slug = event.get('slug', '')
        print(f"Event: {slug}\n")

        markets = event.get('markets', [])

        # Find market with "Grizzlies vs. Lakers" or similar
        for market in markets:
            question = market.get('question', '')
            lowered = question.lower()
            if ' vs ' in lowered or ' vs. ' in lowered:
                if not _over_under.search(lowered):
                    outcomes_str = market.get('outcomes', '[]')
                    # `outcomes` may be a JSON-encoded string or already a list.
                    outcomes = json.loads(outcomes_str) if isinstance(outcomes_str, str) else outcomes_str
                    print(f"Question: {question}")
                    print(f"Outcomes: {outcomes}")
                    print()

In [None]:
def scan_polymarket_nba_games():
    """Scan Polymarket for NBA game markets and return as DataFrame.

    For every active NBA event, picks the moneyline (winner) market, then
    prices each outcome as the average cost of buying 10 shares from the CLOB
    orderbook, falling back to the listed outcome price.

    Returns:
        DataFrame with columns slug, matchup, yes_team, date, volume,
        yes_price, no_price, sorted by date (ascending) then volume
        (descending). An empty DataFrame when nothing qualifies; None when
        the events request does not return HTTP 200.
    """
    import json
    import re

    import pandas as pd

    def _as_list(raw):
        """Decode a field that may arrive as a JSON-encoded string or as a list."""
        return json.loads(raw) if isinstance(raw, str) else raw

    def _avg_fill_price(asks, shares=10):
        """Average price to buy `shares` walking asks from cheapest; None if too thin."""
        remaining = shares
        total_cost = 0
        for level in sorted(asks, key=lambda x: float(x['price'])):
            fill = min(remaining, float(level['size']))
            total_cost += fill * float(level['price'])
            remaining -= fill
            if remaining <= 0:
                return total_cost / shares
        return None

    # Keywords that mark spread / total / prop markets. Word keywords are
    # matched only at the start of a word: a plain substring test would treat
    # "Thunder" as an "under" market and drop every Thunder game.
    excluded_question = re.compile(
        r"\b(?:spread|total|over|under|o/u|points|score|rebounds|assists"
        r"|1h|first half|quarter)|:"
    )

    # Use events endpoint with NBA series_id (10345)
    gamma_url = "https://gamma-api.polymarket.com/events"
    params = {
        'series_id': 10345,
        'active': 'true',
        'closed': 'false',
        'limit': 100
    }

    # Bounded like the orderbook requests below, so a stalled connection cannot hang the cell.
    response = requests.get(gamma_url, params=params, timeout=10)
    if response.status_code != 200:
        print(f"ERROR: Failed to fetch NBA events ({response.status_code})")
        return None

    events = response.json()
    if not events:
        print("No NBA events found")
        return pd.DataFrame()

    data = []
    for event in events:
        slug = event.get('slug', '')

        # Parse slug format: nba-team1-team2-YYYY-MM-DD
        # Reset per event so an unparseable slug never reuses the previous
        # event's teams (or hits an unbound name on the first event).
        event_date = None
        matchup_str = slug
        yes_team = None
        team1 = team2 = None

        if slug.startswith('nba-'):
            parts = slug.split('-')
            if len(parts) >= 6:
                # Extract date (last 3 parts: YYYY-MM-DD)
                event_date = f"{parts[-3]}-{parts[-2]}-{parts[-1]}"
                # Extract teams
                team1 = parts[1].upper()
                team2 = parts[2].upper()
                matchup_str = f"{team1} vs {team2}"

        # Get markets for this event
        markets = event.get('markets', [])
        if not markets:
            continue

        # Find the moneyline/winner market by checking question text
        main_market = None
        for market in markets:
            question = market.get('question', '').lower()

            # Moneyline markets have simple "Team1 vs Team2" format
            # Exclude spread, total, over/under, and prop bet markets
            if excluded_question.search(question):
                continue
            if ' vs ' not in question and ' vs. ' not in question:
                continue

            # Moneyline market should have exactly 2 outcomes with team names
            # (not Yes/No or Over/Under)
            candidate_outcomes = _as_list(market.get('outcomes', '[]'))
            if len(candidate_outcomes) != 2:
                continue
            outcomes_lower = [str(o).lower() for o in candidate_outcomes]
            if 'yes' not in outcomes_lower and 'over' not in outcomes_lower:
                main_market = market
                break

        if not main_market:
            # If no winner market found, skip this event
            continue

        volume = float(main_market.get('volume') or 0)

        # Parse outcomes and prices
        outcomes = _as_list(main_market.get('outcomes', '[]'))

        # For moneyline markets, outcomes are team names (e.g., ['Grizzlies', 'Lakers'])
        # Map to city abbreviation from slug (team1 or team2)
        # NOTE(review): slug abbreviations rarely appear inside team nicknames,
        # so this usually lands on the team1 fallback, i.e. it assumes the first
        # outcome is the first team in the slug; confirm.
        if len(outcomes) >= 2 and team1 and team2:
            first_outcome = str(outcomes[0]).strip().lower()
            if team1.lower() in first_outcome or first_outcome in team1.lower():
                yes_team = team1
            elif team2.lower() in first_outcome or first_outcome in team2.lower():
                yes_team = team2
            else:
                # Fallback: use team1 as default
                yes_team = team1

        prices = _as_list(main_market.get('outcomePrices', '[]'))
        token_ids = _as_list(main_market.get('clobTokenIds', '[]'))

        # Get orderbook prices for better precision
        prices_list = []
        for j, outcome in enumerate(outcomes):
            token_id = token_ids[j] if j < len(token_ids) else None
            fallback_price = float(prices[j]) if j < len(prices) else None

            price = fallback_price
            if token_id:
                try:
                    clob_url = f"https://clob.polymarket.com/book?token_id={token_id}"
                    ob_response = requests.get(clob_url, timeout=5)
                    if ob_response.status_code == 200:
                        asks = ob_response.json().get('asks', [])
                        # Calculate average for 10 contracts; keep the listed
                        # price when the book is empty or too thin.
                        book_price = _avg_fill_price(asks, 10) if asks else None
                        if book_price is not None:
                            price = book_price
                except Exception:
                    # Deliberate best effort: keep the listed price on any
                    # orderbook problem.
                    pass

            # Round to 4 decimals for precision
            if price is not None:
                price = round(price, 4)

            prices_list.append(price)

        # Assign first outcome as yes_price, second as no_price
        yes_price = prices_list[0] if len(prices_list) > 0 else None
        no_price = prices_list[1] if len(prices_list) > 1 else None

        data.append({
            'slug': slug,
            'matchup': matchup_str,
            'yes_team': yes_team,
            'date': event_date,
            'volume': volume,
            'yes_price': yes_price,
            'no_price': no_price
        })

    if not data:
        print("No NBA moneyline markets found")
        return pd.DataFrame()

    df = pd.DataFrame(data).sort_values(['date', 'volume'], ascending=[True, False])
    print(f"Found {len(df)} NBA events on Polymarket\n")
    return df

# scan_polymarket_nba_games() returns None when the events request fails, in
# which case the .head() call below raises AttributeError.
nba_polymarket = scan_polymarket_nba_games()
nba_polymarket.head(10)

#### 3.3 Compare NBA Markets

In [None]:
# Side-by-side preview of both scans: row count plus the first five rows of
# the columns the two DataFrames share.
_preview_columns = ['matchup', 'yes_team', 'date', 'yes_price', 'no_price']

for _banner, _count_label, _frame in (
    ("=== KALSHI NBA GAMES ===", "Total markets", nba_kalshi),
    ("\n\n=== POLYMARKET NBA GAMES ===", "Total events", nba_polymarket),
):
    print(_banner)
    print(f"{_count_label}: {len(_frame)}")
    print("\nSample (first 5):")
    print(_frame[_preview_columns].head(5))

#### 3.4 Join Datasets 

In [None]:
def join_nba_markets(kalshi_df, polymarket_df):
    """Join Kalshi and Polymarket NBA markets by date and matchup.

    Handles Kalshi having 2 markets per game (one for each team): the Kalshi
    market whose 'yes_team' equals Polymarket's 'yes_team' fills the primary
    ``kalshi_*`` columns, and the first Kalshi market with a different
    'yes_team' fills the ``kalshi_*_alt`` columns.

    Args:
        kalshi_df: DataFrame with 'ticker', 'matchup', 'yes_team', 'date',
            'volume', 'yes_price' and 'no_price' columns.
        polymarket_df: DataFrame with 'slug', 'matchup', 'yes_team', 'date',
            'volume', 'yes_price' and 'no_price' columns.

    Returns:
        DataFrame with one row per Polymarket event that has a Kalshi
        counterpart, sorted by date (ascending) then Polymarket volume
        (descending). When no game matches, an empty DataFrame that still
        carries the joined column names is returned. When either input is
        None or empty, a column-less empty DataFrame is returned.
    """
    import pandas as pd

    # Output schema, spelled out so an empty result still has these columns.
    joined_columns = [
        'date', 'matchup', 'yes_team',
        'polymarket_slug', 'kalshi_ticker', 'kalshi_ticker_alt',
        'poly_yes', 'poly_no',
        'kalshi_yes', 'kalshi_no',
        'kalshi_yes_alt', 'kalshi_no_alt',
        'poly_volume', 'kalshi_volume', 'kalshi_volume_alt',
    ]

    if kalshi_df is None or polymarket_df is None or kalshi_df.empty or polymarket_df.empty:
        print("ERROR: Missing or empty DataFrames")
        return pd.DataFrame()

    def normalize_matchup(matchup_str):
        """Return 'TEAM1 vs TEAM2' with teams upper-cased and sorted, or None."""
        # Missing values (None/NaN) and other non-strings cannot be parsed.
        if not isinstance(matchup_str, str) or not matchup_str:
            return None
        # Upper-casing first makes the separator match case-insensitively.
        parts = matchup_str.upper().split(' VS ')
        if len(parts) != 2:
            return None
        teams = sorted(t.strip() for t in parts)
        return f"{teams[0]} vs {teams[1]}"

    def first_market(markets):
        """Return the first row of *markets*, or None when it has no rows."""
        return None if markets.empty else markets.iloc[0]

    def field(market, column):
        """Read *column* from a market row, passing a missing market through as None."""
        return None if market is None else market[column]

    # Work on copies so the callers' frames do not gain a helper column.
    kalshi_work = kalshi_df.copy()
    polymarket_work = polymarket_df.copy()
    kalshi_work['matchup_norm'] = kalshi_work['matchup'].apply(normalize_matchup)
    polymarket_work['matchup_norm'] = polymarket_work['matchup'].apply(normalize_matchup)

    # Group Kalshi markets by game (date + matchup)
    kalshi_grouped = kalshi_work.groupby(['date', 'matchup_norm'])

    joined_data = []
    for _, poly_row in polymarket_work.iterrows():
        date = poly_row['date']
        matchup_norm = poly_row['matchup_norm']
        if pd.isna(date) or pd.isna(matchup_norm):
            continue

        try:
            kalshi_game = kalshi_grouped.get_group((date, matchup_norm))
        except KeyError:
            # No Kalshi market for this game
            continue

        poly_yes_team = poly_row['yes_team']
        # Primary: same YES side as Polymarket. Alt: the other team's market.
        primary = first_market(kalshi_game[kalshi_game['yes_team'] == poly_yes_team])
        alt = first_market(kalshi_game[kalshi_game['yes_team'] != poly_yes_team])

        joined_data.append({
            'date': date,
            'matchup': poly_row['matchup'],
            'yes_team': poly_yes_team,
            'polymarket_slug': poly_row['slug'],
            'kalshi_ticker': field(primary, 'ticker'),
            'kalshi_ticker_alt': field(alt, 'ticker'),
            'poly_yes': poly_row['yes_price'],
            'poly_no': poly_row['no_price'],
            'kalshi_yes': field(primary, 'yes_price'),
            'kalshi_no': field(primary, 'no_price'),
            'kalshi_yes_alt': field(alt, 'yes_price'),
            'kalshi_no_alt': field(alt, 'no_price'),
            'poly_volume': poly_row['volume'],
            'kalshi_volume': field(primary, 'volume'),
            'kalshi_volume_alt': field(alt, 'volume'),
        })

    # Passing the schema explicitly keeps the columns present with zero
    # matches; sorting a column-less frame by 'date' would raise KeyError.
    df = pd.DataFrame(joined_data, columns=joined_columns)
    if joined_data:
        df = df.sort_values(['date', 'poly_volume'], ascending=[True, False])
    print(f"Matched {len(df)} games between Kalshi and Polymarket\n")
    return df

# Join the markets: pair each Polymarket event with its Kalshi counterpart(s)
# and keep the result in a global for the cleaning cell below.
nba_joined = join_nba_markets(nba_kalshi, nba_polymarket)
# Trailing expression: the notebook renders the first 10 rows as cell output.
nba_joined.head(10)

In [None]:
def clean_nba_markets(joined_df):
    """Drop past games and games quoting a price of exactly 0 or 1.

    Args:
        joined_df: Joined NBA markets. Must provide a 'date' column plus the
            six price columns 'poly_yes', 'poly_no', 'kalshi_yes',
            'kalshi_no', 'kalshi_yes_alt' and 'kalshi_no_alt'.

    Returns:
        A filtered copy of ``joined_df`` (the input is left untouched), or a
        column-less empty DataFrame when the input is None or empty.
    """
    import pandas as pd
    from datetime import datetime

    if joined_df is None or joined_df.empty:
        print("ERROR: Empty DataFrame")
        return pd.DataFrame()

    rows_before = len(joined_df)
    today = datetime.now().date()

    # Attach each game's calendar date as a scratch column on a fresh copy,
    # then keep only games dated today or later.
    kept = joined_df.assign(date_obj=pd.to_datetime(joined_df['date']).dt.date)
    kept = kept[kept['date_obj'] >= today]

    # A price of exactly 0 or 1 in any column disqualifies the game; missing
    # prices compare unequal to both and therefore pass through.
    for price_col in ('poly_yes', 'poly_no', 'kalshi_yes', 'kalshi_no',
                      'kalshi_yes_alt', 'kalshi_no_alt'):
        quotes = kept[price_col]
        kept = kept[(quotes != 0) & (quotes != 1)]

    # The scratch column is not part of the returned schema.
    kept = kept.drop(columns=['date_obj'])

    print(f"Removed {rows_before - len(kept)} games (past dates or invalid prices)")
    print(f"Remaining: {len(kept)} games\n")

    return kept

# Clean the joined markets: drop past games and games with a 0/1 price, and
# keep the result in a global for the arbitrage cells below.
nba_joined_clean = clean_nba_markets(nba_joined)
# Trailing expression: the notebook renders the first 10 rows as cell output.
nba_joined_clean.head(10)

#### 3.5 Calculate Arbitrage Lines

In [None]:
def calculate_arbitrage(joined_df):
    """Score every joined game for a cross-market arbitrage.

    For each row, six fixed two-leg price sums are evaluated (a pairing is
    skipped when either leg is missing) and the cheapest one is reported.

    Adds three columns to a copy of the input:
    - arbitrage_possible: True if the cheapest pairing totals < 1.0
    - best_combo: Label of the cheapest pairing (None if none could be priced)
    - profit: (1 - cheapest total) * 100 (missing if none could be priced)

    Args:
        joined_df: DataFrame with 'poly_yes', 'poly_no', 'kalshi_yes',
            'kalshi_no', 'kalshi_yes_alt' and 'kalshi_no_alt' columns.

    Returns:
        Copy of ``joined_df`` with the three columns added, or a column-less
        empty DataFrame when the input is None or empty.
    """
    import pandas as pd

    if joined_df is None or joined_df.empty:
        print("ERROR: Empty DataFrame")
        return pd.DataFrame()

    price_cols = ('poly_yes', 'poly_no', 'kalshi_yes', 'kalshi_no',
                  'kalshi_yes_alt', 'kalshi_no_alt')

    # (label, first leg, second leg). Order matters: on a tie the earliest
    # pairing wins because min() returns the first minimal key.
    pairings = (
        ('poly_yes + kalshi_no', 'poly_yes', 'kalshi_no'),
        ('poly_no + kalshi_yes', 'poly_no', 'kalshi_yes'),
        ('poly_yes + kalshi_yes_alt', 'poly_yes', 'kalshi_yes_alt'),
        ('poly_no + kalshi_no_alt', 'poly_no', 'kalshi_no_alt'),
        ('kalshi_yes + kalshi_yes_alt', 'kalshi_yes', 'kalshi_yes_alt'),
        ('kalshi_no + kalshi_no_alt', 'kalshi_no', 'kalshi_no_alt'),
    )

    def evaluate(game):
        """Return the arbitrage summary record for one joined game row."""
        # Read all six prices up front so a missing column fails immediately.
        quotes = {col: game[col] for col in price_cols}
        totals = {
            label: quotes[leg_a] + quotes[leg_b]
            for label, leg_a, leg_b in pairings
            if pd.notna(quotes[leg_a]) and pd.notna(quotes[leg_b])
        }
        if not totals:
            return {'arbitrage_possible': False, 'best_combo': None, 'profit': None}

        winner = min(totals, key=totals.get)
        cheapest = totals[winner]
        return {
            'arbitrage_possible': cheapest < 1.0,
            'best_combo': winner,
            'profit': (1.0 - cheapest) * 100,
        }

    result = joined_df.copy()
    summary = pd.DataFrame([evaluate(game) for _, game in result.iterrows()])

    # Assign positionally via .values: `summary` has a fresh 0..n-1 index that
    # need not line up with the (possibly filtered/sorted) index of `result`.
    for column in ('arbitrage_possible', 'best_combo', 'profit'):
        result[column] = summary[column].values

    return result

# Score the cleaned games, rank them by profit (highest first) and show the
# ten best candidates.
nba_arb = calculate_arbitrage(nba_joined_clean)
nba_arb_sorted = nba_arb.sort_values(by='profit', ascending=False)

# Columns shown in the summary: everything except slugs, tickers and volumes.
display_cols = [
    'date', 'matchup', 'yes_team',
    'poly_yes', 'poly_no',
    'kalshi_yes', 'kalshi_no',
    'kalshi_yes_alt', 'kalshi_no_alt',
    'arbitrage_possible', 'best_combo', 'profit',
]

print("=== Top 10 Arbitrage Opportunities ===\n")
nba_arb_sorted.loc[:, display_cols].head(10)

In [None]:
# Drop any results left over from an earlier run so nothing stale survives
# if the recalculation below fails.
globals().pop('nba_arb', None)
globals().pop('nba_arb_sorted', None)

# Recompute the arbitrage table and its profit-ranked view.
nba_arb = calculate_arbitrage(nba_joined_clean)
nba_arb_sorted = nba_arb.sort_values(by='profit', ascending=False)

# Spot-check a few hand-picked games: print the chosen pairing and profit for
# the first row whose matchup text mentions both teams.
print("\n=== Verification: Check actual nba_arb values ===")
for team1, team2 in (('MIN', 'HOU'), ('BOS', 'MIA'), ('MIL', 'SAS')):
    row = nba_arb[nba_arb['matchup'].str.contains(team1) & nba_arb['matchup'].str.contains(team2)]
    if row.empty:
        continue
    r = row.iloc[0]
    print(f"{r['matchup']}: best_combo={r['best_combo']}, profit={r['profit']:.2f}%")

# Columns shown in the summary: slugs, tickers and volumes are left out.
display_cols = [
    'date', 'matchup', 'yes_team',
    'poly_yes', 'poly_no',
    'kalshi_yes', 'kalshi_no',
    'kalshi_yes_alt', 'kalshi_no_alt',
    'arbitrage_possible', 'best_combo', 'profit',
]

print("\n=== Top 10 Arbitrage Opportunities ===\n")
nba_arb_sorted.loc[:, display_cols].head(10)