# Pull political market data from Polymarket API

In [2]:
import requests
import pandas as pd
import json
import csv
from datetime import datetime, timedelta


# Get possible tags to find politics
Used ChatGPT to quickly pull tags

In [4]:
import requests
import csv

# Download every tag from the Polymarket Gamma API and save the id/label
# pairs to a CSV so the politics tag id can be looked up by hand.

# Base URL for fetching tags
base_url = "https://gamma-api.polymarket.com/tags"

# Pagination parameters
offset = 0
limit = 100  # Page size; adjust based on API docs if necessary
all_tags = []

# Page through the endpoint until it returns an empty batch or a request fails.
while True:
    try:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(
            base_url,
            params={"offset": offset, "limit": limit},
            timeout=30,
        )
    except requests.exceptions.RequestException as exc:
        print(f"Failed to retrieve tags at offset {offset}. Error: {exc}")
        break

    if response.status_code != 200:
        print(f"Failed to retrieve tags at offset {offset}. Status code: {response.status_code}")
        break

    tags = response.json()

    if not tags:  # Stop if no more tags are returned
        break

    all_tags.extend(tags)  # Collect tags
    offset += limit  # Move to the next batch

# Save to CSV
csv_filename = "polymarket_tags.csv"
with open(csv_filename, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.writer(file)
    writer.writerow(["Tag ID", "Tag Name"])  # Header row
    for tag in all_tags:
        tag_id = tag.get("id", "N/A")  # Default to "N/A" if missing
        # Try "label", then "name"; fall back to "Unknown" when both are
        # missing or empty (a present-but-null "name" no longer yields None).
        tag_label = tag.get("label") or tag.get("name") or "Unknown"

        writer.writerow([tag_id, tag_label])

print(f"Saved {len(all_tags)} tags to {csv_filename}")


Saved 3395 tags to polymarket_tags.csv


# Pull markets
Fetch all closed markets under the politics tag (`tag_id=2`, related tags included) with a start date on or after 2024-01-16.

In [40]:
# Page through closed politics markets on the Gamma API and keep a trimmed
# record of each one in `politics_data`.
politics_data = []
offset = 0

while offset < 10000:
    response = requests.get(
        "https://gamma-api.polymarket.com/markets",
        params={
            "start_date_min": "2024-01-16T00:00:00Z",
            "closed": "true",
            "tag_id": 2,
            "related_tags": "true",
            "limit": 500,
            "offset": offset,
        },
        timeout=30,  # Don't let a stalled connection hang the notebook
    )

    if response.status_code != 200:
        print(f"Failed to retrieve markets at offset {offset}. Status code: {response.status_code}")
        break

    # Parse the body once and reuse it for both the emptiness check and the loop.
    result_dict = response.json()

    # An empty page means we have walked past the last market.
    if not result_dict:
        break

    # A non-list payload cannot be iterated as market records.
    if not isinstance(result_dict, list):
        print(f"Unexpected response at offset {offset}: {result_dict}")
        break

    for market in result_dict:
        market_dict = {}

        try:
            market_dict['conditionId'] = market['conditionId']
            market_dict['question'] = market['question']
            market_dict['endDate'] = market['endDate']
            market_dict['createdAt'] = market['createdAt']
            market_dict['closedTime'] = market['closedTime']
            market_dict['volume'] = market['volume']
            market_dict['outcomes'] = market['outcomes']
            # clobTokenIds arrives as a JSON-encoded string; decode it to a list.
            market_dict['clobTokenIds'] = json.loads(market['clobTokenIds'])

        except (KeyError, TypeError, ValueError):
            # KeyError: field absent; TypeError: field is null;
            # ValueError: clobTokenIds is not valid JSON.
            # Read the question from the raw record so this handler cannot
            # itself fail when the missing field came before 'question'.
            print(f"Market oopsie:{market.get('question', 'Unknown')}")

        # Partial records are still kept; later cells skip markets that
        # lack the fields they need.
        politics_data.append(market_dict)

    offset += 500
    print(offset)
                                                                    
                                                                    

Market oopsie:Will Joe Biden be President of the United States on June 1, 2024?
500
Market oopsie:Israel ground offensive in Rafah between April 13-19?
1000
Market oopsie:Will Biden say "Old" during his speech?
Market oopsie:Will Trump say "DJT" or "Donald Trump Coin" during his Bitcoin Conference speech?
1500
2000
Market oopsie:Will Kamala Harris say "trickle-down" during her Pennsylvania speech?
Market oopsie:Will Kamala Harris say "Michelle" during her Pennsylvania speech?
Market oopsie:Will Kamala Harris say "middle class" 3 or more times during her Pennsylvania speech?
Market oopsie:Will Kamala Harris say "frack" during her Pennsylvania speech?
Market oopsie:Will Kamala Harris say "not going back" 3 or more times during her Pennsylvania speech?
Market oopsie:Will Kamala Harris say "turn the page" during her Pennsylvania speech?
Market oopsie:Will Kamala Harris say "new deal" during her Pennsylvania speech?
Market oopsie:Will Kamala Harris say "first-time homebuyer" during her Penn

In [4]:
# Dump the collected market records to sample.csv without the index column.
pd.DataFrame(data=politics_data).to_csv(path_or_buf="sample.csv", index=False)

In [24]:
# Display the first two collected market records as a sanity check.
politics_data[:2]

[{'question': 'Mexican Presidential Election: Will Claudia Sheinbaum win?',
  'endDate': '2024-05-30T00:00:00Z',
  'createdAt': '2024-01-08T17:51:23.211Z',
  'closedTime': '2024-06-03 12:46:24+00',
  'volume': '747925.287237',
  'outcomes': '["Yes", "No"]',
  'clobTokenIds': ['6612185329411315995201911495298659700662906692479419017253589525535844894016',
   '105101337642125497479772130530194665111546952858742124983152538530078566129631']},
 {'question': 'Mexican Presidential Election: Will Xóchitl Gálvez win?',
  'endDate': '2024-05-30T00:00:00Z',
  'createdAt': '2024-01-08T17:51:23.253Z',
  'closedTime': '2024-06-03 12:56:24+00',
  'volume': '686502.79597',
  'outcomes': '["Yes", "No"]',
  'clobTokenIds': ['22630580583450969756139089277784718260497974893003428673446784801108452526510',
   '83023413016163749793663256320711862681244617352870508189641651193199858076904']}]

## Pull other metadata
For each market, fetch every outcome token's previous price (the second-to-last point in its price history) and the market's winning outcome.

In [53]:
# For every market: record the second-to-last price of each outcome token in
# 'prev_prices', then look up which token won and store it in
# 'winning_token_id' / 'winning_outcome'.
counter = 0
# NOTE: this is the same list object as politics_data; markets are annotated in place.
updated_markets = politics_data
for counter, market in enumerate(updated_markets, start=1):
    if counter % 100 == 0:
        print(f"Processing market {counter}")

    # One entry per outcome token, in the same order as clobTokenIds.
    market['prev_prices'] = []

    # Check if market has the required fields
    if 'clobTokenIds' not in market or 'outcomes' not in market:
        print(f"Missing required fields for market: {market.get('question', 'Unknown')}")
        continue

    # Safely get token IDs
    token_ids = market['clobTokenIds']
    if not token_ids:  # Check if empty
        print(f"No token IDs for market: {market.get('question', 'Unknown')}")
        continue

    for token_id in token_ids:
        try:
            response = requests.get(
                "https://clob.polymarket.com/prices-history",
                params={
                    "market": token_id,
                    "interval": "max",
                    "fidelity": 1440
                },
                timeout=10  # Prevent hanging on a stalled connection
            )
            response.raise_for_status()

            # Tolerate a payload with no 'history' key instead of crashing.
            history = response.json().get('history') or []

            # Use the second-to-last point; None when there is too little history.
            prev_price = history[-2]['p'] if len(history) >= 2 else None

        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers an undecodable JSON body on requests versions
            # whose decode error is not a RequestException subclass.
            print(f"Error fetching data for market: {market.get('question', 'Unknown')}")
            print(f"Token ID: {token_id}")
            print(f"Error: {str(e)}")
            prev_price = None

        market['prev_prices'].append(prev_price)

    try:
        # Get market data using condition_id
        response = requests.get(
            f"https://clob.polymarket.com/markets/{market['conditionId']}",
            timeout=10,
        )
        response.raise_for_status()
        market_data = response.json()
    except (KeyError, requests.exceptions.RequestException, ValueError):
        print(f"No request id for {market.get('question', 'Unknown')}")
        # Skip the winner lookup: falling through would reuse the previous
        # market's market_data (or hit an undefined name on the first market).
        continue

    # Find the winning token
    tokens = market_data.get('tokens') if isinstance(market_data, dict) else None
    for token in tokens or []:
        if token.get('winner') is True:
            market['winning_token_id'] = token['token_id']
            market['winning_outcome'] = token['outcome']


Processing market 100
Missing required fields for market: Will Joe Biden be President of the United States on June 1, 2024?
Processing market 200
Processing market 300
Processing market 400
Processing market 500
Processing market 600
Processing market 700
Missing required fields for market: Israel ground offensive in Rafah between April 13-19?
Processing market 800
Processing market 900
Processing market 1000
Missing required fields for market: Will Biden say "Old" during his speech?
Processing market 1100
Processing market 1200
Missing required fields for market: Will Trump say "DJT" or "Donald Trump Coin" during his Bitcoin Conference speech?
Processing market 1300
Processing market 1400
Processing market 1500
Processing market 1600
Processing market 1700
Processing market 1800
Processing market 1900
Processing market 2000
Processing market 2100
Processing market 2200
Processing market 2300
Missing required fields for market: Will Kamala Harris say "trickle-down" during her Pennsylva

## Write to CSV

In [55]:
# Persist the annotated markets to raw_pull.csv. No index argument is passed,
# so pandas' default applies and the row index is written as the first column.
pd.DataFrame(data=updated_markets).to_csv(path_or_buf="raw_pull.csv")

In [94]:
# Spot-check the winning token id stored on the second market. This raises
# KeyError if no 'winning_token_id' key was stored for that market.
updated_markets[1]['winning_token_id']

'83023413016163749793663256320711862681244617352870508189641651193199858076904'

## Clean dictionary

Add new keys to each market for analysis: `correct_prediction` (whether the winning outcome's previous price was above 0.5) and `prediction_price` (the previous price of the winning outcome).

In [106]:
# Annotate each resolved market with whether the market "called" the result:
#   correct_prediction - True/False when the winning outcome's previous price
#                        was above 0.5; None when it cannot be determined
#   prediction_price   - the previous price of the winning outcome
for entry in updated_markets:
    # Markets with no recorded winner are left untouched.
    if 'winning_token_id' not in entry:
        continue

    token_ids = entry['clobTokenIds']
    prev_prices = entry['prev_prices']
    winning_token = entry['winning_token_id']

    # Error case: the winner is not one of this market's tokens, or at least
    # one token has no previous price.
    if winning_token not in token_ids or None in prev_prices:
        entry['correct_prediction'] = None
        continue

    winning_index = token_ids.index(winning_token)
    try:
        winning_price = prev_prices[winning_index]
        entry['correct_prediction'] = winning_price > 0.5
        entry['prediction_price'] = winning_price
    except (IndexError, TypeError):
        # IndexError: fewer prices than tokens; TypeError: non-numeric price.
        # Print the offending record for inspection and carry on.
        print(entry)

     


In [107]:
# Write the cleaned markets, including the prediction keys, to outcomes.csv
# without the index column.
pd.DataFrame(data=updated_markets).to_csv(path_or_buf="outcomes.csv", index=False)