# In [None]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import json
import os

class PolymarketOrderBookScraper:
    """
    Download Polymarket market, order book, trade and candle data to disk

    Market listings are requested from the Gamma API base URL; everything
    else is requested from the CLOB API base URL. Every ``get_*`` method is
    best-effort: on a network error, a non-200 status code or a body that is
    not valid JSON it prints a message and returns an empty container
    instead of raising.

    NOTE(review): the endpoint paths, query parameter names and response
    field names used in this class are kept exactly as originally written
    and have not been verified against the current Polymarket API docs.
    """

    def __init__(self, output_dir="polymarket_orderbook_data", timeout=30, request_delay=1.0):
        """
        Set up API base URLs, request headers and the output directory

        Args:
            output_dir (str): Directory that receives all scraped files (created if missing)
            timeout (float): Seconds to wait on each HTTP request before giving up
            request_delay (float): Base pause in seconds after each outcome; the pause
                after each market and each event is twice this value. 0 disables pausing.
        """
        self.clob_url = "https://clob.polymarket.com"
        self.gamma_url = "https://gamma-api.polymarket.com"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36"
        }
        self.output_dir = output_dir
        self.timeout = timeout
        self.request_delay = request_delay
        os.makedirs(output_dir, exist_ok=True)

    def _get_json(self, url, description, default, params=None):
        """
        Issue a GET request and decode the JSON body

        Args:
            url (str): Absolute URL to request
            description (str): Human-readable target used in error messages
            default: Value returned when the request or decoding fails
            params (dict): Optional query parameters

        Returns:
            The decoded JSON payload, or ``default`` on any failure
        """
        try:
            # Without a timeout requests can block forever on a stalled connection.
            response = requests.get(url, params=params, headers=self.headers, timeout=self.timeout)
        except requests.RequestException as exc:
            print(f"Error fetching {description}: {exc}")
            return default

        if response.status_code != 200:
            print(f"Error fetching {description}: {response.status_code}")
            return default

        try:
            return response.json()
        except ValueError as exc:
            # A 200 response with a non-JSON body (e.g. an HTML error page).
            print(f"Error fetching {description}: invalid JSON response ({exc})")
            return default

    def _pause(self, multiplier=1):
        """Sleep for ``request_delay * multiplier`` seconds; no-op when that is not positive."""
        delay = self.request_delay * multiplier
        if delay > 0:
            time.sleep(delay)

    @staticmethod
    def _save_json(path, payload):
        """Write ``payload`` to ``path`` as indented UTF-8 JSON."""
        with open(path, 'w', encoding="utf-8") as f:
            json.dump(payload, f, indent=2)

    @staticmethod
    def _extract_events(markets_data):
        """
        Normalise the payload of ``get_markets`` into a list of event dicts

        Accepts either a dict carrying an "events" list or a bare list of
        events (which is also what ``get_markets`` returns on failure).
        Entries that are not dicts are dropped.
        """
        if isinstance(markets_data, dict):
            events = markets_data.get("events") or []
        else:
            events = markets_data or []

        if not isinstance(events, list):
            return []
        return [event for event in events if isinstance(event, dict)]

    def get_markets(self, include_closed=True):
        """
        Get all markets from Gamma API

        Args:
            include_closed (bool): Whether to include closed markets

        Returns:
            list or dict: Decoded JSON payload as returned by the API,
            or an empty list on failure
        """
        # NOTE(review): this sends closed=true/false; whether the API treats
        # closed=true as "include closed" or "only closed" is not verified here.
        params = {"closed": "true" if include_closed else "false"}
        return self._get_json(f"{self.gamma_url}/events", "markets", [], params=params)

    def get_market_details(self, market_id):
        """
        Get detailed market information from CLOB API

        Args:
            market_id (str): Market ID

        Returns:
            dict: Market details, or an empty dict on failure
        """
        return self._get_json(
            f"{self.clob_url}/markets/{market_id}",
            f"market details for {market_id}",
            {},
        )

    def get_order_book(self, market_address, outcome_id, depth=2):
        """
        Get current order book for a specific market outcome

        Args:
            market_address (str): Market contract address
            outcome_id (str): Outcome ID
            depth (int): Depth of order book to retrieve

        Returns:
            dict: Order book data with an added local-time "timestamp" key,
            or an empty dict on failure
        """
        params = {
            "marketId": market_address,
            "outcomeId": outcome_id,
            "depth": depth
        }
        data = self._get_json(
            f"{self.clob_url}/orderbook",
            f"order book for {market_address}/{outcome_id}",
            None,
            params=params,
        )

        if data is None:
            return {}

        # Only a dict payload can carry the extra key; anything else is
        # returned untouched rather than raising a TypeError.
        if isinstance(data, dict):
            data['timestamp'] = datetime.now().isoformat()
        return data

    def get_historical_trades(self, market_address, outcome_id, limit=1000):
        """
        Get historical trades for a specific market outcome

        Args:
            market_address (str): Market contract address
            outcome_id (str): Outcome ID
            limit (int): Number of trades to retrieve

        Returns:
            list: Trade history, or an empty list on failure
        """
        params = {
            "marketId": market_address,
            "outcomeId": outcome_id,
            "limit": limit
        }
        return self._get_json(
            f"{self.clob_url}/trades",
            f"trade history for {market_address}/{outcome_id}",
            [],
            params=params,
        )

    def get_historical_candles(self, market_address, outcome_id, resolution="1m", from_date=None, to_date=None, limit=60):
        """
        Get historical price candles for a specific market outcome

        Args:
            market_address (str): Market contract address
            outcome_id (str): Outcome ID
            resolution (str): Candle resolution ('1m', '5m', '15m', '1h', '4h', '1d')
            from_date (str): Start date in ISO format
            to_date (str): End date in ISO format
            limit (int): Maximum number of candles to retrieve

        Returns:
            list: Candle data, or an empty list on failure

        Raises:
            ValueError: If from_date or to_date is not a valid ISO format string
        """
        params = {
            "marketId": market_address,
            "outcomeId": outcome_id,
            "resolution": resolution,
            "limit": limit
        }

        # Dates are sent as epoch milliseconds. A date string without a UTC
        # offset is interpreted in the local timezone by datetime.timestamp().
        if from_date:
            params["from"] = int(datetime.fromisoformat(from_date).timestamp() * 1000)

        if to_date:
            params["to"] = int(datetime.fromisoformat(to_date).timestamp() * 1000)

        return self._get_json(
            f"{self.clob_url}/candlesticks",
            f"candles for {market_address}/{outcome_id}",
            [],
            params=params,
        )

    def get_market_volume_history(self, market_id, resolution="1d", limit=365):
        """
        Get market volume history

        Args:
            market_id (str): Market ID
            resolution (str): Time resolution ('1d', '1w', '1M')
            limit (int): Number of data points to retrieve

        Returns:
            list: Volume history data, or an empty list on failure
        """
        params = {
            "resolution": resolution,
            "limit": limit
        }
        return self._get_json(
            f"{self.clob_url}/markets/{market_id}/volume-history",
            f"volume history for {market_id}",
            [],
            params=params,
        )

    def _scrape_outcome(self, market_address, market_dir, outcome, candle_resolutions):
        """
        Fetch and save order book, trades and candles for one outcome

        Returns:
            tuple: (outcome_id, data dict), or None when the outcome has no ID
        """
        outcome_id = outcome.get("outcomeId")
        outcome_name = outcome.get("outcomeName", "Unknown")

        print(f"    Processing outcome: {outcome_name}")

        if outcome_id is None:
            # Without an ID there is nothing to query and no sensible directory name.
            print("    No outcome ID found, skipping...")
            return None

        outcome_dir = os.path.join(market_dir, str(outcome_id))
        os.makedirs(outcome_dir, exist_ok=True)

        order_book = self.get_order_book(market_address, outcome_id)
        if order_book:
            self._save_json(os.path.join(outcome_dir, "current_orderbook.json"), order_book)

        trades = self.get_historical_trades(market_address, outcome_id)
        if trades:
            pd.DataFrame(trades).to_csv(os.path.join(outcome_dir, "trade_history.csv"), index=False)

        candles_by_resolution = {}
        for resolution in candle_resolutions:
            candles = self.get_historical_candles(market_address, outcome_id, resolution=resolution)
            candles_by_resolution[resolution] = candles

            if candles:
                candles_df = pd.DataFrame(candles)
                # Convert epoch milliseconds only when the column is actually present.
                if 'timestamp' in candles_df.columns:
                    candles_df['timestamp'] = pd.to_datetime(candles_df['timestamp'], unit='ms')
                candles_df.to_csv(os.path.join(outcome_dir, f"candles_{resolution}.csv"), index=False)

        return outcome_id, {
            "order_book": order_book,
            "trades": trades,
            "candles": candles_by_resolution,
        }

    def _scrape_market(self, market_id, candle_resolutions):
        """
        Fetch and save details, volume history and all outcomes of one market

        Returns:
            dict: Collected data for the market, or None when the market
            has no address and was skipped
        """
        market_details = self.get_market_details(market_id)
        if not isinstance(market_details, dict):
            market_details = {}
        market_address = market_details.get("marketAddress")

        if not market_address:
            print("  No market address found, skipping...")
            return None

        market_dir = os.path.join(self.output_dir, str(market_id))
        os.makedirs(market_dir, exist_ok=True)

        self._save_json(os.path.join(market_dir, "market_details.json"), market_details)

        volume_history = self.get_market_volume_history(market_id)
        if volume_history:
            pd.DataFrame(volume_history).to_csv(os.path.join(market_dir, "volume_history.csv"), index=False)

        outcomes_data = {}
        for outcome in market_details.get("outcomes") or []:
            if not isinstance(outcome, dict):
                continue

            scraped = self._scrape_outcome(market_address, market_dir, outcome, candle_resolutions)
            if scraped is None:
                continue

            outcome_id, outcome_data = scraped
            outcomes_data[outcome_id] = outcome_data

            # Be nice to the API
            self._pause(1)

        return {
            "market_details": market_details,
            "volume_history": volume_history,
            "outcomes": outcomes_data,
        }

    def scrape_historical_data(self, include_closed=True, candle_resolutions=None):
        """
        Scrape historical data for all markets

        Files are written below ``self.output_dir``: one directory per market
        (details and volume history), one sub-directory per outcome (order
        book, trades, candles) and one ``event_<id>.json`` per event.

        Args:
            include_closed (bool): Whether to include closed markets
            candle_resolutions (list): List of time resolutions to fetch;
                defaults to ["1h", "1d"] when None

        Returns:
            dict: Maps each scraped market ID to a dict with the keys
            "event_id", "market_details", "volume_history" and "outcomes".
            "outcomes" maps outcome ID to a dict with "order_book", "trades"
            and "candles" (itself keyed by resolution). Skipped markets are
            absent. All of this is held in memory for the whole run.
        """
        # A None default avoids sharing one mutable list between calls; the
        # copy also lets a one-shot iterable be reused for every outcome.
        if candle_resolutions is None:
            candle_resolutions = ["1h", "1d"]
        else:
            candle_resolutions = list(candle_resolutions)

        print("Fetching markets...")
        markets_data = self.get_markets(include_closed=include_closed)

        # get_markets yields a bare list on failure, so the payload cannot be
        # assumed to be a dict.
        events = self._extract_events(markets_data)

        print(f"Found {len(events)} events.")

        all_data = {}

        for event_idx, event in enumerate(events):
            event_id = event.get("eventId")
            event_name = event.get("eventName", "Unknown Event")

            print(f"Processing event {event_idx+1}/{len(events)}: {event_name}")

            markets = event.get("markets") or []

            for market_idx, market in enumerate(markets):
                if not isinstance(market, dict):
                    continue

                market_id = market.get("marketId")
                market_question = market.get("marketQuestion", "Unknown Market")

                print(f"  Processing market {market_idx+1}/{len(markets)}: {market_question}")

                if market_id is None:
                    # Avoid a pointless request for ".../markets/None".
                    print("  No market ID found, skipping...")
                    continue

                market_data = self._scrape_market(market_id, candle_resolutions)
                if market_data is None:
                    continue

                market_data["event_id"] = event_id
                all_data[market_id] = market_data

                # Be nice to the API
                self._pause(2)

            # Events without an ID get an index-based name so that they do
            # not all overwrite the same "event_None.json" file.
            event_label = event_id if event_id is not None else f"unknown_{event_idx}"
            self._save_json(os.path.join(self.output_dir, f"event_{event_label}.json"), event)

            # Be nice to the API
            self._pause(2)

        print(f"Scraping complete. Data saved to {self.output_dir} directory.")
        return all_data

# def main():
#     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
#     scraper = PolymarketOrderBookScraper(output_dir=f"polymarket_data_{timestamp}")
    
#     # Scrape historical data including closed markets and multiple candle resolutions
#     scraper.scrape_historical_data(
#         include_closed=True,
#         candle_resolutions=["1h", "4h", "1d"]
#     )

# if __name__ == "__main__":
#     main()