# CEQR API Test Notebook

This notebook tests the CEQR (City Environmental Quality Review) API call using Python requests.

The API endpoint is: `https://a002-ceqraccess.nyc.gov/ceqr/`

This is a POST request whose form data searches for CEQR projects by borough, block, and lot.


In [None]:
import requests
from urllib.parse import unquote, urlencode
import json
from bs4 import BeautifulSoup
import pandas as pd
import re

print("‚úÖ Imports successful")


## API Request Configuration

Setting up the URL, headers, and body from the fetch call.


In [None]:
# Address the search form is posted to
url = "https://a002-ceqraccess.nyc.gov/ceqr/"

# Search criteria as (form field, value) pairs, in the order they are encoded.
# Nothing else is sent: no VIEWSTATE and no other form fields.
form_data = dict([
    ("ctl00$MainContent$ddlBorough", "Brooklyn"),
    ("ctl00$MainContent$txtBlock", "7061"),
    ("ctl00$MainContent$txtLot", "27"),
])

# URL-encoded request body built from the criteria above
body = urlencode(form_data)

# Only the two headers the request is sent with
headers = dict([
    ("content-type", "application/x-www-form-urlencoded"),
    ("referer", "https://a002-ceqraccess.nyc.gov/ceqr/"),
])

# Echo the configuration so the cell output documents exactly what will be sent
print(f"URL: {url}")
print(f"Headers: {headers}")
print(f"Body: {body}")
print("\n‚úÖ Minimal request configured with only borough, block, and lot")


## Make the POST Request

Sending the POST request with the configured headers and body.


In [None]:
# Send the minimal form POST and summarise whatever comes back
try:
    response = requests.post(url, headers=headers, data=body, timeout=30)

    print(f"Status Code: {response.status_code}")
    print(f"Response Content Length: {len(response.content)} bytes")

    if response.status_code != 200:
        # Anything other than 200: report the status and the start of the body
        print(f"\n‚ùå Request failed with status code: {response.status_code}")
        print(f"Response: {response.text[:500]}")
    else:
        print("\n‚úÖ Request successful!")

        # Rough classification of the page by marker substrings;
        # error markers are tested before result markers
        if any(marker in response.text for marker in ('Error', 'Unhandled')):
            print("‚ö†Ô∏è  Response contains error page")
        elif any(marker in response.text for marker in ('grdSearchResults', 'Search Results')):
            print("‚úÖ Found search results in response!")
        else:
            print("üìÑ Response received (checking content...)")

        # Preview the beginning of the page
        print("\n--- First 500 characters of response ---")
        print(response.text[:500])

        # Scan the page for strings shaped like 2 digits, 3 capitals, 3 digits, 1 capital
        ceqr_pattern = r'\d{2}[A-Z]{3}\d{3}[A-Z]'
        ceqr_matches = re.findall(ceqr_pattern, response.text)
        if len(ceqr_matches) > 0:
            print(f"\n‚úÖ Found CEQR numbers: {set(ceqr_matches)}")

except requests.exceptions.RequestException as e:
    # Failures raised by requests itself (connection, timeout, ...)
    print(f"‚ùå Error making request: {e}")
except Exception as e:
    # Anything else is reported rather than raised
    print(f"‚ùå Unexpected error: {e}")


## Parse Response Table

Parse the HTML response to extract the CEQR results table with CEQR Number, Project Name, and Project Description.


In [None]:
# Parse the HTML response and extract the results table.
# BeautifulSoup, pandas (pd), re and requests come from the imports cell at the
# top of the notebook; url, headers and body come from the configuration cell.


def unique_column_names(raw_headers, width):
    """Return exactly `width` distinct column labels for the results table.

    Args:
        raw_headers: header cell texts, in column order (may be shorter or
            longer than `width`).
        width: number of labels to produce.

    Returns:
        A list of `width` unique strings. Blank header texts and positions
        past the end of `raw_headers` become "Column N" (1-based); a label
        that repeats an earlier one gets a " (2)", " (3)", ... suffix so each
        column can be addressed unambiguously by name.
    """
    names = []
    seen = set()
    for position in range(width):
        label = raw_headers[position] if position < len(raw_headers) else ""
        if not label:
            label = f"Column {position + 1}"
        candidate = label
        suffix = 2
        while candidate in seen:
            candidate = f"{label} ({suffix})"
            suffix += 1
        seen.add(candidate)
        names.append(candidate)
    return names


def build_results_frame(raw_headers, data_rows):
    """Build a DataFrame from scraped cell texts, tolerating ragged rows.

    Args:
        raw_headers: header cell texts, in column order.
        data_rows: list of rows, each a list of cell-text strings; rows may
            have different lengths.

    Returns:
        A DataFrame as wide as the longest row. Shorter rows are padded with
        empty strings, and the column labels come from `unique_column_names`,
        so extra header texts beyond the data width are dropped.
    """
    width = max((len(row_cells) for row_cells in data_rows), default=0)
    padded_rows = [list(row_cells) + [""] * (width - len(row_cells)) for row_cells in data_rows]
    return pd.DataFrame(padded_rows, columns=unique_column_names(raw_headers, width))


try:
    # Re-issue the request so this cell only depends on the configuration cell
    response = requests.post(url, headers=headers, data=body, timeout=30)

    if response.status_code == 200:
        print("‚úÖ Got response, parsing HTML...")

        soup = BeautifulSoup(response.text, 'html.parser')

        # First choice: a table whose id contains "grdSearchResults"
        table = soup.find('table', {'id': lambda x: x and 'grdSearchResults' in x})

        if table is None:
            # Fallback: the first table whose text mentions one of the expected column titles
            tables = soup.find_all('table')
            print(f"Found {len(tables)} tables in the page")

            for t in tables:
                table_text = t.get_text()
                if 'CEQR Number' in table_text or 'Project Name' in table_text:
                    table = t
                    print("‚úÖ Found results table")
                    break

        if table is not None:
            print("‚úÖ Found results table, extracting data...")

            rows = table.find_all('tr')
            print(f"Found {len(rows)} rows in table")

            # The first row is treated as the header row
            headers_list = []
            if rows:
                headers_list = [cell.get_text(strip=True) for cell in rows[0].find_all(['th', 'td'])]
                print(f"Headers: {headers_list}")

            # Remaining rows are data; rows whose cells are all blank are skipped
            data_rows = []
            for row in rows[1:]:
                row_data = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
                if any(cell.strip() for cell in row_data):
                    data_rows.append(row_data)

            if data_rows:
                print(f"\n‚úÖ Extracted {len(data_rows)} data rows")

                # Rows can differ in width and header texts can be blank or
                # repeated; the helper normalises both so construction and
                # by-name access below cannot fail
                df = build_results_frame(headers_list, data_rows)

                print("\n" + "=" * 80)
                print("CEQR Search Results:")
                print("=" * 80)
                print(df.to_string(index=False))

                # Same data, one field per line, skipping blank cells
                print("\n" + "=" * 80)
                print("Results Summary:")
                print("=" * 80)
                for row_number, (_, row) in enumerate(df.iterrows(), start=1):
                    print(f"\nRow {row_number}:")
                    for col, value in row.items():
                        if pd.notna(value) and str(value).strip():
                            print(f"  {col}: {value}")
            else:
                print("‚ö†Ô∏è  No data rows found in table")
                # Show the start of the table markup for debugging
                print("\nTable HTML structure:")
                print(str(table)[:1000])
        else:
            print("‚ö†Ô∏è  Could not find results table")
            print("\nSearching for CEQR-related content...")

            # Look for CEQR-number-shaped strings anywhere in the page
            ceqr_pattern = r'\d{2}[A-Z]{3}\d{3}[A-Z]'
            ceqr_matches = re.findall(ceqr_pattern, response.text)
            if ceqr_matches:
                print(f"Found CEQR numbers in text: {set(ceqr_matches)}")

            # Print only the advertised sample, not the whole page
            print("\nSample HTML (first 2000 chars):")
            print(response.text[:2000])
    else:
        print(f"‚ùå Request failed: {response.status_code}")

except Exception as e:
    # Debug notebook: report the failure with a traceback instead of stopping the run
    print(f"‚ùå Error parsing response: {e}")
    import traceback
    traceback.print_exc()


Making new request (response from cell 5 not found)...
‚ö†Ô∏è  Note: Make sure you've run cells 1, 3, and 5 first!
‚ùå Error: Missing required variables: url, headers, body
   Please run cell 3 first to set up url, headers, and body.
‚ùå Error parsing response: Missing variables: url, headers, body


Traceback (most recent call last):
  File "/var/folders/cy/lk1n_dqn4gj0n_tsqkvpx_cw0000gn/T/ipykernel_34405/846701929.py", line 38, in <module>
    raise NameError(f"Missing variables: {', '.join(missing_vars)}")
NameError: Missing variables: url, headers, body
