# CEQR API Test Notebook

This notebook tests the CEQR (City Environmental Quality Review) API call using Python requests.

The API endpoint is: `https://a002-ceqraccess.nyc.gov/ceqr/`

This is a POST request with form data that searches for CEQR projects by borough, block, and lot.


In [1]:
import json
import re
import traceback
from urllib.parse import parse_qs, unquote, urlencode

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Last statement of the import cell: reaching it means every import above succeeded.
print("‚úÖ Imports successful")


‚úÖ Imports successful


## API Request Configuration

Setting up the URL, headers, and body from the fetch call.


In [2]:
# Search parameters - edit these to look up a different tax lot.
# Kept as strings because they are sent verbatim as form values.
BOROUGH = "Brooklyn"
BLOCK = "7061"
LOT = "27"

# API endpoint
url = "https://a002-ceqraccess.nyc.gov/ceqr/"

# Minimal headers - only what's essential
headers = {
    "content-type": "application/x-www-form-urlencoded",
    "referer": "https://a002-ceqraccess.nyc.gov/ceqr/"
}

# Minimal body - ONLY borough, block, and lot (no VIEWSTATE, no other fields).
# The keys are the form-control names the search page expects.
form_data = {
    "ctl00$MainContent$ddlBorough": BOROUGH,
    "ctl00$MainContent$txtBlock": BLOCK,
    "ctl00$MainContent$txtLot": LOT
}

# Pre-encode the form so the exact bytes being sent can be printed and inspected.
body = urlencode(form_data)

print(f"URL: {url}")
print(f"Headers: {headers}")
print(f"Body: {body}")
print("\n‚úÖ Minimal request configured with only borough, block, and lot")


URL: https://a002-ceqraccess.nyc.gov/ceqr/
Headers: {'content-type': 'application/x-www-form-urlencoded', 'referer': 'https://a002-ceqraccess.nyc.gov/ceqr/'}
Body: ctl00%24MainContent%24ddlBorough=Brooklyn&ctl00%24MainContent%24txtBlock=7061&ctl00%24MainContent%24txtLot=27

‚úÖ Minimal request configured with only borough, block, and lot


## Make the POST Request

Sending the POST request with the configured headers and body.


In [3]:
# Send the minimal form (borough, block, lot only) and report what came back.
# `response` is left in the notebook namespace for later cells to reuse.
try:
    response = requests.post(url, headers=headers, data=body, timeout=30)

    print(f"Status Code: {response.status_code}")
    print(f"Response Content Length: {len(response.content)} bytes")

    if response.status_code != 200:
        # Non-200: show the status and the start of the body for debugging.
        print(f"\n‚ùå Request failed with status code: {response.status_code}")
        print(f"Response: {response.text[:500]}")
    else:
        print("\n‚úÖ Request successful!")

        # Classify the page by marker substrings; error markers take priority.
        if any(marker in response.text for marker in ('Error', 'Unhandled')):
            print("‚ö†Ô∏è  Response contains error page")
        elif any(marker in response.text for marker in ('grdSearchResults', 'Search Results')):
            print("‚úÖ Found search results in response!")
        else:
            print("üìÑ Response received (checking content...)")

        # Peek at the start of the page to see what we got.
        print("\n--- First 500 characters of response ---")
        print(response.text[:500])

        # Scan the page for strings shaped like 2 digits, 3 capitals, 3 digits, 1 capital.
        ceqr_pattern = r'\d{2}[A-Z]{3}\d{3}[A-Z]'
        ceqr_matches = re.compile(ceqr_pattern).findall(response.text)
        if len(ceqr_matches) > 0:
            print(f"\n‚úÖ Found CEQR numbers: {set(ceqr_matches)}")

except requests.exceptions.RequestException as exc:
    print(f"‚ùå Error making request: {exc}")
except Exception as exc:
    print(f"‚ùå Unexpected error: {exc}")


Status Code: 200
Response Content Length: 32714 bytes

‚úÖ Request successful!
üìÑ Response received (checking content...)

--- First 500 characters of response ---


<!DOCTYPE html>

<html lang="en">
<head><meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta http-equiv="Content-Security-Policy" content="default-src &#39;self&#39;; script-src &#39;self&#39; &#39;unsafe-inline&#39; &#39;unsafe-eval&#39;; style-src &#39;self&#39; &#39;unsafe-inline&#39; &#39;unsafe-eval&#39;;  frame-src &#39;self&#39; https://winauth;" /><title>
	Project Search
</title><script src="/bundles/jQuery?v=5Br_kWrXaG2p_Z5FlR1md


## Parse Response Table

Parse the HTML response to extract the CEQR results table with CEQR Number, Project Name, and Project Description.


In [None]:
# Parse the HTML response and extract the CEQR results table.
# Reuses `response` from the earlier POST cell when it looks usable;
# otherwise the same minimal request is sent again.


def rows_to_frame(header_cells, data_rows):
    """Build a DataFrame from scraped table rows, tolerating ragged rows.

    Args:
        header_cells: Cell texts of the table's first row (may be empty).
        data_rows: Non-empty list of rows, each a list of cell strings.

    Returns:
        A DataFrame with one row per entry in ``data_rows``. Columns are named
        from ``header_cells`` when there are at least as many headers as the
        widest row; otherwise pandas' default integer labels are used.
    """
    # Size the columns by the widest row, not the first one: if a later row
    # has more cells than the column list, pandas raises.
    width = max(len(row) for row in data_rows)
    if len(header_cells) >= width:
        return pd.DataFrame(data_rows, columns=header_cells[:width])
    return pd.DataFrame(data_rows)


try:
    # Decide whether the response left behind by the earlier cell can be reused.
    try:
        if response.status_code == 200 and len(response.text) > 1000:
            print("‚úÖ Using response from cell 5")
        else:
            # Routed through the except below so both "missing" and "unusable" re-request.
            raise AttributeError("Response not valid")
    except (NameError, AttributeError):
        print("Making new request...")
        response = requests.post(url, headers=headers, data=body, timeout=30)

    if response.status_code == 200:
        print("‚úÖ Got response, parsing HTML...")

        # Parse HTML with BeautifulSoup
        soup = BeautifulSoup(response.text, 'html.parser')

        # Strategy 1: a <table> whose id contains "grdSearchResults".
        table = soup.find('table', {'id': lambda x: x and 'grdSearchResults' in x})

        if not table:
            # Strategy 2: a wrapper <div> with that id, holding the table inside.
            grid_div = soup.find('div', {'id': lambda x: x and 'grdSearchResults' in x})
            if grid_div:
                table = grid_div.find('table')
                if table:
                    print("‚úÖ Found table inside GridView div")

        if not table:
            # Strategy 3: any table whose text mentions one of the expected column titles.
            tables = soup.find_all('table')
            print(f"Found {len(tables)} tables in the page")

            expected_titles = ('CEQR Number', 'Project Name', 'Project Description')
            for candidate in tables:
                candidate_text = candidate.get_text()
                if any(title in candidate_text for title in expected_titles):
                    table = candidate
                    print("‚úÖ Found results table by header text")
                    break

        if table:
            print("‚úÖ Found results table, extracting data...")

            # Extract table rows
            rows = table.find_all('tr')
            print(f"Found {len(rows)} rows in table")

            # The first row is treated as the header row.
            header_row = rows[0] if rows else None
            headers_list = []
            if header_row:
                headers_list = [th.get_text(strip=True) for th in header_row.find_all(['th', 'td'])]
                print(f"Headers: {headers_list}")

            # Collect data rows, skipping the header and any row with no visible text.
            data_rows = []
            for row in rows[1:]:
                cells = row.find_all(['td', 'th'])
                if cells:
                    row_data = [cell.get_text(strip=True) for cell in cells]
                    if any(cell.strip() for cell in row_data):
                        data_rows.append(row_data)

            if data_rows:
                print(f"\n‚úÖ Extracted {len(data_rows)} data rows")

                df = rows_to_frame(headers_list, data_rows)

                # Display the results
                print("\n" + "=" * 80)
                print("CEQR Search Results:")
                print("=" * 80)
                print(df.to_string(index=False))

                # Also show one field per line, skipping empty cells.
                print("\n" + "=" * 80)
                print("Results Summary:")
                print("=" * 80)
                for idx, row in df.iterrows():
                    print(f"\nRow {idx + 1}:")
                    for col in df.columns:
                        if pd.notna(row[col]) and str(row[col]).strip():
                            print(f"  {col}: {row[col]}")
            else:
                print("‚ö†Ô∏è  No data rows found in table")
                # Show the table HTML structure for debugging
                print("\nTable HTML structure:")
                print(str(table)[:1000])
        else:
            print("‚ö†Ô∏è  Could not find results table")
            print("\nSearching for CEQR-related content...")

            # Fall back to scanning the raw text for CEQR-number-shaped strings.
            ceqr_pattern = r'\d{2}[A-Z]{3}\d{3}[A-Z]'
            ceqr_matches = re.findall(ceqr_pattern, response.text)
            if ceqr_matches:
                print(f"Found CEQR numbers in text: {set(ceqr_matches)}")

            # Show only the first 2000 characters, as the label says, instead
            # of dumping the entire page into the cell output.
            print("\nSample HTML (first 2000 chars):")
            print(response.text[:2000])
    else:
        print(f"‚ùå Request failed: {response.status_code}")

except Exception as e:
    # Exploratory cell: report the failure with a traceback rather than abort the run.
    print(f"‚ùå Error parsing response: {e}")
    traceback.print_exc()


‚úÖ Got response, parsing HTML...
Found 4 tables in the page
‚ö†Ô∏è  Could not find results table

Searching for CEQR-related content...

Sample HTML (first 2000 chars):


<!DOCTYPE html>

<html lang="en">
<head><meta charset="utf-8" /><meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta http-equiv="Content-Security-Policy" content="default-src &#39;self&#39;; script-src &#39;self&#39; &#39;unsafe-inline&#39; &#39;unsafe-eval&#39;; style-src &#39;self&#39; &#39;unsafe-inline&#39; &#39;unsafe-eval&#39;;  frame-src &#39;self&#39; https://winauth;" /><title>
	Project Search
</title><script src="/bundles/jQuery?v=5Br_kWrXaG2p_Z5FlR1md42H9CV7IGPQPayuseC_3dM1"></script>
<script src="/bundles/modernizr?v=inCVuEFe6J4Q07A0AcRsbJic_UE5MwpRMNGcOtk94TE1"></script>
<link href="/Content/css?v=I4t_VAsuxLABXqcDpB75-Z7jI17GOShikmNppYLo5Zw1" rel="stylesheet"/>
<link href="../favicon.ico" rel="shortcut icon" type="image/x-icon" /></head>
<body>
    <form method="post" action="./"

## Parse Form Data (Optional)

If needed, we can parse the form data to see what fields are being sent.


In [5]:
# Show what we're sending by decoding the urlencoded body back into fields.
# keep_blank_values=True: parse_qs drops fields whose value is empty by
# default, which would hide a field that is in fact sent (e.g. a blank lot).
parsed_body = parse_qs(body, keep_blank_values=True)

print("Form fields being sent:")
print("=" * 60)
for key, value in parsed_body.items():
    # parse_qs maps each name to a list of values; show the first one.
    print(f"{key}: {value[0] if value else ''}")

print("\n" + "=" * 60)
print("Summary: Only sending borough, block, and lot - no VIEWSTATE or other fields")
print("=" * 60)


Form fields being sent:
ctl00$MainContent$ddlBorough: Brooklyn
ctl00$MainContent$txtBlock: 7061
ctl00$MainContent$txtLot: 27

Summary: Only sending borough, block, and lot - no VIEWSTATE or other fields


## Alternative: Get VIEWSTATE First

If the minimal request doesn't work, we might need to first GET the page to obtain a valid VIEWSTATE, then use it in the POST request.


In [6]:
# Alternative flow: GET the search page first, then send its hidden ASP.NET
# state fields (__VIEWSTATE etc.) back together with the search form.
try:
    # One Session for both calls, so any cookies set by the GET are returned
    # with the POST; standalone requests.get / requests.post calls share nothing.
    # NOTE(review): not confirmed that missing cookies cause the error page - verify.
    with requests.Session() as session:
        print("Fetching initial page to get VIEWSTATE...")
        initial_response = session.get(url, timeout=30)

        if initial_response.status_code == 200:
            print("‚úÖ Got initial page")

            # Look the hidden inputs up by name with the HTML parser. This does
            # not depend on attribute order and decodes HTML entities in values.
            initial_soup = BeautifulSoup(initial_response.text, 'html.parser')
            state_values = {}
            for field_name in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
                field_tag = initial_soup.find('input', {'name': field_name})
                state_values[field_name] = (field_tag.get('value') or "") if field_tag else ""

            viewstate = state_values["__VIEWSTATE"]
            viewstate_gen = state_values["__VIEWSTATEGENERATOR"]
            eventval = state_values["__EVENTVALIDATION"]

            if viewstate:
                print(f"‚úÖ Found VIEWSTATE ({len(viewstate)} chars)")
            else:
                print("‚ö†Ô∏è  Could not find VIEWSTATE")

            if viewstate_gen:
                print("‚úÖ Found VIEWSTATEGENERATOR")

            if eventval:
                print(f"‚úÖ Found EVENTVALIDATION ({len(eventval)} chars)")

            # Only attempt the POST when a VIEWSTATE was found.
            if viewstate:
                print("\nMaking POST request with VIEWSTATE...")
                form_data_with_viewstate = {
                    "__VIEWSTATE": viewstate,
                    # Hard-coded fallback used when the page exposes no generator value.
                    # NOTE(review): presumably copied from a browser capture - confirm.
                    "__VIEWSTATEGENERATOR": viewstate_gen or "F2CE38DF",
                    "__EVENTVALIDATION": eventval,
                    # Reuse the borough/block/lot fields from the configuration
                    # cell so both request variants search for the same lot.
                    **form_data,
                    "ctl00$MainContent$btnSearch": "Search",
                }

                body_with_viewstate = urlencode(form_data_with_viewstate)

                post_response = session.post(url, headers=headers, data=body_with_viewstate, timeout=30)

                print(f"\nStatus Code: {post_response.status_code}")
                print(f"Response Length: {len(post_response.text)} chars")

                if 'grdSearchResults' in post_response.text or 'Search Results' in post_response.text:
                    print("‚úÖ Found search results!")
                    # Extract CEQR-number-shaped strings from the page.
                    ceqr_pattern = r'\d{2}[A-Z]{3}\d{3}[A-Z]'
                    ceqr_matches = re.findall(ceqr_pattern, post_response.text)
                    if ceqr_matches:
                        print(f"‚úÖ Found CEQR numbers: {set(ceqr_matches)}")
                elif 'Error' in post_response.text:
                    print("‚ö†Ô∏è  Got error page")
                else:
                    print("üìÑ Response received")
                    print(f"\nFirst 500 chars:\n{post_response.text[:500]}")
        else:
            print(f"‚ùå Failed to get initial page: {initial_response.status_code}")

except Exception as e:
    print(f"‚ùå Error: {e}")


Fetching initial page to get VIEWSTATE...
‚úÖ Got initial page
‚úÖ Found VIEWSTATE (6424 chars)
‚úÖ Found VIEWSTATEGENERATOR
‚úÖ Found EVENTVALIDATION (3136 chars)

Making POST request with VIEWSTATE...

Status Code: 200
Response Length: 8831 chars
‚ö†Ô∏è  Got error page
