In [None]:
# Parse the HTML response to extract the table data
# Use the response from cell 5 (the minimal request that worked!)
from bs4 import BeautifulSoup
import pandas as pd
import re
import requests  # Make sure requests is imported

def response_is_usable(candidate, min_length=1000):
    """Return True when *candidate* looks like a successful, non-trivial HTTP response.

    The object qualifies when it exposes ``status_code == 200`` and a ``text``
    attribute longer than *min_length* characters. Anything that lacks those
    attributes (including ``None``) is simply reported as unusable instead of
    raising.
    """
    try:
        return candidate.status_code == 200 and len(candidate.text) > min_length
    except (AttributeError, TypeError):
        # Not a response-like object (missing attribute, or text without a length).
        return False


def build_results_frame(headers_list, data_rows):
    """Build a DataFrame from extracted table rows.

    Rows of different widths are tolerated: pandas pads the short ones with
    missing values. The header texts are applied as column labels only when
    there are at least as many of them as the widest row; otherwise the
    default integer labels are kept.
    """
    frame = pd.DataFrame(data_rows)
    width = frame.shape[1]
    if headers_list and len(headers_list) >= width:
        frame.columns = headers_list[:width]
    return frame


# Parse the search-result HTML into a DataFrame named ``df``.
# Prefers a ``response`` object already present in the notebook namespace and
# falls back to re-issuing the POST with ``url``, ``headers`` and ``body``.
try:
    response_available = response_is_usable(globals().get('response'))

    if response_available:
        print("‚úÖ Using response from cell 5")
    else:
        # Response doesn't exist or isn't valid, make a new request
        print("Making new request (response from cell 5 not found)...")
        print("‚ö†Ô∏è  Note: Make sure you've run cells 1, 3, and 5 first!")

        # Fail with one clear message instead of a NameError on the first missing name.
        missing_vars = [name for name in ('url', 'headers', 'body') if name not in globals()]
        if missing_vars:
            print(f"‚ùå Error: Missing required variables: {', '.join(missing_vars)}")
            print("   Please run cell 3 first to set up url, headers, and body.")
            raise NameError(f"Missing variables: {', '.join(missing_vars)}")

        response = requests.post(url, headers=headers, data=body, timeout=30)

    if response.status_code == 200:
        print("‚úÖ Got response, parsing HTML...")

        soup = BeautifulSoup(response.text, 'html.parser')

        # First choice: a table whose id contains "grdSearchResults".
        table = soup.find('table', {'id': lambda x: x and 'grdSearchResults' in x})

        if table is None:
            # Fallback: scan every table for the expected column names.
            tables = soup.find_all('table')
            print(f"Found {len(tables)} tables in the page")

            for i, t in enumerate(tables):
                table_text = t.get_text()
                # NOTE(review): get_text() includes nested tables, so an outer
                # layout table can match before the real grid does - confirm
                # against the actual page structure.
                has_ceqr = 'CEQR' in table_text
                has_project_name = 'Project Name' in table_text
                has_project_desc = 'Project Description' in table_text

                if has_ceqr or (has_project_name and has_project_desc):
                    table = t
                    print(f"‚úÖ Found results table (table #{i+1})")
                    print(f"   Contains: CEQR={has_ceqr}, Project Name={has_project_name}, Project Description={has_project_desc}")
                    break

            # If still not found, print info about all tables for debugging
            if table is None:
                print("\nüìã Analyzing all tables found:")
                for i, t in enumerate(tables):
                    rows = t.find_all('tr')
                    first_row_text = rows[0].get_text() if rows else ""
                    print(f"  Table {i+1}: {len(rows)} rows, first row: {first_row_text[:100]}")

        if table is not None:
            print("‚úÖ Found results table, extracting data...")

            rows = table.find_all('tr')
            print(f"Found {len(rows)} rows in table")

            # The first row is treated as the header row.
            header_row = rows[0] if rows else None
            headers_list = []
            if header_row is not None:
                headers_list = [cell.get_text(strip=True) for cell in header_row.find_all(['th', 'td'])]
                print(f"Headers: {headers_list}")

            # Keep only rows with at least one non-empty cell; the cell text
            # is already stripped by get_text(strip=True).
            data_rows = []
            for row in rows[1:]:
                row_data = [cell.get_text(strip=True) for cell in row.find_all(['td', 'th'])]
                if any(row_data):
                    data_rows.append(row_data)

            if data_rows:
                print(f"\n‚úÖ Extracted {len(data_rows)} data rows")

                df = build_results_frame(headers_list, data_rows)

                # Display the results
                print("\n" + "=" * 80)
                print("CEQR Search Results:")
                print("=" * 80)
                print(df.to_string(index=False))

                # Also show as a more readable format. Values are paired with
                # labels positionally so duplicate header texts cannot break
                # the lookup.
                print("\n" + "=" * 80)
                print("Results Summary:")
                print("=" * 80)
                for position, values in enumerate(df.itertuples(index=False, name=None), start=1):
                    print(f"\nRow {position}:")
                    for col, value in zip(df.columns, values):
                        if pd.notna(value) and str(value).strip():
                            print(f"  {col}: {value}")
            else:
                print("‚ö†Ô∏è  No data rows found in table")
                # Show the table HTML structure for debugging
                print("\nTable HTML structure:")
                print(str(table)[:1000])
        else:
            print("‚ö†Ô∏è  Could not find results table")
            print("\nSearching for CEQR-related content...")

            # Try to find CEQR numbers in the text
            ceqr_pattern = r'\d{2}[A-Z]{3}\d{3}[A-Z]'
            ceqr_matches = re.findall(ceqr_pattern, response.text)
            if ceqr_matches:
                print(f"Found CEQR numbers in text: {set(ceqr_matches)}")

            # Show only a sample of the HTML, as the label says.
            print("\nSample HTML (first 2000 chars):")
            print(response.text[:2000])
    else:
        print(f"‚ùå Request failed: {response.status_code}")

except Exception as e:
    # Notebook-level boundary: report the failure and keep the kernel usable.
    print(f"‚ùå Error parsing response: {e}")
    import traceback
    traceback.print_exc()


Making new request (response from cell 5 not found)...
‚ö†Ô∏è  Note: Make sure you've run cells 1, 3, and 5 first!
‚ùå Error: Missing required variables. Please run cells 1 and 3 first.
   Missing: name 'url' is not defined
‚ùå Error parsing response: name 'url' is not defined


Traceback (most recent call last):
  File "/var/folders/cy/lk1n_dqn4gj0n_tsqkvpx_cw0000gn/T/ipykernel_34136/813567678.py", line 12, in <module>
    test_response = response
                    ^^^^^^^^
NameError: name 'response' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/var/folders/cy/lk1n_dqn4gj0n_tsqkvpx_cw0000gn/T/ipykernel_34136/813567678.py", line 24, in <module>
    response = requests.post(url, headers=headers, data=body, timeout=30)
                             ^^^
NameError: name 'url' is not defined


## ⚠️ Note: VIEWSTATE Approach (Does Not Work)

The VIEWSTATE approach below returns an error page. **Use the minimal request approach (cell 5) instead**, which successfully returns the search results without needing VIEWSTATE.
