# View All Page HTML Content

In [5]:
import requests
from bs4 import BeautifulSoup

url = 'https://gis.summitcountyco.gov/map/DetailData.aspx?Schno=6507888'
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
print(soup)


<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "https://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

<html xmlns="https://www.w3.org/1999/xhtml">
<head>
<!-- Global Site Tag (gtag.js) Google Analytics -->
<script async="" src="https://www.googletagmanager.com/gtag/js?id=UA-150931768-1"></script>
<script>
        window.dataLayer = window.dataLayer || [];
        function gtag(){dataLayer.push(arguments);}
        gtag('js', new Date());
        
        gtag('config', 'UA-150931768-1')
    </script>
<link href="styles/MainBlue2.css" rel="Stylesheet" type="text/css"/><link href="SCfavicon.ico" rel="SHORTCUT ICON"/><title>
	Detail Data
</title></head>
<body alink="aqua" bgcolor="#f7f7f7" class="table.DetailData" link="blue" vlink="blue"><br/>
</body>
</html>
<table class="DetailData">
<tr>
<td align="center" class="style1aSched" colspan="9">Schedule # 6507888</td>
</tr><tr>
<td class="style1" rowspan="2">Property Desc:</td><td class="style2" rowspan="2">BLDG 51 PR

# Retrieve Data That's Easy to Extract First

In [None]:
import requests
from bs4 import BeautifulSoup

def extract_data(url):
    # Send request and parse HTML
    r = requests.get(url)
    print(f'Status code = {r.status_code}\n')
    soup = BeautifulSoup(r.text, 'html.parser')

    # Find tables
    tables = {
        'DetailData': soup.find('table', class_='DetailData'),
        'ValueData': soup.find('table', class_='ValueData'),
        'ImpData': soup.find('table', class_='ImpData'),
        'LandData': soup.find('table', class_='LandData')
    }

    # Function to extract data from a table cell
    def extract_cell_data(table, label):
        if not table:
            print(f"{label} table not found")
            return None
        cell = table.find('td', string=lambda text: text and label in text.strip())
        if cell:
            next_cell = cell.find_next('td')
            if next_cell:
                return next_cell.text.strip()
            print(f"{label} value cell not found")
        else:
            return None

    # Function to process a table and extract multiple fields
    def process_table(table, table_name, labels):
        if table:
            print(f"Found {table_name}\n")
            results = {}
            for label, display_name in labels:
                value = extract_cell_data(table, label)
                if value is not None:  # Allow empty strings
                    results[display_name] = value
                    print(f"{display_name}: {value}")
            return results
        else:
            print(f"{table_name} not found")
            return {}

    # Define labels to extract from DetailDataTable
    detail_labels = [
        ('Property Desc:', 'Property Description'),
        ('Phys. Address:', 'Physical Address'),
        ('Primary:', 'Primary Ownership'),
        ('Secondary:', 'Secondary Ownership'),
        ('C/O', 'Mailing Address - C/O'),
        ('Addr.', 'Address'),
        ('CSZ', 'Address - CSZ'),
    ]

    # Process DetailDataTable
    process_table(tables['DetailData'], 'DetailDataTable', detail_labels)

extract_data('https://gis.summitcountyco.gov/map/DetailData.aspx?Schno=6507888')

Status code = 200

Found DetailDataTable

Property Description: BLDG 51 PROSPECTOR VILLAGE COMMON AREA & CLUBHOUSE FKA LOT 33 WILDERNEST FIL 2
Physical Address: 0050 WILDERNEST CT (CR 1271) BLDG 51
Primary Ownership: PROSPECTOR VILLAGE HOMEOWNERS ASSOC,
Secondary Ownership: 
Mailing Address - C/O: 
Address - CSZ: FRISCO CO 804435265


# Find Most Recent Sale Date

In [None]:
from bs4 import BeautifulSoup
from datetime import datetime

def extract_sale_dates(html_content):
    """
    Extract sale dates from property detail HTML using BeautifulSoup.
    
    Args:
        html_content (str): HTML content as string
    
    Returns:
        list: List of dictionaries containing sale information
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    
    # Find the DetailData table
    detail_table = soup.find('table', class_='DetailData')
    if not detail_table:
        return []
    
    sale_records = []
    
    # Find all cells with class 'style2b' which contain the sale dates
    date_cells = detail_table.find_all('td', class_='style2b')
    
    for cell in date_cells:
        cell_text = cell.get_text(strip=True)
        # Look for date pattern (M/D/YYYY or MM/DD/YYYY)
        if '/' in cell_text and len(cell_text.split('/')) == 3:
            try:
                # Validate it's actually a date
                date_obj = datetime.strptime(cell_text, '%m/%d/%Y')
                
                # Find the parent row to get related data
                parent_row = cell.find_parent('tr')
                if parent_row:
                    row_cells = parent_row.find_all('td')
                    
                    # Extract reception number, document type, and sale price
                    reception = ''
                    doc_type = ''
                    sale_price = ''
                    
                    for i, row_cell in enumerate(row_cells):
                        if row_cell == cell:  # Found our date cell
                            # Reception is typically before the date
                            if i > 0:
                                reception = row_cells[i-1].get_text(strip=True)
                            # Doc type is typically after the date
                            if i+1 < len(row_cells):
                                doc_type = row_cells[i+1].get_text(strip=True)
                            # Sale price is typically after doc type
                            if i+2 < len(row_cells):
                                sale_price = row_cells[i+2].get_text(strip=True)
                            break
                    
                    sale_records.append({
                        'reception_number': reception,
                        'sale_date': cell_text,
                        'date_object': date_obj,
                        'document_type': doc_type,
                        'sale_price': sale_price
                    })
                    
                    # Return immediately after finding the first date
                    break
                    
            except ValueError:
                continue
    
    return sale_records

def print_sale_dates(sale_records):
    """Print formatted sale date information."""
    if not sale_records:
        print("No sale dates found.")
        return
    
    print(f"Found {len(sale_records)} sale records:")
    print("-" * 60)
    
    for i, record in enumerate(sale_records, 1):
        print(f"{i}. Sale Date: {record['sale_date']}")
        print(f"   Reception: {record['reception_number']}")
        print(f"   Doc Type: {record['document_type']}")
        print(f"   Price: ${record['sale_price']}")
        print()

# Example usage
if __name__ == "__main__":
    # Read HTML content from file or string
    with open('html_output.txt', 'r', encoding='utf-8') as file:
        html_content = file.read()
    
    # Extract sale dates
    sale_dates = extract_sale_dates(html_content)
    
    # Print results
    print_sale_dates(sale_dates)
    
    # Just the dates as a simple list
    dates_only = [record['sale_date'] for record in sale_dates]
    print("Sale dates only:", dates_only)

Found 1 sale records:
------------------------------------------------------------
1. Sale Date: 8/26/2016
   Reception: 1120146
   Doc Type: EAS
   Price: $0

Sale dates only: ['8/26/2016']


# Integrate Sales Date Into `extract_data` Function

In [35]:
import requests
from bs4 import BeautifulSoup
from datetime import datetime

def find_tables(url):
    # Send request and parse HTML
    r = requests.get(url)
    print(f'Status code = {r.status_code}\n')
    soup = BeautifulSoup(r.text, 'html.parser')

    # Find tables
    tables = {
        'DetailData': soup.find('table', class_='DetailData'),
        'ValueData': soup.find('table', class_='ValueData'),
        'ImpData': soup.find('table', class_='ImpData'),
        'LandData': soup.find('table', class_='LandData')
    }

    # Function to extract sale date from DetailData table
    def extract_first_sale_date(table):
        if not table:
            return None
            
        # Find all cells with class 'style2b' which contain the sale dates
        date_cells = table.find_all('td', class_='style2b')
        
        for cell in date_cells:
            cell_text = cell.get_text(strip=True)
            # Look for date pattern (M/D/YYYY or MM/DD/YYYY)
            if '/' in cell_text and len(cell_text.split('/')) == 3:
                try:
                    # Validate it's actually a date
                    date_obj = datetime.strptime(cell_text, '%m/%d/%Y')
                    return cell_text  # Return the first valid date found
                except ValueError:
                    continue
        return None

    # Function to extract data from a table cell
    def extract_cell_data(table, label):
        if not table:
            print(f"{label} table not found")
            return None
        cell = table.find('td', string=lambda text: text and label in text.strip())
        if cell:
            next_cell = cell.find_next('td')
            if next_cell:
                return next_cell.text.strip()
            print(f"{label} value cell not found")
        else:
            print(f"{label} cell not found")
        return None

    # Function to process a table and extract multiple fields
    def process_table(table, table_name, labels):
        if table:
            print(f"Found {table_name}\n")
            print(f"Source URL: {url}")
            results = {}
            for label, display_name in labels:
                if label == 'Sale Date':
                    # Use special function for sale date extraction
                    value = extract_first_sale_date(table)
                else:
                    value = extract_cell_data(table, label)
                
                if value is not None:  # Allow empty strings
                    results[display_name] = value
                    print(f"{display_name}: {value}")
            return results
        else:
            print(f"{table_name} not found")
            return {}

    # Define labels to extract from DetailDataTable
    detail_labels = [
        ('Sale Date', 'Most Recent Sale Date'),
        ('Property Desc:', 'Property Description'),
        ('Phys. Address:', 'Physical Address'),
        ('Primary:', 'Primary Ownership'),
        ('Secondary:', 'Secondary Ownership'),
        ('C/O', 'Mailing Address - C/O'),
        ('Addr:', 'Address'),
        ('CSZ', 'Address - CSZ')
    ]

    # Process DetailDataTable
    return process_table(tables['DetailData'], 'DetailDataTable', detail_labels)

# Example usage
if __name__ == "__main__":
    find_tables('https://gis.summitcountyco.gov/map/DetailData.aspx?Schno=6507888')

Status code = 200

Found DetailDataTable

Source URL: https://gis.summitcountyco.gov/map/DetailData.aspx?Schno=6507888
Most Recent Sale Date: 8/26/2016
Property Description: BLDG 51 PROSPECTOR VILLAGE COMMON AREA & CLUBHOUSE FKA LOT 33 WILDERNEST FIL 2
Physical Address: 0050 WILDERNEST CT (CR 1271) BLDG 51
Primary Ownership: PROSPECTOR VILLAGE HOMEOWNERS ASSOC,
Secondary Ownership: 
Mailing Address - C/O: 
Address: PO BOX 5265
Address - CSZ: FRISCO CO 804435265


# Find the Last Schedule ID

We want to find the last Schedule ID so that we can create a range of numbers that contains all Schedule IDs. Once we have that, we can constantly check the page for new IDs.

In [8]:
import requests
import time
import csv
import os
import pandas as pd
from bs4 import BeautifulSoup

def check_schedule_exists(schedule_id):
   url = f"https://gis.summitcountyco.gov/map/DetailData.aspx?Schno={schedule_id}"
   try:
       response = requests.get(url, timeout=10)
       if response.status_code != 200:
           return False
           
       soup = BeautifulSoup(response.text, 'html.parser')
       table = soup.find('table', class_='DetailData')
       
       if table:
           # Look for Property Desc cell
           cell = table.find('td', string=lambda text: text and 'Property Desc:' in text.strip())
           if cell:
               next_cell = cell.find_next('td')
               if next_cell and next_cell.text.strip():
                   return True
       return False
   except:
       return False

def save_to_csv(schedule_id, filename='schedule_ids.csv'):
   # Check if file exists to determine if we need headers
   file_exists = os.path.isfile(filename)
   
   with open(filename, 'a', newline='') as file:
       writer = csv.writer(file)
       if not file_exists:
           writer.writerow(['schedule_id'])  # Header
       writer.writerow([schedule_id])

def remove_duplicates_from_csv(filename='schedule_ids.csv'):
   # Read the CSV
   df = pd.read_csv(filename)
   
   # Remove duplicates and sort
   df_clean = df.drop_duplicates().sort_values('schedule_id')
   
   # Save back to the same file
   df_clean.to_csv(filename, index=False)
   
   print(f"Removed duplicates. Final count: {len(df_clean)} unique schedule IDs")

def find_all_schedule_ids(start_range=6508600, end_range=10000000):
   for schedule_id in range(start_range, end_range + 1):
       if check_schedule_exists(schedule_id):
           save_to_csv(schedule_id)  # Save immediately
           print(f"Found and saved: {schedule_id}")
       else:
           print(f"Schedule ID {schedule_id} doesn't exist. Stopping.")
           break
       
       time.sleep(0.5)  # Be nice to their server
       
       if schedule_id % 1000 == 0:  # Progress indicator
           print(f"Checked up to: {schedule_id}")
   
   print("Discovery complete! Cleaning up duplicates...")
   remove_duplicates_from_csv()

# Usage
find_all_schedule_ids()

Found and saved: 6508600
Found and saved: 6508601
Found and saved: 6508602
Found and saved: 6508603
Found and saved: 6508604
Found and saved: 6508605
Found and saved: 6508606
Found and saved: 6508607
Found and saved: 6508608
Found and saved: 6508609
Found and saved: 6508610
Found and saved: 6508611
Found and saved: 6508612
Found and saved: 6508613
Found and saved: 6508614
Schedule ID 6508615 doesn't exist. Stopping.
Discovery complete! Cleaning up duplicates...
Removed duplicates. Final count: 15 unique schedule IDs


# Confirm that 6508614 Is Truly the Last ID

In [9]:
import requests
import time
from bs4 import BeautifulSoup

def check_and_report_schedule(schedule_id):
   url = f"https://gis.summitcountyco.gov/map/DetailData.aspx?Schno={schedule_id}"
   try:
       response = requests.get(url, timeout=10)
       if response.status_code != 200:
           print(f"Schedule ID {schedule_id}: HTTP error {response.status_code}")
           return False
           
       soup = BeautifulSoup(response.text, 'html.parser')
       table = soup.find('table', class_='DetailData')
       
       if table:
           # Look for Property Desc cell
           cell = table.find('td', string=lambda text: text and 'Property Desc:' in text.strip())
           if cell:
               next_cell = cell.find_next('td')
               if next_cell and next_cell.text.strip():
                   print(f"Schedule ID {schedule_id}: VALID (has Property Desc)")
                   return True
           
           print(f"Schedule ID {schedule_id}: Table found but no Property Desc")
           print(f"Table content: {table.get_text()[:200]}...")  # First 200 chars
           return False
       else:
           print(f"Schedule ID {schedule_id}: No DetailData table found")
           print(f"Page content preview: {soup.get_text()[:200]}...")  # First 200 chars
           return False
   except Exception as e:
       print(f"Schedule ID {schedule_id}: Exception - {str(e)}")
       return False

def verify_range(start_range=6508610, end_range=6508620):
   for schedule_id in range(start_range, end_range + 1):
       check_and_report_schedule(schedule_id)
       time.sleep(0.5)  # Be nice to their server

# Usage
verify_range()

Schedule ID 6508610: VALID (has Property Desc)
Schedule ID 6508611: VALID (has Property Desc)
Schedule ID 6508612: VALID (has Property Desc)
Schedule ID 6508613: VALID (has Property Desc)
Schedule ID 6508614: VALID (has Property Desc)
Schedule ID 6508615: Table found but no Property Desc
Table content: 

Schedule # 6508615

Property Desc:  SubdivFilingPhaseBlockLot

 SubCode

Phys. Address: PPI:

 Ownership Econ:

 Nhood:

Primary:,  TaxArea: 2025Tship:

Secondary: AssdVal:0Est. Tax/Tax Rate:0Range:...
Schedule ID 6508616: Table found but no Property Desc
Table content: 

Schedule # 6508616

Property Desc:  SubdivFilingPhaseBlockLot

 SubCode

Phys. Address: PPI:

 Ownership Econ:

 Nhood:

Primary:,  TaxArea: 2025Tship:

Secondary: AssdVal:0Est. Tax/Tax Rate:0Range:...
Schedule ID 6508617: Table found but no Property Desc
Table content: 

Schedule # 6508617

Property Desc:  SubdivFilingPhaseBlockLot

 SubCode

Phys. Address: PPI:

 Ownership Econ:

 Nhood:

Primary:,  TaxArea: 2025Tsh

# Check All Schedule IDs

Unfortunately, there are breaks in the Schedule ID sequence. So... we're gonna have to check all of them

In [2]:
import requests
import time
import csv
import os
import pandas as pd
from bs4 import BeautifulSoup

def check_and_report_schedule(schedule_id):
  url = f"https://gis.summitcountyco.gov/map/DetailData.aspx?Schno={schedule_id}"
  try:
      response = requests.get(url, timeout=10)
      if response.status_code != 200:
          message = f"Schedule ID {schedule_id}: HTTP error {response.status_code}"
          return False, message
          
      soup = BeautifulSoup(response.text, 'html.parser')
      table = soup.find('table', class_='DetailData')
      
      if table:
          # Look for Property Desc cell
          cell = table.find('td', string=lambda text: text and 'Property Desc:' in text.strip())
          if cell:
              next_cell = cell.find_next('td')
              if next_cell and next_cell.text.strip():
                  message = f"Schedule ID {schedule_id}: VALID (has Property Desc)"
                  return True, message
          
          message = f"Schedule ID {schedule_id}: Table found but no Property Desc"
          return False, message
      else:
          message = f"Schedule ID {schedule_id}: No DetailData table found"
          return False, message
  except Exception as e:
      message = f"Schedule ID {schedule_id}: Exception - {str(e)}"
      return False, message

def save_to_csv(message, filename='schedule_results.csv'):
  # Check if file exists to determine if we need headers
  file_exists = os.path.isfile(filename)
  
  with open(filename, 'a', newline='') as file:
      writer = csv.writer(file)
      if not file_exists:
          writer.writerow(['result'])  # Header
      writer.writerow([message])

def verify_range(start_range=1000001, end_range=100000000):
  consecutive_false = 0
  iteration_count = 0
  
  for schedule_id in range(start_range, end_range + 1):
      result, message = check_and_report_schedule(schedule_id)
      save_to_csv(message)  # Save the message instead of just the ID
      
      iteration_count += 1
      
      if result:
          consecutive_false = 0  # Reset counter on valid find
      else:
          consecutive_false += 1
          
      if consecutive_false >= 100:
          final_message = f"Stopping after 100 consecutive invalid IDs. Last checked: {schedule_id}"
          save_to_csv(final_message)
          break
          
      time.sleep(0.5)  # Be nice to their server
      
      # Wait 20 seconds every 100 iterations
      if iteration_count % 500 == 0:
          time.sleep(20)

# Usage
verify_range()

KeyboardInterrupt: 

# Fresh Start: Getting Most Recent Data Directly From the Server

In [29]:
import requests
from datetime import datetime

num_results = 100

# Query URL with adjustable result count
query_url = f"https://gis.summitcountyco.gov/arcgis/rest/services/ParcelQueryTool/SummitMap1_Pro321/MapServer/19/query?where=SOURCE=1&orderByFields=MODDATE%20DESC&resultRecordCount={num_results}&outFields=*&returnGeometry=false&f=json"

# Send the request
response = requests.get(query_url)
if response.status_code == 200:
    data = response.json()
    features = data.get("features", [])
    print(f"{num_results} Most Recently Modified Parcels:\n")
    for feature in features:
        attributes = feature.get("attributes", {})
        object_id = attributes.get("OBJECTID")
        ppi = attributes.get("PPI")
        mod_date = attributes.get("MODDATE")
        mod_type = attributes.get("MODTYPE")
        # Convert MODDATE (Unix timestamp in milliseconds) to readable date
        if mod_date:
            mod_date = datetime.fromtimestamp(mod_date / 1000).strftime("%Y-%m-%d %H:%M:%S")
        #print(f"Parcel ID: {object_id}, PPI: {ppi}, Last Modified: {mod_date}")
        print(f"Object ID: {object_id}")
        print(f"PPI: {ppi}")
        print(f"MOD Date: {mod_date}")
        print(f"MOD Type: {mod_type}\n")
else:
    print(f"Error: Unable to fetch data. Status code: {response.status_code}")

100 Most Recently Modified Parcels:

Object ID: 367
PPI: 1697-3440-00-014
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 714
PPI: 1951-0610-00-016
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 728
PPI: 1951-0610-00-011
MOD Date: 2025-05-26 18:00:00
MOD Type: 0

Object ID: 731
PPI: 1951-0630-00-008
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 11034
PPI: 2097-3540-00-078
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 11129
PPI: 2093-3540-00-037
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 11212
PPI: 2097-3540-00-074
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 11242
PPI: 2097-3540-00-074
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 16102
PPI: 2211-3143-10-009
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 17902
PPI: 9999-9999-00-049
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 19860
PPI: 2375-1310-00-001
MOD Date: 2025-05-26 18:00:00
MOD Type: 4

Object ID: 19921
PPI: 2375-1310-00-001
MOD Date: 2025-05-26 18:00:00
