<p style="color:blue;font-weight:700;font-size:25px; text-align: center;"> Data Scraping [1]</p>

<p style="color:green;font-weight:600;font-size:16px; text-align: center;"> Developed by: Abu Kibria, PhD </p>
    <p style="color:green;font-weight:600;font-size:16px; text-align: center;"> Midwest CASC, MI, USA </p>
<p style="color:green;font-weight:600;font-size:14px; text-align: center;"> (March 2025) </p>



In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Set up an agent

In [None]:
# Target page to scrape, plus request headers that mimic a desktop browser.
url = "https://www.michigan.gov/egle/about/organization/Water-Resources/Wetlands/consultants-list"
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
}

### Fetching

In [None]:
# Fetch the webpage. An explicit timeout (in seconds) keeps this cell from
# hanging indefinitely if the server never responds; requests applies no
# timeout by default.
response = requests.get(url, headers=headers, timeout=30)

# Ensure request was successful. Raise instead of calling exit(): inside a
# notebook, exit() asks the kernel to shut down (discarding all state) rather
# than simply failing this cell, whereas an exception stops "Run All" here
# with a readable traceback.
if response.status_code != 200:
    raise RuntimeError(
        f"Failed to retrieve the webpage. Status code: {response.status_code}"
    )

### Detecting the table in the HTML code

In [None]:
# Parse the downloaded HTML with Python's built-in parser.
soup = BeautifulSoup(response.content, 'html.parser')

# Identify the table: take the first <table> element on the page, since no
# specific class/id is known. find() returns None when nothing matches.
table = soup.find('table')

# Stop here if no table was found. Raise instead of calling exit(): inside a
# notebook, exit() asks the kernel to shut down rather than failing the cell,
# and later cells cannot work without a table anyway.
if table is None:
    raise RuntimeError(
        "No table found on the page. Please check the webpage structure."
    )

### Extracting the data

In [10]:
# Gather every <tr> of the detected table.
rows = table.find_all('tr')

# Build one [consultant, contact, email, city] record per usable row.
data = []
for row in rows[1:]:  # the first row is treated as the header and skipped
    cols = row.find_all('td')

    # Only rows with at least four <td> cells are processed.
    if len(cols) >= 4:
        consultant = cols[0].get_text().strip()
        contact = cols[1].get_text().strip()

        # The email is read from the link text in the third cell, if any.
        email_link = cols[2].find('a')
        if email_link:
            email = email_link.get_text().strip()
        else:
            email = "N/A"

        city = cols[3].get_text().strip()

        data.append([consultant, contact, email, city])


### Double-checking the extraction

In [None]:
# Check that at least one row was extracted. Raise instead of calling exit():
# inside a notebook, exit() asks the kernel to shut down rather than failing
# the cell, and an exception keeps the kernel state available for debugging.
if not data:
    raise RuntimeError(
        "No data extracted. The webpage structure may have changed."
    )

### Convert the data into a DataFrame & save

In [None]:
# Turn the extracted records into a labelled table.
column_names = ["Consultant", "Contact", "Email", "City"]
df = pd.DataFrame(data, columns=column_names)

# Write the table to an Excel workbook without the index column, then confirm.
df.to_excel("wetlands_consultants.xlsx", index=False)
print("Data saved to wetlands_consultants.xlsx!")