In [24]:
# Basics
import os
import zipfile
import time

# Data management
import pandas as pd
import openpyxl

# Visualization
import matplotlib.pyplot as plt

# GIS/Maps
import geopandas as gpd
import folium
import mapclassify
from geopy.geocoders import Nominatim



In [25]:
# Step 1: Define the path to the ZIP file
zip_file_path = './county_boundaries.zip'

# Step 2: Check if the ZIP file exists
if not os.path.exists(zip_file_path):
    print(f"Error: File not found at {zip_file_path}")
else:
    print(f"ZIP file found: {zip_file_path}")

    # Step 3: Extract the ZIP file
    extraction_path = './shapefiles'
    with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
        zip_ref.extractall(extraction_path)

    # Step 4: List the extracted files
    extracted_files = os.listdir(extraction_path)
    print("Extracted files:", extracted_files)

    # Step 5: Find the shapefile (.shp) inside the extracted folder
    shapefile_path = next((os.path.join(extraction_path, f) for f in extracted_files if f.endswith('.shp')), None)

    if shapefile_path is None:
        print("Error: No shapefile (.shp) found in the extracted folder.")
    else:
        print(f"Shapefile found: {shapefile_path}")

        # Step 6: Read the shapefile using GeoPandas
        gdf = gpd.read_file(shapefile_path)

        # Step 7: Check the CRS of the shapefile
        print("Original CRS:", gdf.crs)

        # If the CRS is not EPSG:4326, reproject it
        if gdf.crs != 'EPSG:4326':
            gdf = gdf.to_crs(epsg=4326)  # Reproject to EPSG:4326
            print("Reprojected CRS:", gdf.crs)

        # Step 8: Filter the GeoDataFrame for counties in the Delaware Valley region
        delaware_valley_counties = [
            "Philadelphia", "Bucks", "Chester", "Delaware", "Montgomery",  # Pennsylvania counties
            "Burlington", "Camden", "Gloucester", "Mercer",  # New Jersey counties
            "New Castle",  # Delaware county
            "Cecil"  # Maryland county (optional)
        ]

        # Filter for the counties in the Delaware Valley region based on 'co_name' column
        delaware_valley_gdf = gdf[gdf['co_name'].isin(delaware_valley_counties)]

        # # Display the first few rows of the filtered GeoDataFrame
        # print("Counties in the Delaware Valley Region:")
        # print(delaware_valley_gdf.head())  # Use .head() to display the first few rows of the table


ZIP file found: ./county_boundaries.zip
Extracted files: ['County_Boundaries_(polygon).prj', 'County_Boundaries_(polygon).xml', 'County_Boundaries_(polygon).dbf', 'County_Boundaries_(polygon).shx', 'County_Boundaries_(polygon).shp', 'County_Boundaries_(polygon).cpg']
Shapefile found: ./shapefiles/County_Boundaries_(polygon).shp
Original CRS: EPSG:26918
Reprojected CRS: EPSG:4326


In [26]:
# Set the center of the map
map_center = [39.915, -75.2]  # Example center (near Philadelphia)

# Create a Folium map centered on the Delaware Valley region
m = folium.Map(location=map_center, zoom_start=8)

# Add the Delaware Valley region to the map as GeoJSON
folium.GeoJson(delaware_valley_gdf).add_to(m)

# Display the map
# m

<folium.features.GeoJson at 0x1200c8990>

In [None]:
# Load the schools data
schools = pd.read_excel('/Users/jazminb./Desktop/RA_Fall2024/AtlasData/PhiladelphiaSchools.xlsx')

# Initialize geolocator
geolocator = Nominatim(user_agent="school_locator")

# Function to get latitude and longitude based on school name, full address, and county
def get_coordinates(school_name, full_address, county_name):
    try:
        # Geocode the full address, including the school name, address, and county
        location = geolocator.geocode(full_address)
        
        if location:
            # Return the latitude and longitude if found
            return location.latitude, location.longitude
        else:
            print(f"Could not geocode: {school_name} at {full_address}")
            # If initial geocoding fails, try with the county (fallback to full address + county)
            address_fallback = f"{full_address}, {county_name}"
            location_fallback = geolocator.geocode(address_fallback)
            if location_fallback:
                return location_fallback.latitude, location_fallback.longitude
            else:
                print(f"Could not geocode: {school_name} in {county_name}")
                return None, None
    except Exception as e:
        print(f"Error for {school_name} at {full_address}: {e}")
        return None, None

# Create a map object
m = folium.Map(location=[39.9526, -75.1652], zoom_start=12)  # Center map on Philadelphia

# Loop through schools DataFrame using .iterrows() to get rows
results = []  # Make sure this list is initialized before use

for _, school in schools.iterrows():  # .iterrows() will give us each row
    name = school["name"]
    full_address = school["address"]  # Assuming the full address is stored in the "address" column
    county = school["county"]  # Assuming county is also included in the DataFrame
    lat, lon = get_coordinates(name, full_address, county)  # Get coordinates using full address and county
    results.append({
        "name": name,
        "type": school["type"],
        "address": full_address,
        "county": county,
        "latitude": lat,
        "longitude": lon
    })
    time.sleep(1)  # Pause to respect API usage limits

# Convert results to a DataFrame
school_coords = pd.DataFrame(results)

# Define colors for each school type
type_colors = {
    "Public District": "blue",
    "Charter": "green",
    "Private Secular": "purple",
    "Private Religious": "red"
}

# Plot each school on the map
for _, row in school_coords.iterrows():
    if not pd.isna(row["latitude"]) and not pd.isna(row["longitude"]):
        folium.Marker(
            location=[row["latitude"], row["longitude"]],
            popup=f"{row['name']} ({row['type']})",
            icon=folium.Icon(color=type_colors.get(row["type"], "gray"))
        ).add_to(m)

# Save the map to an HTML file
# m.save("schools_map.html")

m



KeyError: 'address'