In [None]:
import pandas as pd
import time
from geopy.geocoders import Nominatim
import geopandas as gpd

In [None]:
# CONFIGURATION:
# NOTE(review): the three paths below are absolute paths under one user's home
# directory; they must be edited before the notebook can run on another machine.

# Survey responses CSV; loaded with pd.read_csv in the RUN cell.
INPUT_CSV = "/Users/louis/Downloads/raw data/Employee Survey Response.csv"
# Destination CSV; passed to geocode_addresses as its output_path.
GEOCODED_CSV = "/Users/louis/Downloads/raw data/geocoded_results.csv"
# Shapefile handed to inspect_shapefile_columns.
# NOTE(review): the file name suggests 2024 TIGER/Line block groups for state
# FIPS 30 — TODO confirm against the data source.
BLOCK_GROUP_SHP = "/Users/louis/Downloads/tl_2024_30_bg/tl_2024_30_bg.shp"
# Suffix appended to every home/work address string before it is geocoded.
CITY_CONTEXT = ", Missoula, MT, USA"
# Survey column headers that hold the respondent's home and workplace locations.
HOME_COL = "What is your home address or nearest intersection to your home?"
WORK_COL = "What is the address of your workplace or nearest intersection?"

In [None]:
# GEOCODING FUNCTION:
def geocode_addresses(df, home_col, work_col, context, output_path,
                      geolocator=None, delay=1.0):
    """Geocode the home and work address columns of a survey table.

    The input frame is left untouched; a cleaned, geocoded copy is returned
    and also written to ``output_path`` as CSV (without the index).

    Parameters
    ----------
    df : pandas.DataFrame
        Survey responses containing ``home_col`` and ``work_col``.
    home_col, work_col : str
        Names of the columns holding the free-text home / work locations.
    context : str
        Suffix appended to every address before lookup (e.g. ", City, ST, USA").
    output_path : str or path-like
        Where the geocoded CSV is saved.
    geolocator : object, optional
        Any object exposing ``geocode(address)`` that returns ``None`` or a
        result with ``latitude`` / ``longitude`` attributes. Defaults to a
        Nominatim client with a 10-second timeout.
    delay : float, optional
        Seconds to sleep before each lookup request (default 1.0).

    Returns
    -------
    pandas.DataFrame
        Rows that have a home address, with the added columns
        "Full Home Address", "Full Work Address", "Home_Lat", "Home_Lon",
        "Work_Lat" and "Work_Lon". Coordinates are NaN when the address is
        missing, not found, or the lookup raised.
    """
    # Work on a copy so the caller's DataFrame is never modified.
    df = df.copy()

    for col in (home_col, work_col):
        raw = df[col]
        cleaned = raw.astype(str).str.strip()
        # astype(str) turns NaN into the literal text "nan", so restore real
        # missing values (and blank strings) as NaN after stripping.
        df[col] = cleaned.where(raw.notna() & (cleaned != ""))

    # Rows without a home address cannot be placed; drop them. The explicit
    # copy avoids SettingWithCopyWarning on the column assignments below.
    df = df[df[home_col].notna()].copy()
    df["Full Home Address"] = df[home_col] + context
    # A missing work address stays NaN here and is skipped by the geocoder.
    df["Full Work Address"] = df[work_col] + context

    if geolocator is None:
        # geopy's default timeout is 1 second, which the public Nominatim
        # server frequently exceeds; allow more time per request.
        geolocator = Nominatim(user_agent="missoula_commute_mapper", timeout=10)

    not_found = (float("nan"), float("nan"))
    resolved = {}  # address -> (lat, lon); avoids repeat requests for duplicates

    def geocode_simple(address):
        """Return (lat, lon) for one address, or (NaN, NaN) if unavailable."""
        if pd.isna(address):
            return not_found
        if address in resolved:
            return resolved[address]
        try:
            if delay:
                time.sleep(delay)
            location = geolocator.geocode(address)
            coords = (location.latitude, location.longitude) if location else not_found
        except Exception as e:
            # Best effort: report the failure and carry on. The failure is not
            # cached, so a later duplicate of this address gets another try.
            print(f"Failed for: {address} → {e}")
            return not_found
        resolved[address] = coords
        return coords

    def add_coordinates(address_col, lat_col, lon_col):
        """Geocode one address column and store the results positionally."""
        coords = [geocode_simple(address) for address in df[address_col]]
        df[lat_col] = pd.Series([lat for lat, _ in coords], dtype="float64").to_numpy()
        df[lon_col] = pd.Series([lon for _, lon in coords], dtype="float64").to_numpy()

    print("Geocoding home addresses...")
    add_coordinates("Full Home Address", "Home_Lat", "Home_Lon")

    print("Geocoding workplace addresses...")
    add_coordinates("Full Work Address", "Work_Lat", "Work_Lon")

    df.to_csv(output_path, index=False)
    print(f"Saved geocoded results to: {output_path}")
    return df

In [None]:
# INSPECT SHAPEFILE METADATA:
def inspect_shapefile_columns(shp_path):
    """Load a shapefile and print its column index and its first three rows.

    Parameters
    ----------
    shp_path : str or path-like
        Location of the shapefile, passed straight to ``gpd.read_file``.
    """
    layer = gpd.read_file(shp_path)
    # Each entry pairs a heading with the object printed beneath it.
    sections = (
        ("\n📦 Shapefile columns:", layer.columns),
        ("\n🗺️ Example rows:", layer.head(3)),
    )
    for heading, payload in sections:
        print(heading)
        print(payload)

In [None]:
# RUN:

if __name__ == "__main__":
    # Load the raw survey responses from the configured CSV.
    df = pd.read_csv(INPUT_CSV)
    # Geocode the home and work address columns and save the result to
    # GEOCODED_CSV. This makes network geocoding requests with a pause before
    # each one, so the cell is slow for large surveys.
    geocoded_df = geocode_addresses(df, HOME_COL, WORK_COL, CITY_CONTEXT, GEOCODED_CSV)
    # Print the shapefile's columns and first rows to see which fields it has.
    inspect_shapefile_columns(BLOCK_GROUP_SHP)

Geocoding home addresses...
Failed for: Sunflower Dr. and Lincoln Hills Dr., Missoula, MT, USA → HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=Sunflower+Dr.+and+Lincoln+Hills+Dr.%2C+Missoula%2C+MT%2C+USA&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
Failed for: Expressway and S Reserve St, Missoula, MT, USA → HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=Expressway+and+S+Reserve+St%2C+Missoula%2C+MT%2C+USA&format=json&limit=1 (Caused by ReadTimeoutError("HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Read timed out. (read timeout=1)"))
Failed for: Orange street and S 2nd street W, Missoula, MT, USA → HTTPSConnectionPool(host='nominatim.openstreetmap.org', port=443): Max retries exceeded with url: /search?q=Orange+street+and+S+2nd+street+W%2