In [1]:
import pandas as pd
from dotenv import load_dotenv
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from tqdm import tqdm

import requests
import os
import time

pd.set_option('display.max_columns', None)

pd.set_option('display.max_rows', 100)

df = pd.read_csv('./raw_data/national-register-listed-20240710.csv')

df

Unnamed: 0,Ref#,Prefix,Property Name,State,County,City,Street & Number,Status,Request Type,Last Action Date,Restricted Address,Acreage of Property,Area of Significance,Category of Property,External Link,Level of Significance - International,Level of Significance - Local,Level of Significance - National,Level of Significance - Not Indicated,Level of Significance - State,Listed Date,Name of Multiple Property Listing,NHL Designated Date,Other Names,Park Name,Property ID
0,99000150,,Bell House,ALABAMA,Autauga,Prattville,550 Upper Kingston Rd.,Listed,Single,2/12/1999,False,1.0,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835700,False,True,False,False,False,2/12/1999,,,Biggs House,,99000150.0
1,84000596,,Daniel Pratt Historic District,ALABAMA,Autauga,Prattville,"Roughly bounded by Northington Rd., 1st, 6th, ...",Listed,Single,8/30/1984,False,140.0,INDUSTRY; ARCHITECTURE; SOCIAL HISTORY,DISTRICT,https://catalog.archives.gov/id/77835702,False,False,False,False,True,8/30/1984,,,,,84000596.0
2,97000651,,Lassiter House,ALABAMA,Autauga,Autaugaville,Antauga County 15. 0.5 mi. N of jct. of AL 14 ...,Listed,Single,7/17/1997,False,1.0,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835698,False,True,False,False,False,7/17/1997,,,Treadwell House,,97000651.0
3,74000396,,Montgomery-Janes-Whittaker House,ALABAMA,Autauga,Prattville,S of Prattville off AL 14,Listed,Single,10/25/1974,False,4.0,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835696,False,True,False,False,False,10/25/1974,,,Buena Vista,,74000396.0
4,1001296,,Mount Sinai School,ALABAMA,Autauga,Prattville,1820 Cty. Rd. 57,Listed,Multiple,11/29/2001,False,5.0,EDUCATION; BLACK; ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77834765,False,True,False,False,False,11/29/2001,The Rosenwald School Building Fund and Associ...,,,,1001296.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99194,69000198,,Jenney Stockade Site,WYOMING,Weston,Newcastle,Off US 16,Listed,Single,9/30/1969,False,0.0,LANDSCAPE ARCHITECTURE; MILITARY; TRANSPORTATION,SITE,https://catalog.archives.gov/id/73730806,False,False,False,False,True,9/30/1969,,,Jenney Stockade,,69000198.0
99195,8001061,,Newcastle Commercial District,WYOMING,Weston,Newcastle,Bounded by Burlington Northern Santa-Fe Railro...,Listed,Single,4/21/2009,False,8.5,ARCHITECTURE; COMMUNITY PLANNING AND DEVELOPM...,DISTRICT,https://catalog.archives.gov/id/73730800,False,True,False,False,False,4/21/2009,,,48WE1320,,8001061.0
99196,87000791,,US Post Office-Newcastle Main,WYOMING,Weston,Newcastle,W. Main St. and Sumner Ave.,Listed,Multiple,5/19/1987,False,0.3,POLITICS/GOVERNMENT; ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/73730060,False,False,False,False,True,5/19/1987,"Historic US Post Offices in Wyoming, 1900-194...",,,,87000791.0
99197,1000930,,Weston County Courthouse,WYOMING,Weston,Newcastle,1 West Main,Listed,Single,9/1/2001,False,0.9,POLITICS/GOVERNMENT,BUILDING,https://catalog.archives.gov/id/73730808,False,True,False,False,False,9/1/2001,,,48WE1160,,1000930.0


#### Remove closed hospitals

In [2]:
states_abbrev = {
    "ALABAMA": "AL", "ALASKA": "AK", "ARIZONA": "AZ", "ARKANSAS": "AR", "CALIFORNIA": "CA",
    "COLORADO": "CO", "CONNECTICUT": "CT", "DELAWARE": "DE", "FLORIDA": "FL", "GEORGIA": "GA",
    "HAWAII": "HI", "IDAHO": "ID", "ILLINOIS": "IL", "INDIANA": "IN", "IOWA": "IA",
    "KANSAS": "KS", "KENTUCKY": "KY", "LOUISIANA": "LA", "MAINE": "ME", "MARYLAND": "MD",
    "MASSACHUSETTS": "MA", "MICHIGAN": "MI", "MINNESOTA": "MN", "MISSISSIPPI": "MS", "MISSOURI": "MO",
    "MONTANA": "MT", "NEBRASKA": "NE", "NEVADA": "NV", "NEW HAMPSHIRE": "NH", "NEW JERSEY": "NJ",
    "NEW MEXICO": "NM", "NEW YORK": "NY", "NORTH CAROLINA": "NC", "NORTH DAKOTA": "ND", "OHIO": "OH",
    "OKLAHOMA": "OK", "OREGON": "OR", "PENNSYLVANIA": "PA", "RHODE ISLAND": "RI", "SOUTH CAROLINA": "SC",
    "SOUTH DAKOTA": "SD", "TENNESSEE": "TN", "TEXAS": "TX", "UTAH": "UT", "VERMONT": "VT",
    "VIRGINIA": "VA", "WASHINGTON": "WA", "WEST VIRGINIA": "WV", "WISCONSIN": "WI", "WYOMING": "WY"
}

df['State'] = df['State'].map(states_abbrev)

states_to_keep = [ 'AL','AZ','AR','CA','CO','CT','DE','FL','GA','ID','IL','IN','IA','KS','KY','LA','ME','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH',
                   'NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI','SC','SD','TN','TX','UT','VT','VA','WA','WV','WI','WY' ]

df = df[df['State'].isin(states_to_keep)]

df = df[df['Category of Property'] == 'BUILDING']

df = df[df['Restricted Address'] == False]

df = df[df['Level of Significance - Local'] == False]

df = df.dropna(subset=['Street & Number'])

df = df[df['Street & Number'].str.match(r'^\d')]

df = df.rename(columns={
    'Property Name': 'Name',
})

df = df.drop_duplicates(subset=['Name', 'City', 'State'])

df

Unnamed: 0,Ref#,Prefix,Name,State,County,City,Street & Number,Status,Request Type,Last Action Date,Restricted Address,Acreage of Property,Area of Significance,Category of Property,External Link,Level of Significance - International,Level of Significance - Local,Level of Significance - National,Level of Significance - Not Indicated,Level of Significance - State,Listed Date,Name of Multiple Property Listing,NHL Designated Date,Other Names,Park Name,Property ID
41,80000679,,"Methodist Episcopal Church, South",AL,Baldwin,Daphne,1608 Old County Rd.,Listed,Single,9/22/1980,False,1.2,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835723,False,False,False,False,True,9/22/1980,,,Old Daphne Methodist Church,,80000679.0
75,76002259,,"Clayton, Henry D., House",AL,Barbour,Clayton,1 mi. S of Clayton off AL 30,Listed,Single,12/8/1976,False,51.0,COMMERCE; POLITICS/GOVERNMENT,BUILDING,https://catalog.archives.gov/id/77835631,False,False,True,False,False,12/8/1976,,12/8/1976,Clayton Plantation,,76002259.0
76,72000154,,Drewry-Mitchell-Moorer House,AL,Barbour,Eufaula,640 N. Eufaula Ave.,Listed,Single,4/13/1972,False,1.5,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835769,False,False,False,False,True,4/13/1972,,,,,72000154.0
79,72000155,,Kendall Manor,AL,Barbour,Eufaula,534 W. Broad St.,Listed,Single,1/14/1972,False,0.9,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835771,False,False,False,False,True,1/14/1972,,,,,72000155.0
85,74000400,,Petty-Roberts-Beatty House,AL,Barbour,Clayton,103 N. Midway,Listed,Single,1/21/1974,False,0.9,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835773,False,False,False,False,True,1/21/1974,,,Octagon House,,74000400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99105,82001838,,Sweetwater Brewery,WY,Sweetwater,Green River,48 W. Railroad Ave.,Listed,Single,11/1/1982,False,0.9,INDUSTRY; ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/73730705,False,False,False,False,True,11/1/1982,,,Green River Brewery,,82001838.0
99147,69000195,,Miller Cabin,WY,Teton,Jackson,1 mi. NE of Jackson,Listed,Single,4/16/1969,False,0.0,CONSERVATION; SCIENCE,BUILDING,https://catalog.archives.gov/id/73730755,False,False,True,False,False,4/16/1969,,,,,69000195.0
99148,1001454,,"Miller, Grace and Robert, Ranch (Boundary Incr...",WY,Teton,Jackson,1 mi. NE of Jackson,Listed,Single,1/11/2002,False,5.0,EXPLORATION/SETTLEMENT; CONSERVATION,BUILDING,https://catalog.archives.gov/id/73730765,False,False,True,False,False,1/11/2002,,,Miller Cabin; Miller Barn (48TE903),,1001454.0
99177,84003712,,"Quinn, A. V., House",WY,Uinta,Evanston,1049 Center St.,Listed,Single,9/13/1984,False,0.9,COMMERCE; EXPLORATION/SETTLEMENT,BUILDING,https://catalog.archives.gov/id/73730781,False,False,False,False,True,9/13/1984,,,Pine Gables Inn,,84003712.0


In [3]:
# For testing
# df = df.head(10)

# Initialize the geocoder
geolocator = Nominatim(user_agent="my_geocoder")

def geocode_address(address):
    try:
        location = geolocator.geocode(address, timeout=10)
        time.sleep(1)  # Add delay to avoid rate limits
        return (location.latitude, location.longitude) if location else (None, None)
    except GeocoderTimedOut:
        return (None, None)

def full_address(row):
    return f"{row['Street & Number']}, {row['City']}, {row['State']}"

# Prevents warnings
df = df.copy()

df.loc[:, "Full Address"] = df.apply(full_address, axis=1)

tqdm.pandas(desc="Fetching Coordinates")

df[["Latitude", "Longitude"]] = df["Full Address"].progress_apply(geocode_address).apply(pd.Series)

df

Fetching Coordinates: 100%|██████████| 8466/8466 [3:55:04<00:00,  1.67s/it]  


Unnamed: 0,Ref#,Prefix,Name,State,County,City,Street & Number,Status,Request Type,Last Action Date,Restricted Address,Acreage of Property,Area of Significance,Category of Property,External Link,Level of Significance - International,Level of Significance - Local,Level of Significance - National,Level of Significance - Not Indicated,Level of Significance - State,Listed Date,Name of Multiple Property Listing,NHL Designated Date,Other Names,Park Name,Property ID,Full Address,Latitude,Longitude
41,80000679,,"Methodist Episcopal Church, South",AL,Baldwin,Daphne,1608 Old County Rd.,Listed,Single,9/22/1980,False,1.2,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835723,False,False,False,False,True,9/22/1980,,,Old Daphne Methodist Church,,80000679.0,"1608 Old County Rd., Daphne, AL",,
75,76002259,,"Clayton, Henry D., House",AL,Barbour,Clayton,1 mi. S of Clayton off AL 30,Listed,Single,12/8/1976,False,51.0,COMMERCE; POLITICS/GOVERNMENT,BUILDING,https://catalog.archives.gov/id/77835631,False,False,True,False,False,12/8/1976,,12/8/1976,Clayton Plantation,,76002259.0,"1 mi. S of Clayton off AL 30, Clayton, AL",,
76,72000154,,Drewry-Mitchell-Moorer House,AL,Barbour,Eufaula,640 N. Eufaula Ave.,Listed,Single,4/13/1972,False,1.5,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835769,False,False,False,False,True,4/13/1972,,,,,72000154.0,"640 N. Eufaula Ave., Eufaula, AL",31.900191,-85.146036
79,72000155,,Kendall Manor,AL,Barbour,Eufaula,534 W. Broad St.,Listed,Single,1/14/1972,False,0.9,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835771,False,False,False,False,True,1/14/1972,,,,,72000155.0,"534 W. Broad St., Eufaula, AL",31.892778,-85.151975
85,74000400,,Petty-Roberts-Beatty House,AL,Barbour,Clayton,103 N. Midway,Listed,Single,1/21/1974,False,0.9,ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/77835773,False,False,False,False,True,1/21/1974,,,Octagon House,,74000400.0,"103 N. Midway, Clayton, AL",31.884295,-85.451096
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99105,82001838,,Sweetwater Brewery,WY,Sweetwater,Green River,48 W. Railroad Ave.,Listed,Single,11/1/1982,False,0.9,INDUSTRY; ARCHITECTURE,BUILDING,https://catalog.archives.gov/id/73730705,False,False,False,False,True,11/1/1982,,,Green River Brewery,,82001838.0,"48 W. Railroad Ave., Green River, WY",41.528732,-109.468774
99147,69000195,,Miller Cabin,WY,Teton,Jackson,1 mi. NE of Jackson,Listed,Single,4/16/1969,False,0.0,CONSERVATION; SCIENCE,BUILDING,https://catalog.archives.gov/id/73730755,False,False,True,False,False,4/16/1969,,,,,69000195.0,"1 mi. NE of Jackson, Jackson, WY",,
99148,1001454,,"Miller, Grace and Robert, Ranch (Boundary Incr...",WY,Teton,Jackson,1 mi. NE of Jackson,Listed,Single,1/11/2002,False,5.0,EXPLORATION/SETTLEMENT; CONSERVATION,BUILDING,https://catalog.archives.gov/id/73730765,False,False,True,False,False,1/11/2002,,,Miller Cabin; Miller Barn (48TE903),,1001454.0,"1 mi. NE of Jackson, Jackson, WY",,
99177,84003712,,"Quinn, A. V., House",WY,Uinta,Evanston,1049 Center St.,Listed,Single,9/13/1984,False,0.9,COMMERCE; EXPLORATION/SETTLEMENT,BUILDING,https://catalog.archives.gov/id/73730781,False,False,False,False,True,9/13/1984,,,Pine Gables Inn,,84003712.0,"1049 Center St., Evanston, WY",41.267375,-110.967328


In [4]:
df = df[['Name', 'City', 'State', 'Longitude', 'Latitude']]

df = df.dropna(subset=['Longitude', 'Latitude'])

df

Unnamed: 0,Name,City,State,Longitude,Latitude
76,Drewry-Mitchell-Moorer House,Eufaula,AL,-85.146036,31.900191
79,Kendall Manor,Eufaula,AL,-85.151975,31.892778
85,Petty-Roberts-Beatty House,Clayton,AL,-85.451096,31.884295
87,Shorter Mansion,Eufaula,AL,-85.146276,31.896728
88,"Sparks, Gov. Chauncy, House",Eufaula,AL,-85.142911,31.892875
...,...,...,...,...,...
99002,US Post Office-Powell Main,Powell,WY,-108.757708,44.755063
99050,"Wissler, Susan, House",Dayton,WY,-107.262846,44.876057
99085,First National Bank Building,Rock Springs,WY,-109.218603,41.587302
99105,Sweetwater Brewery,Green River,WY,-109.468774,41.528732


In [5]:
df.to_csv('./data/Landmarks.csv', index=False)