In [21]:
# Import Dependencies
from urllib.parse import unquote

import gmaps
import numpy as np
import pandas as pd
import requests
from geopy import distance
from geopy import Nominatim

from config import g_key

In [22]:
# Get Address from User
# Every field is stripped: the state in particular is later used as an exact
# dictionary key, so stray leading/trailing whitespace would break that lookup.
street = input("Enter Address Line 1 (Street Address): ").strip()
city = input("Enter City: ").strip()
state = input("Enter State: ").strip()
zip_code = input("Enter Zip/Postal Code: ").strip()  # input() already returns str

Enter Address Line 1 (Street Address): 2340 Hurley Way
Enter City: Sacramento
Enter State: California
Enter Zip/Postal Code: 95825


In [23]:
# Build one comma-separated address string from the four user-supplied parts
address = ", ".join((street, city, state, zip_code))

In [24]:
# Returns lat/long from address
geolocator = Nominatim(user_agent="my_user_agent")
location = geolocator.geocode(address)
# geocode() gives back None when the address cannot be matched; stop here with a
# clear message instead of failing on the attribute access below.
if location is None:
    raise ValueError(f"Could not geocode address: {address!r}")
long_address = location
lat = location.latitude
long = location.longitude
print(long_address)
print(lat, long)

2340, Hurley Way, Arden-Arcade, Sacramento County, California, 95825, United States
38.58872355555556 -121.40824022222223


In [25]:
# State abbreviations; the EPA API uses abbreviations, while the address has the whole name.
# Maps full state/territory name -> two-letter postal abbreviation.
us_state_abbrev = {
    "Alabama": "AL", "Alaska": "AK", "American Samoa": "AS", "Arizona": "AZ",
    "Arkansas": "AR", "California": "CA", "Colorado": "CO", "Connecticut": "CT",
    "Delaware": "DE", "District of Columbia": "DC", "Florida": "FL", "Georgia": "GA",
    "Guam": "GU", "Hawaii": "HI", "Idaho": "ID", "Illinois": "IL",
    "Indiana": "IN", "Iowa": "IA", "Kansas": "KS", "Kentucky": "KY",
    "Louisiana": "LA", "Maine": "ME", "Maryland": "MD", "Massachusetts": "MA",
    "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS", "Missouri": "MO",
    "Montana": "MT", "Nebraska": "NE", "Nevada": "NV", "New Hampshire": "NH",
    "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY", "North Carolina": "NC",
    "North Dakota": "ND", "Northern Mariana Islands": "MP", "Ohio": "OH", "Oklahoma": "OK",
    "Oregon": "OR", "Pennsylvania": "PA", "Puerto Rico": "PR", "Rhode Island": "RI",
    "South Carolina": "SC", "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX",
    "Utah": "UT", "Vermont": "VT", "Virgin Islands": "VI", "Virginia": "VA",
    "Washington": "WA", "West Virginia": "WV", "Wisconsin": "WI", "Wyoming": "WY",
}

In [26]:
# Returns abbreviation of state for URL
# The lookup ignores letter case and surrounding whitespace, and also accepts an
# abbreviation that was typed directly (e.g. "CA").
state_lookup = {name.upper(): abbrev for name, abbrev in us_state_abbrev.items()}
state_lookup.update({abbrev: abbrev for abbrev in us_state_abbrev.values()})
try:
    url_state = state_lookup[state.strip().upper()]
except KeyError:
    # Same exception type as the plain dict lookup, but with an actionable message
    raise KeyError(
        f"Unrecognized state {state!r}: enter the full state name or its two-letter abbreviation"
    ) from None

In [27]:
# EPA API Base URL for Active Superfund Sites
# Fetches every site record for the selected state and parses the JSON body.

epa_url = f'https://data.epa.gov/efservice/SEMS_ACTIVE_SITES/SITE_STATE/CONTAINING/{url_state}/JSON'
# A timeout keeps the notebook from hanging forever on an unresponsive server
response = requests.get(epa_url, timeout=30)
# Surface HTTP errors (4xx/5xx) here rather than as a confusing JSON decode failure
response.raise_for_status()
request = response.json()

In [28]:
# Load the parsed JSON returned by the EPA request into a Pandas DataFrame
sf_sites_all = pd.DataFrame(data=request)

In [29]:
# Returning new DF containing sites that have coordinates, i.e. are currently on the NPL or proposed for NPL
# Both coordinates are required: the distance calculation reads LATITUDE and LONGITUDE,
# so a row missing either one cannot be used.
sf_sites_cleaned = sf_sites_all.dropna(subset=["LATITUDE", "LONGITUDE"])

In [30]:
def distance_calc(row):
    '''
    Distance in miles from the user's address to one Superfund Site.

    The address coordinates come from the notebook-level ``lat`` and ``long``
    variables; the site coordinates are read from ``row['LATITUDE']`` and
    ``row['LONGITUDE']``. The result is the ``.miles`` value of
    ``distance.distance`` between the two points.
    '''
    return distance.distance(
        (lat, long),
        (row['LATITUDE'], row['LONGITUDE']),
    ).miles

In [31]:
# Creates a new column with the distance in miles between the address and Superfund Sites
# The distances are computed row by row and attached with .assign(), which returns a new
# DataFrame (so no SettingWithCopyWarning and no temporary copies are needed).
# A list comprehension is used instead of DataFrame.apply(axis=1) because apply on a
# frame with no rows hands back an empty DataFrame, which cannot be stored as one column.
site_distances = pd.Series(
    [distance_calc(row) for _, row in sf_sites_cleaned.iterrows()],
    index=sf_sites_cleaned.index,
    dtype="float64",
)
sf_sites_cleaned = sf_sites_cleaned.assign(SITE_DISTANCE=site_distances)

In [32]:
# Keep only the Superfund Sites no farther than `dist` miles from the address, nearest first

dist = 50 # 50 miles for troubleshooting; lower or make it user input in later versions

sf_sites_near = (
    sf_sites_cleaned[sf_sites_cleaned['SITE_DISTANCE'].le(dist)]
    .sort_values(by='SITE_DISTANCE')
)

In [33]:
# Base of the EPA site-profile URL; a site's SITE_ID is appended to it
site_base_url = 'https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id='

def get_site_url(row):
    '''
    Returns the site URL for one row: the base URL followed by str(row["SITE_ID"])
    '''
    return f'{site_base_url}{row["SITE_ID"]!s}'

In [34]:
# Creates a new column with the Site URL
# .assign() returns a new DataFrame, so no temporary copy is needed to avoid
# SettingWithCopyWarning. The URLs are built with a list comprehension rather than
# DataFrame.apply(axis=1) so that the cell still works when no site is within range
# (apply on a frame with no rows returns an empty DataFrame, not a single column).

sf_sites_near = sf_sites_near.assign(
    SITE_URL=[get_site_url(row) for _, row in sf_sites_near.iterrows()]
)

In [35]:
# Pull the display columns out as plain Python lists (same row order as sf_sites_near)
site_list = sf_sites_near['SITE_NAME'].tolist()
url_list = sf_sites_near['SITE_URL'].tolist()
distance_list = sf_sites_near['SITE_DISTANCE'].tolist()

In [43]:
# Prints out the count of Superfund Sites, followed by one numbered entry per site
count = len(sf_sites_near.index)
print(f"There are {count} Superfund Sites within {dist} miles of your address: \n")
for rank, (site_name, site_distance, site_url) in enumerate(
        zip(site_list, distance_list, url_list), start=1):
    # Site names can arrive percent-encoded (the recorded output shows "AC%26W");
    # decode them so the reader sees "AC&W".
    print(f'{rank}): {unquote(str(site_name))} is {site_distance:.01f} miles away. \n'
          f'URL: {site_url} \n')

There are 11 Superfund Sites within 50 miles of your address: 

1): SACRAMENTO ARMY DEPOT is 4.8 miles away. 
URL: https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id=0902715 

2): MCCLELLAN AIR FORCE BASE (GROUND WATER CONTAMINATION) is 4.9 miles away. 
URL: https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id=0902759 

3): JIBBOOM JUNKYARD is 5.2 miles away. 
URL: https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id=0902040 

4): MATHER AIR FORCE BASE (AC%26W DISPOSAL SITE) is 6.4 miles away. 
URL: https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id=0902793 

5): AEROJET GENERAL CORP. is 11.4 miles away. 
URL: https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id=0901718 

6): FRONTIER FERTILIZER is 16.2 miles away. 
URL: https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id=0901554 

7): LABORATORY FOR ENERGY-RELATED HEALTH RESEARCH/OLD CAMPUS LANDFILL (USDOE) is 19.2 miles away. 
URL: https://cumulis.epa.gov/supercpad/cursites/csitinfo.cfm?id=090

In [37]:
# HTML snippet shown in the info box of each site marker on the map
info_box_template = """
<dl>
<dt>Superfund Site Name</dt><dd>{SITE_NAME}</dd>
<dt>URL</dt><dd>{SITE_URL}</dd>
</dl>
"""

# Fill the template once per nearby site; the row's columns supply the placeholder values.
site_info = [
    info_box_template.format(**site_row)
    for _, site_row in sf_sites_near.iterrows()
]


In [38]:
# Add a marker layer for each site to the map.
# Register the Google Maps API key imported from config; nothing else in the notebook
# configures gmaps, so a fresh kernel would otherwise render the map without a key.
gmaps.configure(api_key=g_key)
locations = sf_sites_near[["LATITUDE", "LONGITUDE"]]
# No center/zoom is passed so that gmaps chooses a view that fits the markers,
# instead of a fixed world view centred far away from the sites.
fig = gmaps.figure()
marker_layer = gmaps.marker_layer(locations, info_box_content=site_info)
fig.add_layer(marker_layer)
fig

Figure(layout=FigureLayout(height='420px'))