Import the pandas, GeoPandas, folium, and GeoPy libraries

In [33]:
import os

import folium
import geopandas as gdp
import pandas as pd
from geopy.extra.rate_limiter import RateLimiter
from geopy.geocoders import Nominatim

Test a location

In [34]:
# Smoke-test the geocoder with a single well-known place before running it
# over the whole CSV. `locator` is reused by the rate-limited geocoder later.
locator = Nominatim(user_agent="myGeocoder")
location = locator.geocode("Washington, DC")
if location is None:
    # geocode() returns None when the service finds no match; fail with a
    # clear message instead of an AttributeError on `.latitude` below.
    raise ValueError("Geocoder returned no result for 'Washington, DC'")
print((location.latitude, location.longitude))
print(location)

(38.8949855, -77.0365708)
Washington, District of Columbia, 20500, United States of America


Read CSV of locations

In [35]:
# Location of the CSV to geocode. Set the LOCATIONS_CSV environment variable
# to point at a different file; when it is unset, the original path is used so
# existing runs keep working.
input_filepath = os.environ.get(
    'LOCATIONS_CSV',
    '/Users/waynegraham/projects/maps/locations.csv',
)

data = pd.read_csv(input_filepath, encoding='utf8')

Set the column headers, validate them, and build the address column

In [36]:
# Names of the CSV columns that hold the city and the state. Change them here
# if the input file uses different headers.
city_column = 'City'
state_column = 'State'

# Fail early, with a readable message, when the CSV lacks a required column.
if city_column not in data.columns:
    raise ValueError("Missing city column")

if state_column not in data.columns:
    raise ValueError("Missing state column")

# Build the "City, State" query string from the configured columns (rather
# than hard-coded names) so the settings above are actually honoured.
data["address"] = data[city_column] + ", " + data[state_column]

addresses = data['address'].to_list()

data.head()

Unnamed: 0,Name,City,State,Affiliation,address
0,Wayne,Arlington,VA,CLIR,"Arlington, VA"
1,Amy,Alexandria,VA,CLIR,"Alexandria, VA"
2,Josh,Brooklyn,NY,IIIF,"Brooklyn, NY"
3,Gayle,Atlanta,GA,DLF,"Atlanta, GA"


In [37]:
# Wrap the geocoder so consecutive requests are at least one second apart.
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

# Skip rows whose address is missing: a NaN address would otherwise be sent to
# the service as the text "nan" and could match an unrelated real place.
data['location'] = data['address'].apply(
    lambda address: geocode(address) if pd.notnull(address) else None
)
# Rows that were skipped or could not be geocoded keep point=None.
data['point'] = data['location'].apply(lambda loc: tuple(loc.point) if loc else None)

Split out the longitude/latitude coordinates

In [38]:
# Expand each (latitude, longitude, altitude) tuple into its own column.
# Rows without a geocoding result have no tuple; substitute an all-NaN triple
# so the intermediate frame always has exactly three columns, even when the
# first row (or every row) failed to geocode.
coordinate_columns = ['latitude', 'longitude', 'altitude']
missing_point = (float('nan'),) * 3
data[coordinate_columns] = pd.DataFrame(
    [point if isinstance(point, tuple) else missing_point for point in data['point']],
    index=data.index,
    columns=coordinate_columns,
)

# Store the point geometries explicitly. Previously the GeoDataFrame built
# here was discarded, so the 'geometry' column only appeared on `data` as a
# side effect of the constructor.
data['geometry'] = gdp.points_from_xy(x=data.longitude, y=data.latitude)

data.head()

Unnamed: 0,Name,City,State,Affiliation,address,location,point,latitude,longitude,altitude,geometry
0,Wayne,Arlington,VA,CLIR,"Arlington, VA","(Arlington, Arlington County, Virginia, United...","(38.8903961, -77.0841585, 0.0)",38.890396,-77.084159,0.0,POINT (-77.08416 38.89040)
1,Amy,Alexandria,VA,CLIR,"Alexandria, VA","(Alexandria, Virginia, 22314, United States of...","(38.8051095, -77.0470229, 0.0)",38.80511,-77.047023,0.0,POINT (-77.04702 38.80511)
2,Josh,Brooklyn,NY,IIIF,"Brooklyn, NY","(Brooklyn, New York, United States of America,...","(40.6501038, -73.9495823, 0.0)",40.650104,-73.949582,0.0,POINT (-73.94958 40.65010)
3,Gayle,Atlanta,GA,DLF,"Atlanta, GA","(Atlanta, Fulton County, Georgia, United State...","(33.7490987, -84.3901849, 0.0)",33.749099,-84.390185,0.0,POINT (-84.39018 33.74910)


Remove unneeded "working" columns

In [39]:
# These columns were only needed to build and run the geocoding queries.
working_columns = ['City', 'State', 'address', 'location']
data = data.drop(columns=working_columns)
data.head()

Unnamed: 0,Name,Affiliation,point,latitude,longitude,altitude,geometry
0,Wayne,CLIR,"(38.8903961, -77.0841585, 0.0)",38.890396,-77.084159,0.0,POINT (-77.08416 38.89040)
1,Amy,CLIR,"(38.8051095, -77.0470229, 0.0)",38.80511,-77.047023,0.0,POINT (-77.04702 38.80511)
2,Josh,IIIF,"(40.6501038, -73.9495823, 0.0)",40.650104,-73.949582,0.0,POINT (-73.94958 40.65010)
3,Gayle,DLF,"(33.7490987, -84.3901849, 0.0)",33.749099,-84.390185,0.0,POINT (-84.39018 33.74910)


Check for locations without a latitude and drop them

In [40]:
# Count the rows that could not be geocoded and report the number. The count
# was previously computed mid-cell, so the notebook never displayed it.
missing_latitudes = data['latitude'].isnull().sum()
print("Locations without a latitude:", missing_latitudes)

# Keep only the rows that have coordinates.
data = data[data['latitude'].notnull()]

Convert to a GeoDataFrame and compute the bounding box

In [45]:
# Build a GeoDataFrame holding only the point geometries, naming the geometry
# column explicitly rather than relying on it being detected by name.
points = gdp.GeoDataFrame(data[['geometry']], geometry='geometry')

# total_bounds is the array [minx, miny, maxx, maxy] over all geometries.
bounding_box = points.total_bounds
# Backward-compatible alias for the original, misspelled variable name.
boundaing_box = bounding_box

print(bounding_box)



[-84.3901849  33.7490987 -73.9495823  40.6501038]


TODO: Calculate centroid out of csv values

In [58]:
# Centre the map on the mean of the geocoded coordinates. Fall back to the
# previously hard-coded centre when there are no usable rows.
center_latitude = data["latitude"].mean()
center_longitude = data["longitude"].mean()
if pd.isnull(center_latitude) or pd.isnull(center_longitude):
    map_center = [38.890396, -77.084159]
else:
    map_center = [float(center_latitude), float(center_longitude)]

map_options = {"location": map_center, "zoom_start": 5}

# The Thunderforest API key is read from the environment so it is not stored
# in the notebook. Without a key, folium's default tile layer is used instead.
thunderforest_api_key = os.environ.get("THUNDERFOREST_API_KEY")
if thunderforest_api_key:
    map_options["tiles"] = (
        "https://{s}.tile.thunderforest.com/pioneer/{z}/{x}/{y}.png?apikey="
        + thunderforest_api_key
    )
    map_options["attr"] = '<a href="http://www.thunderforest.com/">Thunderforest</a>'

m = folium.Map(**map_options)

# Add one circle marker per geocoded location. A plain loop is used because
# the markers are added for their side effect on the map, not for a result.
for latitude, longitude in zip(data["latitude"], data["longitude"]):
    folium.CircleMarker(location=[latitude, longitude]).add_to(m)

m

In [None]:
# Write the rendered map to a standalone HTML file in the working directory.
output_html = 'map.html'
m.save(output_html)