Install Depedencies (only run once)

In [3]:
!pip3 install --upgrade pandas
!pip3 install --upgrade matplotlib
!pip3 install --upgrade geopandas
!pip3 install --upgrade geopy
!pip3 install --upgrade folium

Requirement already up-to-date: pandas in /usr/local/lib/python3.8/site-packages (1.1.0)
Requirement already up-to-date: matplotlib in /usr/local/lib/python3.8/site-packages (3.3.0)
Requirement already up-to-date: geopandas in /usr/local/lib/python3.8/site-packages (0.8.1)
Requirement already up-to-date: geopy in /usr/local/lib/python3.8/site-packages (2.0.0)
Requirement already up-to-date: folium in /usr/local/lib/python3.8/site-packages (0.11.0)


Import GeoPy and pandas libraries

In [1]:
import pandas as pd
import geopandas as gdp
import folium
from geopy.geocoders import Nominatim

Test a location

In [2]:
locator = Nominatim(user_agent="clir_geocoder")
location = locator.geocode("Washington, DC")
print((location.latitude, location.longitude))
print(location)

(38.8949855, -77.0365708)
Washington, District of Columbia, 20500, United States of America


Read CSV of locations

In [23]:
input_filepath = '/Users/wgraham/projects/juypter_notebooks/locations.csv'

data = pd.read_csv(input_filepath, encoding='utf8')

Set the column headers

In [24]:
city_column = 'City'
state_column = 'State'

if city_column not in data.columns:
    raise ValueError("Missing city column")
    
if state_column not in data.columns:
    raise ValueError("Missing state column")
    
data["address"] = data["City"] + ", " + data["State"]
    
addresses = data['address'].to_list()

data.head()

Unnamed: 0,Name,City,State,Affiliation,address
0,Wayne,Arlington,VA,CLIR,"Arlington, VA"
1,Amy,Alexandria,VA,CLIR,"Alexandria, VA"
2,Josh,Brooklyn,NY,IIIF,"Brooklyn, NY"
3,Gayle,Atlanta,GA,DLF,"Atlanta, GA"
4,Becca,Riverside,California,CLIR,"Riverside, California"


In [25]:
from geopy.extra.rate_limiter import RateLimiter
geocode = RateLimiter(locator.geocode, min_delay_seconds=1)

data['location'] = data['address'].apply(geocode)
data['point'] = data['location'].apply(lambda loc: tuple(loc.point) if loc else None)

Split out the longitude/latitude coordinates

In [26]:
data[['latitude', 'longitude', 'altitude']] = pd.DataFrame(data['point'].tolist(), index=data.index)

gdp.GeoDataFrame(
    data, geometry=gdp.points_from_xy(x=data.longitude, y=data.latitude)
)

data.head()

Unnamed: 0,Name,City,State,Affiliation,address,location,point,latitude,longitude,altitude,geometry
0,Wayne,Arlington,VA,CLIR,"Arlington, VA","(Arlington, Arlington County, Virginia, United...","(38.8903961, -77.0841585, 0.0)",38.890396,-77.084159,0.0,POINT (-77.08416 38.89040)
1,Amy,Alexandria,VA,CLIR,"Alexandria, VA","(Alexandria, Virginia, 22314, United States of...","(38.8051095, -77.0470229, 0.0)",38.80511,-77.047023,0.0,POINT (-77.04702 38.80511)
2,Josh,Brooklyn,NY,IIIF,"Brooklyn, NY","(Brooklyn, New York, United States of America,...","(40.6501038, -73.9495823, 0.0)",40.650104,-73.949582,0.0,POINT (-73.94958 40.65010)
3,Gayle,Atlanta,GA,DLF,"Atlanta, GA","(Atlanta, Fulton County, Georgia, United State...","(33.7490987, -84.3901849, 0.0)",33.749099,-84.390185,0.0,POINT (-84.39018 33.74910)
4,Becca,Riverside,California,CLIR,"Riverside, California","(Riverside, Riverside County, California, Unit...","(33.9533546, -117.3961623, 0.0)",33.953355,-117.396162,0.0,POINT (-117.39616 33.95335)


Remove unneeded "working" columns

In [27]:
data = data.drop(['City', 'State', 'address', 'location'], axis=1)
data.head()

Unnamed: 0,Name,Affiliation,point,latitude,longitude,altitude,geometry
0,Wayne,CLIR,"(38.8903961, -77.0841585, 0.0)",38.890396,-77.084159,0.0,POINT (-77.08416 38.89040)
1,Amy,CLIR,"(38.8051095, -77.0470229, 0.0)",38.80511,-77.047023,0.0,POINT (-77.04702 38.80511)
2,Josh,IIIF,"(40.6501038, -73.9495823, 0.0)",40.650104,-73.949582,0.0,POINT (-73.94958 40.65010)
3,Gayle,DLF,"(33.7490987, -84.3901849, 0.0)",33.749099,-84.390185,0.0,POINT (-84.39018 33.74910)
4,Becca,CLIR,"(33.9533546, -117.3961623, 0.0)",33.953355,-117.396162,0.0,POINT (-117.39616 33.95335)


Check if there are locations without latitudes

In [28]:
data.latitude.isnull().sum()
data = data[pd.notnull(data['latitude'])]

Convert to geodataframe

In [29]:
from shapely.geometry import Polygon, Point, MultiPoint

points = gdp.GeoDataFrame(data['geometry'])

line_string = MultiPoint(points.geometry)
polygon = Polygon(line_string).convex_hull
centroid = polygon.centroid
print(centroid)



POINT (-68.30265357890647 39.74072230114307)


In [54]:
icon_map = {
    'CLIR': 'crimson',
    'IIIF': '#3186cc',
    'DLF': 'crimson'
}

marker_map = {
    'CLIR': 'crimson',
    'IIIF': '#3186cc',
    'DLF': 'crimson'
}

In [73]:
m = folium.Map(
    location=[centroid.y, centroid.x],
    tiles='https://{s}.tile.thunderforest.com/pioneer/{z}/{x}/{y}.png?apikey=ed8a8c98442949588501489e7f836831',
    attr='<a href="http://www.thunderforest.com/">Thunderforest</a>',
    zoom_start=3,
#     zoomControl=False
)

radius = 11

# Transparent circle markers
# data.apply(lambda row:folium.CircleMarker(
#     location=[row["latitude"], row["longitude"]],
#     color=None,
#     fill_color = icon_map[row['Affiliation']],
#     radius=radius
# ).add_to(m), axis=1)

# For MarkerIcons
# 

marker_map = {
    'CLIR': 'color="blue"',
#     'IIIF': 'color="green"',
#     'DLF': 'color="darkblue", icon="cloud"'
}

for index, row in data.iterrows():
    print(marker_map[row['Affiliation']] + ' | ' + row['Affiliation'])
    
    folium.Marker(
        location=[row["latitude"], row["longitude"]],
        popup=row['Affiliation'],
        icon=folium.Icon(marker_map[row['Affiliation']])
    ).add_to(m)

# for index, row in data.iterrows():
#     icon = folium.CircleMarker(
#         location=[row["latitude"], row["longitude"]],
#         color=None,
#         fill=True,
#         fill_color = icon_map[row['Affiliation']],
#         radius=radius,
#     ).add_to(m)

m

color="red" | CLIR
color="red" | CLIR


  icon=folium.Icon(marker_map[row['Affiliation']])


KeyError: 'IIIF'

In [14]:
m.save('map.html')
!open map.html