# 05 Maps and Geospatial Data

- Introduction to Folium
- Maps with Markers
- Choropleth Maps

# Downloading and Preparing Data

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the 'Canada by Citizenship' sheet, skipping the first 20 rows
# and the last 2 rows of the sheet.
df_can = pd.read_excel(
    'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/Canada.xlsx',
    sheet_name='Canada by Citizenship',
    skiprows=20,
    skipfooter=2,
)

In [3]:
df_can.head()

Unnamed: 0,Type,Coverage,OdName,AREA,AreaName,REG,RegName,DEV,DevName,1980,...,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013
0,Immigrants,Foreigners,Afghanistan,935,Asia,5501,Southern Asia,902,Developing regions,16,...,2978,3436,3009,2652,2111,1746,1758,2203,2635,2004
1,Immigrants,Foreigners,Albania,908,Europe,925,Southern Europe,901,Developed regions,1,...,1450,1223,856,702,560,716,561,539,620,603
2,Immigrants,Foreigners,Algeria,903,Africa,912,Northern Africa,902,Developing regions,80,...,3616,3626,4807,3623,4005,5393,4752,4325,3774,4331
3,Immigrants,Foreigners,American Samoa,909,Oceania,957,Polynesia,902,Developing regions,0,...,0,0,1,0,0,0,0,0,0,0
4,Immigrants,Foreigners,Andorra,908,Europe,925,Southern Europe,901,Developed regions,0,...,0,0,1,1,0,0,0,0,1,1


In [4]:
df_can.shape

(195, 43)

## Clean up data
>make modifications to the original dataset to create the visualizations.

In [5]:
# Clean up the dataset:
#   * drop the columns that are not needed (eg. REG); errors='ignore' lets the
#     cell be re-run after the columns are already gone
#   * rename the remaining descriptive columns so that they make sense
df_can = (
    df_can
    .drop(columns=['AREA', 'REG', 'DEV', 'Type', 'Coverage'], errors='ignore')
    .rename(columns={'OdName': 'Country', 'AreaName': 'Continent', 'RegName': 'Region'})
)

# for sake of consistency, make all column labels of type string
df_can.columns = [str(label) for label in df_can.columns]

# years that we will be using in this lesson - useful for plotting later on
years = list(map(str, range(1980, 2014)))

# add total column: sum ONLY the year columns. Summing the whole row would mix
# the text columns (Country, Continent, ...) with the counts, which raises a
# TypeError on pandas >= 2.0 (older versions silently skipped the text columns).
df_can['Total'] = df_can[years].sum(axis=1)

print(f'data dimensions: {df_can.shape}')

data dimensions: (195, 39)


# Introduction to Folium

In [6]:
import folium

In [7]:
# define the world map
world_map = folium.Map()

In [8]:
# save map
world_map.save('../figs/05_Maps/world_map.html')

In [9]:
# display world map
world_map

>You can customize this default definition of the world map by specifying the centre of your map and the initial zoom level.

>All locations on a map are defined by their respective Latitude and Longitude values. 

## Create Canada map 
> create a map centered around Canada and play with the zoom level to see how it affects the rendered map.

In [10]:
# define the world map centered around Canada with a low zoom level
world_map = folium.Map(location=[56.130, -106.35], zoom_start=4)

In [11]:
# save map
world_map.save('../figs/05_Maps/canada_map.html')

In [12]:
# display world map
world_map

In [13]:
# create the map with a higher zoom level
# define the world map centered around Canada with a higher zoom level
world_map = folium.Map(location=[56.130, -106.35], zoom_start=8)

In [14]:
# save map
world_map.save('../figs/05_Maps/canada_map_high_zoom.html')

In [15]:
# display world map
world_map

> the higher the zoom level the more the map is zoomed into the given center.

## Create Mexico map 
>Create a map of Mexico with a zoom level of 4.

In [16]:
# define Mexico's geolocation coordinates
mexico_latitude = 23.6345 
mexico_longitude = -102.5528

In [17]:
# define a map centered on Mexico's coordinates with a zoom level of 4
mexico_map = folium.Map(
    location=[mexico_latitude, mexico_longitude],
    zoom_start=4,
)

In [18]:
# save map
mexico_map.save('../figs/05_Maps/mexico_map.html')

In [19]:
# display world map
mexico_map

## Folium map styles
1. Stamen Toner Maps
2. Stamen Terrain Maps

## 1. Stamen Toner Maps

> high-contrast B+W (black and white) maps. 
- They are perfect for data mashups and exploring river meanders and coastal zones.

In [20]:
# create a Stamen Toner map of Canada with a zoom level of 4
# NOTE: recent folium releases no longer ship 'Stamen Toner' as a built-in tile
# name and the original Stamen tile servers were retired, so the tiles are
# requested from Stadia Maps (which now hosts the Stamen styles) with an
# explicit attribution, as folium requires for custom tile URLs.
# NOTE(review): Stadia Maps may require an API key when the map is served from
# anywhere other than localhost - confirm for your deployment.
world_map = folium.Map(
    location=[56.130, -106.35],
    zoom_start=4,
    tiles='https://tiles.stadiamaps.com/tiles/stamen_toner/{z}/{x}/{y}.png',
    attr=(
        '&copy; Stadia Maps, &copy; Stamen Design, '
        '&copy; OpenMapTiles, &copy; OpenStreetMap contributors'
    ),
)

In [21]:
# save map
world_map.save('../figs/05_Maps/stamen_toner_canada_map.html')

In [22]:
# display map
world_map

## 2. Stamen Terrain Maps

>maps that feature hill shading and natural vegetation colors. 
- showcase advanced labeling and linework generalization of dual-carriageway roads.

In [23]:
# create a Stamen Terrain map of Canada with zoom level 4.
# NOTE: recent folium releases no longer ship 'Stamen Terrain' as a built-in
# tile name and the original Stamen tile servers were retired, so the tiles are
# requested from Stadia Maps (which now hosts the Stamen styles) with an
# explicit attribution, as folium requires for custom tile URLs.
# NOTE(review): Stadia Maps may require an API key when the map is served from
# anywhere other than localhost - confirm for your deployment.
world_map = folium.Map(
    location=[56.130, -106.35],
    zoom_start=4,
    tiles='https://tiles.stadiamaps.com/tiles/stamen_terrain/{z}/{x}/{y}.png',
    attr=(
        '&copy; Stadia Maps, &copy; Stamen Design, '
        '&copy; OpenMapTiles, &copy; OpenStreetMap contributors'
    ),
)

In [24]:
# save map
world_map.save('../figs/05_Maps/stamen_terrain_canada_map.html')

In [25]:
# display map
world_map

## Create Mexico map with hill shading and natural vegetation
>Create a map of Mexico to visualize its hill shading and natural vegetation. 
- Use a zoom level of 6.

In [26]:
# define Mexico's geolocation coordinates
mexico_latitude = 23.6345 
mexico_longitude = -102.5528

In [27]:
# define a terrain-style map centered on Mexico with a zoom level of 6
# NOTE: recent folium releases no longer ship 'Stamen Terrain' as a built-in
# tile name and the original Stamen tile servers were retired, so the tiles are
# requested from Stadia Maps (which now hosts the Stamen styles) with an
# explicit attribution, as folium requires for custom tile URLs.
# NOTE(review): Stadia Maps may require an API key when the map is served from
# anywhere other than localhost - confirm for your deployment.
mexico_map = folium.Map(
    location=[mexico_latitude, mexico_longitude],
    zoom_start=6,
    tiles='https://tiles.stadiamaps.com/tiles/stamen_terrain/{z}/{x}/{y}.png',
    attr=(
        '&copy; Stadia Maps, &copy; Stamen Design, '
        '&copy; OpenMapTiles, &copy; OpenStreetMap contributors'
    ),
)

In [28]:
# save map
mexico_map.save('../figs/05_Maps/hill_vegetation_mexico_map.html')

In [29]:
# display world map
mexico_map

# Maps with Markers

# Police department incidents data
>download and import the data on police department incidents

In [30]:
# read online from URL
# url = 'https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/Police_Department_Incidents_-_Previous_Year__2016_.csv'
# df_incidents = pd.read_csv(url)

In [31]:
# df_incidents.head()

In [32]:
# read from local disk
path = '../data/Incidents.csv'
df_incidents = pd.read_csv(path)

In [33]:
df_incidents.head()

Unnamed: 0,IncidntNum,Category,Descript,DayOfWeek,Date,Time,PdDistrict,Resolution,Address,X,Y,Location,PdId
0,120058272,WEAPON LAWS,POSS OF PROHIBITED WEAPON,Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212120
1,120058272,WEAPON LAWS,"FIREARM, LOADED, IN VEHICLE, POSSESSION OR USE",Friday,01/29/2016 12:00:00 AM,11:00,SOUTHERN,"ARREST, BOOKED",800 Block of BRYANT ST,-122.403405,37.775421,"(37.775420706711, -122.403404791479)",12005827212168
2,141059263,WARRANTS,WARRANT ARREST,Monday,04/25/2016 12:00:00 AM,14:59,BAYVIEW,"ARREST, BOOKED",KEITH ST / SHAFTER AV,-122.388856,37.729981,"(37.7299809672996, -122.388856204292)",14105926363010
3,160013662,NON-CRIMINAL,LOST PROPERTY,Tuesday,01/05/2016 12:00:00 AM,23:50,TENDERLOIN,NONE,JONES ST / OFARRELL ST,-122.412971,37.785788,"(37.7857883766888, -122.412970537591)",16001366271000
4,160002740,NON-CRIMINAL,LOST PROPERTY,Friday,01/01/2016 12:00:00 AM,00:30,MISSION,NONE,16TH ST / MISSION ST,-122.419672,37.76505,"(37.7650501214668, -122.419671780296)",16000274071000


Each row consists of 13 features:

1. **IncidntNum**: Incident Number
2. **Category**: Category of crime or incident
3. **Descript**: Description of the crime or incident
4. **DayOfWeek**: The day of week on which the incident occurred
5. **Date**: The Date on which the incident occurred
6. **Time**: The time of day on which the incident occurred
7. **PdDistrict**: The police department district
8. **Resolution**: The resolution of the crime in terms whether the perpetrator was arrested or not
9. **Address**: The closest address to where the incident took place
10. **X**: The longitude value of the crime location
11. **Y**: The latitude value of the crime location
12. **Location**: A tuple of the latitude and the longitude values
13. **PdId**: The police department ID

In [34]:
# find out how many entries there are in our dataset.
df_incidents.shape

(150500, 13)

>the dataframe consists of 150,500 crimes, which took place in the year 2016. 
>just work with the first 100 incidents in this dataset to reduce computational cost

In [35]:
# get the first 100 crimes in the df_incidents dataframe
limit = 100
df_incidents = df_incidents.iloc[0:limit, :]

In [36]:
# confirm that our dataframe now consists only of 100 crimes.
df_incidents.shape

(100, 13)

## San Francisco incidents

>visualize where the crimes took place in the city of San Francisco. 
- use the default style
- initialize the zoom level to 12.

In [37]:
# San Francisco latitude and longitude values
latitude = 37.77
longitude = -122.42

In [38]:
# create map and display it
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)

In [39]:
# save map
sanfran_map.save('../figs/05_Maps/sanfran_map.html')

In [40]:
# display the map of San Francisco
sanfran_map

In [41]:
# To superimpose the crime locations onto the map, the markers are collected
# in a feature group, which is later added to sanfran_map as a single layer.

# empty feature group that will hold one marker per incident
incidents = folium.FeatureGroup()

In [42]:
# shared appearance of every circle marker
circle_style = dict(
    radius=5,  # how big the circle markers are drawn
    color='yellow',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)

# add one circle marker per crime (Y = latitude, X = longitude) to the feature group
for lat, lng in zip(df_incidents['Y'], df_incidents['X']):
    incidents.add_child(folium.CircleMarker([lat, lng], **circle_style))

In [43]:
# add incidents to map
sanfran_map.add_child(incidents)

In [44]:
# Next, add pop-up text to the markers: each marker will show the category of
# the crime when it is clicked.
# Start from a fresh feature group for the incidents in the dataframe.
incidents = folium.FeatureGroup()

In [45]:
# shared appearance of every circle marker
circle_style = dict(
    radius=5,  # how big the circle markers are drawn
    color='yellow',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)

# fill the new feature group with one circle marker per crime
for lat, lng in zip(df_incidents['Y'], df_incidents['X']):
    circle = folium.CircleMarker([lat, lng], **circle_style)
    incidents.add_child(circle)

In [46]:
# gather the pieces needed for the pop-up markers, starting with the
# latitude (Y column) of every incident; preview the first five values
latitudes = df_incidents['Y'].tolist()
latitudes[:5]

[37.775420706711,
 37.775420706711,
 37.729980967299596,
 37.78578837668879,
 37.7650501214668]

In [47]:
# longitude (X column) of every incident; preview the first five values
longitudes = df_incidents['X'].tolist()
longitudes[:5]

[-122.40340479147899,
 -122.40340479147899,
 -122.388856204292,
 -122.412970537591,
 -122.419671780296]

In [48]:
# crime category of every incident, used as the pop-up text
labels = df_incidents['Category'].tolist()
labels[:5]

['WEAPON LAWS', 'WEAPON LAWS', 'WARRANTS', 'NON-CRIMINAL', 'NON-CRIMINAL']

In [49]:
# place a standard location marker on the map for every incident,
# with the crime category as its pop-up text
for lat, lng, label in zip(latitudes, longitudes, labels):
    sanfran_map.add_child(folium.Marker(location=[lat, lng], popup=label))

In [50]:
# add incidents to map
sanfran_map.add_child(incidents)

>Now you can see which crime category occurred at each marker by clicking on it.

>However, the map is now very congested with all these markers.

## Solve congested map problem

### Solution 1: Remove location markers and just add the text to the circle markers themselves

In [51]:
# create map and display it
sanfran_map = folium.Map(location=[latitude, longitude], zoom_start=12)

In [52]:
# appearance shared by all circle markers
circle_style = dict(
    radius=5,  # how big the circle markers are drawn
    color='yellow',
    fill=True,
    fill_color='blue',
    fill_opacity=0.6,
)

# add one circle marker per crime directly to the map, carrying the
# crime category as its pop-up text
incident_rows = zip(df_incidents['Y'], df_incidents['X'], df_incidents['Category'])
for lat, lng, label in incident_rows:
    sanfran_map.add_child(
        folium.CircleMarker([lat, lng], popup=label, **circle_style)
    )

# save map
sanfran_map.save('../figs/05_Maps/incidents_sanfran_map_no_location_markers.html')

In [53]:
# show map
sanfran_map

### Solution 2: Group the markers into different clusters.
>Each cluster is then represented by the number of crimes in each neighborhood. 
>These clusters can be thought of as pockets of San Francisco which you can then analyze separately.

In [54]:
# instantiating a MarkerCluster object 
# adding all the data points in the dataframe to this object

from folium import plugins

In [55]:
# start  with a clean copy of the map of San Francisco
sanfran_map = folium.Map(location = [latitude, longitude], zoom_start = 12)

In [56]:
# create a marker cluster for the incidents and attach it to the map
incidents = (
    plugins.MarkerCluster()
    .add_to(sanfran_map)
)

In [57]:
# add every incident to the marker cluster as a standard marker whose
# pop-up text is the crime category
incident_rows = zip(df_incidents['Y'], df_incidents['X'], df_incidents['Category'])
for lat, lng, label in incident_rows:
    cluster_marker = folium.Marker(location=[lat, lng], icon=None, popup=label)
    incidents.add_child(cluster_marker)

# save map
sanfran_map.save('../figs/05_Maps/incidents_sanfran_map_clusters.html')

In [58]:
# display map
sanfran_map

>Notice how when you zoom out all the way, all markers are grouped into one cluster, the global cluster, of 100 markers or crimes, which is the total number of crimes in our dataframe. 

>Once you start zooming in, the global cluster will start breaking up into smaller clusters. 

>Zooming in all the way will result in individual markers.

# Choropleth Maps

>thematic map in which areas are shaded or patterned in proportion to the measurement of the statistical variable being displayed on the map

>provides an easy way to visualize how a measurement varies across a geographic area or it shows the level of variability within a region. 

>A typical example is a Choropleth map of the US depicting the population per square mile of each state.

## Immigration to Canada Choropleth map
>create Choropleth map of the world depicting immigration from various countries to Canada.

In [59]:
df_can.head()

Unnamed: 0,Country,Continent,Region,DevName,1980,1981,1982,1983,1984,1985,...,2005,2006,2007,2008,2009,2010,2011,2012,2013,Total
0,Afghanistan,Asia,Southern Asia,Developing regions,16,39,39,47,71,340,...,3436,3009,2652,2111,1746,1758,2203,2635,2004,58639
1,Albania,Europe,Southern Europe,Developed regions,1,0,0,0,0,0,...,1223,856,702,560,716,561,539,620,603,15699
2,Algeria,Africa,Northern Africa,Developing regions,80,67,71,69,63,44,...,3626,4807,3623,4005,5393,4752,4325,3774,4331,69439
3,American Samoa,Oceania,Polynesia,Developing regions,0,1,0,0,0,0,...,0,1,0,0,0,0,0,0,0,6
4,Andorra,Europe,Southern Europe,Developed regions,0,0,0,0,0,0,...,0,1,1,0,0,0,0,1,1,15


In [60]:
df_can.shape

(195, 39)

## Download countries geojson file
[world_countries.json](https://s3-api.us-geo.objectstorage.softlayer.net/cf-courses-data/CognitiveClass/DV0101EN/labs/Data_Files/world_countries.json)

In [61]:
world_geo = r'../data/world_countries.json' # geojson file
world_geo


'../data/world_countries.json'

In [62]:
# create a plain world map
# NOTE: the 'Mapbox Bright' tileset was retired and is no longer a built-in
# folium tile name, so a light built-in basemap ('CartoDB positron') is used
# instead; it keeps the choropleth colors easy to read.
world_map = folium.Map(location=[0, 0], zoom_start=2, tiles='CartoDB positron')

>use the Choropleth class with the following main parameters:
1. geo_data, which is the GeoJSON file.
2. data, which is the dataframe containing the data.
3. columns, which represents the columns in the dataframe that will be used to create the Choropleth map.
4. key_on, which is the key or variable in the GeoJSON file that contains the name of the variable of interest. To determine that, you will need to open the GeoJSON file using any text editor and note the name of the key or variable that contains the name of the countries, since the countries are our variable of interest. 
- In this case, **name** is the key in the GeoJSON file that contains the name of the countries. Note that this key is case-sensitive, so you need to pass it exactly as it exists in the GeoJSON file.

In [63]:
# generate choropleth map using the total immigration of each country to Canada from 1980 to 2013
folium.Choropleth(
    geo_data=world_geo,
    data=df_can,
    columns=['Country', 'Total'],    
    key_on='feature.properties.name',
    fill_color='YlOrRd', 
    fill_opacity=0.7, 
    line_opacity=0.2,
    legend_name='Immigration to Canada'
).add_to(world_map)

<folium.features.Choropleth at 0x7fbcd1dc36a0>

In [64]:
# display map
world_map

>the darker the color of a country and the closer the color to red, the higher the number of immigrants from that country. 

>Accordingly, the highest immigration over the course of 34 years (from 1980 to 2013) was from China, India, and the Philippines, followed by Poland, Pakistan, and interestingly, the US.

The legend may display a negative boundary or threshold.
- Fix that by defining our own thresholds, starting from the minimum total immigration instead of a negative value such as -6,918!

In [65]:
world_geo = r'../data/world_countries.json'

In [66]:
# Build our own legend thresholds: 6 linearly spaced integer bin edges running
# from the minimum total immigration to the maximum total immigration.
threshold_scale = np.linspace(
    df_can['Total'].min(),
    df_can['Total'].max(),
    6,
    dtype=int,
).tolist()  # folium expects a plain list of bin edges
# make sure that the last edge is greater than the maximum immigration, so the
# largest value falls inside the final bin
threshold_scale[-1] = threshold_scale[-1] + 1

# start from a fresh plain world map
# NOTE: the 'Mapbox Bright' tileset was retired and is no longer a built-in
# folium tile name, so the built-in 'CartoDB positron' basemap is used instead.
world_map = folium.Map(location=[0, 0], zoom_start=2, tiles='CartoDB positron')

# `bins` is the folium.Choropleth parameter for custom thresholds; it replaces
# the deprecated `threshold_scale` keyword. The former `reset` keyword is not a
# folium.Choropleth parameter and has been dropped.
folium.Choropleth(
    geo_data=world_geo,
    data=df_can,
    columns=['Country', 'Total'],
    key_on='feature.properties.name',
    bins=threshold_scale,
    fill_color='YlOrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Immigration to Canada',
).add_to(world_map)

world_map.save('../figs/05_Maps/immigration_choropleth_map.html')

world_map