In [2]:
import geobr
import pandas as pd
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster, HeatMap

# Download dataset
First of all, get the districts map. When you download it from the geobr package, it covers the whole country, so you will need to subset it to your region of interest. In this case, I chose the city of São Paulo.

In [3]:
# Download the neighbourhood polygons and keep only the rows whose
# municipality name is São Paulo.
bairros = (
    geobr.read_neighborhood(verbose=True)
    .loc[lambda df: df['name_muni'] == 'São Paulo']
)
# Inspect the column types before going any further.
bairros.dtypes

code_muni             float64
name_muni              object
name_neighborhood      object
code_neighborhood      object
code_subdistrict       object
name_subdistrict       object
code_district          object
name_district          object
code_state             object
abbrev_state           object
reference_geom         object
geometry             geometry
dtype: object

The schools dataset is a really large one, so the download may take a while. Then, subset again.
Note that I always take a look at the column types. I think it is good practice: you can anticipate some problems and avoid unnecessary frustration later on.

In [4]:
# Download the schools dataset and keep only the rows whose municipality
# name is São Paulo.
# The explicit .copy() makes `escolas` an independent frame rather than a
# view-like slice of the full download, so adding columns to it later does
# not raise pandas' SettingWithCopyWarning.
escolas = (
    geobr.read_schools()
    .loc[lambda df: df['name_muni'] == 'São Paulo']
    .copy()
)
# Inspect the column types before going any further.
escolas.dtypes

abbrev_state                        object
name_muni                           object
code_school                          int64
name_school                         object
education_level                     object
education_level_others              object
admin_category                      object
address                             object
phone_number                        object
government_level                    object
private_school_type                 object
private_government_partnership      object
regulated_education_council         object
service_restriction                 object
size                                object
urban                               object
location_type                       object
date_update                         object
geometry                          geometry
dtype: object

# Data preparation
All right, with the data in your hands, let's do some preparation first.
In the schools dataset, the geometry field holds a Point (yes, with a capital P). Let's split it into two columns, longitude and latitude, by reading each point's x and y coordinates.

In [5]:
# Split the point geometry into plain longitude / latitude columns.
# GeoSeries.x / GeoSeries.y are the vectorised accessors for point
# coordinates, replacing a per-row Python lambda.
# NOTE(review): they are expected to yield NaN for empty or missing points
# (which the next step relies on) - confirm on the installed geopandas.
# .assign returns a new frame instead of writing into a filtered slice, which
# avoids SettingWithCopyWarning and makes the cell safe to re-run.
escolas = escolas.assign(
    lon=escolas.geometry.x,
    lat=escolas.geometry.y,
)

Now, if you examine the data row by row, you may notice that we're not entirely in luck with this dataset: some geometry Points are empty. With great regret, we'll need to throw those away. You can take a minute if you need it. I totally get you.

In [6]:
# Keep only the schools that have a latitude value.
escolas = escolas.loc[escolas['lat'].notna()]
# Count the remaining missing values in every column.
escolas.isna().sum()

abbrev_state                      0
name_muni                         0
code_school                       0
name_school                       0
education_level                   0
education_level_others            0
admin_category                    0
address                           0
phone_number                      0
government_level                  0
private_school_type               0
private_government_partnership    0
regulated_education_council       0
service_restriction               0
size                              0
urban                             0
location_type                     0
date_update                       0
geometry                          0
lon                               0
lat                               0
dtype: int64

OK, with the data ready, let's get our hands on the map. But first, make sure that when you plot it, it appears right in the center of the screen. For that, take the medians of the longitude and latitude.

In [7]:
# Median longitude (x) and latitude (y) of the schools, used as the map centre.
x, y = (escolas[eixo].median() for eixo in ('lon', 'lat'))
print(x, y)

-46.6368685 -23.5638422


# The first map

The first map will be a simple one, showing just the districts. The steps are:
1. Create your base map, setting the center and the default zoom.
2. Bind the data contained in the GeoDataFrame.
3. Add it to your base map.
4. Plot it. :)

In [8]:
# The "cheat": the centre latitude is shifted by 0.1 degrees relative to the
# median school location before the map is created.
base = folium.Map(location=[y - 0.1, x], zoom_start=10, tiles='OpenStreetMap')
# Draw the district polygons and attach them to the base map.
geo_sp = folium.Choropleth(geo_data=bairros).add_to(base)
# Last expression of the cell: render the map inline.
base

In [9]:
base = folium.Map([y-0.1, x], zoom_start=10, tiles='OpenStreetMap')

# Every row of `bairros` was filtered to the same municipality, so the first
# row's name_muni identifies the whole layer.
nome_municipio = bairros.name_muni.iloc[0]

# `name` is the layer's label and must be a single string; the original passed
# the whole name_district Series here.
geo_sp = folium.GeoJson(bairros, name=nome_municipio)
# One popup for the entire layer, showing the municipality name.
geo_sp.add_child(folium.Popup(nome_municipio))
geo_sp.add_to(base)
base

## District names

Nice, right?! The next one will show each district's name and the number of schools in it.
Here we use the Popup class to add the text that we want to show.

From now on, the maps start to get pretty heavy for a Jupyter notebook, so we will just save them to an HTML file and view them from there.

In [10]:
base = folium.Map([y-.1, x], zoom_start=10, tiles='OpenStreetMap')

# One GeoJson layer per row of `bairros`, each with a popup giving the
# district name and the number of schools that fall inside its polygon.
for ii, bairro in enumerate(bairros.name_district):
    # .iloc[[ii]] keeps a one-row GeoDataFrame (explicit positional selection).
    geo_sp = folium.GeoJson(bairros.iloc[[ii]], name=bairro)
    n_escolas = escolas['geometry'].within(bairros.iloc[ii].geometry).sum()
    # Popup content is rendered as HTML, where a bare newline collapses to a
    # space; <br> puts the school count on its own line.
    label = f'{bairro}<br>Escolas: {n_escolas}'
    folium.Popup(label).add_to(geo_sp)
    geo_sp.add_to(base)

# Layer toggle so individual districts can be shown or hidden.
folium.LayerControl().add_to(base)
base.save('Escolas por bairro.html')

## What if we pin the schools on the map?

So that the pins don't pile up and overload the map, let's take a small sample.

Right, I know that I'm repeating code, but I just want to be as clear as possible. So each code cell contains the whole process needed for the plot.

In [11]:
base = folium.Map([y-0.1, x], zoom_start=10, tiles='OpenStreetMap')

# Municipality outline: black border, fully transparent fill.
sp = folium.Choropleth(geobr.read_municipality(code_muni=3550308),
                       name="São Paulo",
                       line_color="Black",
                       line_weight=3,
                       fill_opacity=0)
sp.add_to(base)

# Fixed random_state so re-running the notebook regenerates the same map.
# The size is capped at the number of available schools because sampling
# without replacement fails when more rows are requested than exist.
amostra = escolas.sample(n=min(100, len(escolas)), random_state=42)

# One red pin per sampled school; the popup shows its name, government level
# and the education_level_others field.
for escola in amostra.itertuples():
    base.add_child(
        folium.Marker(
            location=[escola.lat, escola.lon],
            popup=f'<h4>{escola.name_school}</h4> <h5>{escola.government_level}</h5> <p>{escola.education_level_others}</p>',
            icon=folium.Icon(color='red', icon='info-sign')
        )
    )

folium.LayerControl().add_to(base)

base.save('Escolas Marcadores.html')


## You want all the data plotted? You got it.

The map will look better if we cluster the pins first.

In [12]:
base = folium.Map([y-0.1, x], zoom_start=10, tiles='OpenStreetMap')

# Municipality outline: black border, fully transparent fill.
sp = folium.Choropleth(geobr.read_municipality(code_muni=3550308),
               name="São Paulo",
               line_color="Black",
               line_weight=3,
               fill_opacity=0)
sp.add_to(base)

# One GeoJson layer per row of `bairros`, with a popup giving the district
# name and the number of schools inside its polygon.
for ii, bairro in enumerate(bairros.name_district):
    geo_sp = folium.GeoJson(bairros[ii:ii+1], name=bairro)
    n_escolas = escolas['geometry'].within(bairros.iloc[ii].geometry).sum()
    label = f'Bairro: {bairro}\n - Escolas: {n_escolas}'
    folium.Popup(label).add_to(geo_sp)
    geo_sp.add_to(base)

# Named so the layer control shows a readable entry instead of an
# auto-generated element id.
cluster = MarkerCluster(name='Escolas')

# Fixed random_state so re-running the notebook regenerates the same map.
# The size is capped at the number of available schools because sampling
# without replacement fails when more rows are requested than exist.
amostra = escolas.sample(n=min(1000, len(escolas)), random_state=42)

# Pins go into the cluster (not straight onto the map) so nearby schools are
# grouped together.
for escola in amostra.itertuples():
    cluster.add_child(
        folium.Marker(
            location=[escola.lat, escola.lon],
            popup=f"<h4>{escola.name_school}</h4> <h5>{escola.government_level}</h5> <p>{escola.education_level_others}</p>",
            icon=folium.Icon(color='red', icon='info-sign')
        )
    )

base.add_child(cluster)

folium.LayerControl().add_to(base)

base.save('Escolas Clusterizadas.html')

## The heatmap.

A simple way to create a heat map is to build a dataset containing the data you want to show.

And, yes, I'm rushing the explanation because it's late now, I'm sleepy and pretty hungry too. It's time for pizza, babe.
See you soon.

Best regards.

In [12]:
# Count the schools inside each district polygon once; the counts feed both
# the heat-map weights and the popup labels below (the original ran this
# point-in-polygon test twice per district).
contagens = [
    escolas['geometry'].within(geometria).sum()
    for geometria in bairros['geometry']
]

# Heat-map rows: [centroid latitude, centroid longitude, share of all schools].
total_escolas = escolas.shape[0]
data = [
    [geometria.centroid.y, geometria.centroid.x, contagem / total_escolas]
    for geometria, contagem in zip(bairros['geometry'], contagens)
]

base = folium.Map([y, x], zoom_start=11, tiles='OpenStreetMap')

# Municipality outline: black border, fully transparent fill.
sp = folium.Choropleth(geobr.read_municipality(code_muni=3550308),
               name="São Paulo",
               line_color="Black",
               line_weight=3,
               fill_opacity=0)
sp.add_to(base)

# One GeoJson layer per row of `bairros`, with a popup giving the district
# name and its school count.
for ii, (bairro, n_escolas) in enumerate(zip(bairros.name_district, contagens)):
    geo_sp = folium.GeoJson(bairros[ii:ii+1], name=bairro)
    label = f'Bairro: {bairro}\n - Escolas: {n_escolas}'
    folium.Popup(label).add_to(geo_sp)
    geo_sp.add_to(base)

HeatMap(data, name="Escolas").add_to(base)

folium.LayerControl().add_to(base)

base.save('Escolas HeatMap.html')