How many people live near dangerous dams in Brazil and where in the territory are they distributed? Let's find out.

#### Importing packages

In [3]:
import geobr
import geopandas as gpd
import numpy as np
import pandas as pd
from shapely.geometry import Point
import tobler
from unidecode import unidecode

In [4]:
# Hide the warning whose message mentions the "initial implementation of
# Parquet" (presumably emitted by the Feather/Parquet I/O used below).
import warnings

warnings.filterwarnings('ignore', message='.*initial implementation of Parquet.*')

#### Data preparation

In [5]:
def read_dams(path="../../data/brazil/snisb/dam-report-07022021.csv"):
    '''
    Reads the dam safety dataset using
    the appropriate configurations: Latin5
    encoding, ';' as the field separator and
    the first two lines of the file skipped.
    ---
    Parameters:

    path -> Location of the CSV report. Defaults to the
    copy of the report stored in the data directory.
    ---
    Returns:

    A dataframe with the raw, unprocessed report.
    '''

    return pd.read_csv(path, encoding='Latin5', sep=';', skiprows=[0, 1])

In [6]:
def make_gdf(df):
    '''
    Converts the dataframe to a geodataframe
    with one point geometry per row, built from
    the longitude and latitude columns.

    The points are created in a single vectorized
    call and the input dataframe is not modified.
    No CRS is assigned here.
    ---
    Parameters:

    df -> The dam safety dataframe. It must have numeric
    `longitude` and `latitude` columns.
    ---
    Returns:

    A new geodataframe with a `geometry` column.
    '''

    # points_from_xy takes x (longitude) first, then y (latitude)
    points = gpd.points_from_xy(df.longitude, df.latitude)

    # Work on a copy so the caller's dataframe does not gain a geometry column
    return gpd.GeoDataFrame(df.copy(), geometry=points)

In [7]:
def crs_to_area(gdf):
    '''
    Reprojects a geodataframe to the
    Brazilian Albers equal-area projection
    (units in meters), which is suited to
    area and distance computations.
    ---
    Parameters:

    gdf -> A geodataframe
    ---
    Returns:

    Whatever `gdf.to_crs` returns for the projection below.
    '''

    # WKT definition of the projection, split by component for readability
    albers_wkt = (
        'PROJCS["Brasil_Albers_Equal_Area",'
        'GEOGCS["GCS_WGS_1984",'
        'DATUM["D_SIRGAS_2000",SPHEROID["GRS_1980",6378137.0,298.257222101]],'
        'PRIMEM["Greenwich",0.0],'
        'UNIT["Degree",0.0174532925199433]],'
        'PROJECTION["Albers"],'
        'PARAMETER["false_easting",5000000.0],'
        'PARAMETER["false_northing",10000000.0],'
        'PARAMETER["central_meridian",-54.0],'
        'PARAMETER["standard_parallel_1",-2.0],'
        'PARAMETER["standard_parallel_2",-22.0],'
        'PARAMETER["latitude_of_origin",-12.0],'
        'UNIT["Meter",1.0]]'
    )

    return gdf.to_crs(albers_wkt)

In [8]:
def crs_to_coords(gdf):
    '''
    Reprojects a geodataframe to EPSG:4674,
    the geographic (latitude/longitude) CRS
    used as the Brazilian standard.
    ---
    Parameters:

    gdf -> A geodataframe
    ---
    Returns:

    Whatever `gdf.to_crs` returns for EPSG:4674.
    '''

    target_crs = "EPSG:4674"

    return gdf.to_crs(target_crs)

In [9]:
def standardize_columns(df):
    '''
    Normalizes the column names in place: each name
    is passed through unidecode (ASCII transliteration),
    lowercased and stripped of surrounding whitespace.
    ---
    Parameters:

    df -> The dam safety dataframe
    ---
    Returns:

    The same dataframe, with the renamed columns.
    '''

    def clean(name):
        # Same three steps, in the same order, applied to a single name
        return unidecode(name).lower().strip()

    df.columns = df.columns.map(clean)

    return df

In [10]:
def fix_separators(df):
    '''
    The latitude, longitude and capacidade_hm3 columns
    are stored as strings that use ',' as the decimal
    separator. This function swaps the separator for '.'
    and casts the three columns to float, in place.
    ---
    Parameters:

    df -> The dam safety dataframe
    ---
    Returns:

    The same dataframe, with the three columns as floats.
    '''

    for column in ('latitude', 'longitude', 'capacidade_hm3'):
        with_dot = df[column].str.replace(",", ".")
        df[column] = with_dot.astype(float)

    return df

In [11]:
def classify_risky_dams(df):
    '''
    Flags, in a new boolean column named
    high_risk_high_damage, the dams whose risk
    category and associated potential damage
    are both 'Alto' (high).
    ---
    Parameters:

    df -> The dam safety dataframe
    ---
    Returns:

    The same dataframe, with the extra column.
    '''

    high_risk = df['categoria_de_risco'] == 'Alto'
    high_damage = df['dano_potencial_associado'] == 'Alto'

    # Stored as a plain boolean array, one value per row
    df['high_risk_high_damage'] = (high_risk & high_damage).to_numpy()

    return df

In [12]:
def create_buffer(gdf, r):
    '''
    Replaces, in place, every geometry of
    the geodataframe with a buffer of radius
    r around it.
    ---
    Parameters:

    gdf -> A geodataframe
    r -> The buffer radius, passed as is to `buffer`
    ---
    Returns:

    The same geodataframe, with the buffered geometries.
    '''

    buffered = gdf.geometry.buffer(r)
    gdf.geometry = buffered

    return gdf

In [13]:
def get_dams():
    '''
    Builds the dangerous dams dataset by running,
    in sequence, all the preparation steps: read,
    clean, georeference, classify, filter, reproject
    and buffer.
    ---
    Returns:

    A geodataframe with one 1000-unit buffer (meters in
    the equal-area CRS) for each dam that has both a high
    risk category and a high associated potential damage.
    '''

    dams = (
        read_dams()
        .pipe(standardize_columns)
        .pipe(fix_separators)
        .pipe(make_gdf)
        .pipe(classify_risky_dams)
    )

    # Keeps only the rows flagged by classify_risky_dams
    risky = dams[dams.high_risk_high_damage]

    # The coordinates come as plain lat/lon: declare them as EPSG:4674
    # (Brazilian standard) before moving to the equal-area projection
    risky = risky.set_crs("EPSG:4674")
    risky = crs_to_area(risky)

    return create_buffer(risky, 1000)

In [14]:
# Runs all the data reading/cleaning routines and save it to disk
# This takes some time to do, so let's save it on disk for further runs

### IN CASE THIS IS THE FIRST TIME YOU ARE RUNNING THE SCRIPT,
### UNCOMMENT THE NEXT LINES 

#dangerous_dams = get_dams()
#dangerous_dams.to_feather("../../data/brazil/snisb/dangerous-dams.feather")

In [15]:
# On later runs, load the prepared dangerous dams from disk (much faster)
dangerous_dams = gpd.read_feather(
    "../../data/brazil/snisb/dangerous-dams.feather"
)

Let's also read the population grid.

In [16]:
# Loads the population grid and reprojects it to the same CRS as the dam buffers
grid = gpd.read_feather(
    "../../data/brazil/grade/dasymetric-interpolation.feather"
).to_crs(dangerous_dams.crs)

And both shapefiles and population counts for cities, states and regions.

In [19]:
### IN CASE THIS IS THE FIRST TIME YOU ARE RUNNING THE SCRIPT,
### UNCOMMENT THE NEXT LINES 

# regions = geobr.read_region(2020)
# regions = regions.to_crs(grid.crs)
# regions_pop = pd.read_csv("../../data/brazil/pop/region.csv", sep=";")
# regions = regions.merge(regions_pop, left_on='code_region', right_on='code')
# regions.to_feather("../../data/brazil/geobr/regions.feather")

# states = geobr.read_state('all', 2020)
# states = states.to_crs(grid.crs)
# states_pop = pd.read_csv("../../data/brazil/pop/state.csv", sep=";")
# states = states.merge(states_pop, left_on='code_state', right_on='code')
# states.to_feather("../../data/brazil/geobr/states.feather")

# cities = geobr.read_municipality('all', 2020)
# cities['code_muni'] = cities.code_muni.astype(int).astype(str).str.slice(0, -1) # need to merge on the first 6 digits
# cities = cities.to_crs(grid.crs)
# cities_pop = pd.read_csv("../../data/brazil/pop/city.csv", sep=";", dtype={'code': str})
# cities = cities.merge(cities_pop, left_on='code_muni', right_on='code')
# cities.to_feather("../../data/brazil/geobr/cities.feather")

In [21]:
# On later runs, load the regions, states and cities from disk (much faster)
regions, states, cities = (
    gpd.read_feather(path)
    for path in (
        "../../data/brazil/geobr/regions.feather",
        "../../data/brazil/geobr/states.feather",
        "../../data/brazil/geobr/cities.feather",
    )
)

And we also need to select all the grid squares that are near at least one dangerous dam.

In [22]:
### IN CASE THIS IS THE FIRST TIME YOU ARE RUNNING THE SCRIPT,
### UNCOMMENT THE NEXT LINES 

# # Spatial join will result in one line for each grid within a dam radius.
# # This means that we need to remove duplicate grid squares, which are nearby more than one dam

# grid_squares_near_dangerous_dams = gpd.sjoin(grid, dangerous_dams)
# grid_squares_near_dangerous_dams = grid_squares_near_dangerous_dams.drop_duplicates(subset='ID_UNICO').reset_index()
# grid_squares_near_dangerous_dams = grid_squares_near_dangerous_dams[
#                                                              # We keep only grid-relevant info
#                                                              ['ID_UNICO', 'QUADRANTE', 'MASC', 'FEM',
#                                                               'POP', 'DOM_OCU', 'pct_literate_residents',
#                                                               'pct_black_and_pardo_residents',
#                                                               'permanent_household_nominal_mean_income',
#                                                               'pct_private_households_under_minimum_wage',
#                                                               'geometry']
#                                                         ]
# grid_squares_near_dangerous_dams.to_feather(
#     "../../data/brazil/grade/squares-near-dangerous-dams.feather"
# )

In [23]:
# On later runs, load the pre-selected grid squares from disk
grid_squares_near_dangerous_dams = gpd.read_feather(
    "../../data/brazil/grade/squares-near-dangerous-dams.feather"
)

#### Analysis

How many Brazilians live near dangerous dams?

In [24]:
# Simplest answer: add up the population of every grid square near a dangerous dam
grid_squares_near_dangerous_dams["POP"].sum()

937649

How many Brazilians in each region live near dangerous dams?

In [25]:
def people_by_geo_group(grid_squares, geo_group, groupby_columns):
    '''
    Counts how many people live near dangerous dams in
    each unit of a geographical division of Brazil
    (regions, states or cities).

    Caveat: a grid square that sits on the border between
    two units is matched to both of them by the spatial join
    and is kept in both. Splitting such squares properly would
    be a lot of work for a small difference, so the sum of the
    counts returned here can be slightly above the national
    total computed directly from the grid squares.
    ---
    Parameters:

    grid_squares -> A geodataframe with the grid squares whose
    population (column POP) we want to count.
    geo_group -> A geodataframe with the geometries of the relevant
    geographic division.
    groupby_columns -> List of columns of the geo_group geodataframe
    that identify each unit of the division. The population is
    summed for each combination of their values.
    ---
    Returns:

    A dataframe with the groupby columns and the POP total,
    sorted from the most to the least populated unit.
    '''

    squares_with_unit = gpd.sjoin(grid_squares, geo_group)
    population = squares_with_unit.groupby(groupby_columns)["POP"].sum()
    ranked = population.sort_values(ascending=False)

    return ranked.to_frame().reset_index()



In [26]:
# Population living near dangerous dams, totaled by region
people_near_dams_by_region = people_by_geo_group(
    grid_squares=grid_squares_near_dangerous_dams,
    geo_group=regions,
    groupby_columns=['code_region', 'name_region'],
)
display(people_near_dams_by_region)

Unnamed: 0,code_region,name_region,POP
0,2.0,Nordeste,562123
1,3.0,Sudeste,255433
2,1.0,Norte,82040
3,4.0,Sul,22905
4,5.0,Centro Oeste,15148


Let's repeat this for the states and cities.

In [27]:
# Population living near dangerous dams, totaled by state
people_near_dams_by_state = people_by_geo_group(
    grid_squares=grid_squares_near_dangerous_dams,
    geo_group=states,
    groupby_columns=['code_state', 'name_state'],
)
display(people_near_dams_by_state)

Unnamed: 0,code_state,name_state,POP
0,26.0,Pernambuco,161531
1,29.0,Bahia,161061
2,24.0,Rio Grande Do Norte,133067
3,31.0,Minas Gerais,97722
4,35.0,São Paulo,84117
5,25.0,Paraíba,82713
6,15.0,Pará,66372
7,33.0,Rio De Janeiro,38994
8,32.0,Espírito Santo,34600
9,41.0,Paraná,15009


In [28]:
# Population living near dangerous dams, totaled by city (top 10 shown)
people_near_dams_by_city = people_by_geo_group(
    grid_squares=grid_squares_near_dangerous_dams,
    geo_group=cities,
    groupby_columns=['code_muni', 'name_muni'],
)
display(people_near_dams_by_city.head(10))

Unnamed: 0,code_muni,name_muni,POP
0,310620,Belo Horizonte,86022
1,292740,Salvador,40415
2,260790,Jaboatão Dos Guararapes,36842
3,260345,Camaragibe,21358
4,355240,Sumaré,20987
5,330320,Nilópolis,18528
6,330285,Mesquita,17252
7,150220,Capanema,15654
8,352390,Itu,15331
9,351620,Franca,14610


How many people per 10,000 inhabitants live near a dangerous dam in each region, state and city?

In [29]:
def per_capita_ratios(numerator_df, numerator_column, denominator_df, denominator_column, merge_column, ratio=1):
    '''
    Computes per capita ratios using one dataframe
    with values for the population that was subject
    to an event and one dataframe for the total
    population in the area.
    ---
    Parameters:

    numerator_df -> Dataframe representing the popu-
    lation in a given area that went through some event.

    numerator_column -> String representing the column name
    which contains the population values.

    denominator_df -> Dataframe representing the total
    population in a given area.

    denominator_column -> String representing the column name
    which contains the population values.

    merge_column -> String (or list of strings) representing the
    common column(s) between the two dataframes through which a
    merge will be performed.

    ratio -> Sets a multiplier for the ratio. e.g, if
    ratio == 10000, the result will show a ratio for each 10,000
    people. Defaults to 1.
    ---
    Returns:

    The merged dataframe, sorted from the highest to the lowest
    ratio. The numerator column is renamed to
    people_near_dangerous_dams, the denominator column to
    total_population, and the ratio is stored in a column named
    "<ratio>_ratio" (e.g. 10000_ratio).
    '''

    merge_keys = [merge_column] if isinstance(merge_column, str) else list(merge_column)

    # Only columns present on both sides (other than the merge keys) are
    # duplicated by the merge, the right-hand copy receiving the "_y" suffix.
    # Dropping exactly those copies keeps unrelated columns whose names
    # merely contain "_y" (e.g. "pct_young").
    duplicated_columns = [
        f"{col}_y"
        for col in denominator_df.columns
        if col in numerator_df.columns and col not in merge_keys
    ]

    results = numerator_df.merge(denominator_df, on=merge_column, suffixes=("", "_y"))
    results = results.drop(columns=duplicated_columns)

    ratio_column = f"{ratio}_ratio"
    results[ratio_column] = results[numerator_column] / results[denominator_column] * ratio

    # Renames columns to increase readability
    results = results.rename(columns={numerator_column: "people_near_dangerous_dams",
                                      denominator_column: "total_population"})

    return results.sort_values(by=ratio_column, ascending=False)

In [33]:
# People near dangerous dams per 10,000 inhabitants of each region.
# The result is wrapped in a GeoDataFrame so it can be saved as feather.
ratios_by_region = gpd.GeoDataFrame(
    per_capita_ratios(
        numerator_df=people_near_dams_by_region,
        numerator_column='POP',
        denominator_df=regions,
        denominator_column='pop',
        merge_column='code_region',
        ratio=10000,
    ).reset_index()
)
ratios_by_region.to_feather("../../data/brazil/near-dam-ratios/pop-near-dangerous-dams-by-region.feather")
display(ratios_by_region.head(10))

Unnamed: 0,index,code_region,name_region,people_near_dangerous_dams,geometry,code,region,total_population,10000_ratio
0,0,2.0,Nordeste,562123,"MULTIPOLYGON (((6605500.388 9801072.213, 66053...",2,Região Nordeste,53081950,105.897202
1,2,1.0,Norte,82040,"MULTIPOLYGON (((5844252.032 11213965.446, 5844...",1,Região Norte,15864454,51.713094
2,1,3.0,Sudeste,255433,"MULTIPOLYGON (((5607810.407 8505662.544, 56077...",3,Região Sudeste,80364410,31.784343
3,4,5.0,Centro Oeste,15148,"MULTIPOLYGON (((4602519.086 8997067.676, 46014...",5,Região Centro-Oeste,14058094,10.775287
4,3,4.0,Sul,22905,"MULTIPOLYGON (((5188736.426 7819178.176, 51888...",4,Região Sul,27386891,8.36349


In [34]:
# People near dangerous dams per 10,000 inhabitants of each state.
# The result is wrapped in a GeoDataFrame so it can be saved as feather.
ratios_by_state = gpd.GeoDataFrame(
    per_capita_ratios(
        numerator_df=people_near_dams_by_state,
        numerator_column='POP',
        denominator_df=states,
        denominator_column='pop',
        merge_column='code_state',
        ratio=10000,
    ).reset_index()
)
ratios_by_state.to_feather("../../data/brazil/near-dam-ratios/pop-near-dangerous-dams-by-state.feather")
display(ratios_by_state.head(10))

Unnamed: 0,index,code_state,name_state,people_near_dangerous_dams,abbrev_state,code_region,name_region,geometry,code,"state""",total_population,10000_ratio
0,2,24.0,Rio Grande Do Norte,133067,RN,2.0,Nordeste,"MULTIPOLYGON (((6866272.431 10747646.115, 6866...",24,Rio Grande do Norte,3168027,420.031142
1,5,25.0,Paraíba,82713,PB,2.0,Nordeste,"MULTIPOLYGON (((7093988.192 10449669.249, 7093...",25,Paraíba,3766528,219.60012
2,11,16.0,Amapá,13851,AP,1.0,Norte,"MULTIPOLYGON (((5400648.179 11565814.635, 5400...",16,Amapá,669526,206.877702
3,0,26.0,Pernambuco,161531,PE,2.0,Nordeste,"MULTIPOLYGON (((7385901.887 10822712.549, 7385...",26,Pernambuco,8796448,183.632075
4,1,29.0,Bahia,161061,BA,2.0,Nordeste,"MULTIPOLYGON (((6603991.385 9285519.506, 66039...",29,Bahia,14016906,114.904816
5,8,32.0,Espírito Santo,34600,ES,3.0,Sudeste,"MULTIPOLYGON (((6373149.038 8968278.041, 63731...",32,Espírito Santo,3514952,98.436622
6,6,15.0,Pará,66372,PA,1.0,Norte,"MULTIPOLYGON (((5287183.770 11284166.374, 5287...",15,Pará,7581051,87.549866
7,3,31.0,Minas Gerais,97722,MG,3.0,Sudeste,"MULTIPOLYGON (((5340197.797 9189607.243, 53402...",31,Minas Gerais,19597330,49.864956
8,10,51.0,Mato Grosso,14335,MT,5.0,Centro Oeste,"MULTIPOLYGON (((4185288.338 10213558.205, 4185...",51,Mato Grosso,3035122,47.230391
9,7,33.0,Rio De Janeiro,38994,RJ,3.0,Sudeste,"MULTIPOLYGON (((6252609.806 8767805.232, 62525...",33,Rio de Janeiro,15989929,24.3866


In [35]:
# People near dangerous dams per 10,000 inhabitants of each city.
# The result is wrapped in a GeoDataFrame so it can be saved as feather.
ratios_by_city = gpd.GeoDataFrame(
    per_capita_ratios(
        numerator_df=people_near_dams_by_city,
        numerator_column='POP',
        denominator_df=cities,
        denominator_column='pop',
        merge_column='code_muni',
        ratio=10000,
    ).reset_index()
)
ratios_by_city.to_feather("../../data/brazil/near-dam-ratios/pop-near-dangerous-dams-by-city.feather")
display(ratios_by_city.head(10))

Unnamed: 0,index,code_muni,name_muni,people_near_dangerous_dams,code_state,abbrev_state,name_state,code_region,name_region,geometry,code,city,total_population,10000_ratio
0,109,240480,Ipueira,1954,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6848618.181 10535019.964, 6849...",240480,Ipueira,2077,9407.799711
1,86,241070,Riacho Da Cruz,2564,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6763689.827 10637953.559, 6763...",241070,Riacho da Cruz,3165,8101.105845
2,23,240080,Angicos,9151,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6912766.151 10659082.738, 6923...",240080,Angicos,11549,7923.629751
3,29,240010,Acari,7637,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6901921.669 10583120.553, 6901...",240010,Acari,11035,6920.706842
4,108,240390,Francisco Dantas,1966,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6740766.793 10627397.517, 6741...",240390,Francisco Dantas,2874,6840.640223
5,152,251465,São José Do Brejo Do Cruz,1145,25.0,PB,Paraíba,2.0,Nordeste,"MULTIPOLYGON (((6825231.612 10599178.200, 6825...",251465,São José do Brejo do Cruz,1684,6799.287411
6,106,241060,Rafael Godeiro,2056,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6779484.019 10620233.187, 6781...",241060,Rafael Godeiro,3063,6712.37349
7,92,240690,Lucrécia,2402,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6775636.032 10610236.184, 6775...",240690,Lucrécia,3633,6611.615745
8,45,260360,Camutanga,5199,26.0,PE,Pernambuco,2.0,Nordeste,"MULTIPOLYGON (((7041664.845 10447122.995, 7041...",260360,Camutanga,8156,6374.448259
9,103,241180,São Fernando,2111,24.0,RN,Rio Grande do Norte,2.0,Nordeste,"MULTIPOLYGON (((6846500.010 10596368.761, 6846...",241180,São Fernando,3401,6206.997942


What are those dangerous dams used for?

In [36]:
# Number of dangerous dams for each main use
dangerous_dams["uso_principal"].value_counts()

Abastecimento humano                  505
Irrigação                             254
Regularização de vazão                119
Dessedentação Animal                  111
Aquicultura                            65
Recreação                              42
Contenção de rejeitos de mineração     39
Industrial                             33
Combate às secas                       28
Contenção de resíduos industriais       9
Defesa contra inundações                9
Paisagismo                              5
Proteção do meio ambiente               1
Name: uso_principal, dtype: int64

Are the main uses different if we segment by state and region?

In [37]:
# Share of the dangerous irrigation dams located in each state
dangerous_dams.loc[
    dangerous_dams["uso_principal"] == 'Irrigação', "uf"
].value_counts(normalize=True)

BA    0.248031
ES    0.145669
PB    0.141732
PA    0.133858
GO    0.110236
PE    0.090551
RN    0.043307
AL    0.031496
MT    0.023622
TO    0.011811
RS    0.007874
MG    0.003937
PI    0.003937
SE    0.003937
Name: uf, dtype: float64

In [38]:
# Share of the dangerous human water supply dams located in each state
dangerous_dams.loc[
    dangerous_dams["uso_principal"] == 'Abastecimento humano', "uf"
].value_counts(normalize=True)

RN    0.249505
PB    0.245545
BA    0.241584
PE    0.198020
PI    0.013861
ES    0.009901
SP    0.009901
MT    0.005941
SE    0.003960
CE    0.003960
PA    0.003960
GO    0.003960
MG    0.003960
RJ    0.001980
PR    0.001980
RR    0.001980
Name: uf, dtype: float64

Which dams affect, alone, the most people?

In [31]:
# For each dam, take every selected grid square that its buffer intersects,
# add up their population and rank the dams by the total. Since the spatial
# join only returns matches, dams whose buffer touches none of the selected
# grid squares are left out of the result.
dangerous_dams_pop = (
    gpd.sjoin(dangerous_dams, grid_squares_near_dangerous_dams)
    .groupby(["codigo_snisb"])["POP"]
    .sum()
    .reset_index()
    .merge(dangerous_dams, on='codigo_snisb')
    .rename(columns={"POP": "affected_population"})
    .sort_values(by='affected_population', ascending=False)
    .reset_index()
)

# Wrapped in a GeoDataFrame before being saved as feather
dangerous_dams_pop = gpd.GeoDataFrame(dangerous_dams_pop)
dangerous_dams_pop.to_feather("../../data/brazil/snisb/dangerous-dams-population.feather")

Let's look at the two cities with the highest share of their population living near dangerous dams.

In [32]:
# Dangerous dams registered in the municipality of Ipueira
dangerous_dams_pop.loc[dangerous_dams_pop["municipio"] == "IPUEIRA"]

Unnamed: 0,index,codigo_snisb,affected_population,nome_da_barragem,nome_secundario,uso_principal,uf,municipio,categoria_de_risco,dano_potencial_associado,...,data_da_ultima_inspecao,tipo_da_ultima_inspecao,nivel_de_perigo_global,possui_eclusa,fase_da_vida,latitude,longitude,completude,geometry,high_risk_high_damage
114,814,18142,1916,Barragem do Sítio Ipueira,,Dessedentação Animal,RN,IPUEIRA,Alto,Alto,...,,,,,,-6.81528,-37.19333,baixa,"POLYGON ((6837490.411 10526197.929, 6837485.59...",True
898,609,7589,20,CURRAL QUEIMADO,,Abastecimento humano,RN,IPUEIRA,Alto,Alto,...,,,,,,-6.77522,-37.19868,baixa,"POLYGON ((6837175.329 10530703.712, 6837170.51...",True
954,596,7575,14,CARNAÚBAS,,Abastecimento humano,RN,IPUEIRA,Alto,Alto,...,,,,,,-6.76747,-37.25097,mínima,"POLYGON ((6831519.365 10531911.253, 6831514.54...",True
1055,598,7578,5,JATAHÍ,,Abastecimento humano,RN,IPUEIRA,Alto,Alto,...,,,,,,-6.74941,-37.24768,mínima,"POLYGON ((6831999.331 10533905.107, 6831994.51...",True


In [33]:
# Dangerous dams registered in the municipality of Riacho da Cruz
dangerous_dams_pop.loc[dangerous_dams_pop["municipio"] == "RIACHO DA CRUZ"]

Unnamed: 0,index,codigo_snisb,affected_population,nome_da_barragem,nome_secundario,uso_principal,uf,municipio,categoria_de_risco,dano_potencial_associado,...,data_da_ultima_inspecao,tipo_da_ultima_inspecao,nivel_de_perigo_global,possui_eclusa,fase_da_vida,latitude,longitude,completude,geometry,high_risk_high_damage
88,481,7435,2564,RIACHO DA CRUZ II,,Abastecimento humano,RN,RIACHO DA CRUZ,Alto,Alto,...,,,,,,-5.93751,-37.94027,baixa,"POLYGON ((6761589.351 10628896.079, 6761584.53...",True


The state of Amapá is particularly interesting: it has a high share of people living near dangerous dams, yet it is not in the Northeast but in the Amazon, where there is no reason to build anti-drought dams. What is going on?

In [34]:
# Dangerous dams located in the state of Amapá (AP)
dangerous_dams_pop.loc[dangerous_dams_pop["uf"] == "AP"]

Unnamed: 0,index,codigo_snisb,affected_population,nome_da_barragem,nome_secundario,uso_principal,uf,municipio,categoria_de_risco,dano_potencial_associado,...,data_da_ultima_inspecao,tipo_da_ultima_inspecao,nivel_de_perigo_global,possui_eclusa,fase_da_vida,latitude,longitude,completude,geometry,high_risk_high_damage
11,921,20277,13309,Açude do Zezinho,,Recreação,AP,MACAPÁ,Alto,Alto,...,25-11-2019,Regular,,Não,,0.04428,-51.0721,mínima,"POLYGON ((5329087.323 11340677.182, 5329082.50...",True
225,182,3962,542,PANASQUEIRA,,Aquicultura,AP,TARTARUGALZINHO,Alto,Alto,...,26-11-2019,Regular,,Não,,1.57747,-50.90803,ótima,"POLYGON ((5349327.676 11508412.718, 5349322.86...",True


It appears to be simply a sparsely populated state with a single, big, dangerous dam in the capital.

Ok, but what about the mining dams - how many people live near them?

In [35]:
# Total population counted around the dangerous mining tailings dams
dangerous_dams_pop.loc[
    dangerous_dams_pop["uso_principal"] == "Contenção de rejeitos de mineração",
    "affected_population",
].sum()

1169

Let's look at their details.

In [36]:
# Full records of the dangerous mining tailings dams
dangerous_dams_pop.loc[
    dangerous_dams_pop["uso_principal"] == "Contenção de rejeitos de mineração"
]

Unnamed: 0,index,codigo_snisb,affected_population,nome_da_barragem,nome_secundario,uso_principal,uf,municipio,categoria_de_risco,dano_potencial_associado,...,data_da_ultima_inspecao,tipo_da_ultima_inspecao,nivel_de_perigo_global,possui_eclusa,fase_da_vida,latitude,longitude,completude,geometry,high_risk_high_damage
218,59,943,574,Doutor,,Contenção de rejeitos de mineração,MG,OURO PRETO,Alto,Alto,...,,,,,Inativa,-20.29111,-43.49056,boa,"POLYGON ((6093096.386 9050994.992, 6093091.571...",True
486,58,835,129,Sul Inferior,,Contenção de rejeitos de mineração,MG,BARÃO DE COCAIS,Alto,Alto,...,,,,,,-19.97556,-43.60056,boa,"POLYGON ((6082976.163 9086517.775, 6082971.348...",True
623,950,20417,75,Capim Branco,,Contenção de rejeitos de mineração,MG,BRUMADINHO,Alto,Alto,...,,,,,,-20.10828,-44.07367,baixa,"POLYGON ((6033249.987 9073540.601, 6033245.172...",True
640,265,6989,69,Maravilhas II,,Contenção de rejeitos de mineração,MG,ITABIRITO,Alto,Alto,...,,,,,,-20.21442,-43.89169,baixa,"POLYGON ((6051739.410 9061058.374, 6051734.595...",True
674,53,702,59,B3/B4,,Contenção de rejeitos de mineração,MG,NOVA LIMA,Alto,Alto,...,16-12-2019,Regular,,,,-20.04722,-43.95361,boa,"POLYGON ((6045973.850 9079887.279, 6045969.034...",True
701,54,721,52,Borrachudo II,,Contenção de rejeitos de mineração,MG,ITABIRA,Alto,Alto,...,,,,,,-19.6125,-43.26667,boa,"POLYGON ((6119249.058 9125616.002, 6119244.243...",True
728,51,622,46,5 (Mutuca),,Contenção de rejeitos de mineração,MG,NOVA LIMA,Alto,Alto,...,26-12-2019,Regular,,,Inativa,-20.02611,-43.9425,boa,"POLYGON ((6047213.425 9082194.276, 6047208.610...",True
774,56,742,36,Norte/Laranjeiras,,Contenção de rejeitos de mineração,MG,BARÃO DE COCAIS,Alto,Alto,...,,,,,,-19.84833,-43.42167,boa,"POLYGON ((6102114.995 9099977.690, 6102110.180...",True
881,55,734,22,Barragem II Mina Engenho,,Contenção de rejeitos de mineração,MG,RIO ACIMA,Alto,Alto,...,,,,,,-20.06278,-43.79472,boa,"POLYGON ((6062430.324 9077558.123, 6062425.509...",True
889,1047,26137,20,Barragem Quéias,,Contenção de rejeitos de mineração,MG,BRUMADINHO,Alto,Alto,...,,,,,Inativa,-20.12697,-44.31608,baixa,"POLYGON ((6007979.706 9072345.149, 6007974.891...",True


Do they have emergency plans?

In [37]:
# Share of the dangerous mining tailings dams for each value of possui_pae
dangerous_dams_pop.loc[
    dangerous_dams_pop["uso_principal"] == "Contenção de rejeitos de mineração",
    "possui_pae",
].value_counts(normalize=True)

Sim    0.588235
Não    0.411765
Name: possui_pae, dtype: float64

And safety plans?

In [38]:
# Share of the dangerous mining tailings dams for each value of the safety plan column
dangerous_dams_pop.loc[
    dangerous_dams_pop["uso_principal"] == "Contenção de rejeitos de mineração",
    "possui_plano_de_seguranassa",
].value_counts(normalize=True)

Sim    0.944444
Não    0.055556
Name: possui_plano_de_seguranassa, dtype: float64

Let's load detailed data from another governing body, the National Mining Agency (Agência Nacional de Mineração, ANM). Notice that the dams don't use the same code in both datasets, although the ANM data is a primary source for the other dataset. Some of the information shown in the datasets differs, possibly because they were extracted at different points in time, but the big picture remains the same: roughly 30 to 40 dangerous mining dams.

In [40]:
# Mining dams spreadsheet from the National Mining Agency (ANM)
anm = pd.read_excel(
    "../../data/brazil/mining-dams/14032022-mining-dams-anm.xlsx"
)

In [41]:
# ANM dams with high risk and high potential damage whose ID also appears
# among the inspector codes of the dangerous dams selected earlier
anm.loc[
    anm['Categoria de Risco'].eq('Alto')
    & anm['Dano Potencial Associado'].eq('Alto')
    & anm['ID Barragem'].astype(str).isin(dangerous_dams['codigo_barragem_fiscalizador'])
].shape

(32, 19)

What about the dams built by the DNOCS?

In [41]:
# Dangerous dams whose registered owner is the DNOCS
dangerous_dams_pop.loc[
    dangerous_dams_pop["nome_do_empreendedor"] == "Departamento Nacional de Obras Contra as Secas"
]

Unnamed: 0,index,codigo_snisb,affected_population,nome_da_barragem,nome_secundario,uso_principal,uf,municipio,categoria_de_risco,dano_potencial_associado,...,data_da_ultima_inspecao,tipo_da_ultima_inspecao,nivel_de_perigo_global,possui_eclusa,fase_da_vida,latitude,longitude,completude,geometry,high_risk_high_damage
13,740,7780,13101,SANTA CRUZ DO TRAIRI,,Abastecimento humano,RN,SANTA CRUZ,Alto,Alto,...,,,,,,-6.2245,-36.03037,baixa,"POLYGON ((6968637.363 10584182.718, 6968632.54...",True
21,708,7724,9374,CURRAIS NOVOS,,Irrigação,RN,CURRAIS NOVOS,Alto,Alto,...,,,,,,-6.26901,-36.52645,baixa,"POLYGON ((6914078.966 10582661.841, 6914074.15...",True
35,377,7233,6480,Juraci Magalhães,Poço do Urubu,Abastecimento humano,BA,ITABERABA,Alto,Alto,...,26-08-2021,Regular,Normal,,,-12.53639,-40.31667,baixa,"POLYGON ((6465131.192 9903939.275, 6465126.377...",True
36,661,7656,6391,Mororó,,Abastecimento humano,PE,PEDRA,Alto,Alto,...,,,,,,-8.49439,-36.94519,boa,"POLYGON ((6853119.977 10336942.836, 6853115.16...",True
83,396,7277,2682,Parnamirim,,Combate às secas,PE,PARNAMIRIM,Alto,Alto,...,,,,,,-8.09203,-39.56758,boa,"POLYGON ((6570933.685 10397990.721, 6570928.87...",True
88,481,7435,2564,RIACHO DA CRUZ II,,Abastecimento humano,RN,RIACHO DA CRUZ,Alto,Alto,...,,,,,,-5.93751,-37.94027,baixa,"POLYGON ((6761589.351 10628896.079, 6761584.53...",True
106,698,7706,2196,Serra Branca I,,Abastecimento humano,PB,SERRA BRANCA,Alto,Alto,...,,,,,,-7.47141,-36.6674,baixa,"POLYGON ((6890342.741 10449436.372, 6890337.92...",True
110,469,7411,1966,TESOURA,,Abastecimento humano,RN,FRANCISCO DANTAS,Alto,Alto,...,,,,,,-6.08363,-38.12487,baixa,"POLYGON ((6740448.690 10613763.367, 6740443.87...",True
123,456,7382,1767,São Gonçalo,,Abastecimento humano,PB,SOUSA,Alto,Alto,...,,,,,,-6.84569,-38.31147,baixa,"POLYGON ((6715251.659 10529899.464, 6715246.84...",True
124,984,21420,1763,Tamboril I,,Abastecimento humano,PE,ARCOVERDE,Alto,Alto,...,,,,Não,,-8.43083,-37.04667,baixa,"POLYGON ((6842544.343 10344722.209, 6842539.52...",True
