# Formatting the municipalities' shapefiles

Procedure:
 - One year after the other, if data is in state files, they are first concatenated together.
 - Then, the columns are inspected and renamed.
 - The municipalities' code is corrected to exclude the last digit if code is 7 digitis long.
 
 
UPDATE (18/08/2023): Included the formatting of the years 1980 and 1991.

In [22]:
import geopandas as gpd
import pandas as pd
import locale

import sys
sys.path.append('../utils/')
from cerrado_states_codes import cerrado_state_code

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
# User inputs
# Directory with year folders
in_dir = '../../data/BR_municipios/'

# Output directory
out_file = '../data/municipalities_shp/municipalities_{}.shp'

# Character encoding
encode = locale.getpreferredencoding()

In [3]:
# Function that, given a year folder, gets the list of all shapefiles inside the folder, reads them, and concatenates
# them.
def readSHP(year, input_dir):
    # Read list of files in directory
    list_fp = fileList.fileList(input_dir + str(year), in_extensions = 'shp')
    
    # Read files iteratively and save into list
    list_gdf = []
    for file in list_fp:
        gdf = gpd.read_file(file, encoding = encode)
        
        # If CRS is None, we set the CRS to Brazilian Geodetic System (currently SIRGAS 2000) with Geographic projection.
        if gdf.crs == None:
            gdf.crs = 'epsg:4674'
        
        list_gdf.append(gdf)
    
    # Converting list fo GeoDataFrames to GeoDataFrame
    list_gdf = gpd.GeoDataFrame(pd.concat(list_gdf, ignore_index = True))

    return list_gdf

In [31]:
# Function that adds the state character and numerical codes
def addStateCodes(data, select = False):
    
    # First, get the numerical state code from the municipalities' code
    data['s_code'] = data['m_code'].apply(lambda x: int(str(x)[:2]))
    
    # Read the state's character-numerical dictionary
    s_codes = cerrado_state_code()
    
    # If select option, select only those states in s_codes
    if select == True:
        # First, get list of the states
        states = list(cerrado_state_code().keys())

        # Then, select only those municipalities in these states
        data = data[data['s_code'].isin(states)]

    
    # Now, get the alphabetical code fomr the numerical one
    data['state'] = data['s_code'].apply(lambda x: s_codes[x])
    
    return data

In [5]:
# Function that corrects the municipality code by deleting the 7th digit, if present
def correct_m_code(data):
    data['m_code'] = data['m_code'].apply(lambda x: int(str(x)[:6]))
    
    return data

In [6]:
# Function that puts the columns in order
def order_columns(data):
    data = data[['year', 'state', 's_code', 'm_code', 'm_name', 'area_km2', 'latitude', 'longitude', 'geometry']]
    
    return data

In [7]:
# Function that calcualtes the area of a polygon in km2
def calculate_area(data):
    
    # Store original crs
    crs = data.crs
    # Transform to a CRS with m units (for Brazil, Brazil SIRGAS 2000 polyconic)
    data = data.to_crs('epsg:5880')
    
    # Then, calculate the area in m2
    data['area_m2'] = data['geometry'].area
    
    # Transform to km2
    data['area_km2'] = data['area_m2']*1e-06
    
#     print(data)
    
    # Drop m2 column
    data = data.drop(columns = ['area_m2'])
    
    # Return to original crs
    data = data.to_crs(crs)
    
    return data

In [8]:
# Function that selects only those states that we are interested in
def select_states(data):
    # First, get list of the states
    states = list(tools.cerrado_state_code().values())
    
    # Then, select only those municipalities in these states
    data = data[data['state'].isin(states)]
    
    return data

In [24]:
# Function that aggregates duplicated municipalities into a single MultiPolygon
def aggregate_municipalities(data):
    # First, just checking for information if there are repeated municipalities
    if len(data[data['m_code'].duplicated()]['m_code'].unique()) != 0:
        print('There were duplicated municipalities.')
        
        # Aggregate them into MultiPolygons
        data = data.dissolve(by = 'm_code').reset_index()
        
    return data

In [10]:
# Function that returns the latitude and longitude of the centroid of each municipality
def get_latlon(data):
    # Store original CRS
    crs = data.crs
    
    # Transform to projected CRS
    data = data.to_crs('epsg:5880')
    
    # Get latitude and longitude from the centroid
    data['latitude'] = data.centroid.y
    data['longitude'] = data.centroid.y
    
    # Return to original CRS
    data = data.to_crs(crs)
    
    return data

## Year 2000

In [11]:
# Get GeoDataFrame with all the states' municipalities
gdf_2000 = readSHP(2000, in_dir)
gdf_2000.head(2)

Unnamed: 0,MSLINK,CODIGO,AREA_1,PERIMETRO_,GEOCODIGO,NOME,SEDE,LATITUDESE,LONGITUDES,AREA_TOT_G,RESERVADO,geometry
0,17,1100205,34082.366,1996.549,1100205,Porto Velho,T,-8.762,-63.904,34082.366,F,"POLYGON ((-66.77773 -9.76616, -66.78159 -9.758..."
1,10,1100106,24855.652,960.222,1100106,Guajar -Mirim,T,-10.783,-65.339,24855.652,F,"POLYGON ((-65.40175 -10.59234, -65.39728 -10.5..."


In [12]:
# Select relevant columns
gdf_2000 = gdf_2000[['CODIGO', 'AREA_1', 'NOME', 'LATITUDESE', 'LONGITUDES', 'geometry']]

In [13]:
# Rename the relevant columns
gdf_2000 = gdf_2000.rename(columns = {'CODIGO':'m_code', 'AREA_1':'area_km2', 'NOME':'m_name', 'LATITUDESE':'latitude', 'LONGITUDES':'longitude'})

In [14]:
# Add state codes
gdf_2000 = addStateCodes(gdf_2000)

In [15]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2000 = aggregate_municipalities(gdf_2000)

There were duplicated municiaplities.


In [16]:
# Correct municipality code
gdf_2000 = correct_m_code(gdf_2000)

In [17]:
# Adding year column
gdf_2000['year'] = 2000

In [18]:
# Ordering the columns
gdf_2000 = order_columns(gdf_2000)

In [19]:
# Checking the GeoDataFrame again
gdf_2000

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2000,RO,11,110001,Alta Floresta D'Oeste,7066.70200,-11.929,-61.996,"POLYGON ((-62.89162 -12.85844, -62.88899 -12.8..."
1,2000,RO,11,110002,Ariquemes,4426.55800,-9.913,-63.041,"POLYGON ((-63.59644 -9.99999, -63.59734 -9.998..."
2,2000,RO,11,110003,Cabixi,1314.35500,-13.492,-60.545,"POLYGON ((-60.91846 -13.54792, -60.91881 -13.5..."
3,2000,RO,11,110004,Cacoal,3792.63800,-11.438,-61.448,"POLYGON ((-61.79527 -11.39743, -61.79170 -11.3..."
4,2000,RO,11,110005,Cerejeiras,2783.30500,-13.189,-60.812,"POLYGON ((-61.90644 -13.27527, -61.90720 -13.2..."
...,...,...,...,...,...,...,...,...,...
3525,2000,GO,52,522200,Vian¢polis,954.27900,-16.742,-48.516,"POLYGON ((-48.59655 -16.92375, -48.59824 -16.9..."
3526,2000,GO,52,522205,Vicentin¢polis,737.25100,-17.735,-49.806,"POLYGON ((-50.08652 -17.78481, -50.08527 -17.7..."
3527,2000,GO,52,522220,Vila Boa,1060.17000,-15.038,-47.059,"POLYGON ((-47.21584 -14.93810, -47.21491 -14.9..."
3528,2000,GO,52,522230,Vila Prop¡cio,2181.57500,-15.457,-48.889,"POLYGON ((-49.09162 -15.48003, -49.09160 -15.4..."


In [20]:
# Save to file
gdf_2000.to_file(out_file.format(2000))

In [21]:
del gdf_2000

## Year 2001

In [22]:
# Get GeoDataFrame with all the states' municipalities
gdf_2001 = readSHP(2001, in_dir)
gdf_2001.head(2)

Unnamed: 0,MSLINK,MAPID,CODIGO,AREA_1,PERIMETRO_,GEOCODIGO,NOME,SEDE,LATITUDESE,LONGITUDES,AREA_TOT_G,geometry
0,1,99,1100015,7066.702,608.314,1100015,Alta Floresta d'Oeste,T,-11.929,-61.996,7066.702,"POLYGON ((-62.89212 -12.85885, -62.88114 -12.8..."
1,2,99,1100023,4426.558,427.64,1100023,Ariquemes,T,-9.913,-63.041,4426.558,"POLYGON ((-63.59694 -10.00039, -63.60485 -9.98..."


In [23]:
# Select relevant columns
gdf_2001 = gdf_2001[['CODIGO', 'AREA_1', 'NOME', 'LATITUDESE', 'LONGITUDES', 'geometry']]

In [24]:
# Rename the relevant columns
gdf_2001 = gdf_2001.rename(columns = {'CODIGO':'m_code', 'AREA_1':'area_km2', 'NOME':'m_name', 'LATITUDESE':'latitude', 'LONGITUDES':'longitude'})

In [25]:
# Add state codes
gdf_2001 = addStateCodes(gdf_2001)

In [26]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2001 = aggregate_municipalities(gdf_2001)

There were duplicated municiaplities.


In [27]:
# Correct municipality code
gdf_2001 = correct_m_code(gdf_2001)

In [28]:
# Adding year column
gdf_2001['year'] = 2001

In [29]:
# Ordering the columns
gdf_2001 = order_columns(gdf_2001)

In [30]:
# Checking the GeoDataFrame again
gdf_2001

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2001,RO,11,110001,Alta Floresta d'Oeste,7066.702,-11.929,-61.996,"POLYGON ((-62.89212 -12.85885, -62.88114 -12.8..."
1,2001,RO,11,110002,Ariquemes,4426.558,-9.913,-63.041,"POLYGON ((-63.59694 -10.00039, -63.60485 -9.98..."
2,2001,RO,11,110003,Cabixi,1314.355,-13.492,-60.545,"POLYGON ((-60.91892 -13.54834, -60.91861 -13.5..."
3,2001,RO,11,110004,Cacoal,3792.638,-11.438,-61.448,"POLYGON ((-61.79589 -11.39774, -61.78298 -11.4..."
4,2001,RO,11,110005,Cerejeiras,2783.305,-13.189,-60.812,"POLYGON ((-61.50094 -13.00434, -61.47948 -13.0..."
...,...,...,...,...,...,...,...,...,...
3545,2001,GO,52,522200,Vianópolis,954.279,-16.742,-48.516,"POLYGON ((-48.59694 -16.92420, -48.59864 -16.9..."
3546,2001,GO,52,522205,Vicentinópolis,737.251,-17.735,-49.806,"POLYGON ((-50.08692 -17.78526, -50.08365 -17.7..."
3547,2001,GO,52,522220,Vila Boa,1060.170,-15.038,-47.059,"POLYGON ((-47.11127 -14.67229, -47.11645 -14.6..."
3548,2001,GO,52,522230,Vila Propício,2181.575,-15.457,-48.889,"POLYGON ((-49.08259 -15.51287, -49.08138 -15.4..."


In [31]:
# Save to file
gdf_2001.to_file(out_file.format(2001))

In [32]:
del gdf_2001

## Year 2005

In [33]:
# Get GeoDataFrame with all the states' municipalities
gdf_2005 = readSHP(2005, in_dir)
gdf_2005.head(2)

Unnamed: 0,GEOCODIGO,NOME,UF,ID_UF,REGIAO,MESOREGIAO,MICROREGIA,LATITUDE,LONGITUDE,SEDE,geometry
0,1200336,Mâncio Lima,AC,12,Norte,VALE DO JURUA,CRUZEIRO DO SUL,-7.614,-72.896,t,"POLYGON ((-73.80098 -7.11146, -73.73762 -7.134..."
1,1300201,Atalaia do Norte,AM,13,Norte,SUDOESTE AMAZONENSE,ALTO SOLIMOES,-4.372,-70.192,t,"POLYGON ((-73.80098 -7.11146, -73.79617 -7.102..."


In [34]:
# Get the relevant columns and rename
gdf_2005 = gdf_2005[['GEOCODIGO', 'NOME', 'UF', 'ID_UF', 'LATITUDE', 'LONGITUDE', 'geometry']]
# Rename these columns
gdf_2005 = gdf_2005.rename(columns = {'GEOCODIGO':'m_code', 'NOME':'m_name', 'UF':'state', 
                                      'ID_UF':'s_code', 'LATITUDE':'latitude', 'LONGITUDE':'longitude'})
# Add year column
gdf_2005['year'] = 2005

In [35]:
# Select only those states that we are intereseted in 
gdf_2005 = select_states(gdf_2005)

In [36]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2005 = aggregate_municipalities(gdf_2005)

There were duplicated municiaplities.


In [37]:
# Calculate area of each municipality
gdf_2005 = calculate_area(gdf_2005)

In [38]:
# Place columns in the right order
gdf_2005 = order_columns(gdf_2005)

In [39]:
# Correct municipality code
gdf_2005 = correct_m_code(gdf_2005)

In [40]:
# Checking the GeoDataFrame again
gdf_2005

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2005,RO,11,110001,Alta Floresta D'Oeste,7137.928186,-11.929,-61.996,"POLYGON ((-62.89162 -12.85844, -62.88065 -12.8..."
1,2005,RO,11,110002,Ariquemes,4480.100575,-9.913,-63.041,"POLYGON ((-63.10106 -9.75217, -63.03356 -9.751..."
2,2005,RO,11,110003,Cabixi,1322.454806,-13.492,-60.545,"POLYGON ((-60.91846 -13.54792, -60.91814 -13.5..."
3,2005,RO,11,110004,Cacoal,3821.386881,-11.438,-61.448,"POLYGON ((-61.79527 -11.39743, -61.78251 -11.4..."
4,2005,RO,11,110005,Cerejeiras,2804.983150,-13.189,-60.812,"POLYGON ((-61.50047 -13.00392, -61.47901 -13.0..."
...,...,...,...,...,...,...,...,...,...
3549,2005,GO,52,522200,Vianópolis,957.618875,-16.742,-48.516,"POLYGON ((-48.25938 -16.73830, -48.26485 -16.7..."
3550,2005,GO,52,522205,Vicentinópolis,739.898831,-17.735,-49.806,"POLYGON ((-50.08652 -17.78481, -50.08324 -17.7..."
3551,2005,GO,52,522220,Vila Boa,1068.373432,-15.038,-47.059,"POLYGON ((-47.10880 -14.67637, -47.10180 -14.6..."
3552,2005,GO,52,522230,Vila Propício,2188.854372,-15.457,-48.889,"POLYGON ((-48.90783 -15.16350, -48.90097 -15.1..."


In [41]:
# Save to file
gdf_2005.to_file(out_file.format(2005))

In [42]:
del gdf_2005

## Year 2007

In [43]:
# Get GeoDataFrame with all the states' municipalities
gdf_2007 = readSHP(2007, in_dir)
gdf_2007.head(2)

Unnamed: 0,GEOCODIG_M,UF,SIGLA,NOME_MUNIC,REGIão,MESORREGIã,NOME_MESO,MICRORREGI,NOME_MICRO,geometry
0,1100015,11,RO,Alta Floresta D'Oeste,Norte,1102,Leste Rondoniense,11006,Cacoal,"POLYGON ((-62.18255 -11.86729, -62.16276 -11.8..."
1,1100023,11,RO,Ariquemes,Norte,1102,Leste Rondoniense,11003,Ariquemes,"POLYGON ((-62.53640 -9.73224, -62.50826 -9.754..."


In [44]:
# Get the relevant columns and rename
gdf_2007 = gdf_2007[['GEOCODIG_M', 'NOME_MUNIC', 'UF', 'SIGLA', 'geometry']]
# Rename these columns
gdf_2007 = gdf_2007.rename(columns = {'GEOCODIG_M':'m_code', 'NOME_MUNIC':'m_name', 'SIGLA':'state', 
                                      'UF':'s_code'})
# Add year column
gdf_2007['year'] = 2007

In [45]:
# Select only those states that we are intereseted in 
gdf_2007 = select_states(gdf_2007)

In [46]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2007 = aggregate_municipalities(gdf_2007)

In [47]:
# Calculate area of each municipality
gdf_2007 = calculate_area(gdf_2007)

In [48]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2007 = get_latlon(gdf_2007)

In [49]:
# Place columns in the right order
gdf_2007 = order_columns(gdf_2007)

In [50]:
# Correct municipality code
gdf_2007 = correct_m_code(gdf_2007)

In [51]:
# Checking the GeoDataFrame again
gdf_2007

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2007,RO,11,110001,Alta Floresta D'Oeste,7137.513828,8.606929e+06,8.606929e+06,"POLYGON ((-62.18255 -11.86729, -62.16276 -11.8..."
1,2007,RO,11,110002,Ariquemes,4481.039426,8.886181e+06,8.886181e+06,"POLYGON ((-62.53640 -9.73224, -62.50826 -9.754..."
2,2007,RO,11,110003,Cabixi,1322.469647,8.500044e+06,8.500044e+06,"POLYGON ((-60.39986 -13.45628, -60.40241 -13.4..."
3,2007,RO,11,110004,Cacoal,3821.868382,8.740191e+06,8.740191e+06,"POLYGON ((-61.00096 -11.39839, -61.01840 -11.4..."
4,2007,RO,11,110005,Cerejeiras,2805.536318,8.528352e+06,8.528352e+06,"POLYGON ((-61.50093 -13.00436, -61.47947 -13.0..."
...,...,...,...,...,...,...,...,...,...
5561,2007,GO,52,522200,Vianópolis,957.669648,8.132085e+06,8.132085e+06,"POLYGON ((-48.43166 -16.62800, -48.42568 -16.6..."
5562,2007,GO,52,522205,Vicentinópolis,740.278806,8.034672e+06,8.034672e+06,"POLYGON ((-49.85046 -17.57727, -49.84352 -17.5..."
5563,2007,GO,52,522220,Vila Boa,1068.947033,8.330055e+06,8.330055e+06,"POLYGON ((-47.11058 -14.67196, -47.11647 -14.6..."
5564,2007,GO,52,522230,Vila Propício,2188.902882,8.304588e+06,8.304588e+06,"POLYGON ((-48.75165 -14.90505, -48.75237 -14.9..."


In [52]:
# Save to file
gdf_2007.to_file(out_file.format(2007))

In [53]:
del gdf_2007

## Year 2010

In [54]:
# Get GeoDataFrame with all the states' municipalities
gdf_2010 = readSHP(2010, in_dir)
gdf_2010.head(2)

Unnamed: 0,ID,CD_GEOCODM,NM_MUNICIP,geometry
0,1,1100015,ALTA FLORESTA D'OESTE,"POLYGON ((-62.24620 -11.90149, -62.24398 -11.9..."
1,2,1100023,ARIQUEMES,"POLYGON ((-63.13712 -9.78413, -63.09013 -9.782..."


In [55]:
# Select relevant columns
gdf_2010 = gdf_2010[['CD_GEOCODM', 'NM_MUNICIP', 'geometry']]

In [56]:
# Rename the relevant columns
gdf_2010 = gdf_2010.rename(columns = {'CD_GEOCODM':'m_code', 'NM_MUNICIP':'m_name'})

In [57]:
# Add state codes
gdf_2010 = addStateCodes(gdf_2010)

In [58]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2010 = aggregate_municipalities(gdf_2010)

In [59]:
# Calculate area of each municipality
gdf_2010 = calculate_area(gdf_2010)

In [60]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2010 = get_latlon(gdf_2010)

In [61]:
# Correct municipality code
gdf_2010 = correct_m_code(gdf_2010)

In [62]:
# Adding year column
gdf_2010['year'] = 2010

In [63]:
# Ordering the columns
gdf_2010 = order_columns(gdf_2010)

In [64]:
# Checking the GeoDataFrame again
gdf_2010

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2010,RO,11,110001,ALTA FLORESTA D'OESTE,7137.806334,8.606916e+06,8.606916e+06,"POLYGON ((-62.24620 -11.90149, -62.24398 -11.9..."
1,2010,RO,11,110002,ARIQUEMES,4479.446407,8.886192e+06,8.886192e+06,"POLYGON ((-63.13712 -9.78413, -63.09013 -9.782..."
2,2010,RO,11,110003,CABIXI,1322.759675,8.500043e+06,8.500043e+06,"POLYGON ((-60.52408 -13.32137, -60.37162 -13.3..."
3,2010,RO,11,110004,CACOAL,3822.843319,8.740185e+06,8.740185e+06,"POLYGON ((-61.42679 -11.11376, -61.42900 -10.9..."
4,2010,RO,11,110005,CEREJEIRAS,2804.665257,8.528364e+06,8.528364e+06,"POLYGON ((-61.41347 -13.23417, -61.41505 -13.2..."
...,...,...,...,...,...,...,...,...,...
3550,2010,GO,52,522200,VIANÓPOLIS,958.424277,8.132084e+06,8.132084e+06,"POLYGON ((-48.54164 -16.74126, -48.53875 -16.7..."
3551,2010,GO,52,522205,VICENTINÓPOLIS,739.003170,8.034702e+06,8.034702e+06,"POLYGON ((-49.90713 -17.61460, -49.90588 -17.6..."
3552,2010,GO,52,522220,VILA BOA,1067.467465,8.330040e+06,8.330040e+06,"POLYGON ((-46.97635 -14.93112, -46.95851 -14.9..."
3553,2010,GO,52,522230,VILA PROPÍCIO,2189.954331,8.304597e+06,8.304597e+06,"POLYGON ((-48.86700 -15.09735, -48.86644 -15.0..."


In [65]:
# Save to file
gdf_2010.to_file(out_file.format(2010))

In [66]:
del gdf_2010

## Year 2013

In [67]:
# Get GeoDataFrame with all the states' municipalities
gdf_2013 = readSHP(2013, in_dir)
gdf_2013.head(2)

Unnamed: 0,NM_MUNICIP,CD_GEOCMU,geometry
0,ALTA FLORESTA D'OESTE,1100015,"POLYGON ((-62.19465 -11.82746, -62.19332 -11.8..."
1,ARIQUEMES,1100023,"POLYGON ((-62.53648 -9.73222, -62.52765 -9.736..."


In [68]:
# Select relevant columns
gdf_2013 = gdf_2013[['CD_GEOCMU', 'NM_MUNICIP', 'geometry']]

In [69]:
# Rename the relevant columns
gdf_2013 = gdf_2013.rename(columns = {'CD_GEOCMU':'m_code', 'NM_MUNICIP':'m_name'})

In [70]:
# Add state codes
gdf_2013 = addStateCodes(gdf_2013)

In [71]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2013 = aggregate_municipalities(gdf_2013)

In [72]:
# Calculate area of each municipality
gdf_2013 = calculate_area(gdf_2013)

In [73]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2013 = get_latlon(gdf_2013)

In [74]:
# Correct municipality code
gdf_2013 = correct_m_code(gdf_2013)

In [75]:
# Adding year column
gdf_2013['year'] = 2013

In [76]:
# Ordering the columns
gdf_2013 = order_columns(gdf_2013)

In [77]:
# Checking the GeoDataFrame again
gdf_2013

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2013,RO,11,110001,ALTA FLORESTA D'OESTE,7137.806333,8.606916e+06,8.606916e+06,"POLYGON ((-62.19465 -11.82746, -62.19332 -11.8..."
1,2013,RO,11,110002,ARIQUEMES,4479.446407,8.886192e+06,8.886192e+06,"POLYGON ((-62.53648 -9.73222, -62.52765 -9.736..."
2,2013,RO,11,110003,CABIXI,1322.749453,8.500043e+06,8.500043e+06,"POLYGON ((-60.37075 -13.36363, -60.37134 -13.3..."
3,2013,RO,11,110004,CACOAL,3822.982353,8.740186e+06,8.740186e+06,"POLYGON ((-61.00080 -11.29737, -61.00103 -11.3..."
4,2013,RO,11,110005,CEREJEIRAS,2804.665257,8.528364e+06,8.528364e+06,"POLYGON ((-61.49976 -13.00525, -61.49809 -13.0..."
...,...,...,...,...,...,...,...,...,...
3552,2013,GO,52,522200,VIANÃ“POLIS,958.424278,8.132084e+06,8.132084e+06,"POLYGON ((-48.38114 -16.63141, -48.38058 -16.6..."
3553,2013,GO,52,522205,VICENTINÃ“POLIS,739.003170,8.034702e+06,8.034702e+06,"POLYGON ((-49.84456 -17.58262, -49.84357 -17.5..."
3554,2013,GO,52,522220,VILA BOA,1067.467466,8.330040e+06,8.330040e+06,"POLYGON ((-47.11416 -14.67412, -47.11289 -14.6..."
3555,2013,GO,52,522230,VILA PROPÃCIO,2189.954331,8.304597e+06,8.304597e+06,"POLYGON ((-48.75586 -14.86267, -48.75519 -14.8..."


In [78]:
# Save to file
gdf_2013.to_file(out_file.format(2013))

In [79]:
del gdf_2013

## Year 2015

In [80]:
# Get GeoDataFrame with all the states' municipalities
gdf_2015 = readSHP(2015, in_dir)
gdf_2015.head(2)

Unnamed: 0,NM_MUNICIP,CD_GEOCMU,geometry
0,CAIUÃ,3509106,"POLYGON ((-51.86001 -21.58072, -51.85892 -21.5..."
1,CASTILHO,3511003,"POLYGON ((-51.41936 -20.68108, -51.41942 -20.6..."


In [81]:
# Select relevant columns
gdf_2015 = gdf_2015[['CD_GEOCMU', 'NM_MUNICIP', 'geometry']]

In [82]:
# Rename the relevant columns
gdf_2015 = gdf_2015.rename(columns = {'CD_GEOCMU':'m_code', 'NM_MUNICIP':'m_name'})

In [83]:
# Add state codes, selecting only the relevant ones
gdf_2015 = addStateCodes(gdf_2015, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [84]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2015 = aggregate_municipalities(gdf_2015)

In [85]:
# Calculate area of each municipality
gdf_2015 = calculate_area(gdf_2015)

In [86]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2015 = get_latlon(gdf_2015)

In [87]:
# Correct municipality code
gdf_2015 = correct_m_code(gdf_2015)

In [88]:
# Adding year column
gdf_2015['year'] = 2015

In [89]:
# Ordering the columns
gdf_2015 = order_columns(gdf_2015)

In [90]:
# Checking the GeoDataFrame again
gdf_2015

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2015,SP,35,350910,CAIUÃ,551.451660,7.586836e+06,7.586836e+06,"POLYGON ((-51.86001 -21.58072, -51.85892 -21.5..."
1,2015,SP,35,351100,CASTILHO,1066.159482,7.686360e+06,7.686360e+06,"POLYGON ((-51.41936 -20.68108, -51.41942 -20.6..."
2,2015,SP,35,351440,DRACENA,488.065194,7.613347e+06,7.613347e+06,"POLYGON ((-51.48515 -21.42765, -51.48500 -21.4..."
3,2015,SP,35,351530,ESTRELA DO NORTE,265.175088,7.510560e+06,7.510560e+06,"POLYGON ((-51.57097 -22.39887, -51.57099 -22.3..."
4,2015,SP,35,351535,EUCLIDES DA CUNHA PAULISTA,574.044194,7.508048e+06,7.508048e+06,"POLYGON ((-52.44569 -22.60828, -52.44569 -22.6..."
...,...,...,...,...,...,...,...,...,...
5567,2015,RO,11,110155,TEIXEIRÃ“POLIS,464.641080,8.777374e+06,8.777374e+06,"POLYGON ((-62.12258 -10.89939, -62.12418 -10.9..."
5568,2015,RO,11,110160,THEOBROMA,2220.140924,8.868975e+06,8.868975e+06,"POLYGON ((-61.95442 -9.94968, -61.95552 -9.950..."
5569,2015,RO,11,110170,URUPÃ,840.564716,8.759001e+06,8.759001e+06,"POLYGON ((-62.15601 -11.02598, -62.15500 -11.0..."
5570,2015,RO,11,110175,VALE DO ANARI,3164.553121,8.913522e+06,8.913522e+06,"POLYGON ((-61.57464 -9.71779, -61.58190 -9.711..."


In [91]:
# Save to file
gdf_2015.to_file(out_file.format(2015))

In [92]:
del gdf_2015

## Year 2016

In [93]:
# Get GeoDataFrame with all the states' municipalities
gdf_2016 = readSHP(2016, in_dir)
gdf_2016.head(2)

Unnamed: 0,NM_MUNICIP,CD_GEOCMU,geometry
0,ALTA FLORESTA D'OESTE,1100015,"POLYGON ((-62.19465 -11.82746, -62.19332 -11.8..."
1,ARIQUEMES,1100023,"POLYGON ((-62.53648 -9.73222, -62.52765 -9.736..."


In [94]:
# Select relevant columns
gdf_2016 = gdf_2016[['CD_GEOCMU', 'NM_MUNICIP', 'geometry']]

In [95]:
# Rename the relevant columns
gdf_2016 = gdf_2016.rename(columns = {'CD_GEOCMU':'m_code', 'NM_MUNICIP':'m_name'})

In [96]:
# Add state codes, selecting only the relevant ones
gdf_2016 = addStateCodes(gdf_2016, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [97]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2016 = aggregate_municipalities(gdf_2016)

In [98]:
# Calculate area of each municipality
gdf_2016 = calculate_area(gdf_2016)

In [99]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2016 = get_latlon(gdf_2016)

In [100]:
# Correct municipality code
gdf_2016 = correct_m_code(gdf_2016)

In [101]:
# Adding year column
gdf_2016['year'] = 2016

In [102]:
# Ordering the columns
gdf_2016 = order_columns(gdf_2016)

In [103]:
# Checking the GeoDataFrame again
gdf_2016

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2016,RO,11,110001,ALTA FLORESTA D'OESTE,7137.806306,8.606916e+06,8.606916e+06,"POLYGON ((-62.19465 -11.82746, -62.19332 -11.8..."
1,2016,RO,11,110002,ARIQUEMES,4479.446407,8.886192e+06,8.886192e+06,"POLYGON ((-62.53648 -9.73222, -62.52765 -9.736..."
2,2016,RO,11,110003,CABIXI,1322.751023,8.500043e+06,8.500043e+06,"POLYGON ((-60.37075 -13.36363, -60.37134 -13.3..."
3,2016,RO,11,110004,CACOAL,3822.982349,8.740186e+06,8.740186e+06,"POLYGON ((-61.00080 -11.29737, -61.00103 -11.3..."
4,2016,RO,11,110005,CEREJEIRAS,2804.665258,8.528364e+06,8.528364e+06,"POLYGON ((-61.49976 -13.00525, -61.49809 -13.0..."
...,...,...,...,...,...,...,...,...,...
5567,2016,GO,52,522200,VIANÃ“POLIS,958.424274,8.132084e+06,8.132084e+06,"POLYGON ((-48.38114 -16.63141, -48.38058 -16.6..."
5568,2016,GO,52,522205,VICENTINÃ“POLIS,739.003152,8.034702e+06,8.034702e+06,"POLYGON ((-49.84456 -17.58262, -49.84357 -17.5..."
5569,2016,GO,52,522220,VILA BOA,1067.467473,8.330040e+06,8.330040e+06,"POLYGON ((-47.11416 -14.67412, -47.11289 -14.6..."
5570,2016,GO,52,522230,VILA PROPÃCIO,2189.954329,8.304597e+06,8.304597e+06,"POLYGON ((-48.75586 -14.86267, -48.75519 -14.8..."


In [104]:
# Save to file
gdf_2016.to_file(out_file.format(2016))

In [105]:
del gdf_2016

## Year 2017

In [106]:
# Get GeoDataFrame with all the states' municipalities
gdf_2017 = readSHP(2017, in_dir)
gdf_2017.head(2)

Unnamed: 0,NM_MUNICIP,CD_GEOCMU,geometry
0,ABADIA DE GOIÃS,5200050,"POLYGON ((-49.44440 -16.71747, -49.44435 -16.7..."
1,ABADIA DOS DOURADOS,3100104,"POLYGON ((-47.43840 -18.16570, -47.43588 -18.1..."


In [107]:
# Select relevant columns
gdf_2017 = gdf_2017[['CD_GEOCMU', 'NM_MUNICIP', 'geometry']]

In [108]:
# Rename the relevant columns
gdf_2017 = gdf_2017.rename(columns = {'CD_GEOCMU':'m_code', 'NM_MUNICIP':'m_name'})

In [109]:
# Add state codes, selecting only the relevant ones
gdf_2017 = addStateCodes(gdf_2017, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [110]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2017 = aggregate_municipalities(gdf_2017)

In [111]:
# Calculate area of each municipality
gdf_2017 = calculate_area(gdf_2017)

In [112]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2017 = get_latlon(gdf_2017)

In [113]:
# Correct municipality code
gdf_2017 = correct_m_code(gdf_2017)

In [114]:
# Adding year column
gdf_2017['year'] = 2017

In [115]:
# Ordering the columns
gdf_2017 = order_columns(gdf_2017)

In [116]:
# Checking the GeoDataFrame again
gdf_2017

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2017,GO,52,520005,ABADIA DE GOIÃS,147.683412,8.137880e+06,8.137880e+06,"POLYGON ((-49.44440 -16.71747, -49.44435 -16.7..."
1,2017,MG,31,310010,ABADIA DOS DOURADOS,886.267593,7.957506e+06,7.957506e+06,"POLYGON ((-47.43840 -18.16570, -47.43588 -18.1..."
2,2017,GO,52,520010,ABADIÃ‚NIA,1049.257426,8.202408e+06,8.202408e+06,"POLYGON ((-48.84178 -16.00488, -48.79639 -16.0..."
3,2017,MG,31,310020,ABAETÃ‰,1835.632746,7.864522e+06,7.864522e+06,"POLYGON ((-45.16777 -18.89073, -45.16719 -18.8..."
4,2017,PA,15,150010,ABAETETUBA,1616.861220,9.807907e+06,9.807907e+06,"POLYGON ((-48.83139 -1.56352, -48.82910 -1.571..."
...,...,...,...,...,...,...,...,...,...
5559,2017,PR,41,412880,XAMBRÃŠ,359.720270,7.373919e+06,7.373919e+06,"POLYGON ((-53.63219 -23.65218, -53.63069 -23.6..."
5566,2017,PA,15,150840,XINGUARA,3790.279503,9.232828e+06,9.232828e+06,"POLYGON ((-49.27592 -6.62299, -49.22760 -6.686..."
5567,2017,BA,29,293360,XIQUE-XIQUE,5174.110547,8.762483e+06,8.762483e+06,"POLYGON ((-42.38095 -10.24423, -42.37974 -10.2..."
5569,2017,SP,35,355715,ZACARIAS,319.718973,7.657934e+06,7.657934e+06,"POLYGON ((-49.99275 -21.03884, -49.99214 -21.0..."


In [117]:
# Save to file
gdf_2017.to_file(out_file.format(2017))

In [118]:
del gdf_2017

## Year 2018

In [119]:
# Get GeoDataFrame with all the states' municipalities
gdf_2018 = readSHP(2018, in_dir)
gdf_2018.head(2)

Unnamed: 0,NM_MUNICIP,CD_GEOCMU,geometry
0,CACHOEIRA DO PIRIÃ,1501956,"POLYGON ((-46.31955 -1.75518, -46.31990 -1.755..."
1,SÃƒO JOSÃ‰ DOS BASÃLIOS,2111250,"POLYGON ((-44.63409 -5.03498, -44.63407 -5.034..."


In [120]:
# Select relevant columns
gdf_2018 = gdf_2018[['CD_GEOCMU', 'NM_MUNICIP', 'geometry']]

In [121]:
# Rename the relevant columns
gdf_2018 = gdf_2018.rename(columns = {'CD_GEOCMU':'m_code', 'NM_MUNICIP':'m_name'})

In [122]:
# Add state codes, selecting only the relevant ones
gdf_2018 = addStateCodes(gdf_2018, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [123]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2018 = aggregate_municipalities(gdf_2018)

In [124]:
# Calculate area of each municipality
gdf_2018 = calculate_area(gdf_2018)

In [125]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2018 = get_latlon(gdf_2018)

In [126]:
# Correct municipality code
gdf_2018 = correct_m_code(gdf_2018)

In [127]:
# Adding year column
gdf_2018['year'] = 2018

In [128]:
# Ordering the columns
gdf_2018 = order_columns(gdf_2018)

In [129]:
# Checking the GeoDataFrame again
gdf_2018

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2018,PA,15,150195,CACHOEIRA DO PIRIÃ,2440.761083,9.778181e+06,9.778181e+06,"POLYGON ((-46.31955 -1.75518, -46.31990 -1.755..."
1,2018,MA,21,211125,SÃƒO JOSÃ‰ DOS BASÃLIOS,358.013235,9.435135e+06,9.435135e+06,"POLYGON ((-44.63409 -5.03498, -44.63407 -5.034..."
2,2018,GO,52,520145,APARECIDA DO RIO DOCE,602.757318,7.982857e+06,7.982857e+06,"POLYGON ((-51.14230 -18.29672, -51.14236 -18.2..."
3,2018,GO,52,520235,ARENÃ“POLIS,1075.480424,8.189717e+06,8.189717e+06,"POLYGON ((-51.69953 -16.23270, -51.69957 -16.2..."
4,2018,PA,15,150285,CURUÃ,1431.360952,9.795872e+06,9.795872e+06,"POLYGON ((-55.17193 -1.83331, -55.16647 -1.731..."
...,...,...,...,...,...,...,...,...,...
5564,2018,BA,29,290430,BREJÃ•ES,533.660284,8.511097e+06,8.511097e+06,"POLYGON ((-39.96092 -13.06972, -39.96092 -13.0..."
5565,2018,MG,31,314430,NANUQUE,1556.614850,7.981959e+06,7.981959e+06,"POLYGON ((-40.49485 -17.77412, -40.49531 -17.7..."
5566,2018,BA,29,290180,ANTÃ”NIO GONÃ‡ALVES,354.794917,8.794942e+06,8.794942e+06,"POLYGON ((-40.21792 -10.55208, -40.21585 -10.5..."
5567,2018,SP,35,353550,PARAGUAÃ‡U PAULISTA,1002.984784,7.511458e+06,7.511458e+06,"POLYGON ((-50.69420 -22.39885, -50.69410 -22.3..."


In [130]:
# Save to file
gdf_2018.to_file(out_file.format(2018))

In [131]:
del gdf_2018

## Year 2019

In [132]:
# Get GeoDataFrame with all the states' municipalities
gdf_2019 = readSHP(2019, in_dir)
gdf_2019.head(2)

Unnamed: 0,CD_MUN,NM_MUN,SIGLA_UF,AREA_KM2,geometry
0,1100015,Alta Floresta D'Oeste,RO,7067.025,"POLYGON ((-62.22630 -11.89037, -62.20670 -11.8..."
1,1100023,Ariquemes,RO,4426.571,"POLYGON ((-63.58751 -9.84984, -63.58715 -9.849..."


In [133]:
# Select relevant columns
gdf_2019 = gdf_2019[['CD_MUN', 'NM_MUN', 'AREA_KM2','geometry']]

In [134]:
# Rename the relevant columns
gdf_2019 = gdf_2019.rename(columns = {'CD_MUN':'m_code', 'NM_MUN':'m_name', 'AREA_KM2':'area_km2'})

In [135]:
# Add state codes, selecting only the relevant ones
gdf_2019 = addStateCodes(gdf_2019, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [136]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2019 = aggregate_municipalities(gdf_2019)

In [137]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2019 = get_latlon(gdf_2019)

In [138]:
# Correct municipality code
gdf_2019 = correct_m_code(gdf_2019)

In [139]:
# Adding year column
gdf_2019['year'] = 2019

In [140]:
# Ordering the columns
gdf_2019 = order_columns(gdf_2019)

In [141]:
# Checking the GeoDataFrame again
gdf_2019

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2019,RO,11,110001,Alta Floresta D'Oeste,7067.025,8.606916e+06,8.606916e+06,"POLYGON ((-62.22630 -11.89037, -62.20670 -11.8..."
1,2019,RO,11,110002,Ariquemes,4426.571,8.886192e+06,8.886192e+06,"POLYGON ((-63.58751 -9.84984, -63.58715 -9.849..."
2,2019,RO,11,110003,Cabixi,1314.352,8.500043e+06,8.500043e+06,"POLYGON ((-60.71834 -13.39058, -60.70904 -13.3..."
3,2019,RO,11,110004,Cacoal,3792.892,8.740186e+06,8.740186e+06,"POLYGON ((-61.50114 -11.30119, -61.50104 -11.2..."
4,2019,RO,11,110005,Cerejeiras,2783.300,8.528364e+06,8.528364e+06,"POLYGON ((-61.51346 -13.28575, -61.51534 -13.2..."
...,...,...,...,...,...,...,...,...,...
5567,2019,GO,52,522200,VianÃ³polis,954.284,8.132084e+06,8.132084e+06,"POLYGON ((-48.53842 -16.75003, -48.54051 -16.7..."
5568,2019,GO,52,522205,VicentinÃ³polis,737.255,8.034702e+06,8.034702e+06,"POLYGON ((-49.90713 -17.61460, -49.90588 -17.6..."
5569,2019,GO,52,522220,Vila Boa,1060.172,8.330040e+06,8.330040e+06,"POLYGON ((-47.07742 -15.06330, -47.07747 -15.0..."
5570,2019,GO,52,522230,Vila PropÃ­cio,2181.583,8.304597e+06,8.304597e+06,"POLYGON ((-48.91463 -15.20939, -48.91462 -15.2..."


In [142]:
# Save to file
gdf_2019.to_file(out_file.format(2019))

In [143]:
del gdf_2019

## Year 2020

In [144]:
# Get GeoDataFrame with all the states' municipalities
gdf_2020 = readSHP(2020, in_dir)
gdf_2020.head(2)

Unnamed: 0,CD_MUN,NM_MUN,SIGLA_UF,AREA_KM2,geometry
0,1100015,Alta Floresta D'Oeste,RO,7067.127,"POLYGON ((-62.19465 -11.82746, -62.19332 -11.8..."
1,1100023,Ariquemes,RO,4426.571,"POLYGON ((-62.53648 -9.73222, -62.52765 -9.736..."


In [145]:
# Select relevant columns
gdf_2020 = gdf_2020[['CD_MUN', 'NM_MUN', 'AREA_KM2','geometry']]

In [146]:
# Rename the relevant columns
gdf_2020 = gdf_2020.rename(columns = {'CD_MUN':'m_code', 'NM_MUN':'m_name', 'AREA_KM2':'area_km2'})

In [147]:
# Add state codes, selecting only the relevant ones
gdf_2020 = addStateCodes(gdf_2020, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super(GeoDataFrame, self).__setitem__(key, value)


In [148]:
# Before doing anything else, there might be municipalities that appear more than once. We aggregate their geometries into a 
# single row.
gdf_2020 = aggregate_municipalities(gdf_2020)

In [149]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_2020 = get_latlon(gdf_2020)

In [150]:
# Correct municipality code
gdf_2020 = correct_m_code(gdf_2020)

In [151]:
# Adding year column
gdf_2020['year'] = 2020

In [152]:
# Ordering the columns
gdf_2020 = order_columns(gdf_2020)

In [153]:
# Checking the GeoDataFrame again
gdf_2020

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,2020,RO,11,110001,Alta Floresta D'Oeste,7067.127,8.606922e+06,8.606922e+06,"POLYGON ((-62.19465 -11.82746, -62.19332 -11.8..."
1,2020,RO,11,110002,Ariquemes,4426.571,8.886192e+06,8.886192e+06,"POLYGON ((-62.53648 -9.73222, -62.52765 -9.736..."
2,2020,RO,11,110003,Cabixi,1314.352,8.500043e+06,8.500043e+06,"POLYGON ((-60.37119 -13.36655, -60.37134 -13.3..."
3,2020,RO,11,110004,Cacoal,3793.000,8.740187e+06,8.740187e+06,"POLYGON ((-61.00080 -11.29737, -61.00103 -11.3..."
4,2020,RO,11,110005,Cerejeiras,2783.300,8.528364e+06,8.528364e+06,"POLYGON ((-61.49976 -13.00525, -61.49809 -13.0..."
...,...,...,...,...,...,...,...,...,...
5565,2020,GO,52,522200,VianÃ³polis,954.284,8.132084e+06,8.132084e+06,"POLYGON ((-48.38114 -16.63141, -48.38058 -16.6..."
5566,2020,GO,52,522205,VicentinÃ³polis,737.255,8.034702e+06,8.034702e+06,"POLYGON ((-49.84456 -17.58262, -49.84357 -17.5..."
5567,2020,GO,52,522220,Vila Boa,1060.172,8.330040e+06,8.330040e+06,"POLYGON ((-47.11416 -14.67412, -47.11289 -14.6..."
5568,2020,GO,52,522230,Vila PropÃ­cio,2181.583,8.304597e+06,8.304597e+06,"POLYGON ((-48.75586 -14.86267, -48.75519 -14.8..."


In [154]:
# Save to file
gdf_2020.to_file(out_file.format(2020))

In [155]:
del gdf_2020

# Year 1991

In [13]:
# Read SHP file
gdf_1991 = gpd.read_file('../../data/raw/BRA_population/05_malha_municipal_1991/05-malha municipal 1991.shp')
gdf_1991.head(2)

Unnamed: 0,BR91POLY_I,NOMEMUNICP,geometry
0,1400407,NORMANDIA,"POLYGON ((-713352.505 556340.624, -713339.209 ..."
1,1400100,BOA VISTA,"POLYGON ((-713352.505 556340.624, -714720.572 ..."


In [14]:
# Rename columns
gdf_1991 = gdf_1991.rename(columns = {'BR91POLY_I':'m_code', 'NOMEMUNICP':'m_name'})

In [15]:
# Correct municipalities' code
gdf_1991 = correct_m_code(gdf_1991)

In [23]:
# Checking if duplicated and merging
gdf_1991 = aggregate_municipalities(gdf_1991)

There were duplicated municiaplities.


In [32]:
# Adding the states information
gdf_1991 = addStateCodes(gdf_1991, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [34]:
# Calculate area of each municipality
gdf_1991 = calculate_area(gdf_1991)

In [35]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_1991 = get_latlon(gdf_1991)

In [36]:
# Adding year column
gdf_1991['year'] = 1991

In [37]:
# Ordering the columns
gdf_1991 = order_columns(gdf_1991)

In [38]:
# Checking the GeoDataFrame again
gdf_1991

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
1,1991,RO,11,110001,ALTA FLORESTA D'OESTE,8643.900381,8.610290e+06,8.610290e+06,"POLYGON ((-894106.624 -1321512.696, -892240.57..."
2,1991,RO,11,110002,ARIQUEMES,14520.381508,8.896478e+06,8.896478e+06,"POLYGON ((-955417.091 -990340.198, -954103.839..."
3,1991,RO,11,110003,CABIXI,1987.588535,8.502898e+06,8.502898e+06,"POLYGON ((-688195.518 -1477799.099, -688365.12..."
4,1991,RO,11,110004,CACOAL,4699.655848,8.743491e+06,8.743491e+06,"POLYGON ((-825053.588 -1225176.945, -764976.70..."
5,1991,RO,11,110005,CEREJEIRAS,10798.148551,8.541746e+06,8.541746e+06,"POLYGON ((-797089.979 -1407434.906, -799229.49..."
...,...,...,...,...,...,...,...,...,...
4487,1991,GO,52,522180,URUTAI,632.098289,8.062210e+06,8.062210e+06,"POLYGON ((625270.725 -1920042.756, 628472.000 ..."
4488,1991,GO,52,522190,VARJAO,518.316883,8.105886e+06,8.105886e+06,"POLYGON ((481426.601 -1883495.983, 482427.897 ..."
4489,1991,GO,52,522200,VIANOPOLIS,957.436766,8.132213e+06,8.132213e+06,"POLYGON ((575558.017 -1879669.749, 575998.245 ..."
4490,1991,GO,52,522205,VICENTINOPOLIS,738.876438,8.034881e+06,8.034881e+06,"POLYGON ((432650.408 -1951835.635, 434540.366 ..."


In [49]:
# Save to file
gdf_1991.to_file('../../data/shapes/municipalities_shp/municipalities_1991.shp')

# Year 1980

In [91]:
# Read SHP file
gdf_1980 = gpd.read_file('../../data/raw/BRA_population/05_malha_municipal_1980/05-malha municipal 1980.shp')
gdf_1980.head(2)

Unnamed: 0,codigo,nome,geometry
0,3540408,Populina,"POLYGON ((354410.500 -2200448.000, 355431.906 ..."
1,3518008,Guarani d'Oeste,"POLYGON ((371553.688 -2192405.250, 375432.438 ..."


In [92]:
# Rename columns
gdf_1980 = gdf_1980.rename(columns = {'codigo':'m_code', 'nome':'m_name'})

In [93]:
# Correct municipalities' code
gdf_1980 = correct_m_code(gdf_1980)

In [94]:
# Checking if duplicated and merging
gdf_1980 = aggregate_municipalities(gdf_1980)

In [95]:
# Adding the states information
gdf_1980 = addStateCodes(gdf_1980, select = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  super().__setitem__(key, value)


In [96]:
# Calculate area of each municipality
gdf_1980 = calculate_area(gdf_1980)

In [97]:
# Calculating the latitude and longitude of the centroid of each municipality
gdf_1980 = get_latlon(gdf_1980)

In [98]:
# Adding year column
gdf_1980['year'] = 1980

In [99]:
# Ordering the columns
gdf_1980 = order_columns(gdf_1980)

In [100]:
# Checking the GeoDataFrame again
gdf_1980

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
0,1980,SP,35,354040,Populina,313.402395,7.794270e+06,7.794270e+06,"POLYGON ((354410.500 -2200448.000, 355431.906 ..."
1,1980,SP,35,351800,Guarani d'Oeste,374.567430,7.788618e+06,7.788618e+06,"POLYGON ((371553.687 -2192405.250, 375432.438 ..."
2,1980,SP,35,353590,Paranapuã,291.152283,7.783292e+06,7.783292e+06,"POLYGON ((354410.500 -2200448.000, 355143.750 ..."
3,1980,SP,35,352070,Indiaporã,278.596543,7.789151e+06,7.789151e+06,"POLYGON ((383700.063 -2201954.250, 386854.125 ..."
4,1980,SP,35,353000,Mira Estrela,216.302488,7.788427e+06,7.788427e+06,"POLYGON ((400752.187 -2202889.250, 402301.625 ..."
...,...,...,...,...,...,...,...,...,...
3980,1980,MA,21,210540,Itapecuru Mirim,1557.101055,9.614705e+06,9.614705e+06,"POLYGON ((1095155.250 -369277.531, 1107633.500..."
3985,1980,BA,29,291840,Juazeiro,7919.669851,8.913289e+06,8.913289e+06,"POLYGON ((1515874.375 -1030961.750, 1519898.00..."
3986,1980,PR,41,411580,Medianeira,1122.211604,7.200564e+06,7.200564e+06,"POLYGON ((-4955.584 -2781556.500, -3314.363 -2..."
3988,1980,MT,51,510170,Barra do Bugres,10512.049028,8.338239e+06,8.338239e+06,"POLYGON ((-338802.344 -1614383.750, -337243.15..."


After looking at the formatted file, I have discovered that Tocantins was not correctly coded. This is because Tocantins was created only in 1988 as a division of Goias. Hence, in the SHP of 1980, Tocantins is coded with the number 52, which is the coide of Goias. I need to correct for this.

Since I know that the only municipalities with missing population data in this SHP file after merging with the data file are those in Tocantins.

In [102]:
# Reading the data
data = pd.read_csv('../../data/raw/BRA_population/municipality_population_1981-2012.csv')

# Selecting relevant columns
data = data[['m_code', '1985']]

In [103]:
# Merging the data
gdf_1980 = gdf_1980.merge(data, on= 'm_code', how = 'left')

In [104]:
sum(gdf_1980['1985'].isna())

52

In [105]:
# All these rows, their code should be changed

In [106]:
gdf_1980.head()

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry,1985
0,1980,SP,35,354040,Populina,313.402395,7794270.0,7794270.0,"POLYGON ((354410.500 -2200448.000, 355431.906 ...",4695.0
1,1980,SP,35,351800,Guarani d'Oeste,374.56743,7788618.0,7788618.0,"POLYGON ((371553.687 -2192405.250, 375432.438 ...",7604.0
2,1980,SP,35,353590,Paranapuã,291.152283,7783292.0,7783292.0,"POLYGON ((354410.500 -2200448.000, 355143.750 ...",5787.0
3,1980,SP,35,352070,Indiaporã,278.596543,7789151.0,7789151.0,"POLYGON ((383700.063 -2201954.250, 386854.125 ...",5742.0
4,1980,SP,35,353000,Mira Estrela,216.302488,7788427.0,7788427.0,"POLYGON ((400752.187 -2202889.250, 402301.625 ...",2544.0


In [107]:
gdf_1980.loc[test['1985'].isna(), 's_code'] = 17
gdf_1980.loc[test['1985'].isna(), 'state'] = 'TO'
gdf_1980.loc[test['1985'].isna(), 'm_code'] = [int('17' + str(i)[2:]) for i in gdf_1980[gdf_1980['1985'].isna()]['m_code'].tolist()]

In [108]:
gdf_1980 = order_columns(gdf_1980)

In [109]:
# Checking it worked
gdf_1980[gdf_1980['state'] == 'TO'].head()

Unnamed: 0,year,state,s_code,m_code,m_name,area_km2,latitude,longitude,geometry
2020,1980,TO,17,172030,São Sebastião do Tocantins,1574.774265,9405265.0,9405265.0,"POLYGON ((652714.801 -585263.348, 656366.160 -..."
2021,1980,TO,17,170220,Araguatins,3052.074035,9369824.0,9369824.0,"POLYGON ((590266.664 -591491.596, 591776.101 -..."
2022,1980,TO,17,171070,Itaguatins,1823.240061,9366943.0,9366943.0,"POLYGON ((690711.821 -598153.519, 693369.244 -..."
2023,1980,TO,17,172080,Sítio Novo de Goiás,195.68847,9372621.0,9372621.0,"POLYGON ((682654.707 -622712.504, 701368.173 -..."
2024,1980,TO,17,170290,Axixá de Goiás,104.890063,9373473.0,9373473.0,"POLYGON ((682871.007 -624097.990, 682654.707 -..."


In [110]:
# Save to file
gdf_1980.to_file('../../data/shapes/municipalities_shp/municipalities_1980.shp')