In [1]:
import geopandas as gpd
import pandas as pd
import matplotlib
import keplergl

# Load the Brazilian state boundaries (GeoJSON) and the fire-count table (CSV).
states = gpd.read_file('./data/brazil-states.geojson')
fire_data = pd.read_csv('./data/amazon.csv')

# Lift the row limit for displayed frames. ``None`` is the value pandas
# documents for "no limit"; it replaces an arbitrary huge integer sentinel.
pd.options.display.max_rows = None

# Preview the first rows of the raw fire data.
fire_data.head()

Unnamed: 0,year,state,month,number,date
0,1998,Acre,Janeiro,0.0,1998-01-01
1,1999,Acre,Janeiro,0.0,1999-01-01
2,2000,Acre,Janeiro,0.0,2000-01-01
3,2001,Acre,Janeiro,0.0,2001-01-01
4,2002,Acre,Janeiro,0.0,2002-01-01


In [2]:
# Keep only the columns used from here on. ``.copy()`` makes the result an
# independent frame, so the column assignments performed in later cells do not
# trigger pandas' SettingWithCopyWarning.
fire_data = fire_data[['date', 'state', 'number']].copy()
fire_data.head()

Unnamed: 0,date,state,number
0,1998-01-01,Acre,0.0
1,1999-01-01,Acre,0.0
2,2000-01-01,Acre,0.0
3,2001-01-01,Acre,0.0
4,2002-01-01,Acre,0.0


In [3]:
# Convert the ``date`` column to pandas datetimes.
fire_data['date'] = fire_data['date'].pipe(pd.to_datetime)

# Show the dtype of every column after the conversion.
fire_data.dtypes

date      datetime64[ns]
state             object
number           float64
dtype: object

In [4]:
# List the distinct state labels present in the fire data.
pd.unique(fire_data['state'])

array(['Acre', 'Alagoas', 'Amapa', 'Amazonas', 'Bahia', 'Ceara',
       'Distrito Federal', 'Espirito Santo', 'Goias', 'Maranhao',
       'Mato Grosso', 'Minas Gerais', 'Par�', 'Paraiba', 'Pernambuco',
       'Piau', 'Rio', 'Rondonia', 'Roraima', 'Santa Catarina',
       'Sao Paulo', 'Sergipe', 'Tocantins'], dtype=object)

In [5]:
# Reduce the state boundaries to the two columns used below: the state name
# (the key the fire data is merged on) and its geometry.
states = states[['name', 'geometry']]
# Display up to 30 rows of the reduced frame.
states.head(30)

Unnamed: 0,name,geometry
0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ..."
1,Alagoas,"MULTIPOLYGON (((-35.46516 -8.82549, -35.46626 ..."
2,Amazonas,"MULTIPOLYGON (((-67.32553 2.03012, -67.32234 2..."
3,Amapá,"MULTIPOLYGON (((-51.18168 4.00889, -51.17900 3..."
4,Bahia,"MULTIPOLYGON (((-39.28820 -8.56290, -39.28229 ..."
5,Ceará,"MULTIPOLYGON (((-40.01804 -2.83698, -40.00587 ..."
6,Espírito Santo,"MULTIPOLYGON (((-40.72383 -20.84178, -40.72804..."
7,Goiás,"MULTIPOLYGON (((-50.15776 -12.41195, -50.15901..."
8,Maranhão,"MULTIPOLYGON (((-47.03102 -8.98549, -47.02113 ..."
9,Minas Gerais,"MULTIPOLYGON (((-44.20886 -14.24413, -44.20871..."


In [6]:
# Labels as they appear in fire_data['state'], mapped to the spelling used in
# states['name'], so that the two tables can be merged on the state name.
# NOTE(review): 'Rio' is mapped to 'Rio de Janeiro'; confirm the source data
# does not fold several "Rio ..." states into that single label.
STATE_NAME_FIXES = {
    'Amapa': 'Amapá',
    'Ceara': 'Ceará',
    'Espirito Santo': 'Espírito Santo',
    'Goias': 'Goiás',
    'Maranhao': 'Maranhão',
    'Par�': 'Pará',
    'Paraiba': 'Paraíba',
    'Piau': 'Piauí',
    'Rio': 'Rio de Janeiro',
    'Rondonia': 'Rondônia',
    'Sao Paulo': 'São Paulo',
}

# ``replace`` with a dict swaps whole values on exact match only; labels that
# are not listed are left untouched. ``assign`` returns a new frame instead of
# writing into the existing one.
fire_data = fire_data.assign(state=fire_data['state'].replace(STATE_NAME_FIXES))

# Show the first 500 rows to eyeball the corrected labels.
fire_data.head(500)

Unnamed: 0,date,state,number
0,1998-01-01,Acre,0.0
1,1999-01-01,Acre,0.0
2,2000-01-01,Acre,0.0
3,2001-01-01,Acre,0.0
4,2002-01-01,Acre,0.0
5,2003-01-01,Acre,10.0
6,2004-01-01,Acre,0.0
7,2005-01-01,Acre,12.0
8,2006-01-01,Acre,4.0
9,2007-01-01,Acre,0.0


In [7]:
# Inspect the coordinate reference system attached to the state boundaries.
states.crs

{'init': 'epsg:4326'}

In [8]:
# Row and column count of the fire data before it is merged with the states.
fire_data.shape

(6454, 3)

In [9]:
# Attach each state's geometry to its fire records.
#  - how='left' keeps every fire record, matched or not.
#  - indicator=True adds a ``_merge`` column recording whether a state matched.
#  - validate='many_to_one' makes pandas raise a MergeError if a state name
#    occurs more than once in ``states``, which would otherwise silently
#    duplicate fire records.
fdm_merged = fire_data.merge(
    states,
    how='left',
    left_on='state',
    right_on='name',
    indicator=True,
    validate='many_to_one',
)
fdm_merged.head()

Unnamed: 0,date,state,number,name,geometry,_merge
0,1998-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
1,1999-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
2,2000-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
3,2001-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
4,2002-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both


In [10]:
# Renumber the rows 0..n-1, discarding the previous index.
fdm_merged = fdm_merged.reset_index(drop=True)
# State labels of fire records that found no matching row in ``states``;
# an empty array means every record was matched.
fdm_merged.loc[fdm_merged['_merge'].eq('left_only'), 'state'].unique()

array([], dtype=object)

In [11]:
# Row and column count of the merged frame.
fdm_merged.shape

(6454, 6)

In [12]:
# Show which class the merge result is an instance of.
type(fdm_merged)

pandas.core.frame.DataFrame

In [13]:
# Wrap the merged table in a GeoDataFrame, using its ``geometry`` column as the
# active geometry.
fdm2 = gpd.GeoDataFrame(data=fdm_merged, geometry='geometry')

In [14]:
# Preview the first rows of the newly built GeoDataFrame.
fdm2.head()

Unnamed: 0,date,state,number,name,geometry,_merge
0,1998-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
1,1999-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
2,2000-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
3,2001-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both
4,2002-01-01,Acre,0.0,Acre,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ...",both


In [15]:
# Give the merged GeoDataFrame the same CRS as the state boundaries its
# geometries came from, then display it to confirm.
fdm2.crs = states.crs
fdm2.crs

{'init': 'epsg:4326'}

In [16]:
# Remove the two columns that only served the merge: ``name`` (the right-hand
# join key) and ``_merge`` (the match indicator).
fdm2 = fdm2.drop(columns=['name', '_merge'])

In [17]:
# Preview the frame after the merge helper columns were dropped.
fdm2.head()

Unnamed: 0,date,state,number,geometry
0,1998-01-01,Acre,0.0,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ..."
1,1999-01-01,Acre,0.0,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ..."
2,2000-01-01,Acre,0.0,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ..."
3,2001-01-01,Acre,0.0,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ..."
4,2002-01-01,Acre,0.0,"MULTIPOLYGON (((-73.60818 -7.20194, -72.86963 ..."


In [18]:
# Create a Kepler.gl map widget with height 300 (presumably pixels).
w1 = keplergl.KeplerGl(height=300)

User Guide: https://github.com/keplergl/kepler.gl/blob/master/docs/keplergl-jupyter/user-guide.md


In [None]:
# Hand the merged GeoDataFrame to the Kepler.gl widget as a dataset named "fires".
w1.add_data(data=fdm2, name="fires")

In [None]:
# Export the merged records through the CSV driver. GDAL's CSV driver discards
# feature geometry unless the GEOMETRY layer-creation option is set, so ask for
# WKT output to keep the state polygons in the exported file.
fdm2.to_file("fires_by_year.csv", driver='CSV', GEOMETRY='AS_WKT')