# creating smaller dataframes

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import matplotlib.dates as mdates
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import time

In [2]:
df2014 = pd.read_csv('../../dataset/data2014.csv', index_col = 'datahora', usecols = ['riscofogo', 'precipitacao', 'bioma', 'datahora', 'latitude', 'longitude', 'satelite'], parse_dates = True)
df2015 = pd.read_csv('../../dataset/data2015.csv', index_col = 'datahora', usecols = ['riscofogo', 'precipitacao', 'bioma', 'datahora', 'latitude', 'longitude', 'satelite'], parse_dates = True)
df2016 = pd.read_csv('../../dataset/data2016.csv', index_col = 'datahora', usecols = ['riscofogo', 'precipitacao', 'bioma', 'datahora', 'latitude', 'longitude', 'satelite'], parse_dates = True)
df2017 = pd.read_csv('../../dataset/data2017.csv', index_col = 'datahora', usecols = ['riscofogo', 'precipitacao', 'bioma', 'datahora', 'latitude', 'longitude', 'satelite'], parse_dates = True)
df2018 = pd.read_csv('../../dataset/data2018.csv', index_col = 'datahora', usecols = ['riscofogo', 'precipitacao', 'bioma', 'datahora', 'latitude', 'longitude', 'satelite'], parse_dates = True)
df2019 = pd.read_csv('../../dataset/data2019.csv', index_col = 'datahora', usecols = ['riscofogo', 'precipitacao', 'bioma', 'datahora', 'latitude', 'longitude', 'satelite'], parse_dates = True)
df2020 = pd.read_csv('../../dataset/data2020.csv', index_col = 'datahora', usecols = ['riscofogo', 'precipitacao', 'bioma', 'datahora', 'latitude', 'longitude', 'satelite'], parse_dates = True)

In [6]:
dataframes = [df2014, df2015, df2016, df2017, df2018, df2019, df2020]
biomes = ['Amazonia', 'Caatinga', 'Pantanal', 'Mata Atlantica', 'Pampa', 'Cerrado']

Accordingly to *Validação de focos de queimadas no Cerrado emimagens TM/Landsat-5*
by S. C. de Jesus, A. W. Setzer and F. Morelli, researchers from INPE have developed
an algorithm implemented in the data acquisition from satellites *AQUA* and *TERRA*
in an attempt to minimize the mislabeling of focus fire. Therefore we will use these
data.

In [8]:
for dataframe in dataframes:
    dataframe = dataframe[(dataframe.satelite == 'AQUA_M-T') | (dataframe.satelite == 'AQUA_M-M') |
                         (dataframe.satelite == 'TERRA_M-T') | (dataframe.satelite == 'TERRA_M-M')]
    dataframe.drop(columns = ['satelite'])

In [9]:
for dataframe in dataframes:
    for biome in biomes:
        
        df_yb = dataframe[(dataframe.bioma == biome) & (dataframe.riscofogo == 1)].copy()
        df_yb = df_yb.resample('D')[['riscofogo', 'precipitacao']].sum()
        year = str(dataframe.index[0].year)
        df_yb.to_csv('cleaned-datasets/timeseries{}{}.csv'.format(year, biome))
        

In [10]:
df_map = pd.read_csv('../../dataset/data2014.csv', usecols = ['latitude', 'longitude', 'bioma'], parse_dates = True) 

In [11]:
df_map = df_map.round(2)
print (len(df_map))
df_map = df_map.drop_duplicates()
print (len(df_map))

1855695
730029


In [12]:
df_map.to_csv('cleaned-datasets/data-map.csv')

In [13]:
for dataframe in dataframes:
    df = dataframe[['riscofogo', 'latitude', 'longitude']].copy().round(2)
    df = df[df.riscofogo == 1]
    year = df.index.year[0]
    df.to_csv('cleaned-datasets/entire_country{}.csv'.format(year))
    print (df.head())

                     riscofogo  latitude  longitude
datahora                                           
2014-10-14 17:21:00        1.0    -20.68     -46.52
2014-10-31 17:06:00        1.0     -4.41     -44.51
2014-11-12 16:41:00        1.0     -3.70     -44.12
2014-10-28 15:45:00        1.0     -3.65     -44.64
2014-11-01 16:47:00        1.0     -5.20     -45.56
                     riscofogo  latitude  longitude
datahora                                           
2015-10-25 05:37:48        1.0     -1.30     -53.52
2015-09-18 20:46:27        1.0     -6.44     -44.13
2015-12-29 17:30:00        1.0     -5.02     -50.48
2015-09-04 14:00:00        1.0    -10.74     -50.66
2015-12-29 15:00:00        1.0     -4.30     -46.85
                     riscofogo  latitude  longitude
datahora                                           
2016-07-25 16:45:00        1.0    -10.51     -50.44
2016-10-16 17:54:09        1.0     -5.52     -42.92
2016-09-12 05:13:00        1.0    -11.66     -53.61
2016-09-12 0

In [17]:
for dataframe in dataframes:
    for biome in biomes:
        df = dataframe[['riscofogo', 'latitude', 'longitude', 'bioma']].copy().round(2)
        df = df[df.riscofogo == 1]
        df = df[df.bioma == biome]
        year = df.index.year[0]
        df.to_csv('cleaned-datasets/lat_long{}{}.csv'.format(year,biome))
        print (biome, year)
        print (df.head(3))
        print (' ')
        print (' ')
        

Amazonia 2014
                     riscofogo  latitude  longitude     bioma
datahora                                                     
2014-08-20 17:42:00        1.0     -8.49     -61.48  Amazonia
2014-08-20 17:41:00        1.0    -11.64     -54.55  Amazonia
2014-08-01 21:00:00        1.0     -5.08     -48.37  Amazonia
 
 
Caatinga 2014
                     riscofogo  latitude  longitude     bioma
datahora                                                     
2014-11-03 16:08:00        1.0     -7.76     -41.15  Caatinga
2014-11-09 15:54:00        1.0    -14.33     -41.52  Caatinga
2014-10-29 16:02:00        1.0     -7.61     -37.97  Caatinga
 
 
Pantanal 2014
                     riscofogo  latitude  longitude     bioma
datahora                                                     
2014-10-23 20:01:59        1.0    -18.70     -56.93  Pantanal
2014-10-13 17:39:00        1.0    -22.09     -57.76  Pantanal
2014-08-20 17:39:00        1.0    -19.38     -56.56  Pantanal
 
 
Mata Atlantica 2

Caatinga 2018
                     riscofogo  latitude  longitude     bioma
datahora                                                     
2018-01-02 21:04:48        1.0     -6.33     -39.59  Caatinga
2018-01-02 16:24:00        1.0     -6.37     -39.22  Caatinga
2018-01-02 19:34:51        1.0     -5.91     -39.78  Caatinga
 
 
Pantanal 2018
                     riscofogo  latitude  longitude     bioma
datahora                                                     
2018-02-06 17:06:00        1.0    -19.35     -57.59  Pantanal
2018-02-07 01:25:29        1.0    -19.03     -55.32  Pantanal
2018-02-15 17:36:00        1.0    -22.04     -57.74  Pantanal
 
 
Mata Atlantica 2018
                     riscofogo  latitude  longitude           bioma
datahora                                                           
2018-01-02 16:18:00        1.0    -12.97     -39.62  Mata Atlantica
2018-01-02 12:25:00        1.0     -9.84     -36.09  Mata Atlantica
2018-01-02 16:04:08        1.0    -12.61     -39.26 