# Caipora Project

__Goal__

To project fire hotspots across Brazilian territory using public satellite data provided by INPE – Queimadas, and to help prevent possible threats by means of predictive models.
 
__Data Source__

https://queimadas.dgi.inpe.br/queimadas/portal

https://openaq.org/#/

https://ipsamazonia.org.br/


__Data characteristics__


## Constants

In [None]:
# Lookup table of the 27 Brazilian federative units listed below, one record
# per unit with its two-letter acronym and its full (accented) name.
BRAZILIAN_STATES = [
    dict(acronym=acronym, name=name)
    for acronym, name in (
        ("AC", "Acre"),
        ("AL", "Alagoas"),
        ("AP", "Amapá"),
        ("AM", "Amazonas"),
        ("BA", "Bahia"),
        ("CE", "Ceará"),
        ("DF", "Distrito Federal"),
        ("ES", "Espírito Santo"),
        ("GO", "Goiás"),
        ("MA", "Maranhão"),
        ("MT", "Mato Grosso"),
        ("MS", "Mato Grosso do Sul"),
        ("MG", "Minas Gerais"),
        ("PA", "Pará"),
        ("PB", "Paraíba"),
        ("PR", "Paraná"),
        ("PE", "Pernambuco"),
        ("PI", "Piauí"),
        ("RJ", "Rio de Janeiro"),
        ("RN", "Rio Grande do Norte"),
        ("RS", "Rio Grande do Sul"),
        ("RO", "Rondônia"),
        ("RR", "Roraima"),
        ("SC", "Santa Catarina"),
        ("SP", "São Paulo"),
        ("SE", "Sergipe"),
        ("TO", "Tocantins"),
    )
]

## Data Preparation

### Amazon SPI

In [2]:
import re
import pandas as pd
from pathlib import Path

SPI_PATH = "data/spi/amazonia"

# Split the workbook into one CSV per year: every sheet whose name contains a
# year is exported to <SPI_PATH>/detailed/<year>.csv.
with pd.ExcelFile(f"{SPI_PATH}/ips_tabela_completa_modificada.xlsx") as xlsx:
    for sheetname in xlsx.sheet_names:
        # Pull the year out of the sheet name. The greedy ".*" prefix makes
        # the capture land on the last 4-digit run whose first digit is 1-3.
        year_match = re.search(r'.*([1-3][0-9]{3})', sheetname)
        if year_match is None:
            # Sheet name carries no year: nothing to export for it.
            continue
        select_year = int(year_match.group(1))

        # Load the sheet referenced by sheetname and trim stray whitespace
        # from its column headers.
        dataset = pd.read_excel(xlsx, sheetname)
        dataset = dataset.rename(columns=lambda label: label.strip())

        # Add the year as the first column ("Ano").
        dataset.insert(0, "Ano", select_year)

        # Give the column now at position 4 (position 3 before "Ano" was
        # inserted) a uniform name; presumably the source header ends with
        # the year, which would differ from sheet to sheet.
        ips_column = dataset.columns[4]
        dataset = dataset.rename(columns={ips_column: "IPS Amazônia"})

        # Write the sheet to a CSV named after its year, creating the
        # destination folder on demand.
        detailed_filepath = Path(SPI_PATH) / "detailed" / f"{select_year}.csv"
        detailed_filepath.parent.mkdir(parents=True, exist_ok=True)
        dataset.to_csv(detailed_filepath, index=False)

### Hotspot

In [None]:
# Build one hotspot CSV per year for the state of Tocantins.
#
# For each year in HOTSPOT_YEARS the twelve monthly files
# data/<year>/<year>-<MM>.csv are concatenated, reduced to the rows whose
# "estado" column equals HOTSPOT_STATE, and written to
# HOTSPOT_OUTPUT_DIR/hotspots_<year>.csv.
#
# This replaces ten copy-pasted stanzas (one per year) with a single loop, so
# adding a year only means widening the range. After the loop, `files` and
# `dataset` hold the values for the last year processed, as before.
HOTSPOT_YEARS = range(2012, 2022)  # 2012 through 2021, inclusive
HOTSPOT_STATE = "TOCANTINS"
HOTSPOT_OUTPUT_DIR = Path("data/hotspot/bra/to")

# DataFrame.to_csv does not create missing parent directories, so make sure
# the destination exists before the first write.
HOTSPOT_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

for year in HOTSPOT_YEARS:
    # Monthly inputs, January to December, with zero-padded month numbers.
    files = [f"data/{year}/{year}-{month:02d}.csv" for month in range(1, 13)]

    # Stack the twelve months into one frame with a fresh 0..n-1 index.
    dataset = pd.concat(map(pd.read_csv, files), ignore_index=True)

    # Keep only the hotspots recorded for the selected state.
    dataset = dataset[dataset.estado == HOTSPOT_STATE]

    dataset.to_csv(HOTSPOT_OUTPUT_DIR / f"hotspots_{year}.csv", index=False)