## Pré processamento dos dados

### Bibliotecas

In [1]:
#Bibliotecas
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

#Estilizar conteúdo
warnings.filterwarnings('ignore')

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

def estilo_tabelas(df, max_altura='300px', casas_decimais=3):
    def cor_linhas(row):
        if row.name % 2 == 0:
            return ['background-color: #ffffff'] * len(row)
        else:
            return ['background-color: #f9f9f9'] * len(row)
    
    return (
        df.style.apply(cor_linhas, axis=1) 
        .set_table_styles(
            [
                {'selector': 'thead th', 'props': [('font-size', '12px'), ('text-align', 'center'), ('border-bottom', '2px solid #007BFF')]},  # Azul abaixo do nome das colunas
                {'selector': 'td', 'props': [('font-size', '10px'), ('text-align', 'center'), ('max-height', '40px'), ('white-space', 'nowrap'), ('text-overflow', 'ellipsis'), ('overflow', 'hidden'), ('max-width', '300px')]},
                {'selector': 'table', 'props': [('width', '90%'), ('margin-left', 'auto'), ('margin-right', 'auto'), ('border-collapse', 'collapse')]},
                {'selector': 'td, th', 'props': [('border', '1px solid #666')]},
            ]
        )
        .set_properties(
            **{'border-color': 'darkgray', 'border-style': 'solid', 'border-width': '1px'}
        )
        .set_table_attributes(
            f'style="height:auto; overflow:auto; max-height:{max_altura}; display:block;"'  
        )
        .format(
            precision=casas_decimais  
        )
    )

### Base de dados

In [2]:
object_columns = ['Protocolo_S2iD', 'Nome_Municipio', 'Sigla_UF', 'regiao',
                  'Setores Censitários', 'Status', 'DH_Descricao', 'DM_Descricao',
                  'DA_Descricao', 'DA_Polui/cont da água', 'DA_Polui/cont do ar',
                  'DA_Polui/cont do solo', 'DA_Dimi/exauri hídrico',
                  "DA_Incêndi parques/APA's/APP's", 'PEPL_Descricao', 'PEPR_Descricao',
                  'Categoria', 'Grupo', 'Subgrupo', 'Tipo', 'Subtipo']

dtype = {col: 'object' for col in object_columns}

df_eventos = pd.read_csv(
    "https://raw.githubusercontent.com/brunagmoura/PrevisorReconhecimento/refs/heads/main/df_eventos_desastres_rec_nrec.csv",
    sep=';',
    dtype = dtype,
    decimal=',',
)

estilo_tabelas(df_eventos.head(5))

Unnamed: 0,Protocolo_S2iD,Nome_Municipio,Sigla_UF,regiao,Data_Registro,Data_Evento,codigo_ibge,Setores Censitários,Status,DH_Descricao,DH_MORTOS,DH_FERIDOS,DH_ENFERMOS,DH_DESABRIGADOS,DH_DESALOJADOS,DH_DESAPARECIDOS,DH_OUTROS AFETADOS,DH_total_danos_humanos,DM_Descricao,DM_Uni Habita Danificadas,DM_Uni Habita Destruidas,DM_Uni Habita Valor,DM_Inst Saúde Danificadas,DM_Inst Saúde Destruidas,DM_Inst Saúde Valor,DM_Inst Ensino Danificadas,DM_Inst Ensino Destruidas,DM_Inst Ensino Valor,DM_Inst Serviços Danificadas,DM_Inst Serviços Destruidas,DM_Inst Serviços Valor,DM_Inst Comuni Danificadas,DM_Inst Comuni Destruidas,DM_Inst Comuni Valor,DM_Obras de Infra Danificadas,DM_Obras de Infra Destruidas,DM_Obras de Infra Valor,DM_total_danos_materiais,DA_Descricao,DA_Polui/cont da água,DA_Polui/cont do ar,DA_Polui/cont do solo,DA_Dimi/exauri hídrico,DA_Incêndi parques/APA's/APP's,PEPL_Descricao,PEPL_Assis_méd e emergên(R$),PEPL_Abast de água pot(R$),PEPL_sist de esgotos sanit(R$),PEPL_Sis limp e rec lixo (R$),PEPL_Sis cont pragas (R$),PEPL_distrib energia (R$),PEPL_Telecomunicações (R$),PEPL_Tran loc/reg/l_curso (R$),PEPL_Distrib combustíveis(R$),PEPL_Segurança pública (R$),PEPL_Ensino (R$),PEPL_total_publico,PEPR_Descricao,PEPR_Agricultura (R$),PEPR_Pecuária (R$),PEPR_Indústria (R$),PEPR_Comércio (R$),PEPR_Serviços (R$),PEPR_total_privado,PE_PLePR,Ano_Evento,Empenhado,DensidadePop,Area,Município - UF,PIB,DOMICILIO_AREARURAL,PDEFAGUA,PDEFESGOTO,PDEFLIXO,PDEFSAN,QTDE_FAMILIAS_ATUALIZADAS,Categoria,Grupo,Subgrupo,Tipo,Subtipo,COBRADE,Pop
0,SP-A-3550001-12200-20100101,São Luiz do Paraitinga,SP,Sudeste,2010-01-01,2010-01-01,3550001,,Reconhecido,,0,0,0,93,4030,0,16,4139,,49,20,20643486.932,3.0,0,1121928.638,9.0,1,5235666.976,0,0,0.0,3,3,32411.272,25.0,0.0,65321178.458,92354672.275,,,,,,,,7978.159,673157.183,623293.688,2493.175,0.0,330993.88,3739.762,58609.552,0.0,0.0,1087024.191,2787289.589,,1642902.433,1149353.56,379785.31,0.0,24433.113,3196474.416,5983764.005,2010,,,,São Luiz do Paraitinga - SP,86448.0,,,,,,,Natural,Hidrológico,Enxurradas,,,12200,10397.0
1,SP-A-3518305-11321-20100101,Guararema,SP,Sudeste,2010-01-01,2010-01-01,3518305,,Reconhecido,,4,2,0,66,417,0,14430,14919,,161,91,18604069.987,0.0,0,0.0,2.0,0,24931.748,0,0,0.0,0,0,37397.621,9000.0,0.0,57771845.314,76438244.669,,,,,,,,0.0,179508.582,264276.524,12465.874,0.0,349044.465,74795.243,134631.437,0.0,0.0,77288.417,1092010.541,,441291.931,2049389.645,0.0,0.0,112192.889,2602874.464,3694885.005,2010,,,,Guararema - SP,1111405.0,,,,,,,Natural,Geológico,Movimento de massa,Deslizamentos,Deslizamentos de solo e ou rocha,11321,25844.0
2,BA-A-2905909-14110-20100101,Campo Alegre de Lourdes,BA,Nordeste,2010-01-01,2010-01-01,2905909,,Reconhecido,,0,0,0,0,0,0,0,0,,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,,,,,,,,747.952,448771.455,0.0,0.0,0.0,0.0,0.0,249317.475,0.0,0.0,177015.407,875852.29,,254303.825,147097.31,0.0,0.0,0.0,401401.135,1277253.425,2010,,9.638,2914.587,Campo Alegre de Lourdes - BA,107459.0,,,,,,,Natural,Climatológico,Seca,Estiagem,,14110,28090.0
3,SP-A-3513603-12100-20100101,Cunha,SP,Sudeste,2010-01-01,2010-01-01,3513603,,Reconhecido,,6,0,0,38,492,0,12000,12536,,89,9,3340854.165,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,3.0,400.0,8052097.614,11392951.779,,,,,,,,0.0,1246.587,0.0,0.0,0.0,1994539.8,0.0,125656.007,0.0,0.0,0.0,2121442.395,,49863.495,1383711.986,0.0,0.0,925466.467,2359041.949,4480484.344,2010,,15.538,1407.25,Cunha - SP,112225.0,,,,,,,Natural,Hidrológico,Inundações,,,12100,21866.0
4,BA-P-2917334-12200-20100101,Iuiu,BA,Nordeste,2010-01-01,2010-01-01,2917334,,Reconhecido,,0,0,0,0,0,0,0,0,,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2010,,7.147,1525.142,Iuiu - BA,50315.0,,,,,,,Natural,Hidrológico,Enxurradas,,,12200,10900.0


### Exclusão de variáveis

#### Exclusão de variáveis que não serão adicionadas ao modelo

As variáveis "Protocolo_S2iD", 'Nome_Municipio', 'Data_Registro', 'Data_Evento', 'Município - UF' não adicionam informações relevantes ao modelo.

As variáveis 'DH_Descricao', 'DM_Descricao', 'DA_Descricao', 'PEPL_Descricao', 'PEPR_Descricao' se referem à descrição dos danos informados pelos municípios e serão tratados em um modelo separado de NLP.

As variáveis 'Categoria', 'Grupo', 'Subgrupo', 'Tipo', 'Subtipo' são originadas da variável "COBRADE", a qual será utilizada para representar essas informações.

As variáveis 'Area', 'PIB' e 'Pop' foram substituídas pelos índices "Rendapercapita" (PIB/População) e "DensidadePop" (Área/População).

A variável PE_PLePR (prejuízo econômico do setor público e do setor privado) foi dividida entre "Prejuízo econômico do setor público" (PEPL_total_publico) e "Prejuízo econômico do setor privado" (PEPR_total_privado).

In [3]:
#Criar a variável Rendapercapita como proxy para a renda do município

df_eventos['Rendapercapita'] = df_eventos['PIB'] / df_eventos['Pop']

#Excluir variáveis que não serão utilizadas nos modelos

df_eventos = df_eventos.drop(
    ['Protocolo_S2iD', 'Nome_Municipio', 'Data_Registro', 'Data_Evento', 'Município - UF', 'codigo_ibge', 'DH_Descricao', 'DM_Descricao', 'DA_Descricao', 'PEPL_Descricao', 'PEPR_Descricao',
     'Categoria', 'Grupo', 'Subgrupo', 'Tipo', 'Subtipo', 'Setores Censitários', 'Area', 'PIB', 'Pop', 'PE_PLePR'], axis=1)

#### Exclusão das variáveis com mais de 50% de dados ausentes

Na base de dados há variáveis que são ausentes em mais de 50% das variáveis, conforme capítulo "Análise exploratória de dados - Dados ausentes".

In [4]:
df_na_counts = df_eventos.isna().sum().reset_index().rename(columns={0: 'Qtde. dados ausentes', 'index': 'Variável'})
df_na_counts['% de dados ausentes'] = (df_na_counts['Qtde. dados ausentes'] / len(df_eventos)) * 100
colunas_mantidas = df_na_counts[df_na_counts['% de dados ausentes'] < 50]['Variável']
df_eventos = df_eventos[colunas_mantidas]
print("Colunas mantidas: ", df_eventos.columns)

Colunas mantidas:  Index(['Sigla_UF', 'regiao', 'Status', 'DH_MORTOS', 'DH_FERIDOS',
       'DH_ENFERMOS', 'DH_DESABRIGADOS', 'DH_DESALOJADOS', 'DH_DESAPARECIDOS',
       'DH_OUTROS AFETADOS', 'DH_total_danos_humanos',
       'DM_Uni Habita Danificadas', 'DM_Uni Habita Destruidas',
       'DM_Uni Habita Valor', 'DM_Inst Saúde Danificadas',
       'DM_Inst Saúde Destruidas', 'DM_Inst Saúde Valor',
       'DM_Inst Ensino Danificadas', 'DM_Inst Ensino Destruidas',
       'DM_Inst Ensino Valor', 'DM_Inst Serviços Danificadas',
       'DM_Inst Serviços Destruidas', 'DM_Inst Serviços Valor',
       'DM_Inst Comuni Danificadas', 'DM_Inst Comuni Destruidas',
       'DM_Inst Comuni Valor', 'DM_Obras de Infra Danificadas',
       'DM_Obras de Infra Destruidas', 'DM_Obras de Infra Valor',
       'DM_total_danos_materiais', 'PEPL_Assis_méd e emergên(R$)',
       'PEPL_Abast de água pot(R$)', 'PEPL_sist de esgotos sanit(R$)',
       'PEPL_Sis limp e rec lixo (R$)', 'PEPL_Sis cont pragas (R$)',
    

### Encoding das variáveis categóricas

#### One hot enconding

Aplicação do "one hot encoding" nas variáveis "Sigla_UF", "regiao" e "COBRADE". 

In [5]:
encoder = OneHotEncoder(sparse_output=False, drop=None)  
categorias = ['Sigla_UF', 'regiao', 'COBRADE']
one_hot_encoded = encoder.fit_transform(df_eventos[categorias])

one_hot_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out(categorias))

df_eventos = pd.concat([df_eventos.drop(columns=categorias), one_hot_df], axis=1)

#### Enconding da variável dependente "Status"

 Atribuição do valor 0 para o Status "Reconhecido" e "1" para o Status "Não reconhecido".

In [6]:
df_eventos['Status'] = df_eventos['Status'].map({'Reconhecido': 0, 'Não reconhecido': 1})

In [7]:
df_eventos.head(5)

Unnamed: 0,Status,DH_MORTOS,DH_FERIDOS,DH_ENFERMOS,DH_DESABRIGADOS,DH_DESALOJADOS,DH_DESAPARECIDOS,DH_OUTROS AFETADOS,DH_total_danos_humanos,DM_Uni Habita Danificadas,DM_Uni Habita Destruidas,DM_Uni Habita Valor,DM_Inst Saúde Danificadas,DM_Inst Saúde Destruidas,DM_Inst Saúde Valor,DM_Inst Ensino Danificadas,DM_Inst Ensino Destruidas,DM_Inst Ensino Valor,DM_Inst Serviços Danificadas,DM_Inst Serviços Destruidas,DM_Inst Serviços Valor,DM_Inst Comuni Danificadas,DM_Inst Comuni Destruidas,DM_Inst Comuni Valor,DM_Obras de Infra Danificadas,DM_Obras de Infra Destruidas,DM_Obras de Infra Valor,DM_total_danos_materiais,PEPL_Assis_méd e emergên(R$),PEPL_Abast de água pot(R$),PEPL_sist de esgotos sanit(R$),PEPL_Sis limp e rec lixo (R$),PEPL_Sis cont pragas (R$),PEPL_distrib energia (R$),PEPL_Telecomunicações (R$),PEPL_Tran loc/reg/l_curso (R$),PEPL_Distrib combustíveis(R$),PEPL_Segurança pública (R$),PEPL_Ensino (R$),PEPL_total_publico,PEPR_Agricultura (R$),PEPR_Pecuária (R$),PEPR_Indústria (R$),PEPR_Comércio (R$),PEPR_Serviços (R$),PEPR_total_privado,Ano_Evento,DensidadePop,DOMICILIO_AREARURAL,PDEFAGUA,PDEFESGOTO,PDEFLIXO,PDEFSAN,QTDE_FAMILIAS_ATUALIZADAS,Rendapercapita,Sigla_UF_AC,Sigla_UF_AL,Sigla_UF_AM,Sigla_UF_AP,Sigla_UF_BA,Sigla_UF_CE,Sigla_UF_DF,Sigla_UF_ES,Sigla_UF_GO,Sigla_UF_MA,Sigla_UF_MG,Sigla_UF_MS,Sigla_UF_MT,Sigla_UF_PA,Sigla_UF_PB,Sigla_UF_PE,Sigla_UF_PI,Sigla_UF_PR,Sigla_UF_RJ,Sigla_UF_RN,Sigla_UF_RO,Sigla_UF_RR,Sigla_UF_RS,Sigla_UF_SC,Sigla_UF_SE,Sigla_UF_SP,Sigla_UF_TO,Sigla_UF_pa,regiao_Centro-oeste,regiao_Nordeste,regiao_Norte,regiao_Sudeste,regiao_Sul,COBRADE_11110,COBRADE_11120,COBRADE_11311,COBRADE_11312,COBRADE_11313,COBRADE_11321,COBRADE_11331,COBRADE_11332,COBRADE_11340,COBRADE_11410,COBRADE_11420,COBRADE_11431,COBRADE_11432,COBRADE_11433,COBRADE_12100,COBRADE_12200,COBRADE_12300,COBRADE_13111,COBRADE_13112,COBRADE_13120,COBRADE_13211,COBRADE_13212,COBRADE_13213,COBRADE_13214,COBRADE_13215,COBRADE_13310,COBRADE_13321,COBRADE_13322,COBRADE_14110,COBRADE_14120,COBRADE_14131,COBRADE_14132,COBRADE_14140,COBRADE_15110,COBRADE_15120,COBRADE_15130,COBRADE_15210,COBRADE_15230,COBRADE_22210,COBRADE_22220,COBRADE_23120,COBRADE_24100,COBRADE_24200,COBRADE_25100,COBRADE_25500
0,0,0,0,0,93,4030,0,16,4139,49,20,20643490.0,3.0,0,1121929.0,9.0,1,5235667.0,0,0,0.0,3,3,32411.271754,25.0,0.0,65321180.0,92354670.0,7978.159201,673157.18258,623293.687574,2493.17475,0.0,330993.9,3739.762125,58609.55203,0.0,0.0,1087024.0,2787290.0,1642902.0,1149354.0,379785.309713,0.0,24433.112553,3196474.0,2010,,,,,,,,8.314706,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,4,2,0,66,417,0,14430,14919,161,91,18604070.0,0.0,0,0.0,2.0,0,24931.75,0,0,0.0,0,0,37397.621254,9000.0,0.0,57771850.0,76438240.0,0.0,179508.582021,264276.523531,12465.873751,0.0,349044.5,74795.242509,134631.436516,0.0,0.0,77288.42,1092011.0,441291.9,2049390.0,0.0,0.0,112192.888695,2602874.0,2010,,,,,,,,43.004372,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,747.952425,448771.455053,0.0,0.0,0.0,0.0,0.0,249317.47503,0.0,0.0,177015.4,875852.3,254303.8,147097.3,0.0,0.0,0.0,401401.1,2010,9.637729,,,,,,,3.825525,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,6,0,0,38,492,0,12000,12536,89,9,3340854.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,3.0,400.0,8052098.0,11392950.0,0.0,1246.587375,0.0,0.0,0.0,1994540.0,0.0,125656.007415,0.0,0.0,0.0,2121442.0,49863.5,1383712.0,0.0,0.0,925466.46731,2359042.0,2010,15.538106,,,,,,,5.132397,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2010,7.146876,,,,,,,4.616055,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Shuffle das variáveis

O objetivo é evitar a ordenação no treino.

In [8]:
df_eventos = df_eventos.sample(frac=1, random_state=1).reset_index(drop=True)

### Separar a base de dados em dois períodos

A base de dados utilizada apresenta uma particularidade quanto à disponibilidade das informações. 

Os dados referentes à situação das famílias em risco (CadÚnico) estão acessíveis apenas a partir de 2018. Por sua vez, os dados do produto interno bruto dos municípios, utilizados para calcular a renda per capita, estão disponíveis apenas até 2021.

Para evitar a exclusão dessas variáveis devido aos dados ausentes nos períodos anteriores a esse ano, optamos por dividir a base em duas partes para os testes dos modelos de classificação.

A base "pré-processado 1" abrange o período de 2010 a 2017 e não inclui as variáveis relacionadas ao CadÚnico. Em contrapartida, a base "pré-processado 2" cobre o período de 2018 a 2024 e incorpora as variáveis do CadÚnico, mas exclui a variável "Rendapercapita".

In [9]:
df_eventos_2010_2017 = df_eventos[df_eventos['Ano_Evento'] <= 2017]
cadunico = ['DOMICILIO_AREARURAL', 'PDEFAGUA', 'PDEFESGOTO', 'PDEFLIXO', 'PDEFSAN', 'QTDE_FAMILIAS_ATUALIZADAS']
df_eventos_2010_2017 = df_eventos_2010_2017.drop(columns=cadunico)

print("Tamanho da base (2010 a 2017): ", df_eventos_2010_2017.shape)
print("Dados ausentes na base (2010 a 2017) \n", df_eventos_2010_2017.isna().sum())

Tamanho da base (2010 a 2017):  (13047, 127)
Dados ausentes na base (2010 a 2017) 
 Status                               0
DH_MORTOS                            0
DH_FERIDOS                           0
DH_ENFERMOS                          0
DH_DESABRIGADOS                      0
DH_DESALOJADOS                       0
DH_DESAPARECIDOS                     0
DH_OUTROS AFETADOS                   0
DH_total_danos_humanos               0
DM_Uni Habita Danificadas            0
DM_Uni Habita Destruidas             0
DM_Uni Habita Valor                  0
DM_Inst Saúde Danificadas            0
DM_Inst Saúde Destruidas             0
DM_Inst Saúde Valor                  0
DM_Inst Ensino Danificadas           0
DM_Inst Ensino Destruidas            0
DM_Inst Ensino Valor                 0
DM_Inst Serviços Danificadas         0
DM_Inst Serviços Destruidas          0
DM_Inst Serviços Valor               0
DM_Inst Comuni Danificadas           0
DM_Inst Comuni Destruidas            0
DM_Inst Comuni Valo

In [10]:
df_eventos_2018_2024 = df_eventos[(df_eventos['Ano_Evento'] >= 2018)]
df_eventos_2018_2024 = df_eventos_2018_2024.drop(columns="Rendapercapita")

print("Tamanho da base (2018 a 2024): ", df_eventos_2018_2024.shape)
print("Dados ausentes na base (2018 a 2024) \n", df_eventos_2018_2024.isna().sum())

Tamanho da base (2018 a 2024):  (13211, 132)
Dados ausentes na base (2018 a 2024) 
 Status                               0
DH_MORTOS                            0
DH_FERIDOS                           0
DH_ENFERMOS                          0
DH_DESABRIGADOS                      0
DH_DESALOJADOS                       0
DH_DESAPARECIDOS                     0
DH_OUTROS AFETADOS                   0
DH_total_danos_humanos               0
DM_Uni Habita Danificadas            0
DM_Uni Habita Destruidas             0
DM_Uni Habita Valor                  0
DM_Inst Saúde Danificadas            0
DM_Inst Saúde Destruidas             0
DM_Inst Saúde Valor                  0
DM_Inst Ensino Danificadas           0
DM_Inst Ensino Destruidas            0
DM_Inst Ensino Valor                 0
DM_Inst Serviços Danificadas         0
DM_Inst Serviços Destruidas          0
DM_Inst Serviços Valor               0
DM_Inst Comuni Danificadas           0
DM_Inst Comuni Destruidas            0
DM_Inst Comuni Valo

### Separar as bases de dados em dois modelos
As bases de dados geradas contêm variáveis derivadas umas das outras (por exemplo, a variável "DH_total_danos_humanos" é a soma das demais variáveis relacionadas a danos humanos — mais detalhes estão disponíveis na página de introdução, onde essas variáveis são descritas).

Para possibilitar a realização dos testes de maneira adequada, foram criadas duas bases de dados distintas.

A primeira base, utilizada no modelo 1, considera variáveis agregadas, sendo elas: região; DH_total_danos_humanos; DM_total_danos_materiais; PEPL_total_publico; PEPR_total_privado; Empenhado; DensidadePop; Rendapercapita; COBRADE.

Já a segunda base, usada no modelo 2, trabalha com variáveis individualizadas, incluindo: UF; todas as variáveis "DH_" (exceto DH_total_danos_humanos); todas as variáveis "DM_" (exceto DM_total_danos_materiais); todas as variáveis "PEPL_" (exceto PEPL_total_publico); todas as variáveis "PEPR_" (exceto PEPR_total_privado); Empenhado; DensidadePop; Rendapercapita; COBRADE.



In [11]:
colunas_modelo1 = [
    'Status',
    'DH_total_danos_humanos',
    'DM_total_danos_materiais',
    'PEPL_total_publico',
    'PEPR_total_privado',
    'regiao_Centro-oeste',
    'regiao_Nordeste',
    'regiao_Norte',
    'regiao_Sudeste',
    'regiao_Sul',
    'COBRADE_11110',
    'COBRADE_11120',
    'COBRADE_11311',
    'COBRADE_11312',
    'COBRADE_11313',
    'COBRADE_11321',
    'COBRADE_11331',
    'COBRADE_11332',
    'COBRADE_11340',
    'COBRADE_11410',
    'COBRADE_11420',
    'COBRADE_11431',
    'COBRADE_11432',
    'COBRADE_11433',
    'COBRADE_12100',
    'COBRADE_12200',
    'COBRADE_12300',
    'COBRADE_13111',
    'COBRADE_13112',
    'COBRADE_13120',
    'COBRADE_13211',
    'COBRADE_13212',
    'COBRADE_13213',
    'COBRADE_13214',
    'COBRADE_13215',
    'COBRADE_13310',
    'COBRADE_13321',
    'COBRADE_13322',
    'COBRADE_14110',
    'COBRADE_14120',
    'COBRADE_14131',
    'COBRADE_14132',
    'COBRADE_14140',
    'COBRADE_15110',
    'COBRADE_15120',
    'COBRADE_15130',
    'COBRADE_15210',
    'COBRADE_15230',
    'COBRADE_22210',
    'COBRADE_22220',
    'COBRADE_23120',
    'COBRADE_24100',
    'COBRADE_24200',
    'COBRADE_25100',
    'COBRADE_25500'
]

colunas_CadUnico = [
    'DOMICILIO_AREARURAL',
    'PDEFAGUA',
    'PDEFESGOTO',
    'PDEFLIXO',
    'PDEFSAN',
    'QTDE_FAMILIAS_ATUALIZADAS'
]

colunas_Renda = [
    'Rendapercapita'
]

df_eventos_2010_2017_modelo1 = df_eventos_2010_2017[colunas_modelo1 + colunas_Renda]

df_eventos_2018_2024_modelo1 = df_eventos_2018_2024[colunas_modelo1 + colunas_CadUnico]


Modelo 1 e período 2010 - 2017

In [12]:
print("Dimensões da base de dados 2010-2017, modelo 1: ", df_eventos_2010_2017_modelo1.shape)
estilo_tabelas(df_eventos_2010_2017_modelo1.head(5))

Dimensões da base de dados 2010-2017, modelo 1:  (13047, 56)


Unnamed: 0,Status,DH_total_danos_humanos,DM_total_danos_materiais,PEPL_total_publico,PEPR_total_privado,regiao_Centro-oeste,regiao_Nordeste,regiao_Norte,regiao_Sudeste,regiao_Sul,COBRADE_11110,COBRADE_11120,COBRADE_11311,COBRADE_11312,COBRADE_11313,COBRADE_11321,COBRADE_11331,COBRADE_11332,COBRADE_11340,COBRADE_11410,COBRADE_11420,COBRADE_11431,COBRADE_11432,COBRADE_11433,COBRADE_12100,COBRADE_12200,COBRADE_12300,COBRADE_13111,COBRADE_13112,COBRADE_13120,COBRADE_13211,COBRADE_13212,COBRADE_13213,COBRADE_13214,COBRADE_13215,COBRADE_13310,COBRADE_13321,COBRADE_13322,COBRADE_14110,COBRADE_14120,COBRADE_14131,COBRADE_14132,COBRADE_14140,COBRADE_15110,COBRADE_15120,COBRADE_15130,COBRADE_15210,COBRADE_15230,COBRADE_22210,COBRADE_22220,COBRADE_23120,COBRADE_24100,COBRADE_24200,COBRADE_25100,COBRADE_25500,Rendapercapita
5,0,4000,166542.286,6120428.994,7286224.972,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,17.934
8,0,8034,0.0,0.0,7639477.192,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.605
11,0,2732,0.0,0.0,26406.49,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13.655
13,0,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.997
17,0,0,2048392.375,2617.833,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.594


Modelo 1 e período 2018 - 2024

In [13]:
print("Dimensões da base de dados 2018-2024, modelo 1: ", df_eventos_2018_2024_modelo1.shape)
estilo_tabelas(df_eventos_2018_2024_modelo1.head(5))

Dimensões da base de dados 2018-2024, modelo 1:  (13211, 61)


Unnamed: 0,Status,DH_total_danos_humanos,DM_total_danos_materiais,PEPL_total_publico,PEPR_total_privado,regiao_Centro-oeste,regiao_Nordeste,regiao_Norte,regiao_Sudeste,regiao_Sul,COBRADE_11110,COBRADE_11120,COBRADE_11311,COBRADE_11312,COBRADE_11313,COBRADE_11321,COBRADE_11331,COBRADE_11332,COBRADE_11340,COBRADE_11410,COBRADE_11420,COBRADE_11431,COBRADE_11432,COBRADE_11433,COBRADE_12100,COBRADE_12200,COBRADE_12300,COBRADE_13111,COBRADE_13112,COBRADE_13120,COBRADE_13211,COBRADE_13212,COBRADE_13213,COBRADE_13214,COBRADE_13215,COBRADE_13310,COBRADE_13321,COBRADE_13322,COBRADE_14110,COBRADE_14120,COBRADE_14131,COBRADE_14132,COBRADE_14140,COBRADE_15110,COBRADE_15120,COBRADE_15130,COBRADE_15210,COBRADE_15230,COBRADE_22210,COBRADE_22220,COBRADE_23120,COBRADE_24100,COBRADE_24200,COBRADE_25100,COBRADE_25500,DOMICILIO_AREARURAL,PDEFAGUA,PDEFESGOTO,PDEFLIXO,PDEFSAN,QTDE_FAMILIAS_ATUALIZADAS
0,0,11650,0.0,132466.926,861035.02,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.305,0.191,0.559,0.167,0.091,383.0
1,0,2548,0.0,103709.315,6509159.048,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.632,0.246,0.456,0.175,0.07,57.0
2,1,336,2296093.386,1928129.702,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.708,0.691,0.851,0.691,0.576,356.0
3,0,420,9811880.0,8974880.0,29950000.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.187,0.098,0.992,0.139,0.077,632.0
4,0,13245,0.0,286229.997,1353851.954,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.242,0.157,0.187,0.063,0.028,2231.0


In [14]:
colunas_remover_modelo2 = [
    'DH_total_danos_humanos',
    'DM_total_danos_materiais',
    'PEPL_total_publico',
    'PEPR_total_privado',
    'regiao_Centro-oeste',
    'regiao_Nordeste',
    'regiao_Norte',
    'regiao_Sudeste',
    'regiao_Sul',
    'Ano_Evento']

# Para df_eventos_2010_2017_modelo2
df_eventos_2010_2017_modelo2 = df_eventos_2010_2017.drop(columns=colunas_remover_modelo2)

# Para df_eventos_2018_2024_modelo2
df_eventos_2018_2024_modelo2 = df_eventos_2018_2024.drop(columns=colunas_remover_modelo2)

Modelo 2 e período 2010 - 2017

In [15]:
print("Dimensões da base de dados 2010-2017, modelo 2: ", df_eventos_2010_2017_modelo2.shape)
estilo_tabelas(df_eventos_2010_2017_modelo2.head(5))

Dimensões da base de dados 2010-2017, modelo 2:  (13047, 117)


Unnamed: 0,Status,DH_MORTOS,DH_FERIDOS,DH_ENFERMOS,DH_DESABRIGADOS,DH_DESALOJADOS,DH_DESAPARECIDOS,DH_OUTROS AFETADOS,DM_Uni Habita Danificadas,DM_Uni Habita Destruidas,DM_Uni Habita Valor,DM_Inst Saúde Danificadas,DM_Inst Saúde Destruidas,DM_Inst Saúde Valor,DM_Inst Ensino Danificadas,DM_Inst Ensino Destruidas,DM_Inst Ensino Valor,DM_Inst Serviços Danificadas,DM_Inst Serviços Destruidas,DM_Inst Serviços Valor,DM_Inst Comuni Danificadas,DM_Inst Comuni Destruidas,DM_Inst Comuni Valor,DM_Obras de Infra Danificadas,DM_Obras de Infra Destruidas,DM_Obras de Infra Valor,PEPL_Assis_méd e emergên(R$),PEPL_Abast de água pot(R$),PEPL_sist de esgotos sanit(R$),PEPL_Sis limp e rec lixo (R$),PEPL_Sis cont pragas (R$),PEPL_distrib energia (R$),PEPL_Telecomunicações (R$),PEPL_Tran loc/reg/l_curso (R$),PEPL_Distrib combustíveis(R$),PEPL_Segurança pública (R$),PEPL_Ensino (R$),PEPR_Agricultura (R$),PEPR_Pecuária (R$),PEPR_Indústria (R$),PEPR_Comércio (R$),PEPR_Serviços (R$),DensidadePop,Rendapercapita,Sigla_UF_AC,Sigla_UF_AL,Sigla_UF_AM,Sigla_UF_AP,Sigla_UF_BA,Sigla_UF_CE,Sigla_UF_DF,Sigla_UF_ES,Sigla_UF_GO,Sigla_UF_MA,Sigla_UF_MG,Sigla_UF_MS,Sigla_UF_MT,Sigla_UF_PA,Sigla_UF_PB,Sigla_UF_PE,Sigla_UF_PI,Sigla_UF_PR,Sigla_UF_RJ,Sigla_UF_RN,Sigla_UF_RO,Sigla_UF_RR,Sigla_UF_RS,Sigla_UF_SC,Sigla_UF_SE,Sigla_UF_SP,Sigla_UF_TO,Sigla_UF_pa,COBRADE_11110,COBRADE_11120,COBRADE_11311,COBRADE_11312,COBRADE_11313,COBRADE_11321,COBRADE_11331,COBRADE_11332,COBRADE_11340,COBRADE_11410,COBRADE_11420,COBRADE_11431,COBRADE_11432,COBRADE_11433,COBRADE_12100,COBRADE_12200,COBRADE_12300,COBRADE_13111,COBRADE_13112,COBRADE_13120,COBRADE_13211,COBRADE_13212,COBRADE_13213,COBRADE_13214,COBRADE_13215,COBRADE_13310,COBRADE_13321,COBRADE_13322,COBRADE_14110,COBRADE_14120,COBRADE_14131,COBRADE_14132,COBRADE_14140,COBRADE_15110,COBRADE_15120,COBRADE_15130,COBRADE_15210,COBRADE_15230,COBRADE_22210,COBRADE_22220,COBRADE_23120,COBRADE_24100,COBRADE_24200,COBRADE_25100,COBRADE_25500
5,0,0,0,0,0,0,0,4000,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,8.0,2.0,166542.286,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6078793.423,0.0,0.0,41635.571,5204446.424,0.0,0.0,0.0,2081778.549,60.642,17.934,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0,0,0,0,0,0,0,8034,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3395323.196,4244153.995,0.0,0.0,0.0,,5.605,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
11,0,0,0,0,0,0,0,2732,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26406.49,0.0,2.079,13.655,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,19.773,5.997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,13.0,2.0,2048392.375,2617.833,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,10.594,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Modelo 2 e período 2018 - 2024

In [16]:
print("Dimensões da base de dados 2018-2024, modelo 2: ", df_eventos_2018_2024_modelo2.shape)
estilo_tabelas(df_eventos_2018_2024_modelo2.head(5))

Dimensões da base de dados 2018-2024, modelo 2:  (13211, 122)


Unnamed: 0,Status,DH_MORTOS,DH_FERIDOS,DH_ENFERMOS,DH_DESABRIGADOS,DH_DESALOJADOS,DH_DESAPARECIDOS,DH_OUTROS AFETADOS,DM_Uni Habita Danificadas,DM_Uni Habita Destruidas,DM_Uni Habita Valor,DM_Inst Saúde Danificadas,DM_Inst Saúde Destruidas,DM_Inst Saúde Valor,DM_Inst Ensino Danificadas,DM_Inst Ensino Destruidas,DM_Inst Ensino Valor,DM_Inst Serviços Danificadas,DM_Inst Serviços Destruidas,DM_Inst Serviços Valor,DM_Inst Comuni Danificadas,DM_Inst Comuni Destruidas,DM_Inst Comuni Valor,DM_Obras de Infra Danificadas,DM_Obras de Infra Destruidas,DM_Obras de Infra Valor,PEPL_Assis_méd e emergên(R$),PEPL_Abast de água pot(R$),PEPL_sist de esgotos sanit(R$),PEPL_Sis limp e rec lixo (R$),PEPL_Sis cont pragas (R$),PEPL_distrib energia (R$),PEPL_Telecomunicações (R$),PEPL_Tran loc/reg/l_curso (R$),PEPL_Distrib combustíveis(R$),PEPL_Segurança pública (R$),PEPL_Ensino (R$),PEPR_Agricultura (R$),PEPR_Pecuária (R$),PEPR_Indústria (R$),PEPR_Comércio (R$),PEPR_Serviços (R$),DensidadePop,DOMICILIO_AREARURAL,PDEFAGUA,PDEFESGOTO,PDEFLIXO,PDEFSAN,QTDE_FAMILIAS_ATUALIZADAS,Sigla_UF_AC,Sigla_UF_AL,Sigla_UF_AM,Sigla_UF_AP,Sigla_UF_BA,Sigla_UF_CE,Sigla_UF_DF,Sigla_UF_ES,Sigla_UF_GO,Sigla_UF_MA,Sigla_UF_MG,Sigla_UF_MS,Sigla_UF_MT,Sigla_UF_PA,Sigla_UF_PB,Sigla_UF_PE,Sigla_UF_PI,Sigla_UF_PR,Sigla_UF_RJ,Sigla_UF_RN,Sigla_UF_RO,Sigla_UF_RR,Sigla_UF_RS,Sigla_UF_SC,Sigla_UF_SE,Sigla_UF_SP,Sigla_UF_TO,Sigla_UF_pa,COBRADE_11110,COBRADE_11120,COBRADE_11311,COBRADE_11312,COBRADE_11313,COBRADE_11321,COBRADE_11331,COBRADE_11332,COBRADE_11340,COBRADE_11410,COBRADE_11420,COBRADE_11431,COBRADE_11432,COBRADE_11433,COBRADE_12100,COBRADE_12200,COBRADE_12300,COBRADE_13111,COBRADE_13112,COBRADE_13120,COBRADE_13211,COBRADE_13212,COBRADE_13213,COBRADE_13214,COBRADE_13215,COBRADE_13310,COBRADE_13321,COBRADE_13322,COBRADE_14110,COBRADE_14120,COBRADE_14131,COBRADE_14132,COBRADE_14140,COBRADE_15110,COBRADE_15120,COBRADE_15130,COBRADE_15210,COBRADE_15230,COBRADE_22210,COBRADE_22220,COBRADE_23120,COBRADE_24100,COBRADE_24200,COBRADE_25100,COBRADE_25500
0,0,0,0,0,0,0,0,11650,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,132466.926,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,264933.852,309089.494,58874.189,228137.484,0.0,134.493,0.305,0.191,0.559,0.167,0.091,383.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0,0,0,0,0,0,0,2548,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,103709.315,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5905738.84,603420.207,0.0,0.0,0.0,10.669,0.632,0.246,0.456,0.175,0.07,57.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1,0,180,0,0,156,0,0,112,0,515149.157,0.0,0,0.0,0.0,0,0.0,0,0,0.0,1,0,161904.021,16.0,0.0,1619040.208,161904.021,0.0,0.0,1766225.681,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.708,0.691,0.851,0.691,0.576,356.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0,0,0,0,0,60,0,360,0,0,210000.0,0.0,105,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,300.0,110.0,9601880.0,0.0,0.0,1124880.0,0.0,0.0,0.0,0.0,7850000.0,0.0,0.0,0.0,14800000.0,15150000.0,0.0,0.0,0.0,,0.187,0.098,0.992,0.139,0.077,632.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,0,0,0,0,0,0,13245,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,286229.997,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1353851.954,0.0,0.0,0.0,0.0,81.646,0.242,0.157,0.187,0.063,0.028,2231.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Exportar bases de dados criadas

In [17]:
df_eventos_2010_2017_modelo1.to_csv('df_eventos_2010_2017_modelo1.csv',
                        index=False,
                        sep=';',
                        decimal=',')

df_eventos_2018_2024_modelo1.to_csv('df_eventos_2018_2024_modelo1.csv',
                        index=False,
                        sep=';',
                        decimal=',')

df_eventos_2018_2024_modelo2.to_csv('df_eventos_2018_2024_modelo2.csv',
                        index=False,
                        sep=';',
                        decimal=',')

df_eventos_2010_2017_modelo2.to_csv('df_eventos_2010_2017_modelo2.csv',
                        index=False,
                        sep=';',
                        decimal=',')