## Recupera os dados analisados e tratados, treina o modelo, fazendo a classificação dos acidentes aeronáuticos por período de ocorrência.

In [342]:
# importa bibliotecas
import pandas as pd
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

pd.set_option('display.max_rows', None) # permite exibição de todas as linhas
pd.set_option('display.max_columns', None) # permite exibição de todas as colunas
import warnings
warnings.simplefilter('ignore') # desativa avisos de warning

In [343]:
# carregando arquivo tratado em dataframe
df_acidentes_aero_trans = pd.read_csv('arquivos_tratados/df_acidentes_aero_trans.csv')

In [344]:
# conferindo a dimensão do dataframe, linhas e colunas
df_acidentes_aero_trans.shape

(6888, 238)

In [345]:
# ordenando pelo index o dataframe carregado
df_acidentes_aero_trans = df_acidentes_aero_trans.sort_index()

In [346]:
df_acidentes_aero_trans.head(5)

Unnamed: 0,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,aeronave_modelo,aeronave_ano_fabricacao,aeronave_voo_origem,aeronave_voo_destino,aeronave_fase_operacao,aeronave_fatalidades_total,ocorrencia_tipo,taxonomia_tipo_icao,fator_nome,fator_aspecto,fator_condicionante,fator_area,ocorrencia_classificacao,ocorrencia_saida_pista,aeronave_tipo_veiculo,aeronave_motor_tipo,aeronave_motor_quantidade,aeronave_tipo_operacao,aeronave_nivel_dano,periodo,ocorrencia_latitude,ocorrencia_longitude,aeronave_assentos,one-hot__ocorrencia_cidade_***,one-hot__ocorrencia_cidade_BELO HORIZONTE,one-hot__ocorrencia_cidade_BRASÍLIA,one-hot__ocorrencia_cidade_CAMPINAS,one-hot__ocorrencia_cidade_GUARULHOS,one-hot__ocorrencia_cidade_RIO DE JANEIRO,one-hot__ocorrencia_cidade_SÃO PAULO,one-hot__ocorrencia_uf_***,one-hot__ocorrencia_uf_AM,one-hot__ocorrencia_uf_BA,one-hot__ocorrencia_uf_CE,one-hot__ocorrencia_uf_DF,one-hot__ocorrencia_uf_ES,one-hot__ocorrencia_uf_GO,one-hot__ocorrencia_uf_MA,one-hot__ocorrencia_uf_MG,one-hot__ocorrencia_uf_MS,one-hot__ocorrencia_uf_MT,one-hot__ocorrencia_uf_PA,one-hot__ocorrencia_uf_PE,one-hot__ocorrencia_uf_PR,one-hot__ocorrencia_uf_RJ,one-hot__ocorrencia_uf_RS,one-hot__ocorrencia_uf_SC,one-hot__ocorrencia_uf_SP,one-hot__ocorrencia_aerodromo_***,one-hot__ocorrencia_aerodromo_**NI,one-hot__ocorrencia_aerodromo_SBBH,one-hot__ocorrencia_aerodromo_SBBR,one-hot__ocorrencia_aerodromo_SBCF,one-hot__ocorrencia_aerodromo_SBCT,one-hot__ocorrencia_aerodromo_SBEG,one-hot__ocorrencia_aerodromo_SBGL,one-hot__ocorrencia_aerodromo_SBGO,one-hot__ocorrencia_aerodromo_SBGR,one-hot__ocorrencia_aerodromo_SBJR,one-hot__ocorrencia_aerodromo_SBKP,one-hot__ocorrencia_aerodromo_SBLO,one-hot__ocorrencia_aerodromo_SBMT,one-hot__ocorrencia_aerodromo_SBPA,one-hot__ocorrencia_aerodromo_SBRF,one-hot__ocorrencia_aerodromo_SBRJ,one-hot__ocorrencia_aerodromo_SBSP,one-hot__ocorrencia_aerodromo_SBSV,one-hot__aeronave_modelo_***,one-hot__aeronave_modelo_737-8EH,one-hot__aeronave_modelo_A320-214,one-hot__aeronave_modelo_AB-115,one-hot__aeronave_modelo_ATR-72-212A,one-hot__aeronave_modelo_EMB-810C,one-hot__aeronave_modelo_EMB-810D,one-hot__aeronave_modelo_ERJ 190-200 IGW,one-hot__aeronave_ano_fabricacao_***,one-hot__aeronave_ano_fabricacao_0.0,one-hot__aeronave_ano_fabricacao_1900.0,one-hot__aeronave_ano_fabricacao_2007.0,one-hot__aeronave_ano_fabricacao_2008.0,one-hot__aeronave_ano_fabricacao_2009.0,one-hot__aeronave_ano_fabricacao_2010.0,one-hot__aeronave_ano_fabricacao_2011.0,one-hot__aeronave_ano_fabricacao_2012.0,one-hot__aeronave_voo_origem_***,one-hot__aeronave_voo_origem_CAMPO DE MARTE - SP,one-hot__aeronave_voo_origem_CARLOS DRUMMOND DE ANDRADE / PAMPULHA,one-hot__aeronave_voo_origem_CONGONHAS,one-hot__aeronave_voo_origem_DEPUTADO LUÍS EDUARDO MAGALHÃES,one-hot__aeronave_voo_origem_FORA DE AERODROMO,one-hot__aeronave_voo_origem_GOVERNADOR ANDRÉ FRANCO MONTORO,one-hot__aeronave_voo_origem_GUARARAPES - GILBERTO FREYRE,one-hot__aeronave_voo_origem_NÃO IDENTIFICADO,one-hot__aeronave_voo_origem_PRESIDENTE JUSCELINO KUBITSCHEK,one-hot__aeronave_voo_origem_SALGADO FILHO,one-hot__aeronave_voo_origem_SANTA GENOVEVA/GOIÂNIA,one-hot__aeronave_voo_origem_SANTOS DUMONT,one-hot__aeronave_voo_origem_TANCREDO NEVES,one-hot__aeronave_voo_origem_VIRACOPOS,one-hot__aeronave_voo_destino_***,one-hot__aeronave_voo_destino_CAMPO DE MARTE - SP,one-hot__aeronave_voo_destino_CONGONHAS,one-hot__aeronave_voo_destino_DEPUTADO LUÍS EDUARDO MAGALHÃES,one-hot__aeronave_voo_destino_FORA DE AERODROMO,one-hot__aeronave_voo_destino_GOVERNADOR ANDRÉ FRANCO MONTORO,one-hot__aeronave_voo_destino_GUARARAPES - GILBERTO FREYRE,one-hot__aeronave_voo_destino_NÃO IDENTIFICADO,one-hot__aeronave_voo_destino_PRESIDENTE JUSCELINO KUBITSCHEK,one-hot__aeronave_voo_destino_SALGADO FILHO,one-hot__aeronave_voo_destino_SANTA GENOVEVA/GOIÂNIA,one-hot__aeronave_voo_destino_SANTOS DUMONT,one-hot__aeronave_voo_destino_TANCREDO NEVES,one-hot__aeronave_voo_destino_VIRACOPOS,one-hot__aeronave_fase_operacao_***,one-hot__aeronave_fase_operacao_APROXIMAÇÃO FINAL,one-hot__aeronave_fase_operacao_CIRCUITO DE TRÁFEGO,one-hot__aeronave_fase_operacao_CORRIDA APÓS POUSO,one-hot__aeronave_fase_operacao_CRUZEIRO,one-hot__aeronave_fase_operacao_DECOLAGEM,one-hot__aeronave_fase_operacao_DESCIDA,one-hot__aeronave_fase_operacao_ESPECIALIZADA,one-hot__aeronave_fase_operacao_ESTACIONAMENTO,one-hot__aeronave_fase_operacao_INDETERMINADA,one-hot__aeronave_fase_operacao_MANOBRA,one-hot__aeronave_fase_operacao_OUTRA FASE,one-hot__aeronave_fase_operacao_POUSO,one-hot__aeronave_fase_operacao_SUBIDA,one-hot__aeronave_fase_operacao_TÁXI,one-hot__aeronave_fase_operacao_VOO A BAIXA ALTURA,one-hot__aeronave_fatalidades_total_***,one-hot__aeronave_fatalidades_total_0,one-hot__aeronave_fatalidades_total_1,one-hot__aeronave_fatalidades_total_2,one-hot__aeronave_fatalidades_total_3,one-hot__aeronave_fatalidades_total_4,one-hot__aeronave_fatalidades_total_5,one-hot__aeronave_fatalidades_total_6,one-hot__ocorrencia_tipo_***,one-hot__ocorrencia_tipo_CAUSADO POR FENÔMENO METEOROLÓGICO EM VOO,one-hot__ocorrencia_tipo_COLISÃO COM AVE,one-hot__ocorrencia_tipo_COLISÃO COM OBSTÁCULO DURANTE A DECOLAGEM E POUSO,one-hot__ocorrencia_tipo_COLISÃO COM OBSTÁCULOS NO SOLO,one-hot__ocorrencia_tipo_COM PARA-BRISAS / JANELA / PORTA,one-hot__ocorrencia_tipo_COM TREM DE POUSO,one-hot__ocorrencia_tipo_ESTOURO DE PNEU,one-hot__ocorrencia_tipo_EXCURSÃO DE PISTA,one-hot__ocorrencia_tipo_FALHA DO MOTOR EM VOO,one-hot__ocorrencia_tipo_FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPONENTE,one-hot__ocorrencia_tipo_INDETERMINADO,one-hot__ocorrencia_tipo_OPERAÇÃO A BAIXA ALTITUDE,one-hot__ocorrencia_tipo_OUTROS,one-hot__ocorrencia_tipo_PANE SECA,one-hot__ocorrencia_tipo_PERDA DE COMPONENTE EM VOO,one-hot__ocorrencia_tipo_PERDA DE CONTROLE EM VOO,one-hot__ocorrencia_tipo_PERDA DE CONTROLE NO SOLO,one-hot__ocorrencia_tipo_POUSO BRUSCO,one-hot__ocorrencia_tipo_POUSO EM LOCAL NÃO PREVISTO,one-hot__ocorrencia_tipo_POUSO LONGO,one-hot__ocorrencia_tipo_POUSO SEM TREM,one-hot__ocorrencia_tipo_TRÁFEGO AÉREO,one-hot__ocorrencia_tipo_VAZAMENTO DE OUTROS FLUIDOS,one-hot__taxonomia_tipo_icao_***,one-hot__taxonomia_tipo_icao_ARC,one-hot__taxonomia_tipo_icao_BIRD,one-hot__taxonomia_tipo_icao_CTOL,one-hot__taxonomia_tipo_icao_FUEL,one-hot__taxonomia_tipo_icao_GCOL,one-hot__taxonomia_tipo_icao_LALT,one-hot__taxonomia_tipo_icao_LOC-G,one-hot__taxonomia_tipo_icao_LOC-I,one-hot__taxonomia_tipo_icao_MAC,one-hot__taxonomia_tipo_icao_OTHR,one-hot__taxonomia_tipo_icao_RE,one-hot__taxonomia_tipo_icao_SCF-NP,one-hot__taxonomia_tipo_icao_SCF-PP,one-hot__taxonomia_tipo_icao_UNK,one-hot__fator_nome_***,one-hot__fator_nome_JULGAMENTO DE PILOTAGEM,one-hot__fator_nome_MANUTENÇÃO DA AERONAVE,one-hot__fator_nome_PLANEJAMENTO DE VOO,one-hot__fator_nome_POUCA EXPERIÊNCIA DO PILOTO,one-hot__fator_nome_PROCESSO DECISÓRIO,one-hot__fator_nome_SUPERVISÃO GERENCIAL,one-hot__fator_aspecto_***,one-hot__fator_aspecto_ASPECTO DE PROJETO,one-hot__fator_aspecto_ASPECTO MÉDICO,one-hot__fator_aspecto_ASPECTO PSICOLÓGICO,one-hot__fator_aspecto_DESEMPENHO DO SER HUMANO,one-hot__fator_aspecto_ELEMENTOS RELACIONADOS AO AMBIENTE OPERACIONAL,one-hot__fator_aspecto_INFRAESTRUTURA AEROPORTUÁRIA,one-hot__fator_aspecto_OUTRO,one-hot__fator_condicionante_***,one-hot__fator_condicionante_INDIVIDUAL,one-hot__fator_condicionante_MANUTENÇÃO DA AERONAVE,one-hot__fator_condicionante_OPERAÇÃO DA AERONAVE,one-hot__fator_condicionante_ORGANIZACIONAL,one-hot__fator_condicionante_PSICOSSOCIAL,one-hot__fator_area_***,one-hot__fator_area_FATOR HUMANO,one-hot__fator_area_FATOR MATERIAL,one-hot__fator_area_FATOR OPERACIONAL,one-hot__fator_area_OUTRO,one-hot__ocorrencia_classificacao_ACIDENTE,one-hot__ocorrencia_classificacao_INCIDENTE,one-hot__ocorrencia_classificacao_INCIDENTE GRAVE,one-hot__ocorrencia_saida_pista_NÃO,one-hot__ocorrencia_saida_pista_SIM,one-hot__aeronave_tipo_veiculo_***,one-hot__aeronave_tipo_veiculo_AVIÃO,one-hot__aeronave_tipo_veiculo_HELICÓPTERO,one-hot__aeronave_tipo_veiculo_ULTRALEVE,one-hot__aeronave_motor_tipo_***,one-hot__aeronave_motor_tipo_JATO,one-hot__aeronave_motor_tipo_PISTÃO,one-hot__aeronave_motor_tipo_TURBOEIXO,one-hot__aeronave_motor_tipo_TURBOÉLICE,one-hot__aeronave_motor_quantidade_***,one-hot__aeronave_motor_quantidade_BIMOTOR,one-hot__aeronave_motor_quantidade_MONOMOTOR,one-hot__aeronave_motor_quantidade_SEM TRAÇÃO,one-hot__aeronave_tipo_operacao_***,one-hot__aeronave_tipo_operacao_AGRÍCOLA,one-hot__aeronave_tipo_operacao_INSTRUÇÃO,one-hot__aeronave_tipo_operacao_PRIVADA,one-hot__aeronave_tipo_operacao_REGULAR,one-hot__aeronave_tipo_operacao_TÁXI AÉREO,one-hot__aeronave_nivel_dano_***,one-hot__aeronave_nivel_dano_DESTRUÍDA,one-hot__aeronave_nivel_dano_LEVE,one-hot__aeronave_nivel_dano_NENHUM,one-hot__aeronave_nivel_dano_SUBSTANCIAL,minmax__ocorrencia_latitude,minmax__ocorrencia_longitude,minmax__aeronave_assentos,periodo_oe
0,***,PR,SBLO,EMB-810D,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,PRIVADA,***,00:00 às 06:00,0.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.528177,0.537901,0.009105,0.0
1,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,PRIVADA,NENHUM,18:00 às 00:00,0.0,0.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.528177,0.537901,0.00607,1.0
2,***,PE,SBRF,***,0.0,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,***,SCF-PP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,JATO,BIMOTOR,***,***,18:00 às 00:00,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.528177,0.537901,0.0,1.0
3,***,PR,SBCT,AB-115,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,INSTRUÇÃO,***,00:00 às 06:00,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.528177,0.537901,0.003035,0.0
4,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,TÁXI,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,TÁXI AÉREO,NENHUM,18:00 às 00:00,0.0,0.0,10.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.528177,0.537901,0.015175,1.0


In [347]:
# verifica os códigos numéricos dos períodos com os códigos categóricos
print('periodo_oe', df_acidentes_aero_trans['periodo_oe'].value_counts())
print('periodo', df_acidentes_aero_trans['periodo'].value_counts())

periodo_oe periodo_oe
2.0    2548
1.0    1885
0.0    1526
3.0     929
Name: count, dtype: int64
periodo periodo
12:00 às 18:00    2548
18:00 às 00:00    1885
00:00 às 06:00    1526
06:00 às 12:00     929
Name: count, dtype: int64


In [348]:
# criando lista de colunas originais categóricas e numéricas que participarão da clusterização
columns_aero = [
        'ocorrencia_cidade',
        'ocorrencia_uf',
        'ocorrencia_aerodromo',
        'aeronave_modelo',
        'aeronave_ano_fabricacao',
        'aeronave_voo_origem',
        'aeronave_voo_destino',
        'aeronave_fase_operacao',
        'aeronave_fatalidades_total',
        'ocorrencia_tipo',
        'taxonomia_tipo_icao',
        'fator_nome',
        'fator_aspecto',
        'fator_condicionante',
        'fator_area',

        'ocorrencia_classificacao',
        'ocorrencia_saida_pista',
        'aeronave_tipo_veiculo',
        'aeronave_motor_tipo',
        'aeronave_motor_quantidade',
        'aeronave_tipo_operacao',
        'aeronave_nivel_dano',
        'periodo',     

        #'ocorrencia_latitude',
        #'ocorrencia_longitude', 
        #'aeronave_assentos',

        #'minmax__ocorrencia_latitude',
        #'minmax__ocorrencia_longitude', 
        #'minmax__aeronave_assentos',
        ]

columns_trans_apaga = [
        'ocorrencia_cidade',
        'ocorrencia_uf',
        'ocorrencia_aerodromo',
        'aeronave_modelo',
        'aeronave_ano_fabricacao',
        'aeronave_voo_origem',
        'aeronave_voo_destino',
        'aeronave_fase_operacao',
        'aeronave_fatalidades_total',
        'ocorrencia_tipo',
        'taxonomia_tipo_icao',
        'fator_nome',
        'fator_aspecto',
        'fator_condicionante',
        'fator_area',

        'ocorrencia_classificacao',
        'ocorrencia_saida_pista',
        'aeronave_tipo_veiculo',
        'aeronave_motor_tipo',
        'aeronave_motor_quantidade',
        'aeronave_tipo_operacao',
        'aeronave_nivel_dano',
        'periodo',     

        'ocorrencia_latitude',
        'ocorrencia_longitude', 
        'aeronave_assentos',

        'minmax__ocorrencia_latitude',
        'minmax__ocorrencia_longitude', 
        #'minmax__aeronave_assentos',
     
        ]        
      

In [349]:
# carregando colunas listadas anteriormente para um novo dataframe
df_acidentes_aero = df_acidentes_aero_trans[columns_aero]

In [350]:
# conferindo a dimensão(linhas x colunas) do dataframe de colunas originais
# temos 8 colunas categoricas e 2 colunas numericas
df_acidentes_aero.shape

(6888, 23)

In [351]:
# fazendo um reconhecimento inicial das primeiras linhas do dataframe criado com variáveis(colunas) originais
df_acidentes_aero.head(5)

Unnamed: 0,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,aeronave_modelo,aeronave_ano_fabricacao,aeronave_voo_origem,aeronave_voo_destino,aeronave_fase_operacao,aeronave_fatalidades_total,ocorrencia_tipo,taxonomia_tipo_icao,fator_nome,fator_aspecto,fator_condicionante,fator_area,ocorrencia_classificacao,ocorrencia_saida_pista,aeronave_tipo_veiculo,aeronave_motor_tipo,aeronave_motor_quantidade,aeronave_tipo_operacao,aeronave_nivel_dano,periodo
0,***,PR,SBLO,EMB-810D,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,PRIVADA,***,00:00 às 06:00
1,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,PRIVADA,NENHUM,18:00 às 00:00
2,***,PE,SBRF,***,0.0,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,***,SCF-PP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,JATO,BIMOTOR,***,***,18:00 às 00:00
3,***,PR,SBCT,AB-115,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,INSTRUÇÃO,***,00:00 às 06:00
4,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,TÁXI,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,TÁXI AÉREO,NENHUM,18:00 às 00:00


In [352]:
df_acidentes_aero_trans.head(5)

Unnamed: 0,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,aeronave_modelo,aeronave_ano_fabricacao,aeronave_voo_origem,aeronave_voo_destino,aeronave_fase_operacao,aeronave_fatalidades_total,ocorrencia_tipo,taxonomia_tipo_icao,fator_nome,fator_aspecto,fator_condicionante,fator_area,ocorrencia_classificacao,ocorrencia_saida_pista,aeronave_tipo_veiculo,aeronave_motor_tipo,aeronave_motor_quantidade,aeronave_tipo_operacao,aeronave_nivel_dano,periodo,ocorrencia_latitude,ocorrencia_longitude,aeronave_assentos,one-hot__ocorrencia_cidade_***,one-hot__ocorrencia_cidade_BELO HORIZONTE,one-hot__ocorrencia_cidade_BRASÍLIA,one-hot__ocorrencia_cidade_CAMPINAS,one-hot__ocorrencia_cidade_GUARULHOS,one-hot__ocorrencia_cidade_RIO DE JANEIRO,one-hot__ocorrencia_cidade_SÃO PAULO,one-hot__ocorrencia_uf_***,one-hot__ocorrencia_uf_AM,one-hot__ocorrencia_uf_BA,one-hot__ocorrencia_uf_CE,one-hot__ocorrencia_uf_DF,one-hot__ocorrencia_uf_ES,one-hot__ocorrencia_uf_GO,one-hot__ocorrencia_uf_MA,one-hot__ocorrencia_uf_MG,one-hot__ocorrencia_uf_MS,one-hot__ocorrencia_uf_MT,one-hot__ocorrencia_uf_PA,one-hot__ocorrencia_uf_PE,one-hot__ocorrencia_uf_PR,one-hot__ocorrencia_uf_RJ,one-hot__ocorrencia_uf_RS,one-hot__ocorrencia_uf_SC,one-hot__ocorrencia_uf_SP,one-hot__ocorrencia_aerodromo_***,one-hot__ocorrencia_aerodromo_**NI,one-hot__ocorrencia_aerodromo_SBBH,one-hot__ocorrencia_aerodromo_SBBR,one-hot__ocorrencia_aerodromo_SBCF,one-hot__ocorrencia_aerodromo_SBCT,one-hot__ocorrencia_aerodromo_SBEG,one-hot__ocorrencia_aerodromo_SBGL,one-hot__ocorrencia_aerodromo_SBGO,one-hot__ocorrencia_aerodromo_SBGR,one-hot__ocorrencia_aerodromo_SBJR,one-hot__ocorrencia_aerodromo_SBKP,one-hot__ocorrencia_aerodromo_SBLO,one-hot__ocorrencia_aerodromo_SBMT,one-hot__ocorrencia_aerodromo_SBPA,one-hot__ocorrencia_aerodromo_SBRF,one-hot__ocorrencia_aerodromo_SBRJ,one-hot__ocorrencia_aerodromo_SBSP,one-hot__ocorrencia_aerodromo_SBSV,one-hot__aeronave_modelo_***,one-hot__aeronave_modelo_737-8EH,one-hot__aeronave_modelo_A320-214,one-hot__aeronave_modelo_AB-115,one-hot__aeronave_modelo_ATR-72-212A,one-hot__aeronave_modelo_EMB-810C,one-hot__aeronave_modelo_EMB-810D,one-hot__aeronave_modelo_ERJ 190-200 IGW,one-hot__aeronave_ano_fabricacao_***,one-hot__aeronave_ano_fabricacao_0.0,one-hot__aeronave_ano_fabricacao_1900.0,one-hot__aeronave_ano_fabricacao_2007.0,one-hot__aeronave_ano_fabricacao_2008.0,one-hot__aeronave_ano_fabricacao_2009.0,one-hot__aeronave_ano_fabricacao_2010.0,one-hot__aeronave_ano_fabricacao_2011.0,one-hot__aeronave_ano_fabricacao_2012.0,one-hot__aeronave_voo_origem_***,one-hot__aeronave_voo_origem_CAMPO DE MARTE - SP,one-hot__aeronave_voo_origem_CARLOS DRUMMOND DE ANDRADE / PAMPULHA,one-hot__aeronave_voo_origem_CONGONHAS,one-hot__aeronave_voo_origem_DEPUTADO LUÍS EDUARDO MAGALHÃES,one-hot__aeronave_voo_origem_FORA DE AERODROMO,one-hot__aeronave_voo_origem_GOVERNADOR ANDRÉ FRANCO MONTORO,one-hot__aeronave_voo_origem_GUARARAPES - GILBERTO FREYRE,one-hot__aeronave_voo_origem_NÃO IDENTIFICADO,one-hot__aeronave_voo_origem_PRESIDENTE JUSCELINO KUBITSCHEK,one-hot__aeronave_voo_origem_SALGADO FILHO,one-hot__aeronave_voo_origem_SANTA GENOVEVA/GOIÂNIA,one-hot__aeronave_voo_origem_SANTOS DUMONT,one-hot__aeronave_voo_origem_TANCREDO NEVES,one-hot__aeronave_voo_origem_VIRACOPOS,one-hot__aeronave_voo_destino_***,one-hot__aeronave_voo_destino_CAMPO DE MARTE - SP,one-hot__aeronave_voo_destino_CONGONHAS,one-hot__aeronave_voo_destino_DEPUTADO LUÍS EDUARDO MAGALHÃES,one-hot__aeronave_voo_destino_FORA DE AERODROMO,one-hot__aeronave_voo_destino_GOVERNADOR ANDRÉ FRANCO MONTORO,one-hot__aeronave_voo_destino_GUARARAPES - GILBERTO FREYRE,one-hot__aeronave_voo_destino_NÃO IDENTIFICADO,one-hot__aeronave_voo_destino_PRESIDENTE JUSCELINO KUBITSCHEK,one-hot__aeronave_voo_destino_SALGADO FILHO,one-hot__aeronave_voo_destino_SANTA GENOVEVA/GOIÂNIA,one-hot__aeronave_voo_destino_SANTOS DUMONT,one-hot__aeronave_voo_destino_TANCREDO NEVES,one-hot__aeronave_voo_destino_VIRACOPOS,one-hot__aeronave_fase_operacao_***,one-hot__aeronave_fase_operacao_APROXIMAÇÃO FINAL,one-hot__aeronave_fase_operacao_CIRCUITO DE TRÁFEGO,one-hot__aeronave_fase_operacao_CORRIDA APÓS POUSO,one-hot__aeronave_fase_operacao_CRUZEIRO,one-hot__aeronave_fase_operacao_DECOLAGEM,one-hot__aeronave_fase_operacao_DESCIDA,one-hot__aeronave_fase_operacao_ESPECIALIZADA,one-hot__aeronave_fase_operacao_ESTACIONAMENTO,one-hot__aeronave_fase_operacao_INDETERMINADA,one-hot__aeronave_fase_operacao_MANOBRA,one-hot__aeronave_fase_operacao_OUTRA FASE,one-hot__aeronave_fase_operacao_POUSO,one-hot__aeronave_fase_operacao_SUBIDA,one-hot__aeronave_fase_operacao_TÁXI,one-hot__aeronave_fase_operacao_VOO A BAIXA ALTURA,one-hot__aeronave_fatalidades_total_***,one-hot__aeronave_fatalidades_total_0,one-hot__aeronave_fatalidades_total_1,one-hot__aeronave_fatalidades_total_2,one-hot__aeronave_fatalidades_total_3,one-hot__aeronave_fatalidades_total_4,one-hot__aeronave_fatalidades_total_5,one-hot__aeronave_fatalidades_total_6,one-hot__ocorrencia_tipo_***,one-hot__ocorrencia_tipo_CAUSADO POR FENÔMENO METEOROLÓGICO EM VOO,one-hot__ocorrencia_tipo_COLISÃO COM AVE,one-hot__ocorrencia_tipo_COLISÃO COM OBSTÁCULO DURANTE A DECOLAGEM E POUSO,one-hot__ocorrencia_tipo_COLISÃO COM OBSTÁCULOS NO SOLO,one-hot__ocorrencia_tipo_COM PARA-BRISAS / JANELA / PORTA,one-hot__ocorrencia_tipo_COM TREM DE POUSO,one-hot__ocorrencia_tipo_ESTOURO DE PNEU,one-hot__ocorrencia_tipo_EXCURSÃO DE PISTA,one-hot__ocorrencia_tipo_FALHA DO MOTOR EM VOO,one-hot__ocorrencia_tipo_FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPONENTE,one-hot__ocorrencia_tipo_INDETERMINADO,one-hot__ocorrencia_tipo_OPERAÇÃO A BAIXA ALTITUDE,one-hot__ocorrencia_tipo_OUTROS,one-hot__ocorrencia_tipo_PANE SECA,one-hot__ocorrencia_tipo_PERDA DE COMPONENTE EM VOO,one-hot__ocorrencia_tipo_PERDA DE CONTROLE EM VOO,one-hot__ocorrencia_tipo_PERDA DE CONTROLE NO SOLO,one-hot__ocorrencia_tipo_POUSO BRUSCO,one-hot__ocorrencia_tipo_POUSO EM LOCAL NÃO PREVISTO,one-hot__ocorrencia_tipo_POUSO LONGO,one-hot__ocorrencia_tipo_POUSO SEM TREM,one-hot__ocorrencia_tipo_TRÁFEGO AÉREO,one-hot__ocorrencia_tipo_VAZAMENTO DE OUTROS FLUIDOS,one-hot__taxonomia_tipo_icao_***,one-hot__taxonomia_tipo_icao_ARC,one-hot__taxonomia_tipo_icao_BIRD,one-hot__taxonomia_tipo_icao_CTOL,one-hot__taxonomia_tipo_icao_FUEL,one-hot__taxonomia_tipo_icao_GCOL,one-hot__taxonomia_tipo_icao_LALT,one-hot__taxonomia_tipo_icao_LOC-G,one-hot__taxonomia_tipo_icao_LOC-I,one-hot__taxonomia_tipo_icao_MAC,one-hot__taxonomia_tipo_icao_OTHR,one-hot__taxonomia_tipo_icao_RE,one-hot__taxonomia_tipo_icao_SCF-NP,one-hot__taxonomia_tipo_icao_SCF-PP,one-hot__taxonomia_tipo_icao_UNK,one-hot__fator_nome_***,one-hot__fator_nome_JULGAMENTO DE PILOTAGEM,one-hot__fator_nome_MANUTENÇÃO DA AERONAVE,one-hot__fator_nome_PLANEJAMENTO DE VOO,one-hot__fator_nome_POUCA EXPERIÊNCIA DO PILOTO,one-hot__fator_nome_PROCESSO DECISÓRIO,one-hot__fator_nome_SUPERVISÃO GERENCIAL,one-hot__fator_aspecto_***,one-hot__fator_aspecto_ASPECTO DE PROJETO,one-hot__fator_aspecto_ASPECTO MÉDICO,one-hot__fator_aspecto_ASPECTO PSICOLÓGICO,one-hot__fator_aspecto_DESEMPENHO DO SER HUMANO,one-hot__fator_aspecto_ELEMENTOS RELACIONADOS AO AMBIENTE OPERACIONAL,one-hot__fator_aspecto_INFRAESTRUTURA AEROPORTUÁRIA,one-hot__fator_aspecto_OUTRO,one-hot__fator_condicionante_***,one-hot__fator_condicionante_INDIVIDUAL,one-hot__fator_condicionante_MANUTENÇÃO DA AERONAVE,one-hot__fator_condicionante_OPERAÇÃO DA AERONAVE,one-hot__fator_condicionante_ORGANIZACIONAL,one-hot__fator_condicionante_PSICOSSOCIAL,one-hot__fator_area_***,one-hot__fator_area_FATOR HUMANO,one-hot__fator_area_FATOR MATERIAL,one-hot__fator_area_FATOR OPERACIONAL,one-hot__fator_area_OUTRO,one-hot__ocorrencia_classificacao_ACIDENTE,one-hot__ocorrencia_classificacao_INCIDENTE,one-hot__ocorrencia_classificacao_INCIDENTE GRAVE,one-hot__ocorrencia_saida_pista_NÃO,one-hot__ocorrencia_saida_pista_SIM,one-hot__aeronave_tipo_veiculo_***,one-hot__aeronave_tipo_veiculo_AVIÃO,one-hot__aeronave_tipo_veiculo_HELICÓPTERO,one-hot__aeronave_tipo_veiculo_ULTRALEVE,one-hot__aeronave_motor_tipo_***,one-hot__aeronave_motor_tipo_JATO,one-hot__aeronave_motor_tipo_PISTÃO,one-hot__aeronave_motor_tipo_TURBOEIXO,one-hot__aeronave_motor_tipo_TURBOÉLICE,one-hot__aeronave_motor_quantidade_***,one-hot__aeronave_motor_quantidade_BIMOTOR,one-hot__aeronave_motor_quantidade_MONOMOTOR,one-hot__aeronave_motor_quantidade_SEM TRAÇÃO,one-hot__aeronave_tipo_operacao_***,one-hot__aeronave_tipo_operacao_AGRÍCOLA,one-hot__aeronave_tipo_operacao_INSTRUÇÃO,one-hot__aeronave_tipo_operacao_PRIVADA,one-hot__aeronave_tipo_operacao_REGULAR,one-hot__aeronave_tipo_operacao_TÁXI AÉREO,one-hot__aeronave_nivel_dano_***,one-hot__aeronave_nivel_dano_DESTRUÍDA,one-hot__aeronave_nivel_dano_LEVE,one-hot__aeronave_nivel_dano_NENHUM,one-hot__aeronave_nivel_dano_SUBSTANCIAL,minmax__ocorrencia_latitude,minmax__ocorrencia_longitude,minmax__aeronave_assentos,periodo_oe
0,***,PR,SBLO,EMB-810D,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,PRIVADA,***,00:00 às 06:00,0.0,0.0,6.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.528177,0.537901,0.009105,0.0
1,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,PRIVADA,NENHUM,18:00 às 00:00,0.0,0.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.528177,0.537901,0.00607,1.0
2,***,PE,SBRF,***,0.0,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,***,SCF-PP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,JATO,BIMOTOR,***,***,18:00 às 00:00,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.528177,0.537901,0.0,1.0
3,***,PR,SBCT,AB-115,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,INSTRUÇÃO,***,00:00 às 06:00,0.0,0.0,2.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.528177,0.537901,0.003035,0.0
4,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,TÁXI,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,TÁXI AÉREO,NENHUM,18:00 às 00:00,0.0,0.0,10.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.528177,0.537901,0.015175,1.0


In [353]:
# cria dataframe somente com as colunas que sofreram transformação
df_acidentes_aero_trans = df_acidentes_aero_trans.drop(columns=columns_trans_apaga, axis=1)

- Essas colunas que sofreram transformação serão as colunas utilizadas para a definição do cluster ao qual o acidente de cada linha do dataframe vai pertencer

In [354]:
# verificando o tamanho do dataframe(linhas x colunas) criado com as colunas transformadas
# temos 47 variáveis binárias categóricas e 2 variáveis numéricas que variam de 0 a 1
df_acidentes_aero_trans.shape

(6888, 210)

In [355]:
# verificando as primeiras linhas do novo dataframe
df_acidentes_aero_trans.head(5)

Unnamed: 0,one-hot__ocorrencia_cidade_***,one-hot__ocorrencia_cidade_BELO HORIZONTE,one-hot__ocorrencia_cidade_BRASÍLIA,one-hot__ocorrencia_cidade_CAMPINAS,one-hot__ocorrencia_cidade_GUARULHOS,one-hot__ocorrencia_cidade_RIO DE JANEIRO,one-hot__ocorrencia_cidade_SÃO PAULO,one-hot__ocorrencia_uf_***,one-hot__ocorrencia_uf_AM,one-hot__ocorrencia_uf_BA,one-hot__ocorrencia_uf_CE,one-hot__ocorrencia_uf_DF,one-hot__ocorrencia_uf_ES,one-hot__ocorrencia_uf_GO,one-hot__ocorrencia_uf_MA,one-hot__ocorrencia_uf_MG,one-hot__ocorrencia_uf_MS,one-hot__ocorrencia_uf_MT,one-hot__ocorrencia_uf_PA,one-hot__ocorrencia_uf_PE,one-hot__ocorrencia_uf_PR,one-hot__ocorrencia_uf_RJ,one-hot__ocorrencia_uf_RS,one-hot__ocorrencia_uf_SC,one-hot__ocorrencia_uf_SP,one-hot__ocorrencia_aerodromo_***,one-hot__ocorrencia_aerodromo_**NI,one-hot__ocorrencia_aerodromo_SBBH,one-hot__ocorrencia_aerodromo_SBBR,one-hot__ocorrencia_aerodromo_SBCF,one-hot__ocorrencia_aerodromo_SBCT,one-hot__ocorrencia_aerodromo_SBEG,one-hot__ocorrencia_aerodromo_SBGL,one-hot__ocorrencia_aerodromo_SBGO,one-hot__ocorrencia_aerodromo_SBGR,one-hot__ocorrencia_aerodromo_SBJR,one-hot__ocorrencia_aerodromo_SBKP,one-hot__ocorrencia_aerodromo_SBLO,one-hot__ocorrencia_aerodromo_SBMT,one-hot__ocorrencia_aerodromo_SBPA,one-hot__ocorrencia_aerodromo_SBRF,one-hot__ocorrencia_aerodromo_SBRJ,one-hot__ocorrencia_aerodromo_SBSP,one-hot__ocorrencia_aerodromo_SBSV,one-hot__aeronave_modelo_***,one-hot__aeronave_modelo_737-8EH,one-hot__aeronave_modelo_A320-214,one-hot__aeronave_modelo_AB-115,one-hot__aeronave_modelo_ATR-72-212A,one-hot__aeronave_modelo_EMB-810C,one-hot__aeronave_modelo_EMB-810D,one-hot__aeronave_modelo_ERJ 190-200 IGW,one-hot__aeronave_ano_fabricacao_***,one-hot__aeronave_ano_fabricacao_0.0,one-hot__aeronave_ano_fabricacao_1900.0,one-hot__aeronave_ano_fabricacao_2007.0,one-hot__aeronave_ano_fabricacao_2008.0,one-hot__aeronave_ano_fabricacao_2009.0,one-hot__aeronave_ano_fabricacao_2010.0,one-hot__aeronave_ano_fabricacao_2011.0,one-hot__aeronave_ano_fabricacao_2012.0,one-hot__aeronave_voo_origem_***,one-hot__aeronave_voo_origem_CAMPO DE MARTE - SP,one-hot__aeronave_voo_origem_CARLOS DRUMMOND DE ANDRADE / PAMPULHA,one-hot__aeronave_voo_origem_CONGONHAS,one-hot__aeronave_voo_origem_DEPUTADO LUÍS EDUARDO MAGALHÃES,one-hot__aeronave_voo_origem_FORA DE AERODROMO,one-hot__aeronave_voo_origem_GOVERNADOR ANDRÉ FRANCO MONTORO,one-hot__aeronave_voo_origem_GUARARAPES - GILBERTO FREYRE,one-hot__aeronave_voo_origem_NÃO IDENTIFICADO,one-hot__aeronave_voo_origem_PRESIDENTE JUSCELINO KUBITSCHEK,one-hot__aeronave_voo_origem_SALGADO FILHO,one-hot__aeronave_voo_origem_SANTA GENOVEVA/GOIÂNIA,one-hot__aeronave_voo_origem_SANTOS DUMONT,one-hot__aeronave_voo_origem_TANCREDO NEVES,one-hot__aeronave_voo_origem_VIRACOPOS,one-hot__aeronave_voo_destino_***,one-hot__aeronave_voo_destino_CAMPO DE MARTE - SP,one-hot__aeronave_voo_destino_CONGONHAS,one-hot__aeronave_voo_destino_DEPUTADO LUÍS EDUARDO MAGALHÃES,one-hot__aeronave_voo_destino_FORA DE AERODROMO,one-hot__aeronave_voo_destino_GOVERNADOR ANDRÉ FRANCO MONTORO,one-hot__aeronave_voo_destino_GUARARAPES - GILBERTO FREYRE,one-hot__aeronave_voo_destino_NÃO IDENTIFICADO,one-hot__aeronave_voo_destino_PRESIDENTE JUSCELINO KUBITSCHEK,one-hot__aeronave_voo_destino_SALGADO FILHO,one-hot__aeronave_voo_destino_SANTA GENOVEVA/GOIÂNIA,one-hot__aeronave_voo_destino_SANTOS DUMONT,one-hot__aeronave_voo_destino_TANCREDO NEVES,one-hot__aeronave_voo_destino_VIRACOPOS,one-hot__aeronave_fase_operacao_***,one-hot__aeronave_fase_operacao_APROXIMAÇÃO FINAL,one-hot__aeronave_fase_operacao_CIRCUITO DE TRÁFEGO,one-hot__aeronave_fase_operacao_CORRIDA APÓS POUSO,one-hot__aeronave_fase_operacao_CRUZEIRO,one-hot__aeronave_fase_operacao_DECOLAGEM,one-hot__aeronave_fase_operacao_DESCIDA,one-hot__aeronave_fase_operacao_ESPECIALIZADA,one-hot__aeronave_fase_operacao_ESTACIONAMENTO,one-hot__aeronave_fase_operacao_INDETERMINADA,one-hot__aeronave_fase_operacao_MANOBRA,one-hot__aeronave_fase_operacao_OUTRA FASE,one-hot__aeronave_fase_operacao_POUSO,one-hot__aeronave_fase_operacao_SUBIDA,one-hot__aeronave_fase_operacao_TÁXI,one-hot__aeronave_fase_operacao_VOO A BAIXA ALTURA,one-hot__aeronave_fatalidades_total_***,one-hot__aeronave_fatalidades_total_0,one-hot__aeronave_fatalidades_total_1,one-hot__aeronave_fatalidades_total_2,one-hot__aeronave_fatalidades_total_3,one-hot__aeronave_fatalidades_total_4,one-hot__aeronave_fatalidades_total_5,one-hot__aeronave_fatalidades_total_6,one-hot__ocorrencia_tipo_***,one-hot__ocorrencia_tipo_CAUSADO POR FENÔMENO METEOROLÓGICO EM VOO,one-hot__ocorrencia_tipo_COLISÃO COM AVE,one-hot__ocorrencia_tipo_COLISÃO COM OBSTÁCULO DURANTE A DECOLAGEM E POUSO,one-hot__ocorrencia_tipo_COLISÃO COM OBSTÁCULOS NO SOLO,one-hot__ocorrencia_tipo_COM PARA-BRISAS / JANELA / PORTA,one-hot__ocorrencia_tipo_COM TREM DE POUSO,one-hot__ocorrencia_tipo_ESTOURO DE PNEU,one-hot__ocorrencia_tipo_EXCURSÃO DE PISTA,one-hot__ocorrencia_tipo_FALHA DO MOTOR EM VOO,one-hot__ocorrencia_tipo_FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPONENTE,one-hot__ocorrencia_tipo_INDETERMINADO,one-hot__ocorrencia_tipo_OPERAÇÃO A BAIXA ALTITUDE,one-hot__ocorrencia_tipo_OUTROS,one-hot__ocorrencia_tipo_PANE SECA,one-hot__ocorrencia_tipo_PERDA DE COMPONENTE EM VOO,one-hot__ocorrencia_tipo_PERDA DE CONTROLE EM VOO,one-hot__ocorrencia_tipo_PERDA DE CONTROLE NO SOLO,one-hot__ocorrencia_tipo_POUSO BRUSCO,one-hot__ocorrencia_tipo_POUSO EM LOCAL NÃO PREVISTO,one-hot__ocorrencia_tipo_POUSO LONGO,one-hot__ocorrencia_tipo_POUSO SEM TREM,one-hot__ocorrencia_tipo_TRÁFEGO AÉREO,one-hot__ocorrencia_tipo_VAZAMENTO DE OUTROS FLUIDOS,one-hot__taxonomia_tipo_icao_***,one-hot__taxonomia_tipo_icao_ARC,one-hot__taxonomia_tipo_icao_BIRD,one-hot__taxonomia_tipo_icao_CTOL,one-hot__taxonomia_tipo_icao_FUEL,one-hot__taxonomia_tipo_icao_GCOL,one-hot__taxonomia_tipo_icao_LALT,one-hot__taxonomia_tipo_icao_LOC-G,one-hot__taxonomia_tipo_icao_LOC-I,one-hot__taxonomia_tipo_icao_MAC,one-hot__taxonomia_tipo_icao_OTHR,one-hot__taxonomia_tipo_icao_RE,one-hot__taxonomia_tipo_icao_SCF-NP,one-hot__taxonomia_tipo_icao_SCF-PP,one-hot__taxonomia_tipo_icao_UNK,one-hot__fator_nome_***,one-hot__fator_nome_JULGAMENTO DE PILOTAGEM,one-hot__fator_nome_MANUTENÇÃO DA AERONAVE,one-hot__fator_nome_PLANEJAMENTO DE VOO,one-hot__fator_nome_POUCA EXPERIÊNCIA DO PILOTO,one-hot__fator_nome_PROCESSO DECISÓRIO,one-hot__fator_nome_SUPERVISÃO GERENCIAL,one-hot__fator_aspecto_***,one-hot__fator_aspecto_ASPECTO DE PROJETO,one-hot__fator_aspecto_ASPECTO MÉDICO,one-hot__fator_aspecto_ASPECTO PSICOLÓGICO,one-hot__fator_aspecto_DESEMPENHO DO SER HUMANO,one-hot__fator_aspecto_ELEMENTOS RELACIONADOS AO AMBIENTE OPERACIONAL,one-hot__fator_aspecto_INFRAESTRUTURA AEROPORTUÁRIA,one-hot__fator_aspecto_OUTRO,one-hot__fator_condicionante_***,one-hot__fator_condicionante_INDIVIDUAL,one-hot__fator_condicionante_MANUTENÇÃO DA AERONAVE,one-hot__fator_condicionante_OPERAÇÃO DA AERONAVE,one-hot__fator_condicionante_ORGANIZACIONAL,one-hot__fator_condicionante_PSICOSSOCIAL,one-hot__fator_area_***,one-hot__fator_area_FATOR HUMANO,one-hot__fator_area_FATOR MATERIAL,one-hot__fator_area_FATOR OPERACIONAL,one-hot__fator_area_OUTRO,one-hot__ocorrencia_classificacao_ACIDENTE,one-hot__ocorrencia_classificacao_INCIDENTE,one-hot__ocorrencia_classificacao_INCIDENTE GRAVE,one-hot__ocorrencia_saida_pista_NÃO,one-hot__ocorrencia_saida_pista_SIM,one-hot__aeronave_tipo_veiculo_***,one-hot__aeronave_tipo_veiculo_AVIÃO,one-hot__aeronave_tipo_veiculo_HELICÓPTERO,one-hot__aeronave_tipo_veiculo_ULTRALEVE,one-hot__aeronave_motor_tipo_***,one-hot__aeronave_motor_tipo_JATO,one-hot__aeronave_motor_tipo_PISTÃO,one-hot__aeronave_motor_tipo_TURBOEIXO,one-hot__aeronave_motor_tipo_TURBOÉLICE,one-hot__aeronave_motor_quantidade_***,one-hot__aeronave_motor_quantidade_BIMOTOR,one-hot__aeronave_motor_quantidade_MONOMOTOR,one-hot__aeronave_motor_quantidade_SEM TRAÇÃO,one-hot__aeronave_tipo_operacao_***,one-hot__aeronave_tipo_operacao_AGRÍCOLA,one-hot__aeronave_tipo_operacao_INSTRUÇÃO,one-hot__aeronave_tipo_operacao_PRIVADA,one-hot__aeronave_tipo_operacao_REGULAR,one-hot__aeronave_tipo_operacao_TÁXI AÉREO,one-hot__aeronave_nivel_dano_***,one-hot__aeronave_nivel_dano_DESTRUÍDA,one-hot__aeronave_nivel_dano_LEVE,one-hot__aeronave_nivel_dano_NENHUM,one-hot__aeronave_nivel_dano_SUBSTANCIAL,minmax__aeronave_assentos,periodo_oe
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.009105,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.00607,1.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.003035,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.015175,1.0


In [356]:
# separa variáveis independentes e dependentes
X = df_acidentes_aero_trans.drop(columns=['periodo_oe'], axis=1)
y = df_acidentes_aero_trans[['periodo_oe']]

In [357]:
# separa os sets de treino e teste em 20% para teste e 80% para treino
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [358]:
# verifica o tamanho de cada set resultante do split
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(5510, 209)
(5510, 1)
(1378, 209)
(1378, 1)


In [359]:
# treina o modelo
clf = LogisticRegression(random_state=42)
clf.fit(X_train, y_train)

In [360]:
# prediz y com predict e predict_proba
y_pred_proba = clf.predict_proba(X_test) # fornece o percentual de cada classe
y_pred = clf.predict(X_test)

In [361]:
# visualiza a predição de y de X_test
y_pred

array([2., 2., 2., ..., 2., 0., 2.])

In [362]:
# gera a matriz de confusão para análise
# tivemos 123 acertos(soma da diagonal principal) contra 78 erros(vrs restantes) 
# de 201 total dando 61,19% de acerto
cnf_matrix = metrics.confusion_matrix(y_test, y_pred)
cnf_matrix

array([[ 68,  66, 165,   7],
       [ 33,  91, 232,  18],
       [ 36,  97, 354,  20],
       [ 26,  39, 106,  20]], dtype=int64)

In [363]:
# verifica a acurácia, precisão e recall do resultado que corresponde à matriz de confusão
print('acurácia =', metrics.accuracy_score(y_test, y_pred))
print('precisão =', metrics.precision_score(y_test, y_pred, average='weighted'))
print('recall   =', metrics.recall_score(y_test, y_pred, average='weighted'))
print('f1-score =', metrics.f1_score(y_test, y_pred, average='weighted'))

acurácia = 0.3867924528301887
precisão = 0.37155919694589196
recall   = 0.3867924528301887
f1-score = 0.35108303546182


In [364]:
# visualiza a predição das probabilidades de y
print(y_pred_proba)

[[0.14083349 0.3268309  0.36455316 0.16778246]
 [0.19749273 0.18660115 0.43114972 0.1847564 ]
 [0.17990437 0.25933561 0.48469359 0.07606643]
 ...
 [0.21942886 0.14838197 0.52636897 0.1058202 ]
 [0.35962601 0.25638637 0.23987985 0.14410777]
 [0.08402949 0.06468794 0.77719183 0.07409074]]


In [365]:
# transforma array em dataframe
dfproba = pd.DataFrame(y_pred_proba)

In [366]:
# renomeia as colunas de resultado do predict_proba
dfproba.columns = ['12:00 às 18:00', '18:00 às 00:00', '00:00 às 06:00', '06:00 às 12:00']

In [367]:
# arredonda os resultados com duas casas decimai
dfproba = round(dfproba*100, 2)

In [368]:
# soma o total de percentuais para checar se fecha em 100%
dfproba['soma_perc'] = dfproba['12:00 às 18:00'] + dfproba['18:00 às 00:00'] + \
                       dfproba['00:00 às 06:00'] + dfproba['06:00 às 12:00']

In [369]:
# exibe o dataframe para checar os percentuais
dfproba.head(10)

Unnamed: 0,12:00 às 18:00,18:00 às 00:00,00:00 às 06:00,06:00 às 12:00,soma_perc
0,14.08,32.68,36.46,16.78,100.0
1,19.75,18.66,43.11,18.48,100.0
2,17.99,25.93,48.47,7.61,100.0
3,16.94,35.0,38.58,9.48,100.0
4,35.32,23.96,15.95,24.76,99.99
5,12.39,36.59,28.32,22.69,99.99
6,16.16,38.69,34.89,10.26,100.0
7,17.12,42.49,34.7,5.69,100.0
8,14.04,31.69,45.05,9.22,100.0
9,21.62,12.35,57.77,8.27,100.01


In [370]:
df_acidentes_aero = pd.concat([df_acidentes_aero, dfproba], axis=1)

In [371]:
df_acidentes_aero.head(5)

Unnamed: 0,ocorrencia_cidade,ocorrencia_uf,ocorrencia_aerodromo,aeronave_modelo,aeronave_ano_fabricacao,aeronave_voo_origem,aeronave_voo_destino,aeronave_fase_operacao,aeronave_fatalidades_total,ocorrencia_tipo,taxonomia_tipo_icao,fator_nome,fator_aspecto,fator_condicionante,fator_area,ocorrencia_classificacao,ocorrencia_saida_pista,aeronave_tipo_veiculo,aeronave_motor_tipo,aeronave_motor_quantidade,aeronave_tipo_operacao,aeronave_nivel_dano,periodo,12:00 às 18:00,18:00 às 00:00,00:00 às 06:00,06:00 às 12:00,soma_perc
0,***,PR,SBLO,EMB-810D,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,PRIVADA,***,00:00 às 06:00,14.08,32.68,36.46,16.78,100.0
1,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,FALHA OU MAU FUNCIONAMENTO DE SISTEMA / COMPON...,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,PRIVADA,NENHUM,18:00 às 00:00,19.75,18.66,43.11,18.48,100.0
2,***,PE,SBRF,***,0.0,FORA DE AERODROMO,FORA DE AERODROMO,INDETERMINADA,0,***,SCF-PP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,JATO,BIMOTOR,***,***,18:00 às 00:00,17.99,25.93,48.47,7.61,100.0
3,***,PR,SBCT,AB-115,***,FORA DE AERODROMO,FORA DE AERODROMO,***,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,MONOMOTOR,INSTRUÇÃO,***,00:00 às 06:00,16.94,35.0,38.58,9.48,100.0
4,***,PR,***,***,***,FORA DE AERODROMO,FORA DE AERODROMO,TÁXI,0,ESTOURO DE PNEU,SCF-NP,***,***,***,***,INCIDENTE,NÃO,AVIÃO,PISTÃO,BIMOTOR,TÁXI AÉREO,NENHUM,18:00 às 00:00,35.32,23.96,15.95,24.76,99.99


## Faz o Deploy do Modelo

In [372]:
# faz o deploy do modelo
from joblib import dump

dump(clf, 'modelos/LR_cenipa_acidentes_aero.pkl')

['modelos/LR_cenipa_acidentes_aero.pkl']