## Regressão logística

### Bibliotecas e bases de dados

In [30]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
# NOTE(review): this silences every warning category for the rest of the
# notebook, so any warning raised while fitting the models further down
# (e.g. the lbfgs solver failing to converge — TODO confirm whether it does)
# is never shown to the reader.
warnings.filterwarnings('ignore')

In [8]:
# pandas display configuration used by every table rendered in this notebook.
pd.set_option('display.max_rows', None)             # None: show all rows, no truncation
pd.set_option('display.max_columns', None)          # None: show all columns, no truncation
pd.set_option('display.max_colwidth', 50)
pd.set_option('display.width', 1000)
pd.set_option('display.colheader_justify', 'left')
# Floats are shown with a thousands separator and two decimals. This option used
# to be set twice in a row ('{:.2f}' and then '{:,.2f}'); only the last call had
# any effect, so a single call is kept.
pd.set_option('display.float_format', '{:,.2f}'.format)
def estilo_minimalista(df):
    """Apply a compact, thin-bordered cell style to ``df`` for display.

    Parameters
    ----------
    df : object exposing a ``.style`` accessor with ``set_properties``
        (in this notebook, a pandas DataFrame).

    Returns
    -------
    Whatever ``df.style.set_properties(...)`` returns, with the CSS
    properties below applied to every cell.
    """
    # CSS applied uniformly to all cells: tiny font, hairline black border,
    # minimal padding.
    propriedades_css = {
        'font-size': '8px',
        'border-color': 'black',
        'border-width': '0.4px',
        'border-style': 'solid',
        'padding': '1px',
    }
    estilizador = df.style
    return estilizador.set_properties(**propriedades_css)
# Global plotting look: white background, "deep" palette, "talk" context
# scaled down to 80 %.
sns.set_theme(
    context="talk",
    style="white",
    palette="deep",
    font_scale=0.8,
)
# Default figure size, set after the theme call (same order as before).
plt.rcParams.update({'figure.figsize': (12, 6)})

In [12]:
# Load the pre-processed events table straight from the project repository.
# The file uses ';' as field separator and ',' as decimal mark.
df_eventos_preprocessado = pd.read_csv(
    filepath_or_buffer="https://raw.githubusercontent.com/brunagmoura/PrevisorReconhecimento/refs/heads/main/df_eventos_preprocessado.csv",
    decimal=',',
    sep=';',
)

# Preview of the first five rows (head() defaults to n=5).
df_eventos_preprocessado.head()

Unnamed: 0,Status,DH_MORTOS,DH_FERIDOS,DH_ENFERMOS,DH_DESABRIGADOS,DH_DESALOJADOS,DH_DESAPARECIDOS,DH_OUTROS AFETADOS,DH_total_danos_humanos,DM_Uni Habita Danificadas,DM_Uni Habita Destruidas,DM_Uni Habita Valor,DM_Inst Saúde Danificadas,DM_Inst Saúde Destruidas,DM_Inst Saúde Valor,DM_Inst Ensino Danificadas,DM_Inst Ensino Destruidas,DM_Inst Ensino Valor,DM_Inst Serviços Danificadas,DM_Inst Serviços Destruidas,DM_Inst Serviços Valor,DM_Inst Comuni Danificadas,DM_Inst Comuni Destruidas,DM_Inst Comuni Valor,DM_Obras de Infra Danificadas,DM_Obras de Infra Destruidas,DM_Obras de Infra Valor,DM_total_danos_materiais,PEPL_Assis_méd e emergên(R$),PEPL_Abast de água pot(R$),PEPL_sist de esgotos sanit(R$),PEPL_Sis limp e rec lixo (R$),PEPL_Sis cont pragas (R$),PEPL_distrib energia (R$),PEPL_Telecomunicações (R$),PEPL_Tran loc/reg/l_curso (R$),PEPL_Distrib combustíveis(R$),PEPL_Segurança pública (R$),PEPL_Ensino (R$),PEPL_total_publico,PEPR_Agricultura (R$),PEPR_Pecuária (R$),PEPR_Indústria (R$),PEPR_Comércio (R$),PEPR_Serviços (R$),PEPR_total_privado,PE_PLePR,DensidadePop,Hab,Area,Subgrupo_Alagamentos,Subgrupo_Colapso de edificações,Subgrupo_Desastres relacionados à contaminação da água,Subgrupo_Enxurradas,Subgrupo_Epidemias,Subgrupo_Erosão,Subgrupo_Incêndios urbanos,Subgrupo_Infestações/Pragas,Subgrupo_Inundações,Subgrupo_Movimento de massa,Subgrupo_Rompimento/colapso de barragens,Subgrupo_Seca,Subgrupo_Sistemas de Grande Escala/Escala Regional,Subgrupo_Temperaturas Extremas,Subgrupo_Tempestades,Subgrupo_Terremoto,Subgrupo_Transporte aquaviário,Subgrupo_Transporte rodoviário,Subgrupo_nan,Categoria_Natural,Categoria_Tecnológico,Grupo_Biológico,Grupo_Climatológico,Grupo_Desastres Relacionados a Incêndios Urbanos,Grupo_Desastres Relacionados a Produtos Perigosos,Grupo_Desastres relacionados a obras civis,Grupo_Desastres relacionados a transporte de passageiros e cargas não perigosas,Grupo_Geológico,Grupo_Hidrológico,Grupo_Meteorológico
0,0,0,0,0,0,0,0,1500,1500,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,10.72,5358.0,500.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0,0,0,0,0,0,0,3200,3200,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,6271293.52,6271293.52,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2332356.82,0.0,0.0,0.0,2332356.82,326156.77,1630783.87,0.0,0.0,0.0,1956940.64,4289297.46,8.28,24114.0,2913.18,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,54435.57,30.0,5.0,1041889.69,1096325.26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,11.54,1696.0,147.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0,0.0,0,0,0.0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1740983.83,0.0,0.0,0.0,1740983.83,9718266.89,0.0,0.0,0.0,0.0,9718266.89,11459250.72,19.46,4781.0,245.63,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0,0,5,2,150,25,0,0,182,7,30,3315927.2,1.0,0,1087189.25,1.0,0,90599.1,0,0,0.0,0,0,0.0,8.0,0.0,7791522.93,12285238.47,90599.1,45299.55,3823282.18,217437.85,0.0,0.0,0.0,4167558.77,0.0,0.0,90599.1,8434776.56,905991.04,54359.46,0.0,0.0,108718.92,1069069.42,9503845.99,52.91,15332.0,289.78,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


### Regressão logística

#### Todas as variáveis - sem crossvalidation

In [44]:
# Feature matrix and target.
# 'Unnamed: 0' is only the row counter stored in the CSV (0, 1, 2, ... in the
# preview above). It says nothing about the event, so it must not be used as a
# predictor. errors='ignore' keeps this cell working if the loader is later
# changed to read that column as the index.
X_completa = (
    df_eventos_preprocessado
    .drop(columns=['Status'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
y_completa = df_eventos_preprocessado['Status']

# NOTE(review): the predictors mix 0/1 dummies with monetary totals in the
# millions (see the preview above) and are passed to lbfgs unscaled, while all
# warnings are silenced in the import cell. Worth checking convergence and
# considering feature standardisation.
reglog_completa_semcross = LogisticRegression(solver='lbfgs')

# Single 80/20 hold-out split; random_state fixed for reproducibility.
(
    X_train_completa_semcross,
    X_test_completa_semcross,
    y_train_completa_semcross,
    y_test_completa_semcross,
) = train_test_split(X_completa, y_completa, test_size=0.2, random_state=1)

reglog_completa_semcross.fit(X_train_completa_semcross, y_train_completa_semcross)

# Mean accuracy on the held-out 20 % (displayed as the cell output).
reglog_completa_semcross.score(X_test_completa_semcross, y_test_completa_semcross)

#### Todas as variáveis - com crossvalidation (k-fold, k = 3)

In [42]:
# Feature matrix and target.
# 'Unnamed: 0' is only the row counter stored in the CSV (0, 1, 2, ... in the
# data preview), not a property of the event, so it is excluded from the
# predictors. errors='ignore' keeps this cell working if the loader is later
# changed to read that column as the index.
X_completa = (
    df_eventos_preprocessado
    .drop(columns=['Status'])
    .drop(columns=['Unnamed: 0'], errors='ignore')
)
y_completa = df_eventos_preprocessado['Status']

reglog_completa_cross_3 = LogisticRegression(solver='lbfgs')

# Same 80/20 hold-out split as the cell without cross-validation (same seed).
(
    X_train_completa_cross_3,
    X_test_completa_cross_3,
    y_train_completa_cross_3,
    y_test_completa_cross_3,
) = train_test_split(X_completa, y_completa, test_size=0.2, random_state=1)

# 3-fold cross-validation on the training portion only, scored with weighted F1.
cv_result_3 = cross_val_score(
    reglog_completa_cross_3,
    X_train_completa_cross_3,
    y_train_completa_cross_3,
    cv=3,
    scoring='f1_weighted',
)
# These scores used to be computed and then discarded, so the cell reported
# nothing that came from cross-validation. Show them.
print(f"F1 ponderado por fold (validação cruzada, k=3): {cv_result_3}")
print(f"F1 ponderado médio: {cv_result_3.mean():.4f} (desvio padrão: {cv_result_3.std():.4f})")

# cross_val_score works on clones of the estimator, so the model still has to
# be fitted on the full training split before it can be scored on the test set.
reglog_completa_cross_3.fit(X_train_completa_cross_3, y_train_completa_cross_3)

# .score() on a classifier is mean accuracy, which is a different metric from
# the weighted F1 used in the cross-validation above; label it to avoid mixing
# the two numbers up.
acuracia_teste_cross_3 = reglog_completa_cross_3.score(X_test_completa_cross_3, y_test_completa_cross_3)
print(f"Acurácia no conjunto de teste: {acuracia_teste_cross_3}")