# Predição de mortalidade em internações por pneumonias bacterianas sensíveis à Atenção Primária no Brasil, 2017-2021

## Análise preditiva

### 1. Importando pacotes de análise

In [36]:
# IMPORTANDO PACOTES 
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd  
import chart_studio.plotly as py
import plotly.graph_objs as gp 
import seaborn as sns
import statsmodels.formula.api as smf

from sklearn.model_selection import train_test_split

### 2. Importando dados

In [31]:
#IMPOTANDO DADOS
sih_fn = pd.read_csv("sih_pneumonia_1721_trat.csv", index_col= 'N_AIH')
sih_fn.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 658719 entries, 1217100049066 to 1721101761060
Data columns (total 9 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   IDADE         658719 non-null  float64
 1   SEXO          658719 non-null  int64  
 2   MORTE         658719 non-null  int64  
 3   DIAS_PERM     658719 non-null  int64  
 4   DIAG_PRINC    658719 non-null  int64  
 5   MARCA_UTI     658719 non-null  int64  
 6   UTI_MES_TO    658719 non-null  int64  
 7   FAIXA_ETARIA  658719 non-null  int64  
 8   REGIAO        658719 non-null  int64  
dtypes: float64(1), int64(8)
memory usage: 50.3 MB


In [34]:
#TRANSFORMANDO DADOS
sih_fn["DIAS_PERM"] = sih_fn["DIAS_PERM"].astype(float)
sih_fn.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 658719 entries, 1217100049066 to 1721101761060
Data columns (total 9 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   IDADE         658719 non-null  float64
 1   SEXO          658719 non-null  int64  
 2   MORTE         658719 non-null  int64  
 3   DIAS_PERM     658719 non-null  float64
 4   DIAG_PRINC    658719 non-null  int64  
 5   MARCA_UTI     658719 non-null  int64  
 6   UTI_MES_TO    658719 non-null  int64  
 7   FAIXA_ETARIA  658719 non-null  int64  
 8   REGIAO        658719 non-null  int64  
dtypes: float64(2), int64(7)
memory usage: 50.3 MB


### 3. Definindo teste e treino

In [32]:
#SEPARANDO OS DADOS
train, test = train_test_split(sih_fn, train_size=0.70, random_state=42)

### 4. Testando algoritmos de classificação

#### 4.1 Regressão Logística

In [46]:
log_reg1 = smf.logit("MORTE ~ IDADE + SEXO", data = train).fit()
print(log_reg1.summary())

Optimization terminated successfully.
         Current function value: 0.267568
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                  MORTE   No. Observations:               461103
Model:                          Logit   Df Residuals:                   461100
Method:                           MLE   Df Model:                            2
Date:                Sun, 18 Sep 2022   Pseudo R-squ.:                  0.1464
Time:                        10:27:54   Log-Likelihood:            -1.2338e+05
converged:                       True   LL-Null:                   -1.4453e+05
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -4.6370      0.019   -242.881      0.000      -4.674      -4.600
IDADE          0.0394      0.

In [45]:
log_reg2 = smf.logit("MORTE ~ IDADE + DIAS_PERM", data = train).fit()
print(log_reg2.summary())

Optimization terminated successfully.
         Current function value: 0.267729
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                  MORTE   No. Observations:               461103
Model:                          Logit   Df Residuals:                   461100
Method:                           MLE   Df Model:                            2
Date:                Sun, 18 Sep 2022   Pseudo R-squ.:                  0.1458
Time:                        10:27:51   Log-Likelihood:            -1.2345e+05
converged:                       True   LL-Null:                   -1.4453e+05
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -4.5369      0.019   -241.317      0.000      -4.574      -4.500
IDADE          0.0392      0.

In [47]:
log_reg3 = smf.logit("MORTE ~ IDADE + DIAS_PERM + DIAG_PRINC", data = train).fit()
print(log_reg3.summary())

Optimization terminated successfully.
         Current function value: 0.267659
         Iterations 8
                           Logit Regression Results                           
Dep. Variable:                  MORTE   No. Observations:               461103
Model:                          Logit   Df Residuals:                   461099
Method:                           MLE   Df Model:                            3
Date:                Sun, 18 Sep 2022   Pseudo R-squ.:                  0.1461
Time:                        10:28:49   Log-Likelihood:            -1.2342e+05
converged:                       True   LL-Null:                   -1.4453e+05
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -4.5861      0.020   -231.185      0.000      -4.625      -4.547
IDADE          0.0391      0.