In [1]:
# Estudo de Caso - Prevendo os Efeitos do Consumo de Álcool em Doenças do Fígado

# Leia o manual em pdf no Capítulo 13 do curso com a especificação do estudo de caso.

# Obs: Caso tenha problemas com a acentuação, consulte este link:
# https://support.rstudio.com/hc/en-us/articles/200532197-Character-Encoding

# Definindo o diretório de trabalho
getwd()

In [2]:
# Pacotes
library(dplyr)
library(caret)
library(neuralnet)

"package 'dplyr' was built under R version 3.6.3"
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

"package 'caret' was built under R version 3.6.3"Loading required package: lattice
Loading required package: ggplot2
Registered S3 methods overwritten by 'ggplot2':
  method         from 
  [.quosures     rlang
  c.quosures     rlang
  print.quosures rlang


ERROR: Error in library(neuralnet): there is no package called 'neuralnet'


In [7]:
##### Carregando os dados ##### 
df_pacientes <- read.csv("dados/dataset.csv")
head(df_pacientes)

idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1


In [8]:
##### Análise Explorória ##### 

# Vamos verificar os tipos de dados
str(df_pacientes)

'data.frame':	583 obs. of  11 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...


In [10]:
# Vamos checar se temos valores missing
sum(is.na(df_pacientes))

In [11]:
# Resumo estatístico
summary(df_pacientes)

     idade           sexo     tot_bilirubin    direct_bilirubin
 Min.   : 4.00   Homem :441   Min.   : 0.400   Min.   : 0.100  
 1st Qu.:33.00   Mulher:142   1st Qu.: 0.800   1st Qu.: 0.200  
 Median :45.00                Median : 1.000   Median : 0.300  
 Mean   :44.75                Mean   : 3.299   Mean   : 1.486  
 3rd Qu.:58.00                3rd Qu.: 2.600   3rd Qu.: 1.300  
 Max.   :90.00                Max.   :75.000   Max.   :19.700  
                                                               
  tot_proteins       albumina          ag_ratio           sgpt      
 Min.   :  63.0   Min.   :  10.00   Min.   :  10.0   Min.   :2.700  
 1st Qu.: 175.5   1st Qu.:  23.00   1st Qu.:  25.0   1st Qu.:5.800  
 Median : 208.0   Median :  35.00   Median :  42.0   Median :6.600  
 Mean   : 290.6   Mean   :  80.71   Mean   : 109.9   Mean   :6.483  
 3rd Qu.: 298.0   3rd Qu.:  60.50   3rd Qu.:  87.0   3rd Qu.:7.200  
 Max.   :2110.0   Max.   :2000.00   Max.   :4929.0   Max.   :9.600  
     

In [13]:
# Proporção da classe
# Em nosso dataset, a coluna target "classe" representa:
# 1 significa a presença de doença do fígado
# 2 significa que nenhuma doença do fígado foi identificada
table(df_pacientes$classe)


  1   2 
416 167 

In [14]:
prop.table(table(df_pacientes$classe))


        1         2 
0.7135506 0.2864494 

In [16]:
# As classes estão desbalanceadas, com mais registros da classe positiva (tem doença).
# Cuidaremos disso daqui a pouco

##### Pré-Processamento e Engenharia de Atributos ##### 

# Vamos criar outra coluna chamada "doente" e preencher com os valores sim/nao
# Esse passo não é obrigatório, sendo apenas didático
# Usaremos essa nova coluna como coluna target
df_pacientes["doente"] <- ifelse(df_pacientes$classe == 2, "nao", "sim")
head(df_pacientes)

idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1,sim
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1,sim
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1,sim
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1,sim
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1,sim
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1,sim


In [17]:
str(df_pacientes)

'data.frame':	583 obs. of  12 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...
 $ doente          : chr  "sim" "sim" "sim" "sim" ...


In [18]:
# Transformando a coluna target em fator
df_pacientes["doente"] <- factor(df_pacientes$doente)
str(df_pacientes)

'data.frame':	583 obs. of  12 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 2 2 2 2 2 2 2 2 1 2 ...


In [19]:
# A coluna alkphos possui 4 valores nulos. Vamos fazer imputação substituindo pela mediana.
alkphos_mediana <- median(df_pacientes$alkphos, na.rm = T)
df_pacientes$alkphos[is.na(df_pacientes$alkphos)] <- alkphos_mediana
sum(is.na(df_pacientes))

In [20]:
# Vamos criar variáveis dummy para o sexo do paciente
df_pacientes["Mulher"] <- ifelse(df_pacientes$sexo == "Mulher",1,0)
df_pacientes["Homem"] <- ifelse(df_pacientes$sexo == "Homem",1,0)
head(df_pacientes)
str(df_pacientes)

idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente,Mulher,Homem
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1,sim,1,0
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1,sim,0,1
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1,sim,0,1
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1,sim,0,1
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1,sim,0,1
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1,sim,0,1


'data.frame':	583 obs. of  14 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 2 2 2 2 2 2 2 2 1 2 ...
 $ Mulher          : num  1 0 0 0 0 0 1 1 0 0 ...
 $ Homem           : num  0 1 1 1 1 1 0 0 1 1 ...


In [21]:
# Divisão dos dados em treino e teste com proporção 70/30
df_treino <- df_pacientes[1:as.integer(0.70 * nrow(df_pacientes)),]
df_teste <- df_pacientes[-c(1:as.integer(0.70 * nrow(df_pacientes))),]

In [22]:
# Verificamos os dados
head(df_treino)
head(df_teste)

idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente,Mulher,Homem
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1,sim,1,0
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1,sim,0,1
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1,sim,0,1
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1,sim,0,1
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1,sim,0,1
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1,sim,0,1


Unnamed: 0,idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente,Mulher,Homem
409,48,Homem,2.4,1.1,554,141,73,7.5,3.6,0.9,1,sim,0,1
410,48,Homem,5.0,2.6,555,284,190,6.5,3.3,1.0,1,sim,0,1
411,18,Homem,1.4,0.6,215,440,850,5.0,1.9,0.6,1,sim,0,1
412,23,Mulher,2.3,0.8,509,28,44,6.9,2.9,0.7,2,nao,1,0
413,65,Homem,4.9,2.7,190,33,71,7.1,2.9,0.7,1,sim,0,1
414,48,Homem,0.7,0.2,208,15,30,4.6,2.1,0.8,2,nao,0,1


In [23]:
# Verificamos os tipos de dados
str(df_treino)
str(df_teste)

'data.frame':	408 obs. of  14 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 2 2 2 2 2 2 2 2 1 2 ...
 $ Mulher          : num  1 0 0 0 0 0 1 1 0 0 ...
 $ Homem           : num  0 1 1 1 1 1 0 0 1 1 ...
'data.frame':	175 obs. of  14 variables:
 $ idade           

In [25]:
# Vimos na análise exploratória que as classes estão desbalanceadas.
# Vamos aplicar o upsampling e criar amostras para a classe negativa
# Fazemos isso apenas para dados de treino
?upSample
df_treino <- upSample(x = df_treino, df_treino$doente)
prop.table(table(df_treino$doente))
str(df_treino)


nao sim 
0.5 0.5 

'data.frame':	582 obs. of  16 variables:
 $ idade           : int  17 64 25 33 63 20 84 57 38 38 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 1 1 1 1 1 1 2 1 2 2 ...
 $ tot_bilirubin   : num  0.9 0.9 0.6 1.6 0.9 1.1 0.7 1 2.6 2.6 ...
 $ direct_bilirubin: num  0.3 0.3 0.1 0.5 0.2 0.5 0.2 0.3 1.2 1.2 ...
 $ tot_proteins    : int  202 310 183 165 194 128 188 187 410 410 ...
 $ albumina        : int  22 61 91 15 52 20 13 19 59 59 ...
 $ ag_ratio        : int  19 58 53 23 45 30 21 23 57 57 ...
 $ sgpt            : num  7.4 7 5.5 7.3 6 3.9 6 5.2 5.6 5.6 ...
 $ sgot            : num  4.1 3.4 2.3 3.5 3.9 1.9 3.2 2.9 3 3 ...
 $ alkphos         : num  1.2 0.9 0.7 0.92 1.85 0.95 1.1 1.2 0.8 0.8 ...
 $ classe          : int  2 2 2 2 2 2 2 2 2 2 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 1 1 1 1 1 1 1 1 1 1 ...
 $ Mulher          : num  0 0 0 0 0 0 1 0 1 1 ...
 $ Homem           : num  1 1 1 1 1 1 0 1 0 0 ...
 $ Class           : Factor w/ 2 levels "nao","sim": 1 1 1 

In [26]:
# Função para padronização das variáveis quantitativas
func_normaliza <- function(x){
  return ((x - min(x)) / (max(x) - min(x)))
}

In [27]:
# Aplica a função
# Observe que estamos excluindo da padronização as colunas de índice 2 e de 11 a 15
# Por que? Porque são colunas categóricas, onde a padronização não é necessária
str(df_treino)
df_treino_norm <- as.data.frame(lapply(df_treino[,-c(2,11:15)], FUN = func_normaliza))
df_teste_norm <- as.data.frame(lapply(df_teste[,-c(2,11:15)], FUN = func_normaliza))

'data.frame':	582 obs. of  16 variables:
 $ idade           : int  17 64 25 33 63 20 84 57 38 38 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 1 1 1 1 1 1 2 1 2 2 ...
 $ tot_bilirubin   : num  0.9 0.9 0.6 1.6 0.9 1.1 0.7 1 2.6 2.6 ...
 $ direct_bilirubin: num  0.3 0.3 0.1 0.5 0.2 0.5 0.2 0.3 1.2 1.2 ...
 $ tot_proteins    : int  202 310 183 165 194 128 188 187 410 410 ...
 $ albumina        : int  22 61 91 15 52 20 13 19 59 59 ...
 $ ag_ratio        : int  19 58 53 23 45 30 21 23 57 57 ...
 $ sgpt            : num  7.4 7 5.5 7.3 6 3.9 6 5.2 5.6 5.6 ...
 $ sgot            : num  4.1 3.4 2.3 3.5 3.9 1.9 3.2 2.9 3 3 ...
 $ alkphos         : num  1.2 0.9 0.7 0.92 1.85 0.95 1.1 1.2 0.8 0.8 ...
 $ classe          : int  2 2 2 2 2 2 2 2 2 2 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 1 1 1 1 1 1 1 1 1 1 ...
 $ Mulher          : num  0 0 0 0 0 0 1 0 1 1 ...
 $ Homem           : num  1 1 1 1 1 1 0 1 0 0 ...
 $ Class           : Factor w/ 2 levels "nao","sim": 1 1 1 

ERROR: Error in Summary.factor(structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, : 'min' not meaningful for factors


In [6]:
# Agora criamos o dataframe final retornando as colunas que não precisam de padronização
# e que usaremos para modelagem preditiva
df_treino_final <- data.frame(df_treino_norm, df_treino[,c(13,14,12)]) 
df_teste_final <- data.frame(df_teste_norm, df_teste[,c(13,14,12)]) 

# Visualiza os dados
head(df_treino_final)
head(df_teste_final)

# Verifica os tipos de dados e toal de variáveis
str(df_treino_final)
str(df_teste_final)

##### Modelagem Preditiva ##### 

# Construindo o modelo
formula_nn <- paste("doente", paste(colnames(df_treino_final[-12]), collapse = "+"), sep = "~")
modelo_figado <- neuralnet(formula_nn, data = df_treino_final)

# Resumo do modelo
str(modelo_figado)
plot(modelo_figado)

# Previsões com o modelo treinado
set.seed(7)
previsoes_figado <- predict(modelo_figado, df_teste_final[1:11]) 
head(previsoes_figado)

# O resultado das previsões é em probabilidade. Vamos ajustar a saída.
previsoes_figado_final <- ifelse(previsoes_figado[,1] > previsoes_figado[,2], "nao", "sim")
head(previsoes_figado_final)

# Acurácia do modelo
mean(previsoes_figado_final == df_teste_final$doente) 

# Previsão com novos dados
# Vejamos de os 5 novos pacientes podem desenvolver doença do fígado
# com base nos resultados dos exames de sangue

##### Carregando os dados ##### 
df_novos_pacientes <- read.csv("dados/novos_pacientes.csv")
head(df_novos_pacientes)
str(df_novos_pacientes)

# Padronizamos os novos dados
df_novos_pacientes_norm <- as.data.frame(lapply(df_novos_pacientes, FUN = func_normaliza))
head(df_novos_pacientes_norm)

# Previsões
previsoes_novos_pacientes <- predict(modelo_figado, df_novos_pacientes_norm) 
previsoes_novos_pacientes_final <- ifelse(previsoes_novos_pacientes[,1] > previsoes_novos_pacientes[,2], "nao", "sim")
head(previsoes_novos_pacientes_final)

# Previsões feitas com sucesso!

idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1


'data.frame':	583 obs. of  11 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...


     idade           sexo     tot_bilirubin    direct_bilirubin
 Min.   : 4.00   Homem :441   Min.   : 0.400   Min.   : 0.100  
 1st Qu.:33.00   Mulher:142   1st Qu.: 0.800   1st Qu.: 0.200  
 Median :45.00                Median : 1.000   Median : 0.300  
 Mean   :44.75                Mean   : 3.299   Mean   : 1.486  
 3rd Qu.:58.00                3rd Qu.: 2.600   3rd Qu.: 1.300  
 Max.   :90.00                Max.   :75.000   Max.   :19.700  
                                                               
  tot_proteins       albumina          ag_ratio           sgpt      
 Min.   :  63.0   Min.   :  10.00   Min.   :  10.0   Min.   :2.700  
 1st Qu.: 175.5   1st Qu.:  23.00   1st Qu.:  25.0   1st Qu.:5.800  
 Median : 208.0   Median :  35.00   Median :  42.0   Median :6.600  
 Mean   : 290.6   Mean   :  80.71   Mean   : 109.9   Mean   :6.483  
 3rd Qu.: 298.0   3rd Qu.:  60.50   3rd Qu.:  87.0   3rd Qu.:7.200  
 Max.   :2110.0   Max.   :2000.00   Max.   :4929.0   Max.   :9.600  
     


  1   2 
416 167 


        1         2 
0.7135506 0.2864494 

idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1,sim
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1,sim
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1,sim
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1,sim
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1,sim
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1,sim


'data.frame':	583 obs. of  12 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...
 $ doente          : chr  "sim" "sim" "sim" "sim" ...
'data.frame':	583 obs. of  12 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...


idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente,Mulher,Homem
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1,sim,1,0
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1,sim,0,1
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1,sim,0,1
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1,sim,0,1
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1,sim,0,1
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1,sim,0,1


'data.frame':	583 obs. of  14 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 2 2 2 2 2 2 2 2 1 2 ...
 $ Mulher          : num  1 0 0 0 0 0 1 1 0 0 ...
 $ Homem           : num  0 1 1 1 1 1 0 0 1 1 ...


idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente,Mulher,Homem
65,Mulher,0.7,0.1,187,16,18,6.8,3.3,0.9,1,sim,1,0
62,Homem,10.9,5.5,699,64,100,7.5,3.2,0.74,1,sim,0,1
62,Homem,7.3,4.1,490,60,68,7.0,3.3,0.89,1,sim,0,1
58,Homem,1.0,0.4,182,14,20,6.8,3.4,1.0,1,sim,0,1
72,Homem,3.9,2.0,195,27,59,7.3,2.4,0.4,1,sim,0,1
46,Homem,1.8,0.7,208,19,14,7.6,4.4,1.3,1,sim,0,1


Unnamed: 0,idade,sexo,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,classe,doente,Mulher,Homem
409,48,Homem,2.4,1.1,554,141,73,7.5,3.6,0.9,1,sim,0,1
410,48,Homem,5.0,2.6,555,284,190,6.5,3.3,1.0,1,sim,0,1
411,18,Homem,1.4,0.6,215,440,850,5.0,1.9,0.6,1,sim,0,1
412,23,Mulher,2.3,0.8,509,28,44,6.9,2.9,0.7,2,nao,1,0
413,65,Homem,4.9,2.7,190,33,71,7.1,2.9,0.7,1,sim,0,1
414,48,Homem,0.7,0.2,208,15,30,4.6,2.1,0.8,2,nao,0,1


'data.frame':	408 obs. of  14 variables:
 $ idade           : int  65 62 62 58 72 46 26 29 17 55 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 2 1 1 1 1 1 2 2 1 1 ...
 $ tot_bilirubin   : num  0.7 10.9 7.3 1 3.9 1.8 0.9 0.9 0.9 0.7 ...
 $ direct_bilirubin: num  0.1 5.5 4.1 0.4 2 0.7 0.2 0.3 0.3 0.2 ...
 $ tot_proteins    : int  187 699 490 182 195 208 154 202 202 290 ...
 $ albumina        : int  16 64 60 14 27 19 16 14 22 53 ...
 $ ag_ratio        : int  18 100 68 20 59 14 12 11 19 58 ...
 $ sgpt            : num  6.8 7.5 7 6.8 7.3 7.6 7 6.7 7.4 6.8 ...
 $ sgot            : num  3.3 3.2 3.3 3.4 2.4 4.4 3.5 3.6 4.1 3.4 ...
 $ alkphos         : num  0.9 0.74 0.89 1 0.4 1.3 1 1.1 1.2 1 ...
 $ classe          : int  1 1 1 1 1 1 1 1 2 1 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 2 2 2 2 2 2 2 2 1 2 ...
 $ Mulher          : num  1 0 0 0 0 0 1 1 0 0 ...
 $ Homem           : num  0 1 1 1 1 1 0 0 1 1 ...
'data.frame':	175 obs. of  14 variables:
 $ idade           


nao sim 
0.5 0.5 

'data.frame':	582 obs. of  15 variables:
 $ idade           : int  17 64 25 33 63 20 84 57 38 38 ...
 $ sexo            : Factor w/ 2 levels "Homem","Mulher": 1 1 1 1 1 1 2 1 2 2 ...
 $ tot_bilirubin   : num  0.9 0.9 0.6 1.6 0.9 1.1 0.7 1 2.6 2.6 ...
 $ direct_bilirubin: num  0.3 0.3 0.1 0.5 0.2 0.5 0.2 0.3 1.2 1.2 ...
 $ tot_proteins    : int  202 310 183 165 194 128 188 187 410 410 ...
 $ albumina        : int  22 61 91 15 52 20 13 19 59 59 ...
 $ ag_ratio        : int  19 58 53 23 45 30 21 23 57 57 ...
 $ sgpt            : num  7.4 7 5.5 7.3 6 3.9 6 5.2 5.6 5.6 ...
 $ sgot            : num  4.1 3.4 2.3 3.5 3.9 1.9 3.2 2.9 3 3 ...
 $ alkphos         : num  1.2 0.9 0.7 0.92 1.85 0.95 1.1 1.2 0.8 0.8 ...
 $ classe          : int  2 2 2 2 2 2 2 2 2 2 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 1 1 1 1 1 1 1 1 1 1 ...
 $ Mulher          : num  0 0 0 0 0 0 1 0 1 1 ...
 $ Homem           : num  1 1 1 1 1 1 0 1 0 0 ...
 $ Class           : Factor w/ 2 levels "nao","sim": 1 1 1 

idade,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,Mulher,Homem,doente
0.1604938,0.005369128,0.014184397,0.06790425,0.006030151,0.00182964,0.6811594,0.6956522,0.5625,0,1,nao
0.7407407,0.005369128,0.014184397,0.12066439,0.025628141,0.009758081,0.6231884,0.5434783,0.375,0,1,nao
0.2592593,0.001342282,0.0,0.05862237,0.040703518,0.008741614,0.4057971,0.3043478,0.25,0,1,nao
0.3580247,0.014765101,0.028368794,0.04982902,0.002512563,0.002642814,0.6666667,0.5652174,0.3875,0,1,nao
0.7283951,0.005369128,0.007092199,0.06399609,0.021105528,0.007115267,0.4782609,0.6521739,0.96875,0,1,nao
0.1975309,0.008053691,0.028368794,0.03175379,0.005025126,0.004065867,0.173913,0.2173913,0.40625,0,1,nao


Unnamed: 0,idade,tot_bilirubin,direct_bilirubin,tot_proteins,albumina,ag_ratio,sgpt,sgot,alkphos,Mulher,Homem,doente
409,0.4939759,0.047169811,0.051020408,0.27951807,0.170351105,0.07279236,0.6964286,0.675,0.24,0,1,sim
410,0.4939759,0.108490566,0.12755102,0.28012048,0.356306892,0.2124105,0.5178571,0.6,0.28,0,1,sim
411,0.1325301,0.023584906,0.025510204,0.0753012,0.55916775,1.0,0.25,0.25,0.12,0,1,sim
412,0.1927711,0.044811321,0.035714286,0.25240964,0.023407022,0.03818616,0.5892857,0.5,0.16,1,0,nao
413,0.6987952,0.106132075,0.132653061,0.06024096,0.029908973,0.07040573,0.625,0.5,0.16,0,1,sim
414,0.4939759,0.007075472,0.005102041,0.07108434,0.006501951,0.02147971,0.1785714,0.3,0.2,0,1,nao


'data.frame':	582 obs. of  12 variables:
 $ idade           : num  0.16 0.741 0.259 0.358 0.728 ...
 $ tot_bilirubin   : num  0.00537 0.00537 0.00134 0.01477 0.00537 ...
 $ direct_bilirubin: num  0.01418 0.01418 0 0.02837 0.00709 ...
 $ tot_proteins    : num  0.0679 0.1207 0.0586 0.0498 0.064 ...
 $ albumina        : num  0.00603 0.02563 0.0407 0.00251 0.02111 ...
 $ ag_ratio        : num  0.00183 0.00976 0.00874 0.00264 0.00712 ...
 $ sgpt            : num  0.681 0.623 0.406 0.667 0.478 ...
 $ sgot            : num  0.696 0.543 0.304 0.565 0.652 ...
 $ alkphos         : num  0.562 0.375 0.25 0.388 0.969 ...
 $ Mulher          : num  0 0 0 0 0 0 1 0 1 1 ...
 $ Homem           : num  1 1 1 1 1 1 0 1 0 0 ...
 $ doente          : Factor w/ 2 levels "nao","sim": 1 1 1 1 1 1 1 1 1 1 ...
'data.frame':	175 obs. of  12 variables:
 $ idade           : num  0.494 0.494 0.133 0.193 0.699 ...
 $ tot_bilirubin   : num  0.0472 0.1085 0.0236 0.0448 0.1061 ...
 $ direct_bilirubin: num  0.051 0.1276 0.

ERROR: Error in neuralnet(formula_nn, data = df_treino_final): não foi possível encontrar a função "neuralnet"
