# REGRESION LOGISTICA
## Predicción de insuficiencia cardíaca 

In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import f1_score
import io
import requests

In [45]:
# df = pd.read_csv('heart_failure.csv')

url = 'https://raw.githubusercontent.com/casasmgb/curso-ml/main/01-section/regression/heart_failure.csv'
data = requests.get(url).content
df = pd.read_csv(io.StringIO(data.decode('utf-8')))

Verificamos que no existan nulos o NaN

In [46]:
print('null value\n',df.isnull().sum(axis=0),'\n')
print('NaN value\n',df.isna().sum(axis=0),'\n')

null value
 edad                       0
anemia                     0
creatinina_fosfoquinasa    0
diabetes                   0
fraccion_eyeccion          0
presion_sanguinea_alta     0
plaquetas                  0
creatinina_serica          0
sodio_serico               0
sexo                       0
tabaquismo                 0
tiempo                     0
EVENTO_MUERTE              0
dtype: int64 

NaN value
 edad                       0
anemia                     0
creatinina_fosfoquinasa    0
diabetes                   0
fraccion_eyeccion          0
presion_sanguinea_alta     0
plaquetas                  0
creatinina_serica          0
sodio_serico               0
sexo                       0
tabaquismo                 0
tiempo                     0
EVENTO_MUERTE              0
dtype: int64 



Correlacion de edad y el evento de muerte

# Separado de datos de entrenamiento y prueba

In [47]:
x = df.drop('EVENTO_MUERTE', axis = 1)
y = df['EVENTO_MUERTE']

scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)
x = pd.DataFrame(x_scaled, index = x.index, columns = x.columns)
x

Unnamed: 0,edad,anemia,creatinina_fosfoquinasa,diabetes,fraccion_eyeccion,presion_sanguinea_alta,plaquetas,creatinina_serica,sodio_serico,sexo,tabaquismo,tiempo
0,0.636364,0.0,0.071319,0.0,0.090909,1.0,0.290823,0.157303,0.485714,1.0,0.0,0.000000
1,0.272727,0.0,1.000000,0.0,0.363636,0.0,0.288833,0.067416,0.657143,1.0,0.0,0.007117
2,0.454545,0.0,0.015693,0.0,0.090909,0.0,0.165960,0.089888,0.457143,1.0,1.0,0.010676
3,0.181818,1.0,0.011227,0.0,0.090909,0.0,0.224148,0.157303,0.685714,1.0,0.0,0.010676
4,0.454545,1.0,0.017479,1.0,0.090909,0.0,0.365984,0.247191,0.085714,0.0,0.0,0.014235
...,...,...,...,...,...,...,...,...,...,...,...,...
294,0.400000,0.0,0.004848,1.0,0.363636,1.0,0.157474,0.067416,0.857143,1.0,1.0,0.946619
295,0.272727,0.0,0.229268,0.0,0.363636,0.0,0.296884,0.078652,0.742857,0.0,0.0,0.950178
296,0.090909,0.0,0.259888,1.0,0.696970,0.0,0.869075,0.033708,0.714286,0.0,0.0,0.975089
297,0.090909,0.0,0.304925,0.0,0.363636,0.0,0.139290,0.101124,0.771429,1.0,1.0,0.982206


In [48]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=None)

In [49]:
print('porcentaje de datos para x\n')
print((x_train.shape[0] * 100) / x.shape[0])
print((x_test.shape[0] * 100) / x.shape[0])

print('\nporcentaje de datos para y\n')
print((y_train.shape[0] * 100) / y.shape[0])
print((x_test.shape[0] * 100) / y.shape[0])

porcentaje de datos para x

69.89966555183946
30.100334448160535

porcentaje de datos para y

69.89966555183946
30.100334448160535


## Modelos

El F1-score es el promedio armónico de la precisión y la exhaustividad, donde el F1-score alcanza su mejor valor en 1 y su peor valor en 0

* Matríz de confusión (F1-score)
La matriz de confusión muestra las predicciones correctas y exhaustivas, en comparación con las etiquetas reales.
La matriz de confusión muestra la habilidad del modelo para predecir correctamente

* Precisión se calcula con los datos de prueba y los resultados esperados.

https://www.statdeveloper.com/evaluacion-del-modelo-de-clasificacion/

### Logistica

In [50]:

log_model = LogisticRegression()
log_model.fit(x_train, y_train)
y_pred_log = log_model.predict(x_test)

log_model_acc = log_model.score(x_test, y_test)
log_model_f1 = f1_score(y_test, y_pred_log)

### Support Vector Classification

In [51]:
svc_model = SVC()
svc_model.fit(x_train, y_train)
y_pred_svc = svc_model.predict(x_test)

svc_model_acc = svc_model.score(x_test, y_test)
svc_model_f1 = f1_score(y_test, y_pred_svc)

### Multi-layer Perceptron classifier.

In [52]:
mlp_model = MLPClassifier(hidden_layer_sizes=(128, 128), max_iter=500)
mlp_model.fit(x_train, y_train)
y_pred_mlp = mlp_model.predict(x_test)

mlp_model_acc = mlp_model.score(x_test, y_test)
mlp_model_f1 = f1_score(y_test, y_pred_mlp)

In [53]:

print('\nLogistica\n')
print(f"log_model_acc : {log_model_acc}")
print(f"log_model_f1 : {log_model_f1}")
print('\nSVC\n')
print(f"svc_model_acc : {svc_model_acc}")
print(f"svc_model_f1 : {svc_model_f1}")
print('\nMLP\n')
print(f"mlp_model_acc : {mlp_model_acc}")
print(f"mlp_model_f1 : {mlp_model_f1}")




Logistica

log_model_acc : 0.8333333333333334
log_model_f1 : 0.7058823529411765

SVC

svc_model_acc : 0.7555555555555555
svc_model_f1 : 0.5217391304347826

MLP

mlp_model_acc : 0.8111111111111111
mlp_model_f1 : 0.6666666666666666


## Prediccion

In [54]:
print('\nLogistica\n')
print(y_test.to_numpy())
print(y_pred_mlp)

print('\nSVC\n')
print(y_test.to_numpy())
print(y_pred_svc)

print('\nMLP\n')
print(y_test.to_numpy())
print(y_pred_mlp)


Logistica

[1 1 0 1 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0
 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 1 1
 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0]
[1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0
 0 0 1 0 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 1
 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]

SVC

[1 1 0 1 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0
 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 1 1
 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0]
[1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
 0 0 1 0 1 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0
 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]

MLP

[1 1 0 1 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 0 1 0 0 0 1 0 1 0 0 0
 0 0 0 0 1 1 0 1 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 1 1
 1 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0]
[1 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0 0 1 0 1 1