# Librerías

In [1]:
#Librerías Base
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.colab import drive

In [2]:
#Librerías de Métricas
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [3]:
#Librerías de Machine Learning
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.optimizers import Adam, SGD

# Conexión al repositorio de datos

In [4]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
drive.mount('/content/drive')

Mounted at /content/drive


# Lectura de datos

In [5]:
# Load the Titanic dataset from the mounted Drive folder and display it.
df = pd.read_csv('/content/drive/MyDrive/Data/titanic.csv')
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [7]:
# Keep only the columns used in this notebook (seven features plus the 'Survived' label).
# The inner brackets form a list of column names, which selects several columns at once
# rather than a single one as in the previous example.
df = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked', 'Survived']]
df

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Survived
0,3,male,22.0,1,0,7.2500,S,0
1,1,female,38.0,1,0,71.2833,C,1
2,3,female,26.0,0,0,7.9250,S,1
3,1,female,35.0,1,0,53.1000,S,1
4,3,male,35.0,0,0,8.0500,S,0
...,...,...,...,...,...,...,...,...
886,2,male,27.0,0,0,13.0000,S,0
887,1,female,19.0,0,0,30.0000,S,1
888,3,female,,1,2,23.4500,S,0
889,1,male,26.0,0,0,30.0000,C,1


In [8]:
# Drop every row that has a missing value in any of the selected columns.
df = df.dropna()
df

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked,Survived
0,3,male,22.0,1,0,7.2500,S,0
1,1,female,38.0,1,0,71.2833,C,1
2,3,female,26.0,0,0,7.9250,S,1
3,1,female,35.0,1,0,53.1000,S,1
4,3,male,35.0,0,0,8.0500,S,0
...,...,...,...,...,...,...,...,...
885,3,female,39.0,0,5,29.1250,Q,0
886,2,male,27.0,0,0,13.0000,S,0
887,1,female,19.0,0,0,30.0000,S,1
889,1,male,26.0,0,0,30.0000,C,1


# Definición de "features"

In [9]:
# Define the features (x): the columns used to make the prediction.
# The label column to predict ('Survived') is deliberately left out.
dfx = df[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]
dfx

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked
0,3,male,22.0,1,0,7.2500,S
1,1,female,38.0,1,0,71.2833,C
2,3,female,26.0,0,0,7.9250,S
3,1,female,35.0,1,0,53.1000,S
4,3,male,35.0,0,0,8.0500,S
...,...,...,...,...,...,...,...
885,3,female,39.0,0,5,29.1250,Q
886,2,male,27.0,0,0,13.0000,S
887,1,female,19.0,0,0,30.0000,S
889,1,male,26.0,0,0,30.0000,C


In [10]:
# Categorical feature.
# unique() lists the distinct values present in the column.
dfx['Pclass'].unique()

array([3, 1, 2])

In [11]:
# One-hot encode the categorical 'Pclass' column into Pclass_1 / Pclass_2 / Pclass_3.
# dtype=int forces 0/1 integer indicators: since pandas 2.0 get_dummies defaults to bool,
# and bool columns mixed with float columns make the feature matrix an object array.
dfx = pd.get_dummies(dfx, columns=['Pclass'], prefix='Pclass', dtype=int)
dfx

Unnamed: 0,Sex,Age,SibSp,Parch,Fare,Embarked,Pclass_1,Pclass_2,Pclass_3
0,male,22.0,1,0,7.2500,S,0,0,1
1,female,38.0,1,0,71.2833,C,1,0,0
2,female,26.0,0,0,7.9250,S,0,0,1
3,female,35.0,1,0,53.1000,S,1,0,0
4,male,35.0,0,0,8.0500,S,0,0,1
...,...,...,...,...,...,...,...,...,...
885,female,39.0,0,5,29.1250,Q,0,0,1
886,male,27.0,0,0,13.0000,S,0,1,0
887,female,19.0,0,0,30.0000,S,1,0,0
889,male,26.0,0,0,30.0000,C,1,0,0


In [12]:
# Categorical feature: list the distinct values of 'Sex'.
dfx['Sex'].unique()

array(['male', 'female'], dtype=object)

In [13]:
# One-hot encode the categorical 'Sex' column into Sex_female / Sex_male.
# dtype=int forces 0/1 integer indicators: since pandas 2.0 get_dummies defaults to bool,
# and bool columns mixed with float columns make the feature matrix an object array.
dfx = pd.get_dummies(dfx, columns=['Sex'], prefix='Sex', dtype=int)
dfx

Unnamed: 0,Age,SibSp,Parch,Fare,Embarked,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male
0,22.0,1,0,7.2500,S,0,0,1,0,1
1,38.0,1,0,71.2833,C,1,0,0,1,0
2,26.0,0,0,7.9250,S,0,0,1,1,0
3,35.0,1,0,53.1000,S,1,0,0,1,0
4,35.0,0,0,8.0500,S,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...
885,39.0,0,5,29.1250,Q,0,0,1,1,0
886,27.0,0,0,13.0000,S,0,1,0,0,1
887,19.0,0,0,30.0000,S,1,0,0,1,0
889,26.0,0,0,30.0000,C,1,0,0,0,1


In [14]:
# Categorical feature: list the distinct values of 'Embarked'.
dfx['Embarked'].unique()

array(['S', 'C', 'Q'], dtype=object)

In [15]:
# One-hot encode the categorical 'Embarked' column into Embarked_C / Embarked_Q / Embarked_S.
# dtype=int forces 0/1 integer indicators: since pandas 2.0 get_dummies defaults to bool,
# and bool columns mixed with float columns make the feature matrix an object array.
dfx = pd.get_dummies(dfx, columns=['Embarked'], prefix='Embarked', dtype=int)
dfx

Unnamed: 0,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,22.0,1,0,7.2500,0,0,1,0,1,0,0,1
1,38.0,1,0,71.2833,1,0,0,1,0,1,0,0
2,26.0,0,0,7.9250,0,0,1,1,0,0,0,1
3,35.0,1,0,53.1000,1,0,0,1,0,0,0,1
4,35.0,0,0,8.0500,0,0,1,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
885,39.0,0,5,29.1250,0,0,1,1,0,0,1,0
886,27.0,0,0,13.0000,0,1,0,0,1,0,0,1
887,19.0,0,0,30.0000,1,0,0,1,0,0,0,1
889,26.0,0,0,30.0000,1,0,0,0,1,1,0,0


In [18]:
# Feature scaling
#
# Neural networks train better when numeric inputs share a small range, so the
# numeric columns are rescaled to [0, 1] with min-max scaling.
# (This step was missing from the earlier examples and was added here as a reminder.)
from sklearn.preprocessing import MinMaxScaler

# Numeric features to rescale; the one-hot columns are already 0/1.
columnas_numericas = ['Age', 'SibSp', 'Parch', 'Fare']

# MinMaxScaler scales each column independently, so one fit over the four columns
# yields the same values as fitting them one at a time, and leaves `mms` fitted on
# all of them instead of only the last one.
# NOTE(review): the scaler is fitted on every row before the train/test split, so the
# validation rows influence the min/max; fit on the training rows only if that matters.
mms = MinMaxScaler()
dfx[columnas_numericas] = mms.fit_transform(dfx[columnas_numericas])
dfx

Unnamed: 0,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,Pclass_3,Sex_female,Sex_male,Embarked_C,Embarked_Q,Embarked_S
0,0.271174,0.2,0.000000,0.014151,0,0,1,0,1,0,0,1
1,0.472229,0.2,0.000000,0.139136,1,0,0,1,0,1,0,0
2,0.321438,0.0,0.000000,0.015469,0,0,1,1,0,0,0,1
3,0.434531,0.2,0.000000,0.103644,1,0,0,1,0,0,0,1
4,0.434531,0.0,0.000000,0.015713,0,0,1,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
885,0.484795,0.0,0.833333,0.056848,0,0,1,1,0,0,1,0
886,0.334004,0.0,0.000000,0.025374,0,1,0,0,1,0,0,1
887,0.233476,0.0,0.000000,0.058556,1,0,0,1,0,0,0,1
889,0.321438,0.0,0.000000,0.058556,1,0,0,0,1,1,0,0


In [19]:
# Build the feature matrix as a NumPy array.
# Requesting float explicitly guarantees a numeric array even when the one-hot
# columns are bool (pandas >= 2.0 default), where `.values` would return dtype=object.
x = dfx.to_numpy(dtype=float)
x

array([[0.27117366, 0.2       , 0.        , ..., 0.        , 0.        ,
        1.        ],
       [0.4722292 , 0.2       , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.32143755, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ],
       ...,
       [0.23347575, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ],
       [0.32143755, 0.        , 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.39683338, 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ]])

# Definición de "labels"

In [20]:
# Define the labels (y): the value to predict.
# Taken from `df` after dropna, the same frame `dfx` was derived from, so rows stay aligned with x.
y = df['Survived'].values
y

array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1,
       1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,

# División de los datos en datos de entrenamiento (x_train, y_train) y datos de validación (x_test, y_test)

In [21]:
# Split the data into training (x_train, y_train) and validation (x_test, y_test) sets,
# holding out 20% of the rows for validation.
# A fixed random_state makes the split, and the accuracies computed from it, reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [22]:
# Training features.
x_train

array([[0.27117366, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ],
       [0.01985423, 0.8       , 0.16666667, ..., 0.        , 1.        ,
        0.        ],
       [0.52249309, 0.        , 0.16666667, ..., 0.        , 0.        ,
        1.        ],
       ...,
       [0.01985423, 0.2       , 0.16666667, ..., 0.        , 0.        ,
        1.        ],
       [0.53505906, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ],
       [0.78637849, 0.2       , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [23]:
# Training labels.
y_train

array([0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0,
       0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1,
       0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0,

In [24]:
# Validation features.
x_test

array([[0.33400352, 0.2       , 0.        , ..., 0.        , 0.        ,
        1.        ],
       [0.67328474, 0.        , 0.16666667, ..., 0.        , 0.        ,
        1.        ],
       [0.3842674 , 0.2       , 0.16666667, ..., 1.        , 0.        ,
        0.        ],
       ...,
       [0.32143755, 0.2       , 0.16666667, ..., 0.        , 0.        ,
        1.        ],
       [0.58532295, 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ],
       [0.4722292 , 0.        , 0.16666667, ..., 0.        , 0.        ,
        1.        ]])

In [25]:
# Validation labels.
y_test

array([1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0])

# Configuración del modelo

In [27]:
# Create an empty model with Sequential; layers are appended to it in the next cells.
model = Sequential()

In [29]:
# First hidden layer:
# - 40 neurons
# - 12 inputs (one per feature column)
# - tanh (hyperbolic tangent) activation
# NOTE(review): the recorded summary below lists two 40-unit layers, which suggests this cell
# was run twice on the same model — re-create the model before re-running it.
model.add(Dense(40, input_shape=(12,), activation='tanh'))

In [30]:
# Second hidden layer:
# - 50 neurons
# - tanh (hyperbolic tangent) activation
#
# From here on the input size is not declared: each neuron takes the outputs of the previous layer.
model.add(Dense(50, activation='tanh'))

In [31]:
# Third hidden layer:
# - 2 neurons
# - ReLU activation
#
# The input size is not declared: each neuron takes the outputs of the previous layer.
# NOTE(review): the many identical predictions (0.47512624) in the recorded outputs may come from
# this narrow ReLU layer outputting zeros for those rows — verify.
model.add(Dense(2, activation='relu'))

In [32]:
# Fourth hidden layer:
# - 2 neurons
# - tanh (hyperbolic tangent) activation
#
# The input size is not declared: each neuron takes the outputs of the previous layer.
model.add(Dense(2, activation='tanh'))

In [33]:
# Output layer: a single neuron with sigmoid activation, the usual choice for binary
# classification because it yields a value between 0 and 1.
model.add(Dense(1, activation='sigmoid'))

In [34]:
# Print a summary of the model: layers, output shapes and parameter counts.
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 40)                520       
_________________________________________________________________
dense_1 (Dense)              (None, 40)                1640      
_________________________________________________________________
dense_2 (Dense)              (None, 50)                2050      
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 102       
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 6         
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 3         
Total params: 4,321
Trainable params: 4,321
Non-trainable params: 0
____________________________________________________

# Compilación del modelo

In [35]:
# Compile the model, choosing how the error is measured and minimised.
# This is a binary classification problem, so the loss is "binary_crossentropy";
# it is optimised with SGD at a learning rate of 0.05 and accuracy is tracked.
model.compile(
    optimizer=SGD(learning_rate=0.05),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Entrenamiento del modelo

In [36]:
# Train the model on the training features (x_train) and labels (y_train).
# The number of passes over the training data is set to 1000 epochs.
model.fit(x_train, y_train, epochs=1000)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x7fd9fef55ed0>

# Accuracy de entrenamiento (patrones encontrados en la data)

In [39]:
# Predict on the same features (x_train) that were used to train the model.
# ravel() flattens the predictions into a 1-D array, which is easier to read.
y_train_prediccion = model.predict(x_train).ravel()
y_train_prediccion

array([0.09484154, 0.03683248, 0.06388104, 0.93672204, 0.0691292 ,
       0.03306556, 0.66685265, 0.08747527, 0.0505766 , 0.38256252,
       0.06246331, 0.15564275, 0.43287206, 0.14908129, 0.02449933,
       0.47512624, 0.47512624, 0.10438174, 0.02097115, 0.10826528,
       0.07807598, 0.07920796, 0.05542016, 0.28883561, 0.02443704,
       0.06328341, 0.95971155, 0.97404504, 0.66570187, 0.16531184,
       0.9720963 , 0.10960296, 0.4742124 , 0.9646908 , 0.6253024 ,
       0.6976442 , 0.47512624, 0.09462562, 0.8807039 , 0.40969825,
       0.06234357, 0.05360007, 0.9738187 , 0.18118358, 0.04263824,
       0.04034331, 0.95422196, 0.02797022, 0.04034331, 0.47512624,
       0.47512624, 0.02704707, 0.17924017, 0.12886187, 0.11893672,
       0.9705597 , 0.9746928 , 0.2594607 , 0.24529496, 0.94376016,
       0.25127804, 0.52105385, 0.18206874, 0.7865156 , 0.41594   ,
       0.11478165, 0.9692862 , 0.08233327, 0.96230185, 0.9129904 ,
       0.15441957, 0.9234659 , 0.08138034, 0.15633357, 0.47512

In [40]:
# Binarize the predictions with a 0.5 threshold:
# values strictly above 0.5 become True (class 1), everything else becomes False (class 0).
y_train_prediccion_binaria = y_train_prediccion > 0.5
y_train_prediccion_binaria

array([False, False, False,  True, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
        True,  True, False,  True, False, False,  True,  True,  True,
       False, False,  True, False, False, False,  True, False, False,
       False,  True, False, False, False, False, False, False, False,
       False,  True,  True, False, False,  True, False,  True, False,
        True, False, False,  True, False,  True,  True, False,  True,
       False, False, False, False,  True, False,  True, False, False,
       False,  True,  True,  True, False,  True, False,  True, False,
       False,  True,  True,  True,  True, False, False, False,  True,
        True, False,  True, False, False, False,  True, False,  True,
        True, False, False, False, False, False, False, False, False,
        True, False, False, False, False,  True, False, False,  True,
       False, False,

In [41]:
# Compute the training accuracy: the fraction of training rows whose binarized
# prediction matches the true label.
# INTERPRETATION: in the recorded run the model reproduced about 85.06% of the training labels.
accuracy_score(y_train, y_train_prediccion_binaria)

0.8506151142355008

# Accuracy de uso (accuracy de predicción)

In [42]:
# Predict on the features (x_test) that were NOT used to train the model.
# ravel() flattens the predictions into a 1-D array.
y_test_prediccion = model.predict(x_test).ravel()
y_test_prediccion

array([0.432317  , 0.06578839, 0.02433881, 0.47512624, 0.47512624,
       0.9710568 , 0.6099359 , 0.09643775, 0.7251199 , 0.52627176,
       0.05593321, 0.97020787, 0.17912877, 0.47512624, 0.9739549 ,
       0.21015605, 0.79492867, 0.47512624, 0.0410898 , 0.0699375 ,
       0.52542573, 0.05610275, 0.0336684 , 0.15575397, 0.16641366,
       0.13299245, 0.04935247, 0.134743  , 0.25639254, 0.02438015,
       0.03846633, 0.09784818, 0.96596736, 0.0651167 , 0.9371113 ,
       0.09185266, 0.07056546, 0.5520233 , 0.9331699 , 0.20588231,
       0.4057575 , 0.03549734, 0.974177  , 0.05586848, 0.47512624,
       0.08162582, 0.21925855, 0.06240061, 0.06268018, 0.1379394 ,
       0.08162582, 0.19375178, 0.68970716, 0.08223525, 0.3961981 ,
       0.96253484, 0.13397288, 0.30745435, 0.09547767, 0.97100306,
       0.22255722, 0.28318283, 0.06165984, 0.02486065, 0.08723107,
       0.9020332 , 0.9695988 , 0.09278426, 0.67814416, 0.04154074,
       0.0749042 , 0.9132868 , 0.02427459, 0.42526975, 0.06669

In [43]:
# Binarize the predictions with a 0.5 threshold:
# values strictly above 0.5 become True (class 1), everything else becomes False (class 0).
y_test_prediccion_binaria = y_test_prediccion > 0.5
y_test_prediccion_binaria

array([False, False, False, False, False,  True,  True, False,  True,
        True, False,  True, False, False,  True, False,  True, False,
       False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False,  True, False,  True, False,
       False,  True,  True, False, False, False,  True, False, False,
       False, False, False, False, False, False, False,  True, False,
       False,  True, False, False, False,  True, False, False, False,
       False, False,  True,  True, False,  True, False, False,  True,
       False, False, False, False, False, False, False, False, False,
        True, False, False, False,  True, False,  True, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False, False,  True,  True, False, False,  True,  True, False,
        True, False,  True, False,  True, False, False, False, False,
       False, False,

In [44]:
# Compute the validation accuracy (accuracy on data the model has not seen, not the training accuracy).
# INTERPRETATION: in the recorded run the model was right in roughly 80 of every 100 validation cases
# and wrong in roughly 20.
accuracy_score(y_test, y_test_prediccion_binaria)

0.8041958041958042

# Análisis final

In [None]:
#El modelo tiene un alto porcentaje de reconocimiento de patrones en los datos de entrenamiento (85.06%)
#El modelo tiene un alto porcentaje de aciertos en nuestras predicciones sobre los datos de validación (80.42%)
#La diferencia entre el porcentaje de reconocimiento de patrones (85.06%) y el porcentaje de aciertos (80.42%) es pequeña (unos 4.6 puntos porcentuales)
#Como la diferencia es pequeña, estamos frente a un modelo que representa y predice muy bien la realidad de negocio
#Es un modelo que podemos usar en un entorno real de producción