# Librerías

In [None]:
#Librerías Base
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from google.colab import drive

In [None]:
#Librerías de Métricas
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Machine Learning libraries.
# Everything is imported from `tensorflow.keras` so the model, layers, optimizers and
# utilities all come from the same Keras implementation. The former
# `keras.utils.np_utils` module is not available in current Keras releases, while
# `tensorflow.keras.utils.to_categorical` is the public location of the same helper.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.utils import to_categorical

# Conexión al repositorio de datos

In [None]:
# Mount Google Drive at /content/drive so the dataset file can be read from it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Lectura de datos

In [None]:
# Load the dataset, which classifies flowers according to their measurements.
# It has the following fields:
# - sepal_length: length of the sepal
# - sepal_width: width of the sepal
# - petal_length: length of the petal
# - petal_width: width of the petal
# - species: type of iris flower (setosa, versicolor, virginica)
df = pd.read_csv('/content/drive/MyDrive/Data/iris.csv')
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [None]:
# Keep only the columns used in this notebook: the four measurements and the label.
# NOTE: this rebinds `df`, so re-running the cell operates on the already-filtered frame.
df = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']]
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [None]:
# Drop every row that contains a missing value.
df = df.dropna()
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


# Definición de "features"

In [None]:
# Define the features (x): the measurement columns used to predict the species.
dfx = df[['sepal_length', 'sepal_width', 'petal_length', 'petal_width']]
dfx

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [None]:
# Convert the feature DataFrame into a NumPy matrix (one row per flower, one column per feature).
x = dfx.to_numpy()
x

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

# Definición de "labels"

In [None]:
# Define the labels (y): the value to be predicted.
# Here it is a categorical variable, so list its distinct values first.
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [None]:
# Give each species name a numeric code: its position in the list below.
representacion = {
    especie: codigo
    for codigo, especie in enumerate(['setosa', 'versicolor', 'virginica'])
}

representacion

{'setosa': 0, 'versicolor': 1, 'virginica': 2}

In [None]:
# Replace each species name in the label column with its numeric code.
dfy = df['species'].map(representacion)
dfy

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: species, Length: 150, dtype: int64

In [None]:
# One-hot encode the labels: one output column per category (three here).
y = to_categorical(dfy)
y

array([[1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0

# División de los datos en datos de entrenamiento (x_train, y_train) y datos de validación (x_test, y_test)

In [None]:
# Split the data into training (x_train, y_train) and validation (x_test, y_test) sets.
# - test_size=0.2 holds out 20% of the rows for validation.
# - random_state fixes the shuffle so "Restart & Run All" reproduces the same split,
#   which keeps the accuracy figures comparable between runs.
# - stratify receives the class index of each one-hot row so both subsets keep the
#   same class proportions as the full dataset.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    stratify=np.argmax(y, axis=1),
)

In [None]:
# Training features.
x_train

array([[6.3, 3.3, 4.7, 1.6],
       [6.3, 2.8, 5.1, 1.5],
       [5.8, 2.7, 5.1, 1.9],
       [6.5, 3.2, 5.1, 2. ],
       [5.8, 2.6, 4. , 1.2],
       [5.8, 2.8, 5.1, 2.4],
       [4.6, 3.6, 1. , 0.2],
       [4.9, 2.4, 3.3, 1. ],
       [4.7, 3.2, 1.3, 0.2],
       [7.3, 2.9, 6.3, 1.8],
       [5.8, 2.7, 3.9, 1.2],
       [5.1, 3.5, 1.4, 0.2],
       [5.1, 3.8, 1.5, 0.3],
       [7.4, 2.8, 6.1, 1.9],
       [6.5, 2.8, 4.6, 1.5],
       [7.2, 3.6, 6.1, 2.5],
       [5.2, 3.4, 1.4, 0.2],
       [6.1, 3. , 4.9, 1.8],
       [5.5, 2.4, 3.7, 1. ],
       [5.5, 2.6, 4.4, 1.2],
       [4.6, 3.1, 1.5, 0.2],
       [6.5, 3. , 5.2, 2. ],
       [5. , 3. , 1.6, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.2, 4.1, 1.5, 0.1],
       [7.1, 3. , 5.9, 2.1],
       [6.4, 2.7, 5.3, 1.9],
       [5.5, 2.5, 4. , 1.3],
       [6.3, 2.5, 5. , 1.9],
       [5.1, 3.8, 1.9, 0.4],
       [4.4, 2.9, 1.4, 0.2],
       [5.2, 2.7, 3.9, 1.4],
       [5.7, 2.9, 4.2, 1.3],
       [4.6, 3

In [None]:
# Training labels (one-hot encoded).
y_train

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1

In [None]:
# Validation features.
x_test

array([[6.4, 3.1, 5.5, 1.8],
       [6.1, 2.9, 4.7, 1.4],
       [5.4, 3.9, 1.3, 0.4],
       [6.9, 3.1, 4.9, 1.5],
       [5. , 3.2, 1.2, 0.2],
       [6.3, 2.7, 4.9, 1.8],
       [5.4, 3. , 4.5, 1.5],
       [6.4, 3.2, 4.5, 1.5],
       [5. , 3.4, 1.6, 0.4],
       [6. , 2.2, 4. , 1. ],
       [5.2, 3.5, 1.5, 0.2],
       [6.2, 3.4, 5.4, 2.3],
       [6.6, 3. , 4.4, 1.4],
       [5.7, 2.5, 5. , 2. ],
       [5.9, 3. , 5.1, 1.8],
       [6.4, 2.8, 5.6, 2.1],
       [6.4, 3.2, 5.3, 2.3],
       [6.3, 2.5, 4.9, 1.5],
       [5.1, 3.5, 1.4, 0.3],
       [5.5, 4.2, 1.4, 0.2],
       [7.2, 3. , 5.8, 1.6],
       [5.6, 2.7, 4.2, 1.3],
       [6.7, 3. , 5. , 1.7],
       [7.7, 2.6, 6.9, 2.3],
       [5.6, 3. , 4.5, 1.5],
       [4.7, 3.2, 1.6, 0.2],
       [6.5, 3. , 5.5, 1.8],
       [6.1, 3. , 4.6, 1.4],
       [6.2, 2.8, 4.8, 1.8],
       [5.6, 2.9, 3.6, 1.3]])

In [None]:
# Validation labels (one-hot encoded).
y_test

array([[0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.]], dtype=float32)

# Configuración del modelo

In [None]:
# Instantiate an empty model with "Sequential"; the layers are added in the following cells.
model = Sequential()

In [None]:
# Add the first layer:
# - 3 neurons
# - 4 inputs (one per feature column)
# - tanh activation
model.add(Dense(3, input_shape=(4,), activation='tanh'))

In [None]:
# Add the second layer:
# - 2 neurons
# - tanh activation
model.add(Dense(2, activation='tanh'))

In [None]:
# This is a multi-class classification problem, so the last layer has 3 neurons, one per class.
# Softmax is the activation function used for the multi-class output.
model.add(Dense(3, activation='softmax'))

In [None]:
# Show a summary of the model (layers, output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_3 (Dense)              (None, 3)                 15        
_________________________________________________________________
dense_4 (Dense)              (None, 2)                 8         
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 9         
Total params: 32
Trainable params: 32
Non-trainable params: 0
_________________________________________________________________


# Compilación del modelo

In [None]:
# Compile the model: choose the optimizer, the loss to minimise and the metric to report.
# - The loss is "categorical_crossentropy" because this is a multi-class classification
#   model with one-hot encoded labels and a softmax output layer.
# - Adam takes `learning_rate`; the old `lr` keyword is deprecated and emits a warning.
model.compile(
    optimizer=Adam(learning_rate=0.1),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

  "The `lr` argument is deprecated, use `learning_rate` instead.")


# Entrenamiento del modelo

In [None]:
# Train the model with the training features (x_train) and labels (y_train).
# epochs=100 sets the number of passes over the training data.
# validation_split=0.1 asks Keras to hold out 10% of the training rows for per-epoch validation.
model.fit(x_train, y_train, epochs=100, validation_split=0.1)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.callbacks.History at 0x7f6feb5ff990>

# Accuracy de entrenamiento (patrones encontrados en la data)

In [None]:
# Predict on the same features (x_train) that were used to train the model.
# The rounding to 3 decimals is only for display; y_train_prediccion keeps the unrounded values.
y_train_prediccion = model.predict(x_train)
y_train_prediccion.round(3)

array([[0.003, 0.987, 0.01 ],
       [0.001, 0.711, 0.289],
       [0.   , 0.074, 0.926],
       [0.   , 0.182, 0.818],
       [0.003, 0.99 , 0.008],
       [0.   , 0.056, 0.944],
       [0.999, 0.001, 0.   ],
       [0.003, 0.989, 0.008],
       [0.999, 0.001, 0.   ],
       [0.   , 0.066, 0.934],
       [0.003, 0.99 , 0.008],
       [0.999, 0.001, 0.   ],
       [0.999, 0.001, 0.   ],
       [0.   , 0.065, 0.935],
       [0.002, 0.985, 0.013],
       [0.   , 0.058, 0.942],
       [0.999, 0.001, 0.   ],
       [0.   , 0.344, 0.656],
       [0.003, 0.99 , 0.008],
       [0.003, 0.988, 0.01 ],
       [0.999, 0.001, 0.   ],
       [0.   , 0.101, 0.899],
       [0.999, 0.001, 0.   ],
       [0.999, 0.001, 0.   ],
       [0.999, 0.001, 0.   ],
       [0.999, 0.001, 0.   ],
       [0.   , 0.063, 0.937],
       [0.   , 0.077, 0.923],
       [0.003, 0.989, 0.009],
       [0.   , 0.081, 0.919],
       [0.999, 0.001, 0.   ],
       [0.999, 0.001, 0.   ],
       [0.003, 0.989, 0.009],
       [0.

In [None]:
# For every training sample, keep the index of the class with the highest predicted probability.
y_train_prediccion_categorico = y_train_prediccion.argmax(axis=1)
y_train_prediccion_categorico

array([1, 1, 2, 2, 1, 2, 0, 1, 0, 2, 1, 0, 0, 2, 1, 2, 0, 2, 1, 1, 0, 2,
       0, 0, 0, 0, 2, 2, 1, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, 2, 0, 2, 2, 1,
       0, 1, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, 2, 1, 2, 0, 1, 1, 1, 2, 2, 1,
       0, 2, 0, 0, 0, 1, 2, 2, 0, 1, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 2, 2,
       1, 0, 0, 1, 0, 0, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 1,
       2, 2, 0, 0, 1, 1, 2, 0, 1, 0])

In [None]:
# The labels have three output columns (one-hot); the next cell reduces each row to the index of its maximum.
y_train

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1

In [None]:
# Convert the one-hot training labels back into class indices.
y_train_categorico = y_train.argmax(axis=1)
y_train_categorico

array([1, 2, 2, 2, 1, 2, 0, 1, 0, 2, 1, 0, 0, 2, 1, 2, 0, 2, 1, 1, 0, 2,
       0, 0, 0, 0, 2, 2, 1, 2, 0, 0, 1, 1, 0, 0, 0, 1, 0, 2, 0, 2, 1, 1,
       0, 1, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, 2, 1, 2, 0, 1, 1, 1, 2, 2, 1,
       0, 2, 0, 0, 0, 1, 2, 2, 0, 1, 1, 1, 1, 2, 0, 0, 0, 2, 1, 1, 2, 2,
       1, 0, 0, 1, 0, 0, 2, 2, 1, 0, 1, 2, 2, 2, 2, 1, 0, 0, 0, 0, 0, 1,
       2, 2, 0, 0, 1, 1, 2, 0, 1, 0])

In [None]:
# Training accuracy: fraction of training samples whose predicted class equals the true class.
accuracy_score(y_train_categorico, y_train_prediccion_categorico)

0.9833333333333333

# Accuracy de uso (accuracy de predicción)

In [None]:
# Predict on the features (x_test) that were not used to train the model.
# The rounding to 3 decimals is only for display; y_test_prediccion keeps the unrounded values.
y_test_prediccion = model.predict(x_test)
y_test_prediccion.round(3)

array([[0.   , 0.126, 0.874],
       [0.002, 0.985, 0.013],
       [0.999, 0.001, 0.   ],
       [0.003, 0.988, 0.01 ],
       [0.999, 0.001, 0.   ],
       [0.   , 0.181, 0.819],
       [0.002, 0.975, 0.023],
       [0.003, 0.989, 0.008],
       [0.999, 0.001, 0.   ],
       [0.003, 0.99 , 0.008],
       [0.999, 0.001, 0.   ],
       [0.   , 0.066, 0.934],
       [0.003, 0.99 , 0.008],
       [0.   , 0.063, 0.937],
       [0.   , 0.152, 0.848],
       [0.   , 0.059, 0.941],
       [0.   , 0.066, 0.934],
       [0.001, 0.558, 0.441],
       [0.999, 0.001, 0.   ],
       [0.999, 0.001, 0.   ],
       [0.   , 0.276, 0.724],
       [0.003, 0.989, 0.009],
       [0.001, 0.826, 0.173],
       [0.   , 0.054, 0.946],
       [0.002, 0.981, 0.017],
       [0.999, 0.001, 0.   ],
       [0.   , 0.114, 0.886],
       [0.003, 0.988, 0.009],
       [0.   , 0.286, 0.713],
       [0.003, 0.99 , 0.008]], dtype=float32)

In [None]:
# For every validation sample, keep the index of the class with the highest predicted probability.
y_test_prediccion_categorico = y_test_prediccion.argmax(axis=1)
y_test_prediccion_categorico

array([2, 1, 0, 1, 0, 2, 1, 1, 0, 1, 0, 2, 1, 2, 2, 2, 2, 1, 0, 0, 2, 1,
       1, 2, 1, 0, 2, 1, 2, 1])

In [None]:
# The labels have three output columns (one-hot); the next cell reduces each row to the index of its maximum.
y_test

array([[0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 1., 0.],
       [0., 0., 1.],
       [0., 1., 0.]], dtype=float32)

In [None]:
# Convert the one-hot validation labels back into class indices.
y_test_categorico = y_test.argmax(axis=1)
y_test_categorico

array([2, 1, 0, 1, 0, 2, 1, 1, 0, 1, 0, 2, 1, 2, 2, 2, 2, 1, 0, 0, 2, 1,
       1, 2, 1, 0, 2, 1, 2, 1])

In [None]:
# Compute the accuracy on the held-out data (fraction of correct predictions, not the error).
# INTERPRETATION: the output saved with this notebook is 1.0, i.e. every held-out sample was
# classified correctly in that run.
# NOTE(review): the held-out set is small (test_size = 0.2), so treat this as an optimistic
# estimate of accuracy in production rather than a guarantee.
accuracy_score(y_test_categorico, y_test_prediccion_categorico)

1.0

# Matriz de confusión

In [None]:
# Utility function that presents the confusion matrix as a labelled table.
def pintar_matriz_de_confusion(y, y_prediccion_binaria, etiquetas):
    """Build the confusion matrix and return it as a labelled DataFrame.

    Args:
        y: True class values, passed as the first argument to `confusion_matrix`.
        y_prediccion_binaria: Predicted class values, passed as the second argument.
        etiquetas: Class names. They are used as the row index, and with the
            prefix "PREDICCION " as the column names.

    Returns:
        pandas.DataFrame whose rows are the true classes and whose columns are
        the predicted classes.

    Note:
        The names are attached by position, so `etiquetas` must follow the order
        in which `confusion_matrix` arranges the classes and must contain one
        name per class present in the data.
    """
    # Compute the raw counts first, then the column headers.
    conteos = confusion_matrix(y, y_prediccion_binaria)
    columnas = ['PREDICCION ' + nombre for nombre in etiquetas]

    return pd.DataFrame(conteos, index=etiquetas, columns=columnas)

In [None]:
# Display the confusion matrix for the validation data.
pintar_matriz_de_confusion(y_test_categorico, y_test_prediccion_categorico, ['setosa', 'versicolor', 'virginica'])

Unnamed: 0,PREDICCION setosa,PREDICCION versicolor,PREDICCION virginica
setosa,7,0,0
versicolor,0,12,0
virginica,0,0,11


# Análisis final

In [None]:
# The figures below are taken from the outputs saved in this notebook; they can change between runs.
# The model recognises the patterns in the training data very well (training accuracy: 98.33%)
# The model has a high hit rate on data it has not seen (validation accuracy: 100.00%)
# The difference between training accuracy (98.33%) and validation accuracy (100.00%) is small (1.67 percentage points)
# Because the difference is small, there is no sign of overfitting: the model represents and predicts the data well
# It is a candidate for use in a real production environment, keeping in mind that the validation set has only 30 samples