# Clasificación de canciones con redes neuronales (implementación propia de backpropagation)
## Javier Andres Tellez Ortiz 201617861

### Se descarga el conjunto de datos y se descomprime

In [1]:
pip install wget

Note: you may need to restart the kernel to use updated packages.


In [2]:
import wget
from zipfile import ZipFile

##Se descarga el archivo del repositorio 
file = wget.download("http://millionsongdataset.com/sites/default/files/AdditionalFiles/msd_genre_dataset.zip")

##Se abre el archivo y se descomprime
zpFile = ZipFile(file)
zpFile.extractall()
zpFile.close()

100% [........................................................................] 12656044 / 12656044

In [3]:
import pandas as pd
import numpy as np

dataset = pd.read_csv("msd_genre_dataset.txt", skiprows = range(9))

dataset.dropna()
dataset.head()

Unnamed: 0,%genre,track_id,artist_name,title,loudness,tempo,time_signature,key,mode,duration,...,var_timbre3,var_timbre4,var_timbre5,var_timbre6,var_timbre7,var_timbre8,var_timbre9,var_timbre10,var_timbre11,var_timbre12
0,classic pop and rock,TRFCOOU128F427AEC0,Blue Oyster Cult,Mes Dames Sarat,-8.697,155.007,1,9,1,246.33424,...,1255.514569,580.030472,598.485223,575.337671,322.068603,321.726029,232.700609,186.805303,181.938688,151.508011
1,classic pop and rock,TRNJTPB128F427AE9F,Blue Oyster Cult,Screams,-10.659,148.462,1,4,0,189.80526,...,2007.65307,1043.474073,585.694981,564.013736,510.177022,400.200186,365.119588,238.099708,197.933757,251.577525
2,classic pop and rock,TRLFJHA128F427AEEA,Blue Oyster Cult,Dance The Night Away,-13.494,112.909,1,10,0,158.1971,...,1204.856777,2736.520024,730.233239,665.203452,535.775111,439.335059,486.82297,265.33386,447.097987,251.880724
3,classic pop and rock,TRCQZAG128F427DB97,Blue Oyster Cult,Debbie Denise,-12.786,117.429,4,7,1,250.22649,...,809.755802,563.90807,492.803819,378.382799,372.875044,231.941957,246.313305,168.400152,85.282462,339.897173
4,classic pop and rock,TRNXMNM128F427DB8C,Blue Oyster Cult,(Don't Fear) The Reaper,-14.093,141.536,4,9,0,307.06893,...,1093.684935,343.556047,889.163314,218.111796,304.862864,178.352161,440.478867,142.669283,81.061326,208.355152


In [4]:
dataset = dataset.drop(columns = ["track_id", "artist_name", "title"])
features = dataset.columns.tolist()
dataset = dataset[(dataset["%genre"] == "jazz and blues") | (dataset["%genre"] == "soul and reggae")]

### Se obtienen los datos de los géneros de interés y se muestra la cantidad de cada clase

In [5]:
pd.DataFrame(dataset["%genre"].value_counts())

Unnamed: 0,%genre
jazz and blues,4334
soul and reggae,4016


### De acuerdo con trabajos anteriores, se estandarizan los datos y se convierten las etiquetas a valores numéricos. Se agrega un vector de unos que representa el bias

In [6]:
from sklearn.preprocessing import StandardScaler, LabelEncoder
y = dataset["%genre"].values
X = dataset.values[:,1:]

scaler = StandardScaler()
lblEncoder = LabelEncoder()

##Se convierten las etiquetas en datos binarios
y = lblEncoder.fit(np.unique(y)).transform(y)

##Se estandarizan los datos provenientes del archivo
X = scaler.fit(X).transform(X)
data_rows = X.shape[0]
ones_vector =  np.ones([data_rows,1])

##Se agrega un vector de 1's que representan el bias
X = np.concatenate((ones_vector , X), 1)

### Se dividen los datos en entrenamiento y prueba. Los datos de validación no son necesarios ya que se usarán los modelos encontrados en el reto anterior.

In [7]:
from sklearn.model_selection import train_test_split

##Se dividen los datos en entrenamiento y prueba
X, X_test, y, y_test = train_test_split(X, y, test_size = 0.2, random_state = 7861)
X, X_test = X.T, X_test.T
total_test_data = X_test.shape[1]

##Se agrega una dimension a los vectores de las etiquetas para garantizar compatibilidad
y = np.expand_dims(y, axis = 0)
y_test = np.expand_dims(y_test, axis = 0)

genres, cantidad = np.unique(y_test, return_counts = True)
total_test_data = sum(cantidad)
labels = lblEncoder.inverse_transform(genres)
print("Datos de prueba clase %s (%s): %d" % (genres[0],labels[0],cantidad[0]))
print("Datos de prueba clase %s (%s): %d" % (genres[1],labels[1],cantidad[1]))
print("Total datos prueba: %d" % total_test_data)

Datos de prueba clase 0 (jazz and blues): 879
Datos de prueba clase 1 (soul and reggae): 791
Total datos prueba: 1670


### Se definen las funciones de activacion a usar y sus derivadas

In [8]:
def ReLU(a):
    mask = a > 0
    mask = mask.astype(float)
    return np.multiply(a, mask)

def ReLU_grad(a):
    mask = a > 0
    mask = mask.astype(float)
    return mask

def sigmoid(X):
    y = 1/(1 + np.exp(-X))
    return y

def sigmoid_grad(X):
    value = sigmoid(X)
    return value * (1-value)

### Se define la clase que representa una red neuronal. En ella se defienen a su vez las funciones para inicializar pesos (Xavier initializer), para hacer propagación de un conjunto de datos y su respectiva predicción y para realizar el procedimiento de retopropagación

In [9]:
import numpy as np
import math 
import scipy.stats as stats

class Neural_Network():
    
    def __init__(self, layers, activations, activations_grad):
        assert len(layers)-1 == len(activations), "Se debe ingresar un número consistente de activaciones y de capas"
        assert len(activations_grad) == len(activations), "Se debe ingresar el mismo número de activaciones y de derivadas"
        
        self.activations = activations
        self.activations_grad = activations_grad
        
        self.delta = []
        self.z = []
        self.a = []
        
        for i in range(len(layers)-1):
            self.z.append(np.ones((1,1)))
            self.delta.append(np.ones((1,1)))
            self.a.append(np.ones((1,1)))
            
        self.W = self.initialize_weights(layers)
        
    def initialize_weights(self, layers):
        W = []
        
        for i in range(len(layers) - 1):
            fan_in = layers[i] + 1
            fan_out = layers[i + 1]
            total_neurons = fan_in + fan_out
            
            variance = math.sqrt(2/(total_neurons))
            distribution = stats.truncnorm(-2, 2, loc=0, scale=variance)
            
            w = distribution.rvs((fan_out,fan_in))
            W.append(w)
        
        return W
        
    def predict_probs(self, X):
        self.a[0] = np.matmul(self.W[0],X)
        ones = np.ones((1,self.a[0].shape[1]))
        self.a[0] = np.append(ones, self.a[0], axis=0)
        
        self.z[0] = np.apply_along_axis(self.activations[0],0,self.a[0])
        
        layers = len(self.W)
        
        for i in range(1,layers):
            self.a[i] = np.matmul(self.W[i],self.z[i-1])
            
            if i != layers-1:
                ones = np.ones((1,self.a[i].shape[1]))
                self.a[i] = np.append(ones, self.a[i], axis=0)
            
            self.z[i] = np.apply_along_axis(self.activations[i],0,self.a[i])
        return self.z[-1]
    
    def predict(self, X):
        prob = self.predict_probs(X)
        return (prob > 0.5).astype(float)
    
    def back_prop(self, y):
        self.delta[-1] = self.z[-1] - y
        for i in range(len(self.delta)-2, -1, -1):
            derivate = np.apply_along_axis(self.activations_grad[i],0,self.a[i][1:,:])
            weigth_error = np.matmul(self.W[i+1][:,1:].T, self.delta[i+1])
            self.delta[i] = np.multiply(derivate, weigth_error)

### Se define la clase para el optimizador Adam el cual puede actualizar los pesos del modelo que recibe por parámetro usando los resultados obtenidos por una propagación y una retropropagación con un conjunto de datos

In [10]:
class Adam_optimizer():
    def __init__(self, model, learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-07):
        self.model = model
        self.learning_rate = learning_rate
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        
        self.t = 0
        
        self.momentum = []
        self.variance = []
        
        for element in self.model.W:
            shape = element.shape
            self.momentum.append(np.zeros(shape))
            self.variance.append(np.zeros(shape))
        
    def update_weigths(self, X, y):
        self.t = self.t + 1 
        
        gradients = []
        
        self.model.predict_probs(X)
        self.model.back_prop(y)
        
        z = [X] + self.model.z
        deltas = self.model.delta
        
        for i in range(len(self.momentum)):
            gradient = np.matmul(deltas[i], z[i].T)
            momentum = (self.beta_1*self.momentum[i]) + ((1 - self.beta_1)*gradient)
            variance = (self.beta_2*self.variance[i]) + ((1 - self.beta_2)*np.power(gradient,2))

            corrected_momentum = momentum/(1 - (self.beta_1**self.t))
            corrected_variance = variance/(1 - (self.beta_2**self.t))

            gradient = self.learning_rate * corrected_momentum / (np.sqrt(corrected_variance) + self.epsilon)
            self.model.W[i] = self.model.W[i] - gradient 
            
            gradients.append(gradient)
            self.momentum[i] = momentum
            self.variance[i] = variance

### Se define la función que calcula las pérdidas a partir de la entropía cruzada binaria

In [11]:
def cross_entropy(model, X, y):
    y_hat = model.predict_probs(X)
    error_vector = np.multiply(y, np.log(y_hat)) + np.multiply(1-y, np.log(1-y_hat))
    error = np.sum(error_vector)
    return error

### Se define la función que calcula la precisión binaria de un modelo 

In [12]:
def calculate_accuracy(model, X, y):
    y_hat = model.predict(X)
    acc = (y_hat == y).astype(float)
    acc = np.sum(acc)
    
    return acc/y.shape[1]

### Se define la función que permite dividir los datos en lotes, barajándolos previamente

In [13]:
def splitBatches(X, y, batch_size, seed=None):
    ## Se obtiene el número total de datos
    m = X.shape[0]
    
    ##Se calcula la cantidad total de lotes
    batches = math.ceil(m/batch_size)
    
    ##Se mezclan aleatoriamente los datos
    np.random.seed(seed)
    data = np.append(X,y, axis=0)
    np.random.shuffle(data.T)

    X_batches = []
    y_batches = []
    
    for i in range(batches):
        
        ##Se toman subconjuntos de datos con el tamaño especificado 
        X_batch = data[:-1,0:batch_size]
        X_batches.append(X_batch)
                
        y_batch = data[-1,0:batch_size]
        y_batches.append(y_batch)
        
        data = data[:,batch_size:]
        
    return X_batches, y_batches

### Función para realizar el entremaniento de un modelo, indica pérdidas y exactud al final de cada época

In [14]:
def train_model(optimizer, X, y, epochs=200, batch_size=32):
    for epoch in range(epochs):
        ##Se dividen los datos en lotes al inicio de cada época
        ##se realiza este procedimiento antes de cada época para 
        ##garantizar que los datos estén barajados de forma distinta
        ##en cada recorrido
        X_batches, y_batches = splitBatches(X, y, batch_size)

        for X_batch, y_batch in zip(X_batches, y_batches):
            optimizer.update_weigths(X_batch, y_batch)

        error = cross_entropy(optimizer.model, X, y)
        accuracy = calculate_accuracy(optimizer.model, X, y)

        print("Epoch: %d, train_loss: %.2f, train_accuracy: %.3f" % (epoch+1, error, accuracy))


### Se entrena el modelo con una capa escondida con los parámetros encontrados en reto anterior 

In [15]:
one_hidden_layer_model = Neural_Network([30,19,1], [ReLU,sigmoid], [ReLU_grad,sigmoid_grad])
one_hidden_layer_optimizer = Adam_optimizer(one_hidden_layer_model, learning_rate=0.003)

train_model(one_hidden_layer_optimizer, X, y, 500, 32)

Epoch: 1, train_loss: -4473.79, train_accuracy: 0.586
Epoch: 2, train_loss: -4370.09, train_accuracy: 0.609
Epoch: 3, train_loss: -4279.06, train_accuracy: 0.626
Epoch: 4, train_loss: -4194.68, train_accuracy: 0.641
Epoch: 5, train_loss: -4114.88, train_accuracy: 0.661
Epoch: 6, train_loss: -4039.69, train_accuracy: 0.679
Epoch: 7, train_loss: -3966.40, train_accuracy: 0.694
Epoch: 8, train_loss: -3898.32, train_accuracy: 0.705
Epoch: 9, train_loss: -3831.65, train_accuracy: 0.716
Epoch: 10, train_loss: -3772.30, train_accuracy: 0.726
Epoch: 11, train_loss: -3715.31, train_accuracy: 0.734
Epoch: 12, train_loss: -3659.67, train_accuracy: 0.744
Epoch: 13, train_loss: -3607.86, train_accuracy: 0.751
Epoch: 14, train_loss: -3558.67, train_accuracy: 0.756
Epoch: 15, train_loss: -3511.21, train_accuracy: 0.762
Epoch: 16, train_loss: -3465.68, train_accuracy: 0.766
Epoch: 17, train_loss: -3423.54, train_accuracy: 0.770
Epoch: 18, train_loss: -3382.01, train_accuracy: 0.776
Epoch: 19, train_lo

Epoch: 150, train_loss: -2371.29, train_accuracy: 0.838
Epoch: 151, train_loss: -2369.62, train_accuracy: 0.838
Epoch: 152, train_loss: -2367.80, train_accuracy: 0.838
Epoch: 153, train_loss: -2366.40, train_accuracy: 0.838
Epoch: 154, train_loss: -2365.06, train_accuracy: 0.838
Epoch: 155, train_loss: -2363.81, train_accuracy: 0.837
Epoch: 156, train_loss: -2362.33, train_accuracy: 0.837
Epoch: 157, train_loss: -2360.94, train_accuracy: 0.837
Epoch: 158, train_loss: -2359.52, train_accuracy: 0.837
Epoch: 159, train_loss: -2357.95, train_accuracy: 0.837
Epoch: 160, train_loss: -2356.18, train_accuracy: 0.837
Epoch: 161, train_loss: -2354.55, train_accuracy: 0.838
Epoch: 162, train_loss: -2353.29, train_accuracy: 0.839
Epoch: 163, train_loss: -2352.20, train_accuracy: 0.838
Epoch: 164, train_loss: -2351.62, train_accuracy: 0.839
Epoch: 165, train_loss: -2351.76, train_accuracy: 0.840
Epoch: 166, train_loss: -2352.22, train_accuracy: 0.841
Epoch: 167, train_loss: -2352.84, train_accuracy

Epoch: 297, train_loss: -2242.67, train_accuracy: 0.850
Epoch: 298, train_loss: -2241.37, train_accuracy: 0.851
Epoch: 299, train_loss: -2240.64, train_accuracy: 0.850
Epoch: 300, train_loss: -2239.78, train_accuracy: 0.850
Epoch: 301, train_loss: -2238.74, train_accuracy: 0.850
Epoch: 302, train_loss: -2238.26, train_accuracy: 0.850
Epoch: 303, train_loss: -2237.96, train_accuracy: 0.849
Epoch: 304, train_loss: -2237.93, train_accuracy: 0.849
Epoch: 305, train_loss: -2237.77, train_accuracy: 0.849
Epoch: 306, train_loss: -2237.83, train_accuracy: 0.849
Epoch: 307, train_loss: -2237.88, train_accuracy: 0.849
Epoch: 308, train_loss: -2237.91, train_accuracy: 0.849
Epoch: 309, train_loss: -2238.11, train_accuracy: 0.850
Epoch: 310, train_loss: -2238.58, train_accuracy: 0.850
Epoch: 311, train_loss: -2238.98, train_accuracy: 0.849
Epoch: 312, train_loss: -2238.43, train_accuracy: 0.849
Epoch: 313, train_loss: -2237.82, train_accuracy: 0.849
Epoch: 314, train_loss: -2237.30, train_accuracy

Epoch: 444, train_loss: -2168.02, train_accuracy: 0.854
Epoch: 445, train_loss: -2168.56, train_accuracy: 0.854
Epoch: 446, train_loss: -2169.04, train_accuracy: 0.854
Epoch: 447, train_loss: -2169.09, train_accuracy: 0.854
Epoch: 448, train_loss: -2168.90, train_accuracy: 0.854
Epoch: 449, train_loss: -2168.29, train_accuracy: 0.854
Epoch: 450, train_loss: -2167.80, train_accuracy: 0.854
Epoch: 451, train_loss: -2167.08, train_accuracy: 0.854
Epoch: 452, train_loss: -2166.65, train_accuracy: 0.855
Epoch: 453, train_loss: -2165.53, train_accuracy: 0.855
Epoch: 454, train_loss: -2164.36, train_accuracy: 0.855
Epoch: 455, train_loss: -2163.49, train_accuracy: 0.856
Epoch: 456, train_loss: -2162.93, train_accuracy: 0.855
Epoch: 457, train_loss: -2162.60, train_accuracy: 0.856
Epoch: 458, train_loss: -2162.93, train_accuracy: 0.855
Epoch: 459, train_loss: -2163.65, train_accuracy: 0.856
Epoch: 460, train_loss: -2164.78, train_accuracy: 0.855
Epoch: 461, train_loss: -2167.61, train_accuracy

### Se entrena los modelo con varias capas escondidas con los parámetros encontrados en reto anterior 

In [16]:
two_hidden_layer_model = Neural_Network([30,19,13,1],
                                        [ReLU,ReLU,sigmoid], 
                                        [ReLU_grad,ReLU_grad,sigmoid_grad])
two_hidden_layer_optimizer = Adam_optimizer(two_hidden_layer_model, 
                                            learning_rate=0.003)
train_model(two_hidden_layer_optimizer, X, y, 500, 32)


tree_hidden_layer_model = Neural_Network([30,19,13,7,1], 
                                         [ReLU,ReLU,ReLU,sigmoid], 
                                         [ReLU_grad,ReLU_grad,ReLU_grad,sigmoid_grad])
tree_hidden_layer_optimizer = Adam_optimizer(tree_hidden_layer_model, 
                                             learning_rate=0.003)
train_model(tree_hidden_layer_optimizer, X, y, 500, 32)


four_hidden_layer_model = Neural_Network([30,19,13,7,3,1], 
                                         [ReLU,ReLU,ReLU,ReLU,sigmoid], 
                                         [ReLU_grad,ReLU_grad,ReLU_grad,ReLU_grad,sigmoid_grad])
four_hidden_layer_optimizer = Adam_optimizer(four_hidden_layer_model, 
                                             learning_rate=0.003)
train_model(four_hidden_layer_optimizer, X, y, 500, 32)

Epoch: 1, train_loss: -4394.60, train_accuracy: 0.607
Epoch: 2, train_loss: -4326.05, train_accuracy: 0.619
Epoch: 3, train_loss: -4258.63, train_accuracy: 0.637
Epoch: 4, train_loss: -4195.79, train_accuracy: 0.654
Epoch: 5, train_loss: -4138.47, train_accuracy: 0.669
Epoch: 6, train_loss: -4081.26, train_accuracy: 0.684
Epoch: 7, train_loss: -4025.84, train_accuracy: 0.698
Epoch: 8, train_loss: -3968.84, train_accuracy: 0.710
Epoch: 9, train_loss: -3913.35, train_accuracy: 0.722
Epoch: 10, train_loss: -3859.76, train_accuracy: 0.729
Epoch: 11, train_loss: -3806.94, train_accuracy: 0.738
Epoch: 12, train_loss: -3754.16, train_accuracy: 0.746
Epoch: 13, train_loss: -3703.34, train_accuracy: 0.756
Epoch: 14, train_loss: -3654.26, train_accuracy: 0.762
Epoch: 15, train_loss: -3607.55, train_accuracy: 0.769
Epoch: 16, train_loss: -3562.41, train_accuracy: 0.772
Epoch: 17, train_loss: -3517.87, train_accuracy: 0.778
Epoch: 18, train_loss: -3474.07, train_accuracy: 0.782
Epoch: 19, train_lo

Epoch: 150, train_loss: -2345.58, train_accuracy: 0.842
Epoch: 151, train_loss: -2344.08, train_accuracy: 0.842
Epoch: 152, train_loss: -2340.66, train_accuracy: 0.842
Epoch: 153, train_loss: -2338.30, train_accuracy: 0.841
Epoch: 154, train_loss: -2336.04, train_accuracy: 0.841
Epoch: 155, train_loss: -2334.23, train_accuracy: 0.841
Epoch: 156, train_loss: -2332.36, train_accuracy: 0.841
Epoch: 157, train_loss: -2329.26, train_accuracy: 0.841
Epoch: 158, train_loss: -2325.90, train_accuracy: 0.841
Epoch: 159, train_loss: -2323.43, train_accuracy: 0.841
Epoch: 160, train_loss: -2321.79, train_accuracy: 0.841
Epoch: 161, train_loss: -2320.36, train_accuracy: 0.841
Epoch: 162, train_loss: -2318.53, train_accuracy: 0.842
Epoch: 163, train_loss: -2316.41, train_accuracy: 0.841
Epoch: 164, train_loss: -2313.80, train_accuracy: 0.841
Epoch: 165, train_loss: -2311.53, train_accuracy: 0.841
Epoch: 166, train_loss: -2309.48, train_accuracy: 0.842
Epoch: 167, train_loss: -2307.47, train_accuracy

Epoch: 297, train_loss: -2213.68, train_accuracy: 0.850
Epoch: 298, train_loss: -2214.05, train_accuracy: 0.850
Epoch: 299, train_loss: -2215.35, train_accuracy: 0.850
Epoch: 300, train_loss: -2217.66, train_accuracy: 0.850
Epoch: 301, train_loss: -2220.57, train_accuracy: 0.851
Epoch: 302, train_loss: -2223.22, train_accuracy: 0.850
Epoch: 303, train_loss: -2226.74, train_accuracy: 0.849
Epoch: 304, train_loss: -2229.61, train_accuracy: 0.849
Epoch: 305, train_loss: -2231.34, train_accuracy: 0.849
Epoch: 306, train_loss: -2231.57, train_accuracy: 0.849
Epoch: 307, train_loss: -2230.99, train_accuracy: 0.850
Epoch: 308, train_loss: -2230.45, train_accuracy: 0.849
Epoch: 309, train_loss: -2229.96, train_accuracy: 0.848
Epoch: 310, train_loss: -2230.49, train_accuracy: 0.848
Epoch: 311, train_loss: -2228.40, train_accuracy: 0.848
Epoch: 312, train_loss: -2226.50, train_accuracy: 0.848
Epoch: 313, train_loss: -2224.85, train_accuracy: 0.848
Epoch: 314, train_loss: -2222.69, train_accuracy

Epoch: 444, train_loss: -2134.87, train_accuracy: 0.854
Epoch: 445, train_loss: -2130.43, train_accuracy: 0.855
Epoch: 446, train_loss: -2126.89, train_accuracy: 0.855
Epoch: 447, train_loss: -2122.72, train_accuracy: 0.856
Epoch: 448, train_loss: -2119.63, train_accuracy: 0.857
Epoch: 449, train_loss: -2117.48, train_accuracy: 0.859
Epoch: 450, train_loss: -2116.86, train_accuracy: 0.860
Epoch: 451, train_loss: -2116.91, train_accuracy: 0.861
Epoch: 452, train_loss: -2118.50, train_accuracy: 0.861
Epoch: 453, train_loss: -2121.05, train_accuracy: 0.862
Epoch: 454, train_loss: -2123.11, train_accuracy: 0.860
Epoch: 455, train_loss: -2126.64, train_accuracy: 0.861
Epoch: 456, train_loss: -2131.13, train_accuracy: 0.860
Epoch: 457, train_loss: -2128.55, train_accuracy: 0.860
Epoch: 458, train_loss: -2126.34, train_accuracy: 0.859
Epoch: 459, train_loss: -2124.20, train_accuracy: 0.859
Epoch: 460, train_loss: -2123.42, train_accuracy: 0.860
Epoch: 461, train_loss: -2122.47, train_accuracy

Epoch: 93, train_loss: -2617.32, train_accuracy: 0.823
Epoch: 94, train_loss: -2613.74, train_accuracy: 0.824
Epoch: 95, train_loss: -2610.24, train_accuracy: 0.825
Epoch: 96, train_loss: -2606.11, train_accuracy: 0.826
Epoch: 97, train_loss: -2602.05, train_accuracy: 0.825
Epoch: 98, train_loss: -2596.55, train_accuracy: 0.826
Epoch: 99, train_loss: -2592.72, train_accuracy: 0.827
Epoch: 100, train_loss: -2588.50, train_accuracy: 0.826
Epoch: 101, train_loss: -2585.20, train_accuracy: 0.827
Epoch: 102, train_loss: -2580.51, train_accuracy: 0.828
Epoch: 103, train_loss: -2574.26, train_accuracy: 0.828
Epoch: 104, train_loss: -2567.41, train_accuracy: 0.829
Epoch: 105, train_loss: -2555.60, train_accuracy: 0.828
Epoch: 106, train_loss: -2545.31, train_accuracy: 0.828
Epoch: 107, train_loss: -2537.32, train_accuracy: 0.829
Epoch: 108, train_loss: -2531.48, train_accuracy: 0.828
Epoch: 109, train_loss: -2527.38, train_accuracy: 0.828
Epoch: 110, train_loss: -2524.53, train_accuracy: 0.828

Epoch: 240, train_loss: -2323.44, train_accuracy: 0.841
Epoch: 241, train_loss: -2322.97, train_accuracy: 0.841
Epoch: 242, train_loss: -2322.81, train_accuracy: 0.840
Epoch: 243, train_loss: -2322.89, train_accuracy: 0.840
Epoch: 244, train_loss: -2323.40, train_accuracy: 0.840
Epoch: 245, train_loss: -2323.94, train_accuracy: 0.840
Epoch: 246, train_loss: -2324.58, train_accuracy: 0.840
Epoch: 247, train_loss: -2325.16, train_accuracy: 0.840
Epoch: 248, train_loss: -2324.06, train_accuracy: 0.841
Epoch: 249, train_loss: -2322.85, train_accuracy: 0.841
Epoch: 250, train_loss: -2322.18, train_accuracy: 0.842
Epoch: 251, train_loss: -2322.07, train_accuracy: 0.842
Epoch: 252, train_loss: -2321.03, train_accuracy: 0.842
Epoch: 253, train_loss: -2320.33, train_accuracy: 0.842
Epoch: 254, train_loss: -2320.30, train_accuracy: 0.842
Epoch: 255, train_loss: -2317.96, train_accuracy: 0.842
Epoch: 256, train_loss: -2315.02, train_accuracy: 0.843
Epoch: 257, train_loss: -2313.14, train_accuracy

Epoch: 387, train_loss: -2216.31, train_accuracy: 0.851
Epoch: 388, train_loss: -2216.62, train_accuracy: 0.851
Epoch: 389, train_loss: -2218.10, train_accuracy: 0.851
Epoch: 390, train_loss: -2217.62, train_accuracy: 0.852
Epoch: 391, train_loss: -2218.21, train_accuracy: 0.851
Epoch: 392, train_loss: -2220.61, train_accuracy: 0.851
Epoch: 393, train_loss: -2223.43, train_accuracy: 0.851
Epoch: 394, train_loss: -2224.22, train_accuracy: 0.851
Epoch: 395, train_loss: -2225.78, train_accuracy: 0.851
Epoch: 396, train_loss: -2225.58, train_accuracy: 0.851
Epoch: 397, train_loss: -2226.48, train_accuracy: 0.851
Epoch: 398, train_loss: -2225.56, train_accuracy: 0.851
Epoch: 399, train_loss: -2226.57, train_accuracy: 0.850
Epoch: 400, train_loss: -2226.28, train_accuracy: 0.850
Epoch: 401, train_loss: -2223.53, train_accuracy: 0.849
Epoch: 402, train_loss: -2219.05, train_accuracy: 0.850
Epoch: 403, train_loss: -2213.90, train_accuracy: 0.851
Epoch: 404, train_loss: -2211.32, train_accuracy

Epoch: 35, train_loss: -4349.54, train_accuracy: 0.483
Epoch: 36, train_loss: -4339.85, train_accuracy: 0.483
Epoch: 37, train_loss: -4330.18, train_accuracy: 0.483
Epoch: 38, train_loss: -4321.01, train_accuracy: 0.483
Epoch: 39, train_loss: -4312.35, train_accuracy: 0.483
Epoch: 40, train_loss: -4304.36, train_accuracy: 0.483
Epoch: 41, train_loss: -4297.14, train_accuracy: 0.483
Epoch: 42, train_loss: -4290.37, train_accuracy: 0.483
Epoch: 43, train_loss: -4284.17, train_accuracy: 0.483
Epoch: 44, train_loss: -4277.60, train_accuracy: 0.483
Epoch: 45, train_loss: -4270.83, train_accuracy: 0.483
Epoch: 46, train_loss: -4264.32, train_accuracy: 0.483
Epoch: 47, train_loss: -4257.48, train_accuracy: 0.483
Epoch: 48, train_loss: -4250.83, train_accuracy: 0.483
Epoch: 49, train_loss: -4244.36, train_accuracy: 0.483
Epoch: 50, train_loss: -4237.50, train_accuracy: 0.483
Epoch: 51, train_loss: -4230.25, train_accuracy: 0.483
Epoch: 52, train_loss: -4222.90, train_accuracy: 0.483
Epoch: 53,

Epoch: 183, train_loss: -3348.70, train_accuracy: 0.831
Epoch: 184, train_loss: -3348.07, train_accuracy: 0.831
Epoch: 185, train_loss: -3346.13, train_accuracy: 0.832
Epoch: 186, train_loss: -3342.23, train_accuracy: 0.831
Epoch: 187, train_loss: -3338.83, train_accuracy: 0.832
Epoch: 188, train_loss: -3335.43, train_accuracy: 0.832
Epoch: 189, train_loss: -3332.41, train_accuracy: 0.832
Epoch: 190, train_loss: -3327.73, train_accuracy: 0.833
Epoch: 191, train_loss: -3324.61, train_accuracy: 0.833
Epoch: 192, train_loss: -3324.25, train_accuracy: 0.832
Epoch: 193, train_loss: -3325.45, train_accuracy: 0.834
Epoch: 194, train_loss: -3327.02, train_accuracy: 0.834
Epoch: 195, train_loss: -3329.15, train_accuracy: 0.834
Epoch: 196, train_loss: -3334.02, train_accuracy: 0.833
Epoch: 197, train_loss: -3335.85, train_accuracy: 0.833
Epoch: 198, train_loss: -3336.42, train_accuracy: 0.833
Epoch: 199, train_loss: -3335.04, train_accuracy: 0.833
Epoch: 200, train_loss: -3335.12, train_accuracy

Epoch: 330, train_loss: -2969.39, train_accuracy: 0.845
Epoch: 331, train_loss: -2967.50, train_accuracy: 0.845
Epoch: 332, train_loss: -2965.90, train_accuracy: 0.845
Epoch: 333, train_loss: -2965.60, train_accuracy: 0.844
Epoch: 334, train_loss: -2964.59, train_accuracy: 0.844
Epoch: 335, train_loss: -2963.36, train_accuracy: 0.844
Epoch: 336, train_loss: -2961.78, train_accuracy: 0.844
Epoch: 337, train_loss: -2960.24, train_accuracy: 0.844
Epoch: 338, train_loss: -2959.55, train_accuracy: 0.845
Epoch: 339, train_loss: -2959.41, train_accuracy: 0.845
Epoch: 340, train_loss: -2958.42, train_accuracy: 0.845
Epoch: 341, train_loss: -2956.00, train_accuracy: 0.845
Epoch: 342, train_loss: -2953.40, train_accuracy: 0.844
Epoch: 343, train_loss: -2950.79, train_accuracy: 0.845
Epoch: 344, train_loss: -2948.96, train_accuracy: 0.845
Epoch: 345, train_loss: -2945.49, train_accuracy: 0.844
Epoch: 346, train_loss: -2939.96, train_accuracy: 0.846
Epoch: 347, train_loss: -2935.60, train_accuracy

Epoch: 477, train_loss: -2753.08, train_accuracy: 0.851
Epoch: 478, train_loss: -2757.63, train_accuracy: 0.851
Epoch: 479, train_loss: -2761.40, train_accuracy: 0.851
Epoch: 480, train_loss: -2761.69, train_accuracy: 0.850
Epoch: 481, train_loss: -2760.73, train_accuracy: 0.850
Epoch: 482, train_loss: -2756.76, train_accuracy: 0.850
Epoch: 483, train_loss: -2748.48, train_accuracy: 0.851
Epoch: 484, train_loss: -2742.11, train_accuracy: 0.852
Epoch: 485, train_loss: -2738.53, train_accuracy: 0.852
Epoch: 486, train_loss: -2732.94, train_accuracy: 0.851
Epoch: 487, train_loss: -2727.64, train_accuracy: 0.851
Epoch: 488, train_loss: -2722.43, train_accuracy: 0.852
Epoch: 489, train_loss: -2720.35, train_accuracy: 0.852
Epoch: 490, train_loss: -2718.82, train_accuracy: 0.853
Epoch: 491, train_loss: -2717.91, train_accuracy: 0.852
Epoch: 492, train_loss: -2718.20, train_accuracy: 0.851
Epoch: 493, train_loss: -2719.05, train_accuracy: 0.851
Epoch: 494, train_loss: -2719.82, train_accuracy

### Se comparan los diversos modelos entrenados. Se evidencia que, en general, se obtivieron resultados comparables con los obtenidos con TensorFlow; por lo que es posible concluir que se implementó adecuadamente tanto el procedimiento de retropropagación como el algoritmo de optimización 

In [17]:
accuracy = calculate_accuracy(one_hidden_layer_model, X_test, y_test)
print('Exactitud con una neurona oculta: %.2f %%' % (accuracy*100))
accuracy = calculate_accuracy(two_hidden_layer_model, X_test, y_test)
print('Exactitud con dos neuronas ocultas: %.2f %%' % (accuracy*100))
accuracy = calculate_accuracy(tree_hidden_layer_model, X_test, y_test)
print('Exactitud con tres neuronas ocultas: %.2f %%' % (accuracy*100))
accuracy = calculate_accuracy(four_hidden_layer_model, X_test, y_test)
print('Exactitud con cuatro neuronas ocultas: %.2f %%' % (accuracy*100))

precision = 0.034
confianza = (1 - (2*math.exp(-2*(precision**2)*total_test_data))) *100

print('Esta estimaciones poseen un %.1f%% de precisión con una confianza del %.2f%%' % (precision*100, confianza))

Exactitud con una neurona oculta: 83.41 %
Exactitud con dos neuronas ocultas: 83.35 %
Exactitud con tres neuronas ocultas: 82.51 %
Exactitud con cuatro neuronas ocultas: 82.40 %
Esta estimaciones poseen un 3.4% de precisión con una confianza del 95.79%


### Se evidencia, igual que en reto anterior, que los modelos con mejores rendimientos son quellos con menos capas escondidas.