# Before Running
Add files fashion-mnist_train.csv and fashion-mnist_test.csv to the fashion-mnist-dataset folder.

# Introduction
Here, we'll explore neural networks applied to the fashion-mnist problem.

In [238]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
import csv
import pandas as pd

In [239]:
# Functions

def formatArray (dataFrame, columnToExtract) :
    array = dataFrame.values
    target = array[:,columnToExtract]
    params = np.delete(array, columnToExtract, axis = 1)
    return params, target

def loadFashionTrainData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_train.csv")

def loadFashionTestData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_test.csv")

def split_train_test(data, test_ratio):
    np.random.seed(42)
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

def createTarget (target):
    results = np.zeros((target.size, 10), dtype=int)
    for i in range(10):
        for j in range(target.size):
            if (target[j] != i):
                results[j][i - 1] = 0
            else:
                results[j][i - 1] = 1
    return results

def p_print(a):
    for x in a:
        print(*x, sep="   ")
    return

# The dataset
First and foremost, we'll open train and test data. The training data is split to obtain validation items and the the target values are also separated from the original data.

In [240]:
fashionTrainDataset = loadFashionTrainData()
fashionTestDataset = loadFashionTestData()
fashionTrain, fashionValidation = split_train_test(fashionTrainDataset, 0.2)
fashionTrainParams, fashionTrainTarget = formatArray(fashionTrain, 0)
fashionTrainTarget = createTarget(fashionTrainTarget)
fashionTrainParams = fashionTrainParams/255
fashionValidationParams, fashionValidationTarget = formatArray(fashionValidation, 0)
fashionValidationTarget = createTarget(fashionValidationTarget)
fashionValidationParams = fashionValidationParams/255


# Activation and Softmax Functions
First, we'll start by implementing some useful functions seen in class

In [241]:
def sigmoid(n):
    return (1/(1+np.exp(-n)))

def derivative_sigmoid(n):
    x = sigmoid(n)
    return x * (1 - x)

In [302]:
def relu(n):
    if n < 0:
        return 0
    return n

def derivative_relu(n):
    if n <= 0:
        return 0
    return 1

In [303]:
def leaky_relu(n):
    if n > 0:
        return n
    return 0.01 * n

def derivative_leaky_relu(n):
    if n <= 0:
        return 0.01
    return 1

In [304]:
def softmax(n):
    exp = np.exp(n)
    test_sum = np.sum(exp, axis=1, keepdims=True)
    return exp/test_sum

## Forward Propagation
In this section, we define forward propagation related functions.

In [315]:
def initialize_1hl(input_dimension,hidden_layer_1_neurons, output_dimension):
    neural_data = {}
    np.random.seed()
    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, output_dimension)/ np.sqrt(input_dimension)
    
    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, output_dimension))
    return neural_data

def initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension):
    neural_data = {}
    np.random.seed()

    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, hidden_layer_2_neurons)/ np.sqrt(hidden_layer_1_neurons)
    neural_data['w3'] = np.random.randn(hidden_layer_2_neurons, output_dimension)/ np.sqrt(hidden_layer_2_neurons)
    
    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, hidden_layer_2_neurons))
    neural_data['b3'] = np.zeros((1, output_dimension))
    return neural_data
    
def forward_prop_1hl(x, neural_data, activation="sigmoid"):
    w1 , w2, b1, b2 = neural_data['w1'], neural_data['w2'], neural_data['b1'], neural_data['b2']
    x1 = np.dot(x, w1) + b1 #Output of hidden layer
    if activation == "sigmoid":
        y1 = np.asarray([[sigmoid(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif activation == "relu":
        y1 = np.asarray([[relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif activation == "leaky_relu":
        y1 = np.asarray([[leaky_relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    x2 = np.dot(y1, w2) + b2 #Output of last layer
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['y1'] = y1
    neural_data['o'] = softmax(x2)  # Final output with softmax

    return neural_data['o']

def forward_prop_2hl(x, neural_data, activation="sigmoid"):
    w1 , w2, w3, b1, b2, b3 = neural_data['w1'], neural_data['w2'], neural_data['w3'], neural_data['b1'], neural_data['b2'], neural_data['b3']

    x1 = np.dot(x, w1) + b1
    if activation == "sigmoid":
        y1 = np.asarray([[sigmoid(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif activation == "relu":
        y1 = np.asarray([[relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif activation == "leaky_relu":
        y1 = np.asarray([[leaky_relu(n) for n in j] for j in x1])
        
    
    x2 = np.dot(y1, w2) + b2
    if activation=="sigmoid":
        y2 = np.asarray([[sigmoid(n) for n in j] for j in x2])
    elif activation=="relu":
        y2 = np.asarray([[relu(n) for n in j] for j in x2])
    elif activation=="leaky_relu":
        y2 = np.asarray([[leaky_relu(n) for n in j] for j in x2])
    x3 = np.dot(y2, w3) + b3
    
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['x3'] = x3

    neural_data['y1'] = y1
    neural_data['y2'] = y2
    neural_data['o'] = softmax(x3)
    
    return neural_data['o']

## Prediction Functions
Helper functions that return predictions, given our model.

In [316]:
def predict_1hl(x, neural_data):
    test = forward_prop_1hl(x,neural_data)
    return np.argmax(test, axis=1)

def predict_2hl(x, neural_data):
    return np.argmax(forward_prop_2hl(x,neural_data), axis=1)

## Cost function

In [329]:
def neuralNetworkCostFunction(output, target):
#     fashionTargetMinusOne = fashionTarget - 1
#     cost = 0
#     for j in range(fashionTrainOutput.shape[0]):
#         cost += np.sum(np.multiply(fashionTarget, np.log10(fashionTrainOutput[j])),np.multiply(fashionTargetMinusOne, (1- np.log10(fashionTrainOutput[j]))))
#     cost = cost*(-1)/fashionTrainOutput.shape[0]
    
    
    
#     partial_cost = -np.log(probs[range(fashionTrainOutput.shape[0]), fashionTarget])
#     partial_cost -= np.log(1-probs[range(fashionTrainOutput.shape[0]), fashionTarget])
#     cost = np.sum(partial_cost)
    
    cost = log_loss(target, output)
    return cost

def meanSquaresCost(fashionTrainOutput, fashionTarget):
    diference = fashionTrainOutput - fashionTarget
    squareDiference = diference ** 2
    n = fashionTrainOutput.shape[0]    
    return (np.sum(squareDiference)/(2*n))

def accuracy (target, params, neural_model, hidden_layers=1):
    right_answers = 0
    target_indexes = np.argmax(target, axis=1)
    if hidden_layers == 1:
        predicted = predict_1hl(params, neural_model)
    if hidden_layers == 2:
        predicted = predict_2hl(params, neural_model)
    n = params.shape[0]
    for i in range (n):
        if (target_indexes[i] == predicted[i]):
            right_answers = right_answers + 1
    return right_answers/n

def construct_confusion_matrix (results, model, X):
    confusion_matrix = np.zeros((10, 10), dtype=int)
    predicted = predict_1hl(X, model)
    n = X.shape[0]
    for i in range (n):
        confusion_matrix[results[i]][predicted[i]] = confusion_matrix[results[i]][predicted[i]] + 1
    return confusion_matrix

## Training and backward propagation

Some notes:

### For one hidden layer:
- x1 = #Output of hidden layer
- x2 = #Output of last layer
- y1 = #Output of hidden layer with activation function
- o = Final output with Softmax

### For TWO hidden layers:
- x1 = #Output of first hidden layer
- x2 = #Output of second hidden layer
- x3 = Output of last layer
- y1 = #Output of first hidden layer with activation function
- y2 = #Output of second hidden layer with activation function
- o = Final output with Softmax


# One hidden layer

Here, we present our code and results achieved by a learning algorithm that uses a neural network with only one hidden layer.

In [318]:
def train_neural_network_1hl(hidden_layer_1_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate, activation="sigmoid"):
    print("Beginning training with ", epochs, " epochs and ", hidden_layer_1_neurons, " hidden neurons.")
    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_1hl(input_dimension, hidden_layer_1_neurons, output_dimension)
    print("Initialized weights")
    
#     Prepares for mini-batch
    batchSize = 600
    start_idx = 0
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    print("Prepared for mini-batch.")
#     Performs Backpropagation
    capitalDelta3 = 0
    capitalDelta2 = 0
    for j in range(epochs):
        excerpt = indices[start_idx:start_idx + batchSize]
        mini_batch_data = trainParams[excerpt]
        miniBatchTarget = trainTarget[excerpt]
#         Performs Forward propagation
        probs = forward_prop_1hl(mini_batch_data, neural_data, activation)

#         Performs Backward propagation

        delta3 = probs - miniBatchTarget

        dW2 =(1./batchSize)* (neural_data['y1'].T).dot(delta3)
        db2 =(1./batchSize)* ( np.sum(delta3, axis=0, keepdims=True))
        delta2 = np.dot(delta3, neural_data['w2'].T)
        aux = neural_data['y1']
        if activation == "sigmoid":
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if activation == "relu":
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if activation == "leaky_relu":
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]

        delta2 = delta2 * aux

        dW1 = (1./batchSize)*np.dot(mini_batch_data.T, delta2)
        db1 = (1./batchSize)*np.sum(delta2, axis=0)
        
        
#          # Performs regularization
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        neural_data['w1'] -= learning_rate * dW1
        neural_data['b1'] -= learning_rate * db1
        neural_data['w2'] -= learning_rate * dW2
        neural_data['b2'] -= learning_rate * db2
        
        if j%50 == 0:
            #         Calculates costs

            cost = neuralNetworkCostFunction(probs, miniBatchTarget)
            validation_probs = forward_prop_1hl(fashionValidationParams, neural_data)
            validation_cost = neuralNetworkCostFunction(validation_probs, fashionValidationTarget)
            print("Ended iteration", j," Cost: ", cost, " Validation cost: ", validation_cost)
        start_idx += batchSize;
        start_idx %= mini_batch_data.shape[0]
    return neural_data

# Two hidden layers

Same as before, but for 2 hidden layers.

In [319]:
def train_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate, activation="sigmoid"):
    print("Beginning training with", epochs, "epochs,", hidden_layer_1_neurons, "neurons (1st layer)", hidden_layer_2_neurons, "neurons (2nd layer)")

    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension)
    print("Initialized weights")
    
#     Prepares for mini-batch
    batchSize = 600
    start_idx = 0
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    print("Prepared for mini-batch.")
#     Performs Backpropagation
    for j in range(epochs):
        excerpt = indices[start_idx:start_idx + batchSize]
        mini_batch_data = trainParams[excerpt]
        miniBatchTarget = trainTarget[excerpt]
#         Performs Forward propagation
        probs = forward_prop_2hl(mini_batch_data, neural_data, activation)
        
#         Performs Backward propagation
        delta4 = probs - miniBatchTarget
        dW3 = 1/batchSize * (neural_data['y2'].T).dot(delta4)
        db3 = 1/batchSize * np.sum(delta4, axis=0, keepdims=True)
        delta3 = delta4.dot(neural_data['w3'].T)
        aux = neural_data['y2']
        if activation=="sigmoid":
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if activation=="relu":
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if activation=="leaky_relu":
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]        
        delta3 = delta3 * aux
        dW2 = 1/batchSize * np.dot(neural_data['y1'].T, delta3)
        db2 = 1/batchSize * np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(neural_data['w2'].T)  #look for issues here
        aux = neural_data['y1']
        if activation=="sigmoid":
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if activation=="relu":
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if activation=="leaky_relu":
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]
        delta2 = delta2 * aux
        dW1 = 1/batchSize * np.dot(mini_batch_data.T, delta2)
        db1 = 1/batchSize * np.sum(delta2, axis=0, keepdims=True)      
        
#          # Performs regularization
#         dW3 += regularization_rate * neural_data['w3']
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        neural_data['w1'] -= learning_rate * dW1
        neural_data['b1'] -= learning_rate * db1
        neural_data['w2'] -= learning_rate * dW2
        neural_data['b2'] -= learning_rate * db2
        neural_data['w3'] -= learning_rate * dW3
        neural_data['b3'] -= learning_rate * db3
        
        if j%50 == 0:
            #         Calculates costs

            cost = neuralNetworkCostFunction(probs, miniBatchTarget)
            validation_probs = forward_prop_2hl(fashionValidationParams, neural_data)
            validation_cost = neuralNetworkCostFunction(validation_probs, fashionValidationTarget)
            print("Ended iteration", j," Cost: ", cost, " Validation cost: ", validation_cost)
        start_idx += 1;
        start_idx %= mini_batch_data.shape[0]
        
    return neural_data


# Testing the neural networks

Now, we'll test our neural networks under multiple circumstances on the validation set, so we can gest the best possible models.

## Problem details
- Input dimension: 28x28 = 784 neurons
- Output dimension: 10 classes = 10 neurons

In [250]:
hidden_layers = 1
input_neurons = 784
output_neurons = 10
hidden_layer_1_neurons = 50
hidden_layer_2_neurons = 15
learning_rate = 0.1
regularization_rate = 0

## Defining our evaluation
Here, we define the function which will use the training we implemented before. It will train 3 models (using the same parameters), to test to an extent the impact of random initialization on the weights matrix. After the training, we check the costs and accuracies of each model (both related to train an validation sets), as well as the average of such values.

In [278]:
def evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="sigmoid"):
    model_1hl_1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================First model trained=====================")
    model_1hl_2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================Second model trained=====================")
    model_1hl_3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================Third model trained=====================")
    probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_1)
    cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
    acc1_train = accuracy(fashionTrainTarget, fashionTrainParams, model_1hl_1)
    acc1_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_1hl_1)
    print("First model ", activation, "validation cost: ", cost1, " acc_train: {0:.4f} ".format(acc1_train), " acc_validation: {0:.4f} ".format(acc1_validation))
    
    probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_2)
    cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
    acc2_train = accuracy(fashionTrainTarget, fashionTrainParams, model_1hl_2)
    acc2_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_1hl_2)
    print("Second model  ", activation, "validation cost: ", cost2, " acc_train:{0:.4f} ".format(acc2_train), " acc_validation: {0:.4f} ".format(acc2_validation))
    
    probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_3)
    cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
    acc3_train = accuracy(fashionTrainTarget, fashionTrainParams, model_1hl_3)
    acc3_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_1hl_3)
    print("Third model ", activation, "validation cost: ", cost3, " acc_train:{0:.4f} ".format(acc3_train), " acc_validation: {0:.4f} ".format(acc3_validation))
    
    
    avg_loss = ((cost1+cost2+cost3)/3)
    avg_acc = ((acc1_validation + acc2_validation + acc3_validation)/3)
    print("Average validation loss: ", avg_loss, " Average validation accuracy: ", avg_acc)
    return

def evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="sigmoid"):
    model_2hl_1 = train_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================First model trained=====================")
    model_2hl_2 = train_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================Second model trained=====================")
    model_2hl_3 = train_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================Third model trained=====================")
    probs_sigmoid1 = forward_prop_2hl(fashionValidationParams, model_2hl_1)
    cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
    acc1_train = accuracy(fashionTrainTarget, fashionTrainParams, model_2hl_1, hidden_layers=2)
    acc1_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_2hl_1, hidden_layers=2)
    print("First model ", activation, "validation cost: ", cost1, " acc_train: {0:.4f} ".format(acc1_train), " acc_validation: {0:.4f} ".format(acc1_validation))
    
    probs_sigmoid2 = forward_prop_2hl(fashionValidationParams, model_2hl_2)
    cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
    acc2_train = accuracy(fashionTrainTarget, fashionTrainParams, model_2hl_2, hidden_layers=2)
    acc2_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_2hl_2, hidden_layers=2)
    print("Second model  ", activation, "validation cost: ", cost2, " acc_train:{0:.4f} ".format(acc2_train), " acc_validation: {0:.4f} ".format(acc2_validation))
    
    probs_sigmoid3 = forward_prop_2hl(fashionValidationParams, model_2hl_3)
    cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
    acc3_train = accuracy(fashionTrainTarget, fashionTrainParams, model_2hl_3, hidden_layers=2)
    acc3_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_2hl_3, hidden_layers=2)
    print("Third model ", activation, "validation cost: ", cost3, " acc_train:{0:.4f} ".format(acc3_train), " acc_validation: {0:.4f} ".format(acc3_validation))
    
    
    avg_loss = ((cost1+cost2+cost3)/3)
    avg_acc = ((acc1_validation + acc2_validation + acc3_validation)/3)
    print("Average validation loss: ", avg_loss, " Average validation accuracy: ", avg_acc)
    return

## First experiments
First, we will train a model using a hidden layer with 50 neurons and 1000 epochs, which is small given the input of 784 neurons. We will run 3 times for each activation function to get an average result (that depends heavily on the initialization of the weights).

## Sigmoid.50

Since everything seems ok, we'll increase the number of epochs to 1000, but mantaining the current amount of neurons.

In [252]:
epochs = 2000
activation = "sigmoid"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  50  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3095678010957243  Validation cost:  2.303000951389828
Ended iteration 50  Cost:  2.10616453431803  Validation cost:  2.1116563286158287
Ended iteration 100  Cost:  1.7660127041980669  Validation cost:  1.778048729409108
Ended iteration 150  Cost:  1.5228663380362844  Validation cost:  1.5365662872767576
Ended iteration 200  Cost:  1.3641502752722172  Validation cost:  1.3778975257467707
Ended iteration 250  Cost:  1.2510247578583324  Validation cost:  1.2652103078619668
Ended iteration 300  Cost:  1.168143734613796  Validation cost:  1.1831522078464725
Ended iteration 350  Cost:  1.1073639262438901  Validation cost:  1.1228406807385178
Ended iteration 400  Cost:  1.0633856492650084  Validation cost:  1.0785116708034872
Ended iteration 450  Cost:  1.0325790540350563  Validation cost:  1.0468535494605153
Ended iteration 500  Cost:  1.0111647902257948  Va

Ended iteration 600  Cost:  0.9115678459965239  Validation cost:  0.9605446726906532
Ended iteration 650  Cost:  0.8970000118468242  Validation cost:  0.9496318568068141
Ended iteration 700  Cost:  0.8865149403750778  Validation cost:  0.9419996332043648
Ended iteration 750  Cost:  0.8782232098970195  Validation cost:  0.9364397295185798
Ended iteration 800  Cost:  0.8714638860865  Validation cost:  0.9324109385672628
Ended iteration 850  Cost:  0.8663766102939  Validation cost:  0.9297573105675492
Ended iteration 900  Cost:  0.8625049463171979  Validation cost:  0.9279543288071915
Ended iteration 950  Cost:  0.858756631389545  Validation cost:  0.9261277561856857
Ended iteration 1000  Cost:  0.8544883626369539  Validation cost:  0.9238158764029166
Ended iteration 1050  Cost:  0.8495444397869906  Validation cost:  0.9210669661499139
Ended iteration 1100  Cost:  0.8434986621776092  Validation cost:  0.9179185709285963
Ended iteration 1150  Cost:  0.8358760281616388  Validation cost:  0.

Now for 2000 epochs:

From these trainings, we see that we achieve best performance generally around 1500 epochs. Now we'll test with other activation functions.

## Relu.50

In [253]:
epochs = 2000
activation = "relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  50  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3023907772287755  Validation cost:  2.3020479915883687
Ended iteration 50  Cost:  1.1720106350637778  Validation cost:  1.765683460081947
Ended iteration 100  Cost:  0.8134728432848074  Validation cost:  1.5422806043742943
Ended iteration 150  Cost:  0.6670156268686959  Validation cost:  1.455052639440825
Ended iteration 200  Cost:  0.5686379914438255  Validation cost:  1.4049963179594882
Ended iteration 250  Cost:  0.49183080584850053  Validation cost:  1.3684917942759816
Ended iteration 300  Cost:  0.4278990737591168  Validation cost:  1.3387356667262964
Ended iteration 350  Cost:  0.37342250044910436  Validation cost:  1.313632286143536
Ended iteration 400  Cost:  0.38090584329533445  Validation cost:  1.2972852267904575
Ended iteration 450  Cost:  0.32321557937099377  Validation cost:  1.2517586152452962
Ended iteration 500  Cost:  0.29228016468543

Ended iteration 550  Cost:  0.2934286787990864  Validation cost:  1.3850787084096676
Ended iteration 600  Cost:  0.27935066363558503  Validation cost:  1.3909649208591561
Ended iteration 650  Cost:  0.2746709636587201  Validation cost:  1.3292639631872696
Ended iteration 700  Cost:  0.23687825935304654  Validation cost:  1.3397742436290734
Ended iteration 750  Cost:  0.22479383828529706  Validation cost:  1.2941935311310693
Ended iteration 800  Cost:  0.20195888808526788  Validation cost:  1.2764857861782242
Ended iteration 850  Cost:  0.1810648805838559  Validation cost:  1.2797762521624643
Ended iteration 900  Cost:  0.20160401215581586  Validation cost:  1.236069437344204
Ended iteration 950  Cost:  0.16933502954263727  Validation cost:  1.2447711326626023
Ended iteration 1000  Cost:  0.1442884758118689  Validation cost:  1.225156770741262
Ended iteration 1050  Cost:  0.11357866924627023  Validation cost:  1.213043566479511
Ended iteration 1100  Cost:  0.10031594634304507  Validatio

## Leaky-relu.50

In [254]:
epochs = 2000
activation = "leaky_relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  50  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.303072272687457  Validation cost:  2.30205808136482
Ended iteration 50  Cost:  1.0681808666430463  Validation cost:  2.0110539301754606
Ended iteration 100  Cost:  0.7502436454467308  Validation cost:  1.9413271614820673
Ended iteration 150  Cost:  0.6487268719290502  Validation cost:  1.8835401225174797
Ended iteration 200  Cost:  0.5486457661532882  Validation cost:  1.873943349388835
Ended iteration 250  Cost:  0.46882519646401816  Validation cost:  1.871568264870146
Ended iteration 300  Cost:  0.4023194568380842  Validation cost:  1.8738037099581664
Ended iteration 350  Cost:  0.34884159310255697  Validation cost:  1.8804156403154104
Ended iteration 400  Cost:  0.3022883031370993  Validation cost:  1.8874781233871714
Ended iteration 450  Cost:  0.26097549447109514  Validation cost:  1.8890856657344082
Ended iteration 500  Cost:  0.2515211147907723 

Ended iteration 550  Cost:  0.2210748581665382  Validation cost:  1.8879569888874463
Ended iteration 600  Cost:  0.20688651731369367  Validation cost:  1.8352735894156076
Ended iteration 650  Cost:  0.18060055239713566  Validation cost:  1.836152653121338
Ended iteration 700  Cost:  0.15740029728607483  Validation cost:  1.8382645532729334
Ended iteration 750  Cost:  0.1876991499776744  Validation cost:  1.8386439690286418
Ended iteration 800  Cost:  0.1556908973971127  Validation cost:  1.863695192903362
Ended iteration 850  Cost:  0.09171970282067309  Validation cost:  1.854861458704534
Ended iteration 900  Cost:  0.08732834654998634  Validation cost:  1.8722255316294945
Ended iteration 950  Cost:  0.07310613412769776  Validation cost:  1.8359799478059682
Ended iteration 1000  Cost:  0.06504449917381032  Validation cost:  1.852034395348534
Ended iteration 1050  Cost:  0.05858342443478149  Validation cost:  1.861032134745804
Ended iteration 1100  Cost:  0.0530612035333704  Validation 

# Changing hidden layer size
Now we will experiment changing the amount of neurons on the hidden layer and see the impact on different activation functions.

In [255]:
hidden_layer_1_neurons = 100
epochs = 2000
learning_rate = 0.1

### Sigmoid.100

In [256]:
activation = "sigmoid"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  100  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3075846360878094  Validation cost:  2.30166912236039
Ended iteration 50  Cost:  2.0267707646786883  Validation cost:  2.0328058382214964
Ended iteration 100  Cost:  1.6260374943217202  Validation cost:  1.6394500177382314
Ended iteration 150  Cost:  1.3668992618600615  Validation cost:  1.3814861675491574
Ended iteration 200  Cost:  1.2118115674610441  Validation cost:  1.2254510768741862
Ended iteration 250  Cost:  1.1039309143059788  Validation cost:  1.117058627982523
Ended iteration 300  Cost:  1.0213709305604552  Validation cost:  1.0349520143313642
Ended iteration 350  Cost:  0.9557367847952559  Validation cost:  0.9709088444869152
Ended iteration 400  Cost:  0.9026429214323518  Validation cost:  0.9205562116641143
Ended iteration 450  Cost:  0.8593947219558076  Validation cost:  0.8808794202834821
Ended iteration 500  Cost:  0.8244829578490879 

Ended iteration 600  Cost:  0.6951197636222699  Validation cost:  0.7915350376262178
Ended iteration 650  Cost:  0.6787017179992102  Validation cost:  0.7792206987458926
Ended iteration 700  Cost:  0.6645501059825832  Validation cost:  0.7688406604169998
Ended iteration 750  Cost:  0.6520700624699882  Validation cost:  0.7599146674502072
Ended iteration 800  Cost:  0.6408594229418599  Validation cost:  0.7521261811847595
Ended iteration 850  Cost:  0.63064139888207  Validation cost:  0.745253457771234
Ended iteration 900  Cost:  0.621220314263828  Validation cost:  0.7391307510350252
Ended iteration 950  Cost:  0.6124567509479087  Validation cost:  0.7336303420924526
Ended iteration 1000  Cost:  0.6042519861535273  Validation cost:  0.7286551900572344
Ended iteration 1050  Cost:  0.5965339063048809  Validation cost:  0.724132883537785
Ended iteration 1100  Cost:  0.589252432980419  Validation cost:  0.7200097684148276
Ended iteration 1150  Cost:  0.5824008654522654  Validation cost:  0

### Relu.100

In [257]:
activation = "relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  100  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.31402656905383  Validation cost:  2.2964020686328404
Ended iteration 50  Cost:  1.070137640987048  Validation cost:  1.6904258337124654
Ended iteration 100  Cost:  0.752082822021523  Validation cost:  1.5000885217389575
Ended iteration 150  Cost:  0.6084849469856618  Validation cost:  1.4437978529024849
Ended iteration 200  Cost:  0.5103456280006297  Validation cost:  1.4234569545373597
Ended iteration 250  Cost:  0.436823462858009  Validation cost:  1.413487899663024
Ended iteration 300  Cost:  0.3787286451556218  Validation cost:  1.4062283869430157
Ended iteration 350  Cost:  0.3307037871949107  Validation cost:  1.3992621463342338
Ended iteration 400  Cost:  0.3298164172975005  Validation cost:  1.3889777930032938
Ended iteration 450  Cost:  0.2881774430799724  Validation cost:  1.366435786221274
Ended iteration 500  Cost:  0.2540882189443649  Val

Ended iteration 550  Cost:  0.23887321216331306  Validation cost:  1.2692129140654962
Ended iteration 600  Cost:  0.2100780713875348  Validation cost:  1.2446365185396675
Ended iteration 650  Cost:  0.19405603951345957  Validation cost:  1.2067534256930832
Ended iteration 700  Cost:  0.16989987122544836  Validation cost:  1.1828971867625815
Ended iteration 750  Cost:  0.24802423503125492  Validation cost:  1.1989749381013757
Ended iteration 800  Cost:  0.137725706886031  Validation cost:  1.1667894581703755
Ended iteration 850  Cost:  0.13418805705939207  Validation cost:  1.1386395886656777
Ended iteration 900  Cost:  0.1150490277989385  Validation cost:  1.1299986444516643
Ended iteration 950  Cost:  0.1954508924312723  Validation cost:  1.1456096466855739
Ended iteration 1000  Cost:  0.09761054720857548  Validation cost:  1.1042954055812313
Ended iteration 1050  Cost:  0.19208623484221105  Validation cost:  1.100527385936976
Ended iteration 1100  Cost:  0.08687881441473093  Validati

###  Leaky Relu.100

In [258]:
activation = "leaky_relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  100  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.2802938851389927  Validation cost:  2.3179310101475283
Ended iteration 50  Cost:  0.9355064264957242  Validation cost:  1.8997123034784462
Ended iteration 100  Cost:  0.6990077051114352  Validation cost:  1.8037354016280653
Ended iteration 150  Cost:  0.6051762776325995  Validation cost:  1.7579096597279347
Ended iteration 200  Cost:  0.529707563087128  Validation cost:  1.7428700666579964
Ended iteration 250  Cost:  0.46718578987986104  Validation cost:  1.7256272278055513
Ended iteration 300  Cost:  0.4329180291428354  Validation cost:  1.7167262797694522
Ended iteration 350  Cost:  0.376384508786149  Validation cost:  1.707025289799464
Ended iteration 400  Cost:  0.34623087549452075  Validation cost:  1.6909719012096693
Ended iteration 450  Cost:  0.30913387479308535  Validation cost:  1.6791930290868688
Ended iteration 500  Cost:  0.27730891105293

Ended iteration 550  Cost:  0.21732342620926826  Validation cost:  1.6003572515412376
Ended iteration 600  Cost:  0.15160135279062156  Validation cost:  1.622497408234889
Ended iteration 650  Cost:  0.13816318951531845  Validation cost:  1.65066362962536
Ended iteration 700  Cost:  0.1098576949287344  Validation cost:  1.624615080304567
Ended iteration 750  Cost:  0.10335737667237721  Validation cost:  1.6161478758497148
Ended iteration 800  Cost:  0.08274725427577341  Validation cost:  1.599358874375777
Ended iteration 850  Cost:  0.07262781515317902  Validation cost:  1.6054790296052208
Ended iteration 900  Cost:  0.06457756025865008  Validation cost:  1.6040660524992232
Ended iteration 950  Cost:  0.05772439141885362  Validation cost:  1.6026961678824918
Ended iteration 1000  Cost:  0.0519588564003288  Validation cost:  1.6000716384063272
Ended iteration 1050  Cost:  0.04702100555300621  Validation cost:  1.597831744059009
Ended iteration 1100  Cost:  0.042786620260297115  Validatio

## Now 500 neurons
Expanding to 500 neurons on the hidden layer...

In [259]:
hidden_layer_1_neurons = 500

### Sigmoid.500

In [260]:
activation = "sigmoid"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  500  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3034062422651496  Validation cost:  2.283836165931808
Ended iteration 50  Cost:  1.4898286081689696  Validation cost:  1.5313818152672172
Ended iteration 100  Cost:  1.0858993069291962  Validation cost:  1.1497947197672445
Ended iteration 150  Cost:  0.8937476133945215  Validation cost:  0.9722722534266183
Ended iteration 200  Cost:  0.781985685464608  Validation cost:  0.8730845707494518
Ended iteration 250  Cost:  0.7082765065231559  Validation cost:  0.81110199702847
Ended iteration 300  Cost:  0.6551739512600292  Validation cost:  0.7692382897673442
Ended iteration 350  Cost:  0.6140956195576965  Validation cost:  0.7390831044020313
Ended iteration 400  Cost:  0.5804393697939575  Validation cost:  0.716139895741206
Ended iteration 450  Cost:  0.5516066947293081  Validation cost:  0.6978834669603703
Ended iteration 500  Cost:  0.5260728546113802  V

Ended iteration 550  Cost:  0.5361788505821514  Validation cost:  0.6671450078026562
Ended iteration 600  Cost:  0.5165974641571971  Validation cost:  0.6565575084870661
Ended iteration 650  Cost:  0.49835595762517876  Validation cost:  0.6472867364075642
Ended iteration 700  Cost:  0.48119539000243494  Validation cost:  0.6390868074005868
Ended iteration 750  Cost:  0.464937522996615  Validation cost:  0.6317872791963306
Ended iteration 800  Cost:  0.44945582129027095  Validation cost:  0.6252655310606624
Ended iteration 850  Cost:  0.4346576897349567  Validation cost:  0.6194298594417511
Ended iteration 900  Cost:  0.42047340306766445  Validation cost:  0.6142089697781502
Ended iteration 950  Cost:  0.40684910279809905  Validation cost:  0.6095453715155056
Ended iteration 1000  Cost:  0.39374229196572685  Validation cost:  0.6053911878925571
Ended iteration 1050  Cost:  0.3811188638296315  Validation cost:  0.6017054672656885
Ended iteration 1100  Cost:  0.3689510652633768  Validatio

### Relu.500

In [261]:
activation = "relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  500  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.285867802875108  Validation cost:  2.251106274833747
Ended iteration 50  Cost:  0.7555120408045355  Validation cost:  1.8293496187469034
Ended iteration 100  Cost:  0.566071835213495  Validation cost:  2.3178235513533925
Ended iteration 150  Cost:  0.523537404582975  Validation cost:  2.686203144285995
Ended iteration 200  Cost:  0.41954495127642494  Validation cost:  2.8668642051846116
Ended iteration 250  Cost:  0.3634583731512732  Validation cost:  3.000748294175635
Ended iteration 300  Cost:  0.3180465386612152  Validation cost:  3.0938930347883584
Ended iteration 350  Cost:  0.28108866180349995  Validation cost:  3.173810752992934
Ended iteration 400  Cost:  0.2742046236352307  Validation cost:  3.2784662311461066
Ended iteration 450  Cost:  0.23589741663167482  Validation cost:  3.2160353871688043
Ended iteration 500  Cost:  0.2024393578535564  

Ended iteration 550  Cost:  0.16847179229479328  Validation cost:  3.005690248787712
Ended iteration 600  Cost:  0.1513955366801076  Validation cost:  3.0212705514401446
Ended iteration 650  Cost:  0.13568603102177537  Validation cost:  3.035383944359366
Ended iteration 700  Cost:  0.12143393392433971  Validation cost:  3.0490874217964437
Ended iteration 750  Cost:  0.10813880424272014  Validation cost:  3.062829376579991
Ended iteration 800  Cost:  0.0960954483385508  Validation cost:  3.077013182132618
Ended iteration 850  Cost:  0.08540911850796502  Validation cost:  3.09154449007318
Ended iteration 900  Cost:  0.07645193380166708  Validation cost:  3.10638372994163
Ended iteration 950  Cost:  0.06935137990008686  Validation cost:  3.1223673276056023
Ended iteration 1000  Cost:  0.06323804841646946  Validation cost:  3.1387930177890655
Ended iteration 1050  Cost:  0.05782404945176374  Validation cost:  3.154579807043119
Ended iteration 1100  Cost:  0.05303051588450811  Validation co

### Leaky Relu.500

In [262]:
activation = "leaky_relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  500  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.354094520926173  Validation cost:  2.3611244015574604
Ended iteration 50  Cost:  0.6732472793885368  Validation cost:  2.0938394898767916
Ended iteration 100  Cost:  0.5105147189504344  Validation cost:  2.3928588209391193
Ended iteration 150  Cost:  0.40039316159939964  Validation cost:  2.5656836274066124
Ended iteration 200  Cost:  0.3286369229122634  Validation cost:  2.6455891150574598
Ended iteration 250  Cost:  0.2763058251889412  Validation cost:  2.6815404353838326
Ended iteration 300  Cost:  0.235881515165102  Validation cost:  2.7002023293037505
Ended iteration 350  Cost:  0.2036873846484668  Validation cost:  2.713120402383332
Ended iteration 400  Cost:  0.1765714854042014  Validation cost:  2.7074483912596023
Ended iteration 450  Cost:  0.15380407832152485  Validation cost:  2.741410002558635
Ended iteration 500  Cost:  0.1274641132783617

Ended iteration 550  Cost:  0.18216471284191427  Validation cost:  3.916081888530625
Ended iteration 600  Cost:  0.14774618748250448  Validation cost:  3.9274820378934603
Ended iteration 650  Cost:  0.123053520767613  Validation cost:  3.9908349129956253
Ended iteration 700  Cost:  0.10920414269932008  Validation cost:  4.0412521286527
Ended iteration 750  Cost:  0.13958007412177798  Validation cost:  4.105008468393131
Ended iteration 800  Cost:  0.0803653856090949  Validation cost:  4.1064826358961755
Ended iteration 850  Cost:  0.07227612031126898  Validation cost:  4.152692438968041
Ended iteration 900  Cost:  0.06511870968929237  Validation cost:  4.197681756775598
Ended iteration 950  Cost:  0.058989207093251894  Validation cost:  4.242205034258431
Ended iteration 1000  Cost:  0.05370611681269295  Validation cost:  4.285626847145082
Ended iteration 1050  Cost:  0.04912436454084663  Validation cost:  4.326604402741142
Ended iteration 1100  Cost:  0.04513729983067884  Validation cos

# Testing for two hidden layers

So far, we've tested many possibilities for one hidden layer. So right now we'll try to add more complexity to our model by adding an additional hidden layer.

Similar to proccess done previously, we'll experiment with different number of neurons for each hidden layer, and trying with different activation functions as well.

In [279]:
hidden_layer_1_neurons = 50
hidden_layer_2_neurons = 50
epochs = 2000
learning_rate = 0.1

### Sigmoid.50.50

In [320]:
activation = "sigmoid"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)

Beginning training with 2000 epochs, 300 neurons (1st layer) 100 neurons (2nd layer)
Initialized weights
Prepared for mini-batch.


KeyboardInterrupt: 

### Relu.50.50

In [322]:
activation = "relu"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with 2000 epochs, 300 neurons (1st layer) 100 neurons (2nd layer)
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.2687286017487622  Validation cost:  2.3535397996251906
Ended iteration 50  Cost:  0.679235185093714  Validation cost:  2.0506281905465884
Ended iteration 100  Cost:  0.4206143164577436  Validation cost:  2.076417059632718
Ended iteration 150  Cost:  0.23487308469263865  Validation cost:  2.0531358308813115
Ended iteration 200  Cost:  0.4447015261223381  Validation cost:  2.060451616860988
Ended iteration 250  Cost:  0.20834096624192255  Validation cost:  2.0413991231778184
Ended iteration 300  Cost:  0.18310420815952416  Validation cost:  1.9956985549494426
Ended iteration 350  Cost:  0.08433081823124193  Validation cost:  2.0037937667776453
Ended iteration 400  Cost:  0.2275553867676305  Validation cost:  2.083040254123355
Ended iteration 450  Cost:  0.16804518521370654  Validation cost:  1.7491771678481327
Ended iteration 500  C

KeyboardInterrupt: 

### Leaky Relu.50.50

In [282]:
activation = "leaky_relu"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)

Beginning training with 2000 epochs, 50 neurons (1st layer) 50 neurons (2nd layer)
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.298928492627872  Validation cost:  2.545005117291657
Ended iteration 50  Cost:  0.850490757393525  Validation cost:  2.3080570543404084
Ended iteration 100  Cost:  0.6256890178426208  Validation cost:  2.2681462698271826
Ended iteration 150  Cost:  0.5236785081913394  Validation cost:  2.2469759798322855
Ended iteration 200  Cost:  0.46379281586277993  Validation cost:  2.202677245990259
Ended iteration 250  Cost:  0.4232928033383241  Validation cost:  2.1826249645959477
Ended iteration 300  Cost:  0.36863268156473455  Validation cost:  2.1641735707778333
Ended iteration 350  Cost:  0.32698648267455555  Validation cost:  2.1473067292388746
Ended iteration 400  Cost:  0.27959147871225615  Validation cost:  2.1388076271298155
Ended iteration 450  Cost:  0.2589524069223697  Validation cost:  2.1266667050506
Ended iteration 500  Cost:  

Ended iteration 500  Cost:  0.2688538974852244  Validation cost:  2.2466567378341917
Ended iteration 550  Cost:  0.25327958181612475  Validation cost:  2.2957186815719424
Ended iteration 600  Cost:  0.5695517747422975  Validation cost:  2.268868755952722
Ended iteration 650  Cost:  0.2481069835574419  Validation cost:  2.2955225428535324
Ended iteration 700  Cost:  0.17906728150986018  Validation cost:  2.276368427335762
Ended iteration 750  Cost:  0.19890564808299602  Validation cost:  2.3133329310437523
Ended iteration 800  Cost:  0.11677642309712832  Validation cost:  2.2941386092575646
Ended iteration 850  Cost:  0.07499906069290488  Validation cost:  2.2910402702343657
Ended iteration 900  Cost:  0.08038644405521743  Validation cost:  2.299560676982784
Ended iteration 950  Cost:  0.08726790125029922  Validation cost:  2.313635829864952
Ended iteration 1000  Cost:  0.19791456594435242  Validation cost:  2.302178137785097
Ended iteration 1050  Cost:  0.1514163461885321  Validation c

## Changing sizes

In [323]:
hidden_layer_1_neurons = 300
hidden_layer_2_neurons = 100
learning_rate = 0.1

### Sigmoid.300.100

In [294]:
activation = "sigmoid"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)

Beginning training with 2000 epochs, 300 neurons (1st layer) 100 neurons (2nd layer)
[[ 0.26983761  0.16238625 -1.59434205 ... -0.43984695 -2.06239805
  -0.4719023 ]
 [-1.60754322 -0.81579097 -1.04036327 ...  0.50308106 -1.67073445
  -0.02778651]
 [ 1.36162409 -0.99414746 -0.20564067 ...  1.50946234 -1.58177584
   0.27925043]
 ...
 [-0.50577558 -0.46066772  0.13750424 ... -0.18905299 -0.09514519
   0.12460211]
 [-1.72625585  1.26505764 -0.53738858 ... -2.29878714 -0.95999469
  -1.91881162]
 [-0.06572883  0.87396181 -0.52427227 ...  0.64302363  0.43359024
   0.80912879]]
[[-0.04775898 -1.32359362 -0.01330956 ...  1.16476138  0.25804786
   0.22165842]
 [ 0.54766561  0.27563297 -0.28015877 ... -0.08284414 -0.27742385
   1.23578733]
 [ 1.11378827 -0.66713116  1.0357024  ...  1.42014527 -1.21995926
  -0.06914371]
 ...
 [ 0.82660465  0.1982049  -2.76164565 ...  0.66763665 -0.63434863
   0.16341093]
 [ 1.46351417 -0.2187261  -1.70912165 ...  0.16489435  1.1477964
  -0.35386467]
 [-1.05051825 

[[1.25119401e-07 3.31150133e-06 9.71231620e-01 2.65818160e-08
  9.96449772e-09 6.56643106e-09 2.88925867e-08 1.48614922e-05
  6.94350447e-11 2.87500100e-02]
 [7.95241290e-05 3.86534405e-02 9.57264836e-01 7.04012921e-09
  4.89327354e-07 2.65423170e-04 8.07847386e-06 3.49016630e-03
  1.40242818e-10 2.38035033e-04]
 [1.06784743e-07 1.05405141e-05 9.99934691e-01 8.21141611e-09
  1.84698420e-09 1.86947820e-08 7.98637387e-10 5.41832832e-05
  1.29112269e-12 4.48666193e-07]
 [6.83081968e-08 1.36666396e-05 9.99679160e-01 1.29891479e-07
  5.33964895e-10 9.69895684e-08 7.58015308e-11 1.69858929e-04
  8.31317205e-09 1.37010692e-04]
 [3.53274084e-06 2.15572514e-02 9.76669527e-01 2.41866358e-08
  1.71945327e-06 8.83711106e-06 4.13523665e-06 1.17692667e-04
  1.57883551e-09 1.63727880e-03]]
[[1 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0]
 [0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 1 0 0]]
Ended iteration 0  Cost:  11.764101551180609  Validation cost:  11.125691719882969


KeyboardInterrupt: 

### Relu.300.100

In [324]:
activation = "relu"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with 2000 epochs, 300 neurons (1st layer) 100 neurons (2nd layer)
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3864051072012034  Validation cost:  2.4116259826032866
Ended iteration 50  Cost:  0.821666245392131  Validation cost:  2.2101837942763436
Ended iteration 100  Cost:  0.6092624432487515  Validation cost:  2.2237955031243044
Ended iteration 150  Cost:  0.4992799055064407  Validation cost:  2.2304797354633337
Ended iteration 200  Cost:  0.4007449376528482  Validation cost:  2.262533232818289
Ended iteration 250  Cost:  0.3534688616602495  Validation cost:  2.2953720628139878
Ended iteration 300  Cost:  0.29105093898267803  Validation cost:  2.2956193455901874
Ended iteration 350  Cost:  0.2704222151311323  Validation cost:  2.299283376645753
Ended iteration 400  Cost:  0.2856509011164307  Validation cost:  2.3856725977307742
Ended iteration 450  Cost:  0.23171967206667693  Validation cost:  2.2857982783440765
Ended iteration 500  Cos

Ended iteration 500  Cost:  0.18329599220754147  Validation cost:  2.23003480881589
Ended iteration 550  Cost:  0.1758053454156528  Validation cost:  2.241828698690652
Ended iteration 600  Cost:  0.2675383470684906  Validation cost:  2.2520411665317908
Ended iteration 650  Cost:  0.13479015810833003  Validation cost:  2.2667202986006294
Ended iteration 700  Cost:  0.07655198149775104  Validation cost:  2.277391799554831
Ended iteration 750  Cost:  0.043980541823972236  Validation cost:  2.2620788406145698
Ended iteration 800  Cost:  0.035259254690075965  Validation cost:  2.2617595498514227
Ended iteration 850  Cost:  0.06389056070057937  Validation cost:  2.157462681375806
Ended iteration 900  Cost:  0.03115277296736181  Validation cost:  2.240040697999623
Ended iteration 950  Cost:  0.026071066612899537  Validation cost:  2.267897166191547
Ended iteration 1000  Cost:  0.02210633327288129  Validation cost:  2.2693127576053214
Ended iteration 1050  Cost:  0.0250451597809862  Validation

### Leaky Relu.300.100

In [326]:
activation = "leaky_relu"
learning_rate=0.1
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with 2000 epochs, 300 neurons (1st layer) 100 neurons (2nd layer)
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.2728287369647764  Validation cost:  2.34349077765117
Ended iteration 50  Cost:  0.8168484532593815  Validation cost:  2.2000867617752964
Ended iteration 100  Cost:  0.6336933044223657  Validation cost:  2.1865180006550538
Ended iteration 150  Cost:  0.510937307822785  Validation cost:  2.206836798233028
Ended iteration 200  Cost:  0.40786948245382104  Validation cost:  2.197083473700934
Ended iteration 250  Cost:  0.4245267299813025  Validation cost:  2.2325149668506583
Ended iteration 300  Cost:  0.33001564147193874  Validation cost:  2.202452973329842
Ended iteration 350  Cost:  0.2841736892173489  Validation cost:  2.2211851789716324
Ended iteration 400  Cost:  0.26415530212795746  Validation cost:  2.2307895209673423
Ended iteration 450  Cost:  0.31127951163536677  Validation cost:  2.2135604793758743
Ended iteration 500  Cost

Ended iteration 500  Cost:  0.22257366563136982  Validation cost:  2.3905564670011774
Ended iteration 550  Cost:  0.20350169377359137  Validation cost:  2.446320315998636
Ended iteration 600  Cost:  0.3554630949016934  Validation cost:  2.4606086464837325
Ended iteration 650  Cost:  0.1720656580565312  Validation cost:  2.406999815100992
Ended iteration 700  Cost:  0.14152923268060286  Validation cost:  2.400849142284032
Ended iteration 750  Cost:  0.09269155431387496  Validation cost:  2.3992248458081313
Ended iteration 800  Cost:  0.06338547203727461  Validation cost:  2.406325265142734
Ended iteration 850  Cost:  0.049052589532699274  Validation cost:  2.39456897924934
Ended iteration 900  Cost:  0.05539426692419303  Validation cost:  2.369177257120201
Ended iteration 950  Cost:  0.05329379454302604  Validation cost:  2.3748266861858265
Ended iteration 1000  Cost:  0.20726723643113626  Validation cost:  2.2171844215674543
Ended iteration 1050  Cost:  0.07251831459953584  Validation 

## Changing sizes

In [327]:
hidden_layer_1_neurons = 500
hidden_layer_2_neurons = 250

### Sigmoid.500.250

In [328]:
activation = "sigmoid"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)

Beginning training with 2000 epochs, 500 neurons (1st layer) 250 neurons (2nd layer)
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.5060392174924924  Validation cost:  2.3222700203825317
Ended iteration 50  Cost:  2.1963213023855643  Validation cost:  2.213606606489974
Ended iteration 100  Cost:  2.0259479022905458  Validation cost:  2.0513768422547844
Ended iteration 150  Cost:  1.7469371714794049  Validation cost:  1.779189637344452
Ended iteration 200  Cost:  1.4862820216339523  Validation cost:  1.5255935615004819
Ended iteration 250  Cost:  1.3109347358780963  Validation cost:  1.3475511767620574
Ended iteration 300  Cost:  1.2044598588133828  Validation cost:  1.2174683095542582
Ended iteration 350  Cost:  1.1170981800022852  Validation cost:  1.126886847757941
Ended iteration 400  Cost:  1.0527373272121416  Validation cost:  1.054226767146927
Ended iteration 450  Cost:  1.010226265458643  Validation cost:  0.9941326998038151
Ended iteration 500  Cost:  

KeyboardInterrupt: 

### Relu.500.250

In [None]:
activation = "relu"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


### Leaky Relu.500.250

In [None]:
activation = "leaky_relu"
evaluate_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


# Using test data
Right now we'll see how our model handles the test data

In [332]:
fashionTestParams, fashionTestTarget = formatArray(fashionTestDataset, 0)
fashionTestTarget = createTarget(fashionTestTarget)
fashionTestParams = fashionTestParams/255

model_1hl_1 = train_neural_network_1hl(500, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  500  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3991860136685728  Validation cost:  2.3316026716316345
Ended iteration 50  Cost:  1.5644331754905838  Validation cost:  1.571128223576728
Ended iteration 100  Cost:  1.1539204601947681  Validation cost:  1.172093827959
Ended iteration 150  Cost:  0.9568701770049237  Validation cost:  0.9876451768169804
Ended iteration 200  Cost:  0.8411868709500163  Validation cost:  0.8845913860204307
Ended iteration 250  Cost:  0.7637288277711356  Validation cost:  0.8196016581084586
Ended iteration 300  Cost:  0.7070624230514607  Validation cost:  0.7752689352649349
Ended iteration 350  Cost:  0.6627047565406443  Validation cost:  0.7431396357072231
Ended iteration 400  Cost:  0.626085104168689  Validation cost:  0.7186531053100139
Ended iteration 450  Cost:  0.5946070622057511  Validation cost:  0.6992136889175222
Ended iteration 500  Cost:  0.5667522917347436  Va

In [None]:
probs_sigmoid1 = forward_prop_1hl(fashionTestParams, model_1hl_1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionTestTarget)
acc1_test = accuracy(fashionTestTarget, fashionTestParams, model_1hl_1)
print("Model:", activation, "Test cost:", cost1, "Test Accuracy: {0:.4f} ".format(acc1_test))

In [None]:
conf_matrix = construct_confusion_matrix(fashionTestTarget, model_1hl_1, fashionTestParams)
p_print(conf_matrix)