# Introduction
Here, we'll explore neural networks applied to the fashion-mnist problem.

In [184]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss
import csv
import pandas as pd

In [185]:
# Functions

def formatArray (dataFrame, columnToExtract) :
    array = dataFrame.values
    target = array[:,columnToExtract]
    params = np.delete(array, columnToExtract, axis = 1)
    return params, target

def loadFashionTrainData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_train.csv")

def loadFashionTestData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_test.csv")

def split_train_test(data, test_ratio):
    np.random.seed(42)
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

def createTarget (target):
    results = np.zeros((target.size, 10), dtype=int)
    for i in range(10):
        for j in range(target.size):
            if (target[j] != i):
                results[j][i - 1] = 0
            else:
                results[j][i - 1] = 1
    return results

def p_print(a):
    for x in a:
        print(*x, sep=" ")
    return

# The dataset
First and foremost, we'll open train and test data. The training data is split to obtain validation items and the the target values are also separated from the original data.

In [186]:
fashionTrainDataset = loadFashionTrainData()
fashionTestDataset = loadFashionTestData()
fashionTrain, fashionValidation = split_train_test(fashionTrainDataset, 0.2)
fashionTrainParams, fashionTrainTarget = formatArray(fashionTrain, 0)
fashionTrainTarget = createTarget(fashionTrainTarget)
fashionTrainParams = fashionTrainParams/255
fashionValidationParams, fashionValidationTarget = formatArray(fashionValidation, 0)
fashionValidationTarget = createTarget(fashionValidationTarget)
fashionValidationParams = fashionValidationParams/255


# Activation and Softmax Functions
First, we'll start by implementing some useful functions seen in class

In [187]:
def sigmoid(n):
    return (1/(1+np.exp(-n)))

def derivative_sigmoid(n):
    x = sigmoid(n)
    return x * (1 - x)

In [188]:
def relu(n):
    if n < 0:
        return 0
    return n

def derivative_relu(n):
    if n < 0:
        return 0
    return 1

In [189]:
def leaky_relu(n):
    if n > 0:
        return n
    return 0.01 * n

def derivative_leaky_relu(n):
    if n < 0:
        return 0.01
    return 1

In [190]:
def softmax(n):
    exp = np.exp(n)
    test_sum = np.sum(exp, axis=1, keepdims=True)
    return exp/test_sum

## Forward Propagation
In this section, we define forward propagation related functions.

In [212]:
def initialize_1hl(input_dimension,hidden_layer_1_neurons, output_dimension):
    neural_data = {}
    np.random.seed()
    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, output_dimension)/ np.sqrt(input_dimension)
    
    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, output_dimension))
    return neural_data

def initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension):
    neural_data = {}
    np.random.seed(0)

    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, hidden_layer_2_neurons)/ np.sqrt(hidden_layer_1_neurons)
    neural_data['w3'] = np.random.randn(hidden_layer_2_neurons, output_dimension)/ np.sqrt(hidden_layer_2_neurons)

    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, hidden_layer_2_neurons))
    neural_data['b3'] = np.zeros((1, output_dimension))
    return neural_data
    
def forward_prop_1hl(x, neural_data, activation="sigmoid"):
    w1 , w2, b1, b2 = neural_data['w1'], neural_data['w2'], neural_data['b1'], neural_data['b2']
    x1 = np.dot(x, w1) + b1 #Output of hidden layer
    if activation == "sigmoid":
        y1 = np.asarray([[sigmoid(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif activation == "relu":
        y1 = np.asarray([[relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif activation == "leaky_relu":
        y1 = np.asarray([[leaky_relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    x2 = np.dot(y1, w2) + b2 #Output of last layer
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['y1'] = y1
    neural_data['o'] = softmax(x2)  # Final output with softmax

    return neural_data['o']

def forward_prop_2hl(x, neural_data):
    w1 , w2, w3, b1, b2, b3 = neural_data['w1'], neural_data['w2'], neural_data['w3'], neural_data['b1'], neural_data['b2'], neural_data['b3']

    x1 = np.dot(x, w1) + b1
    y1 = np.asarray([[relu(n) for n in j] for j in x1])
    x2 = np.dot(a1, w2) + b2
    y2 = np.asarray([[relu(n) for n in j] for j in x2])
    x3 = np.dot(a2, w3) + b3
    
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['x3'] = x3

    neural_data['y1'] = y1
    neural_data['y2'] = y2
    
    neural_data['o'] = softmax(x3)
    return neural_data['o']

## Prediction Functions
Helper functions that return predictions, given our model.

In [192]:
def predict_1hl(x, neural_data):
    test = forward_prop_1hl(x,neural_data)
    return np.argmax(test, axis=1)

def predict_2hl(x, neural_data):
    return np.argmax(forward_prop_2hl(x,neural_data), axis=1)

## Cost function

In [200]:
def neuralNetworkCostFunction(output, target):
#     fashionTargetMinusOne = fashionTarget - 1
#     cost = 0
#     for j in range(fashionTrainOutput.shape[0]):
#         cost += np.sum(np.multiply(fashionTarget, np.log10(fashionTrainOutput[j])),np.multiply(fashionTargetMinusOne, (1- np.log10(fashionTrainOutput[j]))))
#     cost = cost*(-1)/fashionTrainOutput.shape[0]
    
    
    
#     partial_cost = -np.log(probs[range(fashionTrainOutput.shape[0]), fashionTarget])
#     partial_cost -= np.log(1-probs[range(fashionTrainOutput.shape[0]), fashionTarget])
#     cost = np.sum(partial_cost)
    
    cost = log_loss(target, output)
    return cost

def meanSquaresCost(fashionTrainOutput, fashionTarget):
    diference = fashionTrainOutput - fashionTarget
    squareDiference = diference ** 2
    n = fashionTrainOutput.shape[0]    
    return (np.sum(squareDiference)/(2*n))

def accuracy (target, params, neural_model, hidden_layers=1):
    right_answers = 0
    target_indexes = np.argmax(target, axis=1)
    if hidden_layers == 1:
        predicted = predict_1hl(params, neural_model)
    if hidden_layers == 2:
        predicted = predict_2hl(params, neural_model)
    n = params.shape[0]
    for i in range (n):
        if (target_indexes[i] == predicted[i]):
            right_answers = right_answers + 1
    return right_answers/n

## Training and backward propagation

Some notes:

### For one hidden layer:
- x1 = #Output of hidden layer
- x2 = #Output of last layer
- y1 = #Output of hidden layer with activation function
- o = Final output with Softmax

### For TWO hidden layers:
- x1 = #Output of first hidden layer
- x2 = #Output of second hidden layer
- x3 = Output of last layer
- y1 = #Output of first hidden layer with activation function
- y2 = #Output of second hidden layer with activation function
- o = Final output with Softmax


# One hidden layer

Here, we present our code and results achieved by a learning algorithm that uses a neural network with only one hidden layer.

In [194]:
def train_neural_network_1hl(hidden_layer_1_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate, activation="sigmoid"):
    print("Beginning training with ", epochs, " epochs and ", hidden_layer_1_neurons, " hidden neurons.")
    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_1hl(input_dimension, hidden_layer_1_neurons, output_dimension)
    print("Initialized weights")
    sigmoid = True
    relu = False
    leaky_relu = False
    
#     Prepares for mini-batch
    batchSize = 600
    start_idx = 0
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    print("Prepared for mini-batch.")
#     Performs Backpropagation
    capitalDelta3 = 0
    capitalDelta2 = 0
    for j in range(epochs):
        excerpt = indices[start_idx:start_idx + batchSize]
        mini_batch_data = trainParams[excerpt]
        miniBatchTarget = trainTarget[excerpt]
#         Performs Forward propagation
        probs = forward_prop_1hl(mini_batch_data, neural_data, activation)
    

#         Performs Backward propagation

        delta3 = probs - miniBatchTarget

        dW2 =(1./batchSize)* (neural_data['y1'].T).dot(delta3)
        db2 =(1./batchSize)* ( np.sum(delta3, axis=0, keepdims=True))
        delta2 = np.dot(delta3, neural_data['w2'].T)
        aux = neural_data['y1']
        if activation == "sigmoid":
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if activation == "relu":
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if activation == "leaky_relu":
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]

        delta2 = delta2 * aux

        dW1 = (1./batchSize)*np.dot(mini_batch_data.T, delta2)
        db1 = (1./batchSize)*np.sum(delta2, axis=0)
        
        
#          # Performs regularization
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        neural_data['w1'] -= learning_rate * dW1
        neural_data['b1'] -= learning_rate * db1
        neural_data['w2'] -= learning_rate * dW2
        neural_data['b2'] -= learning_rate * db2
        
        if j%50 == 0:
            #         Calculates costs

            cost = neuralNetworkCostFunction(probs, miniBatchTarget)
            validation_probs = forward_prop_1hl(fashionValidationParams, neural_data)
            validation_cost = neuralNetworkCostFunction(validation_probs, fashionValidationTarget)
            print("Ended iteration", j," Cost: ", cost, " Validation cost: ", validation_cost)
        start_idx += batchSize;
        start_idx %= mini_batch_data.shape[0]
    return neural_data

# Two hidden layers

Same as before, but for 2 hidden layers.

In [195]:
def train_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate):
    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension)
    sigmoid = True
    relu = False
    leaky_relu = False
    
#     Prepares for mini-batch
    batchSize = 64
    start_idx = 0;
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    excerpt = indices[start_idx:start_idx + batchSize]
    mini_batch_data = trainParams[excerpt]
    miniBatchTarget = createTarget(trainTarget[excerpt])
#     Performs Backpropagation
    for j in range(epochs):

#         Performs Forward propagation
        probs = forward_prop_1hl(mini_batch_data, neural_data)
        
#         Calculates cost

#         Performs Backward propagation
        delta4 = probs - miniBatchTarget
        dW3 = (neural_data['y2'].T).dot(delta4)
        db3 = np.sum(delta4, axis=0, keepdims=True)
        delta3 = delta4.dot(neural_data['w3'].T)
        aux = neural_data['y2']
        if sigmoid:
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if relu:
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if leaky_relu:
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]        
        delta3 = delta3 * aux
        dW2 = np.dot(mini_batch_data.T, delta3)
        db2 = np.sum(delta3, axis=0)
        delta2 = delta3.dot(neural_data['w2'].T)  #look for issues here
        aux = neural_data['y1']
        if sigmoid:
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if relu:
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if leaky_relu:
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]
        delta2 = delta2 * aux
        dW1 = np.dot(mini_batch_data.T, delta2)
        db1 = np.sum(delta2, axis=0)      
        
#          # Performs regularization
#         dW3 += regularization_rate * neural_data['w3']
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        
        neural_data['w1'] += -learning_rate * dW1
        neural_data['b1'] += -learning_rate * db1
        neural_data['w2'] += -learning_rate * dW2
        neural_data['b2'] += -learning_rate * db2
        neural_data['w3'] += -learning_rate * dW3
        neural_data['b3'] += -learning_rate * db3
        
        print("Ended iteration", j)
        start_idx += 1;
        start_idx %= mini_batch_data.shape[0]
        
    return neural_data

# Testing the neural networks

Now, we'll test our neural networks under multiple circumstances on the validation set, so we can gest the best possible models.

## Problem details
- Input dimension: 28x28 = 784 neurons
- Output dimension: 10 classes = 10 neurons

In [196]:
hidden_layers = 1
input_neurons = 784
output_neurons = 10
hidden_layer_1_neurons = 50
hidden_layer_2_neurons = 15
learning_rate = 0.1
regularization_rate = 0

## Defining our evaluation
Here, we define the function which will use the training we implemented before. It will train 3 models (using the same parameters), to test to an extent the impact of random initialization on the weights matrix. After the training, we check the costs and accuracies of each model (both related to train an validation sets), as well as the average of such values.

In [207]:
def evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="sigmoid"):
    model_1hl_1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================First model trained=====================")
    model_1hl_2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================Second model trained=====================")
    model_1hl_3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
    print("======================Third model trained=====================")
    probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_1)
    cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
    acc1_train = accuracy(fashionTrainTarget, fashionTrainParams, model_1hl_1)
    acc1_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_1hl_1)
    print("First model ", activation, "validation cost: ", cost1, " acc_train: {0:.4f} ".format(acc1_train), " acc_validation: {0:.4f} ".format(acc1_validation))
    
    probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_2)
    cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
    acc2_train = accuracy(fashionTrainTarget, fashionTrainParams, model_1hl_2)
    acc2_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_1hl_2)
    print("Second model  ", activation, "validation cost: ", cost2, " acc_train:{0:.4f} ".format(acc2_train), " acc_validation: {0:.4f} ".format(acc2_validation))
    
    probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_3)
    cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
    acc3_train = accuracy(fashionTrainTarget, fashionTrainParams, model_1hl_3)
    acc3_validation = accuracy(fashionValidationTarget, fashionValidationParams, model_1hl_3)
    print("Third model ", activation, "validation cost: ", cost3, " acc_train:{0:.4f} ".format(acc3_train), " acc_validation: {0:.4f} ".format(acc3_validation))
    
    
    avg_loss = ((cost1+cost2+cost3)/3)
    avg_acc = ((acc1_validation + acc2_validation + acc3_validation)/3)
    print("Average validation loss: ", avg_loss, " Average validation accuracy: ", avg_acc)
    return

## First experiments
First, we will train a model using a hidden layer with 50 neurons and 1000 epochs, which is small given the input of 784 neurons. We will run 3 times for each activation function to get an average result (that depends heavily on the initialization of the weights).

## Sigmoid.50

Since everything seems ok, we'll increase the number of epochs to 1000, but mantaining the current amount of neurons.

In [208]:
epochs = 1000
activation = "sigmoid"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  1000  epochs and  50  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3189274990773403  Validation cost:  2.308840114216827
Ended iteration 50  Cost:  2.171037962825242  Validation cost:  2.1837254937446287
Ended iteration 100  Cost:  1.8645495813242572  Validation cost:  1.8758884833012268
Ended iteration 150  Cost:  1.5806684164274916  Validation cost:  1.5945600691037713
Ended iteration 200  Cost:  1.4097875070160208  Validation cost:  1.4268218913522153
Ended iteration 250  Cost:  1.2933453573268108  Validation cost:  1.3141021084284596
Ended iteration 300  Cost:  1.2042783284529452  Validation cost:  1.2286972348102279
Ended iteration 350  Cost:  1.1331665686497145  Validation cost:  1.16157526942327
Ended iteration 400  Cost:  1.0773114436869984  Validation cost:  1.1101187761208735
Ended iteration 450  Cost:  1.0349248356206233  Validation cost:  1.0723990045324765
Ended iteration 500  Cost:  1.003889567761496  Va

Now for 2000 epochs:

In [209]:
epochs=2000
activation = "sigmoid"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  50  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.309584803483088  Validation cost:  2.3053486210577807
Ended iteration 50  Cost:  2.1564941498806403  Validation cost:  2.1731646967738363
Ended iteration 100  Cost:  1.8517262061793849  Validation cost:  1.8712662016631196
Ended iteration 150  Cost:  1.5783252719919543  Validation cost:  1.5980975480741204
Ended iteration 200  Cost:  1.4099436035618367  Validation cost:  1.4265489245819734
Ended iteration 250  Cost:  1.289168316098962  Validation cost:  1.3051478744728737
Ended iteration 300  Cost:  1.190502690231248  Validation cost:  1.210246899999425
Ended iteration 350  Cost:  1.1110033949449278  Validation cost:  1.1368538902004954
Ended iteration 400  Cost:  1.0515953815990229  Validation cost:  1.0834034904084433
Ended iteration 450  Cost:  1.0084389890352876  Validation cost:  1.0454901542699924
Ended iteration 500  Cost:  0.9764130244304439  V

Ended iteration 600  Cost:  0.963479722465216  Validation cost:  1.04860675376449
Ended iteration 650  Cost:  0.9522941504067068  Validation cost:  1.0380138911191579
Ended iteration 700  Cost:  0.9417452730100038  Validation cost:  1.0276939998175203
Ended iteration 750  Cost:  0.9330948128908453  Validation cost:  1.0193251169222695
Ended iteration 800  Cost:  0.9263927527873125  Validation cost:  1.0129030581571532
Ended iteration 850  Cost:  0.9208004949146568  Validation cost:  1.0074923076629423
Ended iteration 900  Cost:  0.9160770407094939  Validation cost:  1.0027822347719297
Ended iteration 950  Cost:  0.9123219217235048  Validation cost:  0.999009417463767
Ended iteration 1000  Cost:  0.9095474923731378  Validation cost:  0.9964851227517391
Ended iteration 1050  Cost:  0.9074910001637647  Validation cost:  0.9951597154204993
Ended iteration 1100  Cost:  0.9056020781955374  Validation cost:  0.994461693276095
Ended iteration 1150  Cost:  0.9030818108945793  Validation cost:  

From these trainings, we see that we achieve best performance generally around 1500 epochs. Now we'll test with other activation functions.

## Relu.50

In [210]:
epochs = 1500
activation = "relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  1500  epochs and  50  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.3164666632507918  Validation cost:  2.310441018496367
Ended iteration 50  Cost:  1.192824575839597  Validation cost:  1.8104917507244445
Ended iteration 100  Cost:  0.8314183993936627  Validation cost:  1.6243965565701601
Ended iteration 150  Cost:  0.6845210967789782  Validation cost:  1.5555502310877352
Ended iteration 200  Cost:  0.5860866183761796  Validation cost:  1.515126787072321
Ended iteration 250  Cost:  0.5125848460900257  Validation cost:  1.482372410330805
Ended iteration 300  Cost:  0.4554583950718295  Validation cost:  1.453276918008165
Ended iteration 350  Cost:  0.408742077923518  Validation cost:  1.42777488816148
Ended iteration 400  Cost:  0.39400495390197515  Validation cost:  1.3824527567363778
Ended iteration 450  Cost:  0.3538569726985296  Validation cost:  1.350752855588127
Ended iteration 500  Cost:  0.3180160542618654  Valid

First model  relu validation cost:  1.0318463453792883  acc_train: 0.6719   acc_validation: 0.6644 
Second model   relu validation cost:  0.9862588391588282  acc_train:0.7053   acc_validation: 0.7005 
Third model  relu validation cost:  1.1776426395270934  acc_train:0.5824   acc_validation: 0.5805 
Average validation loss:  1.0652492746884032  Average validation accuracy:  0.6484722222222222


## Leaky-relu.50

In [213]:
epochs = 2000
activation = "leaky_relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Beginning training with  2000  epochs and  50  hidden neurons.
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  2.302849938656908  Validation cost:  2.3105654144774914
Ended iteration 50  Cost:  1.1070574951364116  Validation cost:  2.01886601262095
Ended iteration 100  Cost:  0.7581413648313114  Validation cost:  1.9395130288641822
Ended iteration 150  Cost:  0.6476219339071607  Validation cost:  1.8893976713865486
Ended iteration 200  Cost:  0.540676911591149  Validation cost:  1.8701075154785436
Ended iteration 250  Cost:  0.44555981541257667  Validation cost:  1.8564365609984277
Ended iteration 300  Cost:  0.3878401627297766  Validation cost:  1.8342690908634722
Ended iteration 350  Cost:  0.3358157162023498  Validation cost:  1.8140881000201092
Ended iteration 400  Cost:  0.28948118827006947  Validation cost:  1.7940775202525074
Ended iteration 450  Cost:  0.2541457454134992  Validation cost:  1.784445262402187
Ended iteration 500  Cost:  0.22815904051620284 

Ended iteration 550  Cost:  0.2093029258962358  Validation cost:  1.8546940257167583
Ended iteration 600  Cost:  0.17423445345901142  Validation cost:  1.8470965484529684
Ended iteration 650  Cost:  0.15434463024690387  Validation cost:  1.8587937357873874
Ended iteration 700  Cost:  0.13672680853957211  Validation cost:  1.8627474393242591
Ended iteration 750  Cost:  0.120293890925472  Validation cost:  1.8647950738438892
Ended iteration 800  Cost:  0.10459350140890224  Validation cost:  1.8650641559913508
Ended iteration 850  Cost:  0.09087174195103441  Validation cost:  1.8663997094308604
Ended iteration 900  Cost:  0.07899083887561886  Validation cost:  1.8686638342812463
Ended iteration 950  Cost:  0.06995370157897664  Validation cost:  1.8738920463455728
Ended iteration 1000  Cost:  0.06285652497457526  Validation cost:  1.877260104046379
Ended iteration 1050  Cost:  0.05680201408836956  Validation cost:  1.8793766454951626
Ended iteration 1100  Cost:  0.051596156296783234  Valid

# Changing hidden layer size
Now we will experiment changing the amount of neurons on the hidden layer and see the impact on different activation functions. We will fix the amount of epochs to 1000, which generates an average-to-good result for all activation functions. Besides that, we'll use a learning rate of 0.01.

In [42]:
hidden_layer_1_neurons = 100
epochs = 1000
learning_rate = 0.01

## Sigmoid.100

In [None]:
activation = "sigmoid"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


## Relu.100

In [None]:
activation = "relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


## Leaky Relu.100

In [None]:
activation = "leaky_relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


Expanding to 500 neurons on the hidden layer...

In [None]:
hidden_layer_1_neurons = 500

## Sigmoid.500

In [None]:
activation = "sigmoid"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


## Relu.500

In [None]:
activation = "relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)


## Leaky Relu.500

In [None]:
activation = "leaky_relu"
evaluate_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation)
