# Introduction
Here, we'll explore neural networks applied to the fashion-mnist problem.

In [2]:
import numpy as np
from sklearn.linear_model import LogisticRegression
import csv
import pandas as pd

In [3]:
# Functions

def formatArray (dataFrame, columnToExtract) :
    array = dataFrame.values
    target = array[:,columnToExtract]
    params = np.delete(array, columnToExtract, axis = 1)
    return params, target

def loadFashionTrainData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_train.csv")

def loadFashionTestData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_test.csv")

def split_train_test(data, test_ratio):
    np.random.seed(42)
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

def createTarget (target):
    results = np.zeros((target.size, 10), dtype=int)
    for i in range(10):
        for j in range(target.size):
            if (target[j] != i):
                results[j][i - 1] = 0
            else:
                results[j][i - 1] = 1
    return results

def p_print(a):
    for x in a:
        print(*x, sep=" ")
    return

# The dataset
First and foremost, we'll open train and test data. The training data is split to obtain validation items and the the target values are also separated from the original data.

In [4]:
fashionTrainDataset = loadFashionTrainData()
fashionTestDataset = loadFashionTestData()
fashionTrain, fashionValidation = split_train_test(fashionTrainDataset, 0.2)
fashionTrainParams, fashionTrainTarget = formatArray(fashionTrain, 0)
fashionTrainParams = fashionTrainParams/255
fashionValidationParams, fashionValidationTarget = formatArray(fashionValidation, 0)
fashionValidationTarget = createTarget(fashionValidationTarget)
fashionValidationParams = fashionValidationParams/255
print (fashionTrainParams[:5])
print (type(fashionTrainParams))

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<class 'numpy.ndarray'>


# Activation and Softmax Functions
First, we'll start by implementing some useful functions seen in class

In [5]:
def sigmoid(n):
    return (1/(1+np.exp(-n)))

def derivative_sigmoid(n):
    x = sigmoid(n)
    return x * (1 - x)

In [6]:
def relu(n):
    if n < 0:
        return 0
    return n

def derivative_relu(n):
    if n < 0:
        return 0
    return 1

In [7]:
def leaky_relu(n):
    if n > 0:
        return n
    return 0.01 * n

def derivative_leaky_relu(n):
    if n < 0:
        return 0.01
    return 1

In [8]:
def softmax(n):
    exp = np.exp(n)
    test_sum = np.sum(exp, axis=1, keepdims=True)
    return exp/test_sum

## Forward Propagation
In this section, we define forward propagation related functions.

In [31]:
def initialize_1hl(input_dimension,hidden_layer_1_neurons, output_dimension):
    neural_data = {}
    np.random.seed()
    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, output_dimension)/ np.sqrt(input_dimension)
    
    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, output_dimension))
    return neural_data

def initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension):
    neural_data = {}
    np.random.seed(0)

    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, hidden_layer_2_neurons)/ np.sqrt(hidden_layer_1_neurons)
    neural_data['w3'] = np.random.randn(hidden_layer_2_neurons, output_dimension)/ np.sqrt(hidden_layer_2_neurons)

    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, hidden_layer_2_neurons))
    neural_data['b3'] = np.zeros((1, output_dimension))
    return neural_data
    
def forward_prop_1hl(x, neural_data, activation="sigmoid"):
    w1 , w2, b1, b2 = neural_data['w1'], neural_data['w2'], neural_data['b1'], neural_data['b2']
    x1 = np.dot(x, w1) + b1 #Output of hidden layer
    if activation == "sigmoid":
        y1 = np.asarray([[sigmoid(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif actication == "relu":
        y1 = np.asarray([[relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    elif activation == "leaky-relu"
        y1 = np.asarray([[leaky_relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    x2 = np.dot(y1, w2) + b2 #Output of last layer
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['y1'] = y1
    neural_data['o'] = softmax(x2)  # Final output with softmax

    return neural_data['o']

def forward_prop_2hl(x, neural_data):
    w1 , w2, w3, b1, b2, b3 = neural_data['w1'], neural_data['w2'], neural_data['w3'], neural_data['b1'], neural_data['b2'], neural_data['b3']

    x1 = np.dot(x, w1) + b1
    y1 = np.asarray([[relu(n) for n in j] for j in x1])
    x2 = np.dot(a1, w2) + b2
    y2 = np.asarray([[relu(n) for n in j] for j in x2])
    x3 = np.dot(a2, w3) + b3
    
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['x3'] = x3

    neural_data['y1'] = y1
    neural_data['y2'] = y2
    
    neural_data['o'] = softmax(x3)
    return neural_data['o']

## Prediction Functions
Helper functions that return predictions, given our model.

In [10]:
def predict_1hl(x, neural_data):
    test = forward_prop_1hl(x,neural_data)
    return np.argmax(test, axis=1)

def predict_2hl(x, neural_data):
    return np.argmax(forward_prop_2hl(x,neural_data), axis=1)

## Cost function

In [11]:
def gradient_cost(fashionTrainOutput, fashionTarget, testCasesAmount):
    fashionTargetMinusOne = fashionTarget - 1
    cost = 0
    for j in range(testCasesAmount):
        cost += np.add(np.multiply(fashionTarget, np.log10(fashionTrainOutput[j])),np.multiply(fashionTargetMinusOne, (1- np.log10(fashionTrainOutput[j]))))
    cost = cost*(-1)/testCasesAmount
    return cost

def neuralNetworkCostFunction(fashionTrainOutput, fashionTarget):
    diference = fashionTrainOutput - fashionTarget
    squareDiference = diference ** 2
    n = fashionTrainOutput.shape[0]    
    return (np.sum(squareDiference)/(2*n))

def regressionLogisticCostFunction (results, model, X):
    agaTheta = model.predict_proba(X)
    n = X.shape[0]
    diference = results - agaTheta
    squareDiference = diference * diference
    return (np.sum(squareDiference)/(2*n))

## Training and backward propagation

Some notes:

### For one hidden layer:
- x1 = #Output of hidden layer
- x2 = #Output of last layer
- y1 = #Output of hidden layer with activation function
- o = Final output with Softmax

### For TWO hidden layers:
- x1 = #Output of first hidden layer
- x2 = #Output of second hidden layer
- x3 = Output of last layer
- y1 = #Output of first hidden layer with activation function
- y2 = #Output of second hidden layer with activation function
- o = Final output with Softmax


# One hidden layer

Here, we present our code and results achieved by a learning algorithm that uses a neural network with only one hidden layer.

In [16]:
def train_neural_network_1hl(hidden_layer_1_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate, activation="sigmoid"):
    print("Beginning training...")
    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_1hl(input_dimension, hidden_layer_1_neurons, output_dimension)
    print("Initialized weights")
    sigmoid = True
    relu = False
    leaky_relu = False
    
#     Prepares for mini-batch
    batchSize = 600
    start_idx = 0
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    print("Prepared for mini-batch.")
#     Performs Backpropagation
    capitalDelta3 = 0
    capitalDelta2 = 0
    for j in range(epochs):
        excerpt = indices[start_idx:start_idx + batchSize]
        mini_batch_data = trainParams[excerpt]
        miniBatchTarget = createTarget(trainTarget[excerpt])
#         Performs Forward propagation
        probs = forward_prop_1hl(mini_batch_data, neural_data, activation)
    

#         Performs Backward propagation

        delta3 = probs - miniBatchTarget

        dW2 =(1./batchSize)* (neural_data['y1'].T).dot(delta3)
        db2 =(1./batchSize)* ( np.sum(delta3, axis=0, keepdims=True))
        delta2 = np.dot(delta3, neural_data['w2'].T)
        aux = neural_data['y1']
        if activation == "sigmoid":
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if activation == "relu":
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if activation == "leaky_relu":
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]

        delta2 = delta2 * aux

        dW1 = (1./batchSize)*np.dot(mini_batch_data.T, delta2)
        db1 = (1./batchSize)*np.sum(delta2, axis=0)
        
        
#          # Performs regularization
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        neural_data['w1'] -= learning_rate * dW1
        neural_data['b1'] -= learning_rate * db1
        neural_data['w2'] -= learning_rate * dW2
        neural_data['b2'] -= learning_rate * db2
        
        if j%50 == 0:
            #         Calculates costs
            cost = neuralNetworkCostFunction(probs, miniBatchTarget)
            validation_probs = forward_prop_1hl(fashionValidationParams, neural_data)
            validation_cost = neuralNetworkCostFunction(validation_probs, fashionValidationTarget)
            print("Ended iteration", j," Cost: ", cost, " Validation cost: ", validation_cost)
        start_idx += batchSize;
        start_idx %= mini_batch_data.shape[0]
    return neural_data

# Two hidden layers

Same as before, but for 2 hidden layers.

In [13]:
def train_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate):
    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension)
    sigmoid = True
    relu = False
    leaky_relu = False
    
#     Prepares for mini-batch
    batchSize = 64
    start_idx = 0;
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    excerpt = indices[start_idx:start_idx + batchSize]
    mini_batch_data = trainParams[excerpt]
    miniBatchTarget = createTarget(trainTarget[excerpt])
#     Performs Backpropagation
    for j in range(epochs):

#         Performs Forward propagation
        probs = forward_prop_1hl(mini_batch_data, neural_data)
        
#         Calculates cost

#         Performs Backward propagation
        delta4 = probs - miniBatchTarget
        dW3 = (neural_data['y2'].T).dot(delta4)
        db3 = np.sum(delta4, axis=0, keepdims=True)
        delta3 = delta4.dot(neural_data['w3'].T)
        aux = neural_data['y2']
        if sigmoid:
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if relu:
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if leaky_relu:
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]        
        delta3 = delta3 * aux
        dW2 = np.dot(mini_batch_data.T, delta3)
        db2 = np.sum(delta3, axis=0)
        delta2 = delta3.dot(neural_data['w2'].T)  #look for issues here
        aux = neural_data['y1']
        if sigmoid:
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if relu:
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if leaky_relu:
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]
        delta2 = delta2 * aux
        dW1 = np.dot(mini_batch_data.T, delta2)
        db1 = np.sum(delta2, axis=0)      
        
#          # Performs regularization
#         dW3 += regularization_rate * neural_data['w3']
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        
        neural_data['w1'] += -learning_rate * dW1
        neural_data['b1'] += -learning_rate * db1
        neural_data['w2'] += -learning_rate * dW2
        neural_data['b2'] += -learning_rate * db2
        neural_data['w3'] += -learning_rate * dW3
        neural_data['b3'] += -learning_rate * db3
        
        print("Ended iteration", j)
        start_idx += 1;
        start_idx %= mini_batch_data.shape[0]
        
    return neural_data

# Testing the neural networks

Now, we'll test our neural networks under multiple circumstances on the validation set, so we can gest the best possible models.

## Problem details
- Input dimension: 28x28 = 784 neurons
- Output dimension: 10 classes = 10 neurons

In [14]:
hidden_layers = 1
input_neurons = 784
output_neurons = 10
hidden_layer_1_neurons = 50
hidden_layer_2_neurons = 15
learning_rate = 0.1
regularization_rate = 0
epochs = 200

## Starting small
First, we will train a model using a hidden layer with 50 neurons and 200 epochs, which is small given the input of 784 neurons. We will run 3 times for each activation function to get an average result (that depends heavily on the initialization of the weights).

## Sigmoid.50

In [34]:
epochs=200
hidden_layer_1_neurons=50
model_1hl_sigmoid1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================First model trained=====================")
model_1hl_sigmoid2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Second model trained=====================")
model_1hl_sigmoid3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model validation cost: ", cost3)
print("Average: ", avg)

Beginning training...
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  0.45019556729150473  Validation cost:  0.44974519740812025
Ended iteration 50  Cost:  0.38282042236201175  Validation cost:  0.38685795845508103
Ended iteration 100  Cost:  0.2985541421094475  Validation cost:  0.3091462685935611
Ended iteration 150  Cost:  0.2552539933612356  Validation cost:  0.26837535768752285
Beginning training...
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  0.4518269892624977  Validation cost:  0.4504125866956274
Ended iteration 50  Cost:  0.3780817055795375  Validation cost:  0.37895947072367014
Ended iteration 100  Cost:  0.3021440971255299  Validation cost:  0.3038535945384921
Ended iteration 150  Cost:  0.26151065585507044  Validation cost:  0.2648871988094492
Beginning training...
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  0.4506599232860998  Validation cost:  0.4497981826610876
Ended iteration 50  Cost:  0.387904

Since everything seems ok, we'll increase the number of epochs to 1000, but mantaining the current amount of neurons.

In [35]:
epochs = 1000
model_1hl_sigmoid1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================First model trained=====================")
model_1hl_sigmoid2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Second model trained=====================")
model_1hl_sigmoid3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model validation cost: ", cost3)
print("Average: ", avg)

Beginning training...
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  0.44959187390799116  Validation cost:  0.4480108001649867
Ended iteration 50  Cost:  0.36952606683374717  Validation cost:  0.3740381977705004
Ended iteration 100  Cost:  0.3080086989490664  Validation cost:  0.3166197600437621
Ended iteration 150  Cost:  0.2739455816782257  Validation cost:  0.28392435323233045
Ended iteration 200  Cost:  0.25346563044121545  Validation cost:  0.2647823077469343
Ended iteration 250  Cost:  0.23911963300137548  Validation cost:  0.2518448704674362
Ended iteration 300  Cost:  0.22794751997651863  Validation cost:  0.24198656413587208
Ended iteration 350  Cost:  0.21922585148820797  Validation cost:  0.23433720963608956
Ended iteration 400  Cost:  0.21222300597657845  Validation cost:  0.2282338261045932
Ended iteration 450  Cost:  0.20613082997249557  Validation cost:  0.22341075882204434
Ended iteration 500  Cost:  0.20091433358064  Validation cost:  0.21939940

Now for 2000 epochs:

In [None]:
epochs=2000
model_1hl_sigmoid1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================First model trained=====================")
model_1hl_sigmoid2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Second model trained=====================")
model_1hl_sigmoid3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model sigmoid validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model sigmoid validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model sigmoid validation cost: ", cost3)
print("Average: ", avg)

From these trainings, we see that we achieve best performance generally around 1100 epochs. Now we'll test with other activation functions.

## Relu.50

In [37]:
epochs = 1100
model_1hl_relu1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================First model trained=====================")
model_1hl_relu2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================Second model trained=====================")
model_1hl_relu3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model relu validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model relu validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model relu validation cost: ", cost3)
print("Average: ", avg)

Beginning training...
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  0.44891177198196647  Validation cost:  0.44764262451701803
Ended iteration 50  Cost:  0.2639578082198586  Validation cost:  0.2702709506995608
Ended iteration 100  Cost:  0.2006488706812488  Validation cost:  0.2134667499178839
Ended iteration 150  Cost:  0.16998019486420005  Validation cost:  0.19116141703346257
Ended iteration 200  Cost:  0.1448769425922058  Validation cost:  0.17611739287711717
Ended iteration 250  Cost:  0.12369301979864952  Validation cost:  0.16589467566856309
Ended iteration 300  Cost:  0.10559218537109814  Validation cost:  0.1592306992279439
Ended iteration 350  Cost:  0.09945562296593052  Validation cost:  0.1583065743832179
Ended iteration 400  Cost:  0.08788745907715258  Validation cost:  0.15537018960074098
Ended iteration 450  Cost:  0.08500968086004791  Validation cost:  0.15944676634797048
Ended iteration 500  Cost:  0.07610687815649932  Validation cost:  0.1637

## Leaky-relu.50

In [None]:
epochs = 2000
model_1hl_leaky_relu1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================First model trained=====================")
model_1hl_leaky_relu2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================Second model trained=====================")
model_1hl_leaky_relu3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model leaky-relu validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model leaky-relu validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model leaky-relu validation cost: ", cost3)
print("Average: ", avg)

# Changing hidden layer size
Now we will experiment changing the amount of neurons on the hidden layer and see the impact on different activation functions. We will fix the amount of epochs to 1350, which generates an average-to-good result for all activation functions.

In [42]:
hidden_layer_1_neurons = 100
epochs = 1350

## Sigmoid.100

In [None]:
model_1hl_sigmoid1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================First model trained=====================")
model_1hl_sigmoid2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Second model trained=====================")
model_1hl_sigmoid3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model sigmoid validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model sigmoid validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model sigmoid validation cost: ", cost3)
print("Average: ", avg)

## Relu.100

In [None]:
model_1hl_relu1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================First model trained=====================")
model_1hl_relu2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================Second model trained=====================")
model_1hl_relu3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model relu validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model relu validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model relu validation cost: ", cost3)
print("Average: ", avg)

## Leaky Relu.100

In [None]:
model_1hl_leaky_relu1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================First model trained=====================")
model_1hl_leaky_relu2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================Second model trained=====================")
model_1hl_leaky_relu3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model leaky-relu validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model leaky-relu validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model leaky-relu validation cost: ", cost3)
print("Average: ", avg)

Expanding to 500 neurons on the hidden layer...

In [None]:
hidden_layer_1_neurons = 500

## Sigmoid.500

In [None]:
model_1hl_sigmoid1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================First model trained=====================")
model_1hl_sigmoid2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Second model trained=====================")
model_1hl_sigmoid3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model sigmoid validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model sigmoid validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model sigmoid validation cost: ", cost3)
print("Average: ", avg)

## Relu.500

In [None]:
model_1hl_relu1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================First model trained=====================")
model_1hl_relu2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================Second model trained=====================")
model_1hl_relu3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="relu")
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model relu validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model relu validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model relu validation cost: ", cost3)
print("Average: ", avg)

## Leaky Relu.500

In [None]:
model_1hl_leaky_relu1 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================First model trained=====================")
model_1hl_leaky_relu2 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================Second model trained=====================")
model_1hl_leaky_relu3 = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate, activation="leaky-relu")
print("======================Third model trained=====================")
probs_sigmoid1 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid1)
cost1 = neuralNetworkCostFunction(probs_sigmoid1, fashionValidationTarget)
print("First model leaky-relu validation cost: ", cost1)
probs_sigmoid2 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid2)
cost2 = neuralNetworkCostFunction(probs_sigmoid2, fashionValidationTarget)
print("Second model leaky-relu validation cost: ", cost2)
probs_sigmoid3 = forward_prop_1hl(fashionValidationParams, model_1hl_sigmoid3)
cost3 = neuralNetworkCostFunction(probs_sigmoid3, fashionValidationTarget)
avg = ((cost1+cost2+cost3)/3)
print("Third model leaky-relu validation cost: ", cost3)
print("Average: ", avg)