# Introduction
Here, we'll explore neural networks applied to the fashion-mnist problem.

In [80]:
import numpy as np
from sklearn.linear_model import LogisticRegression
import csv
import pandas as pd

In [81]:
# Functions

def formatArray (dataFrame, columnToExtract) :
    array = dataFrame.values
    target = array[:,columnToExtract]
    params = np.delete(array, columnToExtract, axis = 1)
    return params, target

def loadFashionTrainData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_train.csv")

def loadFashionTestData():
    return pd.read_csv("fashion-mnist-dataset/fashion-mnist_test.csv")

def split_train_test(data, test_ratio):
    np.random.seed(42)
    shuffled_indices = np.random.permutation(len(data))
    test_set_size = int(len(data) * test_ratio)
    test_indices = shuffled_indices[:test_set_size]
    train_indices = shuffled_indices[test_set_size:]
    return data.iloc[train_indices], data.iloc[test_indices]

def createTarget (target):
    results = np.zeros((target.size, 10), dtype=int)
    for i in range(10):
        for j in range(target.size):
            if (target[j] != i):
                results[j][i - 1] = 0
            else:
                results[j][i - 1] = 1
    return results

def p_print(a):
    for x in a:
        print(*x, sep=" ")
    return

# The dataset
First and foremost, we'll open train and test data. The training data is split to obtain validation items and the the target values are also separated from the original data.

In [82]:
fashionTrainDataset = loadFashionTrainData()
fashionTestDataset = loadFashionTestData()
fashionTrain, fashionValidation = split_train_test(fashionTrainDataset, 0.2)
fashionTrainParams, fashionTrainTarget = formatArray(fashionTrain, 0)
fashionTrainParams = fashionTrainParams/255
fashionValidationParams, fashionValidationTarget = formatArray(fashionValidation, 0)
fashionValidationTarget = createTarget(fashionValidationTarget)
fashionValidationParams = fashionValidationParams/255
print (fashionTrainParams[:5])
print (type(fashionTrainParams))

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
<class 'numpy.ndarray'>


# Activation and Softmax Functions
First, we'll start by implementing some useful functions seen in class

In [83]:
def sigmoid(n):
    return (1/(1+np.exp(-n)))

def derivative_sigmoid(n):
    x = sigmoid(n)
    return x * (1 - x)

In [84]:
def relu(n):
    if n < 0:
        return 0
    return n

def derivative_relu(n):
    if n < 0:
        return 0
    return 1

In [85]:
def leaky_relu(n):
    if n > 0:
        return n
    return 0.01 * n

def derivative_leaky_relu(n):
    if n < 0:
        return 0.01
    return 1

In [86]:
def softmax(n):
    exp = np.exp(n)
    test_sum = np.sum(exp, axis=1, keepdims=True)
    return exp/test_sum

## Forward Propagation
In this section, we define forward propagation related functions.

In [87]:
def initialize_1hl(input_dimension,hidden_layer_1_neurons, output_dimension):
    neural_data = {}
    np.random.seed(0)
    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, output_dimension)/ np.sqrt(hidden_layer_1_neurons)
    
    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, output_dimension))
    return neural_data

def initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension):
    neural_data = {}
    np.random.seed(0)

    neural_data['w1'] = np.random.randn(input_dimension, hidden_layer_1_neurons)/ np.sqrt(input_dimension)
    neural_data['w2'] = np.random.randn(hidden_layer_1_neurons, hidden_layer_2_neurons)/ np.sqrt(hidden_layer_1_neurons)
    neural_data['w3'] = np.random.randn(hidden_layer_2_neurons, output_dimension)/ np.sqrt(hidden_layer_2_neurons)

    neural_data['b1'] = np.zeros((1, hidden_layer_1_neurons))
    neural_data['b2'] = np.zeros((1, hidden_layer_2_neurons))
    neural_data['b3'] = np.zeros((1, output_dimension))
    return neural_data
    
def forward_prop_1hl(x, neural_data):
    w1 , w2, b1, b2 = neural_data['w1'], neural_data['w2'], neural_data['b1'], neural_data['b2']
    x1 = np.dot(x, w1) + b1 #Output of hidden layer
    y1 = np.asarray([[relu(n) for n in j] for j in x1]) #Output of hidden layer with activation function
    x2 = np.dot(y1, w2) + b2 #Output of last layer
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['y1'] = y1
    neural_data['o'] = softmax(x2)  # Final output with softmax

    return neural_data['o']

def forward_prop_2hl(x, neural_data):
    w1 , w2, w3, b1, b2, b3 = neural_data['w1'], neural_data['w2'], neural_data['w3'], neural_data['b1'], neural_data['b2'], neural_data['b3']

    x1 = np.dot(x, w1) + b1
    y1 = np.asarray([[relu(n) for n in j] for j in x1])
    x2 = np.dot(a1, w2) + b2
    y2 = np.asarray([[relu(n) for n in j] for j in x2])
    x3 = np.dot(a2, w3) + b3
    
    neural_data['x1'] = x1
    neural_data['x2'] = x2
    neural_data['x3'] = x3

    neural_data['y1'] = y1
    neural_data['y2'] = y2
    
    neural_data['o'] = softmax(x3)
    return neural_data['o']

## Prediction Functions
Helper functions that return predictions, given our model.

In [88]:
def predict_1hl(x, neural_data):
    test = forward_prop_1hl(x,neural_data)
    return np.argmax(test, axis=1)

def predict_2hl(x, neural_data):
    return np.argmax(forward_prop_2hl(x,neural_data), axis=1)

## Cost function

In [89]:
def gradient_cost(fashionTrainOutput, fashionTarget, testCasesAmount):
    fashionTargetMinusOne = fashionTarget - 1
    cost = 0
    for j in range(testCasesAmount):
        cost += np.add(np.multiply(fashionTarget, np.log10(fashionTrainOutput[j])),np.multiply(fashionTargetMinusOne, (1- np.log10(fashionTrainOutput[j]))))
    cost = cost*(-1)/testCasesAmount
    return cost

def neuralNetworkCostFunction(fashionTrainOutput, fashionTarget):
    diference = fashionTrainOutput - fashionTarget
    squareDiference = diference ** 2
    n = fashionTrainOutput.shape[0]    
    return (np.sum(squareDiference)/(2*n))

def regressionLogisticCostFunction (results, model, X):
    agaTheta = model.predict_proba(X)
    n = X.shape[0]
    diference = results - agaTheta
    squareDiference = diference * diference
    return (np.sum(squareDiference)/(2*n))

## Training and backward propagation

Some notes:

### For one hidden layer:
- x1 = #Output of hidden layer
- x2 = #Output of last layer
- y1 = #Output of hidden layer with activation function
- o = Final output with Softmax

### For TWO hidden layers:
- x1 = #Output of first hidden layer
- x2 = #Output of second hidden layer
- x3 = Output of last layer
- y1 = #Output of first hidden layer with activation function
- y2 = #Output of second hidden layer with activation function
- o = Final output with Softmax


# One hidden layer

Here, we present our code and results achieved by a learning algorithm that uses a neural network with only one hidden layer.

In [90]:
def train_neural_network_1hl(hidden_layer_1_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate):
    print("Beginning training...")
    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_1hl(input_dimension, hidden_layer_1_neurons, output_dimension)
    print("Initialized weights")
    sigmoid = True
    relu = False
    leaky_relu = False
    
#     Prepares for mini-batch
    batchSize = 600
    start_idx = 0
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    print("Prepared for mini-batch.")
#     Performs Backpropagation
    capitalDelta3 = 0
    capitalDelta2 = 0
    for j in range(epochs):
        excerpt = indices[start_idx:start_idx + batchSize]
        mini_batch_data = trainParams[excerpt]
        miniBatchTarget = createTarget(trainTarget[excerpt])
#         Performs Forward propagation
        probs = forward_prop_1hl(mini_batch_data, neural_data)
    
#         Calculates cost
#         cur_result = np.amax(probs, axis=1)

        cost = neuralNetworkCostFunction(probs, miniBatchTarget)

#         Performs Backward propagation

        delta3 = probs - miniBatchTarget

        dW2 =(1./batchSize)* (neural_data['y1'].T).dot(delta3)
        db2 =(1./batchSize)* ( np.sum(delta3, axis=0, keepdims=True))
        delta2 = np.dot(delta3, neural_data['w2'].T)
        aux = neural_data['y1']
        if sigmoid:
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if relu:
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if leaky_relu:
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]

        delta2 = delta2 * aux

        dW1 = (1./batchSize)*np.dot(mini_batch_data.T, delta2)
        db1 = (1./batchSize)*np.sum(delta2, axis=0)
        
        
#          # Performs regularization
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        neural_data['w1'] -= learning_rate * dW1
        neural_data['b1'] -= learning_rate * db1
        neural_data['w2'] -= learning_rate * dW2
        neural_data['b2'] -= learning_rate * db2
        
        print("Ended iteration", j," Cost: ", cost)
        start_idx += batchSize;
        start_idx %= mini_batch_data.shape[0]
    return neural_data

# Two hidden layers

Same as before, but for 2 hidden layers.

In [91]:
def train_neural_network_2hl(hidden_layer_1_neurons, hidden_layer_2_neurons, epochs, trainParams, trainTarget, learning_rate, regularization_rate):
    input_dimension = 784
    output_dimension = 10
#     Initializes weights and biases for our neural network
    neural_data = initialize_2hl(input_dimension, hidden_layer_1_neurons, hidden_layer_2_neurons, output_dimension)
    sigmoid = True
    relu = False
    leaky_relu = False
    
#     Prepares for mini-batch
    batchSize = 64
    start_idx = 0;
    indices = np.arange(trainParams.shape[0])
    np.random.shuffle(indices)
    excerpt = indices[start_idx:start_idx + batchSize]
    mini_batch_data = trainParams[excerpt]
    miniBatchTarget = createTarget(trainTarget[excerpt])
#     Performs Backpropagation
    for j in range(epochs):

#         Performs Forward propagation
        probs = forward_prop_1hl(mini_batch_data, neural_data)
        
#         Calculates cost

#         Performs Backward propagation
        delta4 = probs - miniBatchTarget
        dW3 = (neural_data['y2'].T).dot(delta4)
        db3 = np.sum(delta4, axis=0, keepdims=True)
        delta3 = delta4.dot(neural_data['w3'].T)
        aux = neural_data['y2']
        if sigmoid:
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if relu:
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if leaky_relu:
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]        
        delta3 = delta3 * aux
        dW2 = np.dot(mini_batch_data.T, delta3)
        db2 = np.sum(delta3, axis=0)
        delta2 = delta3.dot(neural_data['w2'].T)  #look for issues here
        aux = neural_data['y1']
        if sigmoid:
            aux = [[derivative_sigmoid(n) for n in x] for x in aux]
        if relu:
            aux = [[derivative_relu(n) for n in x] for x in aux]
        if leaky_relu:
            aux = [[derivative_leaky_relu(n) for n in x] for x in aux]
        delta2 = delta2 * aux
        dW1 = np.dot(mini_batch_data.T, delta2)
        db1 = np.sum(delta2, axis=0)      
        
#          # Performs regularization
#         dW3 += regularization_rate * neural_data['w3']
#         dW2 += regularization_rate * neural_data['w2']
#         dW1 += regularization_rate * neural_data['w1']

        # Gradient descent parameter update
        
        neural_data['w1'] += -learning_rate * dW1
        neural_data['b1'] += -learning_rate * db1
        neural_data['w2'] += -learning_rate * dW2
        neural_data['b2'] += -learning_rate * db2
        neural_data['w3'] += -learning_rate * dW3
        neural_data['b3'] += -learning_rate * db3
        
        print("Ended iteration", j)
        start_idx += 1;
        start_idx %= mini_batch_data.shape[0]
        
    return neural_data

# Testing the neural networks

Now, we'll test our neural networks under multiple circumstances on the validation set, so we can gest the best possible models.

## Problem details
- Input dimension: 28x28 = 784 neurons
- Output dimension: 10 classes = 10 neurons

In [101]:
hidden_layers = 1
input_neurons = 784
output_neurons = 10
hidden_layer_1_neurons = 60
hidden_layer_2_neurons = 15
learning_rate = 0.1
regularization_rate = 0
epochs = 2000

In [102]:
# fashionTrainParams, fashionTrainTarget
model_1hl = train_neural_network_1hl(hidden_layer_1_neurons, epochs, fashionTrainParams, fashionTrainTarget, learning_rate, regularization_rate)


Beginning training...
Initialized weights
Prepared for mini-batch.
Ended iteration 0  Cost:  0.4538479104898881
Ended iteration 1  Cost:  0.45005130359993684
Ended iteration 2  Cost:  0.4468346130661469
Ended iteration 3  Cost:  0.44395102518227325
Ended iteration 4  Cost:  0.44124653343140985
Ended iteration 5  Cost:  0.4386572455773819
Ended iteration 6  Cost:  0.4361469258904498
Ended iteration 7  Cost:  0.4336971397348658
Ended iteration 8  Cost:  0.43126863739065907
Ended iteration 9  Cost:  0.4288557417309543
Ended iteration 10  Cost:  0.4264439190567791
Ended iteration 11  Cost:  0.4240192338345957
Ended iteration 12  Cost:  0.4215759258446124
Ended iteration 13  Cost:  0.41911086345339604
Ended iteration 14  Cost:  0.4166192455768798
Ended iteration 15  Cost:  0.4140984933193335
Ended iteration 16  Cost:  0.4115496902872875
Ended iteration 17  Cost:  0.40897632709582493
Ended iteration 18  Cost:  0.4063854737169868
Ended iteration 19  Cost:  0.40376662232272104
Ended iteration 

Ended iteration 175  Cost:  0.19762244811834914
Ended iteration 176  Cost:  0.19715900743510223
Ended iteration 177  Cost:  0.19669903379783726
Ended iteration 178  Cost:  0.1962418569310656
Ended iteration 179  Cost:  0.19578762311391387
Ended iteration 180  Cost:  0.19533637290376732
Ended iteration 181  Cost:  0.19488824853526326
Ended iteration 182  Cost:  0.19444245689183043
Ended iteration 183  Cost:  0.19399953417558094
Ended iteration 184  Cost:  0.1935614073362414
Ended iteration 185  Cost:  0.1931268658073551
Ended iteration 186  Cost:  0.1926963913017235
Ended iteration 187  Cost:  0.19226915051188484
Ended iteration 188  Cost:  0.19184383867814858
Ended iteration 189  Cost:  0.19142021153090927
Ended iteration 190  Cost:  0.1909988203311398
Ended iteration 191  Cost:  0.19058080892879592
Ended iteration 192  Cost:  0.19016678728368272
Ended iteration 193  Cost:  0.1897554359639025
Ended iteration 194  Cost:  0.1893469146027537
Ended iteration 195  Cost:  0.18894087606410817

Ended iteration 347  Cost:  0.15115780371317158
Ended iteration 348  Cost:  0.1510059118619371
Ended iteration 349  Cost:  0.15085471352210258
Ended iteration 350  Cost:  0.15070408388896167
Ended iteration 351  Cost:  0.15055419288101393
Ended iteration 352  Cost:  0.1504045290193583
Ended iteration 353  Cost:  0.15025472324311054
Ended iteration 354  Cost:  0.15010584816439154
Ended iteration 355  Cost:  0.14995759113447077
Ended iteration 356  Cost:  0.14980993256327874
Ended iteration 357  Cost:  0.1496625266305395
Ended iteration 358  Cost:  0.14951574003950013
Ended iteration 359  Cost:  0.1493697268701097
Ended iteration 360  Cost:  0.1492248001279468
Ended iteration 361  Cost:  0.14908080615436986
Ended iteration 362  Cost:  0.14893728948571583
Ended iteration 363  Cost:  0.14879459850904714
Ended iteration 364  Cost:  0.1486530289738713
Ended iteration 365  Cost:  0.14851219561789597
Ended iteration 366  Cost:  0.1483720604969254
Ended iteration 367  Cost:  0.14823222717398504

Ended iteration 519  Cost:  0.13230066104339186
Ended iteration 520  Cost:  0.13222237278459129
Ended iteration 521  Cost:  0.13214523968271596
Ended iteration 522  Cost:  0.13206835535602116
Ended iteration 523  Cost:  0.13199134338850468
Ended iteration 524  Cost:  0.13191431673429602
Ended iteration 525  Cost:  0.13183728766643157
Ended iteration 526  Cost:  0.1317602698185999
Ended iteration 527  Cost:  0.13168336324260313
Ended iteration 528  Cost:  0.1316066717374542
Ended iteration 529  Cost:  0.131530524145779
Ended iteration 530  Cost:  0.13145465821385566
Ended iteration 531  Cost:  0.13137880912947178
Ended iteration 532  Cost:  0.1313023108763306
Ended iteration 533  Cost:  0.13122615684332145
Ended iteration 534  Cost:  0.1311502288556691
Ended iteration 535  Cost:  0.131074947572998
Ended iteration 536  Cost:  0.13100038187746232
Ended iteration 537  Cost:  0.1309258184782013
Ended iteration 538  Cost:  0.13085139162281573
Ended iteration 539  Cost:  0.1307771471848055
En

Ended iteration 692  Cost:  0.12029541731529224
Ended iteration 693  Cost:  0.12023426760763059
Ended iteration 694  Cost:  0.12017252456129336
Ended iteration 695  Cost:  0.12011032731290977
Ended iteration 696  Cost:  0.12004827822580794
Ended iteration 697  Cost:  0.11998581960235967
Ended iteration 698  Cost:  0.11992339033441295
Ended iteration 699  Cost:  0.11986068759490552
Ended iteration 700  Cost:  0.11979789404238077
Ended iteration 701  Cost:  0.11973532219643634
Ended iteration 702  Cost:  0.11967267233636888
Ended iteration 703  Cost:  0.11960917911641648
Ended iteration 704  Cost:  0.11954485357319096
Ended iteration 705  Cost:  0.11948012872451613
Ended iteration 706  Cost:  0.11941544212842348
Ended iteration 707  Cost:  0.11935050203961749
Ended iteration 708  Cost:  0.11928541235138324
Ended iteration 709  Cost:  0.1192204075564981
Ended iteration 710  Cost:  0.11915526267036929
Ended iteration 711  Cost:  0.11908957687979183
Ended iteration 712  Cost:  0.11902424082

Ended iteration 864  Cost:  0.1102751099832576
Ended iteration 865  Cost:  0.110218785086213
Ended iteration 866  Cost:  0.11016280857985748
Ended iteration 867  Cost:  0.11010759553353225
Ended iteration 868  Cost:  0.11005243450415388
Ended iteration 869  Cost:  0.10999733635547021
Ended iteration 870  Cost:  0.10994219011733876
Ended iteration 871  Cost:  0.10988711400686858
Ended iteration 872  Cost:  0.10983241970944699
Ended iteration 873  Cost:  0.10977786454500255
Ended iteration 874  Cost:  0.10972284422367853
Ended iteration 875  Cost:  0.10966797284094146
Ended iteration 876  Cost:  0.10961339508504153
Ended iteration 877  Cost:  0.10955893553083954
Ended iteration 878  Cost:  0.10950449671750857
Ended iteration 879  Cost:  0.10945119224957513
Ended iteration 880  Cost:  0.10939820838344895
Ended iteration 881  Cost:  0.10934536215906661
Ended iteration 882  Cost:  0.10929218000919994
Ended iteration 883  Cost:  0.10923936818107965
Ended iteration 884  Cost:  0.1091873818644

Ended iteration 1036  Cost:  0.10256048691933005
Ended iteration 1037  Cost:  0.102520347739615
Ended iteration 1038  Cost:  0.10248027534919331
Ended iteration 1039  Cost:  0.1024403070754647
Ended iteration 1040  Cost:  0.10240044319588487
Ended iteration 1041  Cost:  0.10236037749636702
Ended iteration 1042  Cost:  0.10232024053750977
Ended iteration 1043  Cost:  0.10228029346076228
Ended iteration 1044  Cost:  0.10223961577522175
Ended iteration 1045  Cost:  0.10219853024941195
Ended iteration 1046  Cost:  0.10215751006432082
Ended iteration 1047  Cost:  0.10211652686621327
Ended iteration 1048  Cost:  0.10207531812296491
Ended iteration 1049  Cost:  0.10203369572412507
Ended iteration 1050  Cost:  0.10199219323380106
Ended iteration 1051  Cost:  0.10195076032304613
Ended iteration 1052  Cost:  0.1019093647036149
Ended iteration 1053  Cost:  0.10186806013006136
Ended iteration 1054  Cost:  0.10182682953759925
Ended iteration 1055  Cost:  0.10178565607406483
Ended iteration 1056  Co

Ended iteration 1205  Cost:  0.09549813947597059
Ended iteration 1206  Cost:  0.09545795547625538
Ended iteration 1207  Cost:  0.0954177013787536
Ended iteration 1208  Cost:  0.09537746450452689
Ended iteration 1209  Cost:  0.09533702732930026
Ended iteration 1210  Cost:  0.09529642298564238
Ended iteration 1211  Cost:  0.09525472599749381
Ended iteration 1212  Cost:  0.09521286728547089
Ended iteration 1213  Cost:  0.09517109106899975
Ended iteration 1214  Cost:  0.09512967160508938
Ended iteration 1215  Cost:  0.09508750207010412
Ended iteration 1216  Cost:  0.0950449989417647
Ended iteration 1217  Cost:  0.09500250732896386
Ended iteration 1218  Cost:  0.09496004964445083
Ended iteration 1219  Cost:  0.09491763091285543
Ended iteration 1220  Cost:  0.09487524728364027
Ended iteration 1221  Cost:  0.09483289869926899
Ended iteration 1222  Cost:  0.09479054197287182
Ended iteration 1223  Cost:  0.09474817483424215
Ended iteration 1224  Cost:  0.09470591267012733
Ended iteration 1225  

Ended iteration 1373  Cost:  0.08800166990844219
Ended iteration 1374  Cost:  0.08795908336118377
Ended iteration 1375  Cost:  0.08791653433289855
Ended iteration 1376  Cost:  0.08787403490462078
Ended iteration 1377  Cost:  0.0878315823335577
Ended iteration 1378  Cost:  0.08778917881337882
Ended iteration 1379  Cost:  0.08774655535758924
Ended iteration 1380  Cost:  0.08770392439650544
Ended iteration 1381  Cost:  0.08766141319851532
Ended iteration 1382  Cost:  0.08761921361428056
Ended iteration 1383  Cost:  0.08757736764724056
Ended iteration 1384  Cost:  0.08753559699452064
Ended iteration 1385  Cost:  0.08749345887828962
Ended iteration 1386  Cost:  0.08745111851273159
Ended iteration 1387  Cost:  0.08740883820309824
Ended iteration 1388  Cost:  0.08736660539523364
Ended iteration 1389  Cost:  0.08732444949849232
Ended iteration 1390  Cost:  0.08728217121475627
Ended iteration 1391  Cost:  0.087239910519826
Ended iteration 1392  Cost:  0.08719767168529123
Ended iteration 1393  C

Ended iteration 1541  Cost:  0.08148505675643779
Ended iteration 1542  Cost:  0.08145102438458557
Ended iteration 1543  Cost:  0.08141705226621757
Ended iteration 1544  Cost:  0.08138280202999659
Ended iteration 1545  Cost:  0.08134846028475924
Ended iteration 1546  Cost:  0.081313833734336
Ended iteration 1547  Cost:  0.08127920986034906
Ended iteration 1548  Cost:  0.08124452526341852
Ended iteration 1549  Cost:  0.08120990945710548
Ended iteration 1550  Cost:  0.08117536676604319
Ended iteration 1551  Cost:  0.08114091332567351
Ended iteration 1552  Cost:  0.0811064548697962
Ended iteration 1553  Cost:  0.08107241320181802
Ended iteration 1554  Cost:  0.08103914582368654
Ended iteration 1555  Cost:  0.0810058382575352
Ended iteration 1556  Cost:  0.08097242084262367
Ended iteration 1557  Cost:  0.08093905820279902
Ended iteration 1558  Cost:  0.08090576690326498
Ended iteration 1559  Cost:  0.08087254720413926
Ended iteration 1560  Cost:  0.08083939481931751
Ended iteration 1561  Co

Ended iteration 1709  Cost:  0.0764068353395634
Ended iteration 1710  Cost:  0.0763824472075872
Ended iteration 1711  Cost:  0.07635810710455956
Ended iteration 1712  Cost:  0.07633259766396262
Ended iteration 1713  Cost:  0.07630631950000523
Ended iteration 1714  Cost:  0.07627999428752195
Ended iteration 1715  Cost:  0.07625367909678897
Ended iteration 1716  Cost:  0.076227784762034
Ended iteration 1717  Cost:  0.07620215422481814
Ended iteration 1718  Cost:  0.07617663659441627
Ended iteration 1719  Cost:  0.07615114375939705
Ended iteration 1720  Cost:  0.07612562731773063
Ended iteration 1721  Cost:  0.07610027551569377
Ended iteration 1722  Cost:  0.07607505840949348
Ended iteration 1723  Cost:  0.0760499066554757
Ended iteration 1724  Cost:  0.07602476412890953
Ended iteration 1725  Cost:  0.07599968339110752
Ended iteration 1726  Cost:  0.0759746625119396
Ended iteration 1727  Cost:  0.07594979658310808
Ended iteration 1728  Cost:  0.07592538015541841
Ended iteration 1729  Cost

Ended iteration 1878  Cost:  0.07294369820034416
Ended iteration 1879  Cost:  0.07292578884497768
Ended iteration 1880  Cost:  0.07290797378831307
Ended iteration 1881  Cost:  0.07289019279728239
Ended iteration 1882  Cost:  0.07287246998946738
Ended iteration 1883  Cost:  0.0728547745606155
Ended iteration 1884  Cost:  0.07283709076024983
Ended iteration 1885  Cost:  0.07281944542611003
Ended iteration 1886  Cost:  0.07280182945422765
Ended iteration 1887  Cost:  0.072784257236278
Ended iteration 1888  Cost:  0.07276671180814565
Ended iteration 1889  Cost:  0.07274912527735901
Ended iteration 1890  Cost:  0.07273155917596111
Ended iteration 1891  Cost:  0.07271401928559906
Ended iteration 1892  Cost:  0.07269650501855177
Ended iteration 1893  Cost:  0.07267901459865096
Ended iteration 1894  Cost:  0.07266155322249981
Ended iteration 1895  Cost:  0.07264425842366395
Ended iteration 1896  Cost:  0.07262706719034272
Ended iteration 1897  Cost:  0.07260991854603902
Ended iteration 1898  C

In [104]:
probs = forward_prop_1hl(fashionValidationParams, model_1hl)
print("Validation cost: ", neuralNetworkCostFunction(probs, fashionValidationTarget))


Validation cost:  0.17863184961677483
