# Building a Deep Learning Neural Network



# import packages

In [56]:
import numpy as np
import pickle
import time
import scipy
from scipy.special import expit
from PIL import Image
from scipy import ndimage

## The sigmoid function

In [57]:

def sigmoid(x):
    """
    :param x: numpy array of shape (N,D) representing input layer
    :return:
    out = numpy array of shape (N,D) representing output of sigmoid layer
    cache = storing x for backpropagation
    """
    cache = x.copy()
    out = 1./(1. + np.exp(-x))
    #out = 1./(1. + expit(-x))
    return out, cache

In [58]:
# test sigmoid

x=np.array([[1.0, -1.0, 0.0],[2.0, 0.0, -2.0]])
out, cache = sigmoid(x)
print("out: ", out)
print("cache: ", cache)

out:  [[ 0.73105858  0.26894142  0.5       ]
 [ 0.88079708  0.5         0.11920292]]
cache:  [[ 1. -1.  0.]
 [ 2.  0. -2.]]


In [59]:
def dSigmoid(dout, cache):
    """
    :param dout: numpy array of shape (N,D) representing gradients of output layer
    :param cache: numpy array of shape (N,D) used for backpropagation
    :return:
    dx = numpy array of shape (N,D) representing gradients of input layer
    """
    x = cache
    out = 1./(1 + np.exp(-x))
    dx = out*(1-out)
    return dx


In [60]:
# test dSigmoid
x=np.array([[ 0.73105858, 0.26894142, 0.5],[ 0.88079708, 0.5, 0.11920292]])
cache= np.array([[ 1., -1., 0.],[ 2., 0., -2.]])
dx = dSigmoid(x, cache)
print("dx: ", dx)


dx:  [[ 0.19661193  0.19661193  0.25      ]
 [ 0.10499359  0.25        0.10499359]]


In [61]:
def tanh(x):
    """
    :param x: numpy array of shape (N,D) representing input layer
    :return:
    out = numpy array of shape (N,D) representing output of tanh layer
    cache = storing x for backpropagation
    """
    cache = x.copy()
    out = np.tanh(x)
    return out, cache


In [62]:
# test tanh
x=np.array([[1.0, -1.0, 0.0],[2.0, 0.0, -2.0]])
out, cache = tanh(x)
print("out: ", out)
print("cache: ", cache)

out:  [[ 0.76159416 -0.76159416  0.        ]
 [ 0.96402758  0.         -0.96402758]]
cache:  [[ 1. -1.  0.]
 [ 2.  0. -2.]]


In [63]:
def dTanh(dout, cache):
    """
    :param dout: numpy array of shape (N,D) representing gradients of output layer
    :param cache: numpy array of shape (N,D) representing input layer for backpropagation
    :return:
    dx = numpy array of shape (N,D) representing gradients of input layer
    """
    x = cache
    out = np.tanh(x)
    dx = dout*(1-out**2)
    return dx



In [64]:
# test dTanh
x=np.array([[ 0.73105858, 0.26894142, 0.5],[ 0.88079708, 0.5, 0.11920292]])
cache= np.array([[ 1., -1., 0.],[ 2., 0., -2.]])
dx = dTanh(x, cache)
print("dx: ", dx)


dx:  [[ 0.30702585  0.1129485   0.5       ]
 [ 0.06222904  0.5         0.00842178]]


In [65]:
def relu(x):
    """
    :param x: numpy array of shape (N,D) representing input layer
    :return:
    out = numpy array of shape (N,D) representing output of relu layer
    cache = storing x for backpropagation
    """
    cache = x.copy()
    out = x*(x > 0)
    #out = np.maximum(0.01, x)
    return out, cache


In [66]:
# test relu
x=np.array([[1.0, -1.0, 0.0],[2.0, 0.0, -2.0]])
out, cache = relu(x)
print("out: ", out)
print("cache: ", cache)

out:  [[ 1. -0.  0.]
 [ 2.  0. -0.]]
cache:  [[ 1. -1.  0.]
 [ 2.  0. -2.]]


In [67]:


def dRelu(dout, cache):
    """
    :param dout: numpy array of shape (N,D) representing gradients of output layer
    :param cache: numpy array of shape (N,D) representing input layer for backpropagation
    :return:
    dx = numpy array of shape (N,D) representing gradients of input layer
    """
    x = cache
    #dx = dout*(x > 0)
    dx = 1. * (x > 0)
    return dx


In [68]:
# test dRelu
x=np.array([[ 0.73105858, 0.26894142, 0.5],[ 0.88079708, 0.5, 0.11920292]])
cache= np.array([[ 1., -1., 0.],[ 2., 0., -2.]])
dx = dRelu(x, cache)
print("dx: ", dx)

dx:  [[ 1.  0.  0.]
 [ 1.  0.  0.]]


In [69]:

def initialize_parameters(layer_dims):
  """
  Arguments:
  layer_dims -- python array (list) containing the dimensions of each layer in
    our network
  Returns:
  parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
  Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
  bl -- bias vector of shape (layer_dims[l], 1)
  """

  np.random.seed(3)
  parameters = {}
  L = len(layer_dims)
  # number of layers in the network
  for l in range(1, L):
    ### START CODE HERE ### (≈ 2 lines of code)
    parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
    parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    ### END CODE HERE ###
    assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1]))
    assert(parameters['b' + str(l)].shape == (layer_dims[l], 1))
  return parameters


In [70]:
def linear_forward(A, W, b):
  """
  Implement the linear part of a layer's forward propagation.
  Arguments:
  A -- activations from previous layer (or input data): (size of previous layer, number of examples)
  W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
  b -- bias vector, numpy array of shape (size of the current layer, 1)
  Returns:
  Z -- the input of the activation function, also called pre-activation parameter
  cache -- a python dictionary containing "A", "W" and "b" ; stored for computing the backward pass efficiently
  """

  ### START CODE HERE ### (≈ 1 line of code)
  Z = W.dot(A) + b
  ### END CODE HERE ###
  assert(Z.shape == (W.shape[0], A.shape[1]))
  cache = (A, W, b)
  return Z, cache


In [71]:
def linear_activation_forward(A_prev, W, b, activation):
  """
  Implement the forward propagation for the LINEAR->ACTIVATION layer
  Arguments:
  A_prev -- activations from previous layer (or input data): (size of previouslayer, number of examples)
  W -- weights matrix: numpy array of shape (size of current layer, size of previous layer)
  b -- bias vector, numpy array of shape (size of the current layer, 1)
  activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
  Returns:
  A -- the output of the activation function, also called the post-activation value
  cache -- a python dictionary containing "linear_cache" and "activation_cache";
  stored for computing the backward pass efficiently
  """
  Z, linear_cache = linear_forward(A_prev, W, b)
  if activation == "sigmoid":
    # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
    A, activation_cache = sigmoid(Z)
  elif activation == "relu":
    # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
    A, activation_cache = relu(Z)
  assert (A.shape == (W.shape[0], A_prev.shape[1]))
  cache = (linear_cache, activation_cache)
  return A, cache


In [72]:
def L_model_forward(X, parameters):
  """
  Implement forward propagation for the [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID computation
  Arguments:
  X -- data, numpy array of shape (input size, number of examples)
  parameters -- output of initialize_parameters_deep()
  Returns:
  AL -- last post-activation value
  caches -- list of caches containing:
  every cache of linear_relu_forward() (there are L-1 of them, indexed from 0 to L-2)
  the cache of linear_sigmoid_forward() (there is one, indexed L-1)
  """
  caches = []
  A = X
  L = len(parameters) // 2 # number of layers in the neural network
  # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
  for l in range(1, L):
    # ??? A_prev = A
    ### START CODE HERE ### (≈ 2 lines of code)
    W = parameters['W' + str(l)]
    b = parameters['b' + str(l)]
    A, cache = linear_activation_forward(A, W, b, activation = "relu")
    caches.append(cache)
    ### END CODE HERE ###
  # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
  ### START CODE HERE ### (≈ 2 lines of code)
  W = parameters['W' + str(L)]
  b = parameters['b' + str(L)]
  AL, cache = linear_activation_forward(A, W, b, activation = "sigmoid")

  caches.append(cache)
  ### END CODE HERE ###
  assert(AL.shape == (1,X.shape[1]))
  return AL, caches


In [73]:
def compute_cost(AL, Y):
  """
  Implement the cost function defined by equation (7).
  Arguments:
  AL -- probability vector corresponding to your label predictions, shape (1, number of examples)
  Y -- true "label" vector (for example: containing 0 if non-cat, 1 if cat), shape (1, number of examples)
  Returns:
  cost -- cross-entropy cost
  """
  m = Y.shape[1]
  # Compute loss from aL and y.
  ### START CODE HERE ### (≈ 1 lines of code)
  cost = - np.sum((Y.dot(np.log(AL.T))+((1-Y).dot(np.log(1-AL.T)))))/m
  ### END CODE HERE ###
  cost = np.squeeze(cost)
  # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
  assert(cost.shape == ())
  return cost


In [74]:
def linear_backward(dZ, cache):
  """
  Implement the linear portion of backward propagation for a single layer (layer l)
  Arguments:
  dZ -- Gradient of the cost with respect to the linear output (of current layer l)
  cache -- tuple of values (A_prev, W, b) coming from the forward propagation in the current layer
  Returns:
  dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
  dW -- Gradient of the cost with respect to W (current layer l), same shape as W
  db -- Gradient of the cost with respect to b (current layer l), same shape as b
  """
  A_prev, W, b = cache
  m = A_prev.shape[1]
  ### START CODE HERE ### (≈ 3 lines of code)
  dW = dZ.dot(A_prev.T)/m
  db = np.sum(dZ, axis=1, keepdims=True)/m
  dA_prev = W.T.dot(dZ)
  ### END CODE HERE ###
  assert (dA_prev.shape == A_prev.shape)
  assert (dW.shape == W.shape)
  assert (db.shape == b.shape)
  return dA_prev, dW, db


In [75]:
def linear_activation_backward(dA, cache, activation):
  """
  Implement the backward propagation for the LINEAR->ACTIVATION layer.
  Arguments:
  dA -- post-activation gradient for current layer lcache -- tuple of values (linear_cache, activation_cache) we store for computing backward propagation efficiently
  activation -- the activation to be used in this layer, stored as a text string: "sigmoid" or "relu"
  Returns:
  dA_prev -- Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
  dW -- Gradient of the cost with respect to W (current layer l), same shape as W
  db -- Gradient of the cost with respect to b (current layer l), same shape as b
  """

  linear_cache, activation_cache = cache
  if activation == "relu":
    dZ = dRelu(dA, activation_cache)
  elif activation == "sigmoid":
    dZ = dSigmoid(dA, activation_cache)
  dA_prev, dW, db = linear_backward(dZ, linear_cache)
  return dA_prev, dW, db


In [76]:
def L_model_backward(AL, Y, caches):
  """
  Implement the backward propagation for the [LINEAR->RELU] * (L-1) -> LINEAR -> SIGMOID group
  Arguments:
  AL -- probability vector, output of the forward propagation (L_model_forward())
  Y -- true "label" vector (containing 0 if non-cat, 1 if cat)
  caches -- list of caches containing:
    every cache of linear_activation_forward() with "relu" (it's caches[l], for l in range(L-1) i.e l = 0...L-2)
    the cache of linear_activation_forward() with "sigmoid" (it's caches[L-1])
  Returns:
  grads -- A dictionary with the gradients
    grads["dA" + str(l)] = ...
    grads["dW" + str(l)] = ...
    grads["db" + str(l)] = ...
  """
  grads = {}
  L = len(caches) # the number of layers
  m = AL.shape[1]
  Y = Y.reshape(AL.shape) # after this line, Y is the same shape as AL
  # Initializing the backpropagation
  ### START CODE HERE ### (1 line of code)
  dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
  ### END CODE HERE ###
  # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "AL, Y, caches". Outputs: "grads["dAL"], grads["dWL"], grads["dbL"]
  ### START CODE HERE ### (approx. 2 lines)
  current_cache = caches[L-1]
  grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
  ### END CODE HERE ###
  for l in reversed(range(L-1)):
    # lth layer: (RELU -> LINEAR) gradients.
    # Inputs: "grads["dA" + str(l + 2)], caches".
    # Outputs: "grads["dA" + str(l + 1)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)]
    ### START CODE HERE ### (approx. 5 lines)
    current_cache = caches[l]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, "relu")
    grads["dA" + str(l + 1)] = dA_prev_temp
    grads["dW" + str(l + 1)] = dW_temp
    grads["db" + str(l + 1)] = db_temp
    ### END CODE HERE ###
  return grads


In [77]:
def update_parameters(parameters, grads, learning_rate):
  """
  Update parameters using gradient descent
  Arguments:
  parameters -- python dictionary containing your parameters
  grads -- python dictionary containing your gradients, output of L_model_backward
  Returns:
  parameters -- python dictionary containing your updated parameters
    parameters["W" + str(l)] = ...
    parameters["b" + str(l)] = ...
  """

  L = len(parameters) // 2 # number of layers in the neural network
  # Update rule for each parameter. Use a for loop.
  ### START CODE HERE ### (≈ 3 lines of code)
  for l in range(L):
    parameters["W" + str(l+1)] -= learning_rate * grads["dW" + str(l+1)]
    parameters["b" + str(l+1)] -= learning_rate * grads["db" + str(l+1)]
    ### END CODE HERE ###
  return parameters


In [78]:
def two_layer_model(X, Y, layers_dims, learning_rate = 0.0075, num_iterations = 3000, print_cost=False):
  """
  Implements a two-layer neural network: LINEAR->RELU->LINEAR->SIGMOID.
  Arguments:
  X -- input data, of shape (n_x, number of examples)
  Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
  layers_dims -- dimensions of the layers (n_x, n_h, n_y)
  num_iterations -- number of iterations of the optimization loop
  learning_rate -- learning rate of the gradient descent update rule
  print_cost -- If set to True, this will print the cost every 100 iterations
  Returns:
  parameters -- a dictionary containing W1, W2, b1, and b2
  """
  np.random.seed(1)
  grads = {}
  costs = []
  # to keep track of the cost
  m = X.shape[1]
  (n_x, n_h, n_y) = layers_dims
  # number of examples
  # Initialize parameters dictionary, by calling one of the functions you'd previously implemented
  ### START CODE HERE ### (≈ 1 line of code)
  parameters = initialize_parameters(layers_dims)
  ### END CODE HERE ###
  # Get W1, b1, W2 and b2 from the dictionary parameters.
  W1 = parameters["W1"]
  b1 = parameters["b1"]
  W2 = parameters["W2"]
  b2 = parameters["b2"]
  # Loop (gradient descent)
  for i in range(0, num_iterations):
    # Forward propagation: LINEAR -> RELU -> LINEAR -> SIGMOID. Inputs: "X, W1, b1". Output: "A1, cache1, A2, cache2".
    ### START CODE HERE ### (≈ 2 lines of code)
    A1, cache1 = linear_activation_forward(X, W1, b1, "relu")
    A2, cache2 = linear_activation_forward(A1, W2, b2, "sigmoid")
    ### END CODE HERE ###
    # Compute cost
    ### START CODE HERE ### (≈ 1 line of code)
    cost = compute_cost(A2, Y)
    ### END CODE HERE ###
    # Initializing backward propagation
    dA2 = - (np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))
    # Backward propagation. Inputs: "dA2, cache2, cache1". Outputs: "dA1, dW2, db2; also dA0 (not used), dW1, db1".
    ### START CODE HERE ### (≈ 2 lines of code)
    dA1, dW2, db2 = linear_activation_backward(dA2, cache2, "sigmoid")
    dA0, dW1, db1 = linear_activation_backward(dA1, cache1, "relu")
    ### END CODE HERE ###
    # Set grads['dWl'] to dW1, grads['db1'] to db1, grads['dW2'] to dW2, grads['db2'] to db2
    grads['dW1'] = dW1
    grads['db1'] = db1
    grads['dW2'] = dW2
    grads['db2'] = db2
    # Update parameters.
    ### START CODE HERE ### (approx. 1 line of code)
    parameters = update_parameters(parameters, grads, learning_rate)
    ### END CODE HERE ###
    # Retrieve W1, b1, W2, b2 from parameters
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    # Print the cost every 100 training example
    if print_cost and i % 100 == 0:
      print("Cost after iteration {}: {}".format(i, np.squeeze(cost)))
    if print_cost and i % 100 == 0:
      costs.append(cost)
  return parameters


In [79]:
def unpickle(file):
    with open(file, 'rb') as fo:
        dict = pickle.load(fo, encoding='bytes')
    return dict

In [80]:
########################################################################
#
# Functions for downloading and extracting data-files from the internet.
#
# Implemented in Python 3.5
#
########################################################################
#
# This file is part of the TensorFlow Tutorials available at:
#
# https://github.com/Hvass-Labs/TensorFlow-Tutorials
#
# Published under the MIT License. See the file LICENSE for details.
#
# Copyright 2016 by Magnus Erik Hvass Pedersen
#
########################################################################

import sys
import os
import urllib.request
import tarfile
import zipfile

########################################################################


def _print_download_progress(count, block_size, total_size):
    """
    Function used for printing the download progress.
    Used as a call-back function in maybe_download_and_extract().
    """

    # Percentage completion.
    pct_complete = float(count * block_size) / total_size

    # Status-message. Note the \r which means the line should overwrite itself.
    msg = "\r- Download progress: {0:.1%}".format(pct_complete)

    # Print it.
    sys.stdout.write(msg)
    sys.stdout.flush()


########################################################################


def maybe_download_and_extract(url, download_dir):
    """
    Download and extract the data if it doesn't already exist.
    Assumes the url is a tar-ball file.
    :param url:
        Internet URL for the tar-file to download.
        Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    :param download_dir:
        Directory where the downloaded file is saved.
        Example: "data/CIFAR-10/"
    :return:
        Nothing.
    """

    # Filename for saving the file downloaded from the internet.
    # Use the filename from the URL and add it to the download_dir.
    filename = url.split('/')[-1]
    file_path = os.path.join(download_dir, filename)

    # Check if the file already exists.
    # If it exists then we assume it has also been extracted,
    # otherwise we need to download and extract it now.
    if not os.path.exists(file_path):
        # Check if the download directory exists, otherwise create it.
        if not os.path.exists(download_dir):
            os.makedirs(download_dir)

        # Download the file from the internet.
        file_path, _ = urllib.request.urlretrieve(url=url,
                                                  filename=file_path,
                                                  reporthook=_print_download_progress)

        print()
        print("Download finished. Extracting files.")

        if file_path.endswith(".zip"):
            # Unpack the zip-file.
            zipfile.ZipFile(file=file_path, mode="r").extractall(download_dir)
        elif file_path.endswith((".tar.gz", ".tgz")):
            # Unpack the tar-ball.
            tarfile.open(name=file_path, mode="r:gz").extractall(download_dir)

        print("Done.")
    else:
        print("Data has apparently already been downloaded and unpacked.")


########################################################################

In [81]:
def loadData(url, data_dir):
    """
    Download and extract the data if it doesn't already exist.
    Assumes the url is a tar-ball file.
    :param url:
        Internet URL for the tar-file to download.
        Example: "https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    :param data_dir:
        Directory where the downloaded file is saved.
        Example: "data/CIFAR-10/"
    :return:
        dict
    """

    # use function maybe_download_and_extract from download.py to download CIFAR-10 data
    maybe_download_and_extract(url, data_dir)

    # each data file unpickled from CIFAR-10 has dict keys:  dict_keys([b'batch_label', b'labels', b'data', b'filenames'])
    for i in range(4):
        # Load the images and class-numbers from the data-file.
        dict = unpickle(data_dir + "cifar-10-batches-py/data_batch_" + str(i + 1))
				# print("\ndict keys: ", dict.keys())
        if(i == 0):
            X = dict[b'data']
            Y = dict[b'labels']
        else:
            X = np.concatenate((X, dict[b'data'])) 
            Y = np.concatenate((Y, dict[b'labels']))

    X_train = np.concatenate((X, dict[b'data'])).T 
    Y_train = np.concatenate((Y, dict[b'labels']))

    dict = unpickle(data_dir + "cifar-10-batches-py/test_batch");
    X_test = dict[b'data'].T
    Y_test = np.array(dict[b'labels'])
    Y_train = np.reshape(Y_train, (1, len(Y_train)))
    Y_test = np.reshape(Y_test, (1, len(Y_test)))

    raw = unpickle(data_dir + "cifar-10-batches-py/batches.meta")[b'label_names']
    # Convert from binary strings.
    names = [x.decode('utf-8') for x in raw]

    return X_train, Y_train, X_test, Y_test, names


In [82]:
np.random.seed(1)

origtrain_x, train_y, origtest_x, test_y, classes = loadData("https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz", "/media/duo/extra/python/tests/data/CIFAR-10/")

#reduce examples and tests
origtrain_x = origtrain_x[0:,0:400]
train_y = train_y[0:,0:400]
origtest_x = origtest_x[0:,0:50]
test_y = test_y[0:,0:50]

# Standardize data to have feature values between 0 and 1
train_x = origtrain_x/255
test_x = origtest_x/255

# Explore your dataset
m_train = train_x.shape[1]
num_px = train_x.shape[0]
m_test = test_x.shape[1]

print("Number of training examples: " + str(m_train))
print("Number of testing examples: " + str(m_test))
print("train_x shape: " + str(train_x.shape))
print("train_y shape: " + str(train_y.shape))
print("test_x shape: " + str(test_x.shape))
print("test_y shape: " + str(test_y.shape))
print("classes: ", classes)

# build and test a 2 layer neural network
### CONSTANTS DEFINING THE MODEL ####
n_x = 3072
# num_px * num_px * 3
n_h = 7
n_y = 1
layers_dims = (n_x, n_h, n_y)


parameters = two_layer_model(train_x, train_y, layers_dims = (n_x, n_h, n_y), num_iterations = 2500, print_cost=True)



PermissionError: [Errno 13] Permission denied: '/media/duo/extra'