In [1]:
import numpy as np     
import matplotlib.pyplot as plt 
import scipy

In [2]:
def initialize_parameters(layer_dims):
    """
    Arguments:
    layer_dims -- python array (list) containing the dimensions of each layer in our network
    
    Returns:
    parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL":
                    Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1])
                    bl -- bias vector of shape (layer_dims[l], 1)
    """
    
    np.random.seed(1)
    parameters = {}
    L = len(layer_dims)            # number of layers in the network

    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[int(l)], layer_dims[int(l-1)]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[int(l)], 1))
        

        
    return parameters


In [3]:
parameters = initialize_parameters([3, 2, 1])
parameters

{'W1': array([[ 0.01624345, -0.00611756, -0.00528172],
        [-0.01072969,  0.00865408, -0.02301539]]),
 'b1': array([[0.],
        [0.]]),
 'W2': array([[ 0.01744812, -0.00761207]]),
 'b2': array([[0.]])}

In [4]:
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z)) 
    cache = Z 
    return A, cache

def leaky_relu(): 
    A = np.max(0.1 * Z, Z) 
    cache = Z 
    return A, cache


def backward_sigmoid(dA, cache):
    Z = cache 
    dZ = dA * (sigmoid(Z) * (1 - sigmoid(Z)))

    return dZ

def backward_leaky_relu(dA, cache):
    Z = cache
    dZ = np.array(dA)

    return  dZ 

In [5]:
def linear_forward(A, W, b): 
    Z = np.dot(W, A) + b
    cache = (A, W, b)

    return Z, cache


def linear_activation_forward(A_prev, W, b, activation): 

    if activation == "leaky_relu": 
        Z, linear_cache = linear_forward(A_prev, W, b) 
        A, activation_cache = leaky_relu(Z)

    elif activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)


    cache = (linear_cache, activation_cache)
    return A, cache
    

    

In [6]:
def forward(X, parameters): 
    caches = []
    A = X 
    L = len(parameters) // 2 

    for l in range(1, L):
        A_prev = A 
        A, cache = linear_activation_forward(A_prev=A_prev,
                                            W=parameters["W" + str(l)],
                                            b=parameters["b" + str(l)], 
                                            activation="leaky_relu")

        caches.append(cache)

    AL, cache = linear_activation_forward(A_prev=A,
                                            W=parameters["W" + str(L)],
                                            b=parameters["b" + str(L)], 
                                            activation="sigmoid")
    cachesa.append(cache)
    return AL, caches
                            

In [7]:
def compute_cost(AL, Y): 
    m = Y.shape[1]

    cost = - (np.dot(Y, np.log(AL).T) + np.dot(1 - Y, np.log(1 - AL).T)) / m
    cost = float(np.squeeze(cost))

    return cost
    

In [8]:
def linear_backward(dZ, cache): 
    A_prev, W, b = cache
    m = A_prev.shape[1]

    dW = np.dot(dZ, A_prev.T) / m
    db = np.sum(dZ, axis=1, keepdims=True) / m 
    dA_prev = np.dot(W.T, dZ)

    return dA_prev, dW, db


def linear_activation_backward(dA, cache, activation): 

    linear_cache, activation_cache = cache

    if activation == "sigmoid": 
        dZ = backward_sigmoid(dA, activation_cache) 
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    elif activation == "leaky_relu": 
        dZ = backward_leaky_relu(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)

    return dA_prev, dW, db 

In [9]:
def backward(AL, Y, caches): 
    
    grads = {}
    m = Y.shape[1]
    L = len(caches)

    dAL = - np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)
    current_cache = caches[L- 1]
    dA_prev_temp, dW_temp, db_temp = linear_activation_backward(dA = dAL, cache = current_cache, activation = "sigmoid")
    grads["dA" + str(L-1)] = dA_prev_temp
    grads["dW" + str(L)] = dW_temp
    grads["db" + str(L)] = db_temp

    for l in reversed(range(L - 1)): # from l=L-2 to L=0 
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 1)], current_cache, activation="leaky_relu")
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp

    return grads




In [10]:
def update_parameters(parameters, grads, learning_rate): 
    L = len(parameters) // 2 
    for l in range(L):
        # parameters["W" + str(l+1)] = ...
        # parameters["b" + str(l+1)] = ...
        # YOUR CODE STARTS HERE
        parameters["W" + str(l+1)] -= learning_rate * grads["dW" + str(l + 1)]
        parameters["b" + str(l+1)] -= learning_rate * grads["db" + str(l + 1)]
        # YOUR CODE ENDS HERE
    return parameters

In [11]:
def model(layers_dims, X, Y, num_iterations, learning_rate = 0.0075, print_cost=False):
    
    np.random.seed(1)
    costs = []

    parameters = initialize_parameters(layers_dims)

    
    for i in range(num_iterations): 
        AL, caches = forward(X, parameters)
        cost = compute_cost(AL, Y)
        grads = backward(AL, Y, caches)
        parameters = update_parameters(parameters, grads, learning_rate)

        if print_cost and i % 100 == 0: 
            print(f"iteration : {i}, cost: {cost}")
        if i % 100 == 0:
            costs.append(cost)

    return parameters, costs



In [12]:
import h5py

def load_data():
    train_dataset = h5py.File('datasets/train_catvnoncat.h5', "r")
    train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # your train set features
    train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # your train set labels

    test_dataset = h5py.File('datasets/test_catvnoncat.h5', "r")
    test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # your test set features
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # your test set labels

    classes = np.array(test_dataset["list_classes"][:]) # the list of classes
    
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
    
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


In [13]:
train_x_orig, train_y, test_x_orig, test_y, classes = load_data() 

In [14]:
# Reshape the training and test examples 
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T   # The "-1" makes reshape flatten the remaining dimensions
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Standardize data to have feature values between 0 and 1.
train_x = train_x_flatten/255.
test_x = test_x_flatten/255.

print ("train_x's shape: " + str(train_x.shape))
print ("test_x's shape: " + str(test_x.shape))

train_x's shape: (12288, 209)
test_x's shape: (12288, 50)


In [15]:
layers_dims = [12288, 20, 7, 5, 1]

In [18]:
%debug magic
parameters, costs = model(train_x, train_y, layers_dims, num_iterations = 2500, print_cost = True)

NOTE: Enter 'c' at the ipdb>  prompt to continue execution.
[31m---------------------------------------------------------------------------[39m
[31mNameError[39m                                 Traceback (most recent call last)
[31mNameError[39m: name 'magic' is not defined


TypeError: only integer scalar arrays can be converted to a scalar index