In [10]:
import numpy as np
import matplotlib.pyplot as plt
import h5py
import scipy
from PIL import Image
from scipy import ndimage
from lr_utils import load_dataset

In [19]:
import zipfile

zf = zipfile.ZipFile('datasets/train_catvnoncat_h5.zip', mode='w')

try:

    zf.write('datasets/train_catvnoncat.h5')

    zf.write('datasets/test_catvnoncat.h5')

finally:

    zf.close()

In [20]:
# Loading the data (cat/non-cat)
train_set_x_orig, train_set_y, test_set_x_orig, test_set_y, classes = load_dataset()


In [21]:
train_set_x_orig

array([[[[ 17,  31,  56],
         [ 22,  33,  59],
         [ 25,  35,  62],
         ..., 
         [  1,  28,  57],
         [  1,  26,  56],
         [  1,  22,  51]],

        [[ 25,  36,  62],
         [ 28,  38,  64],
         [ 30,  40,  67],
         ..., 
         [  1,  27,  56],
         [  1,  25,  55],
         [  2,  21,  51]],

        [[ 32,  40,  67],
         [ 34,  42,  69],
         [ 35,  42,  70],
         ..., 
         [  1,  25,  55],
         [  0,  24,  54],
         [  1,  21,  51]],

        ..., 
        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ..., 
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ..., 
         [  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0]],

        [[  0,   0,   0],
         [  0,   0,   0],
         [  0,   0,   0],
         ..., 
         [  0,   0,   0],
  

In [22]:
train_set_x_orig.shape
# 209 rows 
# 64,64,3 signifies each image is of shape num_px, num_px, 3 where 3 signifies 3 channels. Each image is square(height= num_px and width= num_px)

(209, 64, 64, 3)

In [24]:
x= np.random.randn(5,1)
x

array([[-0.13724943],
       [ 1.04217794],
       [ 0.57738677],
       [ 1.19174757],
       [ 1.13005384]])

In [25]:
x.shape

(5, 1)

In [32]:
test_set_x_orig.shape

(50, 64, 64, 3)

 #Exercise: Find the values for:
- m_train (number of training examples)
- m_test (number of test examples)
- num_px (= height = width of a training image)
Remember that train_set_x_orig is a numpy-array of shape (m_train, num_px, num_px, 3). For instance, you can access m_train by writing train_set_x_orig.shape[0].

In [33]:
m_train=train_set_x_orig.shape[0]

In [38]:
print ("Number of training examples: m_train = " + str(m_train))

Number of training examples: m_train = 209


In [39]:
m_test=test_set_x_orig.shape[0]
print ("Number of test examples: m_test = " + str(m_test))

Number of test examples: m_test = 50


## Exercise: 
Reshape the training and test data sets so that images of size (num_px, num_px, 3) are flattened into single vectors of shape (num_px  ∗∗  num_px  ∗∗  3, 1).


---A trick when you want to flatten a matrix X of shape (a,b,c,d) to a matrix X_flatten of shape (b ∗∗ c ∗∗ d, a) is to use:
#### X_flatten = X.reshape(X.shape[0], -1).T      # X.T is the transpose of X

In [47]:

train_set_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0],-1).T
print ("train_set_x_flatten shape: " + str(train_set_x_flatten.shape))

train_set_x_flatten shape: (12288, 209)


In [48]:
test_set_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0],-1).T
test_set_x_flatten.shape

(12288, 50)

##### To represent color images, the red, green and blue channels (RGB) must be specified for each pixel, and so the pixel value is actually a vector of three numbers ranging from 0 to 255.
One common preprocessing step in machine learning is to center and standardize your dataset,

In [49]:
train_set_x = train_set_x_flatten/255.
test_set_x = test_set_x_flatten/255.

In [50]:
train_set_x

array([[ 0.06666667,  0.76862745,  0.32156863, ...,  0.56078431,
         0.08627451,  0.03137255],
       [ 0.12156863,  0.75294118,  0.27843137, ...,  0.60784314,
         0.09411765,  0.10980392],
       [ 0.21960784,  0.74509804,  0.26666667, ...,  0.64705882,
         0.09019608,  0.20784314],
       ..., 
       [ 0.        ,  0.32156863,  0.54117647, ...,  0.33333333,
         0.01568627,  0.        ],
       [ 0.        ,  0.31372549,  0.55294118, ...,  0.41960784,
         0.01960784,  0.        ],
       [ 0.        ,  0.31764706,  0.55686275, ...,  0.58431373,
         0.        ,  0.        ]])

In [53]:
test_set_x

array([[ 0.61960784,  0.45098039,  1.        , ...,  0.16078431,
         0.07058824,  0.52156863],
       [ 0.40784314,  0.43137255,  0.99215686, ...,  0.18431373,
         0.07058824,  0.63921569],
       [ 0.3254902 ,  0.43529412,  0.99607843, ...,  0.32941176,
         0.0627451 ,  0.29411765],
       ..., 
       [ 0.67843137,  0.67058824,  0.52156863, ...,  0.71764706,
         0.56470588,  0.01960784],
       [ 0.50196078,  0.69019608,  0.39607843, ...,  0.55294118,
         0.5372549 ,  0.08627451],
       [ 0.43137255,  0.72941176,  0.4745098 , ...,  0.45490196,
         0.42352941,  0.01960784]])

## General Architecture of the learning algorithm

## Building the parts of our algorithm
The main steps for building a Neural Network are:
### Define the model structure (such as number of input features)
### Initialize the model's parameters
## Loop:
### Calculate current loss (forward propagation)
### Calculate current gradient (backward propagation)
### Update parameters (gradient descent)
You often build 1-3 separately and integrate them into one function we call model().


In [54]:
## Helper function- Sigmoid

In [55]:
def sigmoid(z):
    s=None
    s=1/(1+np.exp(-z))
    return s

In [57]:
# initialization function

[H[2J

In [69]:
def initialize_with_zeros(dim):
    w = np.zeros((dim,1))
    b=0
    return w,b

In [59]:
# forward and backward propogation

Exercise:** Implement a function `propagate()` that computes the cost function and its gradient.

**Hints**:

Forward Propagation:
- You get X
- You compute $A = \sigma(w^T X + b) = (a^{(0)}, a^{(1)}, ..., a^{(m-1)}, a^{(m)})$
- You calculate the cost function: $J = -\frac{1}{m}\sum_{i=1}^{m}y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})$

Here are the two formulas you will be using: 

$$ \frac{\partial J}{\partial w} = \frac{1}{m}X(A-Y)^T\tag{7}$$
$$ \frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^m (a^{(i)}-y^{(i)})\tag{8}$$

In [60]:
def propagate(w, b, X, Y):
    m = X.shape[1]
    
    # FORWARD PROPAGATION (FROM X TO COST)
    ### START CODE HERE ### (≈ 2 lines of code)
    A = None # compute activation
    A= sigmoid(np.dot(w.T,X)+b)
    cost = None   # compute cost
    cost=(-1/m)*np.sum(Y*np.log(A)+(1-Y)*np.log(1-A))
    ### END CODE HERE ###
    
    # BACKWARD PROPAGATION (TO FIND GRAD)
    ### START CODE HERE ### (≈ 2 lines of code)
    dw = None
    db = None
    dw= (np.dot(X,np.transpose(A-Y)))/m
    db = (np.sum(A-Y))/m
    grads = {"dw": dw,
             "db": db}
    
    return grads, cost
    

In [61]:
# Now, you want to update the parameters using gradient descent.

In [62]:
def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost = False):
    
    
    costs = []
    
    for i in range(num_iterations):
        
        
        # Cost and gradient calculation (≈ 1-4 lines of code)
        ### START CODE HERE ### 
        ##grads, cost = None
        #A = sigmoid(np.dot(w.T,X) + b) 
        ##cost = np.mean(-(Y * np.log(A) + (1-Y) * np.log(1-A)),axis=1) 
        #A = sigmoid(np.dot(w.T,X) + b)  
        grads,cost=propagate(w, b, X, Y)
        ### END CODE HERE ###
        
        # Retrieve derivatives from grads
        dw = grads["dw"]
        db = grads["db"]
        
        # update rule (≈ 2 lines of code)
        ### START CODE HERE ###
        #w = None
        #b = None
        #dw = np.mean(np.sum(np.dot(X, (A - Y).T), axis=1, keepdims=True))
        #db = np.mean(np.sum(A - Y))
        w = (w - (learning_rate * dw))
        b = (b - (learning_rate * db))
        ### END CODE HERE ###
        
        # Record the costs
        if i % 100 == 0:
            costs.append(cost)
        
        # Print the cost every 100 training examples
        if print_cost and i % 100 == 0:
            print ("Cost after iteration %i: %f" %(i, cost))
    
    params = {"w": w,
              "b": b}
    
    grads = {"dw": dw,
             "db": db}
    return params, grads, costs

Exercise:** The previous function will output the learned w and b. We are able to use w and b to predict the labels for a dataset X. Implement the `predict()` function. There is two steps to computing predictions:

1. Calculate $\hat{Y} = A = \sigma(w^T X + b)$

2. Convert the entries of a into 0 (if activation <= 0.5) or 1 (if activation > 0.5), stores the predictions in a vector `Y_prediction`. If you wish, you can use an `if`/`else` statement in a `for` loop (though there is also a way to vectorize this). 

In [63]:
def predict(w, b, X):
 
    
    m = X.shape[1]
    Y_prediction = np.zeros((1,m))
    w = w.reshape(X.shape[0], 1)
    
    # Compute vector "A" predicting the probabilities of a cat being present in the picture
    ### START CODE HERE ### (≈ 1 line of code)
    A = None
    A=sigmoid(np.dot(w.T, X)+b)
    ### END CODE HERE ###
    
    for i in range(A.shape[1]):
        
        # Convert probabilities A[0,i] to actual predictions p[0,i]
        ### START CODE HERE ### (≈ 4 lines of code)
        if A[0][i] > 0.5:
            Y_prediction[0][i] = 1
        else:
            Y_prediction[0][i] = 0
        pass
        ### END CODE HERE ###
    
    assert(Y_prediction.shape == (1, m))
    
    return Y_prediction

## Merge all function into Model

In [70]:
def model(X_train, Y_train, X_test, Y_test, num_iterations = 2000, learning_rate = 0.5, print_cost = False):
    """
    Builds the logistic regression model by calling the function you've implemented previously
    
    Arguments:
    X_train -- training set represented by a numpy array of shape (num_px * num_px * 3, m_train)
    Y_train -- training labels represented by a numpy array (vector) of shape (1, m_train)
    X_test -- test set represented by a numpy array of shape (num_px * num_px * 3, m_test)
    Y_test -- test labels represented by a numpy array (vector) of shape (1, m_test)
    num_iterations -- hyperparameter representing the number of iterations to optimize the parameters
    learning_rate -- hyperparameter representing the learning rate used in the update rule of optimize()
    print_cost -- Set to true to print the cost every 100 iterations
    
    Returns:
    d -- dictionary containing information about the model.
    """
    
    ### START CODE HERE ###
    
    # initialize parameters with zeros (≈ 1 line of code)
    #w, b = None
    w, b = initialize_with_zeros(X_train.shape[0])
    

    # Gradient descent (≈ 1 line of code)
    #parameters, grads, costs = None
    parameters, grads, costs = optimize(w, b, X_train, Y_train, num_iterations, learning_rate , print_cost = False)
    
    # Retrieve parameters w and b from dictionary "parameters"
    w = parameters["w"]
    b = parameters["b"]
    
    # Predict test/train set examples (≈ 2 lines of code)
    #Y_prediction_test = None
    #Y_prediction_train = None
    Y_prediction_train=predict(w, b, X_train)
    Y_prediction_test=predict(w, b, X_test)

    ### END CODE HERE ###

    # Print train/test Errors
    print("train accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_train - Y_train)) * 100))
    print("test accuracy: {} %".format(100 - np.mean(np.abs(Y_prediction_test - Y_test)) * 100))

    
    d = {"costs": costs,
         "Y_prediction_test": Y_prediction_test, 
         "Y_prediction_train" : Y_prediction_train, 
         "w" : w, 
         "b" : b,
         "learning_rate" : learning_rate,
         "num_iterations": num_iterations}
    
    return d

In [71]:
d = model(train_set_x, train_set_y, test_set_x, test_set_y, num_iterations = 2000, learning_rate = 0.005, print_cost = True)

train accuracy: 99.04306220095694 %
test accuracy: 70.0 %
