In [None]:
import numpy as np
import h5py
import matplotlib.pyplot as plt


# Seed NumPy's global random number generator so that random draws are repeatable across runs.
np.random.seed(1)

# Some "service functions"

In [None]:
def print_dataset_stats(train_x, train_y, test_x, test_y):
    """
    Prints some statistics about the dataset.

    The number of examples is read from the first axis of the feature arrays and
    the per-example size from every remaining axis of train_x, so the report stays
    correct for non-square images or a different number of channels.

    Parameters:
    - train_x : numpy array -> the part of the training set containing the features (first axis = examples)
    - train_y : numpy array -> the part of the training set containing the target value
    - test_x : numpy array -> the part of the test set containing the features (first axis = examples)
    - test_y : numpy array -> the part of the test set containing the target value

    Returns:
    None, the statistics are written to standard output.
    """
    m_train = train_x.shape[0]
    m_test = test_x.shape[0]
    # Actual shape of one example; for 64x64 RGB images this is (64, 64, 3).
    example_shape = tuple(train_x.shape[1:])

    print ("Number of training examples: " + str(m_train))
    print ("Number of testing examples: " + str(m_test))
    print ("Each image is of size: " + str(example_shape))
    print ("train x shape: " + str(train_x.shape))
    print ("train y shape: " + str(train_y.shape))
    print ("test x shape: " + str(test_x.shape))
    print ("test y shape: " + str(test_y.shape))

In [None]:
def print_dataset_examples(x, y, classes):
    """
    Shows up to 20 example images from the dataset on a 4x5 grid, each titled with its class name.

    Parameters:
    - x : numpy array -> the part of the dataset containing the features (trivially the value of each pixel of the image);
          x[i] is passed to imshow as one image
    - y : numpy array -> the part of the dataset containing the target value (the class assigned to each image);
          indexed as y[0, i], so it is expected to have shape (1, number of examples)
    - classes : numpy array -> an array containing the classes name as bytes (each entry is decoded as UTF-8)

    Side effects:
    Sets plt.rcParams['figure.figsize'] to (40.0, 40.0); the setting stays in place after the call.
    """
    plt.rcParams['figure.figsize'] = (40.0, 40.0) # set default size of plots

    # Never index past the data actually available: small datasets simply show fewer images.
    n_examples = min(20, len(x), y.shape[1])
    for i in range(n_examples):
        plt.subplot(4, 5, i + 1)
        plt.imshow(x[i], interpolation='nearest')
        plt.axis('off')
        plt.title("Class: " + classes[y[0,i]].decode("utf-8"), fontsize=30)

In [None]:
def load_cat_dataset(print_stats = False, print_examples = False):
    """
    Loads and preprocesses the h5 dataset catvnoncat.

    Parameters:
    - print_stats : Boolean -> indicates whether to print or not the main statistics about the dataset
    - print_examples : Boolean -> indicates whether to show or not some examples from the dataset

    Returns:
    - train_x: numpy array of shape (features, training examples) with one flattened, [0, 1]-scaled image
      per column (12288*209 for the catvnoncat files according to the original notes)
    - train_y: numpy array of shape (1, training examples) containing the target value for the train
      (1*209 according to the original notes)
    - test_x: numpy array of shape (features, test examples) with one flattened, [0, 1]-scaled image
      per column (12288*50 according to the original notes)
    - test_y: numpy array of shape (1, test examples) containing the target value for the test
      (1*50 according to the original notes)
    - classes: a numpy array containing the two classes name ("cat", "non-cat")

    N.B.
    The dataset is physically split into two h5 files: train_catvnoncat.h5 and test_catvnoncat.h5,
    both read from the "datasets" folder. The keys read from them are:
     - train_set_x / test_set_x with the features;
     - train_set_y / test_set_y with the target value;
     - list_classes (read from the test file) containing the two classes name ("cat", "non-cat")
    """
    # Everything is copied into in-memory numpy arrays inside the "with" blocks,
    # so the h5 files can be closed as soon as they have been read.
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        train_x_orig = np.array(train_dataset["train_set_x"][:]) # train set features
        train_y = np.array(train_dataset["train_set_y"][:]) # train set labels

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_x_orig = np.array(test_dataset["test_set_x"][:]) # test set features
        test_y = np.array(test_dataset["test_set_y"][:]) # test set labels
        classes = np.array(test_dataset["list_classes"][:]) # the list of classes

    # The labels are read as rank-1 arrays of shape (m,); reshape them to (1, m)
    # so that every label sits in its own column.
    train_y = train_y.reshape((1, train_y.shape[0]))
    test_y = test_y.reshape((1, test_y.shape[0]))

    if print_stats:
        print_dataset_stats(train_x_orig, train_y, test_x_orig, test_y)

    if print_examples:
        print_dataset_examples(train_x_orig, train_y, classes)

    # Preprocessing part
    # Flatten every example into one column: (m, ...) -> (m, features) -> (features, m).
    # The "-1" makes reshape flatten the remaining dimensions.
    train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
    test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

    # Scale the pixel values by 255 so that 8-bit features end up between 0 and 1.
    train_x = train_x_flatten/255.
    test_x = test_x_flatten/255.

    if print_stats:
        print ("train x shape after preprocessing: " + str(train_x.shape))
        print ("test x shape after preprocessing: " + str(test_x.shape))

    return train_x, train_y, test_x, test_y, classes

In [None]:
def load_indian_dataset(print_stats = False):
    """
    Loads the pima-indians-diabetes dataset, which contains information used to diagnostically predict whether or not a
    patient has diabetes, based on certain diagnostic measurements.

    Both files are comma-separated text files read from the "datasets" folder; in every row the last
    column is taken as the target value and all the previous columns as features.

    Parameters:
    - print_stats : Boolean -> indicates whether to print or not the shapes of the returned arrays

    Returns:
    - train_x_orig: numpy array of shape (features, training examples), one example per column
      (8*609 according to the original notes)
    - train_y: numpy array of shape (1, training examples) containing the target value for the train
      (1*609 according to the original notes)
    - test_x_orig: numpy array of shape (features, test examples), one example per column
      (8*159 according to the original notes)
    - test_y: numpy array of shape (1, test examples) containing the target value for the test
      (1*159 according to the original notes)
    """
    # ndmin=2 keeps the result two-dimensional even when a file holds a single row,
    # so the column slicing below always works.
    train_dataset = np.loadtxt('datasets/pima-indians-diabetes.txt', delimiter = ",", ndmin = 2)
    train_x_orig = train_dataset[:,:-1].T
    train_y = train_dataset[:,-1]

    test_dataset = np.loadtxt('datasets/pima-indians-diabetes-test.txt', delimiter = ",", ndmin = 2)
    test_x_orig = test_dataset[:,:-1].T
    test_y = test_dataset[:,-1]

    # Turn the rank-1 label arrays of shape (m,) into row vectors of shape (1, m).
    train_y = train_y.reshape((1, train_y.shape[0]))
    test_y = test_y.reshape((1, test_y.shape[0]))

    if print_stats:
        print("train x shape: " + str(np.shape(train_x_orig)))
        print("train y shape: " + str(np.shape(train_y)))
        print("test x shape: " + str(np.shape(test_x_orig)))
        print("test y shape: " + str(np.shape(test_y)))

    return train_x_orig, train_y, test_x_orig, test_y

In [None]:
def getAccuracy(p,y):
    """
    Return the accuracy of a model, i.e. the fraction of predictions equal to the correct label.

    Parameters:
    - p : numpy array -> the model prediction
    - y : numpy array (shape (1, number of examples)) -> the correct label for the predictions

    Returns:
    The number of matching entries divided by y.shape[1], as a float (0.0 when there are no examples).
    """
    m = y.shape[1]
    if m == 0:
        # Nothing to score; avoids a 0/0 division.
        return 0.0

    # Count the matches first and divide once: summing m separately rounded
    # values of 1/m can leave a perfect score slightly below 1.0.
    return np.sum(p == y) / m

# The activation functions with their derivatives

In [None]:
def sigmoid(Z):
    """
    Sigmoid activation, computed element-wise with numpy.

    Parameters:
    Z : a numpy array of any shape

    Returns:
    A : a numpy array holding 1 / (1 + exp(-Z)), same shape as Z
    cache : the input Z itself, handed back for use during backpropagation
    """
    exp_neg_z = np.exp(-Z)
    activation = 1/(1+exp_neg_z)

    return activation, Z



def sigmoid_backward(dA, cache):
    """
    Backward propagation through a single SIGMOID unit.

    Parameters:
    dA : post-activation gradient, of any shape
    cache : the 'Z' stored during the forward pass

    Returns:
    dZ : gradient of the cost with respect to Z, i.e. dA * sigmoid(Z) * (1 - sigmoid(Z))
    """
    pre_activation = cache

    # Recompute the sigmoid of the cached input; its derivative is sig * (1 - sig).
    sig = 1/(1+np.exp(-pre_activation))
    grad = dA * sig * (1-sig)

    # The gradient must line up element by element with the cached Z.
    assert (grad.shape == pre_activation.shape)

    return grad



def relu(Z):
    """
    RELU activation, computed element-wise.

    Arguments:
    Z : Output of the linear layer, of any shape

    Returns:
    A : a numpy array holding max(0, Z), same shape as Z
    cache : the input Z itself, handed back for use during backpropagation
    """
    rectified = np.maximum(0,Z)

    return rectified, Z



def relu_backward(dA, cache):
    """
    Backward propagation through a single RELU unit.

    Arguments:
    dA : post-activation gradient, of any shape
    cache : the 'Z' stored during the forward pass

    Returns:
    dZ : gradient of the cost with respect to Z (a copy of dA with the entries where Z <= 0 set to 0)
    """
    pre_activation = cache

    # Work on a copy so the caller's dA is left untouched.
    grad = np.copy(dA)

    # The RELU is flat wherever its input is not positive, so no gradient flows there.
    inactive = pre_activation <= 0
    grad[inactive] = 0

    assert (grad.shape == pre_activation.shape)

    return grad

# The neural network class
This class is useful to define a neural network for a binary classification problem. The class defines the basic methods used in machine learning and exploits numpy vectorization in order to avoid unnecessary for loops. 

The fit method performs the classic steps useful to train the model:
 - <b>Forward pass</b>: in which the activations of each layer are computed from the current weights. <br>
   It starts from an input $A^{[l-1]}$, which represents the output of layer $l-1$ (we assume that $A^{[0]} = X$, which is the input given to the first layer of the network). 
   Then it computes Z as: $Z^{[l]} = W^{[l]}A^{[l-1]} +b^{[l]}$.
   Finally the activation for each node of the layer is computed as $A^{[l]} = g(Z^{[l]})$. Depending on whether the layer is the last or not, $g$ is either the sigmoid function ($\sigma(Z) = \frac{1}{ 1 + e^{-(Z)}}$) or the RELU function ($RELU(Z) = max(0, Z)$); both are defined in the previous cell; 
 - <b>Cost computation</b>: in which the cost function is calculated. <br>
   As cost function the cross entropy is used: $J = -\frac{1}{m} \sum\limits_{i = 1}^{m} (y^{(i)}\log\left(a^{[L] (i)}\right) + (1-y^{(i)})\log\left(1- a^{[L](i)}\right))$;
 - <b>Backward pass</b>: in which the gradients for each layer are calculated. <br>
   It starts from the derivative with respect to $Z$, which can be calculated by applying: $dZ^{[l]} = dA^{[l]} * g'(Z^{[l]})$.
   Then, it calculates the other derivatives needed in the next step, in particular: 
   $ dW^{[l]} = \frac{\partial \mathcal{J} }{\partial W^{[l]}} = \frac{1}{m} dZ^{[l]} A^{[l-1] T}$, &nbsp;&nbsp;&nbsp;
   $ db^{[l]} = \frac{\partial \mathcal{J} }{\partial b^{[l]}} = \frac{1}{m} \sum_{i = 1}^{m} dZ^{[l](i)}$ &nbsp;&nbsp; and &nbsp;&nbsp; $ dA^{[l-1]} = \frac{\partial \mathcal{J} }{\partial A^{[l-1]}} = W^{[l] T} dZ^{[l]}$; <br>
   In order to calculate the derivative of the activation function, $g'(Z^{[l]})$, this part of the code uses the sigmoid_backward and the relu_backward functions, which have been defined in the previous cell;
 - <b>Weights update</b>: in which gradient descent is used to update the parameters $W$ and $b$. <br>
   In particular for each node of the layer: $ W^{[l]} = W^{[l]} - \alpha \text{ } dW^{[l]}$ and $ b^{[l]} = b^{[l]} - \alpha \text{ } db^{[l]}$, where $\alpha$ is the selected learning rate.
 
 

In [None]:
class Neural_Network:  
    """
    This is the abstraction for an L-layer neural network used for a binary classification problem. 
    The net has L-1 hidden layers which exploit the relu activation, while the last layer is a single 
    neuron which exploits the sigmoid activation function.

    Attributes:
    - layers_dims : list -> the layer sizes given to the constructor followed by the single output unit
    - parameters : python dictionary -> the weights "W1".."WL" and the biases "b1".."bL"
    - dimensions : int -> the number of layers that own parameters (len(layers_dims) - 1)
    """

    # Predicted probabilities are kept at least this far away from 0 and 1 before
    # taking logarithms or dividing by them, so that a saturated sigmoid output
    # cannot inject inf/nan into the cost or into the gradients.
    _EPS = 1e-15
    
    def __init__(self, layers_dims):
        """
        Initializes the dimension and the parameter of the network.
        
        Parameters:
        - layers_dims : list (or any other sequence) -> the dimension of each layer of the network, starting from 
          the input size and excluding the output layer, which is automatically added
        """
        # list(...) lets callers pass a tuple as well; the trailing 1 is the output neuron.
        self.layers_dims = list(layers_dims) + [1]
        self.parameters = self.initialize_parameters_deep()
        self.dimensions = len(self.layers_dims)-1
        
    
    def initialize_parameters_deep(self):
        """
        Initializes the weight and the bias for each neuron in each layer of the network. The weights initialization is made
        with a random number divided by the square root of the previous layer's dimension. The biases are all initialized to 0.

        Note: this method reseeds numpy's global random generator with 1, so every network with the same
        layer sizes starts from the same weights.
        
        Returns:
        parameters : python dictionary  -> containing the parameters "W1", "b1", ..., "WL", "bL":
                        Wl -- weight matrix of shape (layers_dims[l], layers_dims[l-1])
                        bl -- bias vector of shape (layers_dims[l], 1)
        """

        np.random.seed(1) 
        parameters = {}
        L = len(self.layers_dims)            # number of entries in layers_dims (input layer included)

        for l in range(1, L):
            parameters['W' + str(l)] = np.random.randn(self.layers_dims[l], self.layers_dims[l-1]) / np.sqrt(self.layers_dims[l-1])
            parameters['b' + str(l)] = np.zeros((self.layers_dims[l], 1))

            assert(parameters['W' + str(l)].shape == (self.layers_dims[l], self.layers_dims[l-1]))
            assert(parameters['b' + str(l)].shape == (self.layers_dims[l], 1))


        return parameters
    
    def L_model_forward(self, X, parameters):
        """
        Implements the forward propagation for the network. In particular it exploits the function "linear_activation_forward",
        using the relu function for the first L-1 layers (where L is the number of layers). In the last layer the same function
        is called, using the sigmoid function.

        Arguments:
        X : numpy array of shape (input size, number of examples) -> the dataset to learn or predict
        parameters :  python dictionary -> output of initialize_parameters_deep() if the model is not trained, the model 
            parameters otherwise

        Returns:
        AL : numpy array (shape (1, number of examples)) -> the result of the sigmoid activation function for the last neuron
        caches : list of caches -> every cache of linear_activation_forward() (there are L of them, indexed from 0 to L-1) 
            used in the backpropagation step to compute derivatives.
        """

        caches = []
        A = X
        L = len(parameters) // 2                  # number of layers in the neural network (one W and one b per layer)

        # Implement [LINEAR -> RELU]*(L-1). Add "cache" to the "caches" list.
        for l in range(1, L):
            A_prev = A 
            A, cache = self.linear_activation_forward(A_prev, parameters['W' + str(l)],  parameters['b' + str(l)], "relu")
            caches.append(cache)

        # Implement LINEAR -> SIGMOID. Add "cache" to the "caches" list.
        AL, cache = self.linear_activation_forward(A, parameters['W' + str(L)],  parameters['b' + str(L)], "sigmoid")
        caches.append(cache)

        assert(AL.shape == (1,X.shape[1]))

        return AL, caches
    
    def linear_activation_forward(self, A_prev, W, b, activation):
        """
        This function is used by L_model_forward: first it computes the linear step for a specific layer (exploiting the 
        linear_forward function), then it computes the non-linear activation (RELU or Sigmoid) function for that layer.

        Arguments:
        A_prev : numpy array (shape (size of previous layer, number of examples))-> activations from previous layer (or input data)
        W : numpy array (shape (size of current layer, size of previous layer)) -> the weight matrix for the considered layer
        b : numpy array (shape (size of the current layer, 1) -> the bias vector for the considered layer 
        activation : string -> the activation to be used in this layer, accepted value: "sigmoid" or "relu"

        Returns:
        A : numpy array (shape (size of considered layer, number of examples))-> the output of the activation function
        cache :  python tuple -> it contains "linear_cache" and "activation_cache", stored for computing the backward pass efficiently

        Raises:
        ValueError : if activation is neither "sigmoid" nor "relu"
        """

        # Reject unknown activation names up front with a clear message.
        if activation not in ("sigmoid", "relu"):
            raise ValueError('activation must be "sigmoid" or "relu", got ' + repr(activation))

        # Inputs: "A_prev, W, b". Outputs: "A, activation_cache".
        Z, linear_cache = self.linear_forward(A_prev, W, b)
        if activation == "sigmoid":
            A, activation_cache = sigmoid(Z)
        else:
            A, activation_cache = relu(Z)

        assert (A.shape == (W.shape[0], A_prev.shape[1]))
        cache = (linear_cache, activation_cache)

        return A, cache
    
    def linear_forward(self, A, W, b):
        """
        This function is used by linear_activation_forward, it implements the linear part of a layer's forward propagation, 
        simply calculating WA+b (in a vectorized fashion)

        Arguments:
        A : numpy array (shape (size of previous layer, number of examples))-> activations from previous layer (or input data)
        W : numpy array (shape (size of current layer, size of previous layer)) -> the weight matrix for the considered layer
        b : numpy array (shape (size of the current layer, 1) -> the bias vector for the considered layer 

        Returns:
        Z : numpy array (shape (size of current layer, number of examples)) -> the vector which will be the input of the activation function
        cache : python tuple -> containing "A", "W" and "b" ; stored for computing the backward pass efficiently
        """

        Z = np.dot(W,A) + b               # b is broadcast over the examples (columns)

        assert(Z.shape == (W.shape[0], A.shape[1]))
        cache = (A, W, b)

        return Z, cache
    
    def compute_cost(self, AL, Y):
        """
        Implements the cross entropy cost function. 

        Arguments:
        AL : numpy array (shape (1, number of examples)) -> the output of the last neuron, a probability vector corresponding to your label predictions
        Y : numpy array (shape  (1, number of examples)) -> true "label" vector 

        Returns:
        cost : numpy array -> it contains only a float: the cross-entropy cost
        """

        m = Y.shape[1]

        # A sigmoid output of exactly 0 or 1 would produce log(0) = -inf and then
        # 0 * -inf = nan, even for a correct prediction; clip it away from the limits.
        AL_safe = np.clip(AL, self._EPS, 1 - self._EPS)

        # Compute loss from aL and y.
        cost = - 1/m * (np.dot(Y, np.log(AL_safe).T) + np.dot((1-Y), np.log(1-AL_safe).T))

        cost = np.squeeze(cost)      # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17).
        assert(cost.shape == ())

        return cost
    
    def L_model_backward(self, AL, Y, caches):
        """
        Implements the backward propagation for the network. In particular it first calculates the derivative for AL (the result
        of the activation function on the last neuron). Then it exploits the function "linear_activation_backward", passing the 
        cached values (both linear and activation), to efficiently calculate the other required derivatives. As always, 
        the sigmoid derivative is calculated for the last neuron, while the relu derivative is applied to the remaining layers.

        Arguments:
        AL : numpy array (shape (1, number of examples)) -> the output of the last neuron, a probability vector corresponding to your label predictions
        Y : numpy array (shape  (1, number of examples)) -> true "label" vector 
        caches : list of caches -> every cache of linear_activation_forward() (there are L of them, indexed from 0 to L-1)

        Returns:
        grads : python dictionary -> It contains the gradients calculated. Example:
                 grads["dA" + str(l)] = ... 
                 grads["dW" + str(l)] = ...
                 grads["db" + str(l)] = ... 
        """
        
        grads = {}
        L = len(caches) # the number of layers

        # Initializing the backpropagation.
        # AL is clipped first: dividing by an output of exactly 0 or 1 would give inf/nan gradients.
        AL_safe = np.clip(AL, self._EPS, 1 - self._EPS)
        dAL = - (np.divide(Y, AL_safe) - np.divide(1 - Y, 1 - AL_safe)) # derivative of cost with respect to AL

        # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: "dAL, current_cache". Outputs: "grads["dAL-1"], grads["dWL"], grads["dbL"]
        current_cache = caches[L-1]
        grads["dA" + str(L-1)], grads["dW" + str(L)], grads["db" + str(L)] = self.linear_activation_backward(dAL, current_cache, "sigmoid")

        # Loop from l=L-2 to l=0
        for l in reversed(range(L-1)):
            # lth layer: (RELU -> LINEAR) gradients.
            current_cache = caches[l]
            dA_prev_temp, dW_temp, db_temp = self.linear_activation_backward(grads["dA" + str(l + 1)], current_cache, "relu")
            grads["dA" + str(l)] = dA_prev_temp
            grads["dW" + str(l + 1)] = dW_temp
            grads["db" + str(l + 1)] = db_temp

        return grads
    
    def linear_activation_backward(self, dA, cache, activation):
        """
        This function is used by L_model_backward: first it computes the specific activation derivative, then it computes
        the derivative for the linear part, exploiting the "linear_backward" function.

        Arguments:
        dA : numpy array (shape (size of current layer, number of examples))-> activation gradient for current layer l 
        cache :  python tuple -> it contains "linear_cache" and "activation_cache", used for computing the backward pass efficiently
        activation : string -> the activation to be used in this layer, accepted value: "sigmoid" or "relu"

        Returns:
        dA_prev : numpy array (shape (size of previous layer, number of examples)) -> Gradient of the cost with respect to the activation (of the previous layer l-1)
        dW : numpy array  -> Gradient of the cost with respect to W (current layer l), same shape as W
        db : numpy array -> Gradient of the cost with respect to b (current layer l), same shape as b

        Raises:
        ValueError : if activation is neither "sigmoid" nor "relu"
        """
        linear_cache, activation_cache = cache

        if activation == "relu":
            dZ = relu_backward(dA, activation_cache)
        elif activation == "sigmoid":
            dZ = sigmoid_backward(dA, activation_cache)
        else:
            raise ValueError('activation must be "sigmoid" or "relu", got ' + repr(activation))

        dA_prev, dW, db = self.linear_backward(dZ, linear_cache)

        return dA_prev, dW, db
    
    def linear_backward(self, dZ, cache):
        """
        Implements the linear portion of backward propagation for a single layer (layer l)

        Arguments:
        dZ : numpy array -> Gradient of the cost with respect to the linear output (of current layer l)
        cache : tuple of values (A_prev, W, b) -> it comes from the forward propagation in the current layer

        Returns:
        dA_prev : numpy array -> Gradient of the cost with respect to the activation (of the previous layer l-1), same shape as A_prev
        dW : numpy array -> Gradient of the cost with respect to W (current layer l), same shape as W
        db : numpy array -> Gradient of the cost with respect to b (current layer l), same shape as b
        """
        A_prev, W, b = cache
        m = A_prev.shape[1]               # number of examples

        dW = 1/m * np.dot(dZ, A_prev.T)
        db = 1/m * np.sum(dZ, axis = 1, keepdims = True)
        dA_prev = np.dot(W.T,dZ)

        assert (dA_prev.shape == A_prev.shape)
        assert (dW.shape == W.shape)
        assert (db.shape == b.shape)

        return dA_prev, dW, db
    
    def update_parameters(self, parameters, grads, learning_rate):
        """
        Updates the parameters using gradient descent. The entries of the given dictionary are replaced in place.

        Arguments:
        parameters : python dictionary -> the current model parameters (weights and biases for each neuron of each layer) 
        grads : python dictionary -> contains the calculated gradients, output of L_model_backward
        learning_rate : float -> the step size applied to every gradient

        Returns:
        parameters : python dictionary -> the updated  model parameters (the same dictionary object that was passed in)
        """

        L = len(parameters) // 2 # number of layers in the neural network

        # Update rule for each parameter
        for l in range(L):
            parameters["W" + str(l+1)] = parameters["W" + str(l+1)] - learning_rate * grads["dW" + str(l + 1)]
            parameters["b" + str(l+1)] = parameters["b" + str(l+1)] - learning_rate * grads["db" + str(l + 1)]
        return parameters
    
    def fit(self, X, Y, learning_rate = 0.0075, epochs = 3000, print_cost=False, print_graph=False):
        """
        The classic fit function. It performs all the steps required to train the model: first the forward propagation,
        later the cost computation, then the backward propagation and finally the weights updating via the classic
        gradient descent.

        Arguments:
        X : numpy array  (shape (Number of features, number of examples)) -> data matrix in which each column represent a training example
        Y : numpy array(shape (1, number of examples)) -> true "label" vector (each columns is the label of each example)
        learning_rate : float -> learning rate of the gradient descent update rule
        epochs : int -> index of the last iteration of the optimization loop; the loop runs for i = 0..epochs 
            inclusive, i.e. epochs + 1 parameter updates
        print_cost : boolean -> if True, it prints the cost every 100 iterations
        print_graph : boolean -> if True, it plots the cost graph at the end of the training process 
            (this also sets plt.rcParams['figure.figsize'] to (5.0, 4.0))
        
        Returns:
        None -> the learned model parameters are stored in self.parameters
        """

        costs = []                         # keep track of cost (one value every 100 iterations)
        
        # Loop (gradient descent)
        for i in range(0, epochs+1):

            # Forward propagation
            AL, caches = self.L_model_forward(X, self.parameters)

            # Compute cost
            cost = self.compute_cost(AL, Y)
            if i % 100 == 0:
                costs.append(cost)

            # Backward propagation
            grads = self.L_model_backward(AL, Y, caches)

            # Update parameters
            self.parameters = self.update_parameters(self.parameters, grads, learning_rate)

            # Print the cost every 100 iterations
            if print_cost and i % 100 == 0:
                print ("Cost after iteration %i: %f" %(i, cost))

        # plot the cost
        if print_graph:
            plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots
            plt.plot(np.squeeze(costs))
            plt.ylabel('cost')
            plt.xlabel('iterations (per hundreds)')
            plt.title("Learning rate =" + str(learning_rate))
            plt.show()
        
    def predict(self, X):
        """
        This function is used to predict the label of the given examples. It exploits the weights calculated during the fit.

        Arguments:
         X : numpy array  (shape (Number of features, number of examples to predict)) -> data matrix in which each column represent an example to predict

        Returns:
        p : numpy array of floats (shape (1, number of examples)) -> the predictions for the given dataset X: 
            1.0 where the predicted probability is strictly greater than 0.5, 0.0 otherwise
        """

        # Forward propagation
        probas, _ = self.L_model_forward(X, self.parameters)

        # convert probas to 0/1 predictions for all the examples at once
        return (probas > 0.5).astype(np.float64)
        
    

# Loading the catvnoncat dataset, training and testing the network

In [None]:
# Load the catvnoncat data with the default flags (no statistics printed, no example images shown).
train_x, train_y, test_x, test_y, classes = load_cat_dataset()

In [None]:
# Layer sizes: 12288 input features followed by hidden layers of 20, 7 and 5 units
# (the single output neuron is appended by the class itself).
cat_layer_dims = [12288, 20, 7, 5]
model = Neural_Network(cat_layer_dims) # model initialization

# Train on the cat training set, printing the cost along the way and plotting it at the end.
model.fit(
    train_x,
    train_y,
    epochs = 2500,
    print_cost = True,
    print_graph = True,
) # model fit

In [None]:
# Score the trained cat model on the data it was fitted on and on the held-out test set.
cat_train_accuracy = getAccuracy(model.predict(train_x), train_y)
cat_test_accuracy = getAccuracy(model.predict(test_x), test_y)

print("Train Accuracy = " + str(cat_train_accuracy))
print("Test Accuracy = " + str(cat_test_accuracy))

# Test on custom images

In order to test on a custom image, you can add an image to the "images" folder. Then just set the "my_image" variable to the image file name and run the cell.

In [None]:
from matplotlib.pyplot import imread  # no longer used in this cell; kept in case other cells rely on the name
from PIL import Image

my_image = "6.jpg" # change this to the name of your image file 

fname = "images/" + my_image

# Decode with Pillow and force three RGB channels, so that grayscale, palette and
# RGBA files also end up with the 64*64*3 values the reshape below requires.
with Image.open(fname) as picture:
    rgb_picture = picture.convert("RGB")
image = np.array(rgb_picture)

# Resize to 64x64, flatten into a single column and divide by 255, the same
# scaling load_cat_dataset applies to the training images.
# NOTE: "my_image" is rebound here from the file name to the preprocessed array.
my_image = np.array(rgb_picture.resize(size=(64,64))).reshape((64*64*3,1))
my_image = my_image/255.

my_predicted_image = model.predict(my_image)
predicted_class = int(np.squeeze(my_predicted_image))  # 0 or 1, used as index into "classes"

plt.imshow(image)
print ("y = " + str(np.squeeze(my_predicted_image)) + ", your " + str(model.dimensions) + "-layer model predicts a \"" + classes[predicted_class].decode("utf-8") +  "\" picture.")

# Indian dataset

In [None]:
# Load the diabetes data and print the array shapes.
# NOTE: this rebinds train_y and test_y, which held the cat labels until now; reload the
# cat dataset before re-running the cat accuracy cell.
train_x_orig, train_y, test_x_orig, test_y = load_indian_dataset(print_stats = True)

In [None]:
# 8 input features, hidden layers of 6, 4 and 2 units (the output neuron is added by the class).
diabet_model = Neural_Network([8, 6, 4, 2])
diabet_model.fit(
    train_x_orig,
    train_y,
    learning_rate = 0.0035,
    epochs = 80000,
    print_cost = True,
    print_graph = True,
)

# Accuracy on the training data and on the held-out test data.
diabet_train_accuracy = getAccuracy(diabet_model.predict(train_x_orig), train_y)
diabet_test_accuracy = getAccuracy(diabet_model.predict(test_x_orig), test_y)
print("Train Accuracy = " + str(diabet_train_accuracy))
print("Test Accuracy = " + str(diabet_test_accuracy))

##### Learning rate too high?
From the cost graph we can see that the learning rate is too high, so we can try to decrease it while increasing the 
number of epochs in order to get similar or better results. 

In [None]:
# Same architecture as before, retrained from scratch with a smaller learning rate and more epochs.
diabet_model = Neural_Network([8, 6, 4, 2])
diabet_model.fit(
    train_x_orig,
    train_y,
    learning_rate = 0.0015,
    epochs = 140000,
    print_cost = True,
    print_graph = True,
)

# Accuracy on the training data and on the held-out test data.
diabet_train_accuracy = getAccuracy(diabet_model.predict(train_x_orig), train_y)
diabet_test_accuracy = getAccuracy(diabet_model.predict(test_x_orig), test_y)
print("Train Accuracy = " + str(diabet_train_accuracy))
print("Test Accuracy = " + str(diabet_test_accuracy))

##### A slight improvement
Decreasing the learning rate and increasing the number of epochs doesn't affect the accuracy on the training set, but shows a slight improvement in test set accuracy. However, better performance could be achieved for both models (catvnoncat and indian-diabetes) by normalizing the data and fine-tuning the hyperparameters.