In [1]:
import numpy as np
import matplotlib.pyplot as plt
import copy

In [2]:
def sigmoid(x):
    '''
    Logistic function 1 / (1 + e^(-x)).

    x -- a scalar or numpy array; arrays are processed element-wise

    Returns a value (or array of values, same shape as x) strictly inside
    (0, 1) in exact arithmetic.
    '''
    denominator = 1 + np.exp(-x)
    return 1 / denominator


In [8]:
def initialize_with_zeros(dim):
    '''
    Create the starting parameters for logistic regression.

    dim -- number of weights to create (one per input feature)

    Returns:
    w -- numpy array of zeros with shape (dim, 1)
    b -- bias, the float 0.0
    '''
    # The shape is passed as a single tuple, which is what np.zeros expects.
    # Starting from all zeros is safe here: the logistic-regression cost is
    # convex, so there is no symmetry that needs a random start to break.
    w = np.zeros((dim, 1))
    b = 0.0
    return w, b


In [4]:
def propagate(w, b, X, Y):

    '''
    Compute the logistic-regression cost and its gradients for the current
    parameters.

    w -- weights, a numpy array of size (number of features, 1)
    b -- bias, a scalar
    X -- data of size (number of features, number of examples)
    Y -- true "label" vector (containing 0 or 1 depending on the true values)
         of size (1, number of examples); a plain list of labels is accepted

    Returns:
    grads -- dict with "dw" (gradient of the cost w.r.t. w, same shape as w)
             and "db" (gradient of the cost w.r.t. b, a scalar)
    cost -- the mean negative log-likelihood, as a 0-d numpy array
    '''

    # Accept list labels as well as arrays; `1 - Y` below needs an ndarray.
    Y = np.asarray(Y)
    m = X.shape[1] #number of examples

    #forward propagation: linear score Z, then activation A = sigmoid(Z)
    Z = np.dot(w.transpose(), X) + b
    A = sigmoid(Z)

    #cost is the negative log likelihood j = -1/m * sum(ylog(a) + (1-y)log(1-a)).
    #It is evaluated from Z instead of A, using
    #    log(a)   = -logaddexp(0, -z)
    #    log(1-a) = -logaddexp(0,  z)
    #so that a saturated sigmoid (A exactly 0 or 1) cannot produce log(0),
    #which would turn the cost into inf or nan.
    j = Y * np.logaddexp(0, -Z) + (1 - Y) * np.logaddexp(0, Z)
    cost = (1/m)*np.sum(j)

    #backward propagation
    #dw and db are the derivatives of the cost function with respect to w and b
    #these are calculated using the chain rule of partial differentiation
    dZ = A - Y
    dw = (1/m)*np.dot(X,dZ.transpose())
    db = (1/m)*np.sum(dZ)

    cost = np.squeeze(np.array(cost))
    grads = {"dw": dw,
             "db": db}
    return grads, cost


In [5]:
def optimize(w, b, X, Y, num_iterations=2000, learning_rate=0.5, print_cost=False):

    '''
    Learn w and b by running gradient descent on the cost returned by propagate.

    w -- initial weights, in the shape propagate expects
    b -- initial bias, a scalar
    X -- training data, passed straight through to propagate
    Y -- training labels, passed straight through to propagate
    num_iterations -- number of gradient-descent steps to take
    learning_rate -- step size applied to each gradient
    print_cost -- if True, print the cost every 100 iterations

    Returns:
    params -- dict with the learned "w" and "b"
    grads -- dict with the "dw" and "db" used in the last step
             (both None when num_iterations is 0)
    costs -- list of the costs recorded at iterations 0, 100, 200, ...;
             each is the cost measured before that iteration's update
    '''

    # Work on copies so the returned parameters never alias the caller's inputs.
    w = copy.deepcopy(w)
    b = copy.deepcopy(b)
    costs = []

    # Bound up front so the function still returns cleanly if the loop below
    # never executes (num_iterations == 0).
    dw = None
    db = None

    #run the forward and backward propagation for some number of iterations of gradient descent
    for i in range(num_iterations):
        #run the propagation
        grads, cost = propagate(w, b, X, Y)
        #these are the values of d weights and d bias for this step of gradient descent
        dw = grads["dw"]
        db = grads["db"]

        #update the weights and bias using learning rate and derivatives. this is gradient descent implemented
        w = w - learning_rate*dw
        b = b - learning_rate*db

        if i % 100 == 0:
            costs.append(cost)

            # Print the cost every 100 training iterations
            if print_cost:
                print ("Cost after iteration %i: %f" %(i, cost))

    #here the w and b have been learned from data.
    params = {"w": w,
              "b": b}

    grads = {"dw": dw,
             "db": db}

    return params, grads, costs

In [6]:
def predict(w, b, X):

    '''
    Predict a 0/1 label for every example in X using trained parameters.

    w -- learned weights; reshaped to (X.shape[0], 1), so it must hold exactly
         X.shape[0] values
    b -- learned bias, a scalar
    X -- data of size (number of features, number of examples)

    Returns:
    Y_prediction -- numpy array of size (1, number of examples) containing the
                    predicted labels as 0.0 or 1.0
    '''

    #this method will be used to predict after the model has been trained
    w = w.reshape(X.shape[0], 1)

    #using the w and b as learned from training data, we predict the values of Y i.e Y' = sigmoid(w.transpose()*X+b)
    A = sigmoid(np.dot(w.transpose(),X)+b)

    #rounding the probabilities to 0 or 1; numpy rounds exact halves to the
    #nearest even value, so a probability of exactly 0.5 is labelled 0
    Y_prediction = A.round()
    return Y_prediction


In [7]:
def load_dataset():
    X_train = np.array([[1,1],
                        [1,2],
                        [1,3],
                        [1,4],
                        [2,1],
                        [3,1],
                        [4,1],
                        [2,4],
                        [4,2],
                        [3,3],
                        [1,5],
                        [5,1],
                        [2,2],
                        [3,2],
                        [2,3],
                        [2.5,2.5],
                        [2.5,3],
                        [3,2.5],
                        [2.1,2.6]


                        ])
    Y_train = [0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1]

    