In [1]:
import numpy as np
%load_ext autoreload
%autoreload 2

In [38]:
def sigmoid(z):
    """Numerically stable logistic sigmoid, 1 / (1 + exp(-z)), element-wise.

    The naive formula evaluates exp(-z), which overflows for large negative
    z. This version only ever exponentiates a non-positive number, so it
    never overflows while returning the same values.

    Args:
        z: Real scalar or array-like of real numbers.

    Returns:
        A NumPy scalar for scalar input, otherwise an ndarray with the same
        shape as ``z``; every value lies in [0, 1].
    """
    z = np.asarray(z)
    # exp(-|z|) always lies in (0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(z))
    # z >= 0: 1 / (1 + e^-z).  z < 0: e^z / (1 + e^z), the same value
    # written so that only e^-|z| is needed.
    out = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and
    # leaves n-d arrays untouched.
    return out[()]

In [39]:
# Sanity check: the sigmoid of zero should be exactly one half.
print(sigmoid(z=0))

0.5


In [40]:
# XOR inputs: each column is one training example (2 features x 4 examples)
X = np.array([
    [0, 1, 0, 1],
    [0, 0, 1, 1],
])

# Expected XOR output for the matching column of X
Y = np.array([[0, 1, 1, 0]])

# Number of training examples = number of columns in X
m = X.shape[-1]

# Layer sizes: input layer, hidden layer, output layer
n_x, n_h, n_y = 2, 2, 1

# Gradient-descent settings
num_of_iters = 1000
learning_rate = 0.3

print("no_of_training_examples=" + str(m))

no_of_training_examples=4


One-hidden-layer neural network

In [41]:
def initialize_parameters(n_x, n_h, n_y, seed=None):
    """Create the initial weights and biases of the two-layer network.

    Weights are drawn from a standard normal distribution and biases start
    at zero. Biases are created with the same dtype as the weights so all
    four arrays are consistent.

    Args:
        n_x: Number of input features.
        n_h: Number of hidden units.
        n_y: Number of output units.
        seed: Optional seed for reproducible weights. When given, a private
            ``np.random.RandomState(seed)`` is used and NumPy's global
            random state is left untouched. When ``None`` (the default),
            the global ``np.random`` state is used.

    Returns:
        dict with keys "W1" of shape (n_h, n_x), "b1" of shape (n_h, 1),
        "W2" of shape (n_y, n_h) and "b2" of shape (n_y, 1).
    """
    # Both the np.random module and RandomState expose randn().
    rng = np.random if seed is None else np.random.RandomState(seed)

    # W1 is drawn before W2, so the draw order is fixed for a given state.
    W1 = rng.randn(n_h, n_x)
    b1 = np.zeros((n_h, 1), dtype=W1.dtype)
    W2 = rng.randn(n_y, n_h)
    b2 = np.zeros((n_y, 1), dtype=W2.dtype)

    assert W1.shape == (n_h, n_x)
    assert b1.shape == (n_h, 1)
    assert W2.shape == (n_y, n_h)
    assert b2.shape == (n_y, 1)

    parameters = {
        "W1": W1,
        "b1": b1,
        "W2": W2,
        "b2": b2,
    }

    return parameters

In [42]:
# Build a fresh parameter set and show each array.
parameters = initialize_parameters(n_x, n_h, n_y)
print("W1==", parameters["W1"], sep="")
print("b1==", parameters["b1"], sep="")
print("W2==", parameters["W2"], sep="")
print("b2==", parameters["b2"], sep="")

W1==[[ 1.08021366 -2.03440106]
 [-1.55257976 -0.45672883]]
b1==[[0.]
 [0.]]
W2==[[ 0.26141192 -0.80545721]]
b2==[[0.]]


The first (hidden) layer uses the tanh activation function.
The second (output) layer uses the sigmoid activation function.

In [43]:
def forward_prop(X,parameters):
    """Run one forward pass through the two-layer network.

    Args:
        X: Input array passed to ``np.dot(W1, X)``; in this notebook the
            training examples are stored as columns.
        parameters: dict holding "W1", "b1", "W2" and "b2".

    Returns:
        Tuple ``(A2, cache)`` where ``A2`` is the sigmoid output of the
        second layer and ``cache`` is a dict with the intermediate values
        "Z1", "A1", "Z2" and "A2".
    """
    # Hidden layer: affine map followed by tanh.
    hidden_pre = np.dot(parameters["W1"], X) + parameters["b1"]
    hidden_act = np.tanh(hidden_pre)

    # Output layer: affine map followed by sigmoid.
    output_pre = np.dot(parameters["W2"], hidden_act) + parameters["b2"]
    output_act = sigmoid(output_pre)

    cache = {"Z1": hidden_pre, "A1": hidden_act, "Z2": output_pre, "A2": output_act}
    return output_act, cache

In [44]:
# Forward pass on the training set, then show the mean of every cached array.
A2, cache = forward_prop(X, parameters)

print(*(np.mean(cache[key]) for key in ('Z1', 'A1', 'Z2', 'A2')))

-0.7408739978140908 -0.4026347008697302 0.40462408239445136 0.5961332381541158


In [45]:
def compute_cost(A2, Y, eps=1e-15):
    """Mean binary cross-entropy between predictions ``A2`` and labels ``Y``.

    Predictions are clipped to ``[eps, 1 - eps]`` before the logarithm.
    Without clipping, a prediction of exactly 0 or 1 produces ``log(0)``,
    and the resulting ``0 * -inf`` term turns the whole cost into NaN even
    when the prediction is correct.

    Args:
        A2: Predicted probabilities, same shape as ``Y``.
        Y: Ground-truth labels (0 or 1); ``Y.shape[1]`` is taken as the
            number of training examples.
        eps: Clipping margin that keeps probabilities away from 0 and 1.
            It must be large enough that ``1 - eps`` differs from 1 at the
            precision of ``A2``.

    Returns:
        The cost with all length-one axes squeezed away.
    """
    m = Y.shape[1]  # number of training examples

    # Keep probabilities strictly inside (0, 1) so both logs stay finite.
    A2_safe = np.clip(A2, eps, 1 - eps)

    log_likelihood = np.multiply(Y, np.log(A2_safe)) + np.multiply(1 - Y, np.log(1 - A2_safe))
    cost = -(1 / m) * np.sum(log_likelihood)

    return np.squeeze(cost)

In [46]:
# Cross-entropy cost of the untrained network on the training set.
cost = compute_cost(A2, Y)
print("cost==", cost, sep="")

cost==0.6742145045357022


In [47]:
def backward_prop(X,Y,parameters,cache):
    """Compute the gradients of the cost for the two-layer network.

    Only the values the formulas actually need are read: "W2" from
    ``parameters`` and "A1" / "A2" from ``cache``.

    Args:
        X: Input array; its transpose is multiplied with the hidden-layer
            error to obtain ``dW1``.
        Y: Labels with the same shape as ``cache["A2"]``; ``Y.shape[1]`` is
            taken as the number of training examples.
        parameters: dict containing at least "W2".
        cache: dict containing at least "A1" (tanh activations of the
            hidden layer) and "A2" (sigmoid output).

    Returns:
        dict with keys "dW1", "db1", "dW2" and "db2".
    """
    W2 = parameters["W2"]
    A1 = cache["A1"]
    A2 = cache["A2"]

    m = Y.shape[1]

    # Output layer: for a sigmoid output with cross-entropy cost the error
    # term reduces to (prediction - label).
    dZ2 = A2 - Y
    dW2 = (1 / m) * np.dot(dZ2, A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer: back-propagate through W2, then multiply by the tanh
    # derivative, 1 - A1**2.
    dZ1 = np.multiply(np.dot(W2.T, dZ2), 1 - np.power(A1, 2))
    dW1 = (1 / m) * np.dot(dZ1, X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}

    return grads

In [48]:
# Gradients of the cost for the current parameters.
grads=backward_prop(X,Y,parameters,cache)
print("dW1=="+str(grads["dW1"]))
print("db1=="+str(grads["db1"]))
print("dW2=="+str(grads["dW2"]))
# The label names the key being printed: the output-layer bias gradient.
print("db2=="+str(grads["db2"]))


dW1==[[0.01208376 0.01680952]
 [0.00029598 0.06955339]]
db1==[[ 0.04269888]
 [-0.02186484]]
dW2==[[-0.05929137 -0.0397706 ]]
db1==[[0.09613324]]


In [49]:
def update_parameters(parameters,grads,learning_rate):
    """Apply one gradient-descent step to every weight and bias.

    Args:
        parameters: dict holding "W1", "b1", "W2" and "b2".
        grads: dict holding the matching gradients "dW1", "db1", "dW2"
            and "db2".
        learning_rate: Step size multiplied into every gradient.

    Returns:
        A new dict with the updated "W1", "b1", "W2" and "b2"; the input
        dict is not modified.
    """
    # Each parameter is paired with the gradient that updates it.
    pairs = (("W1", "dW1"), ("b1", "db1"), ("W2", "dW2"), ("b2", "db2"))

    return {
        name: parameters[name] - learning_rate * grads[grad_name]
        for name, grad_name in pairs
    }

In [50]:
# Take a single gradient-descent step and show the resulting parameters.
parameters = update_parameters(parameters, grads, learning_rate)

print("W1 = ", parameters["W1"], sep="")
print("b1 = ", parameters["b1"], sep="")
print("W2 = ", parameters["W2"], sep="")
print("b2 = ", parameters["b2"], sep="")

W1 = [[ 1.07658853 -2.03944392]
 [-1.55266855 -0.47759484]]
b1 = [[-0.01280966]
 [ 0.00655945]]
W2 = [[ 0.27919934 -0.79352603]]
b2 = [[-0.02883997]]


In [51]:
def model(X,Y,n_x,n_h,n_y,learning_rate,num_of_iters):
    """Train the two-layer network with batch gradient descent.

    Args:
        X: Training inputs, forwarded to ``forward_prop``.
        Y: Training labels, forwarded to ``compute_cost`` and
            ``backward_prop``.
        n_x, n_h, n_y: Layer sizes passed to ``initialize_parameters``.
        learning_rate: Step size passed to ``update_parameters``.
        num_of_iters: Last iteration index. The loop runs indices
            0..num_of_iters inclusive, i.e. ``num_of_iters + 1`` updates.

    Returns:
        The parameter dict after the final update.
    """
    params = initialize_parameters(n_x, n_h, n_y)

    for step in range(num_of_iters + 1):
        predictions, cache = forward_prop(X, params)

        # The cost is measured before this step's update is applied.
        cost = compute_cost(predictions, Y)

        grads = backward_prop(X, Y, params, cache)
        params = update_parameters(params, grads, learning_rate)

        # Report progress on every 100th iteration index.
        if step % 100 == 0:
            print("costafter iteration %i:%f"%(step,cost))

    return params

In [52]:
def predict(X, parameters):
    """Predict class labels (0 or 1) by thresholding the network output at 0.5.

    Works for one example or many. The comparison is done element-wise,
    because using a multi-element array directly in an ``if`` test raises
    ``ValueError``.

    Args:
        X: Input array forwarded to ``forward_prop``; in this notebook the
            examples are stored as columns.
        parameters: Parameter dict forwarded to ``forward_prop``.

    Returns:
        A plain ``int`` (0 or 1) when the squeezed network output is a
        single value; otherwise an integer ndarray of 0/1 labels with the
        squeezed output's shape.
    """
    A2, _ = forward_prop(X, parameters)

    # Element-wise threshold: outputs >= 0.5 become 1, all others 0.
    labels = (np.squeeze(A2) >= 0.5).astype(int)

    # A single prediction is returned as a Python int so callers can keep
    # formatting it with "{:d}".
    if labels.ndim == 0:
        return int(labels)
    return labels

In [53]:
# Train on the XOR data with the hyperparameters configured earlier.
trained_parameters = model(
    X, Y, n_x, n_h, n_y,
    learning_rate=learning_rate,
    num_of_iters=num_of_iters,
)

costafter iteration 0:0.862362
costafter iteration 100:0.610073
costafter iteration 200:0.296737
costafter iteration 300:0.097992
costafter iteration 400:0.054189
costafter iteration 500:0.036737
costafter iteration 600:0.027588
costafter iteration 700:0.022011
costafter iteration 800:0.018274
costafter iteration 900:0.015604
costafter iteration 1000:0.013603


In [56]:
# One test example stored as a column: first input 0, second input 1.
X_test = np.array([[0],
                   [1]])
y_predict = predict(X_test, trained_parameters)

# Report both inputs together with the predicted label.
print('Neural Network prediction for example ({:d}, {:d}) is {:d}'.format(
    X_test[0, 0], X_test[1, 0], y_predict))

Neural Network prediction for example (0, 1) is 1
