In [18]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer

In [171]:
def init_params(layer_dims):
    np.random.seed(3)
    return [np.random.randn(d+1,1) for d in layer_dims]

In [140]:
init_params([5,6])

[array([[ 1.78862847,  0.43650985],
        [ 0.09649747, -1.8634927 ],
        [-0.2773882 , -0.35475898],
        [-0.08274148, -0.62700068],
        [-0.04381817, -0.47721803]]),
 array([[-1.31386475,  0.88462238],
        [ 0.88131804,  1.70957306],
        [ 0.05003364, -0.40467741],
        [-0.54535995, -1.54647732],
        [ 0.98236743, -1.10106763],
        [-1.18504653, -0.2056499 ]])]

In [98]:
def sigmoid(Z):
    A = 1/(1+np.exp(np.dot(-1, Z)))
    cache = (Z)
    
    return A, cache

In [123]:
def forward_prop(X, params):
    
    A = X # input to first layer i.e. training data
    caches = []
    L = len(params)//2
    for l in range(0, L):
        A_prev = A
    
        # Linear Hypothesis
        Z = np.dot(params['W'+str(l)], A_prev) + params['b'+str(l)] 
        print(Z,"\n","-"*30)
        # Storing the linear cache
        linear_cache = (A_prev, params['W'+str(l)], params['b'+str(l)]) 
        
        # Applying sigmoid on linear hypothesis
        A, activation_cache = sigmoid(Z) 
        print("A",A,activation_cache)
         # storing the both linear and activation cache
        cache = (linear_cache, activation_cache)
        caches.append(cache)
    
    return A, caches

In [124]:
def cost_function(A, Y):
    m = Y.shape[0]
    cost = (-1/m)*(np.dot(np.log(A), Y.T) + np.dot(log(1-A), 1-Y.T)) 
    return cost

In [125]:
def backprop(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    
    current_cache = caches[L-1]
    grads['dA'+str(L-1)], grads['dW'+str(L-1)], grads['db'+str(L-1)] = one_layer_backward(dAL, current_cache)
    
    for l in reversed(range(L-1)):
        
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = one_layer_backward(grads["dA" + str(l+1)], current_cache)
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
        
    return grads

In [126]:
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    
    for l in range(L):
        parameters['W'+str(l+1)] = parameters['W'+str(l+1)] -learning_rate*grads['W'+str(l+1)]
        parameters['b'+str(l+1)] = parameters['b'+str(l+1)] -  learning_rate*grads['b'+str(l+1)]
        
    return parameters

In [127]:
def train(X, Y, layer_dims, epochs, lr):
    params = init_params(layer_dims)
    print(params)
    cost_history = []
    for i in range(epochs):
        Y_hat, caches = forward_prop(X, params)
        print(Y_hat)
        cost = cost_function(Y_hat, Y)
        cost_history.append(cost)
        grads = backprop(Y_hat, Y, caches)
        params = update_parameters(params, grads, lr)
    return params, cost_history

In [128]:
data=load_breast_cancer()
X=data["data"]
Y=data["target"]

In [129]:
train(X,Y,[30,8],2,0.03)

{'W0': array([[ 0.01788628,  0.0043651 ],
       [ 0.00096497, -0.01863493],
       [-0.00277388, -0.00354759],
       [-0.00082741, -0.00627001],
       [-0.00043818, -0.00477218],
       [-0.01313865,  0.00884622],
       [ 0.00881318,  0.01709573],
       [ 0.00050034, -0.00404677],
       [-0.0054536 , -0.01546477],
       [ 0.00982367, -0.01101068],
       [-0.01185047, -0.0020565 ],
       [ 0.01486148,  0.00236716],
       [-0.01023785, -0.00712993],
       [ 0.00625245, -0.00160513],
       [-0.00768836, -0.00230031],
       [ 0.00745056,  0.01976111],
       [-0.01244123, -0.00626417],
       [-0.00803766, -0.02419083],
       [-0.00923792, -0.01023876],
       [ 0.01123978, -0.00131914],
       [-0.01623285,  0.00646675],
       [-0.00356271, -0.01743141],
       [-0.0059665 , -0.00588594],
       [-0.00873882,  0.00029714],
       [-0.02248258, -0.00267762],
       [ 0.01013183,  0.00852798],
       [ 0.01108187,  0.01119391],
       [ 0.01487543, -0.01118301],
       [ 0.00

ValueError: shapes (30,2) and (569,30) not aligned: 2 (dim 1) != 569 (dim 0)

In [95]:
np.random.randn(5,3)

array([[-0.92429374,  0.88130113,  0.55644294],
       [ 0.74689153, -0.34836832, -1.81101917],
       [ 0.95789639,  1.22632133, -1.48331661],
       [ 0.13791604,  1.19325258, -1.07572355],
       [ 1.76768278, -0.34903246, -1.07539011]])

In [110]:
X[0].shape

(30,)

In [111]:
W=np.random.randn(30,1)

In [160]:
X_=np.ones((X.shape[0],X.shape[1]+1))
X_[:,1:]=X

In [159]:
%timeit np.hstack((np.ones((X.shape[0],1)),X))

18.1 µs ± 2.52 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [172]:
W=init_params([30,8])
# for i in X_.dot(W[0]):
#     print(i)
X_[0].dot(W[0])

array([-2475.17375334])

In [170]:
W[0].shape

(31, 2)

In [173]:
W

[array([[ 1.78862847],
        [ 0.43650985],
        [ 0.09649747],
        [-1.8634927 ],
        [-0.2773882 ],
        [-0.35475898],
        [-0.08274148],
        [-0.62700068],
        [-0.04381817],
        [-0.47721803],
        [-1.31386475],
        [ 0.88462238],
        [ 0.88131804],
        [ 1.70957306],
        [ 0.05003364],
        [-0.40467741],
        [-0.54535995],
        [-1.54647732],
        [ 0.98236743],
        [-1.10106763],
        [-1.18504653],
        [-0.2056499 ],
        [ 1.48614836],
        [ 0.23671627],
        [-1.02378514],
        [-0.7129932 ],
        [ 0.62524497],
        [-0.16051336],
        [-0.76883635],
        [-0.23003072],
        [ 0.74505627]]),
 array([[ 1.97611078],
        [-1.24412333],
        [-0.62641691],
        [-0.80376609],
        [-2.41908317],
        [-0.92379202],
        [-1.02387576],
        [ 1.12397796],
        [-0.13191423]])]

In [208]:
def init_params_f(layer_dims):
    np.random.seed(3)
    params = {}
    L = len(layer_dims)
    
    for l in range(1, L):
        params['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1])*0.01
        params['b'+str(l)] = np.zeros((layer_dims[l], 1))
    return params  

In [231]:
params=init_params_f([30,8])

In [258]:
def init_params(layer_dims):
    np.random.seed(3)
    return [np.random.randn(layer_dims[dims-1]+1,layer_dims[dims]) for dims in range(1,len(layer_dims))]


In [259]:
W=init_params([30,8])

In [274]:
params["W1"].shape

(8, 30)

In [261]:
len(params["W1"])

8

In [266]:
len(X_.dot(W[0])[0])

8

In [276]:
W[0].shape

(31, 8)

In [281]:
X.dot(params["W1"])

ValueError: shapes (569,30) and (8,30) not aligned: 30 (dim 1) != 8 (dim 0)

In [282]:
np.dot(params["W1"],X)

ValueError: shapes (8,30) and (569,30) not aligned: 30 (dim 1) != 569 (dim 0)

In [283]:
def init_params(layer_dims):
    np.random.seed(3)
    params = {}
    L = len(layer_dims)
    
    for l in range(1, L):
        params['W'+str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1])*0.01
        params['b'+str(l)] = np.zeros((layer_dims[l], 1))
        
    return params
def sigmoid(Z):
    A = 1/(1+np.exp(np.dot(-1, Z)))
    cache = (Z)
    
    return A, cache
def forward_prop(X, params):
    
    A = X # input to first layer i.e. training data
    caches = []
    L = len(params)//2
    for l in range(1, L+1):
        A_prev = A
        
        # Linear Hypothesis
        Z = np.dot(params['W'+str(l)], A_prev) + params['b'+str(l)] 
        
        # Storing the linear cache
        linear_cache = (A_prev, params['W'+str(l)], params['b'+str(l)]) 
        
        # Applying sigmoid on linear hypothesis
        A, activation_cache = sigmoid(Z) 
        
         # storing the both linear and activation cache
        cache = (linear_cache, activation_cache)
        caches.append(cache)
    
    return A, caches
def cost_function(A, Y):
    m = Y.shape[1]
    
    cost = (-1/m)*(np.dot(np.log(A), Y.T) + np.dot(log(1-A), 1-Y.T)) 
    
    return cost
def one_layer_backward(dA, cache):
    linear_cache, activation_cache = cache
    
    Z = activation_cache
    dZ = dA*sigmoid(Z)*(1-sigmoid(Z)) # The derivative of the sigmoid function
    
    A_prev, W, b = linear_cache
    m = A_prev.shape[1]
    
    dW = (1/m)*np.dot(dZ, A_prev.T)
    db = (1/m)*np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    
    return dA_prev, dW, db
def backprop(AL, Y, caches):
    grads = {}
    L = len(caches)
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)
    
    dAL = -(np.divide(Y, AL) - np.divide(1-Y, 1-AL))
    
    current_cache = caches[L-1]
    grads['dA'+str(L-1)], grads['dW'+str(L-1)], grads['db'+str(L-1)] = one_layer_backward(dAL, current_cache)
    
    for l in reversed(range(L-1)):
        
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = one_layer_backward(grads["dA" + str(l+1)], current_cache)
        grads["dA" + str(l)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
        
    return grads
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2
    
    for l in range(L):
        parameters['W'+str(l+1)] = parameters['W'+str(l+1)] -learning_rate*grads['W'+str(l+1)]
        parameters['b'+str(l+1)] = parameters['b'+str(l+1)] -  learning_rate*grads['b'+str(l+1)]
        
    return parameters
def train(X, Y, layer_dims, epochs, lr):
    params = init_params(layer_dims)
    cost_history = []
    
    for i in range(epochs):
        Y_hat, caches = forward_prop(X, params)
        cost = cost_function(Y_hat, Y)
        cost_history.append(cost)
        grads = backprop(Y_hat, Y, caches)
        
        params = update_parameters(params, grads, lr)
        
        
    return params, cost_history
train(X,Y,[30,8],2,0.03)

ValueError: shapes (8,30) and (569,30) not aligned: 30 (dim 1) != 569 (dim 0)

In [294]:
[(np.exp(i*2)-1)/(np.exp(i*2)+1) for i in range(-10,10)]

[-0.9999999958776927,
 -0.999999969540041,
 -0.9999997749296758,
 -0.9999983369439446,
 -0.9999877116507956,
 -0.9999092042625951,
 -0.9993292997390671,
 -0.9950547536867306,
 -0.9640275800758168,
 -0.7615941559557649,
 0.0,
 0.7615941559557649,
 0.9640275800758169,
 0.9950547536867305,
 0.999329299739067,
 0.9999092042625951,
 0.9999877116507956,
 0.9999983369439447,
 0.9999997749296758,
 0.999999969540041]

In [295]:
[np.tanh(i) for i in range(-10,10)]

[-0.9999999958776927,
 -0.9999999695400409,
 -0.9999997749296758,
 -0.9999983369439447,
 -0.9999877116507956,
 -0.9999092042625951,
 -0.999329299739067,
 -0.9950547536867305,
 -0.9640275800758169,
 -0.7615941559557649,
 0.0,
 0.7615941559557649,
 0.9640275800758169,
 0.9950547536867305,
 0.999329299739067,
 0.9999092042625951,
 0.9999877116507956,
 0.9999983369439447,
 0.9999997749296758,
 0.9999999695400409]