# Vectorization

In [1]:
import numpy as np

In [22]:
# Three layouts of the same ten integers:
#   a -> rank-1 array, shape (10,): neither a row vector (1, 10) nor a column vector (10, 1)
#   b -> row vector, shape (1, 10)
#   c -> column vector, shape (10, 1)
values = list(range(1, 11))
a = np.array(values)
b = np.array(values).reshape((1, 10))
c = np.array(values).reshape((10, 1))

# Shapes of the three arrays.
print(a.shape, b.shape, c.shape)
# Comparing the rank-1 array against each of the 2-D arrays.
print(np.array_equal(a, b), np.array_equal(a, c))
# Element-wise products broadcast the operands to a common shape.
print((a * b).shape, (a * c).shape, (b * c).shape)
# Matrix products: (1, 10) x (10, 1) and (10, 1) x (1, 10).
print(b.dot(c).shape, c.dot(b).shape)

(10,) (1, 10) (10, 1)
False False
(1, 10) (10, 10) (10, 10)
(1, 1) (10, 10)


In [7]:
import time

# Two random vectors whose dot product is computed twice, once with NumPy and
# once with an explicit Python loop, to compare the running times.
a = np.random.rand(1000000)
b = np.random.rand(1000000)

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() follows the wall clock, which can be adjusted
# while the measurement is running.
tic = time.perf_counter()
c = np.dot(a,b)
toc = time.perf_counter()
print(c)
print("Vectorized version:" + str(1000*(toc-tic)) +"ms" )

c = 0
tic = time.perf_counter()
# Loop over the actual array length rather than repeating the literal size,
# so the loop cannot get out of sync with the arrays above.
for i in range(a.shape[0]):
    c += a[i]*b[i]
toc = time.perf_counter()
print(c)
print("For loop:" + str(1000*(toc-tic)) + "ms")

249966.59497044512
Vectorized version:1.7580986022949219ms
249966.59497044657
For loop:556.1151504516602ms


In [14]:
# np.zeros takes either an int or a tuple of dimensions; print the shape
# produced for a 1-D request and for two requests containing a zero-length axis.
for dims in (10, (10, 0), (0, 10)):
    print(np.zeros(dims).shape)

(10,)
(10, 0)
(0, 10)


# Vectorized version of Logistic Regression

There are three for loops:
* The outermost for loop runs over the gradient-descent **iterations** (index i)
    * the second for loop runs over the **m training examples**
        * the third for loop runs over the **nx weights**

We can vectorize the second and third for loops this way:

In [83]:
# Problem size: nx features per example, m training examples.
nx, m = 10, 1000
# Random inputs with one example per column, and random 0/1 labels as a row vector.
X = np.random.rand(nx, m)
y = np.random.randint(low=0, high=2, size=(1, m))

iterations = 100
alpha = 0.5  # gradient-descent step size
# Random initial parameters: a column vector of weights and a (1, 1) bias.
w = np.random.rand(nx, 1)
b = np.random.rand(1, 1)
for i in range(iterations):
    # Forward pass over all m examples at once.
    # b is broadcast from (1, 1) to (1, m) when added to w.T.dot(X).
    z = w.T.dot(X) + b  # z.shape == (1, m)
    a = 1 / (1 + np.exp(-z))  # sigmoid activation, a.shape == (1, m)
    # Cross-entropy cost averaged over the examples; summing over axis=1
    # leaves J with shape (1,).
    J = -1/m * np.sum(y * np.log(a) + (1 - y) * np.log(1 - a), axis=1)
    print(J)
    # Gradients of the cost with respect to z, w and b.
    dz = a - y  # dz.shape == (1, m)
    dw = 1/m * X.dot(dz.T)  # dw.shape == (nx, 1)
    db = 1/m * np.sum(dz)  # scalar
    # Gradient-descent update.
    w -= alpha * dw
    b -= alpha * db
    

[1.41986636]
[1.10393502]
[0.88655215]
[0.76997461]
[0.72108426]
[0.7036203]
[0.69777363]
[0.69580776]
[0.69509335]
[0.69477913]
[0.69459369]
[0.69445083]
[0.6943232]
[0.6942021]
[0.69408473]
[0.69397018]
[0.69385811]
[0.6937484]
[0.69364097]
[0.69353576]
[0.69343272]
[0.6933318]
[0.69323296]
[0.69313616]
[0.69304135]
[0.6929485]
[0.69285755]
[0.69276848]
[0.69268124]
[0.69259579]
[0.69251209]
[0.69243012]
[0.69234982]
[0.69227117]
[0.69219414]
[0.69211868]
[0.69204477]
[0.69197237]
[0.69190145]
[0.69183199]
[0.69176394]
[0.69169728]
[0.69163198]
[0.69156802]
[0.69150535]
[0.69144397]
[0.69138383]
[0.69132492]
[0.69126721]
[0.69121066]
[0.69115527]
[0.691101]
[0.69104783]
[0.69099574]
[0.6909447]
[0.6908947]
[0.6908457]
[0.6907977]
[0.69075066]
[0.69070458]
[0.69065942]
[0.69061517]
[0.69057181]
[0.69052932]
[0.69048769]
[0.69044689]
[0.69040691]
[0.69036773]
[0.69032934]
[0.69029171]
[0.69025483]
[0.69021869]
[0.69018327]
[0.69014856]
[0.69011454]
[0.69008119]
[0.69004851]
[0.69001647

# Building a neural network with arbitrary activation function and hidden layers

**Reminder**: The general methodology to build a Neural Network is to:
    1. Define the neural network structure (number of input units, number of hidden units, etc.).
    2. Initialize the model's parameters
    3. Loop:
        - Implement forward propagation
        - Compute loss
        - Implement backward propagation to get the gradients
        - Update parameters (gradient descent)

In [144]:
# Activation functions; with derivative=True each one returns its derivative
# evaluated at z instead of the activation itself.
def sigmoid(z, derivative=False):
    """Return the logistic sigmoid of z, or its derivative when derivative=True.

    The derivative is computed as s * (1 - s), which is algebraically equal to
    exp(-z) / (1 + exp(-z))**2 but does not evaluate inf / inf for very
    negative z.
    """
    s = 1/(1 + np.exp(-z))
    return s * (1 - s) if derivative else s


def tanh(z, derivative=False):
    """Return tanh(z), or its derivative 1 - tanh(z)**2 when derivative=True."""
    t = np.tanh(z)
    return 1 - t**2 if derivative else t


def loss(yhat, y, derivative=False):
    """Element-wise binary cross-entropy between predictions yhat and labels y.

    Works for an output layer with several units because every operation is
    element-wise. With derivative=True, returns the derivative of the loss
    with respect to yhat.
    """
    if derivative:
        return -y/yhat + (1-y)/(1-yhat)
    return (-1) * (y * np.log(yhat) + (1-y) * np.log(1-yhat))


def cost(yhat, y):
    """Return the loss averaged over the examples (the columns of yhat).

    The number of examples is read from yhat itself, so the function does not
    depend on a module-level m being defined.
    """
    return 1/yhat.shape[1] * np.sum(loss(yhat, y), axis=1)


m = 100 # number of examples
n = [4, 3, 1] # n[l] is the number of units in layer l; n[0] is the size of the input layer
L = len(n)-1 # number of layers, not counting the input layer
G = [None, tanh, sigmoid] # G[l] is the activation function of layer l

Y = np.random.randint(low=0, high=2, size=(1,m)) # random 0/1 labels, one per example
X = np.random.rand(n[0], m) # layer zero activation

# Per-layer containers, indexed by layer number; index 0 is a placeholder
# (or X for the activations) so that index l always refers to layer l.
A = [X] + [np.zeros((n[l], m)) for l in range(1, L+1)] # n[l] activations per example
W = [None] + [np.random.randn(n[l], n[l-1])*0.01 for l in range(1, L+1)] # W[l] has shape (n[l], n[l-1])
b = [None] + [np.zeros((n[l], 1)) for l in range(1, L+1)] # one bias per unit

Z = [None] + [np.zeros((n[l], m)) for l in range(1, L+1)]
dZ = [None] + [np.zeros((n[l], m)) for l in range(1, L+1)]
dW = [None] + [np.zeros((n[l], n[l-1])) for l in range(1, L+1)]
db = [None] + [np.zeros((n[l], 1)) for l in range(1, L+1)]

alpha = 0.1 # gradient-descent step size
iterations = 10
for i in range(iterations):

    # forward propagation from layer 1 to layer L
    for l in range(1, L+1):
        Z[l] = np.dot(W[l], A[l-1]) + b[l]
        A[l] = G[l](Z[l]) # applying activation function for layer l on Z[l]

    print(cost(A[L], Y))

    # backward propagation from layer L down to layer 1 (L >= l >= 1)
    for l in range(L, 0, -1):
        if l == L:
            dZ[L] = loss(A[L], Y, derivative=True) * G[L](Z[L], derivative=True)
        else:
            # W[l+1] must still hold the weights used in the forward pass,
            # which is why no parameter is updated inside this loop.
            dZ[l] = np.dot(W[l+1].T, dZ[l+1]) * G[l](Z[l], derivative=True)
        dW[l] = 1/m * np.dot(dZ[l], A[l-1].T)
        db[l] = 1/m * np.sum(dZ[l], axis=1, keepdims=True)

    # gradient-descent update, applied only after every gradient is known
    for l in range(1, L+1):
        W[l] = W[l] - alpha * dW[l]
        b[l] = b[l] - alpha * db[l]
        
    

[0.69315259]
[0.6923522]
[0.69159134]
[0.69086806]
[0.6901805]
[0.68952688]
[0.68890554]
[0.68831486]
[0.68775333]
[0.68721951]
