In [1]:
import numpy as np 

In [2]:
def sigmoid(x):
    """
    Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    input : real scalar, numpy array or array-like (e.g. a list)

    output : sigmoid of x with the same shape as x
             (a numpy scalar when x is a scalar)
    """
    x = np.asarray(x)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow however large |x| is
    z = np.exp(-np.abs(x))
    # x >= 0 : 1 / (1 + e^-x)      x < 0 : e^x / (1 + e^x)   (same function)
    s = np.where(x >= 0, 1 / (1 + z), z / (1 + z))
    # [()] turns a 0-d result back into a numpy scalar; n-d arrays pass through
    return s[()]

In [3]:
# Small 1-D sample input for the sigmoid cells below
x = np.array([1, 2, 3])

In [4]:
# Apply sigmoid to the sample array x and print the result.
print(sigmoid(x))

[0.73105858 0.88079708 0.95257413]


In [5]:
def sigmoid_gradient(x):
    """
    Derivative of the sigmoid function evaluated at x.

    input : any value accepted by sigmoid

    output : sigmoid(x) * (1 - sigmoid(x)), same shape as sigmoid(x)
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

In [6]:
# Print the sigmoid gradient evaluated on the same sample array x.
print(sigmoid_gradient(x))

[0.19661193 0.10499359 0.04517666]


In [8]:
# Takes an image array and returns it as a column vector of shape (l*h*depth, 1)

def image2vector(image):
    """
    Flatten a 3-D image array into a single column.

    input : numpy array of shape (length, height, depth)

    output : numpy array of shape (length * height * depth, 1)
    """
    length, height, depth = image.shape[0], image.shape[1], image.shape[2]
    return image.reshape((length * height * depth, 1))
    

In [10]:
# Example input: a 3 x 3 x 2 array. Real images are typically
# (num_px_x, num_px_y, 3), where the last axis holds the RGB values.
image = np.array([
    [[0.67826139, 0.29380381],
     [0.90714982, 0.52835647],
     [0.4215251, 0.45017551]],

    [[0.92814219, 0.96677647],
     [0.85304703, 0.52351845],
     [0.19981397, 0.27417313]],

    [[0.60659855, 0.00533165],
     [0.10820313, 0.49978937],
     [0.34144279, 0.94630077]],
])

print(f"image2vector(image) = {image2vector(image)}")



image2vector(image) = [[0.67826139]
 [0.29380381]
 [0.90714982]
 [0.52835647]
 [0.4215251 ]
 [0.45017551]
 [0.92814219]
 [0.96677647]
 [0.85304703]
 [0.52351845]
 [0.19981397]
 [0.27417313]
 [0.60659855]
 [0.00533165]
 [0.10820313]
 [0.49978937]
 [0.34144279]
 [0.94630077]]


Another common technique in Machine Learning and Deep Learning is to normalize the data. It often leads to better performance because gradient descent converges faster after normalization. Here, by normalization we mean changing x to $ \frac{x}{\| x\|} $ (dividing each row vector of x by its norm).

For example, if $$x = \begin{bmatrix} 0 & 3 & 4 \\ 2 & 6 & 4 \\ \end{bmatrix}\tag{3}$$ then $$\| x\| = np.linalg.norm(x, axis = 1, keepdims = True) = \begin{bmatrix} 5 \\ \sqrt{56} \\ \end{bmatrix}\tag{4}$$ and $$x\_normalized = \frac{x}{\| x\|} = \begin{bmatrix} 0 & \frac{3}{5} & \frac{4}{5} \\ \frac{2}{\sqrt{56}} & \frac{6}{\sqrt{56}} & \frac{4}{\sqrt{56}} \\ \end{bmatrix}\tag{5}$$ Note that you can divide matrices of different sizes and it works fine: this is called broadcasting.

In [12]:
def normalize_rows(x):
    """
    Scale every row of x to unit L2 norm.

    input : numpy array (or array-like) of shape (n, m)

    output : numpy array of shape (n, m) in which each row has been divided
             by its L2 norm; all-zero rows are returned as zeros, not NaN
    """
    # keepdims=True yields shape (n, 1), so the division broadcasts over each row
    x_norm = np.linalg.norm(x, ord=2, axis=1, keepdims=True)
    # a zero row has norm 0: divide it by 1 instead so it stays zero (no 0/0)
    x_norm[x_norm == 0] = 1
    return x / x_norm

In [15]:
# 2 x 3 sample matrix; each row is normalized independently
x = np.array([[0, 3, 4],
              [1, 6, 4]])
print(f"normalize_rows(x) = {normalize_rows(x)}")

normalize_rows(x) = [[0.         0.6        0.8       ]
 [0.13736056 0.82416338 0.54944226]]


In [16]:
def softmax(x):
    """
    Row-wise softmax.

    input : numpy array (or array-like) of shape (n, m)

    output : numpy array of shape (n, m); every row is non-negative
             and sums to 1
    """
    x = np.asarray(x)
    if x.size == 0:
        # nothing to normalize; also avoids taking the max of an empty row
        return np.exp(x)

    # Softmax is unchanged when a constant is subtracted from a row.
    # Shifting by the row max keeps every exponent <= 0, so np.exp cannot
    # overflow and produce inf / inf = NaN for large scores.
    x_exp = np.exp(x - np.max(x, axis=1, keepdims=True))

    x_sum = np.sum(x_exp, axis=1, keepdims=True)

    return x_exp / x_sum

In [17]:
# 2 x 5 sample score matrix; softmax is taken across each row
x = np.array([[9, 2, 5, 0, 0],
              [7, 5, 0, 0, 0]])
print(f"softmax(x) = {softmax(x)}")


softmax(x) = [[9.80897665e-01 8.94462891e-04 1.79657674e-02 1.21052389e-04
  1.21052389e-04]
 [8.78679856e-01 1.18916387e-01 8.01252314e-04 8.01252314e-04
  8.01252314e-04]]


In [18]:
def L1(y_hat, y):
    """
    L1 loss: sum of the absolute element-wise differences.

    input : y_hat, y - numpy arrays of matching shape

    output : scalar, sum(|y - y_hat|)
    """
    residual = y - y_hat
    return np.sum(np.abs(residual))
def L2(y_hat, y):
    """
    L2 loss: sum of the squared element-wise differences.

    input : y_hat, y - numpy arrays of matching shape

    output : scalar, sum((y - y_hat)^2)
    """
    residual = y - y_hat
    return np.sum(residual ** 2)

In [19]:
# Sample inputs for the two loss functions
y_hat = np.array([0.9, 0.2, 0.1, 0.4, 0.9])
y = np.array([1, 0, 0, 1, 1])

print(L1(y_hat, y))
print(L2(y_hat, y))

1.1
0.43
