# 1 Basic numpy functions

## 1.1 sigmoid function, np.exp()

### using math package to build basic sigmoid function

In [2]:
import math

def basic_sigmoid(x):
    """Compute the sigmoid of a single real number using the ``math`` module.

    Args:
        x: A real scalar (int or float).

    Returns:
        float: 1 / (1 + e^(-x)), a value between 0 and 1.

    Raises:
        TypeError: If ``x`` is a non-numeric container such as a list.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x, math.exp(-x) raises OverflowError once -x exceeds ~709.
    # e^x / (1 + e^x) is algebraically identical and its exponent is never
    # positive, so it can only underflow harmlessly to 0.0.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

# Evaluate the scalar implementation at x = 1 and show the result.
print("basic sigmoid: ", basic_sigmoid(1), sep="")

basic sigmoid: 0.7310585786300049


In [None]:
x = [1, 2, 3]  # a plain Python list instead of a scalar
try:
    basic_sigmoid(x)
except TypeError as err:
    # Expected failure: the math-based implementation only works on scalars.
    # Catching the error keeps "Restart & Run All" from stopping at this cell
    # while still showing the reader what goes wrong.
    print("basic_sigmoid failed on a list: " + str(err))

### using numpy package to build sigmoid function

In [3]:
import numpy as np

def sigmoid(x):
    """Apply the logistic sigmoid element-wise with numpy.

    Args:
        x: A scalar or a numpy array.

    Returns:
        The value(s) 1 / (1 + e^(-x)); for an array input the result has the
        same shape as ``x``.
    """
    exp_neg_x = np.exp(-x)
    return 1 / (1 + exp_neg_x)

# np.exp works element-wise, so sigmoid accepts a scalar or a numpy array.
x = np.array([1, 2, 3])
print("numpy sigmoid: ", sigmoid(x), sep="")

numpy sigmoid: [0.73105858 0.88079708 0.95257413]


## 1.2 Sigmoid Gradient

The gradient (derivative) of the sigmoid function with respect to its input is given by:  
$$sigmoid\_derivative(x) = \sigma'(x) = \sigma(x) (1 - \sigma(x))\tag{2}$$

In [None]:
def sigmoid_derivative(x):
    """Evaluate the gradient of the sigmoid function at ``x``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)).

    Args:
        x: A scalar or a numpy array.

    Returns:
        The derivative of the sigmoid at each element of ``x``.
    """
    sig = sigmoid(x)
    one_minus_sig = 1 - sig
    return sig * one_minus_sig

# Evaluate the gradient element-wise on a small example vector.
x = np.array([1, 2, 3])
print("sigmoid derivative: ", sigmoid_derivative(x), sep="")


## 1.3 reshaping arrays

`X.shape`: get the shape (dimension) of the matrix/vector X  
`X.reshape()`: reshape the X into some other dimension  

real life example:  
- an image is represented by a 3-dimensional array of shape $(length, height, depth = 3)$
- when the image is read as the input of an algorithm, it is converted into a vector of shape $(length * height * depth, 1)$
- in other words, the 3D input is "unrolled" into a single column vector

In [None]:
def image2vector(image):
    """Unroll an image array into a single column vector.

    Args:
        image: Array-like of any dimensionality, e.g. (length, height, depth)
            for a colour image or (length, height) for a grayscale one.

    Returns:
        numpy.ndarray: Array of shape (image.size, 1) holding the elements of
        ``image`` in row-major (C) order.
    """
    # -1 lets numpy infer the element count, so the function is no longer
    # limited to inputs with exactly three axes.
    return np.asarray(image).reshape(-1, 1)

# A tiny 3 x 3 x 2 "image" used to demonstrate the unrolling.
t_image = np.array([
    [[0.67826139, 0.29380381],
     [0.90714982, 0.52835647],
     [0.4215251, 0.45017551]],

    [[0.92814219, 0.96677647],
     [0.85304703, 0.52351845],
     [0.19981397, 0.27417313]],

    [[0.60659855, 0.00533165],
     [0.10820313, 0.49978937],
     [0.34144279, 0.94630077]],
])
print("image2vector(image): ", image2vector(t_image), sep="")

## 1.4 Normalizing rows

Normalizing data leads to better performance since gradient descent converges faster.  
normalization: changing x to $\frac{x}{\|x\|}$ (dividing each row vector of x by its norm)  

In [None]:
def normalize_rows(x):
    """Scale each row of a matrix to unit Euclidean (L2) length.

    Args:
        x: Array of shape (n, m); the norm is taken along axis 1, so each row
            is treated as one vector.

    Returns:
        numpy.ndarray: Array with the same shape as ``x`` in which every row
        has been divided by its L2 norm. All-zero rows are returned as zeros
        rather than NaN.
    """
    x_norm = np.linalg.norm(x, axis=1, keepdims=True)
    # A zero row has norm 0; dividing by it would emit a RuntimeWarning and
    # fill the row with NaN. Dividing by 1 instead leaves such rows as zeros.
    safe_norm = np.where(x_norm == 0, 1, x_norm)
    return x / safe_norm

# Two example row vectors; after normalization each row has unit length.
x = np.array([
    [0.0, 3.0, 4.0],
    [1.0, 6.0, 4.0],
])
print("normalizeRows(x): ", normalize_rows(x), sep="")

### Softmax with numpy
- $\text{for } x \in \mathbb{R}^{1\times n} \text{,     }$

\begin{align*}
 softmax(x) &= softmax\left(\begin{bmatrix}
    x_1  &&
    x_2 &&
    ...  &&
    x_n  
\end{bmatrix}\right) \\&= \begin{bmatrix}
    \frac{e^{x_1}}{\sum_{j}e^{x_j}}  &&
    \frac{e^{x_2}}{\sum_{j}e^{x_j}}  &&
    ...  &&
    \frac{e^{x_n}}{\sum_{j}e^{x_j}} 
\end{bmatrix} 
\end{align*}


- for a matrix $x \in \mathbb{R}^{m \times n}$, $x_{ij}$ denotes the element in the $i^{th}$ row and $j^{th}$ column of $x$, thus we have:  

\begin{align*}
softmax(x) &= softmax\begin{bmatrix}
            x_{11} & x_{12} & x_{13} & \dots  & x_{1n} \\
            x_{21} & x_{22} & x_{23} & \dots  & x_{2n} \\
            \vdots & \vdots & \vdots & \ddots & \vdots \\
            x_{m1} & x_{m2} & x_{m3} & \dots  & x_{mn}
            \end{bmatrix} \\ \\&= 
 \begin{bmatrix}
    \frac{e^{x_{11}}}{\sum_{j}e^{x_{1j}}} & \frac{e^{x_{12}}}{\sum_{j}e^{x_{1j}}} & \frac{e^{x_{13}}}{\sum_{j}e^{x_{1j}}} & \dots  & \frac{e^{x_{1n}}}{\sum_{j}e^{x_{1j}}} \\
    \frac{e^{x_{21}}}{\sum_{j}e^{x_{2j}}} & \frac{e^{x_{22}}}{\sum_{j}e^{x_{2j}}} & \frac{e^{x_{23}}}{\sum_{j}e^{x_{2j}}} & \dots  & \frac{e^{x_{2n}}}{\sum_{j}e^{x_{2j}}} \\
    \vdots & \vdots & \vdots & \ddots & \vdots \\
    \frac{e^{x_{m1}}}{\sum_{j}e^{x_{mj}}} & \frac{e^{x_{m2}}}{\sum_{j}e^{x_{mj}}} & \frac{e^{x_{m3}}}{\sum_{j}e^{x_{mj}}} & \dots  & \frac{e^{x_{mn}}}{\sum_{j}e^{x_{mj}}}
\end{bmatrix} \\ \\ &= \begin{pmatrix}
    softmax\text{(first row of x)}  \\
    softmax\text{(second row of x)} \\
    \vdots  \\
    softmax\text{(last row of x)} \\
\end{pmatrix} 
\end{align*}


In [None]:
def softmax(x):
    """Compute the softmax of every row of a matrix.

    Args:
        x: Array-like of shape (m, n). Each row is normalized independently
            (sums are taken along axis 1). Non-floating inputs are converted
            to float64 before the computation.

    Returns:
        numpy.ndarray: Array of shape (m, n) where entry (i, j) equals
        exp(x[i, j]) / sum_k exp(x[i, k]); each row sums to 1.
    """
    x = np.asarray(x)
    if not np.issubdtype(x.dtype, np.inexact):
        # Integer/boolean input: convert so the subtraction below cannot wrap
        # around for unsigned types.
        x = x.astype(np.float64)
    # Subtracting each row's maximum does not change the softmax
    # mathematically, but keeps every exponent <= 0 so np.exp cannot overflow
    # to inf (which would turn the whole row into NaN).
    x_shifted = x - np.max(x, axis=1, keepdims=True)
    x_exp = np.exp(x_shifted)
    x_sum = np.sum(x_exp, axis=1, keepdims=True)
    return x_exp / x_sum

# Two example rows; softmax is applied to each row separately.
t_x = np.array([
    [9, 2, 5, 0, 0],
    [7, 5, 0, 0, 0],
])
print("softmax(x): ", softmax(t_x), sep="")


(2, 5)
(2, 1)
softmax(x): [[9.80897665e-01 8.94462891e-04 1.79657674e-02 1.21052389e-04
  1.21052389e-04]
 [8.78679856e-01 1.18916387e-01 8.01252314e-04 8.01252314e-04
  8.01252314e-04]]


# 2 Vectorization
Vectorization replaces explicit Python loops with whole-array operations, which keeps computations efficient when working with very large datasets.  

## 2.1 L1 and L2 loss functions

### L1 loss function
$$\begin{align*} & L_1(\hat{y}, y) = \sum_{i=0}^{m-1}|y^{(i)} - \hat{y}^{(i)}| \end{align*}$$

In [6]:
def L1(yhat, y):
    """Return the L1 loss between predictions and true labels.

    Args:
        yhat: numpy array of predicted values.
        y: numpy array of true values, broadcastable against ``yhat``.

    Returns:
        The sum of |yhat - y| over all elements.
    """
    abs_errors = np.absolute(yhat - y)
    return np.sum(abs_errors)

# Example predictions and their true labels.
yhat = np.array([0.9, 0.2, 0.1, 0.4, 0.9])
y = np.array([1, 0, 0, 1, 1])
print("L1: ", L1(yhat, y), sep="")


L1: 1.1


### L2 loss function
$$\begin{align*} & L_2(\hat{y},y) = \sum_{i=0}^{m-1}(y^{(i)} - \hat{y}^{(i)})^2 \end{align*}$$

In [7]:
def L2(yhat, y):
    """Return the L2 loss between predictions and true labels.

    Args:
        yhat: numpy array of predicted values.
        y: numpy array of true values, broadcastable against ``yhat``.

    Returns:
        The sum of (yhat - y)**2 over all elements.
    """
    # Flatten before the dot product: np.dot on 2-D inputs is a matrix
    # product, which is not the element-wise sum of squares (and raises a
    # shape error for row or column vectors). For 1-D inputs this is the same
    # computation as before.
    diff = np.ravel(yhat - y)
    return np.dot(diff, diff)

# Same example predictions and labels as for the L1 loss.
yhat = np.array([0.9, 0.2, 0.1, 0.4, 0.9])
y = np.array([1, 0, 0, 1, 1])

print("L2:", L2(yhat, y), sep="")


L2:0.43
