In [1]:
import numpy as np

## Activation Functions
### ReLU - Rectified Linear Unit
ReLU replaces every negative value with zero and leaves the other values unchanged. It is used to calculate the values of the hidden layer:

$$z_1 = W_1x + b_1$$
$$h = ReLU\left( z_1 \right)$$

In [2]:
# Seed NumPy's global random generator so all random outcomes can be reproduced
np.random.seed(10)

# Define a 5x1 column vector using numpy. rand() samples from [0, 1),
# so scaling by 10 and shifting by -5 gives values in [-5, 5)
z_1 = 10 * np.random.rand(5, 1) - 5
z_1

array([[ 2.71320643],
       [-4.79248051],
       [ 1.33648235],
       [ 2.48803883],
       [-0.01492988]])

In [3]:
# Work on a copy so that z_1 itself is left untouched
h = z_1.copy()
# Boolean mask: True wherever the entry is negative
h < 0

array([[False],
       [ True],
       [False],
       [False],
       [ True]])

In [4]:
# Use the boolean mask to overwrite every negative entry of h with 0 (modifies h in place)
h[h < 0] = 0
h

array([[2.71320643],
       [0.        ],
       [1.33648235],
       [2.48803883],
       [0.        ]])

In [5]:
def relu(z):
    """Apply ReLU element-wise: negative entries become 0, all others are kept.

    Parameters
    ----------
    z : array_like
        Input values (NumPy array, possibly nested list, or scalar).

    Returns
    -------
    numpy.ndarray
        A new array with the same shape as `z`. The input is never modified.
    """
    # np.array copies its input by default, so the caller's data is never
    # modified, and it accepts plain Python lists as well as NumPy arrays.
    # order="C" and subok=True match what ndarray.copy() produces.
    result = np.array(z, order="C", subok=True)
    result[result < 0] = 0
    return result

In [6]:
# Define a new 5x1 column vector and save it in the 'z' variable
z = np.array([[-1.25459881], [ 4.50714306], [ 2.31993942], [ 0.98658484], [-3.4398136 ]])

# Apply ReLU to it. relu returns a new array, so 'z' itself keeps its negative entries
relu(z)

array([[0.        ],
       [4.50714306],
       [2.31993942],
       [0.98658484],
       [0.        ]])

### Softmax
$$z_2 = W_2h + b_2$$
$$\hat y = softmax\left(z_2 \right)$$

$$softmax(z_2)_i = \frac{e^{(z_2)_i}}{\sum\limits^V_{j=1}e^{(z_2)_j}}$$

In [8]:
# Define a new vector and save it in the 'z' variable.
# Note: this is a 1-D array, and it replaces the 5x1 column vector previously stored in 'z'
z = np.array([9, 8, 11, 10, 8.5])

# Display the vector
z

array([ 9. ,  8. , 11. , 10. ,  8.5])

In [9]:
# Save the element-wise exponentials of the values in a new vector (the softmax numerators)
e_z = np.exp(z)

# Display the vector with the exponential values
e_z

array([ 8103.08392758,  2980.95798704, 59874.1417152 , 22026.46579481,
        4914.7688403 ])

In [10]:
# Save the sum of the exponentials (the softmax denominator)
sum_e_z = np.sum(e_z)

# Display the sum of exponentials
sum_e_z

97899.41826492078

In [11]:
# Softmax value of the first element in the original vector:
# its exponential divided by the sum of all exponentials
e_z[0]/sum_e_z

0.08276947985173956

In [12]:
# Define the 'softmax' function that will include the steps previously seen
def softmax(z, axis=None):
    """Compute the softmax of `z` in a numerically stable way.

    Parameters
    ----------
    z : array_like
        Input scores (NumPy array, list, or scalar).
    axis : int or None, optional
        Axis along which to normalise. The default, None, normalises over
        all elements of `z`, so the whole output sums to 1.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as `z`, holding non-negative values that
        sum to 1 along `axis`.
    """
    z = np.asarray(z)
    if z.size == 0:
        # Nothing to normalise; np.max would raise on an empty array
        return np.exp(z)
    # Subtracting the maximum leaves the result mathematically unchanged
    # (the common factor cancels in the ratio) but keeps np.exp from
    # overflowing to inf, which would turn the output into nan.
    e_z = np.exp(z - np.max(z, axis=axis, keepdims=True))
    sum_e_z = np.sum(e_z, axis=axis, keepdims=True)
    return e_z / sum_e_z

In [13]:
# Softmax values for the original vector (passed here as a plain Python list)
softmax([9, 8, 11, 10, 8.5])

array([0.08276948, 0.03044919, 0.61158833, 0.22499077, 0.05020223])

In [14]:
# Check that the softmax values sum to 1. Floating-point rounding means the sum
# is not guaranteed to be exactly 1, so compare with a tolerance instead of ==
np.isclose(np.sum(softmax([9, 8, 11, 10, 8.5])), 1)

True

## Dimensions: 1-D arrays vs 2-D column vectors

In [15]:
# Define V, the size of the vocabulary (same meaning as in the previous lecture notebook)
V = 5

# Define a 1-D vector of length V filled with zeros
x_array = np.zeros(V)

# Display the vector
x_array

array([0., 0., 0., 0., 0.])

In [16]:
# Display the vector's shape: a 1-tuple, i.e. a 1-D array with V elements
x_array.shape

(5,)

In [17]:
# Build a (V, 1) column vector as an independent copy, so x_array keeps its 1-D shape.
# reshape() is used instead of assigning to .shape, which NumPy discourages
x_column_vector = x_array.reshape(V, 1).copy()  # alternatively ... .reshape(x_array.shape[0], 1)

# Display the vector
x_column_vector

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [18]:
# Display the column vector's shape: 2-D, with V rows and 1 column
x_column_vector.shape

(5, 1)