In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import linear, relu, sigmoid
%matplotlib widget
import matplotlib.pyplot as plt

from public_tests import * 

from autils import *
from lab_utils_softmax import plt_softmax
np.set_printoptions(precision=2)


The softmax function can be written:
$$a_j = \frac{e^{z_j}}{ \sum_{k=0}^{N-1}{e^{z_k} }} \tag{1}$$

where $z_j = \mathbf{w}_j \cdot \mathbf{x} + b_j$ is the pre-activation of output unit $j$ and $N$ is the number of features/categories in the output layer.

In [None]:
def my_softmax(z):
    """ Softmax converts a vector of values to a probability distribution.

    The maximum of z is subtracted before exponentiating. Softmax is invariant
    to adding a constant to every input, so the result is unchanged, but
    np.exp can no longer overflow to inf (which previously produced nan
    outputs for large inputs).

    Args:
      z (ndarray (N,))  : input data, N features
    Returns:
      a (ndarray (N,))  : softmax of z
    """
    ### START CODE HERE ###
    z = np.asarray(z)
    # np.max raises on an empty array; with no elements there is nothing to shift.
    shift = np.max(z) if z.size else 0.0
    ez = np.exp(z - shift)
    a = ez / np.sum(ez)
    ### END CODE HERE ###
    return a

In [None]:
# Sanity check: evaluate the NumPy implementation and TensorFlow's softmax on the
# same vector and print both so they can be compared by eye.
z = np.array([1., 2., 3., 4.])
a = my_softmax(z)
atf = tf.nn.softmax(z)
print(f"my_softmax(z):         {a}")
print(f"tensorflow softmax(z): {atf}")

# BEGIN UNIT TEST
# NOTE(review): test_my_softmax is not defined in this notebook; presumably it
# comes from the `public_tests` star import — confirm.
test_my_softmax(my_softmax)

In [None]:
# NOTE(review): load_data is not defined in this notebook; presumably it is
# provided by one of the star imports at the top (e.g. `autils`) — confirm.
X, y = load_data() # load dataset

In [None]:
# Show the dimensions of the feature matrix and the label array just loaded.
print(f"The shape of X is: {X.shape}")
print(f"The shape of y is: {y.shape}")

In [None]:
# 1 hidden layer and  1 layer output 
def mlp_1Hidden(num_neuron, activation_hidden, activation_output, X, y):
    """ MLP with 1 hidden layer and 1 output layer

    The input width is read from X and the number of output units from y, so
    the function no longer depends on undefined globals, and the layer size
    and activations passed by the caller are actually used.

    Args:
      num_neuron (int)             : number of neurons in the hidden layer
      activation_hidden (function) : activation function for hidden layer
      activation_output (function) : activation function for output layer
      X (ndarray (N, M))           : input data, N samples, M features
      y (ndarray (N,))             : input labels, N samples
    Returns:
      model (Sequential)           : keras model (not compiled, not trained)
    """
    ### START CODE HERE ###
    input_size = X.shape[1]
    # NOTE(review): assumes y holds integer class ids 0..K-1 (the encoding a
    # sparse categorical loss expects), so K = max label + 1 — confirm.
    output_size = int(np.max(y)) + 1
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(input_size,)),
        tf.keras.layers.Dense(num_neuron, activation=activation_hidden),
        tf.keras.layers.Dense(output_size, activation=activation_output),
    ])
    ### END CODE HERE ###
    return model

In [None]:
# 2 hidden layers and 1 layer output
def mlp_2Hidden(num_neuron_L1, num_neuron_L2, activation_hidden, activation_output, X, y):
    """ MLP with 2 hidden layers and 1 output layer

    The input width is read from X and the number of output units from y, so
    the function no longer depends on undefined globals, and the layer sizes
    and activations passed by the caller are actually used.

    Args:
      num_neuron_L1 (int)          : number of neurons in the 1st hidden layer
      num_neuron_L2 (int)          : number of neurons in the 2nd hidden layer
      activation_hidden (function) : activation function for both hidden layers
      activation_output (function) : activation function for output layer
      X (ndarray (N, M))           : input data, N samples, M features
      y (ndarray (N,))             : input labels, N samples
    Returns:
      model (Sequential)           : keras model (not compiled, not trained)
    """
    ### START CODE HERE ###
    input_size = X.shape[1]
    # NOTE(review): assumes y holds integer class ids 0..K-1 (the encoding a
    # sparse categorical loss expects), so K = max label + 1 — confirm.
    output_size = int(np.max(y)) + 1
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(input_size,)),
        tf.keras.layers.Dense(num_neuron_L1, activation=activation_hidden),
        tf.keras.layers.Dense(num_neuron_L2, activation=activation_hidden),
        tf.keras.layers.Dense(output_size, activation=activation_output),
    ])
    ### END CODE HERE ###
    return model

In [None]:
# 0 hidden layer and 1 layer output
def mlp_0Hidden(activation_output, X, y):
    """ MLP with 0 hidden layer and 1 output layer

    Builds a single Dense layer mapping the inputs directly to the output
    units. The input width is read from X and the number of output units
    from y.

    Args:
      activation_output (function) : activation function for output layer
      X (ndarray (N, M))           : input data, N samples, M features
      y (ndarray (N,))             : input labels, N samples
    Returns:
      model (Sequential)           : keras model (not compiled, not trained)
    """
    ### START CODE HERE ###
    input_size = X.shape[1]
    # NOTE(review): assumes y holds integer class ids 0..K-1 (the encoding a
    # sparse categorical loss expects), so K = max label + 1 — confirm.
    output_size = int(np.max(y)) + 1
    model = tf.keras.Sequential([
        tf.keras.Input(shape=(input_size,)),
        tf.keras.layers.Dense(output_size, activation=activation_output),
    ])
    ### END CODE HERE ###
    return model

In [None]:
# The loss used for training is created with from_logits=True and the predictions
# are passed through tf.nn.softmax afterwards, so the output layer must emit raw
# logits: use a linear output activation rather than sigmoid.
# Swap in one of the commented alternatives to compare architectures.
model = mlp_0Hidden(linear, X, y)
# model = mlp_1Hidden(4, sigmoid, linear, X, y)
# model = mlp_2Hidden(4, 4, sigmoid, linear, X, y)
model.summary()

In [None]:
# Print the kernel and bias shapes of every layer, whatever the depth of the
# selected model. Unpacking model.layers into exactly three names raised
# ValueError for the 0- and 1-hidden-layer variants.
for layer_index, layer in enumerate(model.layers, start=1):
    W, b = layer.get_weights()
    print(f"W{layer_index} shape = {W.shape}, b{layer_index} shape = {b.shape}")

In [None]:
# Configure training. from_logits=True tells the loss to treat the model outputs
# as raw scores (logits) rather than probabilities, so the output layer is
# expected to use a linear activation.
model.compile(
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

# Fit on X, y for 40 epochs (no validation data is passed); the value returned
# by fit is kept in `history`.
history = model.fit(
    X,y,
    epochs=40
)

In [None]:
# Take a sample labelled 2 straight from the dataset so this cell runs on a fresh
# kernel instead of relying on an `image_of_two` variable defined nowhere above.
# NOTE(review): assumes y holds integer digit labels and at least one sample is
# labelled 2 — confirm. flatnonzero handles y shaped (N,) as well as (N, 1).
image_of_two = X[np.flatnonzero(y == 2)[0]]

# reshape(1, -1) builds a single-sample batch without hard-coding the feature count.
prediction = model.predict(image_of_two.reshape(1, -1))  # prediction

print(f" predicting a Two: \n{prediction}")
print(f" Largest Prediction index: {np.argmax(prediction)}")

In [None]:
# Turn the model output into a probability vector with softmax.
# NOTE(review): this presumes `prediction` holds raw logits (consistent with
# from_logits=True in the loss) — confirm the model's output activation is linear.
prediction_p = tf.nn.softmax(prediction)

print(f" predicting a Two. Probability vector: \n{prediction_p}")
# The probabilities should add up to 1.
print(f"Total of predictions: {np.sum(prediction_p):0.3f}")