In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.python.framework.ops import EagerTensor
from tensorflow.python.ops.resource_variable_ops import ResourceVariable
import time
import h5py
import matplotlib.pyplot as plt

In [None]:
# Show the installed TensorFlow version.
tf.__version__

The beauty of TensorFlow 2 is in its simplicity. Basically, all you need to do is implement forward propagation through a computational graph. TensorFlow will compute the derivatives for you, by moving backwards through the graph recorded with GradientTape. All that's left for you to do then is specify the cost function and optimizer you want to use!

When writing a TensorFlow program, the main object that gets used and transformed is the tf.Tensor. These tensors are the TensorFlow equivalent of NumPy arrays. You'll use tf.Variable to store the state of your variables. The dtype argument of tf.Variable can be set so that the data is converted to that type.

Here you'll build TensorFlow datasets from HDF5 files, which you can use in place of NumPy arrays to store your data. You can think of each one as a TensorFlow data generator!

In [None]:
# Open the sign-language train/test HDF5 files read-only.
train_dataset = h5py.File("datasets/train_signs.h5", mode="r")
test_dataset = h5py.File("datasets/test_signs.h5", mode="r")

In [None]:
# Wrap each HDF5 array in a tf.data.Dataset that yields one example per element.
x_train, y_train = (
    tf.data.Dataset.from_tensor_slices(train_dataset[key])
    for key in ("train_set_x", "train_set_y")
)

x_test, y_test = (
    tf.data.Dataset.from_tensor_slices(test_dataset[key])
    for key in ("test_set_x", "test_set_y")
)

In [None]:
# Inspect the Python type of the dataset object built above.
type(x_train)

Since TensorFlow Datasets are generators, you can't access their contents directly; you either iterate over them in a for loop, or explicitly create a Python iterator with iter and consume its elements with next. You can also inspect the shape and dtype of each element using the element_spec attribute.

In [None]:
# element_spec reports the shape and dtype of a single dataset element.
print(x_train.element_spec)

In [None]:
print(next(iter(x_train)))  # first element of x_train, i.e. one image tensor


The dataset that you'll be using is a subset of the sign language digits. It contains six different classes representing the digits from 0 to 5.

In [None]:
# Collect the distinct label values that occur in the training labels.
unique_labels = {label.numpy() for label in y_train}
print(unique_labels)

In [None]:
# Show the first 25 training images in a 5x5 grid, each titled with its label.
images_iter = iter(x_train)
labels_iter = iter(y_train)
grid_rows, grid_cols = 5, 5
plt.figure(figsize=(10, 10))
for cell in range(grid_rows * grid_cols):
    image, label = next(images_iter), next(labels_iter)
    axes = plt.subplot(grid_rows, grid_cols, cell + 1)
    axes.imshow(image.numpy().astype("uint8"))
    axes.set_title(label.numpy().astype("uint8"))
    axes.axis("off")

There's one more difference between TensorFlow datasets and NumPy arrays: if you need to transform a dataset, you invoke its map method, which applies the function passed as an argument to each of the elements.

In [None]:
def normalize(image):
    """
    Scale an image to float32 and flatten it into a one-dimensional tensor.

    Arguments:
    image -- Tensor holding the raw pixel values (assumed to lie in 0..255).

    Returns:
    result -- float32 tensor with every value divided by 255, flattened to
              rank 1 (64 * 64 * 3 entries for a 64x64 RGB image).
    """
    scaled = tf.cast(image, tf.float32) / 255.0
    # -1 lets TensorFlow infer the flattened length from the input.
    return tf.reshape(scaled, [-1])

In [None]:
# Apply the same scale-and-flatten preprocessing to both image datasets.
new_train = x_train.map(map_func=normalize)
new_test = x_test.map(map_func=normalize)

In [None]:
# After normalize, each element is a flat float32 tensor.
new_train.element_spec

In [None]:
# Print the first normalized (scaled and flattened) training example.
print(next(iter(new_train)))

## Linear Function

You can modify the state of a tf.Variable, but you cannot change the state of a tf.constant.

In [None]:
# An immutable (3, 1) tensor filled with samples from NumPy's standard normal generator.
X = tf.constant(np.random.randn(3,1), name = "X")

In [None]:
def linear_function():
    """
    Evaluate the linear map Y = WX + b on randomly drawn inputs.

    NumPy's global random generator is re-seeded with 1 and then used to
    draw, in this order:
            X of shape (3, 1)
            W of shape (4, 3)
            b of shape (4, 1)

    Returns:
    result -- tensor of shape (4, 1) holding WX + b
    """
    np.random.seed(1)

    # The draw order below determines which "random" numbers each array
    # receives, so it must stay X, W, b for the result to be reproducible.
    X = np.random.randn(3, 1)
    W = np.random.randn(4, 3)
    b = np.random.randn(4, 1)

    return tf.add(tf.matmul(W, X), b)

# Run the example and display the resulting (4, 1) tensor.
result = linear_function()
print(result)

## Computing the Sigmoid


In [None]:
def sigmoid(z):
    """
    Computes the sigmoid of z

    Arguments:
    z -- input value, scalar or vector (any type tf.cast can convert)

    Returns:
    a -- (tf.float32) the sigmoid of z
    """
    # Cast first: tf.keras.activations.sigmoid only accepts float16, float32,
    # float64, complex64 or complex128 inputs, so integers must be converted.
    return tf.keras.activations.sigmoid(tf.cast(z, tf.float32))


# Try sigmoid on a few scalars. print() applies str() to every argument, so
# the tensors are shown in their full "tf.Tensor(...)" form.
result = sigmoid(-1)
print("type: ", type(result), sep="")
print("dtype: ", result.dtype, sep="")
print("sigmoid(-1) = ", result, sep="")
print("sigmoid(0) = ", sigmoid(0.0), sep="")
print("sigmoid(12) = ", sigmoid(12), sep="")


## Using One Hot Encodings

In a "one hot" encoding, exactly one element of each column is "hot" (meaning set to 1), and all the other elements are 0.

In [None]:
def one_hot_matrix(label, C=6):
    """
    Builds the one hot encoding of a single label

    Arguments:
        label --  (int) Categorical label to encode
        C --  (int) Number of different classes that label can take

    Returns:
         one_hot -- tf.Tensor A one-dimensional tensor of length C that is 1 at
                    index `label` and 0 elsewhere.
    """
    encoded = tf.one_hot(label, depth=C)
    # Force a flat (C,) shape whatever the shape of the incoming label.
    return tf.reshape(encoded, shape=(C,))
    
# Example: encode the scalar label 1 over C = 6 classes.
label = tf.constant(1)

C = 6
one_hot_matrix(label, C)

In [None]:

# One-hot encode every label; map() calls one_hot_matrix with its default C=6.
new_y_train = y_train.map(one_hot_matrix)
new_y_test = y_test.map(one_hot_matrix)
print(next(iter(new_y_test)))  # first one-hot encoded test label

## Initialize the Parameters

In [None]:
def initialize_parameters():
    """
    Creates the weights and biases of the three-layer network as tf.Variables
    drawn from a Glorot-normal initializer seeded with 1. The shapes are:
                        W1 : [25, 12288]
                        b1 : [25, 1]
                        W2 : [12, 25]
                        b2 : [12, 1]
                        W3 : [6, 12]
                        b3 : [6, 1]

    Returns:
    parameters -- a dictionary of tf.Variables keyed by "W1", "b1", "W2", "b2", "W3", "b3"
    """
    glorot = tf.keras.initializers.GlorotNormal(seed=1)

    # Insertion order is also the creation order (W1, b1, W2, b2, W3, b3), so
    # the shared initializer is called in the same sequence every time.
    shapes = {
        "W1": (25, 12288),
        "b1": (25, 1),
        "W2": (12, 25),
        "b2": (12, 1),
        "W3": (6, 12),
        "b3": (6, 1),
    }

    return {
        name: tf.Variable(glorot(shape=shape), name=name)
        for name, shape in shapes.items()
    }

In [None]:
# Build a fresh parameter set and list the shape of every variable.
parameters = initialize_parameters()
for key, value in parameters.items():
    print(f"{key} shape: {tuple(value.shape)}")


## Building Your First Neural Network in TensorFlow

In [None]:
def forward_propagation(X, parameters):
    """
    Runs the forward pass of the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR

    Arguments:
    X -- input batch, of shape (input size, number of examples)
    parameters -- python dictionary containing the parameters "W1", "b1", "W2", "b2", "W3", "b3"
                  with the shapes given in initialize_parameters

    Returns:
    Z3 -- the output of the last LINEAR unit (no activation applied)
    """
    W1, b1 = parameters["W1"], parameters["b1"]
    W2, b2 = parameters["W2"], parameters["b2"]
    W3, b3 = parameters["W3"], parameters["b3"]

    # Examples are stored column-wise, so each layer computes W @ activations + b
    # and the (units, 1) bias broadcasts across the example axis.
    hidden1 = tf.keras.activations.relu(tf.matmul(W1, X) + b1)
    hidden2 = tf.keras.activations.relu(tf.matmul(W2, hidden1) + b2)

    return tf.matmul(W3, hidden2) + b3

## Compute the Total Loss

All you have to do now is define the loss function that you're going to use. For this case, since we have a classification problem with 6 labels, a categorical cross entropy will work!

In [None]:
def compute_total_loss(logits, labels):
    """
    Sums the categorical cross-entropy over all examples

    Arguments:
    logits -- output of forward propagation (output of the last LINEAR unit), of shape (6, num_examples)
    labels -- "true" one-hot labels, same shape as logits

    Returns:
    total_loss - Scalar tensor with the summed (not averaged) loss
    """
    # Both inputs store examples column-wise; transposing puts the example
    # axis first, so the class axis is last as the Keras loss expects.
    y_true = tf.transpose(labels)
    y_pred = tf.transpose(logits)

    # from_logits=True makes the loss apply the softmax itself.
    per_example_loss = tf.keras.losses.categorical_crossentropy(
        y_true, y_pred, from_logits=True
    )

    return tf.reduce_sum(per_example_loss)

# Sanity check of compute_total_loss on a small 3x3 example.
labels = tf.constant([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]])
logits = tf.constant([[1., 0., 0.], [1., 0., 0.], [1., 0., 0.]])

result = compute_total_loss(logits, labels)
print(result)

Note: When using the sum of the losses for gradient computation, it’s important to reduce the learning rate as the size of the mini-batch increases. This ensures that you don’t take overly large steps towards the minimum.

## Train the Model

tape.gradient: this function uses the operations recorded for automatic differentiation inside the GradientTape block to compute the gradients of the loss with respect to the trainable variables. Then, calling the optimizer's apply_gradients method applies the optimizer's update rule to each trainable parameter.

dataset.prefetch(8) (a tf.data.Dataset method): prevents a memory bottleneck that can occur when reading from disk. prefetch() sets aside some data and keeps it ready for when it's needed. It does this by creating a source dataset from your input data, applying a transformation to preprocess the data, then iterating over the dataset the specified number of elements at a time. This works because the iteration is streaming, so the data doesn't need to fit into memory.

In [None]:
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.0001,
          num_epochs=1500, minibatch_size=32, print_cost=True):
    """
    Trains a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
    (The softmax is applied inside the loss; forward propagation returns logits.)

    Arguments:
    X_train -- tf.data.Dataset of training inputs, one flattened example per element
               (12288 features each, to match W1 in initialize_parameters)
    Y_train -- tf.data.Dataset of one-hot training labels (6 classes), aligned with X_train
    X_test -- tf.data.Dataset of test inputs, same element layout as X_train
    Y_test -- tf.data.Dataset of one-hot test labels, aligned with X_test
    learning_rate -- learning rate of the Adam optimizer
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to evaluate, print and record the cost and accuracies every
                  10 epochs. When False nothing is printed and the history lists
                  returned below stay empty.

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    costs -- list of Python floats: summed training loss divided by the number of
             training examples, one entry per recorded epoch
    train_acc, test_acc -- lists of accuracy values, one entry per recorded epoch
    """
    costs = []          # To keep track of the cost
    train_acc = []      # To keep track of train accuracy history
    test_acc = []       # To keep track of test accuracy history

    # Initialize your parameters
    parameters = initialize_parameters()

    # The variable list never changes, so build it once instead of per minibatch.
    trainable_variables = [parameters[name]
                           for name in ("W1", "b1", "W2", "b2", "W3", "b3")]

    optimizer = tf.keras.optimizers.Adam(learning_rate)

    # The CategoricalAccuracy will track the accuracy for this multiclass problem
    train_accuracy = tf.keras.metrics.CategoricalAccuracy()
    test_accuracy = tf.keras.metrics.CategoricalAccuracy()

    def reset_accuracy(metric):
        # Newer releases expose reset_state(); older ones only reset_states().
        reset = getattr(metric, "reset_state", None) or metric.reset_states
        reset()

    dataset = tf.data.Dataset.zip((X_train, Y_train))
    test_dataset = tf.data.Dataset.zip((X_test, Y_test))

    minibatches = dataset.batch(minibatch_size).prefetch(8)
    test_minibatches = test_dataset.batch(minibatch_size).prefetch(8)

    # Training loop
    for epoch in range(num_epochs):
        epoch_total_loss = 0.
        # Count the examples actually seen rather than relying on
        # dataset.cardinality(), which is negative when the size is unknown.
        examples_seen = 0

        # Reset train accuracy at the beginning of each epoch
        reset_accuracy(train_accuracy)

        for (minibatch_X, minibatch_Y) in minibatches:
            with tf.GradientTape() as tape:
                # 1. Forward propagation (the network expects examples column-wise)
                Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)

                # 2. Compute loss for this minibatch
                minibatch_total_loss = compute_total_loss(Z3, tf.transpose(minibatch_Y))

            # Accumulate accuracy (Y is one-hot, Z3 is logits → transpose Z3 to match)
            train_accuracy.update_state(minibatch_Y, tf.transpose(Z3))

            # Backpropagation
            grads = tape.gradient(minibatch_total_loss, trainable_variables)
            optimizer.apply_gradients(zip(grads, trainable_variables))

            epoch_total_loss += minibatch_total_loss
            examples_seen += int(minibatch_X.shape[0])

        # Average loss over all examples. Converting to a Python float keeps the
        # ":.4f" format below working on TensorFlow releases whose tensors do not
        # accept a format spec, and keeps `costs` a list of plain numbers.
        # max(..., 1) avoids a division by zero on an empty dataset.
        epoch_cost = float(epoch_total_loss) / max(examples_seen, 1)

        # Print progress every 10 epochs
        if print_cost and epoch % 10 == 0:
            epoch_train_acc = train_accuracy.result().numpy()
            print(f"Cost after epoch {epoch}: {epoch_cost:.4f}")
            print("Train accuracy:", epoch_train_acc)

            # Evaluate on test set (accumulate over test minibatches)
            reset_accuracy(test_accuracy)
            for (minibatch_X, minibatch_Y) in test_minibatches:
                Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)
                test_accuracy.update_state(minibatch_Y, tf.transpose(Z3))
            epoch_test_acc = test_accuracy.result().numpy()

            print("Test accuracy:", epoch_test_acc)
            print("-" * 40)

            # Save history
            costs.append(epoch_cost)
            train_acc.append(epoch_train_acc)
            test_acc.append(epoch_test_acc)

    return parameters, costs, train_acc, test_acc

In [None]:
# Train for 100 epochs with the default learning rate and minibatch size, keeping the history for plotting.
parameters, costs, train_acc, test_acc = model(new_train, new_y_train, new_test, new_y_test, num_epochs=100)

In [None]:
# Plot the cost. model() records one value every 10 epochs, so each step on the
# x-axis corresponds to ten epochs.
plt.plot(np.squeeze(costs))
plt.ylabel('cost')
plt.xlabel('epochs (per tens)')
plt.title("Learning rate =" + str(0.0001))
plt.show()

In [None]:
# Plot the train and test accuracy on the same axes. model() records one value
# every 10 epochs, so each step on the x-axis corresponds to ten epochs.
plt.plot(np.squeeze(train_acc), label='Train Accuracy')
plt.plot(np.squeeze(test_acc), label='Test Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('epochs (per tens)')
plt.title("Learning rate =" + str(0.0001))
# The two curves share one set of axes, so a legend is needed to tell them apart.
plt.legend()
plt.show()