## Understanding Dropouts

In [56]:
import tensorflow as tf
import numpy as np

In [68]:
def dropout_layer(X, p):
    ''' This function performs dropouts on X '''
    assert 0 <= p <= 1
    if p == 0:
        # Drop all & make it zero
        return tf.zeros_like(X).numpy()
    if p == 1:
        # Return X as it
        return X.numpy()
    
    # Create bool of % of instances to drop
    mask = tf.random.uniform(shape = tf.shape(X), minval = 0, maxval = 1) < 1 - p
    print(f" Mask : {mask.numpy()}")
    # Convert Bool to binary & Retain reduced X
    print(f" Masked X : {(tf.cast(mask, dtype = tf.float32)*X).numpy()}")
    # Multiply with X and normalize by fraction retained
    return (tf.cast(mask, dtype = tf.float32)* X / (1 - p)).numpy()

In [69]:
X = tf.random.uniform(shape = (1, 10), minval=0, maxval=10, dtype=tf.float32)

In [70]:
print(f"X : {X.numpy()}")
dropout_layer(X, 0.5)

X : [[3.3110595 3.9678252 4.6132755 3.1029058 1.7998374 1.3511956 2.1178722
  1.3179338 7.7726364 3.8459766]]
 Mask : [[ True False  True  True False False False  True False  True]]
 Masked X : [[3.3110595 0.        4.6132755 3.1029058 0.        0.        0.
  1.3179338 0.        3.8459766]]


array([[6.622119 , 0.       , 9.226551 , 6.2058115, 0.       , 0.       ,
        0.       , 2.6358676, 0.       , 7.691953 ]], dtype=float32)

In [72]:
def dropout_layer(X, p):
    ''' This function performs dropouts on X '''
    assert 0 <= p <= 1
    if p == 0:
        return tf.zeros_like(X)
    if p == 1:
        return X
    mask = tf.random.uniform(shape = tf.shape(X), minval = 0, maxval = 1) < 1 - p
    return (tf.cast(mask, dtype = tf.float32)* X / (1 - p))

In [73]:
X = tf.reshape(tf.range(16, dtype=tf.float32), (2, 8))
print(X)
print(dropout_layer(X, 0.))
print(dropout_layer(X, 0.5))
print(dropout_layer(X, 1.))

tf.Tensor(
[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]], shape=(2, 8), dtype=float32)
tf.Tensor(
[[0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0.]], shape=(2, 8), dtype=float32)
tf.Tensor(
[[ 0.  0.  0.  6.  0.  0.  0.  0.]
 [16.  0. 20. 22. 24. 26.  0. 30.]], shape=(2, 8), dtype=float32)
tf.Tensor(
[[ 0.  1.  2.  3.  4.  5.  6.  7.]
 [ 8.  9. 10. 11. 12. 13. 14. 15.]], shape=(2, 8), dtype=float32)


## Dropout in NN

In [160]:
dropout1, dropout2 = 0.5, 0.5

class Net(tf.keras.Model):
    def __init__(self, num_outputs, num_hidden1, num_hidden2):
        super().__init__()
        self.input_layer = tf.keras.layers.Flatten()
        self.hidden1 = tf.keras.layers.Dense(num_hidden1, activation = 'relu')
        self.hidden2 = tf.keras.layers.Dense(num_hidden2, activation = 'relu')
        self.output_layer = tf.keras.layers.Dense(num_outputs)

    def call(self, inputs, training = None):
        x = self.input_layer(inputs)
        x = self.hidden1(x)
        if training:
            x = dropout_layer(x, dropout1)
        x = self.hidden2(x)
        if training:
            x = dropout_layer(x, dropout2)
        x = self.output_layer(x)
        return x

num_outputs, num_hidden1, num_hidden2 = 10, 256, 256    
net = Net(num_outputs, num_hidden1, num_hidden2)

In [161]:
(train_features, train_labels), (test_features, test_labels) = tf.keras.datasets.fashion_mnist.load_data()

In [162]:
train_features = tf.cast(tf.reshape(train_features, (-1, 784)), tf.float32)/255.0
test_features = tf.cast(tf.reshape(test_features, (-1, 784)), tf.float32)/255.0

In [163]:
def load_data(data, batch_size, is_train = True):
    dataset = tf.data.Dataset.from_tensor_slices(data)
    dataset = dataset.shuffle(buffer_size = 1000)
    dataset = dataset.batch(batch_size)
    return dataset

In [164]:
num_epochs, lr, batch_size = 50, 0.5, 256
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.SGD(learning_rate=lr)

In [165]:
data_iter = load_data((train_features, train_labels), batch_size)

training_loss = []
for epoch in range(num_epochs):
    for X, y in data_iter:
            
        with tf.GradientTape() as tape:
            yhat = net(X, training = True)
            ce_loss = loss(y, yhat)

        grads = tape.gradient(ce_loss, net.trainable_variables)
        optimizer.apply_gradients(zip(grads, net.trainable_variables))

    epoch_loss = loss(train_labels, net(train_features))
    training_loss.append(epoch_loss)
    print(f"epoch : {epoch}, loss : {epoch_loss}")

epoch : 0, loss : 0.5674312710762024
epoch : 1, loss : 0.4548048973083496
epoch : 2, loss : 0.42549094557762146
epoch : 3, loss : 0.41688743233680725
epoch : 4, loss : 0.4007378816604614
epoch : 5, loss : 0.3832167387008667
epoch : 6, loss : 0.4032727777957916
epoch : 7, loss : 0.35775741934776306
epoch : 8, loss : 0.332672655582428
epoch : 9, loss : 0.3756335973739624
epoch : 10, loss : 0.330614298582077
epoch : 11, loss : 0.35788366198539734
epoch : 12, loss : 0.33307766914367676
epoch : 13, loss : 0.33154016733169556
epoch : 14, loss : 0.29667723178863525
epoch : 15, loss : 0.34693488478660583
epoch : 16, loss : 0.31189972162246704
epoch : 17, loss : 0.33852487802505493
epoch : 18, loss : 0.311088502407074
epoch : 19, loss : 0.3039800822734833
epoch : 20, loss : 0.2947123050689697
epoch : 21, loss : 0.2882457971572876
epoch : 22, loss : 0.3064858019351959
epoch : 23, loss : 0.32587528228759766
epoch : 24, loss : 0.28363892436027527
epoch : 25, loss : 0.2855738699436188
epoch : 26, l

In [159]:
from sklearn.metrics import accuracy_score
ytest_pred = np.argmax(net(test_features), axis = 1)
accuracy_score(ytest_pred, test_labels)

0.8839

In [109]:
from sklearn.metrics import accuracy_score
ytest_pred = np.argmax(net(test_features), axis = 1)
accuracy_score(ytest_pred, test_labels)

0.8485