# Simple MNIST convnet


A simple MNIST example using concrete dropout layers, modeled after the Keras MNIST convnet example: https://keras.io/examples/vision/mnist_convnet/

## Setup

In [1]:
!pip install concretedropout

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting concretedropout
  Downloading concretedropout-0.1.0-py3-none-any.whl (5.5 kB)
Installing collected packages: concretedropout
Successfully installed concretedropout-0.1.0


In [28]:
import numpy as np
import tensorflow as tf
import keras.backend as K
from tensorflow import keras
from tensorflow.keras import layers
from concretedropout import ConcreteDenseDropout, ConcreteSpatialDropout2D, get_weight_regularizer, get_dropout_regularizer

## Prepare the data

In [5]:
# Dataset constants: number of label classes and the per-image input shape
num_classes = 10
input_shape = (28, 28, 1)

# Fetch MNIST, which arrives already divided into train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Convert pixels to float32 in the [0, 1] range and append a trailing
# channel axis so that every image has shape (28, 28, 1)
x_train = (x_train.astype("float32") / 255)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255)[..., np.newaxis]

print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

# Turn the integer class labels into binary (one-hot) class matrices
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=num_classes)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


## Build the model

In [30]:
# Regularisation terms for the concrete dropout wrappers, computed from the
# number of training images.
# NOTE(review): the training cell holds out 10% of x_train through
# validation_split, so fewer than Ns samples are actually fitted -- confirm
# whether Ns should reflect that.
Ns = len(x_train)
wr = get_weight_regularizer(Ns, l=1e-2, tau=1.0)
dr = get_dropout_regularizer(Ns, tau=1.0, cross_entropy_loss=True)

# Keyword arguments shared by both concrete dropout wrappers
concrete_kwargs = dict(
    is_mc_dropout=False,
    weight_regularizer=wr,
    dropout_regularizer=dr,
)

# --- Network definition (functional API) ---
inputs = layers.Input(shape=input_shape, name="inputs")

# First convolutional stage: a plain Conv2D, since no dropout is applied
# directly to the input image
conv1 = layers.Conv2D(32, kernel_size=(3, 3))
x = conv1(inputs)
x = layers.Activation("relu", name="activation_1")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

# Second convolutional stage: the Conv2D layer is wrapped by
# ConcreteSpatialDropout2D
conv2 = layers.Conv2D(64, kernel_size=(3, 3))
x = ConcreteSpatialDropout2D(conv2, **concrete_kwargs)(x)
x = layers.Activation("relu", name="activation_2")(x)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

x = layers.Flatten()(x)

# Classification head: a Dense layer wrapped by ConcreteDenseDropout,
# followed by a softmax over the classes
dense1 = layers.Dense(num_classes)
x = ConcreteDenseDropout(dense1, **concrete_kwargs)(x)
outputs = layers.Activation("softmax", name="activation_3")(x)

model = keras.Model(inputs=inputs, outputs=outputs, name="mnist_model")

model.summary()

Model: "mnist_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 inputs (InputLayer)         [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_12 (Conv2D)          (None, 26, 26, 32)        320       
                                                                 
 activation_1 (Activation)   (None, 26, 26, 32)        0         
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 13, 13, 32)       0         
 2D)                                                             
                                                                 
 concrete_spatial_dropout2d_  (None, 11, 11, 64)       18497     
 5 (ConcreteSpatialDropout2D                                     
 )                                                               
                                                       

## Train the model

In [31]:
# Training hyperparameters
batch_size = 128
epochs = 15

# The network ends in a softmax and the labels are one-hot encoded, hence
# categorical cross-entropy; accuracy is tracked as an additional metric.
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Keep the returned History object in a variable: this retains the per-epoch
# metrics for later inspection and avoids a bare
# `<keras.callbacks.History at 0x...>` repr as the cell's output.
# validation_split=0.1 holds out 10% of the training data for validation.
history = model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=0.1,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7fcd603dc850>

## Evaluate the trained model

In [32]:
# Score the trained model on the test set. The model was compiled with a
# single "accuracy" metric, so the result holds the loss followed by the
# accuracy.
score = model.evaluate(x_test, y_test, verbose=0)
test_loss, test_accuracy = score[0], score[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

Test loss: 0.017377963289618492
Test accuracy: 0.9912999868392944


Get the dropout probabilities of the concrete dropout layers (the sigmoid of each layer's `p_logit`):

In [33]:
# Collect the `p_logit` value of every layer that exposes one (in this model,
# the two concrete dropout wrappers).
ps = np.array(
    [K.eval(layer.p_logit) for layer in model.layers if hasattr(layer, "p_logit")]
)

# The sigmoid maps each logit to a dropout probability in (0, 1)
dropout_val = tf.nn.sigmoid(ps).numpy()
# Alias kept so code relying on the original, misspelled name still works
droput_val = dropout_val
print(dropout_val)

[[0.01307389]
 [0.14507046]]
