<a href="https://colab.research.google.com/github/geleshChrsitUniversity/nikitadurasov-masksembles/blob/main/notebooks/MNIST_Masksembles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install --upgrade git+http://github.com/nikitadurasov/masksembles
!wget https://github.com/nikitadurasov/masksembles/raw/main/images/complex_sample_mnist.npy

Collecting git+http://github.com/nikitadurasov/masksembles
  Cloning http://github.com/nikitadurasov/masksembles to /tmp/pip-req-build-2foqwk2k
  Running command git clone --filter=blob:none --quiet http://github.com/nikitadurasov/masksembles /tmp/pip-req-build-2foqwk2k
  Resolved http://github.com/nikitadurasov/masksembles to commit c5327a925164b4e93bbbbdbfa23fcc5e1d53d22e
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: masksembles
  Building wheel for masksembles (setup.py) ... [?25l[?25hdone
  Created wheel for masksembles: filename=masksembles-1.1-py3-none-any.whl size=8263 sha256=31941aacd3317158fb28aa175b29b2fd2e32421c3048ab0bc1fb7c40137d98ea
  Stored in directory: /tmp/pip-ephem-wheel-cache-bm6b2prn/wheels/c0/46/4a/5e6e5c7177ca24834c46ba8df4ddc22b805027bff8c4ae3567
Successfully built masksembles
Installing collected packages: masksembles
Successfully installed masksembles-1.1
--2025-03-16 02:30:24--  https://github.com/nikitadurasov

# MNIST

## Keras

In [2]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

import matplotlib.pyplot as plt

In [3]:
from masksembles.keras import Masksembles2D, Masksembles1D

In [4]:
# Problem constants: number of digit classes and per-image input shape (H, W, channels).
num_classes = 10
input_shape = (28, 28, 1)

# Fetch MNIST, already divided into a training part and a test part.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Images: cast to float32, rescale to the [0, 1] range and append a trailing
# channel axis so every image has shape (28, 28, 1).
x_train = np.expand_dims(x_train.astype("float32") / 255, axis=-1)
x_test = np.expand_dims(x_test.astype("float32") / 255, axis=-1)

# Labels: class indices -> binary (one-hot) class matrices.
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=num_classes)

# Quick sanity report on the prepared image arrays.
print("x_train shape:", x_train.shape)
print(len(x_train), "train samples")
print(len(x_test), "test samples")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


To turn a regular model into a Masksembles model, add `Masksembles2D` or `Masksembles1D` layers to it. The general recommendation is to insert these layers right before or after convolutional layers.

In the example below we use both kinds of layer: a `Masksembles2D` layer after each convolution and a `Masksembles1D` layer after flattening, just before the final dense classifier.

In [5]:
# Small CNN classifier with a Masksembles layer after each convolution and one after Flatten.
# NOTE(review): in the recorded run this cell failed with
#   TypeError: Layer.add_weight() got multiple values for argument 'shape'
# while using the layers imported from masksembles.keras. The notebook re-defines
# Masksembles2D / Masksembles1D in a later cell and rebuilds this exact model there.
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation="elu"),
        Masksembles2D(4, 2.0), # Masksembles2D with n=4 masks, scale=2.0
        layers.MaxPooling2D(pool_size=(2, 2)),

        layers.Conv2D(64, kernel_size=(3, 3), activation="elu"),
        Masksembles2D(4, 2.0), # Masksembles2D with n=4 masks, scale=2.0
        layers.MaxPooling2D(pool_size=(2, 2)),

        layers.Flatten(),
        Masksembles1D(4, 2.), # Masksembles1D with n=4 masks, scale=2.0, on the flattened features
        layers.Dense(num_classes, activation="softmax"),
    ]
)

# Print the layer-by-layer overview of the assembled model.
model.summary()

TypeError: Layer.add_weight() got multiple values for argument 'shape'

Training a Masksembles model is no different from training a regular model, so we simply use the standard Keras `fit` API.

In [13]:
import tensorflow as tf
#from . import common
import masksembles.common as common  # Ensure absolute import


class Masksembles2D(tf.keras.layers.Layer):
    """
    :class:`Masksembles2D` is high-level class that implements Masksembles approach
    for 2-dimensional inputs (similar to :class:`tensorflow.keras.layers.SpatialDropout2D`).

    :param n: int, number of masks
    :param scale: float, scale parameter similar to *S* in [1]. Larger values decrease \
        subnetworks correlations but at the same time decrease capacity of every individual model.
    :param kwargs: standard Keras layer keyword arguments (e.g. ``name``, ``dtype``),
        forwarded unchanged to :class:`tensorflow.keras.layers.Layer`.

    Shape:
        * Input: (N, H, W, C); the batch is split into ``n`` equal groups, so N must be
          divisible by ``n``
        * Output: (N, H, W, C) (same shape as input)

    Examples:

    >>> m = Masksembles2D(4, 2.0)
    >>> inputs = tf.ones([4, 28, 28, 16])
    >>> output = m(inputs)

    References:

    [1] Masksembles for Uncertainty Estimation,
    Nikita Durasov, Timur Bagautdinov, Pierre Baque, Pascal Fua

    """

    def __init__(self, n: int, scale: float, **kwargs):
        # Forward name/dtype/... to the base layer so the layer can be named and
        # re-created from its config.
        super().__init__(**kwargs)

        self.n = n
        self.scale = scale

    def build(self, input_shape):
        """Generate the masks for the channel count of ``input_shape`` and store them
        as a non-trainable float32 weight."""
        channels = input_shape[-1]
        masks = common.generation_wrapper(channels, self.n, self.scale)
        # Keep `name=` as a keyword: in Keras 3 the first positional parameter of
        # add_weight is `shape`, so a positional name would collide with `shape=`.
        self.masks = self.add_weight(name="masks",
                                     shape=masks.shape,
                                     trainable=False,
                                     dtype="float32")
        self.masks.assign(masks)

    def call(self, inputs, training=False):
        """Multiply each of the ``n`` consecutive batch groups by its own mask.

        ``training`` is accepted for Keras API compatibility but is never read:
        the masks are applied the same way in training and inference.
        """
        # inputs : [N, H, W, C]
        # masks : [M, C]
        x = tf.stack(tf.split(inputs, self.n))
        # x : [M, N // M, H, W, C]
        # masks : [M, 1, 1, 1, C]
        x = x * self.masks[:, tf.newaxis, tf.newaxis, tf.newaxis]
        # Undo the grouping: [M, N // M, ...] -> [1, N, ...] -> [N, ...]
        x = tf.concat(tf.split(x, self.n), axis=1)
        return tf.squeeze(x, axis=0)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized and cloned."""
        config = super().get_config()
        config.update({"n": self.n, "scale": self.scale})
        return config


class Masksembles1D(tf.keras.layers.Layer):
    """
    :class:`Masksembles1D` is high-level class that implements Masksembles approach
    for 1-dimensional inputs (similar to :class:`tensorflow.keras.layers.Dropout`).

    :param n: int, number of masks
    :param scale: float, scale parameter similar to *S* in [1]. Larger values decrease \
        subnetworks correlations but at the same time decrease capacity of every individual model.
    :param kwargs: standard Keras layer keyword arguments (e.g. ``name``, ``dtype``),
        forwarded unchanged to :class:`tensorflow.keras.layers.Layer`.

    Shape:
        * Input: (N, C); the batch is split into ``n`` equal groups, so N must be
          divisible by ``n``
        * Output: (N, C) (same shape as input)

    Examples:

    >>> m = Masksembles1D(4, 2.0)
    >>> inputs = tf.ones([4, 16])
    >>> output = m(inputs)


    References:

    [1] Masksembles for Uncertainty Estimation,
    Nikita Durasov, Timur Bagautdinov, Pierre Baque, Pascal Fua

    """

    def __init__(self, n: int, scale: float, **kwargs):
        # Forward name/dtype/... to the base layer so the layer can be named and
        # re-created from its config.
        super().__init__(**kwargs)

        self.n = n
        self.scale = scale

    def build(self, input_shape):
        """Generate the masks for the feature count of ``input_shape`` and store them
        as a non-trainable float32 weight."""
        channels = input_shape[-1]
        masks = common.generation_wrapper(channels, self.n, self.scale)
        # Keep `name=` as a keyword: in Keras 3 the first positional parameter of
        # add_weight is `shape`, so a positional name would collide with `shape=`.
        self.masks = self.add_weight(name="masks",
                                     shape=masks.shape,
                                     trainable=False,
                                     dtype="float32")
        self.masks.assign(masks)

    def call(self, inputs, training=False):
        """Multiply each of the ``n`` consecutive batch groups by its own mask.

        ``training`` is accepted for Keras API compatibility but is never read:
        the masks are applied the same way in training and inference.
        """
        # inputs : [N, C]
        # masks : [M, C]
        x = tf.stack(tf.split(inputs, self.n))
        # x : [M, N // M, C]
        # masks : [M, 1, C]
        x = x * self.masks[:, tf.newaxis]
        # Undo the grouping: [M, N // M, C] -> [1, N, C] -> [N, C]
        x = tf.concat(tf.split(x, self.n), axis=1)
        return tf.squeeze(x, axis=0)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialized and cloned."""
        config = super().get_config()
        config.update({"n": self.n, "scale": self.scale})
        return config

In [26]:
# Assemble the Masksembles CNN one layer at a time, using the Masksembles2D /
# Masksembles1D classes currently bound in the notebook namespace.
model = keras.Sequential()
model.add(keras.Input(shape=input_shape))

# Convolutional block 1: conv -> Masksembles2D (4 masks, scale 2.0) -> pooling.
model.add(layers.Conv2D(32, kernel_size=(3, 3), activation="elu"))
model.add(Masksembles2D(4, 2.0))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Convolutional block 2: same pattern with 64 filters.
model.add(layers.Conv2D(64, kernel_size=(3, 3), activation="elu"))
model.add(Masksembles2D(4, 2.0))
model.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten -> Masksembles1D (4 masks, scale 2.0) -> softmax over the classes.
model.add(layers.Flatten())
model.add(Masksembles1D(4, 2.))
model.add(layers.Dense(num_classes, activation="softmax"))

model.summary()

In [27]:
# Training hyper-parameters; the dropout baseline cells further down reuse them.
batch_size = 128
epochs = 20

model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)

# 10% of the training samples are held out for validation.
# NOTE(review): the Masksembles layers split every batch into 4 equal groups
# (tf.split(inputs, self.n) with n=4), so each batch the model sees -- including
# the last, smaller one -- presumably has to be a multiple of 4; confirm when
# changing batch_size or validation_split.
trainHist = model.fit(
    x=x_train,
    y=y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)

Epoch 1/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 9ms/step - accuracy: 0.7215 - loss: 1.0105 - val_accuracy: 0.9562 - val_loss: 0.1601
Epoch 2/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9467 - loss: 0.1822 - val_accuracy: 0.9708 - val_loss: 0.1013
Epoch 3/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9627 - loss: 0.1255 - val_accuracy: 0.9753 - val_loss: 0.0882
Epoch 4/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9709 - loss: 0.0957 - val_accuracy: 0.9777 - val_loss: 0.0754
Epoch 5/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9747 - loss: 0.0855 - val_accuracy: 0.9815 - val_loss: 0.0669
Epoch 6/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9782 - loss: 0.0720 - val_accuracy: 0.9805 - val_loss: 0.0621
Epoch 7/20
[1m422/422[0m 

In [28]:
import pandas as pd

# Tabulate the metrics recorded by fit(): one column per metric in trainHist.history.
histDf = pd.DataFrame(data=trainHist.history)

# Show the five rows with the highest validation accuracy, best first.
histDf.sort_values(by="val_accuracy", ascending=False).head(n=5)

Unnamed: 0,accuracy,loss,val_accuracy,val_loss
12,0.985204,0.047907,0.986833,0.047192
16,0.987333,0.040537,0.986,0.048979
19,0.989018,0.034942,0.9855,0.049971
11,0.984648,0.050416,0.9855,0.0531
13,0.986796,0.043706,0.985167,0.050993


In [29]:
# Five rows of the Masksembles training history with the lowest validation loss.
histDf.sort_values(by="val_loss", ascending=True).head(n=5)

Unnamed: 0,accuracy,loss,val_accuracy,val_loss
12,0.985204,0.047907,0.986833,0.047192
16,0.987333,0.040537,0.986,0.048979
19,0.989018,0.034942,0.9855,0.049971
13,0.986796,0.043706,0.985167,0.050993
14,0.986222,0.043265,0.985167,0.051626


In [32]:
def build_dropout_baseline(dropout_rate):
    """Build the baseline CNN used for comparison with the Masksembles model.

    The architecture is the Masksembles model with all Masksembles layers removed
    and a single Dropout layer inserted between Flatten and the softmax classifier.

    :param dropout_rate: float, rate passed to ``layers.Dropout``
    :return: an uncompiled ``keras.Sequential`` model
    """
    return keras.Sequential(
        [
            keras.Input(shape=input_shape),
            layers.Conv2D(32, kernel_size=(3, 3), activation="elu"),
            layers.MaxPooling2D(pool_size=(2, 2)),

            layers.Conv2D(64, kernel_size=(3, 3), activation="elu"),
            layers.MaxPooling2D(pool_size=(2, 2)),

            layers.Flatten(),
            layers.Dropout(dropout_rate),
            layers.Dense(num_classes, activation="softmax"),
        ]
    )


# Baseline run with dropout rate 0.35, trained with the same loss, optimizer,
# batch size, epoch count and validation split as the Masksembles model.
modelDropOut = build_dropout_baseline(0.35)

modelDropOut.summary()
modelDropOut.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
trainHistDropOut = modelDropOut.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

# Tabulate the recorded metrics and show the five rows with the best validation accuracy.
histDfDropOut = pd.DataFrame(trainHistDropOut.history)
histDfDropOut.sort_values("val_accuracy", ascending=False).head()

Epoch 1/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 10ms/step - accuracy: 0.8039 - loss: 0.6763 - val_accuracy: 0.9772 - val_loss: 0.0823
Epoch 2/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9694 - loss: 0.1044 - val_accuracy: 0.9832 - val_loss: 0.0602
Epoch 3/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9763 - loss: 0.0761 - val_accuracy: 0.9853 - val_loss: 0.0539
Epoch 4/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9804 - loss: 0.0629 - val_accuracy: 0.9833 - val_loss: 0.0545
Epoch 5/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9822 - loss: 0.0578 - val_accuracy: 0.9875 - val_loss: 0.0473
Epoch 6/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9844 - loss: 0.0507 - val_accuracy: 0.9890 - val_loss: 0.0414
Epoch 7/20
[1m422/422[0m 

Unnamed: 0,accuracy,loss,val_accuracy,val_loss
15,0.989259,0.031351,0.991333,0.037294
17,0.990981,0.026739,0.991333,0.034643
16,0.990333,0.029325,0.991167,0.035324
11,0.988907,0.034703,0.990833,0.038178
8,0.986815,0.041158,0.990833,0.039773


In [33]:
# Five rows of the dropout baseline's training history with the lowest validation loss.
histDfDropOut.sort_values(by="val_loss", ascending=True).head(n=5)

Unnamed: 0,accuracy,loss,val_accuracy,val_loss
17,0.990981,0.026739,0.991333,0.034643
16,0.990333,0.029325,0.991167,0.035324
15,0.989259,0.031351,0.991333,0.037294
12,0.988815,0.033145,0.989333,0.037572
19,0.991037,0.025974,0.9905,0.038096


In [36]:
# Print the five lowest-val_loss rows of each run, one table after the other.
print('MaskEnsemble -\n', histDf.sort_values(by="val_loss").head(n=5))
print()
print('DropOut      -\n', histDfDropOut.sort_values(by="val_loss").head(n=5))


MaskEnsemble -
     accuracy      loss  val_accuracy  val_loss
12  0.985204  0.047907      0.986833  0.047192
16  0.987333  0.040537      0.986000  0.048979
19  0.989018  0.034942      0.985500  0.049971
13  0.986796  0.043706      0.985167  0.050993
14  0.986222  0.043265      0.985167  0.051626

DropOut      -
     accuracy      loss  val_accuracy  val_loss
17  0.990981  0.026739      0.991333  0.034643
16  0.990333  0.029325      0.991167  0.035324
15  0.989259  0.031351      0.991333  0.037294
12  0.988815  0.033145      0.989333  0.037572
19  0.991037  0.025974      0.990500  0.038096


In [37]:
# Dropout baseline with rate 0.5: same convolutional stack as the Masksembles
# model, but with no Masksembles layers and one Dropout layer before the classifier.
# This re-binds modelDropOut / trainHistDropOut / histDfDropOut, replacing the
# objects produced by the earlier dropout run.
modelDropOut = keras.Sequential()
modelDropOut.add(keras.Input(shape=input_shape))

# Convolutional block 1.
modelDropOut.add(layers.Conv2D(32, kernel_size=(3, 3), activation="elu"))
modelDropOut.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Convolutional block 2.
modelDropOut.add(layers.Conv2D(64, kernel_size=(3, 3), activation="elu"))
modelDropOut.add(layers.MaxPooling2D(pool_size=(2, 2)))

# Classifier head with dropout on the flattened features.
modelDropOut.add(layers.Flatten())
modelDropOut.add(layers.Dropout(0.5))
modelDropOut.add(layers.Dense(num_classes, activation="softmax"))

modelDropOut.summary()
modelDropOut.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
trainHistDropOut = modelDropOut.fit(
    x_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_split=0.1,
)

# Tabulate the recorded metrics and show the five rows with the best validation accuracy.
histDfDropOut = pd.DataFrame(data=trainHistDropOut.history)
histDfDropOut.sort_values(by="val_accuracy", ascending=False).head(n=5)

Epoch 1/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 8ms/step - accuracy: 0.7850 - loss: 0.7167 - val_accuracy: 0.9787 - val_loss: 0.0824
Epoch 2/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9634 - loss: 0.1176 - val_accuracy: 0.9837 - val_loss: 0.0618
Epoch 3/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9737 - loss: 0.0848 - val_accuracy: 0.9857 - val_loss: 0.0512
Epoch 4/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9777 - loss: 0.0696 - val_accuracy: 0.9860 - val_loss: 0.0490
Epoch 5/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9802 - loss: 0.0657 - val_accuracy: 0.9862 - val_loss: 0.0497
Epoch 6/20
[1m422/422[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9817 - loss: 0.0592 - val_accuracy: 0.9865 - val_loss: 0.0461
Epoch 7/20
[1m422/422[0m 

Unnamed: 0,accuracy,loss,val_accuracy,val_loss
18,0.988278,0.037323,0.991167,0.039466
13,0.985685,0.04389,0.9905,0.037998
15,0.986185,0.043503,0.990167,0.037168
12,0.985444,0.045017,0.990167,0.041489
10,0.985463,0.047088,0.99,0.039702


In [38]:
# Print the five lowest-val_loss rows of the Masksembles run and of the
# dropout run currently held in histDfDropOut, one table after the other.
print('MaskEnsemble -\n', histDf.sort_values(by="val_loss").head(n=5))
print()
print('DropOut 0.5  -\n', histDfDropOut.sort_values(by="val_loss").head(n=5))

MaskEnsemble -
     accuracy      loss  val_accuracy  val_loss
12  0.985204  0.047907      0.986833  0.047192
16  0.987333  0.040537      0.986000  0.048979
19  0.989018  0.034942      0.985500  0.049971
13  0.986796  0.043706      0.985167  0.050993
14  0.986222  0.043265      0.985167  0.051626

DropOut 0.5  -
     accuracy      loss  val_accuracy  val_loss
15  0.986185  0.043503      0.990167  0.037168
13  0.985685  0.043890      0.990500  0.037998
16  0.986574  0.041164      0.989667  0.038462
18  0.988278  0.037323      0.991167  0.039466
10  0.985463  0.047088      0.990000  0.039702
