# Custom Layers and Models

In [1]:
import tensorflow as tf

from d2l.tensorflow import config, data, initializers, activations, losses, metrics, optimizers, plot

# Project-specific environment setup from d2l.tensorflow.config.
# NOTE(review): the cell output below suggests it reports the device in use;
# confirm against the helper's implementation.
config.setup()

Tensorflow running on CPU


In [2]:
# Mini-batch size: used to load the data here and later passed to
# Sequential.fit(), which forwards it to the optimizer.
batch_size = 256

# Load the 'fashion_mnist' dataset through the project's TFDS helper.
# Both returned objects are later iterated as (X, y) batches by fit()/predict().
train_iter, test_iter = data.load_tfds_dataset('fashion_mnist', batch_size)

In [3]:
# Training schedule.
num_epochs = 10
learning_rate = 0.5

# Layer sizes: input features -> hidden units -> output classes.
# NOTE(review): 784 is presumably a flattened 28x28 image; confirm against
# the dataset loader.
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# Callables handed to Sequential.compile() further down.
loss_function = losses.softmax_cross_entropy
eval_metric = metrics.accuracy
optimizer = optimizers.sgd

## Custom Layer

In [4]:
class BaseLayer:
    """Common ancestor of all layers.

    Supplies the class-level defaults that concrete layers build on:
    a type tag and the starting value of the instance counter.
    """

    # Tag describing the kind of layer.
    __type__ = 'base'

    # Starts at -1 so that the first increment yields index 0.
    _identifier = -1

    def __init__(self):
        """Create a layer; the base class keeps no per-instance state."""
        return None

In [5]:
class Dense(BaseLayer):
    """Fully connected layer: holds a weight matrix, a bias and an activation.

    The layer only stores its parameters; the affine transform and the
    activation are applied by the model that owns the layer.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of output units.
        activation: Activation identifier, resolved through
            ``activations.get_activation``.
        initialization: Initialisation method name forwarded to
            ``initializers.initialize_parameters``.
        magnitude: Forwarded unchanged to the initializer.
        scale: Forwarded unchanged to the initializer.
    """

    # Declared on the class (rather than assigned inside __init__) so the
    # tag is correct even before the first instance is created.
    __type__ = 'compute'

    # Dense keeps its own counter; -1 means the first layer is ``dense_0``.
    _identifier = -1

    def __init__(
        self, n_inputs, n_outputs, activation='relu',
        initialization='gaussian', magnitude=None, scale=None
    ):
        super().__init__()

        # Unique, lower-cased name such as ``dense_0``, ``dense_1``, ...
        Dense._identifier += 1
        self.__name__ = '{}_{}'.format(Dense.__name__, Dense._identifier).lower()

        self.weights, self.bias = initializers.initialize_parameters(
            n_inputs, n_outputs, method=initialization, magnitude=magnitude, scale=scale
        )
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.activation = activations.get_activation(activation)

    def __repr__(self):
        """Return a dict-style summary including the current parameter values."""
        return str({
            'name': self.__name__,
            'type': self.__type__,
            'n_inputs': self.n_inputs,
            'n_outputs': self.n_outputs,
            'activation': self.activation.__name__,
            'weights': self.weights.numpy(),
            'bias': self.bias.numpy()
        })

## Custom Model

In [6]:
class BaseModel:
    """Common ancestor of all models.

    Every training-related slot starts out empty and the model is flagged
    as not compiled.
    """

    # Starts at -1 so that the first increment yields index 0.
    _identifier = -1

    def __init__(self):
        """Reset all training configuration to its unset state."""
        # Chained assignment binds the targets left to right, so the
        # attributes are created in the order they are written.
        self.loss_function = self.eval_metric = None
        self.eval_function = self.optimizer = None
        self.compiled = False

In [7]:
class Sequential(BaseModel):
    """Feed-forward model that applies its layers in insertion order.

    Typical use: create the model, ``add`` layers, ``compile`` it with a
    loss function, an evaluation metric and an optimizer, then call ``fit``.
    """

    def __init__(self):
        super().__init__()
        # Unique, lower-cased name such as ``sequential_0``.
        Sequential._identifier += 1
        self.__name__ = '{}_{}'.format(
            Sequential.__name__, Sequential._identifier
        ).lower()
        self.layers = []

    def add(self, layer):
        """Append ``layer`` to the end of the layer stack."""
        self.layers.append(layer)

    def compile(self, loss_function, eval_metric, optimizer):
        """Store the training configuration and mark the model as compiled.

        Args:
            loss_function: Callable invoked as ``loss_function(y, y_hat)``.
            eval_metric: Callable invoked as ``eval_metric(y, y_hat)``.
            optimizer: Callable invoked as
                ``optimizer(W, b, dW, db, learning_rate, batch_size)``.
        """
        self.loss_function = loss_function
        self.eval_metric = eval_metric
        self.optimizer = optimizer
        self.compiled = True

    def _require_configured(self, caller, *attributes):
        """Raise ``RuntimeError`` if any of ``attributes`` is still ``None``."""
        missing = [name for name in attributes if getattr(self, name) is None]
        if missing:
            raise RuntimeError(
                '{}() requires a configured model; call compile() first '
                '(unset: {})'.format(caller, ', '.join(missing))
            )

    def net(self, X, inference=False):
        """Run a forward pass and return the last layer's affine output.

        ``X`` is reshaped to ``(-1, n_inputs)`` of the first layer. Every
        layer except the last applies its activation; the last layer's
        activation is not applied, so the raw ``X @ W + b`` is returned.

        Args:
            X: Input batch.
            inference: Accepted for API symmetry; currently unused.

        Raises:
            IndexError: If no layer has been added.
        """
        X = tf.reshape(X, (-1, self.layers[0].n_inputs))

        for layer in self.layers[:-1]:
            X = layer.activation(tf.matmul(X, layer.weights) + layer.bias)

        last = self.layers[-1]
        return tf.matmul(X, last.weights) + last.bias

    def fit(self, epochs, train_iter, val_iter, learning_rate, batch_size, animate=False):
        """Train the model and report train/validation metrics once per epoch.

        Args:
            epochs: Number of passes over ``train_iter``.
            train_iter: Iterable of ``(X, y)`` training batches.
            val_iter: Iterable of ``(X, y)`` batches evaluated after each epoch.
            learning_rate: Forwarded to the optimizer.
            batch_size: Forwarded to the optimizer.
            animate: If true, plot the metrics with ``plot.Animator``
                instead of printing them.

        Raises:
            RuntimeError: If the loss function, evaluation metric or
                optimizer has not been set (see ``compile``).
        """
        # Fail early with a clear message instead of an opaque
        # "'NoneType' object is not callable" from inside the training loop.
        self._require_configured('fit', 'loss_function', 'eval_metric', 'optimizer')

        animator = None
        if animate:
            animator = plot.Animator(
                xlabel='epoch', xlim=[1, epochs], ylim=[0, 1],
                legend=['train loss', 'train eval', 'val loss', 'val eval'],
                title='Training loss and eval'
            )

        for epoch in range(epochs):
            # Three running sums: summed loss, eval metric, sample count.
            metric_train = metrics.Accumulator(3)
            W = [layer.weights for layer in self.layers]
            b = [layer.bias for layer in self.layers]

            for X, y in train_iter:
                with tf.GradientTape() as tape:
                    y_hat = self.net(X)
                    loss = self.loss_function(y, y_hat)
                # Gradients come back in the same nested [W, b] structure.
                dW, db = tape.gradient(loss, [W, b])
                # The optimizer's return value is ignored.
                # NOTE(review): presumably it updates W and b in place; confirm.
                self.optimizer(W, b, dW, db, learning_rate, batch_size)
                metric_train.add(
                    tf.reduce_sum(loss), self.eval_metric(y, y_hat), y.shape[0]
                )

            train_metrics = (
                metric_train[0] / metric_train[2],
                metric_train[1] / metric_train[2]
            )
            val_metrics = self.predict(val_iter)

            # Identity check: do not depend on the truthiness of the
            # Animator object.
            if animator is not None:
                animator.add(epoch + 1, train_metrics + val_metrics)
            else:
                print(
                    'epoch {0} => '
                    '[train loss: {1[0]}, train eval: {1[1]}]'
                    ' | '
                    '[val loss: {2[0]}, val eval: {2[1]}]'.format(
                        epoch + 1, train_metrics, val_metrics
                    )
                )

    def predict(self, test_iter):
        """Evaluate the model on ``test_iter``.

        Despite the name, this returns aggregate metrics rather than
        per-sample predictions.

        Args:
            test_iter: Iterable of ``(X, y)`` batches.

        Returns:
            Tuple ``(loss, eval)``, each divided by the number of samples seen.

        Raises:
            RuntimeError: If the loss function or evaluation metric has not
                been set (see ``compile``).
        """
        self._require_configured('predict', 'loss_function', 'eval_metric')

        # Three running sums: summed loss, eval metric, sample count.
        metric_test = metrics.Accumulator(3)

        for X, y in test_iter:
            y_hat = self.net(X, inference=True)
            loss = self.loss_function(y, y_hat)
            metric_test.add(tf.reduce_sum(loss), self.eval_metric(y, y_hat), y.shape[0])

        return metric_test[0] / metric_test[2], metric_test[1] / metric_test[2]

    def __repr__(self):
        """Return a dict-style summary; training settings appear once compiled."""
        summary = {
            'name': self.__name__,
            'layers': self.layers,
        }
        if self.compiled:
            summary['loss function'] = self.loss_function.__name__
            summary['eval metric'] = self.eval_metric.__name__
            summary['optimizer'] = self.optimizer.__name__
        return str(summary)

## Testing implementation

In [8]:
# Hidden layer: num_inputs -> num_hiddens, with the default 'relu' activation.
d0 = Dense(num_inputs, num_hiddens)
# Output layer: num_hiddens -> num_outputs. Sequential.net() returns the last
# layer's raw affine output, so this layer's activation is never applied.
d1 = Dense(num_hiddens, num_outputs)

In [9]:
# Display the layer summary produced by Dense.__repr__.
d0

{'name': 'dense_0', 'type': 'compute', 'n_inputs': 784, 'n_outputs': 256, 'activation': 'relu', 'weights': array([[ 0.14725922,  0.07386696,  0.09464841, ...,  0.04962331,
         0.03240259,  0.16276436],
       [ 0.04921127,  0.12376031,  0.0893697 , ...,  0.13690081,
        -0.02476264, -0.05755023],
       [ 0.01736821, -0.03836132, -0.0116649 , ...,  0.09117223,
        -0.05258843, -0.05509703],
       ...,
       [-0.09189288, -0.02289783, -0.00842697, ...,  0.01262701,
        -0.11798956, -0.0248114 ],
       [-0.15405217, -0.14964525, -0.00509485, ...,  0.08117805,
         0.0152805 , -0.07696279],
       [ 0.19414917, -0.12514627, -0.07362437, ..., -0.06660258,
        -0.08112863, -0.11601476]], dtype=float32), 'bias': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 

In [10]:
# Create an empty model; layers are attached with add() in the next cell.
model = Sequential()

In [11]:
# Order matters: the forward pass applies the layers in insertion order.
model.add(d0)
model.add(d1)

In [12]:
# Before compile(), Sequential.__repr__ lists only the name and the layers.
model

{'name': 'sequential_0', 'layers': [{'name': 'dense_0', 'type': 'compute', 'n_inputs': 784, 'n_outputs': 256, 'activation': 'relu', 'weights': array([[ 0.14725922,  0.07386696,  0.09464841, ...,  0.04962331,
         0.03240259,  0.16276436],
       [ 0.04921127,  0.12376031,  0.0893697 , ...,  0.13690081,
        -0.02476264, -0.05755023],
       [ 0.01736821, -0.03836132, -0.0116649 , ...,  0.09117223,
        -0.05258843, -0.05509703],
       ...,
       [-0.09189288, -0.02289783, -0.00842697, ...,  0.01262701,
        -0.11798956, -0.0248114 ],
       [-0.15405217, -0.14964525, -0.00509485, ...,  0.08117805,
         0.0152805 , -0.07696279],
       [ 0.19414917, -0.12514627, -0.07362437, ..., -0.06660258,
        -0.08112863, -0.11601476]], dtype=float32), 'bias': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [13]:
# Attach the loss, evaluation metric and optimizer chosen above and mark
# the model as compiled.
model.compile(loss_function, eval_metric, optimizer)

In [14]:
# After compile(), the summary also names the loss function, eval metric
# and optimizer.
model

{'name': 'sequential_0', 'layers': [{'name': 'dense_0', 'type': 'compute', 'n_inputs': 784, 'n_outputs': 256, 'activation': 'relu', 'weights': array([[ 0.14725922,  0.07386696,  0.09464841, ...,  0.04962331,
         0.03240259,  0.16276436],
       [ 0.04921127,  0.12376031,  0.0893697 , ...,  0.13690081,
        -0.02476264, -0.05755023],
       [ 0.01736821, -0.03836132, -0.0116649 , ...,  0.09117223,
        -0.05258843, -0.05509703],
       ...,
       [-0.09189288, -0.02289783, -0.00842697, ...,  0.01262701,
        -0.11798956, -0.0248114 ],
       [-0.15405217, -0.14964525, -0.00509485, ...,  0.08117805,
         0.0152805 , -0.07696279],
       [ 0.19414917, -0.12514627, -0.07362437, ..., -0.06660258,
        -0.08112863, -0.11601476]], dtype=float32), 'bias': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [15]:
# Train for num_epochs epochs. The test split doubles as the validation set,
# so the reported 'val' numbers are test-set numbers. learning_rate and
# batch_size are forwarded to the optimizer.
model.fit(num_epochs, train_iter, test_iter, learning_rate, batch_size)

epoch 1 => [train loss: 0.7527556214650472, train eval: 0.7463833333333333] | [val loss: 0.5218812803268432, val eval: 0.8108]
epoch 2 => [train loss: 0.45398745752970376, train eval: 0.83285] | [val loss: 0.435511593914032, val eval: 0.8427]
epoch 3 => [train loss: 0.3964456132253011, train eval: 0.8545166666666667] | [val loss: 0.40081236295700073, val eval: 0.8565]
epoch 4 => [train loss: 0.3665442253112793, train eval: 0.8658833333333333] | [val loss: 0.3912468364238739, val eval: 0.8578]
epoch 5 => [train loss: 0.3482751040140788, train eval: 0.8727166666666667] | [val loss: 0.37469018261432646, val eval: 0.8688]
epoch 6 => [train loss: 0.33423660310109454, train eval: 0.8781] | [val loss: 0.3691064363718033, val eval: 0.865]
epoch 7 => [train loss: 0.31990981820424397, train eval: 0.88255] | [val loss: 0.3697754323959351, val eval: 0.864]
epoch 8 => [train loss: 0.3060824718475342, train eval: 0.8871] | [val loss: 0.3689617602825165, val eval: 0.8647]
epoch 9 => [train loss: 0.29