In [26]:
import tensorflow as tf

from d2l.tensorflow import config, data, initializers, activations, losses, metrics, optimizers, plot

# Project-level setup helper from d2l.tensorflow; presumably the source of the
# device line recorded in this cell's output — confirm against its implementation.
config.setup()

Tensorflow running on CPU


In [27]:
# Mini-batch size: handed to the data loader here and later forwarded by
# Sequential.fit to the optimizer.
batch_size = 256

# Presumably yields batched (X, y) iterators for the train and test splits of
# Fashion-MNIST — confirm against d2l.tensorflow.data.load_tfds_dataset.
train_iter, test_iter = data.load_tfds_dataset('fashion_mnist', batch_size)

In [28]:
# Training schedule.
num_epochs = 10
learning_rate = 0.5

# Layer widths: network input, network output and the single hidden layer.
num_inputs = 784
num_outputs = 10
num_hiddens = 256

# Callables plugged into the model through Sequential.compile.
loss_function = losses.softmax_cross_entropy
eval_metric = metrics.accuracy
optimizer = optimizers.sgd

## Custom Layer

In [29]:
class BaseLayer:
    """Common root for layer classes.

    Carries the class-level defaults that subclasses read and override:
    ``__type__`` labels the kind of layer, and ``_identifier`` is the starting
    value of the counter used to number instances.
    """

    __type__ = 'base'
    _identifier = -1

    def __init__(self):
        """Nothing to initialise at the base level."""

In [46]:
class Dense(BaseLayer):
    """Layer holding a weight matrix, a bias and an activation function.

    Parameters
    ----------
    n_inputs, n_outputs:
        Sizes forwarded to ``initializers.initialize_parameters`` and stored
        on the instance.
    activation:
        Identifier resolved through ``activations.get_activation``.
    initialization, scale, sigma:
        Forwarded unchanged to ``initializers.initialize_parameters``.
    """

    def __init__(
        self, n_inputs, n_outputs, activation='relu',
        initialization='gaussian', scale=None, sigma=None
    ):
        # Class-level bookkeeping: tag the layer kind and advance the counter
        # that numbers instances (dense_0, dense_1, ...).
        Dense.__type__ = 'compute'
        Dense._identifier += 1
        self.__name__ = '_'.join(
            (Dense.__name__, str(Dense._identifier))
        ).lower()

        parameters = initializers.initialize_parameters(
            n_inputs, n_outputs, initialization, scale, sigma
        )
        self.weights, self.bias = parameters
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.activation = activations.get_activation(activation)

    def __repr__(self):
        """Return the string form of a dict describing the layer.

        The description embeds the complete weight and bias arrays (via
        ``.numpy()``), so the result can be very long.
        """
        description = dict(
            name=self.__name__,
            type=self.__type__,
            n_inputs=self.n_inputs,
            n_outputs=self.n_outputs,
            activation=self.activation.__name__,
            weights=self.weights.numpy(),
            bias=self.bias.numpy(),
        )
        return str(description)

## Custom Model

In [47]:
class BaseModel:
    """Common root for model classes.

    Provides the ``_identifier`` starting value used to number model instances
    and the empty slots for the training components.
    """

    _identifier = -1

    def __init__(self):
        """Start with no training components attached and ``compiled`` False."""
        # Chained assignment binds its targets left to right, so the attributes
        # are created in the order they are listed.
        self.loss_function = self.eval_metric = None
        self.eval_function = self.optimizer = None
        self.compiled = False

In [48]:
class Sequential(BaseModel):
    """Feed-forward stack of layers trained with a hand-written loop.

    Typical use: ``add`` layers in order, ``compile`` with a loss function, an
    evaluation metric and an optimizer, then call ``fit``.
    """

    def __init__(self):
        """Create an empty, uncompiled model named ``sequential_<n>``."""
        super().__init__()
        Sequential._identifier += 1
        self.__name__ = '{}_{}'.format(
            Sequential.__name__, Sequential._identifier
        ).lower()
        self.layers = list()

    def add(self, layer):
        """Append ``layer`` to the end of the stack."""
        self.layers.append(layer)

    def compile(self, loss_function, eval_metric, optimizer):
        """Attach the training components and mark the model as compiled.

        Parameters
        ----------
        loss_function:
            Called as ``loss_function(y, y_hat)``.
        eval_metric:
            Called as ``eval_metric(y, y_hat)``.
        optimizer:
            Called as ``optimizer(W, b, dW, db, learning_rate, batch_size)``.
        """
        self.loss_function = loss_function
        self.eval_metric = eval_metric
        self.optimizer = optimizer
        self.compiled = True

    def _require(self, action, *attributes):
        """Raise ``RuntimeError`` if any of the named components is still unset."""
        missing = [
            name for name in attributes if getattr(self, name, None) is None
        ]
        if missing:
            raise RuntimeError(
                '{}: call compile() before {}() (unset: {})'.format(
                    self.__name__, action, ', '.join(missing)
                )
            )

    @staticmethod
    def _label(component):
        """Return a printable name for a component, even one without ``__name__``."""
        return getattr(component, '__name__', repr(component))

    def net(self, X, inference=False):
        """Run the forward pass and return the output of the last layer.

        ``X`` is reshaped to ``(-1, n_inputs)`` of the first layer. Every layer
        except the last applies ``activation(X @ weights + bias)``; the last
        layer's activation is not applied, so its affine output is returned
        as is.

        Parameters
        ----------
        X:
            Input batch.
        inference:
            Accepted for interface compatibility; currently unused.

        Raises
        ------
        IndexError
            If the model has no layers.
        """
        if not self.layers:
            raise IndexError(
                '{} has no layers; add() at least one before running it'.format(
                    self.__name__
                )
            )

        X = tf.reshape(X, (-1, self.layers[0].n_inputs))

        for layer in self.layers[:-1]:
            X = layer.activation(tf.matmul(X, layer.weights) + layer.bias)

        output_layer = self.layers[-1]
        return tf.matmul(X, output_layer.weights) + output_layer.bias

    def fit(self, epochs, train_iter, val_iter, learning_rate, batch_size, animate=False):
        """Train the model and report train/validation metrics after each epoch.

        Parameters
        ----------
        epochs:
            Number of passes over ``train_iter``.
        train_iter, val_iter:
            Iterables of ``(X, y)`` batches; ``val_iter`` is evaluated with
            ``predict`` at the end of every epoch.
        learning_rate, batch_size:
            Forwarded unchanged to the optimizer on every step.
        animate:
            When true, metrics are drawn with ``plot.Animator``; otherwise one
            line per epoch is printed.

        Raises
        ------
        RuntimeError
            If the loss function, eval metric or optimizer has not been set.
        """
        self._require('fit', 'loss_function', 'eval_metric', 'optimizer')

        animator = None
        if animate:
            animator = plot.Animator(
                xlabel='epoch', xlim=[1, epochs], ylim=[0, 1],
                legend=['train loss', 'train eval', 'val loss', 'val eval'],
                title='Training loss and eval'
            )

        # The layer list is not modified while training, so the parameter
        # lists are collected once instead of once per epoch.
        W = [layer.weights for layer in self.layers]
        b = [layer.bias for layer in self.layers]

        for epoch in range(epochs):
            # Accumulates (summed loss, summed eval metric, example count).
            metric_train = metrics.Accumulator(3)

            for X, y in train_iter:
                with tf.GradientTape() as t:
                    y_hat = self.net(X)
                    loss = self.loss_function(y, y_hat)
                # Gradients come back in the same nested [W, b] structure.
                dW, db = t.gradient(loss, [W, b])
                self.optimizer(W, b, dW, db, learning_rate, batch_size)
                metric_train.add(
                    tf.reduce_sum(loss), self.eval_metric(y, y_hat), y.shape[0]
                )

            train_metrics = (
                metric_train[0] / metric_train[2],
                metric_train[1] / metric_train[2]
            )
            val_metrics = self.predict(val_iter)
            if animator is not None:
                animator.add(epoch + 1, train_metrics + val_metrics)
            else:
                print(
                    'epoch {0} => '
                    '[train loss: {1[0]}, train eval: {1[1]}]'
                    ' | '
                    '[val loss: {2[0]}, val eval: {2[1]}]'.format(
                        epoch + 1, train_metrics, val_metrics
                    )
                )

    def predict(self, test_iter):
        """Evaluate the model on ``test_iter``.

        Despite the name, this does not return predictions: it returns the
        tuple ``(summed loss / examples, summed eval metric / examples)``
        accumulated over all ``(X, y)`` batches.

        Raises
        ------
        RuntimeError
            If the loss function or eval metric has not been set.
        """
        self._require('predict', 'loss_function', 'eval_metric')

        metric_test = metrics.Accumulator(3)

        for X, y in test_iter:
            y_hat = self.net(X, inference=True)
            loss = self.loss_function(y, y_hat)
            metric_test.add(tf.reduce_sum(loss), self.eval_metric(y, y_hat), y.shape[0])

        return metric_test[0] / metric_test[2], metric_test[1] / metric_test[2]

    def __repr__(self):
        """Return the string form of a dict describing the model.

        Component names are included only once the model has been compiled.
        Components without a ``__name__`` are shown through their ``repr``.
        """
        if self.compiled:
            return str({
                'name': self.__name__,
                'layers': self.layers,
                'loss function': self._label(self.loss_function),
                'eval metric': self._label(self.eval_metric),
                'optimizer': self._label(self.optimizer)
            })
        return str({
            'name': self.__name__,
            'layers': self.layers,
        })

## Testing implementation

In [49]:
# Hidden layer (num_inputs -> num_hiddens) followed by the output layer
# (num_hiddens -> num_outputs). Both use the default 'relu' activation, but
# Sequential.net never applies the activation of the last layer in the stack.
d0 = Dense(num_inputs, num_hiddens)
d1 = Dense(num_hiddens, num_outputs)

In [50]:
# Display the layer via Dense.__repr__; the output embeds the full weight and
# bias arrays.
d0

{'name': 'dense_0', 'type': 'compute', 'n_inputs': 784, 'n_outputs': 256, 'activation': 'relu', 'weights': array([[ 0.10286564, -0.20242675, -0.13621886, ...,  0.0080618 ,
         0.04048036,  0.02602621],
       [-0.16428794,  0.09207241,  0.0825626 , ..., -0.03658118,
        -0.0450652 ,  0.12473804],
       [-0.1000337 ,  0.05609204,  0.05150723, ..., -0.14192116,
         0.14169066, -0.20065205],
       ...,
       [-0.05866313, -0.0219347 , -0.02409595, ...,  0.14254047,
         0.12858361, -0.00100724],
       [ 0.10365319,  0.01138475,  0.03546103, ..., -0.10747639,
         0.01589487, -0.08870988],
       [ 0.11321858, -0.21262196,  0.22235206, ..., -0.02750733,
        -0.1954495 , -0.06486427]], dtype=float32), 'bias': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 

In [51]:
# Fresh model: no layers yet and not compiled.
model = Sequential()

In [52]:
# Order matters: net() reshapes its input to the first layer's n_inputs and
# treats the last layer added as the output layer.
model.add(d0)
model.add(d1)

In [53]:
# Not compiled yet, so the repr lists only the model name and its layers.
model

{'name': 'sequential_0', 'layers': [{'name': 'dense_0', 'type': 'compute', 'n_inputs': 784, 'n_outputs': 256, 'activation': 'relu', 'weights': array([[ 0.10286564, -0.20242675, -0.13621886, ...,  0.0080618 ,
         0.04048036,  0.02602621],
       [-0.16428794,  0.09207241,  0.0825626 , ..., -0.03658118,
        -0.0450652 ,  0.12473804],
       [-0.1000337 ,  0.05609204,  0.05150723, ..., -0.14192116,
         0.14169066, -0.20065205],
       ...,
       [-0.05866313, -0.0219347 , -0.02409595, ...,  0.14254047,
         0.12858361, -0.00100724],
       [ 0.10365319,  0.01138475,  0.03546103, ..., -0.10747639,
         0.01589487, -0.08870988],
       [ 0.11321858, -0.21262196,  0.22235206, ..., -0.02750733,
        -0.1954495 , -0.06486427]], dtype=float32), 'bias': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [54]:
# Attach the loss, evaluation metric and optimizer chosen in the configuration cell.
model.compile(loss_function, eval_metric, optimizer)

In [55]:
# Once compiled, the repr also reports the loss function, eval metric and
# optimizer names.
model

{'name': 'sequential_0', 'layers': [{'name': 'dense_0', 'type': 'compute', 'n_inputs': 784, 'n_outputs': 256, 'activation': 'relu', 'weights': array([[ 0.10286564, -0.20242675, -0.13621886, ...,  0.0080618 ,
         0.04048036,  0.02602621],
       [-0.16428794,  0.09207241,  0.0825626 , ..., -0.03658118,
        -0.0450652 ,  0.12473804],
       [-0.1000337 ,  0.05609204,  0.05150723, ..., -0.14192116,
         0.14169066, -0.20065205],
       ...,
       [-0.05866313, -0.0219347 , -0.02409595, ...,  0.14254047,
         0.12858361, -0.00100724],
       [ 0.10365319,  0.01138475,  0.03546103, ..., -0.10747639,
         0.01589487, -0.08870988],
       [ 0.11321858, -0.21262196,  0.22235206, ..., -0.02750733,
        -0.1954495 , -0.06486427]], dtype=float32), 'bias': array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,

In [56]:
# Train for num_epochs; the test iterator is passed as the validation set.
# animate keeps its default (False), so one metrics line is printed per epoch.
model.fit(num_epochs, train_iter, test_iter, learning_rate, batch_size)

epoch 1 => [train loss: 0.7876606113433838, train eval: 0.7415] | [val loss: 0.512567887878418, val eval: 0.8114]
epoch 2 => [train loss: 0.4659649243036906, train eval: 0.82985] | [val loss: 0.42746301798820496, val eval: 0.8471]
epoch 3 => [train loss: 0.40268772214253745, train eval: 0.8537833333333333] | [val loss: 0.41856889944076536, val eval: 0.8507]
epoch 4 => [train loss: 0.375395339457194, train eval: 0.86365] | [val loss: 0.40838507976531985, val eval: 0.8537]
epoch 5 => [train loss: 0.35098922929763793, train eval: 0.8714166666666666] | [val loss: 0.3852728305339813, val eval: 0.8615]
epoch 6 => [train loss: 0.33924239934285483, train eval: 0.8741] | [val loss: 0.37799762868881226, val eval: 0.8641]
epoch 7 => [train loss: 0.3237171047846476, train eval: 0.8806833333333334] | [val loss: 0.3633623619914055, val eval: 0.8712]
epoch 8 => [train loss: 0.3117334505081177, train eval: 0.8859166666666667] | [val loss: 0.36610000853538516, val eval: 0.864]
epoch 9 => [train loss: 0