In [2]:
import tensorflow as tf
import numpy as np


## 4.1.1 Build model from block ##

In [4]:
# The MLP class below overrides two methods of tf.keras.Model: __init__,
# which creates the layers (and hence the model parameters), and call,
# which defines the forward computation.
class MLP(tf.keras.Model):
    """Multilayer perceptron: Flatten, a 256-unit ReLU Dense layer, then a 10-unit Dense layer."""

    def __init__(self):
        super().__init__()
        # Flatten collapses every dimension except the first (batch_size).
        self.flatten = tf.keras.layers.Flatten()
        # Hidden layer with ReLU activation.
        self.dense1 = tf.keras.layers.Dense(256, activation=tf.nn.relu)
        # Output layer; no activation is specified.
        self.dense2 = tf.keras.layers.Dense(10)

    def call(self, inputs):
        """Run the forward pass and return the output of the final Dense layer."""
        hidden = self.dense1(self.flatten(inputs))
        return self.dense2(hidden)

In [5]:
# Instantiate the MLP and run a forward pass on a random input of shape (2, 20).
X = tf.random.uniform(shape=(2, 20))
net = MLP()
net(X)

<tf.Tensor: id=69, shape=(2, 10), dtype=float32, numpy=
array([[ 0.00266737,  0.16411328, -0.19289514,  0.40678027, -0.14189981,
        -0.3080926 , -0.07093648, -0.2706267 , -0.04944729,  0.20102516],
       [-0.07924227, -0.07542968, -0.0474828 ,  0.21872513, -0.20207694,
        -0.09206494,  0.03061593, -0.23421213, -0.07539637,  0.04171912]],
      dtype=float32)>

## 4.1.2 Sequential ##

In [6]:
# Re-implement the MLP class above using the Sequential class.
model = tf.keras.Sequential(layers=[
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=256, activation=tf.nn.relu),
    tf.keras.layers.Dense(units=10),
])

In [8]:
# Forward pass of the Sequential model on the input X defined earlier.
model(X)

<tf.Tensor: id=124, shape=(2, 10), dtype=float32, numpy=
array([[ 0.26132724, -0.25971016,  0.3684109 , -0.02836422, -0.25696072,
         0.19867346,  0.1593259 ,  0.0826593 ,  0.06780139,  0.18635643],
       [ 0.00304466, -0.1493062 ,  0.45815566, -0.00680811, -0.19535393,
         0.20603928,  0.0759685 , -0.18853089, -0.09576741,  0.07905925]],
      dtype=float32)>

## 4.1.3 Build complex model ##

In [10]:
# FancyMLP is a slightly more complex model.
# It uses tf.constant to create a weight that is not a trainable variable,
# i.e. a constant parameter. The forward computation combines that constant
# with tensor functions and Python control flow.
class FancyMLP(tf.keras.Model):
    """Model mixing a constant weight, one Dense layer, and Python control flow.

    The forward pass flattens the input, multiplies it by the constant
    20x20 ``rand_weight`` (so the flattened input must have 20 features),
    applies the Dense layer, rescales the result based on its norm, and
    returns the sum of all elements as a scalar tensor.
    """

    def __init__(self):
        super().__init__()
        self.flatten = tf.keras.layers.Flatten()
        # Constant weight: created with tf.constant rather than as a
        # tf.Variable, sampled once at construction time.
        self.rand_weight = tf.constant(
            tf.random.uniform((20, 20)))
        self.dense = tf.keras.layers.Dense(units=20, activation=tf.nn.relu)

    def call(self, inputs):
        """Compute the forward pass and return a scalar tensor."""
        x = self.flatten(inputs)
        x = tf.nn.relu(tf.matmul(x, self.rand_weight) + 1)
        # Rebind the lowercase ``x``: binding the result to any other name
        # would discard the Dense output and leave the layer without effect.
        x = self.dense(x)
        # Halve until the norm is at most 1.
        while tf.norm(x) > 1:
            x /= 2
        # Scale back up if the halving left the norm below 0.8.
        if tf.norm(x) < 0.8:
            x *= 10
        return tf.reduce_sum(x)

# rand_weight is a constant weight applied through matrix multiplication
# (tf.matmul); FancyMLP also owns a single Dense layer.
net = FancyMLP()
net(X)

<tf.Tensor: id=243, shape=(), dtype=float32, numpy=34.463>

In [15]:
class NestMLP(tf.keras.Model):
    """Model that nests a Sequential sub-network and follows it with one more Dense layer."""

    def __init__(self):
        super().__init__()
        # Inner stack: Flatten, then 64-unit and 32-unit ReLU Dense layers.
        self.net = tf.keras.Sequential([
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(units=64, activation=tf.nn.relu),
            tf.keras.layers.Dense(units=32, activation=tf.nn.relu),
        ])
        # Final 16-unit ReLU layer applied to the inner stack's output.
        self.dense = tf.keras.layers.Dense(16, activation=tf.nn.relu)

    def call(self, inputs, training=None, mask=None):
        """Apply the nested Sequential, then the final Dense layer.

        ``training`` and ``mask`` are accepted but not used by this method.
        """
        hidden = self.net(inputs)
        return self.dense(hidden)

# Compose models inside a Sequential: NestMLP, then a 20-unit Dense layer,
# then FancyMLP. The Dense(20) layer produces 20 features, which matches the
# 20x20 rand_weight that FancyMLP multiplies its input by.
net = tf.keras.Sequential()
net.add(NestMLP())
net.add(tf.keras.layers.Dense(20))
net.add(FancyMLP())

# Run the composed model on the input X defined earlier.
net(X)

<tf.Tensor: id=458, shape=(), dtype=float32, numpy=41.931>