In [4]:
#!pip install d2l
#!pip install matplotlib
#!pip install matplotlib_inline


In [5]:
%matplotlib inline
import tensorflow as tf
from d2l import tensorflow as d2l

# Configure figure rendering through the d2l helper.
# NOTE(review): presumably switches inline plots to SVG — confirm against d2l.
d2l.use_svg_display()

In [6]:
# Load Fashion-MNIST through Keras; the result is unpacked as a (train, test) pair.
# NOTE(review): no later module-level code visible in this notebook reads these
# two names — `load_data_fashion_mnist` loads the dataset again on its own.
mnist_train, mnist_test = tf.keras.datasets.fashion_mnist.load_data()

In [7]:
def get_fashion_mnist_labels(labels):
    """Translate Fashion-MNIST class indices into their text labels.

    Args:
        labels: Iterable of class indices; every item is converted with
            ``int()`` before being used as an index.

    Returns:
        A list with one label string per entry of ``labels``.
    """
    class_names = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat',
                   'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
    names = []
    for index in labels:
        names.append(class_names[int(index)])
    return names

In [8]:
def load_data_fashion_mnist(batch_size, resize=None):
    """Download Fashion-MNIST and build batched training and test datasets.

    Images are divided by 255 and get a trailing axis appended (``axis=3``);
    labels are cast to int32.

    Args:
        batch_size: Number of examples per batch.
        resize: Optional target side length. When truthy, each batch of images
            is passed through ``tf.image.resize_with_pad`` with this value as
            both height and width; otherwise images are left untouched.

    Returns:
        A ``(train, test)`` tuple of ``tf.data.Dataset`` objects that yield
        ``(images, labels)`` batches. Only the training dataset is shuffled.
    """
    mnist_train, mnist_test = tf.keras.datasets.fashion_mnist.load_data()

    def process(X, y):
        # Scale pixels by 1/255, append a trailing axis, cast labels to int32.
        return tf.expand_dims(X, axis=3) / 255, tf.cast(y, dtype='int32')

    def resize_fn(X, y):
        # Runs after batching, so X is a whole batch of images here.
        return (tf.image.resize_with_pad(X, resize, resize) if resize else X, y)

    # Shuffle individual examples *before* batching. Shuffling after `.batch()`
    # only permutes fixed batches, so the same examples would share a batch in
    # every epoch.
    train = (tf.data.Dataset.from_tensor_slices(process(*mnist_train))
             .shuffle(len(mnist_train[0]))
             .batch(batch_size)
             .map(resize_fn))
    # The test split keeps its original order.
    test = (tf.data.Dataset.from_tensor_slices(process(*mnist_test))
            .batch(batch_size)
            .map(resize_fn))
    return train, test

In [9]:
# Length of one flattened input row; used as the target width of the
# `tf.reshape(X, (-1, n_features))` calls in this notebook.
n_features = 784
# Number of output classes (the label list in `get_fashion_mnist_labels` has 10 entries).
n_labels = 10

In [10]:
# Build the train/test iterators with a batch size of 256, then inspect a single
# batch: print shapes/dtypes and the two intermediate tensors of a softmax
# (element-wise exp and its per-row sum).
train_iter, test_iter = load_data_fashion_mnist(256)
for X, y in train_iter:
    # Flatten every image into one row of n_features values.
    X = tf.reshape(X, (-1, n_features))
    print(X.shape, X.dtype, y.shape, y.dtype)
    X_exp = tf.exp(X)
    # keepdims=True keeps the reduced axis, giving one column per row of X_exp.
    partition = tf.reduce_sum(X_exp, 1, keepdims=True)
    print(X_exp.shape, partition.shape)
    break  # only the first batch is inspected

(256, 784) <dtype: 'float32'> (256,) <dtype: 'int32'>
(256, 784) (256, 1)


In [11]:
def softmax(X):
    """Row-wise softmax of a 2-D tensor.

    Args:
        X: Tensor whose axis 1 holds the scores to normalise.

    Returns:
        A tensor of the same shape as ``X`` in which every row is
        ``exp(row) / sum(exp(row))``.
    """
    # Softmax is invariant to adding a constant to a row. Subtracting the row
    # maximum makes every exponent <= 0, so tf.exp cannot overflow to inf
    # (which would turn the division below into NaN). The shift is treated as
    # a constant for differentiation.
    row_max = tf.stop_gradient(tf.reduce_max(X, axis=1, keepdims=True))
    X_exp = tf.exp(X - row_max)
    partition = tf.reduce_sum(X_exp, 1, keepdims=True)
    return X_exp / partition

In [12]:
class Model(object):
    """Perceptron with one ReLU hidden layer and a softmax output layer."""

    def __init__(self, n_features, hidden_outputs, n_outputs):
        """Create the weights and biases of both layers.

        Args:
            n_features: Width of an input row.
            hidden_outputs: Number of hidden units.
            n_outputs: Number of output units.
        """
        def gaussian_variable(*dims):
            # Trainable variable initialised from a normal(mean=0, stddev=0.01).
            initial = tf.random.normal(shape=list(dims), mean=0, stddev=0.01)
            return tf.Variable(initial, trainable=True)

        # Variables are created in the order w1, b1, w2, b2.
        self.w1 = gaussian_variable(n_features, hidden_outputs)
        self.b1 = gaussian_variable(hidden_outputs)
        self.w2 = gaussian_variable(hidden_outputs, n_outputs)
        self.b2 = gaussian_variable(n_outputs)
        self.trainable_variables = [self.w1, self.b1, self.w2, self.b2]

    def forward(self, X):
        """Return the softmax of the output layer for the rows of ``X``."""
        hidden_pre = tf.matmul(X, self.w1) + self.b1
        scores = tf.matmul(tf.nn.relu(hidden_pre), self.w2) + self.b2
        return softmax(scores)

In [13]:
def loss(y_pred, y, n_labels):
    """Per-example cross-entropy of predicted probabilities against labels.

    Args:
        y_pred: 2-D tensor with one row of class probabilities per example.
        y: Integer class labels, one per row of ``y_pred``.
        n_labels: Number of classes; used as the one-hot depth.

    Returns:
        A 1-D tensor holding ``-log(p)`` for each example, where ``p`` is the
        probability ``y_pred`` assigns to that example's label.
    """
    # tf.boolean_mask documents a boolean mask, so convert the one-hot
    # encoding explicitly instead of passing float 0/1 values.
    true_class = tf.cast(tf.one_hot(y, depth=n_labels), tf.bool)
    picked = tf.boolean_mask(y_pred, true_class)
    # Floor the probability so an underflowed 0 yields a large finite loss
    # rather than inf (and NaN gradients).
    return -tf.math.log(tf.maximum(picked, 1e-12))

In [14]:
def sgd(params, grads, lr, batch_size):
    """Apply one minibatch stochastic-gradient-descent step in place.

    Args:
        params: Iterable of objects exposing ``assign_sub``.
        grads: Iterable of gradients, paired positionally with ``params``.
        lr: Learning rate.
        batch_size: Divisor applied to every gradient.
    """
    for pair in zip(params, grads):
        variable, gradient = pair
        step = lr * gradient / batch_size
        variable.assign_sub(step)

In [15]:
def accuracy(data_iter, model):
    """Fraction of examples in ``data_iter`` whose argmax prediction equals the label.

    Args:
        data_iter: Iterable of ``(X, y)`` batches.
        model: Object with a ``forward`` method; called on each batch after it
            has been reshaped to ``(-1, n_features)``.

    Returns:
        Number of correct predictions divided by the number of examples seen.
    """
    totals = Accumulator(2)  # [correct predictions, examples seen]
    for X, y in data_iter:
        flat = tf.reshape(X, (-1, n_features))
        scores = model.forward(flat)
        predicted = tf.argmax(scores, axis=1, output_type=tf.dtypes.int32)
        hits = tf.cast(predicted == y, dtype=tf.dtypes.int32)
        totals.add(tf.reduce_sum(hits), y.shape[0])
    n_correct, n_seen = totals[0], totals[1]
    return n_correct * 1.0 / n_seen

In [16]:
class Accumulator:
    """Keeps running float sums for ``n`` quantities."""

    def __init__(self, n):
        """Start with ``n`` sums, all equal to 0.0."""
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each argument, converted with ``float()``, to the sum in the same position."""
        updated = []
        for current, increment in zip(self.data, args):
            updated.append(current + float(increment))
        self.data = updated

    def reset(self):
        """Set every sum back to 0.0, keeping the current number of sums."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        """Return the sum stored at position ``idx``."""
        return self.data[idx]

In [17]:
# Hyperparameters read by the training cell that follows.
lr = 0.01
num_epochs = 200
# NOTE(review): self-assignment — rebinding `loss` to itself has no effect.
loss = loss
# Passed to `sgd` as the gradient divisor.
batch_size = 256

In [18]:
class ModelV2(object):
    """Perceptron with two ReLU hidden layers and a softmax output layer."""

    def __init__(self, n_features, hidden_outputs_1, hidden_outputs_2, n_outputs):
        """Create the weights and biases of the three layers.

        Args:
            n_features: Width of an input row.
            hidden_outputs_1: Number of units in the first hidden layer.
            hidden_outputs_2: Number of units in the second hidden layer.
            n_outputs: Number of output units.
        """
        def gaussian_variable(*dims):
            # Trainable variable initialised from a normal(mean=0, stddev=0.01).
            initial = tf.random.normal(shape=list(dims), mean=0, stddev=0.01)
            return tf.Variable(initial, trainable=True)

        # Variables are created in the order w1, b1, w2, b2, w3, b3.
        self.w1 = gaussian_variable(n_features, hidden_outputs_1)
        self.b1 = gaussian_variable(hidden_outputs_1)
        self.w2 = gaussian_variable(hidden_outputs_1, hidden_outputs_2)
        self.b2 = gaussian_variable(hidden_outputs_2)
        self.w3 = gaussian_variable(hidden_outputs_2, n_outputs)
        self.b3 = gaussian_variable(n_outputs)

        self.trainable_variables = [self.w1, self.b1, self.w2, self.b2, self.w3, self.b3]

    def forward(self, X):
        """Return the softmax of the output layer for the rows of ``X``."""
        activations = X
        # Both hidden layers apply an affine map followed by ReLU.
        for weight, bias in ((self.w1, self.b1), (self.w2, self.b2)):
            activations = tf.nn.relu(tf.matmul(activations, weight) + bias)
        scores = tf.matmul(activations, self.w3) + self.b3
        return softmax(scores)

In [None]:
# Train the two-hidden-layer network with minibatch SGD and print the test
# accuracy after every epoch.
model = ModelV2(n_features, 50, 30, n_labels)
for epoch in range(num_epochs):
    for X, y in train_iter:
        X = tf.reshape(X, (-1, n_features))
        with tf.GradientTape() as g:
            y_pred = model.forward(X)
            l = loss(y_pred, y, n_labels)
        params = model.trainable_variables
        # `l` holds one loss per example; for a non-scalar target the tape
        # returns the gradient of the sum over examples.
        grads = g.gradient(l, params)
        # Divide by the number of examples actually in this batch: the last
        # batch of an epoch can be smaller than the nominal `batch_size`, and
        # dividing by the nominal value would shrink its update.
        sgd(params, grads, lr, y.shape[0])
    print(accuracy(test_iter, model))
    

In [20]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1.0 + tf.exp(-x)
    return 1.0 / denominator

In [21]:
class ModelV3(object):
    """Fixed-weight network on 2-D inputs with three ReLU units and a sigmoid output.

    For an input row ``(x1, x2)`` the hidden layer computes
    ``relu(x1), relu(-x1), relu(x2)`` and the output is
    ``sigmoid(relu(x1) + relu(-x1) - relu(x2))``. Nothing is trained.
    """

    def __init__(self):
        # Rows below are the weight vectors of the hidden units; transposing
        # gives the (2, 3) matrix used by `forward`.
        self.w1 = tf.transpose(tf.Variable([[1, 0], [-1, 0], [0, 1]], dtype=tf.dtypes.float32))
        self.b1 = 0
        self.w2 = tf.Variable([[1], [1], [-1]], dtype=tf.dtypes.float32)
        self.b2 = 0

    def forward(self, X):
        """Return the sigmoid output, one row with a single column per row of ``X``."""
        hidden = tf.nn.relu(tf.matmul(X, self.w1) + self.b1)
        output = sigmoid(tf.matmul(hidden, self.w2) + self.b2)
        return output

    def predict(self, X):
        """Return a 1-D int32 tensor: 1 where the output exceeds 0.5, else 0."""
        output = self.forward(X)
        is_positive = tf.cast(tf.math.greater(output, 0.5), dtype=tf.dtypes.int32)
        # Flatten with an explicit 1-D shape. The previous `(len(output))` is
        # just an int (the parentheses do not make a tuple), so it passed a
        # scalar where tf.reshape expects a 1-D shape.
        return tf.reshape(is_positive, [-1])

In [22]:
# Four 2-D toy points and their binary labels.
# NOTE(review): this rebinds the global names X and y that the earlier
# batch-inspection and training loops also assign.
X = tf.constant([[-2, 1], [2, 1], [0, 0.5], [0, 2]], dtype=tf.dtypes.float32)
y = tf.constant([1, 1, 0, 0], dtype=tf.dtypes.int32)

In [23]:
# Run the fixed-weight model on the toy points; the recorded output below
# matches the labels in y.
# NOTE(review): this rebinds the global `model`.
model = ModelV3()
print(model.predict(X))

tf.Tensor([1 1 0 0], shape=(4,), dtype=int32)


In [24]:
def accuracy_v2(X, y, model):
    """Accuracy of ``model`` on a single in-memory batch.

    Prints the predicted class indices, then returns the fraction of rows
    whose argmax over axis 1 of ``model.forward(X)`` equals the label in ``y``.

    Args:
        X: Input tensor handed to ``model.forward`` unchanged.
        y: Integer labels, one per row of ``X``.
        model: Object with a ``forward`` method.

    Returns:
        Number of correct predictions divided by ``y.shape[0]``, as a float.
    """
    scores = model.forward(X)
    predicted = tf.argmax(scores, axis=1, output_type=tf.dtypes.int32)
    print(predicted)
    matches = tf.cast(predicted == y, dtype=tf.dtypes.int32)
    n_correct = float(tf.reduce_sum(matches))
    n_total = float(y.shape[0])
    return n_correct / n_total

In [25]:
# Fit a fresh ModelV2 to the toy data with 100 full-batch gradient steps,
# printing the summed loss and the accuracy after each step.
# NOTE(review): this rebinds the global `lr` (set to 0.01 in an earlier cell).
lr = 100
model = ModelV2(2, 10, 5, 2)
for epoch in range(100):
    with tf.GradientTape() as g:
        y_pred = model.forward(X)
        l = loss(y_pred, y, 2)
    params = model.trainable_variables
    grads = g.gradient(l, params)
    # NOTE(review): `batch_size` is still the global from the hyperparameter
    # cell (256), while the toy X defined above has 4 rows, so each step is
    # lr * grad / 256 — confirm this scaling is intended.
    sgd(params, grads, lr, batch_size)
    print(tf.reduce_sum(l))
    print(accuracy_v2(X, y, model))

tf.Tensor(2.7726192, shape=(), dtype=float32)
tf.Tensor([0 0 0 0], shape=(4,), dtype=int32)
0.5
tf.Tensor(2.772591, shape=(), dtype=float32)
tf.Tensor([0 0 0 0], shape=(4,), dtype=int32)
0.5
tf.Tensor(2.7725897, shape=(), dtype=float32)
tf.Tensor([0 0 0 0], shape=(4,), dtype=int32)
0.5
tf.Tensor(2.7725897, shape=(), dtype=float32)
tf.Tensor([0 0 0 0], shape=(4,), dtype=int32)
0.5
tf.Tensor(2.7725897, shape=(), dtype=float32)
tf.Tensor([0 0 0 0], shape=(4,), dtype=int32)
0.5
tf.Tensor(2.7725894, shape=(), dtype=float32)
tf.Tensor([0 0 0 0], shape=(4,), dtype=int32)
0.5
tf.Tensor(2.7725894, shape=(), dtype=float32)
tf.Tensor([0 0 1 1], shape=(4,), dtype=int32)
0.0
tf.Tensor(2.7725892, shape=(), dtype=float32)
tf.Tensor([1 0 1 1], shape=(4,), dtype=int32)
0.25
tf.Tensor(2.7725894, shape=(), dtype=float32)
tf.Tensor([1 0 1 1], shape=(4,), dtype=int32)
0.25
tf.Tensor(2.7725892, shape=(), dtype=float32)
tf.Tensor([1 0 1 1], shape=(4,), dtype=int32)
0.25
tf.Tensor(2.7725892, shape=(), dtype=f