In [1]:
import tensorflow as tf
# Short alias for the TF 1.x contrib eager namespace; used below for tfe.Variable.
tfe = tf.contrib.eager
# Turn on eager execution so that ops run immediately and tensors/variables
# expose .numpy(), which the training cell relies on.
tf.enable_eager_execution()


import numpy as np
import matplotlib.pyplot as plt

# 1. 载入数据集

In [2]:
# Load the MNIST train/test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data(path='mnist.npz')

# Print shape and dtype of the images and the labels of each split.
for header, images, labels in (
    ('Training set:', x_train, y_train),
    ('Testing set :', x_test, y_test),
):
    print(header)
    print('       Data :\t shape:', images.shape, '\t type:', images.dtype)
    print('       Label:\t shape:', labels.shape, '\t\t type:', labels.dtype)

Training set:
       Data :	 shape: (60000, 28, 28) 	 type: uint8
       Label:	 shape: (60000,) 		 type: uint8
Testing set :
       Data :	 shape: (10000, 28, 28) 	 type: uint8
       Label:	 shape: (10000,) 		 type: uint8


# 2. 数据预处理

In [3]:
# Flatten every image into a 1-D vector, convert to float32 and scale the
# uint8 pixel values into [0, 1].
# The number of samples is read from the arrays themselves instead of being
# hard-coded, so the cell also works on a subset or a differently sized split.
# NOTE: this cell rebinds x_*/y_* in place, so it must be run exactly once
# after the loading cell (re-running it would rescale / re-encode the data).
x_train = x_train.reshape(len(x_train), -1).astype(np.float32) / 255
x_test = x_test.reshape(len(x_test), -1).astype(np.float32) / 255
print(np.shape(x_train), np.shape(x_test))

# Convert the integer class labels into one-hot vectors with 10 classes.
y_train = tf.one_hot(y_train, 10, dtype=tf.float32)
y_test = tf.one_hot(y_test, 10, dtype=tf.float32)
print(y_train, y_test)

(60000, 784) (10000, 784)
tf.Tensor(
[[0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]], shape=(60000, 10), dtype=float32) tf.Tensor(
[[0. 0. 0. ... 1. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]], shape=(10000, 10), dtype=float32)


# 3. 设置超参数

In [4]:
# Number of examples consumed by one gradient step.
BATCH_SIZE = 100
# Number of full batches in one pass over the training set.
NUM_BATCH = x_train.shape[0] // BATCH_SIZE
# Step size used by the manual gradient-descent update in train().
LEARNING_RATE = 0.2

# 4. 构建网络

In [5]:
# 4.1 Define a simple neural network
class Model(object):
    """Single linear layer followed by a softmax (784 inputs -> 10 classes)."""

    def __init__(self):
        # Weights and bias both start at zero.
        self.W = tfe.Variable(tf.zeros(shape=[784, 10]))
        self.b = tfe.Variable(tf.zeros(shape=[10]))

    def __call__(self, x):
        """Return softmax(x @ W + b) for the input batch ``x``."""
        logits = tf.matmul(x, self.W) + self.b
        return tf.nn.softmax(logits)

# 4.2 Loss Function
def loss(predicted_y, desired_y):
    """Mean squared error between predictions and targets.

    The squared element-wise difference is averaged over every element of
    the tensor, and the resulting scalar tensor is returned.
    """
    residual = predicted_y - desired_y
    squared_error = tf.square(residual)
    return tf.reduce_mean(squared_error)

# 4.3 Accuracy Evaluator
def accuracy(predicted_y_prob, desired_y):
    """Fraction of rows whose predicted class matches the target class.

    Both arguments are reduced with argmax along axis 1, so each row is
    represented by the index of its largest entry before comparison.
    """
    # argmax returns the position of the largest value along the given axis.
    true_class = tf.argmax(desired_y, axis=1)
    predicted_class = tf.argmax(predicted_y_prob, axis=1)
    # Cast the boolean matches to 0.0 / 1.0 so their mean is the hit rate.
    hits = tf.cast(tf.equal(true_class, predicted_class), tf.float32)
    return tf.reduce_mean(hits)

# 4.4 Optimizer
def train(model, inputs, outputs, learning_rate):
    """Apply one plain gradient-descent step to ``model.W`` and ``model.b``.

    The loss of ``model(inputs)`` against ``outputs`` is recorded on a
    gradient tape; each parameter is then decreased in place by
    ``learning_rate`` times its gradient. Nothing is returned.
    """
    parameters = [model.W, model.b]
    with tf.GradientTape() as tape:
        batch_loss = loss(model(inputs), outputs)
    # All gradients are computed before any parameter is modified.
    gradients = tape.gradient(batch_loss, parameters)
    for parameter, gradient in zip(parameters, gradients):
        parameter.assign_sub(learning_rate * gradient)
    
# 4.5 Build the training dataset with TensorFlow
# Slice the arrays along their first axis into (image, label) pairs ...
TrainDataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
# ... and group consecutive pairs into mini-batches of BATCH_SIZE.
BatchedDataset = TrainDataset.batch(batch_size=BATCH_SIZE)

# 5. 训练、评估网络

In [7]:
model = Model()

# History of the parameters, one snapshot per training step
# (NUM_EPOCHS * NUM_BATCH entries in total). These lists were previously
# appended to without being defined in any cell, which raised NameError on a
# fresh kernel; creating them here also resets the history on every re-run.
Ws, bs = [], []

NUM_EPOCHS = 20
epochs = range(NUM_EPOCHS)
for epoch in epochs:
    # Shuffle into an epoch-local dataset. Rebinding BatchedDataset itself
    # would stack one more shuffle stage on the global pipeline every epoch
    # (and every re-run of this cell). Because shuffling happens after
    # batching, it reorders whole batches, not the examples inside them.
    epoch_dataset = BatchedDataset.shuffle(buffer_size=100)
    iterator = epoch_dataset.make_one_shot_iterator()
    for batch in range(NUM_BATCH):
        xs, ys = iterator.get_next()
        # Record the parameters as they were before this update step.
        Ws.append(model.W.numpy())
        bs.append(model.b.numpy())

        train(model, xs, ys, LEARNING_RATE)
    # Evaluate on the full test set once per epoch.
    yhat_prob = model(x_test)
    acc = accuracy(yhat_prob, y_test)
    print("Iter %2d : Testing Accuracy %.4f"%(epoch+1, acc.numpy()))

Iter  1 : Testing Accuracy 0.8391
Iter  2 : Testing Accuracy 0.8737
Iter  3 : Testing Accuracy 0.8821
Iter  4 : Testing Accuracy 0.8899
Iter  5 : Testing Accuracy 0.8952
Iter  6 : Testing Accuracy 0.8980
Iter  7 : Testing Accuracy 0.9010
Iter  8 : Testing Accuracy 0.9032
Iter  9 : Testing Accuracy 0.9048
Iter 10 : Testing Accuracy 0.9053
Iter 11 : Testing Accuracy 0.9068
Iter 12 : Testing Accuracy 0.9078
Iter 13 : Testing Accuracy 0.9090
Iter 14 : Testing Accuracy 0.9098
Iter 15 : Testing Accuracy 0.9110
Iter 16 : Testing Accuracy 0.9118
Iter 17 : Testing Accuracy 0.9122
Iter 18 : Testing Accuracy 0.9127
Iter 19 : Testing Accuracy 0.9133
Iter 20 : Testing Accuracy 0.9153
