In [1]:
# 防止过拟合 
# 1. 最最关键也是最核心的是 提供更多的数据
# 2. 减少模型复杂度 正则化
# 3. Drop-out
# 4. early Stopping

In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

from IPython.core.interactiveshell import InteractiveShell
# import matplotlib.pyplot as plt
# import seaborn as sns

In [3]:
# 配置项
# # 这个要放到设置中文之前否则还是小方框
# plt.style.use("seaborn")

# # 指定默认字体 用来正常显示中文标签
# plt.rcParams['font.sans-serif'] = ['SimHei']
# # 解决保存图像是负号'-'显示为方块的问题
# plt.rcParams['axes.unicode_minus'] = False

# #全部行都能输出
InteractiveShell.ast_node_interactivity = "all"

In [26]:
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

In [27]:
def preprocess_data(x, y):
    x = tf.cast(x, dtype=tf.float32) / 255
    x = tf.reshape(x, [28 * 28])
    y = tf.cast(y, dtype=tf.int32)
    y = tf.one_hot(y, depth=10)

    return x, y

In [28]:
x_train.shape

(60000, 28, 28)

In [29]:
# 划分训练集 验证集 测试集
db_train = tf.data.Dataset.from_tensor_slices((x_train[:50000], y_train[:50000]))
db_val = tf.data.Dataset.from_tensor_slices((x_train[50000:], y_train[50000:]))
db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))

batch_size = 128
db_train = db_train.map(preprocess_data).shuffle(10000).batch(batch_size)
db_val = db_val.map(preprocess_data).shuffle(10000).batch(batch_size)
db_test = db_test.map(preprocess_data).batch(batch_size)

In [40]:
# 划分训练集,验证集,和测试集 另外一种方法参考
# idx = tf.range(60000)
# idx = tf.random.shuffle(idx)
# x_train, y_train = tf.gather(x, idx[:50000]), tf.gather(y, idx[:50000])
# x_val, y_val = tf.gather(x, idx[-10000:]) , tf.gather(y, idx[-10000:])
# print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)
# db_train = tf.data.Dataset.from_tensor_slices((x_train,y_train))
# db_train = db_train.map(preprocess).shuffle(50000).batch(batchsz)

# db_val = tf.data.Dataset.from_tensor_slices((x_val,y_val))
# db_val = db_val.map(preprocess).shuffle(10000).batch(batchsz)



# db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
# db_test = db_test.map(preprocess).batch(batchsz) 

In [30]:
sample = next(iter(db_train))
sample[0].shape, sample[1].shape

(TensorShape([128, 784]), TensorShape([128, 10]))

In [31]:
model = keras.Sequential([
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(10),
])

model.build(input_shape=[None, 28 * 28])
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_10 (Dense)             multiple                  200960    
_________________________________________________________________
dense_11 (Dense)             multiple                  32896     
_________________________________________________________________
dense_12 (Dense)             multiple                  8256      
_________________________________________________________________
dense_13 (Dense)             multiple                  2080      
_________________________________________________________________
dense_14 (Dense)             multiple                  330       
Total params: 244,522
Trainable params: 244,522
Non-trainable params: 0
_________________________________________________________________


In [32]:
model.compile(optimizer=keras.optimizers.Adam(0.01), 
              loss=tf.losses.CategoricalCrossentropy(from_logits=True), 
              metrics=['accuracy']
             )
model.fit(db_train, epochs=10, validation_data=db_val, validation_freq=2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0xb2999cda0>

In [33]:
model.evaluate(db_test)



[0.14446077743232769, 0.9703]

In [39]:
sample = next(iter(db_test))
x = sample[0]
y = sample[1] # one-hot
pred = model.predict(x) # [b, 10]
# convert back to number 
y = tf.argmax(y, axis=1)
pred = tf.argmax(pred, axis=1)
error = pred != y

print(pred)
print(y)
print('预测错误个数:', tf.reduce_sum(tf.cast(error, tf.int32)))

tf.Tensor(
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4 9 6 6 5 4 0 7 4 0 1 3 1 3 6 7 2 7
 1 2 1 1 7 4 2 3 5 1 2 4 4 6 3 5 5 6 0 4 1 9 5 7 8 9 3 7 4 6 4 3 0 7 0 2 9
 1 7 3 2 9 7 7 6 2 7 8 4 7 3 6 1 3 6 9 3 1 4 1 7 6 9 6 0 5 4 5 9 2 1 9 4 8
 7 3 9 7 9 4 4 9 2 5 6 7 6 7 9 0 5], shape=(128,), dtype=int64)
tf.Tensor(
[7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4 9 6 6 5 4 0 7 4 0 1 3 1 3 4 7 2 7
 1 2 1 1 7 4 2 3 5 1 2 4 4 6 3 5 5 6 0 4 1 9 5 7 8 9 3 7 4 6 4 3 0 7 0 2 9
 1 7 3 2 9 7 7 6 2 7 8 4 7 3 6 1 3 6 9 3 1 4 1 7 6 9 6 0 5 4 9 9 2 1 9 4 8
 7 3 9 7 4 4 4 9 2 5 4 7 6 7 9 0 5], shape=(128,), dtype=int64)
预测错误个数: tf.Tensor(1, shape=(), dtype=int32)


In [35]:
y.shape

TensorShape([128])