In [1]:
# import numpy as np
# import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [3]:
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

In [4]:
x_train.shape, y_train.shape

((60000, 28, 28), (60000,))

In [5]:
def preprocess_data(x, y):
    """
    预处理 
    """
    x = tf.cast(x, dtype=tf.float32) / 255
    y = tf.cast(y, dtype=tf.int32)
    return x, y

In [9]:
db_train = tf.data.Dataset.from_tensor_slices((x_train, y_train))
db_train = db_train.map(preprocess_data).shuffle(10000).batch(128)

db_test = tf.data.Dataset.from_tensor_slices((x_test, y_test))
db_test = db_test.map(preprocess_data).shuffle(10000).batch(128)

In [12]:
model = keras.Sequential([
    keras.layers.Dense(512, activation=tf.nn.relu),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(10, activation='relu')
])

model.build(input_shape=[None, 28 * 28])

model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_12 (Dense)             multiple                  401920    
_________________________________________________________________
dense_13 (Dense)             multiple                  131328    
_________________________________________________________________
dense_14 (Dense)             multiple                  32896     
_________________________________________________________________
dense_15 (Dense)             multiple                  8256      
_________________________________________________________________
dense_16 (Dense)             multiple                  2080      
_________________________________________________________________
dense_17 (Dense)             multiple                  330       
Total params: 576,810
Trainable params: 576,810
Non-trainable params: 0
________________________________________________

In [32]:
# 优化器
optimizer = keras.optimizers.Adam(1e-3)

epoch = 20
for i in range(epoch):
    for step, (x, y) in enumerate(db_train):
        x = tf.reshape(x, [-1, 28 * 28])
        y = tf.one_hot(y, depth=10)
        y = tf.cast(y, dtype=tf.int32)

        # 求梯度
        with tf.GradientTape() as tap:
            y_pred = model(x)

            # 均方误差
            loss_mse = tf.reduce_mean(keras.losses.mse(y, y_pred))
            # 交叉熵
            loss_ce = tf.reduce_mean(keras.losses.categorical_crossentropy(y, y_pred, from_logits=True))
        # 在loss上对模型可训练的变量求偏导
        grads = tap.gradient(loss_ce, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        if step % 100 == 0:
            print(i, step, 'loss:', float(loss_ce), float(loss_mse))
    
    # 测试集
    total_correct, total_num = 0, 0
    for (x, y) in db_test:
        x = tf.reshape(x, [-1, 28 * 28])
        # logit
        y_pred = model(x)
        # 拿到预测值的概率
        y_prob = tf.nn.softmax(y_pred)
        # 需要注意的是argmax出来的类型是dtype=int64 需要把他转换成int32
        y_pred = tf.argmax(y_prob, axis=1)
        y_pred = tf.cast(y_pred, dtype=tf.int32)

        correct = tf.equal(y, y_pred)
        correct = tf.cast(correct, dtype=tf.int32)
        correct = tf.reduce_sum(correct)
        total_correct += correct
        total_num += x.shape[0]
        
    print('【正确率 aucc = %f】' % (total_correct / total_num))
        
            
        

0 0 loss: 0.40906763076782227 82.20747375488281
0 100 loss: 0.2755591869354248 128.34310913085938
0 200 loss: 0.4162246882915497 142.6072998046875
0 300 loss: 0.4017406702041626 77.76034545898438
0 400 loss: 0.5212452411651611 85.77664184570312
【正确率 aucc = 0.804100】
1 0 loss: 0.37136709690093994 91.99339294433594
1 100 loss: 0.2765488624572754 101.18780517578125
1 200 loss: 0.44983184337615967 114.85932159423828
1 300 loss: 0.4647105932235718 91.80796813964844
1 400 loss: 0.5205637216567993 85.68510437011719
【正确率 aucc = 0.806500】
2 0 loss: 0.3703690767288208 93.24105834960938
2 100 loss: 0.2271244078874588 102.40142822265625
2 200 loss: 0.3989446461200714 109.0265121459961
2 300 loss: 0.44546645879745483 70.57353973388672
2 400 loss: 0.5124439001083374 72.99673461914062
【正确率 aucc = 0.802900】
3 0 loss: 0.375171959400177 80.93992614746094
3 100 loss: 0.25963330268859863 98.94503784179688
3 200 loss: 0.3973570764064789 114.83746337890625
3 300 loss: 0.4216497838497162 84.42012023925781
3 

In [33]:
total_correct, total_num = 0, 0
for (x, y) in db_test:
    x = tf.reshape(x, [-1, 28 * 28])
    # logit
    y_pred = model(x)
    # 拿到预测值的概率
    y_prob = tf.nn.softmax(y_pred)
    # 需要注意的是argmax出来的类型是dtype=int64 需要把他转换成int32
    y_pred = tf.argmax(y_prob, axis=1)
    y_pred = tf.cast(y_pred, dtype=tf.int32)
    print(y_pred)

    correct = tf.equal(y, y_pred)
    correct = tf.cast(correct, dtype=tf.int32)
    correct = tf.reduce_sum(correct)
    total_correct += correct
    total_num += x.shape[0]

tf.Tensor(
[9 9 7 1 6 4 0 6 9 0 2 6 8 4 0 6 1 8 5 9 0 0 0 8 4 9 9 1 8 2 4 9 7 6 9 7 0
 6 9 8 0 0 1 9 2 0 1 9 2 6 8 2 1 5 4 9 0 2 5 4 1 2 7 0 4 2 2 6 8 0 5 4 0 7
 0 0 1 9 4 0 7 2 6 7 5 4 6 0 6 5 2 4 8 4 0 5 4 8 4 0 6 0 0 0 9 0 1 0 7 8 6
 0 6 8 4 7 0 0 4 1 8 6 5 6 6 0 0 8], shape=(128,), dtype=int32)
tf.Tensor(
[0 0 6 1 9 5 9 9 4 4 8 0 1 0 0 0 7 0 7 6 0 1 7 0 5 8 7 8 0 7 0 5 1 0 8 8 1
 7 0 9 8 7 9 6 8 4 0 7 7 6 0 2 0 1 2 1 5 5 6 6 7 7 6 6 4 2 8 8 1 0 0 6 1 9
 4 8 0 1 0 6 5 6 0 2 6 0 9 4 8 9 0 7 2 2 8 5 5 6 8 6 9 0 1 6 0 5 0 7 7 6 8
 7 0 7 0 0 6 0 7 8 8 2 4 4 5 9 9 1], shape=(128,), dtype=int32)
tf.Tensor(
[9 6 9 5 0 7 1 2 1 1 8 8 8 1 6 0 0 9 2 5 9 4 9 8 1 9 0 9 8 2 2 9 0 0 9 6 0
 5 2 2 7 0 0 0 1 2 6 0 8 6 2 0 4 6 1 8 0 9 9 8 9 0 0 8 2 2 4 7 0 5 7 2 6 8
 7 6 5 8 9 4 0 7 4 4 8 0 1 8 0 0 7 0 0 1 9 5 4 5 5 2 6 9 5 6 4 8 2 8 9 4 6
 0 8 7 0 9 0 5 5 0 5 0 6 6 4 5 5 5], shape=(128,), dtype=int32)
tf.Tensor(
[0 7 7 8 1 4 4 8 9 6 2 0 0 9 0 5 2 5 0 5 6 7 8 0 8 4 0 8 5 2 9 0 7 8 2 2 0
 0 6 1 8 4 4 0