# tensorflow.keras

## MLP

<https://www.tensorflow.org/tutorials/quickstart/beginner>

用tensorflow.keras实现一个简单的多层感知机（MLP）multi-layer perception模型。

In [2]:
import tensorflow as tf

tf.__version__

'2.17.0'

In [3]:
# Load dataset of mnist
mnist = tf.keras.datasets.mnist

In [4]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

In [5]:
# preprocess data
print(type(x_train), x_train.shape)
x_train, x_test = x_train/255.0, x_test/255.0

<class 'numpy.ndarray'> (60000, 28, 28)


In [82]:
# build a ML model
tf.random.set_seed(0)
model = tf.keras.models.Sequential([
    # flatten input
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    # hidden layer, 128 neurons, relu activation function 
    tf.keras.layers.Dense(128, activation='relu'), 
    # dropout layer, 20% of the neurons will be dropped out during training,
    # won't drop out any neuron during prediction 
    tf.keras.layers.Dropout(0.2), 
    # output layer, 10 neurons
    tf.keras.layers.Dense(10, activation='softmax')
])


  super().__init__(**kwargs)


In [15]:
# loss function

# from_logits default is False, if True, calculating the logits of input
# SparseCategoricalCrossentropy is used for 0, 1, 2
# CategoricalCrossentropy is used for [1, 0, 0], [0, 1, 0], [0, 0, 1]
loss_func = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

In [14]:
y_train

array([5, 0, 4, ..., 5, 6, 8], dtype=uint8)

In [41]:
# test 测试是否成功运行
y_pred_test = model(x_train[:1])
loss_func(y_train[:1], y_pred_test)

<tf.Tensor: shape=(), dtype=float32, numpy=0.00023481472>

In [83]:
# compile 
model.compile(
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer = tf.keras.optimizers.Adam(),
    metrics = ['accuracy'] # Accuracy会自动把one_hot变成parse, output可以是one_hot也可以是parse
)

# train 
model.fit(x_train, y_train, epochs=5) # epoch为训练轮数，一次前向传播和反向传播为一次epoch

Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 520us/step - accuracy: 0.7176 - loss: 8.6768
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 535us/step - accuracy: 0.8400 - loss: 0.6337
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 485us/step - accuracy: 0.8651 - loss: 0.5087
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 509us/step - accuracy: 0.8871 - loss: 0.4229
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 524us/step - accuracy: 0.8920 - loss: 0.4138


<keras.src.callbacks.history.History at 0x7f600873be20>

In [84]:
# prediction
model.evaluate(x_test, y_test, verbose=2)

313/313 - 1s - 2ms/step - accuracy: 0.9273 - loss: 0.3561


[0.3561297059059143, 0.927299976348877]

**accuracy等价于下面的写法**

In [66]:
# prediction
y_pred = model.predict(x_test)

accuracy = tf.keras.metrics.Accuracy()
accuracy.update_state(y_test, tf.argmax(y_pred, axis=1))
print('Accuracy:', accuracy.result().numpy())

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 444us/step
Accuracy: 0.9787


## other method


In [74]:
x_test.shape, y_test.shape

((10000, 28, 28), (10000,))

* 用其他等价的写法做个对比

In [85]:
tf.random.set_seed(0)
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)), # input_shape参数作为一种tag, 也可以省略
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10), # activation参数默认是linear
    tf.keras.layers.Softmax() # activation单独作为一层
])

model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy']
            )

model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test, verbose=2)

Epoch 1/5


  super().__init__(**kwargs)


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 501us/step - accuracy: 0.7032 - loss: 8.2386
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 478us/step - accuracy: 0.8350 - loss: 0.6440
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 476us/step - accuracy: 0.8727 - loss: 0.4896
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 451us/step - accuracy: 0.8862 - loss: 0.4272
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 505us/step - accuracy: 0.8930 - loss: 0.4020
313/313 - 1s - 2ms/step - accuracy: 0.9378 - loss: 0.2956


[0.29557114839553833, 0.9377999901771545]

*********************
* 另一种写法

In [86]:
tf.random.set_seed(0)
model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)), # input_shape参数作为一种tag, 也可以省略
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10), # activation参数默认是linear
])

model.compile(optimizer='adam',
              # from_logits=True指定输入的是logits,不是softmax输出
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy']
            )

model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test, verbose=2)

Epoch 1/5


  super().__init__(**kwargs)


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 496us/step - accuracy: 0.7218 - loss: 7.7369
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 511us/step - accuracy: 0.8358 - loss: 0.6595
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 491us/step - accuracy: 0.8726 - loss: 0.4862
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 486us/step - accuracy: 0.8870 - loss: 0.4410
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 491us/step - accuracy: 0.9013 - loss: 0.3912
313/313 - 1s - 2ms/step - accuracy: 0.9365 - loss: 0.2745


[0.27448514103889465, 0.9365000128746033]

* more hidden layers

In [8]:
tf.random.set_seed(0)

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(10),
    tf.keras.layers.Softmax()
])
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test, verbose=2)

  super().__init__(**kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 624us/step - accuracy: 0.8343 - loss: 0.5434
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 600us/step - accuracy: 0.9612 - loss: 0.1349
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 601us/step - accuracy: 0.9734 - loss: 0.0910
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 611us/step - accuracy: 0.9796 - loss: 0.0680
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 623us/step - accuracy: 0.9838 - loss: 0.0553
313/313 - 1s - 2ms/step - accuracy: 0.9706 - loss: 0.1055


[0.10552586615085602, 0.9706000089645386]

In [25]:
# F1 score
y_pred = model.predict(x_test)
y_pred = (y_pred > 0.5).astype(int)
f1_score = tf.keras.metrics.F1Score(average='macro')
f1_score.update_state(y_pred, tf.one_hot(y_test, depth=10))
print(f"F1 score: {f1_score.result().numpy()}") 

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 516us/step
F1 score: 0.9713746309280396
