In [1]:
# 導入函式庫
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.layers import GRU, Activation, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical

# 載入 MNIST 資料庫的訓練資料，並自動分為『訓練組』及『測試組』
(X_train, y_train), (X_test, y_test_org) = mnist.load_data()

# 將 training 的 input 資料轉為3維，並 normalize 把顏色控制在 0 ~ 1 之間
X_train = X_train.reshape(-1, 28, 28) / 255.0
X_test = X_test.reshape(-1, 28, 28) / 255.0

2024-10-26 15:42:35.384325: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-26 15:42:35.528656: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1729928555.613756  103237 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1729928555.634735  103237 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-26 15:42:35.792469: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
# 建立簡單的線性執行的模型
model = Sequential()
# 加 RNN 隱藏層(hidden layer)
# 必須是 3 dimension
model.add(GRU(units=256, input_shape=(28, 28)))

# 加 output 層
model.add(Dense(units=10, activation="softmax"))

# 編譯: 選擇損失函數、優化方法及成效衡量方式
LR = 0.001  # Learning Rate
adam = Adam(LR)
model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.summary()

I0000 00:00:1729928577.467459  103237 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5563 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4060 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9
  super().__init__(**kwargs)


In [3]:
# 將 training 的 label 進行 one-hot encoding，例如數字 7 經過 One-hot encoding 轉換後是 0000001000，即第7個值為 1
y_TrainOneHot = to_categorical(y_train)
y_TestOneHot = to_categorical(y_test_org)

# 將 training 的 input 資料轉為2維
X_train_2D = X_train.reshape(60000, 28, 28)
X_test_2D = X_test.reshape(10000, 28, 28)

x_Train_norm = X_train_2D / 255
x_Test_norm = X_test_2D / 255

In [4]:
# 進行訓練, 訓練過程會存在 train_history 變數中
train_history = model.fit(
    x=x_Train_norm,
    y=y_TrainOneHot,
    validation_split=0.2,
    epochs=10,
    batch_size=800,
    verbose=2,
)

Epoch 1/10


I0000 00:00:1729928585.867805  103548 cuda_dnn.cc:529] Loaded cuDNN version 90300


60/60 - 3s - 43ms/step - accuracy: 0.1116 - loss: 2.3001 - val_accuracy: 0.1061 - val_loss: 2.3134
Epoch 2/10
60/60 - 1s - 15ms/step - accuracy: 0.1766 - loss: 2.2842 - val_accuracy: 0.1338 - val_loss: 2.2655
Epoch 3/10
60/60 - 1s - 15ms/step - accuracy: 0.1428 - loss: 2.2820 - val_accuracy: 0.1072 - val_loss: 2.2904
Epoch 4/10
60/60 - 1s - 14ms/step - accuracy: 0.1225 - loss: 2.2834 - val_accuracy: 0.1420 - val_loss: 2.2683
Epoch 5/10
60/60 - 1s - 15ms/step - accuracy: 0.3072 - loss: 1.9287 - val_accuracy: 0.3948 - val_loss: 1.5159
Epoch 6/10
60/60 - 1s - 15ms/step - accuracy: 0.4426 - loss: 1.5132 - val_accuracy: 0.4872 - val_loss: 1.4116
Epoch 7/10
60/60 - 1s - 15ms/step - accuracy: 0.4816 - loss: 1.4305 - val_accuracy: 0.5043 - val_loss: 1.3578
Epoch 8/10
60/60 - 1s - 15ms/step - accuracy: 0.4987 - loss: 1.3953 - val_accuracy: 0.5221 - val_loss: 1.3228
Epoch 9/10
60/60 - 1s - 15ms/step - accuracy: 0.5201 - loss: 1.3415 - val_accuracy: 0.5378 - val_loss: 1.2796
Epoch 10/10
60/60 - 1

In [5]:
# 顯示訓練成果(分數)
loss, accuracy = model.evaluate(x_Test_norm, y_TestOneHot)
print(f"test loss: {loss}  test accuracy: {accuracy}")

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - accuracy: 0.5724 - loss: 1.2261
test loss: 1.192380428314209  test accuracy: 0.593500018119812


In [6]:
# 預測(prediction)
X = x_Test_norm[0:20, :]
predictions = np.argmax(model.predict(x_Test_norm[0:20]), axis=-1)
# get prediction result
print("actual :", y_test_org[0:20])
print("predict:", predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 85ms/step
actual : [7 2 1 0 4 1 4 9 5 9 0 6 9 0 1 5 9 7 3 4]
predict: [7 2 1 0 4 1 9 9 2 9 2 6 9 0 1 3 9 7 0 4]
