# 循環神經網絡 LSTM (長短期記憶)來學習字母表順序

## 模型 1. 用LSTM學習一個字符到一個字符映射

### STEP1. 匯入 Keras 及相關模組

In [1]:
import numpy
import tensorflow.keras.utils as np_utils
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fix the random seeds so that reruns produce comparable results.
# Seeding NumPy alone is not enough: the Keras layers draw their initial weights
# from the backend's own generator, so that one has to be seeded as well.
numpy.random.seed(7)
# np_utils is tensorflow.keras.utils; set_random_seed seeds Python's `random`,
# NumPy (again with the same value, so its state is unchanged) and the backend.
# NOTE(review): bit-identical runs on a GPU may additionally need op determinism.
np_utils.set_random_seed(7)

2024-10-19 16:23:59.630307: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-10-19 16:23:59.643189: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1729326239.659005   26753 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1729326239.662700   26753 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-10-19 16:23:59.679491: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instr

In [2]:
# The sequence the networks will learn: the upper-case English alphabet.
alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

# Lookup tables between each letter and its position (0-25), in both directions.
char_to_int = {letter: position for position, letter in enumerate(alphabet)}
int_to_char = dict(enumerate(alphabet))

In [3]:
# Show the letter -> index table, a blank gap, then the index -> letter table.
forward_label = "字母對應到數字編號: \n"
reverse_label = "數字編號對應到字母: \n"

print(forward_label, char_to_int)
print("\n")
print(reverse_label, int_to_char)

字母對應到數字編號: 
 {'A': 0, 'B': 1, 'C': 2, 'D': 3, 'E': 4, 'F': 5, 'G': 6, 'H': 7, 'I': 8, 'J': 9, 'K': 10, 'L': 11, 'M': 12, 'N': 13, 'O': 14, 'P': 15, 'Q': 16, 'R': 17, 'S': 18, 'T': 19, 'U': 20, 'V': 21, 'W': 22, 'X': 23, 'Y': 24, 'Z': 25}


數字編號對應到字母: 
 {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E', 5: 'F', 6: 'G', 7: 'H', 8: 'I', 9: 'J', 10: 'K', 11: 'L', 12: 'M', 13: 'N', 14: 'O', 15: 'P', 16: 'Q', 17: 'R', 18: 'S', 19: 'T', 20: 'U', 21: 'V', 22: 'W', 23: 'X', 24: 'Y', 25: 'Z'}


In [4]:
# Build the training pairs: each single letter is an input and the letter that
# follows it is the target (inputs A-Y, targets B-Z, 25 pairs in total).
seq_length = 1
dataX, dataY = [], []
for offset in range(len(alphabet) - seq_length):
    boundary = offset + seq_length
    seq_in, seq_out = alphabet[offset:boundary], alphabet[boundary]
    # Store integer codes rather than the characters themselves.
    dataX.append([char_to_int[ch] for ch in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, "->", seq_out)

A -> B
B -> C
C -> D
D -> E
E -> F
F -> G
G -> H
H -> I
I -> J
J -> K
K -> L
L -> M
M -> N
N -> O
O -> P
P -> Q
Q -> R
R -> S
S -> T
T -> U
U -> V
V -> W
W -> X
X -> Y
Y -> Z


### 資料預處理
我們需要將NumPy數組重塑為LSTM網絡所期望的格式，也就是: (samples, time_steps, features)。
同時我們將進行資料的歸一化(normalize)來讓資料的值落於0到1之間。並對標籤值進行one-hot的編碼。


> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如: 

> 給 J -> 預測 K

> 給 X -> 預測 Y


目標訓練張量結構: (samples, time_steps, features) -> (n , **1**, **1** )

請特別注意, 這裡的1個字符會變成1個時間步裡頭的1個element的"feature"向量。

In [5]:
# Arrange the integer codes as (samples, time_steps, features):
# 25 samples, one time step each, one feature per step.
X = numpy.array(dataX).reshape(len(dataX), seq_length, 1)

# Scale the codes by the alphabet size so they fall in [0, 1).
X = X / float(len(alphabet))

# One-hot encode the target letters.
y = np_utils.to_categorical(dataY)

print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  (25, 1, 1)
y shape:  (25, 26)


### STEP5. 建立模型

In [6]:
# Build the network: a single 32-unit LSTM layer followed by a softmax classifier
# with one output per one-hot column of y.
model = Sequential()
# Declare the per-sample shape (time_steps, features) with an explicit Input
# object; passing input_shape= to the LSTM layer makes Keras 3 emit a UserWarning.
model.add(Input(shape=(X.shape[1], X.shape[2])))
model.add(LSTM(32))
model.add(Dense(y.shape[1], activation="softmax"))
model.summary()

  super().__init__(**kwargs)


### STEP6. 定義訓練並進行訓練

In [7]:
# Multi-class classification set-up: cross-entropy loss, Adam optimiser, and
# accuracy reported alongside the loss.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# One sample per gradient step for 500 passes over the data; verbose=2 prints
# one line per epoch.
model.fit(x=X, y=y, batch_size=1, epochs=500, verbose=2)

Epoch 1/500
25/25 - 1s - 31ms/step - accuracy: 0.0000e+00 - loss: 3.2661
Epoch 2/500
25/25 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.2581
Epoch 3/500
25/25 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.2556
Epoch 4/500
25/25 - 0s - 1ms/step - accuracy: 0.0400 - loss: 3.2527
Epoch 5/500
25/25 - 0s - 1ms/step - accuracy: 0.0400 - loss: 3.2501
Epoch 6/500
25/25 - 0s - 1ms/step - accuracy: 0.0400 - loss: 3.2474
Epoch 7/500
25/25 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.2447
Epoch 8/500
25/25 - 0s - 1ms/step - accuracy: 0.0400 - loss: 3.2418
Epoch 9/500
25/25 - 0s - 1ms/step - accuracy: 0.0400 - loss: 3.2387
Epoch 10/500
25/25 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.2354
Epoch 11/500
25/25 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.2322
Epoch 12/500
25/25 - 0s - 1ms/step - accuracy: 0.0400 - loss: 3.2288
Epoch 13/500
25/25 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.2249
Epoch 14/500
25/25 - 0s - 1ms/step - accuracy: 0.0400 - loss: 3.2208
Epoch 15/500
2

<keras.src.callbacks.history.History at 0x1463190ad50>

### STEP7. 評估模型準確率

In [8]:
# Score the model on the training data itself; scores is [loss, accuracy]
# because accuracy is the only metric passed to compile().
scores = model.evaluate(X, y, verbose=0)
accuracy_percent = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_percent)

Model Accuracy: 84.00%


### STEP8. 預測結果

In [9]:
# Feed each of the 25 training inputs (A-Y) back through the model and show the
# letter it considers most likely to come next.
for pattern in dataX:
    # Same preparation as for training: shape (1, time_steps, 1), then scale.
    x = numpy.array(pattern).reshape(1, len(pattern), 1)
    x = x / float(len(alphabet))

    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)  # position of the highest probability
    result = int_to_char[index]  # decode that position back to a letter
    seq_in = [int_to_char[code] for code in pattern]
    print(seq_in, "->", result)

['A'] -> B
['B'] -> B
['C'] -> D
['D'] -> E
['E'] -> F
['F'] -> G
['G'] -> H
['H'] -> I
['I'] -> J
['J'] -> K
['K'] -> L
['L'] -> M
['M'] -> N
['N'] -> O
['O'] -> P
['P'] -> Q
['Q'] -> R
['R'] -> S
['S'] -> T
['T'] -> U
['U'] -> W
['V'] -> W
['W'] -> Z
['X'] -> Z
['Y'] -> Z


## 模型 2. LSTM 學習三個字符特徵窗口(Three-Char Feature Window)到一個字符映射


### STEP1. 準備訓練用資料

In [10]:
# Build the training pairs: every run of three consecutive letters is an input
# and the letter that follows it is the target (ABC -> D ... WXY -> Z).
seq_length = 3
dataX, dataY = [], []
for offset in range(len(alphabet) - seq_length):
    boundary = offset + seq_length
    seq_in, seq_out = alphabet[offset:boundary], alphabet[boundary]
    # Store integer codes rather than the characters themselves.
    dataX.append([char_to_int[ch] for ch in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, "->", seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


### STEP2. 資料預處理


> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如: 

> 給 HIJ -> 預測 K

> 給 EFG -> 預測 H

目標訓練張量結構: (samples, time_steps, features) -> (n , **1**, **3** )

請特別注意, 這裡的三個字符會變成一個有3個element的"feature" vector。因此在準備訓練資料集的時候, 1筆訓練資料只有"1"個時間步, 裡頭存放著"3"個字符的資料"features"向量。

In [11]:
# Arrange the integer codes as (samples, time_steps, features). Note the layout:
# ONE time step per sample, carrying all three letters as three features.
X = numpy.array(dataX).reshape(len(dataX), 1, seq_length)

# Scale the codes by the alphabet size so they fall in [0, 1).
X = X / float(len(alphabet))

# One-hot encode the target letters.
y = np_utils.to_categorical(dataY)

print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  (23, 1, 3)
y shape:  (23, 26)


### STEP3. 建立模型

In [12]:
# Build the network: a single 32-unit LSTM layer followed by a softmax classifier
# with one output per one-hot column of y.
model = Sequential()
# Here each sample is one time step holding three features, so the declared
# shape is (1, 3). An explicit Input object is used because passing input_shape=
# to the LSTM layer makes Keras 3 emit a UserWarning.
model.add(Input(shape=(X.shape[1], X.shape[2])))
model.add(LSTM(32))
model.add(Dense(y.shape[1], activation="softmax"))
model.summary()

### STEP4. 定義訓練並進行訓練

In [13]:
# Multi-class classification set-up: cross-entropy loss, Adam optimiser, and
# accuracy reported alongside the loss.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# One sample per gradient step for 500 passes over the data; verbose=2 prints
# one line per epoch.
model.fit(x=X, y=y, batch_size=1, epochs=500, verbose=2)

Epoch 1/500
23/23 - 1s - 40ms/step - accuracy: 0.0000e+00 - loss: 3.2668
Epoch 2/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2558
Epoch 3/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2508
Epoch 4/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2450
Epoch 5/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2394
Epoch 6/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2342
Epoch 7/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2276
Epoch 8/500
23/23 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.2217
Epoch 9/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.2146
Epoch 10/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2073
Epoch 11/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.1992
Epoch 12/500
23/23 - 0s - 1ms/step - accuracy: 0.0000e+00 - loss: 3.1914
Epoch 13/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.1823
Epoch 14/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.1719
Epoch 15/500
23/23 - 0s - 1ms/

<keras.src.callbacks.history.History at 0x296cb341a60>

### STEP5. 評估模型準確率

In [14]:
# Score the model on the training data itself; scores is [loss, accuracy]
# because accuracy is the only metric passed to compile().
scores = model.evaluate(X, y, verbose=0)
accuracy_percent = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_percent)

Model Accuracy: 86.96%


### STEP6. 預測結果

In [15]:
# Run every training window back through the model and show the predicted
# next letter.
for pattern in dataX:
    # Same layout as training: one time step carrying len(pattern) features.
    x = numpy.array(pattern).reshape(1, 1, len(pattern))
    x = x / float(len(alphabet))

    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)  # position of the highest probability
    result = int_to_char[index]
    seq_in = [int_to_char[code] for code in pattern]
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> X
['U', 'V', 'W'] -> Z
['V', 'W', 'X'] -> Z
['W', 'X', 'Y'] -> Z


## 模型 3. LSTM 學習三個字符的時間步驟窗口(Three-Char Time Step Window)到一個字符的映射

### STEP1. 準備訓練用資料

In [16]:
# Build the training pairs: every run of three consecutive letters is an input
# and the letter that follows it is the target (ABC -> D ... WXY -> Z).
seq_length = 3
dataX, dataY = [], []
for offset in range(len(alphabet) - seq_length):
    boundary = offset + seq_length
    seq_in, seq_out = alphabet[offset:boundary], alphabet[boundary]
    # Store integer codes rather than the characters themselves.
    dataX.append([char_to_int[ch] for ch in seq_in])
    dataY.append(char_to_int[seq_out])
    print(seq_in, "->", seq_out)

ABC -> D
BCD -> E
CDE -> F
DEF -> G
EFG -> H
FGH -> I
GHI -> J
HIJ -> K
IJK -> L
JKL -> M
KLM -> N
LMN -> O
MNO -> P
NOP -> Q
OPQ -> R
PQR -> S
QRS -> T
RST -> U
STU -> V
TUV -> W
UVW -> X
VWX -> Y
WXY -> Z


### STEP2. 資料預處理


> ABCDEFGHIJKLMNOPQRSTUVWXYZ

> 例如: 

> 給 HIJ -> 預測 K

> 給 EFG -> 預測 H

目標訓練張量結構: (samples, time_steps, features) -> (n , **3**, **1** )

準備訓練資料集的時候要把資料的張量結構轉換成, 1筆訓練資料有"3"個時間步, 裡頭存放著"1"個字符的資料"features"向量。

In [17]:
# Arrange the integer codes as (samples, time_steps, features). Note the layout:
# THREE time steps per sample, each carrying a single letter as its one feature.
X = numpy.array(dataX).reshape(len(dataX), seq_length, 1)

# Scale the codes by the alphabet size so they fall in [0, 1).
X = X / float(len(alphabet))

# One-hot encode the target letters.
y = np_utils.to_categorical(dataY)

### STEP3. 建立模型

In [18]:
# Build the network: a single 32-unit LSTM layer followed by a softmax classifier
# with one output per one-hot column of y.
model = Sequential()
# Here each sample is three time steps of one feature each, so the declared
# shape is (3, 1). An explicit Input object is used because passing input_shape=
# to the LSTM layer makes Keras 3 emit a UserWarning.
model.add(Input(shape=(X.shape[1], X.shape[2])))
model.add(LSTM(32))
model.add(Dense(y.shape[1], activation="softmax"))

model.summary()

### STEP4. 定義訓練並進行訓練

In [19]:
# Multi-class classification set-up: cross-entropy loss, Adam optimiser, and
# accuracy reported alongside the loss.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# One sample per gradient step for 500 passes over the data; verbose=2 prints
# one line per epoch.
model.fit(x=X, y=y, batch_size=1, epochs=500, verbose=2)

Epoch 1/500
23/23 - 1s - 37ms/step - accuracy: 0.0000e+00 - loss: 3.2682
Epoch 2/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2532
Epoch 3/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.2448
Epoch 4/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.2372
Epoch 5/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.2286
Epoch 6/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.2211
Epoch 7/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.2119
Epoch 8/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.2011
Epoch 9/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.1892
Epoch 10/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.1765
Epoch 11/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.1620
Epoch 12/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.1454
Epoch 13/500
23/23 - 0s - 2ms/step - accuracy: 0.0435 - loss: 3.1285
Epoch 14/500
23/23 - 0s - 1ms/step - accuracy: 0.0435 - loss: 3.1116
Epoch 15/500
23/23 - 0s - 2ms/step - a

<keras.src.callbacks.history.History at 0x296d036f770>

### STEP5. 評估模型準確率

In [20]:
# Score the model on the training data itself; scores is [loss, accuracy]
# because accuracy is the only metric passed to compile().
scores = model.evaluate(X, y, verbose=0)
accuracy_percent = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_percent)

Model Accuracy: 100.00%


### STEP6. 預測結果

In [21]:
# Run every three-letter window through the model as a (1, 3, 1) tensor and
# show the predicted next letter.
for pattern in dataX:
    # Same layout as training: len(pattern) time steps of one feature each.
    x = numpy.array(pattern).reshape(1, len(pattern), 1)
    x = x / float(len(alphabet))

    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)  # position of the highest probability
    result = int_to_char[index]
    seq_in = [int_to_char[code] for code in pattern]
    print(seq_in, "->", result)

['A', 'B', 'C'] -> D
['B', 'C', 'D'] -> E
['C', 'D', 'E'] -> F
['D', 'E', 'F'] -> G
['E', 'F', 'G'] -> H
['F', 'G', 'H'] -> I
['G', 'H', 'I'] -> J
['H', 'I', 'J'] -> K
['I', 'J', 'K'] -> L
['J', 'K', 'L'] -> M
['K', 'L', 'M'] -> N
['L', 'M', 'N'] -> O
['M', 'N', 'O'] -> P
['N', 'O', 'P'] -> Q
['O', 'P', 'Q'] -> R
['P', 'Q', 'R'] -> S
['Q', 'R', 'S'] -> T
['R', 'S', 'T'] -> U
['S', 'T', 'U'] -> V
['T', 'U', 'V'] -> W
['U', 'V', 'W'] -> X
['V', 'W', 'X'] -> Y
['W', 'X', 'Y'] -> Z


## 模型 4. LSTM學習可變長度字符輸入到單字符輸出

讓我們建立一個模型，來接受"變動字母序列(variable-length)"的輸入來預測下一個字母。

### STEP1. 準備訓練用資料

為了簡化，我們將定義一個最大輸入序列長度(比如說"5", 代表輸入的序列可以是 1 ~ 5)，以加速訓練。

In [22]:
# Generate random training pairs: a run of 1 to max_len consecutive letters as
# the input and the letter right after the run as the target.
num_inputs = 1000
max_len = 5  # longest input sequence
dataX, dataY = [], []
for _ in range(num_inputs):
    # randint's upper bound is exclusive, so start is drawn from 0..23.
    start = numpy.random.randint(len(alphabet) - 2)
    # Cap the run at max_len letters and leave room for a following letter.
    end_bound = min(start + max_len, len(alphabet) - 1)
    end = numpy.random.randint(start, end_bound)
    sequence_in, sequence_out = alphabet[start : end + 1], alphabet[end + 1]
    dataX.append([char_to_int[ch] for ch in sequence_in])
    dataY.append(char_to_int[sequence_out])
    print(sequence_in, "->", sequence_out)

PQRST -> U
W -> X
O -> P
OPQ -> R
IJKLM -> N
QRSTU -> V
ABCD -> E
X -> Y
GHIJ -> K
M -> N
XY -> Z
QRST -> U
ABC -> D
JKLMN -> O
OP -> Q
XY -> Z
D -> E
T -> U
B -> C
QRSTU -> V
HIJ -> K
JKLM -> N
ABCDE -> F
X -> Y
V -> W
DE -> F
DEFG -> H
BCDE -> F
EFGH -> I
BCDE -> F
FG -> H
RST -> U
TUV -> W
STUV -> W
LMN -> O
P -> Q
MNOP -> Q
JK -> L
MNOP -> Q
OPQRS -> T
UVWXY -> Z
PQRS -> T
D -> E
EFGH -> I
IJK -> L
WX -> Y
STUV -> W
MNOPQ -> R
P -> Q
WXY -> Z
VWX -> Y
V -> W
HI -> J
KLMNO -> P
UV -> W
JKL -> M
ABCDE -> F
WXY -> Z
M -> N
CDEF -> G
KLMNO -> P
RST -> U
RS -> T
W -> X
J -> K
WX -> Y
JKLMN -> O
MN -> O
L -> M
BCDE -> F
TU -> V
MNOPQ -> R
NOPQR -> S
HIJ -> K
JKLM -> N
STUVW -> X
QRST -> U
N -> O
VWXY -> Z
B -> C
UVWX -> Y
OP -> Q
K -> L
C -> D
X -> Y
ST -> U
JKLM -> N
B -> C
QR -> S
RS -> T
VWXY -> Z
S -> T
NOP -> Q
KLMNO -> P
IJ -> K
EF -> G
MNOP -> Q
WXY -> Z
HI -> J
P -> Q
STUVW -> X
Q -> R
MN -> O
O -> P
C -> D
L -> M
JKLM -> N
K -> L
IJKLM -> N
FGHIJ -> K
LM -> N
OPQ -> R
U -> V
HIJ

### STEP2. 資料預處理
因為輸入序列的長度會在1到max_len之間變動，因此需要以"0"來填充(padding)。在這裡，我們使用Keras內附的pad_sequences（）函數並設定使用左側（前綴）填充。

In [23]:
# Convert the variable-length sequences to one array, left-padding the shorter
# ones with zeros up to max_len.
# NOTE(review): the padding value 0 is also the integer code of 'A', so after
# padding a leading 'A' cannot be told apart from padding.
X = pad_sequences(dataX, maxlen=max_len, dtype="float32")

# Arrange as (samples, time_steps, features): max_len steps of one feature each.
X = numpy.reshape(X, (X.shape[0], max_len, 1))

# Scale the codes by the alphabet size so they fall in [0, 1).
X = X / float(len(alphabet))

# One-hot encode the targets. The width is pinned to the alphabet size so that
# it does not depend on which letters the random sampling happened to produce.
y = np_utils.to_categorical(dataY, num_classes=len(alphabet))

### STEP3. 建立模型

In [24]:
# Build the network: a single 32-unit LSTM layer followed by a softmax classifier
# with one output per one-hot column of y.
batch_size = 1
model = Sequential()
# Each sample is X.shape[1] time steps of one feature each. An explicit Input
# object is used because passing input_shape= to the LSTM layer makes Keras 3
# emit a UserWarning.
model.add(Input(shape=(X.shape[1], 1)))
model.add(LSTM(32))
model.add(Dense(y.shape[1], activation="softmax"))

model.summary()

### STEP4. 定義訓練並進行訓練

In [25]:
# Multi-class classification set-up: cross-entropy loss, Adam optimiser, and
# accuracy reported alongside the loss.
model.compile(
    optimizer="adam",
    loss="categorical_crossentropy",
    metrics=["accuracy"],
)
# 500 passes over the data with the batch size chosen when the model was built;
# verbose=2 prints one line per epoch.
model.fit(x=X, y=y, batch_size=batch_size, epochs=500, verbose=2)

Epoch 1/500
1000/1000 - 2s - 2ms/step - accuracy: 0.0700 - loss: 3.0963
Epoch 2/500
1000/1000 - 1s - 826us/step - accuracy: 0.1070 - loss: 2.8607
Epoch 3/500
1000/1000 - 1s - 774us/step - accuracy: 0.1700 - loss: 2.5559
Epoch 4/500
1000/1000 - 1s - 779us/step - accuracy: 0.2320 - loss: 2.2867
Epoch 5/500
1000/1000 - 1s - 798us/step - accuracy: 0.2800 - loss: 2.1165
Epoch 6/500
1000/1000 - 1s - 811us/step - accuracy: 0.3150 - loss: 1.9774
Epoch 7/500
1000/1000 - 1s - 781us/step - accuracy: 0.3610 - loss: 1.8770
Epoch 8/500
1000/1000 - 1s - 812us/step - accuracy: 0.3630 - loss: 1.7738
Epoch 9/500
1000/1000 - 1s - 806us/step - accuracy: 0.3900 - loss: 1.6888
Epoch 10/500
1000/1000 - 1s - 829us/step - accuracy: 0.4240 - loss: 1.6155
Epoch 11/500
1000/1000 - 1s - 805us/step - accuracy: 0.4570 - loss: 1.5427
Epoch 12/500
1000/1000 - 1s - 778us/step - accuracy: 0.4790 - loss: 1.4768
Epoch 13/500
1000/1000 - 1s - 798us/step - accuracy: 0.5100 - loss: 1.4184
Epoch 14/500
1000/1000 - 1s - 819us/

<keras.src.callbacks.history.History at 0x296d281e270>

### STEP5. 評估模型準確率

In [26]:
# Score the model on the training data itself; scores is [loss, accuracy]
# because accuracy is the only metric passed to compile().
scores = model.evaluate(X, y, verbose=0)
accuracy_percent = scores[1] * 100
print("Model Accuracy: %.2f%%" % accuracy_percent)

Model Accuracy: 98.60%


### STEP6. 預測結果

In [27]:
# Pick 20 random training sequences (1 to 5 letters), prepare each one as a
# (1, max_len, 1) tensor and show the model's predicted next letter.
for _ in range(20):
    pattern = dataX[numpy.random.randint(len(dataX))]

    # Same preparation as for training: left-pad, reshape, then scale.
    x = pad_sequences([pattern], maxlen=max_len, dtype="float32")
    x = numpy.reshape(x, (1, max_len, 1)) / float(len(alphabet))

    prediction = model.predict(x, verbose=0)
    index = numpy.argmax(prediction)  # position of the highest probability
    result = int_to_char[index]
    seq_in = [int_to_char[code] for code in pattern]
    print(seq_in, "->", result)

['J'] -> K
['H', 'I', 'J'] -> K
['E', 'F'] -> G
['K', 'L', 'M'] -> N
['B'] -> C
['C'] -> D
['R', 'S'] -> T
['A', 'B', 'C'] -> D
['C', 'D', 'E'] -> F
['N', 'O', 'P'] -> Q
['C', 'D'] -> E
['L', 'M'] -> N
['F', 'G', 'H', 'I', 'J'] -> K
['N', 'O', 'P', 'Q'] -> R
['C', 'D', 'E', 'F', 'G'] -> H
['A', 'B', 'C'] -> D
['R', 'S', 'T', 'U', 'V'] -> W
['B', 'C', 'D'] -> E
['F', 'G'] -> H
['K'] -> L


In [None]:
# Disabled scratch experiment: predicting directly from raw letters.
# a = numpy.array([[['L', 'M']]])  # must be converted to numbers first
# print(model.predict(a, verbose=0))

我們可以看到，雖然這個網絡模型沒有從生成的序列資料中完全學習到英文字母表的順序，但它表現相當的好。如果需要, 我們可以對這個模型進行進一步的優化與調整，比如更多的訓練循環(more epochs)或更大的網絡(larger network)，或兩者。

### 參考:
* Jason Brownlee - "[Understanding Stateful LSTM Recurrent Neural Networks in Python with Keras](https://machinelearningmastery.com/understanding-stateful-lstm-recurrent-neural-networks-python-keras/)"

* Keras官網 - [Recurrent Layer](https://keras.io/api/layers/recurrent_layers/)