In [18]:
import random
import tensorflow as tf

In [19]:
ori_data = []
for i in range(10005):
    ori_data.append(random.randint(0, 9))
print(max(ori_data), min(ori_data), len(ori_data))

9 0 10005


In [20]:
ori_data = tf.reshape(tf.convert_to_tensor(ori_data), shape=[-1, 1])
print(ori_data.shape)

(10005, 1)


In [46]:
feature = tf.concat([ori_data[0:-5], ori_data[1:-4], ori_data[2:-3], ori_data[3:-2]], axis=-1)
feature = tf.reshape(feature, shape=[-1, 4])
label = ori_data[4:]
label = tf.reshape(label, shape=[-1,])
print(feature.shape, label.shape)

(10000, 4) (10000,)


In [47]:
feature = tf.one_hot(feature, 10, dtype=tf.float32)
label = tf.one_hot(label, 10, dtype=tf.float32)

In [48]:
x_train, x_test = feature[:7000], feature[7000:]
y_train, y_test = label[:7000], label[7000:]
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

(7000, 4, 10) (7000, 10) (3000, 4, 10) (3000, 10)


In [24]:
def data_iter(batch_size, features, labels):
    num_examples = len(features)
    indices = list(range(num_examples))
    for i in range(0, num_examples, batch_size):
        j = tf.constant(indices[i: min(i + batch_size, num_examples)])
        yield tf.gather(features, j), tf.gather(labels, j)

In [25]:
batch_size = 200
for x, y in data_iter(batch_size, x_train, y_train):
    print(x.shape, '\n', y.shape)
    break

(200, 4, 10) 
 (200, 10)


In [26]:
def normal(shape):
    return tf.random.normal(shape=shape, stddev=0.01, mean=0, dtype=tf.float32)

In [27]:
def get_params(vocab_size, num_hiddens):
    num_inputs = num_outputs = vocab_size

    # input part params
    W_xh = tf.Variable(normal((num_inputs, num_hiddens)), dtype=tf.float32)
    W_hh = tf.Variable(normal((num_hiddens, num_hiddens)), dtype=tf.float32)
    b_h = tf.Variable(tf.zeros(num_hiddens), dtype=tf.float32)

    # output part params
    W_hq = tf.Variable(normal((num_hiddens, num_outputs)), dtype=tf.float32)
    b_q = tf.Variable(tf.zeros(num_outputs), dtype=tf.float32)

    params = [W_xh, W_hh, b_h, W_hq, b_q]
    return params

In [28]:
def init_rnn_state(batch_size, num_hiddens):
    return (tf.zeros((batch_size, num_hiddens)), )

In [29]:
def rnn(inputs, state, params):
    W_xh, W_hh, b_h, W_hq, b_q = params
    H = state
    outputs = []
    for X in inputs:
        X = tf.reshape(X, (-1, W_xh.shape[0]))
        H = tf.tanh(tf.matmul(X, W_xh) + tf.matmul(H, W_hh) + b_h)
        Y = tf.matmul(H, W_hq) + b_q
        outputs.append(Y)
    return outputs[-1], (H, )

In [30]:
def accuracy(y, y_hat):
    y_hat = tf.argmax(y_hat, axis=1)
    y = tf.argmax(y, axis=1)
    count = y_hat == y
    return float(tf.reduce_sum(tf.cast(count, dtype=y.dtype)) / y.shape[0])

In [31]:
class RNNModel:
    def __init__(self, vocab_size, num_hiddens, init_state, forward_fn, get_params):
        self.vocab_size, self.num_hiddens = vocab_size, num_hiddens
        self.init_state, self.forward_fn = init_state, forward_fn
        self.trainable_variables = get_params(vocab_size, num_hiddens)

    def __call__(self, X, state):
        X = tf.cast(X, tf.float32)
        return self.forward_fn(X, state, self.trainable_variables)

    def begin_state(self, batch_size):
        return self.init_state(batch_size, self.num_hiddens)

In [32]:
RNN_net = RNNModel(vocab_size=10, num_hiddens=32, init_state=init_rnn_state,
                   forward_fn=rnn, get_params=get_params)

In [33]:
loss = tf.keras.losses.BinaryCrossentropy()
updater = tf.keras.optimizers.Adam()

In [38]:
def train_epoch(net, loss, updater, batch_size, x_train, y_train):
    L = []
    ACC = []
    for x, y in data_iter(batch_size, x_train, y_train):
        state = net.begin_state(batch_size)
        with tf.GradientTape(True) as g:
            y_hat, state = net(tf.transpose(x, [1, 0, 2]), state)
            y_hat_fixed = tf.squeeze(y_hat, axis=0)
            l = loss(y, y_hat_fixed)
        L.append(l)
        ACC.append(accuracy(y, y_hat_fixed))
        params = net.trainable_variables
        grads = g.gradient(l, params)
        updater.apply_gradients(zip(grads, params))
    return float(sum(L) / len(L)), float(sum(ACC) / len(ACC))

In [39]:
epoch = 100
for _ in range(epoch):
    l, acc = train_epoch(RNN_net, loss, updater, batch_size, x_train, y_train)
    print('epoch', _ + 1, 'loss', l, 'accuracy', acc)

epoch 1 loss 0.5003222227096558 accuracy 0.10085714285714287
epoch 2 loss 0.35151228308677673 accuracy 0.09857142857142857
epoch 3 loss 0.3274555802345276 accuracy 0.09785714285714285
epoch 4 loss 0.325801819562912 accuracy 0.10128571428571428
epoch 5 loss 0.3254574239253998 accuracy 0.09785714285714285
epoch 6 loss 0.325308620929718 accuracy 0.09428571428571429
epoch 7 loss 0.3252190053462982 accuracy 0.09342857142857143
epoch 8 loss 0.3251616358757019 accuracy 0.09414285714285714
epoch 9 loss 0.32512366771698 accuracy 0.093
epoch 10 loss 0.32509809732437134 accuracy 0.09314285714285715
epoch 11 loss 0.3250799775123596 accuracy 0.094
epoch 12 loss 0.3250660300254822 accuracy 0.09471428571428571
epoch 13 loss 0.3250541090965271 accuracy 0.09542857142857142
epoch 14 loss 0.32504287362098694 accuracy 0.09557142857142857
epoch 15 loss 0.32503148913383484 accuracy 0.09685714285714286
epoch 16 loss 0.3250196576118469 accuracy 0.09657142857142857
epoch 17 loss 0.3250068426132202 accuracy 0.0

In [42]:
test_loss = []
test_accuracy = []
for x, y in data_iter(batch_size, x_test, y_test):
    state = RNN_net.begin_state(batch_size)
    with tf.GradientTape(True) as g:
        y_hat, state = RNN_net(tf.transpose(x, [1, 0, 2]), state)
        y_hat_fixed = tf.squeeze(y_hat, axis=0)
        l = loss(y, y_hat_fixed)
    test_loss.append(l)
    test_accuracy.append(accuracy(y, y_hat_fixed))
    params = RNN_net.trainable_variables
    grads = g.gradient(l, params)
    updater.apply_gradients(zip(grads, params))
print('test loss:', float(sum(test_loss) / len(test_loss)),
'accuracy:', float(sum(test_accuracy) / len(test_accuracy)))

test loss: 0.3281199634075165 accuracy: 0.095


In [43]:
RNN_tf_model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(4, 10)),
    tf.keras.layers.SimpleRNN(units=32),
    tf.keras.layers.Dense(10, activation='softmax')
])

In [44]:
RNN_tf_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy()]
)

In [50]:
RNN_tf_model.fit(
    x=x_train, y=y_train,
    batch_size=batch_size, epochs=100,
    validation_data=(x_test, y_test)
)

Epoch 1/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - categorical_accuracy: 0.1153 - loss: 2.3273 - val_categorical_accuracy: 0.0983 - val_loss: 2.3331
Epoch 2/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - categorical_accuracy: 0.1123 - loss: 2.3175 - val_categorical_accuracy: 0.0960 - val_loss: 2.3273
Epoch 3/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.1204 - loss: 2.3087 - val_categorical_accuracy: 0.0947 - val_loss: 2.3249
Epoch 4/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - categorical_accuracy: 0.1204 - loss: 2.2999 - val_categorical_accuracy: 0.0920 - val_loss: 2.3237
Epoch 5/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step - categorical_accuracy: 0.1239 - loss: 2.2988 - val_categorical_accuracy: 0.0917 - val_loss: 2.3235
Epoch 6/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[

<keras.src.callbacks.history.History at 0x784015f052e0>

In [51]:
LSTM_tf_model = tf.keras.Sequential()
LSTM_tf_model.add(tf.keras.layers.Input(shape=(4, 10)))
LSTM_tf_model.add(tf.keras.layers.LSTM(units=32))
LSTM_tf_model.add(tf.keras.layers.Dense(units=10, activation='softmax'))

In [52]:
LSTM_tf_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy()]
)

In [53]:
LSTM_tf_model.fit(
    x=x_train, y=y_train,
    batch_size=batch_size, epochs=100,
    validation_data=(x_test, y_test)
)

Epoch 1/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 20ms/step - categorical_accuracy: 0.1065 - loss: 2.3043 - val_categorical_accuracy: 0.0993 - val_loss: 2.3056
Epoch 2/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - categorical_accuracy: 0.1123 - loss: 2.3010 - val_categorical_accuracy: 0.1000 - val_loss: 2.3069
Epoch 3/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.1150 - loss: 2.2999 - val_categorical_accuracy: 0.0930 - val_loss: 2.3073
Epoch 4/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.1131 - loss: 2.2995 - val_categorical_accuracy: 0.0953 - val_loss: 2.3079
Epoch 5/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - categorical_accuracy: 0.1111 - loss: 2.2988 - val_categorical_accuracy: 0.0950 - val_loss: 2.3086
Epoch 6/100
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0

<keras.src.callbacks.history.History at 0x7840216e0e60>