In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import numpy as np

In [2]:
# Chuẩn bị dữ liệu
from tensorflow.keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Chuyển đổi sang định dạng float32.
x_train, x_test = np.array(x_train, np.float32), np.array(x_test, np.float32)
x_train, x_test = x_train.reshape([-1, 28, 28]), x_test.reshape([-1, 28, 28])
# Chuẩn hóa ảnh từ from [0, 255] to [0, 1].
x_train, x_test = x_train / 255., x_test / 255.
x_train, x_test, y_train, y_test = torch.from_numpy(x_train), torch.from_numpy(x_test), torch.from_numpy(y_train).type(torch.LongTensor), torch.from_numpy(y_test).type(torch.LongTensor)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [13]:
trainloader = []
batch_size = 16
for (i,j) in zip(x_train, y_train):
    trainloader.append([i,j])
trainloader = torch.utils.data.DataLoader(trainloader, shuffle=True, batch_size=batch_size)

testloader = []
for (i,j) in zip(x_test, y_test):
    testloader.append([i,j])
testloader = torch.utils.data.DataLoader(testloader, shuffle=True, batch_size=batch_size)

In [14]:
# Khởi tạo mô hình BiRNN
class BiRNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(BiRNNModel, self).__init__()

        # Define the dimensions
        self.hidden_dim = hidden_dim  # Number of hidden units in each RNN layer
        self.layer_dim = layer_dim    # Number of layers (stacked RNNs)
        self.bidirectional = True     # Whether to use bidirectional RNN
        self.num_directions = 2 if self.bidirectional else 1

        # Define the bidirectional RNN layer
        self.rnn = nn.RNN(input_dim, hidden_dim, layer_dim, batch_first=True,
                          nonlinearity='relu', bidirectional=self.bidirectional)

        # Fully connected layer
        self.fc = nn.Linear(hidden_dim * self.num_directions, output_dim)  # output_dim size for final output

    def forward(self, x):

        # Initialize hidden state (for both directions if bidirectional)
        h0 = Variable(torch.zeros(self.layer_dim * self.num_directions, x.size(0), self.hidden_dim)).to(x.device)

        # RNN forward pass
        out, hn = self.rnn(x, h0)

        # Pass through the fully connected layer (take output of the last time step)
        out = self.fc(out[:, -1, :])  # Only take the output of the last time step

        return out

In [15]:
# Create RNN
input_dim = 28    # chiều của input
hidden_dim = 100  # chiều của hidden state
layer_dim = 1     # số tầng ẩn
output_dim = 10   # chiều của vector output

model = BiRNNModel(input_dim, hidden_dim, layer_dim, output_dim)

# Cross Entropy Loss
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [17]:
for epoch in range(2):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 1000 == 999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0

print('Finished Training')

[1,  1000] loss: 1.150
[1,  2000] loss: 1.146
[1,  3000] loss: 1.032
[2,  1000] loss: 0.524
[2,  2000] loss: 0.351
[2,  3000] loss: 0.293
Finished Training


In [18]:
correct = 0
total = 0
# quá trình kiểm thử ko cần thiết phải tính gradients cho output
with torch.no_grad():
    for data in testloader:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images)
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 85 %
