In [16]:
import torch
import torchvision
import torchvision.transforms as transforms

In [17]:
import numpy as np

Download dữ liệu chữ số viết tay MNIST

In [18]:
# Prepare the data: load MNIST and turn it into tensors the MLP can consume.
from tensorflow.keras.datasets import mnist


def _images_to_tensor(images):
    """Cast images to float32, rescale pixels from [0, 255] to [0, 1] and
    flatten every image into a vector of 28*28 values."""
    scaled = np.array(images, np.float32) / 255.
    return torch.from_numpy(scaled.reshape(-1, 28*28))


def _labels_to_tensor(labels):
    """Wrap the label array in a LongTensor (integer class indices)."""
    return torch.from_numpy(labels).type(torch.LongTensor)


(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

x_train, y_train = _images_to_tensor(train_images), _labels_to_tensor(train_labels)
x_test, y_test = _images_to_tensor(test_images), _labels_to_tensor(test_labels)

In [19]:
from torch.utils.data import TensorDataset, DataLoader
# Mini-batch size shared by the training and the test loader.
batch_size = 16

# Pair every image tensor with its label tensor.
train_dataset = TensorDataset(x_train, y_train)
test_dataset = TensorDataset(x_test, y_test)

# Training batches are reshuffled on every pass; test batches keep their order.
trainloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [20]:
# Network dimensions.
num_features = 28 * 28                        # one input per pixel of a flattened image
n_hidden_1, n_hidden_2, n_hidden_3 = 512, 128, 32   # widths of the three hidden layers
num_classes = 10                              # size of the output layer

# Number of passes over the training set.
epoches = 100

Sử dụng các tham số ở trên để xây dựng mô hình

In [21]:
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Fully connected network: three ReLU hidden layers and a linear output.

    Args:
        layer_size: indexable sequence whose first five entries are
            [input size, hidden 1, hidden 2, hidden 3, output size].
    """

    def __init__(self, layer_size):
        super().__init__()
        # Read the five sizes by index so a too-short sequence still raises
        # IndexError and extra entries are ignored.
        d_in, h1, h2, h3, d_out = (layer_size[k] for k in range(5))
        # Attribute names and creation order define the state_dict keys and
        # the order in which random initial weights are drawn.
        self.fc1 = nn.Linear(d_in, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.fc3 = nn.Linear(h2, h3)
        self.output = nn.Linear(h3, d_out)

    def forward(self, x):
        """Return the raw output-layer values (no softmax applied) for `x`."""
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = F.relu(hidden(x))
        return self.output(x)
# Build the network from the hyperparameters declared earlier in the notebook
# rather than repeating the same numbers as literals, so the two cannot drift apart.
net = Net([num_features, n_hidden_1, n_hidden_2, n_hidden_3, num_classes])

In [22]:
import torch.optim as optim

# Loss applied to the network outputs and the integer class labels.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over every parameter of `net`.
optimizer = optim.SGD(
    net.parameters(),
    lr=0.001,
    momentum=0.9,
)

In [23]:
# `rloss` mirrors `running_loss` after every step so the most recent value
# remains available once the loop has finished.
rloss = 0
for epoch in range(epoches):

    # Sum of the batch losses since the last progress report of this epoch.
    running_loss = 0.0
    for step, (inputs, labels) in enumerate(trainloader, start=1):
        # Clear gradients left over from the previous step.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss of every full window of 2000 mini-batches,
        # then start a new window.
        running_loss += loss.item()
        if step % 2000 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, step, running_loss / 2000))
            running_loss = 0.0
        rloss = running_loss
print('Finished Training')

[1,  2000] loss: 1.469
[2,  2000] loss: 0.318
[3,  2000] loss: 0.210
[4,  2000] loss: 0.152
[5,  2000] loss: 0.114
[6,  2000] loss: 0.092
[7,  2000] loss: 0.073
[8,  2000] loss: 0.064
[9,  2000] loss: 0.053
[10,  2000] loss: 0.046
[11,  2000] loss: 0.040
[12,  2000] loss: 0.032
[13,  2000] loss: 0.027
[14,  2000] loss: 0.023
[15,  2000] loss: 0.019
[16,  2000] loss: 0.015
[17,  2000] loss: 0.013
[18,  2000] loss: 0.011
[19,  2000] loss: 0.008
[20,  2000] loss: 0.006
[21,  2000] loss: 0.005
[22,  2000] loss: 0.005
[23,  2000] loss: 0.003
[24,  2000] loss: 0.003
[25,  2000] loss: 0.003
[26,  2000] loss: 0.002
[27,  2000] loss: 0.002
[28,  2000] loss: 0.001
[29,  2000] loss: 0.001
[30,  2000] loss: 0.001
[31,  2000] loss: 0.001
[32,  2000] loss: 0.001
[33,  2000] loss: 0.001
[34,  2000] loss: 0.001
[35,  2000] loss: 0.001
[36,  2000] loss: 0.001
[37,  2000] loss: 0.001
[38,  2000] loss: 0.001
[39,  2000] loss: 0.001
[40,  2000] loss: 0.001
[41,  2000] loss: 0.001
[42,  2000] loss: 0.001
[

In [24]:
# Measure classification accuracy on the test set.
# Switch to evaluation mode first: it makes no difference for the layers used
# by this network, but keeps the cell correct if Dropout/BatchNorm are added.
net.eval()

correct = 0
total = 0
# Only predictions are needed here, so gradient tracking is switched off.
with torch.no_grad():
    for inputs, labels in testloader:
        outputs = net(inputs)
        # The predicted class is the index of the largest output value.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

# Guard against an empty loader, report the real number of evaluated images
# and keep two decimals instead of truncating the percentage to an integer.
accuracy = 100 * correct / total if total else 0.0
print('Accuracy of the network on the %d test images: %.2f %%' % (
    total, accuracy))

Accuracy of the network on the 10000 test images: 98 %


## Save and load model<br>
Trình bày 1 trong các cách lưu model và load model trong PyTorch

1.   Lưu model<br>
```
torch.save(model.state_dict(), PATH)
```
trong đó PATH là đường dẫn tự định nghĩa
2.   Load model <br>

*   Trước tiên phải định nghĩa model trước. Model được định nghĩa phải giống hệt với model đã được lưu lại. Như ví dụ trong bài này, thì sẽ thực hiện như sau: 
```
model = Net([784, 512, 128, 32, 10])
```
*   Load trọng số đã được học vào mô hình<br>
```
model.load_state_dict(torch.load(PATH))
# chuyển các layer như Dropout hay BatchNorm sang chế độ đánh giá (evaluation mode)
model.eval()
```
3. Có thể tham khảo thêm các phương pháp lưu và load model tại: https://pytorch.org/tutorials/beginner/basics/saveloadrun_tutorial.html








In [25]:
# `rloss` holds the SUM of the batch losses accumulated since the training
# loop last printed a report (it reports and resets every 2000 mini-batches),
# so dividing it by 2000 understates the loss. The number of batches actually
# summed at the end of a completed epoch is len(trainloader) % 2000.
# NOTE: assumes the last epoch ran to completion; the value 2000 must match
# the reporting interval used in the training loop.
trailing_batches = len(trainloader) % 2000
# When the epoch length is an exact multiple of the interval, `rloss` was just
# reset and carries no information; store NaN rather than a misleading 0.
last_avg_loss = rloss / trailing_batches if trailing_batches else float('nan')

# Checkpoint with everything needed to resume training or run inference.
torch.save({
    'epoch': epoch,                                   # index of the last epoch run
    'model_state_dict': net.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'loss': last_avg_loss,                            # mean loss of the trailing batches
}, 'checkpoint.pth')

# Alternative: pickle the whole module object. Loading this file requires the
# `Net` class definition to be importable, and it should only be loaded from a
# trusted source because unpickling can execute arbitrary code.
torch.save(net, 'net.pth')

In [27]:
# Rebuild a network with the same architecture as the one that was saved.
model = Net([784, 512, 128, 32, 10])
# The optimizer must be created over the parameters of the NEW `model`
# (not the previously trained `net`), otherwise the restored optimizer state
# would keep updating the old network.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

checkpoint = torch.load('checkpoint.pth')

# Restore the weights, the optimizer state and the saved bookkeeping values.
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch_da_luu = checkpoint['epoch']   # saved epoch index
loss_da_luu = checkpoint['loss']     # saved loss value