# Build BPNN

In [1]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Get the MNIST train and test splits (stored under ./, download requested)
train_data = datasets.MNIST(root='./', train=True, download=True,
                            transform=transforms.ToTensor())
test_data = datasets.MNIST(root='./', train=False, download=True,
                           transform=transforms.ToTensor())

In [3]:
BATCH_SIZE: int = 64   # samples per mini-batch handed out by both loaders
EPOCHS: int = 10       # how many times the train/test cycle is repeated

# Wrap the datasets in batch iterators.
# Training batches are drawn in shuffled order; test batches keep dataset order.
train_loader = DataLoader(dataset=train_data, shuffle=True, batch_size=BATCH_SIZE)
test_loader = DataLoader(dataset=test_data, shuffle=False, batch_size=BATCH_SIZE)

In [4]:
# Build our BP Net
class Net(nn.Module):
    """Two-layer fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        in_features: Length of each flattened input sample
            (default 784 = 28 * 28).
        hidden_features: Width of the hidden layer (default 100).
        num_classes: Number of output scores per sample (default 10).
    """

    def __init__(self, in_features: int = 784, hidden_features: int = 100,
                 num_classes: int = 10) -> None:
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)
        self.activate = nn.ReLU()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return one row of raw (un-normalized) class scores per sample.

        Every dimension after the first is flattened into a single vector,
        so the product of those dimensions must equal ``in_features``.
        """
        # flatten(start_dim=1) rather than view(x.size(0), -1): view raises on
        # non-contiguous tensors and on an empty batch (the -1 is ambiguous).
        x = x.flatten(start_dim=1)
        x = self.activate(self.fc1(x))
        return self.fc2(x)

In [5]:
# Create the network, the loss function and the optimizer that updates it
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
model = Net()
optimizer = optim.SGD(params=model.parameters(), momentum=0.9, lr=1e-3)

In [6]:
# Train func
def train():
    """Run one pass over ``train_loader``, stepping the optimizer per batch."""
    model.train()   # Put the model in training mode
    for inputs, labels in train_loader:
        # Clear gradients accumulated by the previous batch before backprop
        optimizer.zero_grad()
        batch_loss: torch.Tensor = criterion(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

In [7]:
# Test func
def test() -> None:
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    The accuracy is printed as a whole-number percentage (floored by the
    integer division). Nothing is returned.
    """
    model.eval()   # Set model to evaluate mode
    correct: int = 0
    total: int = 0
    with torch.no_grad():   # No need to track gradients while evaluating
        for inputs, labels in test_loader:
            # Predicted class = index of the highest score in each row
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy = {100 * correct // total} %')

In [8]:
# Alternate one training pass and one evaluation pass, EPOCHS times
for epoch in range(EPOCHS):
    progress = 'Epoch {}/{}'.format(epoch + 1, EPOCHS)   # 1-based for display
    print(progress)
    train()
    test()

Epoch 1/10
Accuracy = 85 %
Epoch 2/10
Accuracy = 89 %
Epoch 3/10
Accuracy = 90 %
Epoch 4/10
Accuracy = 90 %
Epoch 5/10
Accuracy = 91 %
Epoch 6/10
Accuracy = 91 %
Epoch 7/10
Accuracy = 92 %
Epoch 8/10
Accuracy = 92 %
Epoch 9/10
Accuracy = 92 %
Epoch 10/10
Accuracy = 93 %


## Optional: Use GPU

If a GPU is available and the installed PyTorch build supports it, we can move the model and the data to the GPU (usually via CUDA) to speed up training.

In [9]:
# Pick the compute device: CUDA when torch reports it available, else the CPU
DEVICE: str
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

In [10]:
# Move the model to the selected device (batches are moved inside the loops)
model.to(DEVICE)

# The optimizer was created, and possibly already stepped, while the model was
# on the CPU, so any per-parameter state it holds (e.g. SGD momentum buffers)
# may still be CPU tensors. Move that state too; otherwise optimizer.step()
# would mix devices when DEVICE is 'cuda'.
for _param_state in optimizer.state.values():
    for _key, _value in list(_param_state.items()):
        if isinstance(_value, torch.Tensor):
            _param_state[_key] = _value.to(DEVICE)

def train_with_device():
    """Run one pass over ``train_loader`` with every batch moved to ``DEVICE``."""
    model.train()   # Set model to training mode
    for inputs, labels in train_loader:
        # The loader yields batches that must match the model's device
        inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
        outputs: torch.Tensor = model(inputs)
        # The loss is computed from tensors already on DEVICE, so it needs no
        # extra .to(DEVICE) of its own.
        loss: torch.Tensor = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

def test_with_device() -> None:
    """Evaluate ``model`` on ``test_loader`` using ``DEVICE`` and print the accuracy.

    The accuracy is printed as a whole-number percentage (floored by the
    integer division). Nothing is returned.
    """
    # Switch to evaluation mode, as test() does; without this the model would
    # be evaluated in whatever mode the last training call left it in.
    model.eval()
    correct: int = 0
    total: int = 0
    with torch.no_grad():   # No need to track gradients while evaluating
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(DEVICE), labels.to(DEVICE)
            # Predicted class = index of the highest score in each row
            predicted = model(inputs).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy = {100 * correct // total} %')

## Save and load our model

In [None]:
# Save only the learned parameters (the state dict), not the whole module
torch.save(obj=model.state_dict(), f='MNIST_bp_model.pth')

In [None]:
# Load
PATH: str = 'MNIST_bp_model.pth'
# map_location='cpu' lets a checkpoint written from a CUDA model be read on a
# machine without a GPU; load_state_dict then copies the values into the
# model's existing parameters, on whatever device they currently live.
model.load_state_dict(torch.load(PATH, map_location='cpu'))