# Set Up the Environment

Install PyTorch and torchvision before running the notebook:

> pip install torch torchvision

# Import the Necessary Libraries, and Load and Transform the MNIST Dataset


In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import time


# Preprocessing pipeline: turn each image into a tensor, then normalise its
# single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ]
)

# Training and test splits of MNIST, stored under ./data (downloaded if absent).
trainset = MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
testset = MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transform,
)

# Mini-batches of 64 images; only the training data is reshuffled each epoch.
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)
testloader = DataLoader(dataset=testset, batch_size=64, shuffle=False)

# Define the Neural Network Architecture

In [2]:
class Net(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 10, ReLU after the first two layers."""

    def __init__(self):
        super().__init__()
        # Three linear layers, registered in the order they are applied.
        self.fc1 = nn.Linear(in_features=28 * 28, out_features=512)
        self.fc2 = nn.Linear(in_features=512, out_features=256)
        self.fc3 = nn.Linear(in_features=256, out_features=10)

    def forward(self, x):
        """Flatten the input to rows of 784 values and return the 10 raw class scores per row."""
        flat = x.view(-1, 28 * 28)
        hidden = self.fc1(flat).relu()
        hidden = self.fc2(hidden).relu()
        # No activation on the last layer: the outputs are unnormalised scores.
        return self.fc3(hidden)


model = Net()

# Define a Loss Function and Optimizer

In [3]:
# Cross-entropy loss on the network outputs, minimised by SGD with momentum
# over every parameter of `model`.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
)

# Train the Network

In [4]:
# Train for 10 passes over the training set and time the whole run.
start_time = time.time()
for epoch in range(10):
    running_loss = 0.0  # loss summed since the last progress line

    for i, (inputs, labels) in enumerate(trainloader):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (i + 1) % 100 != 0:
            continue

        # Every 100th mini-batch: report the mean loss of the last 100 and reset.
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
        running_loss = 0.0
end_time = time.time()
total_time = end_time - start_time

print('Finished Training')

[1,   100] loss: 1.218
[1,   200] loss: 0.443
[1,   300] loss: 0.386
[1,   400] loss: 0.308
[1,   500] loss: 0.286
[1,   600] loss: 0.263
[1,   700] loss: 0.243
[1,   800] loss: 0.238
[1,   900] loss: 0.207
[2,   100] loss: 0.185
[2,   200] loss: 0.164
[2,   300] loss: 0.159
[2,   400] loss: 0.159
[2,   500] loss: 0.167
[2,   600] loss: 0.142
[2,   700] loss: 0.139
[2,   800] loss: 0.152
[2,   900] loss: 0.133
[3,   100] loss: 0.115
[3,   200] loss: 0.109
[3,   300] loss: 0.114
[3,   400] loss: 0.111
[3,   500] loss: 0.104
[3,   600] loss: 0.105
[3,   700] loss: 0.098
[3,   800] loss: 0.100
[3,   900] loss: 0.102
[4,   100] loss: 0.097
[4,   200] loss: 0.080
[4,   300] loss: 0.079
[4,   400] loss: 0.086
[4,   500] loss: 0.085
[4,   600] loss: 0.088
[4,   700] loss: 0.088
[4,   800] loss: 0.087
[4,   900] loss: 0.080
[5,   100] loss: 0.062
[5,   200] loss: 0.077
[5,   300] loss: 0.065
[5,   400] loss: 0.064
[5,   500] loss: 0.066
[5,   600] loss: 0.075
[5,   700] loss: 0.064
[5,   800] 

# Test the Network on the Test Data

In [5]:
# Measure classification accuracy of `model` on the test loader.
correct = 0  # predictions that match their label
total = 0    # test samples seen so far
with torch.no_grad():  # inference only, so no gradients need to be tracked
    for images, labels in testloader:
        outputs = model(images)
        # Predicted class = index of the largest score in each row. argmax
        # replaces torch.max(outputs.data, 1): the legacy `.data` attribute is
        # unnecessary inside no_grad and the max values were never used.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print("# Statistics #")
print(f'Total time taken for training with ReLu activation function: {total_time:.2f} seconds')
# Floor division keeps the whole-percent (truncated) figure the cell has always printed.
print(f'Accuracy of the network on the 10,000 test images: {100 * correct // total} %')

# Statistics #
Total time taken for raining with ReLu activation function: 131.77 seconds
Accuracy of the network on the 10,000 test images: 97 %
