# Set Up the Environment

> pip install torch torchvision

# Import the Necessary Libraries, and Load and Transform the MNIST Dataset


In [6]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
import time

# Preprocessing applied to every image: convert it to a tensor, then normalize
# the single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST train and test splits (in that order), stored under ./data and
# sharing the same preprocessing pipeline.
trainset, testset = (
    MNIST(root='./data', train=is_train, download=True, transform=transform)
    for is_train in (True, False)
)

# Mini-batches of 64 samples; only the training split is shuffled.
trainloader = DataLoader(trainset, shuffle=True, batch_size=64)
testloader = DataLoader(testset, shuffle=False, batch_size=64)

# Define the Neural Network Architecture

In [7]:
class Net(nn.Module):
    """Fully connected classifier: 784 -> 512 -> 256 -> 10.

    Both hidden layers use a sigmoid activation. The final layer returns raw
    scores (logits), which is what CrossEntropyLoss expects as input.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of class logits."""
        # Collapse every sample into one row of 28 * 28 = 784 values.
        flat = x.view(-1, 28 * 28)
        hidden = self.fc1(flat).sigmoid()
        hidden = self.fc2(hidden).sigmoid()
        # No activation on the output layer: the loss consumes raw logits.
        return self.fc3(hidden)


model = Net()

# Define a Loss Function and Optimizer

In [8]:
# Cross-entropy loss, applied to the raw scores the network outputs.
criterion = nn.CrossEntropyLoss()

# SGD with momentum over all of the model's parameters.
optimizer = optim.SGD(
    model.parameters(),
    momentum=0.9,
    lr=0.01,
)

# Train the Network

In [9]:
# Train for 10 epochs, printing the mean loss over each run of 100
# mini-batches, and store the elapsed time in `total_time` so the
# evaluation cell can report it.
model.train()  # make sure the network is in training mode

# perf_counter() is monotonic, so the measured duration cannot be skewed by
# system clock adjustments the way time.time() differences can.
start_time = time.perf_counter()
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, (inputs, labels) in enumerate(trainloader, 0):
        # zero the parameter gradients left over from the previous step
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # accumulate the loss and report its mean every 100 mini-batches
        running_loss += loss.item()
        if i % 100 == 99:
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 100:.3f}')
            running_loss = 0.0

end_time = time.perf_counter()

print('Finished Training')
total_time = end_time - start_time  # elapsed training time in seconds

[1,   100] loss: 2.316
[1,   200] loss: 2.264
[1,   300] loss: 2.124
[1,   400] loss: 1.697
[1,   500] loss: 1.204
[1,   600] loss: 0.907
[1,   700] loss: 0.753
[1,   800] loss: 0.640
[1,   900] loss: 0.580
[2,   100] loss: 0.489
[2,   200] loss: 0.471
[2,   300] loss: 0.459
[2,   400] loss: 0.431
[2,   500] loss: 0.417
[2,   600] loss: 0.393
[2,   700] loss: 0.387
[2,   800] loss: 0.379
[2,   900] loss: 0.366
[3,   100] loss: 0.346
[3,   200] loss: 0.363
[3,   300] loss: 0.342
[3,   400] loss: 0.339
[3,   500] loss: 0.338
[3,   600] loss: 0.306
[3,   700] loss: 0.341
[3,   800] loss: 0.329
[3,   900] loss: 0.323
[4,   100] loss: 0.319
[4,   200] loss: 0.303
[4,   300] loss: 0.316
[4,   400] loss: 0.293
[4,   500] loss: 0.310
[4,   600] loss: 0.302
[4,   700] loss: 0.276
[4,   800] loss: 0.272
[4,   900] loss: 0.296
[5,   100] loss: 0.285
[5,   200] loss: 0.266
[5,   300] loss: 0.275
[5,   400] loss: 0.272
[5,   500] loss: 0.272
[5,   600] loss: 0.268
[5,   700] loss: 0.245
[5,   800] 

# Test the Network on the Test Data

In [10]:
# Measure classification accuracy on the test split and print the run's
# statistics (training time comes from `total_time`, set by the training cell).
model.eval()  # switch to inference mode for evaluation

correct = 0
total = 0
with torch.no_grad():  # no gradients are needed while evaluating
    for images, labels in testloader:
        outputs = model(images)
        # The predicted class is the index of the largest score in each row.
        # Using argmax avoids binding `_`, which IPython reserves for the
        # last cell output.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print("# Statistics #")
print(f'Total time taken for training with Sigmoid activation function: {total_time:.2f} seconds')
# %d truncates the percentage to a whole number.
print('Accuracy of the network on the 10,000 test images: %d %%' % (
    100 * correct / total))

# Statistics #
Total time taken for raining with Sigmoid activation function: 113.86 seconds
Accuracy of the network on the 10,000 test images: 95 %
