In [None]:
import torch
from torch import nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define the network architecture
class Net(nn.Module):
    """Two-layer fully connected classifier for 28x28 inputs.

    Each input is flattened to 784 features, passed through a 784 -> 50
    linear layer followed by the selected activation, and then through a
    50 -> 10 linear layer. The output is the raw result of the last linear
    layer (no softmax is applied here).

    Args:
        activation: Name of the hidden-layer non-linearity, either
            ``'relu'`` or ``'sigmoid'``.

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    def __init__(self, activation):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28*28, 50)
        self.fc2 = nn.Linear(50, 10)
        if activation == 'relu':
            self.activation = nn.ReLU()
        elif activation == 'sigmoid':
            self.activation = nn.Sigmoid()
        else:
            # Fail here rather than with an AttributeError on the first
            # forward pass, where the cause is much harder to spot.
            raise ValueError(
                f"Unsupported activation {activation!r}; expected 'relu' or 'sigmoid'"
            )

    def forward(self, x):
        """Return the 10 output scores for every sample in ``x``.

        ``x`` is reshaped to ``(-1, 784)``, so any input whose element
        count is a multiple of 784 is accepted.
        """
        x = x.view(-1, 28*28)
        x = self.activation(self.fc1(x))
        x = self.fc2(x)
        return x

# Load the MNIST training split (fetched into `root` because download=True)
# and wrap it in a loader that yields shuffled mini-batches of 64 samples.
transform = transforms.ToTensor()
train_data = datasets.MNIST(
    root='~/.pytorch/MNIST_data/',
    train=True,
    download=True,
    transform=transform,
)
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)

# (The loss function is defined in the next cell.)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 78767536.48it/s]


Extracting /root/.pytorch/MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 102138021.77it/s]


Extracting /root/.pytorch/MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 26707439.26it/s]


Extracting /root/.pytorch/MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 22706232.14it/s]


Extracting /root/.pytorch/MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/MNIST_data/MNIST/raw



In [None]:
# Seed torch's global RNG so that weight initialisation (and the shuffling
# that draws from the same RNG) repeats on a fresh "Restart & Run All".
torch.manual_seed(0)

# Define the loss
criterion = nn.CrossEntropyLoss()

# Create the networks
net_relu = Net('relu')
net_sigmoid = Net('sigmoid')

# Define the optimizers (Adam). The plain-SGD alternative, kept for reference:
#   optimizer_relu = torch.optim.SGD(net_relu.parameters(), lr=0.01)
#   optimizer_sigmoid = torch.optim.SGD(net_sigmoid.parameters(), lr=0.01)
optimizer_relu = torch.optim.Adam(net_relu.parameters(), lr=0.01)
optimizer_sigmoid = torch.optim.Adam(net_sigmoid.parameters(), lr=0.01)

# Train the networks: within every epoch each model makes one full pass over
# the training data, ReLU first and then Sigmoid.
for epoch in range(10):  # loop over the dataset multiple times
    for name, net, optimizer in [
        ('ReLU', net_relu, optimizer_relu),
        ('Sigmoid', net_sigmoid, optimizer_sigmoid),
    ]:
        running_loss = 0.0
        for inputs, labels in train_loader:
            # zero the parameter gradients left over from the previous step
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # accumulate the batch loss for the epoch average
            running_loss += loss.item()
        # Label the line with the model name; otherwise the two lines printed
        # per epoch cannot be told apart.
        print(f'Epoch {epoch+1}, {name} Loss: {running_loss/len(train_loader)}')

print('Finished Training')



Epoch 1, Loss: 0.2476522531082381
Epoch 1, Loss: 0.2893677392303308
Epoch 2, Loss: 0.13922109115950224
Epoch 2, Loss: 0.14228193213078957
Epoch 3, Loss: 0.11939063329728189
Epoch 3, Loss: 0.11300492928232322
Epoch 4, Loss: 0.11090206976189142
Epoch 4, Loss: 0.09522830536747888
Epoch 5, Loss: 0.1041019195156457
Epoch 5, Loss: 0.08584771079839325
Epoch 6, Loss: 0.09544148916755416
Epoch 6, Loss: 0.07869305453900653
Epoch 7, Loss: 0.08928745738387024
Epoch 7, Loss: 0.0713954939514891
Epoch 8, Loss: 0.08781476881093982
Epoch 8, Loss: 0.06543938169625205
Epoch 9, Loss: 0.08616920096041666
Epoch 9, Loss: 0.061322095668139946
Epoch 10, Loss: 0.08112983987758197
Epoch 10, Loss: 0.06068831523642071
Finished Training


In [None]:
# Now let's evaluate the performance of the two models
test_data = datasets.MNIST(root='~/.pytorch/MNIST_data/', train=False, transform=transform, download=True)
# Accuracy is a sum over samples, so the order is irrelevant: no shuffling.
test_loader = DataLoader(test_data, batch_size=64, shuffle=False)


def _count_correct(model, images, labels):
    """Return how many samples in the batch `model` classifies correctly.

    The predicted class is the index of the largest output score.
    """
    predicted = model(images).argmax(dim=1)
    return (predicted == labels).sum().item()


correct_relu = 0
total_relu = 0
correct_sigmoid = 0
total_sigmoid = 0

# Switch to inference mode. Net only contains Linear/ReLU/Sigmoid layers, so
# this does not change its outputs, but it keeps the cell correct if layers
# such as dropout are added later.
net_relu.eval()
net_sigmoid.eval()

# No gradients are needed for evaluation.
with torch.no_grad():
    for images, labels in test_loader:
        batch_size = labels.size(0)

        # Evaluate ReLU network
        total_relu += batch_size
        correct_relu += _count_correct(net_relu, images, labels)

        # Evaluate Sigmoid network
        total_sigmoid += batch_size
        correct_sigmoid += _count_correct(net_sigmoid, images, labels)

# Report with two decimals (the previous %d silently truncated the value) and
# use the counted number of images instead of a hard-coded 10000.
print(f'Accuracy of the network with ReLU on the {total_relu} test images: {100 * correct_relu / total_relu:.2f} %')
print(f'Accuracy of the network with Sigmoid on the {total_sigmoid} test images: {100 * correct_sigmoid / total_sigmoid:.2f} %')

Accuracy of the network with ReLU on the 10000 test images: 96 %
Accuracy of the network with ReLU on the 10000 test images: 96 %
