In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

# Define the neural network architecture
class FashionMNISTNet(nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 64 -> 10.

    The same activation is applied after each of the three hidden layers.
    The final layer's output is returned as-is (no activation or softmax).

    Args:
        activation_function: Callable applied element-wise to the output of
            each hidden layer, e.g. ``torch.relu``. It must accept a tensor
            and return a tensor of the same shape.
    """

    def __init__(self, activation_function):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        self.activation_function = activation_function

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the 10 raw output scores for every sample in ``x``.

        Args:
            x: Tensor whose elements can be regrouped into rows of 784
                values (for instance a batch shaped ``(N, 1, 28, 28)``).

        Returns:
            Tensor of shape ``(N, 10)``.
        """
        # reshape (unlike view) also accepts non-contiguous inputs such as
        # transposed or sliced batches; for contiguous inputs it is still a
        # zero-copy view, so the common path is unchanged.
        x = x.reshape(-1, 784)
        x = self.activation_function(self.fc1(x))
        x = self.activation_function(self.fc2(x))
        x = self.activation_function(self.fc3(x))
        x = self.fc4(x)
        return x

# Select the activation that is handed to FashionMNISTNet and applied after
# each of its hidden layers. Keep exactly one assignment active; the
# commented-out lines are alternatives to swap in.
activation_function = torch.relu
# activation_function = torch.selu
# activation_function = torch.sigmoid

# Fashion-MNIST input pipeline.
# Every image is converted to a tensor and then normalised with mean 0.5 and
# std 0.5 on its single channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Datasets: downloaded into ./data on first use, both splits share `transform`.
trainset = torchvision.datasets.FashionMNIST(
    root='./data', train=True, download=True, transform=transform)
testset = torchvision.datasets.FashionMNIST(
    root='./data', train=False, download=True, transform=transform)

# Loaders: mini-batches of 64, fetched by 2 worker processes. Only the
# training split is shuffled; the test split keeps its stored order.
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=64, shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=64, shuffle=False, num_workers=2)

# Loss: cross-entropy between the network outputs and the integer labels.
criterion = nn.CrossEntropyLoss()

# Model: built with whichever activation was selected above.
net = FashionMNISTNet(activation_function)

# Optimizer: Adam over all model parameters (lr = learning rate).
# NOTE(review): lr=0.01 is ten times Adam's library default of 1e-3 --
# confirm this is intentional.
# Alternative kept for experiments:
# optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9)
optimizer = optim.Adam(params=net.parameters(), lr=1e-2)

# Training loop: each epoch is one full pass over `trainloader`, with a
# parameter update after every mini-batch.
num_epochs = 10
net.train()  # make training mode explicit before any update is performed
for epoch in range(num_epochs):
    running_loss = 0.0
    # Count batches explicitly instead of reading the loop index after the
    # loop: the index would be undefined if the loader yielded nothing.
    num_batches = 0
    for num_batches, (inputs, labels) in enumerate(trainloader, start=1):
        # Gradients accumulate across backward() calls, so clear them first.
        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        # Performs a single optimization step (parameter update).
        optimizer.step()

        running_loss += loss.item()

    # Print the average per-batch loss for this epoch (nan if no batches ran).
    epoch_loss = running_loss / num_batches if num_batches else float('nan')
    print('Epoch [%d/%d], Loss: %.4f' % (epoch+1, num_epochs, epoch_loss))

# Evaluation: count how many test samples receive the correct top-1 label.
correct = 0
total = 0
net.eval()  # inference mode for the model; a no-op for plain Linear layers
with torch.no_grad():  # no gradients are needed while scoring
    for inputs, labels in testloader:
        outputs = net(inputs)
        # Predicted class = index of the largest of the 10 output scores.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

# Print the accuracy of the model. The image count comes from the data that
# was actually scored, and an empty loader yields nan instead of a crash.
accuracy = 100 * correct / total if total else float('nan')
print('Accuracy of the network on the %d test images: %.2f %%' % (
    total, accuracy))


Epoch [1/10], Loss: 0.5731
Epoch [2/10], Loss: 0.4622
Epoch [3/10], Loss: 0.4346
Epoch [4/10], Loss: 0.4102
Epoch [5/10], Loss: 0.4062
Epoch [6/10], Loss: 0.3974
Epoch [7/10], Loss: 0.4039
Epoch [8/10], Loss: 0.3859
Epoch [9/10], Loss: 0.3818
Epoch [10/10], Loss: 0.3692
Accuracy of the network on the 10000 test images: 85.32 %
