In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.transforms as T
from torchvision import datasets
from torch.utils.data import Dataset, DataLoader
from torchsummary import summary
from matplotlib import pyplot as plt

In [2]:
# Seed PyTorch's global RNG so that weight initialisation and DataLoader
# shuffling repeat across "Restart Kernel -> Run All".
# NOTE(review): some CUDA kernels may still be non-deterministic; see the
# PyTorch reproducibility notes if bit-exact runs are required.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [3]:
# Preprocessing pipeline applied to every sample: a single ToTensor step.
transform = T.Compose([T.ToTensor()])

In [4]:
# Directory that torchvision downloads Fashion-MNIST into.
root = '/content'

# Training split (train=True) and evaluation split (train=False); both share
# the same transform and are downloaded on demand.
train_set = datasets.FashionMNIST(root=root, train=True, transform=transform, download=True)
test_set = datasets.FashionMNIST(root=root, train=False, transform=transform, download=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:01<00:00, 15122051.78it/s]


Extracting /content/FashionMNIST/raw/train-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 269971.48it/s]


Extracting /content/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:00<00:00, 5037212.25it/s]


Extracting /content/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /content/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 21657248.74it/s]

Extracting /content/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /content/FashionMNIST/raw






In [5]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 128

# Training batches are reshuffled; evaluation batches keep a fixed order.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

In [6]:
class LeNet(nn.Module):
    """LeNet-style CNN mapping a 1x28x28 image to 10 output scores.

    Two conv -> sigmoid -> average-pool stages reduce the input to a
    16x5x5 feature map (400 values once flattened), which is then fed
    through three linear layers (400 -> 120 -> 84 -> 10). No softmax is
    applied; the final layer's raw outputs are returned.
    """

    def __init__(self):
        super().__init__()

        # Convolutional stages: 1x28x28 -> 6x14x14 -> 16x5x5.
        conv_stages = [
            nn.Conv2d(1, 6, kernel_size=5, padding=2),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5),
            nn.Sigmoid(),
            nn.AvgPool2d(kernel_size=2, stride=2),
        ]

        # Fully connected head on the flattened 16*5*5 = 400 features.
        dense_head = [
            nn.Flatten(),
            nn.Linear(400, 120),
            nn.Sigmoid(),
            nn.Linear(120, 84),
            nn.Sigmoid(),
            nn.Linear(84, 10),
        ]

        # A single Sequential under `net` keeps the state-dict keys as net.<idx>.*
        self.net = nn.Sequential(*conv_stages, *dense_head)

    def forward(self, x):
        """Return the network output for a batch ``x`` of 1x28x28 inputs."""
        return self.net(x)

In [7]:
# Separate LeNet instance used for inspection: print the per-layer output
# shapes and parameter counts for a single 1x28x28 input.
model_for_summary = LeNet().to(device)
summary(model_for_summary, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             156
           Sigmoid-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
           Sigmoid-5           [-1, 16, 10, 10]               0
         AvgPool2d-6             [-1, 16, 5, 5]               0
           Flatten-7                  [-1, 400]               0
            Linear-8                  [-1, 120]          48,120
           Sigmoid-9                  [-1, 120]               0
           Linear-10                   [-1, 84]          10,164
          Sigmoid-11                   [-1, 84]               0
           Linear-12                   [-1, 10]             850
Total params: 61,706
Trainable params: 61,706
Non-trainable params: 0
---------------------------------

In [8]:
# Smoke test: feed one random 1x1x28x28 batch through the model and print the
# 10 raw output scores.
# The tensor is named `sample_input` (not `input`) so the built-in `input()`
# is not shadowed for the rest of the kernel session.
sample_input = torch.randn(1, 1, 28, 28).float().to(device)
out = model_for_summary(sample_input)
print(out)

tensor([[-0.2537, -0.2536, -0.0955,  0.1577,  0.3681, -0.1246,  0.2655,  0.0368,
         -0.0885,  0.3593]], device='cuda:0', grad_fn=<AddmmBackward0>)


In [9]:
def train_run_epoch(model, device, train_loader, optimizer, criterion):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to optimise; it is switched to training mode.
        device: Device each batch is moved to before the forward pass.
        train_loader: Iterable of ``(data, target)`` batches that supports ``len()``.
        optimizer: Optimizer whose ``zero_grad()``/``step()`` are called once per batch.
        criterion: Callable ``criterion(output, target)`` returning a scalar loss tensor.

    Returns:
        The mean of the per-batch loss values over the epoch.
    """
    model.train()
    running_loss = 0

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Forward pass and loss for this mini-batch.
        batch_loss = criterion(model(inputs), labels)

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # Averaged over the number of batches, not the number of samples.
    return running_loss / len(train_loader)

In [10]:
def test_run_epoch(model, device, test_loader):
    model.eval()
    total_loss = 0
    correct = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = criterion(output, target)

            pred = output.argmax(1)
            correct += pred.eq(target).sum().item()

            total_loss += loss.item()

        accuracy = correct / len(test_set)
        avg_loss = total_loss / len(test_loader)

    return accuracy, avg_loss

In [11]:
def train(model, device, train_loder, test_loader, optimizer, criterion, num_epochs):
    """Train ``model`` for ``num_epochs`` epochs, evaluating after every epoch.

    Args:
        model: Network to train.
        device: Device passed through to the per-epoch helpers.
        train_loder: Loader of training batches. The parameter name keeps its
            original (misspelled) form so existing keyword callers still work.
        test_loader: Loader of evaluation batches.
        optimizer: Optimizer passed to ``train_run_epoch``.
        criterion: Loss callable passed to ``train_run_epoch``.
        num_epochs: Number of train/evaluate rounds to run.

    Returns:
        tuple: ``(train_avg_losses, test_avg_losses, accuracies)``, three lists
        with one entry per epoch.
    """
    train_avg_losses = []
    test_avg_losses = []
    accuracies = []

    for epoch in range(1, num_epochs+1):
        print(f"Epoch [{epoch}/{num_epochs}]")

        # Train on the loader that was passed in; previously the argument was
        # ignored and the notebook-global `train_loader` was used instead.
        train_avg_loss = train_run_epoch(model, device, train_loder, optimizer, criterion)
        train_avg_losses.append(train_avg_loss)
        print(f"Average Train Loss = {train_avg_loss:.6f}")

        accuracy, test_avg_loss = test_run_epoch(model, device, test_loader)
        test_avg_losses.append(test_avg_loss)
        accuracies.append(accuracy)
        print(f"Average Test Loss = {test_avg_loss:.6f}")
        print(f"Test Accuracy = {accuracy:.2f}")
        print()

    return train_avg_losses, test_avg_losses, accuracies

In [12]:
# Experiment 1: a fresh LeNet trained with plain SGD.
num_epochs = 20
learning_rate = 0.1

criterion = nn.CrossEntropyLoss()
model_sgd = LeNet().to(device)
optimizer = optim.SGD(model_sgd.parameters(), lr=learning_rate)

In [13]:
# Run the SGD experiment and keep the per-epoch curves for plotting.
sgd_train_losses, sgd_test_losses, sgd_accuracies = train(
    model_sgd, device, train_loader, test_loader, optimizer, criterion, num_epochs
)

Epoch [1/20]
Average Train Loss = 2.307325
Average Test Loss = 2.304787
Test Accuracy = 0.10

Epoch [2/20]
Average Train Loss = 2.306928
Average Test Loss = 2.310335
Test Accuracy = 0.10

Epoch [3/20]
Average Train Loss = 2.306833
Average Test Loss = 2.304823
Test Accuracy = 0.10

Epoch [4/20]
Average Train Loss = 2.306023
Average Test Loss = 2.309367
Test Accuracy = 0.10

Epoch [5/20]
Average Train Loss = 2.305721
Average Test Loss = 2.308794
Test Accuracy = 0.10

Epoch [6/20]
Average Train Loss = 2.305423
Average Test Loss = 2.306512
Test Accuracy = 0.10

Epoch [7/20]
Average Train Loss = 2.305504
Average Test Loss = 2.303954
Test Accuracy = 0.10

Epoch [8/20]
Average Train Loss = 2.304905
Average Test Loss = 2.304245
Test Accuracy = 0.10

Epoch [9/20]
Average Train Loss = 2.304642
Average Test Loss = 2.305611
Test Accuracy = 0.10

Epoch [10/20]
Average Train Loss = 2.304290
Average Test Loss = 2.303201
Test Accuracy = 0.10

Epoch [11/20]
Average Train Loss = 2.303539
Average Test Lo

In [14]:
# Experiment 2: a fresh LeNet trained with Adam.
# `learning_rate`, `criterion` and `optimizer` are rebound here and replace
# the objects created for the SGD run.
num_epochs = 20
learning_rate = 0.01

criterion = nn.CrossEntropyLoss()
model_adam = LeNet().to(device)
optimizer = optim.Adam(model_adam.parameters(), lr=learning_rate)

In [None]:
# Run the Adam experiment and keep the per-epoch curves for plotting.
adam_train_losses, adam_test_losses, adam_accuracies = train(
    model_adam, device, train_loader, test_loader, optimizer, criterion, num_epochs
)

Epoch [1/20]
Average Train Loss = 1.693529
Average Test Loss = 0.832302
Test Accuracy = 0.71

Epoch [2/20]
Average Train Loss = 0.558892
Average Test Loss = 0.491943
Test Accuracy = 0.82

Epoch [3/20]
Average Train Loss = 0.440933
Average Test Loss = 0.438527
Test Accuracy = 0.84

Epoch [4/20]
Average Train Loss = 0.400141
Average Test Loss = 0.413131
Test Accuracy = 0.84

Epoch [5/20]
Average Train Loss = 0.374499
Average Test Loss = 0.409193
Test Accuracy = 0.85

Epoch [6/20]
Average Train Loss = 0.355198
Average Test Loss = 0.379940
Test Accuracy = 0.86

Epoch [7/20]
Average Train Loss = 0.341979
Average Test Loss = 0.363868
Test Accuracy = 0.86

Epoch [8/20]
Average Train Loss = 0.328638
Average Test Loss = 0.352095
Test Accuracy = 0.86

Epoch [9/20]
Average Train Loss = 0.322272
Average Test Loss = 0.380492
Test Accuracy = 0.86

Epoch [10/20]
Average Train Loss = 0.312653
Average Test Loss = 0.337536
Test Accuracy = 0.87

Epoch [11/20]
Average Train Loss = 0.305689
Average Test Lo

In [None]:
# Train/test loss per epoch for both optimizers on one set of axes.
X = range(1, num_epochs+1)

for _curve, _label in (
    (sgd_train_losses, "SGD Train Loss"),
    (sgd_test_losses, "SGD Test Loss"),
    (adam_train_losses, "Adam Train Loss"),
    (adam_test_losses, "Adam Test Loss"),
):
    plt.plot(X, _curve, label=_label)

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.ylim(0, 2.5)
plt.xticks(range(2, num_epochs+1, 2))  # tick every second epoch
plt.legend()
plt.show()

In [None]:
# Test accuracy per epoch for both optimizers; reuses the epoch axis `X`
# built in the loss-plot cell.
for _curve, _label in (
    (sgd_accuracies, "SGD Accuracy"),
    (adam_accuracies, "Adam Accuracy"),
):
    plt.plot(X, _curve, label=_label)

plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.ylim(0, 1)
plt.xticks(range(2, num_epochs+1, 2))  # tick every second epoch
plt.legend()
plt.show()