## Load Data

In [2]:
import torch
from torch import nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision
from torchvision.datasets import FashionMNIST
import torchvision.transforms as transforms
import numpy as np
import random

# Seed every RNG this notebook uses so a fresh "Restart & Run All" is repeatable.
SEED = 0
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the first CUDA device when one is available, otherwise stay on the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda:0') if use_cuda else torch.device('cpu')
device

In [3]:
# Hyper-parameters read by the experiment cells further down.
num_epochs = 15
batch_size = 512

# FashionMNIST training split stored under ./data (download=True fetches it
# when needed); each image goes through transforms.ToTensor().
train_dataset = FashionMNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
# Batches are reshuffled on every pass over the loader.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 26421880/26421880 [00:03<00:00, 7371687.69it/s] 


Extracting ./data/FashionMNIST/raw/train-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 29515/29515 [00:00<00:00, 144800.85it/s]


Extracting ./data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 4422102/4422102 [00:01<00:00, 2723456.08it/s]


Extracting ./data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to ./data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 5148/5148 [00:00<00:00, 2796202.67it/s]

Extracting ./data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/FashionMNIST/raw






In [4]:
class MLP(nn.Module):
    """Perceptron with three equally wide hidden layers and sigmoid activations.

    The input is flattened from dimension 1 onward, passed through three
    ``Linear -> sigmoid`` stages and projected to ``output_dims`` raw logits.
    Every linear weight is drawn from a normal distribution with mean 0 and
    standard deviation 0.05; every linear bias starts at zero.

    Args:
        input_dims: Number of features per sample after flattening.
        hidden_dims: Width shared by the three hidden layers.
        output_dims: Number of output logits.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.layer1 = nn.Linear(input_dims, hidden_dims)
        self.layer2 = nn.Linear(hidden_dims, hidden_dims)
        self.layer3 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)

        # Re-initialise in registration order so the RNG is consumed exactly
        # as it would be when walking self.modules().
        for linear in (self.layer1, self.layer2, self.layer3, self.output):
            nn.init.normal_(linear.weight, mean=0.0, std=0.05)
            nn.init.constant_(linear.bias, 0.0)

    def forward(self, x):
        """Return logits of shape ``(x.shape[0], output_dims)``."""
        hidden = x.flatten(start_dim=1)
        for linear in (self.layer1, self.layer2, self.layer3):
            hidden = torch.sigmoid(linear(hidden))
        return self.output(hidden)

In [5]:
# Baseline experiment: the MLP defined in the preceding cell, trained with plain SGD.
model = MLP(input_dims=784, hidden_dims=128, output_dims=10).to(device)
criterion = nn.CrossEntropyLoss()
# Spell out the learning rate: older PyTorch releases give SGD no default lr
# and refuse to build the optimizer without one. 1e-3 matches the default of
# recent releases, so the behaviour of this cell is unchanged there.
optimizer = optim.SGD(model.parameters(), lr=1e-3)

In [7]:
def training(num_epochs, model, optimizer, criterion, loader=None):
    """Train ``model`` and print the loss and accuracy of every epoch.

    Args:
        num_epochs: Number of full passes over ``loader``.
        model: ``nn.Module`` that maps a batch of inputs to class logits.
        optimizer: Optimizer already bound to the parameters of ``model``.
        criterion: Callable ``criterion(outputs, targets)`` returning a scalar
            loss tensor. The reported epoch loss assumes this is the mean over
            the batch (the default reduction of ``nn.CrossEntropyLoss``).
        loader: Iterable yielding ``(inputs, targets)`` batches. When omitted,
            the notebook-level ``train_loader`` is used, as before.

    Returns:
        None. One summary line per epoch is printed.

    Raises:
        ZeroDivisionError: If ``loader`` yields no samples.
    """
    if loader is None:
        loader = train_loader

    # Move batches to wherever the model lives instead of relying on a global
    # ``device`` that may be out of sync with the model.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')

    # Make sure layers such as BatchNorm/Dropout are in training mode even if
    # the model was switched to eval() earlier in the session.
    model.train()

    for epoch in range(num_epochs):
        loss_sum = 0.0
        correct = 0
        seen = 0
        for X, y in loader:
            X, y = X.to(target_device), y.to(target_device)
            optimizer.zero_grad()
            outputs = model(X)
            loss = criterion(outputs, y)
            loss.backward()
            optimizer.step()

            # Weight each batch mean by its size so a short final batch does
            # not skew the epoch average.
            batch_len = len(y)
            loss_sum += loss.item() * batch_len
            correct += (torch.argmax(outputs, 1) == y).sum().item()
            seen += batch_len

        t_loss = loss_sum / seen
        t_acc = correct / seen
        print(f"Epoch {epoch+1}/{num_epochs}, Train_Loss: {t_loss:.4f}, Train_Acc: {t_acc:.4f}")

In [None]:
# Run the training loop with the model, optimizer and loss currently in scope.
training(
    num_epochs=num_epochs,
    model=model,
    optimizer=optimizer,
    criterion=criterion,
)

# 8: Train with the Adam optimizer

In [8]:
# Fresh instance of the MLP class currently defined, trained with Adam
# (default hyper-parameters) instead of SGD.
criterion = nn.CrossEntropyLoss()
model = MLP(input_dims=784, hidden_dims=128, output_dims=10)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters())
training(num_epochs=num_epochs, model=model, optimizer=optimizer, criterion=criterion)

Epoch 1/15, Train_Loss: 1.6999, Train_Acc: 0.3707
Epoch 2/15, Train_Loss: 0.9142, Train_Acc: 0.6427
Epoch 3/15, Train_Loss: 0.7109, Train_Acc: 0.7343
Epoch 4/15, Train_Loss: 0.5797, Train_Acc: 0.7903
Epoch 5/15, Train_Loss: 0.5005, Train_Acc: 0.8262
Epoch 6/15, Train_Loss: 0.4524, Train_Acc: 0.8428
Epoch 7/15, Train_Loss: 0.4250, Train_Acc: 0.8528
Epoch 8/15, Train_Loss: 0.4014, Train_Acc: 0.8598
Epoch 9/15, Train_Loss: 0.3813, Train_Acc: 0.8660
Epoch 10/15, Train_Loss: 0.3632, Train_Acc: 0.8723
Epoch 11/15, Train_Loss: 0.3519, Train_Acc: 0.8765
Epoch 12/15, Train_Loss: 0.3413, Train_Acc: 0.8796
Epoch 13/15, Train_Loss: 0.3288, Train_Acc: 0.8833
Epoch 14/15, Train_Loss: 0.3245, Train_Acc: 0.8843
Epoch 15/15, Train_Loss: 0.3147, Train_Acc: 0.8880


# 9: Replace the sigmoid activations with ReLU

In [9]:
class MLP(nn.Module):
    """Perceptron with three equally wide hidden layers and ReLU activations.

    The input is flattened from dimension 1 onward, passed through three
    ``Linear -> ReLU`` stages and projected to ``output_dims`` raw logits.
    Linear weights are drawn from a normal distribution with mean 0 and
    standard deviation 0.05; linear biases start at zero.

    Args:
        input_dims: Number of features per sample after flattening.
        hidden_dims: Width shared by the three hidden layers.
        output_dims: Number of output logits.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.layer1 = nn.Linear(input_dims, hidden_dims)
        self.layer2 = nn.Linear(hidden_dims, hidden_dims)
        self.layer3 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)

        # apply() visits the sub-modules in registration order, so the linear
        # layers are re-initialised in the order they were created.
        self.apply(self._reset_linear)

    @staticmethod
    def _reset_linear(module):
        """Give a linear layer N(0, 0.05^2) weights and a zero bias; skip anything else."""
        if isinstance(module, nn.Linear):
            nn.init.normal_(module.weight, mean=0.0, std=0.05)
            nn.init.constant_(module.bias, 0.0)

    def forward(self, x):
        """Return logits of shape ``(x.shape[0], output_dims)``."""
        flat = torch.flatten(x, start_dim=1)
        h1 = torch.relu(self.layer1(flat))
        h2 = torch.relu(self.layer2(h1))
        h3 = torch.relu(self.layer3(h2))
        return self.output(h3)

In [10]:
# Train a fresh instance of the MLP class from the preceding cell with Adam.
criterion = nn.CrossEntropyLoss()
model = MLP(input_dims=784, hidden_dims=128, output_dims=10)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters())
training(num_epochs=num_epochs, model=model, optimizer=optimizer, criterion=criterion)

Epoch 1/15, Train_Loss: 0.8443, Train_Acc: 0.7076
Epoch 2/15, Train_Loss: 0.4724, Train_Acc: 0.8319
Epoch 3/15, Train_Loss: 0.4133, Train_Acc: 0.8518
Epoch 4/15, Train_Loss: 0.3804, Train_Acc: 0.8628
Epoch 5/15, Train_Loss: 0.3601, Train_Acc: 0.8689
Epoch 6/15, Train_Loss: 0.3357, Train_Acc: 0.8784
Epoch 7/15, Train_Loss: 0.3195, Train_Acc: 0.8825
Epoch 8/15, Train_Loss: 0.3072, Train_Acc: 0.8872
Epoch 9/15, Train_Loss: 0.3021, Train_Acc: 0.8891
Epoch 10/15, Train_Loss: 0.2870, Train_Acc: 0.8947
Epoch 11/15, Train_Loss: 0.2796, Train_Acc: 0.8963
Epoch 12/15, Train_Loss: 0.2740, Train_Acc: 0.8983
Epoch 13/15, Train_Loss: 0.2657, Train_Acc: 0.9014
Epoch 14/15, Train_Loss: 0.2552, Train_Acc: 0.9061
Epoch 15/15, Train_Loss: 0.2493, Train_Acc: 0.9076


# 10: Use Kaiming (He) normal weight initialization

In [11]:
class MLP(nn.Module):
    """ReLU perceptron with three hidden layers and Kaiming-normal weights.

    The input is flattened from dimension 1 onward, passed through three
    ``Linear -> ReLU`` stages and projected to ``output_dims`` raw logits.
    Every linear weight is initialised with ``nn.init.kaiming_normal_`` (its
    default arguments) and every linear bias starts at zero.

    Args:
        input_dims: Number of features per sample after flattening.
        hidden_dims: Width shared by the three hidden layers.
        output_dims: Number of output logits.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.layer1 = nn.Linear(input_dims, hidden_dims)
        self.layer2 = nn.Linear(hidden_dims, hidden_dims)
        self.layer3 = nn.Linear(hidden_dims, hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)

        # All linear layers are direct children, listed in creation order.
        linear_layers = [child for child in self.children() if isinstance(child, nn.Linear)]
        for linear in linear_layers:
            nn.init.kaiming_normal_(linear.weight)
            nn.init.constant_(linear.bias, 0.0)

    def forward(self, x):
        """Return logits of shape ``(x.shape[0], output_dims)``."""
        activations = x.flatten(start_dim=1)
        for linear in (self.layer1, self.layer2, self.layer3):
            activations = linear(activations).relu()
        logits = self.output(activations)
        return logits

In [12]:
# Train a fresh instance of the MLP class from the preceding cell with Adam.
criterion = nn.CrossEntropyLoss()
model = MLP(input_dims=784, hidden_dims=128, output_dims=10)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters())
training(num_epochs=num_epochs, model=model, optimizer=optimizer, criterion=criterion)

Epoch 1/15, Train_Loss: 0.6612, Train_Acc: 0.7773
Epoch 2/15, Train_Loss: 0.4108, Train_Acc: 0.8546
Epoch 3/15, Train_Loss: 0.3691, Train_Acc: 0.8674
Epoch 4/15, Train_Loss: 0.3388, Train_Acc: 0.8776
Epoch 5/15, Train_Loss: 0.3237, Train_Acc: 0.8830
Epoch 6/15, Train_Loss: 0.3053, Train_Acc: 0.8883
Epoch 7/15, Train_Loss: 0.2900, Train_Acc: 0.8942
Epoch 8/15, Train_Loss: 0.2808, Train_Acc: 0.8963
Epoch 9/15, Train_Loss: 0.2706, Train_Acc: 0.9000
Epoch 10/15, Train_Loss: 0.2607, Train_Acc: 0.9055
Epoch 11/15, Train_Loss: 0.2511, Train_Acc: 0.9077
Epoch 12/15, Train_Loss: 0.2425, Train_Acc: 0.9106
Epoch 13/15, Train_Loss: 0.2339, Train_Acc: 0.9130
Epoch 14/15, Train_Loss: 0.2296, Train_Acc: 0.9147
Epoch 15/15, Train_Loss: 0.2205, Train_Acc: 0.9185


# 11: Add batch normalization after each hidden layer

In [13]:
class MLP(nn.Module):
    """ReLU perceptron whose hidden layers are each followed by BatchNorm1d.

    The input is flattened from dimension 1 onward and passed through three
    ``Linear -> BatchNorm1d -> ReLU`` stages before being projected to
    ``output_dims`` raw logits. Linear weights use ``nn.init.kaiming_normal_``
    (default arguments) and linear biases start at zero; the BatchNorm layers
    keep their constructor defaults.

    Args:
        input_dims: Number of features per sample after flattening.
        hidden_dims: Width shared by the three hidden layers.
        output_dims: Number of output logits.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.layer1 = nn.Linear(input_dims, hidden_dims)
        self.bn1 = nn.BatchNorm1d(hidden_dims)
        self.layer2 = nn.Linear(hidden_dims, hidden_dims)
        self.bn2 = nn.BatchNorm1d(hidden_dims)
        self.layer3 = nn.Linear(hidden_dims, hidden_dims)
        self.bn3 = nn.BatchNorm1d(hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)

        # Only the linear layers are re-initialised, in creation order.
        for linear in (self.layer1, self.layer2, self.layer3, self.output):
            nn.init.kaiming_normal_(linear.weight)
            nn.init.constant_(linear.bias, 0.0)

    def forward(self, x):
        """Return logits of shape ``(x.shape[0], output_dims)``."""
        hidden = torch.flatten(x, start_dim=1)
        stages = (
            (self.layer1, self.bn1),
            (self.layer2, self.bn2),
            (self.layer3, self.bn3),
        )
        for linear, norm in stages:
            hidden = torch.relu(norm(linear(hidden)))
        return self.output(hidden)

In [14]:
# Train a fresh instance of the MLP class from the preceding cell with Adam.
criterion = nn.CrossEntropyLoss()
model = MLP(input_dims=784, hidden_dims=128, output_dims=10)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters())
training(num_epochs=num_epochs, model=model, optimizer=optimizer, criterion=criterion)

Epoch 1/15, Train_Loss: 0.5772, Train_Acc: 0.8027
Epoch 2/15, Train_Loss: 0.3463, Train_Acc: 0.8751
Epoch 3/15, Train_Loss: 0.3019, Train_Acc: 0.8911
Epoch 4/15, Train_Loss: 0.2727, Train_Acc: 0.9008
Epoch 5/15, Train_Loss: 0.2480, Train_Acc: 0.9083
Epoch 6/15, Train_Loss: 0.2289, Train_Acc: 0.9158
Epoch 7/15, Train_Loss: 0.2150, Train_Acc: 0.9217
Epoch 8/15, Train_Loss: 0.1985, Train_Acc: 0.9265
Epoch 9/15, Train_Loss: 0.1838, Train_Acc: 0.9321
Epoch 10/15, Train_Loss: 0.1748, Train_Acc: 0.9360
Epoch 11/15, Train_Loss: 0.1647, Train_Acc: 0.9393
Epoch 12/15, Train_Loss: 0.1532, Train_Acc: 0.9441
Epoch 13/15, Train_Loss: 0.1405, Train_Acc: 0.9495
Epoch 14/15, Train_Loss: 0.1354, Train_Acc: 0.9509
Epoch 15/15, Train_Loss: 0.1250, Train_Acc: 0.9542


# 12: Add a skip connection from the first hidden block to the last

In [25]:
class MLP(nn.Module):
    """BatchNorm/ReLU perceptron with a skip connection over hidden layers 2-3.

    The input is flattened from dimension 1 onward and passed through three
    ``Linear -> BatchNorm1d -> ReLU`` stages. The activation of the first
    stage is added to the activation of the third stage before the final
    linear projection to ``output_dims`` raw logits. Linear weights use
    ``nn.init.kaiming_normal_`` (default arguments); linear biases start at
    zero.

    Args:
        input_dims: Number of features per sample after flattening.
        hidden_dims: Width shared by the three hidden layers.
        output_dims: Number of output logits.
    """

    def __init__(self, input_dims, hidden_dims, output_dims):
        super().__init__()
        self.layer1 = nn.Linear(input_dims, hidden_dims)
        self.bn1 = nn.BatchNorm1d(hidden_dims)
        self.layer2 = nn.Linear(hidden_dims, hidden_dims)
        self.bn2 = nn.BatchNorm1d(hidden_dims)
        self.layer3 = nn.Linear(hidden_dims, hidden_dims)
        self.bn3 = nn.BatchNorm1d(hidden_dims)
        self.output = nn.Linear(hidden_dims, output_dims)

        # Only the linear layers are re-initialised, in creation order.
        for linear in (self.layer1, self.layer2, self.layer3, self.output):
            nn.init.kaiming_normal_(linear.weight)
            nn.init.constant_(linear.bias, 0.0)

    def forward(self, x):
        """Return logits of shape ``(x.shape[0], output_dims)``."""
        flat = torch.flatten(x, start_dim=1)

        # First block; its activation is reused as the skip branch.
        skip = torch.relu(self.bn1(self.layer1(flat)))

        mid = torch.relu(self.bn2(self.layer2(skip)))
        last = torch.relu(self.bn3(self.layer3(mid)))

        # Residual sum happens after the third ReLU, right before the head.
        return self.output(last + skip)

In [26]:
# Train a fresh instance of the MLP class from the preceding cell with Adam.
criterion = nn.CrossEntropyLoss()
model = MLP(input_dims=784, hidden_dims=128, output_dims=10)
model = model.to(device)
optimizer = optim.Adam(params=model.parameters())
training(num_epochs=num_epochs, model=model, optimizer=optimizer, criterion=criterion)

Epoch 1/15, Train_Loss: 0.5696, Train_Acc: 0.8019
Epoch 2/15, Train_Loss: 0.3463, Train_Acc: 0.8747
Epoch 3/15, Train_Loss: 0.3045, Train_Acc: 0.8903
Epoch 4/15, Train_Loss: 0.2734, Train_Acc: 0.9013
Epoch 5/15, Train_Loss: 0.2515, Train_Acc: 0.9077
Epoch 6/15, Train_Loss: 0.2320, Train_Acc: 0.9143
Epoch 7/15, Train_Loss: 0.2161, Train_Acc: 0.9222
Epoch 8/15, Train_Loss: 0.2003, Train_Acc: 0.9263
Epoch 9/15, Train_Loss: 0.1893, Train_Acc: 0.9308
Epoch 10/15, Train_Loss: 0.1795, Train_Acc: 0.9351
Epoch 11/15, Train_Loss: 0.1641, Train_Acc: 0.9410
Epoch 12/15, Train_Loss: 0.1540, Train_Acc: 0.9449
Epoch 13/15, Train_Loss: 0.1440, Train_Acc: 0.9481
Epoch 14/15, Train_Loss: 0.1365, Train_Acc: 0.9504
Epoch 15/15, Train_Loss: 0.1203, Train_Acc: 0.9576


# 13