In [1]:
import numpy as np
import argparse

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from torchvision import datasets
from torchvision import transforms

from tqdm import tqdm
from time import time

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize(0.5, 0.5)]
)

BATCH_SIZE = 256

train_val_dataset = datasets.CIFAR10('./data', train=True, transform=transform, download=True)
test_dataset = datasets.CIFAR10('./data', train=False, transform=transform, download=True)

train_size = int(len(train_val_dataset) * 0.8)
val_size = len(train_val_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_val_dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True, drop_last=True)

classes = ['plane', 'car', 'bird', 'cat', 'deer', 'dog', 'fog', 'horse', 'ship', 'truck']

Files already downloaded and verified
Files already downloaded and verified


In [3]:
samples = next(iter(train_dataloader))
print(f'Sample shape = {samples[0].shape}') # (batch_size, channel, height, width)

Sample shape = torch.Size([256, 3, 32, 32])


In [4]:
class MLP(nn.Module):
    def __init__(self, input_dim, hidden_dim, out_dim, n_layer, act='relu'):
        super(MLP, self).__init__()

        self.flatten = nn.Flatten()
        self.linear = nn.Linear(input_dim, hidden_dim if n_layer != 1 else out_dim)

        if n_layer < 1:
            raise ValueError('Value of n_layer sholud be more than 1')
        
        self.linears = nn.ModuleList()
        for i in range(n_layer -1):
            self.linears.append(nn.Linear(hidden_dim, hidden_dim if i != (n_layer -2) else out_dim))

        if act == 'relu':
            self.act = nn.ReLU()
        else:
            print('Activation function is not applied, only "relu" functioin can be used in this model')
            print('This model is neural network without activation function')
            self.act = lambda x: x


    def forward(self, inputs):

        x = self.flatten(inputs)
        x = self.linear(x)
        for layer in self.linears:
            x = self.act(layer(x))
        return x

In [5]:
def experiment(args):

    model = MLP(args.in_dim, args.hidden_dim, args.out_dim, args.n_layer, args.act)
    model = model.to(args.device)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())

    list_train_losses = []
    list_val_losses = []
    list_acc = []

    for epoch in tqdm(range(args.epoch), desc='Training & Evaluating'):

        ts = time()        

        # train_step
        model.train()
        train_losses = 0.0
        for img, label in train_dataloader:
            img, label = img.to(args.device), label.to(args.device)
            optimizer.zero_grad()

            outputs = model(img)
            loss = loss_fn(outputs, label)
            
            loss.backward()
            optimizer.step()
            train_losses += loss.cpu().detach().numpy()

        train_losses /= len(train_dataloader)
        list_train_losses.append(train_losses)

        # valid_step
        model.eval()
        val_losses = 0.0
        with torch.no_grad():
            for img, label in val_dataloader:

                img, label = img.to(args.device), label.to(args.device)

                outputs = model(img)
                loss = loss_fn(outputs, label)
                val_losses += loss.cpu().detach().numpy()

            val_losses /= len(val_dataloader)
            list_val_losses.append(val_losses)


        # test_step
        model.eval()
        test_acc = 0.0
        with torch.no_grad():
            for img, label in test_dataloader:
                img, label = img.to(args.device), label.to(args.device)

                outputs = model(img) # (batch, output_dim)
                prediction = torch.argmax(outputs, dim=-1) # (batch, )

                test_acc += torch.eq(prediction, label).sum().cpu().detach().numpy()
            test_acc /= len(test_dataloader.dataset)
            list_acc.append(test_acc)
        te = time()

        print('=' * 20)
        print(f'Epoch: {epoch+1:d}, train_loss : {train_losses:.4f}, val_loss : {val_losses:.4f}, test_acc : {test_acc*100:.2f}, Time took (sec) : {te-ts:.2f}')
        
    return list_train_losses, list_val_losses, list_acc

In [6]:
parser = argparse.ArgumentParser()
args = parser.parse_args('')

args.in_dim = 3072 # (3 * 32* 32)
args.hidden_dim = 32
args.out_dim = 10
args.n_layer = 3
args.act = 'relu'

args.device = torch.device('mps')
args.lr = 1e-2
args.epoch = 5

print(vars(args))
result = experiment(args)


{'in_dim': 3072, 'hidden_dim': 32, 'out_dim': 10, 'n_layer': 3, 'act': 'relu', 'device': device(type='mps'), 'lr': 0.01, 'epoch': 5}


Training & Evaluating:  20%|██        | 1/5 [00:03<00:15,  3.97s/it]

Epoch: 1, train_loss : 1.8713, val_loss : 1.7131, test_acc : 40.95, Time took (sec) : 3.97


Training & Evaluating:  40%|████      | 2/5 [00:07<00:11,  3.86s/it]

Epoch: 2, train_loss : 1.6517, val_loss : 1.6408, test_acc : 43.50, Time took (sec) : 3.78


Training & Evaluating:  60%|██████    | 3/5 [00:11<00:07,  3.82s/it]

Epoch: 3, train_loss : 1.5830, val_loss : 1.5915, test_acc : 44.97, Time took (sec) : 3.78


Training & Evaluating:  80%|████████  | 4/5 [00:15<00:03,  3.81s/it]

Epoch: 4, train_loss : 1.5419, val_loss : 1.5696, test_acc : 44.97, Time took (sec) : 3.78


Training & Evaluating: 100%|██████████| 5/5 [00:19<00:00,  3.81s/it]

Epoch: 5, train_loss : 1.5090, val_loss : 1.5621, test_acc : 45.68, Time took (sec) : 3.76



