In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn

import argparse

# 1. Train_Val Split

In [118]:
transform = transforms.Compose(
    [transforms.ToTensor(),    # image파일을 0 ~ 1사이의 값을 갖는 Tensor로 변환(0: 검은색)
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), )])  # 채널별로 0.5를 빼고(-0.5 ~ 0.5), 0.5로 나눔

batch_size = 4

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
# train val split
trainset, valset = torch.utils.data.random_split(trainset, [40000, 10000])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size,
                                          shuffle=True, num_workers=2)
valloader = torch.utils.data.DataLoader(valset, batch_size=batch_size, 
                                        shuffle=False, num_workers=2)


testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size,
                                         shuffle=False, num_workers=2)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Files already downloaded and verified
Files already downloaded and verified


# 2. Define MLP Model

In [161]:
class MLP(nn.Module):
    def __init__(self, in_dim, out_dim, hid_dim, n_layer, act, dropout, bn=True):
        super(MLP,self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer
        self.act = act
        self.dropout = dropout
        
        self.fc1 = nn.Linear(self.in_dim, self.hid_dim)
        self.linears = nn.ModuleList()
        self.bns = nn.ModuleList()
        for i in range(self.n_layer):    # n_layer: hidden layer의 개수
            self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
            if bn:
                self.bns.append(nn.BatchNorm1d(hid_dim))
        self.fc2 = nn.Linear(self.hid_dim, self.out_dim)
        
        self.do = nn.Dropout(self.dropout)
            
        if self.act == 'relu':
            self.act = nn.ReLU()
        elif self.act == 'leakyrelu':
            self.act = nn.LeakyReLU()
        else:
            raise ValueError("Invalid activation function!")
            
        
    def forward(self, x):
        x = self.act(self.fc1(x))
        for i in range(len(self.linears)):
            x = self.act(self.linears[i](x))
            x = self.bns[i](x)
            x = self.do(x)
        x = self.fc2(x)          # Softmax 마지막 layer는 activation func이 없어야 함
        return x

In [163]:
model = MLP(32*32*3, 10, 200, 1, 'relu', 0.1, True)
print(model)

MLP(
  (fc1): Linear(in_features=3072, out_features=200, bias=True)
  (linears): ModuleList(
    (0): Linear(in_features=200, out_features=200, bias=True)
  )
  (bns): ModuleList(
    (0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (fc2): Linear(in_features=200, out_features=10, bias=True)
  (do): Dropout(p=0.1, inplace=False)
  (act): ReLU()
)


# 3. Experiment

In [171]:
def experiments(args):
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act, args.dropout, args.bn)
    model = model.to(device)

    criterion = nn.CrossEntropyLoss()
    
    if args.opt == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    elif args.opt == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, )
    elif args.opt == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=args.lr)
    else:
        raise ValueError('Invalid optimizer!')
    
    
    for epoch in range(1, args.epochs+1):

        ## === Train === ##
        running_loss = 0.0
        tr_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            inputs = inputs.view(-1, 32*32*3)
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward, backward, optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print log
            running_loss += loss.item()
            tr_loss += loss.item()
            if i % 2000 == 1999:    # 2000 mini batches마다 출력
                print('[epoch: {}, {}] running_loss: {:.3f}'.format(epoch, i+1, running_loss / 2000))
                running_loss = 0.0
                
        tr_loss = tr_loss / len(trainloader.dataset)
                
        ## === Validation === ##
        correct = 0
        total = 0
        val_loss = 0.0
        with torch.no_grad():
            for data in valloader:
                inputs, labels = data
                inputs = inputs.view(-1, 32*32*3)
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()    # batch에 대한 loss
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()

            val_loss = val_loss / total
            val_acc = correct / total * 100
       
        print('Epoch {}, Train Loss: {:.3f}, Val Loss: {:.3f}, Val Acc: {:.3f}%'.format(
            epoch,tr_loss, val_loss, val_acc))
                    
    ## === Test set === ##
    correct = 0
    total = 0

    with torch.no_grad():
        for data in testloader:
            inputs, labels = data
            inputs = inputs.view(-1, 32*32*3)
            inputs = inputs.to(device)
            labels = labels.to(device)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
        test_acc = correct / total * 100
        
    return tr_loss, val_loss, val_acc, test_acc

In [174]:
seed= 1212
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")

args.in_dim = 3 * 32 * 32
args.out_dim = 10
args.hid_dim = 100
args.n_layer = 2
args.act = 'relu'
args.dropout = 0.1
args.bn = True

args.opt = 'Adam'

args.lr = 0.01
args.mm = 0.9

args.epochs = 5

In [160]:
list_var1 = [2, 3]        # layers
list_var2 = [50, 100]   # hidden_dim

for var1 in list_var1:
    for var2 in list_var2:
        print('======= Start Experiment ========')
        print('layers: {}, hidden_dim: {}'.format(var1, var2))

        args.n_layer = var1
        args.hid_dim = var2
        result = experiments(args)
        print(result)
        print('======= End Experiment ========')

layers: 2, hidden_dim: 50
[epoch: 1, 2000] running_loss: 2.055
[epoch: 1, 4000] running_loss: 1.792
[epoch: 1, 6000] running_loss: 1.722
[epoch: 1, 8000] running_loss: 1.669
[epoch: 1, 10000] running_loss: 1.635
Epoch 1, Train Loss: 0.444, Val Loss: 0.416, Val Acc: 40.550%
[epoch: 2, 2000] running_loss: 1.578
[epoch: 2, 4000] running_loss: 1.565
[epoch: 2, 6000] running_loss: 1.562
[epoch: 2, 8000] running_loss: 1.543
[epoch: 2, 10000] running_loss: 1.511
Epoch 2, Train Loss: 0.388, Val Loss: 0.382, Val Acc: 45.880%
(0.3878904568515718, 0.38215211568027735, 45.879999999999995, 46.82)
layers: 2, hidden_dim: 100
[epoch: 1, 2000] running_loss: 2.074
[epoch: 1, 4000] running_loss: 1.794
[epoch: 1, 6000] running_loss: 1.702
[epoch: 1, 8000] running_loss: 1.644
[epoch: 1, 10000] running_loss: 1.623
Epoch 1, Train Loss: 0.442, Val Loss: 0.401, Val Acc: 42.500%
[epoch: 2, 2000] running_loss: 1.544
[epoch: 2, 4000] running_loss: 1.521
[epoch: 2, 6000] running_loss: 1.529
[epoch: 2, 8000] runnin

In [175]:
list_var1 = ['Adam', 'Adagrad']        # layers
list_var2 = [0.001, 0.01]   # hidden_dim


for var1 in list_var1:
    for var2 in list_var2:
        print('======= Start Experiment ========')
        print('Optimizer: {}, learning_rate: {}'.format(var1, var2))

        args.opt = var1
        args.lr = var2
        result = experiments(args)
        print(result)
        print('======= End Experiment ========')

Optimizer: Adam, learning_rate: 0.001
[epoch: 1, 2000] running_loss: 2.164
[epoch: 1, 4000] running_loss: 2.063
[epoch: 1, 6000] running_loss: 2.036
[epoch: 1, 8000] running_loss: 2.020
[epoch: 1, 10000] running_loss: 2.001
Epoch 1, Train Loss: 0.514, Val Loss: 0.502, Val Acc: 27.490%
[epoch: 2, 2000] running_loss: 1.982
[epoch: 2, 4000] running_loss: 1.972
[epoch: 2, 6000] running_loss: 1.925
[epoch: 2, 8000] running_loss: 1.917
[epoch: 2, 10000] running_loss: 1.913
Epoch 2, Train Loss: 0.485, Val Loss: 0.480, Val Acc: 31.370%
[epoch: 3, 2000] running_loss: 1.898
[epoch: 3, 4000] running_loss: 1.873
[epoch: 3, 6000] running_loss: 1.874
[epoch: 3, 8000] running_loss: 1.860
[epoch: 3, 10000] running_loss: 1.843
Epoch 3, Train Loss: 0.467, Val Loss: 0.463, Val Acc: 34.130%
[epoch: 4, 2000] running_loss: 1.830
[epoch: 4, 4000] running_loss: 1.842
[epoch: 4, 6000] running_loss: 1.823
[epoch: 4, 8000] running_loss: 1.808
[epoch: 4, 10000] running_loss: 1.808
Epoch 4, Train Loss: 0.456, Val 

KeyboardInterrupt: 