In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm

import argparse

# 1. Train_Val Split

In [2]:
transform = transforms.Compose(
    [transforms.ToTensor(),    # image파일을 0 ~ 1사이의 값을 갖는 Tensor로 변환(0: 검은색)
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), )])  # 채널별로 0.5를 빼고(-0.5 ~ 0.5), 0.5로 나눔

batch_size = 128

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
# train val split
trainset, valset = torch.utils.data.random_split(trainset, [40000, 10000])
testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


# 2. Define MLP Model

In [3]:
class MLP(nn.Module):
    def __init__(self, in_dim, out_dim, hid_dim, n_layer, act, dropout, bn=True):
        super(MLP,self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.hid_dim = hid_dim
        self.n_layer = n_layer
        self.act = act
        self.dropout = dropout
        self.bn = bn
        
        self.fc1 = nn.Linear(self.in_dim, self.hid_dim)
        self.linears = nn.ModuleList()
        self.bns = nn.ModuleList()
        for i in range(self.n_layer):    # n_layer: hidden layer의 개수
            self.linears.append(nn.Linear(self.hid_dim, self.hid_dim))
            if self.bn:
                self.bns.append(nn.BatchNorm1d(hid_dim))
        self.fc2 = nn.Linear(self.hid_dim, self.out_dim)
        
        self.do = nn.Dropout(self.dropout)
            
        if self.act == 'relu':
            self.act = nn.ReLU()
        elif self.act == 'leakyrelu':
            self.act = nn.LeakyReLU()
        else:
            raise ValueError("Invalid activation function!")
            
        
    def forward(self, x):
        x = self.act(self.fc1(x))
        for i in range(len(self.linears)):
            x = self.act(self.linears[i](x))
            if self.bn:
                x = self.bns[i](x)
            x = self.do(x)
        x = self.fc2(x)          # Softmax 마지막 layer는 activation func이 없어야 함
        return x

In [4]:
model = MLP(32*32*3, 10, 200, 1, 'relu', 0.1, True)
print(model)

MLP(
  (fc1): Linear(in_features=3072, out_features=200, bias=True)
  (linears): ModuleList(
    (0): Linear(in_features=200, out_features=200, bias=True)
  )
  (bns): ModuleList(
    (0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (fc2): Linear(in_features=200, out_features=10, bias=True)
  (do): Dropout(p=0.1, inplace=False)
  (act): ReLU()
)


# 3. Experiment

In [5]:
def experiments(args):

    # batch-size test
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                            shuffle=True, num_workers=2)
    valloader = torch.utils.data.DataLoader(valset, batch_size=args.batch_size, 
                                            shuffle=False, num_workers=2)

    testloader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
                                            shuffle=False, num_workers=2)


    # load model
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act, args.dropout, args.bn)
    model = model.to(device)

    print('model on {}'.format(device))

    criterion = nn.CrossEntropyLoss()
    
    if args.opt == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    elif args.opt == 'Adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr, )
    elif args.opt == 'Adagrad':
        optimizer = optim.Adagrad(model.parameters(), lr=args.lr)
    else:
        raise ValueError('Invalid optimizer!')
    
    train_losses = []
    val_losses = []
    val_accs = []
    test_accs = []
    
    for epoch in range(1, args.epochs+1):

        ## === Train === ##
        running_loss = 0.0
        tr_loss = 0.0
        # for i, data in enumerate(tqdm(trainloader, 0)):
        for i, data in enumerate(trainloader, 0):
            # get the inputs
            inputs, labels = data
            inputs = inputs.view(-1, 32*32*3)
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward, backward, optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print log
            running_loss += loss.item()
            tr_loss += loss.item()
            if i % 2000 == 1999:    # 2000 mini batches마다 출력
                print('[epoch: {}, {}] running_loss: {:.3f}'.format(epoch, i+1, running_loss / 2000))
                running_loss = 0.0
                
        tr_loss = tr_loss / len(trainloader)
                
        ## === Validation === ##
        correct = 0
        total = 0
        val_loss = 0.0
        with torch.no_grad():
            for data in valloader:
                inputs, labels = data
                inputs = inputs.view(-1, 32*32*3)
                inputs = inputs.to(device)
                labels = labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item()    # batch에 대한 loss
                _, preds = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (preds == labels).sum().item()

            val_loss = val_loss / len(valloader)
            val_acc = correct / total * 100
       
        print('Epoch {}, Train Loss: {:.5f}, Val Loss: {:.5f}, Val Acc: {:.3f}%'.format(
            epoch,tr_loss, val_loss, val_acc))
        
        train_losses.append(tr_loss)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
                    
    # ## === Test set === ##
    # correct = 0
    # total = 0

    # with torch.no_grad():
    #     for data in testloader:
    #         inputs, labels = data
    #         inputs = inputs.view(-1, 32*32*3)
    #         inputs = inputs.to(device)
    #         labels = labels.to(device)

    #         outputs = model(inputs)
    #         _, preds = torch.max(outputs, 1)
    #         total += labels.size(0)
    #         correct += (preds == labels).sum().item()
    #     test_acc = correct / total * 100


    result = {}
    result['train_losses'] = train_losses
    result['val_losses'] = val_losses
    result['val_accs'] = val_accs
        
    return vars(args), result

In [14]:
import json

def save_res(result, params, name_var1, name_var2, var1, var2):
    with open(f"/content/drive/MyDrive/SADL/results/{name_var1}{var1}-{name_var2}{var2}.json", "w") as f:
        json.dump(result, f)
    with open(f"/content/drive/MyDrive/SADL/results/{name_var1}{var1}-{name_var2}{var2}_args.json", "w") as f:
        json.dump(params, f)
    print('save result!')

In [15]:
seed= 1212
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")

args.batch_size = 512

args.in_dim = 3 * 32 * 32
args.out_dim = 10
args.hid_dim = 100
args.n_layer = 2
args.act = 'relu'
args.dropout = 0.1
args.bn = True

args.opt = 'Adam'

args.lr = 0.01
args.mm = 0.9

args.epochs = 10

In [16]:
list_var1 = [2,3,4]        # layers
list_var2 = [50,100,150,200]   # hidden_dim
name_var1, name_var2 = 'n_layer', 'hid_dim'

res = pd.DataFrame(columns=['train_losses', 'val_losses','val_accs'])
for var1 in list_var1:
    for var2 in list_var2:
        print('======= Start Experiment ========')
        print('{}: {}, {}: {}'.format(name_var1, var1, name_var2, var2))

        args.n_layer = var1
        args.hid_dim = var2
        params, result = experiments(args)
        
        save_res(result, params, name_var1, name_var2, var1, var2)
        print(result)
        print('======== End Experiment ========')

n_layer: 2, hid_dim: 50
model on cuda
Epoch 1, Train Loss: 1.86944, Val Loss: 1.72633, Val Acc: 37.770%
Epoch 2, Train Loss: 1.67797, Val Loss: 1.64338, Val Acc: 40.730%
Epoch 3, Train Loss: 1.58345, Val Loss: 1.58548, Val Acc: 43.540%
Epoch 4, Train Loss: 1.51561, Val Loss: 1.54079, Val Acc: 45.240%
Epoch 5, Train Loss: 1.46444, Val Loss: 1.52683, Val Acc: 45.330%
Epoch 6, Train Loss: 1.42901, Val Loss: 1.47997, Val Acc: 47.140%
Epoch 7, Train Loss: 1.38958, Val Loss: 1.48482, Val Acc: 47.770%
Epoch 8, Train Loss: 1.36011, Val Loss: 1.48842, Val Acc: 47.100%
Epoch 9, Train Loss: 1.33611, Val Loss: 1.46215, Val Acc: 48.030%
Epoch 10, Train Loss: 1.30908, Val Loss: 1.43643, Val Acc: 49.750%
save result!
{'train_losses': [1.8694393529167659, 1.6779745183413541, 1.5834493606905393, 1.5156116621403755, 1.4644438963902147, 1.4290100562421582, 1.3895791904835761, 1.3601095570793635, 1.3361099566085428, 1.3090776084344597], 'val_losses': [1.726333999633789, 1.6433834910392762, 1.5854796230793

layers: 4, hidden_dim: 200

In [17]:
seed= 1212
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")

args.batch_size = 512

args.in_dim = 3 * 32 * 32
args.out_dim = 10
args.hid_dim = 200
args.n_layer = 4
args.act = 'relu'
args.dropout = 0.1
args.bn = True

args.opt = 'Adam'

args.lr = 0.01
args.mm = 0.9

args.epochs = 10


list_var1 = ['Adam', 'SGD', 'Adagrad']        # optimizer
list_var2 = [0.1, 0.01, 0.001]   # learning rate
name_var1, name_var2 = 'optim', 'lr'

res = pd.DataFrame(columns=['train_losses', 'val_losses','val_accs'])
for var1 in list_var1:
    for var2 in list_var2:
        print('======= Start Experiment ========')
        print('{}: {}, {}: {}'.format(name_var1, var1, name_var2, var2))

        args.optim = var1
        args.lr = var2
        params, result = experiments(args)
        
        save_res(result, params, name_var1, name_var2, var1, var2)
        print(result)
        print('======== End Experiment ========')

optim: Adam, lr: 0.1
model on cuda
Epoch 1, Train Loss: 2.38841, Val Loss: 1.96370, Val Acc: 27.330%
Epoch 2, Train Loss: 1.89921, Val Loss: 1.85540, Val Acc: 31.570%
Epoch 3, Train Loss: 1.81939, Val Loss: 1.87402, Val Acc: 30.830%
Epoch 4, Train Loss: 1.78504, Val Loss: 1.76136, Val Acc: 37.480%
Epoch 5, Train Loss: 1.72502, Val Loss: 1.70497, Val Acc: 38.880%
Epoch 6, Train Loss: 1.66877, Val Loss: 1.68814, Val Acc: 39.950%
Epoch 7, Train Loss: 1.65633, Val Loss: 1.67389, Val Acc: 40.020%
Epoch 8, Train Loss: 1.61368, Val Loss: 1.65849, Val Acc: 40.180%
Epoch 9, Train Loss: 1.58199, Val Loss: 1.62898, Val Acc: 41.080%
Epoch 10, Train Loss: 1.54641, Val Loss: 1.61558, Val Acc: 43.410%
save result!
{'train_losses': [2.388409409341933, 1.8992145619814909, 1.8193931232524823, 1.7850437948975382, 1.7250170466266102, 1.6687698107731492, 1.6563323298587074, 1.6136799311336083, 1.5819877039028118, 1.5464137339893775], 'val_losses': [1.9636986553668976, 1.8553960621356964, 1.8740205705165862

Adam, 0.001

In [18]:
seed= 1212
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")

args.batch_size = 512

args.in_dim = 3 * 32 * 32
args.out_dim = 10
args.hid_dim = 200
args.n_layer = 4
args.act = 'relu'
args.dropout = 0.1
args.bn = True

args.opt = 'Adam'

args.lr = 0.001
args.mm = 0.9

args.epochs = 10


list_var1 = [0.1, 0.2, 0.3, 0.4]        # Dropout
list_var2 = [True, False]   # Batch normalization
name_var1, name_var2 = 'dropout', 'bn'

res = pd.DataFrame(columns=['train_losses', 'val_losses','val_accs'])
for var1 in list_var1:
    for var2 in list_var2:
        print('======= Start Experiment ========')
        print('{}: {}, {}: {}'.format(name_var1, var1, name_var2, var2))

        args.dropout = var1
        args.bn = var2
        params, result = experiments(args)
        
        save_res(result, params, name_var1, name_var2, var1, var2)
        print(result)
        print('======== End Experiment ========')

dropout: 0.1, bn: True
model on cuda
Epoch 1, Train Loss: 1.81017, Val Loss: 1.63951, Val Acc: 41.030%
Epoch 2, Train Loss: 1.55573, Val Loss: 1.53955, Val Acc: 44.760%
Epoch 3, Train Loss: 1.44469, Val Loss: 1.46751, Val Acc: 47.400%
Epoch 4, Train Loss: 1.35177, Val Loss: 1.45348, Val Acc: 48.970%
Epoch 5, Train Loss: 1.28868, Val Loss: 1.42879, Val Acc: 49.870%
Epoch 6, Train Loss: 1.22482, Val Loss: 1.40040, Val Acc: 50.420%
Epoch 7, Train Loss: 1.17550, Val Loss: 1.43008, Val Acc: 49.720%
Epoch 8, Train Loss: 1.14018, Val Loss: 1.40775, Val Acc: 51.200%
Epoch 9, Train Loss: 1.09738, Val Loss: 1.41248, Val Acc: 51.320%
Epoch 10, Train Loss: 1.05408, Val Loss: 1.44315, Val Acc: 50.690%
save result!
{'train_losses': [1.8101665702047227, 1.5557341469994075, 1.444689552995223, 1.3517654621148412, 1.288682122773762, 1.2248163283625735, 1.1755028616023968, 1.1401806330379052, 1.097380401967447, 1.0540775256820871], 'val_losses': [1.639512026309967, 1.5395547688007354, 1.4675077617168426,

In [20]:
seed= 1212
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")

args.batch_size = 512

args.in_dim = 3 * 32 * 32
args.out_dim = 10
args.hid_dim = 200
args.n_layer = 4
args.act = 'relu'
args.dropout = 0.1
args.bn = False

args.opt = 'Adam'

args.lr = 0.001
args.mm = 0.9

args.epochs = 20


params, result = experiments(args)
print(result)

model on cuda
Epoch 1, Train Loss: 1.88170, Val Loss: 1.69411, Val Acc: 40.130%
Epoch 2, Train Loss: 1.61635, Val Loss: 1.58601, Val Acc: 43.800%
Epoch 3, Train Loss: 1.51184, Val Loss: 1.52016, Val Acc: 46.050%
Epoch 4, Train Loss: 1.40767, Val Loss: 1.45922, Val Acc: 48.180%
Epoch 5, Train Loss: 1.34029, Val Loss: 1.44013, Val Acc: 50.110%
Epoch 6, Train Loss: 1.26686, Val Loss: 1.43870, Val Acc: 49.680%
Epoch 7, Train Loss: 1.21391, Val Loss: 1.40657, Val Acc: 51.520%
Epoch 8, Train Loss: 1.16020, Val Loss: 1.41465, Val Acc: 51.420%
Epoch 9, Train Loss: 1.11530, Val Loss: 1.41424, Val Acc: 51.480%
Epoch 10, Train Loss: 1.06478, Val Loss: 1.45037, Val Acc: 50.810%
Epoch 11, Train Loss: 1.00963, Val Loss: 1.45155, Val Acc: 51.550%
Epoch 12, Train Loss: 0.96715, Val Loss: 1.45925, Val Acc: 51.940%
Epoch 13, Train Loss: 0.92691, Val Loss: 1.50373, Val Acc: 51.400%
Epoch 14, Train Loss: 0.88328, Val Loss: 1.52428, Val Acc: 52.170%
Epoch 15, Train Loss: 0.84202, Val Loss: 1.53716, Val Acc

- Epoch수가 증가함에 따라, Train Loss는 낮아지나 Val Loss는 다시 증가하는 양상을 보임 -> Overfit
- 일반화를 위한 실험 다시 필요 