In [None]:
!pip install -q torch

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import argparse
import numpy as np
import time

In [None]:
# Convert images to tensors, then normalise each RGB channel with mean 0.5 and
# std 0.5 (maps the [0, 1] range produced by ToTensor onto [-1, 1]).
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

full_trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

# Hold out 10,000 of the training images for validation. The split gets its own
# seeded generator because it runs before any global seed is set; without it the
# train/val membership would change on every kernel restart.
split_generator = torch.Generator().manual_seed(123)
trainset, valset = torch.utils.data.random_split(full_trainset, [40000, 10000],
                                                 generator=split_generator)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

# Datasets keyed by the role the training / evaluation functions look up.
partition = {'train': trainset, 'val': valset, 'test': testset}

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [None]:
class MLP(nn.Module):
  """Multi-layer perceptron classifier.

  Layout: an input layer (``in_dim -> hid_dim``), ``n_layer - 1`` hidden layers
  (``hid_dim -> hid_dim``) and an output layer (``hid_dim -> out_dim``).
  The activation follows the input layer and every hidden layer; dropout
  follows every hidden layer. The output layer returns raw scores.

  Args:
    in_dim: size of each flattened input sample.
    out_dim: number of output scores per sample.
    hid_dim: width of every hidden layer.
    n_layer: number of layers before the output layer (input layer included).
    act: ``'sigmoid'`` selects ``nn.Sigmoid``; any other value selects ``nn.ReLU``.
    dropout: drop probability handed to ``nn.Dropout``.
  """

  def __init__(self, in_dim, out_dim, hid_dim, n_layer, act, dropout):
    super(MLP, self).__init__()
    self.in_dim = in_dim    # int
    self.out_dim = out_dim  # int
    self.hid_dim = hid_dim  # int
    self.n_layer = n_layer  # int

    self.fc1 = nn.Linear(self.in_dim, self.hid_dim)  # input layer

    # Hidden layers only; the output layer is created separately below.
    self.linears = nn.ModuleList(
        nn.Linear(self.hid_dim, self.hid_dim) for _ in range(self.n_layer - 1))

    self.fc2 = nn.Linear(self.hid_dim, self.out_dim)  # output layer

    # Activation function: sigmoid on request, ReLU by default.
    self.act = nn.Sigmoid() if act == 'sigmoid' else nn.ReLU()

    # Dropout module built from the given probability (a float).
    self.dropout = nn.Dropout(dropout)

  def forward(self, x):
    """Map a batch of flattened inputs to ``out_dim`` raw scores per sample."""
    # The input layer needs its own non-linearity; without it fc1 and the
    # next linear layer collapse into a single affine transformation.
    x = self.act(self.fc1(x))
    for hidden in self.linears:
      x = self.dropout(self.act(hidden(x)))
    return self.fc2(x)

In [None]:
def train(model, partition, optimizer, criterion, args):
  """Run one training epoch over ``partition['train']``.

  Args:
    model: network to optimise; it is switched to training mode.
    partition: mapping whose ``'train'`` entry is the training dataset.
    optimizer: optimizer whose ``zero_grad`` / ``step`` are called once per batch.
    criterion: loss callable invoked as ``criterion(outputs, labels)``.
    args: namespace providing ``train_batch_size``; an optional
      ``num_workers`` attribute overrides the default of 2 loader workers.

  Returns:
    ``(model, train_loss, train_acc)`` where ``train_loss`` is the mean of the
    per-batch losses and ``train_acc`` is the accuracy in percent.
  """
  trainloader = torch.utils.data.DataLoader(partition['train'],
                                            batch_size=args.train_batch_size,
                                            shuffle=True,
                                            num_workers=getattr(args, 'num_workers', 2))

  # Send batches to wherever the model lives instead of assuming CUDA.
  first_param = next(model.parameters(), None)
  if first_param is not None:
    device = first_param.device
  else:
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

  # Put the model in training mode (enables dropout etc.).
  model.train()
  correct = 0
  total = 0
  train_loss = 0.0
  for inputs, labels in trainloader:
    optimizer.zero_grad()

    # Flatten every sample to a vector and move the batch to the model's device.
    inputs = inputs.view(inputs.size(0), -1).to(device)
    labels = labels.to(device)
    outputs = model(inputs)

    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # Accumulate (not overwrite) so the division below yields the epoch average.
    train_loss += loss.item()
    predicted = outputs.argmax(dim=1)
    total += labels.size(0)
    correct += (predicted == labels).sum().item()

  train_loss = train_loss / len(trainloader)
  train_acc = 100 * correct / total

  return model, train_loss, train_acc


In [None]:
def validate(model, partition, criterion, args):
    """Evaluate ``model`` on ``partition['val']`` without computing gradients.

    Args:
        model: network to evaluate; it is switched to eval mode.
        partition: mapping whose ``'val'`` entry is the validation dataset.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        args: namespace providing ``test_batch_size``; an optional
            ``num_workers`` attribute overrides the default of 2 loader workers.

    Returns:
        ``(val_loss, val_acc)``: the mean of the per-batch losses and the
        accuracy in percent.
    """
    valloader = torch.utils.data.DataLoader(partition['val'],
                                            batch_size=args.test_batch_size,
                                            shuffle=False,
                                            num_workers=getattr(args, 'num_workers', 2))

    # Send batches to wherever the model lives instead of assuming CUDA.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    model.eval()

    correct = 0
    total = 0
    val_loss = 0.0
    with torch.no_grad():
        for images, labels in valloader:
            # Flatten every sample to a vector before feeding the MLP.
            images = images.view(images.size(0), -1).to(device)
            labels = labels.to(device)
            outputs = model(images)

            val_loss += criterion(outputs, labels).item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    val_loss = val_loss / len(valloader)
    val_acc = 100 * correct / total
    return val_loss, val_acc

In [None]:
def test(model, partition, args):
    testloader = torch.utils.data.DataLoader(partition['test'], 
                                             batch_size=args.test_batch_size, 
                                             shuffle=False,
                                             num_workers=2)
    model.eval()
    
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images = images.view(-1, 3072)
            images = images.cuda()
            labels = labels.cuda()

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        test_acc = 100 * correct / total
    return test_acc

In [None]:
def experiment(partition, args):
    """Train a fresh MLP for ``args.epoch`` epochs and evaluate it on the test set.

    Args:
        partition: mapping with ``'train'``, ``'val'`` and ``'test'`` datasets.
        args: namespace providing the model settings (``in_dim``, ``out_dim``,
            ``hid_dim``, ``n_layer``, ``act``, ``dropout``), the optimizer
            settings (``optim``, ``lr``, ``l2``), ``epoch`` and the batch sizes
            read by the train / validate / test functions.

    Returns:
        ``(train_loss, val_loss, train_acc, val_acc, test_acc)`` from the last
        epoch. The first four are NaN when ``args.epoch`` is 0.

    Raises:
        ValueError: if ``args.optim`` is not 'SGD', 'RMSprop' or 'Adam'.
    """
    model = MLP(args.in_dim, args.out_dim, args.hid_dim, args.n_layer, args.act, args.dropout)
    # Use the GPU when one is present; otherwise keep the model on the CPU.
    model.to(torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

    criterion = nn.CrossEntropyLoss()

    # All supported optimizers share the same constructor arguments.
    optimizer_classes = {'SGD': optim.SGD, 'RMSprop': optim.RMSprop, 'Adam': optim.Adam}
    if args.optim not in optimizer_classes:
        raise ValueError('In-valid optimizer choice')
    optimizer = optimizer_classes[args.optim](model.parameters(), lr=args.lr, weight_decay=args.l2)

    # Defined up front so the return statement is valid even with zero epochs.
    train_loss = val_loss = train_acc = val_acc = float('nan')

    for epoch in range(args.epoch):  # loop over the dataset multiple times
        ts = time.time()
        model, train_loss, train_acc = train(model, partition, optimizer, criterion, args)
        val_loss, val_acc = validate(model, partition, criterion, args)
        te = time.time()
        print('Epoch {}, Acc(train/val): {:2.2f}/{:2.2f}, Loss(train/val) {:2.2f}/{:2.2f}. Took {:2.2f} sec'.format(epoch, train_acc, val_acc, train_loss, val_loss, te-ts))

    test_acc = test(model, partition, args)
    return train_loss, val_loss, train_acc, val_acc, test_acc

In [None]:
import argparse

# Fix the NumPy and PyTorch random number generators for repeatable runs.
seed = 123
torch.manual_seed(seed)
np.random.seed(seed)

# An empty argparse namespace doubles as a plain settings container.
parser = argparse.ArgumentParser()
args = parser.parse_args("")

vars(args).update(
    # model architecture
    n_layer=5,
    in_dim=3072,
    out_dim=10,
    hid_dim=100,
    act='relu',
    # optimisation and regularisation
    lr=0.001,
    dropout=0.2,
    l2=0.00001,
    # training schedule
    epoch=5,
    optim='Adam',
    # loader batch sizes
    train_batch_size=256,
    test_batch_size=1024,
)

In [None]:
# Random search over depth and width: 10 runs, each drawing n_layer from
# [3, 10) and hid_dim as a power of two with exponent from [5, 12).
metric_names = ('train_loss', 'val_loss', 'train_acc', 'val_acc', 'test_acc')

hid_dim_num = []   # hidden width used by each run
layer_num = []     # layer count used by each run
results = {metric: [] for metric in metric_names}

for _ in range(10):
  sampled_layers = np.random.randint(3, 10)
  sampled_width = 2 ** np.random.randint(5, 12)
  args.n_layer = sampled_layers
  args.hid_dim = sampled_width

  run_metrics = experiment(partition, args)

  hid_dim_num.append(args.hid_dim)
  layer_num.append(args.n_layer)
  # experiment() returns its metrics in the same order as metric_names.
  for metric, value in zip(metric_names, run_metrics):
    results[metric].append(value)
  print('======================================')

Epoch 0, Acc(train/val): 17.17/19.30, Loss(train/val) 0.01/2.00. Took 8.50 sec
Epoch 1, Acc(train/val): 24.49/29.52, Loss(train/val) 0.01/1.84. Took 8.45 sec
Epoch 2, Acc(train/val): 31.35/33.31, Loss(train/val) 0.01/1.79. Took 8.46 sec
Epoch 3, Acc(train/val): 35.51/37.26, Loss(train/val) 0.01/1.70. Took 8.57 sec
Epoch 4, Acc(train/val): 38.06/40.70, Loss(train/val) 0.01/1.67. Took 8.55 sec
Epoch 0, Acc(train/val): 16.88/19.00, Loss(train/val) 0.01/1.99. Took 8.34 sec
Epoch 1, Acc(train/val): 24.25/28.41, Loss(train/val) 0.01/1.85. Took 8.38 sec
Epoch 2, Acc(train/val): 29.86/33.06, Loss(train/val) 0.01/1.77. Took 8.25 sec
Epoch 3, Acc(train/val): 33.47/36.61, Loss(train/val) 0.01/1.71. Took 8.34 sec
Epoch 4, Acc(train/val): 36.52/37.74, Loss(train/val) 0.01/1.69. Took 8.23 sec
Epoch 0, Acc(train/val): 23.40/31.45, Loss(train/val) 0.01/1.82. Took 8.09 sec
Epoch 1, Acc(train/val): 33.06/36.79, Loss(train/val) 0.01/1.73. Took 8.09 sec
Epoch 2, Acc(train/val): 38.12/40.46, Loss(train/val

In [None]:
# Show the metrics collected by the sweep: one list per metric, one entry per run.
results

{'test_acc': [41.4,
  38.93,
  45.62,
  31.96,
  46.8,
  44.95,
  32.68,
  46.62,
  35.09,
  46.93],
 'train_acc': [38.065,
  36.5225,
  42.705,
  29.5875,
  44.7475,
  40.74,
  30.5125,
  44.675,
  32.08,
  49.1475],
 'train_loss': [0.011238088273698358,
  0.012147927739817625,
  0.012042118485566158,
  0.011706244414019736,
  0.00999213859533808,
  0.010595375565206929,
  0.012377125442407693,
  0.009947414610795914,
  0.011670624374584028,
  0.007521863196306168],
 'val_acc': [40.7,
  37.74,
  44.09,
  30.66,
  46.58,
  43.18,
  32.55,
  46.25,
  33.87,
  46.51],
 'val_loss': [1.6668489933013917,
  1.685024619102478,
  1.5544967532157898,
  1.7814177036285401,
  1.5183439373970031,
  1.5940255045890808,
  1.7820457220077515,
  1.5123131155967713,
  1.7436251282691955,
  1.5350501656532287]}

In [None]:
# Show the n_layer value sampled for each run of the sweep.
layer_num

[9, 9, 7, 9, 6, 6, 9, 3, 9, 3]

In [None]:
# Show the hid_dim value sampled for each run of the sweep.
hid_dim_num

[1024, 128, 128, 64, 128, 64, 64, 64, 64, 2048]

In [None]:
# Observation: results seem to be better when the network has fewer layers and more hidden nodes.