In [1]:
import torch
import torchvision
import torchvision.transforms as transforms

import torch.nn as nn
import torch.optim as optim

import numpy as np
import matplotlib.pyplot as plt
import time

In [2]:
# Layer codes for each VGG variant, one stage per row. An integer is the
# output-channel count of a 3x3 convolution; 'M' marks a 2x2 max-pooling step
# (this is how VGG_net.create_layers interprets the entries).
VGG = {
    'VGG11': [
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG13': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    'VGG16': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    'VGG19': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
}

In [3]:
class VGG_net(nn.Module):
    """VGG-style image classifier built from a layer code in the module-level ``VGG`` table.

    The network is a convolutional feature extractor (``conv_layers``) followed
    by a three-layer fully connected head (``fcs``) whose first layer expects
    512 input features.

    Args:
        in_channels: number of channels of the input images.
        num_classes: size of the output logit vector.
        model_code: key into ``VGG`` selecting the layer configuration.
        bn: when truthy, a ``BatchNorm2d`` follows every convolution.
        dropout: dropout probability used in the head; a falsy value
            (``0``, ``None``) builds the head without ``Dropout`` layers.

    Raises:
        KeyError: if ``model_code`` is not a key of ``VGG``.
    """

    def __init__(self, in_channels, num_classes, model_code, bn=True, dropout=0.5):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.model_code = model_code
        self.bn = bn
        self.dropout = dropout

        self.conv_layers = self.create_layers(self.model_code)

        # Assemble the head once instead of duplicating it per dropout setting.
        # The layer order (and therefore the Sequential indices / state_dict
        # keys) is the same as with the two separate definitions.
        head = []
        for n_in, n_out in ((512, 4096), (4096, 4096)):
            head += [nn.Linear(in_features=n_in, out_features=n_out), nn.ReLU()]
            if self.dropout:  # skip Dropout entirely when dropout is 0/None
                head.append(nn.Dropout(p=self.dropout))
        head.append(nn.Linear(in_features=4096, out_features=self.num_classes))
        self.fcs = nn.Sequential(*head)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for an image batch ``x``."""
        x = self.conv_layers(x)
        # The head is sized for a 1x1 feature map. When the channel count
        # already matches the head but the spatial map is larger (inputs bigger
        # than the size that pools down to 1x1), average it down to 1x1 instead
        # of failing with a shape mismatch. Inputs that worked before never
        # take this branch, so their results are unchanged.
        if x.size(1) == self.fcs[0].in_features and tuple(x.shape[-2:]) != (1, 1):
            x = nn.functional.adaptive_avg_pool2d(x, 1)
        # flatten() also handles non-contiguous tensors, unlike view().
        x = torch.flatten(x, 1)
        return self.fcs(x)

    def create_layers(self, model_code):
        """Build the convolutional stack described by ``VGG[model_code]``.

        Each integer entry becomes a 3x3 convolution (stride 1, padding 1)
        with that many output channels, followed by an optional ``BatchNorm2d``
        and a ``ReLU``; each ``'M'`` entry becomes a 2x2 max-pool with stride 2.
        """
        layers = []
        in_channels = self.in_channels

        for spec in VGG[model_code]:
            if spec == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
                continue

            layers.append(nn.Conv2d(in_channels=in_channels, out_channels=spec,
                                    kernel_size=3, stride=1, padding=1))
            if self.bn:
                layers.append(nn.BatchNorm2d(spec))
            layers.append(nn.ReLU())
            # The next convolution consumes what this one produced.
            in_channels = spec

        return nn.Sequential(*layers)

In [4]:
def dim_check():
    """Smoke test: push a random 2-image, 3-channel, 32x32 batch through a
    batch-normalised VGG11 with 10 outputs and print the size of the result."""
    batch = torch.randn(2, 3, 32, 32)
    net = VGG_net(3, 10, 'VGG11', True)
    print(net(batch).size())

In [5]:
# Run the shape smoke test; the recorded output for the 2-image batch is torch.Size([2, 10]).
dim_check()

torch.Size([2, 10])


In [6]:
from experiments import *
def experiment(partition, args):
    """Train a VGG_net on the GPU for ``args.epochs`` epochs and collect metrics.

    Args:
        partition: passed through unchanged to ``train``, ``validate`` and
            ``test`` (in this notebook: a dict with 'train'/'val'/'test' datasets).
        args: namespace providing ``model_code``, ``bn``, ``dropout``, ``optim``
            ('SGD', 'Adam' or 'RMSprop'), ``lr``, ``l2`` (used as weight decay)
            and ``epochs``.

    Returns:
        ``(vars(args), result)`` where ``result`` holds the per-epoch lists
        'train_losses', 'val_losses', 'train_accs', 'val_accs' and the final
        'test_acc'.

    Raises:
        ValueError: if ``args.optim`` is not one of the supported names.
    """
    # NOTE(review): train / validate / test are not defined in this notebook;
    # presumably they come from the star import of `experiments` - confirm.
    m = VGG_net(in_channels=3, num_classes=10, model_code=args.model_code,
                bn=args.bn, dropout=args.dropout).cuda()
    criterion = nn.CrossEntropyLoss()

    # Pick the optimizer class whose name matches args.optim.
    for name, factory in (('SGD', optim.SGD),
                          ('Adam', optim.Adam),
                          ('RMSprop', optim.RMSprop)):
        if args.optim == name:
            optimizer = factory(m.parameters(), lr=args.lr, weight_decay=args.l2)
            break
    else:
        raise ValueError('Invalid Optimizer! Try Again')

    history = {'train_losses': [], 'val_losses': [], 'train_accs': [], 'val_accs': []}

    epoch = 1
    while epoch <= args.epochs:
        started = time.time()
        m, train_loss, train_acc = train(m, partition, optimizer, criterion, args)
        val_loss, val_acc = validate(m, partition, criterion, args)
        elapsed = time.time() - started  # wall time of one train + validate pass

        history['train_losses'].append(train_loss)
        history['val_losses'].append(val_loss)
        history['train_accs'].append(train_acc)
        history['val_accs'].append(val_acc)

        report = 'Epoch: {}/{}, Acc: {:2.2f}/{:2.2f}, Loss: {:2.2f}/{:2.2f}, Took:{:2.2f}sec'
        print(report.format(epoch, args.epochs, train_acc, val_acc,
                            train_loss, val_loss, elapsed))
        epoch += 1

    # Evaluate once on the test partition after the last epoch.
    history['test_acc'] = test(m, partition, args)
    return vars(args), history

In [7]:
# Convert each image to a tensor with values in [0, 1] (0 is black), then apply
# (x - 0.5) / 0.5 per channel so the values end up in [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Keep the full training set under its own name so re-running the split below
# always starts from the same 50000-image dataset.
full_trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                             download=True, transform=transform)

# Seed the split so the 40000/10000 train/validation partition is identical on
# every run; without a generator the global RNG decides, and validation numbers
# are not comparable between kernel restarts.
SPLIT_SEED = 42
trainset, valset = torch.utils.data.random_split(
    full_trainset, [40000, 10000],
    generator=torch.Generator().manual_seed(SPLIT_SEED))

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)

partition = {'train': trainset, 'val': valset, 'test': testset}

Files already downloaded and verified
Files already downloaded and verified


In [8]:
import argparse

# An argument-less parser is used only to obtain an empty Namespace; every
# hyper-parameter is then attached to it as a plain attribute.
parser = argparse.ArgumentParser()
args = parser.parse_args([])

vars(args).update(
    # model
    model_code='VGG11',
    bn=True,
    dropout=0.5,
    # optimisation
    l2=0.0001,
    optim='Adam',
    train_batch_size=256,
    val_batch_size=1024,
    lr=0.001,
    epochs=10,
)

In [9]:
# Run the full training experiment with the settings above; `parameters` is the
# hyper-parameter dict and `result` the collected metrics. The recorded run took
# roughly 50 seconds per epoch.
parameters, result = experiment(partition, args)

Epoch: 1/10, Acc: 17.74/26.56, Loss: 2.10/1.86, Took:51.04sec
Epoch: 2/10, Acc: 34.16/29.01, Loss: 1.66/1.90, Took:47.46sec
Epoch: 3/10, Acc: 49.43/49.39, Loss: 1.33/1.40, Took:50.49sec
Epoch: 4/10, Acc: 61.30/56.55, Loss: 1.09/1.26, Took:50.99sec
Epoch: 5/10, Acc: 67.74/65.02, Loss: 0.93/1.00, Took:51.59sec
Epoch: 6/10, Acc: 72.99/65.91, Loss: 0.80/1.11, Took:50.97sec
Epoch: 7/10, Acc: 76.63/71.31, Loss: 0.70/0.85, Took:50.36sec
Epoch: 8/10, Acc: 80.12/74.52, Loss: 0.60/0.80, Took:49.97sec
Epoch: 9/10, Acc: 82.54/70.65, Loss: 0.54/0.98, Took:51.65sec
Epoch: 10/10, Acc: 84.44/78.59, Loss: 0.48/0.69, Took:50.33sec


In [38]:
# Scratch arithmetic; it is not used by the VGG cells shown above.
# NOTE(review): looks like a multiply count for a 3x3 convolution mapping
# 256 -> 192 channels on a 28x28 feature map - confirm the intent.
3 * 3 * 256 * 28 * 28 * 192

346816512

In [39]:
# NOTE(review): presumably the same layer with a 1x1 reduction to 64 channels
# placed in front of the 3x3 convolution - confirm. The 1x1 term (1*1*64*256)
# carries no 28*28 spatial factor while the 3x3 term does; verify that is intended.
(1 * 1 * 64 * 256) + (3 * 3 * 64 * 28 * 28 * 192)

86720512

In [41]:
# Second total divided by the first, both pasted in as literals from the
# outputs of the two preceding cells (about 0.25).
86720512/ 346816512

0.2500472411186697