In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
import torchvision.datasets as dsets
import torchvision.transforms as transforms

device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

# image 3 * 224 * 224 assumed
class VGG(nn.Module): # use this for CIFAR10 datasets
    def __init__(self, layer_list, num_class=10):
        super().__init__()
        self.vgg_layer_list = []
        self.initial_channel = 3
        for m in layer_list:
            if m == 'M':
                self.vgg_layer_list.append(nn.MaxPool2d(2))
            else:
                self.vgg_layer_list += [
                    nn.Conv2d(self.initial_channel, m, kernel_size=3, stride=1, padding=1),
                    nn.BatchNorm2d(m),
                    nn.ReLU(inplace=True)]
                self.initial_channel = m
                
        self.convnet = nn.Sequential(*self.vgg_layer_list)
        self.avgpool = nn.AdaptiveAvgPool2d(7) # 뭘까?????
        self.FClayer = nn.Sequential(
            nn.Linear(512*7*7, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_class) # softmax skip
        )
    
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
                else:
                    pass # 수정
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant(m.weight, 1)
                nn.init.constant(m.bias, 0)
            else:
                pass
    
    def forward(self, x):
        out = self.avgpool(self.convnet(x))
        out = out.view(out.shape[0], -1)
        out = self.FClayer(out)
        return out
                
layer_list = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], #8 + 3 =11 == vgg11
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'], # 10 + 3 = vgg 13
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'], #13 + 3 = vgg 16
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'], # 16 +3 =vgg 19
    'custom' : [64,64,64,'M',128,128,128,'M',256,256,256,'M']
}

VGGnet = VGG(layer_list['D'], 10).to(device)
VGGnet

VGG(
  (convnet): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256,

In [2]:
trans = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))
])

trainset = torchvision.datasets.CIFAR10(root='.CIFAR10', train=True,
                                              transform=trans, download=True)
testset = torchvision.datasets.CIFAR10(root='.CIFAR10', train=False,
                                              transform=trans, download=True)

data_loader_train = DataLoader(trainset, batch_size=32,
                               shuffle=True, drop_last=True)
data_loader_test = DataLoader(testset, batch_size=4,
                               shuffle=False, drop_last=True)

criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.SGD(VGGnet.parameters(), lr=0.00001, momentum=0.9)
lr_sche = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.9) # learning rate를 변경해주는 scheduler      

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to .CIFAR10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting .CIFAR10/cifar-10-python.tar.gz to .CIFAR10
Files already downloaded and verified


In [3]:
x = torch.randn(1,3,224,224).to(device)
VGGnet(x)

tensor([[ 0.0121,  0.2124, -0.1419, -0.3105,  0.0791, -0.3765, -0.0867, -0.0795,
         -0.2291,  0.0559]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [4]:
total_batch = len(data_loader_train) # batch의 총 개수
VGGnet.train()
for epoch in range(50):
    running_loss = 0.0
    lr_sche.step() # update learning rate
    
    for i, data in enumerate(data_loader_train):
        inputs, labels = data
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()

        hypothesis = VGGnet(inputs)
        loss = criterion(hypothesis, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()

        print("Epoch : %d, Iteration : %d, Loss : %f"%(epoch + 1, i + 1, running_loss / 32))
        running_loss = 0



[1;30;43m스트리밍 출력 내용이 길어서 마지막 5000줄이 삭제되었습니다.[0m
Epoch : 47, Iteration : 1249, Loss : 0.001048
Epoch : 47, Iteration : 1250, Loss : 0.002117
Epoch : 47, Iteration : 1251, Loss : 0.001175
Epoch : 47, Iteration : 1252, Loss : 0.002826
Epoch : 47, Iteration : 1253, Loss : 0.002249
Epoch : 47, Iteration : 1254, Loss : 0.001891
Epoch : 47, Iteration : 1255, Loss : 0.001870
Epoch : 47, Iteration : 1256, Loss : 0.001088
Epoch : 47, Iteration : 1257, Loss : 0.003059
Epoch : 47, Iteration : 1258, Loss : 0.000923
Epoch : 47, Iteration : 1259, Loss : 0.000830
Epoch : 47, Iteration : 1260, Loss : 0.000379
Epoch : 47, Iteration : 1261, Loss : 0.001603
Epoch : 47, Iteration : 1262, Loss : 0.000481
Epoch : 47, Iteration : 1263, Loss : 0.000714
Epoch : 47, Iteration : 1264, Loss : 0.001861
Epoch : 47, Iteration : 1265, Loss : 0.001087
Epoch : 47, Iteration : 1266, Loss : 0.002207
Epoch : 47, Iteration : 1267, Loss : 0.008288
Epoch : 47, Iteration : 1268, Loss : 0.000453
Epoch : 47, Iteration : 1269, 

In [6]:
total = 0
correct = 0
with torch.no_grad():
    VGGnet.eval()
    for data in data_loader_test:
        images, labels = data
        images = images.to(device)
        labels = labels.to(device)
        
        out = VGGnet(images)
        total += labels.size(0)
        correct += (torch.argmax(out, 1) == labels).sum().item()
    print('Accuracy : {}'.format(100 * correct / total))

Accuracy : 63.81
