# CNN

## 0. SoftMax

In [0]:
import numpy as np

# Softmax worked by hand on a three-element score vector.
a = np.array([0.3, 2.9, 4.0])

# Step 1: exponentiate every score.
exp_a = np.exp(a)
print(exp_a)

# Step 2: divide by the sum of the exponentials so the entries add up to 1.
sum_exp_a = exp_a.sum()
y = exp_a / sum_exp_a
print(y)

[ 1.34985881 18.17414537 54.59815003]
[0.01821127 0.24519181 0.73659691]


## 1. MLP VS CNN with CIFAR-10

### 1.1 MLP Model with CIFAR-10

중요 모듈 import

In [0]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary

Hyper Parameters

In [0]:
# Loss function used by train() for every batch.
criterion = nn.CrossEntropyLoss()

# Optimisation settings read as globals by the cells below.
learning_rate = 0.01
total_epoch = 50
batch_size = 100

# True when a CUDA device is usable in this runtime.
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


dataset

In [0]:
# CIFAR-10 splits stored under ./data; every image goes through ToTensor().
# Only the training split asks for a download.
train_dataset = dsets.CIFAR10(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = dsets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: training batches are shuffled, test batches keep their order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

Files already downloaded and verified


Training Function

In [0]:
def train(model, train_loader, lr=None, loss_fn=None):
    """Run one epoch of SGD over ``train_loader`` and return the mean batch loss.

    The model is put in training mode and every batch is moved to the device
    that holds the model's parameters, so the function works for CPU- and
    GPU-resident models alike.

    Args:
        model: ``nn.Module`` to optimise in place.
        train_loader: iterable yielding ``(image, label)`` tensor batches.
        lr: SGD learning rate. ``None`` (default) falls back to the
            notebook-level ``learning_rate``.
        loss_fn: callable ``loss_fn(prediction, label)`` returning a scalar
            tensor. ``None`` (default) falls back to the notebook-level
            ``criterion``.

    Returns:
        float: arithmetic mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: if ``train_loader`` yields no batches.
    """
    # Resolve the defaults at call time so existing two-argument calls keep
    # using whatever the notebook globals currently hold.
    if lr is None:
        lr = learning_rate
    if loss_fn is None:
        loss_fn = criterion

    model.train()

    # A fresh optimizer per call is harmless here: SGD without momentum keeps
    # no state between steps.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Follow the model's own device instead of a global CUDA flag.
    device = next(model.parameters()).device

    losses = []
    for image, label in train_loader:
        image = image.to(device)
        label = label.to(device)

        pred_label = model(image)
        loss = loss_fn(pred_label, label)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return sum(losses) / len(losses)

Evaluation Function

In [0]:
def eval(model, test_loader):
    """Return the classification accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode, each batch is moved to the
    device holding the model's parameters, and the whole loop runs under
    ``torch.no_grad()`` so no autograd graph is built while scoring.

    Note: this name shadows Python's built-in ``eval``; it is kept because
    the notebook calls the function by this name.

    Args:
        model: ``nn.Module`` with at least one parameter that returns one row
            of class scores per input sample.
        test_loader: iterable yielding ``(image, label)`` tensor batches.

    Returns:
        float: fraction of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: if ``test_loader`` yields no samples.
        StopIteration: if ``model`` has no parameters.
    """
    model.eval()
    # Follow the model's own device instead of a global CUDA flag, so a
    # CPU-resident model still works on a machine where CUDA is available.
    device = next(model.parameters()).device

    total_cnt = 0
    correct_cnt = 0

    # Gradients are never needed for scoring; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)

            out = model(image)
            _, pred_label = torch.max(out, 1)
            total_cnt += image.size(0)
            correct_cnt += (pred_label == label).sum().item()

    return correct_cnt / total_cnt

CIFAR10 MLP Model

In [0]:
class SimpleMLP(nn.Module):
    """Multi-layer perceptron: four ReLU hidden layers and a 10-way linear output.

    ``forward`` flattens its input to rows of 3*32*32 features before the
    first linear layer.
    """

    def __init__(self):
        super().__init__()

        # Hidden fully-connected stack, each layer followed by its own ReLU.
        self.fc1 = nn.Linear(3 * 32 * 32, 8 * 28 * 28)
        self.act1 = nn.ReLU()
        self.fc2 = nn.Linear(8 * 28 * 28, 8 * 24 * 24)
        self.act2 = nn.ReLU()
        self.fc3 = nn.Linear(8 * 24 * 24, 16 * 8 * 8)
        self.act3 = nn.ReLU()
        self.fc4 = nn.Linear(16 * 8 * 8, 16 * 4 * 4)
        self.act4 = nn.ReLU()

        # Final projection to the 10 class scores.
        self.out = nn.Linear(16 * 4 * 4, 10)

    def forward(self, x):
        """Flatten ``x`` to (-1, 3*32*32) and return the 10 class scores per row."""
        hidden = x.view(-1, 3 * 32 * 32)

        stages = (
            (self.fc1, self.act1),
            (self.fc2, self.act2),
            (self.fc3, self.act3),
            (self.fc4, self.act4),
        )
        for linear, activation in stages:
            hidden = activation(linear(hidden))

        return self.out(hidden)

Training & Evaluation

In [0]:
# Build the MLP and move it to the GPU only when one is available, matching
# the use_cuda flag computed in the hyper-parameter cell.
mlp_model = SimpleMLP()
if use_cuda:
    mlp_model = mlp_model.cuda()

train_loss_lst = []      # mean training loss, one entry per epoch
test_accuracy_lst = []   # test-set accuracy, one entry per epoch
for epoch in range(total_epoch):
    train_loss = train(mlp_model, train_loader)
    train_loss_lst.append(train_loss)
    test_accuracy = eval(mlp_model, test_loader)
    test_accuracy_lst.append(test_accuracy)

    print(test_accuracy)

# Layer-by-layer summary for a single 3x32x32 input.
summary(mlp_model, input_size=(3, 32, 32))

0.1567
0.2081
0.2752
0.308
0.3142
0.3395
0.3603
0.3466
0.3948
0.4015
0.3944
0.424
0.4405
0.4516
0.4641
0.4568
0.4581
0.4653
0.4758
0.4635
0.4834
0.5033
0.5018
0.4948
0.4902
0.4812
0.5108
0.5233
0.4582
0.5249
0.5332
0.5231
0.4913
0.5185
0.5031
0.5442
0.496
0.5373
0.5375
0.4629
0.5337
0.5366
0.5282
0.5319
0.5477
0.5195
0.5525
0.5264
0.5632
0.5219
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 6272]      19,273,856
              ReLU-2                 [-1, 6272]               0
            Linear-3                 [-1, 4608]      28,905,984
              ReLU-4                 [-1, 4608]               0
            Linear-5                 [-1, 1024]       4,719,616
              ReLU-6                 [-1, 1024]               0
            Linear-7                  [-1, 256]         262,400
              ReLU-8                  [-1, 256]               0
            

### 1.2 CNN Model with CIFAR-10

중요 모듈 import

In [0]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary

Hyper Parameters

In [0]:
# Loss function used by train() for every batch.
criterion = nn.CrossEntropyLoss()

# Optimisation settings read as globals by the cells below.
learning_rate = 0.001
total_epoch = 50
batch_size = 100

# True when a CUDA device is usable in this runtime.
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


dataset

In [0]:
# CIFAR-10 splits stored under ./data; every image goes through ToTensor().
# Only the training split asks for a download.
train_dataset = dsets.CIFAR10(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = dsets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: training batches are shuffled, test batches keep their order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

Files already downloaded and verified


CIFAR10 CNN Model

In [0]:
class SimpleCNN(nn.Module):
    """Four-convolution network with a two-layer classifier head (10 outputs).

    ``forward`` flattens the feature map to rows of 256*2*2 values, which is
    what a 3x32x32 input produces after the strided first convolution and the
    three 2x2 max-pools.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: strided 3x3 convolution, ReLU, 2x2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1)
        self.act1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2)

        # Stage 2: 3x3 convolution, ReLU, 2x2 max-pool.
        self.conv2 = nn.Conv2d(64, 192, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2)

        # Stage 3: two 3x3 convolutions with ReLU, pooled once at the end.
        self.conv3 = nn.Conv2d(192, 384, kernel_size=3, padding=1)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(384, 256, kernel_size=3, padding=1)
        self.act4 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2)

        # Classifier head.
        self.fc1 = nn.Linear(256 * 2 * 2, 1000)
        self.act5 = nn.ReLU()
        self.output = nn.Linear(1000, 10)

    def forward(self, x):
        """Return the 10 class scores for each image in ``x``."""
        feat = self.conv1(x)
        feat = self.act1(feat)
        feat = self.pool1(feat)

        feat = self.conv2(feat)
        feat = self.act2(feat)
        feat = self.pool2(feat)

        feat = self.act3(self.conv3(feat))
        feat = self.act4(self.conv4(feat))
        feat = self.pool3(feat)

        # Flatten the feature map before the fully-connected head.
        flat = feat.view(-1, 256 * 2 * 2)

        hidden = self.act5(self.fc1(flat))
        return self.output(hidden)

Training Function

In [0]:
def train(model, train_loader, lr=None, loss_fn=None):
    """Run one epoch of SGD over ``train_loader`` and return the mean batch loss.

    The model is put in training mode and every batch is moved to the device
    that holds the model's parameters, so the function works for CPU- and
    GPU-resident models alike.

    Args:
        model: ``nn.Module`` to optimise in place.
        train_loader: iterable yielding ``(image, label)`` tensor batches.
        lr: SGD learning rate. ``None`` (default) falls back to the
            notebook-level ``learning_rate``.
        loss_fn: callable ``loss_fn(prediction, label)`` returning a scalar
            tensor. ``None`` (default) falls back to the notebook-level
            ``criterion``.

    Returns:
        float: arithmetic mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: if ``train_loader`` yields no batches.
    """
    # Resolve the defaults at call time so existing two-argument calls keep
    # using whatever the notebook globals currently hold.
    if lr is None:
        lr = learning_rate
    if loss_fn is None:
        loss_fn = criterion

    model.train()

    # A fresh optimizer per call is harmless here: SGD without momentum keeps
    # no state between steps.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Follow the model's own device instead of a global CUDA flag.
    device = next(model.parameters()).device

    losses = []
    for image, label in train_loader:
        image = image.to(device)
        label = label.to(device)

        pred_label = model(image)
        loss = loss_fn(pred_label, label)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return sum(losses) / len(losses)

Evaluation Function

In [0]:
def eval(model, test_loader):
    """Return the classification accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode, each batch is moved to the
    device holding the model's parameters, and the whole loop runs under
    ``torch.no_grad()`` so no autograd graph is built while scoring.

    Note: this name shadows Python's built-in ``eval``; it is kept because
    the notebook calls the function by this name.

    Args:
        model: ``nn.Module`` with at least one parameter that returns one row
            of class scores per input sample.
        test_loader: iterable yielding ``(image, label)`` tensor batches.

    Returns:
        float: fraction of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: if ``test_loader`` yields no samples.
        StopIteration: if ``model`` has no parameters.
    """
    model.eval()
    # Follow the model's own device instead of a global CUDA flag, so a
    # CPU-resident model still works on a machine where CUDA is available.
    device = next(model.parameters()).device

    total_cnt = 0
    correct_cnt = 0

    # Gradients are never needed for scoring; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)

            out = model(image)
            _, pred_label = torch.max(out, 1)
            total_cnt += image.size(0)
            correct_cnt += (pred_label == label).sum().item()

    return correct_cnt / total_cnt

Training & Evaluation

In [0]:
# Build the CNN and move it to the GPU only when one is available, matching
# the use_cuda flag computed in the hyper-parameter cell.
cnn_model = SimpleCNN()
if use_cuda:
    cnn_model = cnn_model.cuda()

train_loss_lst = []      # mean training loss, one entry per epoch
test_accuracy_lst = []   # test-set accuracy, one entry per epoch
for epoch in range(total_epoch):
    train_loss = train(cnn_model, train_loader)
    train_loss_lst.append(train_loss)
    test_accuracy = eval(cnn_model, test_loader)
    test_accuracy_lst.append(test_accuracy)

    print(test_accuracy)

# Layer-by-layer summary for a single 3x32x32 input.
summary(cnn_model, input_size=(3, 32, 32))

0.1007
0.1143
0.1852
0.2514
0.2611
0.2856
0.2866
0.3274
0.3762
0.3845
0.4125
0.4276
0.4374
0.4615
0.473
0.4891
0.4941
0.4858
0.5053
0.5111
0.4971
0.5579
0.5513
0.5728
0.5844
0.5792
0.5828
0.601
0.6087
0.6155
0.6067
0.6223
0.6386
0.6408
0.6396
0.651
0.6619
0.664
0.6717
0.6721
0.6723
0.6663
0.6341
0.675
0.6851
0.6996
0.698
0.6956
0.69
0.6909
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 16, 16]           1,792
              ReLU-2           [-1, 64, 16, 16]               0
         MaxPool2d-3             [-1, 64, 8, 8]               0
            Conv2d-4            [-1, 192, 8, 8]         110,784
              ReLU-5            [-1, 192, 8, 8]               0
         MaxPool2d-6            [-1, 192, 4, 4]               0
            Conv2d-7            [-1, 384, 4, 4]         663,936
              ReLU-8            [-1, 384, 4, 4]               0
            Conv2

## 2. VGG

In [0]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary


# Loss function used by train() for every batch.
criterion = nn.CrossEntropyLoss()

# Optimisation settings read as globals by the functions and cells below.
learning_rate = 0.01
total_epoch = 50
batch_size = 100

# True when a CUDA device is usable in this runtime.
use_cuda = torch.cuda.is_available()
print(use_cuda)

# CIFAR-10 splits stored under ./data; every image goes through ToTensor().
# Only the training split asks for a download.
train_dataset = dsets.CIFAR10(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = dsets.CIFAR10(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

# Mini-batch iterators: training batches are shuffled, test batches keep their order.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)


def train(model, train_loader, lr=None, loss_fn=None):
    """Run one epoch of SGD over ``train_loader`` and return the mean batch loss.

    The model is put in training mode and every batch is moved to the device
    that holds the model's parameters, so the function works for CPU- and
    GPU-resident models alike.

    Args:
        model: ``nn.Module`` to optimise in place.
        train_loader: iterable yielding ``(image, label)`` tensor batches.
        lr: SGD learning rate. ``None`` (default) falls back to the
            notebook-level ``learning_rate``.
        loss_fn: callable ``loss_fn(prediction, label)`` returning a scalar
            tensor. ``None`` (default) falls back to the notebook-level
            ``criterion``.

    Returns:
        float: arithmetic mean of the per-batch loss values.

    Raises:
        ZeroDivisionError: if ``train_loader`` yields no batches.
    """
    # Resolve the defaults at call time so existing two-argument calls keep
    # using whatever the notebook globals currently hold.
    if lr is None:
        lr = learning_rate
    if loss_fn is None:
        loss_fn = criterion

    model.train()

    # A fresh optimizer per call is harmless here: SGD without momentum keeps
    # no state between steps.
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Follow the model's own device instead of a global CUDA flag.
    device = next(model.parameters()).device

    losses = []
    for image, label in train_loader:
        image = image.to(device)
        label = label.to(device)

        pred_label = model(image)
        loss = loss_fn(pred_label, label)
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return sum(losses) / len(losses)


def eval(model, test_loader):
    """Return the classification accuracy of ``model`` over ``test_loader``.

    The model is switched to evaluation mode, each batch is moved to the
    device holding the model's parameters, and the whole loop runs under
    ``torch.no_grad()`` so no autograd graph is built while scoring.

    Note: this name shadows Python's built-in ``eval``; it is kept because
    the notebook calls the function by this name.

    Args:
        model: ``nn.Module`` with at least one parameter that returns one row
            of class scores per input sample.
        test_loader: iterable yielding ``(image, label)`` tensor batches.

    Returns:
        float: fraction of samples whose highest-scoring class equals the label.

    Raises:
        ZeroDivisionError: if ``test_loader`` yields no samples.
        StopIteration: if ``model`` has no parameters.
    """
    model.eval()
    # Follow the model's own device instead of a global CUDA flag, so a
    # CPU-resident model still works on a machine where CUDA is available.
    device = next(model.parameters()).device

    total_cnt = 0
    correct_cnt = 0

    # Gradients are never needed for scoring; skipping graph construction
    # saves memory and time.
    with torch.no_grad():
        for image, label in test_loader:
            image = image.to(device)
            label = label.to(device)

            out = model(image)
            _, pred_label = torch.max(out, 1)
            total_cnt += image.size(0)
            correct_cnt += (pred_label == label).sum().item()

    return correct_cnt / total_cnt

0it [00:00, ?it/s]

True
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|█████████▉| 170467328/170498071 [05:01<00:00, 404156.12it/s]

In [0]:
class SimpleVGG(nn.Module):
    """VGG-style stack of 3x3 convolutions with a two-layer classifier head.

    Four convolutional stages, each ending in a 2x2 pooling layer, are
    followed by ``fc1`` and a 10-way output layer. ``forward`` flattens the
    final feature map to rows of 512*2*2 values, which is what a 3x32x32
    input produces after the four poolings.
    """

    def __init__(self):
        super(SimpleVGG, self).__init__()
        # Stage 1: one convolution, then 2x2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=(3,3), padding=(1,1))
        self.act1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: one convolution, then 2x2 max-pool.
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(3,3), padding=(1,1))
        self.act2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: three convolutions, then 2x2 max-pool.
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=(3,3), padding=(1,1))
        self.act3_1 = nn.ReLU()
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=(3,3), padding=(1,1))
        self.act3_2 = nn.ReLU()
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=(3,3), padding=(1,1))
        self.act3_3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 4: three convolutions, then 2x2 average-pool.
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=(3,3), padding=(1,1))
        self.act4_1 = nn.ReLU()
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=(3,3), padding=(1,1))
        self.act4_2 = nn.ReLU()
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=(3,3), padding=(1,1))
        self.act4_3 = nn.ReLU()
        self.pool4 = nn.AvgPool2d(kernel_size=2, stride=2)

        # Output layer
        self.fc1 = nn.Linear(512 * 2 * 2, 512)
        self.act5 = nn.ReLU()
        self.out = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 class scores for each image in ``x``."""
        x = self.pool1(self.act1(self.conv1(x)))
        x = self.pool2(self.act2(self.conv2(x)))

        x = self.act3_1(self.conv3_1(x))
        x = self.act3_2(self.conv3_2(x))
        # Third layer of the stage uses its own weights (conv3_3); the earlier
        # code re-applied conv3_2 here and left conv3_3 untrained.
        x = self.act3_3(self.conv3_3(x))
        x = self.pool3(x)

        x = self.act4_1(self.conv4_1(x))
        x = self.act4_2(self.conv4_2(x))
        # Same correction for stage 4: conv4_3 instead of a second conv4_2.
        x = self.act4_3(self.conv4_3(x))
        x = self.pool4(x)

        # Flatten the feature map before the fully-connected head.
        x = x.view(-1, 512 * 2 * 2)

        x = self.act5(self.fc1(x))
        return self.out(x)

In [0]:
# Build the VGG-style model and move it to the GPU only when one is
# available, matching the use_cuda flag computed earlier in the notebook.
vgg_model = SimpleVGG()
if use_cuda:
    vgg_model = vgg_model.cuda()

train_loss_lst = []      # mean training loss, one entry per epoch
test_accuracy_lst = []   # test-set accuracy, one entry per epoch
for epoch in range(total_epoch):
    train_loss = train(vgg_model, train_loader)
    train_loss_lst.append(train_loss)
    test_accuracy = eval(vgg_model, test_loader)
    test_accuracy_lst.append(test_accuracy)

    print(test_accuracy)

# Layer-by-layer summary for a single 3x32x32 input.
summary(vgg_model, input_size=(3, 32, 32))

0.1


KeyboardInterrupt: ignored

## 3. ResNet

In [0]:
class SimpleResNet(nn.Module):
    """VGG-style convolution stack with one additive skip connection in stages 3 and 4.

    In each of those stages the output of the first activation is added to
    the third convolution's output before the third activation. ``forward``
    flattens the final feature map to rows of 512*2*2 values, which is what a
    3x32x32 input produces after the four 2x2 poolings.
    """

    def __init__(self):
        super(SimpleResNet, self).__init__()
        # Stage 1: one convolution, then 2x2 max-pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=(3,3), padding=(1,1))
        self.act1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 2: one convolution, then 2x2 max-pool.
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(3,3), padding=(1,1))
        self.act2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 3: three convolutions, then 2x2 max-pool.
        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=(3,3), padding=(1,1))
        self.act3_1 = nn.ReLU()
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=(3,3), padding=(1,1))
        self.act3_2 = nn.ReLU()
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=(3,3), padding=(1,1))
        self.act3_3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Stage 4: three convolutions, then 2x2 average-pool.
        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=(3,3), padding=(1,1))
        self.act4_1 = nn.ReLU()
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=(3,3), padding=(1,1))
        self.act4_2 = nn.ReLU()
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=(3,3), padding=(1,1))
        self.act4_3 = nn.ReLU()
        self.pool4 = nn.AvgPool2d(kernel_size=2, stride=2)

        # Output layer
        self.fc1 = nn.Linear(512 * 2 * 2, 512)
        self.act5 = nn.ReLU()
        self.out = nn.Linear(512, 10)

    def forward(self, x):
        """Return the 10 class scores for each image in ``x``."""
        x = self.pool1(self.act1(self.conv1(x)))
        x = self.pool2(self.act2(self.conv2(x)))

        # Stage 3: keep the first activation as the skip branch.
        skip3 = self.act3_1(self.conv3_1(x))
        x = self.act3_2(self.conv3_2(skip3))
        # Residual layer uses its own weights (conv3_3); the earlier code
        # re-applied conv3_2 here and left conv3_3 untrained.
        x = self.act3_3(self.conv3_3(x) + skip3)
        x = self.pool3(x)

        # Stage 4: same pattern, with conv4_3 instead of a second conv4_2.
        skip4 = self.act4_1(self.conv4_1(x))
        x = self.act4_2(self.conv4_2(skip4))
        x = self.act4_3(self.conv4_3(x) + skip4)
        x = self.pool4(x)

        # Flatten the feature map before the fully-connected head.
        x = x.view(-1, 512 * 2 * 2)

        x = self.act5(self.fc1(x))
        return self.out(x)

In [0]:
# Build the ResNet-style model and move it to the GPU only when one is
# available, matching the use_cuda flag computed earlier in the notebook.
resnet_model = SimpleResNet()
if use_cuda:
    resnet_model = resnet_model.cuda()

train_loss_lst = []      # mean training loss, one entry per epoch
test_accuracy_lst = []   # test-set accuracy, one entry per epoch
for epoch in range(total_epoch):
    train_loss = train(resnet_model, train_loader)
    train_loss_lst.append(train_loss)
    test_accuracy = eval(resnet_model, test_loader)
    test_accuracy_lst.append(test_accuracy)

    print(test_accuracy)

# Layer-by-layer summary for a single 3x32x32 input.
summary(resnet_model, input_size=(3, 32, 32))

0.1446
0.1608
0.1415


KeyboardInterrupt: ignored

In [0]:
# Layer-by-layer summary of the ResNet-style model for a single 3x32x32 input.
summary(resnet_model, input_size=(3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 64, 32, 32]           1,792
              ReLU-2           [-1, 64, 32, 32]               0
         MaxPool2d-3           [-1, 64, 16, 16]               0
            Conv2d-4          [-1, 128, 16, 16]          73,856
              ReLU-5          [-1, 128, 16, 16]               0
         MaxPool2d-6            [-1, 128, 8, 8]               0
            Conv2d-7            [-1, 256, 8, 8]         295,168
              ReLU-8            [-1, 256, 8, 8]               0
            Conv2d-9            [-1, 256, 8, 8]         590,080
             ReLU-10            [-1, 256, 8, 8]               0
           Conv2d-11            [-1, 256, 8, 8]         590,080
             ReLU-12            [-1, 256, 8, 8]               0
        MaxPool2d-13            [-1, 256, 4, 4]               0
           Conv2d-14            [-1, 51