# Basics of Convolutional Neural Networks
### CNN with MNIST

#### Feature map size: $O = \lfloor (I - K + 2P) / S \rfloor + 1$, where $I$: input (image) size, $K$: filter (kernel) size, $S$: stride, $P$: padding

In [4]:
# import module and initialize hyper-parameters
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset   # torchvision module which reads data
import torchvision.transforms as transforms  # torchvision module which transform loaded image
from torch.utils.data import DataLoader

In [5]:
# hyper-parameters used by the training and evaluation cells
num_epoch = 10
learning_rate = 0.0002
batch_size = 256

# MNIST datasets, stored under the current directory (download = True fetches them when needed).
# transform        : applied to every image (ToTensor converts it to a tensor)
# target_transform : applied to every label (None leaves the label unchanged)
mnist_train = dset.MNIST(
    './',
    train = True,
    transform = transforms.ToTensor(),
    target_transform = None,
    download = True,
)
mnist_test = dset.MNIST(
    './',
    train = False,
    transform = transforms.ToTensor(),
    target_transform = None,
    download = True,
)

# Batched iterators over both splits. drop_last = True discards a final batch
# holding fewer than batch_size samples, so every batch has exactly batch_size rows.
train_loader = DataLoader(
    mnist_train,
    batch_size = batch_size,
    shuffle = True,
    num_workers = 2,
    drop_last = True,
)
test_loader = DataLoader(
    mnist_test,
    batch_size = batch_size,
    shuffle = False,
    num_workers = 2,
    drop_last = True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


100.1%

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


113.5%

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


100.4%

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


180.4%

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw
Processing...
Done!


In [10]:
# build CNN
class CNN(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images (MNIST).

    ``self.layer`` uses 5x5 convolutions without padding (each shrinks the side
    length by 4) and 2x2 max-pooling (each halves it), so a 28x28 input goes
    28 -> 24 -> 20 -> 10 -> 6 -> 3. That is why the first fully connected layer
    takes ``64 * 3 * 3`` inputs.

    ``forward`` returns one row of 10 raw class scores per input image.
    """

    def __init__(self):
        super(CNN,self).__init__()
        # convolutional feature extractor: 1 -> 16 -> 32 -> 64 channels
        self.layer = nn.Sequential(
            nn.Conv2d(1, 16, 5),
            nn.ReLU(),
            nn.Conv2d(16, 32, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 5),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        # classifier head: flattened features -> 100 hidden units -> 10 classes
        self.fc_layer = nn.Sequential(
            nn.Linear(64*3*3, 100),
            nn.ReLU(),
            nn.Linear(100, 10),
        )

    def forward(self, x):
        """Compute class scores for a batch ``x`` of shape (N, 1, 28, 28)."""
        out = self.layer(x)
        # Flatten per sample using the batch dimension of the tensor itself.
        # Relying on the notebook-level batch_size would silently mis-shape
        # (or crash on) any batch of a different size, e.g. a final partial batch.
        out = out.view(out.size(0), -1)
        out = self.fc_layer(out)

        return out

In [11]:
# initialize model, loss function and optimizer
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = CNN().to(device)
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)

# training
loss_arr = []  # loss values recorded at the logging steps below
for i in range(num_epoch):
    for j, [image, label] in enumerate(train_loader):
        x = image.to(device)
        y_ = label.to(device)

        optimizer.zero_grad()
        # Call the module itself instead of model.forward(x): nn.Module.__call__
        # runs any registered hooks and then dispatches to forward().
        output = model(x)
        loss = loss_func(output, y_)
        loss.backward()
        optimizer.step()

        # j restarts at 0 every epoch; in the recorded run this condition fired
        # once per epoch (on the first batch), giving one log line per epoch.
        if j%1000 == 0 :
            print(loss)
            loss_arr.append(loss.cpu().detach().numpy())

tensor(2.3005, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.2530, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.1471, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.1329, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0790, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0638, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0489, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0655, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0280, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.0230, device='cuda:0', grad_fn=<NllLossBackward>)


In [13]:
# evaluate the trained model on the test data
correct = 0  # number of correctly classified samples
total = 0  # number of samples evaluated

# Put the model in inference mode before evaluating (affects layers such as
# dropout / batch-norm if the model has any).
model.eval()

# no_grad: gradients are not needed for evaluation
with torch.no_grad():
    # test_loader was created with drop_last = True, so a final partial batch
    # is not part of this accuracy.
    for image, label in test_loader :
        x = image.to(device)
        y_ = label.to(device)

        # call the module (not .forward) so registered hooks are honoured
        output = model(x)
        # index of the largest score along dim 1 is the predicted class
        _, output_index = torch.max(output, 1)

        total += label.size(0)
        correct += (output_index == y_).sum().float()

    print('Accuracy of Test Data: {}'.format(100*correct/total))

Accuracy of Test Data: 98.8581771850586


### Naive VGG16

In [15]:
# VGG16 has 2 stages that repeat the convolution twice and 3 stages that repeat it three times, so build conv_2_block and conv_3_block first
def conv_2_block(in_dim, out_dim):
    """Build a stage of two 3x3 conv + ReLU pairs followed by 2x2 max-pooling.

    The first convolution maps ``in_dim`` to ``out_dim`` channels, the second
    keeps ``out_dim``. Padding of 1 keeps the spatial size through the
    convolutions; the pooling layer (kernel 2, stride 2) halves it.
    Returns the layers wrapped in an ``nn.Sequential``.
    """
    stage = []
    for src_channels in (in_dim, out_dim):
        stage.append(nn.Conv2d(src_channels, out_dim, kernel_size = 3, padding = 1))
        stage.append(nn.ReLU())
    stage.append(nn.MaxPool2d(2, 2))

    return nn.Sequential(*stage)
    
def conv_3_block(in_dim, out_dim):
    """Build a stage of three 3x3 conv + ReLU pairs followed by 2x2 max-pooling.

    The first convolution maps ``in_dim`` to ``out_dim`` channels, the other
    two keep ``out_dim``. Padding of 1 keeps the spatial size through the
    convolutions; the pooling layer (kernel 2, stride 2) halves it.
    Returns the layers wrapped in an ``nn.Sequential``.
    """
    stage = []
    for src_channels in (in_dim, out_dim, out_dim):
        stage.append(nn.Conv2d(src_channels, out_dim, kernel_size = 3, padding = 1))
        stage.append(nn.ReLU())
    stage.append(nn.MaxPool2d(2, 2))

    return nn.Sequential(*stage)

In [17]:
# building VGG16 net
class VGG(nn.Module):
    """Simplified VGG16-style classifier for 3-channel images.

    Args:
        base_dim: output channels of the first stage; later stages use
            2x, 4x, 8x and 8x this width.
        num_classes: number of output scores per image.

    Each of the five stages ends with a 2x2 max-pooling, and the first linear
    layer expects an ``8 * base_dim`` x 7 x 7 feature map, which is what
    e.g. 224x224 inputs produce.
    """

    def __init__(self, base_dim, num_classes=2):
        super(VGG, self).__init__()

        # channel widths of the five convolutional stages
        w1, w2, w3, w4 = base_dim, 2*base_dim, 4*base_dim, 8*base_dim
        self.feature = nn.Sequential(
            conv_2_block(3, w1),
            conv_2_block(w1, w2),
            conv_3_block(w2, w3),
            conv_3_block(w3, w4),
            conv_3_block(w4, w4),
        )

        # classifier head: flattened features -> 100 -> 20 -> num_classes
        flat_features = 8*base_dim * 7 * 7
        self.fc_layer = nn.Sequential(
            nn.Linear(flat_features, 100),
            nn.ReLU(True),
            nn.Linear(100, 20),
            nn.ReLU(True),
            nn.Linear(20, num_classes),
        )

    def forward(self, x):
        """Return class scores for the image batch ``x``."""
        feature_map = self.feature(x)
        # one flat feature vector per sample
        flat = feature_map.view(feature_map.size(0), -1)

        return self.fc_layer(flat)
        

In [20]:
# Pytorch official VGG16 Net

class VGG(nn.Module):
    """VGG network in the layout of the official PyTorch (torchvision) model.

    NOTE: this class re-uses the name ``VGG``, so running this cell replaces
    the simplified ``VGG`` class defined earlier in the notebook.

    Args:
        features: module producing the convolutional feature map; it must
            output 512 channels (see ``make_layers``).
        num_classes: number of output scores per image.
        init_weights: when true, re-initialise all conv / batch-norm / linear
            weights with ``_initialize_weights``.
    """

    def __init__(self, features, num_classes = 1000, init_weights=True):
        super(VGG, self).__init__()
        self.features = features
        # pools any spatial size down to 7x7 so the classifier input size is fixed
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7,4096),
            nn.ReLU(True),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return class scores for the image batch ``x``."""
        x = self.features(x)
        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to one feature vector per sample
        x = self.classifier(x)

        return x

    def _initialize_weights(self):
        """Initialise every conv, batch-norm and linear layer of the model in place."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # keyword is `nonlinearity`; the previous misspelling raised a TypeError
                nn.init.kaiming_normal_(m.weight, mode = 'fan_out', nonlinearity = 'relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def make_layers(cfg, batch_norm = False):
        """Build the convolutional feature extractor described by ``cfg``.

        Args:
            cfg: sequence where an integer adds a 3x3 convolution (padding 1)
                with that many output channels followed by ReLU, and ``'M'``
                adds a 2x2 max-pooling layer with stride 2.
            batch_norm: when true, insert ``BatchNorm2d`` between each
                convolution and its ReLU.

        Returns:
            ``nn.Sequential`` of the generated layers; the input is expected
            to have 3 channels.

        Declared as a static method because it takes no ``self``; it can be
        called as ``VGG.make_layers(cfg)`` or on an instance.
        """
        layers = []
        in_channels = 3
        for v in cfg :
            if v == 'M':
                layers += [nn.MaxPool2d(kernel_size = 2, stride = 2)]
            else :
                conv2d = nn.Conv2d(in_channels, v, kernel_size = 3, padding = 1)
                if batch_norm :
                    layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace = True)]
                else :
                    layers += [conv2d, nn.ReLU(inplace = True)]
                # the next convolution consumes what this one produced
                in_channels = v

        return nn.Sequential(*layers)
    
# Layer layouts consumed by make_layers: an integer is a 3x3 convolution with
# that many output channels, 'M' is a 2x2 max-pooling layer.
cfgs = dict(
    # 8 convolutions
    A = [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    # 10 convolutions
    B = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    # 13 convolutions
    D = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    # 16 convolutions
    E = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
)

### GoogLeNet

In [21]:
# building blocks of the inception module in GoogLeNet

# 1*1 conv layer in GoogLeNet
def conv_1(in_dim, out_dim):
    """Return the 1x1 branch: a 1x1 convolution (stride 1) followed by ReLU.

    Maps ``in_dim`` channels to ``out_dim`` channels without changing the
    spatial size.
    """
    pointwise = nn.Conv2d(in_dim, out_dim, kernel_size = 1, stride = 1)

    return nn.Sequential(pointwise, nn.ReLU())

# 1*1 conv -> 3*3 conv layer in GoogLeNet
def conv_1_3(in_dim, mid_dim, out_dim):
    """Return the 3x3 branch: 1x1 conv -> ReLU -> 3x3 conv -> ReLU.

    The 1x1 convolution maps ``in_dim`` to ``mid_dim`` channels; the 3x3
    convolution (stride 1, padding 1) maps ``mid_dim`` to ``out_dim`` and
    keeps the spatial size.
    """
    reduce = nn.Conv2d(in_dim, mid_dim, kernel_size = 1, stride = 1)
    expand = nn.Conv2d(mid_dim, out_dim, kernel_size = 3, stride = 1, padding = 1)

    return nn.Sequential(reduce, nn.ReLU(), expand, nn.ReLU())

def conv_1_5(in_dim, mid_dim, out_dim):
    """Return the 5x5 branch: 1x1 conv -> ReLU -> 5x5 conv -> ReLU.

    The 1x1 convolution maps ``in_dim`` to ``mid_dim`` channels; the 5x5
    convolution (stride 1, padding 2) maps ``mid_dim`` to ``out_dim`` and
    keeps the spatial size.
    """
    reduce = nn.Conv2d(in_dim, mid_dim, kernel_size = 1, stride = 1)
    expand = nn.Conv2d(mid_dim, out_dim, kernel_size = 5, stride = 1, padding = 2)

    return nn.Sequential(reduce, nn.ReLU(), expand, nn.ReLU())

def max_3_1(in_dim, out_dim):
    """Return the pooling branch: 3x3 max-pool -> 1x1 conv -> ReLU.

    The pooling uses stride 1 and padding 1, so the spatial size is kept;
    the 1x1 convolution then maps ``in_dim`` to ``out_dim`` channels.
    """
    pool = nn.MaxPool2d(kernel_size = 3, stride = 1, padding = 1)
    project = nn.Conv2d(in_dim, out_dim, kernel_size = 1, stride = 1)

    return nn.Sequential(pool, project, nn.ReLU())

In [22]:
# modeling inception module
class inception_module(nn.Module):
    """Inception module: four parallel branches concatenated along channels.

    Args:
        in_dim: channels of the input feature map.
        out_dim_1: output channels of the 1x1 branch.
        mid_dim_3, out_dim_3: reduction / output channels of the 3x3 branch.
        mid_dim_5, out_dim_5: reduction / output channels of the 5x5 branch.
        pool: output channels of the max-pool branch.

    The output has ``out_dim_1 + out_dim_3 + out_dim_5 + pool`` channels.
    """

    def __init__(self, in_dim, out_dim_1, mid_dim_3, out_dim_3, mid_dim_5, out_dim_5, pool):
        super(inception_module, self).__init__()

        # one sub-network per branch, all fed with the same input
        self.conv_1 = conv_1(in_dim, out_dim_1)
        self.conv_1_3 = conv_1_3(in_dim, mid_dim_3, out_dim_3)
        self.conv_1_5 = conv_1_5(in_dim, mid_dim_5, out_dim_5)
        self.max_3_1 = max_3_1(in_dim, pool)

    def forward(self, x):
        """Run every branch on ``x`` and join the results on dim 1 (channels)."""
        branch_outputs = [
            self.conv_1(x),
            self.conv_1_3(x),
            self.conv_1_5(x),
            self.max_3_1(x),
        ]

        return torch.cat(branch_outputs, 1)
        

In [24]:
# constructing GoogLeNet
class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier assembled from ``inception_module`` blocks.

    Args:
        base_dim: output channels of the 7x7 stem convolution; the stem
            (``layer_1``) ends with ``base_dim * 3`` channels.
        num_classes: number of output scores per image.

    The stem and the pooling layers reduce the resolution by a factor of 32,
    and the closing ``AvgPool2d(7, 1)`` + ``Linear(1024, ...)`` need a 7x7
    feature map at that point, e.g. 224x224 inputs.
    """

    def __init__(self, base_dim, num_classes = 2):
        super(GoogLeNet, self).__init__()
        # stem: 3-channel image -> base_dim * 3 channels, resolution / 8
        self.layer_1 = nn.Sequential(
            nn.Conv2d(3, base_dim, 7, 2, 3),
            nn.MaxPool2d(3, 2, 1),
            nn.Conv2d(base_dim, base_dim*3, 3, 1, 1),
            nn.MaxPool2d(3, 2, 1),
        )

        self.layer_2 = nn.Sequential(
            inception_module(base_dim * 3, 64, 96, 128, 16, 32, 32),
            # The module above always emits 64 + 128 + 32 + 32 = 256 channels,
            # so the input width here is 256 regardless of base_dim
            # (base_dim * 4 matched it only for base_dim == 64).
            inception_module(256, 128, 128, 192, 32, 96, 64),
            nn.MaxPool2d(3, 2, 1),
        )

        # each in_dim below equals the channel sum of the preceding module
        self.layer_3 = nn.Sequential(
            inception_module(480, 192, 96, 208, 16, 48, 64),
            inception_module(512, 160, 112, 224, 24, 64, 64),
            inception_module(512, 128, 128, 256, 24, 64, 64),
            inception_module(512, 112, 144, 288, 32, 64, 64),
            inception_module(528, 256, 160, 320, 32, 128, 128),
            nn.MaxPool2d(3, 2, 1),
        )

        self.layer_4 = nn.Sequential(
            inception_module(832, 256, 160, 320, 32, 128, 128),
            inception_module(832, 384, 192, 384, 48, 128, 128),
            nn.AvgPool2d(7, 1),
        )

        self.layer_5 = nn.Dropout2d(0.4)
        # honour num_classes (the output width used to be fixed at 1000)
        self.fc_layer = nn.Linear(1024, num_classes)

    def forward(self, x):
        """Return class scores for the image batch ``x``."""
        out = self.layer_1(x)
        out = self.layer_2(out)
        out = self.layer_3(out)
        out = self.layer_4(out)
        out = self.layer_5(out)
        # flatten per sample using the tensor's own batch dimension rather
        # than the notebook-level batch_size
        out = out.view(out.size(0), -1)
        out = self.fc_layer(out)

        return out

### ResNet

In [26]:
# basic conv block
def conv_block_1(in_dim, out_dim, act_fn, stride = 1):
    """Return a 1x1 convolution followed by the given activation module.

    Args:
        in_dim, out_dim: input / output channels of the convolution.
        act_fn: activation module appended after the convolution (the same
            object that was passed in, not a copy).
        stride: stride of the convolution.
    """
    pointwise = nn.Conv2d(in_dim, out_dim, kernel_size = 1, stride = stride)

    return nn.Sequential(pointwise, act_fn)

def conv_block_3(in_dim, out_dim, act_fn):
    """Return a 3x3 convolution followed by the given activation module.

    The convolution uses stride 1 and padding 1, so the spatial size is kept.
    ``act_fn`` is appended as-is (the same object that was passed in).
    """
    spatial = nn.Conv2d(in_dim, out_dim, kernel_size = 3, stride = 1, padding = 1)

    return nn.Sequential(spatial, act_fn)

class BottleNeck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    Args:
        in_dim: channels of the input.
        mid_dim: channels inside the bottleneck (after the first 1x1 conv).
        out_dim: channels of the output.
        act_fn: activation module applied after every convolution of the
            main path.
        down: when true, the first 1x1 convolution uses stride 2 (halving the
            resolution) and the shortcut is a stride-2 1x1 convolution.
    """

    def __init__(self, in_dim, mid_dim, out_dim, act_fn, down = False):
        super(BottleNeck, self).__init__()
        self.act_fn = act_fn
        self.down = down

        if self.down:
            self.layer = nn.Sequential(
                conv_block_1(in_dim, mid_dim, act_fn, 2),
                conv_block_3(mid_dim, mid_dim, act_fn),
                conv_block_1(mid_dim, out_dim, act_fn),
            )
            # shortcut must shrink the resolution exactly like the main path
            self.downsample = nn.Conv2d(in_dim, out_dim, 1, 2)
        else :
            self.layer = nn.Sequential(
                conv_block_1(in_dim, mid_dim, act_fn),
                conv_block_3(mid_dim, mid_dim, act_fn),
                conv_block_1(mid_dim, out_dim, act_fn),
            )
            # 1x1 projection used only when input and output shapes differ
            self.dim_equalizer = nn.Conv2d(in_dim, out_dim, kernel_size = 1)

    def forward(self, x):
        """Return ``layer(x)`` plus the (possibly projected) shortcut of ``x``."""
        out = self.layer(x)

        if self.down:
            shortcut = self.downsample(x)
        elif x.size() != out.size():
            # Compare the shapes by value: `is not` compares object identity and
            # was therefore always true, forcing the projection on every call.
            shortcut = self.dim_equalizer(x)
        else:
            # shapes already match: plain identity shortcut
            shortcut = x

        return out + shortcut
    
class ResNet(nn.Module):
    """ResNet-style classifier built from ``BottleNeck`` blocks.

    Args:
        base_dim: output channels of the 7x7 stem convolution; the stages
            widen this to 4x, 8x, 16x and finally 32x ``base_dim``.
        num_classes: number of output scores per image.

    The stem and the three down-sampling bottlenecks reduce the resolution by
    a factor of 32, and ``AvgPool2d(7, 1)`` + ``Linear(base_dim * 32, ...)``
    need a 7x7 feature map at that point, e.g. 224x224 inputs.
    """

    def __init__(self, base_dim, num_classes = 2):
        super(ResNet, self).__init__()
        # one activation module shared by every bottleneck block
        self.act_fn = nn.ReLU()
        # stem: stride-2 7x7 convolution, then stride-2 3x3 max-pooling
        self.layer_1 = nn.Sequential(
            nn.Conv2d(3, base_dim, 7, 2, 3),
            nn.ReLU(),
            nn.MaxPool2d(3, 2, 1),  # torch.nn has no `MaxPool`; the 2-D layer is MaxPool2d
        )
        # in every stage below the last block (down = True) halves the resolution
        self.layer_2 = nn.Sequential(
            BottleNeck(base_dim, base_dim, base_dim * 4, self.act_fn),
            BottleNeck(base_dim * 4, base_dim, base_dim * 4, self.act_fn),
            BottleNeck(base_dim * 4, base_dim, base_dim *4, self.act_fn, down = True),

        )
        self.layer_3 = nn.Sequential(
            BottleNeck(base_dim * 4, base_dim * 2, base_dim * 8, self.act_fn),
            BottleNeck(base_dim * 8, base_dim * 2, base_dim * 8, self.act_fn),
            BottleNeck(base_dim * 8, base_dim * 2, base_dim * 8, self.act_fn),
            BottleNeck(base_dim * 8, base_dim * 2, base_dim * 8, self.act_fn, down = True),
            )
        self.layer_4 = nn.Sequential(
            BottleNeck(base_dim * 8, base_dim * 4, base_dim * 16, self.act_fn),
            BottleNeck(base_dim * 16, base_dim * 4, base_dim * 16, self.act_fn),
            BottleNeck(base_dim * 16, base_dim * 4, base_dim * 16, self.act_fn),
            BottleNeck(base_dim * 16, base_dim * 4, base_dim * 16, self.act_fn),
            BottleNeck(base_dim * 16, base_dim * 4, base_dim * 16, self.act_fn),
            BottleNeck(base_dim * 16, base_dim * 4, base_dim * 16, self.act_fn, down = True),
            )
        # final stage keeps the resolution (no down-sampling block)
        self.layer_5 = nn.Sequential(
            BottleNeck(base_dim * 16, base_dim * 8, base_dim * 32, self.act_fn),
            BottleNeck(base_dim * 32, base_dim * 8, base_dim * 32, self.act_fn),
            BottleNeck(base_dim * 32, base_dim * 8, base_dim * 32, self.act_fn),
            )
        self.avgpool = nn.AvgPool2d(7, 1)
        self.fc_layer = nn.Linear(base_dim * 32, num_classes)

    def forward(self, x):
        """Return class scores for the image batch ``x``."""
        out = self.layer_1(x)
        out = self.layer_2(out)
        out = self.layer_3(out)
        out = self.layer_4(out)
        out = self.layer_5(out)
        out = self.avgpool(out)
        # flatten per sample using the tensor's own batch dimension rather
        # than the notebook-level batch_size
        out = out.view(out.size(0), -1)
        out = self.fc_layer(out)

        return out
