<a href="https://colab.research.google.com/github/dongwon18/DenseNet_CIFAR10/blob/main/DenseNet_CIFAR10.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

- Copyright 2021. Dongwon Kim All rights reserved.
- File name : DenseNet_CIFAR10.ipynb
- Written by Dongwon Kim
    
- DenseNet
    - build, train DenseNet
    - test the model on the CIFAR10 dataset, targeting an accuracy above 93%
- Modification history
    - written by Dongwon Kim on Oct 2, 2021

- using Google Colab

#1

In [None]:
import numpy as np
import torchvision.datasets as datasets
from torchvision import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
import torch
from google.colab import files
from torchsummary import summary
import math

[reference](https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html)
- image: 3 x 32 x 32
- total 10 classes

In [None]:
# Per-channel mean and standard deviation passed to Normalize (one value per RGB channel).
NORM_MEAN = (0.4914, 0.4822, 0.4465)
NORM_STD = (0.2023, 0.1994, 0.2010)

# Preprocessing shared by both splits: tensor conversion followed by channel-wise normalization.
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(NORM_MEAN, NORM_STD)]
)

# Both splits live under the same root directory and differ only in the `train` flag.
train_dataset = datasets.CIFAR10(root='./', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./', train=False, download=True, transform=transform)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting ./cifar-10-python.tar.gz to ./
Files already downloaded and verified


In [None]:
# Sanity check: shape of the raw training image array and the number of training labels.
print(train_dataset.data.shape, len(train_dataset.targets))

(50000, 32, 32, 3) 50000


In [None]:
# Sanity check: shape of the raw test image array and the number of test labels.
print(test_dataset.data.shape, len(test_dataset.targets))

(10000, 32, 32, 3) 10000


In [None]:
# Hold out 10% of the training set for validation. The split is stratified on the
# labels so both parts keep the class balance, and it is seeded so that
# Restart & Run All reproduces the same train/validation partition.
SPLIT_SEED = 42
tr_index, val_index = train_test_split(
    list(range(len(train_dataset))),
    test_size=0.1,
    shuffle=True,
    stratify=train_dataset.targets,
    random_state=SPLIT_SEED,
)

In [None]:
# One sampler per index list: each draws, in random order, only from its own subset,
# so the training and validation loaders can share the same underlying dataset.
tr_sampler, val_sampler = (
    SubsetRandomSampler(subset) for subset in (tr_index, val_index)
)

In [None]:
batch_size = 128

# Settings common to all three loaders; num_workers=0 loads batches in the main process.
loader_options = dict(batch_size=batch_size, num_workers=0)

# Training and validation read from the same dataset, each restricted by its own sampler.
train_loader = DataLoader(train_dataset, sampler=tr_sampler, **loader_options)
val_loader = DataLoader(train_dataset, sampler=val_sampler, **loader_options)

# The test split uses no sampler and no shuffling.
test_loader = DataLoader(test_dataset, **loader_options)

In [None]:
# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
class BottleneckBlock(nn.Module):
    """Dense layer with a bottleneck: BN-ReLU-Conv(1x1) then BN-ReLU-Conv(3x3).

    The 1x1 convolution maps the input to ``4 * growth_rate`` channels, the
    3x3 convolution reduces that to ``growth_rate`` new feature maps, and
    those are concatenated in front of the unchanged input along dim 1.

    Args:
        in_plane: number of channels of the incoming feature map.
        growth_rate: number of channels this layer adds to its input.
    """

    def __init__(self, in_plane, growth_rate):
        super().__init__()
        bottleneck_width = 4 * growth_rate

        self.bn1 = nn.BatchNorm2d(in_plane)
        self.conv1 = nn.Conv2d(in_plane, bottleneck_width, kernel_size=1, stride=1, bias=False)
        self.bn2 = nn.BatchNorm2d(bottleneck_width)
        self.conv2 = nn.Conv2d(bottleneck_width, growth_rate, kernel_size=3, padding=1, bias=False)
        # A single in-place ReLU module serves both activations.
        self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        """Return the ``growth_rate`` new feature maps concatenated with ``x`` on dim 1."""
        squeezed = self.conv1(self.relu(self.bn1(x)))
        new_features = self.conv2(self.relu(self.bn2(squeezed)))
        return torch.cat((new_features, x), dim=1)





In [None]:
class TransitionBlock(nn.Module):
    """Transition between dense blocks: BN-ReLU-Conv(1x1), then 2x2 average pooling.

    Args:
        in_plane: number of channels of the incoming feature map.
        out_plane: number of channels produced by the 1x1 convolution.
    """

    def __init__(self, in_plane, out_plane):
        super().__init__()
        self.bn1 = nn.BatchNorm2d(in_plane)
        self.relu = nn.ReLU(inplace=True)
        self.conv1 = nn.Conv2d(in_plane, out_plane, kernel_size=1, stride=1, bias=False)

    def forward(self, x):
        """Project ``x`` to ``out_plane`` channels and average-pool it with a 2x2 window."""
        activated = self.relu(self.bn1(x))
        projected = self.conv1(activated)
        return F.avg_pool2d(projected, kernel_size=2)

In [None]:
class DenseNet(nn.Module):
    """DenseNet image classifier built from four dense blocks and three transitions.

    Layout: 3x3 stem convolution -> (dense block -> transition) x 3 -> dense
    block -> BatchNorm -> ReLU -> global average pooling -> linear classifier.

    Args:
        block: layer class stacked inside each dense block. It is called as
            ``block(in_plane, growth_rate)`` and is expected to add
            ``growth_rate`` channels to its input.
        growth_rate: channels added by every layer of a dense block; the stem
            convolution outputs ``2 * growth_rate`` channels.
        num_classes: size of the final linear layer's output.
        reduction: each transition keeps ``floor(channels * reduction)`` channels.
        nblocks: four integers giving the number of layers in dense blocks
            1 to 4. The default reproduces the previously hard-coded layout.

    Raises:
        ValueError: if ``nblocks`` does not contain exactly four entries.
    """

    def __init__(self, block=BottleneckBlock, growth_rate=12, num_classes=10, reduction=0.5,
                 nblocks=(6, 12, 24, 16)):
        super(DenseNet, self).__init__()

        nblocks = tuple(nblocks)
        if len(nblocks) != 4:
            raise ValueError("nblocks must contain exactly 4 entries, got %d" % len(nblocks))

        self.growth_rate = growth_rate

        in_plane = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, in_plane, kernel_size=3, padding=1, bias=False)

        # 1st Dense & Transition
        self.dense1 = self.make_dense_block(block, in_plane, nblocks[0])
        in_plane += nblocks[0] * growth_rate
        out_plane = int(math.floor(in_plane * reduction))
        self.trans1 = TransitionBlock(in_plane, out_plane)
        in_plane = out_plane

        # 2nd Dense & Transition
        self.dense2 = self.make_dense_block(block, in_plane, nblocks[1])
        in_plane += nblocks[1] * growth_rate
        out_plane = int(math.floor(in_plane * reduction))
        self.trans2 = TransitionBlock(in_plane, out_plane)
        in_plane = out_plane

        # 3rd Dense & Transition
        self.dense3 = self.make_dense_block(block, in_plane, nblocks[2])
        in_plane += nblocks[2] * growth_rate
        out_plane = int(math.floor(in_plane * reduction))
        self.trans3 = TransitionBlock(in_plane, out_plane)
        in_plane = out_plane

        # 4th Dense (no transition afterwards)
        self.dense4 = self.make_dense_block(block, in_plane, nblocks[3])
        in_plane += nblocks[3] * growth_rate

        self.bn = nn.BatchNorm2d(in_plane)
        self.fc = nn.Linear(in_plane, num_classes)
        self.relu = nn.ReLU()

    def make_dense_block(self, block, in_plane, nblock):
        """Stack ``nblock`` layers, widening the input of each by ``growth_rate``.

        Args:
            block: layer class, called as ``block(in_plane, growth_rate)``.
            in_plane: channel count entering the first layer.
            nblock: number of layers to stack.

        Returns:
            ``nn.Sequential`` holding the layers in order.
        """
        layers = []
        for _ in range(nblock):
            layers.append(block(in_plane, self.growth_rate))
            in_plane += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return the class scores of shape ``(batch, num_classes)`` for image batch ``x``."""
        out = self.conv1(x)

        # Each transition average-pools by 2 (32 -> 16 -> 8 -> 4 for a 32x32 input).
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))

        out = self.dense4(out)

        out = self.bn(out)
        out = self.relu(out)
        # Global average pooling to 1x1. On 32x32 inputs this equals the former
        # fixed 4x4 pooling, and it keeps the classifier input size correct for
        # other input resolutions as well.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.fc(out)

        return out


In [None]:
# Build the network with its default configuration and place it on the selected device.
model = DenseNet().to(device)

In [None]:
# Show the module hierarchy of the network as the cell output.
model

DenseNet(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (dense1): Sequential(
    (0): BottleneckBlock(
      (bn1): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(24, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (relu): ReLU(inplace=True)
    )
    (1): BottleneckBlock(
      (bn1): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv1): Conv2d(36, 48, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn2): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (relu): ReLU(inplace=True)
    )
    (2): BottleneckBlock(
      (bn1

In [None]:
# Print per-layer output shapes and parameter counts for a 3 x 32 x 32 input.
summary(model, (3, 32, 32))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 24, 32, 32]             648
       BatchNorm2d-2           [-1, 24, 32, 32]              48
              ReLU-3           [-1, 24, 32, 32]               0
            Conv2d-4           [-1, 48, 32, 32]           1,152
       BatchNorm2d-5           [-1, 48, 32, 32]              96
              ReLU-6           [-1, 48, 32, 32]               0
            Conv2d-7           [-1, 12, 32, 32]           5,184
   BottleneckBlock-8           [-1, 36, 32, 32]               0
       BatchNorm2d-9           [-1, 36, 32, 32]              72
             ReLU-10           [-1, 36, 32, 32]               0
           Conv2d-11           [-1, 48, 32, 32]           1,728
      BatchNorm2d-12           [-1, 48, 32, 32]              96
             ReLU-13           [-1, 48, 32, 32]               0
           Conv2d-14           [-1, 12,

In [None]:
learning_rate = 0.1

# Classification loss applied to the scores returned by the model.
criterion = nn.CrossEntropyLoss()

# SGD with momentum and weight decay over every model parameter.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=learning_rate,
    momentum=0.9,
    weight_decay=5e-4,
)

# Cosine annealing of the learning rate; T_max equals the 200 epochs of the training run.
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=200)

In [None]:
# Number of mini-batches in one pass over each loader.
train_batches = len(train_loader)
val_batches = len(val_loader)

# Start from +inf so the first epoch's validation loss always counts as an
# improvement, whatever the scale of the loss (replaces the arbitrary sentinel 1024).
best_valid_loss = float("inf")
# Reset to 0 by the training loop whenever the validation loss improves.
patience = 0

In [None]:
epochs = 200


def run_epoch(loader, training):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    Args:
        loader: iterable yielding ``(inputs, labels)`` batches.
        training: when True the model is put in train mode and the optimizer
            is stepped on every batch; when False the model is put in eval
            mode and gradient tracking is disabled.

    Returns:
        Tuple of the per-sample mean loss and the fraction of correctly
        classified samples over the whole pass.
    """
    model.train(training)

    loss_sum = 0.0
    correct = 0
    total = 0

    with torch.set_grad_enabled(training):
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)

            if training:
                optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            if training:
                loss.backward()
                optimizer.step()

            # The criterion yields a batch mean, so weight it by the batch size;
            # otherwise the short final batch counts as much as a full one.
            n_samples = labels.size(0)
            loss_sum += loss.item() * n_samples

            _, predicted = outputs.max(1)
            correct += predicted.eq(labels).sum().item()
            total += n_samples

    return loss_sum / total, correct / total


for epoch in range(epochs):
    train_loss, train_acc = run_epoch(train_loader, training=True)
    val_loss, val_acc = run_epoch(val_loader, training=False)

    # Checkpoint only when the validation loss is the lowest seen so far.
    if val_loss < best_valid_loss:
        torch.save(model.state_dict(), './DenseNet_CIFAR10.pt')
        best_valid_loss = val_loss
        patience = 0

    print('[%d/%d] TrainLoss: %.3f, ValLoss: %.3f | TrainAcc: %.2f, ValAcc: %.2f'
          % (epoch+1, epochs, train_loss, val_loss, train_acc, val_acc))

    # Advance the learning-rate schedule once per epoch.
    scheduler.step()

[1/200] TrainLoss: 1.504, ValLoss: 1.201 | TrainAcc: 0.45, ValAcc: 0.57
[2/200] TrainLoss: 0.954, ValLoss: 0.798 | TrainAcc: 0.66, ValAcc: 0.71
[3/200] TrainLoss: 0.697, ValLoss: 0.727 | TrainAcc: 0.76, ValAcc: 0.75
[4/200] TrainLoss: 0.572, ValLoss: 0.764 | TrainAcc: 0.80, ValAcc: 0.73
[5/200] TrainLoss: 0.507, ValLoss: 0.674 | TrainAcc: 0.82, ValAcc: 0.77
[6/200] TrainLoss: 0.460, ValLoss: 0.561 | TrainAcc: 0.84, ValAcc: 0.81
[7/200] TrainLoss: 0.430, ValLoss: 0.574 | TrainAcc: 0.85, ValAcc: 0.81
[8/200] TrainLoss: 0.402, ValLoss: 0.568 | TrainAcc: 0.86, ValAcc: 0.80
[9/200] TrainLoss: 0.388, ValLoss: 0.598 | TrainAcc: 0.87, ValAcc: 0.80
[10/200] TrainLoss: 0.370, ValLoss: 0.548 | TrainAcc: 0.87, ValAcc: 0.81
[11/200] TrainLoss: 0.361, ValLoss: 0.619 | TrainAcc: 0.87, ValAcc: 0.80
[12/200] TrainLoss: 0.358, ValLoss: 0.499 | TrainAcc: 0.88, ValAcc: 0.84
[13/200] TrainLoss: 0.343, ValLoss: 0.547 | TrainAcc: 0.88, ValAcc: 0.81
[14/200] TrainLoss: 0.347, ValLoss: 0.696 | TrainAcc: 0.88, 

In [None]:
# Download the checkpoint file written by the training loop from the Colab runtime.
files.download('./DenseNet_CIFAR10.pt')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>