In [1]:
import time

from food101 import FOOD101
from dense import _DenseLayer, _DenseBlock, _Transition


import torch
import torch.nn as nn
from torch.optim.lr_scheduler import ExponentialLR

In [2]:
# Mini-batch size handed to food.get_dls() when the data loaders are built.
BATCH_SIZE = 128
# Number of epochs the train() function iterates over.
EPOCHS = 5

In [3]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

All pre-trained models expect input images normalized in the same way, i.e. mini-batches of 3-channel RGB images of shape (3 x H x W), where H and W are expected to be at least 224. The images have to be loaded into a range of [0, 1] and then normalized using mean = [0.485, 0.456, 0.406] and std = [0.229, 0.224, 0.225].

In [4]:
# Instantiate the project-local FOOD101 helper and ask it for the training
# dataset, the validation dataset and `classes` (presumably the class labels
# — verify against food101.py).
food = FOOD101() 
train_ds, valid_ds, classes = food.get_dataset()
# Build the loaders with BATCH_SIZE; the first two entries of the result are
# used as the training and validation loaders respectively.
dls = food.get_dls(train_ds, valid_ds, BATCH_SIZE)
train_dl, valid_dl = dls[0], dls[1]

Load pretrained model

In [5]:
# Fetch DenseNet-121 with pretrained weights from the torchvision hub repo,
# pinned to the v0.10.0 tag.
model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=True)
# or any of these variants
# model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet169', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet201', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet161', pretrained=True)
# Switch to evaluation mode (the cell output is the module's printed structure).
# NOTE(review): no cell visible in this notebook calls model.train() later, so
# the fine-tuning below runs with the model still in eval mode — confirm that
# this is intended.
model.eval()

Using cache found in /home/raha/.cache/torch/hub/pytorch_vision_v0.10.0


DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

To add another DenseBlock to the network, I need to remove norm5, add a Transition followed by a DenseBlock, and then put norm5 back.

To remove norm5, I replace it with an Identity module.

In [6]:
class Identity(nn.Module):
    """No-op module whose ``forward`` hands its input back unchanged.

    Intended as a drop-in replacement for a layer that should be bypassed
    without altering the structure of the surrounding container.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` as-is (the very same object, no copy)."""
        return x

In [7]:
# model.features.norm5 = Identity()

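Freeze all feature-extractor parameters except those of denselayer14–16 (in denseblock4) and norm5, so that no gradients are computed for the frozen ones.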

In [8]:
# Handles to the modules that must stay trainable during fine-tuning: the last
# three dense layers of denseblock4 and the norm5 layer that follows them.
denselayer14 = model.features.denseblock4.denselayer14
denselayer15 = model.features.denseblock4.denselayer15
denselayer16 = model.features.denseblock4.denselayer16
norm5 = model.features.norm5

# Identify the parameters to leave untouched by object identity instead of
# temporarily swapping the modules out for placeholders: the model is never in
# a half-modified state, even if this cell is interrupted part-way through.
trainable_ids = {
    id(param)
    for module in (denselayer14, denselayer15, denselayer16, norm5)
    for param in module.parameters()
}

# Freeze every other feature-extractor parameter so autograd computes no
# gradients for it; the selected modules keep their current requires_grad.
for param in model.features.parameters():
    if id(param) not in trainable_ids:
        param.requires_grad = False

In [9]:
def train_loop(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``dataloader``.

    Each batch is moved to the notebook-level ``device``, pushed through
    ``model``, scored with ``loss_fn`` and used for a single ``optimizer``
    step. The running loss is printed every 10th batch.

    The function does not change the model's train/eval mode; the model is
    used in whatever mode the caller left it.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute that supports ``len``.
        model: callable producing predictions from a batch of inputs.
        loss_fn: callable ``(pred, target) -> loss`` returning a tensor that
            supports ``backward()``.
        optimizer: optimizer whose ``zero_grad``/``step`` are invoked per batch.
    """
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        inputs, targets = X.to(device), y.to(device)

        # Forward pass and loss for this batch.
        batch_loss = loss_fn(model(inputs), targets)

        # Clear stale gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

        # Only report progress on every 10th batch.
        if batch % 10:
            continue
        loss = batch_loss.item()
        current = batch * len(X)
        print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [10]:
def test_loop(dataloader, model, loss_fn):
    """Evaluate ``model`` on ``dataloader`` and print accuracy and mean loss.

    The evaluation runs under ``torch.no_grad()`` with the model forced into
    evaluation mode; afterwards every sub-module is put back into the mode it
    had on entry, so calling this between training epochs does not disturb
    the training configuration.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute; both the loader and the dataset must support ``len``.
        model: the network to evaluate (batches are moved to the
            notebook-level ``device`` before being passed to it).
        loss_fn: callable ``(pred, target) -> loss`` returning a scalar tensor.

    Raises:
        ZeroDivisionError: if the loader yields no batches or the dataset is
            empty.
    """
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Remember each sub-module's mode so it can be restored exactly, including
    # setups where only part of the network is in training mode.
    is_module = isinstance(model, nn.Module)
    previous_modes = (
        [(module, module.training) for module in model.modules()]
        if is_module
        else []
    )
    if is_module:
        model.eval()

    try:
        with torch.no_grad():
            for X, y in dataloader:
                # Transfer each tensor once and reuse it for loss and accuracy.
                X, y = X.to(device), y.to(device)
                pred = model(X)
                test_loss += loss_fn(pred, y).item()
                correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    finally:
        for module, was_training in previous_modes:
            module.training = was_training

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [11]:
# Transition layer built with 1024 and 512 as its two positional arguments;
# the printed structure shows BatchNorm2d(1024) -> ReLU -> 1x1 Conv2d(1024, 512)
# -> 2x2 AvgPool2d. It is not attached to the model in the cells shown here
# (the add_module call further down is commented out).
transition = _Transition(1024, 512)
transition

_Transition(
  (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
  (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
)

In [12]:
# Two-layer dense block taking 512 input feature maps; per the printed
# structure the second layer sees 544 channels, i.e. each layer contributes
# growth_rate=32 additional feature maps. It is only constructed and displayed
# here — the cell that would add it to the model is commented out.
block = _DenseBlock(
                num_layers=2,
                num_input_features=512,
                bn_size=4,
                growth_rate=32,
                drop_rate=0.1,
                memory_efficient=False,
            )

block

_DenseBlock(
  (denselayer1): _DenseLayer(
    (norm1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
  (denselayer2): _DenseLayer(
    (norm1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu1): ReLU(inplace=True)
    (conv1): Conv2d(544, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu2): ReLU(inplace=True)
    (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  )
)

In [13]:
# model.features.add_module("transition4", transition)

In [14]:
# model.features.add_module("denseblock5", block)

In [15]:
# norm6 = nn.BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
# model.features.add_module("norm6", norm6)

In [16]:
# Swap the classification head for a single linear layer mapping the 1024
# features fed to the classifier onto 101 outputs.
model.classifier = nn.Sequential(nn.Linear(in_features=1024, out_features=101))

In [17]:
# Move the model to the selected device; the cell output is the module's
# printed structure.
model.to(device)

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

Train only the last three dense layers of denseblock4 (denselayer14–16), norm5 and the new classifier; all other feature parameters are frozen.

In [18]:
# Flat list of the parameters handed to the optimizer: the last three dense
# layers of denseblock4, norm5 and the classifier head, in that order.
parameters = [
    param
    for module in (
        model.features.denseblock4.denselayer14,
        model.features.denseblock4.denselayer15,
        model.features.denseblock4.denselayer16,
        model.features.norm5,
        model.classifier,
    )
    for param in module.parameters()
]

In [19]:
def train(model, train_dataloader, test_dataloader, learning_rate=0.001):
    """Fine-tune ``model`` for ``EPOCHS`` epochs, evaluating after each one.

    An Adam optimizer is created over the notebook-level ``parameters`` list
    and its learning rate is decayed by a factor of 0.9 after every epoch.

    Args:
        model: network passed to ``train_loop`` and ``test_loop``.
        train_dataloader: loader used for the optimisation pass of each epoch.
        test_dataloader: loader used for the evaluation pass of each epoch.
        learning_rate: initial Adam learning rate. This argument used to be
            ignored (the optimizer hard-coded 0.001); it is now honoured, and
            the default is set to 0.001 so that calls which omit it behave
            exactly as before.
    """
    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(parameters, lr=learning_rate, betas=(0.9, 0.999))
    # Multiply the learning rate by 0.9 on every scheduler.step() call.
    scheduler = ExponentialLR(optimizer, gamma=0.9)

    for t in range(EPOCHS):
        print(f"Epoch {t+1}\n-------------------------------")
        train_loop(train_dataloader, model, loss_fn, optimizer)
        test_loop(test_dataloader, model, loss_fn)
        # Step the schedule once per epoch, after the optimizer updates.
        scheduler.step()
    print("Done!")

In [20]:
# Time the complete fine-tuning run and print the elapsed wall-clock seconds.
start = time.time()
train(model=model, train_dataloader=train_dl, test_dataloader=valid_dl)
print(time.time() - start)

Epoch 1
-------------------------------
loss: 4.809968  [    0/75750]
loss: 4.403024  [ 1280/75750]
loss: 3.946434  [ 2560/75750]
loss: 3.656369  [ 3840/75750]
loss: 3.166634  [ 5120/75750]
loss: 3.213634  [ 6400/75750]
loss: 2.690969  [ 7680/75750]
loss: 2.808483  [ 8960/75750]
loss: 2.850514  [10240/75750]
loss: 2.566983  [11520/75750]
loss: 2.861523  [12800/75750]
loss: 2.444502  [14080/75750]
loss: 2.600265  [15360/75750]
loss: 2.561052  [16640/75750]
loss: 2.381367  [17920/75750]
loss: 2.237467  [19200/75750]
loss: 2.051880  [20480/75750]
loss: 2.329972  [21760/75750]
loss: 2.249926  [23040/75750]
loss: 1.925582  [24320/75750]
loss: 2.027059  [25600/75750]
loss: 2.225584  [26880/75750]
loss: 1.820618  [28160/75750]
loss: 2.081346  [29440/75750]
loss: 1.909963  [30720/75750]
loss: 2.252575  [32000/75750]
loss: 1.941972  [33280/75750]
loss: 1.913125  [34560/75750]
loss: 2.180875  [35840/75750]
loss: 2.156192  [37120/75750]
loss: 1.935024  [38400/75750]
loss: 2.000908  [39680/75750]


loss: 1.515787  [25600/75750]
loss: 1.377910  [26880/75750]
loss: 1.525049  [28160/75750]
loss: 1.525113  [29440/75750]
loss: 1.258832  [30720/75750]
loss: 1.421516  [32000/75750]
loss: 1.355422  [33280/75750]
loss: 1.418138  [34560/75750]
loss: 1.134953  [35840/75750]
loss: 1.173194  [37120/75750]
loss: 1.103275  [38400/75750]
loss: 1.418373  [39680/75750]
loss: 1.416216  [40960/75750]
loss: 1.188002  [42240/75750]
loss: 1.303174  [43520/75750]
loss: 1.404651  [44800/75750]
loss: 1.206712  [46080/75750]
loss: 1.347214  [47360/75750]
loss: 1.216923  [48640/75750]
loss: 1.592039  [49920/75750]
loss: 1.309904  [51200/75750]
loss: 1.147219  [52480/75750]
loss: 1.251061  [53760/75750]
loss: 1.176600  [55040/75750]
loss: 1.384013  [56320/75750]
loss: 1.183360  [57600/75750]
loss: 1.320241  [58880/75750]
loss: 1.552264  [60160/75750]
loss: 1.606850  [61440/75750]
loss: 1.139521  [62720/75750]
loss: 1.105605  [64000/75750]
loss: 1.599120  [65280/75750]
loss: 1.271189  [66560/75750]
loss: 1.40