# Reference
[Dual Path Networks](https://arxiv.org/abs/1707.01629)

[Keras-DPN](https://github.com/titu1994/Keras-DualPathNetworks/blob/master/dual_path_network.py)

[oyam/Pytorch-DPN](https://github.com/oyam/pytorch-DPNs/blob/master/dpn.py)

[Pytorch-ResNext](https://github.com/miraclewkf/ResNeXt-PyTorch/blob/master/resnext.py)

[Pytorch Docs](https://pytorch.org/docs/stable/nn.html?highlight=maxpool2d#torch.nn.MaxPool2d)

[torchvision/densenet.py](https://github.com/pytorch/vision/blob/master/torchvision/models/densenet.py)

[ResNext](https://arxiv.org/pdf/1611.05431.pdf)

[cypw/DPN](https://github.com/cypw/DPNs/tree/master/settings)

# Import

In [1]:
import os
import shutil
import time

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torchvision.models as models
import torch.nn.functional as F

from collections import OrderedDict
from torch.autograd import Variable

# Model

In [10]:
class DualPathBlock(nn.Module):
    """One dual-path block: a grouped bottleneck feeding a residual and a dense path.

    The bottleneck (1x1 -> grouped 3x3 -> 1x1) produces
    ``num_1x1_c + width_increment`` channels. The first ``num_1x1_c`` channels
    are added to the incoming residual path; the remaining ``width_increment``
    channels are concatenated onto the incoming dense path.

    Args:
        in_channels: Channels of the (concatenated) block input.
        num_1x1_a: Output channels of the first 1x1 convolution.
        num_3x3_b: Output channels of the grouped 3x3 convolution.
        num_1x1_c: Width of the residual path.
        width_increment: Channels appended to the dense path by this block.
        cardinality: Number of groups of the 3x3 convolution.
        block_type: ``'normal'`` (no projection, stride 1), ``'proj'``
            (projection, stride 1) or ``'down'`` (projection, stride 2).

    Raises:
        ValueError: If ``block_type`` is not one of the three supported values.
    """

    # block_type -> (stride of the 3x3 / projection convolution, uses projection)
    _BLOCK_TYPES = {
        'normal': (1, False),
        'proj': (1, True),
        'down': (2, True),
    }

    def __init__(self, in_channels, num_1x1_a, num_3x3_b, num_1x1_c, width_increment, cardinality=32, block_type='normal'):
        super().__init__()
        if block_type not in self._BLOCK_TYPES:
            # Fail early with a clear message instead of an AttributeError /
            # UnboundLocalError further down.
            raise ValueError(
                "block_type must be one of {}, got {!r}".format(sorted(self._BLOCK_TYPES), block_type)
            )
        key_stride, self.has_proj = self._BLOCK_TYPES[block_type]
        self.num_1x1_c = num_1x1_c

        if self.has_proj:
            # The projection emits num_1x1_c residual channels plus
            # 2 * width_increment dense channels (twice what the bottleneck adds);
            # per the original note this is meant to keep more information than a
            # normal layer.
            self.c1x1_w = self.BN_ReLU_Conv(in_channels=in_channels, out_channels=num_1x1_c + width_increment * 2, kernel_size=1, stride=key_stride)

        self.layers = nn.Sequential(OrderedDict([
            ('c1x1_a', self.BN_ReLU_Conv(in_channels, num_1x1_a, kernel_size=1, stride=1)),
            ('c3x3_b', self.BN_ReLU_Conv(num_1x1_a, num_3x3_b, kernel_size=3, stride=key_stride, padding=1, groups=cardinality)),
            ('c1x1_c', self.BN_ReLU_Conv(num_3x3_b, num_1x1_c + width_increment, kernel_size=1, stride=1))
        ]))

    def BN_ReLU_Conv(self, in_channels, out_channels, kernel_size, stride, padding=0, groups=1):
        """Return a pre-activation unit: BatchNorm2d -> ReLU -> bias-free Conv2d."""
        return nn.Sequential(OrderedDict([
            ('bn', nn.BatchNorm2d(in_channels)),
            ('relu', nn.ReLU(inplace=True)),
            ('conv', nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, groups=groups, bias=False))
        ]))

    def forward(self, input):
        """Run the block.

        Args:
            input: Either a single tensor (only valid for ``'proj'`` / ``'down'``
                blocks) or a ``[residual_path, dense_path]`` list/tuple of
                tensors, which is concatenated along dim 1 before use.

        Returns:
            ``[residual_path, dense_path]`` as a two-element list.

        Raises:
            TypeError: If a ``'normal'`` block receives a single tensor; it has
                no projection to split the input into the two paths.
        """
        is_pair = isinstance(input, (list, tuple))
        if not self.has_proj and not is_pair:
            # Indexing a tensor with [0] / [1] would silently pick batch samples.
            raise TypeError(
                "a 'normal' DualPathBlock expects [residual_path, dense_path], got {}".format(type(input).__name__)
            )

        init = torch.cat(input, dim=1) if is_pair else input
        if self.has_proj:
            # Re-derive both paths from the full input through the 1x1 projection.
            projection_path = self.c1x1_w(init)
            input_residual_path = projection_path[:, :self.num_1x1_c, :, :]
            input_dense_path = projection_path[:, self.num_1x1_c:, :, :]
        else:
            input_residual_path = input[0]
            input_dense_path = input[1]

        out = self.layers(init)

        # First num_1x1_c channels update the residual path, the rest grow the dense path.
        residual_path = input_residual_path + out[:, :self.num_1x1_c, :, :]
        dense_path = torch.cat([input_dense_path, out[:, self.num_1x1_c:, :, :]], dim=1)

        return [residual_path, dense_path]
    
        

In [8]:
class DPN(nn.Module):
    """Dual Path Network image classifier built from ``DualPathBlock`` stages.

    Layout: a stem (``conv1``: 7x7 stride-2 conv, BatchNorm, ReLU, 3x3 stride-2
    max-pool) followed by four stages ``conv2`` .. ``conv5`` with residual
    widths 256, 512, 1024 and 2048. Each stage opens with a projecting block
    (``'proj'`` for ``conv2``, ``'down'`` for the others) followed by
    ``depth[k] - 1`` ``'normal'`` blocks. The two paths are then concatenated,
    passed through BatchNorm + ReLU, globally pooled and classified.

    Args:
        num_init_features: Output channels of the stem convolution.
        cardinality: Number of groups of every 3x3 convolution.
        depth: Number of blocks in each of the four stages.
        width_increment: Dense-path growth per block for each of the four stages.
        width: Per-group bottleneck width of ``conv2``; the bottleneck width is
            ``width * cardinality`` and doubles at every subsequent stage.
        num_classes: Size of the classifier output.
        test_phase: If true, ``forward`` averages global average- and
            max-pooling instead of using average-pooling alone.
    """

    def __init__(self, num_init_features=64, cardinality=32,
                 depth=(3, 4, 20, 3), width_increment=(16, 32, 24, 128), width=3, num_classes=1000, test_phase=False):
        super().__init__()
        # forward() reads this flag, so it has to live on the instance.
        self.test_phase = test_phase

        blocks = OrderedDict()

        # conv1
        blocks['conv1'] = nn.Sequential(
            nn.Conv2d(3, num_init_features, kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(num_init_features),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )

        # conv2 .. conv5
        filters = width * cardinality
        in_channels = num_init_features
        for stage_idx, out_channels in enumerate((256, 512, 1024, 2048)):
            stage = stage_idx + 2
            inc = width_increment[stage_idx]
            first_type = 'proj' if stage_idx == 0 else 'down'

            # The stage-opening block consumes everything the previous stage
            # produced (the running in_channels), not the stem width.
            blocks['conv{}_1'.format(stage)] = DualPathBlock(in_channels, filters, filters, out_channels, inc, cardinality, first_type)
            # Projection yields 2 * inc dense channels, the bottleneck adds one more inc.
            in_channels = out_channels + inc * 3
            for i in range(2, depth[stage_idx] + 1):
                blocks['conv{}_{}'.format(stage, i)] = DualPathBlock(in_channels, filters, filters, out_channels, inc, cardinality, 'normal')
                in_channels += inc

            filters = filters * 2

        self.features = nn.Sequential(blocks)
        # Registered once here so its parameters are trained and saved with the model.
        self.final_bn_relu = self.BN_ReLU(in_channels)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)
        self.classifier = nn.Linear(in_channels, num_classes)

    def BN_ReLU(self, in_channels):
        """Return a BatchNorm2d -> ReLU unit for ``in_channels`` channels."""
        return nn.Sequential(OrderedDict([
            ('bn', nn.BatchNorm2d(in_channels)),
            ('relu', nn.ReLU(inplace=True))
        ]))

    def forward(self, x):
        """Return class scores of shape ``(x.size(0), num_classes)`` for image batch ``x``."""
        # The last block returns [residual_path, dense_path]; merge them channel-wise.
        features = torch.cat(self.features(x), dim=1)
        features = self.final_bn_relu(features)

        if self.test_phase:
            avg_pool = self.avg_pool(features)
            max_pool = self.max_pool(features)
            out = (avg_pool + max_pool) * 0.5
        else:
            out = self.avg_pool(features)

        out = out.view(features.size(0), -1)
        out = self.classifier(out)

        return out

In [4]:
def dpn92(num_classes=1000, test_phase=False):
    """Build a ``DPN`` with this file's DPN-92 settings.

    Args:
        num_classes: Forwarded to ``DPN`` as the classifier output size.
        test_phase: Forwarded to ``DPN`` unchanged.
    """
    settings = dict(
        num_init_features=64,
        cardinality=32,
        depth=[3, 4, 20, 3],
        width_increment=[16, 32, 24, 128],
        width=3,
    )
    return DPN(num_classes=num_classes, test_phase=test_phase, **settings)

def dpn98(num_classes=1000, test_phase=False):
    """Build a ``DPN`` with this file's DPN-98 settings.

    Args:
        num_classes: Forwarded to ``DPN`` as the classifier output size.
        test_phase: Forwarded to ``DPN`` unchanged.
    """
    settings = dict(
        num_init_features=96,
        cardinality=40,
        depth=[3, 6, 20, 3],
        width_increment=[16, 32, 32, 128],
        width=4,
    )
    return DPN(num_classes=num_classes, test_phase=test_phase, **settings)

def dpn131(num_classes=1000, test_phase=False):
    """Build a ``DPN`` with this file's DPN-131 settings.

    Args:
        num_classes: Forwarded to ``DPN`` as the classifier output size.
        test_phase: Forwarded to ``DPN`` unchanged.
    """
    settings = dict(
        num_init_features=128,
        cardinality=40,
        depth=[4, 8, 28, 3],
        width_increment=[16, 32, 32, 128],
        width=4,
    )
    return DPN(num_classes=num_classes, test_phase=test_phase, **settings)

In [11]:
# Build the DPN-98 variant with its default arguments and print its module tree
# to inspect the layer layout.
model = dpn98()
print(model)

DPN(
  (features): Sequential(
    (conv1): Sequential(
      (0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace)
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (conv2_1): DualPathBlock(
      (c1x1_w): Sequential(
        (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace)
        (conv): Conv2d(96, 288, kernel_size=(1, 1), stride=(1, 1), bias=False)
      )
      (layers): Sequential(
        (c1x1_a): Sequential(
          (bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (relu): ReLU(inplace)
          (conv): Conv2d(96, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
        )
        (c3x3_b): Sequential(
          (bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, t