In [2]:
import math
import copy

In [3]:
def layer_params(layer):
    """Extract ``(kernel, stride, dilation)`` from a layer-spec dict.

    Args:
        layer: mapping describing one layer. ``'k'`` (kernel size) is
            mandatory; ``'s'`` (stride) and ``'d'`` (dilation) are optional
            and fall back to 1 when absent.

    Returns:
        Tuple ``(k, s, d)``.

    Raises:
        KeyError: if ``'k'`` is missing from ``layer``.
    """
    stride = layer.get('s', 1)
    dilation = layer.get('d', 1)
    kernel = layer['k']  # no default: every layer must state its kernel size
    return kernel, stride, dilation

def output_size(i, k, s=1, p=0, d=1):
    """Compute the output size of one convolution/pooling layer along one axis.

    Evaluates ``floor((i + 2*p - d*(k - 1) - 1) / s) + 1``.

    Args:
        i: input size.
        k: kernel size.
        s: stride (default 1).
        p: padding applied on each side (default 0).
        d: dilation (default 1).

    Returns:
        The output size as an ``int``.
    """
    # Number of positions left after the padded input loses the extent of the
    # dilated kernel, d * (k - 1) + 1.
    numerator = i + 2 * p - d * (k - 1) - 1
    full_steps = math.floor(numerator / s)
    return int(full_steps + 1)
    
def find_output_size(network, i=224):
    """Propagate an input size through a stack of layers.

    Args:
        network: iterable of layer-spec dicts. Each dict is forwarded as
            keyword arguments to ``output_size`` (keys ``'k'``, ``'s'``,
            ``'p'``, ``'d'``). The dicts are not modified.
        i: size fed to the first layer (default 224).

    Returns:
        The size after the last layer. For an empty ``network`` the input
        size ``i`` is returned unchanged.
    """
    size = i
    for layer in network:
        # Build a fresh kwargs dict per layer so the caller's spec is never
        # mutated; an 'i' key already present in the spec is overridden by
        # the current size.
        size = output_size(**dict(layer, i=size))
    return size

def find_receptive_field(net):
    """Compute receptive field and effective stride after every layer.

    Args:
        net: sequence of layer-spec dicts, read through ``layer_params``.

    Returns:
        A list with one ``(receptive_field, effective_stride)`` tuple per
        layer, in layer order. Empty when ``net`` is empty.
    """
    per_layer = []
    # State before the first layer: receptive field 1, stride 1.
    rf, stride = 1, 1
    for layer in net:
        k, s, d = layer_params(layer)
        # The field grows by the dilated kernel extent, scaled by the stride
        # accumulated *before* this layer; only afterwards is the stride
        # updated with this layer's own stride.
        rf = rf + d * stride * (k - 1)
        stride = s * stride
        per_layer.append((rf, stride))
    return per_layer

def find_full_info(network_with_names, input_size=224):
    """Print a per-layer table and return the values of the last layer.

    One row is printed per layer with its receptive field, effective stride
    and output size, preceded by a header and an ``Input`` row.

    Args:
        network_with_names: flat list alternating a layer-spec dict and the
            layer's display name: ``[spec, name, spec, name, ...]``.
        input_size: size fed to the first layer (default 224).

    Returns:
        Tuple ``(receptive_field, effective_stride, output_size)`` of the
        final layer. For an empty network this is ``(1, 1, input_size)``.

    Raises:
        IndexError: if a layer spec has no matching name.
    """
    structure = network_with_names[0::2]
    layer_names = network_with_names[1::2]
    print('%-10s| %-16s| %-16s| %-16s' % ('Layer Name', 'Receptive Field', 'Effective Stride', 'Output Size'))
    print('-' * 59)
    print('%-10s| %-16s| %-16s| %-16s' % ('Input', '--', '--', str(input_size)))
    if not structure:
        # No layers: receptive field 1, stride 1, size unchanged. Previously
        # this case crashed with IndexError on rf_s[-1].
        return 1, 1, input_size
    rf_s = find_receptive_field(structure)
    # Carry the size forward layer by layer instead of recomputing the whole
    # prefix of the network for every row.
    size = input_size
    for idx, (layer, (rf, es)) in enumerate(zip(structure, rf_s)):
        name = layer_names[idx]
        size = output_size(**dict(layer, i=size))
        print('%-10s| %-16d| %-16d| %-16d' % (name, rf, es, size))
    return rf, es, size

In [4]:
# Network format: a flat list that alternates a layer-spec dict with the
# layer's display name, i.e. [spec, name, spec, name, ...].
# Each spec dict has the form: {
# 'k': kernel_size (required),
# 's': stride (default 1),
# 'p': padding (default 0),
# 'd': dilation (default 1)
# }

# Smallest example: a single unpadded, stride-1 layer with kernel size 3.
print('1 Conv')
network = [
    {'k': 3}, 'conv1',
]
find_full_info(network);

1 Conv
Layer Name| Receptive Field | Effective Stride| Output Size     
-----------------------------------------------------------
Input     | --              | --              | 224             
conv1     | 3               | 1               | 222             


In [5]:
# Two unpadded kernel-3 layers followed by a kernel-2, stride-2 pooling layer.
# Pooling is described with the same spec keys as a convolution.
print('2 Conv + 1 Pool')
network = [
    {'k': 3}, 'conv1',
    {'k': 3}, 'conv2',
    {'k': 2, 's': 2}, 'pool1',
]
find_full_info(network);

2 Conv + 1 Pool
Layer Name| Receptive Field | Effective Stride| Output Size     
-----------------------------------------------------------
Input     | --              | --              | 224             
conv1     | 3               | 1               | 222             
conv2     | 5               | 1               | 220             
pool1     | 6               | 2               | 110             


In [6]:
# AlexNet layer chain, evaluated for a 227-pixel input.
# The fully connected layers are written as convolutions: fc6 uses kernel 6,
# which spans the whole size-6 pool5 output and reduces it to size 1; fc7 is
# a kernel-1 layer on top of that.
print('AlexNet')
network = [
    {'k': 11, 's': 4, 'p': 0}, 'conv1',
    {'k': 3,  's': 2, 'p': 0}, 'pool1',
    {'k': 5,  's': 1, 'p': 2}, 'conv2',
    {'k': 3,  's': 2, 'p': 0}, 'pool2',
    {'k': 3,  's': 1, 'p': 1}, 'conv3',
    {'k': 3,  's': 1, 'p': 1}, 'conv4',
    {'k': 3,  's': 1, 'p': 1}, 'conv5',
    {'k': 3,  's': 2, 'p': 0}, 'pool5',
    {'k': 6}, 'fc6',
    {'k': 1}, 'fc7',
]
find_full_info(network, input_size=227);

AlexNet
Layer Name| Receptive Field | Effective Stride| Output Size     
-----------------------------------------------------------
Input     | --              | --              | 227             
conv1     | 11              | 4               | 55              
pool1     | 19              | 8               | 27              
conv2     | 51              | 8               | 27              
pool2     | 67              | 16              | 13              
conv3     | 99              | 16              | 13              
conv4     | 131             | 16              | 13              
conv5     | 163             | 16              | 13              
pool5     | 195             | 32              | 6               
fc6       | 355             | 32              | 1               
fc7       | 355             | 32              | 1               


In [7]:
# VGG-16 layer chain, evaluated for the default 224-pixel input.
# Every convolution uses kernel 3 with padding 1 and therefore keeps the
# size; only the five kernel-2, stride-2 pools change it (224 / 2**5 = 7).
# fc6 is written as a kernel-7 layer over that size-7 map; fc7 and fc8 are
# kernel-1 layers.
print('VGG-16')
network = [
    {'k': 3, 'p': 1}, 'conv1_1',
    {'k': 3, 'p': 1}, 'conv1_2',
    {'k': 2, 's': 2}, 'pool1',
    {'k': 3, 'p': 1}, 'conv2_1',
    {'k': 3, 'p': 1}, 'conv2_2',
    {'k': 2, 's': 2}, 'pool2',
    {'k': 3, 'p': 1}, 'conv3_1',
    {'k': 3, 'p': 1}, 'conv3_2',
    {'k': 3, 'p': 1}, 'conv3_3',
    {'k': 2, 's': 2}, 'pool3',
    {'k': 3, 'p': 1}, 'conv4_1',
    {'k': 3, 'p': 1}, 'conv4_2',
    {'k': 3, 'p': 1}, 'conv4_3',
    {'k': 2, 's': 2}, 'pool4',
    {'k': 3, 'p': 1}, 'conv5_1',
    {'k': 3, 'p': 1}, 'conv5_2',
    {'k': 3, 'p': 1}, 'conv5_3',
    {'k': 2, 's': 2}, 'pool5',
    {'k': 7}, 'fc6',
    {'k': 1}, 'fc7',
    {'k': 1}, 'fc8',
]
find_full_info(network);

VGG-16
Layer Name| Receptive Field | Effective Stride| Output Size     
-----------------------------------------------------------
Input     | --              | --              | 224             
conv1_1   | 3               | 1               | 224             
conv1_2   | 5               | 1               | 224             
pool1     | 6               | 2               | 112             
conv2_1   | 10              | 2               | 112             
conv2_2   | 14              | 2               | 112             
pool2     | 16              | 4               | 56              
conv3_1   | 24              | 4               | 56              
conv3_2   | 32              | 4               | 56              
conv3_3   | 40              | 4               | 56              
pool3     | 44              | 8               | 28              
conv4_1   | 60              | 8               | 28              
conv4_2   | 76              | 8               | 28              
conv4_3   | 92         

In [8]:
# https://arxiv.org/pdf/1511.07122v3.pdf
# https://github.com/fyu/dilation/blob/master/models/dilation8_pascal_voc_deploy.prototxt
# No intermediate padding.
print('VGG-16 with Dilated Convs for Dense Prediction\n')
# Front end: compared with the VGG-16 list used earlier in this notebook,
# there is no pool4/pool5 and no padding; conv5_x use dilation 2 and fc6
# uses dilation 4 instead.
print('Pascal VOC front end')
network = [
    {'k': 3}, 'conv1_1',
    {'k': 3}, 'conv1_2',
    {'k': 2, 's': 2}, 'pool1',
    {'k': 3}, 'conv2_1',
    {'k': 3}, 'conv2_2',
    {'k': 2, 's': 2}, 'pool2',
    {'k': 3}, 'conv3_1',
    {'k': 3}, 'conv3_2',
    {'k': 3}, 'conv3_3',
    {'k': 2, 's': 2}, 'pool3',
    {'k': 3}, 'conv4_1',
    {'k': 3}, 'conv4_2',
    {'k': 3}, 'conv4_3',
    {'k': 3, 'd': 2}, 'conv5_1',
    {'k': 3, 'd': 2}, 'conv5_2',
    {'k': 3, 'd': 2}, 'conv5_3',
    {'k': 7, 'd': 4}, 'fc6',
    {'k': 1}, 'fc7',
    {'k': 1}, 'fc-final',
]
find_full_info(network, input_size=900);
print('')

# Context module: fed with input_size=66, which is the output size the front
# end above produces for its 900-pixel input. The padding of 33 on the first
# layer adds 2 * 33 = 66, exactly the total the unpadded layers remove
# (2 + 2 + 4 + 8 + 16 + 32 + 2), so the module's output size is 66 again.
print('Context Aggregation Module')
network = [
    {'k': 3, 'p': 33}, 'ct_conv1_1',
    {'k': 3}, 'ct_conv1_2',
    {'k': 3, 'd': 2}, 'ct_conv2_1',
    {'k': 3, 'd': 4}, 'ct_conv3_1',
    {'k': 3, 'd': 8}, 'ct_conv4_1',
    {'k': 3, 'd': 16}, 'ct_conv5_1',
    {'k': 3}, 'ct_fc1',
    {'k': 1}, 'ct_final',
]
find_full_info(network, input_size=66);

VGG-16 with Dilated Convs for Dense Prediction

Pascal VOC front end
Layer Name| Receptive Field | Effective Stride| Output Size     
-----------------------------------------------------------
Input     | --              | --              | 900             
conv1_1   | 3               | 1               | 898             
conv1_2   | 5               | 1               | 896             
pool1     | 6               | 2               | 448             
conv2_1   | 10              | 2               | 446             
conv2_2   | 14              | 2               | 444             
pool2     | 16              | 4               | 222             
conv3_1   | 24              | 4               | 220             
conv3_2   | 32              | 4               | 218             
conv3_3   | 40              | 4               | 216             
pool3     | 44              | 8               | 108             
conv4_1   | 60              | 8               | 106             
conv4_2   | 76            