This repository has been archived by the owner. It is now read-only.
Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
696 lines (668 sloc) 18.5 KB
# Batch Normalization
# Sergey Ioffe and Christian Szegedy
# http://arxiv.org/abs/1502.03167
#
# A pretrained model of this conf can be downloaded from http://goo.gl/EOskAl
# with the mean file from http://goo.gl/wOsuim
#
# Notes by Zhongwen Xu:
#
# Learning rates: decreased by 10x on round 20 and round 28.
#
# Performance of single center crop on three stages:
#
# On round 20, lr = 1e-2: val-rec@1:0.54192 val-rec@5:0.7912
# On round 28, lr = 1e-3: val-rec@1:0.67544 val-rec@5:0.8826
# On round 39, lr = 1e-4: val-rec@1:0.70454 val-rec@5:0.899
#
# The performance does not match the BN paper because we apply much less
# augmentation: only random 224x224 cropping and flipping are used here.
# However, this model matches the performance of GoogLeNet (error rate
# 10.07% for top 5) from single crop.
#
# ---- Training data iterator chain (opened by "data = train", closed by "iter = end") ----
# Reads the packed image record file and the mean file named below.
# Random cropping, random mirroring and shuffling are enabled only in this
# chain; the evaluation chain that follows sets none of them.
data = train
iter = imgrec
# image_list = "imagenet/train.lst"
image_rec = "imagenet/train.bin"
image_mean = "models/mean_224.bin"
rand_crop=1
rand_mirror=1
shuffle=1
# NOTE(review): threadbuffer presumably wraps the iterator above with
# background-thread prefetching -- confirm against the cxxnet iterator docs.
iter = threadbuffer
iter = end
# ---- Evaluation data iterator chain, registered under the name "val" ----
# Uses the same mean file as the training chain. No threadbuffer stage is
# configured here.
eval = val
iter = imgrec
# image_list = "imagenet/val.lst"
image_rec = "imagenet/val.bin"
image_mean = "models/mean_224.bin"
# No random crop or mirror at test time (rand_crop / rand_mirror / shuffle are
# deliberately left unset in this chain).
iter = end
# ---- Network definition begins; closed by "netconfig = end" ----
# Layer syntax used throughout: layer[src_node->dst_node] = type:name, followed
# by that layer's parameters. Dotted node names (0.1, 0.2, ...) are intermediate
# nodes between the integer-numbered stage outputs.
# NOTE(review): node 0 is presumably the network input -- confirm.
netconfig = start
# Stem stage 1: 7x7 stride-2 conv (64 channels) -> batch norm -> ReLU -> 3x3 stride-2 max pool.
layer[0->0.1] = conv:conv_1
kernel_size = 7
nchannel = 64
pad = 3
stride = 2
layer[0.1->0.2] = batch_norm:bn_1
layer[0.2->1] = relu:relu_1
layer[1->2] = max_pooling:max_pool_1
kernel_size = 3
stride = 2
# Stem stage 2a: 1x1 conv (64 channels) -> batch norm -> ReLU.
layer[2->2.1] = conv:conv_2_reduce
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[2.1->2.2] = batch_norm:bn_2_1
layer[2.2->3] = relu:relu_2_1
# Stem stage 2b: 3x3 conv (192 channels) -> batch norm -> ReLU -> 3x3 stride-2 max pool.
layer[3->3.1] = conv:conv_2
kernel_size = 3
nchannel = 192
pad = 1
stride = 1
layer[3.1->3.2] = batch_norm:bn_2
layer[3.2->4] = relu:relu_2
layer[4->5] = max_pooling:max_pool_2
kernel_size = 3
stride = 2
##### inception 3a #####
# Node 5 is split into four parallel branches (6.1.*, 6.2.*, 6.3.*, 6.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 6: 64 + 64 + 96 + 32 = 256 channels.
layer[5->6.1.0,6.2.0,6.3.0,6.4.0] = split:split_3a_split
## inception 1x1
# Branch 1: single 1x1 conv, 64 channels.
layer[6.1.0->6.1.1] = conv:conv_3a_1x1
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[6.1.1->6.1.2] = batch_norm:bn_3a_1x1
layer[6.1.2->6.1.3] = relu:relu_3a_1x1
## inception 3x3
# Branch 2: 1x1 reduce (64) then 3x3 conv (64).
layer[6.2.0->6.2.1] = conv:conv_3a_3x3_reduce
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[6.2.1->6.2.2] = batch_norm:bn_3a_3x3_reduce
layer[6.2.2->6.2.3] = relu:relu_3a_3x3_reduce
layer[6.2.3->6.2.4] = conv:conv_3a_3x3
kernel_size = 3
nchannel = 64
pad = 1
stride = 1
layer[6.2.4->6.2.5] = batch_norm:bn_3a_3x3
layer[6.2.5->6.2.6] = relu:relu_3a_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (64) then two stacked 3x3 convs (96, 96).
layer[6.3.0->6.3.1] = conv:conv_3a_double_3x3_reduce
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[6.3.1->6.3.2] = batch_norm:bn_3a_double_3x3_reduce
layer[6.3.2->6.3.3] = relu:relu_3a_double_3x3_reduce
layer[6.3.3->6.3.4] = conv:conv_3a_double_3x3_0
kernel_size = 3
nchannel = 96
pad = 1
stride = 1
layer[6.3.4->6.3.5] = batch_norm:bn_3a_double_3x3_0
layer[6.3.5->6.3.6] = relu:relu_3a_double_3x3_0
layer[6.3.6->6.3.7] = conv:conv_3a_double_3x3_1
kernel_size = 3
nchannel = 96
pad = 1
stride = 1
layer[6.3.7->6.3.8] = batch_norm:bn_3a_double_3x3_1
layer[6.3.8->6.3.9] = relu:relu_3a_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 average pool, then 1x1 projection conv (32).
layer[6.4.0->6.4.1] = avg_pooling:avg_pool_3a_pool
kernel_size = 3
stride = 1
pad = 1
layer[6.4.1->6.4.2] = conv:conv_3a_proj
kernel_size = 1
nchannel = 32
pad = 0
stride = 1
layer[6.4.2->6.4.3] = batch_norm:bn_3a_proj
layer[6.4.3->6.4.4] = relu:relu_3a_proj
# Merge the four branch outputs along the channel axis.
layer[6.1.3,6.2.6,6.3.9,6.4.4->6] = ch_concat:ch_concat_3a_chconcat
##### inception 3b #####
# Node 6 is split into four parallel branches (7.1.*, 7.2.*, 7.3.*, 7.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 7: 64 + 96 + 96 + 64 = 320 channels.
layer[6->7.1.0,7.2.0,7.3.0,7.4.0] = split:split_3b_split
## inception 1x1
# Branch 1: single 1x1 conv, 64 channels.
layer[7.1.0->7.1.1] = conv:conv_3b_1x1
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[7.1.1->7.1.2] = batch_norm:bn_3b_1x1
layer[7.1.2->7.1.3] = relu:relu_3b_1x1
## inception 3x3
# Branch 2: 1x1 reduce (64) then 3x3 conv (96).
layer[7.2.0->7.2.1] = conv:conv_3b_3x3_reduce
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[7.2.1->7.2.2] = batch_norm:bn_3b_3x3_reduce
layer[7.2.2->7.2.3] = relu:relu_3b_3x3_reduce
layer[7.2.3->7.2.4] = conv:conv_3b_3x3
kernel_size = 3
nchannel = 96
pad = 1
stride = 1
layer[7.2.4->7.2.5] = batch_norm:bn_3b_3x3
layer[7.2.5->7.2.6] = relu:relu_3b_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (64) then two stacked 3x3 convs (96, 96).
layer[7.3.0->7.3.1] = conv:conv_3b_double_3x3_reduce
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[7.3.1->7.3.2] = batch_norm:bn_3b_double_3x3_reduce
layer[7.3.2->7.3.3] = relu:relu_3b_double_3x3_reduce
layer[7.3.3->7.3.4] = conv:conv_3b_double_3x3_0
kernel_size = 3
nchannel = 96
pad = 1
stride = 1
layer[7.3.4->7.3.5] = batch_norm:bn_3b_double_3x3_0
layer[7.3.5->7.3.6] = relu:relu_3b_double_3x3_0
layer[7.3.6->7.3.7] = conv:conv_3b_double_3x3_1
kernel_size = 3
nchannel = 96
pad = 1
stride = 1
layer[7.3.7->7.3.8] = batch_norm:bn_3b_double_3x3_1
layer[7.3.8->7.3.9] = relu:relu_3b_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 average pool, then 1x1 projection conv (64).
layer[7.4.0->7.4.1] = avg_pooling:avg_pool_3b_pool
kernel_size = 3
stride = 1
pad = 1
layer[7.4.1->7.4.2] = conv:conv_3b_proj
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[7.4.2->7.4.3] = batch_norm:bn_3b_proj
layer[7.4.3->7.4.4] = relu:relu_3b_proj
# Merge the four branch outputs along the channel axis.
layer[7.1.3,7.2.6,7.3.9,7.4.4->7] = ch_concat:ch_concat_3b_chconcat
##### inception 3c #####
# Downsampling module: node 7 is split into only three branches (no 1x1
# branch). Both conv branches end in a stride-2 3x3 conv and the pool branch
# is a stride-2 max pool with no projection conv. Output node 8 concatenates
# 160 + 96 channels from the conv branches plus the pooled input channels
# (320 from ch_concat_3b_chconcat), i.e. 576 channels.
layer[7->8.2.0,8.3.0,8.4.0] = split:split_3c_split
## inception 3x3
# Branch 2: 1x1 reduce (128) then stride-2 3x3 conv (160).
layer[8.2.0->8.2.1] = conv:conv_3c_3x3_reduce
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[8.2.1->8.2.2] = batch_norm:bn_3c_3x3_reduce
layer[8.2.2->8.2.3] = relu:relu_3c_3x3_reduce
layer[8.2.3->8.2.4] = conv:conv_3c_3x3
kernel_size = 3
nchannel = 160
pad = 1
stride = 2
layer[8.2.4->8.2.5] = batch_norm:bn_3c_3x3
layer[8.2.5->8.2.6] = relu:relu_3c_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (64), 3x3 conv (96), then stride-2 3x3 conv (96).
layer[8.3.0->8.3.1] = conv:conv_3c_double_3x3_reduce
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[8.3.1->8.3.2] = batch_norm:bn_3c_double_3x3_reduce
layer[8.3.2->8.3.3] = relu:relu_3c_double_3x3_reduce
layer[8.3.3->8.3.4] = conv:conv_3c_double_3x3_0
kernel_size = 3
nchannel = 96
pad = 1
stride = 1
layer[8.3.4->8.3.5] = batch_norm:bn_3c_double_3x3_0
layer[8.3.5->8.3.6] = relu:relu_3c_double_3x3_0
layer[8.3.6->8.3.7] = conv:conv_3c_double_3x3_1
kernel_size = 3
nchannel = 96
pad = 1
stride = 2
layer[8.3.7->8.3.8] = batch_norm:bn_3c_double_3x3_1
layer[8.3.8->8.3.9] = relu:relu_3c_double_3x3_1
## inception pool (pass-through: no projection conv in this module)
# Branch 4: 3x3 stride-2 max pool; no pad is set, unlike the padded stride-2 convs.
# NOTE(review): the concat below needs this branch's spatial size to match the
# conv branches -- presumably guaranteed by the pooling layer's output-size
# rounding; confirm against the cxxnet layer docs.
layer[8.4.0->8.4.1] = max_pooling:max_pool_3c_pool
kernel_size = 3
stride = 2
# Merge the three branch outputs along the channel axis.
layer[8.2.6,8.3.9,8.4.1->8] = ch_concat:ch_concat_3c_chconcat
##### inception 4a #####
# Node 8 is split into four parallel branches (9.1.*, 9.2.*, 9.3.*, 9.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 9: 224 + 96 + 128 + 128 = 576 channels.
layer[8->9.1.0,9.2.0,9.3.0,9.4.0] = split:split_4a_split
## inception 1x1
# Branch 1: single 1x1 conv, 224 channels.
layer[9.1.0->9.1.1] = conv:conv_4a_1x1
kernel_size = 1
nchannel = 224
pad = 0
stride = 1
layer[9.1.1->9.1.2] = batch_norm:bn_4a_1x1
layer[9.1.2->9.1.3] = relu:relu_4a_1x1
## inception 3x3
# Branch 2: 1x1 reduce (64) then 3x3 conv (96).
layer[9.2.0->9.2.1] = conv:conv_4a_3x3_reduce
kernel_size = 1
nchannel = 64
pad = 0
stride = 1
layer[9.2.1->9.2.2] = batch_norm:bn_4a_3x3_reduce
layer[9.2.2->9.2.3] = relu:relu_4a_3x3_reduce
layer[9.2.3->9.2.4] = conv:conv_4a_3x3
kernel_size = 3
nchannel = 96
pad = 1
stride = 1
layer[9.2.4->9.2.5] = batch_norm:bn_4a_3x3
layer[9.2.5->9.2.6] = relu:relu_4a_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (96) then two stacked 3x3 convs (128, 128).
layer[9.3.0->9.3.1] = conv:conv_4a_double_3x3_reduce
kernel_size = 1
nchannel = 96
pad = 0
stride = 1
layer[9.3.1->9.3.2] = batch_norm:bn_4a_double_3x3_reduce
layer[9.3.2->9.3.3] = relu:relu_4a_double_3x3_reduce
layer[9.3.3->9.3.4] = conv:conv_4a_double_3x3_0
kernel_size = 3
nchannel = 128
pad = 1
stride = 1
layer[9.3.4->9.3.5] = batch_norm:bn_4a_double_3x3_0
layer[9.3.5->9.3.6] = relu:relu_4a_double_3x3_0
layer[9.3.6->9.3.7] = conv:conv_4a_double_3x3_1
kernel_size = 3
nchannel = 128
pad = 1
stride = 1
layer[9.3.7->9.3.8] = batch_norm:bn_4a_double_3x3_1
layer[9.3.8->9.3.9] = relu:relu_4a_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 average pool, then 1x1 projection conv (128).
layer[9.4.0->9.4.1] = avg_pooling:avg_pool_4a_pool
kernel_size = 3
stride = 1
pad = 1
layer[9.4.1->9.4.2] = conv:conv_4a_proj
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[9.4.2->9.4.3] = batch_norm:bn_4a_proj
layer[9.4.3->9.4.4] = relu:relu_4a_proj
# Merge the four branch outputs along the channel axis.
layer[9.1.3,9.2.6,9.3.9,9.4.4->9] = ch_concat:ch_concat_4a_chconcat
##### inception 4b #####
# Node 9 is split into four parallel branches (10.1.*, 10.2.*, 10.3.*, 10.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 10: 192 + 128 + 128 + 128 = 576 channels.
layer[9->10.1.0,10.2.0,10.3.0,10.4.0] = split:split_4b_split
## inception 1x1
# Branch 1: single 1x1 conv, 192 channels.
layer[10.1.0->10.1.1] = conv:conv_4b_1x1
kernel_size = 1
nchannel = 192
pad = 0
stride = 1
layer[10.1.1->10.1.2] = batch_norm:bn_4b_1x1
layer[10.1.2->10.1.3] = relu:relu_4b_1x1
## inception 3x3
# Branch 2: 1x1 reduce (96) then 3x3 conv (128).
layer[10.2.0->10.2.1] = conv:conv_4b_3x3_reduce
kernel_size = 1
nchannel = 96
pad = 0
stride = 1
layer[10.2.1->10.2.2] = batch_norm:bn_4b_3x3_reduce
layer[10.2.2->10.2.3] = relu:relu_4b_3x3_reduce
layer[10.2.3->10.2.4] = conv:conv_4b_3x3
kernel_size = 3
nchannel = 128
pad = 1
stride = 1
layer[10.2.4->10.2.5] = batch_norm:bn_4b_3x3
layer[10.2.5->10.2.6] = relu:relu_4b_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (96) then two stacked 3x3 convs (128, 128).
layer[10.3.0->10.3.1] = conv:conv_4b_double_3x3_reduce
kernel_size = 1
nchannel = 96
pad = 0
stride = 1
layer[10.3.1->10.3.2] = batch_norm:bn_4b_double_3x3_reduce
layer[10.3.2->10.3.3] = relu:relu_4b_double_3x3_reduce
layer[10.3.3->10.3.4] = conv:conv_4b_double_3x3_0
kernel_size = 3
nchannel = 128
pad = 1
stride = 1
layer[10.3.4->10.3.5] = batch_norm:bn_4b_double_3x3_0
layer[10.3.5->10.3.6] = relu:relu_4b_double_3x3_0
layer[10.3.6->10.3.7] = conv:conv_4b_double_3x3_1
kernel_size = 3
nchannel = 128
pad = 1
stride = 1
layer[10.3.7->10.3.8] = batch_norm:bn_4b_double_3x3_1
layer[10.3.8->10.3.9] = relu:relu_4b_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 average pool, then 1x1 projection conv (128).
layer[10.4.0->10.4.1] = avg_pooling:avg_pool_4b_pool
kernel_size = 3
stride = 1
pad = 1
layer[10.4.1->10.4.2] = conv:conv_4b_proj
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[10.4.2->10.4.3] = batch_norm:bn_4b_proj
layer[10.4.3->10.4.4] = relu:relu_4b_proj
# Merge the four branch outputs along the channel axis.
layer[10.1.3,10.2.6,10.3.9,10.4.4->10] = ch_concat:ch_concat_4b_chconcat
##### inception 4c #####
# Node 10 is split into four parallel branches (11.1.*, 11.2.*, 11.3.*, 11.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 11: 160 + 160 + 160 + 128 = 608 channels.
layer[10->11.1.0,11.2.0,11.3.0,11.4.0] = split:split_4c_split
## inception 1x1
# Branch 1: single 1x1 conv, 160 channels.
layer[11.1.0->11.1.1] = conv:conv_4c_1x1
kernel_size = 1
nchannel = 160
pad = 0
stride = 1
layer[11.1.1->11.1.2] = batch_norm:bn_4c_1x1
layer[11.1.2->11.1.3] = relu:relu_4c_1x1
## inception 3x3
# Branch 2: 1x1 reduce (128) then 3x3 conv (160).
layer[11.2.0->11.2.1] = conv:conv_4c_3x3_reduce
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[11.2.1->11.2.2] = batch_norm:bn_4c_3x3_reduce
layer[11.2.2->11.2.3] = relu:relu_4c_3x3_reduce
layer[11.2.3->11.2.4] = conv:conv_4c_3x3
kernel_size = 3
nchannel = 160
pad = 1
stride = 1
layer[11.2.4->11.2.5] = batch_norm:bn_4c_3x3
layer[11.2.5->11.2.6] = relu:relu_4c_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (128) then two stacked 3x3 convs (160, 160).
layer[11.3.0->11.3.1] = conv:conv_4c_double_3x3_reduce
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[11.3.1->11.3.2] = batch_norm:bn_4c_double_3x3_reduce
layer[11.3.2->11.3.3] = relu:relu_4c_double_3x3_reduce
layer[11.3.3->11.3.4] = conv:conv_4c_double_3x3_0
kernel_size = 3
nchannel = 160
pad = 1
stride = 1
layer[11.3.4->11.3.5] = batch_norm:bn_4c_double_3x3_0
layer[11.3.5->11.3.6] = relu:relu_4c_double_3x3_0
layer[11.3.6->11.3.7] = conv:conv_4c_double_3x3_1
kernel_size = 3
nchannel = 160
pad = 1
stride = 1
layer[11.3.7->11.3.8] = batch_norm:bn_4c_double_3x3_1
layer[11.3.8->11.3.9] = relu:relu_4c_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 average pool, then 1x1 projection conv (128).
layer[11.4.0->11.4.1] = avg_pooling:avg_pool_4c_pool
kernel_size = 3
stride = 1
pad = 1
layer[11.4.1->11.4.2] = conv:conv_4c_proj
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[11.4.2->11.4.3] = batch_norm:bn_4c_proj
layer[11.4.3->11.4.4] = relu:relu_4c_proj
# Merge the four branch outputs along the channel axis.
layer[11.1.3,11.2.6,11.3.9,11.4.4->11] = ch_concat:ch_concat_4c_chconcat
##### inception 4d #####
# Node 11 is split into four parallel branches (12.1.*, 12.2.*, 12.3.*, 12.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 12: 96 + 192 + 192 + 128 = 608 channels.
layer[11->12.1.0,12.2.0,12.3.0,12.4.0] = split:split_4d_split
## inception 1x1
# Branch 1: single 1x1 conv, 96 channels.
layer[12.1.0->12.1.1] = conv:conv_4d_1x1
kernel_size = 1
nchannel = 96
pad = 0
stride = 1
layer[12.1.1->12.1.2] = batch_norm:bn_4d_1x1
layer[12.1.2->12.1.3] = relu:relu_4d_1x1
## inception 3x3
# Branch 2: 1x1 reduce (128) then 3x3 conv (192).
layer[12.2.0->12.2.1] = conv:conv_4d_3x3_reduce
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[12.2.1->12.2.2] = batch_norm:bn_4d_3x3_reduce
layer[12.2.2->12.2.3] = relu:relu_4d_3x3_reduce
layer[12.2.3->12.2.4] = conv:conv_4d_3x3
kernel_size = 3
nchannel = 192
pad = 1
stride = 1
layer[12.2.4->12.2.5] = batch_norm:bn_4d_3x3
layer[12.2.5->12.2.6] = relu:relu_4d_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (160) then two stacked 3x3 convs (192, 192).
layer[12.3.0->12.3.1] = conv:conv_4d_double_3x3_reduce
kernel_size = 1
nchannel = 160
pad = 0
stride = 1
layer[12.3.1->12.3.2] = batch_norm:bn_4d_double_3x3_reduce
layer[12.3.2->12.3.3] = relu:relu_4d_double_3x3_reduce
layer[12.3.3->12.3.4] = conv:conv_4d_double_3x3_0
kernel_size = 3
nchannel = 192
pad = 1
stride = 1
layer[12.3.4->12.3.5] = batch_norm:bn_4d_double_3x3_0
layer[12.3.5->12.3.6] = relu:relu_4d_double_3x3_0
layer[12.3.6->12.3.7] = conv:conv_4d_double_3x3_1
kernel_size = 3
nchannel = 192
pad = 1
stride = 1
layer[12.3.7->12.3.8] = batch_norm:bn_4d_double_3x3_1
layer[12.3.8->12.3.9] = relu:relu_4d_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 average pool, then 1x1 projection conv (128).
layer[12.4.0->12.4.1] = avg_pooling:avg_pool_4d_pool
kernel_size = 3
stride = 1
pad = 1
layer[12.4.1->12.4.2] = conv:conv_4d_proj
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[12.4.2->12.4.3] = batch_norm:bn_4d_proj
layer[12.4.3->12.4.4] = relu:relu_4d_proj
# Merge the four branch outputs along the channel axis.
layer[12.1.3,12.2.6,12.3.9,12.4.4->12] = ch_concat:ch_concat_4d_chconcat
##### inception 4e #####
# Downsampling module: node 12 is split into only three branches (no 1x1
# branch). Both conv branches end in a stride-2 3x3 conv and the pool branch
# is a stride-2 max pool with no projection conv. Output node 13 concatenates
# 192 + 256 channels from the conv branches plus the pooled input channels
# (608 from ch_concat_4d_chconcat), i.e. 1056 channels.
layer[12->13.2.0,13.3.0,13.4.0] = split:split_4e_split
## inception 3x3
# Branch 2: 1x1 reduce (128) then stride-2 3x3 conv (192).
layer[13.2.0->13.2.1] = conv:conv_4e_3x3_reduce
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[13.2.1->13.2.2] = batch_norm:bn_4e_3x3_reduce
layer[13.2.2->13.2.3] = relu:relu_4e_3x3_reduce
layer[13.2.3->13.2.4] = conv:conv_4e_3x3
kernel_size = 3
nchannel = 192
pad = 1
stride = 2
layer[13.2.4->13.2.5] = batch_norm:bn_4e_3x3
layer[13.2.5->13.2.6] = relu:relu_4e_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (192), 3x3 conv (256), then stride-2 3x3 conv (256).
layer[13.3.0->13.3.1] = conv:conv_4e_double_3x3_reduce
kernel_size = 1
nchannel = 192
pad = 0
stride = 1
layer[13.3.1->13.3.2] = batch_norm:bn_4e_double_3x3_reduce
layer[13.3.2->13.3.3] = relu:relu_4e_double_3x3_reduce
layer[13.3.3->13.3.4] = conv:conv_4e_double_3x3_0
kernel_size = 3
nchannel = 256
pad = 1
stride = 1
layer[13.3.4->13.3.5] = batch_norm:bn_4e_double_3x3_0
layer[13.3.5->13.3.6] = relu:relu_4e_double_3x3_0
layer[13.3.6->13.3.7] = conv:conv_4e_double_3x3_1
kernel_size = 3
nchannel = 256
pad = 1
stride = 2
layer[13.3.7->13.3.8] = batch_norm:bn_4e_double_3x3_1
layer[13.3.8->13.3.9] = relu:relu_4e_double_3x3_1
## inception pool (pass-through: no projection conv in this module)
# Branch 4: 3x3 stride-2 max pool; no pad is set, unlike the padded stride-2 convs.
# NOTE(review): the concat below needs this branch's spatial size to match the
# conv branches -- presumably guaranteed by the pooling layer's output-size
# rounding; confirm against the cxxnet layer docs.
layer[13.4.0->13.4.1] = max_pooling:max_pool_4e_pool
kernel_size = 3
stride = 2
# Merge the three branch outputs along the channel axis.
layer[13.2.6,13.3.9,13.4.1->13] = ch_concat:ch_concat_4e_chconcat
##### inception 5a #####
# Node 13 is split into four parallel branches (14.1.*, 14.2.*, 14.3.*, 14.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 14: 352 + 320 + 224 + 128 = 1024 channels.
layer[13->14.1.0,14.2.0,14.3.0,14.4.0] = split:split_5a_split
## inception 1x1
# Branch 1: single 1x1 conv, 352 channels.
layer[14.1.0->14.1.1] = conv:conv_5a_1x1
kernel_size = 1
nchannel = 352
pad = 0
stride = 1
layer[14.1.1->14.1.2] = batch_norm:bn_5a_1x1
layer[14.1.2->14.1.3] = relu:relu_5a_1x1
## inception 3x3
# Branch 2: 1x1 reduce (192) then 3x3 conv (320).
layer[14.2.0->14.2.1] = conv:conv_5a_3x3_reduce
kernel_size = 1
nchannel = 192
pad = 0
stride = 1
layer[14.2.1->14.2.2] = batch_norm:bn_5a_3x3_reduce
layer[14.2.2->14.2.3] = relu:relu_5a_3x3_reduce
layer[14.2.3->14.2.4] = conv:conv_5a_3x3
kernel_size = 3
nchannel = 320
pad = 1
stride = 1
layer[14.2.4->14.2.5] = batch_norm:bn_5a_3x3
layer[14.2.5->14.2.6] = relu:relu_5a_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (160) then two stacked 3x3 convs (224, 224).
layer[14.3.0->14.3.1] = conv:conv_5a_double_3x3_reduce
kernel_size = 1
nchannel = 160
pad = 0
stride = 1
layer[14.3.1->14.3.2] = batch_norm:bn_5a_double_3x3_reduce
layer[14.3.2->14.3.3] = relu:relu_5a_double_3x3_reduce
layer[14.3.3->14.3.4] = conv:conv_5a_double_3x3_0
kernel_size = 3
nchannel = 224
pad = 1
stride = 1
layer[14.3.4->14.3.5] = batch_norm:bn_5a_double_3x3_0
layer[14.3.5->14.3.6] = relu:relu_5a_double_3x3_0
layer[14.3.6->14.3.7] = conv:conv_5a_double_3x3_1
kernel_size = 3
nchannel = 224
pad = 1
stride = 1
layer[14.3.7->14.3.8] = batch_norm:bn_5a_double_3x3_1
layer[14.3.8->14.3.9] = relu:relu_5a_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 average pool, then 1x1 projection conv (128).
layer[14.4.0->14.4.1] = avg_pooling:avg_pool_5a_pool
kernel_size = 3
stride = 1
pad = 1
layer[14.4.1->14.4.2] = conv:conv_5a_proj
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[14.4.2->14.4.3] = batch_norm:bn_5a_proj
layer[14.4.3->14.4.4] = relu:relu_5a_proj
# Merge the four branch outputs along the channel axis.
layer[14.1.3,14.2.6,14.3.9,14.4.4->14] = ch_concat:ch_concat_5a_chconcat
##### inception 5b #####
# Node 14 is split into four parallel branches (15.1.*, 15.2.*, 15.3.*, 15.4.*).
# Every conv is followed by batch_norm and relu. The branch outputs are
# channel-concatenated into node 15: 352 + 320 + 224 + 128 = 1024 channels.
# Unlike the other stride-1 modules, the pool branch here uses max pooling.
layer[14->15.1.0,15.2.0,15.3.0,15.4.0] = split:split_5b_split
## inception 1x1
# Branch 1: single 1x1 conv, 352 channels.
layer[15.1.0->15.1.1] = conv:conv_5b_1x1
kernel_size = 1
nchannel = 352
pad = 0
stride = 1
layer[15.1.1->15.1.2] = batch_norm:bn_5b_1x1
layer[15.1.2->15.1.3] = relu:relu_5b_1x1
## inception 3x3
# Branch 2: 1x1 reduce (192) then 3x3 conv (320).
layer[15.2.0->15.2.1] = conv:conv_5b_3x3_reduce
kernel_size = 1
nchannel = 192
pad = 0
stride = 1
layer[15.2.1->15.2.2] = batch_norm:bn_5b_3x3_reduce
layer[15.2.2->15.2.3] = relu:relu_5b_3x3_reduce
layer[15.2.3->15.2.4] = conv:conv_5b_3x3
kernel_size = 3
nchannel = 320
pad = 1
stride = 1
layer[15.2.4->15.2.5] = batch_norm:bn_5b_3x3
layer[15.2.5->15.2.6] = relu:relu_5b_3x3
## inception double 3x3
# Branch 3: 1x1 reduce (192) then two stacked 3x3 convs (224, 224).
layer[15.3.0->15.3.1] = conv:conv_5b_double_3x3_reduce
kernel_size = 1
nchannel = 192
pad = 0
stride = 1
layer[15.3.1->15.3.2] = batch_norm:bn_5b_double_3x3_reduce
layer[15.3.2->15.3.3] = relu:relu_5b_double_3x3_reduce
layer[15.3.3->15.3.4] = conv:conv_5b_double_3x3_0
kernel_size = 3
nchannel = 224
pad = 1
stride = 1
layer[15.3.4->15.3.5] = batch_norm:bn_5b_double_3x3_0
layer[15.3.5->15.3.6] = relu:relu_5b_double_3x3_0
layer[15.3.6->15.3.7] = conv:conv_5b_double_3x3_1
kernel_size = 3
nchannel = 224
pad = 1
stride = 1
layer[15.3.7->15.3.8] = batch_norm:bn_5b_double_3x3_1
layer[15.3.8->15.3.9] = relu:relu_5b_double_3x3_1
## inception proj
# Branch 4: 3x3 stride-1 max pool, then 1x1 projection conv (128).
layer[15.4.0->15.4.1] = max_pooling:max_pool_5b_pool
kernel_size = 3
stride = 1
pad = 1
layer[15.4.1->15.4.2] = conv:conv_5b_proj
kernel_size = 1
nchannel = 128
pad = 0
stride = 1
layer[15.4.2->15.4.3] = batch_norm:bn_5b_proj
layer[15.4.3->15.4.4] = relu:relu_5b_proj
# Merge the four branch outputs along the channel axis.
layer[15.1.3,15.2.6,15.3.9,15.4.4->15] = ch_concat:ch_concat_5b_chconcat
# ---- Classifier head: 7x7 average pool -> flatten -> 1000-unit fully connected -> softmax ----
# NOTE(review): the 7x7 kernel presumably covers the whole feature map at this
# depth for a 224x224 input (hence the name global_pool) -- confirm.
layer[15->16] = avg_pooling:global_pool
kernel_size = 7
stride = 1
# NOTE(review): "[+1]" presumably connects the previous node to a newly created
# next node, and "[+0]" applies the layer in place on the current node --
# confirm against the cxxnet layer docs.
layer[+1] = flatten:flatten
layer[+1] = fullc:fc
nhidden = 1000
layer[+0] = softmax:softmax
netconfig = end
# ---- Global training settings (outside the netconfig and iterator sections) ----
# evaluation metrics (the header notes report them as val-rec@1 / val-rec@5)
metric = rec@1
metric = rec@5
# Round limits. The header notes report results only up to round 39.
max_round = 100
num_round = 100
# input shape not including batch
input_shape = 3,224,224
batch_size = 64
# NOTE(review): update_period = 2 presumably accumulates gradients over two
# batches before each weight update (effective batch of 128) -- confirm against
# the cxxnet updater docs.
update_period = 2
# global parameters in any section outside netconfig, and iter
momentum = 0.9
# "wmat:" settings apply to weight matrices, "bias:" settings to biases.
wmat:lr = 0.05
wmat:wd = 0.0001
bias:wd = 0.000
bias:lr = 0.1
# all the learning rate schedule starts with lr
# NOTE(review): the schedule is constant, so the 10x drops on rounds 20 and 28
# described in the header were presumably applied by hand (edit lr, then resume
# training) -- confirm.
lr:schedule = constant
# Checkpointing and logging.
save_model=1
model_dir=models
print_step=1
clip_gradient = 10
# random config
random_type = xavier
# new line
# NOTE(review): "gpu:0-3" presumably selects GPUs 0 through 3 (four devices);
# adjust to the devices available on the training machine.
dev = gpu:0-3