add readme
joe-siyuan-qiao committed Jun 25, 2018
1 parent adcc0c6 commit 7e7aa93
Showing 13 changed files with 202 additions and 262 deletions.
27 changes: 0 additions & 27 deletions CONTRIBUTING.md

This file was deleted.

92 changes: 0 additions & 92 deletions INSTALL.md

This file was deleted.

33 changes: 0 additions & 33 deletions PATENTS

This file was deleted.

51 changes: 48 additions & 3 deletions README.md
@@ -1,5 +1,50 @@
## Gradually Updated Neural Networks for Large-Scale Image Recognition

Torch implementation for gradually updated neural networks:
[Gradually Updated Neural Networks for Large-Scale Image Recognition](http://www.cs.jhu.edu/~alanlab/Pubs18/qiao2018gunn.pdf)
[Siyuan Qiao](http://www.cs.jhu.edu/~syqiao/), [Zhishuai Zhang](https://zhishuai.xyz/), [Wei Shen](http://wei-shen.weebly.com/), [Bo Wang](https://bowang87.weebly.com/), [Alan Yuille](http://www.cs.jhu.edu/~ayuille/)
In the Thirty-fifth International Conference on Machine Learning (ICML), 2018.

The code is built on [fb.resnet.torch](https://github.com/facebook/fb.resnet.torch).

```
@inproceedings{Gunn,
title = {Gradually Updated Neural Networks for Large-Scale Image Recognition},
author = {Siyuan Qiao and Zhishuai Zhang and Wei Shen and Bo Wang and Alan L. Yuille},
booktitle = {International Conference on Machine Learning (ICML)},
year = {2018}
}
```

### Introduction
State-of-the-art network architectures usually increase depth by cascading convolutional layers or building blocks.
Gradually Updated Neural Networks (GUNN) offer an alternative way to increase depth.
GUNN introduces computation orderings to the channels within convolutional layers or blocks, and based on these orderings it gradually computes the outputs in a channel-wise manner.
The added orderings increase the depth and learning capacity of the networks without any additional computation cost, and they also eliminate overlap singularities, so the networks converge faster and perform better.

<img src="intro.png"/>
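The channel-wise update described above can be sketched as a toy example. This is a minimal NumPy illustration, not the repository's implementation: the function `gunn_update` and its per-segment update functions are hypothetical names introduced here for exposition.

```python
import numpy as np

def gunn_update(x, segment_fns):
    """Toy sketch of GUNN's gradual, channel-wise update.

    x           : feature map of shape (C, H, W), split into
                  len(segment_fns) equal channel segments
    segment_fns : one update function per channel segment; each sees the
                  *current* state of the whole feature map, so later
                  segments depend on the already-updated earlier ones --
                  the ordering is what adds effective depth
    """
    k = len(segment_fns)
    assert x.shape[0] % k == 0
    seg = x.shape[0] // k
    out = x.copy()
    for i, f in enumerate(segment_fns):
        # Segment i is computed from `out`, which already holds the
        # updated segments 0 .. i-1 (the "computation ordering").
        out[i * seg:(i + 1) * seg] = f(out)
    return out
```

Because segment `i` reads the outputs of segments `0..i-1`, a single GUNN layer behaves like a deeper cascade over its channel segments.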

### Usage
Install Torch and the required packages by following the instructions [here](https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md).

Training on CIFAR:
```bash
th main.lua -netType gunn -dataset cifar10 -batchSize 64 -nGPU 4 -nThreads 8 -shareGradInput true
th main.lua -netType gunn-15 -dataset cifar10 -batchSize 64 -nGPU 4 -nThreads 8 -shareGradInput true -nEpochs 300
```
For CIFAR-100, change cifar10 to cifar100 after -dataset.

Training on ImageNet:
```bash
th main.lua -netType gunn-18 -dataset imagenet -batchSize 256 -nGPU 4 -nThreads 16 -shareGradInput true -nEpochs 120 -data [data folder]
```

### Results

Model | Parameters | CIFAR-10 error (%) | CIFAR-100 error (%)
--------|:---------:|:------------------:|:------------------:
GUNN-15 | 1.6M | 4.15 | 20.45
GUNN-24 | 29.6M | 3.21 | 16.69

Model | Parameters | ImageNet Top-1 error (%) | ImageNet Top-5 error (%)
--------|:---------:|:------------------------:|:------------------------:
GUNN-18 | 28.9M | 21.65 | 5.87
Wide GUNN-18 | 45.6M | 20.59 | 5.52
68 changes: 0 additions & 68 deletions TRAINING.md

This file was deleted.

1 change: 1 addition & 0 deletions datasets/cifar10.lua
@@ -46,6 +46,7 @@ function CifarDataset:preprocess()
t.ColorNormalize(meanstd),
t.HorizontalFlip(0.5),
t.RandomCrop(32, 4),
t.Jigsaw(),
}
elseif self.split == 'val' then
return t.ColorNormalize(meanstd)
1 change: 1 addition & 0 deletions datasets/cifar100.lua
@@ -57,6 +57,7 @@ function CifarDataset:preprocess()
t.ColorNormalize(meanstd),
t.HorizontalFlip(0.5),
t.RandomCrop(32, 4),
t.Jigsaw(),
}
elseif self.split == 'val' then
return t.ColorNormalize(meanstd)
25 changes: 25 additions & 0 deletions datasets/transforms.lua
@@ -289,4 +289,29 @@ function M.ColorJitter(opt)
return M.RandomOrder(ts)
end

function M.Jigsaw()
   return function(input)
      -- Randomly swap the two pieces of the image around a random split
      -- point, either column-wise or row-wise (a two-piece jigsaw).
      local h, w = input:size(2), input:size(3)
      if torch.uniform() < 1/3 then
         return input -- leave the image unchanged 1/3 of the time
      end
      if torch.uniform() < 0.5 then
         local d = torch.random(1, w)
         if d < w then
            -- Swap the left d columns with the right w - d columns
            local l, r = input:narrow(3, 1, d):clone(), input:narrow(3, d + 1, w - d):clone()
            input:narrow(3, 1, w - d):copy(r)
            input:narrow(3, w - d + 1, d):copy(l)
         end
      else
         local d = torch.random(1, h)
         if d < h then
            -- Swap the top d rows with the bottom h - d rows
            local u, b = input:narrow(2, 1, d):clone(), input:narrow(2, d + 1, h - d):clone()
            input:narrow(2, 1, h - d):copy(b)
            input:narrow(2, h - d + 1, d):copy(u)
         end
      end
      return input
   end
end

return M
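The Jigsaw transform above is equivalent to a circular shift of the image along one spatial axis: swapping the first `d` columns with the remaining `w - d` columns is the same as rolling the image left by `d`. A NumPy restatement of that idea (the `jigsaw` function here is our own hypothetical sketch, not part of this repository):

```python
import numpy as np

def jigsaw(img, rng=np.random):
    """Two-piece jigsaw as a circular shift of a (C, H, W) image."""
    c, h, w = img.shape
    if rng.uniform() < 1/3:
        return img                                  # unchanged 1/3 of the time
    if rng.uniform() < 0.5:
        # Shifting left by d swaps [first d cols | rest] -> [rest | first d cols];
        # d == w rolls by a full width, matching the Lua code's `d < w` no-op.
        return np.roll(img, -rng.randint(1, w + 1), axis=2)
    return np.roll(img, -rng.randint(1, h + 1), axis=1)
```

Either form only permutes pixels, so the pixel multiset of the image is preserved.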
Binary file added intro.png
21 changes: 12 additions & 9 deletions models/GunnLayer.lua
@@ -1,11 +1,10 @@
require 'nn'
require 'cunn'
require 'cudnn'
local nninit = require 'nninit'

local GunnLayer, parent = torch.class('nn.GunnLayer', 'nn.Container')

function GunnLayer:__init(nChannels, nSegments)
function GunnLayer:__init(nChannels, nSegments, opt)
parent.__init(self)
self.train = true
assert(nChannels % nSegments == 0)
@@ -21,13 +20,17 @@ function GunnLayer:__init(nChannels, nSegments)
convLayer:add(cudnn.ReLU(true))
convLayer:add(cudnn.SpatialConvolution(oChannels * 2, oChannels, 1, 1, 1, 1, 0, 0))
convLayer:add(cudnn.SpatialBatchNormalization(oChannels))
local shortcut = nn.Sequential()
shortcut:add(cudnn.SpatialConvolution(nChannels, oChannels, 1, 1, 1, 1, 0, 0))
shortcut:add(cudnn.SpatialBatchNormalization(oChannels))
local module = nn.Sequential()
module:add(nn.ConcatTable():add(shortcut):add(convLayer))
module:add(nn.CAddTable(true))
table.insert(self.modules, module)
if opt.dataset == 'imagenet' then
table.insert(self.modules, convLayer)
else
local shortcut = nn.Sequential()
shortcut:add(cudnn.SpatialConvolution(nChannels, oChannels, 1, 1, 1, 1, 0, 0))
shortcut:add(cudnn.SpatialBatchNormalization(oChannels))
local module = nn.Sequential()
module:add(nn.ConcatTable():add(shortcut):add(convLayer))
module:add(nn.CAddTable(true))
table.insert(self.modules, module)
end
end
self.inputContiguous = torch.CudaTensor()
self.inputTable = {}
