add mp to mz

hunterlang · Feb 5, 2016 · 49b0525 · 49b0525
commit 49b0525
Show file tree

Hide file tree

Showing 4 changed files with 230 additions and 0 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,59 @@
+# Model 
+This is an implementation of the deep residual network used
+for [Mini-Places2](http://6.869.csail.mit.edu/fa15/project.html) as
+described in [He et al., "Deep Residual Learning for Image
+Recognition"](http://arxiv.org/abs/1512.03385).  The model is
+structured as a very deep network with skip connections designed to
+have convolutional parameters adjusting to residual activations. The
+training protocol uses minimal pre-processing (mean subtraction) and
+very simple data augmentation (shuffling, flipping, and cropping).
+All model parameters (even batch norm parameters) are updated using
+simple stochastic gradient descent with weight decay. The learning
+rate is dropped only twice (at 90 and 135 epochs in the paper).
+
+### Acknowledgments
+Many thanks to Dr. He and his team at MSRA for their helpful input in
+replicating the model as described in their paper.
+
+### Model script
+The model training script is [miniplaces_msra.py](./miniplaces_msra.py).
+
+### Trained weights
+The trained weights file can be downloaded from AWS
+([miniplaces_msra_e66.pkl](https://s3-us-west-1.amazonaws.com/nervana-modelzoo/miniplaces_msra_e66.pkl)).
+
+### Performance
+Training this model with the options described below should achieve roughly 17.5% top-5
+error using only mean subtraction, random cropping, and random flips. With multiscale evaluation (see the evaluation script),
+the model should achieve roughly 14.6% top-5 error.
+
+## Instructions
+This script was tested with [neon version 1.2](https://github.com/NervanaSystems/neon/tree/v1.2.0).
+Make sure that your local repo is synced to this commit and run the [installation
+procedure](http://neon.nervanasys.com/docs/latest/user_guide.html#installation) before proceeding.
+The commit SHA for v1.2 is `385483881ee1fe1f0445fc78d7edf5b8ddc5c8c5`.
+
+This example uses the `ImageLoader` module to load the images for consumption while applying random
+cropping, flipping, and shuffling.  Prior to beginning training, you need to write out the padded
+mini-places2 images into a macrobatch repository. See [miniplaces_batchwriter.sh](./miniplaces_batchwriter.sh).
+
+Note that it is good practice to choose a `data_dir` that is local to your machine in order to
+avoid having the `ImageLoader` module perform reads over the network.
+
+Once the batches have been written out, you may initiate training:
+```
+miniplaces_msra.py -r 0 -vv \
+    --log <logfile> \
+    --epochs 80 \
+    --save_path <model-save-path> \
+    --eval_freq 1 \
+    --backend gpu \
+    --data_dir <path-to-saved-batches>
+```
+
+If you just want to run evaluation, you can use the much simpler script that loads the serialized
+model and evaluates it on the validation set:
+
+```
+miniplaces_eval.py -vv --model_file <model-save-path>
+```
diff --git a/miniplaces_batchwriter.sh b/miniplaces_batchwriter.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+### CHANGE THESE VARIABLES
+SRC_DIR=/mnt/data/miniplaces  # where data.tar.gz, development_kit.tar.gz live
+IMG_DIR=/scratch/minitest  # where images will be written out to
+OUT_DIR=/scratch/minitest/macro  # where macrobatches will be written out to
+
+# Unpack the images
+cd $IMG_DIR
+tar -xzf $SRC_DIR/data.tar.gz images --strip-components=1
+
+# Create the label mapping files
+tar -xzf $SRC_DIR/development_kit.tar.gz development_kit/data/train.txt -O | tr " " , | shuf | gzip > $IMG_DIR/train_file.csv.gz
+tar -xzf $SRC_DIR/development_kit.tar.gz development_kit/data/val.txt -O | tr " " , | gzip > $IMG_DIR/val_file.csv.gz
+
+# Write the batches
+python neon/data/batch_writer.py --data_dir $OUT_DIR --image_dir $IMG_DIR --set_type csv
+
diff --git a/miniplaces_eval.py b/miniplaces_eval.py
@@ -0,0 +1,76 @@
+from neon.util.argparser import NeonArgparser
+from neon.initializers import Kaiming, IdentityInit
+from neon.layers import Conv, Pooling, GeneralizedCost, Affine, ResidualModule, Activation
+from neon.optimizers import GradientDescentMomentum, Schedule
+from neon.transforms import Rectlin, Softmax, CrossEntropyMulti, TopKMisclassification
+from neon.models import Model
+from neon.data import ImageLoader
+from neon.callbacks.callbacks import Callbacks
+
+# parse the command line arguments (generates the backend)
+parser = NeonArgparser(__doc__)
+parser.add_argument('--network', default='plain', choices=['plain', 'resnet'],
+                    help='type of network to create (plain or resnet)')
+parser.add_argument('--depth', type=int, default=9,
+                    help='depth of each stage (network depth will be 6n+2)')
+args = parser.parse_args()
+
+# setup data provider
+imgset_options = dict(inner_size=112, scale_range=140, repo_dir=args.data_dir)
+
+#train = ImageLoader(set_name='train', shuffle=True, do_transforms=True,
+#                   inner_size=112, scale_range=(128,240), repo_dir=args.data_dir)
+
+
+
+def conv_params(fsize, nfm, stride=1, relu=True):
+    return dict(fshape=(fsize, fsize, nfm), strides=stride, padding=(1 if fsize > 1 else 0),
+                activation=(Rectlin() if relu else None),
+                init=Kaiming(local=True),
+                batch_norm=True)
+
+
+def module_factory(nfm, stride=1):
+    projection = None if stride == 1 else IdentityInit()
+    module = [Conv(**conv_params(3, nfm, stride=stride)),
+              Conv(**conv_params(3, nfm, relu=False))]
+    module = module if args.network == 'plain' else [ResidualModule(module, projection)]
+    module.append(Activation(Rectlin()))
+    return module
+
+
+# Structure of the deep residual part of the network:
+# args.depth modules of 2 convolutional layers each at feature map depths of 32, 64, 128, 256
+nfms = [2**(stage + 5) for stage in sorted(range(4) * args.depth)]
+strides = [1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
+
+# Now construct the network
+from neon.layers import ColorNoise
+#layers = [ColorNoise()]
+
+# layers.append(Affine(nout=100, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))
+
+scales = [112, 128, 160, 240]
+
+for scale in scales:
+    print scale
+
+    layers = []
+    layers += [Conv(**conv_params(7, 32, 2))]
+    for nfm, stride in zip(nfms, strides):
+        layers.append(module_factory(nfm, stride))
+    layers.append(Pooling(7, op='avg'))
+
+    layers.append(Conv(fshape=(1,1,100), init=Kaiming(local=True), batch_norm=True))
+    layers.append(Pooling(fshape='all', op='avg'))
+    layers.append(Activation(Softmax()))
+
+    model = Model(layers=layers)
+    test = ImageLoader(set_name='validation', shuffle=False, do_transforms=False, inner_size=scale,
+                       scale_range=scale, repo_dir=args.data_dir)
+
+    model.load_params("/home/users/hunter/bigfeat_dropout.pkl")
+
+    softmaxes = model.get_outputs(test)
+    from neon.util.persist import save_obj
+    save_obj(softmaxes, "bigfeat_dropout_SM_{}.pkl".format(scale))
diff --git a/miniplaces_msra.py b/miniplaces_msra.py
@@ -0,0 +1,77 @@
+from neon.util.argparser import NeonArgparser
+from neon.initializers import Kaiming, IdentityInit
+from neon.layers import Conv, Pooling, GeneralizedCost, Affine, ResidualModule, Activation, Dropout
+from neon.optimizers import GradientDescentMomentum, Schedule
+from neon.transforms import Rectlin, Softmax, CrossEntropyMulti, TopKMisclassification
+from neon.models import Model
+from neon.data import ImageLoader
+from neon.callbacks.callbacks import Callbacks
+
+# parse the command line arguments (generates the backend)
+parser = NeonArgparser(__doc__)
+parser.add_argument('--network', default='plain', choices=['plain', 'resnet'],
+                    help='type of network to create (plain or resnet)')
+parser.add_argument('--depth', type=int, default=9,
+                    help='depth of each stage (network depth will be 6n+2)')
+args = parser.parse_args()
+
+# setup data provider
+imgset_options = dict(inner_size=112, scale_range=140, repo_dir=args.data_dir)
+train = ImageLoader(set_name='train', shuffle=True, do_transforms=True,
+                   inner_size=112, scale_range=(128,240), repo_dir=args.data_dir)
+
+test = ImageLoader(set_name='validation', shuffle=False, do_transforms=False,
+                  inner_size=112, scale_range=0, repo_dir=args.data_dir)
+
+
+def conv_params(fsize, nfm, stride=1, relu=True):
+    return dict(fshape=(fsize, fsize, nfm), strides=stride, padding=(1 if fsize > 1 else 0),
+                activation=(Rectlin() if relu else None),
+                init=Kaiming(local=True),
+                batch_norm=True)
+
+
+def module_factory(nfm, stride=1):
+    projection = None if stride == 1 else IdentityInit()
+    module = [Conv(**conv_params(3, nfm, stride=stride)),
+              Conv(**conv_params(3, nfm, relu=False))]
+    module = module if args.network == 'plain' else [ResidualModule(module, projection)]
+    module.append(Activation(Rectlin()))
+    return module
+
+
+# Structure of the deep residual part of the network:
+# args.depth modules of 2 convolutional layers each at feature map depths of 32, 64, 128, 256
+nfms = [2**(stage + 5) for stage in sorted(range(4) * args.depth)]
+strides = [1] + [1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])]
+
+# Now construct the network
+from neon.layers import ColorNoise
+#layers = [ColorNoise()]
+layers = []
+layers += [Conv(**conv_params(3, 32, 2))]
+for nfm, stride in zip(nfms, strides):
+    layers.append(module_factory(nfm, stride))
+layers.append(Pooling(7, op='avg'))
+
+# For multiscale evaluation, uncomment these lines and comment out the
+# Affine layer. Then change the scale_range of the validation set ImageLoader to
+# scale_range=desired_image_size, and use model.get_outputs to get the final softmax
+# outputs on the validation set.
+#layers.append(Conv(fshape=(1,1,100), init=Kaiming(local=True), batch_norm=True))
+#layers.append(Pooling(fshape='all', op='avg'))
+#layers.append(Activation(Softmax()))
+
+layers.append(Affine(nout=100, init=Kaiming(local=False), batch_norm=True, activation=Softmax()))
+
+model = Model(layers=layers)
+opt = GradientDescentMomentum(0.1, 0.9, wdecay=0.0005, schedule=Schedule([40, 70], 0.1))
+
+# configure callbacks
+valmetric = TopKMisclassification(k=5)
+callbacks = Callbacks(model, train, eval_set=test, metric=valmetric, **args.callback_args)
+callbacks.add_deconv_callback(train, test)
+
+cost = GeneralizedCost(costfunc=CrossEntropyMulti())
+
+model.fit(train, optimizer=opt, num_epochs=args.epochs, cost=cost, callbacks=callbacks)