Merge eff764a into 299637b

chainer · Apr 17, 2019 · cb8748b · cb8748b
2 parents 299637b + eff764a
commit cb8748b
Show file tree

Hide file tree

Showing 3 changed files with 40 additions and 27 deletions.
diff --git a/examples/cifar/README.md b/examples/cifar/README.md
@@ -13,7 +13,7 @@ This uses the VGG-style network from [here](http://torch.ch/blog/2015/07/30/cifa
 
 No data augmentation is used and the classification accuracy on the CIFAR-10 test set for the VGG-style model should reach approximately 89% after 200 iterations or so.
 
-If you want to run this example on the N-th GPU, pass `--gpu=N` to the script. To run on CPU, pass `--gpu=-1`.
+If you want to run this example on the N-th GPU, pass `--device=N` to the script. To run on CPU, pass `--device=-1`.
 
 For example, to run the default model, which uses CIFAR-10 and GPU 0:
 ```
@@ -22,5 +22,5 @@ train_cifar.py
 
 to run the CIFAR-100 dataset on GPU 1:
 ```
-train_cifar.py --gpu=1 --dataset='cifar100'
+train_cifar.py --device=1 --dataset='cifar100'
 ```
diff --git a/examples/cifar/train_cifar.py b/examples/cifar/train_cifar.py
@@ -1,6 +1,7 @@
 import argparse
 
 import chainer
+from chainer import backend
 import chainer.links as L
 from chainer import training
 from chainer.training import extensions
@@ -14,25 +15,35 @@
 
 def main():
     parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
-    parser.add_argument('--dataset', '-d', default='cifar10',
+    parser.add_argument('--dataset', default='cifar10',
                         help='The dataset to use: cifar10 or cifar100')
     parser.add_argument('--batchsize', '-b', type=int, default=64,
                         help='Number of images in each mini-batch')
     parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                         help='Learning rate for SGD')
     parser.add_argument('--epoch', '-e', type=int, default=300,
                         help='Number of sweeps over the dataset to train')
-    parser.add_argument('--gpu', '-g', type=int, default=0,
-                        help='GPU ID (negative value indicates CPU)')
+    parser.add_argument('--device', '-d', type=str, default='-1',
+                        help='Device specifier. Either ChainerX device '
+                        'specifier or an integer. If non-negative integer, '
+                        'CuPy arrays with specified device id are used. If '
+                        'negative integer, NumPy arrays are used')
     parser.add_argument('--out', '-o', default='result',
                         help='Directory to output the result')
     parser.add_argument('--resume', '-r', default='',
                         help='Resume the training from snapshot')
     parser.add_argument('--early-stopping', type=str,
                         help='Metric to watch for early stopping')
+    group = parser.add_argument_group('deprecated arguments')
+    group.add_argument('--gpu', '-g', dest='device',
+                       type=int, nargs='?', const=0,
+                       help='GPU ID (negative value indicates CPU)')
     args = parser.parse_args()
 
-    print('GPU: {}'.format(args.gpu))
+    device = chainer.get_device(args.device)
+    device.use()
+
+    print('Device: {}'.format(device))
     print('# Minibatch-size: {}'.format(args.batchsize))
     print('# epoch: {}'.format(args.epoch))
     print('')
@@ -51,10 +62,7 @@ def main():
     else:
         raise RuntimeError('Invalid dataset choice.')
     model = L.Classifier(models.VGG.VGG(class_labels))
-    if args.gpu >= 0:
-        # Make a specified GPU current
-        chainer.backends.cuda.get_device_from_id(args.gpu).use()
-        model.to_gpu()  # Copy the model to the GPU
+    model.to_device(device)
 
     optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
     optimizer.setup(model)
@@ -73,19 +81,21 @@ def main():
 
     # Set up a trainer
     updater = training.updaters.StandardUpdater(
-        train_iter, optimizer, device=args.gpu)
+        train_iter, optimizer, device=device)
     trainer = training.Trainer(updater, stop_trigger, out=args.out)
 
     # Evaluate the model with the test dataset for each epoch
-    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
+    trainer.extend(extensions.Evaluator(test_iter, model, device=device))
 
     # Reduce the learning rate by half every 25 epochs.
     trainer.extend(extensions.ExponentialShift('lr', 0.5),
                    trigger=(25, 'epoch'))
 
     # Dump a computational graph from 'loss' variable at the first iteration
     # The "main" refers to the target link of the "main" optimizer.
-    trainer.extend(extensions.DumpGraph('main/loss'))
+    # TODO(imanishi): Support for ChainerX
+    if not isinstance(device, backend.ChainerxDevice):
+        trainer.extend(extensions.DumpGraph('main/loss'))
 
     # Take a snapshot at each epoch
     trainer.extend(extensions.snapshot(

diff --git a/examples/cifar/train_cifar_custom_loop.py b/examples/cifar/train_cifar_custom_loop.py
@@ -23,25 +23,35 @@
 
 def main():
     parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
-    parser.add_argument('--dataset', '-d', default='cifar10',
+    parser.add_argument('--dataset', default='cifar10',
                         help='The dataset to use: cifar10 or cifar100')
     parser.add_argument('--batchsize', '-b', type=int, default=64,
                         help='Number of images in each mini-batch')
     parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                         help='Learning rate for SGD')
     parser.add_argument('--epoch', '-e', type=int, default=300,
                         help='Number of sweeps over the dataset to train')
-    parser.add_argument('--gpu', '-g', type=int, default=0,
-                        help='GPU ID (negative value indicates CPU)')
+    parser.add_argument('--device', '-d', type=str, default='-1',
+                        help='Device specifier. Either ChainerX device '
+                        'specifier or an integer. If non-negative integer, '
+                        'CuPy arrays with specified device id are used. If '
+                        'negative integer, NumPy arrays are used')
     parser.add_argument('--out', '-o', default='result',
                         help='Directory to output the result')
     parser.add_argument('--test', action='store_true',
                         help='Use tiny datasets for quick tests')
     parser.add_argument('--resume', '-r', type=str,
                         help='Directory that has `vgg.model` and `vgg.state`')
+    group = parser.add_argument_group('deprecated arguments')
+    group.add_argument('--gpu', '-g', dest='device',
+                       type=int, nargs='?', const=0,
+                       help='GPU ID (negative value indicates CPU)')
     args = parser.parse_args()
 
-    print('GPU: {}'.format(args.gpu))
+    device = chainer.get_device(args.device)
+    device.use()
+
+    print('Device: {}'.format(device))
     print('# Minibatch-size: {}'.format(args.batchsize))
     print('# epoch: {}'.format(args.epoch))
     print('')
@@ -68,10 +78,7 @@ def main():
     test_count = len(test)
 
     model = L.Classifier(models.VGG.VGG(class_labels))
-    if args.gpu >= 0:
-        # Make a specified GPU current
-        chainer.backends.cuda.get_device_from_id(args.gpu).use()
-        model.to_gpu()  # Copy the model to the GPU
+    model.to_device(device)
 
     optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
     optimizer.setup(model)
@@ -102,9 +109,7 @@ def main():
             optimizer.lr *= 0.5
             print('Reducing learning rate to: {}'.format(optimizer.lr))
 
-        x_array, t_array = convert.concat_examples(batch, args.gpu)
-        x = chainer.Variable(x_array)
-        t = chainer.Variable(t_array)
+        x, t = convert.concat_examples(batch, device)
         optimizer.update(model, x, t)
         sum_loss += float(model.loss.array) * len(t)
         sum_acc += float(model.accuracy.array) * len(t)
@@ -120,9 +125,7 @@ def main():
                 # This is optional but can reduce computational overhead.
                 with chainer.using_config('enable_backprop', False):
                     for batch in test_iter:
-                        x, t = convert.concat_examples(batch, args.gpu)
-                        x = chainer.Variable(x)
-                        t = chainer.Variable(t)
+                        x, t = convert.concat_examples(batch, device)
                         loss = model(x, t)
                         sum_loss += float(loss.array) * len(t)
                         sum_acc += float(model.accuracy.array) * len(t)