examples: some cleaning
ymjiang committed Jul 10, 2019
1 parent 9fdd497 commit b825042
Showing 6 changed files with 23 additions and 28 deletions.
6 changes: 3 additions & 3 deletions example/keras/keras_imagenet_resnet50.py
@@ -33,8 +33,8 @@
                     help='tensorboard log directory')
 parser.add_argument('--checkpoint-format', default='./checkpoint-{epoch}.h5',
                     help='checkpoint file format')
-parser.add_argument('--fp16-allreduce', action='store_true', default=False,
-                    help='use fp16 compression during allreduce')
+parser.add_argument('--fp16-pushpull', action='store_true', default=False,
+                    help='use fp16 compression during pushpull')
 
 # Default settings from https://arxiv.org/abs/1706.02677.
 parser.add_argument('--batch-size', type=int, default=32,
@@ -96,7 +96,7 @@
 model = keras.applications.resnet50.ResNet50(weights=None)
 
 # BytePS: (optional) compression algorithm.
-compression = bps.Compression.fp16 if args.fp16_allreduce else bps.Compression.none
+compression = bps.Compression.fp16 if args.fp16_pushpull else bps.Compression.none
 
 # Restore from a previous checkpoint, if initial_epoch is specified.
 # BytePS: restore on the first worker which will broadcast both model and optimizer weights
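Note: the rename reflects that BytePS communicates via push-pull (workers push gradients to servers and pull back averaged results) rather than allreduce. A minimal sketch of how the flag typically drives byteps.keras; everything outside the hunks above (the optimizer setup, the LR value) is illustrative, not part of the diff:

    import argparse
    import keras
    import byteps.keras as bps

    parser = argparse.ArgumentParser()
    parser.add_argument('--fp16-pushpull', action='store_true', default=False,
                        help='use fp16 compression during pushpull')
    args = parser.parse_args()

    bps.init()

    # With --fp16-pushpull, gradients are cast to fp16 before the push
    # phase and decompressed after the pull phase.
    compression = bps.Compression.fp16 if args.fp16_pushpull else bps.Compression.none

    opt = keras.optimizers.SGD(lr=0.0125 * bps.size())  # illustrative LR
    opt = bps.DistributedOptimizer(opt, compression=compression)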
9 changes: 3 additions & 6 deletions example/pytorch/benchmark_byteps.py
@@ -14,8 +14,8 @@
 # Benchmark settings
 parser = argparse.ArgumentParser(description='PyTorch Synthetic Benchmark',
                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-parser.add_argument('--fp16-allreduce', action='store_true', default=False,
-                    help='use fp16 compression during allreduce')
+parser.add_argument('--fp16-pushpull', action='store_true', default=False,
+                    help='use fp16 compression during byteps pushpull')
 
 parser.add_argument('--model', type=str, default='resnet50',
                     help='model to benchmark')
@@ -60,7 +60,7 @@
 optimizer = optim.SGD(model.parameters(), lr=0.01)
 
 # BytePS: (optional) compression algorithm.
-compression = bps.Compression.fp16 if args.fp16_allreduce else bps.Compression.none
+compression = bps.Compression.fp16 if args.fp16_pushpull else bps.Compression.none
 
 # BytePS: wrap optimizer with DistributedOptimizer.
 optimizer = bps.DistributedOptimizer(optimizer,
@@ -127,6 +127,3 @@ def log(s, nl=True):
 log('Img/sec per %s: %.1f +-%.1f' % (device, img_sec_mean, img_sec_conf))
 log('Total img/sec on %d %s(s): %.1f +-%.1f' %
     (bps.size(), device, bps.size() * img_sec_mean, bps.size() * img_sec_conf))
-
-# to let arnold shutdown all tasks
-raise Exception
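The last hunk also drops a deliberate `raise Exception` whose only purpose was to force the Arnold scheduler to shut down all tasks; the benchmark now exits cleanly after reporting throughput. For reference, a sketch of how the reported numbers are typically aggregated; the sample values and the 1.96 factor (a 95% confidence half-width) are illustrative assumptions about code outside the hunk:

    import numpy as np

    img_secs = [112.0, 118.5, 115.2]        # per-iteration throughput samples
    device, size = 'GPU', 8                 # size would come from bps.size()

    img_sec_mean = np.mean(img_secs)
    img_sec_conf = 1.96 * np.std(img_secs)  # 95% confidence half-width
    print('Img/sec per %s: %.1f +-%.1f' % (device, img_sec_mean, img_sec_conf))
    print('Total img/sec on %d %s(s): %.1f +-%.1f' %
          (size, device, size * img_sec_mean, size * img_sec_conf))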
2 changes: 0 additions & 2 deletions example/pytorch/microbenchmark-byteps.py
@@ -44,10 +44,8 @@ def log(s, nl=True):
 
 def benchmark(tensor, average, name):
     if not args.no_wait and bps.rank() == 0:
-        # let other workers submit allreduce request first
         time.sleep(0.01)
     start = time.time()
-    # do not use allreduce_() as it polls every 1ms
     handle = push_pull_async_inplace(tensor, average, name)
     while True:
         if poll(handle):
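Both deleted comments still spoke of allreduce even though the surrounding code already uses BytePS push-pull, so dropping them removes the stale terminology. A self-contained sketch of the async pattern the function relies on; the `synchronize` call, tensor size, and sleep interval are assumptions beyond what the hunk shows:

    import time
    import torch
    import byteps.torch as bps
    from byteps.torch.ops import push_pull_async_inplace, poll, synchronize

    bps.init()
    tensor = torch.zeros(1024)

    start = time.time()
    # Kick off the push-pull without blocking, then poll for completion.
    handle = push_pull_async_inplace(tensor, average=True, name='bench.tensor')
    while not poll(handle):       # non-blocking readiness check
        time.sleep(0.001)
    synchronize(handle)           # finalize the in-place result
    print('push-pull latency: %.3f ms' % ((time.time() - start) * 1000))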
22 changes: 11 additions & 11 deletions example/pytorch/train_imagenet_resnet50_byteps.py
@@ -24,11 +24,11 @@
                     help='tensorboard log directory')
 parser.add_argument('--checkpoint-format', default='./checkpoint-{epoch}.pth.tar',
                     help='checkpoint file format')
-parser.add_argument('--fp16-allreduce', action='store_true', default=False,
-                    help='use fp16 compression during allreduce')
-parser.add_argument('--batches-per-allreduce', type=int, default=1,
+parser.add_argument('--fp16-pushpull', action='store_true', default=False,
+                    help='use fp16 compression during pushpull')
+parser.add_argument('--batches-per-pushpull', type=int, default=1,
                     help='number of batches processed locally before '
-                         'executing allreduce across workers; it multiplies '
+                         'executing pushpull across workers; it multiplies '
                          'total batch size.')
 
 # Default settings from https://arxiv.org/abs/1706.02677.
@@ -55,7 +55,7 @@
 args = parser.parse_args()
 args.cuda = not args.no_cuda and torch.cuda.is_available()
 
-allreduce_batch_size = args.batch_size * args.batches_per_allreduce
+pushpull_batch_size = args.batch_size * args.batches_per_pushpull
 
 bps.init()
 torch.manual_seed(args.seed)
@@ -101,7 +101,7 @@
 train_sampler = torch.utils.data.distributed.DistributedSampler(
     train_dataset, num_replicas=bps.size(), rank=bps.rank())
 train_loader = torch.utils.data.DataLoader(
-    train_dataset, batch_size=allreduce_batch_size,
+    train_dataset, batch_size=pushpull_batch_size,
     sampler=train_sampler, **kwargs)
 
 val_dataset = \
@@ -127,20 +127,20 @@
 model.cuda()
 
 # BytePS: scale learning rate by the number of GPUs.
-# Gradient Accumulation: scale learning rate by batches_per_allreduce
+# Gradient Accumulation: scale learning rate by batches_per_pushpull
 optimizer = optim.SGD(model.parameters(),
                       lr=(args.base_lr *
-                          args.batches_per_allreduce * bps.size()),
+                          args.batches_per_pushpull * bps.size()),
                       momentum=args.momentum, weight_decay=args.wd)
 
 # BytePS: (optional) compression algorithm.
-compression = bps.Compression.fp16 if args.fp16_allreduce else bps.Compression.none
+compression = bps.Compression.fp16 if args.fp16_pushpull else bps.Compression.none
 
 # BytePS: wrap optimizer with DistributedOptimizer.
 optimizer = bps.DistributedOptimizer(
     optimizer, named_parameters=model.named_parameters(),
     compression=compression,
-    backward_passes_per_step=args.batches_per_allreduce)
+    backward_passes_per_step=args.batches_per_pushpull)
 
 # Restore from a previous checkpoint, if initial_epoch is specified.
 # BytePS: restore on the first worker which will broadcast weights to other workers.
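To make backward_passes_per_step concrete: a minimal, BytePS-free sketch of the gradient-accumulation pattern it enables. The stand-in model and the numbers are illustrative; with BytePS wrapped around the optimizer, the single optimizer.step() is where the one (optionally fp16-compressed) pushpull would happen:

    import torch
    import torch.nn.functional as F
    import torch.optim as optim

    batch_size, batches_per_pushpull = 32, 4
    model = torch.nn.Linear(128, 10)   # stand-in for ResNet-50
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    data = torch.randn(batch_size * batches_per_pushpull, 128)
    target = torch.randint(0, 10, (batch_size * batches_per_pushpull,))

    optimizer.zero_grad()
    # One sub-batch of --batch-size per backward pass; gradients accumulate
    # locally across batches_per_pushpull passes before the single step.
    for i in range(0, len(data), batch_size):
        loss = F.cross_entropy(model(data[i:i + batch_size]),
                               target[i:i + batch_size])
        loss.backward()
    optimizer.step()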
@@ -233,7 +233,7 @@ def adjust_learning_rate(epoch, batch_idx):
     else:
         lr_adj = 1e-3
     for param_group in optimizer.param_groups:
-        param_group['lr'] = args.base_lr * bps.size() * args.batches_per_allreduce * lr_adj
+        param_group['lr'] = args.base_lr * bps.size() * args.batches_per_pushpull * lr_adj
 
 
 def accuracy(output, target):
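Worked through with illustrative numbers (none of these values come from the diff), the batch-size and learning-rate scaling above behaves as follows:

    batch_size = 32            # --batch-size
    batches_per_pushpull = 4   # --batches-per-pushpull
    num_workers = 8            # bps.size()
    base_lr = 0.0125           # --base-lr

    # Each worker loads 32 * 4 = 128 samples per optimizer step ...
    pushpull_batch_size = batch_size * batches_per_pushpull   # 128
    # ... for a 128 * 8 = 1024-sample global batch, so the base LR is
    # scaled linearly by 4 * 8 = 32, per https://arxiv.org/abs/1706.02677.
    global_batch = pushpull_batch_size * num_workers           # 1024
    lr = base_lr * batches_per_pushpull * num_workers          # 0.4
    print(pushpull_batch_size, global_batch, lr)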
6 changes: 3 additions & 3 deletions example/pytorch/train_mnist_byteps.py
@@ -26,8 +26,8 @@
                     help='random seed (default: 42)')
 parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                     help='how many batches to wait before logging training status')
-parser.add_argument('--fp16-allreduce', action='store_true', default=False,
-                    help='use fp16 compression during allreduce')
+parser.add_argument('--fp16-pushpull', action='store_true', default=False,
+                    help='use fp16 compression during pushpull')
 args = parser.parse_args()
 args.cuda = not args.no_cuda and torch.cuda.is_available()
 
@@ -96,7 +96,7 @@ def forward(self, x):
                       momentum=args.momentum)
 
 # BytePS: (optional) compression algorithm.
-compression = bps.Compression.fp16 if args.fp16_allreduce else bps.Compression.none
+compression = bps.Compression.fp16 if args.fp16_pushpull else bps.Compression.none
 
 # BytePS: wrap optimizer with DistributedOptimizer.
 optimizer = bps.DistributedOptimizer(optimizer,
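For completeness, a sketch of the startup broadcast that usually accompanies bps.DistributedOptimizer in these PyTorch examples, so every worker enters the first pushpull with identical weights. The stand-in model is illustrative, and the broadcast calls are an assumption based on the Horovod-style API these examples follow:

    import torch
    import torch.optim as optim
    import byteps.torch as bps

    bps.init()
    model = torch.nn.Linear(784, 10)   # stand-in for the MNIST Net
    optimizer = optim.SGD(model.parameters(), lr=0.01)

    # Rank 0's parameters and optimizer state win; other workers overwrite
    # their local copies before training starts.
    bps.broadcast_parameters(model.state_dict(), root_rank=0)
    bps.broadcast_optimizer_state(optimizer, root_rank=0)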
6 changes: 3 additions & 3 deletions example/tensorflow/synthetic_benchmark.py
@@ -13,8 +13,8 @@
 # Benchmark settings
 parser = argparse.ArgumentParser(description='TensorFlow Synthetic Benchmark',
                                  formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-parser.add_argument('--fp16-allreduce', action='store_true', default=False,
-                    help='use fp16 compression during allreduce')
+parser.add_argument('--fp16-pushpull', action='store_true', default=False,
+                    help='use fp16 compression during pushpull')
 
 parser.add_argument('--model', type=str, default='ResNet50',
                     help='model to benchmark')
@@ -58,7 +58,7 @@
 opt = tf.train.GradientDescentOptimizer(0.01)
 
 # BytePS: (optional) compression algorithm.
-compression = bps.Compression.fp16 if args.fp16_allreduce else bps.Compression.none
+compression = bps.Compression.fp16 if args.fp16_pushpull else bps.Compression.none
 
 # BytePS: wrap optimizer with DistributedOptimizer.
 opt = bps.DistributedOptimizer(opt, compression=compression)
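The same rename applied to the TF1-style benchmark. A minimal sketch of the full wiring; everything outside the hunks above (bps.init, the broadcast hook) is an assumption based on the conventions these examples follow:

    import argparse
    import tensorflow as tf
    import byteps.tensorflow as bps

    parser = argparse.ArgumentParser()
    parser.add_argument('--fp16-pushpull', action='store_true', default=False,
                        help='use fp16 compression during pushpull')
    args = parser.parse_args()

    bps.init()
    opt = tf.train.GradientDescentOptimizer(0.01)

    compression = bps.Compression.fp16 if args.fp16_pushpull else bps.Compression.none
    opt = bps.DistributedOptimizer(opt, compression=compression)

    # Broadcast initial variables from rank 0 before training starts.
    hooks = [bps.BroadcastGlobalVariablesHook(0)]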
