Commit
Merge pull request #1174 from GY-GitCode/24-5-24-dev
lzjpaul committed May 31, 2024
2 parents 3ac79d1 + 90357af commit d46f338
Showing 1 changed file with 87 additions and 0 deletions.
87 changes: 87 additions & 0 deletions examples/cnn_ms/autograd/resnet_dist.py
@@ -0,0 +1,87 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# the code is modified from
# https://github.com/pytorch/vision/blob/master/torchvision/models/resnet.py
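
# This example benchmarks synchronous distributed training of a ResNet-50
# model on random data, using SINGA's autograd API and the opt.DistOpt
# optimizer wrapper (one process per GPU).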

from singa import autograd
from singa import tensor
from singa import device
from singa import opt

import numpy as np
from tqdm import trange

if __name__ == "__main__":
    # create a momentum SGD optimizer and wrap it with DistOpt for
    # synchronous distributed (multi-GPU) training
    sgd = opt.SGD(lr=0.1, momentum=0.9, weight_decay=1e-5)
    sgd = opt.DistOpt(sgd)

    if (sgd.global_rank == 0):
        print("Start initialization...........", flush=True)

    # each process drives the GPU that matches its local rank
    dev = device.create_cuda_gpu_on(sgd.local_rank)

    from resnet import resnet50
    model = resnet50()

    niters = 100
    batch_size = 32
    IMG_SIZE = 224

    # synthetic ImageNet-sized inputs and random labels, used only for benchmarking
    tx = tensor.Tensor((batch_size, 3, IMG_SIZE, IMG_SIZE), dev)
    ty = tensor.Tensor((batch_size,), dev, tensor.int32)
    autograd.training = True
    x = np.random.randn(batch_size, 3, IMG_SIZE, IMG_SIZE).astype(np.float32)
    y = np.random.randint(0, 1000, batch_size, dtype=np.int32)
    tx.copy_from_numpy(x)
    ty.copy_from_numpy(y)

    import time

    # accumulators for the per-phase timings (forward, softmax/loss, backward+update)
    dev.Sync()
    start = time.time()
    fd = 0
    softmax = 0
    update = 0
    with trange(niters) as t:
        for _ in t:
            # time the forward pass; Sync() lets the GPU work finish before timing
            dev.Sync()
            tick = time.time()
            x = model(tx)
            dev.Sync()
            fd += time.time() - tick
            # time the loss computation
            tick = time.time()
            loss = autograd.softmax_cross_entropy(x, ty)
            dev.Sync()
            softmax += time.time() - tick
            # backward pass, gradient synchronization and parameter update
            sgd.backward_and_update(loss)

    dev.Sync()
    end = time.time()
    throughput = float(sgd.world_size * niters * batch_size) / (end - start)
    titer = (end - start) / float(niters)
    tforward = float(fd) / float(niters)
    tsoftmax = float(softmax) / float(niters)
    tbackward = titer - tforward - tsoftmax

    if (sgd.global_rank == 0):
        print("\nThroughput = {} per second".format(throughput), flush=True)
        print("Total={}, forward={}, softmax={}, backward={}".format(
            titer, tforward, tsoftmax, tbackward),
              flush=True)
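
A minimal launch sketch (not part of this commit; the exact command is an assumption and depends on how SINGA was built with MPI/NCCL support): DistOpt expects one process per GPU, so the benchmark would typically be started with an MPI launcher, for example:

    # hypothetical launch on 2 GPUs; set -np to the number of available GPUs
    mpiexec -np 2 python resnet_dist.py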
