forked from horovod/horovod
-
Notifications
You must be signed in to change notification settings - Fork 0
/
common.py
56 lines (47 loc) · 2.43 KB
/
common.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
# Modifications copyright (C) 2018 Uber Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
def mpi_env_rank_and_size():
    """Return the MPI ``(rank, size)`` pair found in the process environment.

    Launchers such as `mpirun` or `mpiexec` start every process of an MPI job
    and, while wiring up communication, export environment variables that
    describe the process's position in MPI_COMM_WORLD: its rank and the total
    number of processes. Reading those variables from Python lets us check
    that `hvd.rank()` and `hvd.size()` report the expected values.

    MPI is a standard rather than a single implementation, so the variable
    names differ between implementations. This function knows about a few of
    them; in practice it only has to cover the implementations used to run
    the TensorFlow test suite.

    The known (rank, size) variable pairs are checked in a fixed order and the
    first pair for which *both* variables are set wins. A pair with only one
    of its two variables set is skipped.

    Returns:
        A tuple ``(rank, size)`` of integers. When no complete pair is found,
        ``(0, 1)`` is returned, which matches what MPI_Init does for a process
        that was not started through mpirun: it creates an independent
        communicator containing just that one process.

    Raises:
        ValueError: If a variable of the selected pair is set but its value
            cannot be parsed by ``int()``.
    """
    # (rank variable, size variable) names, in lookup order.
    known_pairs = (
        ('PMI_RANK', 'PMI_SIZE'),
        ('OMPI_COMM_WORLD_RANK', 'OMPI_COMM_WORLD_SIZE'),
    )

    for rank_name, size_name in known_pairs:
        rank_text = os.environ.get(rank_name)
        size_text = os.environ.get(size_name)

        # Only a complete pair counts; otherwise try the next implementation.
        if rank_text is None or size_text is None:
            continue

        return int(rank_text), int(size_text)

    # Not launched under a recognised MPI launcher: act as a single process.
    return 0, 1