// horovod/common/wire/mpi_message.fbs

// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
// Modifications copyright (C) 2017 Uber Technologies, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// =============================================================================

namespace horovod.common.wire;

// Supported data types.
// Each value is serialized as a single byte holding the number assigned below.
// Renumbering an existing entry changes the wire encoding, so new types should
// be appended with a fresh number rather than inserted in the middle.
enum MPIDataType:byte {
    HOROVOD_UINT8 = 0,
    HOROVOD_INT8 = 1,
    HOROVOD_UINT16 = 2,
    HOROVOD_INT16 = 3,
    HOROVOD_INT32 = 4,
    HOROVOD_INT64 = 5,
    HOROVOD_FLOAT16 = 6,
    HOROVOD_FLOAT32 = 7,
    HOROVOD_FLOAT64 = 8,
    HOROVOD_BOOL = 9
}

// An MPIRequest is a message sent from a rank greater than zero to the
// coordinator (rank zero), informing the coordinator of an operation that
// the rank wants to do and the tensor that it wants to apply the operation to.
//
// MPIRequestType is the kind of operation carried in MPIRequest.request_type.
// It is serialized as a single byte holding the number assigned below.
enum MPIRequestType:byte {
    ALLREDUCE = 0,
    ALLGATHER = 1,
    BROADCAST = 2
}
// One requested operation on one named tensor.
table MPIRequest {
    // The request rank is necessary to create a consistent ordering of results,
    // for example in the allgather where the order of outputs should be sorted
    // by rank.
    request_rank:int;
    // Which operation is being requested (one of MPIRequestType).
    request_type:MPIRequestType;
    // Data type of the tensor (one of MPIDataType).
    tensor_type:MPIDataType;
    // Name of the tensor the operation applies to.
    tensor_name:string;

    // Root rank is necessary for broadcast operation.
    // NOTE(review): presumably unused for the other request types; confirm
    // against the code that reads this message.
    root_rank:int;

    // Device this request is made on.
    device:int;

    // The shape is stored as a plain vector of 64-bit integers instead of a
    // TensorShapeProto because linking directly to TensorFlow protos causes
    // issues.
    // NOTE(review): this comment previously pointed to "the comment for
    // MPIDataType" for details, but that comment carries no such explanation
    // in this file.
    tensor_shape:[long];
}
// Bundles a vector of MPIRequest entries with a shutdown flag in one message.
table MPIRequestList {
    // Requests carried by this message.
    requests:[MPIRequest];

    // Flag indicating if worker is requesting a shutdown.
    shutdown:bool;
}

// An MPIResponse is a message sent from the coordinator (rank zero) to a rank
// greater than zero, informing the rank that an operation should be performed
// now. If the operation requested would result in an error (for example, due
// to a type or shape mismatch), then the MPIResponse can contain an error and
// an error message instead.
//
// MPIResponseType is the kind of response carried in
// MPIResponse.response_type: one of the three operations, or ERROR. It is
// serialized as a single byte holding the number assigned below.
enum MPIResponseType:byte {
    ALLREDUCE = 0,
    ALLGATHER = 1,
    BROADCAST = 2,
    ERROR = 3
}
// One response entry: either an operation to perform or an error report.
table MPIResponse {
    // Operation to perform, or ERROR (one of MPIResponseType).
    response_type:MPIResponseType;

    // If tensor_names has more than one entry, this is a fused operation and the
    // rest of the fields are the same.
    // NOTE(review): "the same" presumably means the remaining fields apply to
    // every tensor listed here; confirm against the code that builds responses.
    tensor_names:[string];

    // Empty unless response_type is ERROR.
    error_message:string;

    // List of devices participating in this operation.
    devices:[int];

    // Empty unless response_type is ALLGATHER.
    // These tensor sizes are the dimension zero sizes of all the input matrices,
    // indexed by the rank.
    tensor_sizes:[long];
}
// Bundles a vector of MPIResponse entries with a shutdown flag in one message.
table MPIResponseList {
    // Responses carried by this message.
    responses:[MPIResponse];

    // Flag indicating if worker is requested to shut down.
    shutdown:bool;
}