Skip to content

Commit

Permalink
fixup! feat(proto): Define CommonReplicaSpec in common.proto
Browse files Browse the repository at this point in the history
  • Loading branch information
MortalHappiness committed May 18, 2024
1 parent b2225d2 commit 5cf6113
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 3 deletions.
14 changes: 13 additions & 1 deletion flyteidl/protos/flyteidl/plugins/kubeflow/mpi.proto
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package flyteidl.plugins.kubeflow;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins/kubeflow";

import "flyteidl/core/tasks.proto";
import "flyteidl/plugins/kubeflow/common.proto";

// Proto for plugin that enables distributed training using https://github.com/kubeflow/mpi-operator
Expand All @@ -25,7 +26,18 @@ message DistributedMPITrainingTask {

// Replica specification for distributed MPI training
message DistributedMPITrainingReplicaSpec {
reserved 1, 2, 3, 4;
// Fields 1-4 are deprecated; use `common` instead.
// Number of replicas
int32 replicas = 1 [deprecated = true];

// Image used for the replica group
string image = 2 [deprecated = true];

// Resources required for the replica group
core.Resources resources = 3 [deprecated = true];

// Restart policy determines whether pods will be restarted when they exit
RestartPolicy restart_policy = 4 [deprecated = true];

// MPI sometimes requires different command set for different replica groups
repeated string command = 5;
Expand Down
14 changes: 13 additions & 1 deletion flyteidl/protos/flyteidl/plugins/kubeflow/pytorch.proto
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package flyteidl.plugins.kubeflow;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins/kubeflow";

import "flyteidl/core/tasks.proto";
import "flyteidl/plugins/kubeflow/common.proto";

// Custom proto for torch elastic config for distributed training using
Expand Down Expand Up @@ -34,7 +35,18 @@ message DistributedPyTorchTrainingTask {
}

message DistributedPyTorchTrainingReplicaSpec {
reserved 1, 2, 3, 4;
// Fields 1-4 are deprecated; use the `common` field (CommonReplicaSpec) instead.
// Number of replicas
int32 replicas = 1 [deprecated = true];

// Image used for the replica group
string image = 2 [deprecated = true];

// Resources required for the replica group
core.Resources resources = 3 [deprecated = true];

// Restart policy determines whether pods will be restarted when they exit
RestartPolicy restart_policy = 4 [deprecated = true];

// The common replica spec
CommonReplicaSpec common = 5;
Expand Down
14 changes: 13 additions & 1 deletion flyteidl/protos/flyteidl/plugins/kubeflow/tensorflow.proto
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package flyteidl.plugins.kubeflow;

option go_package = "github.com/flyteorg/flyte/flyteidl/gen/pb-go/flyteidl/plugins/kubeflow";

import "flyteidl/core/tasks.proto";
import "flyteidl/plugins/kubeflow/common.proto";

// Proto for plugin that enables distributed training using https://github.com/kubeflow/tf-operator
Expand All @@ -27,7 +28,18 @@ message DistributedTensorflowTrainingTask {
}

message DistributedTensorflowTrainingReplicaSpec {
reserved 1, 2, 3, 4;
// Fields 1-4 are deprecated; use the `common` field (CommonReplicaSpec) instead.
// Number of replicas
int32 replicas = 1 [deprecated = true];

// Image used for the replica group
string image = 2 [deprecated = true];

// Resources required for the replica group
core.Resources resources = 3 [deprecated = true];

// Restart policy determines whether pods will be restarted when they exit
RestartPolicy restart_policy = 4 [deprecated = true];

// The common replica spec
CommonReplicaSpec common = 5;
Expand Down

0 comments on commit 5cf6113

Please sign in to comment.