-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(model): add task schema in model struct (#578)
Because: FE needs the input and output schemas to render the trigger form. This commit: (1) adds schema fields for model input and output to the model struct; (2) adds view filter logic in the conversion.
- Loading branch information
Showing
9 changed files
with
2,125 additions
and
23 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,306 @@ | ||
// Copyright 2017 The Ray Authors. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
syntax = "proto3"; | ||
option cc_enable_arenas = true; | ||
|
||
package ray.serve; | ||
|
||
option go_package = "./rayserver"; | ||
|
||
|
||
// Configuration options for Serve's replica autoscaler. | ||
message AutoscalingConfig { | ||
// Minimum number of replicas. Declared as uint32, so it cannot be negative. | ||
uint32 min_replicas = 1; | ||
|
||
// Maximum number of replicas. Must be greater than or equal to | ||
// min_replicas. | ||
uint32 max_replicas = 2; | ||
|
||
// Target number of in-flight requests per replica. This is the primary | ||
// configuration knob for the replica autoscaler: the lower the number, the more | ||
// rapidly replicas are scaled up. Must be non-negative. | ||
// NOTE(review): the earlier comment required an "integer", but the field is a double. | ||
double target_num_ongoing_requests_per_replica = 3; | ||
|
||
// How often each replica sends metrics to the autoscaler. | ||
// Presumably in seconds, given the _s suffix - confirm. | ||
double metrics_interval_s = 4; | ||
|
||
// The window (in seconds) over which the autoscaler computes the rolling | ||
// average of metrics. | ||
double look_back_period_s = 5; | ||
|
||
// The multiplicative "gain" factor to limit scaling decisions. | ||
double smoothing_factor = 6; | ||
|
||
// How long to wait before scaling down replicas (presumably seconds - confirm). | ||
double downscale_delay_s = 7; | ||
|
||
// How long to wait before scaling up replicas (presumably seconds - confirm). | ||
double upscale_delay_s = 8; | ||
|
||
// Initial number of replicas the deployment should start with. Declared | ||
// `optional`, so an unset value is distinguishable from an explicit 0. | ||
optional uint32 initial_replicas = 9; | ||
|
||
// The multiplicative "gain" factor to limit upscaling. Declared `optional`, | ||
// so presence can be checked. | ||
optional double upscale_smoothing_factor = 10; | ||
|
||
// The multiplicative "gain" factor to limit downscaling. Declared `optional`, | ||
// so presence can be checked. | ||
optional double downscale_smoothing_factor = 11; | ||
|
||
// The cloudpickled policy definition, carried as opaque bytes. | ||
bytes serialized_policy_def = 12; | ||
|
||
// The import path of the policy if the user passed a string. If the user | ||
// passed a callable, this is the concatenation of the policy module and the | ||
// policy name. | ||
string policy = 13; | ||
} | ||
|
||
//[Begin] LOGGING CONFIG | ||
// Encoding type, used by LoggingConfig.encoding. | ||
// TEXT is the zero value, so it is what an unset EncodingType field reads as. | ||
enum EncodingType { | ||
TEXT = 0; | ||
JSON = 1; | ||
} | ||
|
||
// Logging settings; embedded in DeploymentConfig.logging_config. | ||
message LoggingConfig { | ||
// Log encoding (TEXT or JSON). Reads as TEXT when unset. | ||
EncodingType encoding = 1; | ||
// Log level as a string. The accepted values are not defined in this file. | ||
string log_level = 2; | ||
// Presumably the directory logs are written to - confirm against the consumer. | ||
string logs_dir = 3; | ||
// Presumably toggles access logging - confirm. Reads as false when unset. | ||
bool enable_access_log = 4; | ||
} | ||
|
||
//[End] Logging Config | ||
|
||
// Configuration options for a deployment, to be set by the user. | ||
message DeploymentConfig { | ||
// The number of processes to start up that will handle requests to this deployment. | ||
// Defaults to 1. | ||
// NOTE(review): an unset proto3 int32 reads as 0, so the stated default is | ||
// presumably applied by the code that builds or consumes this message - confirm. | ||
int32 num_replicas = 1; | ||
|
||
// The maximum number of queries that will be sent to a replica of this deployment | ||
// without receiving a response. Defaults to 100 (see the note on num_replicas | ||
// about where such defaults are applied). | ||
int32 max_concurrent_queries = 2; | ||
|
||
// Arguments to pass to the reconfigure method of the deployment. The reconfigure | ||
// method is called if user_config is not None. Carried as opaque bytes. | ||
bytes user_config = 3; | ||
|
||
// Duration that deployment replicas will wait until there is no more work to be done | ||
// before shutting down (presumably seconds, given the _s suffix - confirm). | ||
double graceful_shutdown_wait_loop_s = 4; | ||
|
||
// The controller waits for this duration before forcefully killing the replica | ||
// on shutdown (presumably seconds - confirm). | ||
double graceful_shutdown_timeout_s = 5; | ||
|
||
// Interval at which the controller health-checks replicas | ||
// (presumably seconds - confirm). | ||
double health_check_period_s = 6; | ||
|
||
// Timeout after which a replica is marked unhealthy without a response | ||
// (presumably seconds - confirm). | ||
double health_check_timeout_s = 7; | ||
|
||
// Whether the deployment is constructed across languages. | ||
bool is_cross_language = 8; | ||
|
||
// The deployment's programming language. Reads as PYTHON when unset. | ||
DeploymentLanguage deployment_language = 9; | ||
|
||
// The deployment's autoscaling configuration. | ||
AutoscalingConfig autoscaling_config = 10; | ||
|
||
// Version string for the deployment. Format is not defined in this file. | ||
string version = 11; | ||
|
||
// Presumably the names of the options the user set explicitly - confirm | ||
// against the code that populates it. | ||
repeated string user_configured_option_names = 12; | ||
|
||
// Logging configuration for the deployment. | ||
LoggingConfig logging_config = 13; | ||
} | ||
|
||
// Deployment language, used by DeploymentConfig.deployment_language. | ||
// PYTHON is the zero value, so it is what an unset field reads as. | ||
enum DeploymentLanguage { | ||
PYTHON = 0; | ||
JAVA = 1; | ||
} | ||
|
||
// Metadata attached to a request. Field semantics are not defined in this | ||
// file; the notes below are name-based and should be confirmed. | ||
message RequestMetadata { | ||
// Identifier of the request. | ||
string request_id = 1; | ||
|
||
// Presumably the name of the target endpoint - confirm. | ||
string endpoint = 2; | ||
|
||
// Presumably the name of the method to invoke - confirm. | ||
string call_method = 3; | ||
|
||
// String-to-string context entries. Map iteration order is undefined. | ||
map<string, string> context = 4; | ||
|
||
// Presumably identifies the model for multiplexed deployments - confirm. | ||
string multiplexed_model_id = 5; | ||
|
||
// Route string associated with the request. | ||
string route = 6; | ||
} | ||
|
||
// Wraps a request body as opaque bytes. The encoding of the body is not | ||
// defined in this file. | ||
message RequestWrapper { | ||
bytes body = 1; | ||
} | ||
|
||
// An object snapshot paired with its snapshot id; used as the value type of | ||
// LongPollResult.updated_objects. | ||
message UpdatedObject { | ||
// Snapshot contents as opaque bytes (serialization format not shown here). | ||
bytes object_snapshot = 1; | ||
// Identifier of this snapshot. | ||
int32 snapshot_id = 2; | ||
} | ||
|
||
// Long-poll request: maps each key to a snapshot id (presumably the last | ||
// snapshot id the caller has seen for that key - confirm). | ||
message LongPollRequest { | ||
map<string, int32> keys_to_snapshot_ids = 1; | ||
} | ||
|
||
// Long-poll result: maps each key to its UpdatedObject. | ||
message LongPollResult { | ||
map<string, UpdatedObject> updated_objects = 1; | ||
} | ||
|
||
// Describes a single endpoint; used as the value type of EndpointSet.endpoints. | ||
message EndpointInfo { | ||
// Name of the endpoint. | ||
string endpoint_name = 1; | ||
// Route string for the endpoint. | ||
string route = 2; | ||
// String-to-string configuration entries; the recognized keys are not | ||
// defined in this file. | ||
map<string, string> config = 3; | ||
} | ||
|
||
// A set of endpoints keyed by string (presumably the endpoint name - confirm). | ||
message EndpointSet { | ||
map<string, EndpointInfo> endpoints = 1; | ||
} | ||
|
||
// An Actor handle can currently be transferred across languages through a Ray | ||
// call, but a list of Actor handles cannot. This message therefore wraps a list | ||
// of Actor names so that an actor list can be passed across languages. Once | ||
// lists of Actor handles are supported across languages, this message can be | ||
// replaced. | ||
message ActorNameList { | ||
repeated string names = 1; | ||
} | ||
|
||
// Version information for a deployment: a code version string plus the | ||
// configuration values listed below. | ||
message DeploymentVersion { | ||
// Version string of the deployment code. | ||
string code_version = 1; | ||
// Deployment configuration associated with this version. | ||
DeploymentConfig deployment_config = 2; | ||
// Ray actor options carried as a string; the encoding is not defined in | ||
// this file (presumably JSON - confirm). | ||
string ray_actor_options = 3; | ||
// Placement group bundles carried as a string (encoding not shown here). | ||
string placement_group_bundles = 4; | ||
// Placement group strategy carried as a string. | ||
string placement_group_strategy = 5; | ||
// Maximum number of replicas per node. | ||
// NOTE(review): an unset value reads as 0; confirm how consumers treat 0. | ||
int32 max_replicas_per_node = 6; | ||
} | ||
|
||
// Configuration for a deployment's replicas; embedded in | ||
// DeploymentInfo.replica_config. | ||
message ReplicaConfig { | ||
// Name of the deployment definition. | ||
string deployment_def_name = 1; | ||
// The deployment definition as opaque bytes (serialization not shown here). | ||
bytes deployment_def = 2; | ||
// Positional init arguments as opaque bytes. | ||
bytes init_args = 3; | ||
// Keyword init arguments as opaque bytes. | ||
bytes init_kwargs = 4; | ||
// Ray actor options carried as a string; the encoding is not defined in | ||
// this file (presumably JSON - confirm). | ||
string ray_actor_options = 5; | ||
// Placement group bundles carried as a string (encoding not shown here). | ||
string placement_group_bundles = 6; | ||
// Placement group strategy carried as a string. | ||
string placement_group_strategy = 7; | ||
// Maximum number of replicas per node. | ||
// NOTE(review): an unset value reads as 0; confirm how consumers treat 0. | ||
int32 max_replicas_per_node = 8; | ||
} | ||
|
||
// Direction values used by DeploymentInfo.target_capacity_direction. | ||
// UNSET is the zero value, so it is what an unset field reads as. | ||
enum TargetCapacityDirection { | ||
UNSET = 0; | ||
UP = 1; | ||
DOWN = 2; | ||
} | ||
|
||
// Information about a deployment: its name, configuration, timing and | ||
// target capacity. | ||
message DeploymentInfo { | ||
// Name of the deployment. | ||
string name = 1; | ||
// User-facing deployment configuration. | ||
DeploymentConfig deployment_config = 2; | ||
// Replica configuration. | ||
ReplicaConfig replica_config = 3; | ||
// Start time; presumably milliseconds given the _ms suffix (epoch/base | ||
// not shown here - confirm). | ||
int64 start_time_ms = 4; | ||
// Name of the associated actor. | ||
string actor_name = 5; | ||
// Version string of the deployment. | ||
string version = 6; | ||
// End time; presumably milliseconds given the _ms suffix - confirm. | ||
int64 end_time_ms = 7; | ||
// Target capacity value. Units and valid range are not defined in this file. | ||
double target_capacity = 8; | ||
// Direction associated with target_capacity. Reads as UNSET when unset. | ||
TargetCapacityDirection target_capacity_direction = 9; | ||
} | ||
|
||
// Wraps a DeploymentInfo together with its route. An empty-string ("") route | ||
// value needs to be converted to None/null by the consumer. | ||
message DeploymentRoute { | ||
DeploymentInfo deployment_info = 1; | ||
string route = 2; | ||
} | ||
|
||
// Wraps a list of DeploymentRoute messages. | ||
message DeploymentRouteList { | ||
repeated DeploymentRoute deployment_routes = 1; | ||
} | ||
|
||
// Deployment-level status values, used by DeploymentStatusInfo.status. | ||
// UPDATING is the zero value, so it is what an unset field reads as. | ||
enum DeploymentStatus { | ||
// Keep the frontend code of ServeDeploymentStatus in | ||
// dashboard/client/src/type/serve.ts in sync with this enum. | ||
DEPLOYMENT_STATUS_UPDATING = 0; | ||
DEPLOYMENT_STATUS_HEALTHY = 1; | ||
DEPLOYMENT_STATUS_UNHEALTHY = 2; | ||
DEPLOYMENT_STATUS_UPSCALING = 3; | ||
DEPLOYMENT_STATUS_DOWNSCALING = 4; | ||
} | ||
|
||
// Trigger values used by DeploymentStatusInfo.status_trigger (presumably the | ||
// reason for the current status - confirm). UNSPECIFIED is the zero value, so | ||
// it is what an unset field reads as. | ||
enum DeploymentStatusTrigger { | ||
DEPLOYMENT_STATUS_TRIGGER_UNSPECIFIED = 0; | ||
DEPLOYMENT_STATUS_TRIGGER_CONFIG_UPDATE_STARTED = 1; | ||
DEPLOYMENT_STATUS_TRIGGER_CONFIG_UPDATE_COMPLETED = 2; | ||
DEPLOYMENT_STATUS_TRIGGER_UPSCALE_COMPLETED = 3; | ||
DEPLOYMENT_STATUS_TRIGGER_DOWNSCALE_COMPLETED = 4; | ||
DEPLOYMENT_STATUS_TRIGGER_AUTOSCALING = 5; | ||
DEPLOYMENT_STATUS_TRIGGER_REPLICA_STARTUP_FAILED = 6; | ||
DEPLOYMENT_STATUS_TRIGGER_HEALTH_CHECK_FAILED = 7; | ||
DEPLOYMENT_STATUS_TRIGGER_INTERNAL_ERROR = 8; | ||
DEPLOYMENT_STATUS_TRIGGER_DELETING = 9; | ||
} | ||
|
||
// Status report for a single deployment. | ||
message DeploymentStatusInfo { | ||
// Name of the deployment. | ||
string name = 1; | ||
// Current status. Reads as DEPLOYMENT_STATUS_UPDATING when unset. | ||
DeploymentStatus status = 2; | ||
// Free-form message accompanying the status. | ||
string message = 3; | ||
// Trigger associated with the status. Reads as | ||
// DEPLOYMENT_STATUS_TRIGGER_UNSPECIFIED when unset. | ||
DeploymentStatusTrigger status_trigger = 4; | ||
} | ||
|
||
// Wraps a list of DeploymentStatusInfo messages. | ||
message DeploymentStatusInfoList { | ||
repeated DeploymentStatusInfo deployment_status_infos = 1; | ||
} | ||
|
||
// Application-level status values, used by ApplicationStatusInfo.status. | ||
// DEPLOYING is the zero value, so it is what an unset field reads as. | ||
enum ApplicationStatus { | ||
// Keep the frontend code of ServeApplicationStatus in | ||
// dashboard/client/src/type/serve.ts in sync with this enum. | ||
APPLICATION_STATUS_DEPLOYING = 0; | ||
APPLICATION_STATUS_RUNNING = 1; | ||
APPLICATION_STATUS_DEPLOY_FAILED = 2; | ||
APPLICATION_STATUS_DELETING = 3; | ||
// Number 4 is skipped in this enum (presumably a removed value - confirm). | ||
// Reserve it so it cannot be reassigned with a different meaning. | ||
reserved 4; | ||
APPLICATION_STATUS_NOT_STARTED = 5; | ||
APPLICATION_STATUS_UNHEALTHY = 6; | ||
} | ||
|
||
// Status report for an application. | ||
message ApplicationStatusInfo { | ||
// Current status. Reads as APPLICATION_STATUS_DEPLOYING when unset. | ||
ApplicationStatus status = 1; | ||
// Free-form message accompanying the status. | ||
string message = 2; | ||
// Deployment timestamp as a double; units and epoch are not defined in | ||
// this file (presumably seconds since the Unix epoch - confirm). | ||
double deployment_timestamp = 3; | ||
} | ||
|
||
// Combines an application's status with the statuses of its deployments. | ||
message StatusOverview { | ||
// Application-level status. | ||
ApplicationStatusInfo app_status = 1; | ||
// Per-deployment statuses. | ||
DeploymentStatusInfoList deployment_statuses = 2; | ||
// Name (presumably of the application - confirm). | ||
string name = 3; | ||
} | ||
|
||
// Request message for RayServeAPIService.ListApplications; currently has no | ||
// fields. Used for the gRPC proxy health check. | ||
message ListApplicationsRequest {} | ||
|
||
// Response message for RayServeAPIService.ListApplications. | ||
message ListApplicationsResponse { | ||
// Names of the applications. | ||
repeated string application_names = 1; | ||
} | ||
|
||
// Request message for RayServeAPIService.Healthz; currently has no fields. | ||
message HealthzRequest {} | ||
|
||
// Response message for RayServeAPIService.Healthz. | ||
message HealthzResponse { | ||
// Free-form message; its contents are not defined in this file. | ||
string message = 1; | ||
} | ||
|
||
// gRPC service exposing two unary RPCs: ListApplications and Healthz. | ||
service RayServeAPIService { | ||
// Returns the application names in a ListApplicationsResponse. | ||
rpc ListApplications(ListApplicationsRequest) returns (ListApplicationsResponse); | ||
// Returns a HealthzResponse carrying a message string. | ||
rpc Healthz(HealthzRequest) returns (HealthzResponse); | ||
} |
Oops, something went wrong.