Permalink
Cannot retrieve contributors at this time
Join GitHub today
GitHub is home to over 50 million developers working together to host and review code, manage projects, and build software together.
Sign up
Fetching contributors…
| // Licensed to the Apache Software Foundation (ASF) under one | |
| // or more contributor license agreements. See the NOTICE file | |
| // distributed with this work for additional information | |
| // regarding copyright ownership. The ASF licenses this file | |
| // to you under the Apache License, Version 2.0 (the | |
| // "License"); you may not use this file except in compliance | |
| // with the License. You may obtain a copy of the License at | |
| // | |
| // http://www.apache.org/licenses/LICENSE-2.0 | |
| // | |
| // Unless required by applicable law or agreed to in writing, software | |
| // distributed under the License is distributed on an "AS IS" BASIS, | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| // See the License for the specific language governing permissions and | |
| // limitations under the License. | |
// This schema is written in proto2 (note the `required`/`optional` labels
// and explicit `[default = ...]` options used throughout).
syntax = "proto2";

// All definitions below live in the `mesos` protobuf package.
package mesos;

// Enable arena allocation support in the generated C++ code.
option cc_enable_arenas = true;

// Generated Java code is placed in the outer class
// `org.apache.mesos.Protos`.
option java_package = "org.apache.mesos";
option java_outer_classname = "Protos";
/**
 * Status is used to indicate the state of the scheduler and executor
 * driver after function calls.
 */
enum Status {
  // NOTE: Values start at 1; this enum declares no zero value.
  DRIVER_NOT_STARTED = 1;
  DRIVER_RUNNING = 2;
  DRIVER_ABORTED = 3;
  DRIVER_STOPPED = 4;
}
/**
 * A unique ID assigned to a framework. A framework can reuse this ID
 * in order to do failover (see MesosSchedulerDriver).
 */
message FrameworkID {
  // String form of the ID.
  required string value = 1;
}
/**
 * A unique ID assigned to an offer.
 */
message OfferID {
  // String form of the ID.
  required string value = 1;
}
/**
 * A unique ID assigned to a slave. Currently, a slave gets a new ID
 * whenever it (re)registers with Mesos. Framework writers shouldn't
 * assume any binding between a slave ID and a hostname.
 */
message SlaveID {
  // String form of the ID.
  required string value = 1;
}
/**
 * A framework-generated ID to distinguish a task. The ID must remain
 * unique while the task is active. A framework can reuse an ID _only_
 * if the previous task with the same ID has reached a terminal state
 * (e.g., TASK_FINISHED, TASK_KILLED, etc.). However, reusing task IDs
 * is strongly discouraged (MESOS-2198).
 */
message TaskID {
  // String form of the ID, chosen by the framework.
  required string value = 1;
}
/**
 * A framework-generated ID to distinguish an executor. Only one
 * executor with the same ID can be active on the same slave at a
 * time. However, reusing executor IDs is discouraged.
 */
message ExecutorID {
  // String form of the ID, chosen by the framework.
  required string value = 1;
}
/**
 * ID used to uniquely identify a container. If the `parent` is not
 * specified, the ID is a UUID generated by the agent to uniquely
 * identify the container of an executor run. If the `parent` field is
 * specified, it represents a nested container.
 */
message ContainerID {
  // String form of the ID.
  required string value = 1;

  // ID of the enclosing container. Set only for nested containers;
  // because the field is itself a `ContainerID`, nesting can be
  // expressed to arbitrary depth.
  optional ContainerID parent = 2;
}
/**
 * A unique ID assigned to a resource provider. Currently, a resource
 * provider gets a new ID whenever it (re)registers with Mesos.
 */
message ResourceProviderID {
  // String form of the ID.
  required string value = 1;
}
/**
 * A framework-generated ID to distinguish an operation. The ID
 * must be unique within the framework.
 */
message OperationID {
  // String form of the ID, chosen by the framework.
  required string value = 1;
}
/**
 * Represents time since the epoch, in nanoseconds.
 */
message TimeInfo {
  // Nanoseconds elapsed since the epoch.
  required int64 nanoseconds = 1;
}
/**
 * Represents duration in nanoseconds.
 */
message DurationInfo {
  // Length of the duration, in nanoseconds.
  required int64 nanoseconds = 1;
}
/**
 * A network address.
 *
 * TODO(bmahler): Use this more widely.
 */
message Address {
  // May contain a hostname, IP address, or both.
  optional string hostname = 1;
  optional string ip = 2;

  // Port number of the address. This is the only required field.
  required int32 port = 3;
}
/**
 * Represents a URL.
 */
message URL {
  // URL scheme.
  required string scheme = 1;

  // Host (hostname and/or IP) and port the URL points at.
  required Address address = 2;

  // Optional path component.
  optional string path = 3;

  // Optional query component, expressed as a list of `Parameter`s.
  repeated Parameter query = 4;

  // Optional fragment component.
  optional string fragment = 5;
}
/**
 * Represents an interval, from a given start time over a given duration.
 * This interval pertains to an unavailability event, such as maintenance,
 * and is not a generic interval.
 */
message Unavailability {
  // Beginning of the interval.
  required TimeInfo start = 1;

  // When added to `start`, this represents the end of the interval.
  // If unspecified, the duration is assumed to be infinite.
  optional DurationInfo duration = 2;

  // TODO(josephw): Add additional fields for expressing the purpose and
  // urgency of the unavailability event.
}
/**
 * Represents a single machine, which may hold one or more slaves.
 *
 * NOTE: In order to match a slave to a machine, both the `hostname` and
 * `ip` must match the values advertised by the slave to the master.
 * Hostname is not case-sensitive.
 */
message MachineID {
  // Hostname of the machine (compared case-insensitively, see above).
  optional string hostname = 1;

  // IP address of the machine.
  optional string ip = 2;
}
/**
 * Holds information about a single machine, its `mode`, and any other
 * relevant information which may affect the behavior of the machine.
 */
message MachineInfo {
  // Describes the several states that a machine can be in. A `Mode`
  // applies to a machine and to all associated slaves on the machine.
  enum Mode {
    // In this mode, a machine is behaving normally;
    // offering resources, executing tasks, etc.
    UP = 1;

    // In this mode, all slaves on the machine are expected to cooperate with
    // frameworks to drain resources. In general, draining is done ahead of
    // a pending `unavailability`. The resources should be drained so as to
    // maximize utilization prior to the maintenance but without knowingly
    // violating the frameworks' requirements.
    DRAINING = 2;

    // In this mode, a machine is not running any tasks and will not offer
    // any of its resources. Slaves on the machine will not be allowed to
    // register with the master.
    DOWN = 3;
  }

  // Identifies the machine this information is about.
  required MachineID id = 1;

  // Current mode of the machine; see `Mode` above.
  optional Mode mode = 2;

  // Signifies that the machine may be unavailable during the given interval.
  // See comments in `Unavailability` and for the `unavailability` fields
  // in `Offer` and `InverseOffer` for more information.
  optional Unavailability unavailability = 3;
}
/**
 * Describes a framework.
 */
message FrameworkInfo {
  // Used to determine the Unix user that an executor or task should be
  // launched as.
  //
  // When using the MesosSchedulerDriver, if the field is set to an
  // empty string, it will automagically set it to the current user.
  //
  // When using the HTTP Scheduler API, the user has to be set
  // explicitly.
  required string user = 1;

  // Name of the framework that shows up in the Mesos Web UI.
  required string name = 2;

  // Used to uniquely identify the framework.
  //
  // This field must be unset when the framework subscribes for the
  // first time upon which the master will assign a new ID. To
  // resubscribe after scheduler failover the framework should set
  // 'id' to the ID assigned by the master. Setting 'id' to values
  // not assigned by Mesos masters is unsupported.
  optional FrameworkID id = 3;

  // The amount of time (in seconds) that the master will wait for the
  // scheduler to failover before it tears down the framework by
  // killing all its tasks/executors. This should be non-zero if a
  // framework expects to reconnect after a failure and not lose its
  // tasks/executors.
  //
  // NOTE: To avoid accidental destruction of tasks, production
  // frameworks typically set this to a large value (e.g., 1 week).
  optional double failover_timeout = 4 [default = 0.0];

  // If set, agents running tasks started by this framework will write
  // the framework pid, executor pids and status updates to disk. If
  // the agent exits (e.g., due to a crash or as part of upgrading
  // Mesos), this checkpointed data allows the restarted agent to
  // reconnect to executors that were started by the old instance of
  // the agent. Enabling checkpointing improves fault tolerance, at
  // the cost of a (usually small) increase in disk I/O.
  optional bool checkpoint = 5 [default = false];

  // Roles are the entities to which allocations are made.
  // The framework must have at least one role in order to
  // be offered resources. Note that `role` is deprecated
  // in favor of `roles` and only one of these fields must
  // be used. Since we cannot distinguish between empty
  // `roles` and the default unset `role`, we require that
  // frameworks set the `MULTI_ROLE` capability if
  // setting the `roles` field.
  optional string role = 6 [default = "*", deprecated = true];
  repeated string roles = 12;

  // Used to indicate the current host from which the scheduler is
  // registered in the Mesos Web UI. If set to an empty string Mesos
  // will automagically set it to the current hostname if one is
  // available.
  optional string hostname = 7;

  // This field should match the credential's principal the framework
  // uses for authentication. This field is used for framework API
  // rate limiting and dynamic reservations. It should be set even
  // if authentication is not enabled if these features are desired.
  optional string principal = 8;

  // This field allows a framework to advertise its web UI, so that
  // the Mesos web UI can link to it. It is expected to be a full URL,
  // for example http://my-scheduler.example.com:8080/.
  optional string webui_url = 9;

  // A single capability that a framework can advertise through the
  // `capabilities` field below.
  message Capability {
    enum Type {
      // This must be the first enum value in this list, to
      // ensure that if 'type' is not set, the default value
      // is UNKNOWN. This enables enum values to be added
      // in a backwards-compatible way. See: MESOS-4997.
      UNKNOWN = 0;

      // Receive offers with revocable resources. See 'Resource'
      // message for details.
      REVOCABLE_RESOURCES = 1;

      // Receive the TASK_KILLING TaskState when a task is being
      // killed by an executor. The executor will examine this
      // capability to determine whether it can send TASK_KILLING.
      TASK_KILLING_STATE = 2;

      // Indicates whether the framework is aware of GPU resources.
      // Frameworks that are aware of GPU resources are expected to
      // avoid placing non-GPU workloads on GPU agents, in order
      // to avoid occupying a GPU agent and preventing GPU workloads
      // from running! Currently, if a framework is unaware of GPU
      // resources, it will not be offered *any* of the resources on
      // an agent with GPUs. This restriction is in place because we
      // do not have a revocation mechanism that ensures GPU workloads
      // can evict GPU agent occupants if necessary.
      //
      // TODO(bmahler): As we add revocation we can relax the
      // restriction here. See MESOS-5634 for more information.
      GPU_RESOURCES = 3;

      // Receive offers with resources that are shared.
      SHARED_RESOURCES = 4;

      // Indicates that (1) the framework is prepared to handle the
      // following TaskStates: TASK_UNREACHABLE, TASK_DROPPED,
      // TASK_GONE, TASK_GONE_BY_OPERATOR, and TASK_UNKNOWN, and (2)
      // the framework will assume responsibility for managing
      // partitioned tasks that reregister with the master.
      //
      // Frameworks that enable this capability can define how they
      // would like to handle partitioned tasks. Frameworks will
      // receive TASK_UNREACHABLE for tasks on agents that are
      // partitioned from the master.
      //
      // Without this capability, frameworks will receive TASK_LOST
      // for tasks on partitioned agents.
      // NOTE: Prior to Mesos 1.5, such tasks will be killed by Mesos
      // when the agent reregisters (unless the master has failed over).
      // However due to the lack of benefit in maintaining different
      // behaviors depending on whether the master has failed over
      // (see MESOS-7215), as of 1.5, Mesos will not kill these
      // tasks in either case.
      PARTITION_AWARE = 5;

      // This expresses the ability for the framework to be
      // "multi-tenant" via using the newly introduced `roles`
      // field, and examining `Offer.allocation_info` to determine
      // which role the offers are being made to. We also
      // expect that "single-tenant" schedulers eventually
      // provide this and move away from the deprecated
      // `role` field.
      MULTI_ROLE = 6;

      // This capability has two effects for a framework.
      //
      // (1) The framework is offered resources in a new format.
      //
      //     The offered resources have the `Resource.reservations` field set
      //     rather than `Resource.role` and `Resource.reservation`. In short,
      //     an empty `reservations` field denotes unreserved resources, and
      //     each `ReservationInfo` in the `reservations` field denotes a
      //     reservation that refines the previous one.
      //
      //     See the 'Resource Format' section for more details.
      //
      // (2) The framework can create refined reservations.
      //
      //     A framework can refine an existing reservation via the
      //     `Resource.reservations` field. For example, a reservation for role
      //     `eng` can be refined to `eng/front_end`.
      //
      //     See `ReservationInfo.reservations` for more details.
      //
      // NOTE: Without this capability, a framework is not offered resources
      // that have refined reservations. A resource is said to have refined
      // reservations if it uses the `Resource.reservations` field, and
      // `Resource.reservations_size() > 1`.
      RESERVATION_REFINEMENT = 7; // EXPERIMENTAL.

      // Indicates that the framework is prepared to receive offers
      // for agents whose region is different from the master's
      // region. Network links between hosts in different regions
      // typically have higher latency and lower bandwidth than
      // network links within a region, so frameworks should be
      // careful to only place suitable workloads in remote regions.
      // Frameworks that are not region-aware will never receive
      // offers for remote agents; region-aware frameworks are assumed
      // to implement their own logic to decide which workloads (if
      // any) are suitable for placement on remote agents.
      REGION_AWARE = 8;
    }

    // Enum fields should be optional, see: MESOS-4997.
    optional Type type = 1;
  }

  // This field allows a framework to advertise its set of
  // capabilities (e.g., ability to receive offers for revocable
  // resources).
  repeated Capability capabilities = 10;

  // Labels are free-form key value pairs supplied by the framework
  // scheduler (e.g., to describe additional functionality offered by
  // the framework). These labels are not interpreted by Mesos itself.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 11;

  // Specific resource requirements for each of the framework's roles.
  // This field is used by e.g., the default allocator to decide whether
  // a framework is interested in seeing a resource of a certain shape.
  map<string, OfferFilters> offer_filters = 13;
}
/**
 * Describes a general non-interpreting non-killing check for a task or
 * executor (or any arbitrary process/command). A type is picked by
 * specifying one of the optional fields. Specifying more than one type
 * is an error.
 *
 * NOTE: This API is subject to change and the related feature is experimental.
 */
message CheckInfo {
  enum Type {
    UNKNOWN = 0;
    COMMAND = 1;
    HTTP = 2;
    TCP = 3;

    // TODO(alexr): Consider supporting custom user checks. They should
    // probably be paired with a `data` field and complemented by a
    // `data` response in `CheckStatusInfo`.
  }

  // Describes a command check. If applicable, enters mount and/or network
  // namespaces of the task.
  message Command {
    // The command to run for the check.
    required CommandInfo command = 1;
  }

  // Describes an HTTP check. Sends a GET request to
  // http://<host>:port/path. Note that <host> is not configurable and is
  // resolved automatically to 127.0.0.1.
  message Http {
    // Port to send the HTTP request.
    required uint32 port = 1;

    // HTTP request path.
    optional string path = 2;

    // TODO(alexr): Add support for HTTP method. While adding POST
    // and PUT is simple, supporting payload is more involved.

    // TODO(alexr): Add support for custom HTTP headers.

    // TODO(alexr): Consider adding an optional message to describe TLS
    // options and thus enabling https. Such message might contain certificate
    // validation, TLS version.
  }

  // Describes a TCP check, i.e. based on establishing a TCP connection to
  // the specified port. Note that <host> is not configurable and is resolved
  // automatically to 127.0.0.1.
  message Tcp {
    // Port to establish the TCP connection to.
    required uint32 port = 1;
  }

  // The type of the check.
  optional Type type = 1;

  // Command check.
  optional Command command = 2;

  // HTTP check.
  optional Http http = 3;

  // TCP check.
  optional Tcp tcp = 7;

  // Amount of time to wait to start checking the task after it
  // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter
  // is used by the executor.
  optional double delay_seconds = 4 [default = 15.0];

  // Interval between check attempts, i.e., amount of time to wait after
  // the previous check finished or timed out to start the next check.
  optional double interval_seconds = 5 [default = 10.0];

  // Amount of time to wait for the check to complete. Zero means infinite
  // timeout.
  //
  // After this timeout, the check attempt is aborted and no result is
  // reported. Note that this may be considered a state change and hence
  // may trigger a check status change delivery to the corresponding
  // scheduler. See `CheckStatusInfo` for more details.
  optional double timeout_seconds = 6 [default = 20.0];
}
/**
 * Describes a health check for a task or executor (or any arbitrary
 * process/command). A type is picked by specifying one of the
 * optional fields. Specifying more than one type is an error.
 */
message HealthCheck {
  enum Type {
    UNKNOWN = 0;
    COMMAND = 1;
    HTTP = 2;
    TCP = 3;
  }

  // Describes an HTTP health check. Sends a GET request to
  // scheme://<host>:port/path. Note that <host> is not configurable and is
  // resolved automatically, in most cases to 127.0.0.1. Default executors
  // treat return codes between 200 and 399 as success; custom executors
  // may employ a different strategy, e.g. leveraging the `statuses` field.
  message HTTPCheckInfo {
    // Protocol used for the check; defaults to IPv4.
    optional NetworkInfo.Protocol protocol = 5 [default = IPv4];

    // Currently "http" and "https" are supported.
    optional string scheme = 3;

    // Port to send the HTTP request.
    required uint32 port = 1;

    // HTTP request path.
    optional string path = 2;

    // TODO(alexr): Add support for HTTP method. While adding POST
    // and PUT is simple, supporting payload is more involved.

    // TODO(alexr): Add support for custom HTTP headers.

    // TODO(alexr): Add support for success and possibly failure
    // statuses.

    // NOTE: It is up to the custom executor to interpret and act on this
    // field. Setting this field has no effect on the default executors.
    //
    // TODO(haosdent): Deprecate this field when we add better support for
    // success and possibly failure statuses, e.g. ranges of success and
    // failure statuses.
    repeated uint32 statuses = 4;

    // TODO(haosdent): Consider adding a flag to enable task's certificate
    // validation for HTTPS health checks, see MESOS-5997.

    // TODO(benh): Include an 'optional bytes data' field for checking
    // for specific data in the response.
  }

  // Describes a TCP health check, i.e. based on establishing
  // a TCP connection to the specified port.
  message TCPCheckInfo {
    // Protocol used for the check; defaults to IPv4.
    optional NetworkInfo.Protocol protocol = 2 [default = IPv4];

    // Port expected to be open.
    required uint32 port = 1;
  }

  // TODO(benh): Consider adding a URL health check strategy which
  // allows doing something similar to the HTTP strategy but
  // encapsulates all the details in a single string field.

  // Amount of time to wait to start health checking the task after it
  // transitions to `TASK_RUNNING` or `TASK_STARTING` if the latter is
  // used by the executor.
  optional double delay_seconds = 2 [default = 15.0];

  // Interval between health checks, i.e., amount of time to wait after
  // the previous health check finished or timed out to start the next
  // health check.
  optional double interval_seconds = 3 [default = 10.0];

  // Amount of time to wait for the health check to complete. After this
  // timeout, the health check is aborted and treated as a failure. Zero
  // means infinite timeout.
  optional double timeout_seconds = 4 [default = 20.0];

  // Number of consecutive failures until the task is killed by the executor.
  optional uint32 consecutive_failures = 5 [default = 3];

  // Amount of time after the task is launched during which health check
  // failures are ignored. Once a check succeeds for the first time,
  // the grace period does not apply anymore. Note that it includes
  // `delay_seconds`, i.e., setting `grace_period_seconds` < `delay_seconds`
  // has no effect.
  optional double grace_period_seconds = 6 [default = 10.0];

  // TODO(alexr): Add an optional `KillPolicy` that should be used
  // if the task is killed because of a health check failure.

  // The type of health check.
  optional Type type = 8;

  // Command health check.
  optional CommandInfo command = 7;

  // HTTP health check.
  optional HTTPCheckInfo http = 1;

  // TCP health check.
  optional TCPCheckInfo tcp = 9;
}
/**
 * Describes a kill policy for a task. Currently does not express
 * different policies (e.g. hitting HTTP endpoints), only controls
 * how long to wait between graceful and forcible task kill:
 *
 *     graceful kill --------------> forcible kill
 *                    grace_period
 *
 * Kill policies are best-effort, because machine failures / forcible
 * terminations may occur.
 *
 * NOTE: For executor-less command-based tasks, the kill is performed
 * via sending a signal to the task process: SIGTERM for the graceful
 * kill and SIGKILL for the forcible kill. For the docker executor-less
 * tasks the grace period is passed to 'docker stop --time'.
 */
message KillPolicy {
  // The grace period specifies how long to wait before forcibly
  // killing the task. It is recommended to attempt to gracefully
  // kill the task (and send TASK_KILLING) to indicate that the
  // graceful kill is in progress. Once the grace period elapses,
  // if the task has not terminated, a forcible kill should occur.
  // The task should not assume that it will always be allotted
  // the full grace period. For example, the executor may be
  // shutdown more quickly by the agent, or failures / forcible
  // terminations may occur.
  optional DurationInfo grace_period = 1;
}
/**
 * Describes a command, executed via: '/bin/sh -c value'. Any URIs specified
 * are fetched before executing the command. If the executable field for an
 * uri is set, executable file permission is set on the downloaded file.
 * Otherwise, if the downloaded file has a recognized archive extension
 * (currently [compressed] tar and zip) it is extracted into the executor's
 * working directory. This extraction can be disabled by setting `extract` to
 * false. In addition, any environment variables are set before executing
 * the command (so they can be used to "parameterize" your command).
 */
message CommandInfo {
  message URI {
    // The URI to fetch.
    required string value = 1;

    // If set, executable file permission is set on the downloaded file.
    optional bool executable = 2;

    // In case the fetched file is recognized as an archive, extract
    // its contents into the sandbox. Note that a cached archive is
    // not copied from the cache to the sandbox in case extraction
    // originates from an archive in the cache.
    optional bool extract = 3 [default = true];

    // If this field is "true", the fetcher cache will be used. If not,
    // fetching bypasses the cache and downloads directly into the
    // sandbox directory, no matter whether a suitable cache file is
    // available or not. The former directs the fetcher to download to
    // the file cache, then copy from there to the sandbox. Subsequent
    // fetch attempts with the same URI will omit downloading and copy
    // from the cache as long as the file is resident there. Cache files
    // may get evicted at any time, which then leads to renewed
    // downloading. See also "docs/fetcher.md" and
    // "docs/fetcher-cache-internals.md".
    optional bool cache = 4;

    // The fetcher's default behavior is to use the URI string's basename to
    // name the local copy. If this field is provided, the local copy will be
    // named with its value instead. If there is a directory component (which
    // must be a relative path), the local copy will be stored in that
    // subdirectory inside the sandbox.
    optional string output_file = 5;
  }

  // URIs to fetch before the command is executed.
  repeated URI uris = 1;

  // Environment variables set before the command is executed.
  optional Environment environment = 2;

  // There are two ways to specify the command:
  // 1) If 'shell == true', the command will be launched via shell
  //    (i.e., /bin/sh -c 'value'). The 'value' specified will be
  //    treated as the shell command. The 'arguments' will be ignored.
  // 2) If 'shell == false', the command will be launched by passing
  //    arguments to an executable. The 'value' specified will be
  //    treated as the filename of the executable. The 'arguments'
  //    will be treated as the arguments to the executable. This is
  //    similar to how POSIX exec families launch processes (i.e.,
  //    execlp(value, arguments(0), arguments(1), ...)).
  // NOTE: The field 'value' is changed from 'required' to 'optional'
  // in 0.20.0. It will only cause issues if a new framework is
  // connecting to an old master.
  optional bool shell = 6 [default = true];
  optional string value = 3;
  repeated string arguments = 7;

  // Enables executor and tasks to run as a specific user. If the user
  // field is present both in FrameworkInfo and here, the CommandInfo
  // user value takes precedence.
  optional string user = 5;
}
/**
 * Describes information about an executor.
 */
message ExecutorInfo {
  enum Type {
    UNKNOWN = 0;

    // Mesos provides a simple built-in default executor that frameworks can
    // leverage to run shell commands and containers.
    //
    // NOTES:
    //
    // 1) `command` must not be set when using a default executor.
    //
    // 2) Default executor only accepts a *single* `LAUNCH` or `LAUNCH_GROUP`
    //    operation.
    //
    // 3) If `container` is set, `container.type` must be `MESOS`
    //    and `container.mesos.image` must not be set.
    DEFAULT = 1;

    // For frameworks that need custom functionality to run tasks, a `CUSTOM`
    // executor can be used. Note that `command` must be set when using a
    // `CUSTOM` executor.
    CUSTOM = 2;
  }

  // For backwards compatibility, if this field is not set when using `LAUNCH`
  // operation, Mesos will infer the type by checking if `command` is set
  // (`CUSTOM`) or unset (`DEFAULT`). `type` must be set when using
  // `LAUNCH_GROUP` operation.
  //
  // TODO(vinod): Add support for explicitly setting `type` to `DEFAULT` in
  // `LAUNCH` operation.
  optional Type type = 15;

  // ID of this executor; see `ExecutorID`.
  required ExecutorID executor_id = 1;

  optional FrameworkID framework_id = 8; // TODO(benh): Make this required.

  // Command of the executor. Must be set for a `CUSTOM` executor and must
  // not be set for a `DEFAULT` executor (see `Type` above).
  optional CommandInfo command = 7;

  // Executor provided with a container will launch the container
  // with the executor's CommandInfo and we expect the container to
  // act as a Mesos executor.
  optional ContainerInfo container = 11;

  repeated Resource resources = 5;

  optional string name = 9;

  // 'source' is an identifier style string used by frameworks to
  // track the source of an executor. This is useful when it's
  // possible for different executor ids to be related semantically.
  //
  // NOTE: 'source' is exposed alongside the resource usage of the
  // executor via JSON on the slave. This allows users to import usage
  // information into a time series database for monitoring.
  //
  // This field is deprecated since 1.0. Please use labels for
  // free-form metadata instead.
  optional string source = 10 [deprecated = true]; // Since 1.0.

  // This field can be used to pass arbitrary bytes to an executor.
  optional bytes data = 4;

  // Service discovery information for the executor. It is not
  // interpreted or acted upon by Mesos. It is up to a service
  // discovery system to use this information as needed and to handle
  // executors without service discovery information.
  optional DiscoveryInfo discovery = 12;

  // When shutting down an executor the agent will wait in a
  // best-effort manner for the grace period specified here
  // before forcibly destroying the container. The executor
  // must not assume that it will always be allotted the full
  // grace period, as the agent may decide to allot a shorter
  // period and failures / forcible terminations may occur.
  optional DurationInfo shutdown_grace_period = 13;

  // Labels are free-form key value pairs which are exposed through
  // master and slave endpoints. Labels will not be interpreted or
  // acted upon by Mesos itself. As opposed to the data field, labels
  // will be kept in memory on master and slave processes. Therefore,
  // labels should be used to tag executors with lightweight metadata.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 14;
}
| /** | |
| * Describes a domain. A domain is a collection of hosts that have | |
| * similar characteristics. Mesos currently only supports "fault | |
| * domains", which identify groups of hosts with similar failure | |
| * characteristics. | |
| * | |
| * Frameworks can generally assume that network links between hosts in | |
| * the same fault domain have lower latency, higher bandwidth, and better | |
| * availability than network links between hosts in different domains. | |
| * Schedulers may prefer to place network-intensive workloads in the | |
| * same domain, as this may improve performance. Conversely, a single | |
| * failure that affects a host in a domain may be more likely to | |
| * affect other hosts in the same domain; hence, schedulers may prefer | |
| * to place workloads that require high availability in multiple | |
| * domains. (For example, all the hosts in a single rack might lose | |
| * power or network connectivity simultaneously.) | |
| * | |
| * There are two kinds of fault domains: regions and zones. Regions | |
| * offer the highest degree of fault isolation, but network latency | |
| * between regions is generally high (typically >50 ms). Zones offer a | |
| * modest degree of fault isolation along with reasonably low network | |
| * latency (typically <10 ms). | |
| * | |
| * The mapping from fault domains to physical infrastructure is up to | |
| * the operator to configure. In cloud environments, regions and zones | |
| * can be mapped to the "region" and "availability zone" concepts | |
| * exposed by most cloud providers, respectively. In on-premise | |
| * deployments, regions and zones can be mapped to data centers and | |
| * racks, respectively. | |
| * | |
| * Both masters and agents can be configured with domains. Frameworks | |
| * can compare the domains of two hosts to determine if the hosts are | |
| * in the same zone, in different zones in the same region, or in | |
| * different regions. Note that all masters in a given Mesos cluster | |
| * must be in the same region. | |
| * | |
| * Complex deployments may have additional levels of hierarchy: for example, | |
| * multiple racks might be grouped together into "halls" and multiple DCs in | |
| * the same geographical vicinity might be grouped together. As a convention, | |
| * the recommended way to represent additional levels of hierarchy is via dot- | |
| * separated labels in the existing zone and region fields. For example, the | |
| * fact that racks "abc" and "def" are in the same hall might be represented | |
| * using the zone names "rack-abc.hall-1" and "rack-def.hall-1". | |
| * Software that is not aware of this additional structure will compare the | |
| * zone names for equality; hence, the two zones will be treated as different | |
| * (unrelated) zones. Software that is "hall-aware" can inspect the zone names | |
| * and make use of the additional hierarchy. | |
| */ | |
| message DomainInfo { | |
| // A fault domain is identified by a region together with a zone; | |
| // both must be set. | |
| message FaultDomain { | |
| // Identifies a region, the level that offers the highest degree | |
| // of fault isolation. | |
| message RegionInfo { | |
| // Operator-assigned region name. Additional levels of hierarchy | |
| // may be encoded as dot-separated labels within the name. | |
| required string name = 1; | |
| } | |
| // Identifies a zone, which offers a modest degree of fault | |
| // isolation along with reasonably low network latency. | |
| message ZoneInfo { | |
| // Operator-assigned zone name, e.g., "rack-abc.hall-1". Names are | |
| // compared for equality unless software is aware of the hierarchy. | |
| required string name = 1; | |
| } | |
| required RegionInfo region = 1; | |
| required ZoneInfo zone = 2; | |
| } | |
| // The fault domain of the host. Fault domains are currently the | |
| // only kind of domain that Mesos supports. | |
| optional FaultDomain fault_domain = 1; | |
| } | |
| /** | |
| * Describes a master. This will probably have more fields in the | |
| * future which might be used, for example, to link a framework webui | |
| * to a master webui. | |
| */ | |
| message MasterInfo { | |
| // NOTE(review): presumably a unique identifier for this master | |
| // instance; its format is not visible here, confirm before relying on it. | |
| required string id = 1; | |
| // The IP address (only IPv4) as a packed 4-bytes integer, | |
| // stored in network order. Deprecated, use `address.ip` instead. | |
| required uint32 ip = 2; | |
| // The TCP port the Master is listening on for incoming | |
| // HTTP requests; deprecated, use `address.port` instead. | |
| required uint32 port = 3 [default = 5050]; | |
| // In the default implementation, this will contain information | |
| // about the IP address, port and Master name; it should really | |
| // not be relied upon by external tooling/frameworks and be | |
| // considered an "internal" implementation field. | |
| optional string pid = 4; | |
| // The server's hostname, if available; it may be unreliable | |
| // in environments where the DNS configuration does not resolve | |
| // internal hostnames (e.g., some public cloud providers). | |
| // Deprecated, use `address.hostname` instead. | |
| optional string hostname = 5; | |
| // The running Master version, as a string; taken from the | |
| // generated "master/version.hpp". | |
| optional string version = 6; | |
| // The full IP address (supports both IPv4 and IPv6 formats) | |
| // and supersedes the use of `ip`, `port` and `hostname`. | |
| // Since Mesos 0.24. | |
| optional Address address = 7; | |
| // The domain that this master belongs to. All masters in a Mesos | |
| // cluster should belong to the same region. | |
| optional DomainInfo domain = 8; | |
| // A feature supported by this master; see `Type` for the known | |
| // capabilities. | |
| message Capability { | |
| enum Type { | |
| // Declared first so that if `type` is not set, the default value | |
| // is UNKNOWN. This lets enum values be added in a | |
| // backwards-compatible way. | |
| UNKNOWN = 0; | |
| // NOTE: When the master starts to use a new capability that | |
| // may prevent compatible downgrade, remember to add the | |
| // capability to `Registry::MinimumCapability`. Conversely, | |
| // the added minimum capability should be removed if the capability | |
| // is deemed to be no longer required for compatible downgrade. | |
| // See MESOS-8878 for more details. | |
| // | |
| // The master can handle slaves whose state | |
| // changes after reregistering. | |
| AGENT_UPDATE = 1; | |
| // The master can drain or deactivate agents when requested | |
| // via operator APIs. | |
| AGENT_DRAINING = 2; | |
| // The master can handle the new quota API, which supports setting | |
| // limits separately from guarantees (introduced in Mesos 1.9). | |
| QUOTA_V2 = 3; | |
| } | |
| // Optional so that an unset value falls back to UNKNOWN. | |
| optional Type type = 1; | |
| } | |
| // The capabilities of this master; see `Capability.Type`. | |
| repeated Capability capabilities = 9; | |
| } | |
| /** | |
| * Describes a slave. Note that the 'id' field is only available after | |
| * a slave is registered with the master, and is made available here | |
| * to facilitate re-registration. | |
| */ | |
| message SlaveInfo { | |
| // The hostname of the slave. | |
| required string hostname = 1; | |
| // The port of the slave; defaults to 5051 when not set. | |
| optional int32 port = 8 [default = 5051]; | |
| // The configured resources at the agent. This does not include any | |
| // dynamic reservations or persistent volumes that may currently | |
| // exist at the agent. | |
| repeated Resource resources = 3; | |
| // The attributes set on this slave; see `Attribute`. | |
| repeated Attribute attributes = 5; | |
| // Only available after the slave is registered with the master; | |
| // present here to facilitate re-registration. | |
| optional SlaveID id = 6; | |
| // The domain that this slave belongs to. If the slave's region | |
| // differs from the master's region, it will not appear in resource | |
| // offers to frameworks that have not enabled the REGION_AWARE | |
| // capability. | |
| optional DomainInfo domain = 10; | |
| // Slave checkpointing is always enabled in recent Mesos versions; | |
| // the value of this field is ignored. | |
| // TODO(joerg84): Remove checkpoint field after deprecation cycle starting | |
| // with 0.27 (MESOS-2317). | |
| optional bool checkpoint = 7 [default = false]; | |
| // A feature supported by an agent; see `Type` for the known | |
| // capabilities. | |
| message Capability { | |
| enum Type { | |
| // This must be the first enum value in this list, to | |
| // ensure that if 'type' is not set, the default value | |
| // is UNKNOWN. This enables enum values to be added | |
| // in a backwards-compatible way. See: MESOS-4997. | |
| UNKNOWN = 0; | |
| // This expresses the ability for the agent to be able | |
| // to launch tasks of a 'multi-role' framework. | |
| MULTI_ROLE = 1; | |
| // This expresses the ability for the agent to be able to launch | |
| // tasks, reserve resources, and create volumes using resources | |
| // allocated to a 'hierarchical-role'. | |
| // NOTE: This capability is required specifically for creating | |
| // volumes because a hierarchical role includes '/' (slashes) in them. | |
| // Agents with this capability know to transform the '/' (slashes) | |
| // into ' ' (spaces). | |
| HIERARCHICAL_ROLE = 2; | |
| // This capability has three effects for an agent. | |
| // | |
| // (1) The format of the checkpointed resources, and | |
| // the resources reported to master. | |
| // | |
| // These resources are reported in the "pre-reservation-refinement" | |
| // format if none of the resources have refined reservations. If any | |
| // of the resources have refined reservations, they are reported in | |
| // the "post-reservation-refinement" format. The purpose is to allow | |
| // downgrading of an agent as well as communication with a pre-1.4.0 | |
| // master until the reservation refinement feature is actually used. | |
| // | |
| // See the 'Resource Format' section for more details. | |
| // | |
| // (2) The format of the resources reported by the HTTP endpoints. | |
| // | |
| // For resources reported by agent endpoints, the | |
| // "pre-reservation-refinement" format is "injected" if possible. | |
| // That is, resources without refined reservations will have the | |
| // `Resource.role` and `Resource.reservation` set, whereas | |
| // resources with refined reservations will not. | |
| // | |
| // See the 'Resource Format' section for more details. | |
| // | |
| // (3) The ability for the agent to launch tasks, reserve resources, and | |
| // create volumes using resources that have refined reservations. | |
| // | |
| // See the `Resource.reservations` field for more details. | |
| // | |
| // NOTE: Resources are said to have refined reservations if they use the | |
| // `Resource.reservations` field, and `Resource.reservations_size() > 1`. | |
| RESERVATION_REFINEMENT = 3; | |
| // This expresses the ability for the agent to handle resource | |
| // provider related operations. This includes the following: | |
| // | |
| // (1) The ability to report resources that are provided by some | |
| // local resource providers through the resource provider API. | |
| // | |
| // (2) The ability to provide operation feedback. This also means | |
| // that this capability is a prerequisite for full support of | |
| // feedback for operations on agent default resources. If an | |
| // agent has the mandatory AGENT_OPERATION_FEEDBACK capability | |
| // set but not the RESOURCE_PROVIDER capability, then | |
| // operations on agent default resources which request feedback | |
| // will not be allowed. | |
| RESOURCE_PROVIDER = 4; | |
| // This expresses the capability for the agent to handle persistent volume | |
| // resize operations safely. This capability is turned on by default. | |
| RESIZE_VOLUME = 5; | |
| // This expresses the ability of the agent to handle operation feedback | |
| // for operations on agent default resources. | |
| // | |
| // Note that full support for this feature also requires the | |
| // RESOURCE_PROVIDER capability; if you would like the agent to | |
| // handle feedback for operations on agent default resources, the | |
| // RESOURCE_PROVIDER capability should be set as well. | |
| AGENT_OPERATION_FEEDBACK = 6; | |
| // This expresses the ability for the agent to automatically drain tasks | |
| // in preparation for operator maintenance. This capability is required. | |
| AGENT_DRAINING = 7; | |
| // This expresses the ability for the agent to launch tasks which specify | |
| // resource limits for CPU and/or memory. | |
| TASK_RESOURCE_LIMITS = 8; | |
| } | |
| // Enum fields should be optional, see: MESOS-4997. | |
| optional Type type = 1; | |
| } | |
| } | |
| /** | |
| * Describes the container configuration to run a CSI plugin component. | |
| */ | |
| message CSIPluginContainerInfo { | |
| // The CSI services that a plugin container can serve. | |
| enum Service { | |
| UNKNOWN = 0; | |
| CONTROLLER_SERVICE = 1; | |
| NODE_SERVICE = 2; | |
| } | |
| // The services served by the container described by this message. | |
| repeated Service services = 1; | |
| // The remaining fields make up the container configuration used to run | |
| // the plugin component. | |
| // NOTE(review): presumably `command` launches the plugin and `resources` | |
| // are allocated to its container; confirm against the agent code. | |
| optional CommandInfo command = 2; | |
| repeated Resource resources = 3; | |
| optional ContainerInfo container = 4; | |
| } | |
| /** | |
| * Describes a CSI plugin. | |
| */ | |
| message CSIPluginInfo { | |
| // The type of the CSI plugin. This uniquely identifies a CSI | |
| // implementation. For instance: | |
| // org.apache.mesos.csi.test | |
| // | |
| // Please follow the Java package naming convention | |
| // (https://en.wikipedia.org/wiki/Java_package#Package_naming_conventions) | |
| // to avoid conflicts on type names. | |
| required string type = 1; | |
| // The name of the CSI plugin. There could be multiple instances of a | |
| // type of CSI plugin within a Mesos cluster. The name field is used to | |
| // distinguish these instances. It should be a legal Java identifier | |
| // (https://docs.oracle.com/javase/tutorial/java/nutsandbolts/variables.html) | |
| // to avoid conflicts on concatenation of type and name. | |
| // | |
| // The type and name together provide the means to uniquely identify a storage | |
| // backend and its resources in the cluster, so the operator should ensure | |
| // that the concatenation of type and name is unique in the cluster, and it | |
| // remains the same if the instance is migrated to another agent (e.g., there | |
| // is a change in the agent ID). | |
| required string name = 2; | |
| // A list of container configurations to run CSI plugin components. | |
| // The controller service will be served by the first configuration | |
| // that contains `CONTROLLER_SERVICE`, and the node service will be | |
| // served by the first configuration that contains `NODE_SERVICE`. | |
| // Because the first match wins, the order of this list is significant. | |
| repeated CSIPluginContainerInfo containers = 3; | |
| } | |
| /** | |
| * Describes a resource provider. Note that the 'id' field is only available | |
| * after a resource provider is registered with the master, and is made | |
| * available here to facilitate re-registration. | |
| */ | |
| message ResourceProviderInfo { | |
| // Only available after the resource provider is registered with the | |
| // master; present here to facilitate re-registration. | |
| optional ResourceProviderID id = 1; | |
| // The attributes of this resource provider; see `Attribute`. | |
| repeated Attribute attributes = 2; | |
| // The type of the resource provider. This uniquely identifies a | |
| // resource provider implementation. For instance: | |
| // org.apache.mesos.rp.local.storage | |
| // | |
| // Please follow the Java package naming convention | |
| // (https://en.wikipedia.org/wiki/Java_package#Package_naming_conventions) | |
| // to avoid conflicts on type names. | |
| required string type = 3; | |
| // The name of the resource provider. There could be multiple | |
| // instances of a type of resource provider. The name field is used | |
| // to distinguish these instances. It should be a legal Java identifier | |
| // (https://docs.oracle.com/javase/tutorial/java/nutsandbolts/variables.html) | |
| // to avoid conflicts on concatenation of type and name. | |
| required string name = 4; | |
| // The stack of default reservations. If this field is not empty, it | |
| // indicates that resources from this resource provider are reserved | |
| // by default, except for the resources that have been reserved or | |
| // unreserved through operations. The first `ReservationInfo` | |
| // may have type `STATIC` or `DYNAMIC`, but the rest must have | |
| // `DYNAMIC`. One can create a new reservation on top of an existing | |
| // one by pushing a new `ReservationInfo` to the back. The last | |
| // `ReservationInfo` in this stack is the "current" reservation. The | |
| // new reservation's role must be a child of the current | |
| // reservation's role. | |
| repeated Resource.ReservationInfo default_reservations = 5; // EXPERIMENTAL. | |
| // Storage resource provider related information. | |
| message Storage { | |
| // The CSI plugin that this storage resource provider uses. | |
| required CSIPluginInfo plugin = 1; | |
| // Amount of time to wait after the resource provider finishes reconciling | |
| // existing volumes and storage pools against the CSI plugin to start the | |
| // next reconciliation. A non-positive value means that no reconciliation | |
| // will happen after startup. | |
| optional double reconciliation_interval_seconds = 2; | |
| } | |
| optional Storage storage = 6; // EXPERIMENTAL. | |
| } | |
| /** | |
| * Describes an Attribute or Resource "value". A value is described | |
| * using the standard protocol buffer "union" trick. | |
| */ | |
| message Value { | |
| // Discriminator of the "union": names which kind of value this is. | |
| enum Type { | |
| SCALAR = 0; | |
| RANGES = 1; | |
| SET = 2; | |
| TEXT = 3; | |
| } | |
| message Scalar { | |
| // Scalar values are represented using floating point. To reduce | |
| // the chance of unpredictable floating point behavior due to | |
| // roundoff error, Mesos only supports three decimal digits of | |
| // precision for scalar resource values. That is, floating point | |
| // values are converted to a fixed point format that supports | |
| // three decimal digits of precision, and then converted back to | |
| // floating point on output. Any additional precision in scalar | |
| // resource values is discarded (via rounding). | |
| required double value = 1; | |
| } | |
| // A single range of unsigned integers. | |
| // NOTE(review): the bounds are presumably inclusive with begin <= end; | |
| // this is not stated here, confirm against the implementation. | |
| message Range { | |
| required uint64 begin = 1; | |
| required uint64 end = 2; | |
| } | |
| // A list of ranges. | |
| message Ranges { | |
| repeated Range range = 1; | |
| } | |
| // A set of string items. | |
| message Set { | |
| repeated string item = 1; | |
| } | |
| // A free-form text value. | |
| message Text { | |
| required string value = 1; | |
| } | |
| required Type type = 1; | |
| // By the "union" convention, the one field below that corresponds to | |
| // `type` is expected to be set. | |
| optional Scalar scalar = 2; | |
| optional Ranges ranges = 3; | |
| optional Set set = 4; | |
| optional Text text = 5; | |
| } | |
| /** | |
| * Describes an attribute that can be set on a machine. For now, | |
| * attributes and resources share the same "value" type, but this may | |
| * change in the future and attributes may only be string based. | |
| */ | |
| message Attribute { | |
| // The name of the attribute. | |
| required string name = 1; | |
| // Indicates which of the value fields below carries the value (the | |
| // same "union" convention that `Value` uses). | |
| required Value.Type type = 2; | |
| optional Value.Scalar scalar = 3; | |
| optional Value.Ranges ranges = 4; | |
| // NOTE: `set` and `text` are declared out of numeric order (6, then 5). | |
| // Field numbers identify the fields on the wire, so they must not be | |
| // renumbered to match the declaration order. | |
| optional Value.Set set = 6; | |
| optional Value.Text text = 5; | |
| } | |
| /** | |
| * Describes a resource from a resource provider. The `name` field is | |
| * a string like "cpus" or "mem" that indicates which kind of resource | |
| * this is; the rest of the fields describe the properties of the | |
| * resource. A resource can take on one of three types: scalar | |
| * (double), a list of finite and discrete ranges (e.g., [1-10, | |
| * 20-30]), or a set of items. A resource is described using the | |
| * standard protocol buffer "union" trick. | |
| * | |
| * Note that "disk" and "mem" resources are scalar values expressed in | |
| * megabytes. Fractional "cpus" values are allowed (e.g., "0.5"), | |
| * which correspond to partial shares of a CPU. | |
| */ | |
| message Resource { | |
| // Specified if the resource comes from a particular resource provider. | |
| optional ResourceProviderID provider_id = 12; | |
| // The kind of resource, e.g., "cpus" or "mem". | |
| required string name = 1; | |
| // Indicates which of `scalar`, `ranges` or `set` below carries the | |
| // value. Note that there is no `text` field in this message. | |
| required Value.Type type = 2; | |
| optional Value.Scalar scalar = 3; | |
| optional Value.Ranges ranges = 4; | |
| optional Value.Set set = 5; | |
| // The role that this resource is reserved for. If "*", this indicates | |
| // that the resource is unreserved. Otherwise, the resource will only | |
| // be offered to frameworks that belong to this role. | |
| // | |
| // NOTE: Frameworks must not set this field if `reservations` is set. | |
| // See the 'Resource Format' section for more details. | |
| // | |
| // TODO(mpark): Deprecate once `reservations` is no longer experimental. | |
| optional string role = 6 [default = "*", deprecated=true]; | |
| // This was initially introduced to support MULTI_ROLE capable | |
| // frameworks. Frameworks that are not MULTI_ROLE capable can | |
| // continue to assume that the offered resources are allocated | |
| // to their role. | |
| message AllocationInfo { | |
| // If set, this resource is allocated to a role. Note that in the | |
| // future, this may be unset and the scheduler may be responsible | |
| // for allocating to one of its roles. | |
| optional string role = 1; | |
| // In the future, we may add additional fields here, e.g. priority | |
| // tier, type of allocation (quota / fair share). | |
| } | |
| optional AllocationInfo allocation_info = 11; | |
| // Resource Format: | |
| // | |
| // Frameworks receive resource offers in one of two formats, depending on | |
| // whether the RESERVATION_REFINEMENT capability is enabled. | |
| // | |
| // __WITHOUT__ the RESERVATION_REFINEMENT capability, the framework is offered | |
| // resources in the "pre-reservation-refinement" format. In this format, the | |
| // `Resource.role` and `Resource.reservation` fields are used in conjunction | |
| // to describe the reservation state of a `Resource` message. | |
| // | |
| // The following is an overview of the possible reservation states: | |
| // | |
| // +------------+------------------------------------------------------------+ | |
| // | unreserved | { | | |
| // | | role: "*", | | |
| // | | reservation: <not set>, | | |
| // | | reservations: <unused> | | |
| // | | } | | |
| // +------------+------------------------------------------------------------+ | |
| // | static | { | | |
| // | | role: "eng", | | |
| // | | reservation: <not set>, | | |
| // | | reservations: <unused> | | |
| // | | } | | |
| // +------------+------------------------------------------------------------+ | |
| // | dynamic | { | | |
| // | | role: "eng", | | |
| // | | reservation: { | | |
| // | | type: <unused>, | | |
| // | | role: <unused>, | | |
| // | | principal: <optional>, | | |
| // | | labels: <optional> | | |
| // | | }, | | |
| // | | reservations: <unused> | | |
| // | | } | | |
| // +------------+------------------------------------------------------------+ | |
| // | |
| // __WITH__ the RESERVATION_REFINEMENT capability, the framework is offered | |
| // resources in the "post-reservation-refinement" format. In this format, the | |
| // reservation state of a `Resource` message is expressed solely in | |
| // `Resource.reservations` field. | |
| // | |
| // The following is an overview of the possible reservation states: | |
| // | |
| // +------------+------------------------------------------------------------+ | |
| // | unreserved | { | | |
| // | | role: <unused>, | | |
| // | | reservation: <unused>, | | |
| // | | reservations: [] | | |
| // | | } | | |
| // +------------+------------------------------------------------------------+ | |
| // | static | { | | |
| // | | role: <unused>, | | |
| // | | reservation: <unused>, | | |
| // | | reservations: [ | | |
| // | | { | | |
| // | | type: STATIC, | | |
| // | | role: "eng", | | |
| // | | principal: <optional>, | | |
| // | | labels: <optional> | | |
| // | | } | | |
| // | | ] | | |
| // | | } | | |
| // +------------+------------------------------------------------------------+ | |
| // | dynamic | { | | |
| // | | role: <unused>, | | |
| // | | reservation: <unused>, | | |
| // | | reservations: [ | | |
| // | | { | | |
| // | | type: DYNAMIC, | | |
| // | | role: "eng", | | |
| // | | principal: <optional>, | | |
| // | | labels: <optional> | | |
| // | | } | | |
| // | | ] | | |
| // | | } | | |
| // +------------+------------------------------------------------------------+ | |
| // | |
| // We can also __refine__ reservations with this capability like so: | |
| // | |
| // +------------+------------------------------------------------------------+ | |
| // | refined | { | | |
| // | | role: <unused>, | | |
| // | | reservation: <unused>, | | |
| // | | reservations: [ | | |
| // | | { | | |
| // | | type: STATIC or DYNAMIC, | | |
| // | | role: "eng", | | |
| // | | principal: <optional>, | | |
| // | | labels: <optional> | | |
| // | | }, | | |
| // | | { | | |
| // | | type: DYNAMIC, | | |
| // | | role: "eng/front_end", | | |
| // | | principal: <optional>, | | |
| // | | labels: <optional> | | |
| // | | } | | |
| // | | ] | | |
| // | | } | | |
| // +------------+------------------------------------------------------------+ | |
| // | |
| // NOTE: Each `ReservationInfo` in the `reservations` field denotes | |
| // a reservation that refines the previous `ReservationInfo`. | |
| message ReservationInfo { | |
| // Describes a reservation. A static reservation is set by the operator on | |
| // the command-line and they are immutable without agent restart. A dynamic | |
| // reservation is made by an operator via the '/reserve' HTTP endpoint | |
| // or by a framework via the offer cycle by sending back an | |
| // 'Offer::Operation::Reserve' message. | |
| // | |
| // NOTE: We currently do not allow frameworks with role "*" to make dynamic | |
| // reservations. | |
| enum Type { | |
| UNKNOWN = 0; | |
| STATIC = 1; | |
| DYNAMIC = 2; | |
| } | |
| // The type of this reservation. | |
| // | |
| // NOTE: This field must not be set for `Resource.reservation`. | |
| // See the 'Resource Format' section for more details. | |
| optional Type type = 4; | |
| // The role for which this reservation is made. | |
| // | |
| // NOTE: This field must not be set for `Resource.reservation`. | |
| // See the 'Resource Format' section for more details. | |
| optional string role = 3; | |
| // Indicates the principal, if any, of the framework or operator | |
| // that reserved this resource. If reserved by a framework, the | |
| // field should match the `FrameworkInfo.principal`. It is used in | |
| // conjunction with the `UnreserveResources` ACL to determine | |
| // whether the entity attempting to unreserve this resource is | |
| // permitted to do so. | |
| optional string principal = 1; | |
| // Labels are free-form key value pairs that can be used to | |
| // associate arbitrary metadata with a reserved resource. For | |
| // example, frameworks can use labels to identify the intended | |
| // purpose for a portion of the resources the framework has | |
| // reserved at a given slave. Labels should not contain duplicate | |
| // key-value pairs. | |
| optional Labels labels = 2; | |
| } | |
| // If this is set, this resource was dynamically reserved by an | |
| // operator or a framework. Otherwise, this resource is either unreserved | |
| // or statically reserved by an operator via the --resources flag. | |
| // | |
| // NOTE: Frameworks must not set this field if `reservations` is set. | |
| // See the 'Resource Format' section for more details. | |
| // | |
| // TODO(mpark): Deprecate once `reservations` is no longer experimental. | |
| optional ReservationInfo reservation = 8; | |
| // The stack of reservations. If this field is empty, it indicates that this | |
| // resource is unreserved. Otherwise, the resource is reserved. The first | |
| // `ReservationInfo` may have type `STATIC` or `DYNAMIC`, but the rest must | |
| // have `DYNAMIC`. One can create a new reservation on top of an existing | |
| // one by pushing a new `ReservationInfo` to the back. The last | |
| // `ReservationInfo` in this stack is the "current" reservation. The new | |
| // reservation's role must be a child of the current reservation's role. | |
| // | |
| // NOTE: Frameworks must not set this field if `reservation` is set. | |
| // See the 'Resource Format' section for more details. | |
| // | |
| // TODO(mpark): Deprecate `role` and `reservation` once this is stable. | |
| repeated ReservationInfo reservations = 13; // EXPERIMENTAL. | |
| message DiskInfo { | |
| // Describes a persistent disk volume. | |
| // | |
| // A persistent disk volume will not be automatically garbage | |
| // collected if the task/executor/slave terminates, but will be | |
| // re-offered to the framework(s) belonging to the 'role'. | |
| // | |
| // NOTE: Currently, we do not allow persistent disk volumes | |
| // without a reservation (i.e., 'role' cannot be '*'). | |
| message Persistence { | |
| // A unique ID for the persistent disk volume. This ID must be | |
| // unique per role on each slave. Although it is possible to use | |
| // the same ID on different slaves in the cluster and to reuse | |
| // IDs after a volume with that ID has been destroyed, both | |
| // practices are discouraged. | |
| required string id = 1; | |
| // This field indicates the principal of the operator or | |
| // framework that created this volume. It is used in conjunction | |
| // with the "destroy" ACL to determine whether an entity | |
| // attempting to destroy the volume is permitted to do so. | |
| // | |
| // NOTE: This field should match the FrameworkInfo.principal of | |
| // the framework that created the volume. | |
| optional string principal = 2; | |
| } | |
| optional Persistence persistence = 1; | |
| // Describes how this disk resource will be mounted in the | |
| // container. If not set, the disk resource will be used as the | |
| // sandbox. Otherwise, it will be mounted according to the | |
| // 'container_path' inside 'volume'. The 'host_path' inside | |
| // 'volume' is ignored. | |
| // NOTE: If 'volume' is set but 'persistence' is not set, the | |
| // volume will be automatically garbage collected after | |
| // task/executor terminates. Currently, if 'persistence' is set, | |
| // 'volume' must be set. | |
| optional Volume volume = 2; | |
| // Describes where a disk originates from. | |
| message Source { | |
| enum Type { | |
| UNKNOWN = 0; | |
| PATH = 1; | |
| MOUNT = 2; | |
| BLOCK = 3; | |
| RAW = 4; | |
| } | |
| // A folder that can be located on a separate disk device. This | |
| // can be shared and carved up as necessary between frameworks. | |
| message Path { | |
| // Path to the folder (e.g., /mnt/raid/disk0). If the path is a | |
| // relative path, it is relative to the agent work directory. | |
| optional string root = 1; | |
| } | |
| // A mounted file-system set up by the Agent administrator. This | |
| // can only be used exclusively: a framework cannot accept a | |
| // partial amount of this disk. | |
| message Mount { | |
| // Path to mount point (e.g., /mnt/raid/disk0). If the path is a | |
| // relative path, it is relative to the agent work directory. | |
| optional string root = 1; | |
| } | |
| // NOTE(review): presumably `path` accompanies type PATH and `mount` | |
| // accompanies type MOUNT; BLOCK and RAW have no dedicated field | |
| // in this message. Confirm against the implementation. | |
| required Type type = 1; | |
| optional Path path = 2; | |
| optional Mount mount = 3; | |
| // The vendor of this source. If present, this field provides the means to | |
| // uniquely identify the storage backend of this source in the cluster. | |
| optional string vendor = 7; // EXPERIMENTAL. | |
| // The identifier of this source. This field maps onto CSI volume IDs and | |
| // is not expected to be set by frameworks. If both `vendor` and `id` are | |
| // present, these two fields together provide the means to uniquely | |
| // identify this source in the cluster. | |
| optional string id = 4; // EXPERIMENTAL. | |
| // Additional metadata for this source. This field maps onto CSI volume | |
| // context. Frameworks should neither alter this field, nor expect this | |
| // field to remain unchanged. | |
| optional Labels metadata = 5; // EXPERIMENTAL. | |
| // This field serves as an indirection to a set of storage | |
| // vendor specific disk parameters which describe the properties | |
| // of the disk. The operator will set up mappings from a | |
| // profile name to a set of vendor specific disk parameters. And | |
| // the framework will do disk selection based on profile names, | |
| // instead of vendor specific disk parameters. | |
| // | |
| // Also see the DiskProfileAdaptor module. | |
| optional string profile = 6; // EXPERIMENTAL. | |
| } | |
| optional Source source = 3; | |
| } | |
| // Disk-specific information (persistence, volume and source); see | |
| // `DiskInfo`. | |
| optional DiskInfo disk = 7; | |
| // Marker message without fields: only its presence carries meaning. | |
| message RevocableInfo {} | |
| // If this is set, the resources are revocable, i.e., any tasks or | |
| // executors launched using these resources could get preempted or | |
| // throttled at any time. This could be used by frameworks to run | |
| // best effort tasks that do not need strict uptime or performance | |
| // guarantees. Note that if this is set, 'disk' or 'reservation' | |
| // cannot be set. | |
| optional RevocableInfo revocable = 9; | |
| // Allow the resource to be shared across tasks. | |
| message SharedInfo {} | |
| // If this is set, the resources are shared, i.e. multiple tasks | |
| // can be launched using this resource and all of them shall refer | |
| // to the same physical resource on the cluster. Note that only | |
| // persistent volumes can be shared currently. | |
| // | |
| // NOTE: Different shared resources must be uniquely identifiable. | |
| // This currently holds as persistent volume should have unique `id` | |
| // (this is not validated or enforced though). | |
| optional SharedInfo shared = 10; | |
| } | |
/**
 * Filters through which a framework can control the shape of the
 * offers that will be sent to its role(s). These filters apply
 * globally to any agent, unlike the existing `DECLINE` filter, which
 * is a time-based resource subset filter that only applies to the
 * agent that was declined.
 *
 * NOTE: Custom allocators might interpret these fields in a different
 * way, or not at all.
 */
message OfferFilters {
  message ResourceQuantities {
    // Quantities are pairs of an identifier of a scalar resource and
    // an associated value, e.g., `{"disk": Scalar {"value": 30}}`.
    map<string, Value.Scalar> quantities = 1;
  }

  message MinAllocatableResources {
    // A set of resources is considered allocatable if contained in any
    // of the following quantities. If no quantities are specified, any
    // resource is considered allocatable.
    repeated ResourceQuantities quantities = 1;
  }

  optional MinAllocatableResources min_allocatable_resources = 1;
}
/**
 * Statistics exported directly from the Linux Traffic Control
 * Queueing Discipline.
 *
 * When the network bandwidth caps are enabled and the container is
 * over its limit, outbound packets may be either delayed or dropped
 * completely. This happens either because the traffic exceeds the
 * maximum bandwidth allocation for a single container (the cap), or
 * because the combined network traffic of multiple containers on the
 * host exceeds the transmit capacity of the host (the share). The
 * following statistics can be reported for each of these conditions:
 *
 *  id         : name of the limiter, e.g. 'tx_bw_cap'
 *  backlog    : number of packets currently delayed
 *  bytes      : total bytes seen
 *  drops      : number of packets dropped in total
 *  overlimits : number of packets which exceeded allocation
 *  packets    : total packets seen
 *  qlen       : number of packets currently queued
 *  ratebps    : throughput in bytes/sec
 *  ratepps    : throughput in packets/sec
 *  requeues   : number of times a packet has been delayed due to
 *               locking or device contention issues
 *
 * More information on the operation of Linux Traffic Control can be
 * found at http://www.lartc.org/lartc.html.
 */
message TrafficControlStatistics {
  required string id = 1;

  optional uint64 backlog = 2;
  optional uint64 bytes = 3;
  optional uint64 drops = 4;
  optional uint64 overlimits = 5;
  optional uint64 packets = 6;
  optional uint64 qlen = 7;
  optional uint64 ratebps = 8;
  optional uint64 ratepps = 9;
  optional uint64 requeues = 10;
}
// IP counters, carried in `SNMPStatistics.ip_stats`.
//
// NOTE(review): The field names appear to mirror the kernel's SNMP
// counter names for IP (cf. /proc/net/snmp); confirm against the code
// that populates this message.
message IpStatistics {
  optional int64 Forwarding = 1;
  optional int64 DefaultTTL = 2;

  optional int64 InReceives = 3;
  optional int64 InHdrErrors = 4;
  optional int64 InAddrErrors = 5;
  optional int64 ForwDatagrams = 6;
  optional int64 InUnknownProtos = 7;
  optional int64 InDiscards = 8;
  optional int64 InDelivers = 9;

  optional int64 OutRequests = 10;
  optional int64 OutDiscards = 11;
  optional int64 OutNoRoutes = 12;

  optional int64 ReasmTimeout = 13;
  optional int64 ReasmReqds = 14;
  optional int64 ReasmOKs = 15;
  optional int64 ReasmFails = 16;

  optional int64 FragOKs = 17;
  optional int64 FragFails = 18;
  optional int64 FragCreates = 19;
}
// ICMP counters, carried in `SNMPStatistics.icmp_stats`. The `In*`
// fields (1-14) and the `Out*` fields (15-27) are declared in
// parallel.
//
// NOTE(review): The field names appear to mirror the kernel's SNMP
// counter names for ICMP (cf. /proc/net/snmp); confirm against the
// code that populates this message.
message IcmpStatistics {
  optional int64 InMsgs = 1;
  optional int64 InErrors = 2;
  optional int64 InCsumErrors = 3;
  optional int64 InDestUnreachs = 4;
  optional int64 InTimeExcds = 5;
  optional int64 InParmProbs = 6;
  optional int64 InSrcQuenchs = 7;
  optional int64 InRedirects = 8;
  optional int64 InEchos = 9;
  optional int64 InEchoReps = 10;
  optional int64 InTimestamps = 11;
  optional int64 InTimestampReps = 12;
  optional int64 InAddrMasks = 13;
  optional int64 InAddrMaskReps = 14;

  optional int64 OutMsgs = 15;
  optional int64 OutErrors = 16;
  optional int64 OutDestUnreachs = 17;
  optional int64 OutTimeExcds = 18;
  optional int64 OutParmProbs = 19;
  optional int64 OutSrcQuenchs = 20;
  optional int64 OutRedirects = 21;
  optional int64 OutEchos = 22;
  optional int64 OutEchoReps = 23;
  optional int64 OutTimestamps = 24;
  optional int64 OutTimestampReps = 25;
  optional int64 OutAddrMasks = 26;
  optional int64 OutAddrMaskReps = 27;
}
// TCP counters, carried in `SNMPStatistics.tcp_stats`.
//
// NOTE(review): The field names appear to mirror the kernel's SNMP
// counter names for TCP (cf. /proc/net/snmp); confirm against the
// code that populates this message.
message TcpStatistics {
  optional int64 RtoAlgorithm = 1;
  optional int64 RtoMin = 2;
  optional int64 RtoMax = 3;
  optional int64 MaxConn = 4;

  optional int64 ActiveOpens = 5;
  optional int64 PassiveOpens = 6;
  optional int64 AttemptFails = 7;
  optional int64 EstabResets = 8;
  optional int64 CurrEstab = 9;

  optional int64 InSegs = 10;
  optional int64 OutSegs = 11;
  optional int64 RetransSegs = 12;
  optional int64 InErrs = 13;
  optional int64 OutRsts = 14;
  optional int64 InCsumErrors = 15;
}
// UDP counters, carried in `SNMPStatistics.udp_stats`.
//
// NOTE(review): The field names appear to mirror the kernel's SNMP
// counter names for UDP (cf. /proc/net/snmp); confirm against the
// code that populates this message.
message UdpStatistics {
  optional int64 InDatagrams = 1;
  optional int64 NoPorts = 2;
  optional int64 InErrors = 3;
  optional int64 OutDatagrams = 4;
  optional int64 RcvbufErrors = 5;
  optional int64 SndbufErrors = 6;
  optional int64 InCsumErrors = 7;
  optional int64 IgnoredMulti = 8;
}
// Bundles the IP, ICMP, TCP and UDP counter sets. Reported per
// container through `ResourceStatistics.net_snmp_statistics`.
message SNMPStatistics {
  optional IpStatistics ip_stats = 1;
  optional IcmpStatistics icmp_stats = 2;
  optional TcpStatistics tcp_stats = 3;
  optional UdpStatistics udp_stats = 4;
}
// Statistics for a single disk (resource). Reported through the
// repeated `ResourceStatistics.disk_statistics` field.
message DiskStatistics {
  // `source` and `persistence` reuse the types nested in
  // `Resource.DiskInfo`.
  optional Resource.DiskInfo.Source source = 1;
  optional Resource.DiskInfo.Persistence persistence = 2;

  optional uint64 limit_bytes = 3;
  optional uint64 used_bytes = 4;
}
/**
 * A snapshot of resource usage statistics.
 */
message ResourceStatistics {
  // Snapshot time, in seconds since the Epoch.
  required double timestamp = 1;

  optional uint32 processes = 30;
  optional uint32 threads = 31;

  // CPU Usage Information:

  // Total CPU time spent in user mode, and kernel mode.
  optional double cpus_user_time_secs = 2;
  optional double cpus_system_time_secs = 3;

  // Hard CPU limit.
  optional double cpus_limit = 4;

  // Soft CPU limit.
  optional double cpus_soft_limit = 45;

  // cpu.stat on process throttling (for contention issues).
  optional uint32 cpus_nr_periods = 7;
  optional uint32 cpus_nr_throttled = 8;
  optional double cpus_throttled_time_secs = 9;

  // Memory Usage Information:

  // Total memory of a process in RAM (as opposed to in Swap). Added in
  // 0.23.0. This was previously reported as mem_rss_bytes, which was
  // also changed in 0.23.0 to represent only the anonymous memory
  // usage, to keep in sync with Linux kernel's (arguably erroneous)
  // use of terminology.
  optional uint64 mem_total_bytes = 36;

  // Total memory + swap usage. This is set if swap is enabled.
  optional uint64 mem_total_memsw_bytes = 37;

  // Hard memory limit.
  optional uint64 mem_limit_bytes = 6;

  // Soft memory limit.
  optional uint64 mem_soft_limit_bytes = 38;

  // Broken out memory usage information: pagecache, rss (anonymous),
  // mmaped files and swap.

  // TODO(chzhcn) mem_file_bytes and mem_anon_bytes are deprecated in
  // 0.23.0 and will be removed in 0.24.0.
  optional uint64 mem_file_bytes = 10;
  optional uint64 mem_anon_bytes = 11;

  // Page cache usage. Added in 0.23.0.
  optional uint64 mem_cache_bytes = 39;

  // Since 0.23.0, mem_rss_bytes represents only anonymous memory
  // usage. Note that neither its requiredness, type, name nor numeric
  // tag has been changed.
  optional uint64 mem_rss_bytes = 5;

  optional uint64 mem_mapped_file_bytes = 12;

  // This is only set if swap is enabled.
  optional uint64 mem_swap_bytes = 40;

  optional uint64 mem_unevictable_bytes = 41;

  // Number of occurrences of different levels of memory pressure
  // events reported by memory cgroup. Pressure listening (re)starts
  // with these values set to 0 when slave (re)starts. See
  // https://www.kernel.org/doc/Documentation/cgroups/memory.txt for
  // more details.
  optional uint64 mem_low_pressure_counter = 32;
  optional uint64 mem_medium_pressure_counter = 33;
  optional uint64 mem_critical_pressure_counter = 34;

  // Disk Usage Information for executor working directory.
  optional uint64 disk_limit_bytes = 26;
  optional uint64 disk_used_bytes = 27;

  // Per disk (resource) statistics.
  repeated DiskStatistics disk_statistics = 43;

  // Cgroups blkio statistics.
  optional CgroupInfo.Blkio.Statistics blkio_statistics = 44;

  // Perf statistics.
  optional PerfStatistics perf = 13;

  // Network Usage Information:
  optional uint64 net_rx_packets = 14;
  optional uint64 net_rx_bytes = 15;
  optional uint64 net_rx_errors = 16;
  optional uint64 net_rx_dropped = 17;
  optional uint64 net_tx_packets = 18;
  optional uint64 net_tx_bytes = 19;
  optional uint64 net_tx_errors = 20;
  optional uint64 net_tx_dropped = 21;

  // The kernel keeps track of RTT (round-trip time) for its TCP
  // sockets. RTT is a way to tell the latency of a container.
  optional double net_tcp_rtt_microsecs_p50 = 22;
  optional double net_tcp_rtt_microsecs_p90 = 23;
  optional double net_tcp_rtt_microsecs_p95 = 24;
  optional double net_tcp_rtt_microsecs_p99 = 25;

  optional double net_tcp_active_connections = 28;
  optional double net_tcp_time_wait_connections = 29;

  // Network traffic flowing into or out of a container can be delayed
  // or dropped due to congestion or policy inside and outside the
  // container.
  repeated TrafficControlStatistics net_traffic_control_statistics = 35;

  // Network SNMP statistics for each container.
  optional SNMPStatistics net_snmp_statistics = 42;
}
/**
 * Describes a snapshot of the resource usage for executors.
 */
message ResourceUsage {
  message Executor {
    required ExecutorInfo executor_info = 1;

    // Resources allocated to the executor. This includes resources
    // used by the executor itself as well as its active tasks.
    repeated Resource allocated = 2;

    // Current resource usage. If absent, the containerizer
    // cannot provide resource usage.
    optional ResourceStatistics statistics = 3;

    // The container id for the executor specified in the
    // `executor_info` field.
    required ContainerID container_id = 4;

    message Task {
      required string name = 1;
      required TaskID id = 2;
      repeated Resource resources = 3;
      optional Labels labels = 4;
    }

    // Non-terminal tasks.
    repeated Task tasks = 5;
  }

  repeated Executor executors = 1;

  // Slave's total resources including checkpointed dynamic
  // reservations and persistent volumes.
  repeated Resource total = 2;
}
/**
 * Describes a sample of events from "perf stat". Only available on
 * Linux.
 *
 * NOTE: Each optional field matches the name of a perf event (see
 * "perf list") with the following changes:
 * 1. Names are downcased.
 * 2. Hyphens ('-') are replaced with underscores ('_').
 * 3. Events with alternate names use the name "perf stat" returns,
 *    e.g., for the event "cycles OR cpu-cycles" perf always returns
 *    cycles.
 */
message PerfStatistics {
  // Start of sample interval, in seconds since the Epoch.
  required double timestamp = 1;

  // Duration of sample interval, in seconds.
  required double duration = 2;

  // Hardware event.
  optional uint64 cycles = 3;
  optional uint64 stalled_cycles_frontend = 4;
  optional uint64 stalled_cycles_backend = 5;
  optional uint64 instructions = 6;
  optional uint64 cache_references = 7;
  optional uint64 cache_misses = 8;
  optional uint64 branches = 9;
  optional uint64 branch_misses = 10;
  optional uint64 bus_cycles = 11;
  optional uint64 ref_cycles = 12;

  // Software event.
  optional double cpu_clock = 13;
  optional double task_clock = 14;
  optional uint64 page_faults = 15;
  optional uint64 minor_faults = 16;
  optional uint64 major_faults = 17;
  optional uint64 context_switches = 18;
  optional uint64 cpu_migrations = 19;
  optional uint64 alignment_faults = 20;
  optional uint64 emulation_faults = 21;

  // Hardware cache event.
  optional uint64 l1_dcache_loads = 22;
  optional uint64 l1_dcache_load_misses = 23;
  optional uint64 l1_dcache_stores = 24;
  optional uint64 l1_dcache_store_misses = 25;
  optional uint64 l1_dcache_prefetches = 26;
  optional uint64 l1_dcache_prefetch_misses = 27;
  optional uint64 l1_icache_loads = 28;
  optional uint64 l1_icache_load_misses = 29;
  optional uint64 l1_icache_prefetches = 30;
  optional uint64 l1_icache_prefetch_misses = 31;
  optional uint64 llc_loads = 32;
  optional uint64 llc_load_misses = 33;
  optional uint64 llc_stores = 34;
  optional uint64 llc_store_misses = 35;
  optional uint64 llc_prefetches = 36;
  optional uint64 llc_prefetch_misses = 37;
  optional uint64 dtlb_loads = 38;
  optional uint64 dtlb_load_misses = 39;
  optional uint64 dtlb_stores = 40;
  optional uint64 dtlb_store_misses = 41;
  optional uint64 dtlb_prefetches = 42;
  optional uint64 dtlb_prefetch_misses = 43;
  optional uint64 itlb_loads = 44;
  optional uint64 itlb_load_misses = 45;
  optional uint64 branch_loads = 46;
  optional uint64 branch_load_misses = 47;
  optional uint64 node_loads = 48;
  optional uint64 node_load_misses = 49;
  optional uint64 node_stores = 50;
  optional uint64 node_store_misses = 51;
  optional uint64 node_prefetches = 52;
  optional uint64 node_prefetch_misses = 53;
}
/**
 * Describes a request for resources that can be used by a framework
 * to proactively influence the allocator.
 */
message Request {
  // If provided, this request is assumed to only apply to resources
  // on that slave.
  optional SlaveID slave_id = 1;

  repeated Resource resources = 2;
}
/**
 * Describes some resources available on a slave. An offer only
 * contains resources from a single slave.
 */
message Offer {
  required OfferID id = 1;
  required FrameworkID framework_id = 2;
  required SlaveID slave_id = 3;
  required string hostname = 4;

  // URL for reaching the slave running on the host.
  optional URL url = 8;

  // The domain of the slave.
  optional DomainInfo domain = 11;

  repeated Resource resources = 5;
  repeated Attribute attributes = 7;

  // Executors of the same framework running on this agent.
  repeated ExecutorID executor_ids = 6;

  // Signifies that the resources in this Offer may be unavailable during
  // the given interval. Any tasks launched using these resources may be
  // killed when the interval arrives. For example, these resources may be
  // part of a planned maintenance schedule.
  //
  // This field only provides information about a planned unavailability.
  // The unavailability interval may not necessarily start at exactly this
  // interval, nor last for exactly the duration of this interval.
  // The unavailability may also be forever! See comments in
  // `Unavailability` for more details.
  optional Unavailability unavailability = 9;

  // An offer represents resources allocated to *one* of the
  // roles managed by the scheduler. (Therefore, each
  // `Offer.resources[i].allocation_info` will match the
  // top level `Offer.allocation_info`).
  optional Resource.AllocationInfo allocation_info = 10;

  // Defines an operation that can be performed against offers.
  message Operation {
    enum Type {
      UNKNOWN = 0;
      LAUNCH = 1;
      LAUNCH_GROUP = 6;
      RESERVE = 2;
      UNRESERVE = 3;
      CREATE = 4;
      DESTROY = 5;
      GROW_VOLUME = 11; // EXPERIMENTAL.
      SHRINK_VOLUME = 12; // EXPERIMENTAL.
      CREATE_DISK = 13; // EXPERIMENTAL.
      DESTROY_DISK = 14; // EXPERIMENTAL.
    }

    // TODO(vinod): Deprecate this in favor of `LaunchGroup` below.
    message Launch {
      repeated TaskInfo task_infos = 1;
    }

    // Unlike `Launch` above, all the tasks in a `task_group` are
    // atomically delivered to an executor.
    //
    // `NetworkInfo` set on executor will be shared by all tasks in
    // the task group.
    //
    // TODO(vinod): Any volumes set on executor could be used by a
    // task by explicitly setting `Volume.source` in its resources.
    message LaunchGroup {
      required ExecutorInfo executor = 1;
      required TaskGroupInfo task_group = 2;
    }

    message Reserve {
      repeated Resource source = 2;
      repeated Resource resources = 1;
    }

    message Unreserve {
      repeated Resource resources = 1;
    }

    message Create {
      repeated Resource volumes = 1;
    }

    message Destroy {
      repeated Resource volumes = 1;
    }

    // Grow a volume by an additional disk resource.
    // NOTE: This is currently experimental and only for persistent volumes
    // created on ROOT/PATH disk.
    message GrowVolume {
      required Resource volume = 1;
      required Resource addition = 2;
    }

    // Shrink a volume by the size specified in the `subtract` field.
    // NOTE: This is currently experimental and only for persistent volumes
    // created on ROOT/PATH disk.
    message ShrinkVolume {
      required Resource volume = 1;

      // See comments in `Value.Scalar` for maximum precision supported.
      required Value.Scalar subtract = 2;
    }

    // Create a `MOUNT` or `BLOCK` disk resource backed by a CSI volume from a
    // `RAW` disk resource.
    //
    // In the typical case where the `RAW` disk resource has a profile and no
    // source ID, a new CSI volume will be provisioned by Mesos to back the
    // returned `MOUNT` or `BLOCK` disk resource. However, the `RAW` disk
    // resource can instead have no profile but a source ID, indicating that
    // it is already backed by a CSI volume in one of the following scenarios:
    //
    // (1) The CSI volume is preprovisioned out-of-band.
    //
    // (2) The CSI volume is provisioned by Mesos, but Mesos has lost the
    //     corresponding `MOUNT` or `BLOCK` resource metadata. This could
    //     happen if there has been a change in the agent ID or resource
    //     provider ID where the volume belongs.
    //
    // In the above cases, Mesos won't provision a new CSI volume, but instead
    // will simply return a `MOUNT` or `BLOCK` disk resource backed by the same
    // CSI volume, with the profile specified in this call.
    //
    // NOTE: For the time being, this API is subject to change and the related
    // feature is experimental.
    message CreateDisk {
      required Resource source = 1;

      // NOTE: Only `MOUNT` or `BLOCK` is allowed in this field.
      required Resource.DiskInfo.Source.Type target_type = 2;

      // Apply the specified profile to the created disk. This field must be
      // set if `source` does not have a profile, and must not be set if it
      // has one.
      //
      // NOTE: The operation will fail if the specified profile is unknown to
      // Mesos, i.e., not reported by the disk profile adaptor.
      optional string target_profile = 3;
    }

    // Destroy a disk resource backed by a CSI volume.
    //
    // In the typical case where the CSI plugin of the volume supports volume
    // deprovisioning and the disk resource is a `MOUNT` or `BLOCK` disk with a
    // profile known to Mesos, the volume will be deprovisioned and a `RAW`
    // disk resource with the same profile but no source ID will be returned.
    // However, the following scenarios could lead to different outcomes:
    //
    // (1) If the CSI plugin supports volume deprovisioning but the profile of
    //     the disk resource is unknown to the disk profile adaptor, or the
    //     disk resource is a `RAW` disk with no profile but a source ID (see
    //     above for possible scenarios), the volume will be deprovisioned but
    //     no resource will be returned.
    //
    // (2) If the CSI plugin does not support volume deprovisioning, the volume
    //     won't be deprovisioned and a `RAW` disk resource with no profile but
    //     the same source ID will be returned.
    //
    // NOTE: For the time being, this API is subject to change and the related
    // feature is experimental.
    message DestroyDisk {
      // NOTE: Only a `MOUNT`, `BLOCK` or `RAW` disk is allowed in this field.
      required Resource source = 1;
    }

    optional Type type = 1;

    // The `id` field allows frameworks to indicate that they wish to receive
    // feedback about an operation via the UPDATE_OPERATION_STATUS event in the
    // v1 scheduler API.
    optional OperationID id = 12; // EXPERIMENTAL.

    optional Launch launch = 2;
    optional LaunchGroup launch_group = 7;
    optional Reserve reserve = 3;
    optional Unreserve unreserve = 4;
    optional Create create = 5;
    optional Destroy destroy = 6;
    optional GrowVolume grow_volume = 13; // EXPERIMENTAL.
    optional ShrinkVolume shrink_volume = 14; // EXPERIMENTAL.
    optional CreateDisk create_disk = 15; // EXPERIMENTAL.
    optional DestroyDisk destroy_disk = 16; // EXPERIMENTAL.
  }
}
/**
 * A request to return some resources occupied by a framework.
 */
message InverseOffer {
  // This is the same OfferID as found in normal offers, which allows
  // re-use of some of the OfferID-only messages.
  required OfferID id = 1;

  // URL for reaching the slave running on the host. This enables some
  // optimizations as described in MESOS-3012, such as allowing the
  // scheduler driver to bypass the master and talk directly with a slave.
  optional URL url = 2;

  // The framework that should release its resources.
  // If no specifics are provided (i.e. which slave), all the framework's
  // resources are requested back.
  required FrameworkID framework_id = 3;

  // Specified if the resources need to be released from a particular slave.
  // All the framework's resources on this slave are requested back,
  // unless further qualified by the `resources` field.
  optional SlaveID slave_id = 4;

  // This InverseOffer represents a planned unavailability event in the
  // specified interval. Any tasks running on the given framework or slave
  // may be killed when the interval arrives. Therefore, frameworks should
  // aim to gracefully terminate tasks prior to the arrival of the interval.
  //
  // For reserved resources, the resources are expected to be returned to the
  // framework after the unavailability interval. This is an expectation,
  // not a guarantee. For example, if the unavailability duration is not set,
  // the resources may be removed permanently.
  //
  // For other resources, there is no guarantee that requested resources will
  // be returned after the unavailability interval. The allocator has no
  // obligation to re-offer these resources to the prior framework after
  // the unavailability.
  required Unavailability unavailability = 5;

  // A list of resources being requested back from the framework,
  // on the slave identified by `slave_id`. If no resources are specified
  // then all resources are being requested back. For the purpose of
  // maintenance, this field is always empty (maintenance always requests
  // all resources back).
  repeated Resource resources = 6;

  // TODO(josephw): Add additional options for narrowing down the resources
  // being requested back. Such as specific executors, tasks, etc.
}
/**
 * Describes a task. Passed from the scheduler all the way to an
 * executor (see SchedulerDriver::launchTasks and
 * Executor::launchTask). Either ExecutorInfo or CommandInfo should be
 * set. A different executor can be used to launch this task, and
 * subsequent tasks meant for the same executor can reuse the same
 * ExecutorInfo struct.
 */
message TaskInfo {
  required string name = 1;
  required TaskID task_id = 2;
  required SlaveID slave_id = 3;
  repeated Resource resources = 4;
  optional ExecutorInfo executor = 5;
  optional CommandInfo command = 7;

  // Task provided with a container will launch the container as part
  // of this task paired with the task's CommandInfo.
  optional ContainerInfo container = 9;

  // A health check for the task. Implemented for executor-less
  // command-based tasks. For tasks that specify an executor, it is
  // the executor's responsibility to implement the health checking.
  optional HealthCheck health_check = 8;

  // A general check for the task. Implemented for all built-in executors.
  // For tasks that specify an executor, it is the executor's responsibility
  // to implement checking support. Executors should (all built-in executors
  // will) neither interpret nor act on the check's result.
  //
  // NOTE: Check support in built-in executors is experimental.
  //
  // TODO(alexr): Consider supporting multiple checks per task.
  optional CheckInfo check = 13;

  // A kill policy for the task. Implemented for executor-less
  // command-based and docker tasks. For tasks that specify an
  // executor, it is the executor's responsibility to implement
  // the kill policy.
  optional KillPolicy kill_policy = 12;

  optional bytes data = 6;

  // Labels are free-form key value pairs which are exposed through
  // master and slave endpoints. Labels will not be interpreted or
  // acted upon by Mesos itself. As opposed to the data field, labels
  // will be kept in memory on master and slave processes. Therefore,
  // labels should be used to tag tasks with light-weight meta-data.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 10;

  // Service discovery information for the task. It is not interpreted
  // or acted upon by Mesos. It is up to a service discovery system
  // to use this information as needed and to handle tasks without
  // service discovery information.
  optional DiscoveryInfo discovery = 11;

  // Maximum duration for task completion. If the task is non-terminal at the
  // end of this duration, it will fail with the reason
  // `REASON_MAX_COMPLETION_TIME_REACHED`. Mesos supports this field for
  // executor-less tasks, and tasks that use Docker or default executors.
  // It is the executor's responsibility to implement this, so it might not be
  // supported by all custom executors.
  optional DurationInfo max_completion_time = 14;

  // Resource limits associated with the task.
  map<string, Value.Scalar> limits = 15;
}
/**
 * Describes a group of tasks that belong to an executor. The
 * executor will receive the task group in a single message to
 * allow the group to be launched "atomically".
 *
 * NOTES:
 * 1) `NetworkInfo` must not be set inside task's `ContainerInfo`.
 * 2) `TaskInfo.executor` doesn't need to be set. If set, it should
 *    match `LaunchGroup.executor`.
 */
message TaskGroupInfo {
  repeated TaskInfo tasks = 1;
}
// TODO(bmahler): Add executor_uuid here, and send it to the master. This will
// allow us to expose executor work directories for tasks in the webui when
// looking from the master level. Currently only the slave knows which run the
// task belongs to.
/**
 * Describes a task, similar to `TaskInfo`.
 *
 * `Task` is used in some of the Mesos messages found below.
 * `Task` is used instead of `TaskInfo` if:
 *   1) we need additional IDs, such as a specific
 *      framework, executor, or agent; or
 *   2) we do not need the additional data, such as the command run by the
 *      task. These additional fields may be large and unnecessary for some
 *      Mesos messages.
 *
 * `Task` is generally constructed from a `TaskInfo`. See protobuf::createTask.
 */
message Task {
  required string name = 1;
  required TaskID task_id = 2;
  required FrameworkID framework_id = 3;
  optional ExecutorID executor_id = 4;
  required SlaveID slave_id = 5;
  required TaskState state = 6; // Latest state of the task.
  repeated Resource resources = 7;
  repeated TaskStatus statuses = 8;

  // These fields correspond to the state and uuid of the latest
  // status update forwarded to the master.
  // NOTE: Either both the fields must be set or both must be unset.
  optional TaskState status_update_state = 9;
  optional bytes status_update_uuid = 10;

  optional Labels labels = 11;

  // Service discovery information for the task. It is not interpreted
  // or acted upon by Mesos. It is up to a service discovery system
  // to use this information as needed and to handle tasks without
  // service discovery information.
  optional DiscoveryInfo discovery = 12;

  // Container information for the task.
  optional ContainerInfo container = 13;

  optional HealthCheck health_check = 15;

  // TODO(greggomann): Add the task's `CheckInfo`. See MESOS-8780.

  // The kill policy used for this task when it is killed. It's possible for
  // this policy to be overridden by the scheduler when killing the task.
  optional KillPolicy kill_policy = 16;

  // Specific user under which task is running.
  optional string user = 14;

  // Resource limits associated with the task.
  map<string, Value.Scalar> limits = 17;
}
| /** | |
| * Describes possible task states. IMPORTANT: Mesos assumes that a task | |
| * entering a terminal state (see below) is no longer running, and | |
| * thus cleans up anything associated with the task (ultimately | |
| * offering any resources being consumed by that task to another | |
| * task). | |
| */ | |
| enum TaskState { | |
| TASK_STAGING = 6; // Initial state. Framework status updates should not use. | |
| TASK_STARTING = 0; // The task is being launched by the executor. | |
| TASK_RUNNING = 1; | |
| // NOTE: This should only be sent when the framework has | |
| // the TASK_KILLING_STATE capability. | |
| TASK_KILLING = 8; // The task is being killed by the executor. | |
| // The task finished successfully on its own without external interference. | |
| TASK_FINISHED = 2; // TERMINAL. | |
| TASK_FAILED = 3; // TERMINAL: The task failed to finish successfully. | |
| TASK_KILLED = 4; // TERMINAL: The task was killed by the executor. | |
| TASK_ERROR = 7; // TERMINAL: The task description contains an error. | |
| // In Mesos 1.3, this will only be sent when the framework does NOT | |
| // opt-in to the PARTITION_AWARE capability. | |
| // | |
| // NOTE: This state is not always terminal. For example, tasks might | |
| // transition from TASK_LOST to TASK_RUNNING or other states when a | |
| // partitioned agent reregisters. | |
| TASK_LOST = 5; // The task failed but can be rescheduled. | |
| // The following task states are only sent when the framework | |
| // opts-in to the PARTITION_AWARE capability. | |
| // The task failed to launch because of a transient error. The | |
| // task's executor never started running. Unlike TASK_ERROR, the | |
| // task description is valid -- attempting to launch the task again | |
| // may be successful. | |
| TASK_DROPPED = 9; // TERMINAL. | |
| // The task was running on an agent that has lost contact with the | |
| // master, typically due to a network failure or partition. The task | |
| // may or may not still be running. | |
| TASK_UNREACHABLE = 10; | |
| // The task is no longer running. This can occur if the agent has | |
| // been terminated along with all of its tasks (e.g., the host that | |
| // was running the agent was rebooted). It might also occur if the | |
| // task was terminated due to an agent or containerizer error, or if | |
| // the task was preempted by the QoS controller in an | |
| // oversubscription scenario. | |
| TASK_GONE = 11; // TERMINAL. | |
| // The task was running on an agent that the master cannot contact; | |
| // the operator has asserted that the agent has been shutdown, but | |
| // this has not been directly confirmed by the master. If the | |
| // operator is correct, the task is not running and this is a | |
| // terminal state; if the operator is mistaken, the task may still | |
| // be running and might return to RUNNING in the future. | |
| TASK_GONE_BY_OPERATOR = 12; | |
| // The master has no knowledge of the task. This is typically | |
| // because either (a) the master never had knowledge of the task, or | |
| // (b) the master forgot about the task because it garbage collected | |
| // its metadata about the task. The task may or may not still be | |
| // running. | |
| TASK_UNKNOWN = 13; | |
| } | |
/**
 * A resource limitation that caused a task to fail.
 */
message TaskResourceLimitation {
  // The resource whose limits were violated.
  //
  // NOTE: A repeated 'Resource' (i.e. 'Resources') is used because the
  // resource may span multiple roles (e.g. `"mem(*):1;mem(role):2"`).
  repeated Resource resources = 1;
}
/**
 * A UUID as defined by RFC 4122: 128 bits, i.e. 16 bytes.
 */
message UUID {
  // Raw bytes of the UUID.
  required bytes value = 1;
}
/**
 * An operation, similar to `Offer.Operation`, augmented with some
 * additional information.
 */
message Operation {
  optional FrameworkID framework_id = 1;
  optional SlaveID slave_id = 2;
  required Offer.Operation info = 3;
  required OperationStatus latest_status = 4;

  // Every status known to this operation, in order. Some of the
  // statuses in this list might not have been acknowledged yet.
  repeated OperationStatus statuses = 5;

  // Internal UUID of the operation. It is kept independently from the
  // framework-specified operation ID, which is optional.
  required UUID uuid = 6;
}
/**
 * The states an operation can be in.
 */
enum OperationState {
  // Value reported when the enum is not set (default). See MESOS-4997.
  OPERATION_UNSUPPORTED = 0;

  // Initial state.
  OPERATION_PENDING = 1;

  // TERMINAL: The operation was successfully applied.
  OPERATION_FINISHED = 2;

  // TERMINAL: The operation failed to apply.
  OPERATION_FAILED = 3;

  // TERMINAL: The operation description contains an error.
  OPERATION_ERROR = 4;

  // TERMINAL: The operation was dropped due to a transient error.
  OPERATION_DROPPED = 5;

  // The operation affects an agent that has lost contact with the
  // master, typically due to a network failure or partition. The
  // operation may or may not still be pending.
  OPERATION_UNREACHABLE = 6;

  // The operation affected an agent that the master cannot contact;
  // the operator has asserted that the agent has been shutdown, but
  // this has not been directly confirmed by the master.
  //
  // If the operator is correct, the operation is not pending and this
  // is a terminal state; if the operator is mistaken, the operation
  // may still be pending and might return to a different state in the
  // future.
  OPERATION_GONE_BY_OPERATOR = 7;

  // The operation affects an agent that the master recovered from its
  // state, but that agent has not yet re-registered.
  //
  // The operation can transition to `OPERATION_UNREACHABLE` if the
  // corresponding agent is marked as unreachable, and will transition
  // to another status if the agent re-registers.
  OPERATION_RECOVERING = 8;

  // The master has no knowledge of the operation. This is typically
  // because either (a) the master never had knowledge of the
  // operation, or (b) the master forgot about the operation because it
  // garbage collected its metadata about the operation. The operation
  // may or may not still be pending.
  OPERATION_UNKNOWN = 9;
}
/**
 * The current status of an operation.
 */
message OperationStatus {
  // Frameworks only receive status updates for operations on which
  // they have set an ID. The field is nevertheless optional, because
  // this message is also used internally by Mesos components when the
  // operation's ID has not been set.
  optional OperationID operation_id = 1;

  required OperationState state = 2;
  optional string message = 3;

  // Converted resources after applying the operation. This only
  // applies if the `state` is `OPERATION_FINISHED`.
  repeated Resource converted_resources = 4;

  // Present on statuses that are delivered reliably to the scheduler.
  // Such a status is considered delivered once the scheduler has
  // acknowledged it.
  optional UUID uuid = 5;

  // Which of `slave_id` and `resource_provider_id` are set depends on
  // the resources the operation affects:
  //
  //   * resources from a local resource provider: both `slave_id` and
  //     `resource_provider_id` are set;
  //   * resources that belong to an external resource provider: only
  //     `resource_provider_id` is set.
  //
  // In certain cases, e.g., invalid operations, neither `uuid`,
  // `slave_id` nor `resource_provider_id` will be set, and the
  // scheduler does not need to acknowledge this status update.
  optional SlaveID slave_id = 6;
  optional ResourceProviderID resource_provider_id = 7;
}
/**
 * The status of a check. The type and the corresponding field, i.e.,
 * `command` or `http`, must be set. If the result of the check is not
 * available (e.g., the check timed out), these fields must contain
 * empty messages, i.e., `exit_code` or `status_code` will be unset.
 *
 * NOTE(review): `tcp` / `succeeded` presumably follow the same rule;
 * the text above does not mention TCP checks -- confirm.
 *
 * NOTE: This API is subject to change and the related feature is
 * experimental.
 */
message CheckStatusInfo {
  message Command {
    // Exit code of a command check. It is the result of calling
    // `WEXITSTATUS()` on `waitpid()` termination information on
    // Posix and calling `GetExitCodeProcess()` on Windows.
    optional int32 exit_code = 1;
  }

  message Http {
    // HTTP status code of an HTTP check.
    optional uint32 status_code = 1;
  }

  message Tcp {
    // Whether a TCP connection succeeded.
    optional bool succeeded = 1;
  }

  // TODO(alexr): Consider adding a `data` field, which can contain,
  // e.g., truncated stdout/stderr output for command checks or HTTP
  // response body for HTTP checks. Alternatively, it can be an even
  // shorter `message` field containing the last line of stdout or
  // Reason-Phrase of the status line of the HTTP response.

  // The type of the check this status corresponds to.
  optional CheckInfo.Type type = 1;

  // Status of a command check.
  optional Command command = 2;

  // Status of an HTTP check.
  optional Http http = 3;

  // Status of a TCP check.
  optional Tcp tcp = 4;

  // TODO(alexr): Consider introducing a "last changed at" timestamp,
  // since task status update's timestamp may not correspond to the
  // last check's state, e.g., for reconciliation.

  // TODO(alexr): Consider introducing a `reason` enum here to
  // explicitly distinguish between completed, delayed, and timed out
  // checks.
}
/**
 * The current status of a task.
 */
message TaskStatus {
  // Where the task status update originated.
  enum Source {
    SOURCE_MASTER = 0;
    SOURCE_SLAVE = 1;
    SOURCE_EXECUTOR = 2;
  }

  // Detailed reason for the task status update.
  // Refer to docs/task-state-reasons.md for additional explanation.
  enum Reason {
    // TODO(jieyu): The default value when a caller doesn't check for
    // presence is 0 and so ideally the 0 reason is not a valid one.
    // Since this is not used anywhere, consider removing this reason.
    REASON_COMMAND_EXECUTOR_FAILED = 0;

    REASON_CONTAINER_LAUNCH_FAILED = 21;
    REASON_CONTAINER_LIMITATION = 19;
    REASON_CONTAINER_LIMITATION_DISK = 20;
    REASON_CONTAINER_LIMITATION_MEMORY = 8;
    REASON_CONTAINER_PREEMPTED = 17;
    REASON_CONTAINER_UPDATE_FAILED = 22;
    REASON_MAX_COMPLETION_TIME_REACHED = 33;
    REASON_EXECUTOR_REGISTRATION_TIMEOUT = 23;
    REASON_EXECUTOR_REREGISTRATION_TIMEOUT = 24;
    REASON_EXECUTOR_TERMINATED = 1;

    // No longer used.
    REASON_EXECUTOR_UNREGISTERED = 2;

    REASON_FRAMEWORK_REMOVED = 3;
    REASON_GC_ERROR = 4;
    REASON_INVALID_FRAMEWORKID = 5;
    REASON_INVALID_OFFERS = 6;
    REASON_IO_SWITCHBOARD_EXITED = 27;
    REASON_MASTER_DISCONNECTED = 7;
    REASON_RECONCILIATION = 9;
    REASON_RESOURCES_UNKNOWN = 18;
    REASON_SLAVE_DISCONNECTED = 10;
    REASON_SLAVE_DRAINING = 34;
    REASON_SLAVE_REMOVED = 11;
    REASON_SLAVE_REMOVED_BY_OPERATOR = 31;
    REASON_SLAVE_REREGISTERED = 32;
    REASON_SLAVE_RESTARTED = 12;
    REASON_SLAVE_UNKNOWN = 13;
    REASON_TASK_KILLED_DURING_LAUNCH = 30;
    REASON_TASK_CHECK_STATUS_UPDATED = 28;
    REASON_TASK_HEALTH_CHECK_STATUS_UPDATED = 29;
    REASON_TASK_GROUP_INVALID = 25;
    REASON_TASK_GROUP_UNAUTHORIZED = 26;
    REASON_TASK_INVALID = 14;
    REASON_TASK_UNAUTHORIZED = 15;
    REASON_TASK_UNKNOWN = 16;
  }

  required TaskID task_id = 1;
  required TaskState state = 2;

  // Possible message explaining state.
  optional string message = 4;

  optional Source source = 9;
  optional Reason reason = 10;
  optional bytes data = 3;
  optional SlaveID slave_id = 5;

  // TODO(benh): Use in master/slave.
  optional ExecutorID executor_id = 7;

  optional double timestamp = 6;

  // Present on statuses that are delivered reliably to the scheduler.
  // Such a status is considered delivered once the scheduler has
  // acknowledged it. Schedulers can choose to either explicitly
  // acknowledge statuses or let the scheduler driver implicitly
  // acknowledge (default).
  //
  // TODO(bmahler): This is currently overwritten in the scheduler
  // driver and executor driver, but executors will need to set this
  // to a valid RFC-4122 UUID if using the HTTP API.
  optional bytes uuid = 11;

  // Whether the task has been determined to be healthy (true) or
  // unhealthy (false) according to the `health_check` field in
  // `TaskInfo`.
  optional bool healthy = 8;

  // Check status for the check specified in the corresponding
  // `TaskInfo`. If no check has been specified, this field must be
  // absent, otherwise it must be present even if the check status is
  // not available yet. If the status update is triggered for a
  // different reason than `REASON_TASK_CHECK_STATUS_UPDATED`, this
  // field will contain the last known value.
  //
  // NOTE: A check-related task status update is triggered if and only
  // if the value or presence of any field in `CheckStatusInfo` changes.
  //
  // NOTE: Check support in built-in executors is experimental.
  optional CheckStatusInfo check_status = 15;

  // Free-form key value pairs which are exposed through master and
  // slave endpoints. Mesos itself does not interpret or act upon
  // labels. As opposed to the data field, labels will be kept in
  // memory on master and slave processes. Therefore, labels should be
  // used to tag TaskStatus message with light-weight meta-data.
  // Labels should not contain duplicate key-value pairs.
  optional Labels labels = 12;

  // Container related information that is resolved dynamically such
  // as network address.
  optional ContainerStatus container_status = 13;

  // The time (according to the master's clock) when the agent where
  // this task was running became unreachable. This is only set on
  // status updates for tasks running on agents that are unreachable
  // (e.g., partitioned away from the master).
  optional TimeInfo unreachable_time = 14;

  // Optional additional information for the case where the reason
  // field indicates a container resource limitation.
  optional TaskResourceLimitation limitation = 16;
}
/**
 * Filters that can be applied to unused resources (see
 * SchedulerDriver::launchTasks) to influence the allocator.
 */
message Filters {
  // How long unused resources are considered refused. Note that all
  // unused resources will be considered refused and use the default
  // value (below) regardless of whether Filters was passed to
  // SchedulerDriver::launchTasks. You MUST pass Filters with this
  // field set to change this behavior (i.e., get another offer which
  // includes unused resources sooner or later than the default).
  //
  // A value greater than 31536000 seconds (365 days) causes the
  // resources to be considered refused for 365 days. A negative value
  // causes the default value to be used.
  optional double refuse_seconds = 1 [default = 5.0];
}
/**
 * A collection of environment variables. It is used with CommandInfo
 * to set environment variables before running a command. The contents
 * of each variable may be specified as a string or a Secret; exactly
 * one of `value` and `secret` must be set.
 */
message Environment {
  message Variable {
    required string name = 1;

    enum Type {
      UNKNOWN = 0;
      VALUE = 1;
      SECRET = 2;
    }

    // In Mesos 1.2, the `Environment.variables.value` message was made
    // optional. The default type for `Environment.variables.type` is
    // now VALUE, which requires `value` to be set, maintaining
    // backward compatibility.
    //
    // TODO(greggomann): The default can be removed in Mesos 2.1
    // (MESOS-7134).
    optional Type type = 3 [default = VALUE];

    // Exactly one of `value` and `secret` must be set.
    optional string value = 2;
    optional Secret secret = 4;
  }

  repeated Variable variables = 1;
}
/**
 * A generic (key, value) pair; used for parameters in various places.
 */
message Parameter {
  required string key = 1;
  required string value = 2;
}
/**
 * A collection of `Parameter` messages.
 */
message Parameters {
  repeated Parameter parameter = 1;
}
/**
 * A credential, used in various places for authentication and
 * authorization.
 *
 * NOTE: A 'principal' is not the same thing as 'FrameworkInfo.user'.
 * The principal is used for authentication and authorization, whereas
 * the user determines the default user under which the framework's
 * executors/tasks are run.
 */
message Credential {
  required string principal = 1;
  optional string secret = 2;
}
/**
 * A collection of credentials, used for framework authentication, HTTP
 * authentication (where the common 'username' and 'password' are
 * captured as 'principal' and 'secret' respectively), etc.
 */
message Credentials {
  repeated Credential credentials = 1;
}
/**
 * A secret, used to pass privileged information. It is designed to
 * provide pass-by-value or pass-by-reference semantics, where the
 * REFERENCE type can be used by custom modules which interact with a
 * secure back-end.
 */
message Secret {
  enum Type {
    UNKNOWN = 0;
    REFERENCE = 1;
    VALUE = 2;
  }

  // Lets modules refer to a secret stored in a secure back-end. The
  // `key` field is provided to permit reference to a single value
  // within a secret containing arbitrary key-value pairs.
  //
  // For example, given a back-end secret store with a secret named
  // "my-secret" containing the following key-value pairs:
  //
  //   {
  //     "username": "my-user",
  //     "password": "my-password"
  //   }
  //
  // the username could be referred to in a `Secret` by specifying
  // "my-secret" for the `name` and "username" for the `key`.
  message Reference {
    required string name = 1;
    optional string key = 2;
  }

  // Carries the value of a secret directly.
  message Value {
    required bytes data = 1;
  }

  optional Type type = 1;

  // Only one of `reference` and `value` must be set.
  optional Reference reference = 2;
  optional Value value = 3;
}
/**
 * Rate (queries per second, QPS) limit for messages from a framework
 * to master. Strictly speaking it is the combined rate from all
 * frameworks of the same principal.
 */
message RateLimit {
  // An unset QPS means an unlimited rate (i.e., not throttled), which
  // also implies unlimited capacity.
  optional double qps = 1;

  // Principal of framework(s) to be throttled. Should match
  // FrameworkInfo.principal and Credential.principal (if using
  // authentication).
  required string principal = 2;

  // Max number of outstanding messages from frameworks of this
  // principal allowed by master before the next message is dropped
  // and an error is sent back to the sender. Messages received before
  // the capacity is reached are still going to be processed after the
  // error is sent.
  // If unspecified, this principal is assigned unlimited capacity.
  // NOTE: This value is ignored if 'qps' is not set.
  optional uint64 capacity = 3;
}
/**
 * A collection of `RateLimit` messages.
 * Frameworks without rate limits defined here are not throttled unless
 * 'aggregate_default_qps' is specified.
 */
message RateLimits {
  // Items should have unique principals.
  repeated RateLimit limits = 1;

  // Default rate for all the frameworks not specified in 'limits'.
  // This is an aggregate rate for all of them, i.e., their combined
  // traffic is throttled together at this rate.
  optional double aggregate_default_qps = 2;

  // Default capacity for all the frameworks not specified in 'limits'.
  // This is an aggregate value similar to 'aggregate_default_qps'.
  optional uint64 aggregate_default_capacity = 3;
}
/**
 * An image used by tasks or executors. Note that it currently applies
 * only to tasks or executors launched by MesosContainerizer.
 */
message Image {
  enum Type {
    APPC = 1;
    DOCKER = 2;
  }

  // Specifies an Appc container image. See:
  // https://github.com/appc/spec/blob/master/spec/aci.md
  message Appc {
    // The name of the image.
    required string name = 1;

    // An image ID is a string of the format "hash-value", where
    // "hash" is the hash algorithm used and "value" is the hex
    // encoded string of the digest. Currently the only permitted
    // hash algorithm is sha512.
    optional string id = 2;

    // Optional labels. Suggested labels: "version", "os", and "arch".
    optional Labels labels = 3;
  }

  message Docker {
    // The name of the image. Expected format:
    //   [REGISTRY_HOST[:REGISTRY_PORT]/]REPOSITORY[:TAG|@TYPE:DIGEST]
    //
    // See: https://docs.docker.com/reference/commandline/pull/
    required string name = 1;

    // Credential to authenticate with docker registry.
    // NOTE: This is not encrypted, therefore framework and operators
    // should enable SSL when passing this information.
    //
    // This field has never been used in Mesos before and is
    // deprecated since Mesos 1.3. Please use `config` below
    // (see MESOS-7088 for details).
    optional Credential credential = 2 [deprecated = true]; // Since 1.3.

    // Docker config containing credentials to authenticate with
    // docker registry. The secret is expected to be a docker
    // config file in JSON format with UTF-8 character encoding.
    optional Secret config = 3;
  }

  required Type type = 1;

  // Only one of the following image messages should be set to match
  // the type.
  optional Appc appc = 2;
  optional Docker docker = 3;

  // When this flag is false, the mesos containerizer will pull the
  // docker/appc image from the registry even if the image is already
  // downloaded on the agent.
  optional bool cached = 4 [default = true];
}
/**
 * How the mount will be propagated for a volume. For more details
 * about mount propagation see:
 * https://www.kernel.org/doc/Documentation/filesystems/sharedsubtree.txt
 */
message MountPropagation {
  enum Mode {
    UNKNOWN = 0;

    // The volume in a container will receive new mounts from the host
    // or other containers, but filesystems mounted inside the
    // container won't be propagated to the host or other containers.
    // This is currently the default behavior for all volumes.
    HOST_TO_CONTAINER = 1;

    // The volume in a container will receive new mounts from the host
    // or other containers, and its own mounts will be propagated from
    // the container to the host or other containers.
    BIDIRECTIONAL = 2;
  }

  optional Mode mode = 1;
}
/**
 * A volume mapping, either from host to container or vice versa. Both
 * paths can either refer to a directory or a file.
 */
message Volume {
  enum Mode {
    RW = 1; // read-write.
    RO = 2; // read-only.
  }

  // TODO(gyliu513): Make this as `optional` after deprecation cycle of
  // 1.0.
  required Mode mode = 3;

  // Path pointing to a directory or file in the container. A relative
  // path is relative to the container work directory. An absolute
  // path must already exist.
  required string container_path = 1;

  // The following specifies the source of this volume. At most one of
  // the following should be set.

  // Absolute path pointing to a directory or file on the host or a
  // path relative to the container work directory.
  optional string host_path = 2;

  // The source of the volume is an Image which describes a root
  // filesystem which will be provisioned by Mesos.
  optional Image image = 4;

  // Describes where a volume originates from.
  message Source {
    enum Type {
      // This must be the first enum value in this list, to
      // ensure that if 'type' is not set, the default value
      // is UNKNOWN. This enables enum values to be added
      // in a backwards-compatible way. See: MESOS-4997.
      UNKNOWN = 0;

      // TODO(gyliu513): Add IMAGE as volume source type.
      DOCKER_VOLUME = 1;
      HOST_PATH = 4;
      SANDBOX_PATH = 2;
      SECRET = 3;
    }

    message DockerVolume {
      // Driver of the volume, it can be flocker, convoy, rexray etc.
      optional string driver = 1;

      // Name of the volume.
      required string name = 2;

      // Volume driver specific options.
      optional Parameters driver_options = 3;
    }

    // Absolute path pointing to a directory or file on the host.
    message HostPath {
      required string path = 1;
      optional MountPropagation mount_propagation = 2;
    }

    // A path from a container's sandbox. The container can be the
    // current container (SELF), or its parent container (PARENT).
    // PARENT allows all child containers to share a volume from their
    // parent container's sandbox. It'll be an error if the current
    // container is a top level container.
    message SandboxPath {
      enum Type {
        UNKNOWN = 0;
        SELF = 1;
        PARENT = 2;
      }

      optional Type type = 1;

      // A path relative to the corresponding container's sandbox.
      // Note that upwards traversal (i.e. ../../abc) is not allowed.
      required string path = 2;
    }

    // Enum fields should be optional, see: MESOS-4997.
    optional Type type = 1;

    // The following specifies the source of this volume. At most one
    // of the following should be set.

    // The source of the volume created by docker volume driver.
    optional DockerVolume docker_volume = 2;

    optional HostPath host_path = 5;
    optional SandboxPath sandbox_path = 3;

    // The volume/secret isolator uses the secret-fetcher module
    // (third-party or internal) to download the secret and make it
    // available at container_path.
    optional Secret secret = 4;
  }

  optional Source source = 5;
}
/**
 * Describes a network request from a framework as well as network
 * resolution provided by Mesos.
 *
 * A framework may request the network isolator on the Agent to isolate
 * the container in a network namespace and create a virtual network
 * interface. The `NetworkInfo` message describes the properties of
 * that virtual interface, including the IP addresses and network
 * isolation policy (network group membership).
 *
 * The NetworkInfo message is not interpreted by the Master or Agent
 * and is intended to be used by Agent and Master modules implementing
 * network isolation. If the modules are missing, the message is simply
 * ignored. In future, the task launch will fail if there is no module
 * providing the network isolation capabilities (MESOS-3390).
 *
 * An executor, Agent, or an Agent module may append NetworkInfos
 * inside TaskStatus::container_status to provide information such as
 * the container IP address and isolation groups.
 */
message NetworkInfo {
  enum Protocol {
    IPv4 = 1;
    IPv6 = 2;
  }

  // Specifies a request for an IP address, or reports the assigned
  // container IP address.
  //
  // Users can request an automatically assigned IP (for example, via
  // an IPAM service) or a specific IP by adding a NetworkInfo to the
  // ContainerInfo for a task. On a request, specifying neither
  // `protocol` nor `ip_address` means that any available address may
  // be assigned.
  message IPAddress {
    // Specify IP address requirement. Set protocol to the desired
    // value to request the network isolator on the Agent to assign an
    // IP address to the container being launched. If a specific IP
    // address is specified in ip_address, this field should not be
    // set.
    optional Protocol protocol = 1 [default = IPv4];

    // Statically assigned IP provided by the Framework. This IP will
    // be assigned to the container by the network isolator module on
    // the Agent. This field should not be used with the protocol
    // field above.
    //
    // If an explicit address is requested but is unavailable, the
    // network isolator should fail the task.
    optional string ip_address = 2;
  }

  // When included in a ContainerInfo, each of these represent a
  // request for an IP address. Each request can specify an explicit
  // address or the IP protocol to use.
  //
  // When included in a TaskStatus message, these inform the framework
  // scheduler about the IP addresses that are bound to the container
  // interface. When there are no custom network isolator modules
  // installed, this field is filled in automatically with the Agent
  // IP address.
  repeated IPAddress ip_addresses = 5;

  // Name of the network which will be used by network isolator to
  // determine the network that the container joins. It's up to the
  // network isolator to decide how to interpret this field.
  optional string name = 6;

  // A group is the name given to a set of logically-related
  // interfaces that are allowed to communicate among themselves.
  // Network traffic is allowed between two container interfaces that
  // share at least one network group. For example, one might want to
  // create separate groups for isolating dev, testing, qa and prod
  // deployment environments.
  repeated string groups = 3;

  // To tag certain metadata to be used by Isolator/IPAM, e.g., rack,
  // etc.
  optional Labels labels = 4;

  // Specifies a port mapping request for the task on this network.
  message PortMapping {
    required uint32 host_port = 1;
    required uint32 container_port = 2;

    // Protocol to expose as (ie: tcp, udp).
    optional string protocol = 3;
  }

  repeated PortMapping port_mappings = 7;
}
/**
 * Encapsulation of the `Capabilities` supported by Linux.
 * Reference: http://linux.die.net/man/7/capabilities.
 */
message CapabilityInfo {
  // The actual values start at an offset (1000) because Protobuf 2
  // uses the first value as the default one. Keeping the default
  // value apart from the real first value helps to disambiguate them,
  // which is especially valuable for backward compatibility.
  // See: MESOS-4997.
  enum Capability {
    UNKNOWN = 0;
    CHOWN = 1000;
    DAC_OVERRIDE = 1001;
    DAC_READ_SEARCH = 1002;
    FOWNER = 1003;
    FSETID = 1004;
    KILL = 1005;
    SETGID = 1006;
    SETUID = 1007;
    SETPCAP = 1008;
    LINUX_IMMUTABLE = 1009;
    NET_BIND_SERVICE = 1010;
    NET_BROADCAST = 1011;
    NET_ADMIN = 1012;
    NET_RAW = 1013;
    IPC_LOCK = 1014;
    IPC_OWNER = 1015;
    SYS_MODULE = 1016;
    SYS_RAWIO = 1017;
    SYS_CHROOT = 1018;
    SYS_PTRACE = 1019;
    SYS_PACCT = 1020;
    SYS_ADMIN = 1021;
    SYS_BOOT = 1022;
    SYS_NICE = 1023;
    SYS_RESOURCE = 1024;
    SYS_TIME = 1025;
    SYS_TTY_CONFIG = 1026;
    MKNOD = 1027;
    LEASE = 1028;
    AUDIT_WRITE = 1029;
    AUDIT_CONTROL = 1030;
    SETFCAP = 1031;
    MAC_OVERRIDE = 1032;
    MAC_ADMIN = 1033;
    SYSLOG = 1034;
    WAKE_ALARM = 1035;
    BLOCK_SUSPEND = 1036;
    AUDIT_READ = 1037;
  }

  repeated Capability capabilities = 1;
}
/**
 * Encapsulation of the Seccomp configuration, which is Linux specific.
 */
message SeccompInfo {
  // A filename of the Seccomp profile. This should be a path
  // relative to the directory containing Seccomp profiles, which is
  // specified on the agent via the `--seccomp_config_dir` flag.
  optional string profile_name = 1;

  // If set to `true`, Seccomp is not applied to the container.
  // If not set or set to `false`, the container is launched with
  // the profile specified in the `profile_name` field.
  //
  // NOTE: `profile_name` must not be specified if `unconfined` is set
  // to `true`. `profile_name` must be specified if `unconfined` is not
  // set or is set to `false`.
  optional bool unconfined = 2;
}
/**
 * Linux specific configuration of a container,
 * e.g., capabilities, limits, etc.
 */
message LinuxInfo {
  // Deprecated since 1.4.0; use `effective_capabilities` instead.
  optional CapabilityInfo capability_info = 1 [deprecated = true];

  // Capabilities that tasks are allowed to have but that are not
  // granted to them initially.
  optional CapabilityInfo bounding_capabilities = 2;

  // Capabilities that the task will be executed with.
  optional CapabilityInfo effective_capabilities = 3;

  // When 'true', the container shares the pid namespace with its
  // parent: a top level container shares it with the agent, a nested
  // container shares it with its parent container. The field is ignored
  // if the 'namespaces/pid' isolator is not enabled.
  optional bool share_pid_namespace = 4;

  // Seccomp configuration, which is used for syscall filtering. Setting
  // this field overrides the agent's default Seccomp configuration.
  optional SeccompInfo seccomp = 5;

  enum IpcMode {
    UNKNOWN = 0;

    // The container gets its own IPC namespace and /dev/shm, and has
    // the possibility to share them with its child containers.
    PRIVATE = 1;

    // The container shares the IPC namespace and /dev/shm of its
    // parent: the agent host for a top level container, the parent
    // container for a nested container. As a consequence, a nested
    // container that wants to share the IPC namespace and /dev/shm of
    // the agent host needs its parent container to do so first.
    SHARE_PARENT = 2;
  }

  // Two special cases have to be handled for this field:
  //
  // 1. The field is not set: for backward compatibility the previous
  //    behavior is kept. A top level container has its own IPC
  //    namespace and a nested container shares the IPC namespace of
  //    its parent container. A container without its own rootfs shares
  //    the agent's /dev/shm, otherwise it has its own /dev/shm.
  // 2. The `namespaces/ipc` isolator is not enabled: the field is
  //    ignored. For backward compatibility the `filesystem/linux`
  //    isolator keeps the previous behavior: any container shares the
  //    IPC namespace of the agent, and a container without its own
  //    rootfs also shares the agent's /dev/shm, otherwise it has its
  //    own /dev/shm.
  //
  // TODO(qianzhang): Remove the support for the above two cases after
  // the deprecation cycle (started in 1.9). Eventually we want a single
  // isolator (`namespaces/ipc`) to handle both IPC namespace and
  // /dev/shm, and decouple /dev/shm from container's rootfs (i.e.,
  // whether a container will have its own /dev/shm depends on its
  // `ipc_mode` instead of whether the container has its own rootfs).
  optional IpcMode ipc_mode = 6;

  // Size of /dev/shm in MB. When unset, the value of the agent flag
  // `--default_container_shm_size` is used; when that flag is unset as
  // well, /dev/shm is half of the host RAM, which is the default
  // behavior of Linux. The field is ignored for a container that shares
  // /dev/shm from its parent, and for any container if the
  // `namespaces/ipc` isolator is not enabled. Setting this field is only
  // supported when `ipc_mode` is set to `PRIVATE`; otherwise the
  // container launch is rejected.
  optional uint32 shm_size = 7;

  // When 'true', the container shares the cgroups of its parent
  // container; otherwise it has its own cgroups created. Please note:
  //
  // 1. For tasks in a task group launched via the LAUNCH_GROUP
  //    operation, this field may be set to 'true' or 'false'. Resource
  //    limits may only be set for tasks in a task group when this field
  //    is set to 'false'.
  // 2. For tasks launched via the LAUNCH operation, this field may only
  //    be set to 'true', and in this case resource limits may be set on
  //    these tasks.
  // 3. For containers launched via the agent's
  //    LAUNCH_NESTED_CONTAINER_SESSION call, this field must be set to
  //    'true'.
  // 4. For executor containers, this field may only be set to 'false'.
  // 5. All tasks under a single executor must share the same value of
  //    this field, if it is set. This means that all tasks within a
  //    single task group must set this field to the same value.
  optional bool share_cgroups = 8 [default = true];
}
/**
 * POSIX rlimits, see
 * http://pubs.opengroup.org/onlinepubs/009695399/functions/getrlimit.html.
 * Note that some types might only be defined for Linux.
 * The type names carry a custom prefix (`RLMT_`) so that they do not
 * conflict with existing system macros (e.g., `RLIMIT_CPU` or `NOFILE`).
 */
message RLimitInfo {
  // A single resource limit.
  message RLimit {
    enum Type {
      UNKNOWN = 0;
      RLMT_AS = 1;
      RLMT_CORE = 2;
      RLMT_CPU = 3;
      RLMT_DATA = 4;
      RLMT_FSIZE = 5;
      RLMT_LOCKS = 6;
      RLMT_MEMLOCK = 7;
      RLMT_MSGQUEUE = 8;
      RLMT_NICE = 9;
      RLMT_NOFILE = 10;
      RLMT_NPROC = 11;
      RLMT_RSS = 12;
      RLMT_RTPRIO = 13;
      RLMT_RTTIME = 14;
      RLMT_SIGPENDING = 15;
      RLMT_STACK = 16;
    }

    optional Type type = 1;

    // `hard` and `soft` are either both set or both unset:
    //   * both unset: the limit is unlimited.
    //   * both set: `soft` <= `hard` is required.
    optional uint64 hard = 2;
    optional uint64 soft = 3;
  }

  repeated RLimit rlimits = 1;
}
/**
 * Information about a (pseudo) TTY that can be attached to a process
 * running in a container.
 */
message TTYInfo {
  // Window size, expressed as a number of rows and columns.
  message WindowSize {
    required uint32 rows = 1;
    required uint32 columns = 2;
  }

  optional WindowSize window_size = 1;
}
/**
 * Configuration of a container, extensible so that different container
 * implementations can carry their own settings.
 *
 * NOTE: `ContainerInfo` may be specified, e.g., by a task, even if no
 * container image is provided. In that case neither `MesosInfo` nor
 * `DockerInfo` is set and the required `type` must be `MESOS`. This
 * covers a task without an image, e.g., a shell script with URIs, that
 * wants to use features originally designed for containers, for
 * example custom network isolation via `NetworkInfo`.
 */
message ContainerInfo {
  // All container implementation types.
  // For each type there should be a field in the ContainerInfo itself
  // whose name matches the type name exactly, in lowercase.
  enum Type {
    DOCKER = 1;
    MESOS = 2;
  }

  message DockerInfo {
    // The docker image that is going to be passed to the registry.
    required string image = 1;

    // Network options.
    enum Network {
      HOST = 1;
      BRIDGE = 2;
      NONE = 3;
      USER = 4;
    }

    optional Network network = 2 [default = HOST];

    // Pairs a host port with a container port.
    message PortMapping {
      required uint32 host_port = 1;
      required uint32 container_port = 2;

      // Protocol to expose as (e.g., tcp, udp).
      optional string protocol = 3;
    }

    repeated PortMapping port_mappings = 3;

    optional bool privileged = 4 [default = false];

    // Arbitrary parameters to be passed to the docker CLI.
    // Anything passed through this field is not guaranteed to be
    // supported moving forward, as we might move away from the
    // docker CLI.
    repeated Parameter parameters = 5;

    // When set to true, the docker containerizer pulls the docker image
    // from the registry even if the image is already downloaded on the
    // slave.
    optional bool force_pull_image = 6;

    // The name of volume driver plugin.
    optional string volume_driver = 7 [deprecated = true]; // Since 1.0
  }

  message MesosInfo {
    optional Image image = 1;
  }

  // NOTE(review): no field visible in this message uses field number 6;
  // confirm whether it was assigned in the past before reusing it.

  required Type type = 1;
  repeated Volume volumes = 2;
  optional string hostname = 4;

  // At most one of the following *Info messages should be set, and it
  // should match `type`, i.e. the "protobuf union" in ContainerInfo
  // should be valid.
  optional DockerInfo docker = 3;
  optional MesosInfo mesos = 5;

  // A list of network requests. A framework can request multiple IP
  // addresses for the container.
  repeated NetworkInfo network_infos = 7;

  // Linux specific information for the container.
  optional LinuxInfo linux_info = 8;

  // (POSIX only) rlimits of the container.
  optional RLimitInfo rlimit_info = 9;

  // If specified, a tty will be attached to the container entrypoint.
  optional TTYInfo tty_info = 10;
}
/**
 * Container related information that is resolved during container
 * setup. It is sent back to the framework as part of the TaskStatus
 * message.
 */
message ContainerStatus {
  optional ContainerID container_id = 4;

  // Can be reliably used to identify the container IP address.
  repeated NetworkInfo network_infos = 1;

  // Information about Linux control group (cgroup).
  optional CgroupInfo cgroup_info = 2;

  // Information about Executor PID.
  optional uint32 executor_pid = 3;
}
/**
 * Information about a Linux control group (cgroup).
 */
message CgroupInfo {
  // Configuration of a blkio cgroup subsystem.
  message Blkio {
    enum Operation {
      UNKNOWN = 0;
      TOTAL = 1;
      READ = 2;
      WRITE = 3;
      SYNC = 4;
      ASYNC = 5;
      DISCARD = 6;
    }

    // A stat value without the device descriptor part.
    message Value {
      optional Operation op = 1; // Required.
      optional uint64 value = 2; // Required.
    }

    message CFQ {
      message Statistics {
        // Stats are grouped by block devices. An unset `device`
        // represents `Total`.
        optional Device.Number device = 1;

        // blkio.sectors
        optional uint64 sectors = 2;

        // blkio.time
        optional uint64 time = 3;

        // blkio.io_serviced
        repeated Value io_serviced = 4;

        // blkio.io_service_bytes
        repeated Value io_service_bytes = 5;

        // blkio.io_service_time
        repeated Value io_service_time = 6;

        // blkio.io_wait_time
        repeated Value io_wait_time = 7;

        // blkio.io_merged
        repeated Value io_merged = 8;

        // blkio.io_queued
        repeated Value io_queued = 9;
      }

      // TODO(jasonlai): Add fields for blkio weight and weight
      // device.
    }

    message Throttling {
      message Statistics {
        // Stats are grouped by block devices. An unset `device`
        // represents `Total`.
        optional Device.Number device = 1;

        // blkio.throttle.io_serviced
        repeated Value io_serviced = 2;

        // blkio.throttle.io_service_bytes
        repeated Value io_service_bytes = 3;
      }

      // TODO(jasonlai): Add fields for blkio.throttle.*_device.
    }

    message Statistics {
      repeated CFQ.Statistics cfq = 1;
      repeated CFQ.Statistics cfq_recursive = 2;
      repeated Throttling.Statistics throttling = 3;
    }
  }

  // Configuration of a net_cls cgroup subsystem.
  message NetCls {
    // The 32-bit classid is made of two parts: a 16-bit major handle
    // and a 16-bit minor handle. They are represented using the format
    // 0xAAAABBBB, where 0xAAAA is the 16-bit major handle and 0xBBBB is
    // the 16-bit minor handle.
    optional uint32 classid = 1;
  }

  optional NetCls net_cls = 1;
}
/**
 * A collection of labels. The collection should not contain duplicate
 * key-value pairs.
 */
message Labels {
  repeated Label labels = 1;
}
/**
 * A key with an optional value, used to store free form user-data.
 */
message Label {
  required string key = 1;
  optional string value = 2;
}
/**
 * A named port used for service discovery.
 */
message Port {
  // Port number on which the framework exposes a service.
  required uint32 number = 1;

  // Name of the service hosted on this port.
  optional string name = 2;

  // Layer 4-7 protocol on which the framework exposes its services.
  optional string protocol = 3;

  // Restricts discovery to within a framework (FRAMEWORK), to within a
  // Mesos cluster (CLUSTER), or places no restrictions (EXTERNAL).
  // The visibility setting of a Port overrides the general visibility
  // setting in the DiscoveryInfo.
  optional DiscoveryInfo.Visibility visibility = 4;

  // Can be used to decorate the message with metadata to be
  // interpreted by external applications such as firewalls.
  optional Labels labels = 5;
}
/**
 * A collection of `Port` entries.
 */
message Ports {
  repeated Port ports = 1;
}
/**
 * Service discovery information.
 *
 * The visibility field restricts discovery to within a framework
 * (FRAMEWORK), to within a Mesos cluster (CLUSTER), or places no
 * restrictions (EXTERNAL). Each port in the ports field also has an
 * optional visibility field; if it is specified for a port, it
 * overrides the default service-wide DiscoveryInfo.visibility for that
 * port.
 *
 * The environment, location, and version fields provide first class
 * support for common attributes used to differentiate between similar
 * services. The environment may receive values such as PROD/QA/DEV,
 * the location field may receive values like
 * EAST-US/WEST-US/EUROPE/AMEA, and the version field may receive values
 * like v2.0/v0.9. The exact use of these fields is up to each service
 * discovery system.
 */
message DiscoveryInfo {
  enum Visibility {
    FRAMEWORK = 0;
    CLUSTER = 1;
    EXTERNAL = 2;
  }

  required Visibility visibility = 1;
  optional string name = 2;
  optional string environment = 3;
  optional string location = 4;
  optional string version = 5;
  optional Ports ports = 6;
  optional Labels labels = 7;
}
/**
 * Named WeightInfo that indicates the resource allocation priority
 * between the different roles.
 */
message WeightInfo {
  required double weight = 1;

  // Name of the related role.
  optional string role = 2;
}
/**
 * Version information of a component: the required version string plus
 * optional build and git details.
 */
message VersionInfo {
  required string version = 1;

  optional string build_date = 2;
  optional double build_time = 3;
  optional string build_user = 4;

  optional string git_sha = 5;
  optional string git_branch = 6;
  optional string git_tag = 7;
}
/**
 * A flag: a name and, optionally, its value.
 */
message Flag {
  required string name = 1;
  optional string value = 2;
}
/**
 * Describes a Role. Roles can be used to specify that certain resources
 * are reserved for the use of one or more frameworks.
 */
message Role {
  required string name = 1;
  required double weight = 2;
  repeated FrameworkID frameworks = 3;

  // TODO(bmahler): Deprecate `resources` and introduce quota,
  // consumed quota, allocated, offered, and reserved resource
  // quantity fields. This is blocked by MESOS-9497 since the
  // computation of these quantities is currently expensive.
  repeated Resource resources = 4;
}
/**
 * A metric: a name and, optionally, its value.
 */
message Metric {
  required string name = 1;
  optional double value = 2;
}
/**
 * Describes a file.
 */
message FileInfo {
  // Absolute path to the file.
  required string path = 1;

  // Number of hard links.
  optional int32 nlink = 2;

  // Total size in bytes.
  optional uint64 size = 3;

  // Last modification time.
  optional TimeInfo mtime = 4;

  // The file's mode and permission bits. The bits have the same
  // definition on all systems and are portable.
  optional uint32 mode = 5;

  // User ID of owner.
  optional string uid = 6;

  // Group ID of owner.
  optional string gid = 7;
}
/**
 * Describes information about a device: optionally its path and
 * optionally its device number.
 */
message Device {
  // Device number, made of a major and a minor number.
  message Number {
    required uint64 major_number = 1;
    required uint64 minor_number = 2;
  }

  optional string path = 1;
  optional Number number = 2;
}
/**
 * Describes a device whitelist entry: a device that is exposed from the
 * host to the container, together with the access granted on it.
 */
message DeviceAccess {
  // Kinds of access to the device: read, write and mknod.
  message Access {
    optional bool read = 1;
    optional bool write = 2;
    optional bool mknod = 3;
  }

  required Device device = 1;
  required Access access = 2;
}
/**
 * A collection of `DeviceAccess` entries.
 */
message DeviceWhitelist {
  repeated DeviceAccess allowed_devices = 1;
}
/**
 * Drain state of an agent.
 */
enum DrainState {
  UNKNOWN = 0;

  // The agent is currently draining.
  DRAINING = 1;

  // The agent has been drained: all tasks have terminated, all terminal
  // task status updates have been acknowledged by the frameworks, and
  // all operations have finished and had their terminal updates
  // acknowledged.
  DRAINED = 2;
}
/**
 * Configuration that controls how an agent is drained.
 */
message DrainConfig {
  // An upper bound for tasks with a KillPolicy.
  // A task whose KillPolicy grace period is greater than this value
  // gets this value instead, which lets the operator limit the maximum
  // time it takes the agent to drain. If this field is unset, the
  // task's KillPolicy or the executor's default grace period is used.
  //
  // NOTE: Grace periods start when the executor receives the associated
  // kill. If, for example, the agent is unreachable when this call is
  // made, tasks will still receive their full grace period to kill
  // gracefully.
  optional DurationInfo max_grace_period = 1;

  // Whether or not this agent will be removed permanently from the
  // cluster when draining is complete. This transition is automatic and
  // does **NOT** require a separate call to `MarkAgentGone`. If this
  // field is unset, the default value of `false` is used.
  //
  // Compared to `MarkAgentGone`, which is used for unreachable agents,
  // marking agents gone after draining will respect kill policies.
  // To notify frameworks, tasks terminated during draining will return
  // a `TASK_GONE_BY_OPERATOR` status update instead of any other
  // terminal status. Executors will not need to account for this case,
  // because the terminal status update will be intercepted and modified
  // by the agent.
  optional bool mark_gone = 2 [default = false];
}
/**
 * Drain state of an agent together with the configuration used to
 * drain it.
 */
message DrainInfo {
  // The drain state of the agent.
  required DrainState state = 1;

  // The configuration used to drain the agent.
  required DrainConfig config = 2;
}