From ea5b662599fa42d1d80c0c84d75be6c187a86944 Mon Sep 17 00:00:00 2001 From: Ximo Guanter Date: Fri, 28 May 2021 22:33:09 +0200 Subject: [PATCH] Update k8s user guide to use deployments --- .../rust/executor/executor_config_spec.toml | 2 +- ballista/rust/executor/src/main.rs | 2 +- .../rust/scheduler/scheduler_config_spec.toml | 2 +- ballista/rust/scheduler/src/main.rs | 2 +- benchmarks/README.md | 6 +- benchmarks/docker-compose.yaml | 4 +- .../src/distributed/docker-compose.md | 2 +- docs/user-guide/src/distributed/kubernetes.md | 70 +++++++++---------- .../user-guide/src/distributed/raspberrypi.md | 2 +- docs/user-guide/src/distributed/standalone.md | 12 ++-- 10 files changed, 50 insertions(+), 54 deletions(-) diff --git a/ballista/rust/executor/executor_config_spec.toml b/ballista/rust/executor/executor_config_spec.toml index 8d817fee9cc5..3cb168e77241 100644 --- a/ballista/rust/executor/executor_config_spec.toml +++ b/ballista/rust/executor/executor_config_spec.toml @@ -53,7 +53,7 @@ doc = "Host name or IP address to register with scheduler so that other executor [[param]] abbr = "p" -name = "port" +name = "bind_port" type = "u16" default = "50051" doc = "bind port" diff --git a/ballista/rust/executor/src/main.rs b/ballista/rust/executor/src/main.rs index ad7c001e654a..a069016392dd 100644 --- a/ballista/rust/executor/src/main.rs +++ b/ballista/rust/executor/src/main.rs @@ -77,7 +77,7 @@ async fn main() -> Result<()> { let external_host = opt.external_host; let bind_host = opt.bind_host; - let port = opt.port; + let port = opt.bind_port; let addr = format!("{}:{}", bind_host, port); let addr = addr diff --git a/ballista/rust/scheduler/scheduler_config_spec.toml b/ballista/rust/scheduler/scheduler_config_spec.toml index 560e9a2599bd..81e77d31b0a0 100644 --- a/ballista/rust/scheduler/scheduler_config_spec.toml +++ b/ballista/rust/scheduler/scheduler_config_spec.toml @@ -54,7 +54,7 @@ doc = "Local host name or IP address to bind to. 
Default: 0.0.0.0" [[param]] abbr = "p" -name = "port" +name = "bind_port" type = "u16" default = "50050" doc = "bind port. Default: 50050" \ No newline at end of file diff --git a/ballista/rust/scheduler/src/main.rs b/ballista/rust/scheduler/src/main.rs index 713103fcf043..34386ca6c561 100644 --- a/ballista/rust/scheduler/src/main.rs +++ b/ballista/rust/scheduler/src/main.rs @@ -116,7 +116,7 @@ async fn main() -> Result<()> { let namespace = opt.namespace; let bind_host = opt.bind_host; - let port = opt.port; + let port = opt.bind_port; let addr = format!("{}:{}", bind_host, port); let addr = addr.parse()?; diff --git a/benchmarks/README.md b/benchmarks/README.md index e347130689b3..0b5ccfc16e46 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -122,7 +122,7 @@ RUST_LOG=info RUSTFLAGS='-C target-cpu=native -C lto -C codegen-units=1 -C embed To run the benchmarks: ```bash -cd $ARROW_HOME/ballista/rust/benchmarks/tpch +cd $ARROW_HOME/benchmarks cargo run --release benchmark ballista --host localhost --port 50050 --query 1 --path $(pwd)/data --format tbl ``` @@ -131,9 +131,9 @@ cargo run --release benchmark ballista --host localhost --port 50050 --query 1 - To start a Rust scheduler and executor using Docker Compose: ```bash -cd $BALLISTA_HOME +cd $ARROW_HOME ./dev/build-rust.sh -cd $BALLISTA_HOME/rust/benchmarks/tpch +cd $ARROW_HOME/benchmarks docker-compose up ``` diff --git a/benchmarks/docker-compose.yaml b/benchmarks/docker-compose.yaml index c13e9eb48c88..74c6703f30b1 100644 --- a/benchmarks/docker-compose.yaml +++ b/benchmarks/docker-compose.yaml @@ -21,7 +21,7 @@ services: command: "etcd -advertise-client-urls http://etcd:2379 -listen-client-urls http://0.0.0.0:2379" ballista-scheduler: image: ballista:0.5.0-SNAPSHOT - command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --port 50050" + command: "/scheduler --config-backend etcd --etcd-urls etcd:2379 --bind-host 0.0.0.0 --bind-port 50050" environment: - 
RUST_LOG=ballista=debug volumes: @@ -30,7 +30,7 @@ services: - etcd ballista-executor: image: ballista:0.5.0-SNAPSHOT - command: "/executor --bind-host 0.0.0.0 --port 50051 --scheduler-host ballista-scheduler" + command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --scheduler-host ballista-scheduler" scale: 2 environment: - RUST_LOG=info diff --git a/docs/user-guide/src/distributed/docker-compose.md b/docs/user-guide/src/distributed/docker-compose.md index de27364fc252..5ea86b5caea4 100644 --- a/docs/user-guide/src/distributed/docker-compose.md +++ b/docs/user-guide/src/distributed/docker-compose.md @@ -33,7 +33,7 @@ services: - "2379:2379" ballista-executor: image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT - command: "/executor --bind-host 0.0.0.0 --port 50051 --local" + command: "/executor --bind-host 0.0.0.0 --bind-port 50051 --local" environment: - RUST_LOG=info ports: diff --git a/docs/user-guide/src/distributed/kubernetes.md b/docs/user-guide/src/distributed/kubernetes.md index 7b9b356dfa42..07b51f7871b6 100644 --- a/docs/user-guide/src/distributed/kubernetes.md +++ b/docs/user-guide/src/distributed/kubernetes.md @@ -24,8 +24,8 @@ you are already comfortable with managing Kubernetes deployments. The k8s deployment consists of: -- k8s stateful set for one or more scheduler processes -- k8s stateful set for one or more executor processes +- k8s deployment for one or more scheduler processes +- k8s deployment for one or more executor processes - k8s service to route traffic to the schedulers - k8s persistent volume and persistent volume claims to make local data accessible to Ballista @@ -38,6 +38,14 @@ Ballista is at an early stage of development and therefore has some significant - Only a single scheduler instance is currently supported unless the scheduler is configured to use `etcd` as a backing store. +## Publishing your images + +Currently there are no official Ballista images that work with the instructions in this guide. 
For the time being, +you will need to build and publish your own images. You can do that by running the `dev/build-ballista-docker.sh` script. + +Once the images have been built, you can retag them with `docker tag ballista:0.5.0-SNAPSHOT YOUR_IMAGE` so you +can push them to your favourite docker registry. + ## Create Persistent Volume and Persistent Volume Claim Copy the following yaml to a `pv.yaml` file and apply to the cluster to create a persistent volume and a persistent @@ -88,7 +96,7 @@ persistentvolumeclaim/data-pv-claim created ## Deploying Ballista Scheduler and Executors -Copy the following yaml to a `cluster.yaml` file. +Copy the following yaml to a `cluster.yaml` file and replace `YOUR_IMAGE` with the name of your Ballista Docker image. ```yaml apiVersion: v1 @@ -101,16 +109,14 @@ spec: ports: - port: 50050 name: scheduler - clusterIP: None selector: app: ballista-scheduler --- apiVersion: apps/v1 -kind: StatefulSet +kind: Deployment metadata: name: ballista-scheduler spec: - serviceName: "ballista-scheduler" replicas: 1 selector: matchLabels: @@ -122,27 +128,26 @@ spec: ballista-cluster: ballista spec: containers: - - name: ballista-scheduler - image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT - command: ["/scheduler"] - args: ["--port=50050"] - ports: - - containerPort: 50050 - name: flight - volumeMounts: - - mountPath: /mnt - name: data + - name: ballista-scheduler + image: YOUR_IMAGE + command: ["/scheduler"] + args: ["--bind-port=50050"] + ports: + - containerPort: 50050 + name: flight + volumeMounts: + - mountPath: /mnt + name: data volumes: - name: data persistentVolumeClaim: claimName: data-pv-claim --- apiVersion: apps/v1 -kind: StatefulSet +kind: Deployment metadata: name: ballista-executor spec: - serviceName: "ballista-scheduler" replicas: 2 selector: matchLabels: @@ -155,20 +160,12 @@ spec: spec: containers: - name: ballista-executor - image: ballistacompute/ballista-rust:0.4.2-SNAPSHOT + image: YOUR_IMAGE command: ["/executor"] args: - [ - "--port=50051", - 
"--scheduler-host=ballista-scheduler", - "--scheduler-port=50050", - "--external-host=$(MY_POD_IP)", - ] - env: - - name: MY_POD_IP - valueFrom: - fieldRef: - fieldPath: status.podIP + - "--bind-port=50051" + - "--scheduler-host=ballista-scheduler" + - "--scheduler-port=50050" ports: - containerPort: 50051 name: flight @@ -189,19 +186,18 @@ This should show the following output: ``` service/ballista-scheduler created -statefulset.apps/ballista-scheduler created -statefulset.apps/ballista-executor created +deployment.apps/ballista-scheduler created +deployment.apps/ballista-executor created ``` You can also check status by running `kubectl get pods`: ```bash $ kubectl get pods -NAME READY STATUS RESTARTS AGE -busybox 1/1 Running 0 16m -ballista-scheduler-0 1/1 Running 0 42s -ballista-executor-0 1/1 Running 2 42s -ballista-executor-1 1/1 Running 0 26s +NAME READY STATUS RESTARTS AGE +ballista-executor-78cc5b6486-4rkn4 0/1 Pending 0 42s +ballista-executor-78cc5b6486-7crdm 0/1 Pending 0 42s +ballista-scheduler-879f874c5-rnbd6 0/1 Pending 0 42s ``` You can view the scheduler logs with `kubectl logs ballista-scheduler-0`: diff --git a/docs/user-guide/src/distributed/raspberrypi.md b/docs/user-guide/src/distributed/raspberrypi.md index 0083d191770b..3bf36c7227a7 100644 --- a/docs/user-guide/src/distributed/raspberrypi.md +++ b/docs/user-guide/src/distributed/raspberrypi.md @@ -116,7 +116,7 @@ Run the benchmarks: ```bash docker run -it myrepo/ballista-arm64 \ /tpch benchmark datafusion --query=1 --path=/path/to/data --format=parquet \ - --concurrency=24 --iterations=1 --debug --host=ballista-scheduler --port=50050 + --concurrency=24 --iterations=1 --debug --host=ballista-scheduler --port=50050 ``` Note that it will be necessary to mount appropriate volumes into the containers and also configure networking diff --git a/docs/user-guide/src/distributed/standalone.md b/docs/user-guide/src/distributed/standalone.md index e9db425dc111..66b6bc835695 100644 --- 
a/docs/user-guide/src/distributed/standalone.md +++ b/docs/user-guide/src/distributed/standalone.md @@ -26,7 +26,7 @@ Start a scheduler using the following syntax: ```bash docker run --network=host \ -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \ - /scheduler --port 50050 + /scheduler --bind-port 50050 ``` Run `docker ps` to check that the process is running: @@ -34,7 +34,7 @@ Run `docker ps` to check that the process is running: ``` $ docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES -59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --port 5…" 6 seconds ago Up 5 seconds affectionate_hofstadter +59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --bind-p…" 6 seconds ago Up 5 seconds affectionate_hofstadter ``` Run `docker logs CONTAINER_ID` to check the output from the process: @@ -51,7 +51,7 @@ Start one or more executor processes. Each executor process will need to listen ```bash docker run --network=host \ -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \ - /executor --external-host localhost --port 50051 + /executor --external-host localhost --bind-port 50051 ``` Use `docker ps` to check that both the scheduer and executor(s) are now running: @@ -60,14 +60,14 @@ Use `docker ps` to check that both the scheduer and executor(s) are now running: $ docker ps CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES 0746ce262a19 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/executor --externa…" 2 seconds ago Up 1 second naughty_mclean -59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --port 5…" 4 minutes ago Up 4 minutes affectionate_hofstadter +59452ce72138 ballistacompute/ballista-rust:0.4.2-SNAPSHOT "/scheduler --bind-p…" 4 minutes ago Up 4 minutes affectionate_hofstadter ``` Use `docker logs CONTAINER_ID` to check the output from the executor(s): ``` $ docker logs 0746ce262a19 -[2021-02-14T18:36:25Z INFO executor] Running with config: ExecutorConfig { host: "localhost", port: 50051, 
work_dir: "/tmp/.tmpVRFSvn", concurrent_tasks: 4 } +[2021-02-14T18:36:25Z INFO executor] Running with config: ExecutorConfig { host: "localhost", bind_port: 50051, work_dir: "/tmp/.tmpVRFSvn", concurrent_tasks: 4 } [2021-02-14T18:36:25Z INFO executor] Ballista v0.4.2-SNAPSHOT Rust Executor listening on 0.0.0.0:50051 [2021-02-14T18:36:25Z INFO executor] Starting registration with scheduler ``` @@ -84,7 +84,7 @@ Ballista can optionally use [etcd](https://etcd.io/) as a backing store for the ```bash docker run --network=host \ -d ballistacompute/ballista-rust:0.4.2-SNAPSHOT \ - /scheduler --port 50050 \ + /scheduler --bind-port 50050 \ --config-backend etcd \ --etcd-urls etcd:2379 ```