diff --git a/examples/localstack.yml b/examples/localstack.yml
index d461280d..1c3088b1 100644
--- a/examples/localstack.yml
+++ b/examples/localstack.yml
@@ -39,6 +39,7 @@ spec:
         - -c
         - >
           awslocal s3 mb s3://spark-events;
+          awslocal s3 mb s3://ingest;
           awslocal s3 mb s3://data;
           awslocal s3 cp /opt/code/localstack/Makefile s3://data/
 ---
diff --git a/examples/stream-word-count.yaml b/examples/stream-word-count.yaml
new file mode 100644
index 00000000..039b84e0
--- /dev/null
+++ b/examples/stream-word-count.yaml
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+apiVersion: spark.apache.org/v1beta1
+kind: SparkApplication
+metadata:
+  name: stream-word-count
+spec:
+  mainClass: "org.apache.spark.examples.streaming.HdfsWordCount"
+  jars: "local:///opt/spark/examples/jars/spark-examples.jar"
+  driverArgs: [ "s3a://ingest" ]
+  sparkConf:
+    spark.jars.packages: "org.apache.hadoop:hadoop-aws:3.4.1"
+    spark.jars.ivy: "/tmp/.ivy2.5.2"
+    spark.dynamicAllocation.enabled: "true"
+    spark.dynamicAllocation.shuffleTracking.enabled: "true"
+    spark.dynamicAllocation.maxExecutors: "3"
+    spark.kubernetes.authenticate.driver.serviceAccountName: "spark"
+    spark.kubernetes.container.image: "apache/spark:4.0.0-java21-scala"
+    spark.log.level: "WARN"
+    spark.eventLog.enabled: "true"
+    spark.eventLog.dir: "s3a://spark-events/"
+    spark.eventLog.rolling.maxFileSize: "10m"
+    spark.hadoop.fs.s3a.endpoint: "http://localstack:4566"
+    spark.hadoop.fs.s3a.path.style.access: "true"
+    spark.hadoop.fs.s3a.access.key: "test"
+    spark.hadoop.fs.s3a.secret.key: "test"
+  runtimeVersions:
+    sparkVersion: "4.0.0"
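
For context (not part of the patch): the HdfsWordCount class referenced by mainClass is the stock Spark Streaming example that watches a directory for new text files and prints per-batch word counts, which is why driverArgs points at s3a://ingest and why the LocalStack init script above now creates that bucket. A rough paraphrase of its logic, as a sketch rather than the exact upstream source:

```scala
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object HdfsWordCountSketch {
  def main(args: Array[String]): Unit = {
    // args(0) is the directory to watch, e.g. s3a://ingest (passed via driverArgs above)
    val sparkConf = new SparkConf().setAppName("HdfsWordCount")
    val ssc = new StreamingContext(sparkConf, Seconds(2))

    // Every new file that lands in the watched directory is read as part of the next micro-batch
    val lines = ssc.textFileStream(args(0))
    val wordCounts = lines.flatMap(_.split(" ")).map(word => (word, 1)).reduceByKey(_ + _)
    wordCounts.print()

    ssc.start()
    ssc.awaitTermination()
  }
}
```

Once the SparkApplication is running, copying a text file into the ingest bucket (for example with awslocal s3 cp against the LocalStack endpoint) should surface word counts in the driver log.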