Skip to content

Commit

Permalink
Update spark version to 3.1.3 (#134)
Browse files: browse the repository at this point in the history
Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

Co-authored-by: Khor Shu Heng <khor.heng@gojek.com>
  • Loading branch information
khorshuheng and khorshuheng committed Apr 21, 2022
1 parent eee8af8 commit ed4322d
Show file tree
Hide file tree
Showing 13 changed files with 41 additions and 32 deletions.
15 changes: 13 additions & 2 deletions .github/workflows/master_only.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,22 @@ jobs:
run: make lint-python

unit-test-python:
runs-on: [ubuntu-latest]
runs-on: ubuntu-latest
needs: lint-python
container: gcr.io/kf-feast/feast-ci:latest
env:
PYSPARK_PYTHON: python3.8
steps:
- uses: actions/checkout@v2
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: '11'
java-package: jdk
architecture: x64
- uses: actions/setup-python@v2
with:
python-version: '3.8'
architecture: 'x64'
- name: Install python
run: make install-python
- name: Test python
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/pr.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ jobs:
architecture: x64
- uses: actions/setup-python@v2
with:
python-version: '3.6'
python-version: '3.8'
architecture: 'x64'
- uses: actions/cache@v2
with:
Expand All @@ -36,7 +36,7 @@ jobs:
run: make test-java

lint-python:
container: gcr.io/kf-feast/feast-ci:latest
container: python:3.8
runs-on: [ubuntu-latest]
steps:
- uses: actions/checkout@v2
Expand All @@ -49,18 +49,18 @@ jobs:
runs-on: ubuntu-latest
needs: lint-python
env:
PYSPARK_PYTHON: python3.7
PYSPARK_PYTHON: python3.8
steps:
- uses: actions/checkout@v2
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: '8'
java-version: '11'
java-package: jdk
architecture: x64
- uses: actions/setup-python@v2
with:
python-version: '3.7'
python-version: '3.8'
architecture: 'x64'
- name: Install python
run: make install-python
Expand Down
9 changes: 2 additions & 7 deletions infra/docker/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,22 +8,17 @@ ARG VERSION=dev

RUN REVISION=$VERSION make build-ingestion-jar-no-tests

FROM gcr.io/kf-feast/feast-spark-base:v3.0.3 as runtime
FROM gcr.io/kf-feast/feast-spark-base:v3.1.3 as runtime

ARG VERSION=dev

ARG HADOOP_AWS_VERSION=3.2.1
ARG AWS_JAVA_SDK_VERSION=1.11.874
ARG TFRECORD_VERSION=0.3.0
ARG GCS_CONNECTOR_VERSION=2.0.1
ARG GCS_CONNECTOR_VERSION=2.2.5
ARG BQ_CONNECTOR_VERSION=0.18.1

COPY --from=builder /build/spark/ingestion/target/feast-ingestion-spark-${VERSION}.jar /opt/spark/jars

USER root
# Add HADOOP_AWS_JAR and AWS_JAVA_SDK
ADD https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_AWS_VERSION}/hadoop-aws-${HADOOP_AWS_VERSION}.jar /opt/spark/jars
ADD https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_JAVA_SDK_VERSION}/aws-java-sdk-bundle-${AWS_JAVA_SDK_VERSION}.jar /opt/spark/jars
ADD https://repo1.maven.org/maven2/com/linkedin/sparktfrecord/spark-tfrecord_2.12/${TFRECORD_VERSION}/spark-tfrecord_2.12-${TFRECORD_VERSION}.jar /opt/spark/jars
ADD https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-${GCS_CONNECTOR_VERSION}.jar /opt/spark/jars
ADD https://repo1.maven.org/maven2/com/google/cloud/spark/spark-bigquery-with-dependencies_2.12/${BQ_CONNECTOR_VERSION}/spark-bigquery-with-dependencies_2.12-${BQ_CONNECTOR_VERSION}.jar /opt/spark/jars
Expand Down
7 changes: 1 addition & 6 deletions infra/docker/spark/dev.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
FROM gcr.io/kf-feast/feast-spark-base:v3.0.3 as runtime
FROM gcr.io/kf-feast/feast-spark-base:v3.1.3 as runtime

ARG VERSION=dev

ARG HADOOP_AWS_VERSION=3.2.1
ARG AWS_JAVA_SDK_VERSION=1.11.874
ARG TFRECORD_VERSION=0.3.0
ARG GCS_CONNECTOR_VERSION=2.0.1
ARG BQ_CONNECTOR_VERSION=0.18.1

USER root
# Add HADOOP_AWS_JAR and AWS_JAVA_SDK
ADD https://repo1.maven.org/maven2/org/apache/hadoop/hadoop-aws/${HADOOP_AWS_VERSION}/hadoop-aws-${HADOOP_AWS_VERSION}.jar /opt/spark/jars
ADD https://repo1.maven.org/maven2/com/amazonaws/aws-java-sdk-bundle/${AWS_JAVA_SDK_VERSION}/aws-java-sdk-bundle-${AWS_JAVA_SDK_VERSION}.jar /opt/spark/jars
ADD https://repo1.maven.org/maven2/com/linkedin/sparktfrecord/spark-tfrecord_2.12/${TFRECORD_VERSION}/spark-tfrecord_2.12-${TFRECORD_VERSION}.jar /opt/spark/jars
ADD https://storage.googleapis.com/hadoop-lib/gcs/gcs-connector-hadoop2-${GCS_CONNECTOR_VERSION}.jar /opt/spark/jars
ADD https://repo1.maven.org/maven2/com/google/cloud/spark/spark-bigquery-with-dependencies_2.12/${BQ_CONNECTOR_VERSION}/spark-bigquery-with-dependencies_2.12-${BQ_CONNECTOR_VERSION}.jar /opt/spark/jars
Expand Down
2 changes: 1 addition & 1 deletion infra/scripts/helm/k8s-jobservice.tpl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ feast-jobservice:
"fs.gs.project.id": "kf-feast"
"google.cloud.auth.service.account.enable": "true"
"google.cloud.auth.service.account.json.keyfile": "/mnt/secrets/credentials.json"
sparkVersion: "3.0.2"
sparkVersion: "3.1.3"
timeToLiveSeconds: 3600
pythonVersion: "3"
restartPolicy:
Expand Down
2 changes: 1 addition & 1 deletion infra/scripts/test-end-to-end-sparkop.sh
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ kubectl delete pod -n "$NAMESPACE" ci-test-runner 2>/dev/null || true
kubectl run -n "$NAMESPACE" -i ci-test-runner \
--pod-running-timeout=5m \
--restart=Never \
--image="python:3.7" \
--image="python:3.8" \
--env="FEAST_TELEMETRY=false" \
--env="DISABLE_FEAST_SERVICE_FIXTURES=1" \
--env="DISABLE_SERVICE_FIXTURES=1" \
Expand Down
6 changes: 3 additions & 3 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<scala.version>2.12</scala.version>
<scala.fullVersion>${scala.version}.12</scala.fullVersion>
<spark.version>3.0.2</spark.version>
<scala.fullVersion>${scala.version}.10</scala.fullVersion>
<spark.version>3.1.3</spark.version>
<scala-maven-plugin.version>4.4.0</scala-maven-plugin.version>
<maven-assembly-plugin.version>3.3.0</maven-assembly-plugin.version>
<protobuf.version>3.12.2</protobuf.version>
<commons.lang3.version>3.10</commons.lang3.version>
<hbase.version>2.1.10</hbase.version>
<hbase.version>2.4.11</hbase.version>

<license.content><![CDATA[
/*
Expand Down
2 changes: 1 addition & 1 deletion python/requirements-ci.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ flake8
black==19.10b0
isort>=5
grpcio-tools==1.31.0
pyspark==3.0.1
pyspark==3.1.3
pandas~=1.0.0
mock==2.0.0
pandavro==1.5.*
Expand Down
2 changes: 2 additions & 0 deletions python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
"mypy-protobuf==2.5",
"croniter==1.*",
"redis==4.1.*",
"pyarrow==2.0.0",
"pandas==1.0.5",
"prometheus-client==0.14.0"
]

Expand Down
Empty file added python/tests/__init__.py
Empty file.
6 changes: 3 additions & 3 deletions spark/ingestion/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@
<dependency>
<groupId>com.google.cloud.bigtable</groupId>
<artifactId>bigtable-hbase-2.x-hadoop</artifactId>
<version>1.19.0</version>
<version>1.26.3</version>
<exclusions>
<exclusion>
<groupId>org.apache.hadoop</groupId>
Expand Down Expand Up @@ -186,7 +186,7 @@
<dependency>
<groupId>com.datastax.spark</groupId>
<artifactId>spark-cassandra-connector_${scala.version}</artifactId>
<version>3.0.0</version>
<version>3.1.0</version>
</dependency>

<dependency>
Expand All @@ -204,7 +204,7 @@
<dependency>
<groupId>org.json4s</groupId>
<artifactId>json4s-ext_${scala.version}</artifactId>
<version>3.7.0-M6</version>
<version>3.7.0-M5</version>
</dependency>

<dependency>
Expand Down
4 changes: 2 additions & 2 deletions spark/ingestion/src/test/resources/python/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ if [[ -f "$DESTINATION/libs.tar.gz" ]]; then
echo "$DESTINATION/libs.tar.gz exists."
else
tmp_dir=$(mktemp -d)
pip3 install -t ${tmp_dir}/libs great-expectations==0.13.2 pyarrow==2.0.0
pip3 install -t ${tmp_dir}/libs great-expectations==0.13.2 pyarrow==2.0.0 Jinja2==3.0.3
cd $tmp_dir && tar -czf libs.tar.gz libs/ && mv libs.tar.gz $DESTINATION/libs.tar.gz
fi

# 2. Pickle python udf
cd $CURRENT_PATH
pip3 install great-expectations==0.13.2 setuptools pyspark==3.0.1 Jinja2==3.0.3 pyarrow==2.0.0
pip3 install great-expectations==0.13.2 setuptools pyspark==3.1.3 Jinja2==3.0.3 pyarrow==2.0.0
python3 udf.py $DESTINATION/udf.pickle
8 changes: 7 additions & 1 deletion tests/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,10 @@ pytest-postgresql==2.5.1
pytest-redis==2.0.0
pytest-kafka==0.4.0
deepdiff==4.3.2
kafka-python==2.0.2
kafka-python==2.0.2
great-expectations==0.13.2
Jinja2==3.0.3
pandavro==1.5.*
avro==1.10.0
pyspark==3.1.3
gcsfs

0 comments on commit ed4322d

Please sign in to comment.