Update jobservice base image (#136)
* Use Python 3.8 as base image for jobservice

Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

* Fix linting

Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

* Use official Python image for test pipeline

Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

* Use python3 as the default Python version

Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

* Simplify Python package installation

Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

* Use JDK 11 for testing

Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

Co-authored-by: Khor Shu Heng <khor.heng@gojek.com>
khorshuheng committed Apr 12, 2022
1 parent 6499fb2 commit ae6309e
Showing 10 changed files with 12 additions and 13 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pr.yml
@@ -52,7 +52,7 @@ jobs:
      PYSPARK_PYTHON: python3.7
    steps:
      - uses: actions/checkout@v2
-     - name: Set up JDK 8
+     - name: Set up JDK 11
        uses: actions/setup-java@v1
        with:
          java-version: '8'
3 changes: 1 addition & 2 deletions Makefile
@@ -34,8 +34,7 @@ install-python-ci-dependencies:

# Supports feast-dev repo master branch
install-python: install-python-ci-dependencies
-	pip install --user --upgrade setuptools wheel
-	cd ${ROOT_DIR}/python; rm -rf dist; python setup.py install
+	pip install -e python

lint-python:
	cd ${ROOT_DIR}/python ; mypy feast_spark/ tests/
4 changes: 1 addition & 3 deletions infra/docker/jobservice/Dockerfile
@@ -1,4 +1,4 @@
-FROM jupyter/pyspark-notebook:399cbb986c6b
+FROM python:3.8

USER root
WORKDIR /app
@@ -13,9 +13,7 @@ RUN apt-get update && apt-get -y install make git wget
# Install Feast SDK
RUN git init .
COPY README.md README.md
-RUN make install-python-ci-dependencies
RUN make install-python
-RUN pip install "s3fs" "boto3" "urllib3>=1.25.4"

#
# Download grpc_health_probe to run health checks
2 changes: 1 addition & 1 deletion infra/docker/spark/Dockerfile
@@ -8,7 +8,7 @@ ARG VERSION=dev

RUN REVISION=$VERSION make build-ingestion-jar-no-tests

-FROM gcr.io/kf-feast/spark-py:3.0.2 as runtime
+FROM gcr.io/kf-feast/feast-spark-base:v3.0.3 as runtime

ARG VERSION=dev

2 changes: 1 addition & 1 deletion infra/docker/spark/dev.Dockerfile
@@ -1,4 +1,4 @@
-FROM gcr.io/kf-feast/spark-py:3.0.2 as runtime
+FROM gcr.io/kf-feast/feast-spark-base:v3.0.3 as runtime

ARG VERSION=dev

2 changes: 1 addition & 1 deletion infra/scripts/test-end-to-end-sparkop.sh
@@ -63,7 +63,7 @@ kubectl delete pod -n "$NAMESPACE" ci-test-runner 2>/dev/null || true
kubectl run -n "$NAMESPACE" -i ci-test-runner \
--pod-running-timeout=5m \
--restart=Never \
--image="${DOCKER_REPOSITORY}/feast-ci:latest" \
--image="python:3.7" \
--env="FEAST_TELEMETRY=false" \
--env="DISABLE_FEAST_SERVICE_FIXTURES=1" \
--env="DISABLE_SERVICE_FIXTURES=1" \
6 changes: 3 additions & 3 deletions
@@ -331,7 +331,7 @@ def _map_column(df: DataFrame, col_mapping: Dict[str, str]):
        else:
            projection[col_name] = expr(col_mapping.get(col_name, col_name))

-   return df.select([c.alias(a) for a, c in projection.items()])
+   return df.select([c.alias(str(a)) for a, c in projection.items()])


def as_of_join(
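Worth noting on the change above: `Column.alias()` requires a plain `str`, and mypy cannot prove that the projection keys are strings, so the `str(a)` cast keeps the types honest without changing runtime behavior. A minimal sketch of the same projection pattern, assuming a local SparkSession; the DataFrame, column names, and mapping are illustrative, not taken from the repo:

# Minimal sketch of _map_column's projection pattern; the data and
# names below are illustrative assumptions.
from pyspark.sql import SparkSession
from pyspark.sql.functions import expr

spark = SparkSession.builder.master("local[*]").getOrCreate()
df = spark.createDataFrame([(1, 0.5)], ["entity_id", "feature__value"])

# Output column name -> source expression, mirroring col_mapping's role.
col_mapping = {"feature": "feature__value"}
projection = {name: expr(col_mapping.get(name, name)) for name in ["entity_id", "feature"]}

# Column.alias() expects str; str(a) satisfies mypy when the key type
# is not statically known to be str.
mapped = df.select([c.alias(str(a)) for a, c in projection.items()])
mapped.show()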
@@ -545,7 +545,7 @@ def join_entity_to_feature_tables(
        joined_df = as_of_join(
            joined_df, entity_event_timestamp_column, feature_table_df, feature_table,
        )
-       if SparkContext._active_spark_context._jsc.sc().getCheckpointDir().nonEmpty():
+       if SparkContext._active_spark_context._jsc.sc().getCheckpointDir().nonEmpty():  # type: ignore
            joined_df = joined_df.checkpoint()

    return joined_df
@@ -614,7 +614,7 @@ def filter_feature_table_by_time_range(
        )
        .where(col("distance") == col("min_distance"))
    )
-   if SparkContext._active_spark_context._jsc.sc().getCheckpointDir().nonEmpty():
+   if SparkContext._active_spark_context._jsc.sc().getCheckpointDir().nonEmpty():  # type: ignore
        time_range_filtered_df = time_range_filtered_df.checkpoint()

    return time_range_filtered_df
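The two `# type: ignore` comments silence mypy on the untyped py4j bridge (`_jsc` and the objects it returns have no type stubs); the guard itself only checkpoints when a checkpoint directory has been configured. A hedged sketch of the same guard, assuming a local SparkSession and a writable /tmp path:

# Sketch of the checkpoint guard; the session, path, and DataFrame are
# illustrative assumptions, not the job's actual configuration.
from pyspark import SparkContext
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()
spark.sparkContext.setCheckpointDir("/tmp/feast-checkpoints")

df = spark.range(10)
# getCheckpointDir() returns a Scala Option via py4j; nonEmpty() is True
# once setCheckpointDir() has been called. The attribute chain is
# untyped, hence the `# type: ignore` in the diff above.
if SparkContext._active_spark_context._jsc.sc().getCheckpointDir().nonEmpty():  # type: ignore
    df = df.checkpoint()  # truncate the lineage graph via the checkpoint dir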
1 change: 1 addition & 0 deletions python/requirements-ci.txt
@@ -24,3 +24,4 @@ great-expectations==0.13.2
adlfs==0.5.9
redis==4.1.*
Jinja2==3.0.3
+croniter==1.*
1 change: 1 addition & 0 deletions python/setup.py
@@ -53,6 +53,7 @@
"mypy-protobuf==2.5",
"croniter==1.*",
"redis==4.1.*",
"prometheus-client==0.14.0"
]

# README file from Feast repo root directory
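`prometheus-client` is the official Prometheus Python client, pinned here at 0.14.0, presumably for exposing job metrics. A minimal usage sketch of that dependency; the metric name and port are hypothetical, not taken from the job service:

# Illustrative use of the prometheus-client dependency pinned above;
# the metric name and port are assumptions for the example.
from prometheus_client import Counter, start_http_server

jobs_submitted = Counter("jobs_submitted_total", "Spark jobs submitted by the job service")

start_http_server(8080)  # serve /metrics on port 8080
jobs_submitted.inc()     # increment once per submitted job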
2 changes: 1 addition & 1 deletion spark/ingestion/src/test/resources/python/setup.sh
@@ -14,5 +14,5 @@ fi

# 2. Pickle python udf
cd $CURRENT_PATH
-pip3 install great-expectations==0.13.2 setuptools pyspark==3.0.1
+pip3 install great-expectations==0.13.2 setuptools pyspark==3.0.1 Jinja2==3.0.3 pyarrow==2.0.0
python3 udf.py $DESTINATION/udf.pickle
