Skip to content

Commit

Permalink
Bundle python dependencies for spark images (#137)
Browse files Browse the repository at this point in the history
Signed-off-by: Khor Shu Heng <khor.heng@gojek.com>

Co-authored-by: Khor Shu Heng <khor.heng@gojek.com>
  • Loading branch information
khorshuheng and khorshuheng committed Apr 21, 2022
1 parent ae6309e commit eee8af8
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 7 deletions.
4 changes: 2 additions & 2 deletions infra/docker/spark/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@ RUN echo 'spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible
RUN echo 'spark.executor.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf

# python dependencies
RUN pip3 install -U pip wheel
RUN pip3 install pandas pyarrow==2.0.0 'numpy<1.20.0'
RUN pip3 install pandas==1.3.5 great-expectations==0.13.2 pyarrow==2.0.0 Jinja2==3.0.3 datadog==0.44.0 'numpy<1.20.0'

# For logging to /dev/termination-log
RUN mkdir -p /dev


ENTRYPOINT [ "/opt/entrypoint.sh" ]
5 changes: 2 additions & 3 deletions infra/docker/spark/dev.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@ RUN mkdir -p /opt/spark/conf
RUN echo 'spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
RUN echo 'spark.driver.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
RUN echo 'spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
RUN echo 'spark.executor.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf
RUN echo 'spark.executor.extraJavaOptions="-Dcom.google.cloud.spark.bigquery.repackaged.io.netty.tryReflectionSetAccessible=true"' >> $SPARK_HOME/conf/spark-defaults.conf

# python dependencies
RUN pip3 install -U pip wheel
RUN pip3 install pandas pyarrow==2.0.0 'numpy<1.20.0'
RUN pip3 install pandas==1.3.5 great-expectations==0.13.2 pyarrow==2.0.0 Jinja2==3.0.3 datadog==0.44.0 'numpy<1.20.0'

# For logging to /dev/termination-log
RUN mkdir -p /dev
Expand Down
8 changes: 6 additions & 2 deletions tests/e2e/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ def test_validation_with_ge(
expectations = ge_ds.get_expectation_suite()

udf = create_validation_udf("testUDF", expectations, feature_table)
apply_validation(feast_client, feature_table, udf, validation_window_secs=1)
apply_validation(
feast_client, feature_table, udf, validation_window_secs=1, include_py_libs=""
)

job = start_job(feast_spark_client, feature_table, pytestconfig)

Expand Down Expand Up @@ -123,7 +125,9 @@ def test_validation_reports_metrics(
expectations = ge_ds.get_expectation_suite()

udf = create_validation_udf("testUDF", expectations, feature_table)
apply_validation(feast_client, feature_table, udf, validation_window_secs=10)
apply_validation(
feast_client, feature_table, udf, validation_window_secs=10, include_py_libs=""
)

job = start_job(feast_spark_client, feature_table, pytestconfig)

Expand Down

0 comments on commit eee8af8

Please sign in to comment.