Skip to content

Commit

Permalink
/usr/bin/python3 is now symlinked rather than copied
Browse files Browse the repository at this point in the history
  • Loading branch information
da115115 committed Nov 29, 2023
1 parent 786a9d7 commit 2479474
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 14 deletions.
4 changes: 2 additions & 2 deletions corretto-emr-dbs-universal-base/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-base/Dockerfile
# On Docker Hub: https://hub.docker.com/repository/docker/infrahelpers/dpp/general
# Convention for the tags of the generated images:
# Convention for the tags of the generated images:
# * infrahelpers/dpp:jdk{JDK_VERSION} e.g.:
# * infrahelpers/dpp:jdk8
# * infrahelpers/dpp:jdk11
Expand All @@ -17,7 +17,7 @@
# (see the pyspark-py3X/ directories), with specific versions.
# Note that:
# * DataBricks uses Python 3.8 internally by default
# * AWS EMR uses Python 3.7.10 by default
# * AWS EMR uses Python 3.7.16 by default
#
# AWS Corretto / EMR
# ==================
Expand Down
15 changes: 8 additions & 7 deletions corretto-emr-dbs-universal-pyspark/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/pyspark-py310/Dockerfile
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-pyspark/Dockerfile
# On Docker Hub: https://hub.docker.com/repository/docker/infrahelpers/dpp/general
# Usual Docker tags:
# * infrahelpers/dpp:jdk{JDK_VERSION}-python{PYTHON_MINOR_VERSION}
# * infrahelpers/dpp:jdk{JDK_VERSION}-python{PYTHON_MICRO_VERSION}
#
# Image containing python installation, to be accessed by EMR and Databricks (for pyspark)
# Image containing python installation, to be accessed by EMR and Databricks (for PySpark)
# See https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-base/Dockerfile
# for more details about the base image (tag: infrahelpers/dpp:jdk{JDK_VERSION})
#
Expand All @@ -28,7 +28,7 @@ ENV PYTHON_MICRO_VERSION=$PYTHON_MICRO_VERSION
ENV PYSPARK_PYTHON="/databricks/python3/bin/python3"

# Update the OS
RUN yum -y update && yum clean all
RUN yum -y update && yum clean all && rm -rf /var/cache/yum

# Install the PYTHON_MICRO_VERSION version of Python
RUN curl -kLs \
Expand All @@ -38,13 +38,14 @@ RUN curl -kLs \
rm -f Python-${PYTHON_MICRO_VERSION}.tgz && \
cd Python-${PYTHON_MICRO_VERSION} && \
./configure --prefix=/usr --enable-optimizations && \
make && \
make altinstall
make && make altinstall && \
cd .. && rm -rf Python-${PYTHON_MICRO_VERSION}

# Set the PYTHON_MICRO_VERSION version of Python as system Python
# This is what is used by AWS EMR
RUN cp -f /usr/bin/python${PYTHON_MINOR_VERSION} /usr/bin/python3 && \
python3 --version
RUN cd /usr/bin && \
rm -f /usr/bin/python3 && ln -s python${PYTHON_MINOR_VERSION} python3 \
&& cd .. && python -V && python3 -V

# Install a virtual environment in /databricks/python3
RUN python3 -mpip install -U pip && python3 -mpip install virtualenv && \
Expand Down
14 changes: 9 additions & 5 deletions corretto-emr-dbs-universal-spark-scala/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
#
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/pyspark-py310/Dockerfile
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-spark-scala/Dockerfile
# On Docker Hub: https://hub.docker.com/repository/docker/infrahelpers/dpp/general
# Usual Docker tags:
# * infrahelpers/dpp:jdk8-sbt{SBT_VERSION}
#
# Image containing python installation, to be accessed by EMR and Databricks (for spark scala)
# Image containing python installation, to be accessed by EMR and Databricks (for Spark Scala)
# See https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-base/Dockerfile
# for more details about the base image (tag: infrahelpers/dpp:jdk{JDK_VERSION})
#
FROM infrahelpers/dpp:jdk8

ARG SBT_VERSION

LABEL authors="Antoine Chenon<antoine.chenon@decathlon.com>"
LABEL authors="Antoine Chenon<antoine.chenon@decathlon.com>, Denis Arnaud <denis.arnaud_fedora@m4x.org>"

# Update the OS
RUN yum -y update && yum clean all
RUN yum -y update && yum clean all && rm -rf /var/cache/yum

# Install sbt
RUN rm -f /etc/yum.repos.d/bintray-rpm.repo; curl -L https://www.scala-sbt.org/sbt-rpm.repo > sbt-rpm.repo; mv sbt-rpm.repo /etc/yum.repos.d/; yum -y install sbt-${SBT_VERSION}-0
RUN rm -f /etc/yum.repos.d/bintray-rpm.repo && \
curl -kLs https://www.scala-sbt.org/sbt-rpm.repo -o /etc/yum.repos.d/sbt-rpm.repo && \
yum -y install sbt-${SBT_VERSION}-0 && \
sbt -version

0 comments on commit 2479474

Please sign in to comment.