Skip to content

Commit

Permalink
Merge pull request #1 from data-engineering-helpers/refactor/mutualiz…
Browse files Browse the repository at this point in the history
…e-dockerfiles

Refactor/mutualize dockerfiles
  • Loading branch information
nicolasgibaud committed May 30, 2023
2 parents ab15fab + 547e9dd commit e02902a
Show file tree
Hide file tree
Showing 10 changed files with 77 additions and 357 deletions.
74 changes: 54 additions & 20 deletions .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@ name: Docker

on:
push:
branches: main
branches:
- main

jobs:
docker_build:
build_base_images:
strategy:
matrix:
jdk_version: [8, 11]
environment: docker-hub
runs-on: ubuntu-latest
steps:
Expand All @@ -22,36 +26,66 @@ jobs:
id: buildx
uses: docker/setup-buildx-action@v2

#- name: Lint pyspark-coretto-8-emr-dbs-universal-python
# uses: hadolint/hadolint-action@v3.1.0
# with:
# dockerfile: pyspark-coretto-8-emr-dbs-universal-python/Dockerfile
# failure-threshold: error

- name: Run privileged
run: sudo docker run --privileged --rm tonistiigi/binfmt --install arm64

- name: Build pyspark-coretto-8-emr-dbs-universal-python
id: docker_build_base
- name: Build base corretto image
id: docker_build_corretto_base
uses: docker/build-push-action@v4
with:
builder: ${{ steps.buildx.outputs.name }}
context: ./pyspark-coretto-8-emr-dbs-universal-python
file: ./pyspark-coretto-8-emr-dbs-universal-python/Dockerfile
push: false
tags: infrahelpers/dpp:pyspark-emr-dbs-univ
context: ./corretto-emr-dbs-universal-base
file: ./corretto-emr-dbs-universal-base/Dockerfile
build-args: |
JDK_VERSION=${{ matrix.jdk_version }}
push: true
tags: infrahelpers/dpp:jdk${{ matrix.jdk_version }}
cache-from: type=gha
cache-to: type=gha,mode=max

- name: Build pyspark-py311
id: docker_build_py311
build_python_images:
needs: build_base_images
strategy:
matrix:
jdk_version: [8, 11]
python_micro_version: [3.8.16, 3.9.16, 3.10.11, 3.11.3 ] # Use the latest micro versions of each minor version
environment: docker-hub
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3

- name: Login to Docker Hub
uses: docker/login-action@v2
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Set up Docker Buildx
id: buildx
uses: docker/setup-buildx-action@v2

- name: Extract Minor Version
id: extract_minor_version
run: |
minor_version=$(echo "${{ matrix.python_micro_version }}" | cut -d. -f1-2)
echo "minor_version=${minor_version}" >> $GITHUB_OUTPUT
- name: Build corretto with EMR / DBS python installed
id: docker_build_corretto_python
uses: docker/build-push-action@v4
with:
builder: ${{ steps.buildx.outputs.name }}
context: ./pyspark-py311
file: ./pyspark-py311/Dockerfile
push: false
tags: infrahelpers/dpp:py311
context: ./corretto-emr-dbs-universal-pyspark
file: ./corretto-emr-dbs-universal-pyspark/Dockerfile
build-args: |
JDK_VERSION=${{ matrix.jdk_version }}
PYTHON_MINOR_VERSION=${{ steps.extract_minor_version.outputs.minor_version }}
PYTHON_MICRO_VERSION=${{ matrix.python_micro_version }}
push: true
tags: |
infrahelpers/dpp:jdk${{ matrix.jdk_version }}-python${{ steps.extract_minor_version.outputs.minor_version }}
infrahelpers/dpp:jdk${{ matrix.jdk_version }}-python${{ matrix.python_micro_version }}
cache-from: type=gha
cache-to: type=gha,mode=max

Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/pyspark-coretto-8-emr-dbs-universal-python/Dockerfile
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-base/Dockerfile
# On Docker Hub: https://hub.docker.com/repository/docker/infrahelpers/dpp/general
# Usual Docker tag: pyspark-emr-dbs-univ (infrahelpers/dpp:pyspark-emr-dbs-univ)
# Convention for the tags of the generated images:
# * infrahelpers/dpp:jdk{JDK_VERSION} e.g.:
# * infrahelpers/dpp:jdk8
# * infrahelpers/dpp:jdk11
#
# Base image for Data Processing Pipelines (DPP), with images
# for specific Python versions
Expand Down Expand Up @@ -40,9 +43,10 @@
# /databricks/python3, and Python is the main one (pristine, installed manually
# by that container image)
#
FROM amazoncorretto:8
ARG JDK_VERSION
FROM amazoncorretto:${JDK_VERSION}

LABEL authors "Denis Arnaud <denis.arnaud_fedora@m4x.org>"
LABEL authors="Denis Arnaud <denis.arnaud_fedora@m4x.org>, Nicolas Gibaud <nicolas.gibaud.partner@decathlon.com>"

# Environment
ENV container="docker"
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,24 +1,30 @@
#
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-pyspark/Dockerfile
# On Docker Hub: https://hub.docker.com/repository/docker/infrahelpers/dpp/general
# Usual Docker tag: py310 (infrahelpers/dpp:py310)
# Usual Docker tags:
# * infrahelpers/dpp:jdk{JDK_VERSION}-python{PYTHON_MINOR_VERSION}
# * infrahelpers/dpp:jdk{JDK_VERSION}-python{PYTHON_MICRO_VERSION}
#
# Specific image for Python 3.10 based Data Processing Pipelines (DPP)
# Image containing python installation, to be accessed by EMR and Databricks (for pyspark)
# See https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-base/Dockerfile
# for more details about the base image (tag: infrahelpers/dpp:jdk{JDK_VERSION})
#
# See https://github.com/data-engineering-helpers/dpp-images/tree/main/pyspark-coretto-8-emr-dbs-universal-python/Dockerfile
# for more details about the base image (tag: infrahelpers/dpp:pyspark-emr-dbs-univ)
#
FROM infrahelpers/dpp:pyspark-emr-dbs-univ
ARG JDK_VERSION

FROM infrahelpers/dpp:jdk${JDK_VERSION}

LABEL authors="Denis Arnaud <denis.arnaud_fedora@m4x.org>, Nicolas Gibaud <nicolas.gibaud.partner@decathlon.com>"

LABEL authors "Denis Arnaud <denis.arnaud_fedora@m4x.org>"
ARG PYTHON_MINOR_VERSION
ARG PYTHON_MICRO_VERSION

# Environment
ENV container="docker"
ENV HOME="/root"
ENV HOMEUSR="/home/ubuntu"
ENV PYSPARK_DRIVER_PYTHON="python3"
ENV PYTHON_MINOR_VERSION="3.10"
ENV PYTHON_MICRO_VERSION="${PYTHON_MINOR_VERSION}.11"
ENV PYTHON_MINOR_VERSION=$PYTHON_MINOR_VERSION
ENV PYTHON_MICRO_VERSION=$PYTHON_MICRO_VERSION
ENV PYSPARK_PYTHON="/databricks/python3/bin/python3"

# Update the OS
Expand Down
108 changes: 0 additions & 108 deletions pyspark-coretto-11-emr-dbs-universal-python/Dockerfile

This file was deleted.

20 changes: 0 additions & 20 deletions pyspark-coretto-8-emr-dbs-universal-python/bashrc

This file was deleted.

49 changes: 0 additions & 49 deletions pyspark-py311-jdk11/Dockerfile

This file was deleted.

49 changes: 0 additions & 49 deletions pyspark-py311/Dockerfile

This file was deleted.

Loading

0 comments on commit e02902a

Please sign in to comment.