Skip to content

Commit

Permalink
Add a Docker image for Spark in Scala (#2)
Browse files Browse the repository at this point in the history
* build: add a Docker image with sbt installed + add gitignore
  • Loading branch information
FreddieMercuryDKT committed Jul 19, 2023
1 parent 7ed5e67 commit 2b28ec9
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 1 deletion.
39 changes: 38 additions & 1 deletion .github/workflows/docker.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Docker
name: Push Docker images on Docker Hub

on:
push:
Expand Down Expand Up @@ -43,6 +43,43 @@ jobs:
cache-from: type=gha
cache-to: type=gha,mode=max

# Build and push one sbt-enabled image per sbt release listed in the matrix.
# Waits for build_base_images; presumably that job publishes the
# infrahelpers/dpp:jdk8 image the Scala Dockerfile builds FROM (confirm).
build_scala_images:
  needs: build_base_images
  strategy:
    matrix:
      # Quoted so the versions are always read as strings, never as numbers
      sbt_version: ["1.8.2", "1.8.3", "1.9.0", "1.9.1", "1.9.2"]
  environment: docker-hub
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v3

    - name: Login to Docker Hub
      uses: docker/login-action@v2
      with:
        username: ${{ secrets.DOCKERHUB_USERNAME }}
        password: ${{ secrets.DOCKERHUB_TOKEN }}

    - name: Set up Docker Buildx
      id: buildx
      uses: docker/setup-buildx-action@v2

    - name: Build corretto with sbt installed
      id: docker_build_corretto_sbt
      uses: docker/build-push-action@v4
      with:
        builder: ${{ steps.buildx.outputs.name }}
        context: ./corretto-emr-dbs-universal-spark-scala
        file: ./corretto-emr-dbs-universal-spark-scala/Dockerfile
        # Only Java 8 is supported for Spark
        build-args: |
          JDK_VERSION=8
          SBT_VERSION=${{ matrix.sbt_version }}
        push: true
        tags: infrahelpers/dpp:jdk8-sbt${{ matrix.sbt_version }}
        # One cache scope per matrix leg: without an explicit scope every
        # leg writes to the same default scope and overwrites the others.
        cache-from: type=gha,scope=scala-sbt${{ matrix.sbt_version }}
        cache-to: type=gha,mode=max,scope=scala-sbt${{ matrix.sbt_version }}

build_python_images:
needs: build_base_images
strategy:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.idea
21 changes: 21 additions & 0 deletions corretto-emr-dbs-universal-spark-scala/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#
# Source: https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-spark-scala/Dockerfile
# On Docker Hub: https://hub.docker.com/repository/docker/infrahelpers/dpp/general
# Usual Docker tags:
# * infrahelpers/dpp:jdk8-sbt{SBT_VERSION}
#
# Image containing an sbt installation, to be used by EMR and Databricks (for Spark in Scala)
# See https://github.com/data-engineering-helpers/dpp-images/tree/main/corretto-emr-dbs-universal-base/Dockerfile
# for more details about the base image (tag: infrahelpers/dpp:jdk{JDK_VERSION})
#
# JDK flavour of the base image. Defaults to 8, so builds that do not pass
# JDK_VERSION still start from infrahelpers/dpp:jdk8 exactly as before.
ARG JDK_VERSION=8
FROM infrahelpers/dpp:jdk${JDK_VERSION}

# Version of sbt to install (e.g. 1.9.2); must be supplied with
# --build-arg SBT_VERSION=<version>, there is no default.
ARG SBT_VERSION

LABEL authors="Antoine Chenon<antoine.chenon@decathlon.com>"

# Update the OS
RUN yum -y update && yum clean all

# Install sbt
# - fail early with a clear message when SBT_VERSION is missing, instead of
#   letting yum look for a package named "sbt--0"
# - chain with && so that a failing step aborts the build right away
# - curl -f turns an HTTP error into a failure rather than saving the error
#   page as the repository definition
# - clean the yum cache in the same layer to keep the image small
RUN if [ -z "${SBT_VERSION}" ]; then \
      echo "The SBT_VERSION build argument is required" >&2; exit 1; \
    fi \
 && rm -f /etc/yum.repos.d/bintray-rpm.repo \
 && curl -fsSL https://www.scala-sbt.org/sbt-rpm.repo \
      -o /etc/yum.repos.d/sbt-rpm.repo \
 && yum -y install "sbt-${SBT_VERSION}-0" \
 && yum clean all

0 comments on commit 2b28ec9

Please sign in to comment.