/
Dockerfile
75 lines (61 loc) · 2.06 KB
/
Dockerfile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# Base Image
# NOTE(review): ":latest" is not reproducible (hadolint DL3007) — pin spark-base
# to a versioned tag or digest once one is published. The hardcoded
# --platform=linux/amd64 forces emulation on arm64 hosts (e.g. Apple Silicon);
# presumably intentional for dependency compatibility — confirm.
FROM --platform=linux/amd64 spark-base:latest
LABEL maintainer="MarcLamberti"
# Build-time arguments, overridable with `docker build --build-arg`
ARG AIRFLOW_VERSION=2.1.2
ARG AIRFLOW_HOME=/usr/local/airflow
# Persist AIRFLOW_HOME into the runtime environment so Airflow (and the
# entrypoint script) find their install/config directory at container start
ENV AIRFLOW_HOME=${AIRFLOW_HOME}
# Install OS-level build and runtime dependencies (compilers plus the dev
# headers needed to build Airflow's DB/Kerberos/SASL/FFI Python extensions).
# `update` and `install` share one layer so the package index is never stale
# (hadolint DL3009), and the index is deleted in the same layer to keep the
# image small. The former blanket `apt-get upgrade` was removed (hadolint
# DL3005) — pick up security fixes by rebuilding the spark-base image instead.
# Packages are sorted alphabetically, one per line, for diffable reviews.
RUN apt-get update -y && \
    apt-get install -yqq --no-install-recommends \
        apt-utils \
        bind9utils \
        build-essential \
        curl \
        default-libmysqlclient-dev \
        freetds-bin \
        freetds-dev \
        gcc \
        git \
        inetutils-telnet \
        libczmq-dev \
        libffi-dev \
        libkrb5-dev \
        libpq-dev \
        libsasl2-dev \
        libssl-dev \
        netcat \
        python3-dev \
        rsync \
        unzip \
        vim \
        wget \
        zip \
    && apt-get autoremove -yqq --purge \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Constraints file pinning transitive dependency versions for Python 3.7;
# copied separately so this layer caches independently of later steps.
COPY ./requirements-python3.7.txt /requirements-python3.7.txt

# Upgrade pip (pinned for reproducibility), create the airflow user with
# AIRFLOW_HOME as its home directory, then install Airflow with its extras
# plus notebook/data-validation tooling, all held to the constraints file.
# --no-cache-dir keeps pip's download cache out of the image layer (DL3042).
RUN pip install --no-cache-dir --upgrade "pip==20.2.4" && \
    useradd -ms /bin/bash -d ${AIRFLOW_HOME} airflow && \
    pip install --no-cache-dir \
        apache-airflow[crypto,celery,postgres,apache.hive,jdbc,mysql,ssh,docker,apache.hdfs,redis,slack,apache.webhdfs,apache.spark,papermill,http]==${AIRFLOW_VERSION} \
        jupyter \
        great_expectations \
        airflow-provider-great-expectations \
        --constraint /requirements-python3.7.txt

# NOTE(review): this unpinned upgrade runs AFTER the constrained install and
# can pull a scipy version outside the constraints file — consider pinning it
# (or adding it to the constrained install above) for reproducible builds.
RUN pip install --no-cache-dir -U scipy
# Copy the airflow.cfg file (config) — currently disabled
#COPY ./config/airflow.cfg ${AIRFLOW_HOME}/airflow.cfg

# Hand everything under AIRFLOW_HOME (created by useradd above) to the
# airflow user so the service does not need root at runtime.
RUN chown -R airflow: ${AIRFLOW_HOME}

# Copy the entrypoint script already owned by airflow (COPY --chown avoids a
# follow-up chown layer that would double the file's footprint) and mark it
# executable.
# NOTE(review): no WORKDIR is set, so "./start-airflow.sh" lands in "/" —
# confirm that is the intended destination.
COPY --chown=airflow:airflow ./start-airflow.sh ./start-airflow.sh
RUN chmod +x ./start-airflow.sh

# Drop root privileges for all remaining build steps and for the container
# process itself.
USER airflow

# Create the dags folder inside $AIRFLOW_HOME (owned by airflow, since USER
# is already in effect).
RUN mkdir -p ${AIRFLOW_HOME}/dags

# Documentation only — EXPOSE does not publish the port; the Airflow
# webserver listens on 8080 and the operator maps it at `docker run -p`.
EXPOSE 8080

# Exec-form CMD: start-airflow.sh runs as PID 1 and receives SIGTERM directly
# on `docker stop`.
CMD [ "./start-airflow.sh" ]