From 50ab50d282761bac36003d27ff3c638ceeeb663a Mon Sep 17 00:00:00 2001
From: Abraham Toriz
Date: Fri, 29 Apr 2022 10:39:20 +0800
Subject: [PATCH 1/2] proposal of a Containerfile for datapusher-plus

---
 Containerfile                     | 70 +++++++++++++++++++++++++++++++
 container/initialize-and-start.sh | 23 ++++++++++
 container/uwsgi.ini               | 16 +++++++
 datapusher/settings.py            |  2 +-
 4 files changed, 110 insertions(+), 1 deletion(-)
 create mode 100644 Containerfile
 create mode 100644 container/initialize-and-start.sh
 create mode 100644 container/uwsgi.ini

diff --git a/Containerfile b/Containerfile
new file mode 100644
index 00000000..b2eb0346
--- /dev/null
+++ b/Containerfile
@@ -0,0 +1,70 @@
+FROM ubuntu:20.04
+
+# Set timezone
+ENV TZ=UTC
+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+
+# Setting the locale (apt lists are removed in the same layer that fetched them)
+ENV LC_ALL=en_US.UTF-8
+RUN apt-get update && apt-get install --no-install-recommends -y locales && rm -rf /var/lib/apt/lists/*
+RUN sed -i "/$LC_ALL/s/^# //g" /etc/locale.gen
+RUN dpkg-reconfigure --frontend=noninteractive locales
+RUN update-locale LANG=${LC_ALL}
+
+# Install required system packages (recommended packages are kept on purpose)
+RUN apt-get -q -y update \
+    && DEBIAN_FRONTEND=noninteractive apt-get -q -y upgrade \
+    && DEBIAN_FRONTEND=noninteractive apt-get -q -y install \
+        python3 \
+        pip \
+        virtualenv \
+        postgresql-client \
+        unzip \
+        wget \
+    && apt-get -q clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Define environment variables
+ENV DATAPUSHER_HOME=/usr/lib/ckan/datapusher
+ENV DATAPUSHER_CODE=$DATAPUSHER_HOME/code
+ENV DATAPUSHER_VENV=$DATAPUSHER_HOME/venv
+ENV DATAPUSHER_CONFIG=/etc/ckan/datapusher
+
+# Create ckan user
+RUN useradd -r -u 900 -m -c "ckan account" -d $DATAPUSHER_HOME -s /bin/false ckan
+
+# Install qsv: extract only the qsvlite binary so nothing else is left in /tmp
+ENV QSV_RELEASE=0.43.0
+ENV QSV_ARCHIVE=qsv-$QSV_RELEASE-x86_64-unknown-linux-gnu.zip
+RUN wget -O /tmp/$QSV_ARCHIVE \
+        https://github.com/jqnatividad/qsv/releases/download/$QSV_RELEASE/$QSV_ARCHIVE && \
+    unzip /tmp/$QSV_ARCHIVE qsvlite -d /usr/local/bin/ && rm /tmp/$QSV_ARCHIVE
+
+# Setup virtual environment for datapusher-plus
+RUN mkdir -p $DATAPUSHER_CONFIG && \
+    virtualenv $DATAPUSHER_VENV && \
+    ln -s $DATAPUSHER_VENV/bin/pip3 /usr/local/bin/ckan-pip3 && \
+    ln -s $DATAPUSHER_VENV/bin/ckan /usr/local/bin/ckan
+
+# Virtual environment binaries/scripts to be used first
+ENV PATH=${DATAPUSHER_VENV}/bin:${PATH}
+
+# Install uwsgi and the PostgreSQL driver into the virtual environment
+RUN ckan-pip3 install -U pip && \
+    CPUCOUNT=1 ckan-pip3 install --upgrade --no-cache-dir uwsgi psycopg2-binary
+
+# Copy the datapusher-plus code to the image
+COPY . $DATAPUSHER_CODE
+RUN cp $DATAPUSHER_CODE/container/initialize-and-start.sh / && \
+    chmod +x /initialize-and-start.sh
+
+# install datapusher-plus
+RUN ckan-pip3 install -e $DATAPUSHER_CODE
+
+# Set ownership of directories
+RUN chown -R ckan:ckan $DATAPUSHER_HOME $DATAPUSHER_CONFIG
+
+USER ckan
+EXPOSE 8800
+
+CMD ["/initialize-and-start.sh"]
diff --git a/container/initialize-and-start.sh b/container/initialize-and-start.sh
new file mode 100644
index 00000000..c82c1482
--- /dev/null
+++ b/container/initialize-and-start.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+set -e
+
+UWSGI_FILE="${DATAPUSHER_CONFIG}/uwsgi.ini"
+
+abort () {
+  echo "$@" >&2
+  exit 1
+}
+
+# Fail if postgresql is not running
+if ! pg_isready -h "${POSTGRES_HOST}" -U "${POSTGRES_USER}"; then
+  abort "Postgresql not running"
+fi
+
+if [ ! -e "$UWSGI_FILE" ]; then
+  cp "$DATAPUSHER_CODE/container/uwsgi.ini" "$UWSGI_FILE"
+fi
+
+datapusher_initdb "$DATAPUSHER_CODE/datapusher/settings.py"
+
+# run datapusher-plus with uwsgi
+exec "$DATAPUSHER_VENV/bin/uwsgi" -i "$UWSGI_FILE"
diff --git a/container/uwsgi.ini b/container/uwsgi.ini
new file mode 100644
index 00000000..8aa04d74
--- /dev/null
+++ b/container/uwsgi.ini
@@ -0,0 +1,16 @@
+[uwsgi]
+
+http = 0.0.0.0:8800
+uid = ckan
+gid = ckan
+virtualenv = /usr/lib/ckan/datapusher/venv
+module = datapusher.wsgi:application
+master = true
+harakiri = 50
+max-requests = 5000
+vacuum = true
+callable = application
+buffer-size = 32768
+workers = 4
+threads = 2
+lazy-apps = true
diff --git a/datapusher/settings.py b/datapusher/settings.py
index 1e5a6898..4f692f30 100644
--- a/datapusher/settings.py
+++ b/datapusher/settings.py
@@ -19,7 +19,7 @@
 
 # PostgreSQL COPY settings
 # set this to the same value as your ckan.datastore.write_url
-WRITE_ENGINE_URL = os.environ.get('WRITE_ENGINE_URL', 'postgresql://datapusher:THEPASSWORD@localhost/datastore_default')
+WRITE_ENGINE_URL = os.environ.get('DATAPUSHER_WRITE_ENGINE_URL', os.environ.get('WRITE_ENGINE_URL', 'postgresql://datapusher:THEPASSWORD@localhost/datastore_default'))
 
 # qsv settings
 QSV_BIN = os.environ.get('DATAPUSHER_QSV_BIN', '/usr/local/bin/qsvlite')

From 609678df6af7673988c94e22a884bbbc640fd9c1 Mon Sep 17 00:00:00 2001
From: Abraham Toriz
Date: Fri, 29 Apr 2022 10:43:18 +0800
Subject: [PATCH 2/2] mention in readme that all variables are tunable via
 environment

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 9a58be61..1839a5ee 100644
--- a/README.md
+++ b/README.md
@@ -248,7 +248,7 @@ CREATE ROLE datapusher LOGIN SUPERUSER PASSWORD 'thepassword';
 quit
 ```
 
-Most of the configuration options above can be also provided as environment
+All of the configuration options above can also be provided as environment
 variables prepending the name with `DATAPUSHER_`, eg
 `DATAPUSHER_SQLALCHEMY_DATABASE_URI`, `DATAPUSHER_PORT`, etc. For variables with
 boolean values you must use `1` or `0`.