Skip to content

Replacing DataPusher with XLoader

Brett Jones edited this page Jul 9, 2023 · 9 revisions

Here we describe the changes needed to this (ckan-docker) repository to use the XLoader CKAN extension rather than DataPusher. Similar to DataPusher, XLoader is used to automatically download any tabular data files like CSV or Excel from resources when they are added to the CKAN site, parses them to extract the actual data, and then uses the DataStore API to push the data into the CKAN's DataStore database

There is one caveat though: The NGINX reverse proxy container has been taken out of the configuration.

XLoader is installed into the same container as CKAN core

At the time of writing (July 2023) the version of CKAN used is 2.10.1 and xloader 1.0.1

There are 3 file changes required:

  1. docker-compose.yml
  2. .env
  3. Dockerfile

Also an xloader/ directory that includes the Dockerfile described below is needed

  1. docker-compose.yml
version: "3"

volumes:
  ckan_storage:
  pg_data:
  solr_data:

services:
    
  ckan-xloader:
    container_name: ${XLOADER_CONTAINER_NAME}
    build:
      context: xloader/
      dockerfile: Dockerfile
      args:
        - TZ=${TZ}
    networks:
      - xloadernet
      - dbnet
      - solrnet
      - redisnet
    env_file:
      - .env
    depends_on:
      db:
        condition: service_healthy
      solr:
        condition: service_healthy
      redis:
        condition: service_healthy
    volumes:
      - ckan_storage:/var/lib/ckan
    ports:
      - "0.0.0.0:${XLOADER_PORT_HOST}:${XLOADER_PORT}"
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "-qO", "/dev/null", "http://xloader:5000"]

  db:
    container_name: ${POSTGRESQL_CONTAINER_NAME}
    build:
      context: postgresql/
    networks:
      - dbnet
    environment:
      - POSTGRES_USER
      - POSTGRES_PASSWORD
      - POSTGRES_DB
      - PGDATA
      - CKAN_DB_USER
      - CKAN_DB_PASSWORD
      - CKAN_DB
      - DATASTORE_READONLY_USER
      - DATASTORE_READONLY_PASSWORD
      - DATASTORE_DB
    volumes:
      - pg_data:/var/lib/postgresql/data
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "pg_isready", "-U", "${POSTGRES_USER}", "-d", "${POSTGRES_DB}"]
     
  solr:
    container_name: ${SOLR_CONTAINER_NAME}
    networks:
      - solrnet
    image: ckan/ckan-solr:${SOLR_IMAGE_VERSION}
    volumes:
      - solr_data:/var/solr
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "wget", "-qO", "/dev/null", "http://localhost:8983/solr/"]

  redis:
    container_name: ${REDIS_CONTAINER_NAME}
    image: redis:${REDIS_VERSION}
    networks:
      - redisnet
    restart: unless-stopped
    healthcheck:
      test: ["CMD", "redis-cli", "-e", "QUIT"]
    
networks:
  xloadernet:
  solrnet:
    internal: true
  dbnet:
    internal: true
  redisnet:
    internal: true
  1. .env
# Container names
NGINX_CONTAINER_NAME=nginx
REDIS_CONTAINER_NAME=redis
POSTGRESQL_CONTAINER_NAME=db
SOLR_CONTAINER_NAME=solr
DATAPUSHER_CONTAINER_NAME=datapusher
CKAN_CONTAINER_NAME=ckan
XLOADER_CONTAINER_NAME=xloader

# Host Ports
CKAN_PORT_HOST=5000
NGINX_PORT_HOST=81
NGINX_SSLPORT_HOST=8443

# CKAN databases
POSTGRES_USER=postgres
POSTGRES_PASSWORD=postgres
POSTGRES_DB=postgres
POSTGRES_HOST=db
PGDATA=/var/lib/postgresql/data/db
CKAN_DB_USER=ckandbuser
CKAN_DB_PASSWORD=ckandbpassword
CKAN_DB=ckandb
DATASTORE_READONLY_USER=datastore_ro
DATASTORE_READONLY_PASSWORD=datastore
DATASTORE_DB=datastore
CKAN_SQLALCHEMY_URL=postgresql://ckandbuser:ckandbpassword@db/ckandb
CKAN_DATASTORE_WRITE_URL=postgresql://ckandbuser:ckandbpassword@db/datastore
CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore

# Test database connections
TEST_CKAN_SQLALCHEMY_URL=postgres://ckan:ckan@db/ckan_test
TEST_CKAN_DATASTORE_WRITE_URL=postgresql://ckan:ckan@db/datastore_test
TEST_CKAN_DATASTORE_READ_URL=postgresql://datastore_ro:datastore@db/datastore_test

# CKAN core
CKAN_VERSION=2.10.0
CKAN_SITE_ID=default
CKAN_SITE_URL=http://localhost:5000
CKAN_PORT=5000
CKAN_PORT_HOST=5000
CKAN___BEAKER__SESSION__SECRET=CHANGE_ME
# See https://docs.ckan.org/en/latest/maintaining/configuration.html#api-token-settings
CKAN___API_TOKEN__JWT__ENCODE__SECRET=string:CHANGE_ME
CKAN___API_TOKEN__JWT__DECODE__SECRET=string:CHANGE_ME
CKAN_SYSADMIN_NAME=ckan_admin
CKAN_SYSADMIN_PASSWORD=test1234
CKAN_SYSADMIN_EMAIL=your_email@example.com
CKAN_STORAGE_PATH=/var/lib/ckan
CKAN_SMTP_SERVER=smtp.corporateict.domain:25
CKAN_SMTP_STARTTLS=True
CKAN_SMTP_USER=user
CKAN_SMTP_PASSWORD=pass
CKAN_SMTP_MAIL_FROM=ckan@localhost
TZ=UTC

# Solr
SOLR_IMAGE_VERSION=2.9-solr8
CKAN_SOLR_URL=http://solr:8983/solr/ckan
TEST_CKAN_SOLR_URL=http://solr:8983/solr/ckan

# Redis
REDIS_VERSION=6
CKAN_REDIS_URL=redis://redis:6379/1
TEST_CKAN_REDIS_URL=redis://redis:6379/1

# Datapusher
DATAPUSHER_VERSION=0.0.20
CKAN_DATAPUSHER_URL=http://xloader:5000
CKAN__DATAPUSHER__CALLBACK_URL_BASE=http://xloader:5000

# Xloader
XLOADER_VERSION=1.0.1
XLOADER_PORT=5000
XLOADER_PORT_HOST=5000

# NGINX
NGINX_PORT=80
NGINX_SSLPORT=443

# Extensions
CKAN__PLUGINS="envvars image_view text_view recline_view datastore datapusher"
XLOADER__PLUGINS="envvars image_view text_view recline_view datastore xloader"
CKAN__HARVEST__MQ__TYPE=redis
CKAN__HARVEST__MQ__HOSTNAME=redis
CKAN__HARVEST__MQ__PORT=6379
CKAN__HARVEST__MQ__REDIS_DB=1
  1. Dockerfile
FROM ckan/ckan-base-xloader:1.0.1

# Set up environment variables
ENV APP_DIR=/srv/app
ENV TZ=UTC
RUN echo ${TZ} > /etc/timezone

# Make sure both files are not exactly the same
RUN if ! [ /usr/share/zoneinfo/${TZ} -ef /etc/localtime ]; then \
        cp /usr/share/zoneinfo/${TZ} /etc/localtime ;\
    fi ;

EXPOSE 5000
Clone this wiki locally