diff --git a/README.rst b/README.rst index 146197aa..2bc1d031 100644 --- a/README.rst +++ b/README.rst @@ -1,7 +1,7 @@ Deploying, Running and Using Dataverse on Kubernetes ==================================================== -.. image:: https://raw.githubusercontent.com/IQSS/dataverse-kubernetes/master/docs/img/title-composition.png +.. image:: docs/img/title-composition.png |Dataverse badge| |Validation badge| @@ -11,25 +11,28 @@ Deploying, Running and Using Dataverse on Kubernetes |Docs badge| |IRC badge| -This community-supported project aims to provide simple to re-use Kubernetes -objects on how to run Dataverse on a Kubernetes cluster. +This community-supported project aims at offering a new way to deploy, run and +maintain a Dataverse installation for any purpose on any kind of Kubernetes-based +cloud infrastructure. -It aims at day-1 deployments and day-2 operations. +You can use this on your laptop, in your on-prem datacentre or public cloud. +With the power of `Kubernetes `_, many scenarios are possible. * Documentation: https://dataverse-k8s.rtfd.io -* Support: https://github.com/IQSS/dataverse-kubernetes/issues -* Roadmap: https://dataverse-k8s.rtfd.io/en/latest/roadmap.html +* Support and new ideas: https://github.com/IQSS/dataverse-kubernetes/issues -If you would like to contribute, you are most welcome. Head over to the -`contribution guide `_ -for details. +If you would like to contribute, you are most welcome. +This project follows the same branching strategy as the upstream Dataverse +project, using a ``release`` branch for stable releases plus a ``develop`` +branch. In this branch unexpected or breaking changes may happen. -.. |Dataverse badge| image:: https://img.shields.io/badge/Dataverse-v4.19-important.svg + +.. |Dataverse badge| image:: https://img.shields.io/badge/Dataverse-v4.20-important.svg :target: https://dataverse.org -.. |Validation badge| image:: https://jenkins.dataverse.org/job/dataverse-k8s/job/Kubeval%20Linting/job/master/badge/icon?subject=kubeval&status=valid&color=purple - :target: https://jenkins.dataverse.org/blue/organizations/jenkins/dataverse-k8s%2FKubeval%20Linting/activity?branch=master +.. |Validation badge| image:: https://jenkins.dataverse.org/job/dataverse-k8s/job/Kubeval%20Linting/job/release/badge/icon?subject=kubeval&status=valid&color=purple + :target: https://jenkins.dataverse.org/blue/organizations/jenkins/dataverse-k8s%2FKubeval%20Linting/activity?branch=release .. |DockerHub dataverse-k8s badge| image:: https://img.shields.io/static/v1.svg?label=image&message=dataverse-k8s&logo=docker :target: https://hub.docker.com/r/iqss/dataverse-k8s .. |DockerHub solr-k8s badge| image:: https://img.shields.io/static/v1.svg?label=image&message=solr-k8s&logo=docker diff --git a/dataverse b/dataverse index affbf4fb..4e07b625 160000 --- a/dataverse +++ b/dataverse @@ -1 +1 @@ -Subproject commit affbf4fbeb511e0a8591348accdec51423e02d92 +Subproject commit 4e07b625ed585d2feb0ff44b9273c95746958476 diff --git a/docker-compose.yaml b/docker-compose.yaml new file mode 100644 index 00000000..a3a3b667 --- /dev/null +++ b/docker-compose.yaml @@ -0,0 +1,32 @@ +--- +version: '3.5' +services: + + postgresql: + image: postgres:9.6 + expose: + - 5432 + environment: + - POSTGRES_USER=dataverse + - POSTGRES_PASSWORD=changeme + + solr: + image: iqss/solr-k8s + expose: + - 8983 + + dataverse: + build: + context: . 
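+      # build the development image from the local source tree, using the glassfish-dev Dockerfile below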
+ dockerfile: ./docker/dataverse-k8s/glassfish-dev/Dockerfile + image: iqss/dataverse-k8s:dev + depends_on: + - postgresql + - solr + ports: + - 8080:8080 + volumes: + - type: bind + source: ./personas/docker-compose/secrets + target: /secrets + read_only: true diff --git a/docker/dataverse-k8s/Jenkinsfile b/docker/dataverse-k8s/Jenkinsfile index 49db2291..0ba978bc 100644 --- a/docker/dataverse-k8s/Jenkinsfile +++ b/docker/dataverse-k8s/Jenkinsfile @@ -74,7 +74,7 @@ pipeline { } stage('latest') { when { - branch 'master' + branch 'release' } environment { // credentials() will magically add DOCKER_HUB_USR and DOCKER_HUB_PSW @@ -83,7 +83,7 @@ pipeline { } steps { script { - // Push master image to latest tag + // Push release image to latest tag docker.withRegistry("${env.DOCKER_REGISTRY}", "${env.DOCKER_HUB_CRED}") { gf_docker_image.push("latest") pyr_docker_image.push("payara") diff --git a/docker/dataverse-k8s/bin/bootstrap-job.sh b/docker/dataverse-k8s/bin/bootstrap-job.sh index acb3f2ae..c59152a7 100644 --- a/docker/dataverse-k8s/bin/bootstrap-job.sh +++ b/docker/dataverse-k8s/bin/bootstrap-job.sh @@ -17,7 +17,8 @@ DATAVERSE_URL=${DATAVERSE_URL:-"http://${DATAVERSE_SERVICE_HOST}:${DATAVERSE_SER # The Solr Service IP is always available under its name within the same namespace. # If people want to use a different Solr than we normally deploy, they have the # option to override. -SOLR_K8S_HOST=${SOLR_K8S_HOST:-"solr"} +SOLR_SERVICE_HOST=${SOLR_SERVICE_HOST:-"solr"} +SOLR_SERVICE_PORT=${SOLR_SERVICE_PORT:-"8983"} # Check postgres and API key secrets are available if [ ! -s "${SECRETS_DIR}/db/password" ]; then @@ -53,7 +54,7 @@ sed -i -e "s#dataverse@mailinator.com#${CONTACT_MAIL}#" data/user-admin.json ./setup-all.sh --insecure -p="${ADMIN_PASSWORD:-admin}" # 4.) Configure Solr location -curl -sS -X PUT -d "${SOLR_K8S_HOST}:8983" "${DATAVERSE_URL}/api/admin/settings/:SolrHostColonPort" +curl -sS -X PUT -d "${SOLR_SERVICE_HOST}:${SOLR_SERVICE_PORT}" "${DATAVERSE_URL}/api/admin/settings/:SolrHostColonPort" # 5.) 
Provision builtin users key to enable creation of more builtin users if [ -s "${SECRETS_DIR}/api/userskey" ]; then diff --git a/docker/dataverse-k8s/bin/default.config b/docker/dataverse-k8s/bin/default.config index 10de0cb4..eb18ab34 100644 --- a/docker/dataverse-k8s/bin/default.config +++ b/docker/dataverse-k8s/bin/default.config @@ -16,6 +16,14 @@ JMX_EXPORTER_CONFIG=${JMX_EXPORTER_CONFIG:-"${HOME}/jmx_exporter_config.yaml"} # (Exporting needed as they cannot be seen by `env` otherwise) export dataverse_files_directory=${dataverse_files_directory:-/data} +export dataverse_files_storage__driver__id=${dataverse_files_storage__driver__id:-local} + +if [ "${dataverse_files_storage__driver__id}" = "local" ]; then + export dataverse_files_local_type=${dataverse_files_local_type:-file} + export dataverse_files_local_label=${dataverse_files_local_label:-Local} + export dataverse_files_local_directory=${dataverse_files_local_directory:-/data} +fi + export dataverse_rserve_host=${dataverse_rserve_host:-rserve} export dataverse_rserve_port=${dataverse_rserve_port:-6311} export dataverse_rserve_user=${dataverse_rserve_user:-rserve} diff --git a/docker/dataverse-k8s/glassfish/Dockerfile b/docker/dataverse-k8s/glassfish/Dockerfile index fa15a908..258309f8 100644 --- a/docker/dataverse-k8s/glassfish/Dockerfile +++ b/docker/dataverse-k8s/glassfish/Dockerfile @@ -8,9 +8,9 @@ FROM centos:7 LABEL maintainer="FDM FZJ " -ARG TINI_VERSION=v0.18.0 +ARG TINI_VERSION=v0.19.0 ARG JMX_EXPORTER_VERSION=0.12.0 -ARG VERSION=4.19 +ARG VERSION=4.20 ARG DOMAIN=domain1 ENV HOME_DIR=/opt/dataverse\ @@ -21,11 +21,12 @@ ENV HOME_DIR=/opt/dataverse\ DOCROOT_DIR=/docroot\ METADATA_DIR=/metadata\ SECRETS_DIR=/secrets\ + DUMPS_DIR=/dumps\ GLASSFISH_PKG=http://download.java.net/glassfish/4.1/release/glassfish-4.1.zip\ GLASSFISH_SHA1=704a90899ec5e3b5007d310b13a6001575827293\ WELD_PKG=https://repo1.maven.org/maven2/org/jboss/weld/weld-osgi-bundle/2.2.10.SP1/weld-osgi-bundle-2.2.10.SP1-glassfish4.jar\ - GRIZZLY_PKG=http://guides.dataverse.org/en/latest/_downloads/glassfish-grizzly-extra-all.jar\ - PGDRIVER_PKG=https://jdbc.postgresql.org/download/postgresql-42.2.10.jar\ + GRIZZLY_PKG=http://guides.dataverse.org/en/${VERSION}/_downloads/glassfish-grizzly-extra-all.jar\ + PGDRIVER_PKG=https://jdbc.postgresql.org/download/postgresql-42.2.12.jar\ DATAVERSE_VERSION=${VERSION}\ DATAVERSE_PKG=https://github.com/IQSS/dataverse/releases/download/v${VERSION}/dvinstall.zip\ JMX_EXPORTER_PKG=https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/${JMX_EXPORTER_VERSION}/jmx_prometheus_javaagent-${JMX_EXPORTER_VERSION}.jar\ @@ -43,15 +44,13 @@ RUN groupadd -g 1000 glassfish && \ useradd -u 1000 -M -s /bin/bash -d ${HOME_DIR} glassfish -g glassfish && \ echo glassfish:glassfish | chpasswd && \ mkdir -p ${HOME_DIR} ${SCRIPT_DIR} ${SECRETS_DIR} && \ - mkdir -p ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} && \ - chown -R glassfish: ${HOME_DIR} ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} + mkdir -p ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} ${DUMPS_DIR} && \ + chown -R glassfish: ${HOME_DIR} ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} ${DUMPS_DIR} # Install tini as minimized init system -RUN wget --no-verbose -O /tini https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini && \ - wget --no-verbose -O /tini.asc https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini.asc && \ - gpg --batch --keyserver "hkp://p80.pool.sks-keyservers.net:80" --recv-keys 595E85A6B1B4779EA4DAAEC70B588DFF0527A9B7 && \ - gpg 
--batch --verify /tini.asc /tini && \ - chmod +x /tini +RUN wget --no-verbose -O tini-amd64 https://github.com/krallin/tini/releases/download/${TINI_VERSION}/tini-amd64 && \ + echo '93dcc18adc78c65a028a84799ecf8ad40c936fdfc5f2a57b1acda5a8117fa82c tini-amd64' | sha256sum -c - && \ + mv tini-amd64 /tini && chmod +x /tini # Install esh template engine from Github RUN wget --no-verbose -O esh https://raw.githubusercontent.com/jirutka/esh/v0.3.0/esh && \ @@ -94,6 +93,14 @@ RUN ${GLASSFISH_DIR}/bin/asadmin start-domain && \ for MEMORY_JVM_OPTION in $(${GLASSFISH_DIR}/bin/asadmin list-jvm-options | grep "Xm[sx]"); do\ ${GLASSFISH_DIR}/bin/asadmin delete-jvm-options $MEMORY_JVM_OPTION;\ done && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:+HeapDumpOnOutOfMemoryError" && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:HeapDumpPath=${DUMPS_DIR}" && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:+UseG1GC" && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:+UseStringDeduplication" && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:MaxGCPauseMillis=500" && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:MetaspaceSize=256m" && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:MaxMetaspaceSize=2g" && \ + ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-XX\:+IgnoreUnrecognizedVMOptions" && \ ${GLASSFISH_DIR}/bin/asadmin create-jvm-options -- "-server" && \ ${GLASSFISH_DIR}/bin/asadmin stop-domain && \ mkdir -p ${DOMAIN_DIR}/autodeploy && \ diff --git a/docker/dataverse-k8s/glassfish/bin/init_1_conf_glassfish.sh b/docker/dataverse-k8s/glassfish/bin/init_1_conf_glassfish.sh index bb26b3eb..42dba629 100644 --- a/docker/dataverse-k8s/glassfish/bin/init_1_conf_glassfish.sh +++ b/docker/dataverse-k8s/glassfish/bin/init_1_conf_glassfish.sh @@ -30,16 +30,29 @@ do done # 1b. Create AWS access credentials when storage driver is set to s3 -# See IQSS/dataverse-kubernetes#28 for details of this workaround. -if [ "s3" = "${dataverse_files_storage__driver__id}" ]; then - if [ -f ${SECRETS_DIR}/s3/access-key ] && [ -f ${SECRETS_DIR}/s3/secret-key ]; then - mkdir -p ${HOME_DIR}/.aws - echo "[default]" > ${HOME_DIR}/.aws/credentials - cat ${SECRETS_DIR}/s3/access-key | sed -e "s#^#aws_access_key_id = #" -e "s#\$#\n#" >> ${HOME_DIR}/.aws/credentials - cat ${SECRETS_DIR}/s3/secret-key | sed -e "s#^#aws_secret_access_key = #" -e "s#\$#\n#" >> ${HOME_DIR}/.aws/credentials - else - echo "WARNING: Could not find all S3 access secrets in ${SECRETS_DIR}/s3/(access-key|secret-key). Check your Kubernetes Secrets and their mounting!" - fi +# Find all access keys +if [ -d "${SECRETS_DIR}/s3" ]; then + S3_KEYS=`find "${SECRETS_DIR}/s3" -readable -type f -iname '*access-key'` + S3_CRED_FILE=${HOME_DIR}/.aws/credentials + mkdir -p `dirname "${S3_CRED_FILE}"` + rm -f ${S3_CRED_FILE} + # Iterate keys + while IFS= read -r S3_ACCESS_KEY; do + echo "Loading S3 key ${S3_ACCESS_KEY}" + # Try to find the secret key, parse for profile and add to the credentials file. 
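+    # For example, a file named "minio-access-key" is paired with "minio-secret-key"
+    # and becomes profile "[minio]"; a plain "access-key"/"secret-key" pair becomes "[default]".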
+ S3_PROFILE=`echo "${S3_ACCESS_KEY}" | sed -ne "s#.*/\(.*\)-access-key#\1#p"` + S3_SECRET_KEY=`echo "${S3_ACCESS_KEY}" | sed -ne "s#\(.*/\|.*/.*-\)access-key#\1secret-key#p"` + + if [ -r ${S3_SECRET_KEY} ]; then + [ -z "${S3_PROFILE}" ] && echo "[default]" >> "${S3_CRED_FILE}" || echo "[${S3_PROFILE}]" >> "${S3_CRED_FILE}" + cat "${S3_ACCESS_KEY}" | sed -e "s#^#aws_access_key_id = #" -e "s#\$#\n#" >> "${S3_CRED_FILE}" + cat "${S3_SECRET_KEY}" | sed -e "s#^#aws_secret_access_key = #" -e "s#\$#\n#" >> "${S3_CRED_FILE}" + echo "" >> "${S3_CRED_FILE}" + else + echo "ERROR: Could not find or read matching \"$S3_SECRET_KEY\"." + exit 1 + fi + done <<< "${S3_KEYS}" fi # 2. Domain-spaced resources (JDBC, JMS, ...) diff --git a/docker/dataverse-k8s/payara/Dockerfile b/docker/dataverse-k8s/payara/Dockerfile index 9b48484a..8116a128 100644 --- a/docker/dataverse-k8s/payara/Dockerfile +++ b/docker/dataverse-k8s/payara/Dockerfile @@ -4,44 +4,33 @@ # You may obtain a copy of the License at # http://www.apache.org/licenses/LICENSE-2.0 -FROM payara/server-full:5.201 +FROM payara/server-full:5.2020.3 LABEL maintainer="FDM FZJ " -ARG VERSION=4.19 +ARG VERSION=4.20 ARG DOMAIN=domain1 ENV DATA_DIR=/data\ DOCROOT_DIR=/docroot\ METADATA_DIR=/metadata\ SECRETS_DIR=/secrets\ + DUMPS_DIR=/dumps\ DOMAIN_DIR=${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}\ DATAVERSE_VERSION=${VERSION}\ DATAVERSE_PKG=https://github.com/IQSS/dataverse/releases/download/v${VERSION}/dvinstall.zip\ PGDRIVER_PKG=https://jdbc.postgresql.org/download/postgresql-42.2.12.jar\ - MEM_MAX_RAM_PERCENTAGE=70.0\ - MEM_XSS=512k + # Make heap dumps on OOM appear in DUMPS_DIR + JVM_ARGS="-XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=\${ENV=DUMPS_DIR}" # Create basic pathes USER root RUN mkdir -p ${HOME_DIR} ${SCRIPT_DIR} ${SECRETS_DIR} && \ - mkdir -p ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} && \ - chown -R payara: ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} ${SECRETS_DIR} - -# WORKAROUND MEMORY ISSUES UNTIL UPSTREAM FIXES THEM IN NEW RELEASE -RUN ${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE} start-domain ${DOMAIN_NAME} && \ - ${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE} delete-jvm-options \ - '-XX\:+UnlockExperimentalVMOptions:-XX\:+UseCGroupMemoryLimitForHeap:-XX\:MaxRAMFraction=1' && \ - ${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE} create-jvm-options \ - '-XX\:+UseContainerSupport:-XX\:MaxRAMPercentage=${ENV=MEM_MAX_RAM_PERCENTAGE}:-Xss${ENV=MEM_XSS}' && \ - ${PAYARA_DIR}/bin/asadmin --user=${ADMIN_USER} --passwordfile=${PASSWORD_FILE} stop-domain ${DOMAIN_NAME} && \ - # Cleanup after initialization - rm -rf \ - ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/osgi-cache \ - ${PAYARA_DIR}/glassfish/domains/${DOMAIN_NAME}/logs + mkdir -p ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} ${DUMPS_DIR} && \ + chown -R payara: ${DATA_DIR} ${METADATA_DIR} ${DOCROOT_DIR} ${SECRETS_DIR} ${DUMPS_DIR} # Install prerequisites RUN apt-get -qq update && \ - apt-get -qqy install postgresql-client jq imagemagick curl + apt-get -qqy install postgresql-client jq imagemagick curl wget unzip # Install esh template engine from Github RUN wget --no-verbose -O esh https://raw.githubusercontent.com/jirutka/esh/v0.3.0/esh && \ diff --git a/docker/dataverse-k8s/payara/bin/init_2_conf_payara.sh b/docker/dataverse-k8s/payara/bin/init_2_conf_payara.sh index d61b6563..ac9a68a2 100644 --- a/docker/dataverse-k8s/payara/bin/init_2_conf_payara.sh +++ 
b/docker/dataverse-k8s/payara/bin/init_2_conf_payara.sh @@ -34,17 +34,29 @@ do done # 1b. Create AWS access credentials when storage driver is set to s3 -# See IQSS/dataverse-kubernetes#28 for details of this workaround. -if [ "s3" = "${dataverse_files_storage__driver__id}" ]; then - if [ -f ${SECRETS_DIR}/s3/access-key ] && [ -f ${SECRETS_DIR}/s3/secret-key ]; then - echo "INFO: Deploying AWS credentials." - mkdir -p ${HOME_DIR}/.aws - echo "[default]" > ${HOME_DIR}/.aws/credentials - cat ${SECRETS_DIR}/s3/access-key | sed -e "s#^#aws_access_key_id = #" -e "s#\$#\n#" >> ${HOME_DIR}/.aws/credentials - cat ${SECRETS_DIR}/s3/secret-key | sed -e "s#^#aws_secret_access_key = #" -e "s#\$#\n#" >> ${HOME_DIR}/.aws/credentials - else - echo "WARNING: Could not find all S3 access secrets in ${SECRETS_DIR}/s3/(access-key|secret-key). Check your Kubernetes Secrets and their mounting!" - fi +# Find all access keys +if [ -d "${SECRETS_DIR}/s3" ]; then + S3_KEYS=`find "${SECRETS_DIR}/s3" -readable -type f -iname '*access-key'` + S3_CRED_FILE=${HOME_DIR}/.aws/credentials + mkdir -p `dirname "${S3_CRED_FILE}"` + rm -f ${S3_CRED_FILE} + # Iterate keys + while IFS= read -r S3_ACCESS_KEY; do + echo "Loading S3 key ${S3_ACCESS_KEY}" + # Try to find the secret key, parse for profile and add to the credentials file. + S3_PROFILE=`echo "${S3_ACCESS_KEY}" | sed -ne "s#.*/\(.*\)-access-key#\1#p"` + S3_SECRET_KEY=`echo "${S3_ACCESS_KEY}" | sed -ne "s#\(.*/\|.*/.*-\)access-key#\1secret-key#p"` + + if [ -r ${S3_SECRET_KEY} ]; then + [ -z "${S3_PROFILE}" ] && echo "[default]" >> "${S3_CRED_FILE}" || echo "[${S3_PROFILE}]" >> "${S3_CRED_FILE}" + cat "${S3_ACCESS_KEY}" | sed -e "s#^#aws_access_key_id = #" -e "s#\$#\n#" >> "${S3_CRED_FILE}" + cat "${S3_SECRET_KEY}" | sed -e "s#^#aws_secret_access_key = #" -e "s#\$#\n#" >> "${S3_CRED_FILE}" + echo "" >> "${S3_CRED_FILE}" + else + echo "ERROR: Could not find or read matching \"$S3_SECRET_KEY\"." + exit 1 + fi + done <<< "${S3_KEYS}" fi # 2. Domain-spaced resources (JDBC, JMS, ...) 
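
Both init scripts above share the same convention: every readable file matching
``*access-key`` below ``${SECRETS_DIR}/s3`` is paired with its ``*secret-key`` file and
written as a profile into the generated ``~/.aws/credentials``. A minimal sketch of how
this might be fed from Kubernetes follows; the Secret name ``dataverse-s3`` and the key
values are illustrative, and mounting the Secret at ``/secrets/s3`` depends on your
deployment (see the day-1 secrets docs):

    kubectl create secret generic dataverse-s3 \
      --from-literal=access-key='minio' \
      --from-literal=secret-key='minio123' \
      --from-literal=backup-access-key='EXAMPLEKEY' \
      --from-literal=backup-secret-key='EXAMPLESECRET'

Mounted at ``/secrets/s3``, this would result in a credentials file containing a
``[default]`` profile (from ``access-key``/``secret-key``) and a ``[backup]`` profile
(from ``backup-access-key``/``backup-secret-key``).
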
diff --git a/docker/solr-k8s/Dockerfile b/docker/solr-k8s/Dockerfile index d4eced60..214a5572 100644 --- a/docker/solr-k8s/Dockerfile +++ b/docker/solr-k8s/Dockerfile @@ -10,7 +10,7 @@ LABEL maintainer="FDM FZJ " ARG WEBHOOK_VERSION=2.6.11 ARG TINI_VERSION=v0.18.0 -ARG VERSION=4.19 +ARG VERSION=4.20 ARG COLLECTION=collection1 ENV SOLR_OPTS="-Dsolr.jetty.request.header.size=102400"\ COLLECTION_DIR=/opt/solr/server/solr/${COLLECTION}\ diff --git a/docker/solr-k8s/Jenkinsfile b/docker/solr-k8s/Jenkinsfile index 97d13365..6baf9e2e 100644 --- a/docker/solr-k8s/Jenkinsfile +++ b/docker/solr-k8s/Jenkinsfile @@ -68,7 +68,7 @@ pipeline { } stage('latest') { when { - branch 'master' + branch 'release' } environment { // credentials() will magically add DOCKER_HUB_USR and DOCKER_HUB_PSW @@ -77,7 +77,7 @@ pipeline { } steps { script { - // Push master image to latest tag + // Push release image to latest tag docker.withRegistry("${env.DOCKER_REGISTRY}", "${env.DOCKER_HUB_CRED}") { docker_image.push("latest") } diff --git a/docs/.gitignore b/docs/.gitignore index 24e5b0a1..187d5941 100644 --- a/docs/.gitignore +++ b/docs/.gitignore @@ -1 +1,2 @@ .build +_build diff --git a/docs/conf.py b/docs/conf.py index 1f46a2f0..4fa94ab8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,7 @@ author = u'Oliver Bertuch' # The short X.Y version -version = u'4.19' +version = u'4.20' # The full version, including alpha/beta/rc tags release = version @@ -87,6 +87,7 @@ autosectionlabel_prefix_document = True extlinks = { + 'tree': ('https://github.com/IQSS/dataverse-kubernetes/tree/master/%s', 'folder of master branch '), 'issue': ('https://github.com/IQSS/dataverse-kubernetes/issues/%s', 'issue '), 'issue_dv': ('https://github.com/IQSS/dataverse/issues/%s', 'issue '), 'guide_dv': ('http://guides.dataverse.org/en/'+version+'/%s', 'upstream docs ') diff --git a/docs/day1/config.rst b/docs/day1/config.rst index 277f4bb8..f5b9d43e 100644 --- a/docs/day1/config.rst +++ b/docs/day1/config.rst @@ -62,8 +62,17 @@ server). Examples (see below :ref:`full-example`): -.. literalinclude:: examples/configmap.yaml - :lines: 12,16-18,26-27,34 +.. code-block:: yaml + + data: + ### GENERAL SETTINGS + dataverse_fqdn: data.example.org + dataverse_siteUrl: https://\${dataverse.fqdn} + dataverse_auth_password__reset__timeout__in__minutes: 30 + + ### DOI SETTINGS + doi_baseurlstring: https://mds.test.datacite.org + doi_username: EXAMPLEORG.TEST .. warning:: @@ -93,7 +102,7 @@ Provide settings ^^^^^^^^^^^^^^^^ 1. Pick a `Database setting `_ -2. Remove the ``:``` and replace it with ``db_``. Keep the Pascal case! +2. Remove the ``:`` and replace it with ``db_``. Keep the Pascal case! 3. Put the transformed value into the ``ConfigMap.data``. 4. Add your value, which can be any value you see in the docs. Keep in mind: when you need to use JSON, format it as a string! @@ -101,8 +110,18 @@ Provide settings Examples (see below :ref:`full-example`): -.. literalinclude:: examples/configmap.yaml - :lines: 12,27-31,42-43 +.. code-block:: yaml + + data: + ### DOI SETTINGS + db_DoiProvider: DataCite + db_Protocol: doi + db_Authority: "10.12345" + db_Shoulder: EXAMPLE/ + + ### CUSTOMIZATION + db_StatusMessageHeader: "Example.org is not yet in production" + db_StatusMessageText: "
Please do not save any real data, only use for testing and sneak-peek." .. warning:: @@ -117,9 +136,9 @@ is a bad idea. It's always a good idea to put it in revision control. .. code:: # Update ConfigMap: - kubectl apply -f k8s/dataverse/configmap.yaml + kubectl apply -f path/to/your/configmap.yaml # Deploy a new config job: - kubectl create -f k8s/dataverse/jobs/configure.yaml + kubectl create -f https://gitcdn.link/repo/IQSS/dataverse-kubernetes/release/k8s/dataverse/jobs/configure.yaml You might consider providing a `CronJob` for scheduled, regular updates. diff --git a/docs/day1/init-deploy.rst b/docs/day1/deploy-behind-scenes.rst similarity index 63% rename from docs/day1/init-deploy.rst rename to docs/day1/deploy-behind-scenes.rst index f0e6eacb..8f343ff3 100644 --- a/docs/day1/init-deploy.rst +++ b/docs/day1/deploy-behind-scenes.rst @@ -1,24 +1,15 @@ -================== -Initial Deployment -================== +================= +Behind the Scenes +================= -Please familiarize yourself with the `architecture of Dataverse `_ -if not already done: it helps a lot knowing how things are connected in Dataverse -to also understand using it as a Kubernetes application. - -The below UML sequence diagram shows all necessary steps by "you" (the user activity on the left) -or (preferably) your deployment framework (like Kustomize.io, Helm or similar) -on your behalf for a new deployment of Dataverse. It also explains what happens -in the background on an overview level. +The below UML sequence diagram shows all necessary steps by "you" +(the user activity on the left) or (preferably) your deployment framework +(like Kustomize.io, Helm or similar) on your behalf for a new deployment of +Dataverse. It explains what happens in the background on an overview level. When you are done with the initial deployment, you have to :doc:`bootstrap ` (and :doc:`configure `, which is done during bootstrapping, too). -Maybe you should just read :doc:`/get-started/index` and follow that before going into details. - - .. note:: Choose stable Glassfish 4.1 based images or experimental Payara 5 - based with corresponding image tag. See :doc:`/images/dataverse-k8s` for available tags. - .. uml:: @startuml diff --git a/docs/day1/examples/configmap.yaml b/docs/day1/examples/configmap.yaml index 0f95f430..1a089301 100644 --- a/docs/day1/examples/configmap.yaml +++ b/docs/day1/examples/configmap.yaml @@ -34,11 +34,13 @@ data: ### FILE STORAGE dataverse_files_directory: /data - dataverse_files_storage__driver__id: "s3" - dataverse_files_s3__custom__endpoint__url: http://minio:9000 - dataverse_files_s3__bucket__name: dataverse + dataverse_files_storage__driver__id: "myremote" + dataverse_files_myremote_type: "s3" + dataverse_files_myremote_label: "My Remote S3 Object Store" + dataverse_files_myremote_custom__endpoint__url: http://minio:9000 + dataverse_files_myremote_bucket__name: dataverse # required for Minio! 
- dataverse_files_s3__path__style__access: "true" + dataverse_files_myremote_path__style__access: "true" ### CUSTOMIZATION db_StatusMessageHeader: "Example.org is not yet in production" diff --git a/docs/day1/img/skeleton-tree.png b/docs/day1/img/skeleton-tree.png new file mode 100644 index 00000000..ae27b545 Binary files /dev/null and b/docs/day1/img/skeleton-tree.png differ diff --git a/docs/day1/index.rst b/docs/day1/index.rst index fc8afb11..0ae8525c 100644 --- a/docs/day1/index.rst +++ b/docs/day1/index.rst @@ -7,9 +7,166 @@ Day 1 - Deployment :caption: Contents: :hidden: - ./init-deploy - ./job-bootstrap ./config ./secrets ./resources ./storage + ./provider-hints + ./job-bootstrap + ./deploy-behind-scenes + +Once you start building your own installation beyond ephemeral demo or +development purposes, you should take some time to prepare and successfully +roll out your deployment. + +--------------------------- +Guided Deployment Checklist +--------------------------- + +We provide you with this little checklist of topics and aspects to consider +while preparing your setup. + +1. Pick your poison +------------------- + +If you never touched a commandline, never thought about why using cloud +infrastructure might be a good idea: maybe you should stick with the old, +but paved and solid ways of installing complex applications like Dataverse. + +Keen to learn new technology? Be part of the future? Want to streamline +CI/CD and your application? *Continue*. + + + +2. Install necessary tools +-------------------------- + +You will at least need: + +- | `kubectl`_, at least version 1.14 + | This will be your main tool for all operations in your installation. +- | `git`_ (or another VCS) + | Your tool of choice to do revision control for your deployment objects. + Using such tools is *lege artis*. + +Depending on your use-case and targeted environment that might be just it. +If something else is necessary, it'll be documented in its respective documentation part. + +.. hint:: + Please be aware that this project relies on the use of `Kustomize `_ + and does **not** provide a `Helm Chart `_ (very different + templated approach) **nor** an `Operator `_ (to be done + in the future, see :issue:`182`). Pull requests welcome, please get in touch. + + +3. Grasp some knowledge +----------------------- + +If you never used Kubernetes before, but want to deploy to production, you +definitely should be reading some docs first. Some starting points: + +- https://kubernetes.io/docs/tutorials/kubernetes-basics/ +- https://ramitsurana.github.io/awesome-kubernetes +- https://kubernetes-on-aws.readthedocs.io/en/latest/admin-guide/kubernetes-in-production.html + +Please familiarize yourself with the `architecture of Dataverse `_ +if not already done: it helps a lot knowing how things are connected in a complex +application like Dataverse to also understand hosting on Kubernetes. + + + +4. Grab a cluster +----------------- + +You'll need a running and fully configured Kubernetes cluster. + +- | Local options: + | These aren't necessarily meant for production, but might serve you well for + | testing and maybe staging environments. + + - `k3s `_ + - `minikube `_ + - `microk8s `_ + - `kind `_ + +- Deploy your own (production) cluster. Many tools to choose from. Examples: + + - `k3s `_ (+1 for small installations) + - `kops `_ + - `kubespray `_ + - `Rancher `_ + - `VMWare vSphere 7+ `_ + +- Use a managed solution to avoid maintenance burden for the infrastructure. 
+
+   Many universities and research institutes already offer managed Kubernetes
+   clusters for scientific use.
+
+   Some examples by commercial companies:
+
+   - Google: `GKE `_
+   - Microsoft: `Azure AKS `_
+   - Amazon: `AWS EKS `_
+   - RedHat: `OpenShift `_
+
+   Please note there is an extensible collection of hints for some of these
+   cloud providers at the :doc:`provider-hints` page.
+
+
+
+5. Choose persistent identifiers
+--------------------------------
+
+When you want to register datasets and/or files in your deployment to
+DataCite, EZID or similar, you will need active accounts. Be sure to have
+access credentials around. As an alternative, you might want to use the FAKE provider.
+
+.. seealso::
+
+   For more information on Dataverse's supported providers:
+
+   - `Installation Guide: Persistent Identifiers and Publishing Datasets `_
+   - `Installation Guide: Configuration Option :DoiProvider `_
+
+
+
+6. Build bases and environment overlays
+---------------------------------------
+
+Create an empty repository for your Kubernetes files and add a base layout.
+A skeleton is available for your convenience as a copy-paste starter
+in ``personas/prod-skel``:
+
+.. image:: img/skeleton-tree.png
+
+You should make a copy of env1 for both your production and testing environments.
+Depending on your installation, it might make sense to create your own base.
+
+.. tip::
+   The skeleton already provides an example of how to add Minio as an S3 object
+   store and a custom SSL termination endpoint, but lacks an ``Ingress`` object.
+   All of this needs to be adapted in the next step, depending on your cluster
+   and necessities.
+
+7. Configure and deploy
+-----------------------
+
+You now need to change all :doc:`config`, :doc:`limits `,
+:doc:`storage classes `, etc. depending on your particular deployment.
+
+Make sure to create all the :doc:`secrets` and match names between them.
+
+Once you're done, DEPLOY! :-)
+When the deployment is ready, you have to run the one-time :doc:`job-bootstrap`.
+
+.. code-block:: shell
+
+    kubectl apply -k envs/env1
+    kubectl create -f https://gitcdn.link/repo/IQSS/dataverse-kubernetes/release/k8s/dataverse/jobs/bootstrap.yaml
+
+
+More detailed docs about what happens during the deployment can be found at
+:doc:`deploy-behind-scenes`.
+
+.. _kubectl: https://kubernetes.io/docs/tasks/tools/install-kubectl
+.. _git: https://git-scm.com/downloads
diff --git a/docs/day1/job-bootstrap.rst b/docs/day1/job-bootstrap.rst
index a0fe0e2d..38254db4 100644
--- a/docs/day1/job-bootstrap.rst
+++ b/docs/day1/job-bootstrap.rst
@@ -3,8 +3,9 @@ Bootstrap Job
 =============
 
 After deploying every components of Dataverse on Kubernetes for the first time
-(see :doc:`./init-deploy`), you will need to bootstrap your installation.
-That will create a superadmin user, root dataverse and block important API endpoints.
+(see the :ref:`deployment step` or more details at
+:doc:`deploy-behind-scenes`), you will need to bootstrap your installation. That
+will create a superadmin user, root dataverse and block important API endpoints.
 
 It will also set the option ``:SolrHostColonPort``, configuring where Dataverse
 can find the Solr Search index. It will default to ``solr:8983``, but can be
@@ -15,6 +16,12 @@ When the very basic configuration has been done, the configuration given in the
 ``ConfigMap`` will be applied, like you would
 :ref:`run a configure Kubernetes job `.
 
+To create a bootstrapping job, use ``kubectl``:
+
+.. 
code-block:: shell + + kubectl create -f https://gitcdn.link/repo/IQSS/dataverse-kubernetes/release/k8s/dataverse/jobs/bootstrap.yaml + .. uml:: @startuml diff --git a/docs/day1/provider-hints.rst b/docs/day1/provider-hints.rst new file mode 100644 index 00000000..3f320b26 --- /dev/null +++ b/docs/day1/provider-hints.rst @@ -0,0 +1,27 @@ +-------------------- +Cloud Provider Hints +-------------------- + +If you want to add some notes about a cloud provider you know, please create +a pull request. Feel free to include files in the ``examples`` directory. + +Microsoft Azure +--------------- + +You might need to patch your ``PersistentVolumeClaim``\ s according to the +`Azure AKS storage docs `_. +This depends on your requirements and should be tested. An example is given below. + +.. code-block:: yaml + :caption: patch-azure-pvc.yaml + + --- + kind: PersistentVolumeClaim + apiVersion: v1 + metadata: + name: XXX + spec: + storageClassName: managed-premium + +Remember that for ``ReadWriteMany`` volumes you have to use a different type of +them, documented as `Azure Files `_. diff --git a/docs/day1/resources.rst b/docs/day1/resources.rst index cee96931..4790c1c8 100644 --- a/docs/day1/resources.rst +++ b/docs/day1/resources.rst @@ -23,12 +23,12 @@ tweaked for memory usage. Obviously the values below count per instance. - Recommendend min. RAM for production use * - Application server w/ Dataverse - - 1 GiB - - 512 MiB + - 1.5 GiB + - 768 MiB - 4 GiB * - Solr Search Index - - 1 GiB - - 512 MiB + - 820 MiB + - 410 MiB - 4 GiB .. hint:: @@ -62,13 +62,19 @@ To configure these limits, simply configure it in the ``Deployment`` object: .. seealso:: For development or demo use, you'll be good in most cases with much less. - You need to ensure the JVM uses at least 512 MiB for heap space. + You need to ensure the JVM uses at least ~800 MiB for heap space. Using less heap space will not even deploy successfully. + How much RAM is used at max for Java Heap can be easily adjusted by using the + JVM option ``-XX:MaxRAMPercentage=xx.x``. For your convenience this has been + simplified by supporting an environment variable ``${MEM_MAX_RAM_PERCENTAGE}``, + see hidden example below. *Please keep in mind: value must be a floating point + value!* + .. toggle-header:: :header: Development values are hidden on purpose to avoid confusion. *Expand/hide* - 1 GiB RAM means 717 MB of heap space with 70% default setting, which is safe. + 1.5 GiB RAM means ~1 GiB of heap space using the 70% default setting, which is safe. You can tweak the setting to match your necessities like below: .. code-block:: yaml @@ -80,14 +86,93 @@ To configure these limits, simply configure it in the ``Deployment`` object: - name: dataverse resources: requests: - memory: "1Gi" + memory: "1.0Gi" limits: - memory: "1Gi" + memory: "1.5Gi" env: - name: MEM_MAX_RAM_PERCENTAGE value: "50.0" - How much RAM is used at max for Java Heap can be easily adjusted by using the - JVM option ``-XX:MaxRAMPercentage=xx.x``. For your convenience this has been - simplified by supporting an environment variable ``${MEM_MAX_RAM_PERCENTAGE}``, - see hidden example above. *Please keep in mind: must be a floating point value!* +Fighting "Out Of Memory" situations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Java Memory usage is a very complex topic and you should take great care of +monitoring it's usage within your deployment. + +Most operators are aware of the *Heap*, where values of POJOs are stored during +the applications runtime. 
You should also be aware of the *Stack*, *Metaspace*
+and other memory terms, like *Native Memory*.
+
+The following sections are not so much about prevention of these situations
+(that's why you're doing monitoring, right?), but about what you can do to
+troubleshoot when bad things happen.
+
+
+
+Shortage of Heap Space
+......................
+
+Many times, your application is killed by running out of memory. Often this is
+related to running out of *Heap Space*, the most important type of memory for a
+Java application.
+
+If you don't know what *Garbage Collection* is and how memory allocation works
+in Java, you can find lots of resources online. Some examples:
+
+- https://www.baeldung.com/java-stack-heap
+- https://www.baeldung.com/jvm-garbage-collectors
+- https://www.youtube.com/watch?v=kR8_r3kMK-Y
+
+When running out of *Heap Space*, your JVM will throw an ``OutOfMemoryError``
+exception (see also `Oracle docs on OutOfMemoryError`_).
+
+In these cases, a *heap dump* will be written to ``$DUMPS_DIR``, which is
+``/dumps`` by default. Those can be analyzed using tools like `Eclipse MAT`_.
+
+.. note::
+    In the ``dev`` persona, a sidecar container is deployed from which you
+    can use ``kubectl cp`` to download the ``.hprof`` dump file for analysis.
+
+You might want to deploy your own solution, maybe uploading dumps to an object
+store, sending notifications or other great ideas. Some inspirations:
+
+- https://link.medium.com/Ifnt4khj68
+- https://link.medium.com/gZfpnGTH48
+- https://danlebrero.com/2018/11/20/how-to-do-java-jvm-heapdump-in-kubernetes
+
+
+
+Shortage of other memory spaces
+...............................
+For many users of Java applications, memory types other than the *Heap* are
+less commonly known. Compared to the huge amount of Heap space, those are
+often rather small, yet they might get you into trouble.
+
+To understand how this might happen, you need to be aware that the Linux kernel
+will non-gracefully kill processes exceeding their memory limits. A container
+running on a Kubernetes cluster usually should have resource limit restrictions
+applied. (Java will align its memory usage to these as outlined above.) Once
+the container starts using more RAM than the limits allow, the out-of-memory
+killer will stop the process (usually the only one running in a single container)
+and Kubernetes will log an ``OOMKilled`` event.
+
+Depending on how much RAM budget you have left on your nodes, you might simply
+raise the limits, or you might want to analyze the problem more deeply: a
+memory leak will come back no matter how much you raise the limits.
+
+There are some excellent resources to read when you go for a hunt:
+
+- https://devcenter.heroku.com/articles/java-memory-issues
+- https://stackoverflow.com/questions/38597965
+- https://medium.com/swlh/native-memory-the-silent-jvm-killer-595913cba8e7
+
+Regarding monitoring, you should always keep an eye not only on heap
+and GC stats, but also on the very basic container metrics offered by K8s.
+Try to match the JVM memory stats with those from the container. If things
+fall apart, there is a good chance you'll see it coming before the application
+dies from a memory leak.
+
+
+
+
+.. _Oracle docs on OutOfMemoryError: https://docs.oracle.com/javase/8/docs/technotes/guides/troubleshoot/memleaks002.html
+.. 
_Eclipse MAT: https://www.eclipse.org/mat/ diff --git a/docs/day1/secrets.rst b/docs/day1/secrets.rst index f9bdc8fd..9da669d0 100644 --- a/docs/day1/secrets.rst +++ b/docs/day1/secrets.rst @@ -93,11 +93,12 @@ Example: Admin account password ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The password for the superadmin account ``dataverseAdmin`` defaults to **admin1** -when you install (precise: bootstrap) Dataverse on Kubernetes using the -:ref:`demos persona `. +when you install (precise: bootstrap) Dataverse on Kubernetes running the +:doc:`/quickstart/index`. -Create a ``Secret`` first (or use some other way to get the password into the file). -(For a complete ``Secret`` example, have a look at ``/personas/demo/secrets.yaml``) +To use a different initial password, create a ``Secret`` (or use some other way +to get the password into the file). (For a complete ``Secret`` example, have a +look at ``/personas/demo/secrets.yaml``) .. code-block:: yaml diff --git a/docs/day1/storage.rst b/docs/day1/storage.rst index cfefd984..2560fbab 100644 --- a/docs/day1/storage.rst +++ b/docs/day1/storage.rst @@ -28,7 +28,11 @@ there, some hints how to cope with that on Kubernetes: - Research data area - Use a *ReadWriteMany* type volume when using a multi-instance deployment. - When using object storage like S3 or Swift, you might configure it as ``emptyDir`` volume for temporary upload only. + When using remote storage (like S3), you may use an ``emptyDir`` volume + for temporary upload storage. + + Keep in mind that as of Dataverse v4.20, you may enable multiple storage + locations, mix-n-matching local and remote storages. * - **/docroot** - Web application area - Users upload data into this area. Dataverse application writes data here. @@ -42,6 +46,65 @@ there, some hints how to cope with that on Kubernetes: you need to populate this directory. A sidecar pattern is likely to be a good fit for this, retrieving data from remote (like a Git repository). + * - **/dumps** + - Heap dumps area + - In case of :ref:`running out of heap space `, + heap dumps will be saved here for further shipping, analysis etc. + By default this is backed by an ``emptyDir`` temporary storage volume. + Should be monitored in a sidecar container. + +Temporary Data Storage +^^^^^^^^^^^^^^^^^^^^^^ +Depending on the ``dataverse_files_directory`` :doc:`setting ` data +uploaded by users will be stored in a ``temp`` sub-directory of the given +path for processing (ingest) and moving to final location. With default +``/data``, this will result in temporary storage at ``/data/temp``. + +Remember to have temporary storage available at ``/dumps``. Heap dumps +might grow as large as your configured container memory limits and storing +them on the overlay filesytem of the container is a bad idea. + +"Local" Data Storage +^^^^^^^^^^^^^^^^^^^^ +*Local storage* is any kind of volume mounted into the application container. It +will look like a local filesystem to the application. + +It might be a ``hostPath`` flavored volume, a Docker volume, a NFS share or even +a clustered file system. Plenty of options are available for Kubernetes. +For any mounts, you should think about using a subdirectory of ``/data``. + +Remember that you will have to ensure proper permissions on the mounted volume +(the appserver uses ``uid=1000, gid=1000``). One option to solve this is by +adding an init container to your deployment object: + +.. 
code-block:: yaml
+
+  initContainers:
+    - name: volume-mount-hack
+      image: giantswarm/tiny-tools
+      command: ["sh"]
+      args:
+        - -c
+        - chown -c 1000:1000 /data/mystorage
+      volumeMounts:
+        - name: mystorage
+          mountPath: /data/mystorage
+
+
+Remote Data Storage
+^^^^^^^^^^^^^^^^^^^
+*Remote storage* is any kind of storage not mounted as a local filesystem,
+reachable over a network and having storage driver support inside Dataverse.
+Examples are any S3-based or Swift object stores.
+
+They can be activated via :doc:`configuration ` in your ``ConfigMap``.
+Please see upstream documentation about
+:guide_dv:`file storage `
+for extensive docs on the available options. :ref:`full-example` provides
+a handy S3 example using Minio.
+
+Further explanation and an example can be found in the integration docs about
+:doc:`/day3/objectstore`.
 
 Index server
 ------------
diff --git a/docs/day2/job-index.rst b/docs/day2/job-index.rst
index e330d68e..02675b1c 100644
--- a/docs/day2/job-index.rst
+++ b/docs/day2/job-index.rst
@@ -24,7 +24,7 @@ in the docs. Simply deploy it during off-hours (or fork and create a ``CronJob``
 
 .. code-block:: shell
 
-    kubectl create -f k8s/dataverse/jobs/inplace-reindex.yaml
+    kubectl create -f https://gitcdn.link/repo/IQSS/dataverse-kubernetes/release/k8s/dataverse/jobs/inplace-reindex.yaml
 
 .. hint::
diff --git a/docs/day2/job-metadata.rst b/docs/day2/job-metadata.rst
index f58d27c3..6ddf2ec6 100644
--- a/docs/day2/job-metadata.rst
+++ b/docs/day2/job-metadata.rst
@@ -21,7 +21,7 @@ get your custom metadata inside that job somehow, see below.
 
 .. code-block:: shell
 
-    kubectl create -f k8s/dataverse/jobs/metadata-update.yaml
+    kubectl create -f https://gitcdn.link/repo/IQSS/dataverse-kubernetes/release/k8s/dataverse/jobs/metadata-update.yaml
 
 .. important::
@@ -48,7 +48,7 @@ to re-export all citation metadata. A simple job does the trick:
 
 .. code-block:: shell
 
-    kubectl create -f k8s/dataverse/jobs/metadata-reexport.yaml
+    kubectl create -f https://gitcdn.link/repo/IQSS/dataverse-kubernetes/release/k8s/dataverse/jobs/metadata-reexport.yaml
 
 Having a large set of published dataverses and datasets, you might want to run
 this during off-hours.
diff --git a/docs/day2/upgrade.rst b/docs/day2/upgrade.rst
index d7ec206b..11c614d8 100644
--- a/docs/day2/upgrade.rst
+++ b/docs/day2/upgrade.rst
@@ -63,3 +63,53 @@ a similar project named `Imago `_ to sync your
 
 This works independently from your ``imagePullPolicy`` by using the ``sha256``
 image checksum in background.
+
+
+
+Flyway Database Migration Issues
+--------------------------------
+When using a custom version of Dataverse (e.g. when you maintain a small
+fork and deploy it based on images of this project), you might run into a
+situation where deployments fail due to "out of order" migrations.
+
+When deploying, `Flyway `_ takes care of keeping
+the database tables in line with the object-relational mapping.
+Backporting or adding a custom change might leave you in a state where
+Flyway complains about migrations not being applicable because newer migrations
+already happened. The deployment will fail for good.
+
+To solve this situation, you need to apply the migrations "out of order".
+Add the Flyway plugin to ``pom.xml`` (when you maintain a fork, it should
+be pretty clear where to do this).
+
+.. 
code-block:: xml + + + org.flywaydb + flyway-maven-plugin + ${flyway.version} + + jdbc:postgresql://localhost/dataverse + dataverse + YOUR DATABASE PASSWORD HERE + + + + org.postgresql + postgresql + 42.2.12 + + + + +Now forward your PostgreSQL server to ``localhost`` (keep it running): + +.. code-block:: shell + + kubectl port-forward service/postgresql 5432 + +And then apply the migrations out of order: + +.. code-block:: shell + + mvn -Dflyway.outOfOrder=true flyway:migrate diff --git a/docs/day3/index.rst b/docs/day3/index.rst index 2fbb46c7..f4c5ff5d 100644 --- a/docs/day3/index.rst +++ b/docs/day3/index.rst @@ -2,12 +2,23 @@ Day 3 - Integrations ==================== -This section is is about getting fancy. Day 1 and day 2 operations are mostly -about covering the pure basics, while the true power of Dataverse unleashes -with its fabulous integrations of and in other tools. +This section is about getting fancy. :doc:`Day 1 ` and :doc:`day 2 +` operations are mostly about covering the pure basics, while the +true power of Dataverse unleashes with its fabulous integrations of and in other +tools. Previewers, data analysis, data capturing and many more await you. + +.. hint:: + + Currently, none of these are supported or maintained by this project, although + this is a mid-term goal. If you feel a need, raise an issue. You are most + welcome to contribute. + +Apart from external tools, the following provides a list of integrations +supported by this project: .. toctree:: :maxdepth: 2 :caption: Contents: auth + objectstore diff --git a/docs/day3/objectstore.rst b/docs/day3/objectstore.rst new file mode 100644 index 00000000..e5583026 --- /dev/null +++ b/docs/day3/objectstore.rst @@ -0,0 +1,32 @@ +============== +Object Storage +============== + +Dataverse offers storing data on object storage like AWS S3 or OpenStack Swift. +You should read upstream docs on this: + +- :guide_dv:`Configure Storage Locations ` +- :guide_dv:`Direct Access for Big Data ` + +Since Dataverse v4.20, multiple storage locations are supported for both local +or remote storage or mixed (see also :doc:`/day1/storage`). + +To enable object storage locations, you need to follow three steps: + +1. Create your ``Secrets``. If you deploy both your storage solution and + Dataverse to the same cluster / location, you should share them somehow. + Docs on how to create secrets for Dataverse and apply them to the pod + can be found in :doc:`/day1/secrets`. +2. Prepare and/or deploy your object storage solution +3. Configure Dataverse to make use of it. See :doc:`/day1/config` for details + on how to create a configuration. + + +Minio Example Demo +------------------ +In the :tree:`Minio Integration Demo ` you can +find a very basic example how to deploy a simple Minio service to your cluster +plus patching the Dataverse deployment to include the S3 credentials from +the ``Secret``. + +Simply deploy with ``kubectl apply -k github.com/IQSS/dataverse-kubernetes/personas/demo-integrate-minio`` diff --git a/docs/development/index.rst b/docs/development/index.rst index a0de9107..de17fb82 100644 --- a/docs/development/index.rst +++ b/docs/development/index.rst @@ -3,9 +3,9 @@ This section is primarily targeted at people developing the Dataverse application on a Kubernetes platform, run CI jobs or similar. -================= -Development usage -================= +=========================== +Dataverse Development Usage +=========================== Images on Docker Hub are meant for production usage or quick demos. 
When developing Dataverse, testing a new feature not yet shipped in a release or @@ -60,9 +60,10 @@ will access your cluster. Your options: 1. When using ``skaffold dev`` (see below!), you can add ``--port-forward``. See also `port forward docs `_. This is currently not possible with ``run`` mode. -2. When using Minikube, see :ref:`get-started/demo/minikube:Make Dataverse reachable via browser` +2. When using Minikube, see :ref:`quickstart/minikube:Make Dataverse reachable via browser` 3. When using KinD, easiest way forward is ``kubectl port-forward``. ``Ingress`` is also possible, see `upstream doc `_. +4. When using k3d, easiest way to go is by `exposing the ingress `_ Example workflow for local development ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/docs/development/mail.rst b/docs/development/mail.rst index 5e062a9f..2d132147 100644 --- a/docs/development/mail.rst +++ b/docs/development/mail.rst @@ -10,7 +10,7 @@ just use `MailCatcher `_ as a small extra deployment: .. code-block:: shell - kubectl create -f k8s/utils/mailcatcher.yaml + kubectl create -f https://gitcdn.link/repo/IQSS/dataverse-kubernetes/release/personas/demo/mailcatcher.yaml minikube service mailcatcher (The last will open the web UI in your default browser.) diff --git a/docs/development/prepare.rst b/docs/development/prepare.rst index e6a89843..cb364c9b 100644 --- a/docs/development/prepare.rst +++ b/docs/development/prepare.rst @@ -7,19 +7,17 @@ Install Toolchain For efficient workflows, tools make life easier. Please install: -1. `skaffold`_, v1.7.0 +1. `skaffold`_ 2. `kustomize`_, v2.0.3 (same as in ``kubectl``, necessary as long as `this issue `_ hasn't been resolved) +3. `kubectl`_ When you opt for using a local cluster (see below), add: 1. `minikube`_ or -2. `kind`_, v0.7.0 plus `Docker `_ - -.. important:: - - The tools mentioned in :doc:`/get-started/index` are obligatory. +2. `kind`_ plus `Docker`_ +3. `k3d`_, using `k3s`_ plus `Docker`_ .. tip:: @@ -58,7 +56,7 @@ Skaffold docs, search via Google, etc. Again: *PRs welcome.* Minikube '''''''' -Please follow :ref:`get-started/demo/minikube:Start with setup of *minikube* VM` +Please follow :ref:`quickstart/minikube:Start with setup of *minikube* VM` to create your cluster. No need to deploy yet, we are just preparing for now. **K**\ ubernetes **IN** **D**\ ocker ("KinD") @@ -109,7 +107,7 @@ Clone source files For building images from any branch or commit of Dataverse, you need to have it in your (Docker) build context. Easily achieved by running the following -after cloning the project (``master`` branch): +after cloning the project (``release`` branch): .. code-block:: shell @@ -179,5 +177,9 @@ Example: Switch to feature branch in (your) fork .. _skaffold: https://skaffold.dev/docs/getting-started/#installing-skaffold .. _kustomize: https://github.com/kubernetes-sigs/kustomize/blob/master/docs/INSTALL.md +.. _kubectl: https://kubernetes.io/docs/tasks/tools/install-kubectl .. _kind: https://kind.sigs.k8s.io/docs/user/quick-start .. _minikube: https://kubernetes.io/docs/setup/learning-environment/minikube +.. _k3s: https://k3s.io +.. _k3d: https://k3d.io +.. _Docker: https://docs.docker.com/install diff --git a/docs/get-started/index.rst b/docs/get-started/index.rst deleted file mode 100644 index 46f3bc26..00000000 --- a/docs/get-started/index.rst +++ /dev/null @@ -1,178 +0,0 @@ -=============== -Getting started -=============== - -.. 
toctree:: - :maxdepth: 2 - :hidden: - - demo/k3s - demo/minikube - -`The dataverse project `_ describes itself as: - - | Dataverse is an open source web application to share, preserve, cite, explore, - | and analyze research data. It facilitates making data available to others, and - | allows you to replicate others' work more easily. Researchers, journals, data - | authors, publishers, data distributors, and affiliated institutions all receive - | academic credit and web visibility. - ------------------------------------- -Introduction: what's this all about? ------------------------------------- -This project aims at offering a new way to deploy, run and maintain a Dataverse -installation for any purpose on any kind of Kubernetes-based cloud infrastructure. - -You can use this on your laptop, in your on-prem datacentre or public cloud. -With the power of `Kubernetes `_, many scenarios are possible. - -.. tip:: - - | **tl;dr...** - | Quick'n'dirty demo persona on naked cluster [1]_: - - .. code-block:: shell - - kubectl apply -k github.com/IQSS/dataverse-kubernetes/personas/demo/common - - Wait. Regularly check logs and pods. Login with ``dataverseAdmin:admin1``. - - .. [1] *Your mileage may vary due to storage classes. You really should look at the* :ref:`demos ` *below.* - - - - - - ----------------------------------- -Prerequisites: First things first. ----------------------------------- - -Before you start deploying, make sure to look at the following checklist: - -| **1. Think first** - -If you never touched a commandline, never thought about why using cloud -infrastructure might be a good idea: maybe you should stick with the old, -but paved and solid ways of installing complex applications like Dataverse. - -Keen to learn new technology? Be part of the future? Want to streamline -CI/CD and your application? *Continue*. - -| **2. Install tools** - -You will at least need: - -- `kubectl `_, at least version 1.14 -- `git `_ (or another VCS) - -Depending on your use-case and targeted environment that might be just it. -If something else is necessary, it'll be documented in its respective documentation part. - -| **3. Grasp some knowledge** - -If you never used Kubernetes, but want to deploy to production, you definitely -should be reading some docs first. Some starting points: - -- https://kubernetes.io/docs/tutorials/kubernetes-basics/ -- https://ramitsurana.github.io/awesome-kubernetes -- https://kubernetes-on-aws.readthedocs.io/en/latest/admin-guide/kubernetes-in-production.html - -| **4. Grab a cluster** - -You'll need a running and fully configured Kubernetes cluster. - -- Local options: - - - `k3s `_ - - `minikube `_ - - `microk8s `_ - - `kind `_ - -- Deploy your own (production) cluster. Many tools to choose from. Examples: - - - `kops `_ - - `kubespray `_ - -- Use a hosted solution. Some example services at - - - Google: `GKE `_ - - Microsoft: `Azure AKS `_ - - Amazon: `AWS EKS `_ - - RedHat: `OpenShift `_ - -| **5. Choose persistent identifiers** - -When you want to register datasets and/or files in your deployment to -DataCite, EZID or similar, you will need active accounts. Be sure to have -access credentials around. As an alternative, you might want to use the FAKE provider. - -.. 
seealso:: - - For more information on Dataverses supported providers: - - - `Installation Guide: Persistent Identifiers and Publishing Datasets `_ - - `Installation Guide: Configuration Option :DoiProvider `_ - - - - - ---------------------------------------------- -Use Cases: What installation persona are you? ---------------------------------------------- - -.. _demos: - -1. Demo time! -------------- -Demos provide showcases what Dataverse can do for you. Currently pre-packaged: - -- Local - - - Using ``minikube``, see :doc:`demo/minikube` - - Using ``k3s``, see :doc:`demo/k3s` - -- Cloud-based - - - Using ``kops`` on Amazon EC2 VMs, see :doc:`demo/aws-kops` - -2. Developing is my thing -------------------------- - -There is an entire section in this guide dealing with how to use this project -for developing Dataverse, run development snapshots for tech demos, etc. - -Please go to :doc:`development docs here `. - -3. Gimme the *production* stuff ----------------------------------- - -.. todo:: - This needs yet to be refactored. - -You should make yourself familiar with a series of documentation articles, linked below: - -* [Container images](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/images.md) -* [Persistance storage](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/storage.md) -* [Detailed insight into inner workings](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/how-it-works.md) -* [Using Kubernetes descriptors from this project](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/reuse.md) -* [Configuration of Dataverse](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/config.md) -* [Secrets usage](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/secrets.md) -* [(Custom) Metadata Blocks](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/metadata.md) -* [Maintenance Jobs and Little Helpers](https://github.com/IQSS/dataverse-kubernetes/blob/master/docs/little-helpers.md) - -Please be aware that this project currently only offers images and support -for basic usage. Integrations are not yet part of this, but may be added as needed. -See also relevant docs within Dataverse guides and upstream projects. - -4. Integrate yourself! ----------------------- -One of the true superpowers of Dataverse is its ability to integrate with external -tools. Previewers, data analysis, data capturing and many more await you. - -.. hint:: - - Currently, none of these are supported or maintained by this project, although - this is a mid-term goal. If you feel a need, raise an issue. You are most - welcome to contribute. diff --git a/docs/images/build.rst b/docs/images/build.rst index fa7deffa..37f11493 100644 --- a/docs/images/build.rst +++ b/docs/images/build.rst @@ -19,8 +19,8 @@ Simple with Docker (or Podman) after cloning the project and accessing the sourc .. code-block:: shell - docker build -t iqss/dataverse-k8s:4.19 -f docker/dataverse-k8s/glassfish/Dockerfile . - docker build -t iqss/solr-k8s:4.19 docker/solr-k8s + docker build -t iqss/dataverse-k8s:4.20 -f docker/dataverse-k8s/glassfish/Dockerfile . + docker build -t iqss/solr-k8s:4.20 docker/solr-k8s *Please remember to change the tag above as appropriate. 
You should be* diff --git a/docs/images/dataverse-k8s.md b/docs/images/dataverse-k8s.md index b43c9d95..c53faf4e 100644 --- a/docs/images/dataverse-k8s.md +++ b/docs/images/dataverse-k8s.md @@ -1,10 +1,10 @@ # Image "dataverse-k8s" -[![Upstream](https://img.shields.io/badge/Dataverse-v4.19-important.svg)](https://github.com/IQSS/dataverse/releases/v4.19) +[![Upstream](https://img.shields.io/badge/Dataverse-v4.20-important.svg)](https://github.com/IQSS/dataverse/releases/v4.20) [![Hub](https://img.shields.io/static/v1.svg?label=image&message=dataverse-k8s&logo=docker)](https://hub.docker.com/r/iqss/dataverse-k8s) ![Pulls](https://img.shields.io/docker/pulls/iqss/dataverse-k8s) [![RTD](https://img.shields.io/readthedocs/dataverse-k8s)](https://dataverse-k8s.readthedocs.io) -[![Build](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-dataverse/job/master/badge/icon?subject=master&status=pushed&color=purple)](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-dataverse/job/master) +[![Build](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-dataverse/job/release/badge/icon?subject=release&status=pushed&color=purple)](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-dataverse/job/release) This container image enables you to run [Dataverse](https://dataverse.org), a Java EE based web application for research data management, on a container platform. @@ -15,15 +15,15 @@ like [Docker](https://docker.io) or [podman](https://podman.io). ## Supported tags -- `latest`: master branch based build ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/master/docker/dataverse-k8s/glassfish/Dockerfile)) -- `payara`: master branch based build, payara flavor ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/master/docker/dataverse-k8s/payara/Dockerfile)) -- `build-cache`: a maven cache image to speedup dev builds, refreshed every night based on latest upstream `develop`. ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/master/docker/dataverse-k8s/build-cache/Dockerfile), [`Jenkinsfile`](https://github.com/IQSS/dataverse-kubernetes/blob/master/docker/dataverse-k8s/build-cache/Jenkinsfile)) -- `4.19`, ..., `4.15.1`, ..., `4.11`: stable (tagged) releases +- `latest`: release branch based build ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/release/docker/dataverse-k8s/glassfish/Dockerfile)) +- `payara`: release branch based build, payara flavor ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/release/docker/dataverse-k8s/payara/Dockerfile)) +- `build-cache`: a maven cache image to speedup dev builds, refreshed every night based on latest upstream `develop`. ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/release/docker/dataverse-k8s/build-cache/Dockerfile), [`Jenkinsfile`](https://github.com/IQSS/dataverse-kubernetes/blob/release/docker/dataverse-k8s/build-cache/Jenkinsfile)) +- `4.20`, ..., `4.15.1`, ..., `4.11`: stable (tagged) releases - Using [upstream release schema](https://github.com/IQSS/dataverse/releases/) down to `4.11`. - See also [list on Docker Hub](https://hub.docker.com/r/iqss/dataverse-k8s/tags?page=1&ordering=last_updated&name=4.) for releases - - Last stable tag ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/v4.19/docker/dataverse-k8s/glassfish/Dockerfile)) -- `4.19-payara`, ..., `4.19-payara`: same as above, but using Payara 5 as appserver. 
+ - Last stable tag ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/v4.20/docker/dataverse-k8s/glassfish/Dockerfile)) +- `4.20-payara`, ...: same as above, but using Payara 5 as appserver. ## Quick reference @@ -62,6 +62,10 @@ to this user on any volumes (except secrets) used for the below directories. Installation root of application server, WAR files, scripts etc. See `Dockerfile` for all details. +- **/dumps**
+ Mount a volume here to access Java Heap dumps when running out of heap memory. + Also available as `$DUMPS_DIR`. + ### Secrets and Credentials Currently understood secrets in the container, mounted at `$SECRETS_DIR` (see diff --git a/docs/images/index.rst b/docs/images/index.rst index 48ef044d..69274e5c 100644 --- a/docs/images/index.rst +++ b/docs/images/index.rst @@ -6,7 +6,7 @@ This project provides ``Dockerfile`` s and scripts included in container images to be used for Dataverse deployment and maintenance. 1. You can use images from Docker Hub. Those are built and tested by CI on every - push to ``master`` and tags. + push to ``release`` and tags. 2. Derive from these and push your customized image to a place you like. 3. Build the genuine images yourself and push to a registry of your choice. diff --git a/docs/images/solr-k8s.md b/docs/images/solr-k8s.md index 49046140..d7d1f416 100644 --- a/docs/images/solr-k8s.md +++ b/docs/images/solr-k8s.md @@ -1,16 +1,16 @@ # Image "solr-k8s" -[![Upstream](https://img.shields.io/badge/Dataverse-v4.19-important.svg)](https://github.com/IQSS/dataverse/releases/v4.19) +[![Upstream](https://img.shields.io/badge/Dataverse-v4.20-important.svg)](https://github.com/IQSS/dataverse/releases/v4.20) [![Hub](https://img.shields.io/static/v1.svg?label=image&message=solr-k8s&logo=docker)](https://hub.docker.com/r/iqss/solr-k8s) [![Solr](https://img.shields.io/static/v1.svg?label=upstream&message=7.3.1&logo=docker)](https://hub.docker.com/_/solr) ![Pulls](https://img.shields.io/docker/pulls/iqss/solr-k8s) [![RTD](https://img.shields.io/readthedocs/dataverse-k8s)](https://dataverse-k8s.readthedocs.io) -[![Build](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-solr/job/master/badge/icon?subject=master&status=pushed&color=purple)](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-solr/job/master) +[![Build](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-solr/job/release/badge/icon?subject=release&status=pushed&color=purple)](https://jenkins.dataverse.org/job/dataverse-k8s/job/image-solr/job/release) This container image includes a dependency service to run [Dataverse](https://dataverse.org), a Java EE based web application for research data management, on a container platform. It is derived from [upstream Solr images](https://hub.docker.com/_/solr), [using the -required version](http://guides.dataverse.org/en/4.19/installation/prerequisites.html#solr). +required version](http://guides.dataverse.org/en/4.20/installation/prerequisites.html#solr). It is primarily targeted to be used in production on [Kubernetes](https://kubernetes.io), but if you follow the same conventions, you should be able to use it with other tools @@ -18,12 +18,12 @@ like [Docker](https://docker.io) or [podman](https://podman.io). ## Supported tags -- `latest`: master branch based build ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/master/docker/dataverse-k8s/glassfish/Dockerfile)) -- `4.19`, ..., `4.15.1`, ..., `4.11`: stable (tagged) releases +- `latest`: release branch based build ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/release/docker/dataverse-k8s/glassfish/Dockerfile)) +- `4.20`, ..., `4.15.1`, ..., `4.11`: stable (tagged) releases - Using [upstream release schema](https://github.com/IQSS/dataverse/releases/) down to `4.11`. - See also [list on Docker Hub](https://hub.docker.com/r/iqss/dataverse-k8s/tags?page=1&ordering=last_updated&name=4.) 
for releases - - Last stable tag ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/v4.19/docker/solr-k8s/Dockerfile)) + - Last stable tag ([`Dockerfile`](https://github.com/IQSS/dataverse-kubernetes/blob/v4.20/docker/solr-k8s/Dockerfile)) ## Quick reference @@ -54,8 +54,8 @@ to this user on any volumes used for the below directories. Also available as `$SCHEMA_DIR` Please read the detailed docs about Solr schema provisioning: - - [Upstream: updating Solr schema](http://guides.dataverse.org/en/4.19/admin/metadatacustomization.html#updating-the-solr-schema). - - [Kubernetes `Job`s for Search Index](https://dataverse-k8s.rtfd.io/en/4.19/day2/job-index.html) + - [Upstream: updating Solr schema](http://guides.dataverse.org/en/4.20/admin/metadatacustomization.html#updating-the-solr-schema). + - [Kubernetes `Job`s for Search Index](https://dataverse-k8s.rtfd.io/en/4.20/day2/job-index.html) - **/scripts**
A collection of scripts for init containers and sidecars. See guide for more information on those scripts. Also available as `$SCRIPT_DIR`. diff --git a/docs/img/building-blocks.png b/docs/img/building-blocks.png new file mode 100644 index 00000000..7f1c9c30 Binary files /dev/null and b/docs/img/building-blocks.png differ diff --git a/docs/img/building-blocks.svg b/docs/img/building-blocks.svg new file mode 100644 index 00000000..8e8802d6 --- /dev/null +++ b/docs/img/building-blocks.svg @@ -0,0 +1,554 @@ +[SVG source, 554 lines: "building blocks" diagram showing the community project's building blocks: container Images, K8s Objects, Compose Support (tbd), an Operator (tbd), DVCLI (Dataverse from the cmdline) and more tooling (integrations, auth, ...) around the Cloud & Container Guide] diff --git a/docs/index.rst b/docs/index.rst index a7ec1517..6a5323a7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,25 +1,74 @@ +.. warning:: + **tl;dr:** Just :doc:`take me to the quickstart demos `. ===================================== The Dataverse Cloud & Container Guide ===================================== -.. include:: ../README.rst +This project aims at offering a new way to deploy, run and maintain a Dataverse +installation for any purpose on any kind of Kubernetes-based cloud infrastructure. +You can use it on your laptop, in your on-prem datacentre or public cloud. +With the power of `Kubernetes `_, many scenarios are possible. .. important:: | This is a **community driven and supported project**, unsupported by `IQSS `_, Harvard. - | Current main drivers are `Forschungszentrum Jülich `_ and `DANS KNAW `_. + | Current main driver is `Forschungszentrum Jülich `_. | | If you need help, please :issue:`open an issue ` or find us on `IRC `_ or Twitter. + +.. image:: img/building-blocks.svg + :height: 250px + +Content: what can you do for me? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +We provide you with simple to re-use Kubernetes objects as building blocks. +Those help with + +- :doc:`deployments on day 1 `, +- :doc:`operations on day 2 ` and +- :doc:`integrations on day 3 `. + +And if you're into developing Dataverse, we offer an easy-to-use +:doc:`development setup approach `. + +We're also maintaining the :doc:`container images ` that deployments +build upon. + + + +Context: what is Dataverse? +^^^^^^^^^^^^^^^^^^^^^^^^^^^ +`The dataverse project `_ describes itself as: + + | Dataverse is an open source web application to share, preserve, cite, explore, + | and analyze research data. It facilitates making data available to others, and + | allows you to replicate others' work more easily. Researchers, journals, data + | authors, publishers, data distributors, and affiliated institutions all receive + | academic credit and web visibility. + + + +Media: where are the stories? +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This project was presented at a few Dataverse-related events: + +- Presentation at European Dataverse Workshop 2020, Tromso, https://doi.org/10.7557/5.5427 +- Presentation at Dataverse Community Meeting 2020, Online Conference, https://youtu.be/OTiEkaDc7j8?t=3817 + + + .. 
toctree:: :maxdepth: 4 :hidden: - get-started/index + quickstart/index day1/index day2/index day3/index images/index development/index - ./roadmap diff --git a/docs/quickstart/docker-compose.rst b/docs/quickstart/docker-compose.rst new file mode 100644 index 00000000..beb7521e --- /dev/null +++ b/docs/quickstart/docker-compose.rst @@ -0,0 +1,32 @@ +============================== +Using *docker-compose* persona +============================== + +.. hint:: + This is in a proof of concept stage. Enhance if you feel this is + useful to you. + +This persona is about setting up a demo environment very quickly with +installing `Docker`_ and `docker-compose`_ only. + +Simply clone the project to any directory and execute + +.. code-block:: shell + + docker-compose up + +This will build all images necessary and deploy the application. +Once deployment is done (you'll see in the logs when auto deploy worked), +you'll need to bootstrap the database: + +.. code-block:: shell + + docker-compose run --no-deps --rm dataverse scripts/bootstrap-job.sh + +Then you can access Dataverse at http://localhost:8080 and login +with the default ``dataverseAdmin:admin1``. Enjoy! + + + +.. _Docker: https://docs.docker.com/install +.. _docker-compose: https://docs.docker.com/compose/install diff --git a/docs/get-started/demo/img/dataverse-dataversedemo.png b/docs/quickstart/img/dataverse-dataversedemo.png similarity index 100% rename from docs/get-started/demo/img/dataverse-dataversedemo.png rename to docs/quickstart/img/dataverse-dataversedemo.png diff --git a/docs/get-started/demo/img/dataverse-localhost-8080.png b/docs/quickstart/img/dataverse-localhost-8080.png similarity index 100% rename from docs/get-started/demo/img/dataverse-localhost-8080.png rename to docs/quickstart/img/dataverse-localhost-8080.png diff --git a/docs/get-started/demo/img/k3s-deploy.png b/docs/quickstart/img/k3s-deploy.png similarity index 100% rename from docs/get-started/demo/img/k3s-deploy.png rename to docs/quickstart/img/k3s-deploy.png diff --git a/docs/get-started/demo/img/k3s-done.png b/docs/quickstart/img/k3s-done.png similarity index 100% rename from docs/get-started/demo/img/k3s-done.png rename to docs/quickstart/img/k3s-done.png diff --git a/docs/get-started/demo/img/k3s-setup.png b/docs/quickstart/img/k3s-setup.png similarity index 100% rename from docs/get-started/demo/img/k3s-setup.png rename to docs/quickstart/img/k3s-setup.png diff --git a/docs/get-started/demo/img/k3s-wait.png b/docs/quickstart/img/k3s-wait.png similarity index 100% rename from docs/get-started/demo/img/k3s-wait.png rename to docs/quickstart/img/k3s-wait.png diff --git a/docs/get-started/demo/img/minikube-deploy.png b/docs/quickstart/img/minikube-deploy.png similarity index 100% rename from docs/get-started/demo/img/minikube-deploy.png rename to docs/quickstart/img/minikube-deploy.png diff --git a/docs/get-started/demo/img/minikube-done.png b/docs/quickstart/img/minikube-done.png similarity index 100% rename from docs/get-started/demo/img/minikube-done.png rename to docs/quickstart/img/minikube-done.png diff --git a/docs/get-started/demo/img/minikube-ingress.png b/docs/quickstart/img/minikube-ingress.png similarity index 100% rename from docs/get-started/demo/img/minikube-ingress.png rename to docs/quickstart/img/minikube-ingress.png diff --git a/docs/get-started/demo/img/minikube-portfwd-8080.png b/docs/quickstart/img/minikube-portfwd-8080.png similarity index 100% rename from docs/get-started/demo/img/minikube-portfwd-8080.png rename to 
docs/quickstart/img/minikube-portfwd-8080.png diff --git a/docs/get-started/demo/img/minikube-setup.png b/docs/quickstart/img/minikube-setup.png similarity index 100% rename from docs/get-started/demo/img/minikube-setup.png rename to docs/quickstart/img/minikube-setup.png diff --git a/docs/quickstart/index.rst b/docs/quickstart/index.rst new file mode 100644 index 00000000..60f2e0bd --- /dev/null +++ b/docs/quickstart/index.rst @@ -0,0 +1,23 @@ +================= +Quickstart / Demo +================= + +.. toctree:: + :maxdepth: 2 + :hidden: + + k3s + minikube + docker-compose + +.. _demos: + +Our quick to start demos provide showcases what Dataverse can do for you. +Currently pre-packaged to be run on your local machine: + +- Using ``minikube``, see :doc:`minikube` +- Using ``k3s``, see :doc:`k3s` +- Using ``docker-compose``, see :doc:`docker-compose` + +:ref:`day3/objectstore:Minio Example Demo` bases on these, but includes +integrating an S3 compatible object store, too. diff --git a/docs/get-started/demo/k3s.rst b/docs/quickstart/k3s.rst similarity index 92% rename from docs/get-started/demo/k3s.rst rename to docs/quickstart/k3s.rst index 90b3d158..9c35f47d 100644 --- a/docs/get-started/demo/k3s.rst +++ b/docs/quickstart/k3s.rst @@ -52,13 +52,14 @@ Let's get ready to Dataverse... Please be aware that the *k3s.io* persona is using `Kustomize `_ to re-use the provided descriptors, but suited for usage with K3s. - Please ensure having *kubectl* v1.14 or later installed as described in :doc:`/get-started/index`. + Please ensure having *kubectl* v1.14 or later installed or follow the + `kubectl installation docs `_. Now start to deploy Dataverse plus any necessary services and bootstrap via Kustomize: .. code-block:: shell - kubectl apply -k ./personas/demo-k3s + kubectl apply -k github.com/IQSS/dataverse-kubernetes/personas/demo-k3s .. image:: img/k3s-deploy.png diff --git a/docs/get-started/demo/minikube.rst b/docs/quickstart/minikube.rst similarity index 87% rename from docs/get-started/demo/minikube.rst rename to docs/quickstart/minikube.rst index 9ada0319..7dd6728f 100644 --- a/docs/get-started/demo/minikube.rst +++ b/docs/quickstart/minikube.rst @@ -16,13 +16,13 @@ Booting your *minikube* K8s cluster: .. code-block:: shell - minikube start --memory=4096 + minikube start --memory 6144 --cpus 4 --disk-size 30g .. image:: img/minikube-setup.png .. important:: - Please provide at least 4 GB of RAM for the Minikube VM, as Dataverse will - use **a lot** of RAM during deployment and at least 1024 MB when idle. + Please provide at least 6 GB of RAM for the Minikube VM, as Dataverse will + use **a lot** of RAM (see :doc:`/day1/resources`). Remember: this is a VM, so you should have at least 8 GB available in your hardware. .. note:: @@ -37,7 +37,7 @@ its ready) to create a demo: .. code-block:: shell - kubectl apply -k personas/demo-minikube + minikube kubectl -- apply -k github.com/IQSS/dataverse-kubernetes/personas/demo-minikube .. image:: img/minikube-deploy.png @@ -64,12 +64,12 @@ Add the `Ingress` IP address to your `/etc/hosts`: minikube addons enable ingress # wait for about 1 minute... - kubectl get ingress + minikube kubectl -- get ingress Take a note of the IP address (it might take a while till it appears, try again) and add it to ``/etc/hosts``, replacing ``XXX.XXX.XXX.XXX`` with it: -.. code-block:: hosts +.. 
code-block:: XXX.XXX.XXX.XXX dataverse.demo @@ -102,6 +102,9 @@ or for other reasons, you can always use the ``kubectl`` builtin reverse proxy: This command has the advantage to work in all cases, remotely or not, as long as you have access to the K8s API server. + Please ensure having *kubectl* v1.14 or later installed or follow the + `kubectl installation docs `_. + Now access your freshly baked Dataverse demo via your browser at http://localhost:8080. .. image:: img/dataverse-localhost-8080.png diff --git a/docs/requirements.txt b/docs/requirements.txt index 9c5f788b..2497a0f7 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,6 @@ -sphinx==1.8.5 -sphinxcontrib-plantuml==0.17.1 +sphinx==3.2.1 +sphinxcontrib-plantuml==0.18 sphinx_materialdesign_theme==0.1.11 recommonmark==0.6.0 -sphinxcontrib-contentui==0.2.3 -sphinxcontrib-images==0.8.0 +sphinxcontrib-contentui==0.2.5 +sphinxcontrib-images==0.9.2 diff --git a/docs/roadmap.rst b/docs/roadmap.rst deleted file mode 100644 index 6f8ae51b..00000000 --- a/docs/roadmap.rst +++ /dev/null @@ -1,14 +0,0 @@ -======= -Roadmap -======= - -.. todo:: - This needs yet to be refactored. Goals are missing, a vision is absent. - -- At a later point in time, an [Operator](https://coreos.com/operators/) might be added for even easier usage. - -- The docker images should at some point be moved into the upstream code, - so they can be build and used for development purposes, too. - See also [issue 5292](https://github.com/IQSS/dataverse/issues/5292) on this. - -- This should support testing S3 remote file storage with Minio out of the box. diff --git a/k8s/dataverse/configmap.yaml b/k8s/dataverse/configmap.yaml deleted file mode 100644 index 5514049f..00000000 --- a/k8s/dataverse/configmap.yaml +++ /dev/null @@ -1,11 +0,0 @@ ---- -kind: ConfigMap -apiVersion: v1 -metadata: - name: dataverse - labels: - app.kubernetes.io/name: configmap - app.kubernetes.io/version: "1.0" - app.kubernetes.io/component: configmap - app.kubernetes.io/part-of: dataverse - app.kubernetes.io/managed-by: kubectl diff --git a/k8s/dataverse/deployment.yaml b/k8s/dataverse/deployment.yaml index 7283aad9..b5a7edf7 100644 --- a/k8s/dataverse/deployment.yaml +++ b/k8s/dataverse/deployment.yaml @@ -5,7 +5,7 @@ metadata: name: dataverse labels: app.kubernetes.io/name: dataverse - app.kubernetes.io/version: "4.19" + app.kubernetes.io/version: "4.20" app.kubernetes.io/component: appserver app.kubernetes.io/part-of: dataverse app.kubernetes.io/managed-by: kubectl @@ -55,6 +55,8 @@ spec: mountPath: /data - name: docroot mountPath: /docroot + - name: heapdumps + mountPath: /dumps - name: db-secret mountPath: "/secrets/db" readOnly: true @@ -103,6 +105,8 @@ spec: - name: doi-secret secret: secretName: dataverse-doi + - name: heapdumps + emptyDir: {} # This is here because of IQSS/dataverse-kubernetes#177. 
TBR for #178 - name: primefaces-upload emptyDir: {} diff --git a/k8s/dataverse/kustomization.yaml b/k8s/dataverse/kustomization.yaml index dc8bea50..805f4881 100644 --- a/k8s/dataverse/kustomization.yaml +++ b/k8s/dataverse/kustomization.yaml @@ -3,10 +3,9 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: - deployment.yaml - - configmap.yaml - pvc.yaml - svc.yaml images: - name: iqss/dataverse-k8s - newTag: "4.19" + newTag: "4.20" diff --git a/k8s/kustomization.yaml b/k8s/kustomization.yaml index f38f4e30..27edc04f 100644 --- a/k8s/kustomization.yaml +++ b/k8s/kustomization.yaml @@ -7,6 +7,6 @@ bases: images: - name: iqss/dataverse-k8s - newTag: "4.19" + newTag: "4.20" - name: iqss/solr-k8s - newTag: "4.19" + newTag: "4.20" diff --git a/k8s/solr/deployment.yaml b/k8s/solr/deployment.yaml index 36efd9f7..535a5168 100644 --- a/k8s/solr/deployment.yaml +++ b/k8s/solr/deployment.yaml @@ -5,7 +5,7 @@ metadata: name: solr labels: app.kubernetes.io/name: solr - app.kubernetes.io/version: "4.19" + app.kubernetes.io/version: "4.20" app.kubernetes.io/component: searchindex app.kubernetes.io/part-of: dataverse app.kubernetes.io/managed-by: kubectl diff --git a/k8s/solr/kustomization.yaml b/k8s/solr/kustomization.yaml index a2f13375..f9aa7e3c 100644 --- a/k8s/solr/kustomization.yaml +++ b/k8s/solr/kustomization.yaml @@ -8,4 +8,4 @@ resources: images: - name: iqss/solr-k8s - newTag: "4.19" + newTag: "4.20" diff --git a/personas/demo-integrate-minio/configmap.yaml b/personas/demo-integrate-minio/configmap.yaml new file mode 100644 index 00000000..010c0250 --- /dev/null +++ b/personas/demo-integrate-minio/configmap.yaml @@ -0,0 +1,21 @@ +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: dataverse + labels: + app.kubernetes.io/name: configmap + app.kubernetes.io/part-of: dataverse +data: + # PIDs + db_DoiProvider: FAKE + + ### FILE STORAGE + dataverse_files_directory: /data + dataverse_files_storage__driver__id: "myremote" + dataverse_files_myremote_type: "s3" + dataverse_files_myremote_label: "My Remote S3 Object Store" + dataverse_files_myremote_custom__endpoint__url: http://minio:9000 + dataverse_files_myremote_bucket__name: dataverse + # required for Minio! + dataverse_files_myremote_path__style__access: "true" diff --git a/personas/demo-integrate-minio/kustomization.yaml b/personas/demo-integrate-minio/kustomization.yaml new file mode 100644 index 00000000..94f2b62c --- /dev/null +++ b/personas/demo-integrate-minio/kustomization.yaml @@ -0,0 +1,14 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +bases: + - ../demo + - minio-standalone + +resources: + - secrets.yaml + +patchesStrategicMerge: + - configmap.yaml + - patch-pod-s3-secret.yaml + - patch-memory.yaml diff --git a/personas/demo-integrate-minio/minio-standalone/deployment.yaml b/personas/demo-integrate-minio/minio-standalone/deployment.yaml new file mode 100644 index 00000000..2bef0109 --- /dev/null +++ b/personas/demo-integrate-minio/minio-standalone/deployment.yaml @@ -0,0 +1,91 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: minio + labels: + app.kubernetes.io/name: minio + app.kubernetes.io/version: "latest" + app.kubernetes.io/component: storage + app.kubernetes.io/part-of: dataverse + app.kubernetes.io/managed-by: kubectl +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: minio + app.kubernetes.io/part-of: dataverse + revisionHistoryLimit: 1 + strategy: + # Choosen as upstream example uses this. Might be a thing of locks... 
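+      # (Most likely about volume locks: with "Recreate" the old pod is stopped before the
+      # new one starts, so the ReadWriteOnce data volume is never mounted by two MinIO pods at once.)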
+ type: Recreate + template: + metadata: + labels: + app.kubernetes.io/name: minio + app.kubernetes.io/part-of: dataverse + annotations: {} + spec: + containers: + - name: minio + image: minio/minio + args: ["server", "/data"] + resources: + requests: + memory: "0.5Gi" + cpu: "0.5" + limits: + memory: "1Gi" + cpu: "1" + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + env: + # MinIO access key and secret key + - name: MINIO_ACCESS_KEY_FILE + value: minio/access-key + - name: MINIO_SECRET_KEY_FILE + value: minio/secret-key + volumeMounts: + - name: data + mountPath: /data + - name: minio-secrets + mountPath: "/run/secrets/minio" + readOnly: true + ports: + - containerPort: 9000 + name: service + readinessProbe: + httpGet: + path: /minio/health/ready + port: service + initialDelaySeconds: 30 + periodSeconds: 30 + livenessProbe: + httpGet: + path: /minio/health/live + port: service + initialDelaySeconds: 30 + periodSeconds: 30 + securityContext: + fsGroup: 1000 + initContainers: + - name: perm-and-bucket + image: giantswarm/tiny-tools + securityContext: + runAsUser: 0 + command: + - sh + - -c + - | + mkdir -p /data/dataverse /data/mdclogs; + chown -c 1000:1000 /data /data/dataverse /data/mdclogs + volumeMounts: + - name: data + mountPath: /data + volumes: + - name: data + persistentVolumeClaim: + claimName: minio + - name: minio-secrets + secret: + secretName: dataverse-s3 diff --git a/personas/demo-integrate-minio/minio-standalone/kustomization.yaml b/personas/demo-integrate-minio/minio-standalone/kustomization.yaml new file mode 100644 index 00000000..db8781da --- /dev/null +++ b/personas/demo-integrate-minio/minio-standalone/kustomization.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - deployment.yaml + - svc.yaml + - pvc.yaml diff --git a/personas/demo-integrate-minio/minio-standalone/pvc.yaml b/personas/demo-integrate-minio/minio-standalone/pvc.yaml new file mode 100644 index 00000000..bda24e34 --- /dev/null +++ b/personas/demo-integrate-minio/minio-standalone/pvc.yaml @@ -0,0 +1,13 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: minio + labels: + app: minio +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi diff --git a/personas/demo-integrate-minio/minio-standalone/svc.yaml b/personas/demo-integrate-minio/minio-standalone/svc.yaml new file mode 100644 index 00000000..10e7152d --- /dev/null +++ b/personas/demo-integrate-minio/minio-standalone/svc.yaml @@ -0,0 +1,16 @@ +--- +kind: Service +apiVersion: v1 +metadata: + name: minio + labels: + app: minio +spec: + ports: + - port: 9000 + name: minio + targetPort: 9000 + protocol: TCP + selector: + app.kubernetes.io/name: minio + app.kubernetes.io/part-of: dataverse diff --git a/personas/demo-integrate-minio/patch-memory.yaml b/personas/demo-integrate-minio/patch-memory.yaml new file mode 100644 index 00000000..46b1feba --- /dev/null +++ b/personas/demo-integrate-minio/patch-memory.yaml @@ -0,0 +1,15 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: dataverse +spec: + template: + spec: + containers: + - name: dataverse + resources: + requests: + memory: "2Gi" + limits: + memory: "2Gi" diff --git a/personas/demo-integrate-minio/patch-pod-s3-secret.yaml b/personas/demo-integrate-minio/patch-pod-s3-secret.yaml new file mode 100644 index 00000000..2bd3006a --- /dev/null +++ b/personas/demo-integrate-minio/patch-pod-s3-secret.yaml @@ -0,0 +1,18 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: 
dataverse +spec: + template: + spec: + volumes: + - name: s3-secret + secret: + secretName: dataverse-s3 + containers: + - name: dataverse + volumeMounts: + - name: s3-secret + mountPath: "/secrets/s3" + readOnly: true diff --git a/personas/demo-integrate-minio/secrets.yaml b/personas/demo-integrate-minio/secrets.yaml new file mode 100644 index 00000000..b52fabda --- /dev/null +++ b/personas/demo-integrate-minio/secrets.yaml @@ -0,0 +1,15 @@ +--- +apiVersion: v1 +kind: Secret +metadata: + name: dataverse-s3 + labels: + app.kubernetes.io/name: dataverse-s3 + app.kubernetes.io/version: "1.0" + app.kubernetes.io/component: secret + app.kubernetes.io/part-of: dataverse + app.kubernetes.io/managed-by: kubectl +type: Opaque +stringData: + access-key: dataverse + secret-key: changeme diff --git a/personas/demo/kustomization.yaml b/personas/demo/kustomization.yaml index 8e242b8e..39c9126c 100644 --- a/personas/demo/kustomization.yaml +++ b/personas/demo/kustomization.yaml @@ -9,9 +9,9 @@ bases: resources: - secrets.yaml - mailcatcher.yaml + - configmap.yaml images: patchesStrategicMerge: - - configmap.yaml - patch-memory.yaml diff --git a/personas/demo/patch-memory.yaml b/personas/demo/patch-memory.yaml index 988dbdd0..6764d880 100644 --- a/personas/demo/patch-memory.yaml +++ b/personas/demo/patch-memory.yaml @@ -10,9 +10,9 @@ spec: - name: dataverse resources: requests: - memory: "1Gi" + memory: "1.5Gi" limits: - memory: "1Gi" + memory: "1.5Gi" --- kind: Deployment apiVersion: apps/v1 @@ -25,6 +25,6 @@ spec: - name: solr resources: requests: - memory: "1Gi" + memory: "0.8Gi" limits: - memory: "1Gi" + memory: "0.8Gi" diff --git a/personas/dev/patch-dev.yaml b/personas/dev/patch-dev.yaml index 46cda0f0..e934bbce 100644 --- a/personas/dev/patch-dev.yaml +++ b/personas/dev/patch-dev.yaml @@ -10,16 +10,26 @@ spec: - name: dataverse resources: requests: - memory: "1Gi" + memory: "1.5Gi" limits: - memory: "1Gi" + memory: "1.5Gi" env: - name: MEM_MAX_RAM_PERCENTAGE - value: "60.0" + value: "70.0" - name: "ENABLE_JMX" value: "1" - name: "ENABLE_JDWP" value: "1" + - name: heapdumps + image: wardsco/sleep + resources: + requests: + memory: "64Mi" + limits: + memory: "64Mi" + volumeMounts: + - name: heapdumps + mountPath: /dumps --- kind: Deployment apiVersion: apps/v1 @@ -32,6 +42,6 @@ spec: - name: solr resources: requests: - memory: "1Gi" + memory: "0.8Gi" limits: - memory: "1Gi" + memory: "0.8Gi" diff --git a/personas/docker-compose/secrets/admin/password b/personas/docker-compose/secrets/admin/password new file mode 100644 index 00000000..4607aca9 --- /dev/null +++ b/personas/docker-compose/secrets/admin/password @@ -0,0 +1 @@ +admin1 diff --git a/personas/docker-compose/secrets/api/key b/personas/docker-compose/secrets/api/key new file mode 100644 index 00000000..b5f90786 --- /dev/null +++ b/personas/docker-compose/secrets/api/key @@ -0,0 +1 @@ +supersecret diff --git a/personas/docker-compose/secrets/db/password b/personas/docker-compose/secrets/db/password new file mode 100644 index 00000000..dff0ae5a --- /dev/null +++ b/personas/docker-compose/secrets/db/password @@ -0,0 +1 @@ +changeme diff --git a/personas/docker-compose/secrets/doi/password b/personas/docker-compose/secrets/doi/password new file mode 100644 index 00000000..dff0ae5a --- /dev/null +++ b/personas/docker-compose/secrets/doi/password @@ -0,0 +1 @@ +changeme diff --git a/personas/prod-skel/bases/minio-standalone/deployment.yaml b/personas/prod-skel/bases/minio-standalone/deployment.yaml new file mode 100644 index 00000000..9bbe4d34 --- 
/dev/null +++ b/personas/prod-skel/bases/minio-standalone/deployment.yaml @@ -0,0 +1,91 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: minio + labels: + app.kubernetes.io/name: minio + app.kubernetes.io/version: "latest" + app.kubernetes.io/component: storage + app.kubernetes.io/part-of: dataverse + app.kubernetes.io/managed-by: kubectl +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: minio + app.kubernetes.io/part-of: dataverse + revisionHistoryLimit: 1 + strategy: + # Choosen as upstream example uses this. Might be a thing of locks... + type: Recreate + template: + metadata: + labels: + app.kubernetes.io/name: minio + app.kubernetes.io/part-of: dataverse + annotations: {} + spec: + containers: + - name: minio + image: minio/minio + args: ["server", "/data"] + resources: + requests: + memory: "2Gi" + cpu: "0.5" + limits: + memory: "4Gi" + cpu: "2" + securityContext: + runAsUser: 1000 + runAsGroup: 1000 + env: + # MinIO access key and secret key + - name: MINIO_ACCESS_KEY_FILE + value: minio/access-key + - name: MINIO_SECRET_KEY_FILE + value: minio/secret-key + volumeMounts: + - name: data + mountPath: /data + - name: minio-secrets + mountPath: "/run/secrets/minio" + readOnly: true + ports: + - containerPort: 9000 + name: service + readinessProbe: + httpGet: + path: /minio/health/ready + port: service + initialDelaySeconds: 30 + periodSeconds: 30 + livenessProbe: + httpGet: + path: /minio/health/live + port: service + initialDelaySeconds: 30 + periodSeconds: 30 + securityContext: + fsGroup: 1000 + initContainers: + - name: perm-and-bucket + image: giantswarm/tiny-tools + securityContext: + runAsUser: 0 + command: + - sh + - -c + - | + mkdir -p /data/dataverse /data/mdclogs; + chown -c 1000:1000 /data /data/dataverse /data/mdclogs + volumeMounts: + - name: data + mountPath: /data + volumes: + - name: data + persistentVolumeClaim: + claimName: minio + - name: minio-secrets + secret: + secretName: dataverse-s3 diff --git a/personas/prod-skel/bases/minio-standalone/kustomization.yaml b/personas/prod-skel/bases/minio-standalone/kustomization.yaml new file mode 100644 index 00000000..db8781da --- /dev/null +++ b/personas/prod-skel/bases/minio-standalone/kustomization.yaml @@ -0,0 +1,7 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - deployment.yaml + - svc.yaml + - pvc.yaml diff --git a/personas/prod-skel/bases/minio-standalone/pvc.yaml b/personas/prod-skel/bases/minio-standalone/pvc.yaml new file mode 100644 index 00000000..fd2a2850 --- /dev/null +++ b/personas/prod-skel/bases/minio-standalone/pvc.yaml @@ -0,0 +1,13 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: minio + labels: + app: minio +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 80Gi diff --git a/personas/prod-skel/bases/minio-standalone/svc.yaml b/personas/prod-skel/bases/minio-standalone/svc.yaml new file mode 100644 index 00000000..10e7152d --- /dev/null +++ b/personas/prod-skel/bases/minio-standalone/svc.yaml @@ -0,0 +1,16 @@ +--- +kind: Service +apiVersion: v1 +metadata: + name: minio + labels: + app: minio +spec: + ports: + - port: 9000 + name: minio + targetPort: 9000 + protocol: TCP + selector: + app.kubernetes.io/name: minio + app.kubernetes.io/part-of: dataverse diff --git a/personas/prod-skel/envs/env1/config.yaml b/personas/prod-skel/envs/env1/config.yaml new file mode 100644 index 00000000..fd2a81d6 --- /dev/null +++ b/personas/prod-skel/envs/env1/config.yaml @@ -0,0 +1,44 @@ +--- +kind: 
ConfigMap +apiVersion: v1 +metadata: + name: dataverse + labels: + app: dataverse +data: + ### GENERAL SETTINGS + dataverse_fqdn: data-test.example.org + dataverse_siteUrl: https://data-test.example.org + POSTGRES_DATABASE: dataverse + + ### CONTACT SETTINGS + CONTACT_MAIL: rdm@example.org + MAIL_FROMADDRESS: rdm@example.org + db_SystemEmail: Research Data Management Unit Example.org + + ### DOI SETTINGS + doi_baseurlstring: https://mds.test.datacite.org + doi_username: EXAMPLE.ORG + db_DoiProvider: DataCite + db_Protocol: doi + db_Authority: "10.9999999999" + db_Shoulder: EXAMPLE-ORG-TEST/ + + ### FILE STORAGE + dataverse_files_directory: /data + dataverse_files_storage__driver__id: "s3" + dataverse_files_s3__custom__endpoint__url: http://minio:9000 + dataverse_files_s3__bucket__name: dataverse + # required for Minio! + dataverse_files_s3__path__style__access: "true" + + ### INGEST + # deactivate ingest for tabular files + db_TabularIngestSizeLimit: "0" + # allow uploads only < 2GB + db_MaxFileUploadSizeInBytes: "2147483648" + db_FileFixityChecksumAlgorithm: SHA-256 + + ### CUSTOMIZATION + db_StatusMessageHeader: "Test Environment" + db_StatusMessageText: "
This instance is meant for testing, training and experiments. DOIs minted here are not citeable!" diff --git a/personas/prod-skel/envs/env1/db/kustomization.yaml b/personas/prod-skel/envs/env1/db/kustomization.yaml new file mode 100644 index 00000000..97a41f31 --- /dev/null +++ b/personas/prod-skel/envs/env1/db/kustomization.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +bases: + - github.com/IQSS/dataverse-kubernetes/personas/demo/postgresql?ref=v4.20 + +patchesStrategicMerge: + - patch-pvc.yaml + - patch-limits.yaml diff --git a/personas/prod-skel/envs/env1/db/patch-limits.yaml b/personas/prod-skel/envs/env1/db/patch-limits.yaml new file mode 100644 index 00000000..cf0f5fd9 --- /dev/null +++ b/personas/prod-skel/envs/env1/db/patch-limits.yaml @@ -0,0 +1,17 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: postgresql +spec: + template: + spec: + containers: + - name: postgresql + resources: + requests: + memory: "1Gi" + cpu: "0.5" + limits: + memory: "4Gi" + cpu: "2" diff --git a/personas/prod-skel/envs/env1/db/patch-pvc.yaml b/personas/prod-skel/envs/env1/db/patch-pvc.yaml new file mode 100644 index 00000000..d47308d6 --- /dev/null +++ b/personas/prod-skel/envs/env1/db/patch-pvc.yaml @@ -0,0 +1,10 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: postgresql +spec: + resources: + requests: + storage: 30Gi + # storageClassName: pm-local diff --git a/personas/prod-skel/envs/env1/index/kustomization.yaml b/personas/prod-skel/envs/env1/index/kustomization.yaml new file mode 100644 index 00000000..17500a76 --- /dev/null +++ b/personas/prod-skel/envs/env1/index/kustomization.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +bases: + - github.com/IQSS/dataverse-kubernetes/k8s/solr?ref=v4.20 + +images: + - name: iqss/solr-k8s + newTag: "4.20" + +patchesStrategicMerge: + - patch-pvc.yaml + - patch-pull-always.yaml diff --git a/personas/prod-skel/envs/env1/index/patch-pull-always.yaml b/personas/prod-skel/envs/env1/index/patch-pull-always.yaml new file mode 100644 index 00000000..d89991bf --- /dev/null +++ b/personas/prod-skel/envs/env1/index/patch-pull-always.yaml @@ -0,0 +1,14 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: solr +spec: + template: + spec: + containers: + - name: solr + imagePullPolicy: Always + initContainers: + - name: schema-init + imagePullPolicy: Always diff --git a/personas/prod-skel/envs/env1/index/patch-pvc.yaml b/personas/prod-skel/envs/env1/index/patch-pvc.yaml new file mode 100644 index 00000000..e00da555 --- /dev/null +++ b/personas/prod-skel/envs/env1/index/patch-pvc.yaml @@ -0,0 +1,10 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: solr +spec: + resources: + requests: + storage: 60Gi + # storageClassName: pm-local diff --git a/personas/prod-skel/envs/env1/kustomization.yaml b/personas/prod-skel/envs/env1/kustomization.yaml new file mode 100644 index 00000000..c35f2ae1 --- /dev/null +++ b/personas/prod-skel/envs/env1/kustomization.yaml @@ -0,0 +1,14 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: dv-test + +bases: + - db + - index + - webapp + +resources: + - namespace.yaml + - config.yaml diff --git a/personas/prod-skel/envs/env1/namespace.yaml b/personas/prod-skel/envs/env1/namespace.yaml new file mode 100644 index 00000000..1489f560 --- /dev/null +++ b/personas/prod-skel/envs/env1/namespace.yaml @@ -0,0 +1,6 @@ +apiVersion: v1 +kind: Namespace +metadata: 
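+  # Note: the same name is set as "namespace: dv-test" in envs/env1/kustomization.yaml; keep both in sync if you rename it.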
+ name: dv-test + labels: + name: dv-test diff --git a/personas/prod-skel/envs/env1/store/kustomization.yaml b/personas/prod-skel/envs/env1/store/kustomization.yaml new file mode 100644 index 00000000..035ad8aa --- /dev/null +++ b/personas/prod-skel/envs/env1/store/kustomization.yaml @@ -0,0 +1,10 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +bases: + - ../../../bases/minio-standalone + + +patchesStrategicMerge: + - patch-pvc.yaml diff --git a/personas/prod-skel/envs/env1/store/patch-pvc.yaml b/personas/prod-skel/envs/env1/store/patch-pvc.yaml new file mode 100644 index 00000000..03931350 --- /dev/null +++ b/personas/prod-skel/envs/env1/store/patch-pvc.yaml @@ -0,0 +1,10 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: minio +spec: + resources: + requests: + storage: 5000Gi + # storageClassName: pm-local diff --git a/personas/prod-skel/envs/env1/webapp/kustomization.yaml b/personas/prod-skel/envs/env1/webapp/kustomization.yaml new file mode 100644 index 00000000..88d21560 --- /dev/null +++ b/personas/prod-skel/envs/env1/webapp/kustomization.yaml @@ -0,0 +1,20 @@ +--- +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +bases: + - github.com/IQSS/dataverse-kubernetes/k8s/dataverse?ref=v4.20 + +images: + - name: iqss/dataverse-k8s + newTag: "v4.20" + +patchesStrategicMerge: + - patch-pvc.yaml + - patch-svc.yaml + - patch-pod-s3-secret.yaml + - patch-pull-always.yaml + - sidecars/ssl/patch.yaml + +resources: + - sidecars/ssl/config.yaml diff --git a/personas/prod-skel/envs/env1/webapp/patch-limits.yaml b/personas/prod-skel/envs/env1/webapp/patch-limits.yaml new file mode 100644 index 00000000..b6d15647 --- /dev/null +++ b/personas/prod-skel/envs/env1/webapp/patch-limits.yaml @@ -0,0 +1,15 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: dataverse +spec: + template: + spec: + containers: + - name: dataverse + resources: + requests: + memory: "4Gi" + limits: + memory: "8Gi" diff --git a/personas/prod-skel/envs/env1/webapp/patch-pod-s3-secret.yaml b/personas/prod-skel/envs/env1/webapp/patch-pod-s3-secret.yaml new file mode 100644 index 00000000..2bd3006a --- /dev/null +++ b/personas/prod-skel/envs/env1/webapp/patch-pod-s3-secret.yaml @@ -0,0 +1,18 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: dataverse +spec: + template: + spec: + volumes: + - name: s3-secret + secret: + secretName: dataverse-s3 + containers: + - name: dataverse + volumeMounts: + - name: s3-secret + mountPath: "/secrets/s3" + readOnly: true diff --git a/personas/prod-skel/envs/env1/webapp/patch-pull-always.yaml b/personas/prod-skel/envs/env1/webapp/patch-pull-always.yaml new file mode 100644 index 00000000..c85d4bf7 --- /dev/null +++ b/personas/prod-skel/envs/env1/webapp/patch-pull-always.yaml @@ -0,0 +1,11 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: dataverse +spec: + template: + spec: + containers: + - name: dataverse + imagePullPolicy: Always diff --git a/personas/prod-skel/envs/env1/webapp/patch-pvc.yaml b/personas/prod-skel/envs/env1/webapp/patch-pvc.yaml new file mode 100644 index 00000000..376ca2cd --- /dev/null +++ b/personas/prod-skel/envs/env1/webapp/patch-pvc.yaml @@ -0,0 +1,20 @@ +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: dataverse-files +spec: + resources: + requests: + storage: 500Gi + # storageClassName: pm-local +--- +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: dataverse-docroot +spec: + resources: + requests: + storage: 20Gi + # 
storageClassName: pm-local diff --git a/personas/prod-skel/envs/env1/webapp/sidecars/ssl/config.yaml b/personas/prod-skel/envs/env1/webapp/sidecars/ssl/config.yaml new file mode 100644 index 00000000..8af53bfd --- /dev/null +++ b/personas/prod-skel/envs/env1/webapp/sidecars/ssl/config.yaml @@ -0,0 +1,73 @@ +--- +kind: ConfigMap +apiVersion: v1 +metadata: + name: ssl-sidecar + labels: + app: dataverse +data: + default.conf: | + # Adapted from these sources: + # https://github.com/pbrumblay/tls-sidecar/blob/master/default.conf + # https://github.com/kubernetes/kubernetes/blob/master/examples/https-nginx/default.conf + # https://www.nginx.com/resources/admin-guide/nginx-tcp-ssl-termination/ + # https://hynek.me/articles/hardening-your-web-servers-ssl-ciphers/ + # https://ssl-config.mozilla.org/#server=nginx&version=1.17.7&config=intermediate&openssl=1.1.1d&hsts=false&ocsp=false&guideline=5.4 + server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + + # allow large body sizes + client_max_body_size 120g; + + # increase client body buffer for performance. + client_body_buffer_size 128k; + + # map these into the nginx container using a volume secret + ssl_certificate /etc/nginx/ssl/tls.crt; + ssl_certificate_key /etc/nginx/ssl/tls.key; + + # intermediate configuration + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + + ssl_session_timeout 1d; + ssl_session_cache shared:MozSSL:10m; # about 40000 sessions + ssl_session_tickets off; + + access_log /dev/stdout; + error_log /dev/stdout info; + + merge_slashes off; + # replace merge_slashes' behavior with "redirect_slashes" + location ~* "//" { + rewrite ^(.*)//(.*)$ $1/$2; + rewrite ^ $uri permanent; + } + + location / { + # proxy to upstream application + proxy_pass http://127.0.0.1:8080; + + # don't use http 1.0 so keepalive enabled by default + proxy_http_version 1.1; + + # prevent client from closing keepalive + proxy_set_header Connection ""; + + # don't need timeouts for process to process communication + # (why complicate things with this MITM proxy? let client timeout) + proxy_send_timeout 86400s; + proxy_read_timeout 86400s; + + # don't write client body to docker file system + proxy_request_buffering off; + } + + location ~ ^/(logos|branding)/ { + root /docroot; + rewrite "^(.*);jsessionid=(.*)$" $1 last; + try_files $uri =404; + } + } diff --git a/personas/prod-skel/envs/env1/webapp/sidecars/ssl/patch.yaml b/personas/prod-skel/envs/env1/webapp/sidecars/ssl/patch.yaml new file mode 100644 index 00000000..21c7735f --- /dev/null +++ b/personas/prod-skel/envs/env1/webapp/sidecars/ssl/patch.yaml @@ -0,0 +1,35 @@ +--- +kind: Deployment +apiVersion: apps/v1 +metadata: + name: dataverse +spec: + template: + spec: + containers: + - name: ssl + image: nginx:1.17-alpine + resources: + requests: + memory: "256Mi" + cpu: "0.25" + limits: + memory: "1024Mi" + cpu: "1" + ports: + - containerPort: 443 + name: https + volumeMounts: + - name: certificates + mountPath: "/etc/nginx/ssl" + - name: config + mountPath: "/etc/nginx/conf.d" + - name: docroot + mountPath: "/docroot" + volumes: + - name: certificates + secret: + secretName: dataverse-certificate + - name: config + configMap: + name: ssl-sidecar
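The SSL sidecar patch above mounts a ``dataverse-certificate`` secret (``tls.crt``/``tls.key``), and the S3 patches expect the ``dataverse-s3`` secret; neither is created by the ``env1`` overlay itself. Below is a minimal sketch of one way to provide them and apply the overlay, assuming you work from a checkout of this repository and have a certificate at hand (the file paths are placeholders, and the other secrets expected by the base descriptors still need to be supplied as well):

.. code-block:: shell

   # Create the namespace first, so the secrets have somewhere to live
   # (env1 also ships namespace.yaml, so a later apply just reconciles it).
   kubectl create namespace dv-test

   # TLS certificate and key for the nginx SSL sidecar (mounted at /etc/nginx/ssl).
   kubectl create secret tls dataverse-certificate \
       --cert=path/to/tls.crt --key=path/to/tls.key -n dv-test

   # S3 credentials shared by Dataverse and MinIO; same keys as in the demo secrets.yaml.
   kubectl create secret generic dataverse-s3 \
       --from-literal=access-key=dataverse \
       --from-literal=secret-key=changeme -n dv-test

   # Build and apply the whole environment with Kustomize (kubectl v1.14+).
   kubectl apply -k personas/prod-skel/envs/env1

If TLS is terminated elsewhere (for example at an ingress controller), the sidecar patch and the certificate secret can be left out of the overlay instead.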