From 482dbdc9a7986f59ca7c2ee37656da5b97945fcc Mon Sep 17 00:00:00 2001
From: slallemand
Date: Fri, 27 Sep 2019 13:10:52 +0200
Subject: [PATCH] Feature/docker improvements (#85) : Resolves #70

* Revert "Add minimum column width as an option to reading order (#2)"

This reverts commit 95efdd25bc0f8d432acfc95c6db8603a6d627dd3.

PR #43 should be merged before having considered this change stable.

* docker improvements
---
Reviewer notes (everything between the "---" marker above and the first
"diff --git" line is dropped by git-am; it is not part of the commit message).

Purpose of the patch: split the single CentOS-based image into a Debian
"parsr-base" image (system dependencies + pdf2json), a thin "parsr" API image
built on top of it, and an nginx-served "parsr-ui" image; split the Drone
pipeline accordingly; revert the reading-order README options.

This copy was restored from a version whose newlines had been collapsed.
NOTE(review): indentation inside YAML / shell / nginx lines and the position
of blank context lines were inferred (hunk header counts and the diffstat were
used as a cross-check); confirm against the target tree before applying.

Changes made during review (the "index" hashes of the touched files are
therefore stale, which only matters for `git am --3way`):
 - docker/tags.sh: write "latest,develop" instead of the misspelled
   "lastest,develop", so the tag matches the message echoed just before it.
 - docker-compose.yml: keep DUCKLING_HOST pointing at the "duckling" service
   name; "localhost" inside the parsr container is the container itself.
 - docker/parsr/Dockerfile: exec-form CMD with an absolute path (the base
   image sets WORKDIR /opt/app-root/src) so stop signals reach run.sh/node.
 - Dockerfiles added by this patch: "ENV key=value" instead of the legacy
   space-separated form, and upper-case "AS" in multi-stage FROM lines.

 .drone.yml                                    | 100 ++++++++++++------
 docker-compose.yml                            |  26 +++--
 docker/parsr-base/Dockerfile                  |  32 ++++++
 docker/parsr-ui/Dockerfile                    |  25 +++++
 docker/parsr-ui/build.sh                      |  13 +++
 docker/parsr-ui/nginx.conf                    |  17 +++
 docker/parsr/Dockerfile                       |  79 +-------------
 docker/parsr/build.sh                         |  23 ++++
 docker/parsr/run.sh                           |   9 ++
 docker/tags.sh                                |  15 +++
 .../ReadingOrderDetectionModule/README.md     |   5 -
 11 files changed, 226 insertions(+), 118 deletions(-)
 create mode 100644 docker/parsr-base/Dockerfile
 create mode 100644 docker/parsr-ui/Dockerfile
 create mode 100755 docker/parsr-ui/build.sh
 create mode 100644 docker/parsr-ui/nginx.conf
 create mode 100755 docker/parsr/build.sh
 create mode 100755 docker/parsr/run.sh
 create mode 100755 docker/tags.sh

diff --git a/.drone.yml b/.drone.yml
index 019f45ea..59af7407 100644
--- a/.drone.yml
+++ b/.drone.yml
@@ -1,48 +1,65 @@
 ---
 kind: pipeline
-name: default
+name: baseimage
 
 platform:
   os: linux
   arch: amd64
 
+trigger:
+  branch:
+  - master
+  - develop
+
+steps:
+- name: Build Docker baseimage
+  image: plugins/docker
+  settings:
+    repo: axarev/parsr-base
+    context: .
+    dockerfile: docker/parsr-base/Dockerfile
+    username:
+      from_secret: registry_user
+    password:
+      from_secret: registry_password
+
+---
+kind: pipeline
+name: API
+
+platform:
+  os: linux
+  arch: amd64
+
+trigger:
+  branch:
+  - master
+  - develop
+
 steps:
 - name: Change file ownership
   image: alpine:latest
   commands:
   - chown -R 1001:0 /drone/src
 
-- name: Build project
-  image: axarev/parsr
+- name: Build API
+  image: axarev/parsr-base
   environment:
-    LD_LIBRARY_PATH: /opt/rh/rh-nodejs8/root/usr/lib64
     NODE_ENV: development
   commands:
-  - export PATH=/opt/rh/rh-nodejs8/root/usr/bin:$PATH
-  - npm install
-
-- name: Run formatter
-  image: axarev/parsr
-  environment:
-    LD_LIBRARY_PATH: /opt/rh/rh-nodejs8/root/usr/lib64
-  commands:
-  - export PATH=/opt/rh/rh-nodejs8/root/usr/bin:$PATH
-  - npm run format
+  - docker/parsr/build.sh
 
-- name: Run linter
-  image: node:8
+- name: Tests
+  image: axarev/parsr-base
   commands:
-  - npm run lint
+  - npm run test
 
-- name: Run tests
-  image: axarev/parsr
-  environment:
-    LD_LIBRARY_PATH: /opt/rh/rh-nodejs8/root/usr/lib64
+- name: Docker tag
+  image: busybox
   commands:
-  - export PATH=/opt/rh/rh-nodejs8/root/usr/bin:$PATH
-  - npm run test
+  - sh docker/tags.sh
 
-- name: Build Docker image
+- name: Build Docker API image
   image: plugins/docker
   settings:
     repo: axarev/parsr
@@ -54,11 +71,32 @@ steps:
       from_secret: registry_password
     build_args:
       DEV_MODE: 'true'
-  when:
-    branch:
-    - develop
-    - feature/drone*
-    event:
-      exclude:
-      - pull_request
 
+depends_on:
+  - baseimage
+
+---
+kind: pipeline
+name: UI
+
+platform:
+  os: linux
+  arch: amd64
+
+trigger:
+  branch:
+  - master
+  - develop
+
+steps:
+- name: Change file ownership
+  image: alpine:latest
+  commands:
+  - chown -R 1001:0 /drone/src
+
+- name: Build UI
+  image: registry.access.redhat.com/ubi8/nodejs-10
+  environment:
+    NODE_ENV: development
+  commands:
+  - docker/parsr-ui/build.sh
diff --git a/docker-compose.yml b/docker-compose.yml
index fe401c38..c6cc63ec 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -2,31 +2,41 @@ version: '3.3'
 
 services:
   duckling:
-    build:
-      context: docker/duckling
-      args:
-        DUCKLING_RELEASE: 'v0.1.6.1'
     ports:
       - 8000:8000
-    image: duckling
+    image: axarev/duckling
+
+  parsr-base:
+    image: axarev/parsr-base
+    build:
+      context: .
+      dockerfile: docker/parsr-base/Dockerfile
 
   parsr:
+    image: axarev/parsr
     build:
       context: .
       dockerfile: docker/parsr/Dockerfile
       args:
         DEV_MODE: 'true'
     ports:
-      - 8080:3000
       - 3001:3001
     environment:
       DUCKLING_HOST: http://duckling:8000
       ABBYY_SERVER_URL:
-      # NPM_RUN: start:all
-    image: parsr
     volumes:
       - ./pipeline/:/opt/app-root/src/demo/web-viewer/pipeline/
 
+  parsr-ui:
+    image: parsr-ui
+    build:
+      context: .
+      dockerfile: docker/parsr-ui/Dockerfile
+      args:
+        VUE_APP_API: ${VUE_APP_API:-http://localhost:3001}
+    ports:
+      - 8080:80
+
 volumes:
   pipeline:
     driver: local
diff --git a/docker/parsr-base/Dockerfile b/docker/parsr-base/Dockerfile
new file mode 100644
index 00000000..328f689f
--- /dev/null
+++ b/docker/parsr-base/Dockerfile
@@ -0,0 +1,32 @@
+FROM debian:10 AS builder
+
+RUN apt-get update && \
+    apt-get install -y git build-essential
+
+RUN git clone https://github.com/AXATechLab/pdf2json /src/pdf2json && \
+    cd /src/pdf2json && \
+    ./configure --prefix=/opt/pdf2json && \
+    make -j && \
+    make install && \
+    cd /src && \
+    rm -rf pdf2json
+
+
+FROM debian:10 AS engine
+
+RUN apt-get update && \
+    apt-get install -y imagemagick mupdf mupdf-tools qpdf pandoc tesseract-ocr-all nodejs npm python-pdfminer python-pip python3-pip python-tk python3-pdfminer python3-opencv && \
+    pip install ghostscript camelot-py && \
+    pip3 install ghostscript camelot-py
+
+WORKDIR /opt/app-root/src
+RUN chown 1001:0 /opt/app-root/src
+
+COPY --from=builder /opt/pdf2json /opt/pdf2json
+COPY --chown=1001:0 . /opt/app-root/src
+
+
+ENV PATH=$PATH:/opt/app-root/src/node_modules/.bin:/opt/pdf2json/bin
+ENV HOME=/opt/app-root/src
+
+USER 1001
\ No newline at end of file
diff --git a/docker/parsr-ui/Dockerfile b/docker/parsr-ui/Dockerfile
new file mode 100644
index 00000000..f19c86ec
--- /dev/null
+++ b/docker/parsr-ui/Dockerfile
@@ -0,0 +1,25 @@
+FROM registry.access.redhat.com/ubi8/nodejs-10 AS build
+
+# Disable RHSM
+USER root
+RUN sed -i -e 's/^\(enabled\).*/\1=0/g' /etc/yum/pluginconf.d/subscription-manager.conf
+USER 1001
+
+ARG NODE_ENV=development
+ARG DEV_MODE=true
+
+ARG VUE_APP_API=${VUE_APP_API}
+
+#COPY --chown=1001:root demo/vue-viewer /opt/app-root/src
+#COPY --chown=1001:root docker/parsr-ui docker/parsr-ui
+
+COPY --chown=1001:root . /opt/app-root/src
+
+RUN docker/parsr-ui/build.sh
+
+
+FROM nginx:stable
+
+COPY --from=build /opt/app-root/src/demo/vue-viewer/dist /usr/share/nginx/html
+RUN rm /etc/nginx/conf.d/default.conf
+COPY docker/parsr-ui/nginx.conf /etc/nginx/conf.d
\ No newline at end of file
diff --git a/docker/parsr-ui/build.sh b/docker/parsr-ui/build.sh
new file mode 100755
index 00000000..a92966f5
--- /dev/null
+++ b/docker/parsr-ui/build.sh
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+set -e
+
+cd demo/vue-viewer
+
+echo "Installing modules"
+npm install
+
+echo
+
+echo "Building UI"
+npm run build
\ No newline at end of file
diff --git a/docker/parsr-ui/nginx.conf b/docker/parsr-ui/nginx.conf
new file mode 100644
index 00000000..3818b456
--- /dev/null
+++ b/docker/parsr-ui/nginx.conf
@@ -0,0 +1,17 @@
+server {
+
+  listen 80;
+
+  location / {
+    root /usr/share/nginx/html;
+    index index.html index.htm;
+    try_files $uri $uri/ /index.html ;
+  }
+
+  error_page 500 502 503 504 /50x.html;
+
+  location = /50x.html {
+    root /usr/share/nginx/html;
+  }
+
+}
diff --git a/docker/parsr/Dockerfile b/docker/parsr/Dockerfile
index 8a35eca1..8236c0af 100644
--- a/docker/parsr/Dockerfile
+++ b/docker/parsr/Dockerfile
@@ -1,77 +1,8 @@
-FROM centos:7 AS builder
-USER root
+FROM axarev/parsr-base
 
-RUN yum -y update && \
-    yum-config-manager --enable epel && \
-    yum -y groupinstall 'Development Tools' && \
-    yum -y install git zlib-devel libjpeg-turbo-devel libtiff-devel libpng-devel && \
-    mkdir /src && \
-    cd /src
+ENV NODE_ENV=development
 
-RUN git clone https://github.com/AXATechLab/pdf2json && \
-    cd pdf2json && \
-    ./configure --prefix=/opt/app-root && \
-    make -j && \
-    make install && \
-    cd /src && \
-    rm -rf pdf2json
+EXPOSE 3001
 
-RUN curl -o mupdf.tar.gz https://www.mupdf.com/downloads/archive/mupdf-1.14.0-source.tar.gz && \
-    tar xvfz mupdf.tar.gz && \
-    cd mupdf-1.14.0-source/ && \
-    make prefix=/opt/app-root HAVE_GLUT=no -j install && \
-    cd /src && \
-    rm -rf mupdf-1.14.0-source
-
-
-RUN curl -sL https://github.com/qpdf/qpdf/releases/download/release-qpdf-8.3.0/qpdf-8.3.0.tar.gz | tar xfz - && \
-    cd qpdf-8.3.0 && \
-    ./configure --prefix=/opt/app-root && \
-    make -j && \
-    make install && \
-    cd /src && \
-    rm -rf qpdf-8.3.0
-
-RUN curl -sL https://github.com/jgm/pandoc/releases/download/2.7.3/pandoc-2.7.3-linux.tar.gz | tar xvfz - --strip-components 1 -C /opt/app-root
-
-RUN mkdir -p /opt/app-root/share/tessdata/ && \
-    git clone https://github.com/tesseract-ocr/tessdata_fast.git && \
-    cd tessdata_fast && \
-    cp *.traineddata /opt/app-root/share/tessdata/ && \
-    cd /src && \
-    rm -rf tessdata_fast
-
-USER 1001
-
-
-FROM centos/nodejs-8-centos7 as engine
-USER root
-
-RUN yum -y update && \
-    yum -y install zlib libjpeg-turbo libtiff libpng ImageMagick
-
-RUN yum-config-manager --add-repo https://download.opensuse.org/repositories/home:/Alexander_Pozdnyakov/CentOS_7/ && \
-    rpm --import https://build.opensuse.org/projects/home:Alexander_Pozdnyakov/public_key && \
-    yum -y update && \
-    yum -y install tesseract tesseract-langpack-*
-
-COPY --from=builder /opt/app-root/bin /opt/app-root/bin
-COPY --from=builder /opt/app-root/etc /opt/app-root/etc
-COPY --from=builder /opt/app-root/include /opt/app-root/include
-COPY --from=builder /opt/app-root/lib /opt/app-root/lib
-COPY --from=builder /opt/app-root/share /opt/app-root/share
-
-ENV PATH=$PATH:/opt/app-root/bin
-ARG DEV_MODE=true
-
-# Copying in override assemble/run scripts
-COPY .s2i/bin /tmp/scripts
-COPY --chown=1001:root . /tmp/src
-
-USER 1001
-
-RUN /tmp/scripts/assemble && \
-    mkdir -p /opt/app-root/src/demo/web-viewer/pipeline/output && \
-    chmod -R g+w /opt/app-root/src/demo/web-viewer && \
-    rm -rf /tmp/src
-CMD /tmp/scripts/run
+RUN docker/parsr/build.sh
+CMD ["/opt/app-root/src/docker/parsr/run.sh"]
\ No newline at end of file
diff --git a/docker/parsr/build.sh b/docker/parsr/build.sh
new file mode 100755
index 00000000..476150fb
--- /dev/null
+++ b/docker/parsr/build.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+
+set -e
+
+export PATH=$PATH:$PWD/node_modules/.bin
+
+echo "Installing packages : npm install"
+npm install
+
+echo
+
+echo "Building typescript : npm run build:ts"
+npm run build:ts
+
+echo
+
+echo "Installing packages for api/server : npm install --prefix api/server"
+npm install --prefix api/server
+
+echo
+
+echo "Building parsr API"
+npm run --prefix api/server build
\ No newline at end of file
diff --git a/docker/parsr/run.sh b/docker/parsr/run.sh
new file mode 100755
index 00000000..f0e81719
--- /dev/null
+++ b/docker/parsr/run.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+set -e
+
+
+echo "Starting par.sr API : node api/server/dist/index.js"
+#exec node api/server/dist/index.js
+cd api/server
+exec node dist/index.js
\ No newline at end of file
diff --git a/docker/tags.sh b/docker/tags.sh
new file mode 100755
index 00000000..20e214ef
--- /dev/null
+++ b/docker/tags.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+set -e
+
+case ${DRONE_BRANCH} in
+  master)
+    echo "Add \"stable\" tag"
+    echo -n stable > .tags
+    ;;
+  develop)
+    echo "Add \"latest,develop\" tag"
+    echo -n latest,develop > .tags
+    ;;
+
+esac
\ No newline at end of file
diff --git a/server/src/processing/ReadingOrderDetectionModule/README.md b/server/src/processing/ReadingOrderDetectionModule/README.md
index f1e15645..ce8151ef 100644
--- a/server/src/processing/ReadingOrderDetectionModule/README.md
+++ b/server/src/processing/ReadingOrderDetectionModule/README.md
@@ -22,11 +22,6 @@ First, the algorithm will try to find possible vertical cuts in the page between
 
 Good
 
-## Options
-
-minColumnWidthInPagePercent : The minimal column width in % of the page width
-minVerticalGapWidth : The minimal gap width in points
-
 ## Limitations
 
 - It sometimes fails if bounding boxes are too far from each others.