Skip to content
Permalink
Browse files

First public release 🚀

  • Loading branch information
BinaryBrain committed Aug 6, 2019
0 parents commit aefa4de0dc4299866b79f849282a2960bfdae109
Showing with 30,918 additions and 0 deletions.
  1. +1 −0 .dockerignore
  2. +159 −0 .drone.yml
  3. +108 −0 .gitignore
  4. +8 −0 .prettierrc.js
  5. +20 −0 .s2i/bin/assemble
  6. +3 −0 .s2i/bin/run
  7. +28 −0 .vscode/launch.json
  8. +16 −0 .vscode/tasks.json
  9. +61 −0 CONTRIBUTING.md
  10. +201 −0 LICENSE
  11. +331 −0 README.md
  12. +832 −0 api/server/package-lock.json
  13. +29 −0 api/server/package.json
  14. +112 −0 api/server/src/FileManager.ts
  15. +25 −0 api/server/src/Logger.ts
  16. +86 −0 api/server/src/ProcessManager.ts
  17. +292 −0 api/server/src/api.ts
  18. +21 −0 api/server/src/index.ts
  19. +76 −0 api/server/src/types.ts
  20. +14 −0 api/server/tsconfig.json
  21. +436 −0 api/swagger/openapi.yaml
  22. +518 −0 api/swagger/swagger-generated.yaml
  23. +232 −0 demo/jupyter-notebook/Jupyter - API Access Demo.ipynb
  24. +81 −0 demo/jupyter-notebook/parsr_api.py
  25. +85 −0 demo/jupyter-notebook/parsr_output_interpreter.py
  26. +44 −0 demo/jupyter-notebook/sampleConfig.json
  27. BIN demo/jupyter-notebook/sampleFile.pdf
  28. +22 −0 demo/python-module/README.md
  29. +51 −0 demo/python-module/echo-module.py
  30. +5 −0 demo/web-viewer/.gitignore
  31. +238 −0 demo/web-viewer/index.js
  32. +57 −0 demo/web-viewer/localStorage.js
  33. +2,448 −0 demo/web-viewer/package-lock.json
  34. +24 −0 demo/web-viewer/package.json
  35. +33 −0 demo/web-viewer/public/css/style.css
  36. +105 −0 demo/web-viewer/public/css/viewer.css
  37. BIN demo/web-viewer/public/favicon.ico
  38. +81 −0 demo/web-viewer/public/index.html
  39. +346 −0 demo/web-viewer/public/js/renderer.js
  40. +62 −0 demo/web-viewer/public/js/viewer.js
  41. +54 −0 demo/web-viewer/public/viewer.html
  42. +30 −0 demo/web-viewer/public/views/downloader.tpl
  43. +38 −0 demo/web-viewer/public/views/loader.tpl
  44. +47 −0 demo/web-viewer/public/views/visualization.tpl
  45. +164 −0 demo/web-viewer/style/style.scss
  46. +32 −0 docker-compose.yml
  47. +24 −0 docker/duckling/Dockerfile
  48. +77 −0 docker/parsr/Dockerfile
  49. +102 −0 docs/API-deprecated.md
  50. +216 −0 docs/api-guide.md
  51. +5,076 −0 docs/api.html
  52. 0 docs/architecture.md
  53. +170 −0 docs/configuration-file.md
  54. +28 −0 docs/create-your-module.md
  55. +23 −0 docs/docker.md
  56. +373 −0 docs/json-output.md
  57. +37 −0 docs/modules/header-footer-detection-module.md
  58. +29 −0 docs/modules/heading-detection-module.md
  59. +15 −0 docs/modules/hierarchy-detection-module.md
  60. +53 −0 docs/modules/key-value-detection-module.md
  61. +27 −0 docs/modules/lines-to-paragraph-module.md
  62. +27 −0 docs/modules/link-detection-module.md
  63. +27 −0 docs/modules/list-detection-module.md
  64. +33 −0 docs/modules/number-correction-module.md
  65. +26 −0 docs/modules/out-of-page-removal-module.md
  66. +29 −0 docs/modules/reading-order-detection-module.md
  67. +29 −0 docs/modules/redundancy-detection-module.md
  68. +18 −0 docs/modules/remote-module.md
  69. +25 −0 docs/modules/whitespace-removal-module.md
  70. +26 −0 docs/modules/words-to-line-module.md
  71. +4,471 −0 package-lock.json
  72. +93 −0 package.json
  73. +298 −0 server/bin/index.ts
  74. +107 −0 server/configKeyValueSearch.json
  75. +44 −0 server/defaultConfig.json
  76. +46 −0 server/remoteModuleConfig.json
  77. +211 −0 server/src/Cleaner.ts
  78. +59 −0 server/src/Orchestrator.ts
  79. +23 −0 server/src/exporters/ConfidencesExporter.ts
  80. +60 −0 server/src/exporters/CsvExporter.ts
  81. +43 −0 server/src/exporters/Exporter.ts
  82. +23 −0 server/src/exporters/JsonCompactExporter.ts
  83. +273 −0 server/src/exporters/JsonExporter.ts
  84. +83 −0 server/src/exporters/MarkdownExporter.ts
  85. +70 −0 server/src/exporters/PdfExporter.ts
  86. +66 −0 server/src/exporters/TextExporter.ts
  87. +23 −0 server/src/exporters/XmlExporter.ts
  88. +25 −0 server/src/exporters/index.ts
  89. +34 −0 server/src/extractors/Extractor.ts
  90. +362 −0 server/src/extractors/abbyy/AbbyyClient.ts
  91. +456 −0 server/src/extractors/abbyy/AbbyyTools.ts
  92. +51 −0 server/src/extractors/abbyy/AbbyyToolsXml.ts
  93. +58 −0 server/src/extractors/extract-fonts.ts
  94. +30 −0 server/src/extractors/json/JsonExtractor.ts
  95. +36 −0 server/src/extractors/pdf2json/PdfJsonExtractor.ts
  96. +164 −0 server/src/extractors/pdf2json/pdf2json.ts
  97. +62 −0 server/src/extractors/set-page-dimensions.ts
  98. +36 −0 server/src/extractors/tesseract/TesseractExtractor.ts
  99. +144 −0 server/src/extractors/tesseract/tesseract2json.ts
  100. +205 −0 server/src/modules/HeaderFooterDetectionModule.ts
  101. +153 −0 server/src/modules/HeadingDetectionModule.ts
  102. +143 −0 server/src/modules/HierarchyDetectionModule.ts
  103. +238 −0 server/src/modules/KeyValueDetectionModule.ts
  104. +134 −0 server/src/modules/LinesToParagraphModule.ts
  105. +61 −0 server/src/modules/LinkDetectionModule.ts
  106. +48 −0 server/src/modules/ListDetectionModule.ts
  107. +57 −0 server/src/modules/Module.ts
  108. +270 −0 server/src/modules/NumberCorrectionModule.ts
  109. +38 −0 server/src/modules/OutOfPageRemovalModule.ts
  110. +204 −0 server/src/modules/ReadingOrderDetectionModule.ts
  111. +120 −0 server/src/modules/RedundancyDetectionModule.ts
  112. +84 −0 server/src/modules/RegexMatcherModule.ts
  113. +53 −0 server/src/modules/RemoteModule.ts
  114. +197 −0 server/src/modules/SeparateWordsModule.ts
  115. +69 −0 server/src/modules/TemplateModule.ts
  116. +70 −0 server/src/modules/WhitespaceRemovalModule.ts
  117. +117 −0 server/src/modules/WordsToLineModule.ts
  118. +47 −0 server/src/tslint.conf
  119. +228 −0 server/src/types/Config.ts
  120. +65 −0 server/src/types/DocumentRepresentation/Barcode.ts
  121. +160 −0 server/src/types/DocumentRepresentation/BoundingBox.ts
  122. +54 −0 server/src/types/DocumentRepresentation/Character.ts
  123. +20 −0 server/src/types/DocumentRepresentation/Color.ts
  124. +97 −0 server/src/types/DocumentRepresentation/Document.ts
  125. +48 −0 server/src/types/DocumentRepresentation/Drawing.ts
  126. +215 −0 server/src/types/DocumentRepresentation/Element.ts
  127. +222 −0 server/src/types/DocumentRepresentation/Font.ts
  128. +48 −0 server/src/types/DocumentRepresentation/Heading.ts
  129. +48 −0 server/src/types/DocumentRepresentation/Image.ts
  130. +82 −0 server/src/types/DocumentRepresentation/JsonExport.ts
  131. +90 −0 server/src/types/DocumentRepresentation/Line.ts
  132. +89 −0 server/src/types/DocumentRepresentation/List.ts
  133. +281 −0 server/src/types/DocumentRepresentation/Page.ts
  134. +162 −0 server/src/types/DocumentRepresentation/Paragraph.ts
  135. +139 −0 server/src/types/DocumentRepresentation/SvgLine.ts
  136. +19 −0 server/src/types/DocumentRepresentation/SvgShape.ts
  137. +327 −0 server/src/types/DocumentRepresentation/Table.ts
  138. +89 −0 server/src/types/DocumentRepresentation/TableCell.ts
  139. +44 −0 server/src/types/DocumentRepresentation/TableRow.ts
  140. +58 −0 server/src/types/DocumentRepresentation/Text.ts
  141. +111 −0 server/src/types/DocumentRepresentation/Word.ts
  142. +35 −0 server/src/types/DocumentRepresentation/index.ts
  143. +60 −0 server/src/types/Metadata/ComplexMetadata.ts
  144. +26 −0 server/src/types/Metadata/KeyValueMetadata.ts
  145. +21 −0 server/src/types/Metadata/Metadata.ts
  146. +60 −0 server/src/types/Metadata/NumberMetadata.ts
  147. +33 −0 server/src/types/Metadata/Properties.ts
  148. +26 −0 server/src/types/Metadata/RegexMetadata.ts
  149. +22 −0 server/src/types/Metadata/index.ts
  150. +22 −0 server/src/types/Pdf2JsonFont.ts
  151. +36 −0 server/src/types/Pdf2JsonPage.ts
  152. +24 −0 server/src/types/Pdf2JsonText.ts
  153. +38 −0 server/src/types/TableInfo.ts
  154. +23 −0 server/src/types/TableReconstruction.ts
  155. +32 −0 server/src/types/TsvElement.ts
  156. +590 −0 server/src/utils.ts
  157. +95 −0 server/src/utils/Logger.ts
  158. +402 −0 server/src/utils/json2document.ts
  159. +3 −0 sonar-project.properties
  160. BIN test/assets/html.pdf
  161. BIN test/assets/line-merge-2.pdf
  162. +19 −0 test/assets/line-merge-2.pdf.json
  163. BIN test/assets/line-merge.pdf
  164. +23 −0 test/assets/line-merge.pdf.json
  165. BIN test/assets/lists.pdf
  166. BIN test/assets/number-correction-1.pdf
  167. +322 −0 test/assets/number-correction-1.pdf.json
  168. BIN test/assets/page-number.pdf
  169. BIN test/assets/page-numbers.pdf
  170. BIN test/assets/paragraph-merge-1.pdf
  171. +39 −0 test/assets/paragraph-merge-1.pdf.json
  172. BIN test/assets/paragraph-merge.pdf
  173. +362 −0 test/assets/paragraph-merge.pdf.json
  174. BIN test/assets/redundancy-detection.pdf
  175. BIN test/assets/text-order-detection.pdf
  176. +1,309 −0 test/assets/text-order-detection.pdf.json
  177. BIN test/assets/text-order-mini.pdf
  178. +56 −0 test/helpers.ts
  179. +83 −0 test/json-export-import.spec.ts
  180. +63 −0 test/line-merge.spec.ts
  181. +151 −0 test/number-correction.spec.ts
  182. +65 −0 test/paragraph-merge.spec.ts
  183. +45 −0 test/redundancy-detection.spec.ts
  184. +65 −0 test/text-order-detection.spec.ts
  185. +130 −0 test/utils.spec.ts
  186. +19 −0 tsconfig.json
  187. +18 −0 tslint.json
@@ -0,0 +1 @@
node_modules
@@ -0,0 +1,159 @@
---
# Drone CI pipeline: installs dependencies, runs the formatter, linter and
# tests, optionally runs the Sonar analysis, then builds the Docker image and
# updates the matching Docker service for the develop / demo branches.
kind: pipeline
name: default

platform:
  os: linux
  arch: amd64

node:
  memory: high

steps:
  # Hand the checked-out workspace to UID 1001 / GID 0 before the other steps run.
  - name: Change file ownership
    image: alpine:latest
    commands:
      - chown -R 1001:0 /drone/src

  # Install npm dependencies using the Node.js 8 toolchain under /opt/rh.
  - name: Build project
    image: axarev/documentparser
    environment:
      LD_LIBRARY_PATH: /opt/rh/rh-nodejs8/root/usr/lib64
      NODE_ENV: development
    commands:
      - export PATH=/opt/rh/rh-nodejs8/root/usr/bin:$PATH
      - npm install

  - name: Run formatter
    image: axarev/documentparser
    environment:
      LD_LIBRARY_PATH: /opt/rh/rh-nodejs8/root/usr/lib64
    commands:
      - export PATH=/opt/rh/rh-nodejs8/root/usr/bin:$PATH
      - npm run format

  - name: Run linter
    image: node:8
    commands:
      - npm run lint

  - name: Run tests
    image: axarev/documentparser
    environment:
      LD_LIBRARY_PATH: /opt/rh/rh-nodejs8/root/usr/lib64
    commands:
      - export PATH=/opt/rh/rh-nodejs8/root/usr/bin:$PATH
      - npm run test

  # Sonar analysis; only runs for the master branch.
  - name: Code-analysis
    image: aosapps/drone-sonar-plugin:1.0
    settings:
      sonar_host:
        from_secret: sonar_host
      sonar_token:
        from_secret: sonar_token
    when:
      branch:
        - master

  # Writes "demo" into .tags on the demo branch, before the image build step.
  - name: Tag with demo
    image: busybox
    commands:
      - echo demo > .tags
    when:
      branch:
        - demo

  # Build and push the image (develop and demo branches, never for pull requests).
  - name: Build Docker image
    image: plugins/docker
    settings:
      repo: axarev/documentparser
      context: .
      dockerfile: docker/parsr/Dockerfile
      username:
        from_secret: registry_user
      password:
        from_secret: registry_password
      build_args:
        DEV_MODE: 'true'
      # auto_tag: true
    when:
      branch:
        - develop
        - demo
      event:
        exclude:
          - pull_request

  # Point the dev service at the :latest image over a TLS-secured Docker host.
  - name: Deploy dev
    image: docker
    environment:
      DOCKER_HOST:
        from_secret: docker_host
      CA:
        from_secret: docker_ca
      CLIENT_CERT:
        from_secret: docker_cert
      CLIENT_KEY:
        from_secret: docker_key
      DOCKER_CERT_PATH: /cert
      DOCKER_TLS_VERIFY: 1
      DOCKER_IMAGE: axarev/documentparser:latest
      DOCKER_SERVICE: documentparser_documentparser-dev
      REGISTRY_USER:
        from_secret: registry_user
      REGISTRY_PASSWORD:
        from_secret: registry_password
    commands:
      # Materialise the TLS client material from secrets into DOCKER_CERT_PATH.
      - mkdir -p "$DOCKER_CERT_PATH"
      - echo "$CA" > $DOCKER_CERT_PATH/ca.pem
      - echo "$CLIENT_CERT" > $DOCKER_CERT_PATH/cert.pem
      - echo "$CLIENT_KEY" > $DOCKER_CERT_PATH/key.pem
      # Feed the password on stdin: `-p` on the command line exposes it in the
      # process list and makes Docker print its insecure-CLI warning.
      - echo "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin
      - docker service update --with-registry-auth --image $DOCKER_IMAGE $DOCKER_SERVICE
      # Remove the certificate files once the update has been issued.
      - rm -rf $DOCKER_CERT_PATH
    when:
      branch:
        - develop
        - drone-ci
      event:
        exclude:
          - pull_request

  # Same procedure as "Deploy dev", targeting the :demo image and the demo service.
  - name: Deploy demo
    image: docker
    environment:
      DOCKER_HOST:
        from_secret: docker_host
      CA:
        from_secret: docker_ca
      CLIENT_CERT:
        from_secret: docker_cert
      CLIENT_KEY:
        from_secret: docker_key
      DOCKER_CERT_PATH: /cert
      DOCKER_TLS_VERIFY: 1
      DOCKER_IMAGE: axarev/documentparser:demo
      DOCKER_SERVICE: documentparser_parsr-demo
      REGISTRY_USER:
        from_secret: registry_user
      REGISTRY_PASSWORD:
        from_secret: registry_password
    commands:
      - mkdir -p "$DOCKER_CERT_PATH"
      - echo "$CA" > $DOCKER_CERT_PATH/ca.pem
      - echo "$CLIENT_CERT" > $DOCKER_CERT_PATH/cert.pem
      - echo "$CLIENT_KEY" > $DOCKER_CERT_PATH/key.pem
      # Password goes through stdin rather than `-p` (see "Deploy dev").
      - echo "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin
      - docker service update --with-registry-auth --image $DOCKER_IMAGE $DOCKER_SERVICE
      - rm -rf $DOCKER_CERT_PATH
    when:
      branch:
        - demo
      event:
        exclude:
          - pull_request


image_pull_secrets:
  - dockerconfigjson
@@ -0,0 +1,108 @@
# General
.DS_Store
.AppleDouble
.LSOverride

# Icon must end with two \r
Icon


# Thumbnails
._*

# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent

# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk

# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
yarn.lock

# Runtime data
pids
*.pid
*.seed
*.pid.lock

# Directory for instrumented libs generated by jscoverage/JSCover
lib-cov

# Coverage directory used by tools like istanbul
coverage

# nyc test coverage
.nyc_output

# Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
.grunt

# Bower dependency directory (https://bower.io/)
bower_components

# node-waf configuration
.lock-wscript

# Compiled binary addons (https://nodejs.org/api/addons.html)
build/Release

# Dependency directories
node_modules/
jspm_packages/

# TypeScript v1 declaration files
typings/

# Optional npm cache directory
.npm

# Optional eslint cache
.eslintcache

# Optional REPL history
.node_repl_history

# Output of 'npm pack'
*.tgz

# Yarn Integrity file
.yarn-integrity

# dotenv environment variables file
.env

# next.js build output
.next

# Project-specific build output and local working directories
# NOTE(review): `*dist*` matches every path whose name contains "dist",
# not just a dist/ folder — confirm that breadth is intended.
*dist*
samples
mutool-extraction
mutool-images
pipeline


# vscode settings
.vscode/settings.json

# SonarQube
.sonar/
.scannerwork/

# python / jupyter ignores
.ipynb_checkpoints
__pycache__
@@ -0,0 +1,8 @@
// Rationale about style choices can be found there https://prettier.io/docs/en/rationale.html

// Prettier configuration object exported for the formatter to load.
module.exports = {
// Wrap lines at 100 columns.
printWidth: 100,
// Use single quotes instead of double quotes for strings.
singleQuote: true,
// Indent with tab characters rather than spaces.
useTabs: true,
// Emit trailing commas wherever the syntax allows them.
trailingComma: 'all',
};
@@ -0,0 +1,20 @@
#!/bin/bash
#
# S2I assemble hook: runs the image's own assemble script when present, then
# installs the API and builds the TypeScript sources and the demo web viewer.

# Stop at the first failing command so a broken install or build fails the
# image build instead of being ignored, and so a failed `cd` below can never
# cause `npm install` to run in the wrong directory.
set -e

# Delegate to the base assemble script first, if it exists and is executable.
if [ -x /usr/libexec/s2i/assemble ]; then
	/usr/libexec/s2i/assemble
fi


echo ""

echo "Installing API"
npm run install:api

echo

echo "Installing Frontend (from demo)"
# "install:front": "npm install && npm run build:ts && npm run build:sass",
#npm run install:front
# The three parts of install:front are run by hand: the TypeScript build from
# the current directory, the install and Sass build from demo/web-viewer.
npm run build:ts
cd demo/web-viewer
npm install
npm run build:sass
@@ -0,0 +1,3 @@
#!/bin/bash

# S2I run hook: hand control to /usr/libexec/s2i/run. `exec` replaces this
# shell with that script instead of starting it as a child process.
exec /usr/libexec/s2i/run
@@ -0,0 +1,28 @@
{
  // Use IntelliSense to learn about possible Node.js debug attributes.
  // Hover to view descriptions of existing attributes.
  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
  //
  // Paths use ${workspaceFolder}; ${workspaceRoot} is deprecated by VS Code.
  "version": "0.2.0",
  "configurations": [
    {
      "type": "node",
      "request": "launch",
      "name": "Launch Program",
      // Debugs the compiled entry point; source maps map it back to the sources.
      "program": "${workspaceFolder}/dist/bin/index.js",
      "outFiles": ["${workspaceFolder}/dist/bin/**/*.js"],
      "sourceMaps": true,
      // Command-line arguments handed to the program, one flag/value pair per line.
      "args": [
        "-f", "${workspaceFolder}/samples/README.pdf",
        "-o", "${workspaceFolder}/demo/web-viewer/pipeline/output",
        "-n", "example",
        "-c", "${workspaceFolder}/server/defaultConfig.json",
        "-l", "debug",
        "-p"
      ],
      "env": {
        "NODE_DEBUG": "pipeline"
      },
      "outputCapture": "std"
    }
  ]
}
@@ -0,0 +1,16 @@
{
  // See https://go.microsoft.com/fwlink/?LinkId=733558
  // for the documentation about the tasks.json format
  "version": "2.0.0",
  "tasks": [
    {
      // Default build task: compile the project described by tsconfig.json
      // and report compiler errors through the $tsc problem matcher.
      "type": "typescript",
      "tsconfig": "tsconfig.json",
      "problemMatcher": ["$tsc"],
      "group": {
        "kind": "build",
        "isDefault": true
      }
    }
  ]
}

0 comments on commit aefa4de

Please sign in to comment.
You can’t perform that action at this time.