fix(deps): Update pdf2json to version 2 (#116)

By @copmerbenjamin: * Update pdf2json to version 2. MozBlobBuilder is not defined in newer versions of Node.js; pdf2json 2 resolves this but drops support for Node.js versions below 14 (#101, #103, #114). * BREAKING CHANGE: drop support for Node.js < 14. * Increase ecmaVersion to match the Node.js version upgrade.
adrienjoly · Apr 23, 2022 · 740cccb · 740cccb
1 parent a8b4764
commit 740cccb
Show file tree

Hide file tree

Showing 8 changed files with 94 additions and 343 deletions.
diff --git a/.eslintrc.json b/.eslintrc.json
@@ -1,4 +1,4 @@
 {
   "extends": ["plugin:prettier/recommended"],
-  "parserOptions": { "ecmaVersion": 6 }
+  "parserOptions": { "ecmaVersion": 2020 }
 }
diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml
@@ -9,27 +9,12 @@ on:
       - master
 
 jobs:
-  # Make sure that pdfreader can still run on old Node.js versions
-  old-node:
-    runs-on: ubuntu-latest
-    strategy:
-      matrix:
-        node-version: [8.x, 10.x]
-    steps:
-      - uses: actions/checkout@v1
-      - name: Use Node.js ${{ matrix.node-version }}
-        uses: actions/setup-node@v1
-        with:
-          node-version: ${{ matrix.node-version }}
-      - run: npm ci # install dependencies
-      - run: npm run test:samples
-
   # Prevent functional regressions on supported Node.js versions
   tests:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        node-version: [12.x, 14.x, 16.x]
+        node-version: [14.x, 16.x]
     steps:
       - uses: actions/checkout@v1
       - name: Use Node.js ${{ matrix.node-version }}
@@ -53,7 +38,6 @@ jobs:
   release:
     needs:
       - tests
-      - old-node
       - formatting
     runs-on: ubuntu-latest
     steps:

diff --git a/PdfReader.js b/PdfReader.js
@@ -47,10 +47,13 @@ function PdfReader(options) {
  **/
 PdfReader.prototype.parseFileItems = function (pdfFilePath, itemHandler) {
   itemHandler(null, { file: { path: pdfFilePath } });
-  var pdfParser = new PFParser();
+  var pdfParser;
   if (this.options.password) {
-    pdfParser.setPassword(this.options.password);
+    pdfParser = new PFParser(null, null, this.options.password);
+  } else {
+    pdfParser = new PFParser();
   }
+
   pdfParser.on("pdfParser_dataError", itemHandler);
   pdfParser.on("pdfParser_dataReady", function (pdfData) {
     forEachItem(pdfData, itemHandler);
@@ -64,10 +67,13 @@ PdfReader.prototype.parseFileItems = function (pdfFilePath, itemHandler) {
  */
 PdfReader.prototype.parseBuffer = function (pdfBuffer, itemHandler) {
   itemHandler(null, { file: { buffer: pdfBuffer } });
-  var pdfParser = new PFParser();
+  var pdfParser;
   if (this.options.password) {
-    pdfParser.setPassword(this.options.password);
+    pdfParser = new PFParser(null, null, this.options.password);
+  } else {
+    pdfParser = new PFParser();
   }
+
   pdfParser.on("pdfParser_dataError", itemHandler);
   pdfParser.on("pdfParser_dataReady", function (pdfData) {
     forEachItem(pdfData, itemHandler);

diff --git a/README.md b/README.md
@@ -7,6 +7,7 @@ Supports **tabular data** with automatic column detection, and **rule-based pars
 Dependencies: it is based on [pdf2json](https://www.npmjs.com/package/pdf2json), which itself relies on Mozilla's [pdf.js](https://github.com/mozilla/pdf.js/).
 
 ℹ️ Important notes:
+
 - This module is meant to be run using Node.js only. **It does not work from a web browser.**
 - This module extracts text entries from PDF files. It does not support photographed text. If you cannot select text from the PDF file, **you may need to use OCR software first**.