Skip to content

Commit

Permalink
fix(deps)!: update pdf2json from 2.0.1 to 3.0.1 (#125)
Browse files Browse the repository at this point in the history
Fixes #124.

BREAKING CHANGE: `pdfreader` switches from CommonJS to ES modules, as pdf2json did.
  • Loading branch information
MOUNIKA0536 committed Dec 2, 2022
1 parent 561fcf5 commit 8e19815
Show file tree
Hide file tree
Showing 16 changed files with 81 additions and 81 deletions.
2 changes: 1 addition & 1 deletion .eslintrc.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"extends": ["plugin:prettier/recommended"],
"parserOptions": { "ecmaVersion": 2020 }
"parserOptions": { "ecmaVersion": 2020, "sourceType": "module" }
}
16 changes: 7 additions & 9 deletions PdfReader.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
*
**/

var LOG = require("./lib/LOG.js");
var PFParser = require("pdf2json/pdfparser"); // doc: https://github.com/modesty/pdf2json
import { log as LOG } from "./lib/LOG.js";
import PDFParser from "pdf2json"; // doc: https://github.com/modesty/pdf2json

function forEachItem(pdf, handler) {
var pageNumber = 0;
Expand All @@ -37,7 +37,7 @@ function forEachItem(pdf, handler) {
handler();
}

function PdfReader(options) {
export function PdfReader(options) {
LOG("PdfReader"); // no-op by default; only printed after logging was enabled with `toggle(true)` from lib/LOG.js
this.options = options || {};
}
Expand All @@ -49,9 +49,9 @@ PdfReader.prototype.parseFileItems = function (pdfFilePath, itemHandler) {
itemHandler(null, { file: { path: pdfFilePath } });
var pdfParser;
if (this.options.password) {
pdfParser = new PFParser(null, null, this.options.password);
pdfParser = new PDFParser(null, null, this.options.password);
} else {
pdfParser = new PFParser();
pdfParser = new PDFParser();
}

pdfParser.on("pdfParser_dataError", itemHandler);
Expand All @@ -69,9 +69,9 @@ PdfReader.prototype.parseBuffer = function (pdfBuffer, itemHandler) {
itemHandler(null, { file: { buffer: pdfBuffer } });
var pdfParser;
if (this.options.password) {
pdfParser = new PFParser(null, null, this.options.password);
pdfParser = new PDFParser(null, null, this.options.password);
} else {
pdfParser = new PFParser();
pdfParser = new PDFParser();
}

pdfParser.on("pdfParser_dataError", itemHandler);
Expand All @@ -81,5 +81,3 @@ PdfReader.prototype.parseBuffer = function (pdfBuffer, itemHandler) {
var verbosity = this.options.debug ? 1 : 0;
pdfParser.parseBuffer(pdfBuffer, verbosity);
};

module.exports = PdfReader;
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ It's up to your callback to process these items into a data structure of your ch
For example:

```javascript
const { PdfReader } = require("pdfreader");
import { PdfReader } from "pdfreader";

new PdfReader().parseFileItems("test/sample.pdf", (err, item) => {
if (err) console.error("error:", err);
Expand All @@ -75,8 +75,8 @@ new PdfReader({ password: "YOUR_PASSWORD" }).parseFileItems(
As above, but reading from a buffer in memory rather than from a file referenced by path. For example:

```javascript
const fs = require("fs");
const { PdfReader } = require("pdfreader");
import fs from "fs";
import { PdfReader } from "pdfreader";

fs.readFile("test/sample.pdf", (err, pdfBuffer) => {
// pdfBuffer contains the file content
Expand Down
12 changes: 6 additions & 6 deletions Rule.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,15 @@
* This content is released under the MIT License.
**/

var LOG = require("./lib/LOG.js");
import { log as LOG } from "./lib/LOG.js";
import { parseColumns } from "./lib/parseColumns.js";
import { parseTable } from "./lib/parseTable.js";

/**
* regexp: a regular expression which a PDF item's text must match in order to execute that rule.
* => a Rule object exposes "accumulators": methods that define the data extraction strategy of a rule.
**/
function Rule(regexp) {
export function Rule(regexp) {
this.regexp = regexp;
var self = this;
// proxy accumulators methods
Expand Down Expand Up @@ -180,11 +182,9 @@ Rule.addAccumulator("accumulateFromSameX", function () {
/**
* This accumulator will store a table by detecting its columns, given their names.
**/
Rule.addAccumulator("parseColumns", require("./lib/parseColumns.js"));
Rule.addAccumulator("parseColumns", parseColumns);

/**
* This accumulator will store a table by detecting its columns, given their count.
**/
Rule.addAccumulator("parseTable", require("./lib/parseTable.js"));

module.exports = Rule;
Rule.addAccumulator("parseTable", parseTable);
24 changes: 16 additions & 8 deletions index.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
exports.PdfReader = require("./PdfReader");
exports.Rule = require("./Rule");
exports.LOG = require("./lib/LOG.js");
exports.parseTable = require("./lib/parseTable.js");
exports.parseColumns = require("./lib/parseColumns.js");
exports.SequentialParser = require("./lib/SequentialParser.js"); // experimental
exports.TableParser = require("./lib/TableParser.js");
exports.ColumnsParser = require("./lib/ColumnsParser.js");
// Public entry point of the package: re-exports every module as named ES exports.
export { PdfReader } from "./PdfReader.js";
export { Rule } from "./Rule.js";
// LOG is exposed as a module namespace object, i.e. `LOG.log(...)` and `LOG.toggle(enabled)`.
export * as LOG from "./lib/LOG.js";
// With CommonJS, the parseTable module exported a function that also carried helpers
// as properties (renderItems, renderRows, renderMatrix, renderTable, detectCollisions).
// Copying the module's named exports onto the function keeps `parseTable.renderMatrix(...)`
// and friends working for consumers of this entry point.
import * as parseTableExports from "./lib/parseTable.js";
export const parseTable = Object.assign(
parseTableExports.parseTable,
parseTableExports
);
// Same pattern for parseColumns: the function is exported with the module's named exports attached to it.
import * as parseColumnsExports from "./lib/parseColumns.js";
export const parseColumns = Object.assign(
parseColumnsExports.parseColumns,
parseColumnsExports
);
export { SequentialParser } from "./lib/SequentialParser.js"; // experimental
export { TableParser } from "./lib/TableParser.js";
export { ColumnsParser } from "./lib/ColumnsParser.js";
6 changes: 2 additions & 4 deletions lib/ColumnsParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
* This content is released under the MIT License.
**/

var LOG = require("./LOG.js");
import { log as LOG } from "./LOG.js";

function getColumnIndex(cols, x) {
var bestDist = null;
Expand All @@ -20,7 +20,7 @@ function getColumnIndex(cols, x) {
return i - 1;
}

function ColumnsParser(colNames) {
export function ColumnsParser(colNames) {
this.cols = [];
var cols = this.cols,
colNames = colNames.slice(), // clone (for parameter immutability)
Expand Down Expand Up @@ -48,5 +48,3 @@ function ColumnsParser(colNames) {
}
};
}

module.exports = ColumnsParser;
11 changes: 5 additions & 6 deletions lib/LOG.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* This content is released under the MIT License.
**/

var util = require("util");
import util from "util";

var nullLog = function LOG() {};

Expand All @@ -17,11 +17,10 @@ var realLog = function LOG() {

var LOG = nullLog;

module.exports = function () {
export function log() {
LOG.apply(null, arguments);
};
}

module.exports.toggle = function (enabled) {
export function toggle(enabled) {
LOG = !enabled ? nullLog : realLog;
return module.exports;
};
}
4 changes: 1 addition & 3 deletions lib/SequentialParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
* Provides a list of parsed `fields`.
* Calls `callback(error, this)` when all accumulators were processed, or when processing a null item.
**/
function SequentialParser(accumulators, callback) {
export function SequentialParser(accumulators, callback) {
var step = 0;
var fields = {};
return {
Expand Down Expand Up @@ -32,5 +32,3 @@ function SequentialParser(accumulators, callback) {
},
};
}

module.exports = SequentialParser;
4 changes: 1 addition & 3 deletions lib/TableParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* This content is released under the MIT License.
**/

function TableParser() {
export function TableParser() {
this.rows = {};
}

Expand Down Expand Up @@ -114,5 +114,3 @@ function renderMatrix(matrix) {
TableParser.prototype.renderMatrix = function () {
return renderMatrix(this.getMatrix());
};

module.exports = TableParser;
4 changes: 2 additions & 2 deletions lib/parseColumns.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
* This content is released under the MIT License.
**/

var LOG = require("./LOG.js");
import { log as LOG } from "./LOG.js";

module.exports = function (/* columns */) {
export const parseColumns = function (/* columns */) {
this.output = [];
this.cols = Array.prototype.slice.apply(arguments);
var colNames = this.cols,
Expand Down
21 changes: 7 additions & 14 deletions lib/parseTable.js
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,23 @@ function fillTab(str) {
return str.substr(0, 7);
}

function renderTable(table) {
export function renderTable(table) {
return (table || [])
.map(function (row) {
return (row || []).map(fillTab).join("\t");
})
.join("\n");
}

function renderMatrix(matrix) {
export function renderMatrix(matrix) {
return (matrix || [])
.map(function (row) {
return (row || []).map(joinCellCollisions("+")).join("\t");
})
.join("\n");
}

function renderRows(rows) {
export function renderRows(rows) {
return (rows || [])
.map(function (row, rowId) {
var cells = [rowId + ":"];
Expand All @@ -91,7 +91,7 @@ function renderRows(rows) {
.join("\n");
}

function renderItems(items) {
export function renderItems(items) {
return items
.map(function (i) {
return [i.y, i.x, i.text].join("\t");
Expand All @@ -113,7 +113,7 @@ function buildMatrix(rows, classifyColumn) {
return matrix;
}

function detectCollisions(matrix) {
export function detectCollisions(matrix) {
var collisions = [];
(matrix || []).map(function (row, rowN) {
(row || []).map(function (cellItems, colN) {
Expand All @@ -128,7 +128,7 @@ function detectCollisions(matrix) {
return collisions;
}

function makeAccumulator(nbRows, headerRow) {
export const parseTable = function makeAccumulator(nbRows, headerRow) {
var rule = this,
items = [];

Expand All @@ -155,11 +155,4 @@ function makeAccumulator(nbRows, headerRow) {
});

return accumulate; // then the same function will be run on all following items, until another rule is triggered
}

module.exports = makeAccumulator;
module.exports.renderItems = renderItems;
module.exports.renderRows = renderRows;
module.exports.renderMatrix = renderMatrix;
module.exports.renderTable = renderTable;
module.exports.detectCollisions = detectCollisions;
};
24 changes: 12 additions & 12 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
{
"name": "pdfreader",
"type": "module",
"version": "0.0.0-development",
"description": "Read text and parse tables from PDF files. Supports tabular data with automatic column detection, and rule-based parsing.",
"main": "index.js",
Expand Down Expand Up @@ -38,7 +39,7 @@
},
"homepage": "https://github.com/adrienjoly/npm-pdfreader",
"dependencies": {
"pdf2json": "2.0.1"
"pdf2json": "3.0.1"
},
"devDependencies": {
"@semantic-release/changelog": "^6.0.1",
Expand Down
6 changes: 4 additions & 2 deletions parse.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
var LOG = require("./lib/LOG.js").toggle(false);
var PdfReader = require("./index.js").PdfReader;
import { toggle } from "./lib/LOG.js";
import { PdfReader } from "./index.js";

toggle(false);

function printRawItems(filename, callback) {
new PdfReader().parseFileItems(filename, function (err, item) {
Expand Down
8 changes: 5 additions & 3 deletions parseAsBuffer.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
var LOG = require("./lib/LOG.js").toggle(false);
var PdfReader = require("./index.js").PdfReader;
var fs = require("fs");
import fs from "fs";
import { toggle } from "./lib/LOG.js";
import { PdfReader } from "./index.js";

toggle(false);

function printRawItems(pdfBuffer, callback) {
new PdfReader().parseBuffer(pdfBuffer, function (err, item) {
Expand Down

0 comments on commit 8e19815

Please sign in to comment.