/
convert.js
88 lines (75 loc) · 2.15 KB
/
convert.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
'use strict';
const fs = require('fs');
const path = require('path');
const zlib = require('zlib');
const util = require('util');
const crypto = require('crypto');
const {Transform} = require('stream');
const Listr = require('listr');
const PDFParser = require('pdf2json');
const pathEnsure = require('path-ensure');
const findCacheDir = require('find-cache-dir');
const pkg = require('../package.json');
// Cache directory resolved by 'find-cache-dir' for this package's name.
const CACHE_DIR = findCacheDir({name: pkg.name});
// Path helper rooted at CACHE_DIR; `paths.sync(name)` is used below to obtain
// the output file path. NOTE(review): presumably it also creates missing
// directories — confirm against the 'path-ensure' documentation.
const paths = pathEnsure({cwd: CACHE_DIR});
// Directory holding the source PDF files: '<cwd>/.tmp/pdf'.
const PDF_DIR = path.resolve(process.cwd(), '.tmp', 'pdf');
// Promise-returning variants of the callback-style `fs` functions used here.
const fsP = {
readdir: util.promisify(fs.readdir)
};
// Command-line arguments without the node executable and script path.
const argv = process.argv.slice(2);
/**
 * Transform stream that accepts arbitrary objects on its writable side and
 * emits each of them serialized as JSON text on its readable side.
 *
 * Modelled on an example from the 'pdf2json' source code:
 * https://github.com/modesty/pdf2json/blob/master/lib/p2jcmd.js#L45-L56
 */
class StringifyStream extends Transform {
	constructor() {
		// Configure the two sides through the documented constructor options
		// rather than mutating the private `_readableState`/`_writableState`
		// fields after construction: objects in, bytes out.
		super({writableObjectMode: true, readableObjectMode: false});
	}

	/**
	 * Serializes one incoming object to JSON and forwards it downstream.
	 *
	 * @param {*} obj - Value written to the stream.
	 * @param {string} encoding - Ignored; the writable side is in object mode.
	 * @param {Function} done - Transform callback, called with an error when
	 *   `JSON.stringify` throws (e.g. circular structure), otherwise with the
	 *   JSON text to push.
	 */
	_transform(obj, encoding, done) {
		let json;

		try {
			json = JSON.stringify(obj);
		} catch (err) {
			done(err);
			return;
		}

		done(null, json);
	}
}
/**
 * Builds the cache file name for a given input.
 *
 * @param {string} input - Text to hash (the callers pass a PDF file name).
 * @returns {string} Hex MD5 digest of `input` followed by '.json.gz'.
 */
function hashFilename(input) {
	const digest = crypto
		.createHash('md5')
		.update(input)
		.digest('hex');

	return `${digest}.json.gz`;
}
/**
 * Converts one PDF from PDF_DIR to gzip-compressed JSON in the cache.
 *
 * Pipeline: file read stream -> PDFParser -> StringifyStream -> gzip ->
 * file write stream. The output name is derived via `hashFilename(filename)`
 * and resolved through `paths.sync`.
 *
 * @param {string} filename - Name of the PDF file inside PDF_DIR.
 * @returns {Promise<void>} Resolves once the output file stream has finished
 *   writing; rejects with the first error emitted by any stage.
 */
const convert = filename => new Promise((resolve, reject) => {
	const source = fs.createReadStream(path.join(PDF_DIR, filename));
	const parser = new PDFParser();
	const stringify = new StringifyStream();
	const gzip = zlib.createGzip();
	const target = fs.createWriteStream(paths.sync(hashFilename(filename)));

	// `pipe()` does not forward errors downstream, so every stage needs its
	// own handler; otherwise a read/gzip/write failure is an unhandled
	// 'error' event and the promise never settles.
	for (const stage of [source, parser, stringify, gzip, target]) {
		stage.on('error', reject);
	}

	// Settle only after the compressed data has been flushed to the file,
	// not when the parser merely stops producing output.
	target.on('finish', resolve);

	source
		.pipe(parser)
		.pipe(stringify)
		.pipe(gzip)
		.pipe(target);
});
// Top-level task list: one parent task that spawns a conversion sub-task for
// every file found in PDF_DIR.
const tasks = new Listr([
	{
		title: 'Create cache files',
		/**
		 * Decides whether the conversion should be skipped.
		 *
		 * @returns {string|undefined} A skip reason when PDF_DIR is missing or
		 *   does not hold exactly the expected number of files; `undefined`
		 *   (run the task) otherwise or when `--force` was passed.
		 */
		skip: () => {
			// The download is treated as complete only with exactly this many files.
			const expectedCount = 34;

			if (argv.includes('--force')) {
				return;
			}

			// A missing directory counts as zero files; previously `files` stayed
			// undefined in that case and building the message threw a TypeError.
			const files = fs.existsSync(PDF_DIR) ? fs.readdirSync(PDF_DIR) : [];

			if (files.length !== expectedCount) {
				return `PDF files not completed, got [${files.length}] files. Try to download 'npm run download'`;
			}
		},
		// Lists PDF_DIR and returns a nested Listr with one `convert` task per file.
		task: async () => {
			const files = await fsP.readdir(PDF_DIR);

			return new Listr(files.map(filename => ({
				title: `Convert '${filename}' to JSON and compress`,
				task: () => convert(filename)
			})));
		}
	}
]);
// Failure handler for the task run: print the error and terminate the
// process with a non-zero exit code.
const abortOnFailure = error => {
	console.error(error);
	process.exit(1); // eslint-disable-line unicorn/no-process-exit
};

// Start the task list.
tasks.run().catch(abortOnFailure);