/
builder.js
74 lines (62 loc) · 1.84 KB
/
builder.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
'use strict';
const fs = require('fs');
const path = require('path');
const util = require('util');
const sortBy = require('lodash.sortby');
const kebabcase = require('lodash.kebabcase');
const code = require('./code');
const chunk = require('./chunk');
const transform = require('./transform');
const uncompress = require('./uncompress');
const textIgnores = require('./text-ignores');
const normalizeWord = require('./normalize-word');
// Promise-returning versions of the callback-style `fs` functions this module needs.
const fsP = {};
fsP.readdir = util.promisify(fs.readdir);
/**
 * Convert nested tables of raw rows into a flat list of records sorted by code.
 *
 * The input is walked as tables -> columns -> rows. Each row is an array whose
 * first element is the identifier and whose remaining elements are text
 * fragments (strings; presumably URI-encoded, since the chosen fragment is
 * passed through `decodeURIComponent`).
 *
 * The input arrays are not modified.
 *
 * @param {Array<Array<Array<string>>>} tables - Rows grouped by column, grouped by table.
 * @returns {Array<{code: string, type: *, name: string}>} One record per row,
 *   ordered by `code` via `sortBy`.
 */
function makeContext(tables) {
  // Decides whether the second fragment of a two-fragment row has to be
  // placed in front of the first one.
  const hasNextWord = val => (
    // The second fragment ends with an encoded space:
    // before: ['xyz', 'abc%20']
    // after: ['abc%20', 'xyz']
    val[1].endsWith('%20') ||
    // The first fragment is a single character, e.g. [33.06.12.2035]:
    // before: ['Z', 'ABC']
    // after: ['ABC', 'Z']
    val[0].length === 1
  );

  // Build the ignore lookup once per call instead of re-filtering every row
  // once per ignored entry.
  // [TODO]: remove this if unnecessary.
  const ignored = new Set();
  textIgnores.forEach(x => {
    ignored.add(x);
  });

  const records = tables.map(columns => columns.map(values => {
    // Destructure instead of `shift()` so the caller's row is left untouched.
    const [id, ...fragments] = values;
    const words = fragments.filter(x => !x.startsWith('%20') && !ignored.has(x));

    // Exactly two fragments may need to be joined in swapped order; in every
    // other case only the first fragment is used.
    const raw = words.length === 2 && hasNextWord(words) ?
      words[1] + words[0] :
      words[0];

    // NOTE(review): when no fragment survives the filters, `raw` is undefined
    // and `decodeURIComponent(undefined)` yields the string 'undefined' --
    // confirm whether such rows can occur and how they should be handled.
    return {
      code: id,
      type: code.type(id),
      name: normalizeWord(decodeURIComponent(raw)).toUpperCase()
    };
  }));

  // The initial value keeps the flatten step from throwing on an empty input.
  const flattened = records.reduce((prev, after) => prev.concat(after), []);
  return sortBy(flattened, 'code');
}
/**
 * Process every entry of a directory and build one output descriptor per entry.
 *
 * Each entry is passed to `uncompress`; every resulting page's `Texts` is run
 * through `transform` and `chunk`, and the combined result is turned into
 * records by `makeContext`. The output file name is derived from the record
 * whose `type` is 'provinsi'.
 *
 * All entries are processed concurrently; the returned promise rejects as soon
 * as any of them fails.
 *
 * @param {string} sources - Directory whose entries are processed.
 * @returns {Promise<Array<{path: string, data: Array<Object>}>>} One
 *   `{path, data}` pair per directory entry, in `readdir` order.
 * @throws {Error} If an entry yields no record of type 'provinsi'.
 */
module.exports.run = async sources => {
  const files = await fsP.readdir(sources);

  return Promise.all(files.map(async filepath => {
    const source = path.join(sources, filepath);
    const result = await uncompress(source);
    const data = makeContext(result.map(page => transform(page.Texts)).map(chunk));

    // The 'provinsi' record supplies the code and name used for the file name.
    const obj = data.find(ctx => ctx.type === 'provinsi');
    if (!obj) {
      // Fail with the offending path instead of an opaque TypeError on `obj.code`.
      throw new Error(`No 'provinsi' record found in ${source}`);
    }

    return {
      path: `${kebabcase([obj.code, obj.name].join('-'))}.json`,
      data
    };
  }));
};