Commit 1956cce
Merge 3d83691 into 4589ab2 (2 parents: 4589ab2 + 3d83691)
iguntur committed Mar 21, 2018

Showing 39 changed files with 7,580 additions and 6,036 deletions.
2 changes: 0 additions & 2 deletions .travis.yml
@@ -6,7 +6,5 @@ before_install:
   - npm --version
 before_script:
   - npm install coveralls
-  - npm run download
-  - npm run convert
 after_success:
   - './node_modules/.bin/nyc report --reporter=text-lcov | ./node_modules/.bin/coveralls'
23 changes: 11 additions & 12 deletions index.js
@@ -5,33 +5,32 @@ const path = require('path');
 class AdministratifIndonesia {
 	constructor() {
 		this.path = path.resolve(__dirname, 'storages');
-		this.store = Object.create(null);
 
-		fs.readdirSync(this.path).forEach(filename => {
-			this.store[path.parse(filename).name] = path.join(this.path, filename);
-		});
+		Object.defineProperty(this, '_store', {
+			value: (obj => {
+				fs.readdirSync(this.path).forEach(filename => {
+					obj[path.parse(filename).name] = path.join(this.path, filename);
+				});
 
-		Object.defineProperty(this, 'store', {
-			enumerable: false,
-			configurable: false,
-			writable: false
+				return obj;
+			})({})
 		});
 	}
 
 	all() {
-		return this.store;
+		return this._store;
 	}
 
 	size() {
-		return Object.keys(this.store).length;
+		return Object.keys(this._store).length;
 	}
 
 	has(key) {
-		return key in this.store;
+		return key in this._store;
 	}
 
 	get(key) {
-		return this.has(key) ? this.store[key] : null;
+		return this.has(key) ? this._store[key] : null;
 	}
 }

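For reference, a minimal usage sketch of the public API after the switch to the non-enumerable `_store` property. This assumes index.js still ends by exporting an instance of AdministratifIndonesia (the export itself is outside the lines shown here), and 'example' is only a placeholder key:

const wilayah = require('./index.js'); // assumed: the module exports an instance of the class

wilayah.size();         // number of files found in `storages/`
wilayah.has('example'); // true only if `storages/` holds a file whose parsed name is "example"
wilayah.get('example'); // absolute path to that file, or null when the key is missing
wilayah.all();          // the full name -> path map backed by the non-enumerable `_store`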
10 changes: 3 additions & 7 deletions lib/builder.js
@@ -4,17 +4,13 @@ const path = require('path');
 const util = require('util');
 const sortBy = require('lodash.sortby');
 const kebabcase = require('lodash.kebabcase');
-const findCacheDir = require('find-cache-dir');
-const pkg = require('../package.json');
 const code = require('./code');
 const chunk = require('./chunk');
 const transform = require('./transform');
 const uncompress = require('./uncompress');
 const textIgnores = require('./text-ignores');
 const normalizeWord = require('./normalize-word');
 
-const CACHE_DIR = findCacheDir({name: pkg.name});
-
 const fsP = {
 	readdir: util.promisify(fs.readdir)
 };
@@ -61,11 +57,11 @@ function makeContext(tables) {
 	return sortBy(tables, 'code');
 }
 
-module.exports.run = async () => {
-	const files = await fsP.readdir(CACHE_DIR);
+module.exports.run = async sources => {
+	const files = await fsP.readdir(sources);
 
 	return Promise.all(files.map(filepath => {
-		return uncompress(path.join(CACHE_DIR, filepath)).then(result => {
+		return uncompress(path.join(sources, filepath)).then(result => {
 			const data = makeContext(result.map(page => transform(page.Texts)).map(chunk));
 			const obj = data.find(ctx => ctx.type === 'provinsi');

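Because `run` now receives the source directory as an argument instead of resolving it itself, each caller has to supply that directory (scripts/build.js below does so with find-cache-dir). A rough calling sketch under that assumption; the path is illustrative:

const builder = require('../lib/builder');

const sourcesDir = '/path/to/cached/sources'; // illustrative; build.js passes its find-cache-dir location here

builder.run(sourcesDir)
	.then(stats => console.log(`parsed ${stats.length} source files`))
	.catch(console.error);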
2 changes: 2 additions & 0 deletions package.json
@@ -50,12 +50,14 @@
 		"del": "^3.0.0",
 		"find-cache-dir": "^1.0.0",
 		"got": "^7.1.0",
+		"jsdom": "^11.6.2",
 		"listr": "^0.12.0",
 		"lodash.kebabcase": "^4.1.1",
 		"lodash.sortby": "^4.7.0",
 		"nyc": "^11.2.1",
 		"path-ensure": "^1.1.0",
 		"pdf2json": "^1.1.7",
+		"unzip": "^0.1.11",
 		"xo": "^0.18.2"
 	}
 }
9 changes: 7 additions & 2 deletions scripts/build.js
@@ -3,9 +3,14 @@ const fs = require('fs');
 const path = require('path');
 const util = require('util');
 const del = require('del');
+const sortBy = require('lodash.sortby');
 const pathEnsure = require('path-ensure');
+const findCacheDir = require('find-cache-dir');
+const pkg = require('../package.json');
 const builder = require('../lib/builder');
 
+const CACHE_DIR = findCacheDir({name: pkg.name});
+
 const fsP = {
 	writeFile: util.promisify(fs.writeFile)
 };
@@ -29,7 +34,7 @@ const buildIndexProvince = stats => {
 		.map(stat => stat.data.filter(x => x.type === 'provinsi'))
 		.reduce((p, c) => p.concat(c));
 
-	storage.write('index.json', JSON.stringify(data, null, '\t'));
+	storage.write('index.json', JSON.stringify(sortBy(data, 'code'), null, '\t'));
 };
 
 const buildDist = stats => {
@@ -40,7 +45,7 @@
 
 del([path.join(storage.dir, '*')]);
 
-builder.run()
+builder.run(CACHE_DIR)
 	.then(stats => {
 		buildDist(stats);
 		buildIndexProvince(stats);
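For context on the new CACHE_DIR constant, a small sketch of what find-cache-dir resolves; this summarizes the library's documented behaviour, not anything specific to this commit:

const findCacheDir = require('find-cache-dir');
const pkg = require('../package.json');

const CACHE_DIR = findCacheDir({name: pkg.name});
// typically resolves to <project root>/node_modules/.cache/<pkg.name>;
// the directory is not created on disk unless {create: true} is passed.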
12 changes: 10 additions & 2 deletions scripts/convert.js
@@ -18,6 +18,8 @@ const fsP = {
 	readdir: util.promisify(fs.readdir)
 };
 
+const argv = process.argv.slice(2);
+
 class StringifyStream extends Transform {
 	constructor() {
 		super();
@@ -59,8 +61,14 @@ const tasks = new Listr([
 	{
 		title: 'Create cache files',
 		skip: () => {
-			if (!fs.existsSync(PDF_DIR) || fs.readdirSync(PDF_DIR).length !== 34) {
-				return `PDF files doesn't exists or some files missing. Try to download 'npm run download'`;
+			let files;
+
+			if (argv.includes('--force')) {
+				return;
+			}
+
+			if (!fs.existsSync(PDF_DIR) || (files = fs.readdirSync(PDF_DIR), files.length !== 34)) {
+				return `PDF files not completed, got [${files.length}] files. Try to download 'npm run download'`;
 			}
 		},
 		task: async () => {
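A side note on the new skip logic: `--force` is read from `process.argv`, so when the script runs through npm the flag has to follow the `--` separator (for example `npm run convert -- --force`), and the second condition uses the comma operator to assign `files` before testing its length. A rough equivalent of that check written without the comma operator, for illustration only (not part of the commit):

let files = [];

if (fs.existsSync(PDF_DIR)) {
	files = fs.readdirSync(PDF_DIR);
}

if (files.length !== 34) {
	// return the same "PDF files not completed" skip message as above
}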
100 changes: 82 additions & 18 deletions scripts/download.js
@@ -1,34 +1,98 @@
 'use strict';
 const fs = require('fs');
 const path = require('path');
-const url = require('url');
+const util = require('util');
 const got = require('got');
+const unzip = require('unzip');
+const {JSDOM} = require('jsdom');
 const pathEnsure = require('path-ensure');
 
-const paths = pathEnsure();
+const cwd = path.dirname(__dirname);
+const paths = pathEnsure({cwd});
+const fsP = {
+	writeFile: util.promisify(fs.writeFile),
+	readFile: util.promisify(fs.readFile)
+};
 
+const KEMENDAGRI_DATA_LINK = 'http://www.kemendagri.go.id/pages/data-wilayah';
+
 /**
  * Download all PDF files
  *
  * link: http://www.kemendagri.go.id/pages/data-wilayah
  */
 
-got('http://www.kemendagri.go.id/pages/data-wilayah')
-	.then(res => res.body.match(/2015\/08\/18\/(.*?)\.pdf/g).map(
-		links => url.resolve('http://www.kemendagri.go.id/media/filemanager/', links)
-	))
-	.then(links => {
-		links.forEach(link => {
-			paths('.tmp', 'pdf', path.basename(link))
-				.then(fp => got.stream(link).pipe(fs.createWriteStream(fp)));
-		});
-	})
-	.catch(err => {
-		if (err) {
-			if (err.response.body || err.response) {
-				return console.error(err.response.body || err.response);
-			}
+async function getHtml() {
+	const filepath = await paths('.tmp', 'html', 'file.html');
+	const isCached = fs.existsSync(filepath);
+
+	if (isCached) {
+		return fsP.readFile(filepath, 'utf8');
+	}
+
+	try {
+		const response = await got(KEMENDAGRI_DATA_LINK);
+
+		await fsP.writeFile(filepath, response.body, 'utf8');
 
-			return console.error(err);
+		return getHtml();
+	} catch (err) {
+		if (err.response.body || err.response) {
+			return Promise.reject(err.response.body || err.response);
+		}
+
+		return Promise.reject(err);
+	}
+}
+
+function downloadFromKemendagri(url) {
+	const opts = {url};
+
+	const urlFilter = /\.(pdf|zip)$/g;
+
+	if (!opts.url || !urlFilter.test(opts.url)) {
+		return;
+	}
+
+	const filename = path.basename(opts.url);
+
+	paths('.tmp', 'pdf', filename).then(filepath => {
+		if (filepath.endsWith('.pdf')) {
+			got.stream(opts.url)
+				.pipe(fs.createWriteStream(filepath));
+		}
+
+		if (filepath.endsWith('.zip')) {
+			got.stream(opts.url)
+				.pipe(unzip.Extract({path: path.dirname(filepath)})); // eslint-disable-line new-cap
 		}
 	});
+}
+
+getHtml()
+	.then(body => {
+		const {window} = new JSDOM(body);
+		const selector = window.document.querySelector('ol').getElementsByTagName('a');
+		const link = {
+			googleDrive: {},
+			kemendagri: {}
+		};
+
+		for (const a of selector) {
+			if (a.href.includes('drive.google.com')) {
+				link.googleDrive[a.textContent] = a.href;
+			} else if (a.href.includes('kemendagri.go.id')) {
+				link.kemendagri[a.textContent] = a.href;
+			}
+		}
+
+		Object.values(link.kemendagri).forEach(downloadFromKemendagri);
+
+		console.log();
+		console.log('Download manualy from google drive.\n');
+		console.log(JSON.stringify(link.googleDrive, null, 4));
+		console.log();
+	})
+	.catch(err => {
+		return console.error(err);
+	});
