Skip to content

Commit

Permalink
added command to build tree.json
Browse files Browse the repository at this point in the history
  • Loading branch information
miya0001 committed Jun 8, 2020
1 parent 6e7288a commit 2f88ee5
Show file tree
Hide file tree
Showing 5 changed files with 504 additions and 1 deletion.
1 change: 1 addition & 0 deletions .gitignore
Expand Up @@ -30,3 +30,4 @@ src/config.json
.env

/data
/cache
78 changes: 78 additions & 0 deletions bin/download.sh
@@ -0,0 +1,78 @@
#!/usr/bin/env bash
#
# Downloads the numbered address archives (01000 .. 47000, step 1000) from
# nlftp.mlit.go.jp into ../cache, converts every CSV inside them from CP932 to
# UTF-8, applies a list of hard-coded data corrections and appends the result
# (gzip-compressed) to ../cache/isj.txt.gz.
#
# Archives already present in the cache directory are not downloaded again.

SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)
CACHE_DIR=${SCRIPT_DIR}/../cache
OUTPUT=${CACHE_DIR}/isj.txt.gz

# Cache files for the SPARQL results fetched further down in this script.
SAC_LABEL=${CACHE_DIR}/sac_label.json
SAC_PARENT=${CACHE_DIR}/sac_parent.json
SAC_CHANGE=${CACHE_DIR}/sac_change.json

mkdir -p "${CACHE_DIR}"
# The output is rebuilt from scratch on every run (the loop below appends).
rm -f "${OUTPUT}"

for no in $(seq -w 1000 1000 47000) ; do
  url="https://nlftp.mlit.go.jp/isj/dls/data/11.0b/${no}-11.0b.zip"
  zip=${CACHE_DIR}/${no}.zip
  if [ ! -e "${zip}" ] ; then
    # Download to a temporary name and rename only on success, so that a failed
    # or interrupted transfer is never mistaken for a cached archive later on.
    # NOTE(review): --insecure disables TLS certificate verification; kept from
    # the original script, confirm whether it is still required.
    if ! curl --insecure -f "${url}" > "${zip}.part" ; then
      rm -f "${zip}.part"
      echo "download failed: ${url}" >&2
      exit 1
    fi
    mv "${zip}.part" "${zip}"
  fi
  # The sed expressions run in the listed order on every line. They rename
  # entries, rewrite individual codes, and - on lines containing the given
  # code - swap the first ,"3" / ,"1" field. Presumably these patch known
  # errors in the upstream data; verify against the data provider's errata.
  unzip -p "${zip}" '*.[cC][sS][vV]' | iconv -f CP932 -t utf8 | \
    sed -e 's/"白糠町"/"白糠郡白糠町"/' \
        -e 's/"07023","郡山市"/"07203","郡山市"/' \
        -e 's/"431050098005","弓削六丁目"/"431050098006","弓削六丁目"/' \
        -e '/"041030090000"/s/,"3"/,"1"/' \
        -e '/"072070142000"/s/,"3"/,"1"/' \
        -e '/"072070143000"/s/,"3"/,"1"/' \
        -e '/"072070144000"/s/,"3"/,"1"/' \
        -e '/"072030237001"/s/,"1"/,"3"/' \
        -e '/"072030237002"/s/,"1"/,"3"/' \
        -e '/"072030237003"/s/,"1"/,"3"/' \
        -e '/"072030238001"/s/,"1"/,"3"/' \
        -e '/"072030238002"/s/,"1"/,"3"/' \
        -e '/"112310025001"/s/,"1"/,"3"/' \
        -e '/"112310025002"/s/,"1"/,"3"/' \
        -e '/"112310025003"/s/,"1"/,"3"/' \
        -e '/"232200381001"/s/,"1"/,"3"/' | \
    grep -v '"222130002000","紅葉台"' | \
    gzip >> "${OUTPUT}"

done

# Runs the SPARQL query supplied on stdin against the e-Stat LOD endpoint and
# stores the JSON result in the file named by $1. Does nothing when that file
# already exists. The response is written to a temporary file first and renamed
# only when curl succeeds (-f makes HTTP errors count as failures), so a failed
# request never leaves behind a file that later runs would treat as cached.
# Returns non-zero when the request fails.
# NOTE(review): --insecure is kept from the original script; confirm it is needed.
fetch_sparql() {
  local out=$1
  local tmp="${out}.part"

  if [ -e "${out}" ] ; then
    return 0
  fi

  if curl --insecure -v -f -H 'Accept: application/sparql-results+json' --data-urlencode 'query@-' http://data.e-stat.go.jp/lod/sparql/alldata/query > "${tmp}" ; then
    mv "${tmp}" "${out}"
  else
    rm -f "${tmp}"
    echo "SPARQL query failed: ${out}" >&2
    return 1
  fi
}

sparql_status=0

# Label (and optional validity) of every standard area code.
# The dcterms prefix is declared here because the query uses dcterms:valid.
fetch_sparql "${SAC_LABEL}" << 'EOS' || sparql_status=1
PREFIX sacs: <http://data.e-stat.go.jp/lod/terms/sacs#>
PREFIX ic: <http://imi.go.jp/ns/core/rdf#>
PREFIX dcterms: <http://purl.org/dc/terms/>
select ?id ?label ?valid where {?id a sacs:StandardAreaCode ; ic:表記 ?label. optional {?id dcterms:valid ?valid.}}
EOS

# Parent (dcterms:isPartOf) of every standard area code.
fetch_sparql "${SAC_PARENT}" << 'EOS' || sparql_status=1
PREFIX sacs: <http://data.e-stat.go.jp/lod/terms/sacs#>
PREFIX dcterms: <http://purl.org/dc/terms/>
select ?id ?parent where {?id a sacs:StandardAreaCode ; dcterms:isPartOf ?parent.}
EOS

# Successor links (succeedingCode or succeedingMunicipality) between codes.
fetch_sparql "${SAC_CHANGE}" << 'EOS' || sparql_status=1
PREFIX sacs: <http://data.e-stat.go.jp/lod/terms/sacs#>
select ?id ?next where {
{?id a sacs:StandardAreaCode; sacs:succeedingCode ?next.}
union
{?id a sacs:StandardAreaCode; sacs:succeedingMunicipality ?next.}
}
EOS

exit ${sparql_status}
164 changes: 164 additions & 0 deletions bin/tree.js
@@ -0,0 +1,164 @@
const readline = require('readline');
const fs = require('fs');
const simplify = require('../node_modules/imi-enrichment-address/lib/util').simplify;

// Kanji numerals for 0..99, indexed by their numeric value
// (chome[0] is "〇", chome[10] is "十", chome[21] is "二十一").
const chome = (() => {
  const digits = ["〇", "一", "二", "三", "四", "五", "六", "七", "八", "九"];
  return Array.from({ length: 100 }, (_, value) => {
    if (value < 10) return digits[value];
    const tens = Math.floor(value / 10);
    const ones = value % 10;
    // "十" alone stands for ten; a leading digit is only written from twenty up.
    const head = tens > 1 ? digits[tens] : "";
    const tail = ones > 0 ? digits[ones] : "";
    return head + "十" + tail;
  });
})();

//

// Registry of standard-area-code nodes, keyed by the resource URI found in
// the "id" binding of the SPARQL results.
const map = {};

(function() {

  // Reads a SPARQL JSON result file and returns its rows (results.bindings).
  // The raw text is passed through simplify() before it is parsed.
  const loadBindings = function(path) {
    const text = fs.readFileSync(path, "UTF-8");
    return JSON.parse(simplify(text)).results.bindings;
  };

  // Appends item to list unless that same object is already in it.
  const pushUnique = function(list, item) {
    if (!list.includes(item)) list.push(item);
  };

  // Basic attributes: one node per row of the file given as argv[2].
  for (const row of loadBindings(process.argv[2])) {
    // The code is the URI without its fixed prefix, cut at the first "-".
    const code = row.id.value.replace("http://data.e-stat.go.jp/lod/sac/C", "").split("-")[0];
    const label = row.label.value;
    const node = {
      id: row.id.value,
      label: label,
      parent: [],
      children: [],
      next: [],
      prev: [],
      // Only labels ending in one of these administrative suffixes are visible.
      visible: Boolean(label.match(/[都道府県市区町村]$/)),
      code: code,
      key: code + label
    };
    map[node.id] = node;
  }

  // Parent/child links from the file given as argv[3].
  for (const row of loadBindings(process.argv[3])) {
    const child = map[row.id.value];
    const parent = map[row.parent.value];
    if (!child || !parent) {
      console.error("Invalid parent/child", row.id.value, row.parent.value);
      continue;
    }
    pushUnique(child.parent, parent);
    pushUnique(parent.children, child);
  }

  // Predecessor/successor links from the file given as argv[4].
  for (const row of loadBindings(process.argv[4])) {
    const prev = map[row.id.value];
    const next = map[row.next.value];
    if (!next || !prev) {
      console.error("Invalid prev/next", row.id.value, row.next.value);
      continue;
    }
    pushUnique(prev.next, next);
    pushUnique(next.prev, prev);
  }

})();


// For every code, keep the node whose id string compares greatest
// (presumably the most recent revision of that code - confirm against the
// id scheme of the data source).
const latest = {};
for (const node of Object.values(map)) {
  const current = latest[node.code];
  if (current === undefined || current.id < node.id) {
    latest[node.code] = node;
  }
}

// Reads address CSV rows from stdin, attaches every town/area to the node in
// `latest` that matches its city code, and - once stdin is exhausted - prints
// the resulting tree as JSON on stdout.
readline.createInterface({
  input: process.stdin
}).on('line', (line) => {
  // Naive CSV handling: all double quotes are dropped and the row is split on
  // commas. Rows that do not have exactly 10 columns are ignored.
  const col = simplify(line).trim().replace(/"/g, "").split(",");
  if (col.length !== 10) return;
  const pref_code = col[0];
  const city_code = col[2];
  const area_code = col[4];
  const area_name = col[5];

  // Skip rows whose first column is not purely numeric (e.g. a header row).
  if (!pref_code.match(/^[0-9]+$/)) return;

  const parent = latest[city_code];

  if (parent === undefined) {
    console.error("city_code not found", city_code);
    return;
  }

  // Characters from index 9 on are the numeric suffix used as the chome number;
  // the first 9 characters identify the area shared by all of its chome.
  let lower = parseInt(area_code.substring(9), 10);
  // A missing or non-numeric suffix is treated as "no chome" so that it cannot
  // turn the running maximum below into NaN.
  if (Number.isNaN(lower)) lower = 0;
  let upper = area_code.substring(0, 9);
  let label = (lower === 0 ? area_name : area_name.replace(chome[lower] + "丁目", ""));
  if (label === "") {
    // The name consisted of nothing but the chome part: keep the row as an
    // area of its own, identified by the full area code.
    label = area_name;
    lower = 0;
    upper = area_code;
  }

  // One node per (parent, upper); created on first sight.
  let area = parent.children.find(x => x.id === upper);
  if (area === undefined) {
    area = {
      id: upper,
      label: label,
      parent: [parent],
      children: [],
      visible: true,
      code: upper,
      max: 0,
      next: [],
      key: upper.substring(5) + label
    };
    parent.children.push(area);
  }

  // Remember the highest chome number seen for this area.
  area.max = Math.max(area.max, lower);

}).on('close', () => {

  const root = {};

  // Copies `src` and its descendants into `container`.
  // Invisible nodes are skipped and their children are hoisted into the same
  // container. A visible node is stored under its key as
  //   - its `max` value when it has one (nodes created from CSV rows),
  //   - else the key of the first successor whose key differs from its own,
  //   - else an empty object that receives its children.
  const dig = function(src, container) {
    if (!src.visible) {
      src.children.forEach(child => {
        dig(child, container);
      });
      return;
    }

    if (container[src.key] === undefined) {
      const next = [];
      // Follows `next` links until a node with a different key is found;
      // stops as soon as one such key has been recorded.
      const chase = function(a) {
        if (next.length > 0) return;
        if (a.key !== src.key && next.indexOf(a.key) === -1)
          next.push(a.key);
        else
          a.next.forEach(chase);
      };
      src.next.forEach(chase);

      if (src.max !== undefined) {
        container[src.key] = src.max;
      } else if (next.length > 0) {
        container[src.key] = next[0];
      } else {
        container[src.key] = {}
      }
    }

    // Children can only be stored inside an object slot. When the slot holds a
    // number or a successor key there is nowhere to put them: writes to a
    // primitive are silently discarded, and recursing one level further would
    // hand `undefined` to dig() and throw.
    const slot = container[src.key];
    if (slot === null || typeof slot !== "object") return;

    src.children.forEach(child => {
      dig(child, slot);
    });
  };

  // Traverse starting from every node that has no parent.
  Object.values(map).filter(e => e.parent.length === 0).forEach(e => {
    dig(e, root);
  });

  console.log(JSON.stringify(root, null, 1));

});

0 comments on commit 2f88ee5

Please sign in to comment.