diff --git a/package-lock.json b/package-lock.json index 2f6ea7d65..0c02db126 100644 --- a/package-lock.json +++ b/package-lock.json @@ -796,6 +796,28 @@ "resolved": "https://registry.npmjs.org/charenc/-/charenc-0.0.2.tgz", "integrity": "sha1-wKHS86cJLgN3S/qD8UwPxXkKhmc=" }, + "check-node-version": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/check-node-version/-/check-node-version-2.1.0.tgz", + "integrity": "sha1-hVZYQs95oJ36jiZnIFde4K7BmD0=", + "dev": true, + "requires": { + "map-values": "^1.0.1", + "minimist": "^1.2.0", + "object-filter": "^1.0.2", + "object.assign": "^4.0.4", + "run-parallel": "^1.1.4", + "semver": "^5.0.3" + }, + "dependencies": { + "minimist": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz", + "integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ=", + "dev": true + } + } + }, "chokidar": { "version": "1.5.2", "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-1.5.2.tgz", @@ -5187,6 +5209,12 @@ "lodash._isiterateecall": "^3.0.0" } }, + "lodash.difference": { + "version": "4.5.0", + "resolved": "https://registry.npmjs.org/lodash.difference/-/lodash.difference-4.5.0.tgz", + "integrity": "sha1-nMtOUF1Ia5FlE0V3KIWi3yf9AXw=", + "dev": true + }, "lodash.isarguments": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/lodash.isarguments/-/lodash.isarguments-3.1.0.tgz", @@ -5309,6 +5337,12 @@ "resolved": "https://registry.npmjs.org/map-stream/-/map-stream-0.1.0.tgz", "integrity": "sha1-5WqpTEyAVaFkBKBnS3jyFffI4ZQ=" }, + "map-values": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/map-values/-/map-values-1.0.1.tgz", + "integrity": "sha1-douOecAJvytk/ugG4ip7HEGQyZA=", + "dev": true + }, "markdown-table": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/markdown-table/-/markdown-table-0.4.0.tgz", @@ -6018,6 +6052,12 @@ "resolved": "https://registry.npmjs.org/object-assign/-/object-assign-4.1.1.tgz", "integrity": 
"sha1-IQmtx5ZYh8/AXLvUQsrIv7s2CGM=" }, + "object-filter": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/object-filter/-/object-filter-1.0.2.tgz", + "integrity": "sha1-rwt5f/6+r4pSxmN87b6IFs/sG8g=", + "dev": true + }, "object-keys": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/object-keys/-/object-keys-1.0.11.tgz", @@ -6839,6 +6879,12 @@ } } }, + "run-parallel": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.1.9.tgz", + "integrity": "sha512-DEqnSRTDw/Tc3FXf49zedI638Z9onwUotBMiUFKmrO2sdFKIbXamXGQ3Axd4qgphxKB4kw/qP1w5kTxnfU1B9Q==", + "dev": true + }, "safe-buffer": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.1.tgz", @@ -7978,6 +8024,21 @@ } } }, + "wikidata-filter": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/wikidata-filter/-/wikidata-filter-2.3.1.tgz", + "integrity": "sha512-GEeE3obTIW67lLoo5JDQigA+gHme9xj77Wdw7/qsGAcNMwEcOPHh8gJUHOjwjDXjmpuOpEkYqr5ij/q/fTMRsQ==", + "dev": true, + "requires": { + "check-node-version": "^2.1.0", + "commander": "^2.9.0", + "lodash.difference": "^4.2.0", + "lodash.pick": "^4.2.0", + "split": "^1.0.0", + "through": "^2.3.8", + "wikidata-sdk": "^5.1.0" + } + }, "wikidata-lang": { "version": "2.0.4", "resolved": "https://registry.npmjs.org/wikidata-lang/-/wikidata-lang-2.0.4.tgz", diff --git a/package.json b/package.json index 12a59afa9..ec5829c70 100644 --- a/package.json +++ b/package.json @@ -112,7 +112,8 @@ "shell-quote": "^1.4.3", "should": "^11.1.1", "sinon": "^1.17.1", - "supervisor": "^0.10.0" + "supervisor": "^0.10.0", + "wikidata-filter": "^2.3.1" }, "engines": { "node": ">= 6.4" diff --git a/scripts/dumps/get_wikidata_humans_dump b/scripts/dumps/get_wikidata_humans_dump new file mode 100755 index 000000000..5a5124771 --- /dev/null +++ b/scripts/dumps/get_wikidata_humans_dump @@ -0,0 +1,10 @@ +#!/usr/bin/env zsh +# Generate a pre-filtered dump of humans in Wikidata to ease 
development setup
+# cf https://github.com/inventaire/inventaire-deploy/blob/d280055/install_entities_search_engine#L24-L28
+
+setopt err_exit pipe_fail # stop (and skip the chmod) as soon as any stage of the pipeline fails
+curl -sSf https://dumps.wikimedia.org/wikidatawiki/entities/latest-all.json.gz |
+  gzip -d |
+  wikidata-filter --claim P31:Q5 --omit type,sitelinks |
+  gzip -c9 > humans.ndjson.gz
+chmod 664 humans.ndjson.gz