-
Notifications
You must be signed in to change notification settings - Fork 11
/
Qleverfile.wikidata
33 lines (27 loc) · 1.25 KB
/
Qleverfile.wikidata
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# Qleverfile for Wikidata; use with the qlever script (`pip install qlever`)
#
# qlever get-data # downloads two .bz2 files of total size ~100 GB
# qlever index # takes ~4.5 hours and ~20 GB RAM (on an AMD Ryzen 9 5900X)
# qlever start # starts the server (takes a few seconds)
# Dataset-wide values. This file uses ${...} interpolation and $$ escapes, so it
# is presumably read with Python configparser (ExtendedInterpolation), where
# [DEFAULT] keys act as fallbacks for every other section -- confirm.
[DEFAULT]
# Short dataset name; referenced in this file as ${data:NAME} by ACCESS_TOKEN.
NAME = wikidata
# Where the input dumps come from and how the dataset is described
# (presumably consumed by `qlever get-data` -- confirm).
[data]
# Base URL of the Wikidata entity dumps; interpolated into GET_DATA_CMD and
# DESCRIPTION.
GET_DATA_URL = https://dumps.wikimedia.org/wikidatawiki/entities
# Shell command that fetches both dump files. curl flags: -L follows redirects,
# -R gives the local file the remote file's timestamp, "-C -" resumes a partial
# download, --remote-name-all saves each URL under its remote file name.
# 2>&1 merges stderr into stdout.
GET_DATA_CMD = curl -LRC - --remote-name-all ${GET_DATA_URL}/latest-all.ttl.bz2 ${GET_DATA_URL}/latest-lexemes.ttl.bz2 2>&1
# "$$" stands for a literal "$" (assuming extended interpolation), so the value
# is the shell command substitution $(date -r ...): the modification date of
# latest-all.ttl.bz2 formatted as DD.MM.YYYY, or the text NO_DATE if `date`
# fails (e.g. the file has not been downloaded yet).
VERSION = $$(date -r latest-all.ttl.bz2 +%d.%m.%Y || echo "NO_DATE")
# Human-readable description built from GET_DATA_URL and VERSION.
DESCRIPTION = Full Wikidata dump from ${GET_DATA_URL} (latest-all.ttl.bz2 and latest-lexemes.ttl.bz2, version ${VERSION})
# Index-building settings (presumably consumed by `qlever index` -- confirm).
[index]
# Space-separated list of the two dump files named in GET_DATA_CMD.
INPUT_FILES = latest-all.ttl.bz2 latest-lexemes.ttl.bz2
# Command that writes the decompressed input to stdout: lbzcat (from lbzip2)
# with 4 worker threads (-n 4) and --force (-f), applied to INPUT_FILES.
CAT_INPUT_FILES = lbzcat -n 4 -f ${INPUT_FILES}
# JSON settings object, kept on a single line. It sets an empty
# "languages-internal" list, "prefixes-external" containing only the empty
# string, an en/US locale with "ignore-punctuation" enabled,
# "ascii-prefixes-only" enabled, and 5,000,000 triples per batch.
# NOTE(review): the exact effect of each key is defined by the index builder,
# not by this file -- check its documentation before changing any of them.
SETTINGS_JSON = { "languages-internal": [], "prefixes-external": [""], "locale": { "language": "en", "country": "US", "ignore-punctuation": true }, "ascii-prefixes-only": true, "num-triples-per-batch": 5000000 }
# Size with a G suffix; presumably the memory budget for external-memory
# (STXXL) sorting during indexing -- confirm.
STXXL_MEMORY = 10G
# Server settings (presumably consumed by `qlever start` -- confirm).
[server]
# Port number for the server.
PORT = 7001
# Interpolates NAME as seen from the [data] section. NAME is set only in
# [DEFAULT], so this relies on the [DEFAULT] fallback and yields "wikidata".
# NOTE(review): the token is therefore the guessable dataset name; use a
# private value if the server is reachable by untrusted clients.
ACCESS_TOKEN = ${data:NAME}
# Sizes with a G suffix; presumably the memory limit for query processing and
# the maximum size of the result cache, respectively -- confirm.
MEMORY_FOR_QUERIES = 20G
CACHE_MAX_SIZE = 10G
# How the commands are executed.
[runtime]
# Selects docker as the runtime system; IMAGE names the container image to use.
SYSTEM = docker
# NOTE(review): the ":latest" tag is unpinned, so the image can change between
# pulls; pin a specific tag or digest if reproducible builds matter.
IMAGE = docker.io/adfreiburg/qlever:latest
# User-interface settings.
[ui]
# Name of the UI configuration to use; presumably selects a matching preset in
# the QLever UI -- confirm. Same value as NAME in [DEFAULT], but set separately.
UI_CONFIG = wikidata