I'm using the tools/build_namespace_langs.py script from mwlib 0.11.2 with
an updated patch that handles the $fallback option better. The current
patch to that file (which lives in mwlib, not in this archive) is:
diff --git a/tools/build_namespace_langs.py b/tools/build_namespace_langs.py
index ea2ab15..4082b71 100755
--- a/tools/build_namespace_langs.py
+++ b/tools/build_namespace_langs.py
@@ -12,14 +12,15 @@ import sys
from mwlib import namespace
-filename_rex = re.compile(r'^Messages(?P<lang>\w\w\w?)\.php$')
+filename_rex = re.compile(r'^Messages(?P<lang>\w\w\w?(?:_\w+)?)\.php$')
+fallback_rex = re.compile(r'^\$fallback\s*=\s*["\'](?P<lang>\w\w\w?(?:-\w+)?)["\']')
start_names_rex = re.compile(r'^\$namespaceNames = array\($')
start_aliases_rex = re.compile(r'^\$namespaceAliases = array\($')
const2name_rex = re.compile('^\\s*(?P<const>NS_\w+)\\s*=>\\s*["\'](?P<name>\S+)["\']\s*,?\s*$')
name2const_rex = re.compile('^\\s*["\'](?P<name>\S+)["\']\\s*=>\\s*(?P<const>NS_\w+)\s*,?\s*$')
end_rex = re.compile(r'^\);$')
-def parse_namespace_names(fn):
+def parse_namespace_names(fn, msgsdir):
ns2name = {}
started = False
for line in open(fn, 'rb'):
@@ -27,6 +28,9 @@ def parse_namespace_names(fn):
if start_names_rex.match(line):
started = True
else:
+ mo = fallback_rex.match(line)
+ if mo is not None:
+ ns2name['$fallback'] = mo.group('lang').lower()
continue
if end_rex.match(line):
break
@@ -43,7 +47,15 @@ def parse_namespace_names(fn):
ns2name[nsnum] = name
else:
# no namespace names found in this file
- return None
+ if '$fallback' in ns2name:
+ fallback = ns2name['$fallback']
+ mo = 'Messages'+(fallback.capitalize().replace('-','_'))+'.php'
+ mo = os.path.join(msgsdir, mo)
+ ns2name = parse_namespace_names(mo, msgsdir)
+ if ns2name:
+ ns2name['$fallback'] = fallback
+ else:
+ return None
return ns2name
def parse_namespace_aliases(fn):
@@ -76,8 +88,9 @@ def get_ns_list(names, aliases):
for name, nsnum in aliases.items():
try:
v = names[nsnum]
- except KeyError:
- return None
+ except KeyError, e:
+ names[nsnum] = name
+ continue
if isinstance(v, unicode):
names[nsnum] = (v, name)
else:
@@ -85,7 +98,7 @@ def get_ns_list(names, aliases):
proj_talk_name = names.get(namespace.NS_PROJECT_TALK, u'%s_talk')
try:
- return [names[nsnum] for nsnum in namespace._lang_ns_data_keys] + [proj_talk_name]
+ return [names.get(nsnum,None) for nsnum in namespace._lang_ns_data_keys] + [proj_talk_name]
except KeyError, e:
return None
@@ -97,24 +110,38 @@ def main(argv):
assert os.path.isdir(msgsdir), '%r is not a directory' % msgsdir
lang_ns_data = {}
+ lang_fallback = {}
for fn in os.listdir(msgsdir):
mo = filename_rex.match(fn)
if mo is None:
continue
- lang = mo.group('lang').lower()
+ lang = mo.group('lang').lower().replace('_','-')
if lang == 'en':
# English is special: aliases are handled by other means etc.
continue
p = os.path.join(msgsdir, fn)
- names = parse_namespace_names(p)
+ names = parse_namespace_names(p, msgsdir)
if not names:
continue
+ lang_fallback[lang] = names.get('$fallback', 'en')
lst = get_ns_list(names, parse_namespace_aliases(p))
if lst:
lang_ns_data[lang] = lst
lang_ns_data['en'] = [u'Talk', u'User', u'User_talk', (u'File', u'Image'), (u'File_talk', u'Image talk'), u'MediaWiki', u'MediaWiki_talk', u'Template', u'Template_talk', u'Help', u'Help_talk', u'Category', u'Category_talk', u'Special', u'Media', u'%s_talk']
-
+
+ # fill in missing entries from fallback languages
+ def fill_in_missing(lang):
+ data = lang_ns_data[lang]
+ if not(None in data): return
+ fallback = lang_fallback.get(lang, 'en')
+ fill_in_missing(fallback)
+ for i in xrange(len(data)):
+ if data[i] is None:
+ data[i] = lang_ns_data[fallback][i]
+ for lang in lang_ns_data.keys():
+ fill_in_missing(lang)
+
s = ['lang_ns_data = {']
for lang in sorted(lang_ns_data.keys()):
s.append("'%s': %r," % (lang, lang_ns_data[lang]))
a65480d