In [None]:
from functools import partial

In [None]:
from collections import defaultdict

In [None]:
from itertools import chain

In [None]:
import yaml
import yamlloader

keys:

stanza: dict with keys as int, or str of the form `N-chorus` (N an int); each value is a list of lines, each line a dict with keys zh, en

meter: str

category: dict with key as zh

note: dict with key as zh, en

ref: dict with key as zh, en

author: dict with key as en

title: dict with key as en

In [None]:
# Hymn metadata fields read by ``parser``, in the order they are emitted;
# the first one present in a hymn becomes its heading.
KEYS = (
    'category',
    'title',
    'author',
    'meter',
    'ref',
    'note',
)

In [None]:
# Language codes used as keys in the hymn data and in ``meta``.
LANG = (
    'zh',
    'en',
)

In [None]:
# Per-language header written verbatim at the top of each generated markdown
# file. Each value is a YAML block delimited by ``---`` and ``...``
# (presumably consumed as a pandoc metadata block -- confirm against the
# build command). Whitespace inside the literals is part of the output.
meta = {
    'en': '''---
title:	Selected Hymns
keywords:	Hymn
lang:	en
...

''',
    'zh': '''---
title:	詩歌選集
keywords:	詩歌
lang:	zh-Hant
otherlangs: en
CJKmainfont:	Kaiti TC
CJKoptions:	BoldFont = * Bold, AutoFakeSlant
...

'''
}

In [None]:
def get_lang(lang, lang_other, obj):
    '''obtain language ``lang`` from ``obj`` with fallback to ``lang_other``

    ``obj`` is either a plain str, returned unchanged whatever the requested
    language, or a dict keyed by language code.

    return ``obj`` itself, ``obj[lang]``, or ``obj[lang_other]``, in that
    order of preference

    raise ValueError if ``obj`` is neither a str nor a dict, or if it is a
    dict containing neither ``lang`` nor ``lang_other``. The offending object
    is included in the exception message.
    '''
    if isinstance(obj, str):
        return obj
    if not isinstance(obj, dict):
        raise ValueError(
            f'expected a str or dict, got {type(obj).__name__}: {obj!r}'
        )
    # preferred language first, then the fallback
    for candidate in (lang, lang_other):
        if candidate in obj:
            return obj[candidate]
    raise ValueError(f'neither {lang!r} nor {lang_other!r} found in {obj!r}')

In [None]:
def parse_stanza(lang, dict_):
    '''parse ``dict_`` as a stanza while choosing only ``lang``

    stanza are dict with keys as either int or str. str is in format ``N-chorus``
    where ``N`` is any int. This int indicates the n-th stanza. And ``-chorus``
    means it is a chorus. Each value is an iterable of lines, each line being
    a mapping indexed by language.

    In this function, Line blocks is used for each stanza, and chorus is a
    bullet item, stanza are enumerated items.

    If any line of a stanza has no ``lang`` entry, the whole stanza is
    replaced by a single ``no translation.`` item, so a partially translated
    stanza never yields a truncated stanza followed by a duplicate item.

    return a str of stanza in markdown format
    '''
    result = []
    for key, value in dict_.items():
        # str keys (``N-chorus``) become bullet items, other keys numbered items
        head = '\n* | ' if isinstance(key, str) else f'\n{key}. | '
        try:
            # collect every line first so a missing translation is detected
            # before anything from this stanza is emitted
            lines = [content[lang] for content in value]
        except KeyError:
            result.append(f'{head}no translation.')
            continue
        # only the first line carries the list marker; the rest continue
        # the line block
        result.extend(
            (head if index == 0 else '| ') + line
            for index, line in enumerate(lines)
        )
    return '\n'.join(result)

In [None]:
def parser(lang, dict_, logos=False):
    '''render one hymn ``dict_`` as markdown pieces in language ``lang``

    Every key of ``KEYS`` present in ``dict_`` is resolved through
    ``get_lang``, falling back to the other language ('zh' when ``lang`` is
    'en', otherwise 'en'). ``dict_['stanza']`` is rendered by
    ``parse_stanza`` and appended last.

    The first piece is turned into a level-1 heading.

    if ``logos`` is truthy, a Logos PBB milestone using ``logos`` as the page
    no. is inserted right after the heading.

    Return a list of markdown strings
    '''
    fallback = 'en' if lang != 'en' else 'zh'

    pieces = []
    for key in KEYS:
        if key in dict_:
            pieces.append(get_lang(lang, fallback, dict_[key]))
    pieces.append(parse_stanza(lang, dict_['stanza']))

    # whichever piece came first serves as the heading
    heading = pieces[0]
    pieces[0] = f'# {heading}'

    if logos:
        pieces.insert(1, f'[[@Headword+en:{logos}]]')
    return pieces

In [None]:
def parser_wrap(lang, data, i):
    '''parse the ``i``-th hymn of ``data`` with ``parser``, using the
    1-based position ``i + 1`` as the ``logos`` page no.
    '''
    hymn = data[i]
    page = i + 1
    return parser(lang, hymn, logos=page)

In [None]:
# Load the hymn collection. The encoding is given explicitly because the data
# holds CJK text and the platform default encoding is not always UTF-8.
# NOTE(review): ``yamlloader.ordereddict.CLoader`` appears to be the non-safe
# loader variant; only feed it trusted files, or switch to a safe loader
# after confirming the data uses no custom tags.
with open('data.yml', 'r', encoding='utf-8') as f:
    data = yaml.load(f, Loader=yamlloader.ordereddict.CLoader)

In [None]:
# Write one markdown file per language (``zh-Hant.md`` and ``en.md``): the
# language's metadata header followed by every hymn, each piece separated by
# a blank line.
for lang in LANG:
    filename = 'en.md' if lang == 'en' else 'zh-Hant.md'
    # explicit UTF-8: the output contains CJK text and must not depend on
    # the platform default encoding
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(meta[lang])
        for hymn in data:
            for line in parser(lang, hymn):
                print(line, file=f, end='\n\n')

In [None]:
# Write the Logos variants (``zh-Hant-logos.md`` and ``en-logos.md``): same
# content as the plain files, with each hymn's 1-based position passed to
# ``parser`` as the ``logos`` page no.
for lang in LANG:
    filename = 'en-logos.md' if lang == 'en' else 'zh-Hant-logos.md'
    # explicit UTF-8: the output contains CJK text and must not depend on
    # the platform default encoding
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(meta[lang])
        for page, hymn in enumerate(data, start=1):
            for line in parser(lang, hymn, logos=page):
                print(line, file=f, end='\n\n')