# Extracting a Concept List from LingPy Wordlists



In [1]:
from lingpy import *
from pyconcepticon.api import Concepticon


def to_conceptlist(wordlist, prefix='Concepts-2018-{0}', concepticon=False):
    """Write the concepts of a wordlist to a tab-separated concept list.

    One row is written per entry of ``wordlist.rows``, numbered from 1 in
    iteration order. The output file is named
    ``prefix.format(<number of concepts>) + '.tsv'`` and is written as
    UTF-8, so ``prefix`` may include a directory part.

    Parameters
    ----------
    wordlist
        Object exposing ``rows``, ``height``, ``header``, ``get_etymdict``
        and ``wordlist[idx, column]`` access (presumably a LingPy
        ``Wordlist`` -- confirm against callers).
    prefix : str
        Format template with one ``{0}`` placeholder. It is used both for
        the row identifiers (``prefix.format('<height>-<number>')``) and
        for the output file name.
    concepticon : bool
        If true, add ``CONCEPTICON_ID`` and ``CONCEPTICON_GLOSS`` columns.
        Values are read from the wordlist columns ``concepticon_id`` and
        ``concepticon_gloss``; when one of the two columns is missing it
        is looked up in the Concepticon catalogue from the other one
        (empty string if no match is found).

    Raises
    ------
    ValueError
        If ``concepticon`` is true and the wordlist has neither a
        ``concepticon_id`` nor a ``concepticon_gloss`` column.
    """
    header = ['ID', 'NUMBER', 'ENGLISH']
    has_id = has_gloss = False
    gloss_to_id, id_to_gloss, etd = {}, {}, {}

    if concepticon:
        has_id = 'concepticon_id' in wordlist.header
        has_gloss = 'concepticon_gloss' in wordlist.header
        if not has_id and not has_gloss:
            raise ValueError(
                "at least concepticon gloss or id needs to be submitted")
        header += ['CONCEPTICON_ID', 'CONCEPTICON_GLOSS']

        # Load the catalogue only when a column really has to be derived,
        # and only once: instantiating Concepticon reads the whole dataset.
        if not (has_id and has_gloss):
            conceptsets = list(Concepticon().conceptsets.values())
            if not has_id:
                gloss_to_id = {cs.gloss: cs.id for cs in conceptsets}
            else:
                id_to_gloss = {cs.id: cs.gloss for cs in conceptsets}

        # Maps each concept to per-language lists of word ids (a falsy
        # placeholder marks languages without a word for that concept).
        etd = wordlist.get_etymdict(ref='concept')

    height = str(wordlist.height)
    concepts = []
    for number, concept in enumerate(wordlist.rows, start=1):
        row = [prefix.format(height + '-' + str(number)), str(number),
               concept]
        if concepticon:
            # Any word of the concept will do: take the first one found.
            idx = [ids[0] for ids in etd[concept] if ids][0]
            if has_id:
                cid = wordlist[idx, 'concepticon_id']
            else:
                cid = gloss_to_id.get(
                    wordlist[idx, 'concepticon_gloss'], '')
            if has_gloss:
                cgl = wordlist[idx, 'concepticon_gloss']
            else:
                # NOTE(review): catalogue ids are assumed to be strings
                # while the wordlist may store them as integers -- the key
                # is normalised to str so the lookup cannot miss on type.
                cgl = id_to_gloss.get(str(cid), '')
            row += [cid, cgl]
        # Cells may be integers (e.g. concepticon ids) or None; str.join
        # only accepts strings.
        concepts.append(
            ['' if cell is None else str(cell) for cell in row])

    with open(prefix.format(len(concepts)) + '.tsv', 'w',
              encoding='utf-8') as f:
        f.write('\t'.join(header) + '\n')
        f.writelines('\t'.join(row) + '\n' for row in concepts)
