In [None]:
import pyoxigraph as ox
import pandas as pd

In [None]:
# Open the pyoxigraph store located at data/wety.db. The query helpers defined
# below read this module-level `store` rather than taking it as a parameter.
store = ox.Store("data/wety.db")

In [None]:
def get_item(term, lang, n=0):
    """Return the IRI of the n-th item whose term and language match.

    Args:
        term: Text matched against the item's ``<p:term>`` literal.
        lang: Text matched against the item's ``<p:lang>`` literal.
        n: Index into the list of matches (default 0). Negative indices
            count from the end, as with any Python list. The query has no
            ORDER BY, so the order of matches is whatever the store returns.

    Returns:
        The ``.value`` of the ``?w`` binding of the selected match.

    Raises:
        IndexError: If ``n`` is out of range for the matches found
            (including when nothing matches at all).
    """

    def _quote(text):
        # A double-quoted SPARQL string literal may not contain a raw quote,
        # backslash, line feed or carriage return; escape them so that terms
        # containing such characters cannot break or alter the query.
        escaped = (
            str(text)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )
        return f'"{escaped}"'

    query = (
        f"SELECT ?w WHERE {{ ?w <p:term> {_quote(term)} . "
        f"?w <p:lang> {_quote(lang)} .}}"
    )
    matches = [row['w'].value for row in store.query(query)]
    try:
        return matches[n]
    except IndexError:
        # Same exception type as plain list indexing, but say what was looked up.
        raise IndexError(
            f"no item at index {n} for term={term!r}, lang={lang!r} "
            f"({len(matches)} match(es) found)"
        ) from None

def print_info(item):
    """Print the term, language and gloss of ``item``, one value per line.

    Args:
        item: IRI of the item (without angle brackets); it is interpolated
            into the query as ``<item>``.

    Every solution of the query prints three lines, in the order term,
    language, gloss. All three triple patterns must match, so an item that
    lacks any of these properties prints nothing.
    """
    sparql = f"""SELECT ?term ?lang ?gloss WHERE {{ 
            <{item}> <p:term> ?term .
            <{item}> <p:lang> ?lang .
            <{item}> <p:gloss> ?gloss .
        }}"""
    for row in store.query(sparql):
        for field in ('term', 'lang', 'gloss'):
            print(row[field].value)

def get_head_source(item):
    """Return the source item that sits at ``item``'s head position.

    The query pairs the item's ``<p:head>`` value with every source node
    reachable through ``<p:source>``; the first solution whose source-node
    ``<p:order>`` equals that head value wins.

    Args:
        item: IRI of the item (without angle brackets).

    Returns:
        The ``.value`` of the matching ``?source`` binding, or ``None`` when
        no source node has an order equal to the head value.
    """
    sparql = f"""SELECT ?source ?order ?head WHERE {{ 
            <{item}> <p:head> ?head .
            <{item}> <p:source> ?sourceNode .
            ?sourceNode <p:item> ?source .
            ?sourceNode <p:order> ?order .
        }}"""
    head_sources = (
        row['source'].value
        for row in store.query(sparql)
        if row['order'].value == row['head'].value
    )
    # First match only; None when nothing lines up with the head index.
    return next(head_sources, None)

def get_head_root(item):
    """Follow head-source links from ``item`` up to the last reachable item.

    Repeatedly calls ``get_head_source`` and stops when it returns a falsy
    value, or when it returns an item already visited on this walk. Without
    the second check, a cycle in the head-source links would loop forever.

    Args:
        item: IRI of the starting item.

    Returns:
        The last item reached before the walk stopped. This is ``item``
        itself when it has no head source.
    """
    visited = {item}
    root = item
    source = get_head_source(item)
    while source and source not in visited:
        visited.add(source)
        root = source
        source = get_head_source(source)
    return root

def get_children(item):
    """List the items that have ``item`` as the source at their head position.

    A child is any ``?child`` with a ``<p:source>`` node that points at
    ``item`` through ``<p:item>`` and whose ``<p:order>`` equals the child's
    own ``<p:head>`` value.

    Args:
        item: IRI of the parent item (without angle brackets).

    Returns:
        A list of ``(child_iri, lang, term)`` tuples, one per query solution.
    """
    sparql = f"""SELECT ?child ?lang ?term WHERE {{ 
            ?sourceNode <p:item> <{item}> .
            ?sourceNode <p:order> ?order .
            ?child <p:source> ?sourceNode .
            ?child <p:head> ?order . 
            ?child <p:lang> ?lang .
            ?child <p:term> ?term .
        }}"""
    children = []
    for row in store.query(sparql):
        children.append((row['child'].value, row['lang'].value, row['term'].value))
    return children

def get_descendants(item):
    """Build the nested tree of descendants below ``item``.

    Args:
        item: IRI of the item whose descendants are collected.

    Returns:
        A dict mapping ``"<lang>, <term>"`` labels of each child (as returned
        by ``get_children``) to that child's own descendants dict. Leaves
        map to ``{}``.

    Notes:
        * A child that already appears on the path from ``item`` down to the
          current node is recorded with an empty subtree instead of being
          expanded again, so cyclic data cannot cause unbounded recursion.
        * Keys are built from language and term only, so two children of the
          same parent sharing both values overwrite each other.
    """

    def _walk(node, path):
        subtree = {}
        for child, lang, term in get_children(node):
            label = "{}, {}".format(lang, term)
            # `path` holds the ancestors of this branch only, so an item that
            # is reachable through several branches is still expanded in each.
            subtree[label] = {} if child in path else _walk(child, path | {child})
        return subtree

    return _walk(item, frozenset([item]))

In [None]:
# Worked example: look up the first item with term "revolve" and lang
# "English", print it, follow its head-source links to the last reachable
# item, print that one too, and collect everything that descends from it.
item = get_item("revolve", "English")
print_info(item)
head_root = get_head_root(item)
print_info(head_root)
# Nested dict of "<lang>, <term>" labels; displayed in the next cell.
d = get_descendants(head_root)

In [None]:
# Display the nested descendants dict assigned to `d` in the previous cell.
d

In [None]:
# Export every distinct (lang, pos, term) combination of items that have a
# <p:glossNum> value, sorted by those three columns, and print the row count.
# The glossNum pattern appears once: repeating an identical triple pattern adds
# no constraint. The query is a plain string because nothing is interpolated.
# NOTE(review): the output name says "nonzero", but the query only requires that
# a glossNum triple exists - presumably zero values are never stored; confirm.
data = pd.DataFrame(
    [
        [w['lang'].value, w['pos'].value, w['term'].value]
        for w in store.query(
            """SELECT DISTINCT ?lang ?pos ?term WHERE {
                ?item <p:glossNum> ?glossNum .
                ?item <p:lang> ?lang .
                ?item <p:pos> ?pos .
                ?item <p:term> ?term .
            }"""
        )
    ],
    columns=["lang", "pos", "term"],
)
# Rebind instead of sorting in place so the cell reads as a single pipeline.
data = data.sort_values(["lang", "pos", "term"])
data.to_csv("data/nonzero_glossNum.csv", index=False)
print(len(data))

In [None]:
# Export every distinct (lang, pos, term) combination of items that have both a
# <p:glossNum> and a <p:etyNum> value, sorted by those three columns, then
# print how many rows were written.
data = pd.DataFrame(
    [tuple(w[key].value for key in ("lang", "pos", "term")) for w in store.query(
        f"""SELECT DISTINCT ?lang ?pos ?term WHERE {{ 
            ?item <p:glossNum> ?glossNum .
            ?item <p:etyNum> ?etyNum .
            ?item <p:lang> ?lang .
            ?item <p:pos> ?pos .
            ?item <p:term> ?term .
        }}"""
    )],
    columns=["lang", "pos", "term"],
).sort_values(["lang", "pos", "term"])
data.to_csv("data/nonzero_glossNum_and_etyNum.csv", index=False)
print(len(data))

In [None]:
# Read the headerless CSV, labelling its columns "lang" and "n", then show the
# total of the "n" column as the cell output.
# NOTE: this rebinds `d`, which an earlier cell used for the descendants dict.
d = pd.read_csv(
    "data/descendants_langs.csv",
    header=None,
    names=["lang", "n"],
)
d["n"].sum()

In [None]:
# Load data/feedback_arc_set_pass_1.tsv as a tab-separated table; the first
# line of the file is used as the header (pandas default).
fas = pd.read_csv("data/feedback_arc_set_pass_1.tsv", sep="\t")