In [None]:
import pyoxigraph as ox
import pandas as pd
import json

In [None]:
# Open the pyoxigraph store at data/wety.db. The query helpers defined in this
# notebook all read this module-level `store`.
store = ox.Store("data/wety.db")
# Alternative store location (test output), kept for quick switching:
# store = ox.Store("data/test_output/wety.db")

In [None]:
def get_item_id(term, lang, n=0):
    """Return the id of the ``n``-th item whose term and language match exactly.

    Args:
        term: Value compared against the item's ``<p:term>`` literal.
        lang: Value compared against the item's ``<p:lang>`` literal.
        n: Index into the list of matches, in the order the store yields them.

    Returns:
        The ``.value`` of the selected ``?w`` node.

    Raises:
        IndexError: If ``n`` is out of range for the list of matches
            (including the case where nothing matches).
    """
    def _escape(text):
        # Escape the characters that would otherwise terminate or corrupt a
        # double-quoted SPARQL string literal (e.g. a term containing `"`).
        return (
            str(text)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
            .replace("\r", "\\r")
        )

    query = (
        f'SELECT ?w WHERE {{ ?w <p:term> "{_escape(term)}" . '
        f'?w <p:lang> "{_escape(lang)}" .}}'
    )
    matches = [solution['w'].value for solution in store.query(query)]
    try:
        return matches[n]
    except IndexError:
        # Same exception type as before, but say what was being looked up.
        raise IndexError(
            f"no match at index {n} for term={term!r}, lang={lang!r} "
            f"({len(matches)} match(es) found)"
        ) from None

def print_item_info(id):
    """Print the term, the language and (when bound) the gloss of item ``id``.

    The query carries ``LIMIT 1``, so at most one solution is printed; nothing
    is printed when the item has no term/lang pair.
    """
    query = f"""SELECT ?term ?lang ?gloss WHERE {{ 
            <{id}> <p:term> ?term .
            <{id}> <p:lang> ?lang .
            OPTIONAL {{ <{id}> <p:gloss> ?gloss }}
        }} LIMIT 1"""
    for row in store.query(query):
        for field in ("term", "lang"):
            print(row[field].value)
        # The gloss is optional in the query, so it may be unbound.
        gloss = row['gloss']
        if gloss:
            print(gloss.value)

def get_item_expansion(id):
    """Build a nested dict describing item ``id`` and, recursively, its children.

    Args:
        id: Identifier of the item, spliced into the query as ``<{id}>``.

    Returns:
        A dict with these keys, in order: ``id``; the single-valued fields
        ``term``, ``lang``, ``url``, ``pos``, ``gloss``, ``isImputed``,
        ``mode``, ``isReconstructed`` and ``headProgenitor`` (each ``None``
        when unbound, otherwise the first value seen); ``progenitor`` (list of
        distinct values in first-seen order); and ``children`` (list of
        expansions of every id returned by ``get_children(id)``).

    Note:
        Recurses once per child with no cycle guard, so it assumes the child
        relation is acyclic -- TODO confirm against the data.
    """
    results = store.query(
        f"""SELECT ?term ?lang ?url ?pos ?gloss ?isImputed ?mode ?isReconstructed ?headProgenitor ?progenitor WHERE {{ 
            <{id}> <p:term> ?term .
            <{id}> <p:lang> ?lang .
            OPTIONAL {{ <{id}> <p:url> ?url }} .
            OPTIONAL {{ <{id}> <p:pos> ?pos }} .
            OPTIONAL {{ <{id}> <p:gloss> ?gloss }} .
            OPTIONAL {{ <{id}> <p:isImputed> ?isImputed }} .
            OPTIONAL {{ <{id}> <p:mode> ?mode }} .
            OPTIONAL {{ <{id}> <p:isReconstructed> ?isReconstructed }} .
            OPTIONAL {{ <{id}> <p:headProgenitor> ?headProgenitor }} .
            OPTIONAL {{ <{id}> <p:progenitor> ?progenitor }} .
        }}"""
    )
    single_valued = ["term", "lang", "url", "pos", "gloss", "isImputed", "mode", "isReconstructed", "headProgenitor"]

    # Keep the id in the result: the original built {"id": id} and then
    # overwrote it, so the id never reached the output.
    item = {"id": id}
    item.update({p: None for p in single_valued})
    item["progenitor"] = []

    # The OPTIONAL clauses multiply rows (one per combination of values), so
    # the same progenitor can show up in several solutions. A dict is used as
    # an insertion-ordered set to keep each one once.
    progenitors = {}
    for result in results:
        for p in single_valued:
            # First bound value wins for single-valued fields.
            if item[p] is None and result[p] is not None:
                item[p] = result[p].value
        progenitor = result["progenitor"]
        if progenitor is not None:
            progenitors.setdefault(progenitor.value, None)
    item["progenitor"] = list(progenitors)

    item['children'] = [get_item_expansion(child) for child in get_children(id)]
    return item


def get_item_json(id):
    """Return the expansion of item ``id`` (from get_item_expansion) as JSON text.

    Non-ASCII characters are written as-is instead of being escaped.
    """
    return json.dumps(
        get_item_expansion(id),
        indent=True,  # bool is an int subclass, so this acts as a one-space indent
        ensure_ascii=False,
    )

def get_head_source(id):
    """Return the source item whose ``<p:order>`` equals the ``<p:head>`` of ``id``.

    Each solution pairs one source node's item and order with the item's head
    value; the first solution where order and head values are equal wins.
    Returns ``None`` when no solution qualifies.
    """
    rows = store.query(
        f"""SELECT ?source ?order ?head WHERE {{ 
            <{id}> <p:head> ?head .
            <{id}> <p:source> ?sourceNode .
            ?sourceNode <p:item> ?source .
            ?sourceNode <p:order> ?order .
        }}"""
    )
    return next(
        (
            row['source'].value
            for row in rows
            if row['order'].value == row['head'].value
        ),
        None,
    )

def get_head_root(id):
    """Return the ``<p:headProgenitor>`` value of item ``id``.

    Only the first solution is used; returns ``None`` when the item has no
    ``<p:headProgenitor>`` triple.
    """
    solutions = store.query(
        f"""SELECT ?headProgenitor WHERE {{ 
            <{id}> <p:headProgenitor> ?headProgenitor .
        }}"""
    )
    first = next(iter(solutions), None)
    if first is None:
        return None
    return first['headProgenitor'].value

def get_ancestors(id):
    """Return the set of items reachable from ``id`` via one or more
    ``<p:source>/<p:item>`` hops.

    Returns ``None`` (not an empty set) when nothing is reachable.
    """
    reachable = {
        row['item'].value
        for row in store.query(
            f"""SELECT ?item WHERE {{ 
            <{id}> (<p:source>/<p:item>)+ ?item .
        }}"""
        )
    }
    return reachable or None

def get_children(id):
    """Return the ids of the children of item ``id`` as a list.

    A child is an item with a ``<p:source>`` node that points at ``id`` via
    ``<p:item>`` and whose ``<p:order>`` equals the child's ``<p:head>``.
    """
    query = f"""SELECT ?child WHERE {{ 
            ?sourceNode <p:item> <{id}> .
            ?sourceNode <p:order> ?order .
            ?child <p:source> ?sourceNode .
            ?child <p:head> ?order . 
        }}"""
    children = []
    for solution in store.query(query):
        children.append(solution['child'].value)
    return children

In [None]:
# Print every ancestor of the first "redo" (English) item.
# item = get_item_id("volutulō", "Vulgar Latin", 0)
item = get_item_id("redo", "English", 0)
# print(get_head_source(item))
# get_ancestors returns None (not an empty set) when nothing is found, so fall
# back to an empty tuple instead of iterating over None.
for ancestor in get_ancestors(item) or ():
    print_item_info(ancestor)
    print("")

In [None]:
# Look up the first "mind" (English) item and print it, then its head progenitor.
item = get_item_id("mind", "English", 0)
print_item_info(item)
head_root = get_head_root(item)
# NOTE(review): get_head_root returns None when the item has no
# <p:headProgenitor>; the two calls below do not guard against that.
print_item_info(head_root)
# JSON text of the head progenitor's expansion; a later cell prints `d`.
d = get_item_json(head_root)

In [None]:
# Print one hard-coded item, then each item returned by get_children for it.
print_item_info("w:697821")
for child in get_children("w:697821"):
    print_item_info(child)

In [None]:
# `d` is assigned from get_item_json(...) in an earlier cell and rebound to a
# DataFrame in a later one, so what this prints depends on execution order.
print(d)

In [None]:
# NOTE(review): `id` shadows the built-in id() at notebook scope.
id = get_item_id("wert-", "Proto-Indo-European")
# Select the item's url (required) with its optional headProgenitor and
# progenitor. `results` is not consumed here; a later cell calls next() on it.
results = store.query(
        f"""SELECT ?url ?headProgenitor ?progenitor WHERE {{ 
            <{id}> <p:url> ?url .
            OPTIONAL {{ <{id}> <p:headProgenitor> ?headProgenitor }} .
            OPTIONAL {{ <{id}> <p:progenitor> ?progenitor }} .
        }}"""
    )

In [None]:
print(id)
# next() advances the shared `results` iterator, so re-running this cell moves
# on to the following solution and raises StopIteration once none are left.
r = next(results)
# Last expression of the cell: the url value of that solution.
r['url'].value

In [None]:
# Export every distinct (lang, pos, term) combination of items that carry a
# <p:glossNum> triple, sorted, to data/nonzero_glossNum.csv.
data = pd.DataFrame(
    [[w['lang'].value, w['pos'].value, w['term'].value] for w in store.query(
        # The <p:glossNum> pattern used to be listed twice; once is enough.
        f"""SELECT DISTINCT ?lang ?pos ?term WHERE {{ 
            ?item <p:glossNum> ?glossNum .
            ?item <p:lang> ?lang .
            ?item <p:pos> ?pos .
            ?item <p:term> ?term .
        }}"""
    )],
    columns=["lang", "pos", "term"]
)
# Rebind instead of sorting in place so the cell reads as one data flow.
data = data.sort_values(["lang", "pos", "term"])
data.to_csv("data/nonzero_glossNum.csv", index=False)
print(len(data))

In [None]:
# Export every distinct (lang, pos, term) combination of items that carry both
# a <p:glossNum> and a <p:etyNum> triple, sorted, to a CSV file.
data = pd.DataFrame(
    [
        [solution[column].value for column in ("lang", "pos", "term")]
        for solution in store.query(
            f"""SELECT DISTINCT ?lang ?pos ?term WHERE {{ 
            ?item <p:glossNum> ?glossNum .
            ?item <p:etyNum> ?etyNum .
            ?item <p:lang> ?lang .
            ?item <p:pos> ?pos .
            ?item <p:term> ?term .
        }}"""
        )
    ],
    columns=["lang", "pos", "term"],
).sort_values(["lang", "pos", "term"])
data.to_csv("data/nonzero_glossNum_and_etyNum.csv", index=False)
print(len(data))

In [None]:
# Read data/descendants_langs.csv with explicit column labels ("lang", "n")
# and show the total of the "n" column.
# NOTE(review): this rebinds `d`, which an earlier cell uses for JSON text.
d = pd.read_csv("data/descendants_langs.csv", names=["lang","n"])
d["n"].sum()

In [None]:
# Load the tab-separated file data/feedback_arc_set_pass_1.tsv into `fas`.
fas = pd.read_csv("data/feedback_arc_set_pass_1.tsv", sep="\t")