In [None]:
from os import path
import json
import requests
from typing import List
from ontobio.rdfgen.assoc_rdfgen import prefix_context
from prefixcommons.curie_util import contract_uri
from pyshexc.parser_impl import generate_shexj
from typing import Optional, List, Union
from ShExJSG.ShExJ import Shape, ShapeAnd, ShapeOr, ShapeNot, TripleConstraint, shapeExprLabel, shapeExpr, shapeExprLabel, tripleExpr, tripleExprLabel, OneOf, EachOf
from pyshex import PrefixLibrary

In [None]:
# Download the GO-CAM ShEx schema text from the go-shapes repository.
shex_url = "https://raw.githubusercontent.com/geneontology/go-shapes/master/shapes/go-cam-shapes.shex"
# A timeout keeps the cell from hanging forever on a stalled connection.
shex_response = requests.get(shex_url, timeout=30)
# Fail here on a 4xx/5xx reply instead of handing an error page to the parser.
shex_response.raise_for_status()
# The same schema text feeds both the parser (`shex`) and the prefix library (`pref`).
shex = generate_shexj.parse(shex_response.text)
pref = PrefixLibrary(shex_response.text)

In [None]:
def get_suffix(uri):
    """Shorten ``uri`` to a compact identifier.

    The URI is contracted with ``contract_uri`` using ``prefix_context`` as the
    only prefix map, and the first contraction returned is used. When no
    contraction is returned, the last path component of ``uri``
    (``path.basename``) is returned instead.
    """
    curies = contract_uri(uri, cmaps=[prefix_context])
    return curies[0] if curies else path.basename(uri)


In [None]:
# Map each schema prefix name to the shortened form (see get_suffix) of its
# namespace, keeping only namespaces under the OBO PURL base.
pref_dict = {
    name: get_suffix(str(namespace))
    for (name, namespace) in dict(pref).items()
    if str(namespace).startswith('http://purl.obolibrary.org/obo/')
}
# Drop the 'OBO' entry. pop() with a default (rather than `del`) keeps this
# cell from raising KeyError when no 'OBO' key made it through the filter.
pref_dict.pop('OBO', None)

In [None]:
def lookup_table():
    """Return the inverse of ``pref_dict`` as a nested lookup table.

    Each value of ``pref_dict`` becomes a key of the result, mapped to
    ``{'label': <the pref_dict key it came from>}``. If several keys share a
    value, the last one iterated provides the label.
    """
    table = {}
    for label, identifier in pref_dict.items():
        table[identifier] = {'label': label}
    return table

In [None]:
def get_shape_name(uri, clean=False):
    """Look up the ``pref_dict`` entry for a shape URI.

    The lookup key is the upper-cased last path component of ``uri``, with
    ``'GO'`` prepended when the URI contains ``'/go/'``.

    Returns the matching ``pref_dict`` value. When there is no match, returns
    ``None`` if ``clean`` is true, otherwise ``uri`` unchanged.
    """
    base = path.basename(uri).upper()
    key = 'GO' + base if '/go/' in uri else base
    fallback = None if clean else uri
    return pref_dict.get(key, fallback)
    

In [None]:
def load_expr(expr: Optional[Union[shapeExprLabel, shapeExpr]], preds=None) -> Union[dict, list]:
    """Walk a shape expression and record what it references into ``preds``.

    Args:
        expr: The shape expression (or string label) to walk.
        preds: Collector to fill. A fresh dict is created when omitted.

    Behaviour by expression type:
        * string label, and ``preds`` is a list: append
          ``get_shape_name(expr)`` to it;
        * ``ShapeOr`` / ``ShapeAnd``: recurse into every member expression;
        * ``ShapeNot``: recurse into the negated expression;
        * ``Shape`` with a triple expression: hand it to ``load_triple``.
    Anything else is ignored.

    Returns:
        The ``preds`` collector (the one passed in, or the newly created dict).
    """
    # Identity check: an empty dict/list supplied by the caller must be used
    # as-is, and only a missing collector is replaced.
    if preds is None:
        preds = {}

    if isinstance(expr, str) and isinstance(preds, list):
        preds.append(get_shape_name(expr))
    if isinstance(expr, (ShapeOr, ShapeAnd)):
        for expr2 in expr.shapeExprs:
            load_expr(expr2, preds)
    elif isinstance(expr, ShapeNot):
        load_expr(expr.shapeExpr, preds)
    elif isinstance(expr, Shape):
        if expr.expression is not None:
            load_triple(expr.expression, preds)

    return preds
            
def load_triple(expr: Union[tripleExpr, tripleExprLabel], preds=None) -> None:
    """Record the triple constraints found in ``expr`` into ``preds``.

    Args:
        expr: The triple expression to walk. ``OneOf`` / ``EachOf`` groups are
            walked recursively; other non-``TripleConstraint`` values are
            ignored.
        preds: Dict that receives one entry per recorded predicate. When it is
            not a dict (for example the ``None`` default, or a target list
            passed down from ``load_expr``), nothing is recorded.

    For each ``TripleConstraint`` that has a value expression and whose
    shortened predicate (``get_suffix``) is one of the values of ``pref_dict``,
    ``preds[predicate]`` is set to a dict with:
        * ``'targets'``: list filled by ``load_expr`` from the value expression;
        * ``'cardinality'``: ``expr.max``, present only when ``max`` is set.
    """
    if isinstance(expr, (OneOf, EachOf)):
        for expr2 in expr.expressions:
            load_triple(expr2, preds)
    elif isinstance(expr, TripleConstraint):
        if expr.valueExpr is None:
            return

        # Entries are stored by predicate key, so only a dict can receive
        # them. Without this guard a None or list collector fails with
        # TypeError on the item assignment below.
        if not isinstance(preds, dict):
            return

        pred = get_suffix(expr.predicate)

        # Only predicates known to pref_dict are recorded.
        if pred not in pref_dict.values():
            return

        entry = {'targets': []}
        if expr.max is not None:
            entry['cardinality'] = expr.max
        preds[pred] = entry

        # A list collector makes load_expr append the referenced shape names.
        load_expr(expr.valueExpr, entry['targets'])

In [None]:
# Build one entry per schema shape: shape name -> predicates collected by
# load_expr from the shape's member expressions.
shapes_res = {}
shapes = shex.shapes

for shape in shapes:
    shape_name = get_shape_name(shape['id'], True)

    # With clean=True, shapes that have no pref_dict entry come back as None
    # and are left out of the result.
    if shape_name is None:
        continue

    shapes_res[shape_name] = {}

    shexps = shape.shapeExprs or []

    # Collect into the same dict for every member expression. Assigning the
    # return value on each iteration would keep only the last member's result
    # and discard everything gathered from the earlier ones.
    for expr in shexps:
        load_expr(expr, shapes_res[shape_name])

In [None]:
# Write the per-shape results to shex_dump.json, indented by two spaces.
with open("shex_dump.json", mode="w") as dump_file:
    json.dump(obj=shapes_res, fp=dump_file, indent=2)

In [None]:
# Write the table built by lookup_table() to look_table.json, indented by two spaces.
with open("look_table.json", mode="w") as table_file:
    json.dump(obj=lookup_table(), fp=table_file, indent=2)