In [139]:
import rdflib
from rdflib.plugins import sparql
import re
import numpy as np 

In [140]:
# Example SPARQL SELECT used by the cells below: it projects ?person and ?name
# for schema:Person resources, filters on given name and height, and orders the
# result by name and then height.
# NOTE(review): the height test `?height>170 || 180 > ?height` holds for every
# numeric height (any number is above 170 or below 180); if a 170-180 range was
# intended the operator would need to be `&&` - confirm.
# NOTE(review): the `geo` prefix is declared but never used in the query body.
query_text = """
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?person ?name WHERE {
  ?person rdf:type schema:Person ;
  		schema:height ?height;
    	schema:givenName ?name .
  filter(?name="bartek" &&(?height>170 || 180 > ?height))
} 
ORDER BY ?name ?height
"""
# Prepare the query with rdflib's prepareQuery; `query` is not referenced again
# in the visible cells.
query  = sparql.processor.prepareQuery(query_text)

In [141]:
# Parse the query text, translate the parse result into SPARQL algebra, and
# pretty-print the algebra tree so the node nesting can be inspected.
parse_results = sparql.parser.parseQuery(query_text)
algebra_result = sparql.algebra.translateQuery(parse_results)
sparql.algebra.pprintAlgebra(algebra_result)

SelectQuery(
    p = Project(
        p = OrderBy(
            p = Filter(
                expr = ConditionalAndExpression(
                    expr = RelationalExpression(
                        expr = name
                        op = =
                        other = bartek
                        _vars = set()
                        )
                    other = [ConditionalOrExpression_{'expr': RelationalExpression_{'expr': rdflib.term.Variable('height'), 'op': '>', 'other': rdflib.term.Literal('170', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')), '_vars': set()}, 'other': [RelationalExpression_{'expr': rdflib.term.Literal('180', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')), 'op': '>', 'other': rdflib.term.Variable('height'), '_vars': set()}], '_vars': set()}]
                    _vars = set()
                    )
                p = BGP(
                    triples = [(rdflib.term.Variable('person'), rdflib.term.URIRef('ht

In [142]:
# Copy the top-level algebra node into a plain dict; this is the input later
# passed to convert_to_query_structure.
algebra_dict = dict(algebra_result.algebra)

In [143]:
def get_literal(literal):
    """Describe a literal term as a ``{'value': ..., 'type': ...}`` dict.

    Args:
        literal: Object exposing a ``datatype`` attribute and a ``toPython()``
            method (the callers in this notebook pass ``rdflib.term.Literal``).

    Returns:
        dict: ``'value'`` is ``literal.toPython()``; ``'type'`` is
        ``literal.datatype.toPython()`` when a datatype is set, else ``None``.
    """
    datatype = literal.datatype
    return {
        'value': literal.toPython(),
        "type": None if datatype is None else datatype.toPython(),
    }

In [144]:
def get_variable(variable):
    """Return the plain-Python form of a term via its ``toPython()`` method.

    Args:
        variable: Object exposing ``toPython()``.

    Returns:
        Whatever ``variable.toPython()`` returns.
    """
    python_value = variable.toPython()
    return python_value

In [145]:
def parse_filterings(expression):
    """Convert a filter expression node into nested plain dicts.

    Three node kinds are understood, identified by ``expression.name``:

    * ``RelationalExpression`` becomes ``{'op', 'expr', 'other'}``, where each
      operand is rendered with ``get_literal`` if it is an
      ``rdflib.term.Literal`` and with ``get_variable`` otherwise.
    * ``ConditionalAndExpression`` / ``ConditionalOrExpression`` become
      ``{'logic': 'and' | 'or', 'expr': ..., 'other': ...}``.

    The ``other`` field of a logical node is a list of operands.  Every entry
    is consumed: chains such as ``a && b && c`` are folded left-to-right into
    nested two-operand dicts, ``{'logic': 'and', 'expr': <a and b>,
    'other': <c>}``.  With a single entry in ``other`` the result is the flat
    two-operand dict.

    Args:
        expression: Algebra expression node exposing ``name`` and item access.

    Returns:
        dict | None: The converted structure, or ``None`` when the node is of
        any other kind.
    """
    def _operand(term):
        # Literals keep their datatype; anything else is rendered via toPython().
        if isinstance(term, rdflib.term.Literal):
            return get_literal(term)
        return get_variable(term)

    if expression.name == 'RelationalExpression':
        return {
            'op': expression['op'],
            'expr': _operand(expression['expr']),
            'other': _operand(expression['other']),
        }

    logic_by_node = {
        'ConditionalAndExpression': 'and',
        'ConditionalOrExpression': 'or',
    }
    logic = logic_by_node.get(expression.name)
    if logic is None:
        return None

    # Left-fold over *all* right-hand operands so none of them is dropped.
    parsed_expression = parse_filterings(expression['expr'])
    for operand in expression['other']:
        parsed_expression = {
            'logic': logic,
            'expr': parsed_expression,
            'other': parse_filterings(operand),
        }
    return parsed_expression

In [146]:
def parse_triple(triple):
    """Convert a triple of terms into a subject/predicate/object dict.

    Args:
        triple: Indexable holding at least three items, each exposing
            ``toPython()``; positions 0, 1 and 2 are read.

    Returns:
        dict: Keys ``'subject'``, ``'predicate'`` and ``'object'`` mapped to
        the ``toPython()`` value of the term at the matching position.
    """
    roles = ('subject', 'predicate', 'object')
    return {role: triple[position].toPython() for position, role in enumerate(roles)}

In [147]:
def convert_to_query_structure(input_dict):
    """Flatten a SELECT query's algebra into a plain dict.

    The algebra is expected to be nested as
    ``input_dict['p']`` (projection, with ``'PV'``) -> optional ``OrderBy``
    -> optional ``Filter`` -> ``BGP``, each layer reached through the ``'p'``
    key of the one above it.

    Layers are recognised by their node ``name`` rather than by probing for an
    ``'expr'`` key: both ``OrderBy`` and ``Filter`` nodes carry ``'expr'``, so
    key probing mistakes a Filter for an OrderBy when the query has a FILTER
    but no ORDER BY.

    Args:
        input_dict: Mapping whose ``'p'`` entry is the projection node.

    Returns:
        dict: Always contains ``'project'`` (list of projected variables).
        ``'order'`` (list of ordering terms), ``'filter'`` (output of
        ``parse_filterings``) and ``'bgp'`` (list of ``parse_triple`` dicts)
        are present only when the corresponding layer exists.

    Raises:
        KeyError: If ``input_dict`` has no ``'p'`` entry or that node has no
            ``'PV'`` entry.
    """
    def _child(node):
        # A layer without a 'p' entry simply ends the descent.
        try:
            return node['p']
        except KeyError:
            return None

    def _is(node, expected_name):
        return getattr(node, 'name', None) == expected_name

    project_node = input_dict['p']
    query_dict = {
        'project': [variable.toPython() for variable in project_node['PV']],
    }

    node = _child(project_node)

    if _is(node, 'OrderBy'):
        query_dict['order'] = [
            condition['expr'].toPython() for condition in node['expr']
        ]
        node = _child(node)

    if _is(node, 'Filter'):
        query_dict['filter'] = parse_filterings(node['expr'])
        node = _child(node)

    if _is(node, 'BGP'):
        query_dict['bgp'] = [parse_triple(triple) for triple in node['triples']]

    return query_dict


In [148]:
# Convert the example query's algebra and display the resulting structure.
convert_to_query_structure(algebra_dict)

{'project': ['?person', '?name'],
 'order': ['?name', '?height'],
 'filter': {'logic': 'and',
  'expr': {'op': '=',
   'expr': '?name',
   'other': {'value': 'bartek', 'type': None}},
  'other': {'logic': 'or',
   'expr': {'op': '>',
    'expr': '?height',
    'other': {'value': 170,
     'type': 'http://www.w3.org/2001/XMLSchema#integer'}},
   'other': {'op': '>',
    'expr': {'value': 180, 'type': 'http://www.w3.org/2001/XMLSchema#integer'},
    'other': '?height'}}},
 'bgp': [{'subject': '?person',
   'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
   'object': 'http://schema.org/Person'},
  {'subject': '?person',
   'predicate': 'http://schema.org/givenName',
   'object': '?name'},
  {'subject': '?person',
   'predicate': 'http://schema.org/height',
   'object': '?height'}]}

In [149]:
def parse_prefixes(prefix_string):
    """Extract ``PREFIX name: <iri>`` declarations from SPARQL text.

    The text is scanned with a regular expression rather than parsed, so any
    occurrence of the pattern is picked up wherever it appears in the string.
    The ``PREFIX`` keyword is matched case-insensitively, and the prefix name
    may be empty (the default prefix ``PREFIX : <...>``) or contain ``-`` and
    ``.`` in addition to word characters.

    Args:
        prefix_string: Text containing zero or more prefix declarations.

    Returns:
        dict: Prefix name (without the trailing colon) mapped to its IRI.  If
        a name is declared more than once, the last declaration wins.
    """
    pattern = re.compile(r'PREFIX\s+([\w.-]*):\s*<(.+?)>', re.IGNORECASE)
    # findall yields (prefix, iri) pairs, which dict() consumes directly.
    return dict(pattern.findall(prefix_string))

In [150]:
# Extract the prefix declarations from the example query and display them.
parse_prefixes(query_text)

{'geo': 'http://www.w3.org/2003/01/geo/wgs84_pos#',
 'schema': 'http://schema.org/',
 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}

In [151]:
def convert_query_text(query_text):
    """Convert SPARQL query text into a ``{'prefixes', 'query'}`` dict.

    The text is parsed and translated to algebra first; the prefix map is then
    taken from the raw text with ``parse_prefixes`` and the query body from
    the algebra with ``convert_to_query_structure``.

    Args:
        query_text: SPARQL query source.

    Returns:
        dict: ``'prefixes'`` holds the prefix-name-to-IRI mapping and
        ``'query'`` holds the converted query structure.
    """
    parsed = sparql.parser.parseQuery(query_text)
    translated = sparql.algebra.translateQuery(parsed)
    top_level_algebra = dict(translated.algebra)
    return {
        'prefixes': parse_prefixes(query_text),
        'query': convert_to_query_structure(top_level_algebra),
    }

In [152]:
# Stand-alone copy of the three PREFIX declarations that also open query_text.
# NOTE(review): `prefix_string` is not referenced by any other visible cell -
# confirm whether it is still needed.
prefix_string = """
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
"""

In [153]:
# Run the full conversion (prefixes plus query structure) on the example query.
convert_query_text(query_text)

{'prefixes': {'geo': 'http://www.w3.org/2003/01/geo/wgs84_pos#',
  'schema': 'http://schema.org/',
  'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
 'query': {'project': ['?person', '?name'],
  'order': ['?name', '?height'],
  'filter': {'logic': 'and',
   'expr': {'op': '=',
    'expr': '?name',
    'other': {'value': 'bartek', 'type': None}},
   'other': {'logic': 'or',
    'expr': {'op': '>',
     'expr': '?height',
     'other': {'value': 170,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'}},
    'other': {'op': '>',
     'expr': {'value': 180,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'},
     'other': '?height'}}},
  'bgp': [{'subject': '?person',
    'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
    'object': 'http://schema.org/Person'},
   {'subject': '?person',
    'predicate': 'http://schema.org/givenName',
    'object': '?name'},
   {'subject': '?person',
    'predicate': 'http://schema.org/height',
    'object': '?height'}]}}