In [1]:
import rdflib
from rdflib.plugins import sparql
import re
import numpy as np 

In [2]:
# Example SELECT query used by the following cells: persons with a given name
# and a height, filtered on both, ordered by name and height.
# The `geo` prefix is declared but not used inside the query body.
query_text = """
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?person ?name WHERE {
  ?person rdf:type schema:Person ;
  		schema:height ?height;
    	schema:givenName ?name .
  filter(?name="bartek" &&(?height>170 || 180 > ?height))
} 
ORDER BY ?name ?height
"""
# Prepare the query with rdflib's SPARQL processor.
# NOTE(review): `query` is not referenced again in the visible cells.
query  = sparql.processor.prepareQuery(query_text)

In [3]:
# Parse the query text, translate the parse result into SPARQL algebra,
# and pretty-print the algebra tree for inspection.
parse_results = sparql.parser.parseQuery(query_text)
algebra_result = sparql.algebra.translateQuery(parse_results)
sparql.algebra.pprintAlgebra(algebra_result)

SelectQuery(
    p = Project(
        p = OrderBy(
            p = Filter(
                expr = ConditionalAndExpression(
                    expr = RelationalExpression(
                        expr = name
                        op = =
                        other = bartek
                        _vars = set()
                        )
                    other = [ConditionalOrExpression_{'expr': RelationalExpression_{'expr': rdflib.term.Variable('height'), 'op': '>', 'other': rdflib.term.Literal('170', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')), '_vars': set()}, 'other': [RelationalExpression_{'expr': rdflib.term.Literal('180', datatype=rdflib.term.URIRef('http://www.w3.org/2001/XMLSchema#integer')), 'op': '>', 'other': rdflib.term.Variable('height'), '_vars': set()}], '_vars': set()}]
                    _vars = set()
                    )
                p = BGP(
                    triples = [(rdflib.term.Variable('person'), rdflib.term.URIRef('ht

In [4]:
# Plain-dict copy of the top-level algebra node; passed to
# convert_to_query_structure in a later cell.
algebra_dict = dict(algebra_result.algebra)

In [5]:
def get_literal(literal):
    """Convert a literal term into a plain ``{'value', 'type'}`` dictionary.

    ``value`` is ``literal.toPython()``. ``type`` is ``literal.datatype.toPython()``
    when the literal carries a datatype, otherwise ``None``.
    """
    value = literal.toPython()
    datatype = literal.datatype
    datatype_name = None if datatype is None else datatype.toPython()
    return {'value': value, "type": datatype_name}

In [6]:
def get_variable(variable):
    """Return the Python representation of a term by calling its ``toPython()``."""
    python_value = variable.toPython()
    return python_value

In [7]:
def parse_filterings(expression):
    """Convert a FILTER expression from the rdflib algebra into nested dictionaries.

    The node type is read from ``expression.name``:

    * ``RelationalExpression`` -> ``{'op', 'expr', 'other'}``. Each operand is
      rendered with ``get_literal`` when it is an ``rdflib.term.Literal`` and with
      ``get_variable`` otherwise.
    * ``ConditionalAndExpression`` / ``ConditionalOrExpression`` ->
      ``{'logic': 'and' | 'or', 'expr', 'other'}``.

    For the logical nodes ``expression['other']`` is a list containing every
    operand after the first one. All of them are folded left to right, so
    ``a && b && c`` becomes
    ``{'logic': 'and', 'expr': {'logic': 'and', 'expr': a, 'other': b}, 'other': c}``.
    With exactly one extra operand the result is the flat
    ``{'logic', 'expr', 'other'}`` dictionary.

    Returns:
        The nested dictionary, or ``None`` when the node type is not one of the
        three handled above.
    """
    logic_by_name = {
        'ConditionalAndExpression': 'and',
        'ConditionalOrExpression': 'or',
    }

    def parse_operand(term):
        # Literals keep their datatype; any other operand must expose toPython().
        if isinstance(term, rdflib.term.Literal):
            return get_literal(term)
        return get_variable(term)

    if expression.name == 'RelationalExpression':
        other = parse_operand(expression['other'])
        expr = parse_operand(expression['expr'])
        return {
            'op': expression['op'],
            'expr': expr,
            'other': other,
        }

    logic = logic_by_name.get(expression.name)
    if logic is None:
        return None

    # Fold every additional operand into the result instead of keeping only the
    # first one, which silently dropped the rest of the condition.
    parsed_expression = parse_filterings(expression['expr'])
    for operand in expression['other']:
        parsed_expression = {
            'logic': logic,
            'expr': parsed_expression,
            'other': parse_filterings(operand),
        }
    return parsed_expression

In [8]:
def parse_triple(triple):
    """Render the first three terms of a triple as a subject/predicate/object dict.

    Each term is converted with its own ``toPython()`` method.
    """
    subject, predicate, obj = (triple[position].toPython() for position in range(3))
    return {'subject': subject, 'predicate': predicate, 'object': obj}

In [9]:
def convert_to_query_structure(input_dict):
    """Flatten a translated SELECT query into a plain dictionary.

    ``input_dict['p']`` must be the projection node: its ``'PV'`` entry lists the
    projected variables and its ``'p'`` entry holds the wrapped pattern. Below the
    projection the function walks the chain ``OrderBy -> Filter -> BGP``; each
    level is optional and is recognised by the node's ``name`` attribute.

    Nodes used to be recognised by probing for an ``'expr'`` key, which mistook a
    Filter for an OrderBy whenever the query had a FILTER but no ORDER BY.

    Args:
        input_dict: dictionary view of the top-level algebra node.

    Returns:
        A dictionary with ``'project'`` and, when the matching node is present,
        ``'order'``, ``'filter'`` and ``'bgp'`` (inserted in that order). Node
        types other than the three above are ignored and end the walk.

    Raises:
        KeyError: if ``input_dict`` has no ``'p'`` or that node has no ``'PV'``.
    """
    projection = input_dict['p']
    query_dict = {'project': [variable.toPython() for variable in projection['PV']]}

    node = projection['p']

    if getattr(node, 'name', None) == 'OrderBy':
        # Each order condition wraps the sort key in its own 'expr' entry.
        query_dict['order'] = [condition['expr'].toPython() for condition in node['expr']]
        node = node['p']

    if getattr(node, 'name', None) == 'Filter':
        query_dict['filter'] = parse_filterings(node['expr'])
        node = node['p']

    if getattr(node, 'name', None) == 'BGP':
        query_dict['bgp'] = [parse_triple(triple) for triple in node['triples']]

    return query_dict


In [10]:
convert_to_query_structure(algebra_dict)

{'project': ['?person', '?name'],
 'order': ['?name', '?height'],
 'filter': {'logic': 'and',
  'expr': {'op': '=',
   'expr': '?name',
   'other': {'value': 'bartek', 'type': None}},
  'other': {'logic': 'or',
   'expr': {'op': '>',
    'expr': '?height',
    'other': {'value': 170,
     'type': 'http://www.w3.org/2001/XMLSchema#integer'}},
   'other': {'op': '>',
    'expr': {'value': 180, 'type': 'http://www.w3.org/2001/XMLSchema#integer'},
    'other': '?height'}}},
 'bgp': [{'subject': '?person',
   'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
   'object': 'http://schema.org/Person'},
  {'subject': '?person',
   'predicate': 'http://schema.org/givenName',
   'object': '?name'},
  {'subject': '?person',
   'predicate': 'http://schema.org/height',
   'object': '?height'}]}

In [11]:
def parse_prefixes(prefix_string):
    """Collect the ``PREFIX name: <iri>`` declarations found in a SPARQL text.

    The keyword is matched case-insensitively. The prefix name may be empty (the
    default prefix ``PREFIX : <...>``) and may contain ``-`` and ``.``, so
    declarations such as ``prefix dbpedia-owl: <...>`` are no longer skipped.

    This is a textual scan of the whole string; it does not skip comments or
    string literals that happen to contain a declaration.

    Args:
        prefix_string: SPARQL text (a prologue or a complete query).

    Returns:
        A dict mapping each prefix name to its IRI. A later declaration of the
        same name overrides an earlier one.
    """
    declaration = r'PREFIX\s+([\w.-]*):\s*<([^<>\s]*)>'
    return dict(re.findall(declaration, prefix_string, flags=re.IGNORECASE))

In [12]:
parse_prefixes(query_text)

{'geo': 'http://www.w3.org/2003/01/geo/wgs84_pos#',
 'schema': 'http://schema.org/',
 'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'}

In [13]:
def convert_query_text(query_text):
    """Turn SPARQL query text into ``{'prefixes': ..., 'query': ...}``.

    The text is parsed and translated to algebra with rdflib. ``'prefixes'``
    comes from ``parse_prefixes(query_text)`` and ``'query'`` from
    ``convert_to_query_structure`` applied to a dict copy of the algebra.
    """
    parsed = sparql.parser.parseQuery(query_text)
    translated = sparql.algebra.translateQuery(parsed)
    algebra = dict(translated.algebra)

    prefixes = parse_prefixes(query_text)
    structure = convert_to_query_structure(algebra)
    return {'prefixes': prefixes, 'query': structure}

In [14]:
# Prologue-only sample containing the same three PREFIX declarations as query_text.
# NOTE(review): not referenced by any visible cell; parse_prefixes is called
# with query_text instead.
prefix_string = """
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
"""

In [15]:
query_dict = convert_query_text(query_text)
query_dict

{'prefixes': {'geo': 'http://www.w3.org/2003/01/geo/wgs84_pos#',
  'schema': 'http://schema.org/',
  'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
 'query': {'project': ['?person', '?name'],
  'order': ['?name', '?height'],
  'filter': {'logic': 'and',
   'expr': {'op': '=',
    'expr': '?name',
    'other': {'value': 'bartek', 'type': None}},
   'other': {'logic': 'or',
    'expr': {'op': '>',
     'expr': '?height',
     'other': {'value': 170,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'}},
    'other': {'op': '>',
     'expr': {'value': 180,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'},
     'other': '?height'}}},
  'bgp': [{'subject': '?person',
    'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
    'object': 'http://schema.org/Person'},
   {'subject': '?person',
    'predicate': 'http://schema.org/givenName',
    'object': '?name'},
   {'subject': '?person',
    'predicate': 'http://schema.org/height',
    'object': '?height'}]}}

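# Comparing Trees

The parsed query dictionaries are converted to labelled trees and compared with the tree edit distance from the `zss` package.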

In [16]:
from zss.simple_tree import Node
from zss.compare import simple_distance

# Two example trees with the same shape that differ in two node labels
# ("h" vs "d" and "l" vs "b").
A = (
    Node("f")
        .addkid(Node("a")
            .addkid(Node("h"))
            .addkid(Node("c")
                .addkid(Node("l"))))
        .addkid(Node("e"))
    )
B = (
    Node("f")
        .addkid(Node("a")
            .addkid(Node("d"))
            .addkid(Node("c")
                .addkid(Node("b"))))
        .addkid(Node("e"))
    )
# Tree edit distance between the two example trees.
simple_distance(A, B)

2.0

In [17]:
A.print_tree()

└──f
   ├──a
   │  ├──h
   │  └──c
   │     └──l
   └──e


In [32]:
# Variant 1: a single FILTER combining the name test and the height test with &&.
# This is the same text as the query_text defined at the top of the notebook.
query_text = """
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?person ?name WHERE {
  ?person rdf:type schema:Person ;
  		schema:height ?height;
    	schema:givenName ?name .
  filter(?name="bartek" &&(?height>170 || 180 > ?height))
} 
ORDER BY ?name ?height
"""
# Variant 2: the same conditions split into two separate FILTER clauses.
query_text2 = """
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?person ?name WHERE {
  ?person rdf:type schema:Person ;
  		schema:height ?height;
    	schema:givenName ?name .
  filter(?name="bartek")
  filter(?height>170 || 180 > ?height)
} 
ORDER BY ?name ?height
"""

# Variant 3: like variant 2, with the two operands of || written in swapped order.
query_text3 = """
PREFIX geo: <http://www.w3.org/2003/01/geo/wgs84_pos#>
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?person ?name WHERE {
  ?person rdf:type schema:Person ;
  		schema:height ?height;
    	schema:givenName ?name .
  filter(?name="bartek")
  filter(180 > ?height || ?height>170)
} 
ORDER BY ?name ?height
"""

# Smaller query: no geo prefix and a single height comparison in the FILTER.
query_test = """
PREFIX schema: <http://schema.org/>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
SELECT ?person ?name WHERE {
  ?person rdf:type schema:Person ;
  		schema:height ?height;
    	schema:givenName ?name .
  filter(?height>170)
} 
ORDER BY ?name ?height
"""
# Convert every query text into its {'prefixes', 'query'} dictionary.
query_dict = convert_query_text(query_text)
query_dict2 = convert_query_text(query_text2)
query_dict3 = convert_query_text(query_text3)
query_dict_test = convert_query_text(query_test)

In [19]:
# Show the structure parsed from query_text3 (the || operands are in swapped order).
# The original cell displayed `f`, a name that no cell in the notebook defines.
query_dict3

{'prefixes': {'geo': 'http://www.w3.org/2003/01/geo/wgs84_pos#',
  'schema': 'http://schema.org/',
  'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
 'query': {'project': ['?person', '?name'],
  'order': ['?name', '?height'],
  'filter': {'logic': 'and',
   'expr': {'op': '=',
    'expr': '?name',
    'other': {'value': 'bartek', 'type': None}},
   'other': {'logic': 'or',
    'expr': {'op': '>',
     'expr': {'value': 180,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'},
     'other': '?height'},
    'other': {'op': '>',
     'expr': '?height',
     'other': {'value': 170,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'}}}},
  'bgp': [{'subject': '?person',
    'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
    'object': 'http://schema.org/Person'},
   {'subject': '?person',
    'predicate': 'http://schema.org/givenName',
    'object': '?name'},
   {'subject': '?person',
    'predicate': 'http://schema.org/height',
    'object': '?height'}]}}

In [20]:
query_dict

{'prefixes': {'geo': 'http://www.w3.org/2003/01/geo/wgs84_pos#',
  'schema': 'http://schema.org/',
  'rdf': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'},
 'query': {'project': ['?person', '?name'],
  'order': ['?name', '?height'],
  'filter': {'logic': 'and',
   'expr': {'op': '=',
    'expr': '?name',
    'other': {'value': 'bartek', 'type': None}},
   'other': {'logic': 'or',
    'expr': {'op': '>',
     'expr': '?height',
     'other': {'value': 170,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'}},
    'other': {'op': '>',
     'expr': {'value': 180,
      'type': 'http://www.w3.org/2001/XMLSchema#integer'},
     'other': '?height'}}},
  'bgp': [{'subject': '?person',
    'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type',
    'object': 'http://schema.org/Person'},
   {'subject': '?person',
    'predicate': 'http://schema.org/givenName',
    'object': '?name'},
   {'subject': '?person',
    'predicate': 'http://schema.org/height',
    'object': '?height'}]}}

In [35]:
def dict2tree(dictionary, node=None):
    """Attach the contents of a nested dictionary to a ``Node`` tree.

    For every ``key, value`` pair of ``dictionary``:

    * a dict value becomes a child labelled ``key`` whose subtree is built
      recursively;
    * a list value becomes a child labelled ``key`` with one leaf per item; the
      leaf label is the item itself, so dict items are not expanded;
    * any other value becomes a child labelled ``key`` with a single leaf
      labelled ``value``.

    Args:
        dictionary: mapping to convert.
        node: node that receives the children. When omitted, a fresh
            ``Node("root")`` is created on every call. The previous default was
            one shared instance that kept accumulating children across calls.

    Returns:
        ``node`` (or the freshly created root) with the children attached.
    """
    if node is None:
        node = Node("root")

    for key, value in dictionary.items():
        if isinstance(value, dict):
            node.addkid(dict2tree(value, Node(key)))
        elif isinstance(value, list):
            list_node = Node(key)
            for item in value:
                list_node.addkid(Node(item))
            node.addkid(list_node)
        else:
            node.addkid(Node(key).addkid(Node(value)))
    return node

test_dict = query_dict
# NOTE(review): the three roots carry different labels, which presumably adds one
# relabel to simple_distance between these trees. Confirm that this is intended.
node1 = Node("root1")
node2 = Node("root2")
node3 = Node("root3")
res = dict2tree(query_dict, node1)
res2 = dict2tree(query_dict2, node2)
res3 = dict2tree(query_dict3, node3)

# Drop the triple patterns from the small test query before building its tree.
# The default keeps this cell re-runnable once 'bgp' has already been removed.
query_dict_test['query'].pop('bgp', None)
res_test = dict2tree(query_dict_test, Node("root_test"))


In [36]:
res_test.print_tree()

└──root_test
   ├──prefixes
   │  ├──schema
   │  │  └──http://schema.org/
   │  └──rdf
   │     └──http://www.w3.org/1999/02/22-rdf-syntax-ns#
   └──query
      ├──project
      │  ├──?person
      │  └──?name
      ├──order
      │  ├──?name
      │  └──?height
      └──filter
         ├──op
         │  └──>
         ├──expr
         │  └──?height
         └──other
            ├──value
            │  └──170
            └──type
               └──http://www.w3.org/2001/XMLSchema#integer


In [30]:
res2.print_tree()

└──root2
   ├──prefixes
   │  ├──geo
   │  │  └──http://www.w3.org/2003/01/geo/wgs84_pos#
   │  ├──schema
   │  │  └──http://schema.org/
   │  └──rdf
   │     └──http://www.w3.org/1999/02/22-rdf-syntax-ns#
   └──query
      ├──project
      │  ├──?person
      │  └──?name
      ├──order
      │  ├──?name
      │  └──?height
      ├──filter
      │  ├──logic
      │  │  └──and
      │  ├──expr
      │  │  ├──op
      │  │  │  └──=
      │  │  ├──expr
      │  │  │  └──?name
      │  │  └──other
      │  │     ├──value
      │  │     │  └──bartek
      │  │     └──type
      │  │        └──None
      │  └──other
      │     ├──logic
      │     │  └──or
      │     ├──expr
      │     │  ├──op
      │     │  │  └──>
      │     │  ├──expr
      │     │  │  └──?height
      │     │  └──other
      │     │     ├──value
      │     │     │  └──170
      │     │     └──type
      │     │        └──http://www.w3.org/2001/XMLSchema#integer
      │     └──other
      │        ├──op
      │     

In [31]:
res3.print_tree()

└──root3
   ├──prefixes
   │  ├──geo
   │  │  └──http://www.w3.org/2003/01/geo/wgs84_pos#
   │  ├──schema
   │  │  └──http://schema.org/
   │  └──rdf
   │     └──http://www.w3.org/1999/02/22-rdf-syntax-ns#
   └──query
      ├──project
      │  ├──?person
      │  └──?name
      ├──order
      │  ├──?name
      │  └──?height
      ├──filter
      │  ├──logic
      │  │  └──and
      │  ├──expr
      │  │  ├──op
      │  │  │  └──=
      │  │  ├──expr
      │  │  │  └──?name
      │  │  └──other
      │  │     ├──value
      │  │     │  └──bartek
      │  │     └──type
      │  │        └──None
      │  └──other
      │     ├──logic
      │     │  └──or
      │     ├──expr
      │     │  ├──op
      │     │  │  └──>
      │     │  ├──expr
      │     │  │  ├──value
      │     │  │  │  └──180
      │     │  │  └──type
      │     │  │     └──http://www.w3.org/2001/XMLSchema#integer
      │     │  └──other
      │     │     └──?height
      │     └──other
      │        ├──op
      │     

In [107]:
simple_distance(res3, res2)

13.0

In [27]:
print(res3)

'root3'
	'prefixes'
		'geo'
			'http://www.w3.org/2003/01/geo/wgs84_pos#'
		'schema'
			'http://schema.org/'
		'rdf'
			'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
	'query'
		'project'
			'?person'
			'?name'
		'order'
			'?name'
			'?height'
		'filter'
			'logic'
				'and'
			'expr'
				'op'
					'='
				'expr'
					'?name'
				'other'
					'value'
						'bartek'
					'type'
						None
			'other'
				'logic'
					'or'
				'expr'
					'op'
						'>'
					'expr'
						'value'
							180
						'type'
							'http://www.w3.org/2001/XMLSchema#integer'
					'other'
						'?height'
				'other'
					'op'
						'>'
					'expr'
						'?height'
					'other'
						'value'
							170
						'type'
							'http://www.w3.org/2001/XMLSchema#integer'
		'bgp'
			{'subject': '?person', 'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'object': 'http://schema.org/Person'}
			{'subject': '?person', 'predicate': 'http://schema.org/givenName', 'object': '?name'}
			{'subject': '?person', 'pr

In [109]:
print(res2)

2:root2
3:prefixes
1:geo
0:http://www.w3.org/2003/01/geo/wgs84_pos#
1:schema
0:http://schema.org/
1:rdf
0:http://www.w3.org/1999/02/22-rdf-syntax-ns#
4:query
2:project
0:?person
0:?name
2:order
0:?name
0:?height
3:filter
1:logic
0:and
3:expr
1:op
0:=
1:expr
0:?name
2:other
1:value
0:bartek
1:type
0:None
3:other
1:logic
0:or
3:expr
1:op
0:>
1:expr
0:?height
2:other
1:value
0:170
1:type
0:http://www.w3.org/2001/XMLSchema#integer
3:other
1:op
0:>
2:expr
1:value
0:180
1:type
0:http://www.w3.org/2001/XMLSchema#integer
1:other
0:?height
3:bgp
0:{'subject': '?person', 'predicate': 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type', 'object': 'http://schema.org/Person'}
0:{'subject': '?person', 'predicate': 'http://schema.org/givenName', 'object': '?name'}
0:{'subject': '?person', 'predicate': 'http://schema.org/height', 'object': '?height'}
