## The parser class ##

In [1]:


from typing import Any
from SPARQL_parser import SPARQL
from SPARQLWrapper import SPARQLWrapper, JSON


# PREFIX declarations placed in front of the generated SPARQL text.
# Several prefixes are aliases for the same IRI:
#   dbp / res / dbr   -> http://dbpedia.org/resource/
#   dbo / onto        -> http://dbpedia.org/ontology/
#   prop / dbpedia2   -> http://dbpedia.org/property/
list_of_prefix = """
PREFIX prop: <http://dbpedia.org/property/>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX dbp: <http://dbpedia.org/resource/>
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX res: <http://dbpedia.org/resource/>
PREFIX dbo: <http://dbpedia.org/ontology/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX db: <http://dbpedia.org/>
PREFIX yago: <http://dbpedia.org/class/yago/>
PREFIX onto: <http://dbpedia.org/ontology/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX dbc: <http://dbpedia.org/resource/Category:>
PREFIX dbpedia2: <http://dbpedia.org/property/>
"""

# Hard-coded example query used during development.
# TODO: receive the query from the user instead.
# NOTE: ?p is projected in the SELECT clause but does not occur in the WHERE clause.
input_query = """
SELECT ?p ?place
WHERE {
   dbr:Albert_Einstein dbo:birthPlace ?place .
}
"""


# Parse the query with the project-local SPARQL class; later cells read
# parsed_query.triple_list, parsed_query.intent and parsed_query.former_template.
parsed_query = SPARQL(input_query)


class Triple:  # TODO: change name
    """One subject/predicate/object pattern together with its approximations."""

    def __init__(self, sub, pre, obj):
        self.sub, self.pre, self.obj = sub, pre, obj
        # Approximations attached to this triple; starts out empty.
        self.approx = []

    def get_subject(self):
        """Return the subject term."""
        return self.sub

    def get_predicate(self):
        """Return the predicate term."""
        return self.pre

    def get_object(self):
        """Return the object term."""
        return self.obj

    def get_approx(self):
        """Return the stored approximation list itself (not a copy)."""
        return self.approx

    def set_approx(self, approx):
        """Append a single approximation to the stored list."""
        self.approx.append(approx)

    def update_approx(self, approx):
        """Replace the stored approximation list with the given object."""
        self.approx = approx

    def clear_approx(self):
        """Drop all approximations by binding a fresh empty list."""
        self.approx = []



# Turn every "subject predicate object" string produced by the parser into a
# Triple whose approximation list starts with the triple itself.
triples_to_query = []

for raw_triple in parsed_query.triple_list:
    parts = raw_triple.split(' ')
    triple = Triple(parts[0], parts[1], parts[2])
    triple.set_approx(triple)
    triples_to_query.append(triple)


# full_query = get_query()
# my_query(full_query)


## The query builder ##
    

In [3]:
# first the triples in the original query
    # then the triples that are in the approximations
# second triples in the original query and then 
    # again its approximations 


def get_query():
    """Build the full SPARQL query text for the first triple's approximations.

    The SELECT clause is made of ``parsed_query.intent``, the projection taken
    from ``parsed_query.former_template`` and every variable (term starting
    with ``?``) that occurs in the approximation triples but is not projected
    yet. The WHERE clause lists the approximation triples of
    ``triples_to_query[0]``.

    The previous version obtained the extra variables by calling
    ``node_forward(4, cost)``; in this notebook ``node_forward`` is bound to a
    string, so that call raised ``TypeError``. Deriving the variables from the
    approximation triples is the replacement chosen here.
    NOTE(review): confirm this matches the intended projection.

    Returns:
        str: ``list_of_prefix`` followed by the SELECT query, limited to 20 rows.
    """
    intent = parsed_query.intent
    projection = parsed_query.former_template
    # str.strip(chars) removes any of the given characters from both ends, not
    # the keyword as a whole, so drop the leading intent explicitly instead.
    if projection.startswith(intent):
        projection = projection[len(intent):]

    approx_triples = triples_to_query[0].get_approx()

    seen = set(projection.split())
    extra_variables = []
    # With "SELECT *" every variable is already returned; adding names would
    # make the query invalid.
    if "*" not in seen:
        for triple in approx_triples:
            for term in (triple.get_subject(), triple.get_predicate(), triple.get_object()):
                if term.startswith("?") and term not in seen:
                    seen.add(term)
                    extra_variables.append(term)

    variables = projection + " " + " ".join(extra_variables)

    sparql_query = f"{intent}{variables} where {{\n"
    for triple in approx_triples:
        sparql_query += f"    {triple.get_subject()} {triple.get_predicate()} {triple.get_object()} .\n"
    sparql_query += "}\n LIMIT 20"

    return list_of_prefix + sparql_query


# print(get_query())

# for t in triples_to_query:
#     print("sub " + t.get_subject())
#     print("pre " + t.get_predicate())
#     print("obj " + t.get_object())
#     print('------------------')

# get_query()

## The endpoint ## 

In [4]:

def my_query(query_to_execute, endpoint="https://api.triplydb.com/datasets/DBpedia-association/dbpedia/services/dbpedia/sparql"):
    """Run a SPARQL query and write the result columns to a file via wr_my_list.

    Args:
        query_to_execute: SPARQL query text sent to the endpoint.
        endpoint: URL of the SPARQL endpoint; defaults to the DBpedia service
            this notebook was written against.

    The JSON result is turned into one list per projected variable (in the
    order of ``head.vars``). In the SPARQL JSON results format an unbound
    variable is simply missing from a binding, so such cells are stored as
    ``None`` instead of raising ``KeyError``.
    """
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query_to_execute)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    variables = results['head']['vars']
    bindings = results['results']['bindings']

    # One column per variable; each column has one entry per result row.
    result_list = [
        [row[var]['value'] if var in row else None for row in bindings]
        for var in variables
    ]

    wr_my_list(result_list)



## The returned result ##

In [5]:

def execute_query(query_to_execute, endpoint="https://api.triplydb.com/datasets/DBpedia-association/dbpedia/services/dbpedia/sparql"):
    """Run a SPARQL query and print the result columns.

    Args:
        query_to_execute: SPARQL query text sent to the endpoint.
        endpoint: URL of the SPARQL endpoint; defaults to the DBpedia service
            this notebook was written against.

    The JSON result is turned into one list per projected variable (in the
    order of ``head.vars``) and the list of columns is printed. In the SPARQL
    JSON results format an unbound variable is simply missing from a binding,
    so such cells are stored as ``None`` instead of raising ``KeyError``.
    """
    sparql = SPARQLWrapper(endpoint)
    sparql.setQuery(query_to_execute)
    sparql.setReturnFormat(JSON)
    results = sparql.query().convert()

    variables = results['head']['vars']
    bindings = results['results']['bindings']

    # One column per variable; each column has one entry per result row.
    result_list = [
        [row[var]['value'] if var in row else None for row in bindings]
        for var in variables
    ]

    print(result_list)






## Node forward (Left Insertion) 


In [10]:
# Hand-written SPARQL text illustrating a node-forward (left insertion)
# expansion for the Albert_Einstein example. This name is bound to a plain
# string, not to a callable.
node_forward = """
        
PREFIX dbp: <http://dbpedia.org/resource/>
PREFIX dbo: <http://dbpedia.org/ontology/>

SELECT ?p ?place ?x  ?y?a ?b
WHERE {
    dbp:Albert_Einstein ?p ?place .
  	?place ?x ?y.
  	?y ?a ?b.
}
LIMIT 10


"""


def left_insertion(cost):
    """Apply a left insertion to the approximation chain of the first triple.

    The last triple of the chain keeps its subject but gets the fresh
    predicate ``?p<cost>`` and fresh object ``?o<cost>``; a new triple going
    from that fresh object through the original predicate to the original
    object is then appended.
    """
    root = triples_to_query[0]
    chain = root.get_approx()

    tail = chain[-1]
    chain[-1] = Triple(tail.get_subject(), f"?p{cost}", f"?o{cost}")

    bridge_subject = chain[-1].get_object()
    chain.append(Triple(bridge_subject, root.get_predicate(), root.get_object()))

    root.update_approx(chain)
    
# 







## Node Backward (Right Insertion) 


In [9]:
# Hand-written SPARQL text illustrating a node-backward (right insertion)
# expansion for the Albert_Einstein example. This name is bound to a plain
# string, not to a callable.
node_backward = """

PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX dbp: <http://dbpedia.org/resource/>
PREFIX dbo: <http://dbpedia.org/ontology/>


SELECT *
WHERE {
  ?a ?b ?z.
  ?z ?p ?x. #this should be replaced with the returned results ?
  ?x ?y dbp:Albert_Einstein.
    dbp:Albert_Einstein ?place dbr:German_Empire .
}

limit 20
"""

def right_insertion(cost):
    """Apply a right insertion to the approximation chain of the first triple.

    The last triple of the chain keeps its subject and predicate but gets the
    fresh object ``?o<cost>``; a new triple going from that fresh object
    through the fresh predicate ``?p<cost>`` to the original object is then
    appended.

    Args:
        cost: value embedded in the names of the fresh variables.
    """
    aprrox_t = triples_to_query[0].get_approx()
    aprrox_t[-1] = Triple(aprrox_t[-1].get_subject(), aprrox_t[-1].get_predicate(), f"?o{cost}")
    # The predicate must be a SPARQL variable, so it needs the leading "?"
    # (as in left_insertion and sub); a bare "p<cost>" is not a valid term.
    aprrox_t.append(Triple(aprrox_t[-1].get_object(), f"?p{cost}", triples_to_query[0].get_object()))
    triples_to_query[0].update_approx(aprrox_t)


# node_backward(1)
# for t in triples_to_query[0].get_approx():
#     print("sub " + t.get_subject())
#     print("pre " + t.get_predicate())
#     print("obj " + t.get_object())
# print('------------------')

  

## Substitution ##

In [11]:
# Hand-written SPARQL text illustrating a predicate substitution for the
# Albert_Einstein example. It is stored under its own name so that the
# function sub() defined in this cell does not rebind and hide it.
sub_query = """

PREFIX dbr: <http://dbpedia.org/resource/>
PREFIX dbp: <http://dbpedia.org/resource/>
PREFIX dbo: <http://dbpedia.org/ontology/>

#SELECT ?p ?place
#WHERE {
#    dbp:Albert_Einstein dbp:birthplace dbr:German_Empire .
#}
#
SELECT ?p ?place
WHERE {
    dbp:Albert_Einstein ?p ?place .
}
LIMIT 10
"""

def sub(cost):
    """Substitute the predicate of the last approximation triple.

    The last triple in the approximation chain of the first query triple is
    replaced by one with the same subject and object whose predicate is the
    variable ``?p<cost>``.
    """
    root = triples_to_query[0]
    chain = root.get_approx()
    tail = chain[-1]
    chain[-1] = Triple(tail.get_subject(), f"?p{cost}", tail.get_object())
    root.update_approx(chain)
    
# sub(1)
# for t in triples_to_query[0].get_approx():
#     print("sub " + t.get_subject())
#     print("pre " + t.get_predicate())
#     print("obj " + t.get_object())
# print('------------------')
# query_to_execute = sub

sub <http://dbpedia.org/resource/Albert_Einstein>
pre ?p1
obj ?place
------------------


## Printer of the list ##

In [None]:
# def print_my_list(l1list):
#     for l1 in zip(l1list):
#         for l2 in l1:
#             print(l2)
#         print()
#         # print(l1,l2)
#         print('------------------')

In [13]:
def wr_my_list(l1list, file_path='test.txt'):
    """Write column-oriented results to a text file, one record per result row.

    Args:
        l1list: list of columns (one list per variable); the columns are
            zipped, so rows stop at the length of the shortest column.
        file_path: destination file, overwritten on every call. Defaults to
            ``'test.txt'``.

    Each value of a row is written followed by three spaces, and every row is
    terminated by a separator line.
    """
    # Explicit UTF-8: result values may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to be able to encode them.
    with open(file_path, 'w', encoding='utf-8') as file:
        for row in zip(*l1list):
            for value in row:
                file.write(str(value) + "   ")
            file.write('\n ------------------ \n')


## -------------------------------------------------------------------------------------------------------------------------------------- ##

## question ##
 1. Every approximation step creates two new variables. What should we do with them: if we start from the original query, do we keep carrying them into each further step?
 2. Should the values returned by an approximation be substituted for the variables, so that we keep only one set of variables?
    - For example, if a substitution returns 10 values, should we plug each of them into the approximation and then run a node forward/backward/substitution query for each one?

In [None]:
def answer_convert(item_answer):
    """Flatten a SPARQL JSON result into a list of answer values.

    A result carrying a ``'boolean'`` key yields a one-element list with that
    boolean. Otherwise the first name in ``head.vars`` selects which binding
    entry is read from every row; the names ``'number'`` and ``'resource'``
    are read from the binding keys ``'c'`` and ``'uri'`` respectively, every
    other name is read from the key of the same name.
    """
    if 'boolean' in item_answer:
        at = 'boolean'
    else:
        at = item_answer['head']['vars'][0]

    if at == 'boolean':
        return [item_answer['boolean']]

    # Only these two names are looked up under a different binding key.
    binding_key = {'number': 'c', 'resource': 'uri'}.get(at, at)
    return [cand[binding_key]['value'] for cand in item_answer['results']['bindings']]


# Ad-hoc check of answer_convert against the pokemon dataset endpoint.
# NOTE(review): `sparql_query` is not assigned at module level in the visible
# cells, so on a fresh kernel this raises NameError; confirm where it is meant
# to come from.
sparql = SPARQLWrapper(
    "https://api.triplydb.com/datasets/academy/pokemon/services/pokemon/sparql")
sparql.setQuery(sparql_query)
sparql.setReturnFormat(JSON)
results = sparql.query().convert()  
answer = answer_convert(results)

# Bare expression that is not the last line of the cell, so nothing is displayed.
type(results.items())

# Show the top-level keys of the result and whether it carries a 'boolean' key.
print(results.keys())
print('boolean' in results.keys())


dict_keys(['head', 'results'])
False


In [11]:
# Apply one left insertion and dump the resulting approximation chain.
left_insertion(1)
for approx_triple in triples_to_query[0].get_approx():
    labelled_terms = (
        ("sub ", approx_triple.get_subject()),
        ("pre ", approx_triple.get_predicate()),
        ("obj ", approx_triple.get_object()),
    )
    for label, term in labelled_terms:
        print(label + term)
print('------------------')

sub <http://dbpedia.org/resource/Albert_Einstein>
pre <http://dbpedia.org/ontology/birthPlace>
obj ?o1
sub ?o1
pre ?p1
obj ?o1
sub ?o1
pre <http://dbpedia.org/ontology/birthPlace>
obj ?place
------------------
