In [1]:
import os, subprocess, re

In [2]:
def runNSave(cmd, path, get_times=True):
    """Run `cmd`, save its stdout to `path` and optionally return the timings printed on stderr.

    Parameters
    ----------
    cmd : list of str
        Command line handed to `subprocess.Popen`.
    path : str
        File that receives the command's stdout with trailing whitespace removed.
        It is written even when the command reports an error.
    get_times : bool, default True
        When true, extract the "networking" and "reasoning" walltime values (msec)
        from the command's stderr.

    Returns
    -------
    tuple of (int, int) or None
        `(netw_time, reas_time)` when `get_times` is true and no error was reported.
        `None` when `get_times` is false, or when stderr contains "** ERROR **"
        (stderr is printed in that case) - callers that unpack the result must be
        prepared for that.

    Raises
    ------
    ValueError
        If `get_times` is true and a timing line cannot be found on stderr.
    """
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, error = [ b.decode('UTF-8') for b in process.communicate() ]

    # write with the same encoding the output was decoded from, independent of the locale
    with open(path, 'w', encoding='UTF-8') as fh:
        fh.write(out.rstrip())

    if "** ERROR **" in error:
        print("ERROR:", error)
        return None

    if not get_times:
        return None

    times = []
    for phase in ("networking", "reasoning"):
        # raw string: "\d" and "\[" are regex escapes, not Python string escapes
        match = re.search(phase + r" \d+ \[msec cputime\] (\d+) \[msec walltime\]", error)
        if match is None:
            raise ValueError(f"no '{phase}' walltime found in stderr of {cmd!r}:\n{error}")
        times.append(int(match.group(1)))

    netw_time, reas_time = times
    return netw_time, reas_time

def record(times_file, query, data, type, phase, netw_time, reas_time):
    """Append one comma-separated timing row, terminated by a newline, to `times_file`.

    The columns are written in the order query, data, type, phase, netw_time, reas_time.
    `times_file` only needs a `write` method.
    """
    row = (query, data, type, phase, netw_time, reas_time)
    line = ",".join(format(value) for value in row)
    times_file.write(line + "\n")

## Convert SPARQL property paths (PP) into N3 PP

In [None]:
# NOTE for now run with Python 3.11.5 (manually fixed issue with NegatedPropertySet & reverse paths)

from rdflib.plugins.sparql import parser
from convert_pp_sparql_n3 import To_N3_Visitor

In [None]:
# Manual patch to rdflib: rdflib/plugins/sparql/parserutils.py, line 279.
# Add the following branch:
# elif isinstance(t,CompValue) or isinstance(t,URIRef):
#     res['part'] = t

### Convert

In [None]:
# convert single query

# earlier example query, kept for reference:
#   "SELECT ?x WHERE { ?x (:p1/:p2)* ?z ; !(:p3|:p4|:p5) ?a }"
query_text = (
    "PREFIX : <http://example.org/gmark/> "
    "SELECT * WHERE { ?x0 !(^:p1|:p2) ?x3 } "
)
# other property-path expressions to try in place of the SELECT above:
#   "SELECT * WHERE { ?x0 ((^:p1/:p2*)?/:p3)+ ?x3 } "
#   "SELECT * WHERE { ?x0 !(:p1|^:p2|:p3) ?x3 }"

# the visitor is given element [1] of the parse result
# (presumably the query part that follows the prologue - TODO confirm against rdflib)
query = parser.parseQuery(query_text)[1]

print(To_N3_Visitor().convert(query))

In [None]:
# convert query folder

import os

visitor = To_N3_Visitor()

path = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/gmark_50_new/mix"
files = list(os.listdir(path))
files.sort()

# the converted queries go to <path>/n3; create it so the first write does not fail
# on a fresh folder (done after listing, so a wrong `path` still fails loudly above)
n3_dir = os.path.join(path, "n3")
os.makedirs(n3_dir, exist_ok=True)

for file in files:
    if not file.endswith(".sparql"):
        continue
    print(file)

    # read the query text and release the handle before parsing / converting
    with open(os.path.join(path, file), 'r') as fh:
        query = fh.read()

    # the visitor is given element [1] of the parse result
    query = parser.parseQuery(query)[1]

    # the queries use the default ":" prefix, so declare it at the top of the N3 output
    conv = "@prefix : <http://example.org/gmark/> .\n\n" + visitor.convert(query)

    # output name: everything before the first "." of the query file name, plus ".n3"
    n3_file = file[:file.index(".")] + ".n3"
    with open(os.path.join(n3_dir, n3_file), 'w') as fh2:
        fh2.write(conv)

## Run SPARQL PP

In [7]:
from resCSV2N3 import convert as csv2n3
# from rdflib import Graph

In [8]:
def exec_sparql(query_file, data_file, result_file):
    """Run a SPARQL query with ../test/run/sparql.jar and save its stdout.

    Parameters
    ----------
    query_file : str
        Path passed to the jar after "-query".
    data_file : str
        Path passed to the jar after "-n3".
    result_file : str
        File that receives the jar's stdout with surrounding whitespace stripped.
        It is written even when the jar exits with a non-zero code.

    Returns
    -------
    None
    """
    cmd = ['java', '-jar', "../test/run/sparql.jar", "-n3", data_file, "-query", query_file]
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, error = [ b.decode('UTF-8') for b in process.communicate() ]

    # surface failures: otherwise a failed run only shows up as an empty or partial result file
    if process.returncode != 0:
        print(f"ERROR (exit code {process.returncode}):", error)

    with open(result_file, 'w') as fh:
        fh.write(out.strip())

In [None]:
path = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/gmark_50_new/mix"
files = list(os.listdir(path))
files.sort()

# results are written to <path>/results; create it so exec_sparql can open its output file
# (done after listing, so a wrong `path` still fails loudly above)
results_dir = os.path.join(path, "results")
os.makedirs(results_dir, exist_ok=True)

# every query runs against the same data file
data_file = os.path.join(path, "data.n3")

for file in files:
    if not file.endswith(".sparql"):
        continue

    # base name: everything before the first "." of the query file name
    name = file[:file.index(".")]
    query_file = os.path.join(path, file)
    result_file_csv = os.path.join(results_dir, f"{name}.csv")
    result_file_n3 = os.path.join(results_dir, f"{name}.n3")

    # run the query (stdout saved as .csv), then convert that result to N3
    exec_sparql(query_file, data_file, result_file_csv)
    csv2n3(file=result_file_csv , ordered=False, out=result_file_n3)

    # NOTE(review): stops after the first .sparql file - looks like a debugging limit; confirm
    break

## Run N3 PP

### Ground & normalize

In [10]:
# normalize single file

path = "test"
or_file = os.path.join(path, "test2.n3")
norm_file = os.path.join(path, "test2_norm.n3")

# step 1 - ground: run eye over the original file and store its output as norm_file
ground_cmd = ["eye", "--quiet", or_file, "--no-qvars", "--nope", "--pass-all"]
runNSave(ground_cmd, norm_file, get_times=False)

# step 2 - normalize: norm_file is both the input and the output here; runNSave only
# opens the output file after the command has finished, so it is overwritten in place
normalize_cmd = [
    "eye", "--quiet", norm_file, "aux_list.n3",
    "--query", "list-predicate.n3",
    "--quantify", "http://www.w3.org/2000/10/swap/var#",
    "--nope",
]
runNSave(normalize_cmd, norm_file, get_times=False)

In [None]:
# normalize entire folder

# path = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/other_systems/gmark-dominik/50/"
path = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/gmark_50_new/mix/"

# sorted so the progress output is deterministic, like in the other folder cells
files = sorted(os.listdir(path))

# normalized files go to <path>/n3/normalized; create it so the first write does not fail
# (done after listing, so a wrong `path` still fails loudly above)
norm_dir = os.path.join(path, "n3", "normalized")
os.makedirs(norm_dir, exist_ok=True)

for file in files:
    if not file.endswith(".sparql"):
        continue

    # the converted query has the same base name (up to the first ".") with an .n3 extension
    n3_file = file[:file.index(".")] + ".n3"
    or_file = os.path.join(path, "n3", n3_file)
    norm_file = os.path.join(norm_dir, n3_file)

    print(n3_file)

    # ground
    runNSave(["eye", "--quiet", or_file, "--no-qvars", "--nope", "--pass-all"], norm_file, get_times=False)

    # normalize (reads norm_file and overwrites it with the result)
    runNSave(["eye", "--quiet", norm_file, "aux_list.n3", "--query", "list-predicate.n3", "--quantify", "http://www.w3.org/2000/10/swap/var#", "--nope"], norm_file, get_times=False)

### Run

In [None]:
# run single rule

path = "test"
norm_file = os.path.join(path, "test2_norm.n3")
data_path = os.path.join(path, "data.n3")
res_file = os.path.join(path, "test2_rule_creation.n3")

# alternative (direct evaluation against the data):
# rule_cmd = ["eye", data_path, "property-paths-direct.n3", "--query", norm_file, "--nope"]
rule_cmd = ["eye", norm_file, "rule-creation.n3", "--query", "rule-creation.n3", "--nope"]

# runNSave stores stdout in res_file and returns the two walltime values parsed from stderr
timings = runNSave(rule_cmd, res_file)
netw_time, reas_time = timings
print(netw_time, reas_time)

13 145


In [None]:
# run folder

# path = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/other_systems/gmark-dominik/50/"
path = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/gmark_50_new/mix/"

norm_dir = os.path.join(path, "n3", "normalized")
res_dir = os.path.join(path, "n3", "results")
gen_dir = os.path.join(path, "n3", "gen")
data_file = "data.n3"
data_path = os.path.join(path, data_file)

# list the inputs first, so a wrong `path` fails here before any output is created or truncated
files = sorted(os.listdir(norm_dir))

# make sure every folder written to below exists
for out_dir in (gen_dir,
                os.path.join(res_dir, "direct"),
                os.path.join(res_dir, "bwd"),
                os.path.join(res_dir, "fwd")):
    os.makedirs(out_dir, exist_ok=True)

# `with` closes times.csv even when a run raises part-way through the benchmark
with open(os.path.join(res_dir, "times.csv"), 'w') as times_file:
    times_file.write("query,data,type,phase,netw_time,reas_time\n")

    for file in files:
        # only normalized query files (query*.n3) are benchmarked; the check used to be
        # `not startswith and not endswith`, which let through any file matching just one of the two
        if not (file.startswith("query") and file.endswith(".n3")):
            continue

        name = file[:file.index(".")]
        norm_file = os.path.join(norm_dir, file)

        print(norm_file)

        # NOTE: runNSave returns None when the command reports "** ERROR **"; the tuple
        # unpacking below then raises TypeError and stops the benchmark at that query.

        # - direct: evaluate the normalized query against the data in one run
        dir_res_file = os.path.join(res_dir, "direct", file)
        netw_time, reas_time = runNSave(["eye", data_path, "property-paths-direct.n3", "--query", norm_file, "--nope"], dir_res_file)
        record(times_file, file, data_file, 'direct', 'n/a', netw_time, reas_time)

        # - backward, then forward: first generate rules from the query ('create'),
        #   then run the generated rules against the data ('run')
        for rule_type, creation_query in (("bwd", "rule-creation-backwards.n3"),
                                          ("fwd", "rule-creation.n3")):
            tmp_file = os.path.join(gen_dir, f"{name}_{rule_type}.n3")
            res_file = os.path.join(res_dir, rule_type, file)

            netw_time1, reas_time1 = runNSave(["eye", norm_file, "rule-creation.n3", "--query", creation_query, "--nope"], tmp_file)
            record(times_file, file, data_file, rule_type, 'create', netw_time1, reas_time1)

            netw_time2, reas_time2 = runNSave(["eye", data_path, tmp_file, "--query", norm_file, "--nope"], res_file)
            record(times_file, file, data_file, rule_type, 'run', netw_time2, reas_time2)

            record(times_file, file, data_file, rule_type, 'total', netw_time1+netw_time2, reas_time1+reas_time2)

        # keep the rows of finished queries on disk even if a later query fails
        times_file.flush()

## Compare results

In [24]:
def compare_results(file1, file2):
    """Compare two result files with compare_res.jar and print what it writes to stdout.

    Parameters
    ----------
    file1, file2 : str
        Paths handed to the jar, in this order.

    Returns
    -------
    None
        The jar's stdout is printed. If the jar exits with a non-zero code and wrote
        something to stderr, that is printed as well.
    """
    process = subprocess.Popen(['java', '-jar', "compare_res.jar", file1, file2], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, error = [ b.decode('UTF-8') for b in process.communicate() ]
    print(out)

    # without this, a comparator that fails to run would only print an empty line
    if process.returncode != 0 and error.strip():
        print(f"ERROR (exit code {process.returncode}):", error)

In [29]:
# path1: N3 results of the SPARQL runs; path2: results of the forward ("fwd") N3 runs
path1 = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/gmark_50_new/mix/results"
path2 = "/Users/wvw/git/n3/sparql2n3/SPARQL-to-N3/gmark_50_new/mix/n3/results/fwd"
files = sorted(os.listdir(path1))
for file in files:
    if file.endswith(".n3"):
        # the two folders are expected to hold result files with the same name
        file1 = os.path.join(path1, file)
        file2 = os.path.join(path2, file)
        if os.path.isfile(file2):
            print(file)
            compare_results(file1, file2)
            print()
        else:
            print("cannot find file2")
        # only the first .n3 file is compared
        # NOTE(review): looks like a debugging limit - confirm before relying on a full comparison
        break

query-0.n3
results are the same!


