## Variables

In [None]:
import os
import urllib.parse as up

# File names used by the pipeline (resolved against the current working directory).
ont_file = "address_ont.ttl"
data_file = "addresses.csv"
query_file = "query.txt"
mapping_file = "mapping.json"
export_file = "addresses-temp.ttl"
out_file = "addresses.ttl"

# Folder receiving the generated files. `exist_ok=True` keeps this cell safe to
# re-run and avoids the race of a separate "exists?" check followed by a create.
out_folder = "out_files"
os.makedirs(out_folder, exist_ok=True)

export_format = "TURTLE"

# Absolute paths, so the external tools (ontorefine-cli, curl) get unambiguous
# file locations.
abs_ont_file = os.path.abspath(ont_file)
abs_data_file = os.path.abspath(data_file)
abs_mapping_file = os.path.abspath(mapping_file)
abs_export_file = os.path.abspath(os.path.join(out_folder, export_file))
abs_out_file = os.path.abspath(out_file)

# Used both as the Ontorefine project name and as the GraphDB repository id.
project_name = "ban_01343" # This repository must exist in graphdb

# Service endpoints
ontorefine_url = "http://localhost:7333"
graphdb_url = "http://localhost:7200"

# Location of the Ontorefine command-line client; alternatives for other
# installations are kept below for convenience.
# ontorefine_cmd = "ontorefine-cli"
ontorefine_cmd = "/opt/ontotext-refine/lib/app/bin/ontorefine-cli"
# ontorefine_cmd = "/Applications/Ontotext\ Refine.app/Contents/app/bin/ontorefine-cli"
# ontorefine_cmd = "/Applications/Ontotext\ Refine.app/Contents/app/bin/ontorefine-cli"

## Build a graph from data file

### Mapping data file from mapping file

:warning: Launch Ontorefine before running these commands (so that `ontorefine_url` is reachable)


In [None]:
# Create an Ontorefine project from the data file through the CLI.
cmd = f"{ontorefine_cmd} create \"{abs_data_file}\" -u \"{ontorefine_url}\" -n \"{project_name}\""

# Read the CLI output inside a context manager so the pipe is always closed.
with os.popen(cmd) as msg:
    create_output = msg.read()

# Get project_id from message given by CLI: it is the text that follows the
# first ": " separator, with newlines removed.
create_parts = create_output.split(": ")
if len(create_parts) < 2:
    # Without this check, a failed command (for instance when Ontorefine is not
    # running) only shows up as an opaque IndexError.
    raise RuntimeError(
        f"Could not read a project id from the ontorefine-cli output: {create_output!r}"
    )
project_id = create_parts[1].replace("\n", "")

In [None]:
# Apply the mapping to the Ontorefine project and capture the RDF printed by the CLI.
# The absolute mapping path is passed, as the create step does for the data file.
cmd = f"{ontorefine_cmd} rdf \"{project_id}\" -u {ontorefine_url} -m \"{abs_mapping_file}\""

# Context managers close the pipe and the output file even if an error occurs.
with os.popen(cmd) as out_content:
    rdf_content = out_content.read()

# The handle gets its own name so that it does not overwrite `out_file`, which
# the configuration cell defines as a file name.
with open(abs_export_file, "w") as export_handle:
    export_handle.write(rdf_content)

In [None]:
# Remove the Ontorefine project identified by `project_id` through the CLI.
cmd = " ".join([ontorefine_cmd, "delete", f'"{project_id}"', "-u", ontorefine_url])
os.system(cmd)

### Cleaning data to remove duplications

:warning: Launch GraphDB before running these commands (so that `graphdb_url` is reachable). The repository whose id is `project_name` must already exist (creating the repository from this notebook does not work for the moment).


Ensure repository exists and remove all triples in it

In [None]:
# cmd1 sends a PUT to the repository endpoint; cmd2 sends a DELETE to its
# statements endpoint.
cmd1 = (
    'curl -X PUT -H "Content-Type:application/rdf+xml" '
    f"{graphdb_url}/repositories/{project_name}"
)
cmd2 = (
    'curl -X DELETE -H "Content-Type:application/x-turtle" '
    f"{graphdb_url}/repositories/{project_name}/statements"
)

os.system(cmd1)
os.system(cmd2)

Import ontology and triples created by Ontorefine part

In [None]:
# Both uploads share the same curl invocation; only the Turtle file differs.
upload_template = (
    'curl -X POST -H "Content-Type:application/x-turtle" '
    '-T "{path}" {url}/repositories/{repo}/statements'
)

# cmd1 uploads the triples exported from Ontorefine, cmd2 the ontology file.
cmd1 = upload_template.format(path=abs_export_file, url=graphdb_url, repo=project_name)
cmd2 = upload_template.format(path=abs_ont_file, url=graphdb_url, repo=project_name)
os.system(cmd1)
os.system(cmd2)

Clean data by removing duplicates

In [None]:
# Read the SPARQL update text; the context manager closes the file even if
# reading fails.
with open(query_file, "r") as fquery:
    query = fquery.read()

# Percent-encode the update so it can be sent as a form field by curl.
query_encoded = up.quote(query)
cmd = f"curl -X POST -H \"Content-Type:application/x-www-form-urlencoded\" -d \"update={query_encoded}\" {graphdb_url}/repositories/{project_name}/statements"

os.system(cmd)

## Query part

Define a function that executes a query and stores its output in a file

In [None]:
def execute_query(query, out_file_name, out_type="csv"):
    """Send a SPARQL query to the GraphDB repository with curl and save the response.

    The request is posted to ``{graphdb_url}/repositories/{project_name}``,
    both of which are read from the notebook's global configuration.

    Parameters
    ----------
    query : str
        SPARQL query text; it is percent-encoded before being sent.
    out_file_name : str
        Path of the file the response is written to (opened in "w" mode, so an
        existing file is overwritten).
    out_type : str, optional
        When equal to "json", an ``Accept: application/sparql-results+json``
        header is added. For any other value no Accept header is passed to
        curl, so the response format is left to the server.
    """
    query_encoded = up.quote(query)
    accept_opt = ""
    if out_type == "json":
        accept_opt = "-H \"Accept: application/sparql-results+json\""
    cmd = f"curl -X POST -H \"Content-Type:application/x-www-form-urlencoded\" {accept_opt} -d \"query={query_encoded}\" {graphdb_url}/repositories/{project_name}"

    # Context managers close the pipe and the output file even if an error occurs.
    with os.popen(cmd) as out:
        response = out.read()
    with open(out_file_name, "w") as out_handle:
        out_handle.write(response)

Query to get addresses which target a housenumber within a defined bounding box

In [None]:
# Name of the result file, created inside `out_folder`.
out_file_name = "query1-out.csv"

# Spatial filter: CRS URI and WKT polygon that the geometries must lie within
# (tested with gspf:sfWithin in the query below).
proj_uri = "http://www.opengis.net/def/crs/OGC/1.3/CRS84"
polygon_wkt = "POLYGON((4.957909552887547 46.2633019251873,4.965140788391698 46.2633019251873,4.965140788391698 46.258658479525806,4.957909552887547 46.258658479525806,4.957909552887547 46.2633019251873))"

# Doubled braces are literal braces in the f-string; only proj_uri and
# polygon_wkt are interpolated.
query = f"""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>
PREFIX gsp: <http://www.opengis.net/ont/geosparql#>
PREFIX gspf: <http://www.opengis.net/def/function/geosparql/>

select ?s ?nb ?streetName ?coords where {{
    ?s a addr:Address; addr:targets [rdfs:label ?nb;
                       addr:isPartOf [addr:isLandmarkType addr:Thoroughfare; rdfs:label ?streetName];
                                                                              gsp:asWKT ?coords].
        FILTER (
        gspf:sfWithin(
            ?coords,
            "<{proj_uri}> {polygon_wkt}"^^gsp:wktLiteral
        )
    )

}}
"""

query1_out_path = os.path.join(out_folder, out_file_name)
execute_query(query, query1_out_path)

Query to get addresses along a street defined by its name.

In [None]:
# Name of the result file, created inside `out_folder`.
out_file_name = "query2-out.csv"

# Street to look for and the language tag its label must carry.
lang = "fr"
street_name = "route de l'Amitié"

# The comparison is case-insensitive: the label is lowercased with LCASE in the
# query and the searched name is lowercased here.
street_name_lc = street_name.lower()

query = f"""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>
PREFIX gsp: <http://www.opengis.net/ont/geosparql#>

SELECT ?item ?addressLabel ?coords WHERE {{
    ?item a addr:Address;
          rdfs:label ?addressLabel; 
          addr:targets [addr:isLandmarkType addr:HouseNumber;
          	addr:isPartOf [rdfs:label ?streetName];
            gsp:asWKT ?coords].
    FILTER(LCASE(STR(?streetName)) = "{street_name_lc}" && LANG(?streetName) = "{lang}")
}}
"""

query2_out_path = os.path.join(out_folder, out_file_name)
execute_query(query, query2_out_path)

Query to get coordinates of an address

In [None]:
# Name of the result file, created inside `out_folder`.
out_file_name = "query3-out.csv"

# Full address label to look for and the language tag it must carry.
lang = "fr"
address_name = "95 route de l'Amitié, 01380 Saint-Cyr-sur-Menthon"

# The comparison is case-insensitive: the label is lowercased with LCASE in the
# query and the searched label is lowercased here.
address_name_lc = address_name.lower()

query = f"""
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>
PREFIX gsp: <http://www.opengis.net/ont/geosparql#>

SELECT ?item ?addressLabel ?coords WHERE {{
    ?item a addr:Address;
          rdfs:label ?addressLabel; 
          addr:targets [gsp:asWKT ?coords].
    FILTER(LCASE(STR(?addressLabel)) = "{address_name_lc}" && LANG(?addressLabel) = "{lang}")
}}
"""

query3_out_path = os.path.join(out_folder, out_file_name)
execute_query(query, query3_out_path)