## Variables

### Import libraries

In [1]:
import os
import subprocess
import sys
import urllib.parse as up
import uuid

from SPARQLWrapper import SPARQLWrapper, TURTLE

import code.functions as fn

### Define global variables

In [None]:
# File names used by the notebook.
ont_file = "address_ont.ttl"          # ontology file imported into the repository
mapping_file = "mapping.json"
export_file = "addresses-temp.ttl"    # Turtle file holding the graph built from Wikidata
out_file = "addresses.ttl"

# Folders, relative to the current working directory.
temp_folder = "tmp_files"       # generated files are written here
source_folder = "sources"       # CSV files listed in `sources`
mapping_folder = "mappings"     # mapping files listed in `sources`

# exist_ok=True makes the creation idempotent: re-running the cell is a no-op
# and there is no window between "check" and "create" in which it could fail.
os.makedirs(temp_folder, exist_ok=True)

# --- Services -------------------------------------------------------------
endpoint_url = "https://query.wikidata.org/sparql"   # SPARQL endpoint queried for Wikidata
graphdb_url = "http://localhost:7200"                # GraphDB instance receiving the data
ontorefine_url = "http://localhost:7333"             # Ontotext Refine instance

# --- Target repository ----------------------------------------------------
# A repository with this id must already exist in GraphDB.
project_name = "voies_paris_hist"

# --- Export ---------------------------------------------------------------
export_format = "TURTLE"

# --- Ontotext Refine command-line client ------------------------------------
# Pick the line matching the local installation:
#   ontorefine_cmd = "ontorefine-cli"
#   ontorefine_cmd = "/Applications/Ontotext\ Refine.app/Contents/app/bin/ontorefine-cli"
ontorefine_cmd = "/opt/ontotext-refine/lib/app/bin/ontorefine-cli"

# One entry per historical source to import:
#   filename     - CSV file, looked up in `source_folder`
#   mapping_file - mapping file, looked up in `mapping_folder`
#   graphname    - name of the named graph receiving the data
#   date         - date associated with the source (used when linking to Wikidata)
sources = [
    {
        "filename": "andriveau_1849_am.csv",
        "mapping_file": "mapping_andriveau_1849_am.json",
        "graphname": "andriveau1849",
        "date": "1849-01-01",
    },
    {
        "filename": "atlas_municipal_1888_am.csv",
        "mapping_file": "mapping_atlas_municipal_1888_am.json",
        "graphname": "atlasMunicipal1888",
        "date": "1888-01-01",
    },
    {
        "filename": "piquet_1826_am.csv",
        "mapping_file": "mapping_piquet_1826_am.json",
        "graphname": "piquet1826",
        "date": "1826-01-01",
    },
    {
        "filename": "verniquet_1791_am.csv",
        "mapping_file": "mapping_verniquet_1791_am.json",
        "graphname": "verniquet1791",
        "date": "1791-01-01",
    },
]

## Process to create / import data

### Queries to build graph from Wikidata

:warning: Queries cannot be chained for Wikidata, so they have to be split into separate requests.

In [None]:
# Get streets of Paris, with label, altLabel and their locations.
#
# CONSTRUCT query meant for the Wikidata endpoint:
#   - selects every item that is "part of" (P361) Q16024163 or Q107311481;
#   - requires a location (P131) and a French rdfs:label;
#   - French skos:altLabel values are OPTIONAL, so items without alias are kept.
# Each item is emitted as an addr:Landmark of type addr:Thoroughfare, linked to
# its location through addr:within.
# Only the addr: prefix is declared; rdfs:, skos:, wd:, wdt:, p: and ps: are left
# to the endpoint to resolve (the Wikidata Query Service predefines them).
query1 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
 ?street a addr:Landmark;
           addr:isLandmarkType addr:Thoroughfare;
           rdfs:label ?streetLabel;
           skos:altLabel ?streetAltLabel;
           addr:within ?loc.
}
WHERE {
  { ?street p:P361 [ps:P361 wd:Q16024163]. }
  UNION
  { ?street p:P361 [ps:P361 wd:Q107311481]. }
  ?street wdt:P131 ?loc.
  ?street rdfs:label ?streetLabel.
  FILTER (LANG(?streetLabel) = "fr")
  OPTIONAL {?street skos:altLabel ?streetAltLabel. FILTER (LANG(?streetAltLabel) = "fr")}
}
"""

# Get streets of Paris with the history of their official name.
#
# For every "official name" statement (P1448) of an item that is part of (P361)
# Q16024163 or Q107311481, the CONSTRUCT template creates a blank node attached
# to the street through addr:hasOfficialName and carrying:
#   - rdfs:label      the official name value,
#   - addr:startDate  the P580 qualifier, when present,
#   - addr:endDate    the P582 qualifier, when present.
# The language FILTER is disabled (it is a SPARQL comment inside the string), so
# official names are kept whatever their language tag.
query2 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?street addr:hasOfficialName _:bn.
  _:bn rdfs:label ?officialName; addr:startDate ?startDate; addr:endDate ?endDate.
}
WHERE {
  { ?street p:P361 [ps:P361 wd:Q16024163]. }
  UNION
  { ?street p:P361 [ps:P361 wd:Q107311481]. }
    ?street p:P1448 ?officialNameSt. 
    ?officialNameSt ps:P1448 ?officialName.
    OPTIONAL{?officialNameSt pq:P580 ?startDate}
    OPTIONAL{?officialNameSt pq:P582 ?endDate}
    #FILTER (LANG(?officialName) = "fr")
}
"""

# Get related data of municipal arrondissements of Paris.
#
# Selects the instances (P31) of Q702842 that have a P131 statement pointing to
# Q90, with their French label and French altLabel, and optionally their
# inception date (P571). Each one is emitted as an addr:Landmark of type
# addr:District, placed addr:within wd:Q90.
# NOTE(review): skos:altLabel is a mandatory pattern here, so an item that has no
# French alias produces no triple at all - confirm this is intended.
query3 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?arrdt a addr:Landmark;
           addr:isLandmarkType addr:District;
           rdfs:label ?arrdtLabel;
           skos:altLabel ?arrdtAltLabel;
           addr:within wd:Q90;
           addr:startDate ?startDate.
}
WHERE {
  ?arrdt wdt:P31 wd:Q702842; p:P131 [ps:P131 wd:Q90]; rdfs:label ?arrdtLabel; skos:altLabel ?arrdtAltLabel.
  FILTER(LANG(?arrdtLabel) = "fr" && LANG(?arrdtAltLabel) = "fr")
  OPTIONAL {?arrdt wdt:P571 ?startDate}
}
"""

# Get related data of quartiers of Paris.
#
# Selects the instances (P31) of Q252916 with their French label, French
# altLabel and every value of their P131 statements. Each one is emitted as an
# addr:Landmark of type addr:District, linked to those values with addr:within.
# NOTE(review): skos:altLabel is a mandatory pattern here, so a quartier that has
# no French alias produces no triple at all - confirm this is intended.
query4 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?quartier a addr:Landmark;
           addr:isLandmarkType addr:District;
           rdfs:label ?quartierLabel;
           skos:altLabel ?quartierAltLabel;
           addr:within ?loc.
}
WHERE {
  ?quartier wdt:P31 wd:Q252916; rdfs:label ?quartierLabel; skos:altLabel ?quartierAltLabel; p:P131 [ps:P131 ?loc].
  FILTER(LANG(?quartierLabel) = "fr" && LANG(?quartierAltLabel) = "fr")
}
"""

# Get related data of Paris.
#
# Binds ?paris to the single item wd:Q90 and emits it as an addr:Landmark of type
# addr:City, with its French label and French altLabel values.
# NOTE(review): both the label and the altLabel are mandatory patterns; the
# query returns nothing if either is missing in French.
query5 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?paris a addr:Landmark;
           addr:isLandmarkType addr:City;
           rdfs:label ?parisLabel;
           skos:altLabel ?parisAltLabel.
}
WHERE {
  BIND (wd:Q90 AS ?paris)
  ?paris rdfs:label ?parisLabel; skos:altLabel ?parisAltLabel.
  FILTER(LANG(?parisLabel) = "fr" && LANG(?parisAltLabel) = "fr")
}
"""

### Extract street data of Paris from Wikidata

In [None]:
# Absolute path, inside the temporary folder, of the file that receives the
# graph built from the Wikidata queries.
abs_export_file = os.path.abspath(
    os.path.join(temp_folder, export_file)
)

In [None]:
# Run the CONSTRUCT queries one at a time against the Wikidata endpoint and
# merge all the results into a single graph `g`.
g = fn.get_query_results(endpoint_url, query1)
for construct_query in (query2, query3, query4, query5):
    g += fn.get_query_results(endpoint_url, construct_query)

# The file is named *.ttl and is later uploaded as Turtle, so the format is
# stated explicitly instead of relying on the library's default serialization.
# (assumes `g` is an rdflib Graph, whose serialize() accepts `format` - confirm
# against code.functions.get_query_results)
g.serialize(destination=abs_export_file, format="turtle")

### Remove all data from the repository whose id is `project_name`

In [None]:
# Delete every statement of the repository `project_name`.
# The command is given as an argument list, so no shell parses the interpolated
# values. `--fail` makes curl exit with a non-zero status on HTTP errors and
# `check=True` turns any non-zero status into a CalledProcessError, so the
# notebook stops here instead of silently carrying on.
cmd = [
    "curl", "--fail", "-X", "DELETE",
    "-H", "Content-Type:application/x-turtle",
    f"{graphdb_url}/repositories/{project_name}/statements",
]
subprocess.run(cmd, check=True).returncode

### Import ontology

In [None]:
# Upload the ontology file into the repository `project_name`.
abs_ont_file = os.path.abspath(ont_file)

# Argument list instead of a shell string: the file path is passed to curl
# verbatim, whatever characters it contains. `--fail` plus `check=True` make a
# transport or HTTP error raise CalledProcessError instead of going unnoticed.
cmd = [
    "curl", "--fail", "-X", "POST",
    "-H", "Content-Type:application/x-turtle",
    "-T", abs_ont_file,
    f"{graphdb_url}/repositories/{project_name}/statements",
]
subprocess.run(cmd, check=True).returncode

### Import created graph from Wikidata

In [None]:
# Upload the Turtle file built from Wikidata into the named graph "wikidata"
# of the repository `project_name`.
# Argument list instead of a shell string: the file path is passed to curl
# verbatim. `--fail` plus `check=True` make a transport or HTTP error raise
# CalledProcessError instead of going unnoticed.
cmd = [
    "curl", "--fail", "-X", "POST",
    "-H", "Content-Type:application/x-turtle",
    "-T", abs_export_file,
    f"{graphdb_url}/repositories/{project_name}/rdf-graphs/wikidata",
]
subprocess.run(cmd, check=True).returncode

### Import data created by Ontorefine from CSV files in `source_folder`

In [None]:
# For each source: convert its CSV file to Turtle with Ontorefine, using the
# source's own mapping file, then load the result into the source's named graph.
for source in sources:
    data_file = os.path.join(source_folder, source["filename"])
    abs_data_file = os.path.abspath(data_file)
    graph_name = source["graphname"]

    # Loop-specific names: the configuration globals `mapping_file` and
    # `export_file` are left untouched, so cells that read them give the same
    # result whether they are run before or after this loop.
    source_mapping_file = os.path.abspath(os.path.join(mapping_folder, source["mapping_file"]))

    # Swap only the file extension for ".ttl" (a plain str.replace would also
    # rewrite any other ".csv" occurrence in the path).
    source_export_file = os.path.join(temp_folder, os.path.splitext(source["filename"])[0] + ".ttl")

    fn.get_export_file_from_ontorefine(ontorefine_cmd, ontorefine_url, project_name, abs_data_file, source_mapping_file, source_export_file)
    fn.import_ttl_file_in_graphdb(graphdb_url, project_name, source_export_file, graph_name)

### Get links between data extracted from wikidata and data from sources

In [None]:
# SPARQL UPDATE template, filled in with str.format(); doubled braces `{{` / `}}`
# are the literal braces of the query. Placeholders:
#   graphdb_url, project_name - used to build the IRIs of the named graphs
#   graph_name                - named graph of the source being linked
#   graph_date                - date of that source
#   neg_inf_date, pos_inf_date - bounds used when an official name has no
#                                start date / end date
#
# For every addr:Thoroughfare of the source graph and every addr:Thoroughfare of
# the "wikidata" graph having an official name whose [startDate, endDate]
# interval contains graph_date, an owl:sameAs link is inserted into the source
# graph when the lower-cased source label equals the lower-cased official name.
#
# NOTE(review): the three date placeholders are typed as xsd:dateTime while the
# values defined in this notebook are date-only strings (e.g. "1849-01-01") -
# confirm the triple store compares them as expected.
query_template = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>

INSERT {{
    GRAPH <{graphdb_url}/repositories/{project_name}/rdf-graphs/{graph_name}> {{?streetG1 owl:sameAs ?streetG2.}}
}}
WHERE {{
    GRAPH <{graphdb_url}/repositories/{project_name}/rdf-graphs/{graph_name}> {{
        ?streetG1 addr:isLandmarkType addr:Thoroughfare ; rdfs:label ?streetNameG1.
        BIND (LCASE(?streetNameG1) AS ?lcStreetNameG1)
    }}
    GRAPH <{graphdb_url}/repositories/{project_name}/rdf-graphs/wikidata> {{
        ?streetG2 addr:isLandmarkType addr:Thoroughfare ; rdfs:label ?streetG2Name; addr:hasOfficialName ?offNameSt.
        ?offNameSt rdfs:label ?offName.
        OPTIONAL{{?offNameSt addr:startDate ?sd}}
        OPTIONAL{{?offNameSt addr:endDate ?ed}}
        BIND("{graph_date}"^^xsd:dateTime AS ?g1date)
        BIND(IF(BOUND(?sd), ?sd, "{neg_inf_date}"^^xsd:dateTime) AS ?startDate)
        BIND(IF(BOUND(?ed), ?ed, "{pos_inf_date}"^^xsd:dateTime) AS ?endDate)
        FILTER (?g1date >= ?startDate && ?g1date <= ?endDate)
        BIND (LCASE(?offName) AS ?lcStreetNameG2)
    }}
    FILTER (?lcStreetNameG1 = ?lcStreetNameG2)
}}
"""

### Create `owl:sameAs` links between each street of each source graph and the matching street of the Wikidata graph

In [None]:
# Not read by the loop below, which uses each source's own "date"; kept in case
# another cell relies on it.
graph_date = "1849-01-01"

# Bounds substituted for a missing start date / end date of an official name.
pos_inf_date = "6000-01-01"
neg_inf_date = "-6000-01-01"

# Send one SPARQL UPDATE per source to insert the owl:sameAs links.
for source in sources:
    query = query_template.format(graph_name=source["graphname"], graph_date=source["date"],
                                  neg_inf_date=neg_inf_date, pos_inf_date=pos_inf_date,
                                  project_name=project_name, graphdb_url=graphdb_url)
    # Percent-encode the query so it can be sent as a form field.
    query_encoded = up.quote(query)

    # Argument list instead of a shell string: nothing in the request is
    # re-interpreted by a shell. `--fail` plus `check=True` make a transport or
    # HTTP error raise CalledProcessError instead of being silently ignored.
    cmd = [
        "curl", "--fail", "-X", "POST",
        "-H", "Content-Type:application/x-www-form-urlencoded",
        "-d", f"update={query_encoded}",
        f"{graphdb_url}/repositories/{project_name}/statements",
    ]
    subprocess.run(cmd, check=True)
