## Variables

### Import libraries

In [9]:
import os
import subprocess
import urllib.parse as up

from rdflib import Graph

import code.functions as fn

### Define global variables

In [2]:
# --- File names -------------------------------------------------------------
ont_file = "address_ont.ttl"
mapping_file = "mapping.json"
export_file = "addresses-temp.ttl"
out_file = "addresses.ttl"
local_config_file_name = "config_repo.ttl"

# --- Folders (relative here; made absolute in the "Process variables" cell) --
temp_folder = "tmp_files"
source_folder = "sources"
mapping_folder = "mappings"

# --- Export format label -----------------------------------------------------
export_format = "TURTLE"

# --- GraphDB repository id used by the curl calls of this notebook -----------
project_name = "voies_paris_hist_2"

# --- Service endpoints -------------------------------------------------------
ontorefine_url = "http://localhost:7333"
graphdb_url = "http://localhost:7200"

# --- Ontotext Refine command-line client -------------------------------------
# Alternative locations, depending on the installation:
#   "ontorefine-cli"
#   "/Applications/Ontotext\ Refine.app/Contents/app/bin/ontorefine-cli"
ontorefine_cmd = "/opt/ontotext-refine/lib/app/bin/ontorefine-cli"

# --- Source descriptions -----------------------------------------------------
# Each entry names the CSV file, its mapping file, the graph name and a date.
# All four sources follow the same naming pattern, so the dictionaries are
# generated from (file stem, graph name, year) triples.
sources = [
    {
        "filename": f"{stem}.csv",
        "mapping_file": f"mapping_{stem}.json",
        "graphname": graph_name,
        "date": f"{year}-01-01",
    }
    for stem, graph_name, year in (
        ("andriveau_1849_am", "andriveau1849", 1849),
        ("atlas_municipal_1888_am", "atlasMunicipal1888", 1888),
        ("piquet_1826_am", "piquet1826", 1826),
        ("verniquet_1791_am", "verniquet1791", 1791),
    )
]

### Process variables

In [3]:
# Turn the three relative folder names into absolute paths in one pass.
temp_folder, source_folder, mapping_folder = (
    os.path.abspath(folder)
    for folder in (temp_folder, source_folder, mapping_folder)
)

# The temporary folder receives generated files, so make sure it is there.
fn.create_folder_if_not_exists(temp_folder)

# The repository configuration file is placed inside the temporary folder.
local_config_file_name = os.path.join(temp_folder, local_config_file_name)

## Creation of local repository

In [4]:
# Write the repository configuration file, then POST it to the GraphDB REST
# endpoint `/rest/repositories`.
fn.create_config_local_repository_file(local_config_file_name, project_name)

# The command is an argument list executed without a shell, so a path or URL
# containing spaces, quotes or `$` reaches curl unmodified.
curl_cmd_local = [
    "curl", "-X", "POST",
    "--header", "Content-Type:multipart/form-data",
    "-F", f"config=@{local_config_file_name}",
    f"{graphdb_url}/rest/repositories",
]

# Display curl's exit code as the cell result (0 means curl itself succeeded;
# the HTTP response body is printed by curl).
subprocess.run(curl_cmd_local).returncode

{"message":"Repository voies_paris_hist_2 already exists."}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1653    0    59  100  1594   1174  31725 --:--:-- --:--:-- --:--:-- 40317


0

## Process to create / import data

### Queries to build graph from Wikidata

:warning: Wikidata queries cannot be chained into a single request; each one has to be sent separately.

In [5]:
# SPARQL CONSTRUCT queries sent to Wikidata, one request per query.
# Only the `addr:` prefix is declared here; the other prefixes (wd:, wdt:, p:,
# ps:, pqv:, wikibase:, rdfs:, skos:) are presumably predefined by the
# Wikidata endpoint -- TODO confirm if another endpoint is ever used.
#
# In every query the French alternative label is OPTIONAL, so an entity that
# has a French label but no French alternative label is still exported.

# Streets of Paris (part of Q16024163 or Q107311481) with their French label,
# optional French alternative labels and the entity they are located in.
query1 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
 ?street a addr:Landmark;
           addr:isLandmarkType addr:Thoroughfare;
           rdfs:label ?streetLabel;
           skos:altLabel ?streetAltLabel;
           addr:within ?loc.
}
WHERE {
  { ?street p:P361 [ps:P361 wd:Q16024163]. }
  UNION
  { ?street p:P361 [ps:P361 wd:Q107311481]. }
  ?street wdt:P131 ?loc.
  ?street rdfs:label ?streetLabel.
  FILTER (LANG(?streetLabel) = "fr")
  OPTIONAL {?street skos:altLabel ?streetAltLabel. FILTER (LANG(?streetAltLabel) = "fr")}
}
"""

# Official names (P1448 statements) of the same streets, each exported as a
# Name attribute, plus one Creation and one Dissolution event carrying the
# optional start (P580) and end (P582) time value and precision.
query2 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?street addr:hasAttribute ?officialNameSt.
  ?officialNameSt a addr:Attribute; addr:isAttributeType addr:Name; addr:value ?officialName.
  ?eventCreation a addr:Event; addr:isEventType addr:Creation; addr:after ?officialNameSt; addr:eventTimeValue ?startDateValue; addr:eventTimePrecision ?startDatePrec.
  ?eventDissolution a addr:Event; addr:isEventType addr:Dissolution; addr:before ?officialNameSt; addr:eventTimeValue ?endDateValue; addr:eventTimePrecision ?endDatePrec.
}
WHERE {
  { ?street p:P361 [ps:P361 wd:Q16024163]. }
  UNION
  { ?street p:P361 [ps:P361 wd:Q107311481]. }
    ?street p:P1448 ?officialNameSt. 
    ?officialNameSt ps:P1448 ?officialName.
    OPTIONAL{?officialNameSt pqv:P580 [wikibase:timeValue ?startDateValue; wikibase:timePrecision ?startDatePrec]}
    OPTIONAL{?officialNameSt pqv:P582 [wikibase:timeValue ?endDateValue; wikibase:timePrecision ?endDatePrec]}
    #FILTER (LANG(?officialName) = "fr")
  BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#", STRUUID())) AS ?eventCreation)
  BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#", STRUUID())) AS ?eventDissolution)
}
"""

# Municipal arrondissements of Paris (instances of Q702842 located in Q90)
# with their French label, optional French alternative labels and optional
# inception date (P571).
query3 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?arrdt a addr:Landmark;
           addr:isLandmarkType addr:District;
           rdfs:label ?arrdtLabel;
           skos:altLabel ?arrdtAltLabel;
           addr:within wd:Q90;
           addr:startDate ?startDate.
}
WHERE {
  ?arrdt wdt:P31 wd:Q702842; p:P131 [ps:P131 wd:Q90]; rdfs:label ?arrdtLabel.
  FILTER(LANG(?arrdtLabel) = "fr")
  OPTIONAL {?arrdt skos:altLabel ?arrdtAltLabel. FILTER (LANG(?arrdtAltLabel) = "fr")}
  OPTIONAL {?arrdt wdt:P571 ?startDate}
}
"""

# Quartiers of Paris (instances of Q252916) with their French label, optional
# French alternative labels and the entity they are located in.
query4 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?quartier a addr:Landmark;
           addr:isLandmarkType addr:District;
           rdfs:label ?quartierLabel;
           skos:altLabel ?quartierAltLabel;
           addr:within ?loc.
}
WHERE {
  ?quartier wdt:P31 wd:Q252916; rdfs:label ?quartierLabel; p:P131 [ps:P131 ?loc].
  FILTER(LANG(?quartierLabel) = "fr")
  OPTIONAL {?quartier skos:altLabel ?quartierAltLabel. FILTER (LANG(?quartierAltLabel) = "fr")}
}
"""

# Paris itself (Q90) with its French label and optional French alternative
# labels.
query5 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?paris a addr:Landmark;
           addr:isLandmarkType addr:City;
           rdfs:label ?parisLabel;
           skos:altLabel ?parisAltLabel.
}
WHERE {
  BIND (wd:Q90 AS ?paris)
  ?paris rdfs:label ?parisLabel.
  FILTER(LANG(?parisLabel) = "fr")
  OPTIONAL {?paris skos:altLabel ?parisAltLabel. FILTER (LANG(?parisAltLabel) = "fr")}
}
"""

# Queries are executed in this order by the extraction cell.
wiki_queries = [query1, query2, query3, query4, query5]

In [6]:
# SPARQL update run against the GraphDB repository (not Wikidata): it inserts
# one addr:ChangeName event for each pair of distinct Name attributes
# (?attr1, ?attr2) of the same Thoroughfare, when
#   * an event linked by addr:before to ?attr1 and an event linked by
#     addr:after to ?attr2 both carry a time value and a time precision,
#   * the second time is not earlier than the first (difference >= 0 days,
#     computed with ofn:asDays), and
#   * both precisions are equal and the difference stays within the bound
#     chosen for that precision:
#         11 -> 1 day, 10 -> 32 days, 9 -> 366 days,
#          8 -> 3660 days, 7 -> 36600 days.
#     The precision codes presumably follow wikibase:timePrecision as
#     exported by the Wikidata queries (11 = day, 10 = month, 9 = year,
#     8 = decade, 7 = century) -- TODO confirm.
# The new event gets a fresh IRI built with STRUUID() and copies the time
# value and precision of the addr:after event.
# NOTE(review): the inserted addr:before / addr:after triples point at the two
# existing events (?eventBeforeAttr1, ?eventAfterAttr2), whereas the events
# built from Wikidata point at name attributes -- confirm this is intended.
loc_query_1 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>
PREFIX ofn: <http://www.ontotext.com/sparql/functions/>
INSERT {
    ?changeNameEvent a addr:Event;
                     addr:isEventType addr:ChangeName;
                     addr:before ?eventBeforeAttr1;
                     addr:after ?eventAfterAttr2;
                     addr:eventTimeValue ?eventAfterAttr2TimeVal;
                     addr:eventTimePrecision ?eventAfterAttr2TimePre.                                                                  
}
WHERE {
    ?elem addr:isLandmarkType addr:Thoroughfare; addr:hasAttribute ?attr1; addr:hasAttribute ?attr2.
    ?attr1 addr:isAttributeType addr:Name.
    ?attr2 addr:isAttributeType addr:Name.
    ?eventBeforeAttr1 addr:before ?attr1; addr:eventTimeValue ?eventBeforeAttr1TimeVal; addr:eventTimePrecision ?eventBeforeAttr1TimePre.
    ?eventAfterAttr2 addr:after ?attr2; addr:eventTimeValue ?eventAfterAttr2TimeVal; addr:eventTimePrecision ?eventAfterAttr2TimePre.
    FILTER (?attr1 != ?attr2)
    BIND(ofn:asDays(?eventAfterAttr2TimeVal - ?eventBeforeAttr1TimeVal) AS ?diffTime)
    FILTER(?diffTime >= 0.0)
    FILTER ((?diffTime <= 1.0 && ?eventBeforeAttr1TimePre = 11 && ?eventAfterAttr2TimePre = 11) ||
         (?diffTime <= 32.0 && ?eventBeforeAttr1TimePre = 10 && ?eventAfterAttr2TimePre = 10) ||
          (?diffTime <= 366.0 && ?eventBeforeAttr1TimePre = 9 && ?eventAfterAttr2TimePre = 9) ||
          (?diffTime <= 366.0*10 && ?eventBeforeAttr1TimePre = 8 && ?eventAfterAttr2TimePre = 8) ||
          (?diffTime <= 366.0*100 && ?eventBeforeAttr1TimePre = 7 && ?eventAfterAttr2TimePre = 7) 
         )
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#", STRUUID())) AS ?changeNameEvent)
}
"""


# Update queries applied, in order, by the "Clean data of graph" cell.
loc_queries = [loc_query_1]

### Extract street data of Paris from Wikidata

In [7]:
# Absolute path of the export file, which lives in the temporary folder.
abs_export_file = os.path.abspath(
    os.path.join(temp_folder, export_file)
)

In [13]:
# Run every Wikidata CONSTRUCT query and merge the results into one graph.
g = Graph()
for query in wiki_queries:
    g += fn.get_construct_query_wikidata(query)

# Name the serialization format explicitly: the export file has a `.ttl`
# extension and is later uploaded with Content-Type application/x-turtle, but
# rdflib's default format depends on its version (RDF/XML before 6.0, Turtle
# from 6.0 on).
g.serialize(destination=abs_export_file, format="turtle")

URLError: <urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (_ssl.c:1122)>

### Remove all data of repository whose id is `project_name`

In [None]:
# Send a DELETE request to the `statements` endpoint of the repository.
# The command is an argument list executed without a shell, so the URL is
# handed to curl exactly as built here.
cmd = [
    "curl", "-X", "DELETE",
    "-H", "Content-Type:application/x-turtle",
    f"{graphdb_url}/repositories/{project_name}/statements",
]

# Display curl's exit code as the cell result (0 means curl itself succeeded).
subprocess.run(cmd).returncode

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0


0

### Import ontology

In [None]:
# Upload the ontology file to the `statements` endpoint of the repository.
abs_ont_file = os.path.abspath(ont_file)

# The command is an argument list executed without a shell, so a file path
# containing spaces, quotes or `$` reaches curl unmodified.
cmd = [
    "curl", "-X", "POST",
    "-H", "Content-Type:application/x-turtle",
    "-T", abs_ont_file,
    f"{graphdb_url}/repositories/{project_name}/statements",
]

# Display curl's exit code as the cell result (0 means curl itself succeeded).
subprocess.run(cmd).returncode

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 19366    0     0  100 19366      0   366k --:--:-- --:--:-- --:--:--  370k


0

### Import created graph from Wikidata

In [None]:
# Upload the exported Wikidata graph into the named graph `wikidata` of the
# repository. The command is an argument list executed without a shell, so a
# file path containing spaces, quotes or `$` reaches curl unmodified.
cmd = [
    "curl", "-X", "POST",
    "-H", "Content-Type:application/x-turtle",
    "-T", abs_export_file,
    f"{graphdb_url}/repositories/{project_name}/rdf-graphs/wikidata",
]

# Display curl's exit code as the cell result (0 means curl itself succeeded).
subprocess.run(cmd).returncode

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 4682k    0     0  100 4682k      0  1893k  0:00:02  0:00:02 --:--:-- 1894k


0

### Clean data of graph

In [None]:
# Send each SPARQL update to the `statements` endpoint of the repository as a
# form-encoded `update=` parameter.
for loc_query in loc_queries:
    # Percent-encode the query so it is a valid form value.
    query_encoded = up.quote(loc_query)

    # Argument list executed without a shell: nothing in the encoded query or
    # the URL can be reinterpreted by shell quoting rules.
    cmd = [
        "curl", "-X", "POST",
        "-H", "Content-Type:application/x-www-form-urlencoded",
        "-d", f"update={query_encoded}",
        f"{graphdb_url}/repositories/{project_name}/statements",
    ]

    # check=True raises CalledProcessError when curl exits with a non-zero
    # status (e.g. the server is unreachable) instead of silently moving on
    # to the next query. HTTP error responses still exit with 0 and are only
    # visible in curl's printed output.
    subprocess.run(cmd, check=True)
