## Variables

### Import libraries

In [236]:
import os
import urllib.parse as up
from rdflib import Graph
import code.functions as fn

### Define global variables

In [237]:
# --- Ontology files (imported into the repository in the "Import ontologies" section) ---
addr_ont_file = "address_ont.ttl"
ev_ont_file = "events_ont.ttl"

# --- File names ---
# `export_file` receives the graph built from Wikidata; `local_config_file_name`
# is the repository configuration posted to GraphDB. Both are placed in `temp_folder`.
# NOTE(review): `mapping_file` and `out_file` are not used in the cells visible here.
mapping_file = "mapping.json"
export_file = "addresses-temp.ttl"
out_file = "addresses.ttl"
local_config_file_name = "config_repo.ttl"

# --- Folder names (made absolute in the "Process variables" cell) ---
temp_folder = "tmp_files"
source_folder = "sources"
mapping_folder = "mappings"

# --- GraphDB settings ---
export_format = "TURTLE"
project_name = "voies_paris_hist"
graphdb_url = "http://localhost:7200"

# --- Source descriptions: one record per CSV file ---
# presumably `mapping_file` is looked up in `mapping_folder` and `graphname`
# names the target named graph — TODO confirm against the cells that consume `sources`.
sources = [
    {
        "filename": "andriveau_1849_am.csv",
        "mapping_file": "mapping_andriveau_1849_am.json",
        "graphname": "andriveau1849",
        "date": "1849-01-01",
    },
    {
        "filename": "atlas_municipal_1888_am.csv",
        "mapping_file": "mapping_atlas_municipal_1888_am.json",
        "graphname": "atlasMunicipal1888",
        "date": "1888-01-01",
    },
    {
        "filename": "piquet_1826_am.csv",
        "mapping_file": "mapping_piquet_1826_am.json",
        "graphname": "piquet1826",
        "date": "1826-01-01",
    },
    {
        "filename": "verniquet_1791_am.csv",
        "mapping_file": "mapping_verniquet_1791_am.json",
        "graphname": "verniquet1791",
        "date": "1791-01-01",
    },
]

### Process variables

In [238]:
# Turn the three folder names into absolute paths in one step.
temp_folder, source_folder, mapping_folder = (
    os.path.abspath(folder)
    for folder in (temp_folder, source_folder, mapping_folder)
)

# The temporary folder must exist before any file is written into it.
fn.create_folder_if_not_exists(temp_folder)

# The repository configuration file lives in the temporary folder.
local_config_file_name = os.path.join(temp_folder, local_config_file_name)

## Creation of local repository

In [239]:
# Build the repository configuration file for `project_name`
# (presumably written to `local_config_file_name` by the helper — TODO confirm).
fn.create_config_local_repository_file(local_config_file_name, project_name)

# POST that file to the GraphDB repositories endpoint through curl.
# The recorded output shows GraphDB answering "already exists" when the
# repository was created by an earlier run.
url = f"{graphdb_url}/rest/repositories"
curl_cmd_local = fn.get_curl_command(
    "POST",
    url,
    content_type="multipart/form-data",
    form=f"config=@{local_config_file_name}",
)
os.system(curl_cmd_local)

{"message":"Repository voies_paris_hist already exists."}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1649    0    57  100  1592   2538  70909 --:--:-- --:--:-- --:--:--  134k


0

## Process to create / import data

### Queries to build graph from Wikidata

:warning: Queries cannot be chained on Wikidata, so the extraction has to be split into several separate queries.

In [240]:
# Get streets of Paris, with label, altLabel and their locations
# A street is any item that is "part of" (P361) wd:Q16024163 or wd:Q107311481.
# Only French labels are kept; the French altLabel is optional.
# The rdfs:, skos:, wd:, wdt:, p: and ps: prefixes are not declared in the query;
# presumably they are predefined by the Wikidata endpoint — TODO confirm.
# NOTE(review): ?officialNameAttr is never bound in the WHERE clause, so the
# `addr:hasAttribute ?officialNameAttr` template triple is never emitted.
query1 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
 ?street a addr:Landmark;
           addr:isLandmarkType addr:Thoroughfare;
           rdfs:label ?streetLabel;
           skos:altLabel ?streetAltLabel;
           addr:within ?loc ;
           addr:hasAttribute ?officialNameAttr.
}
WHERE {
  { ?street p:P361 [ps:P361 wd:Q16024163]. }
  UNION
  { ?street p:P361 [ps:P361 wd:Q107311481]. }
  ?street wdt:P131 ?loc.
  ?street rdfs:label ?streetLabel.
  FILTER (LANG(?streetLabel) = "fr")
  OPTIONAL {?street skos:altLabel ?streetAltLabel. FILTER (LANG(?streetAltLabel) = "fr")}
}
"""

# Get official name of streets and its history
# Each official-name statement (P1448) of a street becomes an addr:AttributeVersion.
# A start event/change and an end event/change are minted for every statement;
# their time value and precision come from the optional P580 (start) and
# P582 (end) qualifiers.
# The end event takes its precision from ?endDatePrec (the P582 qualifier),
# not from the start qualifier.
query2 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?street addr:hasAttributeVersion ?officialNameSt.
  ?officialNameSt a addr:AttributeVersion; addr:isAttributeType addr:NameAttribute; addr:value ?officialName.
  ?startEvent a addr:Event; addr:eventTimeValue ?startDateValue; addr:eventTimePrecision ?startDatePrec.
  ?endEvent a addr:Event; addr:eventTimeValue ?endDateValue; addr:eventTimePrecision ?endDatePrec.
  ?startChange a addr:AttributeChange; addr:isChangeType addr:NameChange; addr:dependsOn ?startEvent; addr:after ?officialNameSt.
  ?endChange a addr:AttributeChange; addr:isChangeType addr:NameChange; addr:dependsOn ?endEvent; addr:before ?officialNameSt.
}
WHERE {
  { ?street p:P361 [ps:P361 wd:Q16024163]. }
  UNION
  { ?street p:P361 [ps:P361 wd:Q107311481]. }
    ?street p:P1448 ?officialNameSt. 
    ?officialNameSt ps:P1448 ?officialName.
    OPTIONAL{?officialNameSt pqv:P580 [wikibase:timeValue ?startDateValue; wikibase:timePrecision ?startDatePrec]}
    OPTIONAL{?officialNameSt pqv:P582 [wikibase:timeValue ?endDateValue; wikibase:timePrecision ?endDatePrec]}
    #FILTER (LANG(?officialName) = "fr")
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#EV_", STRUUID())) AS ?startEvent)
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#CH_", STRUUID())) AS ?startChange)
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#EV_", STRUUID())) AS ?endEvent)
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#CH_", STRUUID())) AS ?endChange)
}
"""

# Get creation and dissolution dates of streets
# Inception (P571) and dissolution (P576) dates are read with their precision.
# NOTE(review): both dates are OPTIONAL while the four BINDs are unconditional,
# so a start/end event and a Creation/Dissolution change are emitted for every
# street, even when the corresponding date is missing (the event then carries
# no time value) — confirm this is intended.
# The "dissoulution" spelling is used consistently in the template and the
# WHERE clause, so the variables do match.
query3 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?startEvent a addr:Event; addr:eventTimeValue ?creationTimeValue; addr:eventTimePrecision ?creationTimePrecision.
  ?endEvent a addr:Event; addr:eventTimeValue ?dissoulutionTimeValue; addr:eventTimePrecision ?dissoulutionTimePrecision.
  ?startChange a addr:LandmarkChange; addr:isChangeType addr:Creation; addr:appliedTo ?street; addr:dependsOn ?startEvent.
  ?endChange a addr:LandmarkChange; addr:isChangeType addr:Dissolution; addr:appliedTo ?street; addr:dependsOn ?endEvent.
}
WHERE {
  { ?street p:P361 [ps:P361 wd:Q16024163]. }
  UNION
  { ?street p:P361 [ps:P361 wd:Q107311481]. }
    OPTIONAL { ?street p:P571 [psv:P571 [wikibase:timeValue ?creationTimeValue; wikibase:timePrecision ?creationTimePrecision]]. }
    OPTIONAL { ?street p:P576 [psv:P576 [wikibase:timeValue ?dissoulutionTimeValue; wikibase:timePrecision ?dissoulutionTimePrecision]]. }

    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#EV_", STRUUID())) AS ?startEvent)
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#CH_", STRUUID())) AS ?startChange)
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#EV_", STRUUID())) AS ?endEvent)
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#CH_", STRUUID())) AS ?endChange)
}
"""

# Get related data of municipal arrondissements of Paris
# Selects instances (P31) of wd:Q702842 located (P131) in wd:Q90 and emits them
# as addr:District landmarks within wd:Q90, with an optional inception date (P571).
# NOTE(review): unlike in query1, skos:altLabel is a mandatory pattern here, so
# an item without a French altLabel yields no solution at all — confirm intended.
query4 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?arrdt a addr:Landmark;
           addr:isLandmarkType addr:District;
           rdfs:label ?arrdtLabel;
           skos:altLabel ?arrdtAltLabel;
           addr:within wd:Q90;
           addr:startDate ?startDate.
}
WHERE {
  ?arrdt wdt:P31 wd:Q702842; p:P131 [ps:P131 wd:Q90]; rdfs:label ?arrdtLabel; skos:altLabel ?arrdtAltLabel.
  FILTER(LANG(?arrdtLabel) = "fr" && LANG(?arrdtAltLabel) = "fr")
  OPTIONAL {?arrdt wdt:P571 ?startDate}
}
"""

# Get related data of quartiers of Paris
# Selects instances (P31) of wd:Q252916 and emits them as addr:District
# landmarks located within whatever their P131 statement points to.
# NOTE(review): skos:altLabel is a mandatory pattern, so an item without a
# French altLabel yields no solution at all — confirm intended.
query5 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?quartier a addr:Landmark;
           addr:isLandmarkType addr:District;
           rdfs:label ?quartierLabel;
           skos:altLabel ?quartierAltLabel;
           addr:within ?loc.
}
WHERE {
  ?quartier wdt:P31 wd:Q252916; rdfs:label ?quartierLabel; skos:altLabel ?quartierAltLabel; p:P131 [ps:P131 ?loc].
  FILTER(LANG(?quartierLabel) = "fr" && LANG(?quartierAltLabel) = "fr")
}
"""

# Get related data of Paris
# Emits wd:Q90 itself as an addr:City landmark with its French label and altLabels.
query6 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

CONSTRUCT {
  ?paris a addr:Landmark;
           addr:isLandmarkType addr:City;
           rdfs:label ?parisLabel;
           skos:altLabel ?parisAltLabel.
}
WHERE {
  BIND (wd:Q90 AS ?paris)
  ?paris rdfs:label ?parisLabel; skos:altLabel ?parisAltLabel.
  FILTER(LANG(?parisLabel) = "fr" && LANG(?parisAltLabel) = "fr")
}
"""

# Queries are sent one by one (and page by page) to Wikidata in the extraction cell.
wiki_queries = [query1, query2, query3, query4, query5, query6]

In [241]:
# Insert a ChangeName event linking two name attributes of a thoroughfare when
# the event that ends the first and the event that starts the second are close
# enough in time for their shared precision (the comparisons below use the
# codes 11, 10, 9, 8 and 7 with growing day tolerances).
#
# This query used to be assigned to `loc_query_1` and was silently overwritten
# by the next assignment, so it has never been part of `loc_queries`. It is kept
# under its own name for reference and is NOT executed.
# NOTE(review): it refers to `addr:Name` and to `addr:before` / `addr:after` on
# events, whereas the other queries of this notebook use `addr:NameAttribute`
# and put before/after on changes — presumably it targets an earlier model;
# update it or drop it.
loc_query_name_change_events = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>
PREFIX ofn: <http://www.ontotext.com/sparql/functions/>
INSERT {
    ?changeNameEvent a addr:Event;
                     addr:isEventType addr:ChangeName;
                     addr:before ?eventBeforeAttr1;
                     addr:after ?eventAfterAttr2;
                     addr:eventTimeValue ?eventAfterAttr2TimeVal;
                     addr:eventTimePrecision ?eventAfterAttr2TimePre.                                                                  
}
WHERE {
    ?elem addr:isLandmarkType addr:Thoroughfare; addr:hasAttribute ?attr1; addr:hasAttribute ?attr2.
    ?attr1 addr:isAttributeType addr:Name.
    ?attr2 addr:isAttributeType addr:Name.
    ?eventBeforeAttr1 addr:before ?attr1; addr:eventTimeValue ?eventBeforeAttr1TimeVal; addr:eventTimePrecision ?eventBeforeAttr1TimePre.
    ?eventAfterAttr2 addr:after ?attr2; addr:eventTimeValue ?eventAfterAttr2TimeVal; addr:eventTimePrecision ?eventAfterAttr2TimePre.
    FILTER (?attr1 != ?attr2)
    BIND(ofn:asDays(?eventAfterAttr2TimeVal - ?eventBeforeAttr1TimeVal) AS ?diffTime)
    FILTER(?diffTime >= 0.0)
    FILTER ((?diffTime <= 1.0 && ?eventBeforeAttr1TimePre = 11 && ?eventAfterAttr2TimePre = 11) ||
         (?diffTime <= 32.0 && ?eventBeforeAttr1TimePre = 10 && ?eventAfterAttr2TimePre = 10) ||
          (?diffTime <= 366.0 && ?eventBeforeAttr1TimePre = 9 && ?eventAfterAttr2TimePre = 9) ||
          (?diffTime <= 366.0*10 && ?eventBeforeAttr1TimePre = 8 && ?eventAfterAttr2TimePre = 8) ||
          (?diffTime <= 366.0*100 && ?eventBeforeAttr1TimePre = 7 && ?eventAfterAttr2TimePre = 7) 
         )
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#", STRUUID())) AS ?changeNameEvent)
}
"""

# Give every thoroughfare landmark its own name attribute (addr:NameAttribute).
# The BIND that mints the attribute URI is placed AFTER the triple pattern on
# purpose: a BIND only extends the solutions of the patterns that precede it in
# the group, so binding first would evaluate STRUUID() once and make all streets
# share a single attribute URI instead of getting one each.
loc_query_1 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

INSERT {
    ?street addr:hasAttribute ?attrName.
    ?attrName a addr:Attribute; addr:isAttributeType addr:NameAttribute.
}
WHERE {
    ?street a addr:Landmark; addr:isLandmarkType addr:Thoroughfare.
    BIND(URI(CONCAT("http://rdf.geohistoricaldata.org/address#AN_", STRUUID())) AS ?attrName)
}
"""

# Re-attach every name attribute version to the name attribute of its street:
# the direct street -> version link and the version's attribute-type triple are
# deleted, the version is linked from the attribute (addr:version), and every
# change that has the version as `before` or `after` is applied to the attribute.
# Relies on the addr:hasAttribute links inserted by `loc_query_1`.
loc_query_2 = """
PREFIX addr: <http://rdf.geohistoricaldata.org/address#>

DELETE {
     ?street addr:hasAttributeVersion ?attrNameVersion.
     ?attrNameVersion addr:isAttributeType addr:NameAttribute.
}
INSERT {
    ?attrName addr:version ?attrNameVersion.
    ?change addr:appliedTo ?attrName.
}
WHERE {
    ?attrNameVersion a addr:AttributeVersion; addr:isAttributeType addr:NameAttribute.
    ?attrName a addr:Attribute; addr:isAttributeType addr:NameAttribute.
    ?street a addr:Landmark; addr:isLandmarkType addr:Thoroughfare; addr:hasAttributeVersion ?attrNameVersion; addr:hasAttribute ?attrName.
    ?change a addr:AttributeChange; (addr:before|addr:after) ?attrNameVersion.
}
"""

# Update queries, listed in the order in which they have to be applied.
loc_queries = [loc_query_1, loc_query_2]

### Extract street data of Paris from Wikidata

In [242]:
# Absolute path of the Turtle file that receives the graph extracted from Wikidata.
abs_export_file = os.path.abspath(os.path.join(temp_folder, export_file))

In [243]:
# Run every Wikidata CONSTRUCT query page by page and merge all pages in one graph.
limit = 500        # number of solutions requested per page
max_loop_nb = 100  # safety cap on the number of pages fetched per query

g = Graph()
for query in wiki_queries:
    offset = 0
    # NOTE(review): the queries have no ORDER BY, so paging relies on the endpoint
    # returning solutions in a stable order between requests — confirm.
    for loop_nb in range(max_loop_nb):
        q = query + f'\nLIMIT {limit} OFFSET {offset}'
        query_result = fn.get_construct_query_wikidata(q)
        offset += limit
        if len(query_result) == 0:
            # An empty page means the previous page was the last one.
            break
        g += query_result
    else:
        # The cap was reached without seeing an empty page: say so instead of
        # silently exporting a possibly truncated graph.
        print(f"Warning: stopped after {max_loop_nb} pages of {limit} results; the export may be incomplete.")

# The file is uploaded later with a Turtle content type, so the serialization
# format is stated explicitly rather than left to the rdflib default.
g.serialize(destination=abs_export_file, format="turtle")

<Graph identifier=Nbff575ff237b4ea0a428e623e1c37503 (<class 'rdflib.graph.Graph'>)>

### Remove all data from the repository whose id is `project_name`

In [260]:
# Send a DELETE request to the statements endpoint of the repository to empty it
# before the ontologies and the data are imported again.
url = f"{graphdb_url}/repositories/{project_name}/statements"
cmd = fn.get_curl_command("DELETE", url, content_type="application/x-turtle")
os.system(cmd)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0


0

### Import ontologies

In [261]:
# Both ontologies are posted to the statements endpoint of the repository.
url = f"{graphdb_url}/repositories/{project_name}/statements"

# Address ontology
abs_addr_ont_file = os.path.abspath(addr_ont_file)
cmd_1 = fn.get_curl_command(
    "POST",
    url,
    content_type="application/x-turtle",
    local_file=abs_addr_ont_file,
)
os.system(cmd_1)

# Events ontology
abs_ev_ont_file = os.path.abspath(ev_ont_file)
cmd_2 = fn.get_curl_command(
    "POST",
    url,
    content_type="application/x-turtle",
    local_file=abs_ev_ont_file,
)
os.system(cmd_2)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 19366    0     0  100 19366      0  83198 --:--:-- --:--:-- --:--:-- 85690
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  7154    0     0  100  7154      0  47197 --:--:-- --:--:-- --:--:-- 49337


0

### Import created graph from Wikidata

In [262]:
# Upload the Turtle file exported from Wikidata into the named graph "wikidata"
# of the repository (rdf-graphs endpoint).
url = f"{graphdb_url}/repositories/{project_name}/rdf-graphs/wikidata"
cmd = fn.get_curl_command("POST", url, content_type="application/x-turtle", local_file=abs_export_file)
os.system(cmd)

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 12.1M    0     0  100 12.1M      0  1817k  0:00:06  0:00:06 --:--:--     0


0

### Clean data of graph

In [263]:
# Build, for each local update query, the curl command that posts it
# (URL-encoded, as the `update` form field) to the statements endpoint.
# NOTE(review): the line that runs the command is commented out, so as it stands
# this cell only builds the commands and the update queries are never applied to
# the repository — re-enable it once the queries are validated.
url = f"{graphdb_url}/repositories/{project_name}/statements"
for loc_query in loc_queries:
    query_encoded = up.quote(loc_query)
    cmd = fn.get_curl_command("POST", url, content_type="application/x-www-form-urlencoded", post_data=f"update={query_encoded}")
    #os.system(cmd)