# Variables

## Import libraries

In [1]:
import os
import sys
from rdflib import URIRef, Literal

## Defining global variables

In [2]:
# --- Folders -----------------------------------------------------------------
# Folder that already exists and holds the input data
data_folder_name = "../data"
# Folder created during the process to hold generated files
tmp_folder_name = "../tmp_files"
# Folder containing the project's Python modules
py_code_folder_path = "./code"

# --- Files that already exist --------------------------------------------------
ont_file_name = "ontology.ttl"
ruleset_file_name = "rules.pie"

# --- Files created during the process (in `tmp_folder`) ------------------------
export_file_name = "addresses-temp.ttl"
out_file_name = "addresses.ttl"
local_config_file_name = "config_repo.ttl"
facts_ttl_file_name = "facts_data.ttl"
implicit_to_facts_ttl_file_name = "implicit_to_facts.ttl"

# --- GraphDB -------------------------------------------------------------------
# URL used to reach GraphDB
str_graphdb_url = "http://localhost:7200"
# Name of the repository holding the facts
facts_repository_name = "addresses_from_factoids"

# --- Names of the named graphs ---------------------------------------------------
ontology_named_graph_name = "ontology"
facts_named_graph_name = "facts"
factoids_named_graph_name = "factoids"
permanent_named_graph_name = "permanent"
tmp_named_graph_name = "temporary"
inter_sources_name_graph_name = "inter_sources"

## Processing global variables

* Obtaining absolute file paths from the relative paths given in the previous section
* Create a temporary folder if it doesn't already exist to store files to be deleted.
* Get RDFLib object for `graphdb_url` from a string

In [3]:
# Folders, turned into absolute paths
data_folder = os.path.abspath(data_folder_name)
tmp_folder = os.path.abspath(tmp_folder_name)
python_code_folder = os.path.abspath(py_code_folder_path)

# Existing input files, turned into absolute paths
ont_file = os.path.abspath(ont_file_name)
ruleset_file = os.path.abspath(ruleset_file_name)

# Files located inside the temporary folder
local_config_file = os.path.join(tmp_folder, local_config_file_name)
facts_ttl_file = os.path.join(tmp_folder, facts_ttl_file_name)
implicit_to_facts_ttl_file = os.path.join(tmp_folder, implicit_to_facts_ttl_file_name)

# RDFLib URI object built from the GraphDB URL string
graphdb_url = URIRef(str_graphdb_url)

## Import modules located in `code` folder

In [4]:
# Make the Python modules of the `code` folder importable.
# The membership check keeps `sys.path` free of duplicate entries when this
# cell is executed more than once in the same kernel session.
if python_code_folder not in sys.path:
    sys.path.insert(1, python_code_folder)

import filemanagement as fm
import graphdb as gd
import graphrdf as gr
import attributeversioncomparisons as avc
import multisourcesprocessing as msp
import factoidscreation as fc
import timeprocessing as tp

## Creation of folders if they don't exist

In [5]:
# Create the temporary folder `tmp_folder` through the `filemanagement` helper.
# Judging by the helper's name, an existing folder is presumably left untouched.
fm.create_folder_if_not_exists(tmp_folder)

### Creating the local repository in GraphDB
For the creation to work, GraphDB must be running and the URI given by `graphdb_url` must therefore be reachable. If the repository already exists, it is reinitialised: removed and recreated, or only emptied when `allow_removal` is `False`.

In [6]:
# Removing a repository may fail. To skip the removal step of a
# reinitialisation (removal + (re)creation), `allow_removal` must be False:
# in that case the repository is only emptied.
allow_removal = False
disable_same_as = False

gd.reinitialize_repository(
    graphdb_url,
    facts_repository_name,
    local_config_file,
    ruleset_name="owl2-rl-optimized",
    disable_same_as=disable_same_as,
    allow_removal=allow_removal,
)
# Alternative call relying on the ruleset file instead of a ruleset name:
# gd.reinitialize_repository(graphdb_url, facts_repository_name, local_config_file, ruleset_file=ruleset_file, disable_same_as=disable_same_as, allow_removal=allow_removal)

## Local directory management

## Importing ontologies

In [7]:
# Hand the ontology file (as a one-element list) to `gd.load_ontologies`,
# targeting the facts repository and the named graph `ontology_named_graph_name`.
gd.load_ontologies(graphdb_url, facts_repository_name, [ont_file], ontology_named_graph_name)

## Definition of variables linked to sources

### Paris thoroughfares via Wikidata

* `wd` for "wikidata"
* `wdp_land` for "wikidata paris landmarks"
* `wdp_loc` for "wikidata paris locations"

In [8]:
# GraphDB repository in which the Wikidata factoid triples are built and stored
wd_repository_name = "factoids_wikidata"

# File names
wdp_land_csv_file_name = "wd_paris_landmarks.csv"        # CSV result of the selection query (landmarks)
wdp_loc_csv_file_name = "wd_paris_locations.csv"         # CSV result of the selection query (locations)
wdp_kg_file_name = "wd_paris.ttl"                        # TTL structuring knowledge of the Paris thoroughfares
wdp_factoids_kg_file_name = "wd_paris_factoids.ttl"      # final TTL file (factoids)
wdp_permanent_kg_file_name = "wd_paris_permanent.ttl"    # final TTL file (permanent)

# CSV files live in the data folder
wdp_land_csv_file = os.path.join(data_folder, wdp_land_csv_file_name)
wdp_loc_csv_file = os.path.join(data_folder, wdp_loc_csv_file_name)

# TTL files live in the temporary folder
wdp_kg_file = os.path.join(tmp_folder, wdp_kg_file_name)
wdp_factoids_kg_file = os.path.join(tmp_folder, wdp_factoids_kg_file_name)
wdp_permanent_kg_file = os.path.join(tmp_folder, wdp_permanent_kg_file_name)

# Validity interval of the source: only a start time, no end time
wdp_time_description = {
    "start_time": {
        "stamp": "2024-08-26T00:00:00Z",
        "precision": "day",
        "calendar": "gregorian",
    },
}

### Nomenclature of Paris thoroughfares (Ville de Paris data)

The City of Paris data is made up of two sets:
* [names of current street rights-of-way](https://opendata.paris.fr/explore/dataset/denominations-emprises-voies-actuelles)
* [obsolete street names](https://opendata.paris.fr/explore/dataset/denominations-des-voies-caduques)

Current roads have a geometric right of way, unlike the old thoroughfares.

* `vpt` for ‘ville paris thoroughfares’
* `vpta` for ‘ville paris thoroughfares actuelles’.
* `vptc` for ‘ville paris thoroughfares caduques’.

In [9]:
# GraphDB repository in which the Ville de Paris factoid triples are built and stored
vpt_repository_name = "factoids_ville_de_paris"

# File names
vpta_csv_file_name = "denominations-emprises-voies-actuelles.csv"   # CSV data, current thoroughfares
vptc_csv_file_name = "denominations-des-voies-caduques.csv"         # CSV data, obsolete thoroughfares
vpt_kg_file_name = "voies_paris.ttl"                                # TTL structuring knowledge of the Paris thoroughfares
vpt_factoids_kg_file_name = "vpt_factoids.ttl"                      # final TTL file (factoids)
vpt_permanent_kg_file_name = "vpt_permanent.ttl"                    # final TTL file (permanent)

# CSV files live in the data folder
vpta_csv_file = os.path.join(data_folder, vpta_csv_file_name)
vptc_csv_file = os.path.join(data_folder, vptc_csv_file_name)

# TTL files live in the temporary folder
vpt_kg_file = os.path.join(tmp_folder, vpt_kg_file_name)
vpt_factoids_kg_file = os.path.join(tmp_folder, vpt_factoids_kg_file_name)
vpt_permanent_kg_file = os.path.join(tmp_folder, vpt_permanent_kg_file_name)

# Validity interval of the source: only a start time, no end time
vpt_time_description = {
    "start_time": {
        "stamp": "2024-02-10T00:00:00Z",
        "precision": "day",
        "calendar": "gregorian",
    },
}

### Base Adresse Nationale (BAN)

Data from the [Base Adresse Nationale (BAN)](https://adresse.data.gouv.fr/base-adresse-nationale) (National Address Base), available [here](https://adresse.data.gouv.fr/data/ban/adresses/latest/csv)

* `bpa` for ‘BAN paris addresses’

In [10]:
# GraphDB repository in which the BAN factoid triples are built and stored
bpa_repository_name = "factoids_ban"

# File names
bpa_csv_file_name = "ban_adresses.csv"               # CSV data
bpa_kg_file_name = "ban_adresses.ttl"                # TTL structuring knowledge of Paris addresses
bpa_factoids_kg_file_name = "ban_factoids.ttl"       # final TTL file (factoids)
bpa_permanent_kg_file_name = "ban_permanent.ttl"     # final TTL file (permanent)

# The CSV file lives in the data folder, the TTL files in the temporary folder
bpa_csv_file = os.path.join(data_folder, bpa_csv_file_name)
bpa_kg_file = os.path.join(tmp_folder, bpa_kg_file_name)
bpa_factoids_kg_file = os.path.join(tmp_folder, bpa_factoids_kg_file_name)
bpa_permanent_kg_file = os.path.join(tmp_folder, bpa_permanent_kg_file_name)

# Validity interval of the source: only a start time, no end time
bpa_time_description = {
    "start_time": {
        "stamp": "2024-01-01T00:00:00Z",
        "precision": "day",
        "calendar": "gregorian",
    },
}

### OpenStreetMap (OSM)

Extracting data from OpenStreetMap

In [11]:
# GraphDB repository in which the OSM factoid triples are built and stored
osm_repository_name = "factoids_osm"

# File names
osm_csv_file_name = "osm_adresses.csv"               # CSV data
osm_hn_csv_file_name = "osm_hn_adresses.csv"         # CSV data
osm_kg_file_name = "osm_adresses.ttl"                # TTL structuring knowledge of OSM addresses
osm_factoids_kg_file_name = "osm_factoids.ttl"       # final TTL file (factoids)
osm_permanent_kg_file_name = "osm_permanent.ttl"     # final TTL file (permanent)

# CSV files live in the data folder
osm_csv_file = os.path.join(data_folder, osm_csv_file_name)
osm_hn_csv_file = os.path.join(data_folder, osm_hn_csv_file_name)

# TTL files live in the temporary folder
osm_kg_file = os.path.join(tmp_folder, osm_kg_file_name)
osm_factoids_kg_file = os.path.join(tmp_folder, osm_factoids_kg_file_name)
osm_permanent_kg_file = os.path.join(tmp_folder, osm_permanent_kg_file_name)

# Validity interval of the source: only a start time, no end time
osm_time_description = {
    "start_time": {
        "stamp": "2024-01-01T00:00:00Z",
        "precision": "day",
        "calendar": "gregorian",
    },
}

### Integration of data from Geojson files

These files are derived from the vectorisation of maps of Paris:
* the revised Napoleonic cadastre of 1847 ;
* Andriveau’s plan of 1849 ;
* municipal plot plan of 1871 ;
* the Municipal Atlas map of 1888.

#### Global variables for importing data from Geojson files

In [12]:
# Settings shared by all Geojson imports
geojson_join_property = "name"
landmark_type = "Thoroughfare"
lang = "fr"

# Intermediate TTL file, stored in the temporary folder
tmp_kg_file_name = "tmp_kg.ttl"
tmp_kg_file = os.path.join(tmp_folder, tmp_kg_file_name)

#### Napoleonic cadastre of 1847

In [13]:
# GraphDB repository in which the factoid triples of this source are built and stored
cn_1847_repository_name = "factoids_1847_cadastre_nap"

# File names
cn_1847_geojson_file_name = "1847_cadastre_nap.geojson"        # Geojson data
cn_1847_kg_file_name = "cn_1847_kg.ttl"
cn_1847_factoids_kg_file_name = "cn_1847_factoids.ttl"         # final TTL file (factoids)
cn_1847_permanent_kg_file_name = "cn_1847_permanent.ttl"       # final TTL file (permanent)

# The Geojson file lives in the data folder, the TTL files in the temporary folder
cn_1847_geojson_file = os.path.join(data_folder, cn_1847_geojson_file_name)
cn_1847_kg_file = os.path.join(tmp_folder, cn_1847_kg_file_name)
cn_1847_factoids_kg_file = os.path.join(tmp_folder, cn_1847_factoids_kg_file_name)
cn_1847_permanent_kg_file = os.path.join(tmp_folder, cn_1847_permanent_kg_file_name)

# Source description
cn_1847_source_desc = {
    "lang": "fr",
    "label": "Cadastre napoléonien de Gentilly de 1847",
    "publisher": {"label": "Empire français"},
}

# Validity interval of the source
cn_1847_time_interval = {
    "start_time": {"stamp": "1845-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
    "end_time": {"stamp": "1850-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
}

# Read the Geojson content, then attach the source description and the
# validity interval under the "source" and "time" keys
cn_1847_geojson = fm.read_json_file(cn_1847_geojson_file)
cn_1847_geojson["source"] = cn_1847_source_desc
cn_1847_geojson["time"] = cn_1847_time_interval

#### Andriveau atlas

In [14]:
# GraphDB repository in which the factoid triples of this source are built and stored
an_1849_repository_name = "factoids_1849_andriveau"

# File names
an_1849_geojson_file_name = "1849_andriveau.geojson"           # Geojson data
an_1849_kg_file_name = "an_1849_kg.ttl"
an_1849_factoids_kg_file_name = "an_1849_factoids.ttl"         # final TTL file (factoids)
an_1849_permanent_kg_file_name = "an_1849_permanent.ttl"       # final TTL file (permanent)

# The Geojson file lives in the data folder, the TTL files in the temporary folder
an_1849_geojson_file = os.path.join(data_folder, an_1849_geojson_file_name)
an_1849_kg_file = os.path.join(tmp_folder, an_1849_kg_file_name)
an_1849_factoids_kg_file = os.path.join(tmp_folder, an_1849_factoids_kg_file_name)
an_1849_permanent_kg_file = os.path.join(tmp_folder, an_1849_permanent_kg_file_name)

# Source description
an_1849_source_desc = {
    "lang": "fr",
    "label": "Plan d'Andriveau de 1849",
    "publisher": {"label": "Andriveau"},
}

# Validity interval of the source
an_1849_time_interval = {
    "start_time": {"stamp": "1847-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
    "end_time": {"stamp": "1851-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
}

# Read the Geojson content, then attach the source description and the
# validity interval under the "source" and "time" keys
an_1849_geojson = fm.read_json_file(an_1849_geojson_file)
an_1849_geojson["source"] = an_1849_source_desc
an_1849_geojson["time"] = an_1849_time_interval

#### 1871 municipal parcel map of Paris

In [15]:
# GraphDB repository in which the factoid triples of this source are built and stored
pm_1871_repository_name = "factoids_1871_plan_parcellaire_mun"

# File names
pm_1871_geojson_file_name = "1871_plan_parcellaire_mun.geojson"    # Geojson data
pm_1871_kg_file_name = "pm_1871_kg.ttl"
pm_1871_factoids_kg_file_name = "pm_1871_factoids.ttl"             # final TTL file (factoids)
pm_1871_permanent_kg_file_name = "pm_1871_permanent.ttl"           # final TTL file (permanent)

# The Geojson file lives in the data folder, the TTL files in the temporary folder
pm_1871_geojson_file = os.path.join(data_folder, pm_1871_geojson_file_name)
pm_1871_kg_file = os.path.join(tmp_folder, pm_1871_kg_file_name)
pm_1871_factoids_kg_file = os.path.join(tmp_folder, pm_1871_factoids_kg_file_name)
pm_1871_permanent_kg_file = os.path.join(tmp_folder, pm_1871_permanent_kg_file_name)

# Source description
pm_1871_source_desc = {
    "lang": "fr",
    "label": "Plan parcellaire municipal",
    "publisher": {"label": "IIIe République"},
}

# Validity interval of the source
pm_1871_time_interval = {
    "start_time": {"stamp": "1870-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
    "end_time": {"stamp": "1872-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
}

# Read the Geojson content, then attach the source description and the
# validity interval under the "source" and "time" keys
pm_1871_geojson = fm.read_json_file(pm_1871_geojson_file)
pm_1871_geojson["source"] = pm_1871_source_desc
pm_1871_geojson["time"] = pm_1871_time_interval

#### 1888 Municipal Atlas of Paris

In [16]:
# GraphDB repository in which the factoid triples of this source are built and stored
am_1888_repository_name = "factoids_1888_atlas_municipal"

# File names
am_1888_geojson_file_name = "1888_atlas_municipal.geojson"     # Geojson data
am_1888_kg_file_name = "am_1888_kg.ttl"
am_1888_factoids_kg_file_name = "am_1888_factoids.ttl"         # final TTL file (factoids)
am_1888_permanent_kg_file_name = "am_1888_permanent.ttl"       # final TTL file (permanent)

# The Geojson file lives in the data folder, the TTL files in the temporary folder
am_1888_geojson_file = os.path.join(data_folder, am_1888_geojson_file_name)
am_1888_kg_file = os.path.join(tmp_folder, am_1888_kg_file_name)
am_1888_factoids_kg_file = os.path.join(tmp_folder, am_1888_factoids_kg_file_name)
am_1888_permanent_kg_file = os.path.join(tmp_folder, am_1888_permanent_kg_file_name)

# Source description
am_1888_source_desc = {
    "lang": "fr",
    "label": "Plan de l'atlas municipal de 1888",
    "publisher": {"label": "Ville de Paris"},
}

# Validity interval of the source
am_1888_time_interval = {
    "start_time": {"stamp": "1887-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
    "end_time": {"stamp": "1889-01-01T00:00:00Z", "precision": "year", "calendar": "gregorian"},
}

# Read the Geojson content, then attach the source description and the
# validity interval under the "source" and "time" keys
am_1888_geojson = fm.read_json_file(am_1888_geojson_file)
am_1888_geojson["source"] = am_1888_source_desc
am_1888_geojson["time"] = am_1888_time_interval

### Events

TTL file describing events

In [17]:
# GraphDB repository in which the factoid triples of the events data are built and stored
events_repository_name = "factoids_events"

# File names
events_ttl_file_name = "events.ttl"                          # TTL data
events_factoids_kg_file_name = "events_factoids.ttl"         # final TTL file (factoids)
events_permanent_kg_file_name = "events_permanent.ttl"       # final TTL file (permanent)

# The input TTL file lives in the data folder, the final TTL files in the temporary folder
events_ttl_file = os.path.join(data_folder, events_ttl_file_name)
events_factoids_kg_file = os.path.join(tmp_folder, events_factoids_kg_file_name)
events_permanent_kg_file = os.path.join(tmp_folder, events_permanent_kg_file_name)

## Final and iterative process

### Creating factoids in repositories

For each source, factoids are created independently in separate repositories.

#### Ville de Paris


In [18]:
# fc.create_factoids_repository_ville_paris(graphdb_url, vpt_repository_name, tmp_folder,
#                                           ont_file, ontology_named_graph_name,
#                                           factoids_named_graph_name, permanent_named_graph_name,
#                                           vpta_csv_file, vptc_csv_file, vpt_kg_file, vpt_time_description, lang=lang)

####  BAN


In [19]:
# fc.create_factoids_repository_ban(graphdb_url, bpa_repository_name, tmp_folder,
#                                   ont_file, ontology_named_graph_name,
#                                   factoids_named_graph_name, permanent_named_graph_name,
#                                   bpa_csv_file, bpa_kg_file, bpa_time_description, lang=lang)

#### Wikidata


In [20]:
# # fc.get_data_from_wikidata(wdp_land_csv_file, wdp_loc_csv_file)
# fc.create_factoids_repository_wikidata_paris(graphdb_url, wd_repository_name, tmp_folder,
#                                              ont_file, ontology_named_graph_name,
#                                              factoids_named_graph_name, permanent_named_graph_name,
#                                              wdp_land_csv_file, wdp_loc_csv_file, wdp_kg_file, wdp_time_description=wdp_time_description, lang=lang)

#### OSM

In [21]:
# fc.create_factoids_repository_osm(graphdb_url, osm_repository_name, tmp_folder,
#                                   ont_file, ontology_named_graph_name,
#                                   factoids_named_graph_name, permanent_named_graph_name,
#                                   osm_csv_file, osm_hn_csv_file, osm_kg_file, osm_time_description=osm_time_description, lang=lang)

#### Data from Geojson files

* Napoleonic cadastre of Gentilly (1847)
* Andriveau plan (1849)
* municipal parcel map of Paris (1871)
* municipal map of Paris (1888)

In [22]:
# fc.create_factoids_repository_geojson_states(graphdb_url, cn_1847_repository_name, tmp_folder, ont_file, ontology_named_graph_name,
#                                factoids_named_graph_name, permanent_named_graph_name, cn_1847_geojson, geojson_join_property, cn_1847_kg_file, tmp_kg_file, landmark_type, lang)
# fc.create_factoids_repository_geojson_states(graphdb_url, an_1849_repository_name, tmp_folder, ont_file, ontology_named_graph_name,
#                                factoids_named_graph_name, permanent_named_graph_name, an_1849_geojson, geojson_join_property, an_1849_kg_file, tmp_kg_file, landmark_type, lang)
# fc.create_factoids_repository_geojson_states(graphdb_url, pm_1871_repository_name, tmp_folder, ont_file, ontology_named_graph_name,
#                                factoids_named_graph_name, permanent_named_graph_name, pm_1871_geojson, geojson_join_property, pm_1871_kg_file, tmp_kg_file, landmark_type, lang)
# fc.create_factoids_repository_geojson_states(graphdb_url, am_1888_repository_name, tmp_folder, ont_file, ontology_named_graph_name,
#                                factoids_named_graph_name, permanent_named_graph_name, am_1888_geojson, geojson_join_property, am_1888_kg_file, tmp_kg_file, landmark_type, lang)

#### Data from Events files

In [23]:
# def create_factoids_repository_events(graphdb_url, repository_name, tmp_folder, ttl_file,
#                                       ont_file, ontology_named_graph_name,
#                                       factoids_named_graph_name, permanent_named_graph_name):
#     factoids_named_graph_uri = gd.get_named_graph_uri_from_name(graphdb_url, repository_name, factoids_named_graph_name)
#     permanent_named_graph_uri = gd.get_named_graph_uri_from_name(graphdb_url, repository_name, permanent_named_graph_name)
    
#     # Creating repository
#     msp.create_factoid_repository(graphdb_url, repository_name, tmp_folder,
#                                 ont_file, ontology_named_graph_name, ruleset_name="rdfsplus-optimized",
#                                 disable_same_as=False, clear_if_exists=True)
    
#     # Import ttl file in repository
#     gd.import_ttl_file_in_graphdb(graphdb_url, repository_name, ttl_file, named_graph_name=factoids_named_graph_name)

#     # Transfer all provenance descriptions to the permanent named graph
#     msp.transfert_immutable_triples(graphdb_url, repository_name, factoids_named_graph_uri, permanent_named_graph_uri)


In [24]:
# create_factoids_repository_events(graphdb_url, events_repository_name, tmp_folder,
#                                      events_ttl_file, ont_file, ontology_named_graph_name,
#                                      factoids_named_graph_name, permanent_named_graph_name)

### Insertion of factoids in the fact graph

In [25]:
# Remove the `facts` and `inter_sources` named graphs from the facts repository,
# presumably so that the imports of the following cells start from a clean state.
# The value returned by the last call is what the cell displays as its output.
gd.remove_named_graph(graphdb_url, facts_repository_name, facts_named_graph_name)
gd.remove_named_graph(graphdb_url, facts_repository_name, inter_sources_name_graph_name)

<Response [204]>

#### Ville de Paris

In [26]:
# Named graph of the facts repository receiving the Ville de Paris factoids
named_graph_name = "source_ville_de_paris"

# Step 1: transfer from the Ville de Paris factoids repository to the facts repository
msp.transfert_factoids_to_facts_repository(
    graphdb_url,
    facts_repository_name,
    vpt_repository_name,
    vpt_factoids_kg_file,
    vpt_permanent_kg_file,
    factoids_named_graph_name,
    permanent_named_graph_name,
    named_graph_name,
    facts_named_graph_name,
)

# Step 2: import of the transferred factoids into the facts
msp.import_factoids_in_facts(
    graphdb_url,
    facts_repository_name,
    named_graph_name,
    facts_named_graph_name,
    inter_sources_name_graph_name,
)

#### Wikidata

In [27]:
# Named graph of the facts repository receiving the Wikidata factoids
named_graph_name = "source_wikidata"

# Step 1: transfer from the Wikidata factoids repository to the facts repository
msp.transfert_factoids_to_facts_repository(
    graphdb_url,
    facts_repository_name,
    wd_repository_name,
    wdp_factoids_kg_file,
    wdp_permanent_kg_file,
    factoids_named_graph_name,
    permanent_named_graph_name,
    named_graph_name,
    facts_named_graph_name,
)

# Step 2: import of the transferred factoids into the facts
msp.import_factoids_in_facts(
    graphdb_url,
    facts_repository_name,
    named_graph_name,
    facts_named_graph_name,
    inter_sources_name_graph_name,
)

#### BAN

In [28]:
# Named graph of the facts repository receiving the BAN factoids
named_graph_name = "source_ban"

# Step 1: transfer from the BAN factoids repository to the facts repository
msp.transfert_factoids_to_facts_repository(
    graphdb_url,
    facts_repository_name,
    bpa_repository_name,
    bpa_factoids_kg_file,
    bpa_permanent_kg_file,
    factoids_named_graph_name,
    permanent_named_graph_name,
    named_graph_name,
    facts_named_graph_name,
)

# Step 2: import of the transferred factoids into the facts
msp.import_factoids_in_facts(
    graphdb_url,
    facts_repository_name,
    named_graph_name,
    facts_named_graph_name,
    inter_sources_name_graph_name,
)

#### OSM

In [29]:
# Named graph of the facts repository receiving the OSM factoids
named_graph_name = "source_osm"

# Step 1: transfer from the OSM factoids repository to the facts repository
msp.transfert_factoids_to_facts_repository(
    graphdb_url,
    facts_repository_name,
    osm_repository_name,
    osm_factoids_kg_file,
    osm_permanent_kg_file,
    factoids_named_graph_name,
    permanent_named_graph_name,
    named_graph_name,
    facts_named_graph_name,
)

# Step 2: import of the transferred factoids into the facts
msp.import_factoids_in_facts(
    graphdb_url,
    facts_repository_name,
    named_graph_name,
    facts_named_graph_name,
    inter_sources_name_graph_name,
)

#### Data from Geojson files

* Napoleonic cadastre of Gentilly (1847)
* Andriveau plan (1849)
* municipal parcel map of Paris (1871)
* municipal map of Paris (1888)

In [30]:
# Named graph of the facts repository shared by all Geojson sources
named_graph_name = "source_geojson"

# One entry per vectorised map:
# (factoids repository name, factoids TTL file, permanent TTL file)
geojson_sources = [
    (cn_1847_repository_name, cn_1847_factoids_kg_file, cn_1847_permanent_kg_file),
    (an_1849_repository_name, an_1849_factoids_kg_file, an_1849_permanent_kg_file),
    (pm_1871_repository_name, pm_1871_factoids_kg_file, pm_1871_permanent_kg_file),
    (am_1888_repository_name, am_1888_factoids_kg_file, am_1888_permanent_kg_file),
]

# Transfer every source, in chronological order, into the same named graph
for src_repository_name, src_factoids_kg_file, src_permanent_kg_file in geojson_sources:
    msp.transfert_factoids_to_facts_repository(
        graphdb_url,
        facts_repository_name,
        src_repository_name,
        src_factoids_kg_file,
        src_permanent_kg_file,
        factoids_named_graph_name,
        permanent_named_graph_name,
        named_graph_name,
        facts_named_graph_name,
    )

# A single import once the four sources have been transferred
msp.import_factoids_in_facts(
    graphdb_url,
    facts_repository_name,
    named_graph_name,
    facts_named_graph_name,
    inter_sources_name_graph_name,
)


#### Data from Events files

In [31]:
# Named graph of the facts repository receiving the events factoids
named_graph_name = "source_events"

# Step 1: transfer from the events factoids repository to the facts repository
msp.transfert_factoids_to_facts_repository(
    graphdb_url,
    facts_repository_name,
    events_repository_name,
    events_factoids_kg_file,
    events_permanent_kg_file,
    factoids_named_graph_name,
    permanent_named_graph_name,
    named_graph_name,
    facts_named_graph_name,
)

# Step 2: import of the transferred factoids into the facts
msp.import_factoids_in_facts(
    graphdb_url,
    facts_repository_name,
    named_graph_name,
    facts_named_graph_name,
    inter_sources_name_graph_name,
)


### Version aggregation and change inference

In [32]:
# Name of the named graph used for temporal ordering
order_named_graph_name = "temporal_ordering"

# URIs of the named graphs of the facts repository, obtained from their names
# (same lookup order: facts, inter-sources, temporary, temporal ordering)
(
    facts_named_graph_uri,
    inter_sources_name_graph_uri,
    tmp_named_graph_uri,
    order_named_graph_uri,
) = [
    gd.get_named_graph_uri_from_name(graphdb_url, facts_repository_name, graph_name)
    for graph_name in (
        facts_named_graph_name,
        inter_sources_name_graph_name,
        tmp_named_graph_name,
        order_named_graph_name,
    )
]

#### Comparison of version values

In [33]:
# Named graph and temporary TTL file used for the comparisons
comp_named_graph_name = "comparisons"
comp_tmp_file_name = "comparisons.ttl"
comp_tmp_file = os.path.join(tmp_folder, comp_tmp_file_name)

# Settings handed to the comparison of attribute versions
comparison_settings = {
    "geom_similarity_coef": 0.85,
    "geom_buffer_radius": 5,
    "geom_crs_uri": URIRef('http://www.opengis.net/def/crs/EPSG/0/2154'),
}

avc.compare_attribute_versions(
    graphdb_url,
    facts_repository_name,
    comp_named_graph_name,
    comp_tmp_file,
    comparison_settings,
)

#### Infer changes

In [34]:
# # Sort landmark versions by time
# msp.order_temporally_landmark_versions(graphdb_url, facts_repository_name, order_named_graph_uri, tmp_named_graph_uri)

# # Infer missing changes for landmark and landmark relations
# msp.infer_missing_changes_on_landmark_and_relations(graphdb_url, facts_repository_name, facts_named_graph_uri)
# msp.infer_missing_time_on_events(graphdb_url, facts_repository_name, facts_named_graph_uri, inter_sources_name_graph_uri)

# # Sort attributes by time
# msp.order_temporally_attribute_versions(graphdb_url, facts_repository_name, order_named_graph_uri, tmp_named_graph_uri)
# msp.create_roots_and_traces_for_landmark_attribute_versions(graphdb_url, facts_repository_name, facts_named_graph_uri, inter_sources_name_graph_uri, tmp_named_graph_uri)

### Transfer information from factoids

In [35]:
# msp.transfer_version_values_to_roots(graphdb_url, facts_repository_name, facts_named_graph_uri)
# msp.transfer_provenances_to_roots(graphdb_url, facts_repository_name, facts_named_graph_uri)
# msp.transfer_crisp_time_instant_elements_to_roots(graphdb_url, facts_repository_name, facts_named_graph_uri)

# Test to import events

In [36]:
# from namespaces import NameSpaces
# np = NameSpaces()

# gd.remove_named_graph(graphdb_url, facts_repository_name, tmp_named_graph_name)
# gd.remove_named_graph(graphdb_url, facts_repository_name, order_named_graph_name)
# msp.sort_events_and_states_on_attributes(graphdb_url, facts_repository_name, order_named_graph_uri, tmp_named_graph_uri)
# msp.update_attribute_changes_and_versions(graphdb_url, facts_repository_name, tmp_named_graph_uri)
# # msp.create_roots_and_traces_for_landmark_attribute_versions(graphdb_url, facts_repository_name, facts_named_graph_uri, inter_sources_name_graph_uri, tmp_named_graph_uri)
# # gd.remove_named_graph(graphdb_url, facts_repository_name, tmp_named_graph_name)

# Split overlapping versions

In [37]:
# `namespaces` is a project module, presumably located in the `code` folder
# added to `sys.path` earlier in the notebook.
from namespaces import NameSpaces
# NOTE: `np` is a `NameSpaces` instance here, not the conventional NumPy alias.
np = NameSpaces()

In [38]:
def get_time_description(graphdb_url:URIRef, repository_name:str, tmp_named_graph_uri:URIRef):
    """
    Build, inside the temporary named graph, a timeline of changes and versions for each attribute.

    Despite its name, this function returns nothing: it sends nine SPARQL updates, in order,
    through `gd.update_query`. Every inserted triple is written in the graph `tmp_named_graph_uri`.
    Each update relies on the triples inserted by the previous ones, so the order matters.

    1. `query1`: for each root attribute (an `addr:Attribute` having `addr:hasTrace`), create one
       `addr:TimeDescription` per temporal value found on its traces (beginning/end of the landmark
       owning the trace, or time of a change applied to the trace).
    2. `query2`: for each distinct (attribute, simplified time) pair, create one time, one event and
       one change, derived from the matching time descriptions.
    3. `query3`: chain the changes of an attribute with `addr:hasNextChange`.
    4. `query4`: add an extra change (whose event has no time) before the first and after the last change.
    5. `query5`: create one attribute version between each pair of successive changes.
    6. `query6` / `query7`: link created changes and versions to the existing ones (`addr:hasTrace`).
    7. `query8` / `query9`: link created versions to the existing versions they fall into.

    Args:
        graphdb_url: GraphDB URL, forwarded to `gd.update_query`.
        repository_name: name of the repository, forwarded to `gd.update_query`.
        tmp_named_graph_uri: URI of the named graph in which triples are inserted
            (its `.n3()` form is bound to `?g` in every query).

    Returns:
        None.
    """
    # For each attribute, create as many TimeDescription objects as there are temporal values related to it.
    # `?timeType` is "start"/"end" for landmark bounds and "null" for times coming from attribute changes.
    # `?simplifiedTime` is the time stamp expressed as a number of days (ofn:asDays) since "0001-01-01".
    query1 = np.query_prefixes + f"""
    PREFIX ofn: <http://www.ontotext.com/sparql/functions/>

    INSERT {{
        GRAPH ?g {{
            ?rootAttr addr:hasTimeDescription [a addr:TimeDescription ; addr:hasTime ?time ; addr:hasTimeType ?timeType ; addr:hasSimplifiedTime ?simplifiedTime ; addr:hasRelatedElem ?attrElem ] .
        }}
    }}
    WHERE {{
        BIND({tmp_named_graph_uri.n3()} AS ?g)
        ?rootAttr a addr:Attribute ; addr:hasTrace ?attr .
        {{
            ?lm a addr:Landmark ; addr:hasAttribute ?attr ; addr:hasTime [?propTime ?time ] .
            ?attr addr:hasAttributeVersion ?attrElem .
            FILTER(?propTime IN (addr:hasBeginning, addr:hasEnd))
            BIND(IF(?propTime = addr:hasBeginning, "start", "end") AS ?timeType)
        }} UNION {{
            ?attrElem a addr:AttributeChange ; addr:appliedTo ?attr ; addr:dependsOn [addr:hasTime ?time] .
            BIND("null" AS ?timeType)
        }}
        ?time addr:timeStamp ?timeStamp .
        BIND(ofn:asDays(?timeStamp - "0001-01-01"^^xsd:dateTimeStamp) AS ?simplifiedTime)
    }}
    """

    # For each attribute, detect duplicate time values and create a list of changes without duplicates:
    # one time / event / change is minted per distinct (attribute, simplified time) pair (sub-select),
    # and the change is linked (addr:isDerivedFrom) to every time description sharing that simplified time.
    query2 =  np.query_prefixes + f"""
    INSERT {{
        GRAPH ?g {{
            ?time a addr:TemporalEntity ; addr:hasSimplifiedTime ?st ; addr:hasTrace ?timeTrace .
            ?event a addr:Event ; addr:hasTime ?time .
            ?change a addr:AttributeChange ; addr:appliedTo ?attr ; addr:dependsOn ?event ; addr:isDerivedFrom ?timeDescription .
        }}
    }} WHERE {{
        {{
            SELECT DISTINCT ?g ?attr ?st WHERE {{
                BIND({tmp_named_graph_uri.n3()} AS ?g)
                GRAPH ?g {{ ?attr addr:hasTimeDescription [addr:hasSimplifiedTime ?st] }}
            }}
        }}
        BIND(URI(CONCAT(STR(URI(factoids:)), "TI_", STRUUID())) AS ?time)
        BIND(URI(CONCAT(STR(URI(factoids:)), "EV_", STRUUID())) AS ?event)
        BIND(URI(CONCAT(STR(URI(factoids:)), "CG_", STRUUID())) AS ?change)
        GRAPH ?g {{
            ?attr addr:hasTimeDescription ?timeDescription .
            ?timeDescription addr:hasSimplifiedTime ?st ; addr:hasTime ?timeTrace .
        }}
    }}
    """

    # Order changes: for each change time, the sub-select computes the smallest strictly positive gap
    # (in simplified time) to another change of the same attribute; the change at that gap is its successor.
    query3 =  np.query_prefixes + f"""
    INSERT {{
        GRAPH ?g {{ ?cg addr:hasNextChange ?cgBis . }}
    }}
    WHERE {{
        {{
            SELECT ?g ?attr ?t (MIN(?diffTime) AS ?minDiffTime) WHERE {{
                BIND({tmp_named_graph_uri.n3()} AS ?g)
                ?cg addr:appliedTo ?attr ; addr:dependsOn [addr:hasTime ?t].
                ?cgBis addr:appliedTo ?attr ; addr:dependsOn [addr:hasTime ?tBis].
                ?t addr:hasSimplifiedTime ?st .
                ?tBis addr:hasSimplifiedTime ?stBis .
                BIND(?stBis - ?st AS ?diffTime)
                FILTER(?t != ?tBis && ?diffTime > 0)
            }}
            GROUP BY ?g ?attr ?t
        }}
        
        ?cg addr:appliedTo ?attr ; addr:dependsOn [addr:hasTime ?t].
        ?cgBis addr:appliedTo ?attr ; addr:dependsOn [addr:hasTime ?tBis].
        ?t addr:hasSimplifiedTime ?st .
        ?tBis addr:hasSimplifiedTime ?stBis .
        FILTER(?stBis - ?st = ?minDiffTime)
    }}
    """

    # Create fake changes (related to -inf and +inf temporal values):
    # a change without successor gets a new change after it (?firstChangeMissing = false),
    # a change without predecessor gets a new change before it (?firstChangeMissing = true).
    # The events of these new changes have no time.
    query4 = np.query_prefixes + f"""
    INSERT {{
        GRAPH ?g {{
            ?newEvent a addr:Event .
            ?newChange a addr:AttributeChange ; addr:appliedTo ?attr ; addr:dependsOn ?newEvent .
            ?prevChange addr:hasNextChange ?nextChange .
        }} 
    }} WHERE {{
        {{
            SELECT DISTINCT ?g ?attr ?cg ?firstChangeMissing WHERE {{
                BIND({tmp_named_graph_uri.n3()} AS ?g)
                GRAPH ?g {{ ?cg addr:appliedTo ?attr . }}
                ?attr a addr:Attribute .
                {{
                    FILTER NOT EXISTS {{ ?cg addr:hasNextChange ?x }}
                    BIND("false"^^xsd:boolean AS ?firstChangeMissing)
                }} UNION {{
                    FILTER NOT EXISTS {{ ?x addr:hasNextChange ?cg }}
                    BIND("true"^^xsd:boolean AS ?firstChangeMissing)
                }}
            }}
        }}
        BIND(URI(CONCAT(STR(URI(factoids:)), "CG_", STRUUID())) AS ?newChange)
        BIND(URI(CONCAT(STR(URI(factoids:)), "EV_", STRUUID())) AS ?newEvent)
        BIND(IF(?firstChangeMissing, ?newChange, ?cg)  AS ?prevChange)
        BIND(IF(?firstChangeMissing, ?cg, ?newChange)  AS ?nextChange)
    }}
    """

    # Create versions between two successive changes (one makes the version effective while the other outdates it).
    # Relies on the explicit triples giving successive changes (`?cg1 addr:hasNextChange ?cg2`) created above.
    query5 = np.query_prefixes + f"""
    INSERT {{
        GRAPH ?g {{
            ?attr addr:hasAttributeVersion ?vers .
            ?vers a addr:AttributeVersion .
            ?cg1 addr:makesEffective ?vers .
            ?cg2 addr:outdates ?vers .
        }}
    }} WHERE {{
        {{
            SELECT DISTINCT ?g ?attr ?cg1 ?cg2 WHERE {{
                BIND({tmp_named_graph_uri.n3()} AS ?g)
                ?attr a addr:Attribute .
                GRAPH ?g {{
                    ?cg1 addr:appliedTo ?attr .
                    ?cg2 addr:appliedTo ?attr .
                    ?cg1 addr:hasNextChange ?cg2 .
                }}
            }}
        }}
        BIND(URI(CONCAT(STR(URI(factoids:)), "AV_", STRUUID())) AS ?vers)
    }}
    """

    # Link existing attribute changes with the created ones when they are related
    # (the existing change is the related element of a time description the created change is derived from).
    query6 = np.query_prefixes + f"""
    INSERT {{
        GRAPH ?g {{ ?cg addr:hasTrace ?cgTrace . }}
    }} WHERE {{
        BIND({tmp_named_graph_uri.n3()} AS ?g)
        ?attr a addr:Attribute .
        ?cgTrace a addr:AttributeChange .
        GRAPH ?g {{
            ?cg addr:appliedTo ?attr ; addr:isDerivedFrom [addr:hasRelatedElem ?cgTrace] .
        }}
    }}
    """

    # Link existing attribute versions related to changes with the created ones when they are related:
    # if a created change has a trace, the version it makes effective (resp. outdates) gets as trace
    # the version made effective (resp. outdated) by that trace.
    query7 = np.query_prefixes + f"""
    INSERT {{
        GRAPH ?g {{ ?vers addr:hasTrace ?versTrace . }}
    }}
    WHERE {{
        BIND({tmp_named_graph_uri.n3()} AS ?g)
        ?attr a addr:Attribute .
        ?cgTrace ?changeProp ?versTrace .
        GRAPH ?g {{
            VALUES ?changeProp {{ addr:makesEffective addr:outdates }}
            ?cg a addr:AttributeChange ; addr:appliedTo ?attr ; addr:hasTrace ?cgTrace ; ?changeProp ?vers.
        }}
    }}
    """

    # For each existing version, get the created changes which make it effective ("start" time descriptions)
    # and outdate it ("end" time descriptions). These helper triples are consumed by query9.
    query8 = np.query_prefixes + f"""
    INSERT {{
        GRAPH ?g {{
            ?versTrace ?changeProp ?cg .
        }}
    }}
    WHERE {{
        BIND({tmp_named_graph_uri.n3()} AS ?g)
        ?attr a addr:Attribute .
        ?versTrace a addr:AttributeVersion .
        GRAPH ?g {{
            VALUES (?timeType ?changeProp) {{ ("start" addr:isMadeEffectiveBy) ("end" addr:isOutdatedBy) }}
            ?cg addr:appliedTo ?attr ; addr:isDerivedFrom [addr:hasRelatedElem ?versTrace ; addr:hasTimeType ?timeType] .
        }}
    }}
    """

    # Link each created version lying between the start change (?cg1) and the end change (?cg2) of an
    # existing version to that existing version (addr:hasTrace), then delete the helper triples of query8.
    query9 = np.query_prefixes + f"""
    DELETE {{
        GRAPH ?g {{ ?versTrace addr:isMadeEffectiveBy ?cg1 ; addr:isOutdatedBy ?cg2 . }}
    }}
    INSERT {{
        GRAPH ?g {{ ?vers addr:hasTrace ?versTrace }}  
    }} WHERE {{
        BIND({tmp_named_graph_uri.n3()} AS ?g)
        ?attr a addr:Attribute .
        ?versTrace a addr:AttributeVersion .
        GRAPH ?g {{
            ?cg1 addr:appliedTo ?attr .
            ?cg2 addr:appliedTo ?attr .
            ?versTrace addr:isMadeEffectiveBy ?cg1 ; addr:isOutdatedBy ?cg2 .
            {{
                ?cg1 addr:makesEffective ?vers .
            }} UNION {{
                ?cg2 addr:outdates ?vers .
            }} UNION {{
                ?cg1 addr:hasNextChange+ ?c .
                ?c addr:hasNextChange+ ?cg2 .
                ?c addr:makesEffective|addr:outdates ?vers .
            }}
        }}
    }}
    """
    
    # The updates must be sent in this exact order: each one reads triples inserted by the previous ones.
    queries = [query1, query2, query3, query4, query5, query6, query7, query8, query9]
    for query in queries:
        gd.update_query(query, graphdb_url, repository_name)

# Remove the temporary named graph first (so the construction presumably starts from an empty graph),
# then build the change/version timeline of every attribute inside it.
gd.remove_named_graph_from_uri(tmp_named_graph_uri)
get_time_description(graphdb_url, facts_repository_name, tmp_named_graph_uri)

In [39]:
# Transfer factoid information to facts (`msp` is presumably imported in an earlier cell — TODO confirm).
msp.transfer_elements_to_roots(graphdb_url, facts_repository_name, facts_named_graph_uri)

### Remove empty versions
* versions that are not related to any trace
* versions whose changes are not related to any trace

In [40]:
# Remove "empty" versions from the temporary named graph.
#
# A version is selected when it has no trace and when its two surrounding changes (the one making it
# effective, `?changeME`, and the one outdating it, `?changeO`) do not BOTH have a trace.
# The version and every untraced surrounding change are deleted and replaced by a single change:
#   * a brand new change/event when neither surrounding change has a trace;
#   * otherwise the surrounding change which does have a trace (and its event).
# The times of the removed changes are kept on the resulting event as `addr:hasTimeAfter` /
# `addr:hasTimeBefore` bounds.
#
# An earlier draft of this query used to be assigned to `query` just above and was overwritten
# before being sent; that dead assignment has been removed.
#
# NOTE(review): the temporary named graph URI is hard-coded in the query; it presumably equals
# `tmp_named_graph_uri` — confirm before pointing the notebook at another server or repository.
query = """
PREFIX addr: <http://rdf.geohistoricaldata.org/def/address#>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX factoids: <http://rdf.geohistoricaldata.org/id/address/factoids/>
DELETE {
    ?toRemoveChangeME addr:appliedTo ?attr ; addr:dependsOn ?eventME ; addr:makesEffective ?version ; addr:outdates ?vO .
    ?toRemoveChangeO addr:appliedTo ?attr ; addr:dependsOn ?eventO ; addr:outdates ?version ; addr:makesEffective ?vME.
    ?attr addr:hasAttributeVersion ?version .
    ?version a addr:AttributeVersion .
}
INSERT {
    GRAPH ?g {
    	?change a addr:AttributeChange ; addr:appliedTo ?attr ; addr:dependsOn ?event ; addr:makesEffective ?vME ; addr:outdates ?vO.
        ?event a addr:Event ; addr:hasTimeAfter ?timeME ; addr:hasTimeBefore ?timeO .
        }
}
WHERE {
    {
        SELECT DISTINCT ?g ?attr ?version ?changeME ?changeO ?hasChangeMETrace ?hasChangeOTrace WHERE {
            BIND(<http://localhost:7200/repositories/addresses_from_factoids/rdf-graphs/temporary> AS ?g)
            GRAPH ?g {
                ?attr addr:hasAttributeVersion ?version .
                ?version a addr:AttributeVersion .
                ?changeME a addr:AttributeChange ; addr:makesEffective ?version .
                ?changeO a addr:AttributeChange ; addr:outdates ?version .
            }
            FILTER NOT EXISTS {?version addr:hasTrace ?versionTrace .}
            OPTIONAL { ?changeME addr:hasTrace ?changeMETrace . }
            OPTIONAL { ?changeO addr:hasTrace ?changeOTrace . }
            BIND(IF(BOUND(?changeMETrace), "true"^^xsd:boolean, "false"^^xsd:boolean) AS ?hasChangeMETrace)
            BIND(IF(BOUND(?changeOTrace), "true"^^xsd:boolean, "false"^^xsd:boolean) AS ?hasChangeOTrace)
            FILTER(!(?hasChangeMETrace && ?hasChangeOTrace))
        } 
    }

    ?changeME addr:dependsOn ?eventME .
    ?changeO addr:dependsOn ?eventO .

    BIND(URI(CONCAT(STR(URI(factoids:)), "EV_", STRUUID())) AS ?newEvent)
    BIND(URI(CONCAT(STR(URI(factoids:)), "CG_", STRUUID())) AS ?newChange)

    BIND(IF(!?hasChangeMETrace && !?hasChangeOTrace, ?newChange, IF(!?hasChangeMETrace, ?changeO, ?changeME)) AS ?change)
    BIND(IF(!?hasChangeMETrace && !?hasChangeOTrace, ?newEvent, IF(!?hasChangeMETrace, ?eventO, ?eventME)) AS ?event)

    OPTIONAL {
        ?eventME addr:hasTime ?timeME .
        FILTER(!?hasChangeMETrace)
    }
    OPTIONAL {
        ?eventO addr:hasTime ?timeO .
        FILTER(!?hasChangeOTrace)
    }
    OPTIONAL {
        ?changeO addr:makesEffective ?vME .
        FILTER(!?hasChangeOTrace)
    }
    OPTIONAL {
        ?changeME addr:outdates ?vO .
        FILTER(!?hasChangeMETrace)
    }
    OPTIONAL {
        BIND(?changeME AS ?toRemoveChangeME)
        FILTER(!?hasChangeMETrace)
    }
    OPTIONAL {
        BIND(?changeO AS ?toRemoveChangeO)
        FILTER(!?hasChangeOTrace)
    }
}
"""
gd.update_query(query,graphdb_url, facts_repository_name)

<Response [204]>

In [41]:
# Choose the versions to be merged

def to_be_merged_with(graphdb_url:URIRef, repository_name:str, facts_named_graph_uri:URIRef, inter_sources_name_graph_uri:URIRef, tmp_named_graph_uri:URIRef):
    """
    Flag, with `addr:toBeMergedWith` triples, the attribute versions which have to be merged together.

    Three SPARQL updates are sent in order, all of them inserting in the temporary named graph:
      1. every `addr:AttributeVersion` is flagged as mergeable with itself;
      2. the two versions around a change without trace (the one it outdates and the one it makes
         effective) are flagged as mergeable with each other when one of their traces has the same
         value (`addr:sameVersionValueAs`, or the very same trace) and none of their traces are
         stated as different (`addr:differentVersionValueFrom`);
      3. the relation is closed transitively (`addr:toBeMergedWith+`).

    Args:
        graphdb_url: GraphDB URL, forwarded to `gd.update_query`.
        repository_name: name of the repository, forwarded to `gd.update_query`.
        facts_named_graph_uri: not used by this function.
        inter_sources_name_graph_uri: not used by this function.
        tmp_named_graph_uri: URI of the named graph receiving the `addr:toBeMergedWith` triples.

    Returns:
        None.
    """
    prefixes = np.query_prefixes
    tmp_graph = tmp_named_graph_uri.n3()

    # Step 1: reflexive links, so that every version belongs to (at least) its own merge group.
    reflexive_links_update = prefixes + f"""
        INSERT {{
            GRAPH ?g {{
                ?vers addr:toBeMergedWith ?vers .
            }}
        }} WHERE {{
            BIND({tmp_graph} AS ?g)
            ?vers a addr:AttributeVersion .
        }}
    """

    # Step 2: symmetric links between the versions located on both sides of a change without trace,
    # provided their values are similar and never stated as different.
    similar_neighbours_update = prefixes + f"""
    INSERT {{
        GRAPH ?gt {{
            ?vME addr:toBeMergedWith ?vO .
            ?vO addr:toBeMergedWith ?vME . 
        }}
    }}
    WHERE {{
        BIND({tmp_graph} AS ?gt)
        ?change a addr:AttributeChange ; addr:makesEffective ?vME ; addr:outdates ?vO .
        FILTER NOT EXISTS {{ ?change addr:hasTrace ?changeTrace . }}
        ?vME addr:hasTrace ?vMETrace .
        ?vO addr:hasTrace ?vOTrace .
        {{ ?vMETrace addr:sameVersionValueAs ?vOTrace . }} UNION {{ FILTER(sameTerm(?vMETrace, ?vOTrace)) }}
        MINUS {{
            ?vME addr:hasTrace ?vMETrace2 .
            ?vO addr:hasTrace ?vOTrace2 .
            ?vMETrace2 addr:differentVersionValueFrom ?vOTrace2 .
        }}
    }}
    """

    # Step 3: transitive closure of the relation
    # (if v1 addr:toBeMergedWith v2 and v2 addr:toBeMergedWith v3, then v1 addr:toBeMergedWith v3).
    transitive_closure_update = prefixes + f"""
        INSERT {{
            GRAPH ?g {{ ?attrVers1 addr:toBeMergedWith ?attrVers2 . }}
        }} WHERE {{
            BIND({tmp_graph} AS ?g)
            ?attrVers1 addr:toBeMergedWith+ ?attrVers2 .
        }}
    """

    ordered_updates = (reflexive_links_update, similar_neighbours_update, transitive_closure_update)
    for sparql_update in ordered_updates:
        gd.update_query(sparql_update, graphdb_url, repository_name)

def merge_attribute_versions_to_be_merged(graphdb_url:URIRef, repository_name:str, facts_named_graph_uri:URIRef, inter_sources_name_graph_uri:URIRef, tmp_named_graph_uri:URIRef):
    """
    Merge the attribute versions flagged with `addr:toBeMergedWith` into new versions of the facts graph.

    More than two versions may be similar to each other. To detect every group of similar versions,
    each version is associated with a value built from the URIs of the versions it has to be merged with.
    If v1 is similar to v2, v3 and v4, this value looks like `uriV1|uriV2|uriV3|uriV4`, where `uriVi`
    is the URI of version i, and v2, v3 and v4 are expected to get the same value.
    The created triple is `<v1 addr:versMergeVal "uriV1|uriV2|uriV3|uriV4">`. This step is done by `query1`.

    The following queries then create:
      * `query2`: one merged version (in the facts graph) per distinct `addr:versMergeVal`;
      * `query3`: changes and events between the merged versions;
      * `query4`: time instants of the created events;
      * `query5`: traces of the created elements (in the inter-sources graph).
    Lastly, `msp.transfer_version_values_to_roots` is called on the facts graph.

    Args:
        graphdb_url: GraphDB URL, forwarded to `gd.update_query`.
        repository_name: name of the repository, forwarded to `gd.update_query`.
        facts_named_graph_uri: URI of the named graph receiving the merged versions, changes, events and times.
        inter_sources_name_graph_uri: URI of the named graph receiving the `addr:hasTrace` triples.
        tmp_named_graph_uri: URI of the named graph storing the working triples
            (`addr:versMergeVal`, `addr:createdFrom`).

    Returns:
        None.
    """

    # Named graphs as SPARQL terms. They come from the function parameters so that the queries
    # follow the caller's configuration instead of hard-coded server/repository URIs.
    facts_graph = facts_named_graph_uri.n3()
    tmp_graph = tmp_named_graph_uri.n3()
    inter_sources_graph = inter_sources_name_graph_uri.n3()

    # For each version, we create a value (versMergeVal) which is the fusion of the URIs of versions that are similar.
    # NOTE(review): SPARQL does not guarantee the order in which GROUP_CONCAT concatenates values, and
    # `ORDER BY ?vers2` is applied after grouping. Check that similar versions really get the same value.
    query1 = np.query_prefixes + f"""
        INSERT {{
            GRAPH ?g {{ ?vers1 addr:versMergeVal ?versMergeVal }}
        }} WHERE {{
            BIND({tmp_graph} AS ?g)
            {{
                SELECT ?vers1 (GROUP_CONCAT(STR(?vers2) ; separator="|") as ?versMergeVal) WHERE {{
                    ?vers1 addr:toBeMergedWith ?vers2 .
                }}
                GROUP BY ?vers1 ORDER BY ?vers2
            }}
        }}
    """

    # Creation of merged attribute versions.
    # The BIND minting `?rootAttrVers` is placed right after the sub-select so that STRUUID() is
    # evaluated once per distinct `?versMergeVal` (a BIND applies to the solutions of the patterns
    # preceding it in the group): every version of a group is then attached to the same merged version.
    query2 = np.query_prefixes + f"""
        INSERT {{
            GRAPH ?gf {{
                ?attr addr:hasAttributeVersion ?rootAttrVers .
                ?rootAttrVers a addr:AttributeVersion .
            }}
            GRAPH ?gt {{
                ?rootAttrVers addr:createdFrom ?attrVers .
            }}
        }}
        WHERE {{
            BIND({facts_graph} AS ?gf)
            BIND({tmp_graph} AS ?gt)
            {{
                SELECT DISTINCT ?versMergeVal WHERE {{
                    BIND({tmp_graph} AS ?gt)
                    GRAPH ?gt {{
                        ?attrVers a addr:AttributeVersion ; addr:versMergeVal ?versMergeVal .
                    }}
                }}
            }}
            BIND(URI(CONCAT(STR(URI(facts:)), "AV_", STRUUID())) AS ?rootAttrVers)
            ?attr addr:hasAttributeVersion ?attrVers .
            ?attrVers addr:versMergeVal ?versMergeVal ; addr:hasTrace ?attrVersTrace.
        }}
"""

    # Creation of changes between consecutive merged attribute versions.
    # A change is kept when it separates two different merged versions, or when it only makes
    # effective (resp. only outdates) a version.
    query3 = np.query_prefixes + f"""

INSERT {{
    GRAPH ?gf {{
    	?newChange a addr:AttributeChange ; addr:appliedTo ?attr ; addr:dependsOn ?newEvent ; addr:makesEffective ?vME ; addr:outdates ?vO .
        ?newEvent a addr:Event .
    }}
    GRAPH ?gt {{
        ?newChange addr:createdFrom ?change .
        ?newEvent addr:createdFrom ?event .
    }}
}}
WHERE {{
    BIND({tmp_graph} AS ?gt)
    {{
        SELECT * WHERE {{
            BIND({facts_graph} AS ?gf)
            ?change a addr:AttributeChange .
            {{
                ?change addr:makesEffective ?vMETrace ; addr:outdates ?vOTrace .
                ?vME addr:createdFrom ?vMETrace .
                ?vO addr:createdFrom ?vOTrace .
                FILTER(!sameTerm(?vME, ?vO))
            }} UNION {{
                ?change addr:makesEffective ?vMETrace .
                ?vME addr:createdFrom ?vMETrace .
                FILTER NOT EXISTS {{ ?change addr:outdates ?vOTrace . }}
            }} UNION {{
                ?change addr:outdates ?vOTrace .
                ?vO addr:createdFrom ?vOTrace .
                FILTER NOT EXISTS {{ ?change addr:makesEffective ?vMETrace . }}
            }}
        }}
    }}
    ?change addr:appliedTo ?attr ; addr:dependsOn ?event .
    BIND(URI(CONCAT(STR(URI(facts:)), "CG_", STRUUID())) AS ?newChange)
	BIND(URI(CONCAT(STR(URI(facts:)), "EV_", STRUUID())) AS ?newEvent)
}}

"""

    # Creation of the time instants of the created events: the precise time (addr:hasTime) of the
    # source event when it exists, otherwise its addr:hasTimeBefore / addr:hasTimeAfter bounds.
    query4 = np.query_prefixes + f"""
INSERT {{
    GRAPH ?gf {{
        ?newTime a addr:CrispTimeInstant .
        ?event ?timeProp ?newTime .
    }}
    GRAPH ?gt {{
        ?newTime addr:createdFrom ?timeTrace . 
    }}
}} WHERE {{ 
    {{
        SELECT * WHERE {{
            ?event a addr:Event ; addr:createdFrom ?eventTrace .
            {{
                BIND(addr:hasTime AS ?timeProp)
                ?eventTrace addr:hasTime ?timeTrace .
            }} UNION {{
                ?eventTrace ?timeProp ?timeTrace .
                FILTER (?timeProp IN (addr:hasTimeBefore, addr:hasTimeAfter))
                FILTER NOT EXISTS {{ ?eventTrace addr:hasTime ?time }}
            }}
        }}
    }}
    BIND({tmp_graph} AS ?gt)
    BIND({facts_graph} AS ?gf)
    BIND(URI(CONCAT(STR(URI(facts:)), "TI_", STRUUID())) AS ?newTime)
}}
"""

    # Transfer traces from temporary elements to facts ones
    query5 = np.query_prefixes + f"""
INSERT {{
    GRAPH ?gi {{
        ?elem addr:hasTrace ?elemTrace .
    }}
}} WHERE {{
	BIND({inter_sources_graph} AS ?gi)
    ?elem addr:createdFrom ?tmpElem .
    ?tmpElem addr:hasTrace ?elemTrace .
}}
"""

    # The updates must be sent in this order: each one reads triples inserted by the previous ones.
    queries = [query1, query2, query3, query4, query5]
    for query in queries:
        gd.update_query(query, graphdb_url, repository_name)

    msp.transfer_version_values_to_roots(graphdb_url, repository_name, facts_named_graph_uri)

# First flag the versions which have to be merged (addr:toBeMergedWith triples),
# then build the merged versions from these flags: the order of the two calls matters.
to_be_merged_with(graphdb_url, facts_repository_name, facts_named_graph_uri, inter_sources_name_graph_uri, tmp_named_graph_uri)
merge_attribute_versions_to_be_merged(graphdb_url, facts_repository_name, facts_named_graph_uri, inter_sources_name_graph_uri, tmp_named_graph_uri)

In [42]:
# Transfer factoid information to facts (written in the facts named graph)
msp.transfer_elements_to_roots(graphdb_url, facts_repository_name, facts_named_graph_uri)

# Remove the temporary named graph (which is only used for construction).
# This must stay the last step: the cells above read their working triples from this graph.
gd.remove_named_graph_from_uri(tmp_named_graph_uri)

<Response [204]>