## Variables

In [None]:
import os
import shlex
import subprocess
import urllib.parse as up
import uuid

# --- File names -----------------------------------------------------------
ont_file = "address_ont.ttl"
data_file = "voies-paris-2023.csv"
query_file = "query.txt"
mapping_file = "mapping.json"
export_file = "addresses-temp.ttl"
out_file = "addresses.ttl"

# --- Folders --------------------------------------------------------------
temp_folder = "tmp_files"
source_folder = "sources"
mapping_folder = "mappings"

# Make sure the folder receiving the generated files exists.
os.makedirs(temp_folder, exist_ok=True)

export_format = "TURTLE"

project_name = "voies_paris_hist" # This repository must exist in graphdb

# --- Services -------------------------------------------------------------
ontorefine_url = "http://localhost:7333"
graphdb_url = "http://localhost:7200"

# Path to the Ontotext Refine command line tool. Keep exactly one of the
# alternatives below active, depending on the machine:
# ontorefine_cmd = "ontorefine-cli"
# ontorefine_cmd = "/opt/ontotext-refine/lib/app/bin/ontorefine-cli"
# Raw string: the backslash is a shell-style escape for the space in the
# path and must reach the command line unchanged.
ontorefine_cmd = r"/Applications/Ontotext\ Refine.app/Contents/app/bin/ontorefine-cli"

# One entry per source: the CSV file (looked up in `source_folder`), its
# mapping file (looked up in `mapping_folder`) and the name of the graph
# receiving the generated triples.
sources = [
    {"filename": "andriveau_1849_am.csv", "mapping_file": "mapping_andriveau_1849_am.json", "graphname": "andriveau1849"},
    {"filename": "atlas_municipal_1888_am.csv", "mapping_file": "mapping_atlas_municipal_1888_am.json", "graphname": "atlasMunicipal1888"},
    {"filename": "piquet_1826_am.csv", "mapping_file": "mapping_piquet_1826_am.json", "graphname": "piquet1826"},
    # NOTE(review): "veriquet1791" looks like a typo for "verniquet1791"; kept
    # as is because renaming it changes the graph the data is stored in.
    {"filename": "verniquet_1791_am.csv", "mapping_file": "mapping_verniquet_1791_am.json", "graphname": "veriquet1791"},
]

## Functions to create / import graphs

### Create a TTL file with Ontorefine from a CSV file

In [None]:
def get_export_file_from_ontorefine(data_file, mapping_file, export_file):
    """Convert a data file to RDF with the Ontotext Refine CLI.

    Runs three CLI commands in sequence: ``create`` (loads ``data_file``
    into a new project named ``project_name``), ``rdf`` (applies
    ``mapping_file`` and writes the CLI output to ``export_file``) and
    ``delete`` (removes the temporary project).

    The Ontotext Refine server at ``ontorefine_url`` must already be running.

    Args:
        data_file: Path of the file to load into the temporary project.
        mapping_file: Path of the mapping passed to the ``rdf`` command.
        export_file: Path of the file receiving the output of ``rdf``.

    Raises:
        subprocess.CalledProcessError: If ``create`` or ``rdf`` exits with a
            non-zero status.
        RuntimeError: If no project id can be read from the ``create`` output.
    """
    # `ontorefine_cmd` may contain shell-style escapes (e.g. "\ " for a space
    # in the path). Split it with POSIX shell rules once, then pass every
    # other argument as its own argv item so no shell quoting is needed.
    cli = shlex.split(ontorefine_cmd)

    created = subprocess.run(
        [*cli, "create", data_file, "-u", ontorefine_url, "-n", project_name],
        stdout=subprocess.PIPE,
        text=True,
        check=True,
    )

    # The project id is read from the message printed by the CLI: it is the
    # text that follows the first ": ".
    parts = created.stdout.split(": ")
    project_id = parts[1].replace("\n", "").strip() if len(parts) > 1 else ""
    if not project_id:
        raise RuntimeError(
            f"Could not read a project id from ontorefine-cli output: {created.stdout!r}"
        )

    try:
        # Stream the CLI output straight into the file as bytes: no
        # decode/encode round trip and no open pipe left behind.
        with open(export_file, "wb") as ttl:
            subprocess.run(
                [*cli, "rdf", project_id, "-u", ontorefine_url, "-m", mapping_file],
                stdout=ttl,
                check=True,
            )
    finally:
        # Always remove the temporary project, even when the export failed.
        # A failing clean-up is not allowed to mask the original error.
        subprocess.run(
            [*cli, "delete", project_id, "-u", ontorefine_url],
            check=False,
        )

### Import created ttl file in GraphDB

In [None]:
def import_ttl_file_in_graphdb(graphdb_url, repository_id, ttl_file, graph_name=None):
    """Upload a Turtle file to a GraphDB repository using ``curl``.

    Args:
        graphdb_url: Base URL of the GraphDB server.
        repository_id: Id of the target repository.
        ttl_file: Path of the Turtle file to upload.
        graph_name: Name appended to the repository's ``rdf-graphs/``
            endpoint. When ``None``, the file is posted to the repository's
            ``statements`` endpoint instead.

    Raises:
        subprocess.CalledProcessError: If ``curl`` exits with a non-zero
            status. NOTE(review): curl is invoked without ``--fail``, so an
            HTTP error response is presumably still reported as success.
    """
    repository_url = f"{graphdb_url}/repositories/{repository_id}"
    if graph_name is None:
        target_url = f"{repository_url}/statements"
    else:
        target_url = f"{repository_url}/rdf-graphs/{graph_name}"

    # Arguments are passed as a list (no shell), so paths containing spaces,
    # quotes or `$` reach curl unchanged.
    subprocess.run(
        [
            "curl",
            "-X", "POST",
            "-H", "Content-Type:application/x-turtle",
            "-T", ttl_file,
            target_url,
        ],
        check=True,
    )

## Process to create / import data

### Remove all data from the repository whose id is `project_name`

In [None]:
# Send a DELETE request to the `statements` endpoint of the `project_name`
# repository. The command is run without a shell and a curl failure (for
# instance an unreachable server) raises instead of being ignored, so the
# imports below are not stacked on top of stale data.
subprocess.run(
    [
        "curl",
        "-X", "DELETE",
        "-H", "Content-Type:application/x-turtle",
        f"{graphdb_url}/repositories/{project_name}/statements",
    ],
    check=True,
)

### Import ontology

In [None]:
# Post the ontology file to the `statements` endpoint of the `project_name`
# repository.
abs_ont_file = os.path.abspath(ont_file)

# The path is passed as its own argument (no shell), so directories
# containing spaces, quotes or `$` do not break the command; a curl failure
# raises instead of being ignored.
subprocess.run(
    [
        "curl",
        "-X", "POST",
        "-H", "Content-Type:application/x-turtle",
        "-T", abs_ont_file,
        f"{graphdb_url}/repositories/{project_name}/statements",
    ],
    check=True,
)

### Import data created by Ontorefine from the CSV files in `source_folder`

In [None]:
# For every configured source: convert its CSV file to Turtle with
# Ontorefine, then load the result into the graph named after the source.
# NOTE: this loop rebinds the module-level `data_file`, `mapping_file` and
# `export_file` variables.
for source in sources:
    data_file = os.path.join(source_folder, source["filename"])
    abs_data_file = os.path.abspath(data_file)
    mapping_file = os.path.abspath(os.path.join(mapping_folder, source["mapping_file"]))
    graph_name = source["graphname"]

    # Swap only the extension of the file name; a plain str.replace on the
    # joined path would also rewrite ".csv" inside a folder or base name.
    export_file = os.path.join(temp_folder, os.path.splitext(source["filename"])[0] + ".ttl")

    get_export_file_from_ontorefine(abs_data_file, mapping_file, export_file)
    import_ttl_file_in_graphdb(graphdb_url, project_name, export_file, graph_name)