In [1]:
import pandas as pd
import re
from jinja2 import Template
from pathlib import Path
import os

# Column names of the configuration CSV files, with the meaning of each column.
# NOTE(review): COLUMNS is not referenced by the code visible in this notebook.
COLUMNS = ["class", # the class of the diffed instance
           "property", # the property of the instance (i.e. the relation connecting the instance URI to the value)
           "object property", # makes a two-level diff possible, which is needed for reified structures
           "modifiable", # indicates (1) whether a query for the update operation shall be generated
           "language dependent" # indicates (1) whether the modification values need to be distinguished by language
          ]

# Per-column replacement values handed to DataFrame.fillna() when a CSV cell is empty.
NA_VALUES = {"class":"","property":"","object property":"","modifiable":0,"language dependent":0}

# Configuration CSV files processed by this notebook and the base folder for the generated queries.
SKOS_CORE = "skos_core.csv"
SRC_AP = "src_ap.csv"
OUTPUT_FOLDER = "output/"

# SPARQL template setup

In [7]:
# Header comment and namespace prefix declarations shared by all generated queries.
# This text is concatenated in front of each query template before the result is
# compiled into a Jinja2 Template, so everything inside the string ends up verbatim
# in the generated .rq files.
PREFIXES = """
# ###################
# SPARQL query generated automatically with 
# https://github.com/eu-vocabularies/skos-history-query-generator
#
# This file SHOULD NOT be edited by hand. 
#
# If you have any suggestions or find any issues please report them at
# https://github.com/eu-vocabularies/skos-history-query-generator/issues
# 
# ###################

# basic namespaces
prefix owl: <http://www.w3.org/2002/07/owl#> 
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> 
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> 
prefix xsd: <http://www.w3.org/2001/XMLSchema#> 

# EuroVoc and SKOS-AP/SRC-AP specific namespaces
prefix vb: <http://art.uniroma2.it/ontologies/vocbench#> 
prefix eurovoc: <http://eurovoc.europa.eu/>
prefix euvoc: <http://publications.europa.eu/ontology/euvoc#> 

prefix label: <http://publications.europa.eu/resource/authority/label-type/> 
prefix status: <http://publications.europa.eu/resource/authority/concept-status/> 
prefix align: <http://knowledgeweb.semanticweb.org/heterogeneity/alignment#>

# some popular namespaces
prefix skos: <http://www.w3.org/2004/02/skos/core#> 
prefix skosxl: <http://www.w3.org/2008/05/skos-xl#> 
prefix dct: <http://purl.org/dc/terms/> 
prefix dc: <http://purl.org/dc/elements/1.1/> 
prefix prov: <http://www.w3.org/ns/prov#> 

prefix lemon: <http://lemon-model.net/lemon#> 
prefix lexinfo: <http://www.lexinfo.net/ontology/2.0/lexinfo#> 
prefix lexvo: <http://lexvo.org/ontology#> 

# legacy namespaces
prefix oldevo: <http://eurovoc.europa.eu/schema#> 
prefix thes: <http://purl.org/iso25964/skos-thes#>

# versioning namespaces 
PREFIX dsv: <http://purl.org/iso25964/DataSet/Versioning#>
PREFIX sd: <http://www.w3.org/ns/sparql-service-description#>
PREFIX sh: <http://purl.org/skos-history/>
PREFIX xhv: <http://www.w3.org/1999/xhtml/vocab#>
"""

# Jinja2 template of the SPARQL query listing the instances inserted between two versions.
# Template variables: versionHistoryGraph, oldVersion, newVersion, cls, lang.
# Each variable that is undefined or None is rendered as `undef` in the VALUES row;
# the language FILTER on the label is only emitted when `lang` is given.
INSERTED_INSTANCES_QUERY = """
#
# identify instances inserted with a certain version
#
SELECT distinct ?class (?instance AS ?addedInstance) (str(?prefLabel) AS ?addedInstanceLabel)
WHERE {
  GRAPH ?versionHistoryGraph {
    # parameters
    VALUES ( ?versionHistoryGraph ?oldVersion ?newVersion ?class ?language ) {
        ( {% if versionHistoryGraph is defined and versionHistoryGraph != None %}{{ versionHistoryGraph }} {% else %}undef{% endif %} 
          {% if oldVersion is defined and oldVersion != None %}{{ oldVersion }} {% else %}undef{% endif %} 
          {% if newVersion is defined and newVersion != None %}{{ newVersion }} {% else %}undef{% endif %}  
          {% if cls is defined and cls != None %}{{ cls }} {% else %}undef{% endif %}
          {% if lang is defined and lang != None %}"{{ lang }}"{% else %}undef{% endif %}
        )
    }
    # get the current and the previous version as default versions
    ?versionset dsv:currentVersionRecord/xhv:prev/dc:identifier ?previousVersion .
    ?versionset dsv:currentVersionRecord/dc:identifier ?latestVersion .
    # select the versions to actually use
    BIND(coalesce(?oldVersion, ?previousVersion) AS ?oldVersionSelected)
    BIND(coalesce(?newVersion, ?latestVersion) AS ?newVersionSelected)
    # get the delta and via that the relevant graphs
    ?delta a sh:SchemeDelta ;
      sh:deltaFrom/dc:identifier ?oldVersionSelected ;
      sh:deltaTo/dc:identifier ?newVersionSelected ;
      sh:deltaFrom/sh:usingNamedGraph/sd:name ?oldVersionGraph ;
      sh:deltaTo/sh:usingNamedGraph/sd:name ?newVersionGraph .
    ?insertions a sh:SchemeDeltaInsertions ;
      dct:isPartOf ?delta ;
      sh:usingNamedGraph/sd:name ?insertionsGraph .
    ?deletions a sh:SchemeDeltaDeletions ;
      dct:isPartOf ?delta ;
      sh:usingNamedGraph/sd:name ?deletionsGraph .
  }
  # for each inserted instance a class statement should exist ...
  GRAPH ?insertionsGraph {
    ?instance a ?class .
    optional {
      ?instance skos:prefLabel ?prefLabel .
      {% if lang is defined and lang != None %}
      # restrict output to a certain language
      FILTER (lang(?prefLabel) = "{{ lang }}")
      {% endif %}
    }
  }
  # ... and the instance must not exist in the old version
  FILTER NOT EXISTS {
    GRAPH ?oldVersionGraph {
      ?instance ?p [] .
    }
  }
}
ORDER BY ?instance
"""

# Jinja2 template of the SPARQL query listing the instances deleted between two versions.
# Template variables: versionHistoryGraph, oldVersion, newVersion, cls, lang.
# Each variable that is undefined or None is rendered as `undef` in the VALUES row;
# the language FILTER on the label is only emitted when `lang` is given.
#
# The label lookup is bound to ?instance (it previously used the otherwise unbound
# variable ?concept, which multiplied every deleted instance by all labels found in the
# old version graph) and is optional, so that deleted instances without a matching
# prefLabel are still reported, in line with the inserted-instances query.
DELETED_INSTANCES_QUERY = """
#
# identify concepts deleted with a certain version
#
# (In published vocabularies, the deletion of concepts should be regarded
# as bad practice, because they still may be referenced elsewhere.
# Cosider using owl:deprecated instead)
#
SELECT distinct ?class (?instance AS ?deletedInstance) (str(?prefLabel) AS ?deletedInstanceLabel)
WHERE {
  GRAPH ?versionHistoryGraph {
    # parameters
    VALUES ( ?versionHistoryGraph ?oldVersion ?newVersion ?class ?language ) {
        ( {% if versionHistoryGraph is defined and versionHistoryGraph != None %}{{ versionHistoryGraph }} {% else %}undef{% endif %} 
          {% if oldVersion is defined and oldVersion != None %}{{ oldVersion }} {% else %}undef{% endif %} 
          {% if newVersion is defined and newVersion != None %}{{ newVersion }} {% else %}undef{% endif %} 
          {% if cls is defined and cls != None %}{{ cls }} {% else %}undef{% endif %}
          {% if lang is defined and lang != None %}"{{ lang }}"{% else %}undef{% endif %}
        )
    }
    # get the current and the previous version as default versions
    ?versionset dsv:currentVersionRecord/xhv:prev/dc:identifier ?previousVersion .
    ?versionset dsv:currentVersionRecord/dc:identifier ?latestVersion .
    # select the versions to actually use
    BIND(coalesce(?oldVersion, ?previousVersion) AS ?oldVersionSelected)
    BIND(coalesce(?newVersion, ?latestVersion) AS ?newVersionSelected)
    # get the delta and via that the relevant graphs
    ?delta a sh:SchemeDelta ;
      sh:deltaFrom/dc:identifier ?oldVersionSelected ;
      sh:deltaTo/dc:identifier ?newVersionSelected ;
      sh:deltaFrom/sh:usingNamedGraph/sd:name ?oldVersionGraph ;
      sh:deltaTo/sh:usingNamedGraph/sd:name ?newVersionGraph .
    ?insertions a sh:SchemeDeltaInsertions ;
      dct:isPartOf ?delta ;
      sh:usingNamedGraph/sd:name ?insertionsGraph .
    ?deletions a sh:SchemeDeltaDeletions ;
      dct:isPartOf ?delta ;
      sh:usingNamedGraph/sd:name ?deletionsGraph .
  }
  # for each deleted instance, a deleted instiation statement must exist
  GRAPH ?deletionsGraph {
    ?instance a ?class .
  }
  # ... without anything remaining for this instance in the current version
  FILTER NOT EXISTS {
    GRAPH ?newVersionGraph {
      ?instance ?p []
    }
  }
  # get the label
  GRAPH ?oldVersionGraph {
    ?instance a ?class .
    optional {
        ?instance skos:prefLabel ?prefLabel .
        {% if lang is defined and lang != None %}
        # restrict output to a certain language
        FILTER (lang(?prefLabel) = "{{ lang }}")
        {% endif %}
    }
  }
}
ORDER BY ?instance
"""

# Jinja2 template of the SPARQL query listing property values added between two versions.
# Template variables: versionHistoryGraph, oldVersion, newVersion, cls, prop, lang.
# Each variable that is undefined or None is rendered as `undef` in the VALUES row;
# the language FILTER on the instance label is only emitted when `lang` is given.
INSERTED_DATA_PROPERTIES_QUERY = """
#
# Show all added instance/property statements in the current version
#
SELECT DISTINCT ?instance (str(?prefLabel) as ?instanceLabel) ?property (?value AS ?addedValue)
WHERE {
  GRAPH ?versionHistoryGraph {
    # parameters
    VALUES ( ?versionHistoryGraph ?oldVersion ?newVersion ?class ?property ?language) {
      ( {% if versionHistoryGraph is defined and versionHistoryGraph != None %}{{ versionHistoryGraph }} {% else %}undef{% endif %} 
        {% if oldVersion is defined and oldVersion != None %}{{ oldVersion }} {% else %}undef{% endif %} 
        {% if newVersion is defined and newVersion != None %}{{ newVersion }} {% else %}undef{% endif %} 
        {% if cls is defined and cls != None %}{{ cls }} {% else %}undef{% endif %} 
        {% if prop is defined and prop != None %}{{ prop }} {% else %}undef{% endif %}
        {% if lang is defined and lang != None %}"{{ lang }}"{% else %}undef{% endif %}
      )
    }
    # get the current and the previous version as default versions
    ?versionset dsv:currentVersionRecord/xhv:prev/dc:identifier ?previousVersion .
    ?versionset dsv:currentVersionRecord/dc:identifier ?latestVersion .
    # select the versions to actually use
    BIND(coalesce(?oldVersion, ?previousVersion) AS ?oldVersionSelected)
    BIND(coalesce(?newVersion, ?latestVersion) AS ?newVersionSelected)
    # get the delta and via that the relevant graphs
    ?delta a sh:SchemeDelta ;
      sh:deltaFrom/dc:identifier ?oldVersionSelected ;
      sh:deltaTo/dc:identifier ?newVersionSelected ;
      sh:deltaFrom/sh:usingNamedGraph/sd:name ?oldVersionGraph ;
      sh:deltaTo/sh:usingNamedGraph/sd:name ?newVersionGraph ;
      dct:hasPart ?insertions ;
      dct:hasPart ?deletions .
    ?deletions a sh:SchemeDeltaDeletions ;
      sh:usingNamedGraph/sd:name ?deletionsGraph .
    ?insertions a sh:SchemeDeltaInsertions ;
      sh:usingNamedGraph/sd:name ?insertionsGraph .
  }
  # get all property values
  GRAPH ?insertionsGraph {
    [] ?property ?value .
  }
  # ... which were not attached to some (other) instance before
  FILTER NOT EXISTS {
    GRAPH ?deletionsGraph {
      [] ?property ?value .
    }
  }  
  GRAPH ?newVersionGraph {
    # get instances with those property values
    ?instance a ?class .
    ?instance ?property ?value .
    optional {
      ?instance skos:prefLabel ?prefLabel .
      {% if lang is defined and lang != None %}
      #restrict prefLabel to a certain language
      FILTER (lang(?prefLabel) = "{{ lang }}")
      {% endif %}
    }
  }
}
ORDER BY ?instance ?value
"""

# Jinja2 template of the SPARQL query listing property values deleted between two versions.
# Template variables: versionHistoryGraph, oldVersion, newVersion, cls, prop, lang.
# Each variable that is undefined or None is rendered as `undef` in the VALUES row;
# the language FILTER on the instance label is only emitted when `lang` is given.
DELETED_DATA_PROPERTIES_QUERY = """
#
# Show all deleted instance/property statements in the current version
#
SELECT DISTINCT ?instance (str(?prefLabel) as ?instanceLabel) ?property (?value AS ?deletedValue)
WHERE {
  GRAPH ?versionHistoryGraph {
    # parameters
    VALUES ( ?versionHistoryGraph ?oldVersion ?newVersion ?class ?property ?language) {
      ( {% if versionHistoryGraph is defined and versionHistoryGraph != None %}{{ versionHistoryGraph }} {% else %}undef{% endif %} 
        {% if oldVersion is defined and oldVersion != None %}{{ oldVersion }} {% else %}undef{% endif %} 
        {% if newVersion is defined and newVersion != None %}{{ newVersion }} {% else %}undef{% endif %} 
        {% if cls is defined and cls != None %}{{ cls }} {% else %}undef{% endif %} 
        {% if prop is defined and prop != None %}{{ prop }} {% else %}undef{% endif %}
        {% if lang is defined and lang != None %}"{{ lang }}"{% else %}undef{% endif %}
      )
    }
    # get the current and the previous version as default versions
    ?versionset dsv:currentVersionRecord/xhv:prev/dc:identifier ?previousVersion .
    ?versionset dsv:currentVersionRecord/dc:identifier ?latestVersion .
    # select the versions to actually use
    BIND(coalesce(?oldVersion, ?previousVersion) AS ?oldVersionSelected)
    BIND(coalesce(?newVersion, ?latestVersion) AS ?newVersionSelected)
    # get the delta and via that the relevant graphs
    ?delta a sh:SchemeDelta ;
      sh:deltaFrom/dc:identifier ?oldVersionSelected ;
      sh:deltaTo/dc:identifier ?newVersionSelected ;
      sh:deltaFrom/sh:usingNamedGraph/sd:name ?oldVersionGraph ;
      sh:deltaTo/sh:usingNamedGraph/sd:name ?newVersionGraph ;
      dct:hasPart ?insertions ;
      dct:hasPart ?deletions .
    ?deletions a sh:SchemeDeltaDeletions ;
      sh:usingNamedGraph/sd:name ?deletionsGraph .
    ?insertions a sh:SchemeDeltaInsertions ;
      sh:usingNamedGraph/sd:name ?insertionsGraph .
  }
  # get all deleted propoerty values
  GRAPH ?deletionsGraph {
    [] ?property ?value .
  }
  # ... which were not attached to some (other) instance now
  FILTER NOT EXISTS {
    GRAPH ?insertionsGraph {
      [] ?property ?value .
    }
  }  
  GRAPH ?oldVersionGraph {
    # get instances 
    ?instance a ?class .
    ?instance ?property ?value .
    optional {
      ?instance skos:prefLabel ?prefLabel .
      {% if lang is defined and lang != None %}
      #restrict prefLabel to a certain language
      FILTER (lang(?prefLabel) = "{{ lang }}")
      {% endif %}
    }
  }
}
ORDER BY ?instance ?value
"""

# Jinja2 template of the SPARQL query listing values added through a two-step path
# (instance -> ?property -> intermediate node -> ?objProperty -> value) between two versions.
# Template variables: versionHistoryGraph, oldVersion, newVersion, cls, prop, obj_prop, lang.
# Each variable that is undefined or None is rendered as `undef` in the VALUES row;
# the language FILTER on the instance label is only emitted when `lang` is given.
INSERTED_REIFIED_STRUCTURE_QUERY = """
#
# Show all added instance/property/object property statements in the current version
#
SELECT DISTINCT ?instance (str(?prefLabel) as ?instanceLabel) ?property ?objProperty (?value AS ?addedValue)
WHERE {
  GRAPH ?versionHistoryGraph {
    # parameters
    VALUES ( ?versionHistoryGraph ?oldVersion ?newVersion ?class ?property ?objProperty ?language) {
      ( {% if versionHistoryGraph is defined and versionHistoryGraph != None %}{{ versionHistoryGraph }} {% else %}undef{% endif %} 
        {% if oldVersion is defined and oldVersion != None %}{{ oldVersion }}{% else %}undef{% endif %} 
        {% if newVersion is defined and newVersion != None %}{{ newVersion }}{% else %}undef{% endif %} 
        {% if cls is defined and cls != None %}{{ cls }}{% else %}undef{% endif %} 
        {% if prop is defined and prop != None %}{{ prop }}{% else %}undef{% endif %}
        {% if obj_prop is defined and obj_prop != None %}{{ obj_prop }}{% else %}undef{% endif %}
        {% if lang is defined and lang != None %}"{{ lang }}"{% else %}undef{% endif %}
      )
    }
    # get the current and the previous version as default versions
    ?versionset dsv:currentVersionRecord/xhv:prev/dc:identifier ?previousVersion .
    ?versionset dsv:currentVersionRecord/dc:identifier ?latestVersion .
    # select the versions to actually use
    BIND(coalesce(?oldVersion, ?previousVersion) AS ?oldVersionSelected)
    BIND(coalesce(?newVersion, ?latestVersion) AS ?newVersionSelected)
    # get the delta and via that the relevant graphs
    ?delta a sh:SchemeDelta ;
      sh:deltaFrom/dc:identifier ?oldVersionSelected ;
      sh:deltaTo/dc:identifier ?newVersionSelected ;
      sh:deltaFrom/sh:usingNamedGraph/sd:name ?oldVersionGraph ;
      sh:deltaTo/sh:usingNamedGraph/sd:name ?newVersionGraph ;
      dct:hasPart ?insertions ;
      dct:hasPart ?deletions .
    ?deletions a sh:SchemeDeltaDeletions ;
      sh:usingNamedGraph/sd:name ?deletionsGraph .
    ?insertions a sh:SchemeDeltaInsertions ;
      sh:usingNamedGraph/sd:name ?insertionsGraph .
  }
  # get all reified structures
  GRAPH ?insertionsGraph {
    [] ?property ?object .
    ?object ?objProperty ?value .
  }
  # ... which were not attached to some (other) instance before
  FILTER NOT EXISTS {
    GRAPH ?deletionsGraph {
        [] ?property ?object .
        ?object ?objProperty ?value .
    }
  }  
  GRAPH ?newVersionGraph {
    # get instances having that reified structure
    ?instance a ?class .
    ?instance ?property ?object .
    ?object ?objProperty ?value .
    optional {
      ?instance skos:prefLabel ?prefLabel .
      {% if lang is defined and lang != None %}
      #restrict prefLabel to a certain language
      FILTER (lang(?prefLabel) = "{{ lang }}")
      {% endif %}
    }
  }
}
ORDER BY ?instance ?value
"""

# Jinja2 template of the SPARQL query listing values deleted through a two-step path
# (instance -> ?property -> intermediate node -> ?objProperty -> value) between two versions.
# Template variables: versionHistoryGraph, oldVersion, newVersion, cls, prop, obj_prop, lang.
# Each variable that is undefined or None is rendered as `undef` in the VALUES row;
# the language FILTER on the instance label is only emitted when `lang` is given.
#
# The pattern matched in the deletions graph walks the full two-step path. It previously
# was `[] ?property ?value .`, which bound ?value to the intermediate node and therefore
# contradicted the `?object ?objProperty ?value` patterns used in the rest of the query.
DELETED_REIFIED_STRUCTURE_QUERY = """
#
# Show all deleted instance/property/object property statements in the current version
#
SELECT DISTINCT ?instance (str(?prefLabel) as ?instanceLabel) ?property ?objProperty (?value AS ?deletedValue)
WHERE {
  GRAPH ?versionHistoryGraph {
    # parameters
    VALUES ( ?versionHistoryGraph ?oldVersion ?newVersion ?class ?property ?objProperty ?language) {
      ( {% if versionHistoryGraph is defined and versionHistoryGraph != None %}{{ versionHistoryGraph }} {% else %}undef{% endif %} 
        {% if oldVersion is defined and oldVersion != None %}{{ oldVersion }} {% else %}undef{% endif %} 
        {% if newVersion is defined and newVersion != None %}{{ newVersion }} {% else %}undef{% endif %} 
        {% if cls is defined and cls != None %}{{ cls }} {% else %}undef{% endif %} 
        {% if prop is defined and prop != None %}{{ prop }} {% else %}undef{% endif %}
        {% if obj_prop is defined and obj_prop != None %}{{ obj_prop }}{% else %}undef{% endif %}
        {% if lang is defined and lang != None %}"{{ lang }}"{% else %}undef{% endif %}
      )
    }
    # get the current and the previous version as default versions
    ?versionset dsv:currentVersionRecord/xhv:prev/dc:identifier ?previousVersion .
    ?versionset dsv:currentVersionRecord/dc:identifier ?latestVersion .
    # select the versions to actually use
    BIND(coalesce(?oldVersion, ?previousVersion) AS ?oldVersionSelected)
    BIND(coalesce(?newVersion, ?latestVersion) AS ?newVersionSelected)
    # get the delta and via that the relevant graphs
    ?delta a sh:SchemeDelta ;
      sh:deltaFrom/dc:identifier ?oldVersionSelected ;
      sh:deltaTo/dc:identifier ?newVersionSelected ;
      sh:deltaFrom/sh:usingNamedGraph/sd:name ?oldVersionGraph ;
      sh:deltaTo/sh:usingNamedGraph/sd:name ?newVersionGraph ;
      dct:hasPart ?insertions ;
      dct:hasPart ?deletions .
    ?deletions a sh:SchemeDeltaDeletions ;
      sh:usingNamedGraph/sd:name ?deletionsGraph .
    ?insertions a sh:SchemeDeltaInsertions ;
      sh:usingNamedGraph/sd:name ?insertionsGraph .
  }
  # get all deleted reified structures
  GRAPH ?deletionsGraph {
    [] ?property ?object .
    ?object ?objProperty ?value .
  }
  # ... which were not attached to some (other) instance now
  FILTER NOT EXISTS {
    GRAPH ?insertionsGraph {
      [] ?property ?object .
      ?object ?objProperty ?value .
    }
  }  
  GRAPH ?oldVersionGraph {
    # get instances having that reified structure
    ?instance a ?class .
    ?instance ?property ?object .
    ?object ?objProperty ?value .
    optional {
      ?instance skos:prefLabel ?prefLabel .
      {% if lang is defined and lang != None %}
      #restrict prefLabel to a certain language
      FILTER (lang(?prefLabel) = "{{ lang }}")
      {% endif %}
    }
  }
}
ORDER BY ?instance ?value
"""

# inserted_instances_template = Template(PREFIXES+INSERTED_INSTANCES_QUERY)
# inserted_instances_template.stream(cls='skos:Concept', lang="en", versionHistoryGraph=None,oldVersion=None, newVersion=None).dump(OUTPUT_FOLDER+"temp.rq")

# deleted_instances_template = Template(PREFIXES+DELETED_INSTANCES_QUERY)
# deleted_instances_template.stream(cls='skos:Concept', lang="en", versionHistoryGraph=None,oldVersion=None, newVersion=None).dump(OUTPUT_FOLDER+"temp.rq")

# inserted_data_property_template = Template(PREFIXES+INSERTED_DATA_PROPERTIES_QUERY)
# inserted_data_property_template.stream(cls='skos:Concept', prop="skos:altLabel" , lang="en", versionHistoryGraph=None,oldVersion=None, newVersion=None).dump(OUTPUT_FOLDER+"temp.rq")

# deleted_data_property_template = Template(PREFIXES+DELETED_DATA_PROPERTIES_QUERY)
# deleted_data_property_template.stream(cls='skos:Concept', prop="skos:broader" , lang="en", versionHistoryGraph=None,oldVersion=None, newVersion=None).dump(OUTPUT_FOLDER+"temp.rq")

# inserted_reified_structure_template = Template(PREFIXES+INSERTED_REIFIED_STRUCTURE_QUERY)
# inserted_reified_structure_template.stream(cls='skos:Concept', prop="skosxl:altLabel", obj_prop="skosxl:literalValue" , lang="en", versionHistoryGraph=None,oldVersion=None, newVersion=None).dump(OUTPUT_FOLDER+"temp.rq")

# deleted_reified_structure_template = Template(PREFIXES+DELETED_REIFIED_STRUCTURE_QUERY)
# deleted_reified_structure_template.stream(cls='skos:Concept', prop="skosxl:altLabel", obj_prop="skosxl:literalValue" , lang="en", versionHistoryGraph=None,oldVersion=None, newVersion=None).dump(OUTPUT_FOLDER+"temp.rq")


# Generating the SPARQL queries
Generate SPARQL queries from the setup CSV files, to be used with the SKOS-history diff tool.

In [8]:
def camel_case_split(identifier):
    """
        Break a camelCase / PascalCase identifier into its components.

        A component ends where a lower-case letter is followed by an upper-case
        letter, where an upper-case letter is followed by an upper-case letter
        plus a lower-case letter (end of an acronym), or at the end of the string.
        Returns the components as a list of strings, in order of appearance;
        an empty identifier gives an empty list.

        Pattern taken from:
        https://stackoverflow.com/questions/29916065/how-to-do-camelcase-split-in-python
    """
    boundary_pattern = '.+?(?:(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])|$)'
    components = []
    for found in re.finditer(boundary_pattern, identifier):
        components.append(found.group(0))
    return components

def make_query_file_name(output_folder, operation,cls,prop=None,obj_prop=None):
    """
        Build the path of a generated query file.

        The file name has the form <operation>_<class>[_<property>[_<object property>]].rq,
        all lower case, where each RDF term given in short notation (prefix:name) contributes
        its local name converted from camelCase to snake_case.

        output_folder: folder (str or Path) the file is placed in.
        operation:     prefix naming the operation, e.g. "added_instance".
        cls:           class in short notation; "dummy_class" is used when no local name
                       can be derived from it.
        prop:          optional property in short notation; skipped when no local name
                       can be derived from it.
        obj_prop:      optional object property in short notation; only used when `prop`
                       contributed a name part.

        Returns the path as a string (not resolved to an absolute path).
    """
    def _slug(term):
        # snake_case local name of "prefix:name"; None when the term has no ":" separator
        # (this also covers None and empty values, whose str() holds no ":").
        pieces = str(term).split(":")
        if len(pieces) < 2:
            return None
        return "_".join(camel_case_split(pieces[1])).lower()

    name_parts = [operation.lower(), _slug(cls) or "dummy_class"]
    prop_slug = _slug(prop)
    if prop_slug:
        name_parts.append(prop_slug)
        # the object property is only meaningful as a refinement of a property
        obj_prop_slug = _slug(obj_prop)
        if obj_prop_slug:
            name_parts.append(obj_prop_slug)
    return str(Path(output_folder) / ("_".join(name_parts) + ".rq"))

def generate_class_level_queries(pd, output_folder=OUTPUT_FOLDER, lang="en"):
    """
        generate queries to check for insertions and deletions for each class in the configuration CSV.

        pd:            configuration DataFrame; its "class" column is read.
                       (The parameter name shadows the pandas module inside this function.)
        output_folder: folder the query files are written to.
        lang:          language tag used to restrict the labels in the generated queries.

        For every distinct class one "added_instance" and one "deleted_instance" query file
        is written. A blank class value is handed to the template as None, so that the
        template renders `undef` instead of leaving an empty slot in the VALUES row.
    """
    templates = (
        ("added_instance", Template(PREFIXES+INSERTED_INSTANCES_QUERY)),
        ("deleted_instance", Template(PREFIXES+DELETED_INSTANCES_QUERY)),
    )
    for cls in pd["class"].unique():
        for operation, template in templates:
            query_file = make_query_file_name(output_folder, operation, cls)
            template.stream(cls=cls or None,
                            lang=lang,
                            versionHistoryGraph=None,
                            oldVersion=None,
                            newVersion=None).dump(query_file)

def generate_property_level_queries(pd, output_folder=OUTPUT_FOLDER, lang="en"):
    """
        generate queries to check for insertions and deletions of data properties and their values for each instance in the configuration CSV.

        pd:            configuration DataFrame; the "class", "property" and
                       "object property" columns are read.
                       (The parameter name shadows the pandas module inside this function.)
        output_folder: folder the query files are written to.
        lang:          language tag used to restrict the labels in the generated queries.

        Only rows without an object property are handled here. For each of them one
        "added_property" and one "deleted_property" query file is written. Blank class or
        property values are handed to the template as None, so that the template renders
        `undef` instead of leaving an empty slot in the VALUES row.
    """
    templates = (
        ("added_property", Template(PREFIXES+INSERTED_DATA_PROPERTIES_QUERY)),
        ("deleted_property", Template(PREFIXES+DELETED_DATA_PROPERTIES_QUERY)),
    )
    for _, row in pd.iterrows():
        if row["object property"]:
            # rows with an object property describe a two-level structure, not a plain property
            continue
        cls, prop = row["class"], row["property"]
        for operation, template in templates:
            query_file = make_query_file_name(output_folder, operation, cls, prop)
            template.stream(cls=cls or None,
                            prop=prop or None,
                            lang=lang,
                            versionHistoryGraph=None,
                            oldVersion=None,
                            newVersion=None).dump(query_file)

def generate_reified_structure_queries(pd, output_folder=OUTPUT_FOLDER, lang="en"):
    """
        generate queries to check for insertions and deletions of reified structures for each instance in the configuration CSV.

        pd:            configuration DataFrame; the "class", "property" and
                       "object property" columns are read.
                       (The parameter name shadows the pandas module inside this function.)
        output_folder: folder the query files are written to.
        lang:          language tag used to restrict the labels in the generated queries.

        Only rows with an object property are handled here. For each of them one
        "added_reified" and one "deleted_reified" query file is written. Blank class or
        property values are handed to the template as None, so that the template renders
        `undef` instead of leaving an empty slot in the VALUES row.
    """
    templates = (
        ("added_reified", Template(PREFIXES+INSERTED_REIFIED_STRUCTURE_QUERY)),
        ("deleted_reified", Template(PREFIXES+DELETED_REIFIED_STRUCTURE_QUERY)),
    )
    for _, row in pd.iterrows():
        obj_prop = row["object property"]
        if not obj_prop:
            # rows without an object property describe a plain property, not a two-level structure
            continue
        cls, prop = row["class"], row["property"]
        for operation, template in templates:
            query_file = make_query_file_name(output_folder, operation, cls, prop, obj_prop)
            template.stream(cls=cls or None,
                            prop=prop or None,
                            obj_prop=obj_prop,
                            lang=lang,
                            versionHistoryGraph=None,
                            oldVersion=None,
                            newVersion=None).dump(query_file)


def generate_from_csv(config_csv, output_base_dir=OUTPUT_FOLDER):
    """
        Produce the complete set of diff queries described by one configuration CSV.

        The queries are written to <output_base_dir>/<CSV file name without extension>/,
        which is created when it does not exist yet. Empty CSV cells are replaced
        with the defaults from NA_VALUES before the queries are generated.
    """
    target_dir = Path(output_base_dir).joinpath(Path(config_csv).stem)
    target_dir.mkdir(parents=True, exist_ok=True)

    config = pd.read_csv(config_csv).fillna(NA_VALUES)

    # class level first, then plain properties, then reified structures
    generators = (
        generate_class_level_queries,
        generate_property_level_queries,
        generate_reified_structure_queries,
    )
    for generate in generators:
        generate(config, output_folder=target_dir)

In [9]:
# Generate the query sets for both configuration files; each call writes into its own
# sub-folder of OUTPUT_FOLDER, named after the CSV file (without extension).
generate_from_csv(SKOS_CORE)
generate_from_csv(SRC_AP)

## TODO

* if the configuration statement has an object property, do not generate a property query but only a reified query
* return only one result per class instance for the deleted instances, i.e. do not multiply the result set with the prefLabels
* support the modification operation for any property that is guaranteed/expected to have cardinality (1,1) or (0,1), i.e. max 1 (use the modifiable flag in the CSV)
* distinguish modifications based on language where needed (use the language dependent flag in the CSV)