In [1]:
import sys
!{sys.executable} -m pip install rdflib==7.0.0 yatter==1.1.0 morph-kgc==2.6.3

Collecting yatter==1.1.0
  Downloading yatter-1.1.0-py3-none-any.whl (21 kB)
Collecting morph-kgc==2.6.3
  Downloading morph_kgc-2.6.3-py3-none-any.whl.metadata (10 kB)
Collecting coloredlogs>=15.0.1 (from yatter==1.1.0)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.0/46.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting duckdb<0.8.0,>=0.7.0 (from morph-kgc==2.6.3)
  Downloading duckdb-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (15.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.2/15.2 MB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting elementpath<5.0.0,>=4.0.0 (from morph-kgc==2.6.3)
  Downloading elementpath-4.1.5-py3-none-any.whl.metadata (6.7 kB)
Collecting falcon<4.0.0,>=3.0.0 (from morph-kgc==2.6.3)
  Downloading falcon-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.1 MB)
[2K     [90m━━

In [2]:
from os import getcwd
from os.path import join, dirname
# Base directory for every data/mapping path built below: the parent of the
# current working directory.
_working_dir = getcwd()
folder = dirname(_working_dir)

In [3]:
import re

from rdflib import Graph

# Load RDF data from file
g = Graph()
rdf_file = join(folder, "rdf/digital-competence-framework-skos-ap-eu.rdf")
g.parse(rdf_file, format="xml")

# SPARQL query: label and identifier of every resource that has both, skipping
# resources whose label contains 'Level', sorted by label.
sparql_query = """
    PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
    PREFIX dc: <http://purl.org/dc/elements/1.1/>

    SELECT DISTINCT ?label ?id
    WHERE {
      ?s skos:prefLabel ?label ;
          dc:identifier ?id .
      FILTER (!CONTAINS(?label, 'Level'))
    }
    ORDER BY ?label
"""

# Dotted numeric code at the start of a label, e.g. "3" or "3.4" or "2.10".
_CODE_PREFIX = re.compile(r"\d+(?:\.\d+)*")


def competence_code(label):
    """Return the numeric code that prefixes a competence label.

    The code is the leading run of digits separated by single dots, so
    "1.1 ..." gives "1.1" and "1. ..." gives "1". Unlike slicing the first
    three characters, this does not truncate codes longer than three
    characters.

    If the label does not start with a digit, the previous behaviour is kept:
    the first three characters are stripped, and a two-character result is
    reduced to its first character.
    """
    text = str(label)
    match = _CODE_PREFIX.match(text.lstrip())
    if match:
        return match.group(0)
    legacy = text[:3].strip()
    return legacy[:1] if len(legacy) == 2 else legacy


# Execute the SPARQL query
results = g.query(sparql_query)

# Print the query results as "<code>,<identifier>" lines
for row in results:
    print(f"{competence_code(row.label)},{row.id}")

1,8b23nt41l4
1.1,9i6ta81748
1.2,3h9eab1489
1.3,4s23kdq383
2,2k3209g1wc
2.1,8ys96p08p4
2.2,9a5d39t62b
2.3,1dz4n8r554
2.4,y2s2861s9p
2.5,99vvo91542
2.6,7d4t7002v7
3,4b465z8p2e
3.1,u69o196gu6
3.2,sq0c9q6116
3.3,2mi55kl065
3.4,0690r7o8r2
4,1fz48677f4
4.1,aa985fr035
4.2,1b043jw49b
4.3,mg54my1580
4.4,o9e8s72z21
5,86xdxt8620
5.1,13z458t54u
5.2,00b51s9rc5
5.3,3bgx2a6278
5.4,860966ekgo


In [4]:
import yatter
from ruamel.yaml import YAML

# YARRRML to RML
yaml = YAML(typ='safe', pure=True)
mappings_file = join(folder, "mappings/competence-kg-yarrrml.yml")
# Read the mapping inside a context manager so the file handle is closed
# deterministically; the encoding is explicit so the result does not depend on
# the platform's default locale.
with open(mappings_file, encoding="utf-8") as yarrrml_input:
    yarrrml_data = yaml.load(yarrrml_input)
rml_content = yatter.translate(yarrrml_data)

# Print generated RML triples
# print(rml_content)

# Save RML to file (UTF-8, because the file is later parsed as Turtle)
rml_file = join(folder, "mappings/competence-kg-rml.ttl")
with open(rml_file, "w", encoding="utf-8") as rml_output:
    rml_output.write(rml_content)

2023-11-10 17:59:42,392 | INFO: Translating YARRRML mapping to [R2]RML
2023-11-10 17:59:42,393 | INFO: RML content is created!
2023-11-10 17:59:42,400 | INFO: Mapping has been syntactically validated.
2023-11-10 17:59:42,401 | INFO: Translation has finished successfully.


In [5]:
import morph_kgc

# Morph-KGC configuration: a single `Competences` section whose `mappings`
# entry is the RML file, given relative to the current working directory.
config = """      
            [Competences]
            mappings: ./../mappings/competence-kg-rml.ttl
         """

# Run the mapping; the result is the graph of generated triples.
g_rdflib = morph_kgc.materialize(config)

# Parse the RML mapping on its own so its prefix declarations can be reused.
mapping_file = join(folder, "mappings/competence-kg-rml.ttl")
g_mappings = Graph().parse(mapping_file, format='ttl')

# Bind every prefix known to the mapping graph on the generated graph.
for prefix, uri in g_mappings.namespaces():
    g_rdflib.bind(prefix, uri)

# Write the generated graph to disk as Turtle.
output_file = join(folder, "rdf/knowledge-graph.ttl")
g_rdflib.serialize(destination=output_file, format='ttl', encoding="utf-8")

2023-11-10 17:59:42,784 | INFO: Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2023-11-10 17:59:42,784 | INFO: NumExpr defaulting to 8 threads.
2023-11-10 17:59:43,358 | DEBUG: CONFIGURATION: {'output_file': 'knowledge-graph', 'na_values': ',nan', 'safe_percent_encoding': '', 'read_parsed_mappings_path': '', 'write_parsed_mappings_path': '', 'mapping_partitioning': 'PARTIAL-AGGREGATIONS', 'logging_file': '', 'oracle_client_lib_dir': '', 'oracle_client_config_dir': '', 'udfs': '', 'output_dir': '', 'output_format': 'N-TRIPLES', 'only_printable_chars': 'no', 'infer_sql_datatypes': 'no', 'logging_level': 'INFO', 'number_of_processes': '32'}
2023-11-10 17:59:43,358 | DEBUG: DATA SOURCE `Competences`: {'mappings': './../mappings/competence-kg-rml.ttl'}
  fnml_df = fnml_df.applymap(str)
  self.rml_df.at[i, 'source_type'] = file_extension.upper()
2023-11-10 17:59:43,997 | DEBUG: Removed self-join from mapping rule `#TM6`.
2023-11-10 17:59:44,0

<Graph identifier=Nda605a01fa6a450ba1a78d068cc3cc9a (<class 'rdflib.graph.Graph'>)>

In [6]:
# Count every triple in the generated graph with a single aggregate query.
triple_count_query = 'SELECT (COUNT(*) AS ?num_triples) WHERE { ?s ?p ?o }'
q_res = g_rdflib.query(triple_count_query)

# Report the count from each result row
for result in q_res:
    print(f"Number of triples generated: {result.num_triples}")

Number of triples generated: 1813


In [7]:
# Inputs merged into the enriched graph: the RDF/XML framework file and the
# Turtle knowledge graph file.
ontology_file = join(folder, "rdf/digital-competence-framework-skos-ap-eu.rdf")
graph_file = join(folder, "rdf/knowledge-graph.ttl")

# Parse both files into the same graph; the second parse adds to the first.
g_enriched = Graph()
g_enriched.parse(ontology_file, format='xml')
g_enriched.parse(graph_file, format='ttl')

<Graph identifier=N163989bc95fc4bb4b19c53132c786c71 (<class 'rdflib.graph.Graph'>)>

In [8]:
# Count every triple in the enriched graph with a single aggregate query.
triple_count_query = 'SELECT (COUNT(*) AS ?num_triples) WHERE { ?s ?p ?o }'
q_res = g_enriched.query(triple_count_query)

# Report the count from each result row
for result in q_res:
    print(f"Number of triples in the enriched graph: {result.num_triples}")

# Write the enriched graph to disk as Turtle.
output_file = join(folder, "rdf/knowledge-graph-enriched.ttl")
g_enriched.serialize(destination=output_file, format='ttl', encoding="utf-8")

Number of triples in the enriched graph: 9797


<Graph identifier=N163989bc95fc4bb4b19c53132c786c71 (<class 'rdflib.graph.Graph'>)>

In [9]:
# SPARQL query: competences assigned to the employee whose foaf:name is "Jane"
# and foaf:surname is "Brown", reached through cefriel:EmployeeCompetence
# resources. The skos:prefLabel is OPTIONAL, so a competence without a label in
# the queried graph is still returned, with ?label unbound.
sparql_query = """
PREFIX cefriel: <https://knowledge.c-innovationhub.com/competence-kg/schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>

SELECT ?c ?label
WHERE {
    ?employee foaf:name "Jane" .
    ?employee foaf:surname "Brown" .
    ?ec a cefriel:EmployeeCompetence ;
        cefriel:isAssignedToEmployee ?employee ;
        cefriel:hasAssignedCompetence ?c .
    OPTIONAL {?c skos:prefLabel ?label .} 
}
"""

In [10]:
# Run the current query against the generated (non-enriched) graph.
results = g_rdflib.query(sparql_query)

# Header line, then one tab-separated "competence, label" line per result
print("Competence")
for binding in results:
    print(f"{binding.c}\t{binding.label}")

Competence
http://data.europa.eu/snb/dcf/0690r7o8r2	None
http://data.europa.eu/snb/dcf/3bgx2a6278	None
http://data.europa.eu/snb/dcf/8ys96p08p4	None
http://data.europa.eu/snb/dcf/1b043jw49b	None
http://data.europa.eu/snb/dcf/99vvo91542	None
http://data.europa.eu/snb/dcf/mg54my1580	None
http://data.europa.eu/snb/dcf/13z458t54u	None
http://data.europa.eu/snb/dcf/4s23kdq383	None
http://data.europa.eu/snb/dcf/2mi55kl065	None
http://data.europa.eu/snb/dcf/o9e8s72z21	None


In [11]:
# Run the current query against the enriched graph.
results = g_enriched.query(sparql_query)

# Header line, then one tab-separated "competence, label" line per result
print("Competence")
for binding in results:
    print(f"{binding.c}\t{binding.label}")

Competence
http://data.europa.eu/snb/dcf/0690r7o8r2	3.4 Programming
http://data.europa.eu/snb/dcf/13z458t54u	5.1 Solving technical problems
http://data.europa.eu/snb/dcf/1b043jw49b	4.2 Protecting personal data and privacy
http://data.europa.eu/snb/dcf/2mi55kl065	3.3 Copyright and licences
http://data.europa.eu/snb/dcf/3bgx2a6278	5.3 Creatively using digital technologies
http://data.europa.eu/snb/dcf/4s23kdq383	1.3 Managing data, information and digital content
http://data.europa.eu/snb/dcf/8ys96p08p4	2.1 Interacting through digital technologies
http://data.europa.eu/snb/dcf/99vvo91542	2.5 Netiquette
http://data.europa.eu/snb/dcf/mg54my1580	4.3 Protecting health and well-being
http://data.europa.eu/snb/dcf/o9e8s72z21	4.4 Protecting the environment


In [15]:
# SPARQL query: competences assigned to the employee whose foaf:name is "Jane"
# and foaf:surname is "Brown". Only the competence IRI is selected; no label is
# requested and the linking resource's type is not constrained.
sparql_query = """
PREFIX ckg: <https://knowledge.c-innovationhub.com/competence-kg/schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT ?competence
WHERE {
  ?employee foaf:name "Jane" .
  ?employee foaf:surname "Brown" .
  ?employeeCompetence ckg:isAssignedToEmployee ?employee .
  ?employeeCompetence ckg:hasAssignedCompetence ?competence .
}"""

In [16]:
# Run the current query against the enriched graph.
results = g_enriched.query(sparql_query)

# Header line, then one competence IRI per result
print("Competence")
for binding in results:
    print(f"{binding.competence}")

Competence
http://data.europa.eu/snb/dcf/0690r7o8r2
http://data.europa.eu/snb/dcf/13z458t54u
http://data.europa.eu/snb/dcf/1b043jw49b
http://data.europa.eu/snb/dcf/2mi55kl065
http://data.europa.eu/snb/dcf/3bgx2a6278
http://data.europa.eu/snb/dcf/4s23kdq383
http://data.europa.eu/snb/dcf/8ys96p08p4
http://data.europa.eu/snb/dcf/99vvo91542
http://data.europa.eu/snb/dcf/mg54my1580
http://data.europa.eu/snb/dcf/o9e8s72z21
