In [1]:
import sys
sys.path.append('map_rml.py')
from map_rml import map_rml, postprocess
from rdflib import Graph, Namespace, URIRef, Literal, BNode
from rdflib.namespace import RDF, RDFS, OWL, DCTERMS, NamespaceManager
import pandas as pd

In [2]:
from IPython.display import display, HTML, Markdown
def htm(df: pd.DataFrame):
    demo_df = df.T if len(df)<6 else df
    display(HTML(demo_df.to_html()))

def md(df: pd.DataFrame):
    demo_df = df.T if len(df)<6 else df
    display(Markdown(demo_df.to_markdown()))

In [3]:
rml, rmlmapper, ttl = map_rml('auth')

Initiated mapping params:
('gbad\\schema\\authority\\general_authority_to_ric-o_model_2024-08-29_pz.rml',
 '.\\rmlmapper-7.0.0-r374-all.jar',
 'gbad\\mapping\\target\\general_authority_to_ric-o_model_2024-08-29_pz\\mapped.ttl')
Successfully mapped 'gbad\schema\authority\general_authority_to_ric-o_model_2024-08-29_pz.rml' to 'gbad\mapping\target\general_authority_to_ric-o_model_2024-08-29_pz\mapped.ttl'



In [4]:
g = postprocess(ttl)

Successfully read a graph from 'gbad\mapping\target\general_authority_to_ric-o_model_2024-08-29_pz\mapped.ttl' 
Number of triples in the graph: 356


In [5]:
# a sparql query that finds all triples containing a rico:Agent instance either as subject or object, and for all these rico:Agents, select all triples that have the rico:Agent as subject and "rico:hasOrHadCorporateBodyType" as predicate
query = f"""
# Find all triples with rico:Agent as subject or object and then find all triples
# where rico:Agent is the subject and has predicate rico:hasOrHadCorporateBodyType
SELECT DISTINCT ?agent ?subjectTriple ?predicate ?object ?corporateBodyType
WHERE {{
    # Finding all triples where rico:Agent is the subject or object
    {{
    ?agent rdf:type rico:Agent .
    ?agent ?predicate ?object .
    BIND(STR(?agent) AS ?subjectTriple)
    }} UNION {{
    ?object rdf:type rico:Agent .
    ?subject ?predicate ?object .
    BIND(STR(?object) AS ?subjectTriple)
    }}
    
    # For all these rico:Agents, find all triples where the rico:Agent is the subject
    # and the predicate is rico:hasOrHadCorporateBodyType
    OPTIONAL {{
        ?agent rico:hasOrHadCorporateBodyType ?corporateBodyType .
    }}
}}
"""
# Execute the query
result = g.query(query)

# List to hold the parsed results
parsed_results = []

# Process the results and create new triples
for row in result:
    parsed_results.append({
        'agent': row.agent,
        'corporateBodyType': row.corporateBodyType
    })
df = pd.DataFrame(parsed_results)

In [6]:
md(df.sample(10))

|    | agent                                                                                                          | corporateBodyType                                                                                  |
|---:|:---------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------|
|  9 | https://data.archives.gov.on.ca/RiC-O_1-0-1/CHIN%20Radio%2FTV%20International/Agent/1                          | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
| 64 | https://data.archives.gov.on.ca/RiC-O_1-0-1/Canada%20Packers/Agent/1                                           | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
| 46 | https://data.archives.gov.on.ca/RiC-O_1-0-1/Ontario.%20Minister%20Responsible%20for%20Native%20Affairs/Agent/1 | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/A%20Ontario%20Government%20Name |
| 74 | https://data.archives.gov.on.ca/RiC-O_1-0-1/Canada%20Packers/Agent/1                                           | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
| 32 | https://data.archives.gov.on.ca/RiC-O_1-0-1/Multilingual%20Television%20%28Toronto%29%20Limited/Agent/1        | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
|  7 | https://data.archives.gov.on.ca/RiC-O_1-0-1/CHIN%20Radio%2FTV%20International/Agent/1                          | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
| 58 | https://data.archives.gov.on.ca/RiC-O_1-0-1/CHIN%20Radio%2FTV%20International/Agent/1                          | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
| 26 | https://data.archives.gov.on.ca/RiC-O_1-0-1/Canadian%20Pacific%20Railway%20Company/Agent/1                     | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
|  0 | https://data.archives.gov.on.ca/RiC-O_1-0-1/CHIN%20Radio%2FTV%20International/Agent/1                          | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/Corporate%20name                |
| 49 | https://data.archives.gov.on.ca/RiC-O_1-0-1/Ontario.%20Minister%20Responsible%20for%20Native%20Affairs/Agent/1 | https://data.archives.gov.on.ca/Schema/Authority/CorporateBodyType/A%20Ontario%20Government%20Name |