Converting paths into an rdf_Craft-readable format for YARRRML and RML conversion

JSON structure

In [66]:
from pydantic import BaseModel
# One node of the mapping graph. build_schema_from_paths creates one Entity per
# distinct node URI that occurs on the selected paths.
class Entity (BaseModel):
    id: int  # numeric identifier; Predicate.from_id / Predicate.to_id refer to it
    rdf_type: str # URI of the node (build_schema_from_paths stores the node URI here)
    uri_pattern: str # instance IRI template, built as "aidava-resource:<fragments>/$(<column name>)"

# One directed edge of the mapping graph, connecting two Entity nodes by id.
class Predicate(BaseModel):
    from_id: int  # Entity.id of the node the edge starts at
    to_id: int  # Entity.id of the node the edge points to
    uri: str  # URI of the predicate that links the two nodes

# Complete mapping graph: all nodes plus the edges between them. This is the
# object that build_schema_from_paths returns.
class Schema (BaseModel):
    nodes: list[Entity]  # graph nodes
    edges: list[Predicate]  # links between nodes, expressed through Entity ids

In [67]:
import os
import json
import sys
from typing import List, Tuple, Optional


# Dataset selection, made in exactly one place. The folders for the "raw_data"
# and the "curated_dataset" variants differ only in this path segment, so the
# variant is a single switch instead of a pair of assignments where the second
# one silently overrides the first.
# TODO: absolute, machine-specific root; derive it from the project location.
data_root = "C:/Users/elias/Documents/ANI/Bachelor_Baby/llm_assistant/Data"
dataset = "curated_dataset"  # use "raw_data" for the uncurated responses

# Folder holding the "*_response.txt" files and folder holding the matching
# "*_analysis.txt" files.
response_folder = f"{data_root}/{dataset}/exp_two_hop"
analysis_dir = f"{data_root}/{dataset}/analysis"

# Hop count passed to PathFinder.find_paths.
hop_count_exp = 2

In [68]:

# === SETUP ===
# Working directory of the running notebook kernel
notebook_dir = os.getcwd()

# Directory one level above it; it is put on sys.path (once) so that the
# project import that follows can presumably be resolved from there.
parent_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)
from Path_Finding_Logic.main import PathFinder


def get_main_entity_type(analysis_file: str) -> str:
    """Read the main entity type from the first line of an analysis file.

    The first line is expected to have the form
    ``Main Entity Type: <value>``.

    Args:
        analysis_file: Path of the analysis text file (read as UTF-8).

    Returns:
        The text following the ``Main Entity Type:`` prefix, stripped of
        surrounding whitespace. An empty string is returned when the file
        does not exist or when its first line does not start with the prefix.
    """
    prefix = "Main Entity Type:"
    try:
        with open(analysis_file, "r", encoding="utf-8") as f:
            first_line = f.readline().strip()
    except FileNotFoundError:
        # A missing analysis file is reported through the same "" sentinel as
        # a malformed header, so one absent file does not abort a whole batch.
        return ""

    if first_line.startswith(prefix):
        # Cut off only the leading prefix; a value that happens to contain the
        # prefix text again is left intact.
        return first_line[len(prefix):].strip()
    return ""


In [69]:
# === MAIN LOOP ===
# NOTE(review): no processing happens inside this loop. After it finishes,
# `base_name`, `json_path`, `analysis_file` and `main_entity` hold whatever the
# last visited response file left behind, and the following cells work on those
# values only. Confirm whether the later cells were meant to run per file.
for file in os.listdir(response_folder):
    if file.endswith("_response.txt"):
        base_name = file.replace("_response.txt", "")

        # Analysis file that belongs to this response, and the response itself
        analysis_file = os.path.join(analysis_dir, f"{base_name}_analysis.txt")
        json_path = os.path.join(response_folder, file)

        # Main entity named on the first line of the analysis file
        main_entity = get_main_entity_type(analysis_file)
        if not main_entity:
            print(f"[!] Could not determine main entity from {analysis_file}, skipping.")

In [70]:
# Get paths from PathFinder
# `main_entity` and `hop_count_exp` are read from earlier cells, so this cell
# only works after those have been executed.
# NOTE(review): "aidava-sphn.ttl" is a relative path; presumably it is resolved
# against the kernel's working directory -- confirm against PathFinder.
pathfinder = PathFinder(ttl_file="aidava-sphn.ttl")
# Each returned path is used below as a sequence of (node_uri, predicate_uri)
# tuples, where the first tuple carries None as its predicate.
paths = pathfinder.find_paths(hop_count=hop_count_exp, target_class=main_entity)

In [71]:
# Load mapping JSON
# `json_path` is set by the main-loop cell; the file is parsed as UTF-8 JSON.
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)
# A missing "column_mappings" key is treated as "no mappings".
column_mappings = data.get("column_mappings", [])
rows = []  # NOTE(review): not used in the visible cells; presumably filled later

In [72]:
# Resolve every column mapping to the ontology path it refers to.
# NOTE(review): a skipped mapping leaves `path_output` shorter than
# `column_mappings`; code that pairs the two lists by position will then be
# shifted by one for every skipped entry.
path_output = []
for mapping in column_mappings:
    path_id = mapping["path"]["path_id"] - 1  # Convert from 1-indexed
    column_name = mapping["column_name"]

    # Reject ids on both sides of the valid range. An id of 0 (or below) would
    # otherwise turn into a negative index and silently select a path counted
    # from the end of `paths`.
    if not 0 <= path_id < len(paths):
        print(
            f"[!] Path ID {path_id+1} out of range for {base_name} with length {len(paths)}"
        )
        continue

    print(paths[path_id])

    path_output.append(paths[path_id])



[('https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient', None), ('https://biomedit.ch/rdf/sphn-ontology/AIDAVA/PatientIdentifier', 'https://biomedit.ch/rdf/sphn-ontology/AIDAVA/hasPatientIdentifier')]
[('https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient', None), ('https://biomedit.ch/rdf/sphn-ontology/AIDAVA/PatientIdentifier', 'https://biomedit.ch/rdf/sphn-ontology/AIDAVA/hasPatientIdentifier')]
[('https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient', None), ('https://biomedit.ch/rdf/sphn-ontology/sphn#BirthDate', 'https://biomedit.ch/rdf/sphn-ontology/AIDAVA/hasBirthDate')]
[('https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient', None), ('https://biomedit.ch/rdf/sphn-ontology/sphn#AdministrativeGender', 'https://biomedit.ch/rdf/sphn-ontology/AIDAVA/hasAdministrativeGender')]
[('https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient', None), ('http://www.w3.org/2001/XMLSchema#string', 'https://biomedit.ch/rdf/sphn-ontology/AIDAVA/hasAdress')]
[('https://biomedit.ch/rdf/sphn-ontolo

In [73]:
import re

def get_last_fragment(uri: str) -> str:
    """Return the last non-empty segment of a URI.

    The URI is split on "/" and "#". Trailing "/" or "#" characters are
    ignored first, so a URI such as "https://example.org/ns/Thing/" yields
    "Thing" rather than an empty string.

    Args:
        uri: The URI (or any string) to take the last segment from.

    Returns:
        The text after the last "/" or "#" once trailing delimiters are
        removed; "" if nothing but delimiters (or nothing at all) is given.
    """
    return re.split(r'[\/#]', uri.rstrip("/#"))[-1]

In [None]:
def build_schema_from_paths(column_mappings, path_output: list[list[tuple[str, Optional[str]]]]) -> Schema:
    """Merge the selected ontology paths into a single node/edge schema.

    Every distinct node URI found on the paths becomes one ``Entity`` with a
    running id starting at 1. Every hop that carries a predicate becomes one
    ``Predicate`` edge from the preceding node of the same path.

    Args:
        column_mappings: Sequence of mappings; entry ``k`` must provide a
            ``"column_name"`` key and is paired with ``path_output[k]`` by
            position, so both sequences have to be aligned.
        path_output: One path per column. A path is a list of
            ``(node_uri, predicate_uri)`` tuples where ``predicate_uri`` is
            the predicate leading from the previous node to this node
            (``None`` for the first node).

    Returns:
        A ``Schema`` holding the collected nodes and edges.

    Raises:
        IndexError: If ``path_output`` has more entries than
            ``column_mappings``.

    Notes:
        * A node shared by several paths is registered once; its
          ``uri_pattern`` uses the column name and the fragment chain of the
          first path on which it appeared.
        * Edges are appended per path, so paths sharing a hop produce
          repeated edges.
    """
    uri_to_id = {}
    nodes = []
    edges = []
    next_id = 1

    for column, path in enumerate(path_output):
        column_name = column_mappings[column]["column_name"]
        uri_pattern = ""
        previous_node_uri = None

        for current_node_uri, predicate_uri in path:
            # Extend the "/"-separated chain of URI fragments along the path.
            frag = get_last_fragment(current_node_uri)
            if uri_pattern != "":
                uri_pattern += "/" + frag
            else:
                uri_pattern += frag

            # Register the node the first time its URI is seen.
            if current_node_uri not in uri_to_id:
                uri_to_id[current_node_uri] = next_id
                nodes.append(Entity(id=next_id, uri_pattern=f"aidava-resource:{uri_pattern}/$({column_name})", rdf_type=f"{current_node_uri}"))
                next_id += 1

            # A predicate links the preceding node of this path to the current
            # one. The preceding node was registered on the previous step, so
            # its id is always available here.
            if predicate_uri is not None and previous_node_uri is not None:
                edges.append(Predicate(
                    from_id=uri_to_id[previous_node_uri],
                    to_id=uri_to_id[current_node_uri],
                    uri=predicate_uri
                ))

            previous_node_uri = current_node_uri

    return Schema(nodes=nodes, edges=edges)

In [93]:
# Build the schema from the mappings and paths prepared in the earlier cells,
# then print it as JSON indented by two spaces.
schema = build_schema_from_paths(column_mappings, path_output)
print(json.dumps(schema.model_dump(), indent=2))


Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  2
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/Patient None
Length:  2
Length:  3
Length:  3
Hello!!
https://biomedit.ch/rdf/sphn-ontology/AIDAVA/SubjectName https://biomedit.ch/rdf/sphn-onto