In [1]:
import os

from neo4j import GraphDatabase
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDF

# Neo4j connection settings.
# Each value can be overridden through an environment variable of the same
# name, so credentials do not have to be edited into the notebook.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # Adjust if needed
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
# NOTE(security): the fallback below is the literal this notebook used before,
# kept only so existing local setups keep working. Prefer exporting
# NEO4J_PASSWORD and removing the fallback before sharing the notebook.
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "thelastofus")



In [2]:
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDF

# Load the RDF file
def load_rdf(file_path, rdf_format="xml"):
    """Parse an RDF document into a new rdflib ``Graph``.

    Args:
        file_path: Location of the RDF document; passed straight to
            ``Graph.parse``.
        rdf_format: Serialization name forwarded to ``Graph.parse`` as its
            ``format`` argument (for example ``"xml"``, ``"turtle"`` or
            ``"n3"``). Defaults to ``"xml"``, the value that used to be
            hard-coded here, so existing callers are unaffected.

    Returns:
        The ``Graph`` populated from the document.
    """
    g = Graph()
    g.parse(file_path, format=rdf_format)
    return g

# Inspect all unique URIs (Subjects, Predicates, Objects) in the RDF graph
def inspect_uris(g):
    """Print every distinct subject, predicate and object found in ``g``.

    Args:
        g: Any iterable of ``(subject, predicate, object)`` triples. Each term
            is converted with ``str`` before being collected, so object
            literals are listed alongside object URIs.

    The three sections are printed in sorted order so that the output is the
    same on every run.
    """
    subjects, predicates, objects = set(), set(), set()

    # Collect the string form of each position of every triple.
    for s, p, o in g:
        subjects.add(str(s))
        predicates.add(str(p))
        objects.add(str(o))

    sections = (
        ("Subjects (Entities/Individuals):", subjects),
        ("\nPredicates (Properties/Relationships):", predicates),
        ("\nObjects (Values/Other Entities):", objects),
    )
    for heading, terms in sections:
        print(heading)
        # A set of str iterates in a hash-dependent order that can change
        # between interpreter runs; sorting keeps the listing reproducible.
        for term in sorted(terms):
            print(term)

# Inspect the classes (types) defined in the RDF graph
def inspect_classes(g):
    """Print the distinct classes that appear as the object of ``rdf:type``.

    Args:
        g: Iterable of ``(subject, predicate, object)`` triples. Only triples
            whose predicate equals ``RDF.type`` and whose object is a
            ``URIRef`` contribute a class.

    Classes are printed in sorted order so the listing is identical on every
    run (plain set iteration order is hash-dependent).
    """
    classes = {
        str(o)
        for _, p, o in g
        if p == RDF.type and isinstance(o, URIRef)
    }

    print("Classes (Types):")
    for cls in sorted(classes):
        print(cls)

# Inspect individuals and their associated types (rdf:type)
def inspect_individuals(g):
    """Print each distinct ``(subject, class)`` pair asserted through ``rdf:type``.

    Args:
        g: Iterable of ``(subject, predicate, object)`` triples. A pair is
            recorded for every triple whose predicate equals ``RDF.type`` and
            whose object is a ``URIRef``.

    Pairs are printed sorted by subject and then class, so the output is
    stable across runs instead of following hash-dependent set order.
    """
    individuals = {
        (str(s), str(o))  # (individual, class)
        for s, p, o in g
        if p == RDF.type and isinstance(o, URIRef)
    }

    print("Individuals and Their Types:")
    for ind, cls in sorted(individuals):
        print(f"Individual: {ind}, Class: {cls}")

# Inspect relationships between individuals (predicates)
def inspect_relationships(g):
    """Print every distinct triple whose predicate and object are both URIs.

    Args:
        g: Iterable of ``(subject, predicate, object)`` triples. Triples with
            a non-``URIRef`` object (for example literals) are left out;
            ``rdf:type`` triples are not filtered and therefore appear too.

    Triples are printed in sorted order so repeated runs produce the same
    listing rather than a hash-dependent set order.
    """
    relationships = {
        (str(s), str(p), str(o))  # (subject, predicate, object)
        for s, p, o in g
        if isinstance(p, URIRef) and isinstance(o, URIRef)
    }

    print("Relationships:")
    for subj, pred, obj in sorted(relationships):
        print(f"Subject: {subj}, Predicate: {pred}, Object: {obj}")

# Main function to inspect the RDF graph and display the results
def inspect_ontology(g):
    """Run all four inspection reports on ``g``, each preceded by a banner line.

    The reports are, in order: URIs, classes, individuals with their types,
    and relationships.
    """
    report_steps = (
        ("Inspecting URIs in the RDF Graph...\n", inspect_uris),
        ("\nInspecting Classes...\n", inspect_classes),
        ("\nInspecting Individuals and Their Types...\n", inspect_individuals),
        ("\nInspecting Relationships...\n", inspect_relationships),
    )
    for banner, report in report_steps:
        print(banner)
        report(g)

# Main execution workflow
def main(rdf_file):
    """Load the RDF document at ``rdf_file`` and print its inspection report."""
    inspect_ontology(load_rdf(rdf_file))

# Path of the ontology to inspect; load_rdf parses it with format='xml'.
rdf_file = "onto.rdf"  # Change this to your RDF file's path
# Runs when the cell executes: loads the file and prints the inspection report.
main(rdf_file)


Inspecting URIs in the RDF Graph...

Subjects (Entities/Individuals):
http://www.semanticweb.org/geopoliFirst#Euromaidan_Protests
http://www.semanticweb.org/geopoliFirst#Length/Extent
http://www.semanticweb.org/geopoliFirst#FACES_PROTECTION_FROM
http://www.semanticweb.org/geopoliFirst#UN_Security_Council
http://www.semanticweb.org/geopoliFirst#Vladimir_Putin
http://www.semanticweb.org/geopoliFirst#Pollution_Levels
http://www.semanticweb.org/geopoliFirst#President
http://www.semanticweb.org/geopoliFirst#Viktor_Yanukovych
http://www.semanticweb.org/geopoliFirst#capital
http://www.semanticweb.org/geopoliFirst#Timber
http://www.semanticweb.org/geopoliFirst#Environmental_Factors
http://www.semanticweb.org/geopoliFirst#FACES_GEOPOLITICAL_CHALLENGE
http://www.semanticweb.org/geopoliFirst#Technology
http://www.semanticweb.org/geopoliFirst#Ukrainian_Military
http://www.semanticweb.org/geopoliFirst#sugar_beets
http://www.semanticweb.org/geopoliFirst#SEEKING_MEMBERSHIP
http://www.semanticweb.org/

In [3]:
import rdflib
from neo4j import GraphDatabase
import re

# Load the RDF graph
ontology_path = "onto.rdf"  # Path to the ontology file
graph = rdflib.Graph()
graph.parse(ontology_path, format="xml")  # Adjust format if necessary

# Connect to Neo4j using the connection settings defined in the configuration
# cell (NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD) rather than repeating the
# literals here, so the credentials only need to be changed in one place.
uri = NEO4J_URI  # Neo4j URI
driver = GraphDatabase.driver(uri, auth=(NEO4J_USER, NEO4J_PASSWORD))
session = driver.session()

# Helper function to clean URIs and extract local names
def clean_uri(uri):
    """Return the local name of ``uri`` with non-word characters replaced by ``_``.

    The local name is the text after the last ``#`` when the URI contains one,
    otherwise the text after the last ``/`` (the whole string if it has
    neither). The result is empty when the URI ends with that separator.
    """
    separator = "#" if "#" in uri else "/"
    local_name = uri.split(separator)[-1]
    return re.sub(r"[^\w]", "_", local_name)

# Helper function to extract the primary class label for a node
def get_primary_class(node):
    """Return one class label for ``node``, or ``None`` if it has no usable type.

    Looks up every ``rdf:type`` object of ``node`` in the module-level
    ``graph``, converts each to a label with ``clean_uri`` and discards
    schema-level terms and empty labels.

    When several labels remain, the alphabetically first one is returned.
    Taking "the first triple" instead would depend on the store's iteration
    order, so the same node could receive different labels on different runs.

    Args:
        node: The RDF term whose types are looked up.

    Returns:
        The chosen label as a string, or ``None`` when no label qualifies.
    """
    # Exclude schema-level terms, including NamedIndividual
    schema_terms = {
        "ObjectProperty",
        "DataProperty",
        "DatatypeProperty",
        "NamedIndividual",
        "Class",
        "Axiom",
        "Ontology",
    }
    candidates = set()
    for _, _, rdf_type_obj in graph.triples((node, rdflib.RDF.type, None)):
        class_label = clean_uri(rdf_type_obj)
        # An empty label (type URI ending in '#' or '/') cannot be used.
        if class_label and class_label not in schema_terms:
            candidates.add(class_label)
    return min(candidates, default=None)

# Iterate through RDF triples and mirror them into Neo4j.
OWL_DATATYPE_PROPERTY = rdflib.URIRef("http://www.w3.org/2002/07/owl#DatatypeProperty")


def _quote_identifier(name):
    """Return ``name`` as a backtick-quoted Cypher identifier.

    Labels, relationship types and property keys are spliced into the query
    text with ``str.format`` below, so they are quoted here: this keeps names
    that start with a digit valid, and doubling any embedded backtick stops
    the name from ending the quoted identifier early.
    """
    return "`{}`".format(name.replace("`", "``"))


try:
    for subj, pred, obj in graph:
        # Skip triples where the subject or predicate does not start with "http"
        if not (str(subj).startswith("http") and str(pred).startswith("http")):
            continue

        # Skip schema-level definitions of DatatypeProperty
        if (subj, rdflib.RDF.type, OWL_DATATYPE_PROPERTY) in graph:
            continue

        subj_name = clean_uri(subj)
        subj_uri = str(subj)

        # Handle class membership (rdf:type relationships)
        if pred == rdflib.RDF.type:
            # Use the same label the relationship and literal branches use, so
            # every individual is merged into a single node instead of one
            # node per asserted type.
            node_label = get_primary_class(subj)
            if not node_label:
                continue  # only schema-level (or unusable) types: no node
            session.run(
                """
                MERGE (n:{class_label} {{name: $name, uri: $uri}})
                """.format(class_label=_quote_identifier(node_label)),
                name=subj_name,
                uri=subj_uri,
            )

        # Handle relationships between nodes
        elif isinstance(obj, rdflib.URIRef):
            rel_type = clean_uri(pred)
            if not rel_type:
                continue  # predicate has no local name to use as a type

            subj_class = get_primary_class(subj) or "Entity"
            obj_class = get_primary_class(obj) or "Entity"

            session.run(
                """
                MERGE (a:{subj_class} {{name: $subj_name, uri: $subj_uri}})
                MERGE (b:{obj_class} {{name: $obj_name, uri: $obj_uri}})
                MERGE (a)-[:{rel} {{uri: $rel_uri}}]->(b)
                """.format(
                    subj_class=_quote_identifier(subj_class),
                    obj_class=_quote_identifier(obj_class),
                    rel=_quote_identifier(rel_type),
                ),
                subj_name=subj_name,
                subj_uri=subj_uri,
                obj_name=clean_uri(obj),
                obj_uri=str(obj),
                rel_uri=str(pred),
            )

        # Handle literal values as properties
        elif isinstance(obj, rdflib.Literal):
            property_key = clean_uri(pred)
            if not property_key:
                continue  # predicate has no local name to use as a key

            individual_class = get_primary_class(subj) or "Entity"

            session.run(
                """
                MERGE (n:{class_label} {{name: $name, uri: $uri}})
                SET n.{property_name} = $property_value
                """.format(
                    class_label=_quote_identifier(individual_class),
                    property_name=_quote_identifier(property_key),
                ),
                name=subj_name,
                uri=subj_uri,
                property_value=str(obj),
            )
finally:
    # Close session and driver even when a query fails part-way through,
    # so the connection is not left open in the kernel.
    session.close()
    driver.close()
