In [None]:
from rdflib import Graph
from gremlin_python.driver import client


# Exploratory cell: load the RDF file and print every triple. The mapping
# to Gremlin is only sketched in the comments inside the loop.
rdf_graph = Graph()
rdf_graph.parse("JanusGraph/Testdataset.rdf", format="turtle")

gremlin_client = client.Client('ws://localhost:8182/gremlin', 'g')
try:
    for subj, pred, obj in rdf_graph:
        print(f"Subj: {subj}, Pred: {pred}, Obj: {obj}")
        # Create or get vertex for subj
        # If obj is literal: add as property
        # If obj is URI: create/get vertex for obj, add edge pred from subj to obj
        # Use gremlin_client.submit() to send Gremlin queries
finally:
    # Close the connection even if iterating/printing fails, so this cell
    # does not leave an open client behind when the name is rebound later.
    gremlin_client.close()


In [5]:
from rdflib import Graph, URIRef, Literal
from rdflib.namespace import RDF
from gremlin_python.driver import client
from gremlin_python.driver import serializer

import logging
import nest_asyncio
# Works around the "running event loop" issue seen in Jupyter notebooks.
# Not needed when this code is executed as a plain Python script.
nest_asyncio.apply()

# Route all records from DEBUG upwards into a log file that is opened in
# write mode ('w'), i.e. started afresh each time this configuration applies.
logging.basicConfig(
    filename='RDF_DataIngestion.log',
    filemode='w',
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

def sanitize_key(key: str) -> str:
    """Derive a short property key / edge label from a predicate URI.

    The local name is the text after the last ``/`` or ``#`` separator, so
    both slash namespaces (``http://xmlns.com/foaf/0.1/name`` -> ``name``) and
    hash namespaces (``http://www.w3.org/2000/01/rdf-schema#label`` ->
    ``label``) are reduced to their final segment. Trailing separators are
    ignored so that a URI ending in ``/`` or ``#`` does not produce an empty
    key. Spaces are replaced by underscores.

    Args:
        key: Predicate URI (or any string) to shorten.

    Returns:
        The sanitized local name; an empty string only if ``key`` contains
        nothing but separators.
    """
    # Drop trailing separators first, otherwise the "last segment" is empty.
    local_name = key.rstrip('/#')
    # Keep only what follows the last '/' and then the last '#'.
    for separator in ('/', '#'):
        local_name = local_name.rpartition(separator)[2]
    return local_name.replace(' ', '_')

def _upsert_vertex(gremlin_client: client.Client, uri: str) -> bool:
    """Ensure an 'Entity' vertex with the given ``uri`` exists.

    The URI is sent as a parameter binding rather than spliced into the
    script, so quotes or backslashes in it cannot break or alter the query.

    Returns:
        True if the vertex was created by this call, False if it already existed.
    """
    result = gremlin_client.submit(
        "g.V().has('uri', vertex_uri).fold()"
        ".coalesce(__.unfold().constant(false),"
        " __.addV('Entity').property('uri', vertex_uri).constant(true))",
        bindings={'vertex_uri': uri},
    ).all().result()
    # The traversal yields `true` only from the addV branch.
    return True in result


def _literal_to_property_value(literal: Literal):
    """Convert an RDF literal into a value that is safe to send as a binding.

    Strings, booleans and numbers are passed through as plain Python values;
    anything else (dates, decimals, unconvertible literals, ...) is stored
    as the literal's lexical string form.
    """
    value = literal.toPython()
    if isinstance(value, str):
        # Literal subclasses str; hand the serializer a plain str.
        return str(value)
    if isinstance(value, (bool, int, float)):
        return value
    return str(literal)


def process_rdf(rdf_path: str, gremlin_client: client.Client, rdf_format: str = "turtle") -> dict:
    """Load an RDF file and write its triples to a Gremlin graph.

    Mapping, applied per triple:
      * the subject becomes (or reuses) an 'Entity' vertex keyed by 'uri';
      * an ``rdf:type`` triple sets the 'rdf_type' property on the subject;
      * a URI object becomes (or reuses) a vertex and an edge labelled with
        the sanitized predicate is added from subject to object;
      * a literal object is stored as a property named after the sanitized
        predicate;
      * any other object kind is skipped with a warning.

    All values are passed to the server as parameter bindings, never
    interpolated into the Gremlin script text.

    NOTE(review): edges are added unconditionally, so importing the same file
    twice adds each edge twice.

    Args:
        rdf_path: Path of the RDF file to parse.
        gremlin_client: Connected gremlin_python client used to submit queries.
        rdf_format: rdflib parser format name; defaults to "turtle".

    Returns:
        The statistics dict with the keys 'vertices_processed' (vertices newly
        created, subject or object), 'edges_created', 'properties_set' and
        'errors'.

    Raises:
        Whatever ``Graph.parse`` raises when the file cannot be read or parsed.
        Failures on individual triples are logged and counted, not raised.
    """
    rdf_graph = Graph()
    rdf_graph.parse(rdf_path, format=rdf_format)

    stats = {
        'vertices_processed': 0,
        'edges_created': 0,
        'properties_set': 0,
        'errors': 0
    }

    for subj, pred, obj in rdf_graph:
        try:
            subj_uri = str(subj)
            pred_name = sanitize_key(str(pred))

            # Vertex creation/check
            if _upsert_vertex(gremlin_client, subj_uri):
                stats['vertices_processed'] += 1
                logger.debug(f"Created new vertex: {subj_uri}")
            else:
                logger.debug(f"Existing vertex found: {subj_uri}")

            if pred == RDF.type:
                # Handle type
                rdf_type = str(obj)
                gremlin_client.submit(
                    "g.V().has('uri', subj_uri).property('rdf_type', rdf_type)",
                    bindings={'subj_uri': subj_uri, 'rdf_type': rdf_type},
                ).all().result()
                logger.debug(f"Set rdf_type for {subj_uri}: {rdf_type}")
                stats['properties_set'] += 1

            elif isinstance(obj, URIRef):
                # Edge creation: make sure the target vertex exists first.
                obj_uri = str(obj)
                if _upsert_vertex(gremlin_client, obj_uri):
                    stats['vertices_processed'] += 1
                    logger.debug(f"Created new vertex: {obj_uri}")

                gremlin_client.submit(
                    "g.V().has('uri', subj_uri)"
                    ".addE(edge_label)"
                    ".to(__.V().has('uri', obj_uri))",
                    bindings={
                        'subj_uri': subj_uri,
                        'edge_label': pred_name,
                        'obj_uri': obj_uri,
                    },
                ).all().result()

                stats['edges_created'] += 1
                logger.info(f"Created edge: {subj_uri} -[{pred_name}]-> {obj_uri}")

            elif isinstance(obj, Literal):
                # Property handling
                value = _literal_to_property_value(obj)
                gremlin_client.submit(
                    "g.V().has('uri', subj_uri).property(prop_key, prop_value)",
                    bindings={
                        'subj_uri': subj_uri,
                        'prop_key': pred_name,
                        'prop_value': value,
                    },
                ).all().result()

                stats['properties_set'] += 1
                logger.debug(f"Set property {pred_name}={value!r} on {subj_uri}")

            else:
                # Neither a URI nor a literal (e.g. a blank node): nothing is
                # written for the object, but make the omission visible.
                logger.warning(
                    f"Skipped unsupported object {obj!r} for {subj_uri} {pred_name}"
                )

        except Exception as e:
            # Best effort: one bad triple must not abort the whole import.
            stats['errors'] += 1
            logger.error(f"Failed to process {subj} {pred} {obj}: {str(e)}", exc_info=True)

    # Summary logging
    logger.info("\nImport Statistics:")
    logger.info(f"Total vertices processed: {stats['vertices_processed']}")
    logger.info(f"Total edges created: {stats['edges_created']}")
    logger.info(f"Total properties set: {stats['properties_set']}")
    logger.info(f"Total errors encountered: {stats['errors']}")

    return stats

# Initialize and run
gremlin_client = client.Client(
    url='ws://localhost:8182/gremlin',
    traversal_source='g',
    message_serializer=serializer.GraphSONSerializersV3d0())
try:
    process_rdf("Augemented_Testdataset.ttl", gremlin_client)
finally:
    # Close the client even when the import raises (e.g. the file is missing
    # or cannot be parsed), so the connection is not left open in the kernel.
    gremlin_client.close()
