Goal of this notebook:
 - Determine whether subgraphing can be done in Gremlin-Python
 - If so, implement subgraphing based on time and perform path queries on it. Benchmark those queries.

In [31]:
# Jupyter notebook needs this or else it will crash
from datetime import datetime
import nest_asyncio
nest_asyncio.apply()

from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver import client
from gremlin_python.process.traversal import P # NEW!!! Import predicates (gt, gte, lt, lte, etc.)
from gremlin_python.process.traversal import Cardinality # NEW!!! Import Cardinality such as list_, set_ and single.
from gremlin_python.driver.protocol import GremlinServerError # Gremlin server error
from gremlin_python.process.traversal import Pop # for Pop.all_ in select(Pop.all_, 'v')
from gremlin_python.process.strategies import SubgraphStrategy

# Local Graph object; below it only serves as the factory for the traversal source.
graph = Graph()

# Separate driver-level client pointed at the same server endpoint and traversal source name.
gremlin_client = client.Client('ws://localhost:8182/gremlin', 'g')

# Open the remote connection, then bind the traversal source <g> to it.
# Every g.V()/g.E()/g.addV() call in this notebook goes through this connection.
remote_connection = DriverRemoteConnection('ws://localhost:8182/gremlin','g')
g = graph.traversal().withRemote(remote_connection)

In [32]:
def set_connection(name1: str, name2: str, time: float, connection: bool) -> None:
    """
    Open or close the 'connection' edge between the vertices named <name1> and <name2> at time <time>.

    If <connection> is True, add an edge labelled 'connection' from <name1> to <name2> with a 'start'
    property of <time>, unless a 'connection' edge between them is already active at <time>
    (start <= time, and either no 'end' property or end > time).

    If <connection> is False, give every 'connection' edge between the two vertices that has no 'end'
    property an 'end' property of <time>.

    The request is re-submitted until it completes without a GremlinServerError; each failure is printed.

    TODO: Add sphinx documentation if this will be implemented into the actual Python library.
    """

    while True:

        try:

            if connection:
                # The two conditions that together make an edge "active at <time>".
                started_by_then = __.has('start', P.lte(time))
                not_yet_ended = __.or_(
                    __.hasNot('end'),
                    __.has('end', P.gt(time))
                )

                # Any 'connection' edge touching <name2> that is active at <time>.
                active_edge = (
                    __.bothE('connection').as_('e')
                    .bothV().has('name', name2)
                    .select('e')
                    .and_(started_by_then, not_yet_ended)
                )

                # Only continue from <name1> if no such edge exists, then add the new edge a -> b.
                source = g.V().has('name', name1).as_("a").not_(active_edge)
                target = source.V().has('name', name2).as_("b")
                target.addE('connection').from_("a").to("b").property('start', time).iterate()

            else:
                # Edges between the two vertices that are still open (there should only be one).
                open_edges = (
                    g.V().has('name', name1)
                    .bothE('connection').as_('e')
                    .bothV().has('name', name2)
                    .select('e').hasNot('end')
                )
                open_edges.property('end', time).iterate()

            # Reached only when the traversal ran without a server error.
            return

        except GremlinServerError as err:
            print(f"{datetime.now().strftime('%H:%M:%S')} ERROR: Failed to set {connection} connection between {name1} and {name2} at {time}", err)

In [33]:
def add_component(name: str) -> None:
    """
    Create a new vertex whose 'name' property is <name>.

    The request is re-submitted until it completes without a GremlinServerError; each failure is printed.

    TODO: Add sphinx documentation if this will be implemented into the actual Python library.
    """

    while True:
        try:
            g.addV().property('name', name).iterate()
        except GremlinServerError as err:
            print(f"{datetime.now().strftime('%H:%M:%S')} ERROR: Failed to add component of name {name}", err)
        else:
            # No server error: the vertex was submitted, stop retrying.
            return


In [34]:
def add_type(type: str) -> None:
    """
    Create a new vertex representing a component type; its 'name' property is set to <type>.

    The request is re-submitted until it completes without a GremlinServerError; each failure is printed.

    TODO: Add sphinx documentation if this will be implemented into the actual Python library.
    """

    while True:
        try:
            g.addV().property('name', type).iterate()
        except GremlinServerError as err:
            print(f"{datetime.now().strftime('%H:%M:%S')} ERROR: Failed to add type vertex of name {type}", err)
        else:
            # No server error: the vertex was submitted, stop retrying.
            return


def set_type(name: str, type: str) -> None:
    """
    Add an edge labelled "type" from the vertex whose 'name' is <name> to the vertex whose 'name' is <type>.

    The request is re-submitted until it completes without a GremlinServerError; each failure is printed.

    TODO: Add sphinx documentation if this will be implemented into the actual Python library.
    """

    while True:
        try:
            # Tag the component vertex as "a" and the type vertex as "b", then link a -> b.
            component = g.V().has('name', name).as_("a")
            type_vertex = component.V().has('name', type).as_("b")
            type_vertex.addE("type").from_("a").to("b").iterate()
        except GremlinServerError as err:
            print(f"{datetime.now().strftime('%H:%M:%S')} ERROR: Failed to set type of {name} to {type}", err)
        else:
            # No server error: the edge was submitted, stop retrying.
            return

In [35]:
# Each returned path is in V-E-V-E-V-...-V form.
def find_paths(name1: str, name2: str, avoid_type: str, time: float):
    """
    Return, as a list, the paths from the vertex named <name1> to the vertex named <name2> that only use
    'connection' edges active at <time> (start <= time, and either no 'end' property or end > time).

    Vertices that have an outgoing 'type' edge to the vertex named <avoid_type> are never stepped onto,
    and no vertex is visited twice within a path.

    The query is re-submitted until it completes without a GremlinServerError; each failure is printed.

    TODO: Add sphinx documentation if this will be implemented into the actual Python library.
    """
    while True:
        try:
            # Edge filter: started on or before <time> ...
            started_by_then = __.has('start', P.lte(time))
            # ... and either never ended or ended after <time>.
            not_yet_ended = __.or_(
                __.hasNot('end'),
                __.has('end', P.gt(time))
            )

            # Vertex filter: true for vertices typed as <avoid_type>.
            of_avoided_type = __.outE('type').inV().has('name', avoid_type)

            # One hop: cross an active edge to the far vertex, reject avoided types and revisits.
            hop = (
                __.bothE('connection').and_(started_by_then, not_yet_ended)
                .otherV().not_(of_avoided_type)
                .simplePath()
            )

            walk = g.V().has('name', name1).repeat(hop).until(__.has('name', name2))
            return walk.path().toList()
        except GremlinServerError as err:
            print(f"{datetime.now().strftime('%H:%M:%S')} ERROR: Could not find paths between {name1} and {name2} at time {time} avoiding {avoid_type}.", err)
    

In [37]:
# Clear the graph before loading.
# Doing this does NOT use indexing.
# Dropping a vertex also removes its incident edges, so no separate g.E().drop() pass is needed.
g.V().drop().iterate()

dishes = 1024

# Correlator node name
cor = 'COR000000'

# (Temporary) Naming scheme:
#  - COR######: Correlator input
#  - ANT######: Antenna
#  - DPF######: Dual-Polarization Feed
#  - BLN######: (Active) Balun
#  - RFT######: RFoF transmitter
#  - OPF######: Optical Fiber
#  - RFR######: RFoF receiver
#  - ADC######: Analog-to-Digital converter

# Set up the types.
# The loop variable is deliberately not called `type`: at notebook (module) level that would
# rebind the builtin for every later cell.
types = ['COR', 'ANT', 'DPF', 'BLN', 'RFT', 'OPF', 'RFR', 'ADC']
for type_name in types:
    add_type(type_name)


# Add a correlator input node
add_component(cor)
set_type(cor, 'COR')

# Add the components and connect them at different times.
for i in range(1, dishes + 1):

    # The names of the components to refer to.
    # Each dish has one antenna and one feed, and two of every later component, numbered 2i-1 and 2i.
    odd, even = 2 * i - 1, 2 * i
    ant = f'ANT{i:06d}'
    dpf = f'DPF{i:06d}'
    bln = (f'BLN{odd:06d}', f'BLN{even:06d}')
    rft = (f'RFT{odd:06d}', f'RFT{even:06d}')
    opf = (f'OPF{odd:06d}', f'OPF{even:06d}')
    rfr = (f'RFR{odd:06d}', f'RFR{even:06d}')
    adc = (f'ADC{odd:06d}', f'ADC{even:06d}')

    now = datetime.now()

    add_component(ant)
    add_component(dpf)

    set_type(ant, 'ANT')
    set_type(dpf, 'DPF')

    for ind in (0, 1):
        add_component(bln[ind])
        add_component(rft[ind])
        add_component(opf[ind])
        add_component(rfr[ind])
        add_component(adc[ind])

        set_type(bln[ind], 'BLN')
        set_type(rft[ind], 'RFT')
        set_type(opf[ind], 'OPF')
        set_type(rfr[ind], 'RFR')
        set_type(adc[ind], 'ADC')

    # Dish i is connected at time i and disconnected again at time i + 1.
    connections = [(i, True), (i + 1, False)]

    for (time, connection) in connections:

        # The antenna-feed link is shared by both signal chains, so it is set exactly once here
        # rather than once more per chain (the repeats would be no-ops that only cost round trips).
        set_connection(name1=ant, name2=dpf, time=time, connection=connection)

        for ind in (0, 1):

            # Signal chain from the feed down to the correlator input; connect consecutive components.
            chain = (dpf, bln[ind], rft[ind], opf[ind], rfr[ind], adc[ind], cor)

            for upstream, downstream in zip(chain, chain[1:]):
                set_connection(name1=upstream, name2=downstream, time=time, connection=connection)

    print(f"Dish {i} done, took {(datetime.now() - now).total_seconds()}")




2326
Dish 333 done, took 0.197544
Dish 334 done, took 0.209106
Dish 335 done, took 0.209833
Dish 336 done, took 0.199674
Dish 337 done, took 0.207945
Dish 338 done, took 0.204405
Dish 339 done, took 0.206658
Dish 340 done, took 0.204888
Dish 341 done, took 0.205745
Dish 342 done, took 0.213439
Dish 343 done, took 0.201398
Dish 344 done, took 0.207851
Dish 345 done, took 0.210437
Dish 346 done, took 0.204921
Dish 347 done, took 0.21375
Dish 348 done, took 0.209429
Dish 349 done, took 0.213024
Dish 350 done, took 0.205804
Dish 351 done, took 0.204289
Dish 352 done, took 0.203109
Dish 353 done, took 0.198783
Dish 354 done, took 0.200974
Dish 355 done, took 0.199326
Dish 356 done, took 0.192619
Dish 357 done, took 0.207372
Dish 358 done, took 0.206926
Dish 359 done, took 0.200345
Dish 360 done, took 0.20613
Dish 361 done, took 0.219707
Dish 362 done, took 0.205066
Dish 363 done, took 0.19977
Dish 364 done, took 0.206428
Dish 365 done, took 0.212504
Dish 366 done, took 0.214837
Dish 367 don

In [39]:
# Test: build a traversal source that only sees the edges active at one instant.

time = 2

# An edge is kept when it started on or before <time> and either has no 'end' property or ended after <time>.
# Only an edge filter is supplied to the strategy; no vertex filter is given.
sg = g.withStrategies(
    SubgraphStrategy(
        edges=__.and_(
            __.has('start', P.lte(time)),
            __.or_(
                __.hasNot('end'),
                __.has('end', P.gt(time))
            )
        )
    )
)

# Vertex count, then edge count, as seen through the filtered traversal source.
print(sg.V().count().next())
print(sg.E().count().next())

12297
13


In [47]:
# Time the same subgraph query several times and report the total and mean duration in seconds.

# perf_counter is a monotonic, high-resolution clock intended for measuring short durations;
# datetime.now() is wall-clock time and can jump if the system clock is adjusted.
# Imported by name so the notebook variable <time> (a timestamp, not the module) is left untouched.
from time import perf_counter

# Number of timed repetitions; also the divisor for the mean below.
runs = 10

total = 0

for _ in range(runs):

    started = perf_counter()

    # The query under test: target vertices of every edge visible through <sg>, with their properties.
    sg.E().inV().valueMap().toList()

    total += perf_counter() - started

print(total, total / runs)

3.8695019999999998 0.38695019999999997
