Goal of this notebook: 
- Implement a system that represents the properties of an element as adjacent "property vertices"
    - Each "property vertex" stores the timestamp of the property, the property index, and whether the corresponding element was active at that time
- Show how, given a time, a vertex can be queried to determine whether it was active at that time

In [7]:
# Jupyter notebook needs this or else it will crash
import nest_asyncio
nest_asyncio.apply()

from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection

# Instantiate a client-side Gremlin Graph; it is only used to build the traversal source below.
graph = Graph()

# Connect to the Gremlin Server at ws://localhost:8182/gremlin and bind the traversal source
# `g` to the server-side traversal source named 'g'.
# `g` is a module-level name: later cells (including add_property) use it directly.
# NOTE(review): the connection is never closed in the visible cells.
g = graph.traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin','g'))

# Get the vertices of the graph as a list, and print them.
# This shows whatever is currently stored on the server before the notebook modifies it.

print(g.V().toList())

[v[12400], v[49352], v[45256], v[24752], v[20560], v[49184], v[53448], v[36872], v[32776], v[24680], v[28680]]


Drop all vertices of the graph.

In [8]:
# WARNING: destructive -- drops every vertex in the graph that `g` is connected to.
# iterate() executes the traversal for its side effect only.
# NOTE(review): the output shown for this cell looks like the repr of the traversal returned by
# iterate(), not query results; a trailing `;` would hide it -- confirm.
g.V().drop().iterate()

[['V'], ['drop'], ['none'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]

In [9]:
# Create the "element" vertex named ANT000 that the rest of the notebook attaches properties to.
# .next() is what submits the traversal and actually creates the vertex.
g.addV().property('name', 'ANT000').next()

# Print the value maps of all vertices currently in the graph.
print(g.V().valueMap().toList())

# Traversal that selects the element vertex by its 'name' property.
traversal = g.V().has('name', 'ANT000')

# Count the outgoing edges of the element vertex.
# NOTE(review): the extra steps are chained onto `traversal` itself and it is iterated here, so
# `traversal` presumably should not be reused after this line -- confirm against gremlin_python.
print(traversal.outE().count().next())

[{'name': ['ANT000']}]
0


In [10]:
def add_property(traversal: any, time: float, active: bool) -> None:
    """
    Attach a new "property vertex" to the element vertex selected by <traversal>.

    <traversal> should be a traversal such that traversal.next() is a Vertex. It is only ever
    used through traversal.clone(), so the object passed in is not iterated by this function.

    The new property vertex stores:
        'prop_ind' - number of 'had_property' edges the element vertex had before this call
        'time'     - <time>
        'active'   - <active>

    An edge labelled 'had_property' is then added from the element vertex to the property vertex.
    Uses the module-level traversal source `g`.
    """

    # Element vertex that receives the new property.
    element = traversal.clone().next()

    # How many properties the element already has; this becomes the new property's index.
    next_index = traversal.clone().outE('had_property').count().next()

    # Build the property vertex in one chained traversal. Nothing is added to the graph
    # until .next() terminates the traversal.
    property_vertex = (
        g.addV()
        .property('prop_ind', next_index)
        .property('time', time)
        .property('active', active)
        .next()
    )

    # Connect element vertex -> property vertex.
    g.V(element).addE('had_property').to(property_vertex).next()



In [11]:
# Record six (time, active) observations for ANT000, in the order listed.
# A fresh traversal is built for every call.
for timestamp, is_active in [(2, True), (4, False), (6, False), (8, True), (10, True), (12, True)]:
    add_property(traversal=g.V().has('name', 'ANT000'), time=timestamp, active=is_active)

In [12]:
# .order().by('time') sorts the property vertices by their 'time' property.
# First print: the full value map of every property vertex of ANT000, in time order.
print(g.V().has('name', 'ANT000').out('had_property').order().by('time').valueMap().toList())

# Second print: only the 'time' values, in the same order.
print(g.V().has('name', 'ANT000').out('had_property').order().by('time').values('time').toList())

[{'prop_ind': [0], 'active': [True], 'time': [2]}, {'prop_ind': [1], 'active': [False], 'time': [4]}, {'prop_ind': [2], 'active': [False], 'time': [6]}, {'prop_ind': [3], 'active': [True], 'time': [8]}, {'prop_ind': [4], 'active': [True], 'time': [10]}, {'prop_ind': [5], 'active': [True], 'time': [12]}]
[2, 4, 6, 8, 10, 12]


In [13]:
def get_next_smallest_index(val, lst) -> int:
    """
    Binary-search the list lst (sorted in increasing order) for val.

    Returns the index of an element equal to val if the search lands on one. Otherwise returns
    the index of the largest element that is smaller than val.

    Returns -1 when no element is <= val (including when lst is empty). Callers must check for
    -1 before indexing, because lst[-1] is the LAST element in Python.
    """

    low = 0
    high = len(lst) - 1

    while low <= high:
        middle = (low + high) // 2
        pivot = lst[middle]

        if val > pivot:
            # Target lies strictly to the right of the pivot.
            low = middle + 1
            continue

        if val < pivot:
            # Target lies strictly to the left of the pivot.
            high = middle - 1
            continue

        # Neither greater nor smaller: exact hit.
        return middle

    # No exact hit: low is where val would be inserted, so the element just before it
    # is the closest one below val.
    return low - 1

# Demo: for each probe value i, print i next to the list element the helper selects.
# NOTE: for a probe smaller than every element (i = 0) the helper returns -1, and lst[-1] is
# the LAST element in Python, so that row prints 7 instead of indicating "no match".
lst = [1, 3, 5, 7]
for i in range(10):
    print(i, lst[get_next_smallest_index(i, lst)])

0 7
1 1
2 1
3 3
4 3
5 5
6 5
7 7
8 7
9 7


In [14]:
# Here's how checking whether a vertex was active or not at a time COULD work.

def check_active_at_time_1(name: str, time: float) -> bool:
    """
    Check whether or not the vertex with name <name> was active at time <time>.

    Queries the graph twice: once for the sorted timestamps of all property vertices, and once
    for the 'active' flag of the property vertex with the latest timestamp that is <= <time>.

    Returns False when the vertex has no property vertex at or before <time>.
    Uses the module-level traversal source `g`.
    """

    times = g.V().has('name', name).out('had_property').order().by('time').values('time').toList()

    index = get_next_smallest_index(time, times)

    if index == -1:
        # No property was recorded at or before <time>.
        return False

    # Select the property vertex by its timestamp rather than by 'prop_ind'. `index` is a
    # position in the TIME-sorted list, whereas 'prop_ind' records insertion order; the two
    # only coincide when properties happen to have been added in chronological order.
    # If several property vertices share this timestamp, the first one returned is used.
    return g.V().has('name', name).out('had_property').has('time', times[index]).values('active').next()

def check_active_at_time_2(name: str, time: float) -> bool:
    """
    Check whether or not the vertex with name <name> was active at time <time>.

    The graph is queried exactly once; the lookup itself happens in Python.

    Returns False when the vertex has no property vertex at or before <time>.
    Uses the module-level traversal source `g`.
    """

    # Value maps of every property vertex reached over 'had_property', ordered by 'time'.
    # Each value in a value map is a list, hence the [0] when reading a field below.
    property_maps = g.V().has('name', name).out('had_property').order().by('time').valueMap().toList()

    # Timestamps in the same order as property_maps.
    timestamps = [entry['time'][0] for entry in property_maps]

    position = get_next_smallest_index(time, timestamps)

    return False if position == -1 else property_maps[position]['active'][0]

def dummy() -> bool:
    """
    Run the same list extraction and lookup as the graph-backed checks, on hard-coded data.

    No graph query is made, so timing this isolates the cost of the pure-Python work.
    """
    samples = [
        {'time': [2], 'active': [True]},
        {'time': [3], 'active': [True]},
        {'time': [4], 'active': [False]},
        {'time': [5], 'active': [True]},
        {'time': [6], 'active': [False]},
        {'time': [7], 'active': [True]},
        {'time': [8], 'active': [False]},
        {'time': [9], 'active': [True]},
    ]

    # Timestamps in the same order as samples.
    timestamps = [entry['time'][0] for entry in samples]

    position = get_next_smallest_index(6, timestamps)

    if position != -1:
        return samples[position]['active'][0]
    return False
    


In [20]:
from timeit import Timer

# NOTE(review): `name` is not referenced by the Timer statements below, which hard-code "ANT000".
name = 'ANT000'

# Each Timer below executes its statement 100 times; timeit() returns the total elapsed seconds.

# Variant 1: two graph queries per call.
t = Timer('check_active_at_time_1("ANT000", 2)','from __main__ import check_active_at_time_1')
print("Querying graph several times per call:", t.timeit(number=100))

# Variant 2: one graph query per call, remaining work done in Python.
t = Timer('check_active_at_time_2("ANT000", 2)','from __main__ import check_active_at_time_2')
print("Querying graph once per call:", t.timeit(number=100))

# Baseline: same Python-side work on hard-coded data, no graph query at all.
t = Timer('dummy()','from __main__ import dummy')
print("Performing the same operations but not querying graph:", t.timeit(number=100))



Querying graph several times per call: 3.1038650999998936
Querying graph once per call: 1.5480646999999408
Performing the same operations but not querying graph: 0.0005719000000681262


In [21]:
# Timing querying the graph
from timeit import Timer


# Time 100 executions of a single count query, to measure the cost of a graph round trip on
# its own (compare with the per-call timings of the check_active_at_time_* functions).
t = Timer("g.V().has('name', 'ANT000').count().next()",'from __main__ import g')
print("Querying the graph:", t.timeit(number=100))

Querying the graph: 1.5762877000001936


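## Conclusion: querying the graph from Python TAKES A LONG TIME!!!

In the timings above, 100 calls that query the graph once take about 1.5 s (roughly 15 ms per query), and 100 calls that query it twice take about 3.1 s, while the same logic without any graph query takes under a millisecond in total. The cost is dominated by the number of queries, not by the Python-side work.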

Another note: ideally, you wouldn't query the graph separately for every vertex. Instead, fetch a whole 'path of vertices' from JanusGraph together with their value maps in a single query, then do the remaining processing in Python.