In [54]:
from humemai.utils import disable_logger
disable_logger()

from gremlin_python.structure.graph import Graph, Vertex, Edge
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.serializer import GraphSONSerializersV3d0
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.traversal import P, T, Direction

import json
from humemai.janusgraph import Humemai
from tqdm.auto import tqdm
from datetime import datetime

humemai = Humemai()
humemai.start_containers()
humemai.connect()
humemai.remove_all_data()

# humemai.disconnect()
# humemai.stop_containers()
# humemai.remove_containers()

from humemai.utils import disable_logger
disable_logger()
g = humemai.g


In [55]:
datetime.now().isoformat(timespec='seconds')

'2024-12-06T11:48:29'

## add graphs

In [127]:
# Clear existing data in the graph
g.V().drop().iterate()
g.E().drop().iterate()
print("Graph data cleared.")

# Create Episode 1: a1(ep1) -> b1(ep1)
a1_ep1 = g.addV("a1").property("event_time", "2024-12-06T11:48:29").next()
b1_ep1 = g.addV("b1").property("event_time", "2024-12-06T11:48:29").next()

# Create edge1 from a1_ep1 to b1_ep1
g.V(a1_ep1.id).addE("edge1").property("event_time", "2024-12-06T11:48:29").to(
    b1_ep1
).iterate()

# Create Episode 2: b1(ep2) -> c1(ep2)
b1_ep2 = g.addV("b1").property("event_time", "2024-12-07T11:48:30").next()
c1_ep2 = g.addV("c1").property("event_time", "2024-12-07T11:48:30").next()

# Create edge2 from b1_ep2 to c1_ep2
g.V(b1_ep2.id).addE("edge2").property("event_time", "2024-12-07T11:48:30").to(
    c1_ep2
).iterate()

# Create Episode 3: d1(ep3) -> c1(ep3) -> b1(ep3)
d1_ep3 = g.addV("d1").property("event_time", "2024-12-08T11:48:31").next()
c1_ep3 = g.addV("c1").property("event_time", "2024-12-08T11:48:31").next()
b1_ep3 = g.addV("b1").property("event_time", "2024-12-08T11:48:31").next()

# Create edge3 from c1_ep3 to b1_ep3
g.V(c1_ep3.id).addE("edge3").property("event_time", "2024-12-08T11:48:31").to(
    b1_ep3
).iterate()
# Create edge4 from d1_ep3 to c1_ep3
g.V(d1_ep3.id).addE("edge4").property("event_time", "2024-12-08T11:48:31").to(
    c1_ep3
).iterate()

repeated_labels = [
    key for key, val in g.V().label().groupCount().toList()[0].items() if val > 1
]

Graph data cleared.


In [129]:
for label in repeated_labels:
    # Create a reference node for the label
    reference_node = g.addV(f"reference_{label}").property("type", "reference").next()
    
    # Connect each vertex with the duplicate label to the reference node
    for vertex in g.V().hasLabel(label).toList():
        g.V(vertex.id).addE("connected_to_reference").to(reference_node).iterate()

In [122]:
[key for key, val in g.V().label().groupCount().toList()[0].items() if val > 1 ]

['c1', 'b1']

In [104]:
label_counts = g.V().label().groupCount().toList()
print("Label counts:", label_counts)

label_counts_unfolded = g.V().label().groupCount().unfold().toList()
print("Unfolded label counts:", label_counts_unfolded)

repeated_labels = [
    key for key, value in g.V().label().groupCount().unfold().toList() if value > 1
]
print("Labels that occur more than once:", repeated_labels)


Label counts: [{'a1': 1, 'd1': 1, 'c1': 2, 'b1': 3}]
Unfolded label counts: [{'a1': 1}, {'d1': 1}, {'c1': 2}, {'b1': 3}]


ValueError: not enough values to unpack (expected 2, got 1)

In [97]:
from gremlin_python.process.traversal import P

# Find labels that occur more than once
common_labels = (
    g.V()
    .label()                # Get all labels of vertices
    .groupCount()           # Count occurrences of each label
    .unfold()               # Convert map to key-value pairs
    .where(__.select("value").is_(P.gt(1)))  # Keep key-value pairs where the count > 1
    .select("key")          # Retrieve only the keys (labels)
    .toList()               # Convert to a list
)

# Print the common labels
print("Labels that occur more than once:", common_labels)


Labels that occur more than once: []


In [84]:
g.V().label().groupCount().toList()

[{'a1': 1, 'd1': 1, 'c1': 2, 'b1': 3}]

In [83]:
g.V().label().groupCount().unfold().toList()

[{'a1': 1}, {'d1': 1}, {'c1': 2}, {'b1': 3}]

In [86]:
g.V().label().groupCount().unfold().filter(__.select(values).is_(P.gt(0))).toList()

NameError: name 'values' is not defined

In [77]:
g.V().label().unfold().toList()

['a1', 'b1', 'b1', 'b1', 'd1', 'c1', 'c1']

In [74]:
# Get all vertex labels
labels = g.V().label().toList()

# Count the occurrences of each label
label_counts = Counter(labels)


NameError: name 'Counter' is not defined

In [52]:
{prop.key : prop.value for prop in a1_ep1.properties}

{'episode_id': 1, 'foo': 1}

In [None]:
humemai.remove_all_data()


# -----------------------------------------------------------
# CREATE THE EPISODES
# Episode 1: a1(ep1) -> b1(ep1)
g.addV("a1").property("episode_id", 1).as_("a1_ep1") \
 .addV("b1").property("episode_id", 1).as_("b1_ep1") \
 .addE("edge1").from_("a1_ep1").to("b1_ep1").iterate()

# Episode 2: b1(ep2) -> c1(ep2)
g.addV("b1").property("episode_id", 2).as_("b1_ep2") \
 .addV("c1").property("episode_id", 2).as_("c1_ep2") \
 .addE("edge2").from_("b1_ep2").to("c1_ep2").iterate()

# Episode 3: b1(ep3) -> d1(ep3)
g.addV("b1").property("episode_id", 3).as_("b1_ep3") \
 .addV("d1").property("episode_id", 3).as_("d1_ep3") \
 .addE("edge3").from_("b1_ep3").to("d1_ep3").iterate()

# Create the reference_b1 node
g.addV("reference_b1").as_("mb").iterate()

# LINK EACH b1 TO reference_b1
g.V().hasLabel("b1").has("episode_id", 1).as_("b1_ep1") \
 .V().hasLabel("reference_b1").as_("mb") \
 .addE("virtual_link").from_("b1_ep1").to("mb").iterate()

g.V().hasLabel("b1").has("episode_id", 2).as_("b1_ep2") \
 .V().hasLabel("reference_b1").as_("mb") \
 .addE("virtual_link").from_("b1_ep2").to("mb").iterate()

g.V().hasLabel("b1").has("episode_id", 3).as_("b1_ep3") \
 .V().hasLabel("reference_b1").as_("mb") \
 .addE("virtual_link").from_("b1_ep3").to("mb").iterate()

# -----------------------------------------------------------
# DEBUG: PRINT ALL VERTICES AND EDGES
print("All Vertices:")
for v in g.V().toList():
    # valueMap(True) includes id, label, and properties
    props = g.V(v.id).valueMap(True).next()
    print("Vertex:", props)

print("\nAll Edges:")
for e in g.E().toList():
    props_list = g.E(e.id).valueMap(True).toList()
    if props_list:
        print("Edge:", props_list)
    else:
        print("Edge has no properties:", e.id, e.label)

# -----------------------------------------------------------
# TEST BASIC TRAVERSALS
print("\nStep Checks:")
# 1. From a1(ep1) to b1(ep1)
step1 = g.V().hasLabel("a1").has("episode_id",1).out("edge1").label().toList()
print("From a1(ep1) to b1(ep1):", step1)

# 2. From b1(ep1) to reference_b1
step2 = g.V().hasLabel("b1").has("episode_id",1).out("virtual_link").label().toList()
print("From b1(ep1) to reference_b1:", step2)

# 3. From meta_b1 to all b1 nodes (via in virtual_link)
step3 = g.V().hasLabel("meta_b1").in_("virtual_link").valueMap(True).toList()
print("From meta_b1 in virtual_link:", step3)

# 4. From b1(ep2) to c1(ep2)
step4 = g.V().hasLabel("b1").has("episode_id",2).out("edge2").label().toList()
print("From b1(ep2) to c1(ep2):", step4)

# -----------------------------------------------------------
# FULL PATH:
# a1(ep1) -> b1(ep1) -> meta_b1 -> b1(ep2) -> c1(ep2)
# Use valueMap(True) in the path step so we can access properties easily

print("\nFull combined path from a1(ep1) to c1(ep2) via meta_b1:")
full_paths = g.V().hasLabel("a1").has("episode_id",1) \
    .out("edge1")             \
    .out("virtual_link")      \
    .in_("virtual_link")      \
    .has("episode_id",2)      \
    .out("edge2")             \
    .path().by(__.valueMap(True)).toList()

if not full_paths:
    print("No paths found. Check your setup.")
else:

    for p in full_paths:
        for element in p.objects:
            print(element)  # Print the entire map to inspect keys


    for p in full_paths:
        path_info = []
        # p is a Path; p.objects is a list of dicts (from valueMap(True))
        for element in p.objects:
            # element is now a dict with keys like '~id', '~label', 'episode_id'
            label = element["~label"]
            # episode_id is stored as a list in valueMap, e.g. {'episode_id': [1]}
            ep_id = element.get("episode_id", [None])[0]
            path_info.append((label, ep_id))
        print(path_info)

TypeError: 'NoneType' object is not an iterator

In [28]:
g.V().hasLabel("b1").has("episode_id", 2).as_("b1_ep2").V().hasLabel("meta_b1")

[['V'], ['hasLabel', 'b1'], ['has', 'episode_id', 2], ['as', 'b1_ep2'], ['V'], ['hasLabel', 'meta_b1'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]