# JanusGraph + HCD Complete Guide

This notebook demonstrates connecting to JanusGraph, querying the graph, and visualizing results.

**Stack Info:**
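- JanusGraph: ws://janusgraph-server:8182/gremlin (the setup cell falls back to `ws://localhost:18182/gremlin` unless `GREMLIN_URL` is set)
- HCD: hcd-server:9042 (the setup cell falls back to `localhost:9042` unless `HCD_HOST` / `HCD_PORT` are set)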
- Sample Data: 11 vertices (5 people, 3 companies, 3 products), 19 edges

**Contents:**
1. Setup & Connection
2. Basic Queries
3. Graph Traversals
4. Aggregations & Analytics
5. Visualization
6. Advanced Queries

## 1. Setup & Connection

Import libraries and establish connection to JanusGraph.

In [None]:
# Patch asyncio so a nested event loop can run inside Jupyter's own loop.
# The notebook author requires this to execute before any other import.
import nest_asyncio
nest_asyncio.apply()

# Standard library
import json
import os
import sys
from pathlib import Path

# Put the parent of the working directory on sys.path (once) before the
# third-party imports below are resolved
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

# Third-party: graph client, Cassandra driver, data handling and plotting
from gremlin_python.driver import client
from cassandra.cluster import Cluster
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network

# Connection settings: an environment variable wins, otherwise a local default
GREMLIN_URL = os.environ.get('GREMLIN_URL', 'ws://localhost:18182/gremlin')
HCD_HOST = os.environ.get('HCD_HOST', 'localhost')
HCD_PORT = int(os.environ.get('HCD_PORT', 9042))

print("\n".join([
    "✅ Libraries imported successfully",
    f"   Project root: {project_root}",
    f"   JanusGraph URL: {GREMLIN_URL}",
]))

In [None]:
# Create the Gremlin client bound to the 'g' traversal source
gc = client.Client(GREMLIN_URL, 'g')

# Smoke test: one vertex count and one edge count show the server answers queries
try:
    v_count = gc.submit('g.V().count()').all().result()[0]
    e_count = gc.submit('g.E().count()').all().result()[0]
    print("\n".join([
        "✅ Connected to JanusGraph",
        f"   Vertices: {v_count}",
        f"   Edges: {e_count}",
    ]))
except Exception as exc:
    # Report the failure instead of raising so the rest of the notebook can still be read
    print(f"❌ Connection failed: {exc}")

## 2. Basic Queries

Simple queries to explore the graph structure.

In [None]:
try:
    # Server-side tally of vertices, keyed by vertex label
    query = """
    g.V().groupCount().by(label)
    """

    # groupCount yields a single map; take it as the first (only) result
    result = gc.submit(query).all().result()[0]
    df = (
        pd.DataFrame(list(result.items()), columns=['Label', 'Count'])
        .sort_values('Count', ascending=False)
    )
    print("\n📊 Vertex Distribution:")
    display(df)
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Server-side tally of edges, keyed by edge label
    query = """
    g.E().groupCount().by(label)
    """

    # groupCount yields a single map; take it as the first (only) result
    result = gc.submit(query).all().result()[0]
    df = (
        pd.DataFrame(list(result.items()), columns=['Edge Type', 'Count'])
        .sort_values('Count', ascending=False)
    )
    print("\n📊 Edge Distribution:")
    display(df)
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Fetch three properties for every 'person' vertex. valueMap returns each
    # property as a list of values, hence the [0] when flattening below.
    query = """
    g.V().hasLabel('person').valueMap('full_name', 'age', 'nationality').toList()
    """

    result = gc.submit(query).all().result()

    # One flat record per person; a missing property becomes an empty string.
    # NOTE(review): the 'Location' column is filled from the 'nationality'
    # property — confirm that this heading is intended.
    people = [
        {
            'Name': person.get('full_name', [''])[0],
            'Age': person.get('age', [''])[0],
            'Location': person.get('nationality', [''])[0],
        }
        for person in result
    ]

    # Pin the columns so an empty result still yields a frame with a 'Name'
    # column; otherwise sort_values('Name') raises KeyError when no person exists.
    df = pd.DataFrame(people, columns=['Name', 'Age', 'Location'])
    print("\n👥 People in the Graph:")
    display(df.sort_values('Name'))
except Exception as e:
    print(f'⚠️ Skipped: {e}')


## 3. Graph Traversals

Navigate relationships between vertices.

In [None]:
try:
    # Start at the first 'person' vertex, follow outgoing owns_account edges,
    # and read the account_type of every vertex reached
    query = """
    g.V().hasLabel('person').limit(1).as('p')
      .out('owns_account')
      .values('account_type')
    """

    accounts = gc.submit(query).all().result()
    print("🏦 First person's accounts:")
    if accounts:
        for account_type in accounts:
            print(f'   - {account_type}')
    else:
        print('   (no accounts found)')
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Select 'account' vertices whose full_name equals 'person', walk owns_account
    # edges backwards, and read full_name / role of the vertices reached.
    # NOTE(review): the heading printed below says "DataStax Employees", which
    # does not obviously match this traversal — confirm the intended query.
    query = """
    g.V().has('account', 'full_name', 'person')
      .in('owns_account')
      .valueMap('full_name', 'role')
    """

    result = gc.submit(query).all().result()

    # valueMap wraps each property in a list; keep the first value or ''
    employees = [
        {
            'Name': emp.get('full_name', [''])[0],
            'Role': emp.get('role', [''])[0],
        }
        for emp in result
    ]

    df = pd.DataFrame(employees)
    print("\n🏢 DataStax Employees:")
    display(df)
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Select 'transaction' vertices whose full_name equals 'person', walk
    # owns_account edges backwards, and collect the full_name values reached.
    # NOTE(review): the heading printed below says "JanusGraph Users", which
    # does not obviously match this traversal — confirm the intended query.
    query = """
    g.V().has('transaction', 'full_name', 'person')
      .in('owns_account')
      .values('full_name')
    """

    users = gc.submit(query).all().result()
    print("\n📦 JanusGraph Users:")
    for user_name in users:
        print(f"   - {user_name}")
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Two owns_account hops out of 'person' vertices whose full_name equals
    # 'person'; each path is rendered through the full_name of its vertices.
    # NOTE(review): the heading printed below mentions Alice and products, which
    # this traversal does not reference — confirm the intended query.
    query = """
    g.V().has('person', 'full_name', 'person')
      .out('owns_account')
      .out('owns_account')
      .path()
      .by('full_name')
    """

    paths = gc.submit(query).all().result()
    print("\n🛤️  Alice's path to products:")
    for hop_names in paths:
        rendered = ' → '.join(hop_names)
        print(f"   {rendered}")
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Start at the first 'person' vertex, follow outgoing owns_account edges,
    # and read the account_type of every vertex reached.
    # NOTE(review): this cell repeats the "first person's accounts" query that
    # already appears earlier in this section.
    query = """
    g.V().hasLabel('person').limit(1).as('p')
      .out('owns_account')
      .values('account_type')
    """

    accounts = gc.submit(query).all().result()
    print("🏦 First person's accounts:")
    if accounts:
        for account_type in accounts:
            print(f'   - {account_type}')
    else:
        print('   (no accounts found)')
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


## 4. Aggregations & Analytics

Statistical analysis of the graph.

In [None]:
try:
    # Three separate server-side aggregations over the 'age' property of persons
    query_mean = "g.V().hasLabel('person').values('age').mean()"
    query_min = "g.V().hasLabel('person').values('age').min()"
    query_max = "g.V().hasLabel('person').values('age').max()"

    # Each aggregation returns a single value; an empty result raises IndexError,
    # which the handler below reports
    mean_age = gc.submit(query_mean).all().result()[0]
    min_age = gc.submit(query_min).all().result()[0]
    max_age = gc.submit(query_max).all().result()[0]

    print("\n".join([
        "\n📊 Age Statistics:",
        f"   Average: {mean_age:.1f} years",
        f"   Min: {min_age} years",
        f"   Max: {max_age} years",
    ]))
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Count 'person' vertices per value of the 'location' property.
    # NOTE(review): the people-listing cell reads 'nationality' rather than
    # 'location' — confirm which property the graph actually carries.
    query = """
    g.V().hasLabel('person').groupCount().by('location')
    """

    # groupCount yields a single map; take it as the first (only) result
    result = gc.submit(query).all().result()[0]
    df = (
        pd.DataFrame(list(result.items()), columns=['Location', 'Count'])
        .sort_values('Count', ascending=False)
    )

    print("\n📍 People by Location:")
    display(df)

    # Bar chart of the same table, with slanted tick labels
    plt.figure(figsize=(10, 5))
    plt.bar(df['Location'], df['Count'])
    plt.title('People Distribution by Location')
    plt.xlabel('Location')
    plt.ylabel('Number of People')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # For every 'person' vertex, pair its full_name with the number of
    # owns_account edges touching it (both directions).
    # NOTE(review): the heading printed below calls this a social-network
    # centrality, but only owns_account edges are counted — confirm intent.
    query = """
    g.V().hasLabel('person')
      .project('full_name', 'connections')
      .by('full_name')
      .by(bothE('owns_account').count())
    """

    result = gc.submit(query).all().result()
    centrality = [
        {
            'Person': item.get('full_name', 'unknown'),
            'Connections': item['connections'],
        }
        for item in result
    ]

    # Pin the columns so an empty result still has a 'Connections' column;
    # otherwise sort_values raises KeyError when no person vertex exists.
    df = pd.DataFrame(centrality, columns=['Person', 'Connections'])
    df = df.sort_values('Connections', ascending=False)
    print("\n🔗 Social Network Centrality:")
    display(df)
except Exception as e:
    print(f'⚠️ Skipped: {e}')


## 5. Visualization

Visualize the graph structure using NetworkX and PyVis.

In [None]:
try:
    # Pull every vertex as {id, label, full_name}; vertices without a full_name
    # property get the placeholder 'unknown'.
    vertices_query = """
    g.V().project('id', 'label', 'full_name')
      .by(id)
      .by(label)
      .by(coalesce(values('full_name'), constant('unknown')))
    """

    # Pull every edge as {source id, target id, label}.
    edges_query = """
    g.E().project('source', 'target', 'label')
      .by(outV().id())
      .by(inV().id())
      .by(label)
    """

    vertices = gc.submit(vertices_query).all().result()
    edges = gc.submit(edges_query).all().result()

    # Build the directed graph only after both queries succeeded.
    G = nx.DiGraph()

    for v in vertices:
        # The drawing and path-finding cells look the display name up under
        # 'full_name'; it used to be stored only as 'name', so those lookups
        # always fell back to the node id. 'name' is kept for compatibility.
        G.add_node(
            v['id'],
            label=v['label'],
            full_name=v['full_name'],
            name=v['full_name'],
        )

    for e in edges:
        G.add_edge(e['source'], e['target'], label=e['label'])

    print(f"\n✅ Graph loaded: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges")
except Exception as e:
    print(f'⚠️ Skipped: {e}')


In [None]:
try:
    from matplotlib.patches import Patch

    # Static drawing of the NetworkX graph G built in the previous cell.
    plt.figure(figsize=(15, 10))

    # Force-directed layout; the fixed seed keeps node positions identical
    # across re-runs of the notebook.
    pos = nx.spring_layout(G, k=2, iterations=50, seed=42)

    # One colour per vertex label; anything else is drawn grey.
    color_map = {'person': '#FF6B6B', 'account': '#4ECDC4', 'transaction': '#95E1D3'}
    node_colors = [
        color_map.get(G.nodes[node].get('label', 'unknown'), '#CCCCCC')
        for node in G.nodes()
    ]

    # Display names: prefer 'full_name', fall back to the 'name' attribute
    # (the key the graph-building cell has used), then to the node id.
    node_labels = {
        n: G.nodes[n].get('full_name', G.nodes[n].get('name', str(n)))
        for n in G.nodes()
    }

    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=1500, alpha=0.9)
    nx.draw_networkx_labels(G, pos, labels=node_labels, font_size=8, font_weight='bold')
    nx.draw_networkx_edges(G, pos, alpha=0.5, arrows=True, arrowsize=15, arrowstyle='->')

    # Legend entries are derived from color_map so the text always matches the
    # vertex labels that are actually coloured (previously 'Company'/'Product'
    # were shown for the 'account'/'transaction' colours).
    legend_elements = [
        Patch(facecolor=color, label=vertex_label.capitalize())
        for vertex_label, color in color_map.items()
    ]
    plt.legend(handles=legend_elements, loc='upper left')

    plt.title('JanusGraph Network Visualization', fontsize=16, fontweight='bold')
    plt.axis('off')
    plt.tight_layout()
    plt.show()
except Exception as e:
    print(f'⚠️ Skipped: {e}')


In [None]:
try:
    from IPython.display import IFrame, display

    # Interactive rendering of G with PyVis. Relies on G (graph-building cell)
    # and color_map (Matplotlib cell) already being defined.
    output_file = 'graph_interactive.html'
    net = Network(height='750px', width='100%', directed=True, notebook=True)

    # Physics settings passed to PyVis as a JSON options string
    net.set_options("""
    {
      "physics": {
        "barnesHut": {
          "gravitationalConstant": -8000,
          "springLength": 200,
          "springConstant": 0.04
        },
        "minVelocity": 0.75
      }
    }
    """)

    for node in G.nodes():
        attrs = G.nodes[node]
        label = attrs.get('label', 'unknown')
        # Prefer 'full_name', fall back to the 'name' attribute (the key the
        # graph-building cell has used), then to the node id.
        name = attrs.get('full_name', attrs.get('name', str(node)))
        color = color_map.get(label, '#CCCCCC')
        net.add_node(str(node), label=name, title=f"{label}: {name}", color=color, size=25)

    for source, target, data in G.edges(data=True):
        edge_label = data.get('label', '')
        net.add_edge(str(source), str(target), title=edge_label, label=edge_label)

    # Write the HTML next to the notebook's working directory, then embed it
    net.show(output_file)
    display(IFrame(src=output_file, width='100%', height='750px'))

    print("\n✅ Interactive graph displayed above")
    # Report the real location instead of a hard-coded 'notebooks/' path, which
    # is only correct when the kernel happens to run from that directory.
    print(f"   File saved to: {Path(output_file).resolve()}")
except Exception as e:
    print(f'⚠️ Skipped: {e}')


## 6. Advanced Queries

Complex graph patterns and analytics.

In [None]:
try:
    # Shortest path between the first two nodes of G, ignoring edge direction.
    G_undirected = G.to_undirected()
    nodes = list(G.nodes())
    if len(nodes) >= 2:
        try:
            path = nx.shortest_path(G_undirected, nodes[0], nodes[1])
            # Display name per node: 'full_name', else the 'name' attribute (the
            # key the graph-building cell has used), else the vertex label,
            # else the node id.
            labels = [
                G.nodes[n].get(
                    'full_name',
                    G.nodes[n].get('name', G.nodes[n].get('label', str(n))),
                )
                for n in path
            ]
            # shortest_path returns the nodes on the path, so a path of k nodes
            # crosses k - 1 edges; the hop count is therefore len(path) - 1.
            print(f'Shortest path ({len(path) - 1} hops):')
            print(' → '.join(labels))
        except nx.NetworkXNoPath:
            print('No path found between selected nodes')
    else:
        print('Not enough nodes for path finding')
except Exception as e:
    print(f'⚠️ Skipped: {e}')


In [None]:
try:
    # Start at the first 'person' vertex, follow outgoing owns_account edges,
    # and read the account_type of every vertex reached.
    # NOTE(review): this cell repeats the "first person's accounts" query from
    # the Graph Traversals section.
    query = """
    g.V().hasLabel('person').limit(1).as('p')
      .out('owns_account')
      .values('account_type')
    """

    accounts = gc.submit(query).all().result()
    print("🏦 First person's accounts:")
    if accounts:
        for account_type in accounts:
            print(f'   - {account_type}')
    else:
        print('   (no accounts found)')
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # Pair each 'transaction' vertex with every other vertex reachable by going
    # in and then out along owns_account edges, reporting both ends by full_name.
    # NOTE(review): the heading printed below talks about products, while the
    # traversal starts from 'transaction' vertices — confirm the intended schema.
    query = """
    g.V().hasLabel('transaction').as('product1')
      .in('owns_account').out('owns_account').as('product2')
      .where('product1', neq('product2'))
      .select('product1', 'product2')
      .by('full_name')
      .dedup()
    """

    result = gc.submit(query).all().result()
    print("\n🔗 Product Collaboration Network:")
    print("   (Products used by the same people)\n")
    for pair in result:
        left, right = pair['product1'], pair['product2']
        print(f"   {left} ↔ {right}")
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


In [None]:
try:
    # From 'person' vertices whose full_name equals 'person': go out along
    # owns_account, back in to the other owners (excluding the start vertex),
    # out again, and list the distinct full_name values reached.
    # NOTE(review): the heading printed below names David and coworkers, neither
    # of which appears in the traversal — confirm the intended query.
    query = """
    g.V().has('person', 'full_name', 'person').as('person')
      .out('owns_account')
      .in('owns_account')
      .where(neq('person'))
      .out('owns_account')
      .dedup()
      .values('full_name')
    """

    recommendations = gc.submit(query).all().result()
    print("\n💡 Product recommendations for David (based on coworkers):")
    for suggestion in recommendations:
        print(f"   - {suggestion}")
except Exception as exc:
    print(f'⚠️ Skipped: {exc}')


## Summary

This notebook covered:
1. ✅ Connecting to JanusGraph (HCD host and port are configured, but the notebook opens no direct HCD session)
2. ✅ Basic graph queries and statistics
3. ✅ Graph traversals and path finding
4. ✅ Aggregations and analytics
5. ✅ Multiple visualization approaches
6. ✅ Advanced patterns (shortest paths, shared-connection networks, recommendations)

**Next Steps:**
- Customize queries for your domain
- Add your own data and relationships
- Export results to files for further analysis
- Build dashboards using visualization outputs

**Resources:**
- JanusGraph Docs: https://docs.janusgraph.org/
- Gremlin Reference: https://tinkerpop.apache.org/docs/current/reference/
- NetworkX Docs: https://networkx.org/documentation/stable/

In [None]:
try:
    # Release the Gremlin client opened in the connection cell
    gc.close()
    print('\n✅ Connection closed')
except Exception as exc:
    # Closing is best-effort: report the problem rather than raising
    print(f'⚠️ Skipped: {exc}')
