# JanusGraph Data Exploration

Interactive notebook for querying and visualizing JanusGraph data.

**Environment**: Conda-forge Python 3.11
**Stack**: HCD + JanusGraph + NetworkX + Matplotlib

In [13]:
# Clear all data
gc.submit("g.V().drop().iterate()").all().result()
 c.submit("graph.tx().commit()").all().result()

IndentationError: unexpected indent (2176744238.py, line 2)

## Setup: Fix Event Loop for Jupyter

In [9]:
import nest_asyncio
nest_asyncio.apply()
from gremlin_python.driver import client
gc = client.Client('ws://janusgraph-server:8182/gremlin', 'g')

## Import Libraries

In [10]:
import os
from gremlin_python.driver import client, serializer
import networkx as nx
import matplotlib.pyplot as plt
import pandas as pd
from pyvis.network import Network

# Configuration
GREMLIN_URL = os.getenv('GREMLIN_URL', 'ws://janusgraph-server:8182/gremlin')
print(f'Connecting to: {GREMLIN_URL}')

Connecting to: ws://janusgraph-server:8182/gremlin


## Connect to JanusGraph

In [11]:
gc = client.Client(
    GREMLIN_URL,
    'g',
    message_serializer=serializer.GraphSONSerializersV3d0()
)

print('✅ Connected to JanusGraph')

✅ Connected to JanusGraph


## 1. Basic Statistics

In [12]:
# Get counts
vertex_count = gc.submit('g.V().count()').all().result()[0]
edge_count = gc.submit('g.E().count()').all().result()[0]

print(f'Vertices: {vertex_count}')
print(f'Edges: {edge_count}')

Vertices: 0
Edges: 0


In [None]:
# Get vertex labels and counts
labels = gc.submit('g.V().label().dedup()').all().result()
print('\nVertex Labels:', labels)
print('\nCounts by label:')

for label in labels:
    count = gc.submit(f"g.V().hasLabel('{label}').count()").all().result()[0]
    print(f'  {label}: {count}')

## 2. Fetch Graph Data

In [None]:
# Fetch all vertices
vertices_raw = gc.submit('g.V().elementMap()').all().result()
vertices = pd.DataFrame(vertices_raw)

print(f'Fetched {len(vertices)} vertices')
vertices.head()

In [None]:
# Fetch all edges
edges_raw = gc.submit('g.E().elementMap()').all().result()
edges = pd.DataFrame(edges_raw)

print(f'Fetched {len(edges)} edges')
edges.head()

## 3. NetworkX Graph Construction

In [None]:
# Build NetworkX graph
G = nx.DiGraph()

# Add vertices
for v in vertices_raw:
    node_id = v['id']
    label = v.get('label', 'unknown')
    name = v.get('name', f'Node {node_id}')
    G.add_node(node_id, label=label, name=name, **v)

# Add edges
for e in edges_raw:
    G.add_edge(e['inV'], e['outV'], label=e.get('label', ''), **e)

print(f'NetworkX graph: {G.number_of_nodes()} nodes, {G.number_of_edges()} edges')

## 4. Basic Visualization

In [None]:
# Simple matplotlib visualization
plt.figure(figsize=(14, 10))

# Color nodes by type
color_map = {'person': '#ff9999', 'company': '#9999ff', 'product': '#99ff99'}
node_colors = [color_map.get(G.nodes[node].get('label', ''), '#cccccc') for node in G.nodes()]

# Layout
pos = nx.spring_layout(G, k=0.5, iterations=50)

# Draw
nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=800, alpha=0.9)
nx.draw_networkx_edges(G, pos, alpha=0.3, arrows=True, arrowsize=15)

# Labels (truncate long names)
labels = {node: G.nodes[node].get('name', '')[:15] for node in G.nodes()}
nx.draw_networkx_labels(G, pos, labels, font_size=9)

plt.title('JanusGraph Social Network', fontsize=16)
plt.axis('off')
plt.tight_layout()
plt.show()

## 5. Interactive Visualization (PyVis)

In [None]:
# Create PyVis network
net = Network(height='600px', width='100%', notebook=True, directed=True)

# Add nodes
for node in G.nodes():
    label = G.nodes[node].get('label', '')
    name = G.nodes[node].get('name', str(node))
    color = color_map.get(label, '#cccccc')
    net.add_node(node, label=name, title=f"{label}: {name}", color=color)

# Add edges
for edge in G.edges():
    edge_label = G.edges[edge].get('label', '')
    net.add_edge(edge[0], edge[1], title=edge_label)

# Physics
net.barnes_hut(gravity=-8000, central_gravity=0.3, spring_length=100)

# Save and display
output_path = '/workspace/exports/graph_interactive.html'
net.save_graph(output_path)
print(f'Interactive graph saved to: {output_path}')
print('View in browser: ./exports/graph_interactive.html')

# Display inline
net.show(output_path)

## 6. Graph Metrics

In [None]:
# Degree distribution
degrees = dict(G.degree())
degree_df = pd.DataFrame(list(degrees.items()), columns=['Node', 'Degree'])
degree_df['Name'] = degree_df['Node'].apply(lambda x: G.nodes[x].get('name', ''))
degree_df = degree_df.sort_values('Degree', ascending=False)

print('Top 10 Most Connected Nodes:')
degree_df.head(10)

In [None]:
# PageRank (centrality)
pagerank = nx.pagerank(G)
pr_df = pd.DataFrame(list(pagerank.items()), columns=['Node', 'PageRank'])
pr_df['Name'] = pr_df['Node'].apply(lambda x: G.nodes[x].get('name', ''))
pr_df = pr_df.sort_values('PageRank', ascending=False)

print('Top 10 by PageRank:')
pr_df.head(10)

## 7. Custom Queries

In [None]:
# Find shortest path between two people
query = """
g.V().has('person', 'name', 'Alice Johnson')
 .repeat(out('knows').simplePath())
 .until(has('person', 'name', 'Eve Davis'))
 .path().by('name').limit(1)
"""

path = gc.submit(query).all().result()
print('Shortest path (Alice → Eve):', path)

In [None]:
# Find who works at DataStax
query = """
g.V().has('company', 'name', 'DataStax')
 .in('worksFor')
 .valueMap('name', 'role')
"""

employees = gc.submit(query).all().result()
print('DataStax employees:')
for emp in employees:
    name = emp.get('name', ['Unknown'])[0]
    role = emp.get('role', ['Unknown'])[0]
    print(f'  {name}: {role}')

In [None]:
# People in San Francisco
query = """
g.V().hasLabel('person')
 .has('location', 'San Francisco')
 .values('name')
"""

sf_people = gc.submit(query).all().result()
print('People in San Francisco:', sf_people)

## 8. Export Graph Data

In [None]:
# Export to GraphML (for Gephi)
export_path = '/workspace/exports/graph.graphml'
nx.write_graphml(G, export_path)
print(f'Graph exported to: {export_path}')
print('Available on host: ./exports/graph.graphml')

## 9. Close Connection

In [None]:
gc.close()
print('✅ Connection closed')

## Summary

This notebook demonstrated:
- ✅ Connecting to JanusGraph
- ✅ Basic statistics and queries
- ✅ Fetching and analyzing graph data
- ✅ Static visualization (matplotlib)
- ✅ Interactive visualization (PyVis)
- ✅ Graph metrics (degree, PageRank)
- ✅ Custom traversal queries
- ✅ Exporting data

### Next Steps
- Try your own queries
- Add more visualizations
- Explore graph algorithms
- Export to Gephi for advanced analysis