# Neptune to NetworkX to JSON for D3 Visualization via Gremlin

In [218]:
#!pip install nest-asyncio

In [219]:
#!pip install igraph

In [221]:
import networkx as nx
import pandas as pd
import igraph as ig
import json
from networkx.readwrite import json_graph

from __future__  import print_function  # Python 2/3 compatibility

from gremlin_python import statics
from gremlin_python.structure.graph import Graph
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.strategies import *
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection

In [82]:
# Patch asyncio so that event loops can be nested. This is necessary to avoid
# the "Cannot run the event loop while another loop is running" error.
import nest_asyncio
nest_asyncio.apply()

### Create connection to DB. 

A graph query is often referred to as a traversal as that is what we are in fact doing. We are traversing the graph from a starting point to an ending point. Traversals consist of one or more steps (essentially methods) that are chained together.

In [223]:
# Open the remote connection to the Gremlin endpoint first, then bind a
# traversal source `g` to it. Every query in this notebook is issued via `g`.
remoteConn = DriverRemoteConnection(
    url='ws://mgt-prd-infr-neptune-alb-122161610.us-east-1.elb.amazonaws.com:8182/gremlin',
    traversal_source='g',
)

graph = Graph()
g = graph.traversal().withRemote(remoteConn)

Sample to show connection worked: Return 10 vertices. 

In [224]:
# Smoke test for the connection: fetch at most 10 vertices and print them.
print(g.V().limit(10).toList())

[v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8], v[9], v[10]]


### Sample Gremlin Queries

Count Nodes

In [225]:
# Count all vertices; next() pulls the single count value out of the traversal.
g.V().count().next()

8393

In [226]:
# Look up every vertex labelled "@minboxradio" and print them as a list.
print(g.V().hasLabel("@minboxradio").toList())

[v[1]]


In [227]:
# Same label lookup, but next() returns only the first matching vertex
# rather than a list.
g.V().hasLabel("@minboxradio").next()

v[1]

Count Edges

In [228]:
# Count all edges; next() pulls the single count value out of the traversal.
g.E().count().next()

28390

What to Pull out for NetworkX conversion

In [183]:
# From every vertex, follow each incident edge (in either direction) to the
# vertex at the other end and keep the whole path [vertex, edge, other vertex].
# by(__.valueMap(True)) renders each path element as a map; the cells below
# inspect the values of those maps for the first path.
sg = g.V().bothE().otherV().path().by(__.valueMap(True)).toList()

In [184]:
# Values of element 0 of the first path (the starting vertex).
# NOTE(review): the output looks like [label, id] -- confirm the ordering.
list(sg[0][0].values())

['@albertiv925', '6875']

In [185]:
# Values of element 1 of the first path (the edge between the two vertices).
# The conversion cell further down stores the first value as `weight` and the
# second as `elabel`.
list(sg[0][1].values())

['1', '76']

In [186]:
# Values of element 2 of the first path (the vertex at the other end of the edge).
# NOTE(review): the output looks like [label, id] -- confirm the ordering.
list(sg[0][2].values())

['@00ye4afhilznwjwljmjk2jjn8zmfug65t2sdmuswkye=', '153']

### Convert into networkx graph

In [229]:
# Build a directed NetworkX graph from the Neptune graph.
#
# Each edge is followed exactly once, from its out-vertex to its in-vertex
# (outE().inV()), so every path is [source vertex, edge, target vertex] in the
# edge's real direction. Traversing with bothE().otherV() instead reaches every
# edge from both of its endpoints, which would also insert a reversed copy of
# each edge into the DiGraph and lose the direction information.
G = nx.DiGraph()
sg = g.V().outE().inV().path().by(__.valueMap(True)).toList()
for path in sg:
    # Materialise each element's values once instead of on every access.
    # NOTE(review): the maps are read positionally. For vertices, value 0 is
    # used as the label and value 1 as the node key; for edges, value 0 becomes
    # `weight` and value 1 becomes `elabel`. This relies on the order in which
    # valueMap(True) returns its entries -- confirm, or key by T.id / T.label.
    source_vals = list(path[0].values())
    edge_vals = list(path[1].values())
    target_vals = list(path[2].values())

    # Add Nodes (re-adding an existing node just refreshes its attributes)
    G.add_node(source_vals[1], label=source_vals[0])
    G.add_node(target_vals[1], label=target_vals[0])

    # Add Edges
    G.add_edge(source_vals[1], target_vals[1], elabel=edge_vals[1], weight=edge_vals[0])

Extract to show edge and node lists

In [230]:
# Flatten the graph's edges into a DataFrame (source, target and the edge
# attributes as columns), keeping only rows whose `elabel` is not "retweeted".
G_edgelist = (
    nx.to_pandas_edgelist(G)
    .loc[lambda edges: edges['elabel'] != "retweeted"]
)
G_edgelist.head()

Unnamed: 0,source,target,elabel,weight
1,153,6816,78,1
2,153,7687,79,1
3,153,5978,80,1
4,153,7951,81,1
5,153,5651,82,2


In [231]:
# One row per node: the attribute dicts become the columns, the node keys
# become the index, and the index is then turned into a regular `node` column.
G_nodelist = (
    pd.DataFrame(
        [attrs for node_key, attrs in G.nodes(data=True)],
        index=list(G.nodes()),
    )
    .rename_axis('node')
    .reset_index()
)
G_nodelist

Unnamed: 0,node,label
0,6875,@albertiv925
1,153,@00ye4afhilznwjwljmjk2jjn8zmfug65t2sdmuswkye=
2,6368,@hkokbore
3,351,@0vve2dxahxv24dpvjujm1aiofuinnwrnmdcugj6c=
4,6775,@petertatchell
...,...,...
8388,8268,@hk_watch
8389,5030,@diarymsg:
8390,6817,@caoyi170610
8391,5804,@huangzhiking:


### Export to JSON for data viz

In [232]:
# Convert the graph to node-link form and serialise it straight into the
# output file used for the visualisation.
with open('networkdata1.json', 'w') as outfile1:
    json.dump(json_graph.node_link_data(G), outfile1)

### Close DB Connection

In [233]:
# Close the remote connection opened at the top of the notebook.
remoteConn.close()