# Graph data from Gremlin 
> An example of reading graph data with Gremlin.

- toc: true 
- badges: true
- comments: true
- categories: [CosmosdB, Gremlin, TinkerPop]

In [1]:
from getpass import getpass
import pandas as pd
from gremlin_python.driver import client, serializer
import gremlin_python.structure.io.graphsonV2d0 as graphsonV2d0
import requests
import time

import deetly

from settings import cosmosdb_conn

ModuleNotFoundError: No module named 'gremlin_python'

In [4]:
# Create datapackage
description = """
## Game of Thrones

Example using [Apache TinkerPop](https://tinkerpop.apache.org/) formated graph data
"""

package = {
    "name":"Game of Thrones houses and characters", 
    "description":description, 
    "author":"Paul Bencze", 
    "theme":"example",
    "license": "MIT",
    "keywords": ["example, graph, forcegraph, 3D"]
}

dp = deetly.package(package)

In [5]:
def executeGremlin(gremlinQuery, message=None, params=None):
    callback = cosmosdb_conn.submitAsync(gremlinQuery)
    results = []
    if callback.result() is not None:
        for result in callback.result():
            results.extend(result)
        return results
    else:
        print(f"Error processing query: {query}")

In [6]:
executeGremlin("g.V().count()")

[2590]

In [5]:
gremlin_nodes = executeGremlin("g.V()")
print(len(gremlin_nodes))
df_gremlin_nodes = pd.DataFrame.from_records(gremlin_nodes)
df_gremlin_nodes.head(1)

RuntimeError: Cannot run the event loop while another loop is running

In [6]:
len(gremlin_nodes)

2589

In [7]:
gremlin_edges = []
edges = executeGremlin("g.E().hasLabel('memberOf').range(0,1000)")
gremlin_edges.extend(edges)
edges = executeGremlin("g.E().hasLabel('memberOf').range(1000,1567)")
gremlin_edges.extend(edges)
print(len(gremlin_edges))
df_gremlin_edges = pd.DataFrame.from_records(gremlin_edges)
df_gremlin_edges.head(1)

1567


Unnamed: 0,id,label,type,inVLabel,outVLabel,inV,outV
0,f3eeb6a0-9b80-45e6-84a7-4e4ac71110cb,memberOf,edge,house,character,house_2,character_298


# Mappings

In [31]:
df_nodes = df_gremlin_nodes[((df_gremlin_nodes['label'].isin(['house','character'])) &\
                               (df_gremlin_nodes['id']!='house_0'))]

In [32]:
def getCytoNode(row):
    return {'data':{'id':row['id']}}

def getGraphNode(row):
    return {'Id':row['id'], 'Label':row['properties']['name'][0]['value']}

def getJSONNode(row):
    return {'id':row['id'], 'name':row['properties']['name'][0]['value'], 'group':'1'}
    
df_nodes['cyto'] = df_nodes.apply(getCytoNode, axis=1)
df_nodes['graph'] = df_nodes.apply(getGraphNode, axis=1)
df_nodes['json'] = df_nodes.apply(getJSONNode, axis=1)
print(df_nodes.iloc[1]['cyto'])
print(df_nodes.iloc[1]['graph'])
print(df_nodes.iloc[1]['json'])

{'data': {'id': 'house_2'}}
{'Id': 'house_2', 'Label': 'House Allyrion of Godsgrace'}
{'id': 'house_2', 'name': 'House Allyrion of Godsgrace', 'group': '1'}




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [33]:
def getCytoEdge(row):
    return {'data':{'source':row['inV'],'target':row['outV']}}

def getGraphEdge(row):
    return {'Id':row['id'], 'Source':row['outV'], 'Target':row['inV'], 'Type':'Undirected', 'Weight':'1'}

def getJSONEdge(row):
    return {'source':row['outV'], 'target':row['inV'], 'value':'1'}

df_gremlin_edges['cyto'] = df_gremlin_edges.apply(getCytoEdge, axis=1)
df_gremlin_edges['graph'] = df_gremlin_edges.apply(getGraphEdge, axis=1)
df_gremlin_edges['json'] = df_gremlin_edges.apply(getJSONEdge, axis=1)
print(df_gremlin_edges.iloc[1]['cyto'])
print(df_gremlin_edges.iloc[1]['graph'])
print(df_gremlin_edges.iloc[1]['json'])

{'data': {'source': 'house_2', 'target': 'character_1129'}}
{'Id': '92a07f15-aec1-4514-ba31-c5bf72cc2343', 'Source': 'character_1129', 'Target': 'house_2', 'Type': 'Undirected', 'Weight': '1'}
{'source': 'character_1129', 'target': 'house_2', 'value': '1'}


In [34]:
network = {
    'elements': {
        'nodes': df_nodes['cyto'].to_list(),
        'edges': df_gremlin_edges['cyto'].to_list()
    }
}

In [35]:
json_graph_data = {
    'nodes': df_nodes['json'].to_list(),
    'links': df_gremlin_edges['json'].to_list()
}

import json
with open('data/game_of_thrones_graph.json', 'w') as file:
     file.write(json.dumps(json_graph_data))

In [36]:
df_member_count = df_gremlin_edges[['inV','outV']].groupby('inV').count().reset_index()

In [37]:
df_nodes['name'] = df_nodes['json'].apply(lambda x : x['name'])
df_gremlin_houses = df_nodes[df_nodes['label']=='house'][['id','name']]
df_gremlin_houses = pd.merge(df_gremlin_houses, df_member_count, left_on='id', right_on='inV')
df_gremlin_houses = df_gremlin_houses.drop(['inV', 'id'], axis=1).rename(columns={'outV':'value'})
df_gremlin_houses.head(1)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,name,value
0,House Allyrion of Godsgrace,4


In [48]:
import json
data = df_gremlin_houses.to_json(orient='records')
columns = [
  {
    "name": "House",
    "selector": "name",
    "sortable": True,
  },
  {
    "name": "Members",
    "selector": "value",
    "sortable": True,
  },
]

fig = {
  "data": json.loads(data),
  "layout": {
    "title": "Number of members in the house",
    "columns": columns,
    "sortable": True,
  }
}

In [49]:
# Add barlist figure to the datapackage
description = """
## Number of members


"""

dp.barlist(fig,"Number of members", description)

# Add figure

In [50]:
fig = {"data": json_graph_data}

In [51]:
# Add another figure to the datapackage
description = """
## Houses and characters


"""

dp.forcegraph(fig,"Houses & characters", description)

In [43]:
token = getpass('Enter your token here')

Enter your token here········


In [52]:
# Publish the datapackage
item = dp.publish('examples', token)

Datapackage: https://storage.googleapis.com/deetly/examples/f95d4d9b5771c827b9628bbb9143baab/datapackage.json 

Metadata index entry: updated 

