In [49]:
# Load Libraries
import pandas as pd
import os
import datetime
import json

In [50]:
from gremlin_python.structure.graph import Graph
from gremlin_python.process.traversal import T
from gremlin_python.process.graph_traversal import __
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.aiohttp.transport import AiohttpTransport

In [51]:
def load_vertex(fdataset_name, fdata_bucket_name):
    """Upsert every vertex described in a JSON dataset file into the graph.

    The file must contain a JSON array of records, each providing a
    ``label`` and a ``properties`` mapping. A record is matched against the
    graph through the ``id`` *property* stored in ``properties`` (not the
    graph-native ``T.id``): when a vertex carrying the same value exists its
    properties are rewritten, otherwise a new vertex with the record's label
    is created. Records without an ``id`` property are always inserted.

    Uses the notebook-level traversal source ``g``.

    Args:
        fdataset_name: File name of the dataset, e.g. ``person_vertex.json``.
        fdata_bucket_name: Directory that contains the dataset file.

    Raises:
        OSError, json.JSONDecodeError: If the dataset cannot be read/parsed.
        KeyError: If a record has no ``label`` or ``properties`` entry.

    Graph errors are reported on stdout and the affected record is skipped,
    so one bad record does not abort the whole load.
    """
    print("loading vertex " + fdataset_name)

    data_location = '{}/{}'.format(fdata_bucket_name, fdataset_name)
    unique_property = 'id'

    # Load json to memory
    with open(data_location, 'r') as jf:
        vertex_data = json.load(jf)

    # Load to graph
    for data in vertex_data:
        label = data['label']
        properties = data['properties']

        if unique_property in properties:
            try:
                existing_vertex = g.V().has(unique_property, properties[unique_property]).next()
                vertex = g.V(existing_vertex)
            except StopIteration:
                # The lookup returned nothing: this is a genuinely new vertex.
                vertex = g.addV(label)
            except Exception as error:
                # The lookup itself failed (e.g. server/connection problem).
                # Inserting here could duplicate a vertex that already exists,
                # so report the record and move on instead.
                print("Vertex error => An exception occurred:", error)
                continue
        else:
            # Nothing to match on, so the record can only be inserted.
            vertex = g.addV(label)

        # Add properties to the vertex
        for key, value in properties.items():
            vertex = vertex.property(key, value)

        # Execute the query
        try:
            vertex.next()
        except Exception as error:
            print("Vertex error => An exception occurred:", error)

In [52]:
def load_edge(fdataset_name, fdata_bucket_name):
    """Create one edge per record of a JSON dataset file.

    The file must contain a JSON array of records, each providing
    ``from_id``, ``to_id``, ``label`` and a ``properties`` mapping. Both
    endpoints are resolved through the ``id`` *property* of the vertices
    (not the graph-native ``T.id``); the edge is then added between the
    resolved vertices and decorated with the record's properties.

    Edges are always added, never matched against existing ones, so loading
    the same dataset twice creates parallel edges.

    Uses the notebook-level traversal source ``g``.

    Args:
        fdataset_name: File name of the dataset, e.g. ``account-card-edge.json``.
        fdata_bucket_name: Directory that contains the dataset file.

    Raises:
        OSError, json.JSONDecodeError: If the dataset cannot be read/parsed.
        KeyError: If a record lacks one of the required entries.

    Graph errors, including an endpoint that cannot be found, are reported
    on stdout and the affected edge is skipped.
    """
    print("loading edge " + fdataset_name)

    data_location = '{}/{}'.format(fdata_bucket_name, fdataset_name)
    unique_property = 'id'

    # Load json to memory
    with open(data_location, 'r') as jf:
        edge_data = json.load(jf)

    # Load to graph
    for data in edge_data:
        from_id = data['from_id']
        to_id = data['to_id']
        edge_label = data['label']
        edge_properties = data['properties']

        # Resolve both endpoints; one round trip each is enough because the
        # lookup already returns the vertex (and therefore its graph id).
        try:
            from_vertex = g.V().has(unique_property, from_id).next()
            to_vertex = g.V().has(unique_property, to_id).next()
        except StopIteration:
            # next() on an empty result: at least one endpoint does not exist.
            print("Edge error => endpoint vertex not found:", from_id, "->", to_id)
            continue
        except Exception as error:
            print("Edge error => An exception occurred:", error)
            continue

        # Create the edge with the given label and properties
        edge = g.V(from_vertex.id).as_('a').V(to_vertex.id).addE(edge_label).from_('a')
        for key, value in edge_properties.items():
            edge = edge.property(key, value)

        # Execute the query
        try:
            edge.next()
        except Exception as error:
            print("Edge error => An exception occurred:", error, "|", edge)

In [53]:
# Prepare connection
# The endpoint defaults to a Gremlin Server on localhost and can be redirected
# without editing the notebook by setting the GREMLIN_ENDPOINT environment
# variable before starting the kernel.
endpoint = os.environ.get('GREMLIN_ENDPOINT', 'ws://127.0.0.1:8182/gremlin')

# call_from_event_loop=True is needed because the notebook kernel already runs
# an asyncio event loop and the aiohttp transport is driven from inside it.
connection = DriverRemoteConnection(
    endpoint,
    'g',
    transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
)

# `g` is the traversal source used by load_vertex / load_edge.
graph = Graph()
g = graph.traversal().withRemote(connection)

# Load Vertices

In [54]:
# Directory holding the dataset files. Override with the GRAPH_DATASET_DIR
# environment variable when the notebook runs on another machine.
data_bucket_name = os.environ.get(
    "GRAPH_DATASET_DIR", "/home/eliezerraj/jupyter-notebook/graph/dataset")

# Vertex datasets, loaded in the listed order.
vertex_datasets = [
    'person_vertex.json',
    'terminal_vertex.json',
    'merchant_vertex.json',
    'account_vertex.json',
    'card_vertex.json',
    'payment_vertex.json',
]

for dataset_name in vertex_datasets:
    load_vertex(dataset_name, data_bucket_name)

loading vertex person_vertex.json
loading vertex terminal_vertex.json
loading vertex merchant_vertex.json
loading vertex account_vertex.json
loading vertex card_vertex.json
loading vertex payment_vertex.json


# Load Edges

In [55]:
# Directory holding the dataset files. Override with the GRAPH_DATASET_DIR
# environment variable when the notebook runs on another machine.
data_bucket_name = os.environ.get(
    "GRAPH_DATASET_DIR", "/home/eliezerraj/jupyter-notebook/graph/dataset")

# Edge datasets, loaded in the listed order. load_edge resolves both endpoints
# in the graph, so the vertices must have been loaded before this cell runs.
edge_datasets = [
    'person-account-edge.json',
    'account-card-edge.json',
    'card-payment-edge.json',
    'payment-mcc-edge.json',
    'payment-terminal-edge.json',
]

for dataset_name in edge_datasets:
    load_edge(dataset_name, data_bucket_name)

loading edge person-account-edge.json
loading edge account-card-edge.json
loading edge card-payment-edge.json
loading edge payment-mcc-edge.json
loading edge payment-terminal-edge.json


In [140]:
# Close the remote connection that `g` was bound to when it was created.
# NOTE(review): traversals issued through `g` after this cell will presumably
# fail until the connection cell is run again - confirm before re-running
# load cells out of order.
connection.close()

# Query

In [None]:
#for i in range(len(data_person)):
    
#    vertex=data_person.iloc[i, 2]
#    id=data_person.iloc[i, 1]
#    name=data_person.iloc[i, 3]
    
#    print('data : ',id, vertex, name)
    
#    person = g.V(vertex).property('name',name).as_(id)
#person.next()

In [31]:
#from gremlin_python.driver import client, serializer
#endpoint = 'ws://127.0.0.1:8182/gremlin'

#gremlin_client = client.Client(
#    endpoint, 
#    'g',
#    transport_factory=lambda:AiohttpTransport(call_from_event_loop=True),
#    username="",
#    password="",
#    message_serializer=serializer.GraphSONSerializersV2d0()
#)

In [36]:
#x = g.V('P-2354').valueMap().next()
#print(x)

In [37]:
def upsert_vertex(fdataset_name, fdata_bucket_name):
    """Upsert every vertex of a JSON dataset file with one script per record.

    The file must contain a JSON array of records, each providing a
    ``label`` and a ``properties`` mapping that includes an ``id`` entry.
    For each record a ``fold().coalesce(unfold(), addV(...))`` script is
    submitted: the vertex whose ``id`` property matches is reused, otherwise
    it is created, and the remaining properties are then set on it.

    All record data (label, property names and values) is sent as script
    bindings rather than being spliced into the script text. Values therefore
    keep their JSON type and cannot break or alter the script, whatever
    characters they contain.

    Uses a notebook-level ``gremlin_client`` (a gremlin_python driver client
    exposing ``submit(script, bindings)``), which must exist before calling.

    Args:
        fdataset_name: File name of the dataset, e.g. ``person_vertex.json``.
        fdata_bucket_name: Directory that contains the dataset file.

    Raises:
        OSError, json.JSONDecodeError: If the dataset cannot be read/parsed.
        KeyError: If a record has no ``label`` or ``properties`` entry.

    Submission errors and records without an ``id`` property are reported on
    stdout and skipped.
    """
    print("loading upsert_vertex " + fdataset_name)

    data_location = '{}/{}'.format(fdata_bucket_name, fdataset_name)

    # Load json to memory
    with open(data_location, 'r') as jf:
        vertex_data = json.load(jf)

    unique_property = 'id'

    # Load to graph
    for data in vertex_data:
        vertex_label = data['label']
        properties = data['properties']

        if unique_property not in properties:
            print("Vertex error => An exception occurred:",
                  f"missing '{unique_property}' property in {properties}")
            continue

        bindings = {
            'pk_name': unique_property,
            'pk_value': properties[unique_property],
            'v_label': vertex_label,
        }
        query = ("g.V().has(pk_name,pk_value).fold()"
                 ".coalesce(unfold(),addV(v_label).property(pk_name,pk_value))")

        # One (name, value) binding pair per remaining property. The script
        # text depends only on how many properties there are, never on data.
        extra = [(key, value) for key, value in properties.items()
                 if key != unique_property]
        for index, (key, value) in enumerate(extra):
            bindings[f"k{index}"] = key
            bindings[f"v{index}"] = value
            query += f".property(k{index},v{index})"

        print(query, bindings)

        # Execute the query
        try:
            gremlin_client.submit(query, bindings=bindings).all().result()
        except Exception as error:
            print("Vertex error => An exception occurred:", error)
