In [None]:

#  Using UDFs in the context of a Cypher traversal ..
#



#  Setup stuff: Connectivity

In [None]:

%xmode Minimal


#  Setting display options 
#

import pandas as pd
   #
pd.set_option("display.width", 480)

#  Sets horizontal scroll for wide outputs
#
from IPython.display import display, HTML
display(HTML(""))

from tabulate import tabulate

print("--")


In [None]:

from katana import remote
from katana.remote import import_data

my_client = remote.Client()

print(my_client)


In [None]:

NUM_PARTITIONS  = 3
   #
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

print("--")


In [None]:

#  DELETE ALL GRAPHS

for l_database in my_client.databases():
   for l_graph in my_client.get_database(name=l_database.name).graphs_in_database():
      l_handle=my_client.get_database(name=l_database.name).get_graph_by_id(id=l_graph.graph_id)
      l_handle.delete()

for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")


In [None]:

#  DELETE ONE SPECIFIC GRAPH ONLY

for l_database in my_client.databases():
   for l_graph in my_client.get_database(name = l_database.name).graphs_in_database():
      if ((l_database.name == DB_NAME) and (l_graph.name == GRAPH_NAME)):   
         l_handle = my_client.get_database(name = l_database.name).get_graph_by_id(id = l_graph.graph_id)
         l_handle.delete()

for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version, "   Graph Name: ", l_graph.name)

print("--")


In [None]:

#  DELETE ALL DATABASES

for l_database in my_client.databases():
   if (l_database.name != "default"):
      my_client.get_database(name=l_database.name).delete_database()
      print("--")

for l_database in my_client.databases():
   print("DB ID: ", l_database.database_id, "     DB Name: ", l_database.name)


In [None]:

#  CREATE DATABASE

my_database = my_client.create_database(name=DB_NAME)

print(my_database.database_id)


In [None]:

#  CREATE GRAPH

my_graph = my_client.get_database(name=DB_NAME).create_graph(name=GRAPH_NAME, num_partitions=NUM_PARTITIONS)

print(my_graph)


In [None]:

#  CONNECT TO GRAPH

my_graph, *_ = my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME)

print(my_graph)


# Create: Vertices/nodes, edges ..

In [None]:

import pandas as pd

print("--")


In [None]:

#  Vertices/Nodes, Airports ..

df_airports = pd.DataFrame([
       #
   ["MKE", "Milwaukee"               , "Airport"],
   ["ORD", "Chicago O-Hare"          , "Airport"],
   ["SJC", "San Jose"                , "Airport"],
   ["DEN", "Denver"                  , "Airport"],
       #
   ], columns = ["airport_code", "airport_name", "LABEL"])

df_airports.head(20)


In [None]:

#  Create the edge, flights ..

df_flights = pd.DataFrame([
      #
   ["MKE", "ORD",   66, 1, "FLIES_TO" ],
   ["ORD", "MKE",   66, 1, "FLIES_TO" ],
      #
   ["ORD", "DEN",  886, 1, "FLIES_TO" ],
   ["DEN", "ORD",  886, 1, "FLIES_TO" ],
      #
   ["SJC", "DEN",  948, 1, "FLIES_TO" ],                           #  Notice SJC flies to/from Denver
   ["DEN", "SJC",  948, 1, "FLIES_TO" ],
      #
   ["SJC", "ORD", 1829, 1, "FLIES_TO" ],                           #  Notice SJC flies to ORD, but not ORD to SJC
      #
   ], columns = ["START_ID", "END_ID", "DISTANCE", "NUM_HOPS", "TYPE"])

df_flights.head(30)


In [None]:

# Import the 2 previously created Python DataFrames into KatanaGraph ..

with import_data.DataFrameImporter(my_graph) as df_importer:   
    
   df_importer.nodes_dataframe(df_airports,                    #  Aiports set of Nodes
      id_column             = "airport_code",
      id_space              = "Airport",  
      label                 = "Airport",  
      )
   
   df_importer.edges_dataframe(df_flights,                     #  Our Edge, specifying the relationship between Airport --> FLIES_TO --> Airport
      source_id_space       = "Airport", 
      destination_id_space  = "Airport",   
      source_column         = "START_ID",
      destination_column    = "END_ID",
      type                  = "FLIES_TO"
      )

   df_importer.insert()

print("--")


In [None]:

display(my_graph.num_nodes())
display(my_graph.num_edges())


In [None]:

#  What's in the vertices/nodes ..

l_query  = """
   MATCH ( n ) 
   RETURN n
   """.format()

l_return = my_graph.query_unpaginated(l_query)

print(tabulate(l_return, headers='keys', tablefmt='psql'))


In [None]:

#  What's in the edge .. 

l_query  = """
   MATCH (n) - [r] -> (m)
   RETURN r
   // RETURN  n.id AS FROM, m.id AS TO, r.DISTANCE AS DISTANCE, r.NUM_HOPS AS NUM_HOPS
   """.format()

l_return = my_graph.query_unpaginated(l_query)

print(tabulate(l_return, headers='keys', tablefmt='psql'))


In [None]:

#  Full graph

l_query  = """
   MATCH (n) - [ r ] -> (m)
   RETURN n, r, m
   """.format()

l_return = my_graph.query_unpaginated(l_query, contextualize=True)


l_return.view()

# print(l_result)
# print(tabulate(l_result, headers='keys', tablefmt='psql'))


# Run a Cypher UDF ..

In [31]:

#  Define a Python function which will become our Cypher UDF
#

def my_changecase(arg1: str) -> str:
    
#  from katana.distributed import MPI
#     #
#  l_comm         = MPI.COMM_WORLD
#  l_nodenumber   = (l_comm.Get_rank())
#   
#   
   l_return = ""
      #
   if (arg1.isupper()):
      l_return = arg1.lower()
   else:
      l_return = arg1.upper()
    
   print("This ran on KG Worker node number: %d" % (l_nodenumber))
   return l_return


# try:
#    my_graph.deregister_udf(my_changecase)
# except:
#    pass

my_graph.register_udf(my_changecase)

print("--")


          0/? [?op/s]


Host 0 errors:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/katana/worker/worker.py", line 111, in execute
    value = function(graph)
  File "/opt/conda/lib/python3.8/site-packages/katana/remote/aio/graph.py", line 300, in <lambda>
    await self.run(lambda dist_g: dist_g.register_udf_(func.__name__, udf_meta))
ValueError: Failed to register UDF. UDF with name my_changecase already exists (LSPGGraph.cpp:1016): invalid argument

Host 1 errors:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/katana/worker/worker.py", line 111, in execute
    value = function(graph)
  File "/opt/conda/lib/python3.8/site-packages/katana/remote/aio/graph.py", line 300, in <lambda>
    await self.run(lambda dist_g: dist_g.register_udf_(func.__name__, udf_meta))
ValueError: Failed to register UDF. UDF with name my_changecase already exists (LSPGGraph.cpp:1016): invalid argument

Host 2 errors:
Traceback (most recent call last):
  File "/

ValueError: Failed to register UDF. UDF with name my_changecase already exists (LSPGGraph.cpp:1016): invalid argument

In [30]:

my_graph.deregister_udf(my_changecase)


          0/? [?op/s]


Host 0 errors:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/katana/worker/worker.py", line 111, in execute
    value = function(graph)
  File "/opt/conda/lib/python3.8/site-packages/katana/remote/aio/graph.py", line 304, in <lambda>
    await self.run(lambda dist_g: dist_g.deregister_udf_(name))
TypeError: deregister_udf_(): incompatible function arguments. The following argument types are supported:
    1. (self: katana.distributed_native.lspg.Graph, name: str) -> void

Invoked with: <katana.distributed.Graph object at 0x7f74912cb9b0>, <function my_changecase at 0x7f7491337670>

Host 1 errors:
Traceback (most recent call last):
  File "/opt/conda/lib/python3.8/site-packages/katana/worker/worker.py", line 111, in execute
    value = function(graph)
  File "/opt/conda/lib/python3.8/site-packages/katana/remote/aio/graph.py", line 304, in <lambda>
    await self.run(lambda dist_g: dist_g.deregister_udf_(name))
TypeError: deregister_udf_(): incompatibl

TypeError: deregister_udf_(): incompatible function arguments. The following argument types are supported:
    1. (self: katana.distributed_native.lspg.Graph, name: str) -> void

Invoked with: <katana.distributed.Graph object at 0x7f74912cb9b0>, <function my_changecase at 0x7f7491337670>

In [None]:

#  Test the UDF defined just above ..
#

l_query  = """

   MATCH (n: Airport) 
   SET n.airport_name = my_changecase(n.airport_name)
   RETURN n
   
   """.format()

l_return = my_graph.query_unpaginated(l_query)

print(tabulate(l_return, headers='keys', tablefmt='psql'))







In [None]:


def transform_to_smiles(inchi: str) -> str:
    from rdkit import Chem
    import numpy as np
    if isinstance(inchi, str):
        if len(inchi) == 0:
            x = "None"
        else:
            try:
                c_inchi = inchi
                mol=Chem.inchi.MolFromInchi(c_inchi)
                smiles=Chem.MolToSmiles(mol)
                return smiles
            except Exception as e:
                    return ''

    return ''


def transform_to_smiles1_udf(strings):
    import numpy as np
    lambda_fn = np.vectorize(
        lambda val: transform_to_smiles(val)
        )
    return lambda_fn(strings).tolist()


# graph.deregister_udf('transform_to_smiles1_udf')
graph.register_udf(transform_to_smiles1_udf)

graph.query_unpaginated("""Match (c:Compound) Set c.smiles = transform_to_smiles1_udf(c.data_inchi)""")
