#  Setup stuff: Connectivity

In [None]:

#  Setting display options

from tabulate import tabulate
import pandas as pd
   #
#  Let pandas use up to 480 characters per line when rendering frames ..
pd.options.display.width = 480

print("--")


In [None]:

from katana import remote
from katana.remote import import_data


#  Instantiate the Katana remote client with its default arguments. The cells
#  below list, delete and create databases/graphs through this handle ..
#  NOTE(review): presumably the endpoint/credentials come from the
#  environment, since none are passed here -- confirm.
my_client = remote.Client()

#  Print the client's text form as a quick check that it was constructed ..
print(my_client)


In [None]:

#  Values consumed further down when the database and the graph are created ..
#
NUM_PARTITIONS  = 3                  #  Passed as num_partitions= to create_graph()
   #
DB_NAME         = "my_db"            #  Passed as name= to create_database() / get_database()
GRAPH_NAME      = "my_graph"         #  Passed as name= to create_graph()

print("--")


In [None]:
#  DELETE ALL GRAPHS
#
#  Walk every database the client reports and delete every graph found in it.
#  The database handle is looked up once per database and reused, instead of
#  being fetched again for every call made on behalf of every graph ..

for l_database in my_client.databases():
   l_db_handle = my_client.get_database(name=l_database.name)

   for l_graph in l_db_handle.graphs_in_database():
      l_handle = l_db_handle.get_graph_by_id(id=l_graph.graph_id)
      l_handle.delete()

#  List the graphs the client still reports; prints nothing when none remain ..
#
for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")


In [None]:
#  DELETE ALL DATABASES
#
#  Delete every database except the one named "default", printing a "--"
#  marker after each deletion ..

for l_database in my_client.databases():
   if l_database.name == "default":
      continue
   my_client.get_database(name=l_database.name).delete_database()
   print("--")

#  Report the databases that remain ..
#
for l_database in my_client.databases():
   print("DB ID: ", l_database.database_id, "     DB Name: ", l_database.name)


In [None]:
#  CREATE DATABASE
#
#  Create a database named DB_NAME and keep the returned handle ..

my_database = my_client.create_database(name=DB_NAME)

#  Print the id of the database that was just created ..
print(my_database.database_id)


In [None]:
#  CREATE GRAPH
#
#  Look the database up by name, then create the graph inside it using
#  NUM_PARTITIONS partitions ..
#
l_graph_database = my_client.get_database(name=DB_NAME)

my_graph = l_graph_database.create_graph(
   name           = GRAPH_NAME,
   num_partitions = NUM_PARTITIONS,
   )

print(my_graph)


# Create: Vertices/nodes, edges ..

In [None]:

#  Vertices/Nodes, Airports ..
#
#  One row per airport, written column-wise; position i of every list
#  belongs to the same airport ..

df_airports = pd.DataFrame({
   "airport_code" : ["MKE"      , "ORD"           , "SJC"     , "DEN"   ],
   "airport_name" : ["Milwaukee", "Chicago O-Hare", "San Jose", "Denver"],
   "LABEL"        : ["Airport"  , "Airport"       , "Airport" , "Airport"],
   })

#  Show the airports frame as a psql-style text table, column names as headers ..
print(tabulate(df_airports, headers='keys', tablefmt='psql'))


In [None]:
#  Create the edge, flights ..
#
#  Each route is (START_ID, END_ID, DISTANCE). Every flight gets the same
#  NUM_HOPS (1) and TYPE ("FLIES_TO"), so those two columns are filled in
#  while the frame is built rather than repeated on every row ..

l_routes = [
      #
   ("MKE", "ORD",   66),
   ("ORD", "MKE",   66),
      #
   ("ORD", "DEN",  886),
   ("DEN", "ORD",  886),
      #
   ("SJC", "DEN",  948),                                           #  Notice SJC flies to/from Denver
   ("DEN", "SJC",  948),
      #
   ("SJC", "ORD", 1829),                                           #  Notice SJC flies to ORD, but not ORD to SJC
      #
   ]

df_flights = pd.DataFrame(
   [[l_start, l_end, l_distance, 1, "FLIES_TO"] for (l_start, l_end, l_distance) in l_routes],
   columns = ["START_ID", "END_ID", "DISTANCE", "NUM_HOPS", "TYPE"],
   )

#  Show the flights frame as a psql-style text table, column names as headers ..
print(tabulate(df_flights, headers='keys', tablefmt='psql'))


In [None]:

# Load the two DataFrames built above (airports, flights) into KatanaGraph ..

#  Nodes and both ends of every edge share one id space ..
#
l_airport_id_space = "Airport"

with import_data.DataFrameImporter(my_graph) as df_importer:

   #  Airports set of Nodes, identified by their airport_code column
   #
   df_importer.nodes_dataframe(
      df_airports,
      id_column = "airport_code",
      id_space  = l_airport_id_space,
      label     = "Airport",
      )

   #  Our Edge, specifying the relationship between Airport --> FLIES_TO --> Airport
   #
   df_importer.edges_dataframe(
      df_flights,
      source_id_space      = l_airport_id_space,
      destination_id_space = l_airport_id_space,
      source_column        = "START_ID",
      destination_column   = "END_ID",
      type                 = "FLIES_TO",
      )

   #  Nodes and edges are registered above; insert() is called once, last
   #
   df_importer.insert()

print("--")


In [84]:

#  What's in the graph ..
#
#  First every node, then every directed relationship, each shown as a table ..

l_all_nodes_cypher = """
   MATCH ( n ) 
   RETURN n
   """
l_result1 = my_graph.query(l_all_nodes_cypher)
print(tabulate(l_result1, headers="keys", tablefmt="psql"))

l_all_edges_cypher = """
   MATCH ( n ) - [ r ] -> ( m )
   RETURN r
   """
l_result2 = my_graph.query(l_all_edges_cypher)
print(tabulate(l_result2, headers="keys", tablefmt="psql"))


          0/? [?op/s]

+----+-----------------+-------------+-----------+------------------+------------------+----------+
|    |   n.internal_id | n.labels    | n.LABEL   | n.airport_code   | n.airport_name   | n.type   |
|----+-----------------+-------------+-----------+------------------+------------------+----------|
|  0 |               0 | ['Airport'] | Airport   | SJC              | San Jose         | node     |
|  1 | 281474976710656 | ['Airport'] | Airport   | MKE              | Milwaukee        | node     |
|  2 | 281474976710657 | ['Airport'] | Airport   | DEN              | Denver           | node     |
|  3 | 562949953421312 | ['Airport'] | Airport   | ORD              | Chicago O-Hare   | node     |
+----+-----------------+-------------+-----------+------------------+------------------+----------+


          0/? [?op/s]

+----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------+
|    |        r.end.id | r.end.labels   |   r.internal_id | r.label   |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   |
|----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------|
|  0 | 562949953421312 | ['Airport']    |               0 | FLIES_TO  |         1829 |            1 | FLIES_TO |               0 | ['Airport']      | edge     |
|  1 | 281474976710657 | ['Airport']    |               1 | FLIES_TO  |          948 |            1 | FLIES_TO |               0 | ['Airport']      | edge     |
|  2 |               0 | ['Airport']    | 281474976710656 | FLIES_TO  |          948 |            1 | FLIES_TO | 281474976710657 | ['Airport']      | edge     |
|  3 | 562949953421312 | ['Airport

#  Actual bfs() ..

In [None]:

# import numpy as np
from katana import remote
from katana.remote import analytics
# from katana_enterprise.distributed import PartitionPolicy

#  Used to generate a unique output column name
#
#  The bfs() cell below increments this before each run and formats it as
#  "bfs" + 4 zero-padded digits, so the first name produced is "bfs0021".
#  NOTE(review): re-running this cell resets the counter to 20, so names
#  already written to the graph would be generated again -- confirm that
#  is acceptable before re-running.
#
l_cntr = 20

print("--")


In [None]:

#  Several ways of asking for the internal id of the MKE airport node.
#
#  Every query below is the same text,
#
#        MATCH (n: Airport)
#        WHERE n.airport_code = 'MKE'
#        RETURN <clause>
#
#  and differs only in <clause>, so the shared part is spelled out once ..
#
l_mke_query_head = (
   "\n"
   "   MATCH (n: Airport) \n"
   "   WHERE n.airport_code = 'MKE'\n"
   "   RETURN "
   )
l_mke_query_tail = "\n   "


#  RETURN n
#
l_result1 = my_graph.query(l_mke_query_head + "n" + l_mke_query_tail)
print(l_result1)
#
#  Sample output,
#
#          n.internal_id   n.labels  n.LABEL n.airport_code n.airport_name  n.bfs0026 n.type
#     0  281474976710656  [Airport]  Airport            MKE      Milwaukee          2   node


#  RETURN n.internal_id
#
l_result1 = my_graph.query(l_mke_query_head + "n.internal_id" + l_mke_query_tail)
print(l_result1)
#
#  Sample output,
#
#       n.internal_id
#     0          None


#  RETURN n.n.internal_id
#
l_result1 = my_graph.query(l_mke_query_head + "n.n.internal_id" + l_mke_query_tail)
print(l_result1)
#
#  Sample output,
#
#       n.n.internal_id
#     0          None


#  RETURN n.internal_id AS internal_id
#
l_result1 = my_graph.query(l_mke_query_head + "n.internal_id AS internal_id" + l_mke_query_tail)
print(l_result1)
#
#  Sample output,
#
#       internal_id
#     0          None


#  Attribute access on the result set itself does not work either,
#
#  l_result1 = my_graph.query(
#     """
#     MATCH (n: Airport) 
#     WHERE n.airport_code = 'MKE'
#     RETURN n
#     """ )
#  print(l_result1.internal_id)
#  
#  Sample output
#
#   >>> AttributeError: 'ResultSet' object has no attribute 'internal_id'


#  RETURN n, then read the value by position from each row tuple. Position 1
#  is used here; the sample output below shows it printing the internal id ..
#
l_result1 = my_graph.query(l_mke_query_head + "n" + l_mke_query_tail)
for l_each in l_result1.itertuples():
   print(l_each[1])
#
#  Sample output,
#     281474976710656

In [85]:

#  Running analytics.bfs() ..
#
#  Calls analytics.bfs() on my_graph from the node given as "source" and asks
#  for the result to be stored under a freshly generated property name ..


#  Values tried for the "source" argument. Exactly one assignment is active;
#  the others are kept as a record of what was attempted.
#
#  NOTE: with the integer 0 active, the run recorded in this notebook failed
#  with InvalidRequestError ("invalid uint64 value; should be encoded as a
#  string").
#
#  l_source_nodeid       = "SJC"                             #  San Jose, CA
l_source_nodeid       = 0                                 #  Throws error
#  l_source_nodeid       = "0"                               #  Throws error
#  l_source_nodeid       = "333" 

#  l_source_nodeid = l_each[1]                               #  Ordinal positioning from traversal

#  l_source_nodeid = str(l_each[1])                          #  First we have to fetch this integer, then cast it to a string


#  Generate a unique prop name so we can run this over and over without edit ..
#
l_cntr += 1
   #
l_output_propname     = "bfs" + str(l_cntr).zfill(4)
l_max_iterations      = 10000


analytics.bfs(
   my_graph,
   source                  = l_source_nodeid,
   result_property_name    = l_output_propname,
   max_iterations          = l_max_iterations,
   #  delta                   = 5
   )


#  print() returns None, so wrapping it in display() only added a stray
#  "None" to the cell output; print the marker directly, as the other cells do ..
print("--")


InvalidRequestError: b'{"code":400,"message":"json: invalid uint64 value; should be encoded as a string"}'

In [None]:

#  My notes,
#
#     .  If "source" id is not found, no value is output. 
#        Shouldn't the routine raise an error of some sort ?
#
#     .  If "source" id is not found, no value is output.
#        Yet, if I incorrectly call to reuse the output column name, routine complains with,
#
#           >>> OperationError: E9UEYj4d5DjkrUgo1UiuMLvCKsVh3z3ChK9d57NfjJNj-4EE59uUYb2cJHV5Md backtrace
#           >>> (AnalyticsOperation.cpp:1132): backtrace (AnalyticsOperation.cpp:903): backtrace
#           >>> (AnalyticsOperation.cpp:263): backtrace (Bfs.cpp:280): property with name bfs0024
#           >>> already exists (PropertyTableGroup.cpp:493): invalid argument: invalid argument
#
#     .  bfs() requires a string argument for a numeric field. If we submit a numeric argument, we get,
#
#           >>> InvalidRequestError: b'{"code":400,"message":"json: cannot unmarshal number into Go 
#           >>> struct field BFSParameters.analytics.bfs.start_node of type string"}'


#  bfs() output records a new column on the node list
#
#  Re-read every node and show it as a table ..
#
l_nodes_after_bfs_cypher = """
   MATCH (n) 
   RETURN n
   """
l_result1 = my_graph.query(l_nodes_after_bfs_cypher)
print(tabulate(l_result1, headers="keys", tablefmt="psql"))

#  Edge listing, left disabled ..
#
#  l_result2 = my_graph.query(
#     """
#     MATCH (n) - [r] -> (m)
#     RETURN r
#     """ )
#  print(tabulate(l_result2, headers='keys', tablefmt='psql'))
