#  Setup stuff: Connectivity

In [1]:

#  Setting display options
#
#  Sets the pandas "display.width" option to 480 so that wide tables are not
#  wrapped when printed. tabulate is imported here because later cells use it
#  to print query results and DataFrames as plain-text tables.

from tabulate import tabulate
import pandas as pd
   #
pd.set_option("display.width", 480)

#  Last statement of the cell: prints a "--" marker line.
print("--")


--


In [2]:

#  Create the client object that every later cell uses (my_client).
#
#  NOTE(review): remote.Client() is called without arguments, so the connection
#  settings presumably come from the environment -- confirm.

from katana import remote
from katana.remote import import_data


my_client = remote.Client()

#  Print the client object's repr.
print(my_client)


<katana_enterprise.remote.sync_wrappers.Client object at 0x7f12144e39d0>


In [3]:

#  Settings used by the cells that create the database and the graph.

NUM_PARTITIONS  = 3                    #  Passed as num_partitions= to create_graph()
   #
DB_NAME         = "my_db"              #  Passed as name= to create_database() / get_database()
GRAPH_NAME      = "my_graph"           #  Passed as name= to create_graph()

print("--")


--


In [4]:
#  DELETE ALL GRAPHS
#
#  Walks every database the client can see and deletes each graph found in it,
#  then lists whatever graphs are still reported by the client.

for l_database in my_client.databases():
   for l_graph in my_client.get_database(name=l_database.name).graphs_in_database():
      #  Fetch a handle for this graph by its id, then delete through that handle
      l_handle = (
         my_client
         .get_database(name=l_database.name)
         .get_graph_by_id(id=l_graph.graph_id)
         )
      l_handle.delete()

#  Anything printed here is a graph that is still present after the deletes
for l_graph in my_client.graphs():
   print(
      "GRAPH ID: ", l_graph.graph_id,
      "      GRAPH Version: ", l_graph.version,
      )

print("--")


--


In [5]:
#  DELETE ALL DATABASES
#
#  Deletes every database except the one named "default", then lists the
#  databases that remain.

for l_database in my_client.databases():
   if l_database.name == "default":
      continue                                         #  The "default" database is left alone
   my_client.get_database(name=l_database.name).delete_database()
   print("--")                                         #  One marker line per deleted database

for l_database in my_client.databases():
   print(
      "DB ID: ", l_database.database_id,
      "     DB Name: ", l_database.name,
      )


--
DB ID:  Gi6YBuTAGpEeZYvCjPfWWABD3seX7d1xMUW49KNkpzVz      DB Name:  default


In [6]:
#  CREATE DATABASE
#
#  Creates the database named by DB_NAME and prints the id of the new database.
#
#  NOTE(review): what happens when a database with this name already exists is
#  not visible here; the DELETE ALL DATABASES cell removes every non-"default"
#  database before this cell runs.

my_database = my_client.create_database(name=DB_NAME)

print(my_database.database_id)


41FhUV7DAkDhUUMRBqrRoEk7NXhpD8uVwqiasDTAcnax


In [7]:
#  CREATE GRAPH
#
#  Creates a graph named GRAPH_NAME, with NUM_PARTITIONS partitions, inside the
#  database named DB_NAME. The resulting handle (my_graph) is what all later
#  import, query and analytics cells operate on.
#
my_graph = my_client.get_database(name=DB_NAME).create_graph(name=GRAPH_NAME, num_partitions=NUM_PARTITIONS)

#  Print the graph handle's repr.
print(my_graph)


<_Graph my_graph, FHZXky9yPi8MiLe3KWuN1EMviU1TBVdrAi9yPgDpqL2f, 0>


# Create: Vertices/nodes, edges ..

In [8]:

#  Vertices/Nodes, Airports ..
#
#  One row per airport: its code, its display name, and the constant "Airport"
#  in the LABEL column.

l_airport_columns = ["airport_code", "airport_name", "LABEL"]

l_airport_rows = [
   ["MKE", "Milwaukee",      "Airport"],
   ["ORD", "Chicago O-Hare", "Airport"],
   ["SJC", "San Jose",       "Airport"],
   ["DEN", "Denver",         "Airport"],
   ]

df_airports = pd.DataFrame(l_airport_rows, columns=l_airport_columns)

print(tabulate(df_airports, headers="keys", tablefmt="psql"))


+----+----------------+----------------+---------+
|    | airport_code   | airport_name   | LABEL   |
|----+----------------+----------------+---------|
|  0 | MKE            | Milwaukee      | Airport |
|  1 | ORD            | Chicago O-Hare | Airport |
|  2 | SJC            | San Jose       | Airport |
|  3 | DEN            | Denver         | Airport |
+----+----------------+----------------+---------+


In [9]:
#  Create the edge, flights ..
#
#  One row per directed flight: start airport code, end airport code, distance,
#  hop count (always 1 here), and the constant "FLIES_TO" in the TYPE column.

l_flight_columns = ["START_ID", "END_ID", "DISTANCE", "NUM_HOPS", "TYPE"]

l_flight_rows = [
   #  MKE <--> ORD
   ["MKE", "ORD",   66, 1, "FLIES_TO"],
   ["ORD", "MKE",   66, 1, "FLIES_TO"],
   #  ORD <--> DEN
   ["ORD", "DEN",  886, 1, "FLIES_TO"],
   ["DEN", "ORD",  886, 1, "FLIES_TO"],
   #  Notice SJC flies to/from Denver
   ["SJC", "DEN",  948, 1, "FLIES_TO"],
   ["DEN", "SJC",  948, 1, "FLIES_TO"],
   #  Notice SJC flies to ORD, but not ORD to SJC
   ["SJC", "ORD", 1829, 1, "FLIES_TO"],
   ]

df_flights = pd.DataFrame(l_flight_rows, columns=l_flight_columns)

print(tabulate(df_flights, headers="keys", tablefmt="psql"))


+----+------------+----------+------------+------------+----------+
|    | START_ID   | END_ID   |   DISTANCE |   NUM_HOPS | TYPE     |
|----+------------+----------+------------+------------+----------|
|  0 | MKE        | ORD      |         66 |          1 | FLIES_TO |
|  1 | ORD        | MKE      |         66 |          1 | FLIES_TO |
|  2 | ORD        | DEN      |        886 |          1 | FLIES_TO |
|  3 | DEN        | ORD      |        886 |          1 | FLIES_TO |
|  4 | SJC        | DEN      |        948 |          1 | FLIES_TO |
|  5 | DEN        | SJC      |        948 |          1 | FLIES_TO |
|  6 | SJC        | ORD      |       1829 |          1 | FLIES_TO |
+----+------------+----------+------------+------------+----------+


In [10]:

#  Import the 2 previously created Python DataFrames into KatanaGraph ..
#
#  Nodes and edges are both registered on the importer first; insert() is then
#  called once, inside the same "with" block.

with import_data.DataFrameImporter(my_graph) as df_importer:

   #  Airports set of Nodes, identified by their airport_code
   df_importer.nodes_dataframe(
      df_airports,
      id_column = "airport_code",
      id_space  = "Airport",
      label     = "Airport",
      )

   #  Our Edge, specifying the relationship between Airport --> FLIES_TO --> Airport
   df_importer.edges_dataframe(
      df_flights,
      source_column        = "START_ID",
      source_id_space      = "Airport",
      destination_column   = "END_ID",
      destination_id_space = "Airport",
      type                 = "FLIES_TO",
      )

   df_importer.insert()

print("--")




          0/? [?op/s]

          0/? [?op/s]

--


In [13]:

#  Add a node and an edge that carry no weight property: the Restaurant node
#  and the Attraction edge created below have neither DISTANCE nor NUM_HOPS,
#  so there is nothing on them for analytics.sssp() to use as a weight ..
#
#  NOTE: both statements use CREATE, so every run of this cell adds one more
#  Restaurant node and one more Attraction edge. Run it once per fresh graph.
#
#  Each result is assigned first and then passed to display() positionally.
#  Writing display(l_result1 = ...) would hand the result to display() as an
#  unknown keyword argument: nothing is shown and no variable is bound.


l_query = """
   CREATE ( n: Restaurant { restaurant_code: 'PAP' } )               //  Notice single curly braces
   SET n.restaurant_name = 'Pappadeauxs Seafood Kitchen' 
   """
      #
l_result1 = my_graph.query(l_query)
display(l_result1)

l_query = """
   MATCH
      (n: Airport   ),
      (m: Restaurant)
   WHERE n.airport_code     = 'SJC'
   AND   m.restaurant_code  = 'PAP'
   CREATE (n) -[r: Attraction { DRIVABLE: 'Yes' }]-> (m)
   """
      #
l_result2 = my_graph.query(l_query)
display(l_result2)


          0/? [?op/s]

          0/? [?op/s]

In [14]:

#  Check the output from routine above ..
#
#  Fetch every node in the graph and print the result as a text table.

l_result1 = my_graph.query("""
   MATCH (n) 
   RETURN n
   """)

print(tabulate(l_result1, headers="keys", tablefmt="psql"))

#  +----+-----------------+----------------+-----------+------------------+--------+----------+---------------------+-----------------------------+
#  |    |   n.internal_id | n.labels       | n.LABEL   | n.airport_name   | n.id   | n.type   | n.restaurant_code   | n.restaurant_name           |
#  |----+-----------------+----------------+-----------+------------------+--------+----------+---------------------+-----------------------------|
#  |  0 |               0 | ['Airport']    | Airport   | San Jose         | SJC    | node     | nan                 | nan                         |
#  |  1 |               1 | ['Airport']    | Airport   | Denver           | DEN    | node     | nan                 | nan                         |
#  |  2 | 281474976710656 | ['Airport']    | Airport   | Chicago O-Hare   | ORD    | node     | nan                 | nan                         |
#  |  3 | 562949953421313 | ['Restaurant'] | nan       | nan              | nan    | node     | PAP                 | Pappadeauxs Seafood Kitchen |
#  |  4 | 562949953421312 | ['Airport']    | Airport   | Milwaukee        | MKE    | node     | nan                 | nan                         |
#  +----+-----------------+----------------+-----------+------------------+--------+----------+---------------------+-----------------------------+

#  Fetch every directed edge in the graph and print the result as a text table.

l_result2 = my_graph.query("""
   MATCH (n) - [r] -> (m)
   RETURN r
   """)

print(tabulate(l_result2, headers="keys", tablefmt="psql"))


#  Notice, for the Attraction edge created via Cypher (row 4 below): r.label is correct, but r.TYPE is "nan"

#  +----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------+
#  |    |        r.end.id | r.end.labels   |   r.internal_id | r.label    |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   | r.DRIVABLE   |
#  |----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------|
#  |  0 |               0 | ['Airport']    |               0 | FLIES_TO   |          948 |            1 | FLIES_TO |               1 | ['Airport']      | edge     | nan          |
#  |  1 | 281474976710656 | ['Airport']    |               3 | FLIES_TO   |          886 |            1 | FLIES_TO |               1 | ['Airport']      | edge     | nan          |
#  |  2 |               1 | ['Airport']    |               1 | FLIES_TO   |          948 |            1 | FLIES_TO |               0 | ['Airport']      | edge     | nan          |
#  |  3 | 281474976710656 | ['Airport']    |               2 | FLIES_TO   |         1829 |            1 | FLIES_TO |               0 | ['Airport']      | edge     | nan          |
#  |  4 | 562949953421313 | ['Restaurant'] |               4 | Attraction |          nan |          nan | nan      |               0 | ['Airport']      | edge     | Yes          |
#  |  5 | 562949953421312 | ['Airport']    | 281474976710656 | FLIES_TO   |           66 |            1 | FLIES_TO | 281474976710656 | ['Airport']      | edge     | nan          |
#  |  6 |               1 | ['Airport']    | 281474976710657 | FLIES_TO   |          886 |            1 | FLIES_TO | 281474976710656 | ['Airport']      | edge     | nan          |
#  |  7 | 281474976710656 | ['Airport']    | 562949953421312 | FLIES_TO   |           66 |            1 | FLIES_TO | 562949953421312 | ['Airport']      | edge     | nan          |
#  +----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------+


          0/? [?op/s]

+----+-----------------+----------------+-----------+------------------+------------------+----------+---------------------+-----------------------------+
|    |   n.internal_id | n.labels       | n.LABEL   | n.airport_code   | n.airport_name   | n.type   | n.restaurant_code   | n.restaurant_name           |
|----+-----------------+----------------+-----------+------------------+------------------+----------+---------------------+-----------------------------|
|  0 |               0 | ['Airport']    | Airport   | DEN              | Denver           | node     | nan                 | nan                         |
|  1 |               1 | ['Restaurant'] | nan       | nan              | nan              | node     | PAP                 | Pappadeauxs Seafood Kitchen |
|  2 | 281474976710656 | ['Restaurant'] | nan       | nan              | nan              | node     | PAP                 | Pappadeauxs Seafood Kitchen |
|  3 | 562949953421313 | ['Airport']    | Airport   | ORD             

          0/? [?op/s]

+----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------+
|    |        r.end.id | r.end.labels   |   r.internal_id | r.label    |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   | r.DRIVABLE   |
|----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------|
|  0 | 562949953421314 | ['Airport']    |               0 | FLIES_TO   |          948 |            1 | FLIES_TO |               0 | ['Airport']      | edge     | nan          |
|  1 | 562949953421313 | ['Airport']    |               1 | FLIES_TO   |          886 |            1 | FLIES_TO |               0 | ['Airport']      | edge     | nan          |
|  2 | 562949953421313 | ['Airport']    | 562949953421314 | FLIES_TO   |         1829 |            1 | FLIES_TO | 5

#  Actual analytics ..

In [None]:

from katana import remote
from katana.remote import analytics


#  Run counter for the analytics.sssp() cell: that cell increments it and uses
#  the zero-padded value as the suffix of the result property name
#  (sssp_dist_0021, sssp_dist_0022, ..). Re-running this cell resets it to 20.
l_cntr = 20

print("--")


In [30]:

#  Look up the MKE airport node and keep its key in l_key; the analytics.sssp()
#  cell passes str(l_key) as its source node.
#
#  The keys are collected into a list rather than read from the loop variable
#  after the loop ends. If the query matches nothing, the loop variable is
#  either undefined (fresh kernel) or left over from an earlier run, which
#  would silently produce a wrong l_key.

l_result1 = my_graph.query(
   """
   MATCH (n: Airport) 
   WHERE n.airport_code = 'MKE'
   RETURN n
   """ )

#  Each tuple from itertuples() is (index, first column, ..); element [1] is
#  the first result column, which is n.internal_id in the tables printed by
#  the earlier "MATCH (n) RETURN n" cells.
l_keys = []
for l_each in l_result1.itertuples():
   print(l_each[1])
   l_keys.append(l_each[1])

if not l_keys:
   raise LookupError("No Airport node with airport_code = 'MKE' was found in the graph")

#  As before, when several rows match, the last one wins
l_key = l_keys[-1]
   #
print(l_key)


          0/? [?op/s]

562949953421312
562949953421312


# Iteration/test 1 ..

In [36]:

#  Running analytics.sssp() .. iteration 1 ..
#
#  This used to work, and it works no longer. See comments after this cell.
#

#  Source node: the key found by the MKE lookup cell, passed as a string
l_source_nodeid       = str(l_key)

#  Edge property to use as the weight. None is used for this run; the two
#  edge properties that exist on FLIES_TO edges are kept here as alternatives ..
#
#  l_weight_property     = "DISTANCE"                 #  Edge property
#  l_weight_property     = "NUM_HOPS"                 #  Edge property
l_weight_property     = None

l_max_iterations      = 10000


#  Generate a unique prop name so we can run this over and over without edit ..
#  (counter is bumped on every run and zero-padded to 4 digits)
#
l_cntr += 1
l_output_propname     = f"sssp_dist_{l_cntr:04d}"


analytics.sssp(
   my_graph,
   source               = l_source_nodeid,
   weight_property      = l_weight_property,
   max_iterations       = l_max_iterations,
   result_property_name = l_output_propname,
   )


print("--")


          0/? [?op/s]

--


In [37]:

#  Re-read the whole graph after analytics.sssp() has run: first every node
#  (the sssp result properties show up as extra node columns), then every edge.

l_result1 = my_graph.query("""
   MATCH (n) 
   RETURN n
   """)

print(tabulate(l_result1, headers="keys", tablefmt="psql"))

l_result2 = my_graph.query("""
   MATCH (n) - [r] -> (m)
   RETURN r
   """)

print(tabulate(l_result2, headers="keys", tablefmt="psql"))


#  Sample output,
#
#     +----+-----------------+-------------+-----------+------------------+------------------+--------------------+--------------------+----------+
#     |    |   n.internal_id | n.labels    | n.LABEL   | n.airport_code   | n.airport_name   |   n.sssp_dist_0021 |   n.sssp_dist_0022 | n.type   |
#     |----+-----------------+-------------+-----------+------------------+------------------+--------------------+--------------------+----------|
#     |  0 |               0 | ['Airport'] | Airport   | SJC              | San Jose         |                  3 |               1900 | node     |
#     |  1 |               1 | ['Airport'] | Airport   | DEN              | Denver           |                  2 |                952 | node     |
#     |  2 | 281474976710656 | ['Airport'] | Airport   | ORD              | Chicago O-Hare   |                  1 |                 66 | node     |
#     |  3 | 562949953421312 | ['Airport'] | Airport   | MKE              | Milwaukee        |                  0 |                  0 | node     |
#     +----+-----------------+-------------+-----------+------------------+------------------+--------------------+--------------------+----------+
#     
#     +----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------+
#     |    |        r.end.id | r.end.labels   |   r.internal_id | r.label   |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   |
#     |----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------|
#     |  0 |               0 | ['Airport']    |               0 | FLIES_TO  |          948 |            1 | FLIES_TO |               1 | ['Airport']      | edge     |
#     |  1 | 281474976710656 | ['Airport']    |               3 | FLIES_TO  |          886 |            1 | FLIES_TO |               1 | ['Airport']      | edge     |
#     |  2 |               1 | ['Airport']    |               1 | FLIES_TO  |          948 |            1 | FLIES_TO |               0 | ['Airport']      | edge     |
#     |  3 | 281474976710656 | ['Airport']    |               2 | FLIES_TO  |         1829 |            1 | FLIES_TO |               0 | ['Airport']      | edge     |
#     |  4 |               1 | ['Airport']    | 281474976710656 | FLIES_TO  |          886 |            1 | FLIES_TO | 281474976710656 | ['Airport']      | edge     |
#     |  5 | 562949953421312 | ['Airport']    | 281474976710657 | FLIES_TO  |           66 |            1 | FLIES_TO | 281474976710656 | ['Airport']      | edge     |
#     |  6 | 281474976710656 | ['Airport']    | 562949953421312 | FLIES_TO  |           66 |            1 | FLIES_TO | 562949953421312 | ['Airport']      | edge     |
#     +----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------+


          0/? [?op/s]

+----+-----------------+-------------+-----------+------------------+------------------+--------------------+--------------------+--------------------+----------+
|    |   n.internal_id | n.labels    | n.LABEL   | n.airport_code   | n.airport_name   |   n.sssp_dist_0021 |   n.sssp_dist_0022 |   n.sssp_dist_0023 | n.type   |
|----+-----------------+-------------+-----------+------------------+------------------+--------------------+--------------------+--------------------+----------|
|  0 |               0 | ['Airport'] | Airport   | SJC              | San Jose         |                  3 |               1900 |                  3 | node     |
|  1 |               1 | ['Airport'] | Airport   | DEN              | Denver           |                  2 |                952 |                  2 | node     |
|  2 | 281474976710656 | ['Airport'] | Airport   | ORD              | Chicago O-Hare   |                  1 |                 66 |                  1 | node     |
|  3 | 562949953421312

          0/? [?op/s]

+----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------+
|    |        r.end.id | r.end.labels   |   r.internal_id | r.label   |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   |
|----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------|
|  0 |               0 | ['Airport']    |               0 | FLIES_TO  |          948 |            1 | FLIES_TO |               1 | ['Airport']      | edge     |
|  1 | 281474976710656 | ['Airport']    |               3 | FLIES_TO  |          886 |            1 | FLIES_TO |               1 | ['Airport']      | edge     |
|  2 |               1 | ['Airport']    |               1 | FLIES_TO  |          948 |            1 | FLIES_TO |               0 | ['Airport']      | edge     |
|  3 | 281474976710656 | ['Airport