#  Setup stuff: Connectivity

In [None]:
#  From,
#
#     https://github.com/KatanaGraph/katana-enterprise/blob/master/python/test/notebooks/red/analytics_remote.ipynb


In [None]:

#  This file also includes;
#     .  tabulate
#         Eg.,   print(tabulate(l_result, headers='keys', tablefmt='psql'))


#  Setting display options

from tabulate import tabulate
import pandas as pd
   #
pd.set_option("display.width", 480)

print("--")


In [None]:

from katana import remote

my_client = remote.Client()

print(my_client)


In [None]:
NUM_PARTITIONS  = 3
   #
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

print("--")


In [None]:
#  CONNECT TO GRAPH

for l_graph in my_client.get_database(name=DB_NAME).graphs_in_database():
   if (l_graph.name == GRAPH_NAME):
      my_graph=my_client.get_database(name=DB_NAME).get_graph_by_id(id=l_graph.graph_id)
         #
      break

print(my_graph)

#  Setup stuff: Debugging 

In [None]:
#  LIST ALL GRAPHS

for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "     DB GRAPH: ", l_graph.name, "    GRAPH VERSION: ", l_graph.version)

print("--")


In [None]:
display(my_graph.num_nodes())
display(my_graph.num_edges())


In [None]:
my_graph.query("CALL graph.schema() RETURN *")


In [None]:
my_client.widgets().operations()


In [None]:
# ##################################################################

In [None]:

l_result = my_graph.query("""

   MATCH (n) - [ r ] -> (m)
   RETURN n, m, r
   
   """, contextualize=True)

l_result.view()


#  Actual analytics ..

In [None]:

#  Analytics are headed here in the documentation,
#     https://docs.k9h.dev/latest/docs/analytics/index.html#
#
#  And include,   (2022/09/29)
#     
#     .  PageRank (Centrality)                                                                                            AMY
#     .  BetweenNess (Centrality)                                                                                         AMY
#     .  Louvain (Community)                                                                                              AMY
#     .  BFS (Pathfinding)                                                                                                AMY
#     .  SSSP (Pathfinding)                                                                                               AMY


#  [ OLD VERSION, plus Amy ]
#  And include,
#
#     Path Algorithms
#        Single-Source Shortest Paths (SSSP)            - No path, bugs                                                   AMY
#        Breadth-First Search (BFS)                     - Syntactic sugar for above, wrong use/name ?                     AMY
#        K-shortest paths                               - No path, bugs
#
#        Additional Amy,
#           #
#        Depth-First Search
#        All Pairs Shortest Path
#        Minimum Spanning Tree
#        Random Walk
#
#    
#     Community Detection Algorithms
#        Connected Components                           -  ??                                                             AMY
#        Community Detection using Label Propagation    -     local                                                       AMY
#        Louvain                                        -  ??                                                             AMY
#        Local Clustering Coefficient                   -     local
#        Triangle Counting                              -  Not implemented                                                AMY
#        K-core                                         -  ??
#        K-truss                                        -     local
#
#        Additional Amy,
#           #
#        Strongly Connected Components                           
#
#
#     Centrality Algorithms
#        PageRank                                       -  No weight ?  Working to validate result/formula                AMY
#        Betweenness Centrality                         -  Working to validate                                            AMY
#
#        Additional Amy,
#           #
#        Degree Centrality
#        Closeness Centrality
#
#
#     Similarity Algorithms
#        Jaccard Similarity                             -     local



In [None]:

# import numpy as np
from katana import remote
from katana.remote import analytics
# from katana_enterprise.distributed import PartitionPolicy


l_cntr = 0

print("--")


In [None]:

#  Running analytics.sssp() ..


l_source_nodeid       = "2"                        #  id of node to start on (this is SJC)

# l_weight_property     = None                     #  This errors out
   #
l_weight_property     = "DISTANCE"                 #  Edge property
# l_weight_property     = "NUM_HOPS"

l_max_iterations      = 10000 


#  Generate a unique prop name so we can run this over and over without edit ..
#
l_cntr += 1
   #
l_output_propname     = "sssp_dist_" + str(l_cntr).zfill(4)


analytics.sssp(
   my_graph,
   source               = l_source_nodeid,
   weight_property      = l_weight_property,
   max_iterations       = l_max_iterations,
   result_property_name = l_output_propname,
   )

display(print("--"))


In [None]:
l_result1 = my_graph.query(
   """
   MATCH (n) 
   RETURN n
   """ )
# l_result1.view()
print(tabulate(l_result1, headers='keys', tablefmt='psql'))

l_result2 = my_graph.query(
   """
   MATCH (n) - [r] -> (m)
   RETURN r
   """ )
# l_result2.view()
print(tabulate(l_result2, headers='keys', tablefmt='psql'))


In [None]:
l_result = my_graph.query(
   """
   MATCH (n) - [r] -> (m)
   RETURN r
   // RETURN r.type, r.TYPE, r.label, r.start.labels, r.end.labels, r.internal_id, r.start.id, r.end.id, r.DISTANCE, r.NUM_HOPS
   """ )
# l_result.view()
print(tabulate(l_result, headers='keys', tablefmt='psql'))


#  Graph loaded, but before running sssp() 
#     (This is correct)
#
#  r.end.id     r.end.labels    r.internal_id   r.label     r.DISTANCE  r.NUM_HOPS  r.start.id  r.start.labels  r.type
#  0            ['Airport']     1               FLIES_TO      66        1           1           ['Airport']     edge
#  1            ['Airport']     0               FLIES_TO      66        1           0           ['Airport']     edge
#
#  3            ['Airport']     2               FLIES_TO     886        1           1           ['Airport']     edge
#  1            ['Airport']     5               FLIES_TO     886        1           3           ['Airport']     edge
#
#  2            ['Airport']     6               FLIES_TO     948        1           3           ['Airport']     edge
#  3            ['Airport']     3               FLIES_TO     948        1           2           ['Airport']     edge
#
#  1            ['Airport']     4               FLIES_TO    1829        1           2           ['Airport']     edge

#  Graph loaded, after running sssp() 
#     (This is not correct)
#
#  r.end.id     r.end.labels    r.internal_id   r.label     r.DISTANCE  r.NUM_HOPS  r.start.id  r.start.labels  r.type
#  1            ['Airport']     0               FLIES_TO      66        1           0           ['Airport']     edge
#  0            ['Airport']     1               FLIES_TO      66        1           1           ['Airport']     edge
#
#  3            ['Airport']     2               FLIES_TO     886        1           1           ['Airport']     edge
#  1            ['Airport']     5               FLIES_TO     886        1           3           ['Airport']     edge
#
#  2            ['Airport']     6               FLIES_TO     948        1           3           ['Airport']     edge         #  This data is now wrong
#  1            ['Airport']     3               FLIES_TO     948        1           2           ['Airport']     edge         #  This data is now wrong
#
#  3            ['Airport']     4               FLIES_TO    1829        1           2           ['Airport']     edge         #  This data is now wrong


#  Now version 0.5 ..
#
#  Graph loaded, but before running sssp() 
#     (This is correct)
#
#  r.end.id           r.end.labels   r.internal_id    r.label    r.DISTANCE    r.NUM_HOPS    r.TYPE      r.start.id        r.start.labels    r.type
#  562949953421313    ['Airport']    0                FLIES_TO    948          1             FLIES_TO    1                 ['Airport']       edge
#  562949953421312    ['Airport']    2                FLIES_TO    886          1             FLIES_TO    1                 ['Airport'] 	     edge
#  562949953421312    ['Airport']    1                FLIES_TO     66          1             FLIES_TO    0                 ['Airport'] 	     edge
#  562949953421312    ['Airport']    562949953421312  FLIES_TO   1829          1             FLIES_TO    562949953421313   ['Airport'] 	     edge
#  1                  ['Airport']    562949953421314  FLIES_TO    948          1             FLIES_TO    562949953421313   ['Airport'] 	     edge
#  1                  ['Airport']    562949953421313  FLIES_TO    886          1             FLIES_TO    562949953421312   ['Airport'] 	     edge
#  0                  ['Airport']    562949953421315  FLIES_TO     66          1             FLIES_TO    562949953421312   ['Airport'] 	     edge

#  Graph loaded, after running sssp() 
#
#  r.end.id           r.end.labels   r.internal_id    r.label    r.DISTANCE    r.NUM_HOPS    r.TYPE      r.start.id          r.start.labels    r.type
#  2                  ['Airport']    3                FLIES_TO     66          1             FLIES_TO    0                   ['Airport']       edge
#  1                  ['Airport']    2 	              FLIES_TO    948          1             FLIES_TO    3                   ['Airport']       edge
#  2                  ['Airport']    5 	              FLIES_TO   1829          1             FLIES_TO    3                   ['Airport']       edge
#  2                  ['Airport']    4 	              FLIES_TO    886          1             FLIES_TO    1                   ['Airport']       edge
#  3                  ['Airport']    6 	              FLIES_TO    948          1             FLIES_TO    1                   ['Airport']       edge
#  0                  ['Airport']    0 	              FLIES_TO     66          1             FLIES_TO    2                   ['Airport']       edge
#  1                  ['Airport']    1                FLIES_TO    886          1             FLIES_TO    2                   ['Airport']       edge



In [71]:
l_result = my_graph.query(
   """
   MATCH (n) - [r] -> (m)
   RETURN n, r, m
   """ ,
   contextualize=True,
   )
l_result.view()


          0/? [?op/s]

          0/? [?op/s]

VBox(children=(HTML(value='\n                <style>\n                #jp-main-content-panel .widget-containerâ€¦

# Running graph.project()



In [68]:
l_result1 = my_graph.query(
   """
   MATCH (n) 
   RETURN n
   """ )
# l_result1.view()
print(tabulate(l_result1, headers='keys', tablefmt='psql'))

l_result2 = my_graph.query(
   """
   MATCH (n) - [r] -> (m)
   RETURN r
   """ )
# l_result2.view()
print(tabulate(l_result2, headers='keys', tablefmt='psql'))



          0/? [?op/s]

+----+-----------------+-------------+-----------+------------------+--------+----------+
|    |   n.internal_id | n.labels    | n.LABEL   | n.airport_name   | n.id   | n.type   |
|----+-----------------+-------------+-----------+------------------+--------+----------|
|  0 |               0 | ['Airport'] | Airport   | Denver           | DEN    | node     |
|  1 | 281474976710656 | ['Airport'] | Airport   | Chicago O-Hare   | ORD    | node     |
|  2 | 562949953421312 | ['Airport'] | Airport   | Milwaukee        | MKE    | node     |
|  3 | 562949953421313 | ['Airport'] | Airport   | San Jose         | SJC    | node     |
+----+-----------------+-------------+-----------+------------------+--------+----------+


          0/? [?op/s]

+----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------+
|    |        r.end.id | r.end.labels   |   r.internal_id | r.label   |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   |
|----+-----------------+----------------+-----------------+-----------+--------------+--------------+----------+-----------------+------------------+----------|
|  0 | 562949953421313 | ['Airport']    |               0 | FLIES_TO  |          948 |            1 | FLIES_TO |               0 | ['Airport']      | edge     |
|  1 | 281474976710656 | ['Airport']    |               1 | FLIES_TO  |          886 |            1 | FLIES_TO |               0 | ['Airport']      | edge     |
|  2 | 562949953421312 | ['Airport']    | 281474976710656 | FLIES_TO  |           66 |            1 | FLIES_TO | 281474976710656 | ['Airport']      | edge     |
|  3 |               0 | ['Airport

In [69]:

#  graph.project() is used to create a homogenous group, from a heterogeneous one,
#  necessary to support some analytics
#
#  Here we'll add stuff to the graph that analytics.sssp() will not like ..


l_query = """
   CREATE ( n: Restaurant { restaurant_code: 'PAP' } )               //  Notice single curly braces
   SET n.restaurant_name = 'Pappadeauxs Seafood Kitchen' 
   """
      #
display(l_result1 = my_graph.query(l_query))

l_query = """
   MATCH
      (n: Airport   ),
      (m: Restaurant)
   WHERE n.id               = 'SJC'
   AND   m.restaurant_code  = 'PAP'
   CREATE (n) -[r: Attraction { DRIVABLE: 'Yes' }]-> (m)
   """
      #
display(l_result2 = my_graph.query(l_query))


          0/? [?op/s]

          0/? [?op/s]

In [70]:
l_result1 = my_graph.query(
   """
   MATCH (n) 
   RETURN n
   """ )
# l_result1.view()
print(tabulate(l_result1, headers='keys', tablefmt='psql'))

l_result2 = my_graph.query(
   """
   MATCH (n) - [r] -> (m)
   RETURN r
   """ )
# l_result2.view()
print(tabulate(l_result2, headers='keys', tablefmt='psql'))


          0/? [?op/s]

+----+-----------------+----------------+-----------+------------------+--------+----------+---------------------+-----------------------------+
|    |   n.internal_id | n.labels       | n.LABEL   | n.airport_name   | n.id   | n.type   | n.restaurant_code   | n.restaurant_name           |
|----+-----------------+----------------+-----------+------------------+--------+----------+---------------------+-----------------------------|
|  0 |               0 | ['Airport']    | Airport   | Denver           | DEN    | node     | nan                 | nan                         |
|  1 |               1 | ['Restaurant'] | nan       | nan              | nan    | node     | PAP                 | Pappadeauxs Seafood Kitchen |
|  2 | 281474976710656 | ['Airport']    | Airport   | Chicago O-Hare   | ORD    | node     | nan                 | nan                         |
|  3 | 562949953421313 | ['Airport']    | Airport   | San Jose         | SJC    | node     | nan                 | nan            

          0/? [?op/s]

+----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------+
|    |        r.end.id | r.end.labels   |   r.internal_id | r.label    |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   | r.DRIVABLE   |
|----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------|
|  0 | 562949953421313 | ['Airport']    |               0 | FLIES_TO   |          948 |            1 | FLIES_TO |               0 | ['Airport']      | edge     | nan          |
|  1 | 281474976710656 | ['Airport']    |               1 | FLIES_TO   |          886 |            1 | FLIES_TO |               0 | ['Airport']      | edge     | nan          |
|  2 | 562949953421312 | ['Airport']    | 281474976710656 | FLIES_TO   |           66 |            1 | FLIES_TO | 2

In [77]:

l_result1 = my_graph.query(
   """
   MATCH (n) - [r] -> (m)
   WITH r.end.id = 0
   RETURN r
   """ )
# l_result1.view()
print(tabulate(l_result1, headers='keys', tablefmt='psql'))


          0/? [?op/s]

OperationError: FnpzJ3wtvdrTMcdbCDx714gejP16K1ZzyHUjNgR6qapT-2e8aSvdfN2fgJdHJL backtrace (OpGraph.cpp:566): backtrace (OpGraph.h:596): backtrace (OpGraph.cpp:577): backtrace (OpGraph.cpp:65): backtrace (ProjectOperator.cpp:59): backtrace (Evaluate.cpp:2000): backtrace (Evaluate.cpp:1169): column r not found (Table.cpp:634): InternalError:ExecutionError: InternalError:ExecutionError

In [None]:

#  analytics.sssp() now will now excounter at least one edge without a weight
#  property. As such, it generates a NaN for that path/output.


l_source_nodeid       = "2"                        #  id of node to start on (this is SJC)
l_weight_property     = "DISTANCE"                 #  Edge property
l_max_iterations      = 10000 

#  Generate a unique prop name so we can run this over and over without edit ..
#
l_cntr += 1
   #
l_output_propname     = "sssp_dist_" + str(l_cntr).zfill(4)

analytics.sssp(
   my_graph,
   source               = l_source_nodeid,
   weight_property      = l_weight_property,
   max_iterations       = l_max_iterations,
   result_property_name = l_output_propname,
   )

display(print("--"))


In [None]:
#  Check the output from routine above ..

l_result = my_graph.query(
   """
   MATCH (n) 
   RETURN n
   """ )
# l_result.view()
print(tabulate(l_result, headers='keys', tablefmt='psql'))


In [None]:
#  analytics.sssp() should be run against a proection, a sub-graph ..

my_graph2 = my_graph.project(edge_types=["FLIES_TO"])


l_source_nodeid       = "2"                        #  id of node to start on (this is SJC)
l_weight_property     = "DISTANCE"                 #  Edge property
l_max_iterations      = 10000 

#  Generate a unique prop name so we can run this over and over without edit ..
#
l_cntr += 1
   #
l_output_propname     = "sssp_dist_" + str(l_cntr).zfill(4)

analytics.sssp(
   my_graph2,
   source               = l_source_nodeid,
   weight_property      = l_weight_property,
   max_iterations       = l_max_iterations,
   result_property_name = l_output_propname,
   )

display(print("--"))




In [None]:
#  Check the output from routine above ..

l_result = my_graph.query(
   """
   MATCH (n) 
   RETURN n
   """ )
# l_result.view()
print(tabulate(l_result, headers='keys', tablefmt='psql'))

