# Setup: Connectivity

In [None]:

#  Notebook-wide imports and display options

import pandas as pd
from IPython.display import HTML, display
from tabulate import tabulate

#  Let pandas use up to 480 characters of width when rendering text output.
#
pd.set_option("display.width", 480)

#  Meant to set horizontal scroll for wide outputs.
#
#  NOTE(review):  the HTML payload is an empty string, so this call injects
#  nothing -- presumably the original markup was lost;  confirm and restore.
#
display(HTML(""))

print("--")


In [None]:

#  Create the client handle that every later cell uses to reach the databases.
#  Client() is called without arguments here, so any connection settings come
#  from its defaults.
#
from katana import remote

my_client = remote.Client()

#  Echo the client object as a quick sanity check that it was created.
print(my_client)


In [None]:

#  Names used to locate the graph in the "CONNECT TO GRAPH" cell.
#
#  NOTE(review):  NUM_PARTITIONS is not referenced by any of the visible cells
#  -- confirm it is still needed.
#
NUM_PARTITIONS  = 3
   #
DB_NAME         = "my_db"                                  #  Passed to get_database(name=...)
GRAPH_NAME      = "my_graph"                               #  Passed to find_graphs_by_name(...)

#  Marker so the cell shows some output once it has run.
print("--")


In [104]:

#  CONNECT TO GRAPH
#
#  Look the database up by name, ask it for the graphs called GRAPH_NAME and
#  keep the first one;  any further matches land in the throwaway "_".

l_database = my_client.get_database(name=DB_NAME)
l_matches  = l_database.find_graphs_by_name(GRAPH_NAME)

my_graph, *_ = l_matches

print(my_graph)


<_Graph my_graph, 7HZVYK2oTG54BcT9MsnRA65W3i2xRx64UfPi7zNVwiUR, 2>


#  Actual analytics ..

In [None]:

#  View the graph:  fetch every (n) -[r]-> (m) triple and render the result
#

l_view_query = """
   MATCH (n) - [r] -> (m)
   RETURN n, r, m
   """

l_result = my_graph.query(l_view_query, contextualize=True)
l_result.view()


In [None]:

#  analytics provides the sssp() routine called in the cells below.
from katana.remote import analytics


#  Seed for the numeric suffix of the sssp result property.  Each sssp cell
#  increments it before use, so the first generated name is "sssp_dist_0021".
#  Re-running this cell resets the sequence.
l_cntr = 20                                                #  Used to generate a unique column name
   #
print("--")


In [None]:

#  Getting the airport node id
#
#  Set l_airport to the code of the airport to start from.  Codes tried in
#  this notebook:  MKE, ORD, DEN, SJC.

l_airport = "SJC"


#  The airport code is substituted into the WHERE clause of the query text.
#
l_query  = """
   MATCH (n: Airport) 
   WHERE n.airport_code = '{0}'
   RETURN ID(n) AS my_id
   """.format(l_airport)

#  Take the first value of the "my_id" column as the node id.
#
l_id_column  = my_graph.query(l_query)["my_id"]
l_airport_id = l_id_column[0]

print("Airport id:  %s   %d" % (l_airport, l_airport_id))
print("")


In [96]:

#  Running analytics.sssp() ..

#  Start node:  the airport id looked up above, handed over as a string
#
l_source_nodeid       = str(l_airport_id)

#  Edge property to use as the weight.
#
#  This currently fails on the edge properties "DISTANCE" and "NUM_HOPS".
#  It works with None.
#
l_weight_property     = None

l_max_iterations      = 10000

#  Bump the counter and build a fresh property name from it, so we can run
#  this cell over and over without edit ..
#
l_cntr += 1
l_suffix              = str(l_cntr).zfill(4)
l_output_propname     = "sssp_dist_" + l_suffix

l_sssp_args = dict(
   source               = l_source_nodeid,
   weight_property      = l_weight_property,
   max_iterations       = l_max_iterations,
   result_property_name = l_output_propname,
   )

analytics.sssp(my_graph, **l_sssp_args)

print("--")


          0/? [?op/s]

--


In [97]:

#  analytics.sssp() adds a new property to the node;  list every node to see it
#

l_nodes_query = """
   MATCH (n) 
   RETURN n
   """

l_result1 = my_graph.query(l_nodes_query)

print(tabulate(l_result1, headers='keys', tablefmt='psql'))

#  To list the edges instead, query with
#
#     MATCH (n) - [r] -> (m)
#     RETURN r
#
#  and tabulate that result the same way.


          0/? [?op/s]

+----+-----------------+-------------+-----------+------------------+------------------+--------------------+----------+
|    |   n.internal_id | n.labels    | n.LABEL   | n.airport_code   | n.airport_name   |   n.sssp_dist_0031 | n.type   |
|----+-----------------+-------------+-----------+------------------+------------------+--------------------+----------|
|  0 |               0 | ['Airport'] | Airport   | ORD              | Chicago O-Hare   |                  1 | node     |
|  1 | 281474976710656 | ['Airport'] | Airport   | MKE              | Milwaukee        |                  2 | node     |
|  2 | 281474976710657 | ['Airport'] | Airport   | SJC              | San Jose         |                  0 | node     |
|  3 | 281474976710658 | ['Airport'] | Airport   | DEN              | Denver           |                  1 | node     |
+----+-----------------+-------------+-----------+------------------+------------------+--------------------+----------+


#  Removing a given property from all nodes

In [None]:

#  It is only required that you run this cell one time.
#
#  You may change the value in the text entry field as often as you wish;  the
#  next cell reads whatever the field holds when it runs.
#
from ipywidgets import widgets

my_column = widgets.Text(
   value       = "sssp_dist_0021",
   placeholder = "type here",
   description = "Column Name: ",
   disabled    = False,
   )

#  Last expression of the cell, so the widget is rendered as its output.
my_column


In [None]:

#  We could run the remove_property() directly, but then we couldn't cleanly catch
#  errors. So, run it inside a UDF.
#

def f_drop_column(i_graph, i_property):
   """
   Remove the node property named i_property from i_graph.

   Errors raised by the removal are reported through the return value
   instead of being raised to the caller.

   Args:
      i_graph:     graph object exposing nodes.remove_property().
      i_property:  name of the node property (column) to remove.

   Returns:
      distributed.single_host(host=0, result=True) when the removal went
      through, or the same wrapper with result=False when it raised.
   """
   #  Imported inside the function, presumably because the body is executed
   #  through graph.run() rather than in the notebook kernel -- TODO confirm.
   from katana import distributed
   try:
      i_graph.nodes.remove_property(i_property)
      return distributed.single_host(host=0, result=True)
   except Exception:
      #  Was a bare "except:", which also swallowed KeyboardInterrupt and
      #  SystemExit.  Only ordinary errors are mapped to result=False now.
      return distributed.single_host(host=0, result=False)


#  Read the column name from the text widget once and use it throughout.
#
l_column = str(my_column.value)

print("Deleting column name: %s" % (l_column))

#  f_drop_column() is run through my_graph.run() and reports success as a
#  truthy / falsy result.
#
l_was_dropped = my_graph.run(lambda g: f_drop_column(g, l_column))

if l_was_dropped:
   l_message = "Column ( %s ) deleted."
else:
   l_message = "Column ( %s ) not found."

print(l_message % (l_column))



# Using graph.project()

In [105]:

#  graph.project() is used to create a homogeneous graph from a heterogeneous one,
#  which is necessary to support some analytics
#
#  Here we'll add stuff to the graph that some analytics will not like:  a
#  Restaurant node, and an ATTRACTION edge from the SJC airport to it.
#
#  NOTE(review):  both statements are plain CREATEs, so re-running this cell
#  presumably adds the node and the edge a second time -- confirm before re-run.


#  Fix applied to both calls below:  the original display(l_result1 = ...) passed
#  the query result to display() as a keyword argument.  No variable was
#  assigned and display() received no object to render.  Assign first, then
#  display.

l_query = """
   CREATE ( n: Restaurant { restaurant_code: 'PAP' } )               //  Notice single curly braces
   SET n.restaurant_name = 'Pappadeauxs Seafood Kitchen' 
   """
      #
l_result1 = my_graph.query(l_query)
display(l_result1)

l_query = """
   MATCH
      (n: Airport   ),
      (m: Restaurant)
   WHERE n.airport_code     = 'SJC' AND m.restaurant_code  = 'PAP'
   CREATE (n) -[r: ATTRACTION { DRIVABLE: 'Yes' }]-> (m)
   """
      #
l_result2 = my_graph.query(l_query)
display(l_result2)


          0/? [?op/s]

          0/? [?op/s]

In [106]:

#  List every node, then every edge, and print both as psql-style tables.
#  (The .view() method of each result is an alternative to the table.)

l_nodes_query = """
   MATCH (n) 
   RETURN n
   """
l_result1 = my_graph.query(l_nodes_query)

print(tabulate(l_result1, headers='keys', tablefmt='psql'))

l_edges_query = """
   MATCH (n) - [r] -> (m)
   RETURN r
   """
l_result2 = my_graph.query(l_edges_query)

print(tabulate(l_result2, headers='keys', tablefmt='psql'))


          0/? [?op/s]

+----+-----------------+----------------+---------------------+-----------------------------+----------+-----------+------------------+------------------+
|    |   n.internal_id | n.labels       | n.restaurant_code   | n.restaurant_name           | n.type   | n.LABEL   | n.airport_code   | n.airport_name   |
|----+-----------------+----------------+---------------------+-----------------------------+----------+-----------+------------------+------------------|
|  0 |               3 | ['Restaurant'] | PAP                 | Pappadeauxs Seafood Kitchen | node     | nan       | nan              | nan              |
|  1 |               2 | ['Airport']    | nan                 | nan                         | node     | Airport   | DEN              | Denver           |
|  2 |               1 | ['Airport']    | nan                 | nan                         | node     | Airport   | SJC              | San Jose         |
|  3 |               0 | ['Airport']    | nan                 | nan   

          0/? [?op/s]

+----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------+
|    |        r.end.id | r.end.labels   |   r.internal_id | r.label    |   r.DISTANCE |   r.NUM_HOPS | r.TYPE   |      r.start.id | r.start.labels   | r.type   | r.DRIVABLE   |
|----+-----------------+----------------+-----------------+------------+--------------+--------------+----------+-----------------+------------------+----------+--------------|
|  0 |               0 | ['Airport']    |               0 | FLIES_TO   |         1829 |            1 | FLIES_TO |               1 | ['Airport']      | edge     | nan          |
|  1 |               2 | ['Airport']    |               4 | FLIES_TO   |          948 |            1 | FLIES_TO |               1 | ['Airport']      | edge     | nan          |
|  2 |               3 | ['Restaurant'] |               6 | ATTRACTION |          nan |          nan | nan      |  

# Run sssp() again, both with and without project()

In [107]:

#  Running analytics.sssp() directly on my_graph (no projection) ..

#  Start node:  the airport id looked up earlier, handed over as a string
#
l_source_nodeid       = str(l_airport_id)

#  Edge property to use as the weight.
#
#  This currently fails on the edge properties "DISTANCE" and "NUM_HOPS".
#  It works with None.
#
l_weight_property     = None

l_max_iterations      = 10000

#  Bump the counter and build a fresh property name from it, so we can run
#  this cell over and over without edit ..
#
l_cntr += 1
l_suffix              = str(l_cntr).zfill(4)
l_output_propname     = "sssp_dist_" + l_suffix

l_sssp_args = dict(
   source               = l_source_nodeid,
   weight_property      = l_weight_property,
   max_iterations       = l_max_iterations,
   result_property_name = l_output_propname,
   )

analytics.sssp(my_graph, **l_sssp_args)

print("--")


          0/? [?op/s]

--


In [108]:
#  Check the output from routine above:  list every node with its properties ..

l_all_nodes_query = """
   MATCH (n) 
   RETURN n
   """
l_result = my_graph.query(l_all_nodes_query)

#  l_result.view() is an alternative to the printed table.
print(tabulate(l_result, headers='keys', tablefmt='psql'))

#  Sample output,
#
#     (nothing is output)
#
#  NOTE(review):  the table itself is printed;  the note above presumably means
#  that no new sssp_dist_* column appears in it -- confirm.


          0/? [?op/s]

+----+-----------------+----------------+-----------+------------------+------------------+----------+---------------------+-----------------------------+
|    |   n.internal_id | n.labels       | n.LABEL   | n.airport_code   | n.airport_name   | n.type   | n.restaurant_code   | n.restaurant_name           |
|----+-----------------+----------------+-----------+------------------+------------------+----------+---------------------+-----------------------------|
|  0 |               0 | ['Airport']    | Airport   | ORD              | Chicago O-Hare   | node     | nan                 | nan                         |
|  1 |               2 | ['Airport']    | Airport   | DEN              | Denver           | node     | nan                 | nan                         |
|  2 |               1 | ['Airport']    | Airport   | SJC              | San Jose         | node     | nan                 | nan                         |
|  3 |               3 | ['Restaurant'] | nan       | nan             

In [109]:

#  analytics.sssp() should be run against a projection, a sub-graph ..
#
#  The projection is requested with edge_types restricted to FLIES_TO.

my_graph2 = my_graph.project(edge_types=["FLIES_TO"])


#  Running analytics.sssp() on the projection ..

#  Start node:  the airport id looked up earlier, handed over as a string
#
l_source_nodeid       = str(l_airport_id)

#  Edge property to use as the weight.
#
#  This currently fails on the edge properties "DISTANCE" and "NUM_HOPS".
#  It works with None.
#
l_weight_property     = None

l_max_iterations      = 10000

#  Bump the counter and build a fresh property name from it, so we can run
#  this cell over and over without edit ..
#
l_cntr += 1
l_suffix              = str(l_cntr).zfill(4)
l_output_propname     = "sssp_dist_" + l_suffix

l_sssp_args = dict(
   source               = l_source_nodeid,
   weight_property      = l_weight_property,
   max_iterations       = l_max_iterations,
   result_property_name = l_output_propname,
   )

analytics.sssp(my_graph2, **l_sssp_args)

print("--")


          0/? [?op/s]

--


In [112]:

#  Check the output from routine above ..
#
#  NOTE(review):  this queries my_graph, while sssp() above was run on the
#  projection my_graph2 -- confirm this is the graph you mean to inspect.

l_all_nodes_query = """
   MATCH (n) 
   RETURN n
   """
l_result = my_graph.query(l_all_nodes_query)

print(tabulate(l_result, headers='keys', tablefmt='psql'))



          0/? [?op/s]

+----+-----------------+----------------+---------------------+-----------------------------+----------+-----------+------------------+------------------+
|    |   n.internal_id | n.labels       | n.restaurant_code   | n.restaurant_name           | n.type   | n.LABEL   | n.airport_code   | n.airport_name   |
|----+-----------------+----------------+---------------------+-----------------------------+----------+-----------+------------------+------------------|
|  0 |               3 | ['Restaurant'] | PAP                 | Pappadeauxs Seafood Kitchen | node     | nan       | nan              | nan              |
|  1 |               2 | ['Airport']    | nan                 | nan                         | node     | Airport   | DEN              | Denver           |
|  2 |               1 | ['Airport']    | nan                 | nan                         | node     | Airport   | SJC              | San Jose         |
|  3 |               0 | ['Airport']    | nan                 | nan   