#  Display options:

In [None]:

#  References for this Notebook,
#
#     Working thru the examples on this page,
#        https://docs.k9h.dev/latest/recipes/udf-recipes.html/tree/master/lonestar/analytics/distributed/experimental
# 


In [None]:

#  Setting display options 

import pandas as pd
   #
pd.set_option("display.width", 480)

#  Sets horizontal scroll for wide outputs
#
#     The <style> rule stops <pre> output blocks from wrapping, so wide
#     tables scroll sideways instead of folding. An empty HTML("") payload
#     injects nothing, and therefore changes nothing.
#
from IPython.display import display, HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

from tabulate import tabulate

print("--")


#  Setup stuff: Connectivity

In [None]:

from katana import remote
#  from katana.remote import import_data

#  Create the client object used by every later cell to reach the database.
#  Client() takes no arguments here; presumably the connection settings are
#  picked up from the environment -- TODO confirm.
my_client = remote.Client()

#  Print the client as a quick check that the object was created
print(my_client)


In [None]:

#  Notebook-wide settings, kept in one cell so they are easy to change
#
NUM_PARTITIONS = 3                              #  Number of partitions

DB_NAME, GRAPH_NAME = "my_db", "my_graph"       #  Database name, and the graph to look up inside it

print("--")                                     #  Visible marker that the cell ran


In [None]:

#  CONNECT TO GRAPH
#
#     find_graphs_by_name() can yield several graphs, or none. We keep the
#     first match, and fail with a readable message when nothing matches
#     (a bare  "my_graph, *_ = ..."  only reports "not enough values to unpack").
#     The exception type stays ValueError, same as the unpacking failure.

l_graphs = list(my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME))

if (not l_graphs):
   raise ValueError("No graph named '%s' found in database '%s'" % (GRAPH_NAME, DB_NAME))

my_graph = l_graphs[0]

print(my_graph)


In [None]:

#  Show the size of the graph: node count first, then edge count
for l_counter in (my_graph.num_nodes, my_graph.num_edges):
   display(l_counter())


# Working with num_partitions()

In [None]:
#  Even though this graph was made with 3 partitions, and even though certain operations
#  fail with fewer than 3 partitions, num_partitions may still report None. This can
#  happen on really small graphs, and leads to unexpected results in the cells below.
#
#  NOTE(review): num_partitions is read here without parentheses (as an attribute), while
#  the section heading writes num_partitions() -- confirm which form the API expects.

print(my_graph.num_partitions)


In [None]:

#  Repartition using the notebook-wide NUM_PARTITIONS setting instead of
#  repeating the literal here, so the two values cannot drift apart.
#
#     (To experiment with a single partition, pass  num_partitions = 1  instead.)
#
my_graph.repartition(num_partitions = NUM_PARTITIONS)

print("--")


# UDFs, Example 01: Count City Nodes

In [None]:

#  All of these below are translated to the Airport graph
#

#  Not an exact copy of example 1; some things are left off.
#
def my_func(i_graph, i_label):
   """
   Print the nodes that carry a given label, then how many of them there are.

   Args:
      i_graph: graph object passed in by the caller; only its
         nodes(labels = ...) call is used here.
      i_label: node label to select, e.g. "Airport".

   Returns:
      None. Everything is reported through print().
   """

   l_airports = i_graph.nodes(labels = i_label)
      #
   print("Type: %s" % (str(type(l_airports))))
      #
      #  One-element tuple on purpose:  "%s" % x  raises TypeError (or formats
      #  the wrong thing) whenever x itself happens to be a tuple.
      #
   print("Value: %s" % (l_airports,))
   print("Loop thru the above:")
      #
   for l_each in l_airports:
      print("   " + str(l_each))

   print("Airport Node Count", len(l_airports))

   return

#  Run my_func against the graph for the "Airport" label; per the sample output
#  below, each host reports its own section of printed output.
l_result = my_graph.run(lambda g: my_func(g, "Airport"))


#  Sample output,
#
#     Host 0 output:
#     Type: <class 'katana.distributed.EntitySequenceNode'>
#     Value: <katana.distributed.EntitySequenceNode object at 0x7f8da9fbaf30>
#     Loop thru the above:
#        <lnid 4294967294>
#        <lnid 0>
#        <lnid 1>
#     Airport Node Count 3
#     
#     Host 1 output:
#     Type: <class 'katana.distributed.EntitySequenceNode'>
#     Value: <katana.distributed.EntitySequenceNode object at 0x7f90c5ab6df0>
#     Loop thru the above:
#        <lnid 4294967293>
#        <lnid 4294967294>
#        <lnid 0>
#     Airport Node Count 3
#     
#     Host 2 output:
#     Type: <class 'katana.distributed.EntitySequenceNode'>
#     Value: <katana.distributed.EntitySequenceNode object at 0x7fe5882e4230>
#     Loop thru the above:
#        <lnid 4294967293>
#        <lnid 4294967294>
#        <lnid 0>
#     Airport Node Count 3


In [None]:

#  All of these below are translated to the Airport graph
#

#  An exact copy of example 1
#
def my_func(i_graph, i_label):
   """
   Print the nodes that carry a given label, then how many of them there are.

   Same as the un-annotated version of this example, plus the Sequence[Node]
   annotation. Python does not evaluate annotations on local variables, so
   neither Sequence nor Node has to be imported for this to run.

   Args:
      i_graph: graph object passed in by the caller; only its
         nodes(labels = ...) call is used here.
      i_label: node label to select, e.g. "Airport".

   Returns:
      None. Everything is reported through print().
   """

   l_airports: Sequence[Node] = i_graph.nodes(labels = i_label)
      #
   print("Type: %s" % (str(type(l_airports))))
      #
      #  One-element tuple on purpose:  "%s" % x  raises TypeError (or formats
      #  the wrong thing) whenever x itself happens to be a tuple.
      #
   print("Value: %s" % (l_airports,))
   print("Loop thru the above:")
      #
   for l_each in l_airports:
      print("   " + str(l_each))

   print("Airport Node Count", len(l_airports))

   return

#  Run the annotated my_func for the "Airport" label; the sample output below
#  shows one printed section per host.
l_result = my_graph.run(lambda g: my_func(g, "Airport"))


#  Sample output,
#
#     Host 0 output:
#     Type: <class 'katana.distributed.EntitySequenceNode'>
#     Value: <katana.distributed.EntitySequenceNode object at 0x7fe27927b7f0>
#     Loop thru the above:
#        <lnid 4294967294>
#        <lnid 0>
#        <lnid 1>
#     Airport Node Count 3
#     
#     Host 1 output:
#     Type: <class 'katana.distributed.EntitySequenceNode'>
#     Value: <katana.distributed.EntitySequenceNode object at 0x7fc0f80d80f0>
#     Loop thru the above:
#        <lnid 4294967293>
#        <lnid 4294967294>
#        <lnid 0>
#     Airport Node Count 3
#     
#     Host 2 output:
#     Type: <class 'katana.distributed.EntitySequenceNode'>
#     Value: <katana.distributed.EntitySequenceNode object at 0x7f96742bcc30>
#     Loop thru the above:
#        <lnid 4294967293>
#        <lnid 4294967294>
#        <lnid 0>
#     Airport Node Count 3


# UDFs, Example 02: Source Node Labels

In [None]:

#  Example 02, mostly as written, converted to airports
#
#     .  We add a second for loop to also process out edges.
#        This graph had one out edge, from Airport to Restaurant.
#

def my_func(i_graph, i_label):
   """
   Print the set of labels found on the neighbours of every node with a given label.

   For each selected node, the source node of every in edge and the destination
   node of every out edge is looked up, and that neighbour's labels are added to
   one shared set, which is printed once at the end.

   Args:
      i_graph: graph object passed in by the caller; uses nodes(...),
         nodes.labels(...), in_edges(...)/in_edges.src(...) and
         out_edges(...)/out_edges.dst(...).
      i_label: node label to start from, e.g. "Airport".

   Returns:
      None. The collected set is reported through print().
   """

   l_airports: Sequence[Node] = i_graph.nodes(labels = i_label)

   l_labels = set()                                                       #  Python sets discard duplicates

   for l_node in l_airports:                                              #  All nodes of a given label
      l_node: Node                                                        #  Annotates the loop variable
      for l_edge in i_graph.in_edges(l_node):                             #  In edges
         l_edge: InEdge
         l_src_node: Node = i_graph.in_edges.src(l_edge)
         l_labels.add(i_graph.nodes.labels(l_src_node))                   #  Label value is added as-is, so it must be hashable

      for l_edge in i_graph.out_edges(l_node):                            #  Out edges
         l_edge: OutEdge
         l_dst_node: Node = i_graph.out_edges.dst(l_edge)
         l_labels.add(i_graph.nodes.labels(l_dst_node))

   print(l_labels)

   return

#  Run my_func starting from the "Airport" label; per the sample output below,
#  each host prints the label set it collected.
l_result = my_graph.run(lambda g: my_func(g, "Airport"))

#  Sample output,
#
#     Host 0 output:
#     {Airport}
#     
#     Host 1 output:
#     {Airport, Restaurant}
#     
#     Host 2 output:
#     {Airport}


# UDFs, Example 03: Out-degree of a City Node

In [None]:

def my_func(i_graph, i_label):
   """
   Print the global out degree of the first node that carries a given label.

   Nothing is printed when no node matches the label.

   Note: in the run recorded below this cell, the global_out_degree() call
   failed on Host 0 with  "ValueError: Not implemented".

   Args:
      i_graph: graph object passed in by the caller; uses nodes(...) and
         nodes.global_out_degree(...).
      i_label: node label to select, e.g. "Airport".

   Returns:
      None.
   """

   l_matches = i_graph.nodes(labels = i_label)

   if (not l_matches):                                                    #  No nodes for this label, nothing to report
      return

   l_first, *_ = l_matches                                                #  Keep the first node, ignore the rest
   l_degree = i_graph.nodes.global_out_degree(l_first)

   print("Out degree: ", l_degree)

   return

#  Run my_func for the "Airport" label. The traceback recorded below shows this
#  run failing inside my_func with  "ValueError: Not implemented".
l_result = my_graph.run(lambda g: my_func(g, "Airport"))


#  Error,
#
#     Host 0 errors:
#     Traceback (most recent call last):
#       File "/opt/miniconda/lib/python3.8/site-packages/katana_enterprise/worker/worker.py", line 86, in execute
#         value = function(graph)
#       File "/tmp/ipykernel_2661/2849117150.py", line 12, in <lambda>
#       File "/tmp/ipykernel_2661/2849117150.py", line 8, in my_func
#     ValueError: Not implemented


# UDFs, Example 04: List Countries in the Graph

In [56]:

def my_func(i_graph, i_label):
   """
   Print the distinct "airport_name" values of the nodes with a given label.

   Args:
      i_graph: graph object passed in by the caller; uses nodes(...) and
         nodes.get_property(...).
      i_label: node label to select, e.g. "Airport".

   Returns:
      None. The names are reported through print().
   """

   l_selected = i_graph.nodes(labels = i_label)

   l_name_column = i_graph.nodes.get_property("airport_name")

   #  Index the property with the whole node sequence at once, then let the
   #  set drop duplicate names.
   l_unique_names = set(l_name_column[l_selected])

   print("Airport names: ", l_unique_names)

   return None
    
    
#  Run my_func for the "Airport" label; the output below shows one
#  "Airport names" line per host.
l_result = my_graph.run(lambda g: my_func(g, "Airport"))


#  Sample output,
#
#     Host 0 output:
#     Airport names:  {'Denver', 'Milwaukee', 'Chicago O-Hare'}
#     
#     Host 1 output:
#     Airport names:  {'San Jose', 'Chicago O-Hare', 'Denver'}
#     
#     Host 2 output:
#     Airport names:  {'Denver', 'Chicago O-Hare', 'San Jose'}


          0/? [?op/s]


Host 0 output:
Airport names:  {'Milwaukee', 'Denver', 'Chicago O-Hare'}

Host 1 output:
Airport names:  {'San Jose', 'Chicago O-Hare', 'Denver'}

Host 2 output:
Airport names:  {'Chicago O-Hare', 'San Jose', 'Denver'}


# UDFs, Example 05: Cities in Each Country

In [58]:

#  The example as listed does Cities in Country.
#
#  Our graph does not have that hierarchy, so we'll do Airport - FLIES_TO  -> Airport
#

def my_func(i_graph, i_property):
   """
   Group airports by the airports they fly to, and print the result.

   For every node with the given label, each out edge whose label equals the
   FLIES_TO label is followed, and the pair is recorded as
      destination airport_name  ->  { origin airport_name, ... }
   The mapping is then printed; without that, the work is thrown away.

   Args:
      i_graph: graph object passed in by the caller; uses nodes(...),
         nodes.get_property(...), out_edges(...), out_edges.labels(...),
         out_edges.dst(...) and out_edges.label_manager.
      i_property: despite the name, this is used as the node LABEL to start
         from (the caller passes "Airport"). The name is kept unchanged so
         existing callers keep working.

   Returns:
      None. The mapping is reported through print().
   """

   l_airports: Sequence[Node] = i_graph.nodes(labels = i_property)

   l_property = i_graph.nodes.get_property("airport_name")

   l_dict = {}

   l_labels = i_graph.out_edges.label_manager["FLIES_TO"]                 #  <class 'katana.local_native.LabelSet'>

   for l_node in l_airports:
      for l_edge in i_graph.out_edges(node = l_node):
         l_label = i_graph.out_edges.labels(l_edge)
         if (l_label == l_labels):                                        #  Only FLIES_TO edges count
            l_node_dest: Node = i_graph.out_edges.dst(edge = l_edge)
               #
            l_dict.setdefault(l_property[l_node_dest], set()).add(l_property[l_node])

   #  Report what was collected. Keys and members are sorted by their text
   #  form so the output is stable from run to run.
   #
   print("Number of destination airports:", len(l_dict))
      #
   for l_dest in sorted(l_dict, key = str):
      print("   %s <- %s" % (l_dest, sorted(l_dict[l_dest], key = str)))

   return

#  Run my_func with "Airport" as the starting node label (the function's
#  second parameter is used as a label, whatever its name says).
l_result = my_graph.run(lambda g: my_func(g, "Airport"))




          0/? [?op/s]

In [None]:

    #  Country / City version of Example 05, next to the Airport translation of
    #  the same example earlier in this notebook.
    #
    #  NOTE(review): this is a function body without its "def" line, and "graph"
    #  is not assigned in any cell shown here -- presumably pasted from the docs
    #  page for comparison; confirm before running this cell.
    #
    #  country_sequence is assigned but not read again in this snippet.
    country_sequence: Sequence[Node] = graph.nodes(labels="Country")
    name_prop = graph.nodes.get_property("name")

    # What cities are located in each country?
    city_sequence: Sequence[Node] = graph.nodes(labels="City")
    #  Maps  country name -> set of city names
    country_cities = {}

    is_part_of_label = graph.out_edges.label_manager["IS_PART_OF"]
    
    for city in city_sequence:
        # iterate the out-edges of city
        for out_edge in graph.out_edges(node=city): # labels=is_part_of_label
            # Check if out_edge has the label IS_PART_OF
            e_label = graph.out_edges.labels(out_edge)
            if e_label == is_part_of_label:
                country_node: Node = graph.out_edges.dst(edge=out_edge)
                # Add the city name to the set for the appropriate country
                country_cities.setdefault(name_prop[country_node], set()).add(name_prop[city])
    
    #  Report the number of countries found, then the cities of one sample country
    print("Number of Countries:", len(country_cities))
    if "United_States" in country_cities:
        print("\nCities in United States:")
        print(country_cities["United_States"])

In [None]:

def my_func(i_graph, i_property):
   """
   Empty scaffold: accepts the graph and a second argument, uses neither,
   and returns None.
   """
   return None

#  Run the (currently empty) my_func, passing "Airport" as its second argument
l_result = my_graph.run(lambda g: my_func(g, "Airport"))
