In [None]:

#  Notebook-wide imports and display options

import pandas as pd
from IPython.display import HTML, display
from tabulate import tabulate

#  Tell pandas the display is 480 characters wide
pd.set_option("display.width", 480)

#  Meant to switch on horizontal scrolling for wide outputs.
#  NOTE(review): the HTML payload is an empty string, so nothing is injected
#  into the page -- confirm whether a <style> snippet was lost from this call.
display(HTML(""))

print("--")


#  Setup stuff: Connectivity

In [None]:

from katana import remote
from katana.remote import import_data

#  Client object that every later cell uses to list, create and delete
#  databases and graphs. No arguments are passed to the constructor here.
my_client = remote.Client()

#  Print the client so the cell shows that it was created
print(my_client)


In [None]:

#  Names and sizing used by the create/connect cells further down
#
NUM_PARTITIONS = 3             #  handed to create_graph() as num_partitions
   #
DB_NAME        = "my_db"       #  database created, and later looked up, by this name
GRAPH_NAME     = "my_graph"    #  graph created inside DB_NAME, and later found by this name

print("--")


In [None]:

#  DELETE ALL GRAPHS
#
#  Walks every database the client reports and deletes each graph found in it,
#  then prints whatever graphs the client still lists afterwards.

for l_database in my_client.databases():
   #  Resolve the database handle once per database instead of once more for
   #  every graph inside it
   l_db_handle = my_client.get_database(name=l_database.name)
   for l_graph in l_db_handle.graphs_in_database():
      l_handle = l_db_handle.get_graph_by_id(id=l_graph.graph_id)
      l_handle.delete()

#  Anything printed by this loop was still listed after the deletes above
for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")


In [None]:

#  DELETE ALL DATABASES
#
#  Deletes every database except the one named "default" (printing "--" once
#  per deletion), then lists the databases the client still reports.

for l_database in my_client.databases():
   if l_database.name == "default":
      continue
   my_client.get_database(name=l_database.name).delete_database()
   print("--")

for l_database in my_client.databases():
   print("DB ID: ", l_database.database_id, "     DB Name: ", l_database.name)


In [None]:

#  CREATE DATABASE
#
#  Creates a database named DB_NAME and prints the database_id of the
#  object that create_database() returns.

my_database = my_client.create_database(name=DB_NAME)

print(my_database.database_id)


In [None]:

#  CREATE GRAPH
#
#  Looks the database up by name (rather than reusing my_database) and creates
#  a graph called GRAPH_NAME in it, passing NUM_PARTITIONS as num_partitions.

my_graph = my_client.get_database(name=DB_NAME).create_graph(name=GRAPH_NAME, num_partitions=NUM_PARTITIONS)

print(my_graph)


In [None]:

#  CONNECT TO GRAPH
#
#  Rebinds my_graph to the first graph that find_graphs_by_name() yields for
#  GRAPH_NAME; any further matches are discarded by the starred target.
#  The unpacking raises ValueError if no graph is returned at all.

my_graph, *_ = my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME)

print(my_graph)


# Create: Vertices/nodes, edges ..

In [None]:

#  import pandas as pd
#  
#  print("--")


In [None]:

#  Vertices/Nodes ..
#
#  One DataFrame per node kind; every frame also carries a constant LABEL column.
#
#  Person;       id, name
#  Order;        id, date_placed, date_shipped   (date_shipped is "" for all but O_103)
#  Item;         id, name
#  Store;        id, city
#  Dist_Center;  id, city
#  Reviewer;     id, name, city

df_persons = pd.DataFrame(
   columns = ["id", "name", "LABEL"],
   data    = [
      ["P_101", "Justin", "Person"],
      ["P_102", "Anand",  "Person"],
      ["P_103", "Brian",  "Person"],
      ["P_104", "Sameer", "Person"],
   ],
)

df_orders = pd.DataFrame(
   columns = ["id", "date_placed", "date_shipped", "LABEL"],
   data    = [
      ["O_101", "2022-09-01", "",           "Order"],
      ["O_102", "2022-09-02", "",           "Order"],
      ["O_103", "2022-09-01", "2022-09-06", "Order"],
      ["O_104", "2022-09-02", "",           "Order"],
      ["O_105", "2022-09-01", "",           "Order"],
      ["O_106", "2022-09-02", "",           "Order"],
      ["O_107", "2022-09-01", "",           "Order"],
      ["O_108", "2022-09-02", "",           "Order"],
   ],
)

df_items = pd.DataFrame(
   columns = ["id", "name", "LABEL"],
   data    = [
      ["I_101", "Shoes",   "Item"],
      ["I_102", "Blanket", "Item"],
      ["I_103", "Pants",   "Item"],
      ["I_104", "Hat",     "Item"],
   ],
)

df_stores = pd.DataFrame(
   columns = ["id", "city", "LABEL"],
   data    = [
      ["S_101", "Milwaukee", "Store"],
      ["S_102", "Denver",    "Store"],
      ["S_103", "San Jose",  "Store"],
      ["S_104", "Houston",   "Store"],
   ],
)

df_dist_centers = pd.DataFrame(
   columns = ["id", "city", "LABEL"],
   data    = [
      ["D_101", "Reno",    "Dist_Center"],
      ["D_102", "Houston", "Dist_Center"],
   ],
)

df_reviewers = pd.DataFrame(
   columns = ["id", "name", "city", "LABEL"],
   data    = [
      ["R_101", "Mary",  "Dallas",  "Reviewer"],
      ["R_102", "Allen", "Houston", "Reviewer"],
   ],
)

#  Preview the first two rows of every node frame, in definition order
for l_title, l_frame in (
   ("Persons:",              df_persons),
   ("Orders:",               df_orders),
   ("Items:",                df_items),
   ("Stores:",               df_stores),
   ("Distribution Centers:", df_dist_centers),
   ("Reviewers:",            df_reviewers),
):
   print(l_title)
   print(tabulate(l_frame.head(2), headers='keys', tablefmt='psql'))



In [None]:
#  Create the edges ..
#
#  One DataFrame per edge type, each with START_ID, END_ID and a constant TYPE
#  column; the two FOUND_AT_* frames also carry a QUANTITY column.

#  Person  P_101-104  -  PLACED  -  Order  O_101-108
#  (START_ID is the Person id, END_ID the Order id; two orders per person)
#
df_placed = pd.DataFrame(
   columns = ["START_ID", "END_ID", "TYPE"],
   data    = [
      ["P_101", "O_101", "PLACED"],
      ["P_101", "O_102", "PLACED"],
      ["P_102", "O_103", "PLACED"],
      ["P_102", "O_104", "PLACED"],
      ["P_103", "O_105", "PLACED"],
      ["P_103", "O_106", "PLACED"],
      ["P_104", "O_107", "PLACED"],
      ["P_104", "O_108", "PLACED"],
   ],
)

#  Order  O_101-108  -  REVIEWED_BY  -  Reviewer  R_101-102
#  (START_ID is the Order id, END_ID the Reviewer id)
#  R_102 is the Houston reviewer and reviews only O_108,
#  which makes Order 108 a Houston Order
#
df_reviewed_by = pd.DataFrame(
   columns = ["START_ID", "END_ID", "TYPE"],
   data    = [
      ["O_101", "R_101", "REVIEWED_BY"],
      ["O_102", "R_101", "REVIEWED_BY"],
      ["O_103", "R_101", "REVIEWED_BY"],
      ["O_104", "R_101", "REVIEWED_BY"],
      ["O_105", "R_101", "REVIEWED_BY"],
      ["O_106", "R_101", "REVIEWED_BY"],
      ["O_107", "R_101", "REVIEWED_BY"],
      ["O_108", "R_102", "REVIEWED_BY"],
   ],
)

#  Order  O_101-108  -  COMPRISED_OF  -  Item  I_101-104
#  (START_ID is the Order id, END_ID the Item id; one item per order)
#
df_comprised_of = pd.DataFrame(
   columns = ["START_ID", "END_ID", "TYPE"],
   data    = [
      ["O_101", "I_101", "COMPRISED_OF"],
      ["O_102", "I_102", "COMPRISED_OF"],
      ["O_103", "I_103", "COMPRISED_OF"],
      ["O_104", "I_104", "COMPRISED_OF"],
      ["O_105", "I_101", "COMPRISED_OF"],
      ["O_106", "I_102", "COMPRISED_OF"],
      ["O_107", "I_103", "COMPRISED_OF"],
      ["O_108", "I_104", "COMPRISED_OF"],
   ],
)

#  Item  I_101-104  -  FOUND_AT_STORE  -  Store  S_101-104
#  NOTE: in this frame START_ID is the Store id and END_ID is the Item id
#  S_104 is the Houston store; it is the only store listing I_104,
#  and every quantity at S_104 is 0
#
df_found_at_store = pd.DataFrame(
   columns = ["START_ID", "END_ID", "QUANTITY", "TYPE"],
   data    = [
      ["S_101", "I_101", 5, "FOUND_AT_STORE"],
      ["S_101", "I_102", 5, "FOUND_AT_STORE"],
      ["S_101", "I_103", 5, "FOUND_AT_STORE"],

      ["S_102", "I_102", 5, "FOUND_AT_STORE"],
      ["S_102", "I_103", 5, "FOUND_AT_STORE"],

      ["S_103", "I_101", 5, "FOUND_AT_STORE"],
      ["S_103", "I_102", 5, "FOUND_AT_STORE"],
      ["S_103", "I_103", 5, "FOUND_AT_STORE"],

      ["S_104", "I_101", 0, "FOUND_AT_STORE"],
      ["S_104", "I_102", 0, "FOUND_AT_STORE"],
      ["S_104", "I_103", 0, "FOUND_AT_STORE"],
      ["S_104", "I_104", 0, "FOUND_AT_STORE"],
   ],
)

#  Item  I_101-104  -  FOUND_AT_DIST_CENTER  -  Dist_Center  D_101-102
#  NOTE: in this frame START_ID is the Dist_Center id and END_ID is the Item id
#  D_102 is the Houston distribution center; it lists only I_103 (quantity 0)
#  and I_104 (quantity 5)
#
df_found_at_dist_center = pd.DataFrame(
   columns = ["START_ID", "END_ID", "QUANTITY", "TYPE"],
   data    = [
      ["D_101", "I_101", 5, "FOUND_AT_DIST_CENTER"],
      ["D_101", "I_102", 5, "FOUND_AT_DIST_CENTER"],
      ["D_101", "I_103", 5, "FOUND_AT_DIST_CENTER"],
      ["D_101", "I_104", 5, "FOUND_AT_DIST_CENTER"],

      ["D_102", "I_103", 0, "FOUND_AT_DIST_CENTER"],
      ["D_102", "I_104", 5, "FOUND_AT_DIST_CENTER"],
   ],
)

#  Preview the first two rows of every edge frame, in definition order
for l_title, l_frame in (
   ("Placed:",                       df_placed),
   ("Reviewed By:",                  df_reviewed_by),
   ("Comprised Of:",                 df_comprised_of),
   ("Found At Store:",               df_found_at_store),
   ("Found At Distribution Center:", df_found_at_dist_center),
):
   print(l_title)
   print(tabulate(l_frame.head(2), headers='keys', tablefmt='psql'))



In [None]:

# Import the previously created Python DataFrames into KatanaGraph ..
#
#  Node frames: the "id" column is the node id, and one string is used as both
#  the id space and the label.
#
l_node_frames = [
   (df_persons      , "Person"      ),
   (df_orders       , "Order"       ),
   (df_items        , "Item"        ),
   (df_stores       , "Store"       ),
   (df_dist_centers , "Dist_Center" ),
   (df_reviewers    , "Reviewer"    ),
   ]

#  Edge frames, each imported exactly once with the column that actually holds
#  ids of the given id space.
#
#  This cell used to import every edge frame twice, swapping source_column and
#  destination_column but NOT the id spaces, so one call per frame looked ids up
#  in the wrong id space (e.g. Order ids in "Person", or Store ids in "Item").
#  Only the consistent call of each pair is kept. That gives 8+8+8+12+6 = 42
#  edges, which is the edge count recorded in the debugging cells.
#
#  The two FOUND_AT_* frames store the Store/Dist_Center id in START_ID and the
#  Item id in END_ID, hence the swapped columns for those two entries.
#
#  NOTE(review): if edges in both directions are wanted, add a second entry per
#  frame with the id spaces AND the columns swapped together.
#
#  (frame, source id space, source column, destination id space, destination column, edge type)
l_edge_frames = [
   (df_placed               , "Person" , "START_ID" , "Order"       , "END_ID"   , "PLACED"               ),
   (df_comprised_of         , "Order"  , "START_ID" , "Item"        , "END_ID"   , "COMPRISED_OF"         ),
   (df_reviewed_by          , "Order"  , "START_ID" , "Reviewer"    , "END_ID"   , "REVIEWED_BY"          ),
   (df_found_at_store       , "Item"   , "END_ID"   , "Store"       , "START_ID" , "FOUND_AT_STORE"       ),
   (df_found_at_dist_center , "Item"   , "END_ID"   , "Dist_Center" , "START_ID" , "FOUND_AT_DIST_CENTER" ),
   ]

with import_data.DataFrameImporter(my_graph) as df_importer:

   #  Nodes first, so that every id space exists before the edges refer to it
   for l_frame, l_space in l_node_frames:
      df_importer.nodes_dataframe(l_frame,
         id_column             = "id",
         id_space              = l_space,
         label                 = l_space,
         )

   for l_frame, l_src_space, l_src_col, l_dst_space, l_dst_col, l_type in l_edge_frames:
      df_importer.edges_dataframe(l_frame,
         source_id_space       = l_src_space,
         destination_id_space  = l_dst_space,
         source_column         = l_src_col,
         destination_column    = l_dst_col,
         type                  = l_type,
         )

   #  Submit everything registered above in one insert
   df_importer.insert()


print("--")



#  Setup stuff: Debugging

In [None]:

#  Node and edge totals of the graph
#  (the "Rows" line reports the node count)
#

l_node_total = my_graph.num_nodes()
print("Rows: %d" % l_node_total)

l_edge_total = my_graph.num_edges()
print("Edges: %d" % l_edge_total)

#  Outputted values,
#
#  24
#  42


In [None]:

#  Look at the graph
#
#  Matches every directed (n)-[r]->(m) pattern, returns both end nodes and the
#  edge, and calls view() on the result to render it.

l_result = my_graph.query("""

   MATCH (n) - [ r ] -> (m)
   RETURN n, r, m
   
   """, contextualize=True)

l_result.view()


<div> 
<img src="./01_Images/Result_01.png" alt="Drawing" style="width: 1800px;"/>
</div>


In [19]:

#  Node count per label.
#
#  Nodes are first counted per distinct label list, the lists are then unwound
#  into single labels, and the counts are summed per label and sorted by label.

l_result = my_graph.query("""

   MATCH (n) 
   WITH DISTINCT LABELS(n) AS temp, COUNT(n) AS tempCnt
   UNWIND temp AS label
   RETURN label, SUM(tempCnt) AS cnt
   ORDER BY label
   
   """, contextualize=True)

l_result.view()


# display(print(l_result))
# 
#  Outputted values
#
#     cnt        label
#  0    2  Dist_Center
#  1    4         Item
#  2    8        Order
#  3    4       Person
#  4    2     Reviewer
#  5    4        Store



          0/? [?op/s]

VBox(children=(HTML(value='\n                <style>\n                #jp-main-content-panel .widget-container…

In [None]:

#  Edge count per relationship type.
#
#  Matches every directed edge, counts the edges per TYPE(r), and returns the
#  type/count pairs sorted by type name.

l_result = my_graph.query("""

   MATCH (n) - [r] -> (m) 
   WITH DISTINCT TYPE(r) AS temp, COUNT(r) AS tempCnt
   RETURN temp, tempCnt
   ORDER BY temp

   """, contextualize=True)

l_result.view()


# display(print(l_result))
# 
#  Outputted values,
#
#                     temp  tempCnt
#  0          COMPRISED_OF        8
#  1  FOUND_AT_DIST_CENTER        6
#  2        FOUND_AT_STORE       12
#  3                PLACED        8
#  4           REVIEWED_BY        8

