In [None]:

import pandas as pd
   #
pd.set_option("display.width", 480)

#  Sets horizontal scroll for wide outputs
#
from IPython.display import display, HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

from tabulate import tabulate

print("--")


#  Setup stuff: Connectivity

In [None]:

from katana import remote
from katana.remote import import_data

my_client = remote.Client()

print(my_client)


In [None]:

NUM_PARTITIONS  = 3
   #
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

print("--")


In [None]:

#  DELETE ALL GRAPHS

for l_database in my_client.databases():
   for l_graph in my_client.get_database(name=l_database.name).graphs_in_database():
      l_handle=my_client.get_database(name=l_database.name).get_graph_by_id(id=l_graph.graph_id)
      l_handle.delete()

for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")


In [None]:

#  DELETE ALL DATABASES

for l_database in my_client.databases():
   if (l_database.name != "default"):
      my_client.get_database(name=l_database.name).delete_database()
      print("--")

for l_database in my_client.databases():
   print("DB ID: ", l_database.database_id, "     DB Name: ", l_database.name)


In [None]:

#  CREATE DATABASE

my_database = my_client.create_database(name=DB_NAME)

print(my_database.database_id)


In [None]:

#  CREATE GRAPH
#
my_graph = my_client.get_database(name=DB_NAME).create_graph(name=GRAPH_NAME, num_partitions=NUM_PARTITIONS)

print(my_graph)


In [None]:

#  CONNECT TO GRAPH

my_graph, *_ = my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME)

print(my_graph)


# Create: Vertices/nodes, edges ..

In [33]:

#  Vertices/Nodes ..

df_orders        = pd.DataFrame([
       #
   [101  , "Rayees Order"        ,    0,  "Order"     ],
   [102  , "Andrews Order"       ,    0,  "Order"     ],
   [103  , "Gregs Order"         ,    0,  "Order"     ],
       #
   ], columns = ["order_num", "order_name", "total", "LABEL"])
      #
print(tabulate(df_orders, headers='keys', tablefmt='psql'))


df_products      = pd.DataFrame([
       #
   [401  , "Apples"              ,   2,  "Product"   ],
   [402  , "Carrots"             ,  15,  "Product"   ],
   [403  , "Pears"               ,   7,  "Product"   ],
       #
   ], columns = ["product_num", "product_name", "unit_price", "LABEL"])
      #
print(tabulate(df_products, headers='keys', tablefmt='psql'))



+----+-------------+---------------+---------+---------+
|    |   order_num | order_name    |   total | LABEL   |
|----+-------------+---------------+---------+---------|
|  0 |         101 | Rayees Order  |       0 | Order   |
|  1 |         102 | Andrews Order |       0 | Order   |
|  2 |         103 | Gregs Order   |       0 | Order   |
+----+-------------+---------------+---------+---------+
+----+---------------+----------------+--------------+---------+
|    |   product_num | product_name   |   unit_price | LABEL   |
|----+---------------+----------------+--------------+---------|
|  0 |           401 | Apples         |            2 | Product |
|  1 |           402 | Carrots        |           15 | Product |
|  2 |           403 | Pears          |            7 | Product |
+----+---------------+----------------+--------------+---------+


In [34]:

#  Create the edge ..

df_purchased = pd.DataFrame([
      #
   [ 101, 401 ,"CONTAINS"],
   [ 101, 402 ,"CONTAINS"],
   [ 101, 403 ,"CONTAINS"],
      #
   [ 102, 401 ,"CONTAINS"],
   [ 102, 401 ,"CONTAINS"],
   [ 102, 403 ,"CONTAINS"],
      #
   [ 103, 403 ,"CONTAINS"],
   [ 103, 403 ,"CONTAINS"],
   [ 103, 403 ,"CONTAINS"],
   [ 103, 403 ,"CONTAINS"],
      #
   ], columns = ["START_ID", "END_ID", "TYPE"])

print(tabulate(df_purchased, headers='keys', tablefmt='psql'))


+----+------------+----------+----------+
|    |   START_ID |   END_ID | TYPE     |
|----+------------+----------+----------|
|  0 |        101 |      401 | CONTAINS |
|  1 |        101 |      402 | CONTAINS |
|  2 |        101 |      403 | CONTAINS |
|  3 |        102 |      401 | CONTAINS |
|  4 |        102 |      401 | CONTAINS |
|  5 |        102 |      403 | CONTAINS |
|  6 |        103 |      403 | CONTAINS |
|  7 |        103 |      403 | CONTAINS |
|  8 |        103 |      403 | CONTAINS |
|  9 |        103 |      403 | CONTAINS |
+----+------------+----------+----------+


In [32]:


with import_data.DataFrameImporter(my_graph) as df_importer:
    
   df_importer.nodes_dataframe(
      df_orders[[
         "order_num", "order_name", "total", "LABEL"
      ]],
      id_column  = "order_num",
      id_space   = "Orders"
      )
    
   df_importer.nodes_dataframe(
      df_products[[
         "product_num", "product_name", "unit_price", "LABEL"
      ]],
      id_column  = "product_num",
      id_space   = "Products"
      )
    
   df_importer.edges_dataframe(
      df_purchased[[
         "order_num", "product_num", "TYPE"]],
      source_id_space      = "Orders",
      destination_id_space = "Products",
      source_column        = "order_num",
      destination_column   = "product_num",
      type                 = "RELATES_TO"
      )
   df_importer.edges_dataframe(
      df_purchased[[
         "order_num", "product_num", "TYPE"]],
      source_id_space      = "Products",
      destination_id_space = "Orders",
      source_column        = "product_num",
      destination_column   = "order_num",
      type                 = "RELATES_TO"
      )

   df_importer.insert()

print("--")





ValueError: Node id column name must be consistent

In [None]:

#  What's in the vertices/nodes ..

l_result = my_graph.query("""

   MATCH ( n ) 
   RETURN n
   
   """)

print(l_result[0:30])


In [None]:

for l_each in df_purchased.iterrows():
    
   l_startid    = l_each[1][0]
   l_endid      = l_each[1][1]
   l_type       = l_each[1][2]
    
   l_query = """
      
      MATCH (n: Order  ) WHERE n.order_num   = {0}
      MATCH (m: Product) WHERE m.product_num = {1}
      CREATE (n) -[r: {2} ]-> (m)
      
      """.format(l_startid, l_endid, l_type)
    
   l_result0 = my_graph.query(l_query)

print("--")


In [None]:
#  What's in the graph .. 

l_result = my_graph.query("""

   MATCH (n) - [ r ] -> (m)
   RETURN n, m, r
   
   """, contextualize=True)

l_result.view()


#  Finally now; Why we're here ..

In [None]:
#  The traversal we are trying to run ..

l_result = my_graph.query("""

   //  Just fetching, works
   //
   MATCH (n: Order) - [r :CONTAINS ] -> (m: Product)
   RETURN n, sum(m.unit_price) AS x_cost
   
   //  Closest to original question, throws error
   //
   // MATCH (n: Order) - [r :CONTAINS ] -> (m: Product)
   // WITH n, sum(m.unit_price) AS x_cost
   // SET Order.total = x_cost
   //
   //  OperationError: backtrace (QueryClient.cpp:391): unable to translate openCypher parse tree to internal IR (OpGraph.cpp:226):
   //  Syntax error: The query cannot be recognized by openCypher. (CypherOpGraphBuilder.cpp:63): TCK = SyntaxError:UnexpectedSyntax
   //  Katana = SyntaxError:ParserError: TCK = SyntaxError:UnexpectedSyntax
   //  Katana = SyntaxError:ParserError
   
   //  This also errors ..
   //
   // MATCH (n: Order) - [r :CONTAINS ] -> (m: Product)
   // WITH sum(m.unit_price) AS x_cost
   // SET Order.total = x_cost
   
   """, contextualize=True)

l_result.view()


#  Exact, original syntax ..
#
#  MATCH (loan)-[:hasLoanSnapshot]->(snapshot)
#  WITH loan, sum(snapshot.loan_snapshot_flag) as loan_snapshots
#  SET loan.loan_flag = loan_snapshots

