#  Setup stuff: Connectivity

In [None]:
#  This file also includes:  at_version()


import os

from katana import remote
from katana.remote import import_data


#  Server address for the Katana client library.  It is set before the
#  client is constructed -- presumably remote.Client() reads it; confirm
#  against the katana docs.
os.environ.update({"KATANA_SERVER_ADDRESS": "localhost:8080"})

#  Client handle shared by the remaining cells of this notebook.
my_client = remote.Client()
print(my_client)

In [None]:
#  Names of the database and the graph that the cells below create and use.
DB_NAME, GRAPH_NAME = "my_db", "my_graph"

print("--")

In [None]:
#  DELETE ALL GRAPHS
#
#  Walks every database known to the client and deletes each graph listed
#  in it, then prints whatever graphs the client still reports.

for l_database in my_client.databases():
   #  Fetch the database handle once per database, instead of once for the
   #  listing and then again for every graph being deleted.
   l_db_handle = my_client.get_database(name=l_database.name)
   for l_graph in l_db_handle.graphs_in_database():
      l_handle = l_db_handle.get_graph_by_id(id=l_graph.graph_id)
      l_handle.delete()

#  Confirmation pass: list any graph that is still present.
for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")

In [None]:
#  DELETE ALL DATABASES
#
#  Every database except the one named "default" is deleted (a "--" is
#  printed after each deletion); the databases that remain are then listed.

for l_database in my_client.databases():
   if l_database.name == "default":
      continue
   l_doomed = my_client.get_database(name=l_database.name)
   l_doomed.delete_database()
   print("--")

for l_database in my_client.databases():
   print("DB ID: ", l_database.database_id, "     DB Name: ", l_database.name)

In [None]:
#  CREATE DATABASE
#
#  Creates the database named by DB_NAME and prints the database_id found
#  on the returned handle.

my_database = my_client.create_database(name=DB_NAME)
l_database_id = my_database.database_id

print(l_database_id)

In [None]:
#  CREATE GRAPH
#
#  Creates a graph named GRAPH_NAME inside the DB_NAME database, requesting
#  num_partitions=2, and prints the returned handle.
l_parent_database = my_client.get_database(name=DB_NAME)
my_graph = l_parent_database.create_graph(name=GRAPH_NAME, num_partitions=2)

print(my_graph)

In [None]:
#  CONNECT TO GRAPH
#
#  Looks up the graph named GRAPH_NAME inside the DB_NAME database and binds
#  a handle to it as my_graph.  Raises LookupError when no graph matches.

#  One database lookup serves both the listing and the handle fetch.
l_database = my_client.get_database(name=DB_NAME)

for l_graph in l_database.graphs_in_database():
   if l_graph.name == GRAPH_NAME:
      my_graph = l_database.get_graph_by_id(id=l_graph.graph_id)
      break
else:
   #  No graph matched: fail loudly rather than continue with my_graph
   #  undefined or still bound to whatever an earlier cell left in it.
   raise LookupError(
      "Graph {!r} not found in database {!r}".format(GRAPH_NAME, DB_NAME))

print(my_graph)

#  Setup stuff: Debugging

In [None]:
#  LIST ALL GRAPHS
#
#  One printed line per graph reported by the client: id, name, version.

for l_graph in my_client.graphs():
   l_fields = (
      "GRAPH ID: ", l_graph.graph_id,
      "     DB GRAPH: ", l_graph.name,
      "    GRAPH VERSION: ", l_graph.version,
      )
   print(*l_fields)

print("--")


In [None]:
#  Show the node count, then the edge count, of my_graph.
for l_counter in (my_graph.num_nodes, my_graph.num_edges):
   display(l_counter())

In [None]:
# ##################################################################

In [None]:
#  Point to a prior version of our graph ..
#
#     Future work: also demonstrate Client.transaction()

#  Version number handed to at_version() below.
l_version = 5

#  my_graph2 is whatever at_version() returns for that version; the next
#  cell checks for this name before using it.
my_graph2 = my_graph.at_version(l_version)

print("--")


In [None]:
#  When a global named my_graph2 exists, show its node count and then its
#  edge count; otherwise just print the separator.
if 'my_graph2' not in globals():
   print("--")
else:
   for l_counter in (my_graph2.num_nodes, my_graph2.num_edges):
      display(l_counter())


In [None]:
# ##################################################################

In [None]:
#  Run the graph.schema() procedure against my_graph.  The call is the last
#  expression in the cell, so its result becomes the cell's output.
l_schema_query = "CALL graph.schema() RETURN *"
my_graph.query(l_schema_query)


In [None]:
#  Call operations() on the client's widgets() object; the return value is
#  the cell's output.
l_widgets = my_client.widgets()
l_widgets.operations()


In [None]:
# ##################################################################

In [None]:

#  Match every directed edge together with both of its endpoint nodes, run
#  the query with contextualize=True, and call view() on the result.
l_all_edges_query = """

   MATCH (n) - [ r ] -> (m)
   RETURN n, m, r
   
   """

l_result = my_graph.query(l_all_edges_query, contextualize=True)

l_result.view()


# Create: Vertices/nodes, edges ..

In [None]:
#  pandas holds the node and edge tables built in the cells below.
import pandas as pd

print("--")

In [None]:

#  Vertices/Nodes, Airports ..
#
#  One row per airport: code, display name, and the node label to apply.

l_airport_columns = ["airport_code", "airport_name", "LABEL"]

l_airport_rows = [
   ["MKE", "Milwaukee",      "Airport"],
   ["ORD", "Chicago O-Hare", "Airport"],
   ["SJC", "San Jose",       "Airport"],
   ["DEN", "Denver",         "Airport"],
   ]

df_airports = pd.DataFrame(l_airport_rows, columns=l_airport_columns)

df_airports.head(20)


In [None]:
#  Create nodes, airports
#
#  Sends one CREATE query per row of df_airports and prints a "." after each
#  query as a progress marker.


#  contextlib was only needed for the abandoned stdout-redirect experiments
#  noted inside the loop; the import is left in place in case other cells
#  rely on it.
import contextlib

#  Query template, built once because it does not change between rows.
#  Placeholders: {0} = airport code, {1} = airport name, {2} = node label.
l_query_template = """

      //  This creates data that works ..
      //
      //  Subsequent execution creates duplicate nodes  :(
      //
      CREATE ( n: {2} {{ airport_code: '{0}' }} )                  //  Notice double curly braces
      SET n.airport_name = '{1}' 
      
      //  This produces bad data, see next cell ..
      //
      // MERGE ( n: {2} {{ airport_code: '{0}' }})
      // ON CREATE SET n.airport_name = '{1}'
   
      """

#  Select the three fields by column name and iterate them as plain tuples.
#  The earlier iterrows() + row[0] / row[1] / row[2] form used integer keys
#  as positions on a label-indexed Series, which pandas has deprecated.
l_airport_fields = df_airports[["airport_code", "airport_name", "LABEL"]]

for l_airport_code, l_airport_name, l_label in l_airport_fields.itertuples(index=False, name=None):

   l_query = l_query_template.format(l_airport_code, l_airport_name, l_label)

   #  NOTE: silencing the query's output with contextlib.redirect_stdout was
   #  tried and abandoned -- redirecting into an open /dev/null handle hung
   #  the kernel, and redirect_stdout(None) didn't work.
   l_result0 = my_graph.query(l_query)

   print(".", end='')


print("  ")
print("--")


In [None]:
#  What's in the vertices/nodes ..
#
#  Matches every node and prints the [0:30] slice of the result.

l_all_nodes_query = """
   MATCH ( n ) 
   RETURN n
   """
l_result = my_graph.query(l_all_nodes_query)
print(l_result[0:30])


#  Output recorded when the nodes were made with the CREATE query (correct) ..
#
#     n.internal_id   n.labels n.airport_code  n.airport_name n.type
#  0              0  [Airport]            MKE       Milwaukee   node
#  1              1  [Airport]            ORD  Chicago O-Hare   node
#  2              2  [Airport]            SJC        San Jose   node
#  3              3  [Airport]            DEN          Denver   node

#  Output recorded when the nodes were made with the MERGE query (incorrect:
#  airport names are missing or attached to the wrong codes) ..
#
#     n.internal_id   n.labels n.airport_code n.type n.airport_name
#  0              0  [Airport]            SJC   node            NaN
#  1              1  [Airport]            MKE   node      Milwaukee
#  2              2  [Airport]            ORD   node       San Jose
#  3              3  [Airport]            DEN   node         Denver



In [None]:
#  Delete all edges, nodes for test reset
#
#  Order: report the edge count and the node count, delete every edge,
#  delete every node, then report both counts again.  The result of every
#  step is printed.

l_count_edges_query = """
   MATCH (n) - [r] -> (m)
   RETURN count(r)
   """

l_count_nodes_query = """
   MATCH (n)
   RETURN count(n)
   """

l_delete_edges_query = """
   MATCH (n) - [r] -> (m)
   DELETE r
   """

l_delete_nodes_query = """
   MATCH (n)
   DELETE n
   """

#  Edges are deleted before nodes, matching the original step order.
l_reset_steps = (
   l_count_edges_query,
   l_count_nodes_query,
   l_delete_edges_query,
   l_delete_nodes_query,
   l_count_edges_query,
   l_count_nodes_query,
   )

for l_step in l_reset_steps:
   l_result = my_graph.query(l_step)
   #  This used to be display(print(l_result)).  print() returns None, so
   #  display() only ever received None; the printed text carried all the
   #  information, so the result is simply printed.
   print(l_result)


# ##################################################################

In [None]:
#  Create the edge, flights ..
#
#  One row per directed flight: start airport code, end airport code,
#  distance, hop count, and the edge type to create.

l_flight_columns = ["START_ID", "END_ID", "DISTANCE", "NUM_HOPS", "TYPE"]

l_flight_rows = [
   ["MKE", "ORD",   66, 1, "FLIES_TO"],
   ["ORD", "MKE",   66, 1, "FLIES_TO"],
   ["ORD", "DEN",  886, 1, "FLIES_TO"],
   ["DEN", "ORD",  886, 1, "FLIES_TO"],
   #  Notice SJC flies to/from Denver
   ["SJC", "DEN",  948, 1, "FLIES_TO"],
   ["DEN", "SJC",  948, 1, "FLIES_TO"],
   #  Notice SJC flies to ORD, but not ORD to SJC
   ["SJC", "ORD", 1829, 1, "FLIES_TO"],
   ]

df_flights = pd.DataFrame(l_flight_rows, columns=l_flight_columns)

df_flights.head(30)


In [None]:

#  Create one edge per row of df_flights, printing a "." after each query as
#  a progress marker.

#  Query template, built once because it does not change between rows.
#  Placeholders: {0} = start airport code, {1} = end airport code,
#  {2} = distance, {3} = hop count, {4} = edge type.
#  The commented-out Cypher inside the string records variants that were
#  tried; it is sent along with the query exactly as before.
l_query_template = """
      
      MATCH
         (n: Airport),
         (m: Airport)
      WHERE n.airport_code = '{0}'
      AND   m.airport_code = '{1}'
      CREATE (n) -[r: {4} {{ DISTANCE: {2}, NUM_HOPS: {3} }}]-> (m)
      
      // MATCH
      //    (n: Airport),
      //    (m: Airport)
      // WHERE n.airport_code = '{0}'
      // AND   m.airport_code = '{1}'
      // CREATE (n) -[r: {4} {{ DISTANCE: {2}, NUM_HOPS: {3} }}]-> (m)
      // RETURN type(r)
      //
      //  Using edge variables after CREATE/UPDATE/DELETE/REMOVE is not supported

      // MATCH (n: Airport ) WHERE n.airport_code = '{0}'
      // MATCH (m: Airport ) WHERE m.airport_code = '{1}'
      // CREATE (n) -[r: {4} {{ DISTANCE: {2}, NUM_HOPS: {3} }}]-> (m)

      """

#  Select the five fields by column name and iterate them as plain tuples.
#  The earlier iterrows() + row[0] .. row[4] form used integer keys as
#  positions on a label-indexed Series, which pandas has deprecated.
l_flight_fields = df_flights[["START_ID", "END_ID", "DISTANCE", "NUM_HOPS", "TYPE"]]

for l_startid, l_endid, l_distance, l_num_hops, l_type in l_flight_fields.itertuples(index=False, name=None):

   l_query = l_query_template.format(l_startid, l_endid, l_distance, l_num_hops, l_type)

   l_result0 = my_graph.query(l_query)

   print(".", end='')


print("  ")
print("--")


In [None]:
#  What's in the edge ..
#
#  Lists each edge as FROM / TO airport codes plus its DISTANCE and NUM_HOPS
#  properties, and prints the [0:30] slice of the result.

l_edge_listing_query = """
   MATCH ( n )  - [r] ->  (m)
   RETURN n.airport_code AS FROM, m.airport_code AS TO, r.DISTANCE AS DISTANCE, r.NUM_HOPS AS NUM_HOPS
   """
l_result = my_graph.query(l_edge_listing_query)
print(l_result[0:30])


#  Administration/testing stuff ..

In [None]:
#  Save current graph as an RDG ..
#
#  Hands my_graph and a bucket path to export_data.rdg() -- presumably this
#  writes the graph out at that location; confirm against the katana docs.

from katana.remote import export_data

#  Destination path passed to the export call.
l_graph_path = "gs://farrell-data-bucket/sssp/farrell-sssp"

export_data.rdg(my_graph, l_graph_path)

print("--")



In [None]:
#  Load from RDG ..
#     Have an empty graph created already ..
#
#  Hands my_graph and a bucket path to import_data.rdg().

from katana.remote import import_data


#  Same path the export cell uses; the commented-out line is an alternative
#  data set location.
l_graph_path = "gs://farrell-data-bucket/sssp/farrell-sssp"
# l_graph_path = "gs://katana-demo-datasets/rdg-datasets/v3/rmat10_symmetric"


import_data.rdg(my_graph, l_graph_path)

print("--")


In [None]:
#  Load from CSV ..
#     Have an empty graph created already ..
#
#  **  This is a small subset of the data set created in other places ..
#
#  Passes the node file, edge file and containing directory to
#  import_data.csv(), with "|" as both the data and the schema delimiter and
#  header rows declared present.

from katana.remote import import_data

l_dir = "gs://farrell-data-bucket/sssp/"
l_inp = "gs://farrell-data-bucket/sssp/AC_AirportsHeader.txt"
l_enp = "gs://farrell-data-bucket/sssp/AD_FlightsHeader.txt"

l_csv_options = {
   "input_node_path":    l_inp,
   "input_edge_path":    l_enp,
   "input_dir":          l_dir,
   "data_delimiter":     "|",
   "schema_delimiter":   "|",
   "files_have_headers": True,
   # "partition_policy": "random-oec",
   }

import_data.csv(my_graph, **l_csv_options)

print("--")
