In [None]:

#  Setting display options

import pandas as pd

#  Allow pandas text output to be up to 480 characters wide before it wraps.
pd.set_option("display.width", 480)

#  Sets horizontal scroll for wide outputs.
#
#  The HTML fragment has to carry an actual CSS rule: an empty HTML("") renders
#  nothing. The rule below stops <pre> output from wrapping, so wide tables
#  scroll horizontally instead of folding onto several lines.
from IPython.display import display, HTML
display(HTML("<style>pre { white-space: pre !important; }</style>"))

from tabulate import tabulate


#  Setup: Connectivity

In [None]:

from katana import remote
from katana.remote import import_data

#  Create the Katana remote client that the remaining cells use.
#  NOTE(review): Client() is called with no arguments, so the connection
#  settings come from the library's defaults -- confirm where those are set.
my_client = remote.Client()

#  Print the client object as a quick check that it was created.
print(my_client)


In [None]:
#  Notebook-wide settings; edit these rather than the cells further down.

NUM_PARTITIONS = 3             #  Partition count intended for the graph
DB_NAME        = "my_db"       #  Name of the database this notebook creates
GRAPH_NAME     = "my_graph"    #  Name of the graph created inside DB_NAME

print("--")


In [None]:
#  DELETE ALL GRAPHS
#
#  Walk every database the client reports and delete each graph it contains.
#  The database handle is fetched once per database; fetching it again for
#  every graph only adds round trips to the server.

for l_database in my_client.databases():
   l_db_handle = my_client.get_database(name=l_database.name)
   for l_graph in l_db_handle.graphs_in_database():
      l_handle = l_db_handle.get_graph_by_id(id=l_graph.graph_id)
      l_handle.delete()

#  List whatever graphs the client still reports after the deletes.
for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")


In [None]:
#  DELETE ALL DATABASES
#
#  Every database except the one named "default" is deleted; a "--" line is
#  printed after each delete.

for l_database in my_client.databases():
   if l_database.name == "default":
      continue
   my_client.get_database(name=l_database.name).delete_database()
   print("--")

#  Show the databases that remain.
for l_database in my_client.databases():
   print("DB ID: ", l_database.database_id, "     DB Name: ", l_database.name)


In [None]:
#  CREATE DATABASE
#
#  Creates a database named DB_NAME and keeps the returned handle in
#  my_database.

my_database = my_client.create_database(name=DB_NAME)

#  Print the id of the new database.
print(my_database.database_id)


In [None]:
#  CREATE GRAPH
#
#  Creates GRAPH_NAME inside DB_NAME. The partition count is taken from
#  NUM_PARTITIONS (configuration cell) so it is set in exactly one place.

my_graph = my_client.get_database(name=DB_NAME).create_graph(name=GRAPH_NAME, num_partitions=NUM_PARTITIONS)

print(my_graph)


In [None]:

#  CONNECT TO GRAPH
#
#  Looks the graph up by name instead of relying on the handle returned when
#  it was created, and rebinds my_graph to the result.
#
#  The star-unpacking keeps the first match and discards any others; if the
#  lookup yields nothing, the unpacking raises ValueError.

my_graph, *_ = my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME)

print(my_graph)


# Create: Vertices/nodes, edges/relationships 

In [None]:
# import pandas as pd
# 
# print("--")


In [None]:

#  Vertices/Nodes, Persons ..
#
#  One row per Person node. The key column is named "id" (it used to be
#  "person_code"); the import cell passes id_column = "id" for this frame.

l_person_columns = ["id", "fname", "lname", "LABEL"]

l_person_rows = [
   ("1111-1111-1111-1111", "Justin", "Fine",    "Person"),
   ("2222-2222-2222-2222", "Thomas", "Cook",    "Person"),
   ("3333-3333-3333-3333", "Sameer", "Iyengar", "Person"),
   ("4444-4444-4444-4444", "Brian",  "Spencer", "Person"),
   ]

df_persons = pd.DataFrame(data=l_person_rows, columns=l_person_columns)

print(tabulate(df_persons, headers="keys", tablefmt="psql"))


In [None]:

#  Vertices/Nodes, Stores ..
#
#  One row per Store node. The key column is named "id" (it used to be
#  "store_code"); the import cell passes id_column = "id" for this frame.

l_store_columns = ["id", "store_name", "LABEL"]

l_store_rows = [
   (101, "Panera",        "Store"),
   (102, "Target",        "Store"),
   (103, "Lowes",         "Store"),
   (104, "Volvo Service", "Store"),
   (105, "JC Penney",     "Store"),
   (106, "Torchys Tacos", "Store"),
   ]

df_stores = pd.DataFrame(data=l_store_rows, columns=l_store_columns)

print(tabulate(df_stores, headers="keys", tablefmt="psql"))


In [None]:

#  Create the edge, MADE_PURCHASE ..
#
#  One row per purchase. START_ID holds a Person id and END_ID a Store id;
#  the import cell uses those two columns as the edge source and destination.

l_purchase_columns = ["START_ID", "END_ID", "AMOUNT", "DATE_AS_STRING", "IS_FRAUDULENT", "TYPE"]

l_purchase_rows = [
   #  Purchases by 1111-1111-1111-1111
   ("1111-1111-1111-1111", 101, 40.00, "2022-04-01", 0, "MADE_PURCHASE"),
   ("1111-1111-1111-1111", 105, 50.00, "2022-04-02", 0, "MADE_PURCHASE"),
   ("1111-1111-1111-1111", 106, 60.00, "2022-04-03", 1, "MADE_PURCHASE"),
   ("1111-1111-1111-1111", 102, 70.00, "2022-04-10", 0, "MADE_PURCHASE"),
   ("1111-1111-1111-1111", 106, 60.00, "2022-04-14", 1, "MADE_PURCHASE"),
   #  Purchases by 2222-2222-2222-2222
   ("2222-2222-2222-2222", 103, 40.00, "2022-04-01", 0, "MADE_PURCHASE"),
   ("2222-2222-2222-2222", 104, 50.00, "2022-04-02", 0, "MADE_PURCHASE"),
   ("2222-2222-2222-2222", 105, 60.00, "2022-04-03", 0, "MADE_PURCHASE"),
   ("2222-2222-2222-2222", 101, 70.00, "2022-04-06", 1, "MADE_PURCHASE"),
   ("2222-2222-2222-2222", 102, 80.00, "2022-04-10", 0, "MADE_PURCHASE"),
   #  Purchases by 3333-3333-3333-3333
   ("3333-3333-3333-3333", 105, 50.00, "2022-04-05", 0, "MADE_PURCHASE"),
   ("3333-3333-3333-3333", 102, 60.00, "2022-04-09", 1, "MADE_PURCHASE"),
   ("3333-3333-3333-3333", 102, 70.00, "2022-04-10", 0, "MADE_PURCHASE"),
   #  Purchases by 4444-4444-4444-4444
   ("4444-4444-4444-4444", 102, 40.00, "2022-04-07", 0, "MADE_PURCHASE"),
   ("4444-4444-4444-4444", 103, 50.00, "2022-04-08", 0, "MADE_PURCHASE"),
   ]

df_purchases = pd.DataFrame(data=l_purchase_rows, columns=l_purchase_columns)

print(tabulate(df_purchases, headers="keys", tablefmt="psql"))


In [None]:
# ##################################################################

In [None]:
#  Check the partition count the graph reports.
#
#  The graph was created with 3 partitions, and some operations fail with
#  fewer than 3. Even so, num_partitions can still read None here; this can
#  happen on really small graphs and leads to unexpected results in the cells
#  below.

print(my_graph.num_partitions)

In [None]:
#  Repartition explicitly. The count comes from NUM_PARTITIONS (configuration
#  cell) so it stays in step with the rest of the notebook's settings.
my_graph.repartition(num_partitions = NUM_PARTITIONS)

print("--")

In [None]:
# ##################################################################

In [None]:

#  Load the three DataFrames built above into the graph through a single
#  DataFrameImporter session.

with import_data.DataFrameImporter(my_graph) as df_importer:

   #  Node sets: each frame is keyed by its "id" column, and the same name is
   #  used for both the id space and the label.
   for l_nodes, l_kind in ((df_persons, "Person"), (df_stores, "Store")):
      df_importer.nodes_dataframe(l_nodes, id_column = "id", id_space = l_kind, label = l_kind)

   #  Edge set: Person --> MADE_PURCHASE --> Store
   df_importer.edges_dataframe(
      df_purchases,
      source_id_space      = "Person",
      source_column        = "START_ID",
      destination_id_space = "Store",
      destination_column   = "END_ID",
      type                 = "MADE_PURCHASE",
      )

   df_importer.insert()

print("--")


In [None]:
# ##################################################################

In [None]:
#  Verify the import: fetch every (node)-[edge]->(node) triple and call
#  .view() on the result.

l_result = my_graph.query(
   """

   MATCH (n) - [r] -> (m)
   RETURN n, r, m
   
   """,
   contextualize = True,
   )

l_result.view()


In [None]:
#  Verify the import: return the Person nodes and print them as a
#  psql-style text table.

l_result = my_graph.query(
   """

   MATCH (n: Person)
   RETURN n
   
   """,
   contextualize = False,
   )

print(tabulate(l_result, headers = "keys", tablefmt = "psql"))


In [None]:
#  Verify the import: return every edge and print the result as a
#  psql-style text table.

l_result = my_graph.query(
   """

   MATCH (n) - [r] -> (m)
   RETURN r
   
   """,
   contextualize = False,
   )

print(tabulate(l_result, headers = "keys", tablefmt = "psql"))
