#  Setup stuff: Connectivity

In [None]:
import os

from katana import remote
from katana.remote import import_data


#  Build the katana remote client; the later cells all go through my_client.
my_client = remote.Client()

#  Print the client object as a quick check that it was constructed.
print(my_client)


In [None]:
#  Notebook-wide settings read by the database / graph cells.
NUM_PARTITIONS = 3      #  passed as num_partitions when the graph is created

#  Names of the database and of the graph created inside it.
DB_NAME, GRAPH_NAME = "my_db", "my_graph"

print("--")


In [None]:
#  DELETE ALL GRAPHS
#
#  Walk every database the client reports, delete each graph it holds, then
#  list whatever graphs the client still reports so the outcome can be checked.

for l_database in my_client.databases():
    #  Resolve the database handle once per database rather than twice per graph.
    l_db_handle = my_client.get_database(name=l_database.name)
    for l_graph in l_db_handle.graphs_in_database():
        l_handle = l_db_handle.get_graph_by_id(id=l_graph.graph_id)
        l_handle.delete()

for l_graph in my_client.graphs():
    print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")


In [None]:
#  DELETE ALL DATABASES
#
#  Every database except the one named "default" is deleted (one "--" line is
#  printed per deletion); the databases that remain are listed afterwards.

for l_database in my_client.databases():
    if l_database.name == "default":
        continue
    my_client.get_database(name=l_database.name).delete_database()
    print("--")

for l_database in my_client.databases():
    print(
        "DB ID: ", l_database.database_id,
        "     DB Name: ", l_database.name,
    )

print("--")


In [None]:
#  CREATE DATABASE
#
#  Create the database named by DB_NAME and keep the returned object.

my_database = my_client.create_database(name=DB_NAME)

#  Print the id of the database that was just created.
print(my_database.database_id)


In [None]:
#  CREATE GRAPH
#
#  Create GRAPH_NAME inside the DB_NAME database, passing NUM_PARTITIONS as
#  num_partitions, and keep the returned graph object.
my_graph = my_client.get_database(name=DB_NAME).create_graph(
    name=GRAPH_NAME,
    num_partitions=NUM_PARTITIONS,
)

print(my_graph)


In [None]:
#  CONNECT TO GRAPH
#
#  Re-acquire the handle for GRAPH_NAME by scanning the graphs of DB_NAME and
#  fetching the first one whose name matches.

#  One database lookup, reused for both the listing and the fetch.
l_db_handle = my_client.get_database(name=DB_NAME)

for l_graph in l_db_handle.graphs_in_database():
    if l_graph.name == GRAPH_NAME:
        my_graph = l_db_handle.get_graph_by_id(id=l_graph.graph_id)
        break
else:
    #  Loop ended without a match: this cell did not (re)assign my_graph, so
    #  say so instead of silently showing whatever an earlier cell left behind.
    print("Graph", GRAPH_NAME, "not found in database", DB_NAME)

print(my_graph)


#  Setup stuff: Debugging

In [None]:
#  LIST ALL GRAPHS
#
#  One line per graph the client reports: id, name and version.

for l_graph in my_client.graphs():
    print(
        "GRAPH ID: ", l_graph.graph_id,
        "     DB GRAPH: ", l_graph.name,
        "    GRAPH VERSION: ", l_graph.version,
    )

print("--")


In [None]:
#  Show the node count and the edge count reported by the graph handle.
#  NOTE(review): display() is not imported in this file; presumably the
#  notebook (IPython) built-in -- confirm before running outside a notebook.
display(my_graph.num_nodes())
display(my_graph.num_edges())

In [None]:
# ##################################################################

In [None]:
#  Call the graph.schema() procedure and return everything it yields.
my_graph.query("CALL graph.schema() RETURN *")


In [None]:
#  Open the client's operations widget.
#  NOTE(review): presumably a notebook widget listing operations -- confirm.
my_client.widgets().operations()


# Create: Vertices/nodes, edges/relationships 

In [None]:
import pandas as pd

#  Prints "--", the same end-of-cell marker the other cells print.
print("--")

In [None]:
#  Vertices/Nodes, Persons ..
#
#  One row per person as (person_code, person_name); every row carries the
#  same LABEL value, "Person".

df_persons = pd.DataFrame(
    data=[
        (l_code, l_name, "Person")
        for l_code, l_name in (
            ("1111-1111-1111-1111", "Justin"),
            ("2222-2222-2222-2222", "Thomas"),
            ("3333-3333-3333-3333", "Sameer"),
            ("4444-4444-4444-4444", "Brian"),
        )
    ],
    columns=["person_code", "person_name", "LABEL"],
)

df_persons.head(10)


In [None]:
#  Vertices/Nodes, Stores ..
#
#  One row per store as (store_code, store_name); every row carries the same
#  LABEL value, "Store".

df_stores = pd.DataFrame(
    data=[
        (l_code, l_name, "Store")
        for l_code, l_name in (
            (101, "Panera"),
            (102, "Target"),
            (103, "Lowes"),
            (104, "Volvo Service"),
            (105, "JC Penney"),
            (106, "Torchys Tacos"),
        )
    ],
    columns=["store_code", "store_name", "LABEL"],
)

df_stores.head(10)


In [None]:
#  Create the edge, MADE_PURCHASE ..
#
#  Purchases are grouped per person below as
#  (store_code, amount, date string, fraud flag) and flattened into one row per
#  purchase; every row carries the same TYPE value, "MADE_PURCHASE".

df_purchases = pd.DataFrame(
    data=[
        (l_person, l_store, l_amount, l_date, l_fraud, "MADE_PURCHASE")
        for l_person, l_rows in {
            "1111-1111-1111-1111": (
                (101, 40.00, "2022-04-01", 0),
                (105, 50.00, "2022-04-02", 0),
                (106, 60.00, "2022-04-03", 1),
                (102, 70.00, "2022-04-10", 0),
                (106, 60.00, "2022-04-14", 1),
            ),
            "2222-2222-2222-2222": (
                (103, 40.00, "2022-04-01", 0),
                (104, 50.00, "2022-04-02", 0),
                (105, 60.00, "2022-04-03", 0),
                (101, 70.00, "2022-04-06", 1),
                (102, 80.00, "2022-04-10", 0),
            ),
            "3333-3333-3333-3333": (
                (105, 50.00, "2022-04-05", 0),
                (102, 60.00, "2022-04-09", 1),
                (102, 70.00, "2022-04-10", 0),
            ),
            "4444-4444-4444-4444": (
                (102, 40.00, "2022-04-07", 0),
                (103, 50.00, "2022-04-08", 0),
            ),
        }.items()
        for l_store, l_amount, l_date, l_fraud in l_rows
    ],
    columns=["START_ID", "END_ID", "AMOUNT", "DATE_AS_STRING", "IS_FRAUDULENT", "TYPE"],
)

df_purchases.head(30)


In [None]:
# ##################################################################

In [None]:
#  Load the three DataFrames built above (persons, stores, purchases) into
#  my_graph through a single DataFrameImporter session.

with import_data.DataFrameImporter(my_graph) as df_importer:

    #  Person set of Nodes, keyed by person_code
    df_importer.nodes_dataframe(
        df_persons,
        id_column="person_code",
        id_space="Person",
        label="Person",
    )

    #  Store set of Nodes, keyed by store_code
    df_importer.nodes_dataframe(
        df_stores,
        id_column="store_code",
        id_space="Store",
        label="Store",
    )

    #  Our Edge: Person (START_ID) --> MADE_PURCHASE --> Store (END_ID)
    df_importer.edges_dataframe(
        df_purchases,
        source_id_space="Person",
        destination_id_space="Store",
        source_column="START_ID",
        destination_column="END_ID",
        type="MADE_PURCHASE",
    )

print("--")

In [None]:
#  View the graph; verify results
#
#  Fetch every directed (n)-[r]->(m) triple in the graph and render the
#  returned result with its view() method.

l_result = my_graph.query("""

   MATCH (n) - [r] -> (m)
   RETURN n, r, m
   
   """, contextualize=True)

l_result.view()


#  Analytics

In [None]:
#  Just Persons with fraudulent transactions
#
#  Match only the MADE_PURCHASE edges whose IS_FRAUDULENT is 1 and, per Person
#  id, collect the date string and store name of each such purchase; rows are
#  ordered by the Person id.

l_result = my_graph.query("""

   MATCH (n: Person) - [ r: MADE_PURCHASE { IS_FRAUDULENT: 1 } ] -> (m: Store)
   RETURN n.id, COLLECT ( { date_as_string: r.DATE_AS_STRING, store_name: m.store_name } )
   ORDER BY n.id
   
   """, contextualize=True)

l_result.view()


In [None]:

#  For each Person with a MADE_PURCHASE edge flagged IS_FRAUDULENT = 1 (r),
#  pair that edge with the Person's MADE_PURCHASE edges (r2), keep the pairs
#  whose r2 date sorts before the fraud date, and return the store code / name
#  of each such earlier purchase (one row per kept pair).
#
#  The dates are compared as strings (DATE_AS_STRING); the values loaded from
#  df_purchases are written year-month-day, so string order follows date order.
#
#  Level-4 and Level-5 only reshape the matched rows (COLLECT per person, UNWIND,
#  then one flat map per row); Level-6 narrows each row to its store fields.
#  The COLLECT block under Level-6 is commented out inside the query text, so
#  Store6 is the only value left for RETURN * to emit.
l_result = my_graph.query("""

   //  Level-0
   //
   MATCH (m: Store) <- [ r: MADE_PURCHASE { IS_FRAUDULENT: 1 } ] - (n: Person) - [ r2: MADE_PURCHASE ] -> (m2: Store)

   //  Level-4
   //
   WITH {
      person_code: n.id,
      person_name: n.person_name 
      } AS Person4,
         //
      COLLECT ({
         fraud_date:      r.DATE_AS_STRING,
         purchase_date:  r2.DATE_AS_STRING,
         amount:         r2.AMOUNT,
            //
         store_code:     m2.id,
         store_name:     m2.store_name
      }) AS Details4

   UNWIND Details4 AS Detail4

   //  Level-5
   //
   WITH {
      person_code:       Person4.person_code,
      person_name:       Person4.person_name,
         //
      fraud_date:        Detail4.fraud_date,
      purchase_date:     Detail4.purchase_date,
      amount:            Detail4.amount,
            //
      store_code:        Detail4.store_code,
      store_name:        Detail4.store_name
      } AS  Detail5
         //
   WHERE (Detail5.purchase_date < Detail5.fraud_date)
  
   //  Level-6
   //
   WITH
      {
      store_code:        Detail5.store_code,
      store_name:        Detail5.store_name
      } AS Store6 // ,
      
      
//    COLLECT ({
//       
//       fraud_date:     Detail5.fraud_date,
//       purchase_date:  Detail5.purchase_date,
//       amount:         Detail5.amount,
//       person_code:    Detail5.person_code,
//       person_name:    Detail5.person_name
//    }) AS Details6
      
      
      
   RETURN 
      *
   
   
   """, contextualize=True)


#  Render the query result.
l_result.view()

   

In [None]:
#  Print the most recent query result object as text.
print(l_result)

In [None]:
#  What do the Persons who had fraudulent transactions have in common ?
#
#     (They all shopped at JC Penney, .. .. then had a fraudulent charge somewhere else)
#
#  Per Person, every purchase is collected into zzz as a map
#  {eee: store id, fff: fraud flag, ggg: date string}; only the Persons whose
#  collection contains the probed store id are returned.
#
#  zzz is a list of maps, so the filter must test the list's elements with a
#  list predicate; a property lookup on the list itself (zzz.eee) does not
#  inspect the individual purchases.
#
#  NOTE(review): 101 is "Panera" in df_stores, while JC Penney is 105 --
#  confirm which store code this probe is meant to use.

l_result = my_graph.query("""

   MATCH (n: Person) - [ r: MADE_PURCHASE ] -> (m: Store)

   WITH n.id as xxx, n.person_name as yyy, COLLECT ({eee: m.id, fff: r.IS_FRAUDULENT, ggg: r.DATE_AS_STRING}) AS zzz

   WHERE ANY ( l_purchase IN zzz WHERE l_purchase.eee = 101 )

   RETURN xxx, yyy, zzz

   """, contextualize=True)

l_result.view()
