#  Setup: Display options

In [None]:

#  Display setup: widen pandas' text output and bring in the rendering
#  helpers (IPython display / HTML, tabulate) that later cells use.
#
import pandas as pd
from IPython.display import display, HTML
from tabulate import tabulate

#  Display width, in characters, that pandas assumes when laying out output
#
pd.set_option("display.width", 480)

#  Intended to set horizontal scroll for wide outputs.
#  NOTE(review): the HTML payload is an empty string here, so nothing is
#  actually injected -- confirm whether the original markup was lost.
#
display(HTML(""))

print("--")


In [None]:

#  Timing support: default_timer is the clock the PageRank cell reads
#  before and after its work to report elapsed seconds.
#

from timeit import default_timer

#  Prints a marker line as this cell's only output
print("--")



# Setup: Connectivity

In [None]:

#  Connectivity: import the Katana remote API and create the client object
#  that later cells use to look up databases and graphs.
#
from katana import remote
from katana.remote import import_data   # not referenced in the cells shown here

#  No arguments are passed, so connection settings presumably come from
#  Client()'s own defaults / environment -- TODO confirm.
my_client = remote.Client()

#  Show the client's printable form as this cell's output
print(my_client)


In [None]:

#  Notebook-wide settings
#
#  NUM_PARTITIONS is not referenced by any of the cells shown here.
#
NUM_PARTITIONS = 3

#  Names of the database and graph that the connect cell looks up
#
DB_NAME, GRAPH_NAME = "my_db", "my_graph"

print("--")


In [None]:

#  CONNECT TO GRAPH
#
#  Look up DB_NAME through my_client and bind my_graph to the first graph
#  returned for GRAPH_NAME; any further matches are ignored.
#
#  A bare "my_graph, *_ = ..." fails with an opaque "not enough values to
#  unpack" message when nothing matches, so the empty case is reported
#  explicitly (still as a ValueError) with the names that were searched.
#
l_matches = list(my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME))

if not l_matches:
   raise ValueError(
      "No graph named %r was found in database %r" % (GRAPH_NAME, DB_NAME)
   )

my_graph = l_matches[0]

print(my_graph)


In [None]:

#  Inventory of the (shared) server: one line per database, followed by
#  an indented line for each graph found in that database.
#

l_db_line    = "Database Name: %-28s   Id: %s"
l_graph_line = "   Graph Name: %-28s      Version: %-8d   Id: %s"

for l_each_d in my_client.databases():
   print(l_db_line % (l_each_d.name, l_each_d.database_id))

   #  The database is fetched again by name before its graphs are listed
   l_graphs = my_client.get_database(name=l_each_d.name).graphs_in_database()

   for l_each_g in l_graphs:
      l_fields = (l_each_g.name, l_each_g.version, l_each_g.graph_id)
      print(l_graph_line % l_fields)

#  Blank line, then the usual end-of-cell marker
print("")
print("--")


#  Step 00: Diagnostics

In [None]:

#  List all node types with node counts
#
#  The Cypher text is passed to the server verbatim.  It is deliberately NOT
#  run through str.format(): no values are substituted, and format() would
#  treat any { } added to the query (e.g. a property map) as a replacement
#  field and fail.

l_query  = """

   MATCH (n) 
   WITH DISTINCT LABELS(n) AS labels, COUNT(n) AS cnt
   UNWIND labels AS label
   RETURN label, SUM(cnt) AS cnt
   ORDER BY label
   
   """

l_result = my_graph.query_unpaginated(l_query)

#  Print the result as a psql-style text table, headers taken from the
#  result's keys, index column suppressed
print(tabulate(l_result, headers = "keys", tablefmt = "psql", showindex = False))


#  Blank line between the two tables
print()


#  List all edge types with counts

l_query  = """

   MATCH (m)-[r]->(n) 
   WITH DISTINCT TYPE(r) AS type, COUNT(r) AS cnt
   RETURN type, cnt
   ORDER BY type
   
   """

l_result = my_graph.query_unpaginated(l_query)

print(tabulate(l_result, headers = "keys", tablefmt = "psql", showindex = False))



In [None]:

#  Return properties/keys from all vertices
#
#  One row per distinct (labels, property-key list) combination, with the
#  number of nodes in each.  The query text is sent verbatim; it is not
#  passed through str.format(), which would misread any { } later added
#  to the Cypher as replacement fields.

l_query  = """
   MATCH (n)
   RETURN DISTINCT LABELS(n) AS label, KEYS(n) AS my_keys, COUNT(n) AS cnt
   """

l_result = my_graph.query_unpaginated(l_query)

#  Print as a psql-style text table, index column suppressed
print(tabulate(l_result, headers = "keys", tablefmt = "psql", showindex = False))


#  Return properties/keys from all edges
#
#  Same report for relationships: one row per distinct (type, property-key
#  list) combination, with the number of edges in each.

l_query  = """
   MATCH (n) - [r] -> (m)
   RETURN DISTINCT TYPE(r) AS my_type, KEYS(r) AS my_keys, COUNT(r) AS cnt
   """

l_result = my_graph.query_unpaginated(l_query)

print(tabulate(l_result, headers = "keys", tablefmt = "psql", showindex = False))



#  Step 01: Actual Routines

In [None]:


#  PageRank routine from the Katana remote analytics package
from katana.remote.analytics import pagerank

#  Run counter for the PageRank cell: that cell increments it and embeds it,
#  zero-padded to 4 digits, in the result property name ("pr_0001",
#  "pr_0002", ..) so it can be run over and over without edit.
#  NOTE(review): re-running THIS cell resets the counter to 0, so the
#  generated names start repeating afterwards.
#
l_cntr = 0

print("--")



In [None]:

#  Page Rank obviously ..
#
#  Runs pagerank on my_graph, storing the result under a property name that
#  is unique per execution of this cell, then shows the graph's node and
#  edge counts and the elapsed time.
#

#  Generate a unique prop name so we can run this over and over without edit ..
#  (l_cntr = 1 -> "pr_0001", 2 -> "pr_0002", ..)
#
l_cntr += 1
l_output_propname = "pr_" + str(l_cntr).zfill(4)

l_time = default_timer()

#  Pass the connected graph handle, my_graph.  The previous argument, "graph",
#  is not defined in any cell shown here and made this call raise NameError.
#
pagerank(my_graph, result_property_name = l_output_propname, max_iterations = 2, alpha = 0.15)

display(my_graph.num_nodes())
display(my_graph.num_edges())

#  Elapsed time covers the pagerank call and the two count lookups above;
#  %d reports whole seconds only.
#
print("Time in seconds: %d" % (default_timer() - l_time))
