#  Setup: Display options

In [37]:

#  Notebook display configuration

import pandas as pd
from IPython.display import display, HTML
from tabulate import tabulate

#  Set pandas' display width to 480 characters
pd.set_option("display.width", 480)

#  Intended to enable horizontal scrolling for wide outputs.
#  NOTE(review): the HTML payload is an empty string, so nothing is injected here --
#  confirm whether a style block was lost from this cell.
display(HTML(""))

#  Marker line printed once the cell has run
print("--")


--


In [38]:

#  Timer used by the analytics cells further down: each one reads default_timer()
#  before and after its call and prints the difference in seconds.
#

from timeit import default_timer

#  Marker line printed once the cell has run
print("--")



--


# Setup: Connectivity

In [39]:

#  Katana remote client setup.
#  `import_data` is imported here but is not used by any of the cells shown in this notebook section.
from katana import remote
from katana.remote import import_data

#  Client() is called with no arguments; presumably the connection details come
#  from the environment -- TODO confirm.
my_client = remote.Client()

#  Printing the client shows its repr, as a quick check that the object was created
print(my_client)


<katana.remote.sync_wrappers.Client object at 0x7fafe2a8a0d0>


In [40]:

#  Settings that identify which database and graph the notebook works on.
#
#  NUM_PARTITIONS is not referenced by any cell shown in this notebook section.
#  Earlier value, kept for reference:
#  NUM_PARTITIONS  = 3
NUM_PARTITIONS  = 5                  #  Justin has 5
   #
#  Names passed to get_database() / find_graphs_by_name() when connecting
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

#  Marker line printed once the cell has run
print("--")


--


In [41]:

#  CONNECT TO GRAPH
#
#  Looks up every graph named GRAPH_NAME in database DB_NAME and keeps the first match.

l_graphs = list(my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME))

#  Fail with a readable message instead of an opaque unpacking error when nothing matches
if not l_graphs:
   raise ValueError("No graph named %r found in database %r" % (GRAPH_NAME, DB_NAME))

#  Plain indexing rather than `my_graph, *_ = ...`: assigning to `_` at notebook top level
#  would shadow IPython's "last output" variable for the rest of the session
my_graph = l_graphs[0]

print(my_graph)


<_Graph my_graph, 9DhtNcMG4ML2mPjnbNECWbPdgQsa7144z9KsRBr7DuKj, 1>


In [None]:

#  The server is shared, so walk every database on it and list the graphs each one holds
#

l_db_line    = "Database Name: %-28s   Id: %s"
l_graph_line = "   Graph Name: %-28s      Version: %-8d   Id: %s"

for l_db in my_client.databases():
   print(l_db_line % (l_db.name, l_db.database_id))

   #  The graphs are fetched by looking the database up again by its name
   l_db_graphs = my_client.get_database(name=l_db.name).graphs_in_database()
   for l_graph in l_db_graphs:
      print(l_graph_line % (l_graph.name, l_graph.version, l_graph.graph_id))

#  Blank line, then the end-of-cell marker
print()
print("--")


#  Step 00: Diagnostics

In [None]:

#  Diagnostics: node counts per label, then edge counts per type

#  Table layout shared by both listings
l_table_opts = dict(headers = "keys", tablefmt = "psql", showindex = False)


#  Node labels with counts. The label lists are unwound before summing, so a node
#  that carries several labels is counted under each of them.

l_query  = """

   MATCH (n) 
   WITH DISTINCT LABELS(n) AS labels, COUNT(n) AS cnt
   UNWIND labels AS label
   RETURN label, SUM(cnt) AS cnt
   ORDER BY label
   
   """

l_result = my_graph.query_unpaginated(l_query)
print(tabulate(l_result, **l_table_opts))


#  Blank line between the two tables
print()


#  Edge types with counts

l_query  = """

   MATCH (m)-[r]->(n) 
   WITH DISTINCT TYPE(r) AS type, COUNT(r) AS cnt
   RETURN type, cnt
   ORDER BY type
   
   """

l_result = my_graph.query_unpaginated(l_query)
print(tabulate(l_result, **l_table_opts))


#  Sample output,
#



          0/? [?op/s]

+--------------+-----------+
| label        |       cnt |
|--------------+-----------|
| City         |      1343 |
| Comment      | 127297840 |
| Company      |      1575 |
| Continent    |         6 |
| Country      |       111 |
| Forum        |   4103391 |
| Message      | 184685839 |
| Organisation |      7955 |
| Person       |    449165 |
| Place        |      1460 |
| Post         |  57387999 |
| Tag          |     16080 |
| TagClass     |        71 |
| University   |      6380 |
+--------------+-----------+



          0/? [?op/s]

In [None]:

#  Diagnostics: which property keys occur on nodes (per label set) and on edges (per type)

#  Table layout shared by both listings
l_table_opts = dict(headers = "keys", tablefmt = "psql", showindex = False)


#  Property keys found on nodes, grouped by label list, with a node count per group

l_query  = """
   MATCH (n)
   RETURN DISTINCT LABELS(n) AS label, KEYS(n) AS my_keys, COUNT(n) AS cnt
   """

l_result = my_graph.query_unpaginated(l_query)
print(tabulate(l_result, **l_table_opts))


#  Property keys found on edges, grouped by edge type, with an edge count per group

l_query  = """
   MATCH (n) - [r] -> (m)
   RETURN DISTINCT TYPE(r) AS my_type, KEYS(r) AS my_keys, COUNT(r) AS cnt
   """

l_result = my_graph.query_unpaginated(l_query)
print(tabulate(l_result, **l_table_opts))


#  Sample output,
#



#  Step 01: Actual Routines

In [None]:


#  Analytics routines called by the cells below
from katana.remote.analytics import pagerank
from katana.remote.analytics import louvain_clustering
from katana.remote.analytics import betweenness_centrality


#  Counter shared by the analytics cells: each one increments it and builds its result
#  property name from it (pr_0001, lv_0002, ..), so repeated runs get distinct names.
#  NOTE(review): re-running this cell resets the counter to 0, so names start over --
#  confirm whether that can collide with properties written by earlier runs.
l_cntr = 0

#  Marker line printed once the cell has run
print("--")


In [None]:

#  PageRank over the whole graph, with wall-clock timing
#

#  Advance the shared counter so this run writes to a fresh property (pr_0001, pr_0002, ..)
l_cntr += 1
l_pr_propname = "pr_{:04d}".format(l_cntr)

#  Only the analytics call itself is timed
l_time = default_timer()
pagerank(
   my_graph,
   result_property_name = l_pr_propname,
   max_iterations = 2,
   alpha = 0.15,
   )
l_elapsed = default_timer() - l_time

#  %d truncates the elapsed time to whole seconds
print("Time in seconds: %d" % l_elapsed)
print("--")


#  Sample output,
#
#  SF: 003
#     Time in seconds: 6
#
#  SF: 100
#
#



In [None]:

#  Show the ten nodes with the highest PageRank score
#
#  The score is stored in the property named by l_pr_propname (set by the PageRank cell),
#  so that name is substituted into both the RETURN and the ORDER BY clause. Returning a
#  fixed `n.page_rank` would read a property this notebook never writes.

l_query  = """
   MATCH (n)
   RETURN n, n.{0}
   ORDER BY n.{0} DESC
   LIMIT 10

   """.format(l_pr_propname)

l_result = my_graph.query_unpaginated(l_query)
   #
print(tabulate(l_result, headers = "keys", tablefmt = "psql", showindex = False))

print("--")


#  Sample output,   SF: 100
#


In [None]:

#  Louvain clustering on a projection of the graph, with wall-clock timing
#

#  Restrict the run to a single node type; the projection is built before the timer starts
l_nodetype = "Tag"
my_graph_l = my_graph.project(node_types = [l_nodetype])

#  Advance the shared counter so this run writes to a fresh property (lv_0001, lv_0002, ..)
l_cntr += 1
l_lv_propname = "lv_{:04d}".format(l_cntr)

#  Only the analytics call itself is timed
l_time = default_timer()
louvain_clustering(my_graph_l, result_property_name = l_lv_propname, is_symmetric = True)
l_elapsed = default_timer() - l_time

#  %d truncates the elapsed time to whole seconds
print("Time in seconds: %d" % l_elapsed)
print("--")


#  Sample output,
#
#  SF: 003
#     Time in seconds: 7
#
#  SF: 100
#
#


In [None]:

#  Show ten nodes of the clustered node type, ordered by their Louvain property, highest first
#

#  Table layout for the listing
l_table_opts = dict(headers = "keys", tablefmt = "psql", showindex = False)

#  {0} is the node label that was projected, {1} the property the clustering wrote to
l_query  = """
   MATCH (n: {0})
   RETURN n, n.{1}
   ORDER BY n.{1} DESC
   LIMIT 10

   """.format(l_nodetype, l_lv_propname)

l_result = my_graph.query_unpaginated(l_query)
print(tabulate(l_result, **l_table_opts))

print("--")


#  Sample output,   SF: 100
#



In [None]:

#  Betweenness centrality over the whole graph, with wall-clock timing
#

#  Advance the shared counter so this run writes to a fresh property (bc_0001, bc_0002, ..)
l_cntr += 1
l_bc_propname = "bc_{:04d}".format(l_cntr)

#  Only the analytics call itself is timed
l_time = default_timer()
betweenness_centrality(
   my_graph,
   result_property_name = l_bc_propname,
   num_sources = 100,
   )
l_elapsed = default_timer() - l_time

#  %d truncates the elapsed time to whole seconds
print("Time in seconds: %d" % l_elapsed)
print("--")


#  Sample output,
#
#  SF: 003
#     Time in seconds: 85
#
#  SF: 100
#
#



In [None]:

#  Show the ten nodes with the highest betweenness centrality value
#

#  Table layout for the listing
l_table_opts = dict(headers = "keys", tablefmt = "psql", showindex = False)

#  {0} is the property the betweenness centrality cell wrote its result to
l_query  = """
   MATCH (n)
   RETURN n, n.{0}
   ORDER BY n.{0} DESC
   LIMIT 10

   """.format(l_bc_propname)

l_result = my_graph.query_unpaginated(l_query)
print(tabulate(l_result, **l_table_opts))

print("--")


#  Sample output,   SF: 100
#

