#  Setup: Display options

In [1]:

#  Notebook-wide display configuration

import pandas as pd
from IPython.display import HTML, display
from tabulate import tabulate

#  Set the pandas display width to 480 characters
pd.options.display.width = 480

#  Sets horizontal scroll for wide outputs
#  NOTE(review): the HTML payload below is an empty string -- confirm
#  whether a <style> snippet was meant to be passed here
display(HTML(""))

#  End-of-cell marker
print("--")


--


In [2]:

#  Used to report timings
#  (default_timer is only imported here; none of the visible cells call it yet)

from timeit import default_timer

#  End-of-cell marker: confirms the cell ran
print("--")



--


# Setup: Connectivity

In [3]:

#  Create the katana.remote client used by every later cell (my_client).
#  Client() is called with no arguments.
#  NOTE(review): where the server address / credentials come from is not
#  visible in this notebook -- confirm.
#  import_data is imported but not used in the visible cells.
from katana import remote
from katana.remote import import_data

my_client = remote.Client()

#  Show the client's repr as a quick check that it was constructed
print(my_client)


<katana.remote.sync_wrappers.Client object at 0x7fb191bdabb0>


In [4]:

#  Notebook-wide settings used by the connectivity cells
#  NUM_PARTITIONS is not referenced in the visible cells; presumably the
#  partition count used when a graph is created/imported -- TODO confirm
NUM_PARTITIONS  = 3
   #
#  Database and graph names passed to get_database(...) / find_graphs_by_name(...)
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

#  End-of-cell marker: confirms the cell ran
print("--")


--


In [12]:

#  CONNECT TO GRAPH
#
#  Look up every graph named GRAPH_NAME inside database DB_NAME and keep the
#  first match; any further matches are ignored.
#
#  A plain "my_graph, *_ = ..." unpacking fails with an opaque
#  "not enough values to unpack" ValueError when nothing matches, so the
#  result is checked explicitly and the error names what was searched for.

l_matching_graphs = list(my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME))

if not l_matching_graphs:
   raise ValueError("No graph named %r found in database %r" % (GRAPH_NAME, DB_NAME))

my_graph = l_matching_graphs[0]

print(my_graph)


<_Graph my_graph, 9oxuVL3PJqNwMpBuuDshgohrh4mmgyAwfmSoM1tPXCoH, 1>


In [13]:

#  The server is shared, so print a full inventory: each database with its
#  id, followed by the graphs it holds (name, version, id).
#

l_db_line    = "Database Name: %-28s   Id: %s"
l_graph_line = "   Graph Name: %-28s      Version: %-8d   Id: %s"

for l_each_d in my_client.databases():
   print(l_db_line % (l_each_d.name, l_each_d.database_id))

   #  The database is fetched again by name before its graphs are listed
   l_db_handle = my_client.get_database(name=l_each_d.name)

   for l_each_g in l_db_handle.graphs_in_database():
      l_graph_fields = (l_each_g.name, l_each_g.version, l_each_g.graph_id)
      print(l_graph_line % l_graph_fields)


#  Blank line, then the end-of-cell marker
print("")
print("--")


Database Name: my_db                          Id: 21JDRLcScJPzXu2CsX8hSCCwpuvBdXJbkLKQ7CdcKxV9
   Graph Name: my_graph                          Version: 1          Id: 9oxuVL3PJqNwMpBuuDshgohrh4mmgyAwfmSoM1tPXCoH
   Graph Name: my_graph_BB                       Version: 10         Id: DfmNBHbvwBWi3PzzdrpaBEY7uQ69ofWFh3bwLURyDW1m
Database Name: default                        Id: 2ps7XNc4f8KLRVXwutJzqQVE3bTUdVbHw357hkA6PM5z

--


#  Step 00: Diagnostics

In [14]:

#  Diagnostics: two count tables, printed one after the other with a blank
#  line in between.
#
#  The Cypher texts are plain literals; they contain no placeholders.

#  List all node types with node counts
l_node_count_query = """

   MATCH (n) 
   WITH DISTINCT LABELS(n) AS labels, COUNT(n) AS cnt
   UNWIND labels AS label
   RETURN label, SUM(cnt) AS cnt
   ORDER BY label
   
   """

#  List all edge types with counts
l_edge_count_query = """

   MATCH (m)-[r]->(n) 
   WITH DISTINCT TYPE(r) AS type, COUNT(r) AS cnt
   RETURN type, cnt
   ORDER BY type
   
   """

for l_position, l_query in enumerate((l_node_count_query, l_edge_count_query)):
   #  Separate the second table from the first with an empty line
   if l_position > 0:
      print()

   l_result = my_graph.query_unpaginated(l_query)
   print(tabulate(l_result, headers="keys", tablefmt="psql", showindex=False))



          0/? [?op/s]

+--------------+-------+
| label        |   cnt |
|--------------+-------|
| City         |  1343 |
| Comment      |   371 |
| Company      |  1575 |
| Continent    |     6 |
| Country      |   111 |
| Forum        |   407 |
| Message      |  3928 |
| Organisation |  7955 |
| Person       |    45 |
| Place        |  1460 |
| Post         |  3557 |
| Tag          | 16080 |
| TagClass     |    71 |
| University   |  6380 |
+--------------+-------+



  self._response = response


          0/? [?op/s]

+----------------+-------+
| type           |   cnt |
|----------------+-------|
| CONTAINER_OF   |  3557 |
| HAS_CREATOR    |  3928 |
| HAS_INTEREST   |  1182 |
| HAS_MEMBER     |  1163 |
| HAS_MODERATOR  |   407 |
| HAS_TAG        |  2347 |
| HAS_TYPE       | 16080 |
| IS_LOCATED_IN  | 11928 |
| IS_PART_OF     |  1454 |
| IS_SUBCLASS_OF |    70 |
| KNOWS          |    58 |
| LIKES          |   395 |
| REPLY_OF       |   371 |
| STUDY_AT       |    37 |
| WORK_AT        |    95 |
+----------------+-------+


In [15]:

#  Diagnostics: which property keys occur, first per node label set and then
#  per edge type, each with a count. One psql-style table per query.

#  Return properties/keys from all vertices
l_node_keys_query = """
   MATCH (n)
   RETURN DISTINCT LABELS(n) AS label, KEYS(n) AS my_keys, COUNT(n) AS cnt
   """

#  Return properties/keys from all edges
l_edge_keys_query = """
   MATCH (n) - [r] -> (m)
   RETURN DISTINCT TYPE(r) AS my_type, KEYS(r) AS my_keys, COUNT(r) AS cnt
   """

for l_query in (l_node_keys_query, l_edge_keys_query):
   l_result = my_graph.query_unpaginated(l_query)
   print(tabulate(l_result, headers="keys", tablefmt="psql", showindex=False))



          0/? [?op/s]

+--------------------------------+-----------------------------------------------------------------------------------------------------------------------+-------+
| label                          | my_keys                                                                                                               |   cnt |
|--------------------------------+-----------------------------------------------------------------------------------------------------------------------+-------|
| ['Message', 'Post']            | ['browserUsed', 'content', 'creationDate', 'id', 'imageFile', 'language', 'length', 'locationIP']                     |  3557 |
| ['Place', 'Country']           | ['id', 'name', 'url']                                                                                                 |   111 |
| ['Place', 'Continent']         | ['id', 'name', 'url']                                                                                                 |     6 |
| ['TagClass']        

          0/? [?op/s]

+----------------+------------------+-------+
| my_type        | my_keys          |   cnt |
|----------------+------------------+-------|
| HAS_CREATOR    | []               |  3928 |
| HAS_MEMBER     | ['creationDate'] |  1163 |
| HAS_INTEREST   | []               |  1182 |
| HAS_TAG        | []               |  2347 |
| WORK_AT        | ['workFrom']     |    95 |
| REPLY_OF       | []               |   371 |
| CONTAINER_OF   | []               |  3557 |
| STUDY_AT       | ['classYear']    |    37 |
| IS_SUBCLASS_OF | []               |    70 |
| IS_PART_OF     | []               |  1454 |
| HAS_TYPE       | []               | 16080 |
| HAS_MODERATOR  | []               |   407 |
| IS_LOCATED_IN  | []               | 11928 |
| KNOWS          | ['creationDate'] |    58 |
| LIKES          | ['creationDate'] |   395 |
+----------------+------------------+-------+


#  Step 01: Actual Routines

In [None]:

#  PageRank over the connected graph
#
#  Runs the remote analytics routine on my_graph, the handle obtained in the
#  "CONNECT TO GRAPH" cell. The name "graph" is never defined in this
#  notebook, so passing it would raise NameError on a fresh kernel.
#
#  NOTE(review): result_property_name is presumably the node property the
#  scores are written to -- confirm against the katana documentation.
#  NOTE(review): max_iterations=2 is very low for PageRank -- confirm this is
#  intended (e.g. a quick smoke run).
#  NOTE(review): alpha=0.15 -- confirm whether katana treats alpha as the
#  damping factor or as the teleport probability.

from katana.remote.analytics import pagerank

pagerank(my_graph, result_property_name="page_rank", max_iterations=2, alpha=0.15)
