In [None]:

#  Notebook with a number of diagnostic/test routines for a Katana graph: connectivity, node/edge counts, schema, DuckDB ..
#



#  Setup: Display options ..

In [1]:

%xmode Minimal

#  Setting display options 

import pandas as pd
import numpy  as np
   #
pd.set_option("display.width", 480)

#  Sets horizontal scroll for wide outputs
#
from IPython.display import display, HTML
display(HTML(""))

from tabulate import tabulate


print("--")


Exception reporting mode: Minimal


--


#  Setup: Connectivity ..

In [2]:

from katana import remote

#  Create the client used by every later cell (Client() takes no arguments here)
my_client = remote.Client()

#  Show the client object and the server version it reports, one per line
print(my_client, my_client.server_version, sep="\n")


<katana.remote.sync_wrappers.Client object at 0x7f8afc342f40>
0.9.0+20230315T145956Z.dbe1b6788.dev


In [3]:

#  Notebook-wide settings.  DB_NAME / GRAPH_NAME select the database and graph
#  the later cells connect to; NUM_PARTITIONS is not used by the cells shown here.
NUM_PARTITIONS: int = 3
DB_NAME, GRAPH_NAME = "my_db", "my_graph"

print("--")


--


In [6]:

#  CONNECT TO GRAPH
#
#  Look up every graph called GRAPH_NAME in database DB_NAME and keep the first
#  match.  When nothing matches, fail with a message naming the graph and the
#  database instead of Python's generic "not enough values to unpack" error.

l_matches = list(my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME))
if not l_matches:
   raise ValueError("No graph named %r found in database %r" % (GRAPH_NAME, DB_NAME))

#  Any further graphs sharing the same name are ignored, as before
my_graph = l_matches[0]

print(my_graph)


<_Graph my_graph, EcCpQwrXXjTA5QjWk8dgLC5Sq1AEFwRQksAWjTYHfZWK, 8>


In [7]:

#  Now that we share servers, get a better list of databases and graphs ..
#
#  Prints one line per database, followed by one indented line for each graph
#  that database contains.

l_db_line    = "Database Name: %-28s   Id: %s"
l_graph_line = "   Graph Name: %-28s      Version: %-8d   Id: %s"

for l_db in my_client.databases():
   print(l_db_line % (l_db.name, l_db.database_id))
      #
   #  Re-fetch the database by name to enumerate its graphs
   l_graphs = my_client.get_database(name=l_db.name).graphs_in_database()
   for l_graph in l_graphs:
      print(l_graph_line % (l_graph.name, l_graph.version, l_graph.graph_id))


#  Blank line, then the end-of-cell marker
print("\n--")


Database Name: my_db                          Id: CnSownE6sbfqDS6w3BDDU91uMLaKjti2kGmjgy6nmR48
   Graph Name: my_graph_BC                       Version: 1          Id: 5NUwpV8GmVPB2FWCGYwWc32GQrPySTb38Cw7A29kQTvx
   Graph Name: my_graph_BB                       Version: 90         Id: 6GYEDQxA2ewmmGV53Vb7GDBx5U9osUzBctJhh6auAdG3
   Graph Name: my_graph                          Version: 8          Id: EcCpQwrXXjTA5QjWk8dgLC5Sq1AEFwRQksAWjTYHfZWK
   Graph Name: my_graph_BD                       Version: 1          Id: G6ryVuSgyQBCkeN5EhT8mpVNSYBTGTYUW22WhLFStgzw
Database Name: default                        Id: FqaYU5KKfHLGew6RbVPgRAVgba5AUByBA7DYbbbn7ENs

--


#  Step 00: Checking our results ..

In [10]:

#  Check counts
#
#  Counts nodes per label and edges per type, then prints both tallies.


def _print_counts(l_kind, l_frame, l_name_column):
   """Print one '<kind>, <name>   Number of records in graph: <cnt>' line per row.

   l_kind         Prefix for each printed line ("Node" or "Edge").
   l_frame        Query result providing itertuples() and a "cnt" column.
   l_name_column  Name of the column holding the label / type to print.
   """
   for l_row in l_frame.itertuples():
      print("%s, %-38s   Number of records in graph: %-8d" % (l_kind, getattr(l_row, l_name_column), l_row.cnt))


#  Node counts: a node carrying several labels is counted once under each of
#  its labels (UNWIND + SUM).
#
#  The query text is passed as-is.  The former no-argument .format() call did
#  nothing here and would have raised on any literal { } in the Cypher.
#
l_query  = """

   MATCH (n) 
   WITH DISTINCT LABELS(n) AS labels, COUNT(n) AS cnt
   UNWIND labels AS label
   RETURN label, SUM(cnt) AS cnt
   ORDER BY label
   
   """
      #
l_result1 = my_graph.query_unpaginated(l_query)

#  Edge counts: one row per relationship type
#
l_query  = """

   MATCH (m)-[r]->(n) 
   WITH DISTINCT TYPE(r) AS type, COUNT(r) AS cnt
   RETURN type, cnt
   ORDER BY type
   
   """
      #
l_result2 = my_graph.query_unpaginated(l_query)


print()

_print_counts("Node", l_result1, "label")

print()

_print_counts("Edge", l_result2, "type")

print()


print("--")



          0/? [?op/s]

          0/? [?op/s]


Node, CELL_LINE                                Number of records in graph: 1457    
Node, DRUG                                     Number of records in graph: 223     
Node, GDSC                                     Number of records in graph: 266     
Node, GENE                                     Number of records in graph: 719     

Edge, FOR_DRUG                                 Number of records in graph: 476     
Edge, HAS_CELL_LINE                            Number of records in graph: 510720  
Edge, HAS_EXPRESSION_OBSERVATION               Number of records in graph: 782034  
Edge, HAS_METHYLATION_OBSERVATION              Number of records in graph: 853000  
Edge, HAS_MUTATION_OBSERVATION                 Number of records in graph: 24845704

--


In [11]:

#  Output the schema from our graph
#
#  Runs the graph.schema() procedure and prints the result as plain text.

l_schema_query = "CALL graph.schema() RETURN *"
   #
l_result = my_graph.query_unpaginated(l_schema_query)
print(l_result)

#  Blank line, then the end-of-cell marker
print("\n--")


          0/? [?op/s]

                                   neighbor     nodeType                 properties
0                          ([]::[FOR_DRUG])           []                       TYPE
1                     ([]::[HAS_CELL_LINE])           []                 TYPE,label
2        ([]::[HAS_EXPRESSION_OBSERVATION])           []           TYPE,observation
3       ([]::[HAS_METHYLATION_OBSERVATION])           []           TYPE,observation
4          ([]::[HAS_MUTATION_OBSERVATION])           []  TYPE,mutation,observation
5                                            [CELL_LINE]         id,label,tcga_code
6    ([GENE]::[HAS_EXPRESSION_OBSERVATION])  [CELL_LINE]           TYPE,observation
7   ([GENE]::[HAS_METHYLATION_OBSERVATION])  [CELL_LINE]           TYPE,observation
8      ([GENE]::[HAS_MUTATION_OBSERVATION])  [CELL_LINE]  TYPE,mutation,observation
9                                                 [DRUG]            id,label,smiles
10                                                [GDSC]                   i

#  Step 00:  Using DuckDB

In [None]:


#  Using DuckDB.  See,
#     https://duckdb.org/docs/api/python/overview.html
#
#  The SQL below names "pd_drug" directly: DuckDB's Python API can query a
#  pandas DataFrame through the Python variable that holds it.

import duckdb

#  NOTE(review): pd_nodes is not defined in the cells shown here; presumably a
#  mapping from node label to DataFrame built elsewhere -- confirm before running.
pd_drug = pd_nodes["DRUG"]
   #
#  First two DRUG rows ordered by id; the call stays the last expression of the
#  cell so that its result is what the notebook displays.
l_sql = "SELECT * FROM pd_drug ORDER BY id LIMIT 2"
duckdb.sql(l_sql)


#  Sample output,
#
#     ┌──────────┬─────────────────────────────────────────────────────────────┬─────────┐
#     │    id    │                           smiles                            │  label  │
#     │ varchar  │                           varchar                           │ varchar │
#     ├──────────┼─────────────────────────────────────────────────────────────┼─────────┤
#     │ 10027278 │ NCC(=O)Nc1ccc(-n2nc(C(F)(F)F)cc2-c2ccc3c(ccc4ccccc43)c2)cc1 │ DRUG    │
#     │ 10074640 │ Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nc(-c2cccnc2)cs1  │ DRUG    │
#     └──────────┴─────────────────────────────────────────────────────────────┴─────────┘

           