In [None]:

#  Notebook to create and load the DeepCDR graph ..
#
#  Be advised:
#
#     .  We load the graph using Pandas dataframes from the Jupyter container.
#        This is about the slowest way to do this.
#        We use this approach for convenience; we do not need to host the data
#        files anywhere, or create and use authorization tokens, etc.
#
#        In production (or to run faster) we should load using Dask dataframes.
#



#  Setup: Display options ..

In [None]:

%xmode Minimal

#  Setting display options
#
#  The IPython magic above selects the "Minimal" traceback verbosity.
#  This cell also brings in the libraries used throughout the notebook:
#  pandas (pd), numpy (np), IPython's display helpers and tabulate.

import pandas as pd
import numpy  as np
   #
#  Set the pandas "display.width" option to 480
pd.set_option("display.width", 480)

#  Intended to set horizontal scroll for wide outputs.
#
#  NOTE(review): the HTML payload passed below is an empty string in this copy
#  of the notebook, so as written the call displays nothing. Presumably a
#  <style> block was lost when the notebook was exported -- confirm.
#
from IPython.display import display, HTML
display(HTML(""))

from tabulate import tabulate


#  End-of-cell marker, printed so it is obvious the cell ran to completion
print("--")


#  Setup: Connectivity ..

In [None]:

from katana import remote

#  Create the client handle that every later cell uses (my_client).
#  No arguments are passed, so the connection details presumably come from
#  the environment of the Jupyter container -- TODO confirm.
my_client = remote.Client()

#  Connectivity check: show the client object, then the server version it reports
print(my_client)
print(my_client.server_version)


In [None]:

#  Notebook-wide settings, change these to target a different database/graph
#
#     NUM_PARTITIONS   passed as num_partitions when the graph is created
#     DB_NAME          name of the database that is created / looked up
#     GRAPH_NAME       name of the graph that is created / looked up / deleted
#
NUM_PARTITIONS  = 3
   #
DB_NAME         = "my_db"
GRAPH_NAME      = "my_graph"

print("--")


In [None]:
# ##################################################################

In [None]:

#  DELETE ALL GRAPHS
#
#  WARNING: this deletes every graph in every database that my_client can
#  list, not only the graph used by this notebook. On a shared server, use
#  the "delete one specific graph" cell instead.
#

for l_database in my_client.databases():
   #  Look the database up once and reuse the handle for all of its graphs
   l_db_handle = my_client.get_database(name = l_database.name)
      #
   #  Take a snapshot of the graph list first, so that deleting graphs can
   #  not disturb the iteration
   for l_graph in list(l_db_handle.graphs_in_database()):
      l_db_handle.get_graph_by_id(id = l_graph.graph_id).delete()

#  Show whatever graphs are left
for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "      GRAPH Version: ", l_graph.version)

print("--")


In [49]:

#  DELETE ONE SPECIFIC GRAPH ONLY
#
#  Deletes every graph named GRAPH_NAME that lives in the database named
#  DB_NAME. If the database or the graph does not exist, nothing is deleted
#  and no error is raised.
#

for l_database in my_client.databases():
   #  Only the target database can contain a match; skip the others without
   #  listing their graphs
   if (l_database.name != DB_NAME):
      continue
      #
   l_db_handle = my_client.get_database(name = l_database.name)
      #
   #  Snapshot the graph list first, so that deleting can not disturb the iteration
   for l_graph in list(l_db_handle.graphs_in_database()):
      if (l_graph.name == GRAPH_NAME):
         l_db_handle.get_graph_by_id(id = l_graph.graph_id).delete()

#  Show whatever graphs are left
for l_graph in my_client.graphs():
   print("GRAPH ID: ", l_graph.graph_id, "   GRAPH Version: ", l_graph.version, "   Graph Name: ", l_graph.name)

print("--")


GRAPH ID:  5NUwpV8GmVPB2FWCGYwWc32GQrPySTb38Cw7A29kQTvx    GRAPH Version:  1    Graph Name:  my_graph_BC
GRAPH ID:  6GYEDQxA2ewmmGV53Vb7GDBx5U9osUzBctJhh6auAdG3    GRAPH Version:  97    Graph Name:  my_graph_BB
GRAPH ID:  7moQEUJpTBLCoLgABUjjAyFaSMMdanvqSmde4HBL9qpX    GRAPH Version:  2    Graph Name:  my_graph_BD
--


In [None]:

#  DELETE ALL DATABASES
#
#  Every database is deleted except the one named "default"; a "--" line is
#  printed per deleted database. Afterwards the remaining databases are listed.
#

for l_db in my_client.databases():
   if (l_db.name == "default"):
      continue
      #
   l_doomed = my_client.get_database(name = l_db.name)
   l_doomed.delete_database()
   print("--")

for l_db in my_client.databases():
   print("DB ID: ", l_db.database_id, "     DB Name: ", l_db.name)


In [None]:

#  CREATE DATABASE
#
#  Creates a database named DB_NAME and keeps the returned object in
#  my_database; its database_id is printed as confirmation.

my_database = my_client.create_database(name = DB_NAME)

print(my_database.database_id)


In [50]:

#  CREATE GRAPH
#
#  Creates a graph named GRAPH_NAME with NUM_PARTITIONS partitions inside the
#  database named DB_NAME, and keeps the handle in my_graph for later cells.
#
l_target_db = my_client.get_database(name = DB_NAME)
   #
my_graph    = l_target_db.create_graph(
   name           = GRAPH_NAME,
   num_partitions = NUM_PARTITIONS,
   )

print(my_graph)


<_Graph my_graph, EfDAPbeB2xFpJtayrQpM2gxN42vpPcPhWeHBiQvr2Yra, 0>


In [None]:

#  CONNECT TO GRAPH
#
#  Looks up the graph(s) named GRAPH_NAME in the database named DB_NAME and
#  keeps the first match in my_graph. Any further matches are ignored.
#
#  Raises ValueError, naming the graph and database, when nothing matches
#  (the bare unpacking used previously also raised ValueError, but with a
#  "not enough values to unpack" message that did not say what was missing).
#

l_matches = list(my_client.get_database(name=DB_NAME).find_graphs_by_name(GRAPH_NAME))
   #
if (not l_matches):
   raise ValueError("No graph named '%s' found in database '%s'" % (GRAPH_NAME, DB_NAME))

my_graph = l_matches[0]

print(my_graph)


In [None]:
# ##################################################################

In [None]:

#  Now that we share servers, get a better list of databases and graphs ..
#
#  One line per database, followed by one indented line per graph in it.
#

for l_db in my_client.databases():
   print("Database Name: %-28s   Id: %s" % (l_db.name, l_db.database_id))
      #
   l_db_graphs = my_client.get_database(name=l_db.name).graphs_in_database()
      #
   for l_gr in l_db_graphs:
      l_fields = (l_gr.name, l_gr.version, l_gr.graph_id)
      print("   Graph Name: %-28s      Version: %-8d   Id: %s" % l_fields)


print("")
print("--")


#  Step 01: Process CSV files ..

In [None]:

#  Read the CSVs as Step 01 towards populating our graph
#
#  This first part reads the node files into pd_nodes, one DataFrame per
#  node label.
#

l_folder   = "./10_Data/"


#  Here's the schema and sample data for the node files,
#
#     ==> 21_drug.txt <==
#     id|smiles|label
#     10027278|NCC(=O)Nc1ccc(-n2nc(C(F)(F)F)cc2-c2ccc3c(ccc4ccccc43)c2)cc1|DRUG
#
#     ==> 22_gdsc.txt <==
#     id|label
#     GDSC:1|GDSC
#
#     ==> 23_cell_line.txt <==
#     id|tcga_code|label
#     ACH-000001|OV|CELL_LINE
#
#     ==> 24_gene.txt <==
#     id
#     AKT2

#  There is null data in CELL_LINE.label
#

l_nodes    = [
   { "file": "21_drug.txt.gz"      , "label": "DRUG"      },
   { "file": "22_gdsc.txt.gz"      , "label": "GDSC"      },
   { "file": "23_cell_line.txt.gz" , "label": "CELL_LINE" },
   { "file": "24_gene.txt.gz"      , "label": "GENE"      },
   ]

#  Every node column is read with the pandas "string" dtype; presumably so
#  that numeric looking ids (e.g. 10027278) stay text -- TODO confirm.
#  Not every file has every column listed here.
#
l_node_dtypes = {"id": "string", "smiles": "string", "label": "string", "tcga_code": "string"}

pd_nodes = {}

print("Just Nodes ..")
print()

for l_node in l_nodes:
   l_label, l_file = l_node["label"], l_node["file"]
      #
   l_frame = pd.read_csv(l_folder + l_file, header = "infer", sep = "|", dtype = l_node_dtypes)
   pd_nodes[l_label] = l_frame
      #
   #  Record count, then the first two rows as a preview
   print("Number of records: %-8d   %-16s   %-32s" % (len(l_frame), l_label, l_file))
   print(tabulate(l_frame.head(2), headers='keys', tablefmt='psql', showindex=False))
   print()

    
   ######################################################### 
    
    
#  Here's the schema and sample data for the first group of edge files,
#
#     ==> 31_gdsc_drug.txt <==
#     START_ID|END_ID|TYPE
#     GDSC:1|176870|FOR_DRUG
#
#     ==> 32_gdsc_cell_line.txt <==
#     START_ID|END_ID|label|TYPE
#     GDSC:1|ACH-002137||HAS_CELL_LINE
#
#     ==> 33_cell_line_gene_expression.txt <==
#     START_ID|END_ID|observation|TYPE
#     ACH-000828|LASP1|9.39347626575|HAS_EXPRESSION_OBSERVATION
#
#     ==> 34_cell_line_gene_methylation.txt <==
#     START_ID|END_ID|observation|TYPE
#     ACH-000001|ABL1|0.00153|HAS_METHYLATION_OBSERVATION
#
#  These are read into pd_edges1, one DataFrame per edge type.
#

l_edges1   = [
   { "file": "31_gdsc_drug.txt.gz"                 , "type": "FOR_DRUG"                   },
   { "file": "32_gdsc_cell_line.txt.gz"            , "type": "HAS_CELL_LINE"              },                       #  Alternately called "HAS" and "FOR"
   { "file": "33_cell_line_gene_expression.txt.gz" , "type": "HAS_EXPRESSION_OBSERVATION" },
   { "file": "34_cell_line_gene_methylation.txt.gz", "type": "HAS_METHYLATION_OBSERVATION"},
   ]

#  Ids, type and label are read as pandas "string"; observation as float64.
#  Not every file has every column listed here.
#
l_edge_dtypes = {"START_ID": "string", "END_ID": "string", "TYPE": "string", "label": "string", "observation": "float64"}

pd_edges1 = {}

print()
print("Just Edges group 1 ..")
print()

for l_edge in l_edges1:
   l_type, l_file = l_edge["type"], l_edge["file"]
      #
   l_frame = pd.read_csv(l_folder + l_file, header = "infer", sep = "|", dtype = l_edge_dtypes)
   pd_edges1[l_type] = l_frame
      #
   #  Record count, then the first two rows as a preview
   print("Number of records: %-8d   %-32s   %-32s" % (len(l_frame), l_type, l_file))
   print(tabulate(l_frame.head(2), headers='keys', tablefmt='psql', showindex=False))
   print()
    
    
   ######################################################### 


#  Sample data,
#
#     START_ID|END_ID|observation|mutation|TYPE
#     ACH-000381|FH|0.0|FH.1:241671906|HAS_MUTATION_OBSERVATION
#
#  The mutation edges are split across 25 numbered files, 41 thru 65. Each
#  file is read into its own DataFrame, then all of them are stacked into the
#  single DataFrame pd_edges2_all.
#

l_edges2   = list(range(41, 66))                                                                     #  41 .. 65 inclusive
   #
pd_edges2 = {}

print()
print("Just Edges group 2 ..")
print()
   #
for l_each in l_edges2:
   l_file = (str(l_each) + "_cell_line_gene_mutation.txt.gz")
      #
   pd_edges2[str(l_each)] = pd.read_csv( (l_folder + l_file), header = "infer", sep="|",
      dtype={"START_ID": "string", "END_ID": "string", "TYPE": "string", "mutation": "string", "observation": "float64"})
   print("Number of records: %-8d   %-32s   %-32s" % (len(pd_edges2[str(l_each)]), "HAS_MUTATION_OBSERVATION", l_file))

print()
print("Concatenating ..")
print()
   #
#  Stack the per-file frames in file order. ignore_index gives the result one
#  fresh 0..N-1 index: no multi-level index (which the original note says
#  Dask, and thus the graph importer, has trouble with) and no repeated row
#  labels. The previous droplevel(0).reindex() removed the outer level only;
#  reindex() without arguments changes nothing, so every file's own 0..N row
#  labels were left in place, repeated once per file.
#
pd_edges2_all = pd.concat(list(pd_edges2.values()), ignore_index = True)
pd_edges2     = {}                                                                                   #  Drop the per-file frames; later cells use pd_edges2_all only
   #
print("Number of records: %-8d   %-32s" % (len(pd_edges2_all), "HAS_MUTATION_OBSERVATION"))
print(tabulate(pd_edges2_all.head(2), headers='keys', tablefmt='psql', showindex=False))
print()

print("--")


#  Step 00: Sample output from above ..

In [None]:


#  Sample output,
#
#     Just Nodes ..
#     
#     Number of records: 223        DRUG               21_drug.txt.gz                  
#     +----------+-------------------------------------------------------------+---------+
#     |       id | smiles                                                      | label   |
#     |----------+-------------------------------------------------------------+---------|
#     | 10027278 | NCC(=O)Nc1ccc(-n2nc(C(F)(F)F)cc2-c2ccc3c(ccc4ccccc43)c2)cc1 | DRUG    |
#     | 10074640 | Cc1ccc(NC(=O)c2ccc(CN3CCN(C)CC3)cc2)cc1Nc1nc(-c2cccnc2)cs1  | DRUG    |
#     +----------+-------------------------------------------------------------+---------+
#     
#     Number of records: 266        GDSC               22_gdsc.txt.gz                  
#     +-----------+---------+
#     | id        | label   |
#     |-----------+---------|
#     | GDSC:1    | GDSC    |
#     | GDSC:1001 | GDSC    |
#     +-----------+---------+
#     
#     Number of records: 1457       CELL_LINE          23_cell_line.txt.gz             
#     +------------+-------------+-----------+
#     | id         | tcga_code   | label     |
#     |------------+-------------+-----------|
#     | ACH-000001 | OV          | CELL_LINE |
#     | ACH-000002 | LAML        | CELL_LINE |
#     +------------+-------------+-----------+
#     
#     Number of records: 719        GENE               24_gene.txt.gz                  
#     +-------+
#     | id    |
#     |-------|
#     | AKT2  |
#     | EPAS1 |
#     +-------+
#     
#     
#     Just Edges group 1 ..
#     
#     Number of records: 266        FOR_DRUG                           31_gdsc_drug.txt.gz             
#     +------------+----------+----------+
#     | START_ID   |   END_ID | TYPE     |
#     |------------+----------+----------|
#     | GDSC:1     |   176870 | FOR_DRUG |
#     | GDSC:1001  |    65110 | FOR_DRUG |
#     +------------+----------+----------+
#     
#     Number of records: 257754     FOR_CELL_LINE                      32_gdsc_cell_line.txt.gz        
#     +------------+------------+---------+---------------+
#     | START_ID   | END_ID     | label   | TYPE          |
#     |------------+------------+---------+---------------|
#     | GDSC:1     | ACH-002137 | <NA>    | HAS_CELL_LINE |
#     | GDSC:1     | ACH-000474 | <NA>    | HAS_CELL_LINE |
#     +------------+------------+---------+---------------+
#     
#     Number of records: 391017     HAS_EXPRESSION_OBSERVATION         33_cell_line_gene_expression.txt.gz
#     +------------+----------+---------------+----------------------------+
#     | START_ID   | END_ID   |   observation | TYPE                       |
#     |------------+----------+---------------+----------------------------|
#     | ACH-000828 | LASP1    |     9.39348   | HAS_EXPRESSION_OBSERVATION |
#     | ACH-000828 | HOXA11   |     0.0426443 | HAS_EXPRESSION_OBSERVATION |
#     +------------+----------+---------------+----------------------------+
#     
#     Number of records: 426500     HAS_METHYLATION_OBSERVATION        34_cell_line_gene_methylation.txt.gz
#     +------------+----------+---------------+-----------------------------+
#     | START_ID   | END_ID   |   observation | TYPE                        |
#     |------------+----------+---------------+-----------------------------|
#     | ACH-000001 | ABL1     |       0.00153 | HAS_METHYLATION_OBSERVATION |
#     | ACH-000001 | ABL1     |       0.00591 | HAS_METHYLATION_OBSERVATION |
#     +------------+----------+---------------+-----------------------------+
#     
#     
#     Just Edges group 2 ..
#     
#     Number of records: 500000     HAS_MUTATION_OBSERVATION           41_cell_line_gene_mutation.txt.gz
#     Number of records: 500000     HAS_MUTATION_OBSERVATION           42_cell_line_gene_mutation.txt.gz
#        ...
#     Number of records: 500000     HAS_MUTATION_OBSERVATION           63_cell_line_gene_mutation.txt.gz
#     Number of records: 500000     HAS_MUTATION_OBSERVATION           64_cell_line_gene_mutation.txt.gz
#     Number of records: 422852     HAS_MUTATION_OBSERVATION           65_cell_line_gene_mutation.txt.gz
#     
#     Concatenating ..
#     
#     Number of records: 12422852   HAS_MUTATION_OBSERVATION        
#     +------------+----------+---------------+--------------------+--------------------------+
#     | START_ID   | END_ID   |   observation | mutation           | TYPE                     |
#     |------------+----------+---------------+--------------------+--------------------------|
#     | ACH-000828 | TNFRSF14 |             0 | TNFRSF14.1:2488170 | HAS_MUTATION_OBSERVATION |
#     | ACH-000828 | TNFRSF14 |             0 | TNFRSF14.1:2489805 | HAS_MUTATION_OBSERVATION |
#     +------------+----------+---------------+--------------------+--------------------------+



#  Step 02:  Create/Load graph ..

In [None]:

#  At this point we have several Pandas DataFrames; import them into the graph-
#
#     Why use Pandas DataFrames versus Dask ?
#
#     .  The data is small enough
#     .  Dask DataFrames currently (beta) have to be sourced from a shared/public drive
#
#  Three separate importer sessions are used: all nodes, then the first group
#  of edges, then the (large) mutation edges.
#


from katana.remote import import_data


#  Just nodes
#
#  Each node DataFrame is imported with its label doubling as its id space,
#  and with the "id" column as the node id.
#
with import_data.DataFrameImporter(my_graph) as df_importer:
   for l_label in ("DRUG", "GDSC", "CELL_LINE", "GENE"):
      df_importer.nodes_dataframe(
         pd_nodes[l_label],
         id_column             = "id",
         id_space              = l_label,
         label                 = l_label,
         )
   df_importer.insert()


#  Just edges
#
#  One row per edge type: ( edge type, source id space, destination id space ).
#  The edge type is also the key into pd_edges1.
#
l_edge_plan = [
   ( "FOR_DRUG"                   , "GDSC"      , "DRUG"      ),
   ( "HAS_CELL_LINE"              , "GDSC"      , "CELL_LINE" ),                                     #  Alternately called "HAS" and "FOR"
   ( "HAS_EXPRESSION_OBSERVATION" , "CELL_LINE" , "GENE"      ),
   ( "HAS_METHYLATION_OBSERVATION", "CELL_LINE" , "GENE"      ),
   ]

with import_data.DataFrameImporter(my_graph) as df_importer:
   for (l_type, l_src_space, l_dst_space) in l_edge_plan:
      df_importer.edges_dataframe(
         pd_edges1[l_type],
         source_id_space       = l_src_space,
         destination_id_space  = l_dst_space,
         source_column         = "START_ID",
         destination_column    = "END_ID",
         type                  = l_type
         )
   df_importer.node_id_property_name("id")
   df_importer.insert()


#  Tell the reader what to expect before starting the slow part
#
l_notice = (
   "",
   "This last dataframe is large, and takes a many minutes",
   "before you will see any output/progress ..",
   "",
   "Further, we are loading using Pandas dataframes from the",
   "Jupyter container, which is the slowest choice. For speed,",
   "we should be using Dask dataframes loaded from files",
   "accessible from the KGIP worker nodes.",
   "",
   )
for l_line in l_notice:
   print(l_line)


#  The mutation edges, all files combined
#
with import_data.DataFrameImporter(my_graph) as df_importer:
   df_importer.edges_dataframe(
      pd_edges2_all,
      source_id_space       = "CELL_LINE",
      destination_id_space  = "GENE",
      source_column         = "START_ID",
      destination_column    = "END_ID",
      type                  = "HAS_MUTATION_OBSERVATION"
      )
   df_importer.node_id_property_name("id")
   df_importer.insert()

print("")
print("--")



          0/? [?op/s]

          0/? [?op/s]


This last dataframe is large, and takes a many minutes
before you will see any output/progress ..

Further, we are loading using Pandas dataframes from the
Jupyter container, which is the slowest choice. For speed,
we should be using Dask dataframes loaded from files
accessible from the KGIP worker nodes.



#  Step 00: For additional diagnostics, move to the next notebook ..

In [None]:

#  Check the above, runs counts
#
#  Two Cypher queries count the nodes per label and the edges per type in the
#  graph. Each count is printed next to the number of rows in the DataFrame
#  that was read from file for that label / type.
#
#  The query results are walked with itertuples(), so they are presumably
#  pandas DataFrames -- TODO confirm.
#

l_node_query = """
   MATCH (n) 
   WITH DISTINCT LABELS(n) AS temp, COUNT(n) AS tempCnt
   UNWIND temp AS label
   RETURN label, SUM(tempCnt) AS cnt
   ORDER BY label
   """
l_result1 = my_graph.query_unpaginated(l_node_query)

l_edge_query = """
   MATCH (m)-[r]->(n) 
   WITH DISTINCT TYPE(r) AS type, COUNT(r) AS cnt
   RETURN type, cnt
   ORDER BY type
   """
l_result2 = my_graph.query_unpaginated(l_edge_query)


#  Nodes: the label is the key into pd_nodes
#
print()
for l_row in l_result1.itertuples():
   l_from_file = len(pd_nodes[l_row.label])
   print("Node, %-38s   Number of records in graph: %-8d   Number of records from file: %-8d" % (l_row.label, l_row.cnt, l_from_file))
print()


#  Edges: the mutation edges live in pd_edges2_all, every other type is a key
#  into pd_edges1
#
print()
for l_row in l_result2.itertuples():
   l_frame = pd_edges2_all if (l_row.type == "HAS_MUTATION_OBSERVATION") else pd_edges1[l_row.type]
   print("Edge, %-38s   Number of records in graph: %-8d   Number of records from file: %-8d" % (l_row.type, l_row.cnt, len(l_frame)))
print()

print("--")


#  Sample output,
#
#     Node, CELL_LINE                                Number of records in graph: 1457       Number of records from file: 1457    
#     Node, DRUG                                     Number of records in graph: 223        Number of records from file: 223     
#     Node, GDSC                                     Number of records in graph: 266        Number of records from file: 266     
#     Node, GENE                                     Number of records in graph: 719        Number of records from file: 719     
#     
#     Edge, FOR_DRUG                                 Number of records in graph: 238        Number of records from file: 266     
#     Edge, HAS_CELL_LINE                            Number of records in graph: 255360     Number of records from file: 257754  
#     Edge, HAS_EXPRESSION_OBSERVATION               Number of records in graph: 391017     Number of records from file: 391017  
#     Edge, HAS_METHYLATION_OBSERVATION              Number of records in graph: 426500     Number of records from file: 426500  
#     Edge, HAS_MUTATION_OBSERVATION                 Number of records in graph: 12422852   Number of records from file: 12422852



#  Step 03: Create bi-directional edges

In [None]:

#  Above, we created edges in one direction.
#
#  Here, we use Cypher to create bi-directional edges
#

def f_getedge():
   """
   Summarise the edges currently in the graph.

   Runs one Cypher query against the notebook-level my_graph handle and
   returns whatever query_unpaginated() gives back, without post-processing.

   The query returns four columns, ordered by edge_type:
      edge_type    TYPE() of the edge
      edge_count   COUNT() of edges in that group
      src_node     LABELS() of the node the edge starts at
      dst_node     LABELS() of the node the edge ends at
   """
   l_edge_summary_query = """
   
      MATCH (n) - [r] -> (m) 
      WITH DISTINCT TYPE(r) AS edge_type, COUNT(r) AS edge_count, LABELS(n) AS src_node, LABELS(m) AS dst_node
      RETURN edge_type, edge_count, src_node, dst_node
      ORDER BY edge_type
      
      """
      #
   return my_graph.query_unpaginated(l_edge_summary_query)


#  Edge summary before any reverse edges are created
#
l_result = f_getedge()
   #
print(tabulate(l_result, headers = "keys", tablefmt = "psql", showindex = False))


   ###


#  Loop thru the result set from above. For every (edge type, source label,
#  destination label) row, one Cypher statement creates an edge of the same
#  type in the opposite direction for each matching edge.
#
#  Be advised,
#
#     .  You can create the same edge multiple times, which can be bad for
#        any analytics you run.
#        We don't check for that here; running this cell again adds another
#        set of reverse edges.
#
#     .  Vertices can have multiple labels, hence this property coming back
#        as an array. We process just the first label.
#
#     .  Rows whose source or destination label list is empty are skipped;
#        indexing [0] into an empty list would otherwise stop the cell with
#        an IndexError. The sample output recorded for this cell shows such
#        rows ( src_node [] , dst_node [] ) once reverse edges exist.
#        NOTE(review): confirm why the server reports the reverse edges
#        without node labels.
#

l_total = len(l_result)
   #
for l_cntr, l_each in enumerate(l_result.itertuples(), start = 1):

   if (len(l_each.src_node) == 0 or len(l_each.dst_node) == 0):
      print("Skip   Edge %d of %d: %-32s   (no node labels reported)" % (l_cntr, l_total, l_each.edge_type))
      print()
      continue

   l_src_label = l_each.src_node[0]
   l_dst_label = l_each.dst_node[0]
      #
   print("Create Edge %d of %d: %-32s   %s  -->  %s" % (l_cntr, l_total, l_each.edge_type, l_src_label, l_dst_label) )
      #
   #  The labels and edge type come from the graph itself (the summary query
   #  above), not from user input
   l_query  = """
   
      MATCH (n: {0}) - [r: {1}] -> (m: {2}) 
      WITH n, m 
      CREATE (m) - [e: {1}] -> (n)
      
      """.format(l_src_label, l_each.edge_type, l_dst_label)
         #
   #  Nothing is read back from the CREATE, so its result is not kept
   my_graph.query_unpaginated(l_query)
   print()


#  Edge summary again, now including the reverse edges
#
l_result = f_getedge()
   #
print(tabulate(l_result, headers = "keys", tablefmt = "psql", showindex = False))


print("--")


#  Sample output,
#
#     +-----------------------------+--------------+---------------+---------------+
#     | edge_type                   |   edge_count | src_node      | dst_node      |
#     |-----------------------------+--------------+---------------+---------------|
#     | FOR_DRUG                    |          238 | ['GDSC']      | ['DRUG']      |
#     | HAS_CELL_LINE               |       255360 | ['GDSC']      | ['CELL_LINE'] |
#     | HAS_EXPRESSION_OBSERVATION  |       391017 | ['CELL_LINE'] | ['GENE']      |
#     | HAS_METHYLATION_OBSERVATION |       426500 | ['CELL_LINE'] | ['GENE']      |
#     | HAS_MUTATION_OBSERVATION    |     12422852 | ['CELL_LINE'] | ['GENE']      |
#     +-----------------------------+--------------+---------------+---------------+
#     
#     Create Edge 1 of 5: FOR_DRUG                           GDSC  -->  DRUG
#     Create Edge 2 of 5: HAS_CELL_LINE                      GDSC  -->  CELL_LINE
#     Create Edge 3 of 5: HAS_EXPRESSION_OBSERVATION         CELL_LINE  -->  GENE
#     Create Edge 4 of 5: HAS_METHYLATION_OBSERVATION        CELL_LINE  -->  GENE
#     Create Edge 5 of 5: HAS_MUTATION_OBSERVATION           CELL_LINE  -->  GENE
#     
#     +-----------------------------+--------------+---------------+---------------+
#     | edge_type                   |   edge_count | src_node      | dst_node      |
#     |-----------------------------+--------------+---------------+---------------|
#     | FOR_DRUG                    |          238 | []            | []            |
#     | FOR_DRUG                    |          238 | ['GDSC']      | ['DRUG']      |
#     | HAS_CELL_LINE               |       255360 | ['GDSC']      | ['CELL_LINE'] |
#     | HAS_CELL_LINE               |       255360 | []            | []            |
#     | HAS_EXPRESSION_OBSERVATION  |       391017 | []            | []            |
#     | HAS_EXPRESSION_OBSERVATION  |       391017 | ['CELL_LINE'] | ['GENE']      |
#     | HAS_METHYLATION_OBSERVATION |       426500 | ['CELL_LINE'] | ['GENE']      |
#     | HAS_METHYLATION_OBSERVATION |       426500 | []            | []            |
#     | HAS_MUTATION_OBSERVATION    |     12422852 | []            | []            |
#     | HAS_MUTATION_OBSERVATION    |     12422852 | ['CELL_LINE'] | ['GENE']      |
#     +-----------------------------+--------------+---------------+---------------+

