# Unsupervised prediction of candidate compounds for remyelination

In [1]:
import py2neo
import pandas

## Connect to neo4j

In [2]:
# Open a handle to the Neo4j database served on localhost (port 7474)
neo4j_uri = 'http://localhost:7474/db/data/'
neo = py2neo.Graph(neo4j_uri)

In [3]:
def to_df(results):
    """
    Build a pandas.DataFrame from a py2neo.RecordList.

    The rows are read from `results.records` and the column labels
    from `results.columns`.
    """
    header = results.columns
    rows = results.records
    return pandas.DataFrame(data=rows, columns=header)

## Add differentially expressed genes in oligodendrocyte differentiation

In [4]:
# LOAD CSV is executed by the Neo4j server, so the file is referenced by a
# file:// URL that must be readable from the server's filesystem.
# NOTE(review): this absolute path is machine-specific; adjust it when running elsewhere.
path = 'file:///home/dhimmels/Documents/github/myelinet/data/OPC-differentiation-diffex-genes.tsv'
query = '''
LOAD CSV WITH HEADERS FROM '{path}' AS line FIELDTERMINATOR '\t'
MATCH (bp:BiologicalProcess)
MATCH (gene:Gene)
WHERE bp.identifier = 'GO:0048709' // oligodendrocyte differentiation
AND gene.name = line.gene_symbol
MERGE (bp)-[rel:REGULATES_BPrG]->(gene)
// LOAD CSV yields every field as a string: store the numeric fields as floats
// so they can be compared and sorted numerically (unparseable values become null)
SET rel.L2FC = toFloat(line.L2FC)
SET rel.experiment = line.experiment
SET rel.p_value = toFloat(line.p_value)
RETURN count(rel)
'''.format(path = path)

# Create (or update, since MERGE reuses an existing relationship) the regulation
# relationships. The returned value counts the rows whose gene symbol matched a
# Gene node, so it is the same on a re-run even though nothing new is created.
neo.cypher.execute_one(query)

298

## Query CrGrBP metapaths ending on oligodendrocyte differentiation

The `REGULATES_BPrG` relationships are from the differential expression data added above.

In [5]:
# Score every Compound by its CrGrBP paths to the BiologicalProcess GO:0048709.
# - OPTIONAL MATCH keeps compounds without any matching path, so they still yield a row.
# - The relationship patterns are undirected, and the up- and down-regulation
#   relationship types are pooled into a single step.
# - For each path, `degrees` lists, for both ends of each step, the number of
#   relationships of that step's type(s) attached to the node.
# - Each path contributes the product of degree ^ -0.5 over `degrees`; these
#   contributions are summed per compound into the DWPC column
#   (presumably "degree-weighted path count" -- confirm terminology).
query = '''
MATCH (n0:Compound)
OPTIONAL MATCH paths = 
  (n0)-[:UPREGULATES_CuG|:DOWNREGULATES_CdG]-(n1)-[:REGULATES_BPrG]-(n2:BiologicalProcess)
WHERE n2.identifier = 'GO:0048709' // oligodendrocyte differentiation
WITH
  n0 AS source, paths,
  // Extract the degrees along each path
  [
    size((n0)-[:UPREGULATES_CuG|:DOWNREGULATES_CdG]-()),
    size(()-[:UPREGULATES_CuG|:DOWNREGULATES_CdG]-(n1)),
    size((n1)-[:REGULATES_BPrG]-()),
    size(()-[:REGULATES_BPrG]-(n2))
  ] AS degrees
RETURN
  source.identifier AS drugbank_id,
  source.name AS drugbank_name,
  count(paths) AS CrGrBP_path_count,
  sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -0.5)) AS CrGrBP_DWPC
ORDER BY CrGrBP_DWPC DESC, drugbank_name
'''

# Run the query, convert the result to a DataFrame and preview the first rows
# (rows are ordered by descending DWPC, then by compound name)
regulation_df = to_df(neo.cypher.execute(query))
regulation_df.head()

Unnamed: 0,drugbank_id,drugbank_name,CrGrBP_path_count,CrGrBP_DWPC
0,DB00773,Etoposide,14,0.016302
1,DB01045,Rifampicin,1,0.016231
2,DB00631,Clofarabine,14,0.014171
3,DB01254,Dasatinib,17,0.01394
4,DB00602,Ivermectin,18,0.013851


## Query CbGiGpBP metapaths ending on myelination

The gene at the bolded position (Cb**G**iGpBP) is required to be expressed in the central nervous system.

In [6]:
# Score every Compound by its CbGiGpBP paths to the BiologicalProcess GO:0042552.
# - OPTIONAL MATCH keeps compounds without any matching path, so they still yield a row.
# - A path only counts when its first gene (n1) has an EXPRESSES_AeG relationship
#   with the Anatomy node 'UBERON:0001017'.
# - For each path, `degrees` lists, for both ends of each step, the number of
#   relationships of that step's type attached to the node.
# - Each path contributes the product of degree ^ -0.5 over `degrees`; these
#   contributions are summed per compound into the DWPC column
#   (presumably "degree-weighted path count" -- confirm terminology).
query = '''
// Find CbGiGpBP where first gene is expressed in the CNS
MATCH (n0:Compound)
OPTIONAL MATCH paths = 
  (n0)-[:BINDS_CbG]-(n1)-[:INTERACTS_GiG]-(n2)-[:PARTICIPATES_GpBP]-(n3:BiologicalProcess)
WHERE n3.identifier = 'GO:0042552' // myelination
AND exists((:Anatomy {identifier: 'UBERON:0001017'})-[:EXPRESSES_AeG]-(n1))
WITH
  n0 AS source, paths,
  // Extract the degrees along each path
  [
    size((n0)-[:BINDS_CbG]-()),
    size(()-[:BINDS_CbG]-(n1)),
    size((n1)-[:INTERACTS_GiG]-()),
    size(()-[:INTERACTS_GiG]-(n2)),
    size((n2)-[:PARTICIPATES_GpBP]-()),
    size(()-[:PARTICIPATES_GpBP]-(n3))
  ] AS degrees
RETURN
  source.identifier AS drugbank_id,
  source.name AS drugbank_name,
  count(paths) AS CbGiGpBP_path_count,
  sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -0.5)) AS CbGiGpBP_DWPC
ORDER BY CbGiGpBP_DWPC DESC, drugbank_name
'''

# Run the query, convert the result to a DataFrame and preview the first rows
# (rows are ordered by descending DWPC, then by compound name)
target_df = to_df(neo.cypher.execute(query))
target_df.head()

Unnamed: 0,drugbank_id,drugbank_name,CbGiGpBP_path_count,CbGiGpBP_DWPC
0,DB00909,Zonisamide,4,0.002521
1,DB01141,Micafungin,1,0.002252
2,DB06663,Pasireotide,2,0.001263
3,DB04786,Suramin,4,0.001093
4,DB00104,Octreotide,2,0.001032


## Merge results from previous queries

In [7]:
# Inner-join the two per-compound tables on the columns they have in common
# (drugbank_id and drugbank_name), keeping the row order of regulation_df
merged_df = pandas.merge(regulation_df, target_df)
merged_df.head()

Unnamed: 0,drugbank_id,drugbank_name,CrGrBP_path_count,CrGrBP_DWPC,CbGiGpBP_path_count,CbGiGpBP_DWPC
0,DB00773,Etoposide,14,0.016302,1,5e-06
1,DB01045,Rifampicin,1,0.016231,1,4e-06
2,DB00631,Clofarabine,14,0.014171,0,0.0
3,DB01254,Dasatinib,17,0.01394,37,0.000524
4,DB00602,Ivermectin,18,0.013851,0,0.0


In [8]:
# Save the merged table as tab-separated text without the index column;
# floats are written with 4 significant digits
output_path = 'data/queries.tsv'
merged_df.to_csv(output_path, sep='\t', index=False, float_format='%.4g')