# Unsupervised prediction of candidate compounds for remyelination

In [1]:
import py2neo
import pandas

## Connect to neo4j

In [2]:
# Open a py2neo handle to the Neo4j HTTP endpoint on localhost:7474.
# The Cypher queries in the cells below are all executed through `neo`.
neo = py2neo.Graph('http://localhost:7474/db/data/')

In [3]:
def to_df(results):
    """
    Build a pandas.DataFrame from a py2neo.RecordList.

    The rows are taken from `results.records` and the column labels
    from `results.columns`.
    """
    rows = results.records
    header = results.columns
    frame = pandas.DataFrame(rows, columns=header)
    return frame

## Add differentially expressed genes in oligodendrocyte differentiation

Load the [Dugas 2006](https://doi.org/10.1523/jneurosci.2572-06.2006 "Functional Genomic Analysis of Oligodendrocyte Differentiation") top 50 up-regulated and top 50 down-regulated genes during oligodendrocyte differentiation.

In [4]:
# Cypher statement that reads the Dugas 2006 TSV row by row. For each row it
# finds the Gene node whose `name` equals the row's `hgnc_symbol_manual` value
# and MERGEs a REGULATES_BPrG relationship from the GO:0048709
# BiologicalProcess node to that gene, tagging it with source = 'Dugas 2006'.
# Rows whose symbol matches no Gene node produce no relationship.
query = '''
// Load differentially expressed genes in oligodendrocyte differentiation from Dugas et al 2006 (https://doi.org/10.1523/jneurosci.2572-06.2006)
LOAD CSV WITH HEADERS FROM 'https://gist.githubusercontent.com/dhimmel/45bcff9500cd99f85200/raw/fa13c2c96c59a53b5afe9ed02f8deef72813555d/OPC-differentiation-DEGs.tsv' AS line FIELDTERMINATOR '\t'
MATCH (bp:BiologicalProcess)
MATCH (gene:Gene)
WHERE bp.identifier = 'GO:0048709' // oligodendrocyte differentiation
AND gene.name = line.hgnc_symbol_manual
MERGE (bp)-[rel:REGULATES_BPrG]->(gene)
SET rel.source = 'Dugas 2006'
RETURN count(rel)
'''

# Run the statement against the database (this writes to the graph) and
# display the single returned value, count(rel). Because MERGE reuses a
# relationship that already exists instead of creating a second one, the
# count covers relationships that were matched as well as newly created,
# and re-running this cell should not add duplicates.
neo.cypher.execute_one(query)

88

## Query CrGrBP metapaths ending on oligodendrocyte differentiation

The `REGULATES_BPrG` relationships are from the Dugas 2006 data added above.

In [5]:
# Per-compound path count and DWPC for the two-edge pattern
#   Compound -[UPREGULATES_CuG | DOWNREGULATES_CdG]- n1 -[REGULATES_BPrG]- GO:0048709
# For every matched path the query collects four degrees (both endpoints of
# each of the two edges, counted over the same relationship types), multiplies
# d ^ -0.5 over them, and sums that product over all paths of a compound.
# OPTIONAL MATCH keeps compounds that have no such path; count(paths) is 0 for them.
# NOTE(review): the exponent 0.5 is hard-coded in the Cypher text; presumably
# it is the DWPC damping exponent — confirm before changing.
query = '''
MATCH (n0:Compound)
OPTIONAL MATCH paths = 
  (n0)-[:UPREGULATES_CuG|:DOWNREGULATES_CdG]-(n1)-[:REGULATES_BPrG]-(n2:BiologicalProcess)
WHERE n2.identifier = 'GO:0048709' // oligodendrocyte differentiation
WITH
  n0 AS source, paths,
  // Extract the degrees along each path
  [
    size((n0)-[:UPREGULATES_CuG|:DOWNREGULATES_CdG]-()),
    size(()-[:UPREGULATES_CuG|:DOWNREGULATES_CdG]-(n1)),
    size((n1)-[:REGULATES_BPrG]-()),
    size(()-[:REGULATES_BPrG]-(n2))
  ] AS degrees
RETURN
  source.identifier AS drugbank_id,
  source.name AS drugbank_name,
  count(paths) AS CrGrBP_path_count,
  sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -0.5)) AS CrGrBP_DWPC
ORDER BY CrGrBP_DWPC DESC, drugbank_name
'''

# Execute the query and convert the result to a DataFrame with one row per
# compound, sorted by descending DWPC; then preview the top rows.
regulation_df = to_df(neo.cypher.execute(query))
regulation_df.head()

Unnamed: 0,drugbank_id,drugbank_name,CrGrBP_path_count,CrGrBP_DWPC
0,DB00370,Mirtazapine,1,0.0113
1,DB00491,Miglitol,1,0.010555
2,DB08974,Flubendazole,5,0.009516
3,DB00784,Mefenamic acid,1,0.009009
4,DB00903,Ethacrynic acid,5,0.008531


## Query CbGiGpBP metapaths ending on myelination

The gene at the bolded position (Cb**G**iGpBP) is required to be expressed in the central nervous system.

In [6]:
# Per-compound path count and DWPC for the three-edge pattern
#   Compound -[BINDS_CbG]- n1 -[INTERACTS_GiG]- n2 -[PARTICIPATES_GpBP]- GO:0042552
# A path only counts when n1 also has an EXPRESSES_AeG relationship with the
# Anatomy node UBERON:0001017. For every matched path the query collects six
# degrees (both endpoints of each of the three edges), multiplies d ^ -0.5
# over them, and sums that product over all paths of a compound.
# OPTIONAL MATCH keeps compounds that have no such path; count(paths) is 0 for them.
# NOTE(review): the pattern does not require n1 <> n2, so a gene with a
# self-interaction would presumably be counted as a path — verify whether
# INTERACTS_GiG self-loops exist in this graph.
query = '''
// Find CbGiGpBP where first gene is expressed in the CNS
MATCH (n0:Compound)
OPTIONAL MATCH paths = 
  (n0)-[:BINDS_CbG]-(n1)-[:INTERACTS_GiG]-(n2)-[:PARTICIPATES_GpBP]-(n3:BiologicalProcess)
WHERE n3.identifier = 'GO:0042552' // myelination
AND exists((:Anatomy {identifier: 'UBERON:0001017'})-[:EXPRESSES_AeG]-(n1))
WITH
  n0 AS source, paths,
  // Extract the degrees along each path
  [
    size((n0)-[:BINDS_CbG]-()),
    size(()-[:BINDS_CbG]-(n1)),
    size((n1)-[:INTERACTS_GiG]-()),
    size(()-[:INTERACTS_GiG]-(n2)),
    size((n2)-[:PARTICIPATES_GpBP]-()),
    size(()-[:PARTICIPATES_GpBP]-(n3))
  ] AS degrees
RETURN
  source.identifier AS drugbank_id,
  source.name AS drugbank_name,
  count(paths) AS CbGiGpBP_path_count,
  sum(reduce(pdp = 1.0, d in degrees| pdp * d ^ -0.5)) AS CbGiGpBP_DWPC
ORDER BY CbGiGpBP_DWPC DESC, drugbank_name
'''

# Execute the query and convert the result to a DataFrame with one row per
# compound, sorted by descending DWPC; then preview the top rows.
target_df = to_df(neo.cypher.execute(query))
target_df.head()

Unnamed: 0,drugbank_id,drugbank_name,CbGiGpBP_path_count,CbGiGpBP_DWPC
0,DB00909,Zonisamide,4,0.002571
1,DB01141,Micafungin,1,0.002277
2,DB00128,L-Aspartic Acid,5,0.00115
3,DB04786,Suramin,4,0.001103
4,DB00786,Marimastat,5,0.001077


## Merge results from previous queries

In [7]:
# Combine the two per-compound result tables into one row per compound.
# The join keys and join type are spelled out rather than left to merge()'s
# default of "every column the two frames share", so adding or renaming a
# column in either query cannot silently change how rows are matched.
merged_df = regulation_df.merge(
    target_df,
    on=['drugbank_id', 'drugbank_name'],
    how='inner',
)

# Both queries start from every Compound node, so the inner join should keep
# every compound exactly once; fail loudly if that ever stops being true.
assert len(merged_df) == len(regulation_df) == len(target_df), 'merge dropped or duplicated compounds'
assert merged_df['drugbank_id'].is_unique, 'duplicate drugbank_id after merge'

merged_df.head()

Unnamed: 0,drugbank_id,drugbank_name,CrGrBP_path_count,CrGrBP_DWPC,CbGiGpBP_path_count,CbGiGpBP_DWPC
0,DB00370,Mirtazapine,1,0.0113,0,0.0
1,DB00491,Miglitol,1,0.010555,0,0.0
2,DB08974,Flubendazole,5,0.009516,0,0.0
3,DB00784,Mefenamic acid,1,0.009009,0,0.0
4,DB00903,Ethacrynic acid,5,0.008531,4,7.8e-05


In [11]:
# Save the merged table as tab-separated text without the index column;
# '%.4g' writes floats with 4 significant digits.
# NOTE(review): assumes the relative directory data/ already exists — confirm.
merged_df.to_csv('data/queries.tsv', sep='\t', index=False, float_format='%.4g')

In [9]:
# Preview the first rows of merged_df again (same table as displayed right after the merge).
merged_df.head()

Unnamed: 0,drugbank_id,drugbank_name,CrGrBP_path_count,CrGrBP_DWPC,CbGiGpBP_path_count,CbGiGpBP_DWPC
0,DB00370,Mirtazapine,1,0.0113,0,0.0
1,DB00491,Miglitol,1,0.010555,0,0.0
2,DB08974,Flubendazole,5,0.009516,0,0.0
3,DB00784,Mefenamic acid,1,0.009009,0,0.0
4,DB00903,Ethacrynic acid,5,0.008531,4,7.8e-05
