# Before executing, ensure Cytoscape is open.
### Also note that this code will clear any Cytoscape session that is already running.

In [15]:
import pandas as pd

from string import Template
from pandas.io.json import json_normalize
from SPARQLWrapper import SPARQLWrapper, JSON

def query_wikidata(sparql_query, sparql_service_url):
    """
    Run a SPARQL query against an endpoint and return the answer as a table.

    sparql_query       -- text of the SPARQL query to send
    sparql_service_url -- URL of the SPARQL endpoint to send it to

    Returns the pandas DataFrame produced by passing the "bindings" list of
    the endpoint's JSON response through json_normalize.
    """
    endpoint = SPARQLWrapper(sparql_service_url)
    endpoint.setReturnFormat(JSON)
    endpoint.setQuery(sparql_query)

    # Execute the request and decode the JSON payload into Python objects.
    response = endpoint.query().convert()
    bindings = response["results"]["bindings"]
    return json_normalize(bindings)

# SPARQL endpoint used by every query in this cell.
sparql_wikipathways_url = "http://sparql.wikipathways.org/"
wikipathway_identifier = """WP3381"""  # Identifier for DNA mismatch repair pathway
g = """MSH1"""  # NOTE(review): not referenced in the visible cells -- confirm before removing

# Step 1: fetch the DataNode participants (and their labels) of every
# interaction that is part of the selected pathway.
q2 = Template("""PREFIX wp:    <http://vocabularies.wikipathways.org/wp#>

SELECT DISTINCT ?pathway ?interaction ?participants ?DataNodeLabel
WHERE {

   ?pathway a wp:Pathway .
   ?pathway dc:identifier <http://identifiers.org/wikipathways/$identifier> .
   ?interaction dcterms:isPartOf ?pathway . 
   ?interaction a wp:Interaction .
   ?interaction wp:participants ?participants .
   ?participants a wp:DataNode .
   ?participants rdfs:label ?DataNodeLabel .  
}""")
q2 = q2.substitute(identifier=wikipathway_identifier)
mmr = query_wikidata(q2, sparql_wikipathways_url)

# Step 2: for each participant label, look up the "Homo sapiens" pathways that
# contain a GeneProduct whose label matches it. The template is built once
# here instead of on every loop iteration.
# NOTE(review): $gene is inserted into a regex FILTER unescaped, so the match
# is a pattern/substring match rather than an exact comparison.
gene_query_template = Template("""PREFIX wp:      <http://vocabularies.wikipathways.org/wp#>
    PREFIX rdfs:    <http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dcterms: <http://purl.org/dc/terms/>

    SELECT DISTINCT ?pathway str(?label) as ?geneProduct ?pwTitle
    WHERE {
         ?pathway dc:title ?pwTitle .
         ?pathway wp:organismName "Homo sapiens"^^xsd:string .
        ?geneProduct a wp:GeneProduct . 
        ?geneProduct rdfs:label ?label .
        ?geneProduct dcterms:isPartOf ?pathway .
        FILTER regex(str(?label), "$gene"). 
    }""")

gene_list = []   # labels for which the lookup returned at least one row
gene_frames = []  # one result DataFrame per entry in gene_list

# sorted() makes the query order (and therefore gene_list) reproducible;
# plain set iteration order can differ between runs.
for gene in sorted(set(mmr['DataNodeLabel.value'].tolist())):
    # Labels containing a space, comma or colon are skipped.
    if " " in gene or "," in gene or ":" in gene:
        continue
    q = gene_query_template.substitute(gene=gene)
    try:
        hits = query_wikidata(q, sparql_wikipathways_url)
    except Exception:
        # Best-effort: a failing lookup skips this label instead of aborting
        # the whole run. Unlike a bare ``except:``, this no longer swallows
        # KeyboardInterrupt / SystemExit.
        print("Query returned 0 objects for "+gene)
        continue
    if hits.empty:
        # Detect an empty answer explicitly rather than relying on
        # query_wikidata raising for it.
        print("Query returned 0 objects for "+gene)
        continue
    gene_frames.append(hits)
    gene_list.append(gene)

# Concatenate once at the end instead of growing the frame inside the loop.
p = pd.concat(gene_frames) if gene_frames else pd.DataFrame()

Query returned 0 objects for dTTP
Query returned 0 objects for PPi
Query returned 0 objects for dATP
Query returned 0 objects for MSH2_HUMAN
Query returned 0 objects for MSH6_HUMAN
Query returned 0 objects for MSH3
Query returned 0 objects for dGTP
Query returned 0 objects for dCTP
Query returned 0 objects for PMS2


In [16]:
# Start from a clean 0..n-1 index, then keep only the rows whose gene-product
# label is exactly one of the labels collected in gene_list.
p = p.reset_index(drop=True)
p = p[p['geneProduct.value'].isin(gene_list)]
p = p.sort_values(['geneProduct.value', 'pwTitle.value'])

# Remove pathways inferred from Homo sapiens pathways: keep a single row per
# (gene product, pathway title) pair.
# This replaces a hand-written neighbour-comparison loop that used the removed
# DataFrame.ix accessor, tried to step back with ``i -= 1`` inside a
# ``for ... in range`` loop (which has no effect, so runs of three or more
# duplicates were not fully removed), and could index past the end of the
# frame after dropping a row.
p = p.drop_duplicates(subset=['geneProduct.value', 'pwTitle.value'], keep='first')

In [20]:
# --- Edge table -------------------------------------------------------------
# Three parallel lists, one entry per edge: source node, edge type, target node.
source_node = []
edge_type = []
target_node = []

# One "Pathway-Gene" edge from the central pathway node to every gene.
for gene in gene_list:
    source_node.append("Mismatch Repair")
    edge_type.append("Pathway-Gene")
    target_node.append(gene)

# One "Gene-Pathway" edge from each gene to every other pathway it appears in.
# Iterating the two columns in lockstep replaces per-row lookups through the
# removed DataFrame.ix accessor.
for gene, pathway in zip(p['geneProduct.value'], p['pwTitle.value']):
    if pathway == 'Mismatch repair':
        # Rows with this pathway title are not turned into Gene-Pathway edges.
        continue
    source_node.append(gene)
    edge_type.append("Gene-Pathway")
    target_node.append(pathway)

cy_df = pd.DataFrame(
    {"source": source_node, "interaction": edge_type, "target": target_node},
    columns=["source", "interaction", "target"],
)

# --- Node table -------------------------------------------------------------
# Build a new list so that source_node is not extended as a side effect.
nodes = source_node + target_node

# Hoisted out of the loop: a set gives constant-time membership tests instead
# of rebuilding and scanning a list for every node.
known_gene_labels = set(mmr['DataNodeLabel.value'].tolist())

# A node counts as a 'Gene' when its name is one of the pathway's DataNode
# labels; everything else is treated as a 'Pathway'.
types = ['Gene' if node in known_gene_labels else 'Pathway' for node in nodes]

node_df = pd.DataFrame({'node': nodes, 'type': types}, columns=['node', 'type'])

In [21]:
from py2cytoscape.data.cyrest_client import CyRestClient

# Connect to Cytoscape through CyRestClient and discard the current session
# before building the network.
cy = CyRestClient()
cy.session.delete()

# Create the network from the edge table, lay it out, then attach the node
# attributes (node_df's 'node' column is matched against the 'name' column).
net = cy.network.create_from_dataframe(cy_df, name='Gene pathway interactions')
cy.layout.apply(name='force-directed-cl', network=net)
net.update_node_table(node_df, network_key_col='name', data_key_col='node')

my_style = cy.style.create('Minimal')

# Default visual properties passed to my_style.update_defaults().
# A stray '' literal that used to sit before 'EDGE_WIDTH' has been removed: it
# only worked because Python concatenates adjacent string literals, so the
# resulting key was still 'EDGE_WIDTH'.
new = {
    # Node defaults
    'NODE_FILL_COLOR': '#eeeeff',
    'NODE_SIZE': 20,
    'NODE_BORDER_WIDTH': 0,
    'NODE_TRANSPARENCY': 120,
    'NODE_LABEL_COLOR': 'white',

    # Edge defaults
    'EDGE_WIDTH': 3,
    'EDGE_STROKE_UNSELECTED_PAINT': '#aaaaaa',
    'EDGE_LINE_TYPE': 'SOLID',
    'EDGE_TRANSPARENCY': 120,

    # Network defaults
    'NETWORK_BACKGROUND_PAINT': 'black'
}

# Edge colour keyed on the 'interaction' column of the edge table.
kv_pair = {
    'Gene-Pathway': 'yellow',
    'Pathway-Gene': 'lightblue'
}
my_style.create_discrete_mapping(column='interaction',
                                 col_type='String',
                                 vp='EDGE_STROKE_UNSELECTED_PAINT',
                                 mappings=kv_pair)

# Node fill colour keyed on the 'type' column of the node table.
node_pair = {
    'Gene': 'lightgreen',
    'Pathway': 'pink'
}
my_style.create_discrete_mapping(column='type',
                                 col_type='String',
                                 vp='NODE_FILL_COLOR',
                                 mappings=node_pair)

my_style.update_defaults(new)
cy.style.apply(my_style, net)
