# Experimenting with CXDB and Cypher Queries

This notebook contains example Cypher operations to explore and analyze your knowledge graph. Customize the property names and values to match your specific graph structure.

In [7]:
import sys
import os

# Add the parent directory of the current script to the Python path
cwd = os.getcwd()
dirname = os.path.dirname(cwd)
subdir = os.path.join(dirname, 'cxdb')
print(cwd)
print(dirname)
print(subdir)
sys.path.append(dirname)
sys.path.append(subdir)


print(sys.path)

/Users/idekeradmin/Dropbox/GitHub/cxdb/notebooks
/Users/idekeradmin/Dropbox/GitHub/cxdb
/Users/idekeradmin/Dropbox/GitHub/cxdb/cxdb
['/opt/anaconda3/envs/cxdb_env/lib/python312.zip', '/opt/anaconda3/envs/cxdb_env/lib/python3.12', '/opt/anaconda3/envs/cxdb_env/lib/python3.12/lib-dynload', '', '/opt/anaconda3/envs/cxdb_env/lib/python3.12/site-packages', '/Users/idekeradmin/Dropbox/GitHub/cxdb', '/Users/idekeradmin/Dropbox/GitHub/cxdb', '/Users/idekeradmin/Dropbox/GitHub/cxdb', '/Users/idekeradmin/Dropbox/GitHub/cxdb/notebooks/cxdb', '/Users/idekeradmin/Dropbox/GitHub/cxdb', '/Users/idekeradmin/Dropbox/GitHub/cxdb/notebooks/cxdb', '/Users/idekeradmin/Dropbox/GitHub/cxdb', '/Users/idekeradmin/Dropbox/GitHub/cxdb/cxdb']


In [20]:
from cxdb.core import CXDB
from cxdb.ndex import NDExConnector
from cxdb.utils import load_config

test_network_uuid = load_config("TEST", "test_network_uuid", config_path="~/cxdb/test_config.ini")

# Initialize CXDB and load your knowledge graph
cxdb = CXDB()
ndex_connector = NDExConnector(cxdb, config_path="~/cxdb/test_config.ini")
ndex_connector.from_ndex(test_network_uuid)
cxdb.ndex_connector = ndex_connector

# Helper function to run Cypher queries
def run_query(query):
    result = cxdb.execute_cypher(query)
    print(f"Query: {query}")
    print(f"Result: {result}\n")
    return result

In [28]:
def get_property_vocabulary(df):
    property_counts = {}
    for _, row in df.iterrows():
        properties = row['properties']
        for prop in properties.keys():
            if prop in property_counts:
                property_counts[prop] += 1
            else:
                property_counts[prop] = 1
    
    # Add built-in properties
    for column in df.columns:
        if column != 'properties':
            property_counts[column] = len(df)

    return dict(sorted(property_counts.items(), key=lambda x: x[1], reverse=True))

def get_node_property_vocabulary(cxdb_db):
    cxdb_db.ndex_connector.from_ndex(test_network_uuid)
    node_properties = get_property_vocabulary(cxdb_db.nodes)
    
    # Print node property vocabulary
    print("Node Property Vocabulary:")
    for prop, count in node_properties.items():
        print(f"{prop}: {count}")

get_node_property_vocabulary(cxdb)
cxdb.nodes

Node Property Vocabulary:
represents: 33
location: 33
id: 33
name: 33
type: 33
member: 4


Unnamed: 0,id,name,type,properties
0,1,PAMPs,stimulus,"{'represents': 'signor:SIGNOR-ST11', 'location..."
1,2,TLRs,proteinfamily,"{'represents': 'signor:SIGNOR-PF20', 'location..."
2,3,NfKb-p65/p50,complex,"{'represents': 'signor:SIGNOR-C13', 'location'..."
3,4,Immune_response,phenotype,"{'represents': 'signor:SIGNOR-PH17', 'location..."
4,5,Inflammation,phenotype,"{'represents': 'signor:SIGNOR-PH12', 'location..."
5,6,TIRAP,protein,"{'represents': 'uniprot:P58753', 'location': '..."
6,7,TICAM1,protein,"{'represents': 'uniprot:Q8IUC6', 'location': '..."
7,8,DAMPS,stimulus,"{'represents': 'signor:SIGNOR-ST18', 'location..."
8,9,Viral_dsRNA,stimulus,"{'represents': 'signor:SIGNOR-ST21', 'location..."
9,10,IFIH1,protein,"{'represents': 'uniprot:Q9BYX4', 'location': '..."


In [27]:
# 1. Get all node types and their counts
query = "MATCH (n:protein) RETURN n.Represents as type, n.location AS location"
run_query(query)

Query: MATCH (n:protein) RETURN n.Represents as type, n.location AS location
Result: [{'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'factor'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'factor'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'location': 'cytoplasm'}, {'type': None, 'lo

[{'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'factor'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'factor'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'},
 {'type': None, 'location': 'cytoplasm'}]

In [30]:
# 2. Find nodes with a specific property value
PROPERTY_NAME = "location"
PROPERTY_VALUE = "cytoplasm"
query = f"MATCH (n:protein) WHERE n.{PROPERTY_NAME} = '{PROPERTY_VALUE}' RETURN n.location AS name, n.type AS type"
run_query(query)

AttributeError: 'str' object has no attribute 'condition'

In [None]:
# 3. Find connections between two specific nodes
NODE1_NAME = "node1_name"
NODE2_NAME = "node2_name"
query = f"MATCH (a {{name: '{NODE1_NAME}'}})-[r]-(b {{name: '{NODE2_NAME}'}}) RETURN a.name AS node1, type(r) AS relationship, b.name AS node2"
run_query(query)

In [None]:
# 4. Find nodes with the highest degree (most connections)
query = "MATCH (n)-[r]-() RETURN n.name AS node_name, n.type AS node_type, COUNT(r) AS degree ORDER BY degree DESC LIMIT 5"
run_query(query)

In [None]:
# 5. Find the shortest path between two nodes
START_NODE = "start_node_name"
END_NODE = "end_node_name"
query = f"MATCH p=shortestPath((a {{name: '{START_NODE}'}})-[*]-(b {{name: '{END_NODE}'}})) RETURN [node in nodes(p) | node.name] AS path"
run_query(query)

In [None]:
# 6. Find subgraphs with a specific pattern
NODE_TYPE1 = "type1"
NODE_TYPE2 = "type2"
RELATIONSHIP_TYPE = "relationship_type"
query = f"MATCH (a:{NODE_TYPE1})-[r:{RELATIONSHIP_TYPE}]->(b:{NODE_TYPE2}) RETURN a.name AS {NODE_TYPE1}, b.name AS {NODE_TYPE2}, r.property AS relationship_property LIMIT 10"
run_query(query)