In [79]:
from py2neo import Graph,Node, Relationship
import pandas as pd
import csv

In [164]:
# Link with neo4j database
# Please change to your own configuration
graph = Graph(
host="127.0.0.1",
port=7687,
user="neo4j",
password="123456789"
)

In [174]:
#Import data to neo4j
# Read data prepared
with open('Prepared_data.csv', 'r') as file:
    reader = csv.DictReader(file)
    for row in reader:
        # Create dieasea nodes
        disease_node = Node("Disease", name=row['Name'])
        graph.merge(disease_node, "Disease", "name")
        
        # Creating and linking to other clinical ontologies
        info_nodes = {}
        for column in ['ICD-10', 'OMIM', 'MONDO', 'EFO', 'UMLS', 'NCIT', 'DOID', 'Orphanet']:
            if row[column]:
                info_node = Node(column.upper(), code=row[column])
                graph.merge(info_node, column.upper(), "code")
                info_nodes[column] = info_node
                
                # Creatingrelationship
                relationship = Relationship(disease_node, "HAS_" + column.upper(), info_node)
                graph.merge(relationship)

        # Creating additional relationships between ontologies
        if 'UMLS' in info_nodes:
            for target in ['ICD-10', 'NCIT', 'DOID', 'OMIM']:
                if target in info_nodes:
                    relationship = Relationship(info_nodes['UMLS'], "INCLUDES", info_nodes[target])
                    graph.merge(relationship)

        if 'MONDO' in info_nodes:
            for target in ['OMIM', 'DOID', 'Orphanet', 'EFO']:
                if target in info_nodes:
                    relationship = Relationship(info_nodes['MONDO'], "INCLUDES", info_nodes[target])
                    graph.merge(relationship)

In [176]:
#Define a function that can search ontologies code by disease name
def fetch_disease_name_by_codes_with_relations(disease_name):
    # Cypher query to find all nodes that are related to a known disease name
    query = f"""
    MATCH (d:Disease {{name: '{disease_name}'}})-[r]->(related)
    RETURN d.name as disease_name, type(r) as relationship_type, related.code as related_code
    """
    # Execution of Cypher queries
    results = graph.run(query).data()
    # Check for results
    if not results:
        print("No related codes found.")
        return
    # Print the information found
    for result in results:
        print(f"{result['disease_name']} {result['relationship_type']} code:{result['related_code']}")
    # Get a list of related codes
    related_codes = [result['related_code'] for result in results]
    # Cypher queries to find additional relationships between related codes
    extra_query = """
    MATCH (a)-[r:INCLUDES]->(b)
    WHERE a.code IN $codes OR b.code IN $codes
    RETURN type(r) as relationship_type, labels(a) as from_label, labels(b) as to_label
    """
    # Execution of Cypher queries
    extra_results = graph.run(extra_query, codes=related_codes).data()
    # Print additional relationships found
    for result in extra_results:
            print(f"{result['from_label']} {result['relationship_type']}{result['to_label']}")

# Define functions to get relevant disease names and other information through known code
def fetch_all_by_known_code(known_code):
    # Cypher query to find names of diseases with known code associations
    find_disease_query = f"""
    MATCH (d:Disease)-[r]->(related {{code: '{known_code}'}})
    RETURN d.name as disease_name
    """
    # Execution of Cypher queries
    disease_result = graph.run(find_disease_query).data()
    # Check for results
    if not disease_result:
        print("No matching disease found.")
        return
    # Get the name of the disease found
    disease_name = disease_result[0]['disease_name']
    # Use the name of the disease found to get all other relevant information and relationships
    fetch_disease_name_by_codes_with_relations(disease_name)

In [177]:
if __name__ == "__main__":
    #Import ontologies code want to research
    fetch_all_by_known_code("")

No matching disease found.


In [178]:
#Example
if __name__ == "__main__":
    fetch_all_by_known_code("G23.1")

Progressive supranuclear palsy-parkinsonism syndrome HAS_ORPHANET code:Orphanet_240085
Progressive supranuclear palsy-parkinsonism syndrome HAS_UMLS code:CN201680
Progressive supranuclear palsy-parkinsonism syndrome HAS_MONDO code:MONDO_0009839
Progressive supranuclear palsy-parkinsonism syndrome HAS_OMIM code:OMIM_260540
Progressive supranuclear palsy-parkinsonism syndrome HAS_ICD-10 code:G23.1
['UMLS'] INCLUDES['ICD-10']
['UMLS'] INCLUDES['OMIM']
['MONDO'] INCLUDES['OMIM']
['MONDO'] INCLUDES['ORPHANET']
['UMLS'] INCLUDES['ICD-10']
['UMLS'] INCLUDES['ICD-10']
['UMLS'] INCLUDES['ICD-10']
['UMLS'] INCLUDES['ICD-10']
['UMLS'] INCLUDES['ICD-10']
['UMLS'] INCLUDES['ICD-10']


In [179]:
#Define a function that can search ontologies code by disease name
def fetch_codes_by_disease_name_with_relations(known_disease_name):
    # Cypher query to find all nodes that are related to a known disease name
    query = f"""
    MATCH (d:Disease {{name: '{known_disease_name}'}})-[r]->(related)
    RETURN d.name as disease_name, type(r) as relationship_type, related.code as related_code
    """
    # Execution of Cypher queries
    results = graph.run(query).data()
    # Check for results
    if not results:
        print("No related codes found.")
        return
    # Print the information found
    for result in results:
        print(f"{result['disease_name']} {result['relationship_type']} code:{result['related_code']}")
    
    related_codes = [result['related_code'] for result in results]
    # Cypher queries to find additional relationships between related codes
    extra_query = """
    MATCH (a)-[r:INCLUDES]->(b)
    WHERE a.code IN $codes OR b.code IN $codes
    RETURN type(r) as relationship_type, labels(a) as from_label, labels(b) as to_label
    """
    # Execution of Cypher queries
    extra_results = graph.run(extra_query, codes=related_codes).data()
    # Check for results
    for result in extra_results:
        print(f"{result['from_label']} {result['relationship_type']}{result['to_label']} ")



In [180]:
#Import diease name want to search
fetch_codes_by_disease_name_with_relations("")

No related codes found.


In [181]:
# Example
fetch_codes_by_disease_name_with_relations("Pentalogy of cantrell")

Pentalogy of cantrell HAS_ORPHANET code:Orphanet_1335
Pentalogy of cantrell HAS_NCIT code:NCIT_C99011
Pentalogy of cantrell HAS_UMLS code:C0559483
Pentalogy of cantrell HAS_MONDO code:MONDO_0010742
Pentalogy of cantrell HAS_OMIM code:OMIM_313850
Pentalogy of cantrell HAS_ICD-10 code:Q89.7
['UMLS'] INCLUDES['ICD-10'] 
['UMLS'] INCLUDES['NCIT'] 
['UMLS'] INCLUDES['OMIM'] 
['MONDO'] INCLUDES['OMIM'] 
['MONDO'] INCLUDES['ORPHANET'] 
['UMLS'] INCLUDES['ICD-10'] 


In [182]:
#Define a function that can search for diseases that appear in multiple ontologies
def find_diseases_by_columns(columns):
    # Handle special characters for relationship names and node types
    relationships = [f"HAS_{col.replace('-', '-').upper()}" if '-' in col else f"HAS_{col.upper()}" for col in columns]
    
    # Create part of a Cypher query to match disease nodes that are related to a specified column
    match_clauses = [f"(d:Disease)-[:`{relationship}`]->(`{col}`:`{col.upper().replace('-', '-')}`)" 
                     for relationship, col in zip(relationships, columns)]
    match_query = "MATCH " + ", ".join(match_clauses)
    
    # Complete Cypher query returning disease names that are related to all specified columns
    query = f"{match_query} RETURN d.name as disease_name"
    
    # Execution of Cypher queries
    result = graph.run(query).data()
    
    # Convert results to DataFrame
    df = pd.DataFrame(result)
    
    return df


In [None]:
#Import ontologies want to research
columns_to_query = [""]
df = find_diseases_by_columns(columns_to_query)
df

In [184]:
#Example
columns_to_query = ["icd-10","umls","ncit"]
df = find_diseases_by_columns(columns_to_query)
df

Unnamed: 0,disease_name
0,Congestive heart failure
1,Landau-kleffner syndrome
2,Reflex epilepsy
3,Rhabdomyosarcoma of the cervix uteri
4,Cervical adenosarcoma
...,...
2419,Gastroschisis
2420,Corneal dystrophy
2421,Diabetic retinopathy
2422,Lymphoid interstitial pneumonia
