In [43]:
from neo4j import GraphDatabase
import yaml
import json

In [44]:
id_in_disease = set()

is_first = True
for line in open("to_neo4j/disease_drug.tsv"):
    if is_first:
        is_first = False
    else:
        disease = line.strip().split("\t")[0]
        id_in_disease.add(disease)

In [45]:
len(id_in_disease)

410

In [46]:
with open("config.yaml", "r") as stream:
    try:
        PARAM = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)

In [47]:
driver = GraphDatabase.driver(PARAM["neo4j_url"], auth=(PARAM["neo4j_username"], PARAM["neo4j_password"]))


# Loop through results and do something with them
for depth in range(1, 20):
    query = f"MATCH (:Condition {{name: 'Topical Descriptor'}})<-[:BELONGS_TO *{depth}]-(a:Condition) RETURN COUNT(a)"
    records, summary, keys = driver.execute_query(query, database_="neo4j")
    for record in records:
        print (f"Depth {depth}: {record['COUNT(a)']}")

Depth 1: 7
Depth 2: 38
Depth 3: 164
Depth 4: 509
Depth 5: 859
Depth 6: 719
Depth 7: 406
Depth 8: 181
Depth 9: 63
Depth 10: 19
Depth 11: 12
Depth 12: 8
Depth 13: 3
Depth 14: 0
Depth 15: 0
Depth 16: 0
Depth 17: 0
Depth 18: 0
Depth 19: 0


In [48]:
query = f"MATCH (:Condition {{name: 'Topical Descriptor'}})<-[:BELONGS_TO *2]-(a:Condition) RETURN a"
records, summary, keys = driver.execute_query(query, database_="neo4j")

second_level = set()

for record in records:
    second_level.add(f"{record['a']['name']}")

In [49]:
taboo = []

found = set()

for s in second_level:
    if s not in taboo:
        query = f"MATCH (:Condition {{name: '{s}'}})<-[:BELONGS_TO *]-(a:Condition) RETURN a"

        records, summary, keys = driver.execute_query(query, database_="neo4j")
        for record in records:
            found.add(f"{record['a']['id']}")
    
print (len(id_in_disease - found))

0


In [50]:
len(id_in_disease)

410

In [51]:
len(found)

1114

In [52]:

#taboo = []


found = {}

for s in second_level:

    query = f"MATCH (:Condition {{name: '{s}'}})<-[:BELONGS_TO *]-(a:Condition) RETURN COUNT(a)"

    records, summary, keys = driver.execute_query(query, database_="neo4j")
    for record in records:
        found[s] = record['COUNT(a)']

In [53]:
dict(sorted(found.items(), key=lambda item: item[1]))

{'Anesthesia and Analgesia': 3,
 'Surgical Procedures, Operative': 3,
 'Animal Diseases': 4,
 'Viruses': 5,
 'Health Services Administration': 10,
 'Health Care Quality, Access, and Evaluation': 10,
 'Psychological Phenomena': 13,
 'Musculoskeletal and Neural Physiological Phenomena': 13,
 'Behavioral Disciplines and Activities': 14,
 'Investigative Techniques': 15,
 'Physiological Phenomena': 16,
 'Environment and Public Health': 18,
 'Diagnosis': 18,
 'Population Characteristics': 19,
 'Chemically-Induced Disorders': 21,
 'Behavior and Behavior Mechanisms': 22,
 'Otorhinolaryngologic Diseases': 24,
 'Natural Science Disciplines': 24,
 'Wounds and Injuries': 26,
 'Health Occupations': 30,
 'Stomatognathic Diseases': 34,
 'Endocrine System Diseases': 61,
 'Eye Diseases': 61,
 'Mental Disorders': 77,
 'Musculoskeletal Diseases': 80,
 'Immune System Diseases': 85,
 'Respiratory Tract Diseases': 111,
 'Cardiovascular Diseases': 118,
 'Nutritional and Metabolic Diseases': 120,
 'Skin and C

In [54]:
taboo = ["Investigative Techniques", "Diagnosis", "Animal Diseases", "Health Care Quality, Access, and Evaluation", "Health Services Administration",
         "Psychological Phenomena", "Behavioral Disciplines and Activities", "Environment and Public Health", "Chemically-Induced Disorders", "Diagnosis", 
         "Population Characteristics", "Natural Science Disciplines", "Health Occupations", "Physiological Phenomena"]
#
#taboo += ["Anesthesia and Analgesia", "Viruses", "Surgical Procedures, Operative",  "Behavior and Behavior Mechanisms", "Otorhinolaryngologic Diseases", 
#"Wounds and Injuries", "Musculoskeletal and Neural Physiological Phenomena", "Stomatognathic Diseases", "Endocrine System Diseases", "Eye Diseases"]


found = set()

for s in second_level:
    if s not in taboo:
        query = f"MATCH (:Condition {{name: '{s}'}})<-[:BELONGS_TO *]-(a:Condition) RETURN a"

        records, summary, keys = driver.execute_query(query, database_="neo4j")
        for record in records:
            found.add(f"{record['a']['id']}")

print (len(taboo))
print (len(id_in_disease - found))

14
0


In [55]:

disease_categories = {}

for s in second_level:
    if s not in taboo:
        query = f"MATCH (:Condition {{name: '{s}'}})<-[:BELONGS_TO *]-(a:Condition) RETURN a"

        records, summary, keys = driver.execute_query(query, database_="neo4j")
        for record in records:
            disease_id = record['a']['id']
            if disease_id not in disease_categories:
                disease_categories[disease_id] = set()
            disease_categories[disease_id].add(s)



In [56]:
id_in_disease

{'D000038',
 'D000086382',
 'D000152',
 'D000172',
 'D000196',
 'D000224',
 'D000230',
 'D000236',
 'D000292',
 'D000312',
 'D000379',
 'D000437',
 'D000544',
 'D000562',
 'D000690',
 'D000724',
 'D000740',
 'D000741',
 'D000749',
 'D000752',
 'D000755',
 'D000789',
 'D000795',
 'D000799',
 'D000855',
 'D001002',
 'D001017',
 'D001068',
 'D001145',
 'D001157',
 'D001168',
 'D001171',
 'D001172',
 'D001196',
 'D001228',
 'D001249',
 'D001254',
 'D001281',
 'D001282',
 'D001289',
 'D001321',
 'D001404',
 'D001477',
 'D001528',
 'D001714',
 'D001748',
 'D001749',
 'D001750',
 'D001762',
 'D001847',
 'D001850',
 'D001906',
 'D001919',
 'D001929',
 'D001932',
 'D001991',
 'D002006',
 'D002177',
 'D002178',
 'D002179',
 'D002181',
 'D002277',
 'D002280',
 'D002289',
 'D002292',
 'D002295',
 'D002389',
 'D002544',
 'D002545',
 'D002761',
 'D002862',
 'D003003',
 'D003047',
 'D003093',
 'D003108',
 'D003110',
 'D003231',
 'D003233',
 'D003234',
 'D003320',
 'D003324',
 'D003371',
 'D003424',
 

In [57]:
disease_id_name = {}
for line in open("to_neo4j/disease.tsv"):
    disease_id = line.strip().split("\t")[0]
    disease_name = line.strip().split("\t")[1]
    disease_id_name[disease_id] = disease_name

In [58]:
content = "id\tname\tcategories\n"
for d in id_in_disease:
    if d in disease_categories:
        categories = '|'.join(disease_categories[d])
        content += f"{d}\t{disease_id_name[d]}\t{categories}\n"
    else:
        content += f"{d}\t{disease_id_name[d]}\t\n"

with open("to_neo4j/disease_category.tsv", "w") as f:
    f.write(content)