In [2]:
!pip install neo4j



In [56]:
from neo4j import GraphDatabase
import pandas as pd
from joblib import Parallel, delayed
import ast

In [17]:
uri = "neo4j://localhost:7687"  # Replace with your URI 
user = "neo4j"                 # Replace with your username
password = "19091998"          # Replace with your password

driver = GraphDatabase.driver(uri, auth=(user, password))

In [18]:
class Neo4jConnection:
    
    def __init__(self, uri, user, password):
        self.__uri = uri
        self.__user = user
        self.__password = password
        self.__driver = None
        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__password))
        except Exception as e:
            print("Failed to create the driver:", e)
        
    def close(self):
        if self.__driver is not None:
            self.__driver.close()
        
    def execute_query(self, query):
        if self.__driver is not None:
            with self.__driver.session() as session:
                return [record for record in session.run(query)]


In [19]:
# label fields in the snomed ct graph

q = """ MATCH (n:ObjectConcept)WHERE (n.FSN ENDS WITH '(finding)' or n.FSN ENDS WITH '(disorder)' )
        SET n:SP
        RETURN n LIMIT 10 """

In [27]:
conn = Neo4jConnection(uri, user, password)

query = """ MATCH (c:SP_ALL)-[:HAS_DESCRIPTION]->(d:Description)
            WHERE d.descriptionType = 'Synonym'
            RETURN c.id AS ConceptId, c.FSN AS FSN_concept, d.term AS Synonym
"""

results = conn.execute_query(query)

results[0]['Synonym']

'Local excision of lesion or tissue of patella'

In [28]:
r_df = pd.DataFrame(results, columns=['ConceptId', 'FSN_concept', 'Synonym'])
r_df.to_csv('SP_ALL_EXT.csv')

In [None]:
# Run faiss indexing on all the synonyms, find most similar word

In [None]:
# find node belonging to the synonym

In [18]:
# Find shortest path between concepts 
query = """ MATCH p = shortestPath((start:SP {id: "249473004"})-[*..10]-(end:SP {id: "102608004"}))
            RETURN length(p) as distance """

results = conn.execute_query(query)

for record in results:
    print(record)

  with self.__driver.session() as session:


<Record distance=3>


In [20]:
query = """ WITH ["249473004","289161009", "223123009", "289163007", "64379006"] AS endNodeIds
            MATCH (start:SP {id: "102608004"}) 
            UNWIND endNodeIds AS endNodeId
            MATCH (end:SP {id: endNodeId})
            CALL {
                WITH start, end
                MATCH p = shortestPath((start)-[*..10]-(end))
                RETURN p
                ORDER BY length(p) ASC LIMIT 1
            }
            RETURN p, length(p) as distance, end.id as endNodeId, start.FSN, end.FSN as NameEndNode """

results = conn.execute_query(query)

results

  with self.__driver.session() as session:


[<Record p=<Path start=<Node element_id='4:0c414c0f-b769-4d23-ad25-650efdf1893b:99185' labels=frozenset({'Finding', 'SP', 'SPFindings', 'ObjectConcept'}) properties={'FSN': 'Excessive appetite (finding)', 'nodetype': 'concept', 'effectiveTime': '20080731', 'definitionStatusId': '900000000000073002', 'active': '1', 'history': '[{"id": "102608004", "effectiveTime": "20020131", "active": "1", "moduleId": "900000000000207008", "definitionStatusId": "900000000000074008"}]', 'id': '102608004', 'moduleId': '900000000000207008', 'sctid': '102608004'}> end=<Node element_id='4:0c414c0f-b769-4d23-ad25-650efdf1893b:240538' labels=frozenset({'Finding', 'SP', 'SPFindings', 'ObjectConcept'}) properties={'FSN': 'Altered appetite (finding)', 'nodetype': 'concept', 'effectiveTime': '20080731', 'definitionStatusId': '900000000000073002', 'active': '1', 'history': '[{"id": "249473004", "effectiveTime": "20020131", "active": "1", "moduleId": "900000000000207008", "definitionStatusId": "900000000000074008"}

In [None]:
# extract the shortest path and handle codes with the exact same length 

In [None]:
# examples of failed matchings in the app 

In [None]:
conn.close()

In [64]:
# extract the number of descendants and asc for each node 

conn = Neo4jConnection(uri, user, password)
query = """ MATCH (c:SP_ALL)
OPTIONAL MATCH (c)-[:ISA*]->(descendant:SP)
WITH c, COLLECT(DISTINCT descendant.id) AS DescendantIDs, COLLECT(DISTINCT descendant.FSN) AS DescendantFSN
RETURN c.id AS ConceptId, c.FSN AS FSN_concept,  SIZE(DescendantIDs) as num_desc, DescendantIDs, DescendantFSN
"""

results = conn.execute_query(query)
pd.DataFrame(results).to_csv('descendants.csv', index=False)
results[0]

<Record ConceptId='104001' FSN_concept='Excision of lesion of patella (procedure)' num_desc=0 DescendantIDs=[] DescendantFSN=[]>

In [65]:
conn = Neo4jConnection(uri, user, password)
query = """ MATCH (c:SP_ALL)
OPTIONAL MATCH (ancestor:SP)-[:ISA*]->(c)
WITH c, COLLECT(DISTINCT ancestor.id) AS AncestorIDs, COLLECT(DISTINCT ancestor.FSN) AS AncestorFSN
RETURN c.id AS ConceptId, c.FSN AS FSN_concept,  SIZE(AncestorIDs) as num_anc, AncestorIDs, AncestorFSN
"""

results = conn.execute_query(query)
pd.DataFrame(results, columns=['ConceptId', 'FSN_concept', 'num_anc', 'AncestorIDs', 'AncestorFSN']).to_csv('ascendants.csv', index=False)
results[0]

<Record ConceptId='104001' FSN_concept='Excision of lesion of patella (procedure)' num_anc=0 AncestorIDs=[] AncestorFSN=[]>

In [66]:
ant_df = pd.read_csv('ascendants.csv')
des_df = pd.read_csv('descendants.csv')
ant_df

Unnamed: 0,ConceptId,FSN_concept,num_anc,AncestorIDs,AncestorFSN
0,104001,Excision of lesion of patella (procedure),0,[],[]
1,105000,Poisoning by pharmaceutical excipient (disorder),0,[],[]
2,109006,Anxiety disorder of childhood OR adolescence (...,11,"['90790003', '83253003', '64165008', '53467004...",['Avoidant disorder of adolescence (disorder)'...
3,115006,Removable appliance therapy (procedure),0,[],[]
4,119000,Thoracoscopic partial lobectomy of lung (proce...,0,[],[]
...,...,...,...,...,...
263271,978253001000132109,Small bowel enteroscopy normal (finding),0,[],[]
263272,985355341000119101,Malignant melanoma of skin of left wrist (diso...,0,[],[]
263273,987840791000119102,Adenosine deaminase 2 deficiency (disorder),0,[],[]
263274,991898981000119108,Chronic nontraumatic intracranial subdural hem...,0,[],[]


In [52]:
# ant_df.columns = ['ConceptId', 'FSN_concept', 'num_anc', 'AncestorIDs', 'AncestorFSN']
des_df.columns = ['ConceptId', 'FSN_concept', 'num_des', 'DescendantIDs', 'DescendantFSN']
des_df

Unnamed: 0,ConceptId,FSN_concept,num_des,DescendantIDs,DescendantFSN
0,105000,Poisoning by pharmaceutical excipient (disorder),8,"['7895008', '55680006', '441952005', '75478009...",['Poisoning caused by drug AND/OR medicinal su...
1,109006,Anxiety disorder of childhood OR adolescence (...,17,"['197480006', '268664001', '111476001', '48694...","['Anxiety disorder (disorder)', 'Childhood emo..."
2,122003,Choroidal hemorrhage (disorder),44,"['128468007', '93478000', '128536001', '956780...","['Disorder of choroid of eye (disorder)', 'Int..."
3,123008,Channel catfish virus disease (disorder),6,"['23513009', '73787001', '34014006', '40733004...","['Herpesvirus infection (disorder)', 'Disease ..."
4,127009,Miscarriage with laceration of cervix (disorder),166,"['426997005', '7809009', '237090005', '3005770...",['Traumatic injury during pregnancy (disorder)...
...,...,...,...,...,...
179062,972604701000119104,Acquired arteriovenous malformation of vascula...,22,"['5251000119108', '53619000', '128121009', '27...",['Acquired arteriovenous malformation (disorde...
179063,978253001000132109,Small bowel enteroscopy normal (finding),12,"['22558701000132108', '370351008', '441742003'...","['Enteroscopy finding (finding)', 'Endoscopy f..."
179064,985355341000119101,Malignant melanoma of skin of left wrist (diso...,105,"['352201000119105', '93654000', '4514110001241...",['Malignant melanoma of skin of left upper lim...
179065,987840791000119102,Adenosine deaminase 2 deficiency (disorder),32,"['44940001', '362993009', '129456006', '363138...","['Adenosine deaminase deficiency (disorder)', ..."


In [36]:
import numpy as np

def compute_similar_names(name, l):
    sim = []
    for i in l:
        s = False
        c=0
        x = True if name in i else False
        if x: 
            s = True
        
        tk_name = name.lower().split()
        for k in tk_name:
            c += 1 if k in i else 0
        
        if c/len(tk_name)>=0.5:
            s = True
        
        sim.append(s)
    np.mean(sim)
    return sim, np.mean(sim)

# compute_similar_names('aina tersol', ['aina', 'aina goes to schol'])

([True, True], 1.0)

In [73]:
%%time

def process_row(row, col_name):
    return compute_similar_names(row['FSN_concept'], ast.literal_eval(row[col_name]))

results = Parallel(n_jobs=-1)(delayed(process_row)(row, 'AncestorFSN') for index, row in ant_df.iterrows())
ant_df[['sim', 'mean']] = pd.DataFrame(results, index=ant_df.index)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return _methods._mean(a, axis=

In [71]:
ant_df[ant_df['num_anc']>10]

Unnamed: 0,ConceptId,FSN_concept,num_anc,AncestorIDs,AncestorFSN,sim,mean
2,109006,Anxiety disorder of childhood OR adolescence (...,11,"['90790003', '83253003', '64165008', '53467004...",['Avoidant disorder of adolescence (disorder)'...,"[True, True, True, True, True, True, True, Tru...",1.000000
6,122003,Choroidal hemorrhage (disorder),13,"['193474002', '336061000119102', '341671000119...",['Choroidal hemorrhage and rupture (disorder)'...,"[True, False, False, True, True, True, True, T...",0.692308
15,140004,Chronic pharyngitis (disorder),28,"['133171000119105', '24078009', '90979004', '4...","['Chronic pharyngolaryngitis (disorder)', 'Gan...","[False, False, False, True, True, True, True, ...",0.321429
50,198007,Disease caused by Filoviridae (disorder),29,"['45901000087102', '1163096003', '788870007', ...",['Disease caused by Zaire Ebolavirus (disorder...,"[True, True, True, True, True, True, False, Fa...",0.275862
57,219006,Current drinker of alcohol (finding),19,"['10939881000119105', '447087000', '28127009',...",['Unhealthy alcohol drinking behavior (finding...,"[False, False, False, False, False, False, Fal...",0.000000
...,...,...,...,...,...,...,...
263009,32100001000004100,Mass of cervical spine (disorder),18,"['203198001', '735605000', '713741003', '12655...","[""Brodie's abscess of cervical spine (disorder...","[True, True, True, True, True, True, True, Tru...",0.833333
263050,115940341000119100,Chronic ulcer of anastomosis (disorder),19,"['128288009', '56579005', '41626001', '1967210...","['Chronic gastrojejunal ulcer (disorder)', 'Ch...","[False, False, False, False, False, False, Fal...",0.055556
263110,347218661000119103,Abrasion of mouth region (disorder),25,"['698524005', '698518004', '110154009', '26263...","['Generalized abrasion of tooth (disorder)', '...","[True, False, False, False, False, False, Fals...",0.240000
263205,751960361000119108,Lesion of nasal cavity (disorder),132,"['10872851000119104', '210328006', '282451008'...",['Laceration of structure of nasal cavity (dis...,"[True, True, True, False, True, False, True, T...",0.392308


In [None]:
%%time

results = Parallel(n_jobs=-1)(delayed(process_row)(row, 'DescendantFSN') for index, row in des_df.iterrows())
des_df[['sim', 'mean']] = pd.DataFrame(results, index=des_df.index)

KeyError: 'FSN_concept'

In [None]:
des_df

Unnamed: 0,ConceptId,FSN_concept,num_des,DescendantIDs,DescendantFSN,sim,mean
0,105000,Poisoning by pharmaceutical excipient (disorder),8,"['7895008', '55680006', '441952005', '75478009...",['Poisoning caused by drug AND/OR medicinal su...,"[False, False, False, False, False, False, Fal...",0.000000
1,109006,Anxiety disorder of childhood OR adolescence (...,17,"['197480006', '268664001', '111476001', '48694...","['Anxiety disorder (disorder)', 'Childhood emo...","[False, False, True, False, False, False, Fals...",0.062500
2,122003,Choroidal hemorrhage (disorder),44,"['128468007', '93478000', '128536001', '956780...","['Disorder of choroid of eye (disorder)', 'Int...","[False, True, False, False, False, False, Fals...",0.022727
3,123008,Channel catfish virus disease (disorder),6,"['23513009', '73787001', '34014006', '40733004...","['Herpesvirus infection (disorder)', 'Disease ...","[False, False, False, False, False, False]",0.000000
4,127009,Miscarriage with laceration of cervix (disorder),166,"['426997005', '7809009', '237090005', '3005770...",['Traumatic injury during pregnancy (disorder)...,"[False, True, False, False, False, False, Fals...",0.036364
...,...,...,...,...,...,...,...
179062,972604701000119104,Acquired arteriovenous malformation of vascula...,22,"['5251000119108', '53619000', '128121009', '27...",['Acquired arteriovenous malformation (disorde...,"[False, False, False, False, False, False, Fal...",0.000000
179063,978253001000132109,Small bowel enteroscopy normal (finding),12,"['22558701000132108', '370351008', '441742003'...","['Enteroscopy finding (finding)', 'Endoscopy f...","[False, False, False, False, False, False, Fal...",0.000000
179064,985355341000119101,Malignant melanoma of skin of left wrist (diso...,105,"['352201000119105', '93654000', '4514110001241...",['Malignant melanoma of skin of left upper lim...,"[True, True, True, False, True, True, True, Tr...",0.262136
179065,987840791000119102,Adenosine deaminase 2 deficiency (disorder),32,"['44940001', '362993009', '129456006', '363138...","['Adenosine deaminase deficiency (disorder)', ...","[True, False, False, False, False, False, Fals...",0.031250
