# Code

In [1]:
import mysql.connector
from pymetamap import MetaMap
import numpy as np
import math
import pandas as pd
import os
from matplotlib import pyplot
import statistics
from scipy.stats import pearsonr
from scipy.stats import spearmanr
from scipy import stats
from scipy.spatial.distance import cdist

In [3]:
def to_str(aui):
    """Look up the string of an AUI in ``mrconso`` (rows with ``tty = 'pt'``).

    The query is run through the module-level ``mycursor``.

    Returns:
        The ``str`` column of the first matching row with spaces replaced by
        underscores, or the placeholder '***No corresponding string***' when
        no row matches.
    """
    query = 'select distinct str from mrconso where aui = %s and tty = %s'
    mycursor.execute(query, (aui, 'pt'))
    rows = mycursor.fetchall()
    if not rows:
        return '***No corresponding string***'
    first_str = rows[0][0]
    return first_str.replace(' ', '_')

In [4]:
def to_aui(cui, sab):
    """Return the AUIs stored in ``mrconso`` for a CUI within one source.

    Only rows with ``tty = 'pt'`` are considered; the query is run through
    the module-level ``mycursor``.

    Args:
        cui: value matched against the ``cui`` column.
        sab: value matched against the ``sab`` column.

    Returns:
        A list with the ``aui`` value of every matching row (empty when
        nothing matches).
    """
    query = "select distinct aui from mrconso where cui = %s and sab = %s and tty = %s"
    mycursor.execute(query, (cui, sab, 'pt'))
    return [row[0] for row in mycursor.fetchall()]

In [6]:
def intersection(lst1, lst2):
    """Return the elements of ``lst1`` that also occur in ``lst2``.

    The order of ``lst1`` is kept, and an element repeated in ``lst1`` is
    repeated in the result.
    """
    shared = []
    for item in lst1:
        if item in lst2:
            shared.append(item)
    return shared

In [7]:
def show_ptr(aui):
    """Return the 'isa' paths stored in ``mrhier`` for an AUI.

    Every distinct ``ptr`` value found for ``aui`` (with ``rela = 'isa'``) is
    split on '.' into a list of AUIs, and ``aui`` itself is appended as the
    last element of that list.

    Returns:
        A list of such paths, one per ``ptr`` row; empty when the query
        returns no rows.
    """
    query = "select distinct ptr from mrhier where aui = %s and rela=%s"
    mycursor.execute(query, (aui, 'isa'))
    paths = []
    for row in mycursor.fetchall():
        # presumably ptr lists the ancestors from the root downwards -- confirm
        path = row[0].split('.')
        path.append(aui)
        paths.append(path)
    return paths


In [8]:
def common_ancestors(aui1, aui2):
    """Collect the shared part of every pair of paths of two AUIs.

    Each path of ``aui1`` (from ``show_ptr``) is intersected with each path of
    ``aui2``; every distinct intersection is kept once, in the order it is
    first produced.  An intersection may be an empty list.

    Because a concept can have several paths, several different lists of
    common ancestors may be returned,
    e.g. [['A3684559', 'A3886745'], ['A3684559', 'A3886745', 'A7843222']].
    """
    paths1 = show_ptr(aui1)
    paths2 = show_ptr(aui2)
    shared_paths = []
    for path1 in paths1:
        for path2 in paths2:
            shared = intersection(path1, path2)
            if shared in shared_paths:
                continue
            shared_paths.append(shared)
    return shared_paths

In [9]:
def lcs(aui1, aui2):
    """Return the last common AUI of every shared path of two AUIs.

    ``common_ancestors`` yields lists such as
    [['A3684559', 'A3886745'], ['A3684559', 'A3886745', 'A7843222']]; the last
    element of each list is taken as a least common subsumer candidate.

    Returns:
        A list of distinct AUIs in the order they are first encountered.
        Empty when the two AUIs have no path or share no element.
    """
    # common_ancestors may contain an empty list when two paths share nothing;
    # indexing [-1] on it would raise IndexError, so such entries are skipped.
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    candidates = dict.fromkeys(
        shared[-1] for shared in common_ancestors(aui1, aui2) if shared
    )
    return list(candidates)

In [16]:
def rel(cui, sab):
    """Return the (aui1, rela) pairs recorded in ``mrrel`` for a CUI.

    Rows are selected where ``cui2`` equals ``cui`` and ``sab`` equals
    ``sab``.  A row is dropped when its ``rela`` is one of the skipped
    relation names below, or when ``to_str`` finds no string for its AUI.

    Returns:
        A list of ``(aui1, rela)`` tuples in the order the rows were fetched.
    """
    skipped_relas = (
        'possibly_equivalent_to',
        'same_as',
        'isa',
        'was_a',
        'inverse_was_a',
        'moved_to',
        'is_not_primary_anatomic_site_of_disease',
        'concept_in_subset',
    )
    query = "select distinct aui1,rela from mrrel where cui2 = %s and sab = %s"
    mycursor.execute(query, (cui, sab))
    kept = []
    for row in mycursor.fetchall():
        aui, rela = row[0], row[1]
        if rela in skipped_relas:
            continue
        # to_str is only queried for rows that survive the relation filter
        if to_str(aui) == '***No corresponding string***':
            continue
        kept.append((aui, rela))
    return kept

In [18]:
# my wt 3
def wt(caui, raui):
    """Return the weight of ``caui`` with respect to ``raui``.

    For every path of ``raui`` returned by ``show_ptr`` that contains
    ``caui``, the weight is (1-based position of ``caui`` in the path) divided
    by (1-based position of ``raui`` in the path); a path that does not
    contain ``caui`` contributes 0.

    Args:
        caui: AUI whose position in the paths of ``raui`` is measured.
        raui: AUI whose paths are looked up.

    Returns:
        The largest weight over all paths of ``raui``, or 0 when ``raui`` has
        no path at all (previously this case raised ValueError from max()).
    """
    weights = []
    for hier in show_ptr(raui):
        if caui not in hier:
            weights.append(0)
            continue
        for depth, aui in enumerate(hier, start=1):
            if aui == caui:
                depth_caui = depth
            if aui == raui:
                # show_ptr appends raui to every path, so this is always set
                depth_raui = depth
        weights.append(depth_caui / depth_raui)
    return max(weights, default=0)

In [20]:
def _mean_weight_vector(assoAuis, cols, hierarchies, k):
    """Build the mean weight per column AUI over the AUIs in ``assoAuis``.

    Args:
        assoAuis: list of ``(aui, rela)`` tuples of one concept.
        cols: list of column AUIs (all AUIs found on any path).
        hierarchies: dict mapping each AUI in ``assoAuis`` to its
            ``show_ptr`` paths.
        k: factor applied to the weight of entries whose rela is not 'isa'.

    Returns:
        A numpy array with one mean weight per element of ``cols``.
    """
    weights = {}  # column AUI -> {row AUI -> weight}
    for c in cols:
        column = weights[c] = {}
        for aui, rela in assoAuis:
            # The cell is reset for every entry, so when an AUI is listed more
            # than once the last entry decides its weight.
            column[aui] = 0
            if not any(c in hier for hier in hierarchies[aui]):
                continue
            # wt() does not depend on which path matched, so one call suffices
            weight = wt(c, aui)
            if rela != 'isa':
                weight = weight * k
            if weight > 0:
                column[aui] = weight
    return pd.DataFrame(weights)[cols].mean().to_numpy()


def sim(cui1, cui2, sab, k=0.25):
    """Cosine similarity of two concepts based on their hierarchy weights.

    For each concept the associated AUIs are its own AUIs (``to_aui``, tagged
    'isa') plus the AUIs related to it (``rel``).  Every AUI occurring on any
    ``show_ptr`` path of those AUIs becomes one vector dimension; a concept's
    value in a dimension is the mean, over its associated AUIs, of
    ``wt(dimension, aui)``, multiplied by ``k`` for non-'isa' entries.

    Args:
        cui1: first concept identifier.
        cui2: second concept identifier.
        sab: source abbreviation passed on to ``to_aui`` and ``rel``.
        k: factor applied to weights that come from non-'isa' relations.

    Returns:
        The cosine of the two vectors, or None when the similarity cannot be
        computed: a concept has no associated AUI, no AUI has a hierarchy
        path, or one of the vectors is all zeros.
    """
    assoAuis1 = [(a, 'isa') for a in to_aui(cui1, sab)]
    # Drop the following line to measure similarity using the 'isa' hierarchy only
    assoAuis1.extend(rel(cui1, sab))

    assoAuis2 = [(a, 'isa') for a in to_aui(cui2, sab)]
    # Drop the following line to measure similarity using the 'isa' hierarchy only
    assoAuis2.extend(rel(cui2, sab))

    if not assoAuis1 or not assoAuis2:
        return None

    # Fetch the paths of every associated AUI once; each show_ptr call is a
    # database query.
    hierarchies = {}
    for aui, _ in assoAuis1 + assoAuis2:
        if aui not in hierarchies:
            hierarchies[aui] = show_ptr(aui)

    # 'cols' holds every AUI found on any path, de-duplicated in first-seen
    # order so that the result does not depend on set iteration order.
    seen = {}
    for aui, _ in assoAuis1 + assoAuis2:
        for hier in hierarchies[aui]:
            for a in hier:
                seen.setdefault(a, None)
    cols = list(seen)
    if not cols:
        # No hierarchy information at all: both vectors would be empty.
        return None

    vector1 = _mean_weight_vector(assoAuis1, cols, hierarchies, k)
    vector2 = _mean_weight_vector(assoAuis2, cols, hierarchies, k)

    # Cosine similarity calculation
    numerator = sum(x * y for x, y in zip(vector1, vector2))
    norm1 = math.sqrt(sum(x ** 2 for x in vector1))
    norm2 = math.sqrt(sum(y ** 2 for y in vector2))
    if norm1 == 0 or norm2 == 0:
        # The cosine is undefined for a zero vector.
        return None
    return numerator / (norm1 * norm2)

# Evaluation: congestive heart failure vs pulmonary edema

In [179]:
# k=1: sim() multiplies the weight of non-'isa' relations by k, so with k=1
# they count the same as the 'isa' entries.
sim('c0018802', 'c0034063', 'snomedct_us', k=1)

assoAuis1: [('A2876049', 'isa'), ('A15144555', 'associated_finding_of'), ('A2977261', 'cause_of'), ('A21111173', 'inverse_isa'), ('A21111174', 'inverse_isa'), ('A21114045', 'inverse_isa'), ('A3107081', 'has_finding_site'), ('A3572714', 'has_finding_site'), ('A23453139', 'focus_of'), ('A23461758', 'inverse_isa'), ('A23456927', 'inverse_isa'), ('A23463973', 'inverse_isa'), ('A23459656', 'inverse_isa'), ('A25679756', 'inverse_isa'), ('A25680487', 'inverse_isa'), ('A27772964', 'inverse_isa'), ('A2953022', 'inverse_isa'), ('A2957879', 'inverse_isa'), ('A2960389', 'inverse_isa'), ('A2967847', 'inverse_isa'), ('A2977261', 'inverse_isa'), ('A2995991', 'inverse_isa'), ('A3004638', 'inverse_isa'), ('A3143243', 'inverse_isa'), ('A3143244', 'inverse_isa'), ('A3304265', 'inverse_isa'), ('A2874988', 'cause_of'), ('A13004805', 'inverse_isa'), ('A13004804', 'inverse_isa')]
assoAuis2: [('A2943246', 'isa'), ('A3022982', 'inverse_isa'), ('A3050407', 'associated_finding_of'), ('A2881022', 'has_associated_

           A9405607  A3029996  A3103062  A3723424  A3210223  A8385935  \
A2943246      0.000         0         0  0.000000         0  0.000000   
A3022982      0.000         0         0  0.000000         0  0.000000   
A3050407      0.000         0         0  0.000000         0  0.000000   
A2881022      0.000         0         0  0.000000         0  0.000000   
A3154872      0.000         0         0  0.727273         0  0.666667   
A23459061     0.000         0         0  0.000000         0  0.000000   
A23450329     0.000         0         0  0.000000         0  0.000000   
A24082121     0.000         0         0  0.000000         0  0.000000   
A3040758      0.000         0         0  0.000000         0  0.000000   
A2874988      0.000         0         0  0.000000         0  0.000000   
A2958225      0.000         0         0  0.000000         0  0.000000   
A2978198      0.000         0         0  0.000000         0  0.000000   
A2989070      0.000         0         0  0.000000  

0.4089315996631767