In [1]:
%load_ext autoreload
%autoreload 2
import sys
    

sys.path.append("../..")
from lib.ehr.example_schemes.snomed import SNOMEDCT

In [2]:
# Folder that holds the derived SNOMED-CT files loaded below.
SNOMED_ROOT = '/home/asem/GP/ehr-data/SNOMED-CT/derived'

# The release identifier appears in the scheme name and in both cdb CSV file names,
# so it is spelled once and substituted where needed.
_snomed_release = '20240925000001'
_cdb_template = (
    SNOMED_ROOT
    + '/snomed_cdb_csv_SNOMEDCT_full_{}_UK_drug_ext_Release_'
    + _snomed_release
    + '.csv'
)

# Build the scheme object from the active/inactive cdb CSVs and the ch2pt JSON file.
snomed = SNOMEDCT.from_files(
    name='SNOMED_' + _snomed_release,
    cdb_active_path=_cdb_template.format('active'),
    cdb_inactive_path=_cdb_template.format('inactive'),
    ch2pt_json_path=SNOMED_ROOT + '/20240925_isa_active_rela_ch2pt.json',
)

In [78]:
import pandas as pd

# Relation type ids (typeId values) that are kept when filtering the relation table.
relations_of_interest = [
    'S-370132008', # Scale type
    'S-370130000', # Property
    'S-704319004', # Inheres-in
    'S-718497002', # Inherent location (noted as small in size; currently enabled)
    # 'S-704321009', # Characterizes (disabled because it gives a non-unique relation type per code)
]

def expand_desc(x):
    """Render a code as '<description> (<code>)'.

    The description is read from ``snomed.desc``. Any value that is not a key
    of ``snomed.desc`` is rendered as the literal string 'NA'.
    """
    if x not in snomed.desc:
        return 'NA'
    return f'{snomed.desc[x]} ({x})'


# Codes of the known scale types, with their meaning.
# NOTE(review): this list is not referenced by the rest of this cell; the quantitative
# selection further down repeats two of these ids as literals.
scale_types = ['S-26716007', # qualitative
               'S-30766002', # quantitative
               'S-117362005', # nominal value (standardized measurement)
               'S-117363000', # ordinal value
               'S-117365007' # ordinal or quantitative
              ]


# Relation table with (at least) the columns sourceId, typeId and destinationId.
# The CSV sits in the same folder as the other derived SNOMED files, so the path is
# built from SNOMED_ROOT instead of repeating the absolute location.
scaled_snomed_codes_relats = pd.read_csv(f'{SNOMED_ROOT}/scaled_codes_relations.csv', index_col=[0])

# Select codes that can be quantitative: codes with a relation whose destination is
# 'quantitative' (S-30766002) or 'ordinal or quantitative' (S-117365007).
quantitative_scale_types = ['S-30766002', 'S-117365007']
quantitative_snomed_codes = scaled_snomed_codes_relats.loc[
    scaled_snomed_codes_relats.destinationId.isin(quantitative_scale_types), 'sourceId'
].unique()

# Keep, for those codes, only the relation types of interest.
# .copy() detaches the result from the full table so that later column assignments
# operate on an independent frame instead of on a filtered slice.
quantitative_snomed_codes_relats = scaled_snomed_codes_relats[
    scaled_snomed_codes_relats.sourceId.isin(quantitative_snomed_codes)
    & scaled_snomed_codes_relats.typeId.isin(relations_of_interest)
].copy()

# Every (code, relation type) pair must point to a single destination. Without this
# guard the to_dict() calls below would silently keep only the last destination of a
# duplicated pair.
assert quantitative_snomed_codes_relats.set_index(['sourceId', 'typeId']).index.is_unique, \
    'a code has more than one destination for the same relation type'


def _destination_by_source(relations, type_id):
    """Return a {sourceId: destinationId} dict for the rows of `relations` with the given typeId."""
    of_type = relations[relations.typeId == type_id]
    return of_type.set_index(['sourceId'])['destinationId'].to_dict()


scale_type = _destination_by_source(quantitative_snomed_codes_relats, 'S-370132008')    # Scale type
# NOTE: `property` shadows the Python builtin; the name is kept because other cells read it.
property = _destination_by_source(quantitative_snomed_codes_relats, 'S-370130000')      # Property
inheres_in = _destination_by_source(quantitative_snomed_codes_relats, 'S-704319004')    # Inheres-in
inherent_loc = _destination_by_source(quantitative_snomed_codes_relats, 'S-718497002')  # Inherent location

# One row per quantitative code, annotated with its description and related concepts.
scaled_codes = pd.DataFrame({'prefixed_code': quantitative_snomed_codes_relats.sourceId.unique()})
# Second '-'-separated field of the prefixed code, e.g. 'S-123' -> '123'.
scaled_codes['code'] = scaled_codes.prefixed_code.map(lambda x: x.split('-')[1])
scaled_codes['name'] = scaled_codes.prefixed_code.map(snomed.desc)
# expand_desc renders anything it cannot find in snomed.desc as 'NA'.
scaled_codes['scale_type'] = scaled_codes['prefixed_code'].map(scale_type).map(expand_desc)
scaled_codes['property'] = scaled_codes['prefixed_code'].map(property).map(expand_desc)
# Both relation types share one column; on a conflict the inherent-location entry wins
# because it is the right-hand operand of the dict union.
scaled_codes['inheres_in'] = scaled_codes['prefixed_code'].map(inheres_in | inherent_loc).map(expand_desc)

scaled_codes = scaled_codes.set_index(['scale_type', 'property', 'inheres_in']).sort_index()
# Which relation type is unique.
# relation_types = quantitative_snomed_codes_relats.typeId.unique()
# for typeId in relation_types:
#     df = quantitative_snomed_codes_relats[quantitative_snomed_codes_relats.typeId == typeId]
#     if df.set_index(['sourceId']).index.is_unique == False:
#         print(typeId)

In [84]:
# Ad-hoc spot check: display the snomed.desc entry stored for this single code.
snomed.desc['S-1031981000000109']

In [83]:
# Write the scaled_codes table to an Excel workbook in the current working directory.
xlsx_path = 'scaled_codes.xlsx'
with pd.ExcelWriter(xlsx_path, engine='xlsxwriter', mode='w') as xlsx_writer:
    scaled_codes.to_excel(excel_writer=xlsx_writer, sheet_name='Sheet_1')

In [73]:
# Number of entries in each relation dictionary, in the order:
# scale type, property, inheres-in, inherent location.
tuple(len(mapping) for mapping in (scale_type, property, inheres_in, inherent_loc))

In [56]:
# Human-readable view of the relation table: every id column is replaced by
# '<description> (<code>)' via expand_desc.
# The view is built as a NEW frame instead of overwriting the id columns of
# quantitative_snomed_codes_relats. Overwriting made this cell non-idempotent (a second
# run would feed already-expanded strings to expand_desc and turn them into 'NA') and
# assigned into a frame obtained by filtering another frame.
quantitative_relats_described = quantitative_snomed_codes_relats.assign(
    sourceId=lambda relats: relats.sourceId.map(expand_desc),
    typeId=lambda relats: relats.typeId.map(expand_desc),
    destinationId=lambda relats: relats.destinationId.map(expand_desc),
)
# Group the source codes under their (relation type, destination) pair for display.
quantitative_relats_described.set_index(['typeId', 'destinationId'])[['sourceId']].sort_index()

In [25]:
# Ad-hoc spot check: display the snomed.desc entry for S-117363000
# (annotated as "ordinal value" in the scale_types list).
snomed.desc['S-117363000']

In [3]:
# digraph = snomed.to_networkx()

In [4]:
from tqdm import tqdm

def detect_simple_cycle(graph):
    """Report whether a directed graph contains a cycle.

    Args:
        graph: mapping from a vertex to an iterable of the vertices it points to.
            Vertices that only appear as neighbours (never as keys) are allowed.

    Returns:
        True as soon as a depth-first search meets a vertex that is already on the
        current search path, False if every vertex was explored without that happening.
    """
    # set(graph).union(...) accepts any iterable of neighbours and also works when the
    # graph is empty; set.union(*graph.values()) raised TypeError in both situations.
    vertices = set(graph).union(*graph.values())
    visited = dict.fromkeys(vertices, False)
    rec_stack = []

    for v in tqdm(vertices):
        # Vertices reached by an earlier search are already fully explored.
        if not visited[v] and dfs_util(graph, v, visited, rec_stack):
            return True

    return False

def dfs_util(graph, v, visited, rec_stack):
    """Depth-first search from `v`; return True if a cycle is reachable.

    The search is iterative (explicit stack of neighbour iterators), so the depth of
    the graph is not bounded by the interpreter's recursion limit.

    Args:
        graph: mapping from a vertex to an iterable of its neighbours; vertices
            missing from the mapping are treated as having no neighbours.
        v: start vertex.
        visited: dict vertex -> bool, updated in place; a vertex is marked True when
            the search first reaches it.
        rec_stack: list holding the current search path, updated in place.

    Returns:
        True if a neighbour that is already on the current search path is found. In
        that case `rec_stack` is left holding the path that was being explored.
        False otherwise, with `rec_stack` restored to its content on entry.
    """
    visited[v] = True
    rec_stack.append(v)
    # Mirror of rec_stack for constant-time "is this vertex on the current path?" checks.
    on_path = set(rec_stack)
    # One neighbour iterator per vertex on the path below (and including) v.
    pending = [iter(graph.get(v, ()))]

    while pending:
        for neighbor in pending[-1]:
            if visited.get(neighbor, False):
                if neighbor in on_path:
                    # Back edge to a vertex on the current path: cycle found.
                    return True
            else:
                # Descend into the unvisited neighbour; resume this iterator later.
                visited[neighbor] = True
                rec_stack.append(neighbor)
                on_path.add(neighbor)
                pending.append(iter(graph.get(neighbor, ())))
                break
        else:
            # All neighbours of the vertex on top of the path are explored: backtrack.
            pending.pop()
            on_path.discard(rec_stack.pop())

    return False
    
# Check the snomed.ch2pt mapping for cycles; the boolean result is the cell output.
# NOTE(review): ch2pt is presumably the child -> parents mapping loaded from the
# ch2pt JSON file — confirm against SNOMEDCT.
detect_simple_cycle(snomed.ch2pt)

In [10]:
# Collect the codes returned by a BFS over the successors of S-363787002.
# NOTE(review): the variable name suggests S-363787002 is the "Observable entity"
# concept, and the meaning of the positional False is not visible here — confirm both
# against SNOMEDCT.code_successors_bfs.
observable_entity = snomed.code_successors_bfs("S-363787002", False)

In [16]:
# Convert the collected codes to a networkx object via snomed.to_networkx.
# NOTE(review): presumably a directed graph restricted to these codes — confirm
# against SNOMEDCT.to_networkx.
observables_diagraph =snomed.to_networkx(observable_entity)

In [17]:
# Display the object built in the previous cell.
observables_diagraph