In [3]:
import pandas as pd
from glob import glob
from os import path


In [4]:
def load(indir):
    """Combine all code-list CSVs found directly in ``indir`` into one DataFrame.

    Files are visited in sorted path order and the file whose base name is
    ``index`` is skipped. Every column is read as a string and empty cells
    stay empty strings (no NaN conversion). Each row gets a ``file`` column
    holding the base name (without extension) of the CSV it came from.

    The per-file row index is kept as-is, so index labels repeat across
    files. Raises ``ValueError`` (from ``pd.concat``) when no CSV is loaded.
    """
    # Map each CSV path to its extension-less base name, in sorted path order.
    stems = {
        csv_path: path.splitext(path.basename(csv_path))[0]
        for csv_path in sorted(glob(f"{indir}/*.csv"))
    }
    frames = [
        pd.read_csv(csv_path, dtype=str, na_filter=False).assign(file=stem)
        for csv_path, stem in stems.items()
        if stem != "index"
    ]
    return pd.concat(frames)

# Load every CSV in the AESI-norm directory (except index.csv) into one frame;
# the `file` column records which CSV each row came from.
norm = load("AESI-norm")
norm

Unnamed: 0,coding_system,code,code_name,concept,concept_name,tags,comments,file,review_author_0,review_date_0,review_content_0,review_author_1,review_date_1,review_content_1,review_author_2,review_date_2,review_content_2,review_author_3,review_date_3,review_content_3
0,ICD10,I74,Arterial embolism and thrombosis,C0155749,Arterial embolus and thrombosis,Narrow,,B_ARTERIALEMBOLISM_AESI_arterial embolism,,,,,,,,,,,,
1,ICD10,I74.0,Embolism and thrombosis of abdominal aorta,,,Narrow,,B_ARTERIALEMBOLISM_AESI_arterial embolism,,,,,,,,,,,,
2,ICD10,I74.1,Embolism and thrombosis of other and unspecifi...,,,Narrow,,B_ARTERIALEMBOLISM_AESI_arterial embolism,,,,,,,,,,,,
3,ICD10,I74.2,Embolism and thrombosis of arteries of upper e...,,,Narrow,,B_ARTERIALEMBOLISM_AESI_arterial embolism,,,,,,,,,,,,
4,ICD10,I74.3,Embolism and thrombosis of arteries of lower e...,,,Narrow,,B_ARTERIALEMBOLISM_AESI_arterial embolism,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
368,MEDCODEID,7396081000006119,Viral pericarditis with pericardial effusion,,,narrow,,C_PERICARD_AESI_Pericarditis alone,,,2024-09-05 00:00:00,,,,,,,,,
369,MEDCODEID,8087571000006115,History of pericarditis,,,Exclude,,C_PERICARD_AESI_Pericarditis alone,,,2024-09-05 00:00:00,,,,,,,,,
370,MEDCODEID,884251000006110,Pericardial disease NOS,,,Possible,,C_PERICARD_AESI_Pericarditis alone,,,2024-09-05 00:00:00,,,,,,,,,
371,MEDCODEID,109916013,Dressler's syndrome,,,Narrow,,C_PERICARD_AESI_Pericarditis alone,,,,,,,,,,,,


In [7]:
# For each coding system, count the distinct files it appears in.
(
    norm[["file", "coding_system"]]
    .drop_duplicates()["coding_system"]
    .value_counts()
)

ICD9CM         29
ICD10CM        29
ICD10          28
SNOMEDCT_US    28
SCTSPA         27
ICPC2P         25
MEDCODEID      25
RCD2           25
ICD10DA        23
ICPC           17
Free_text      16
MTHICD9        11
SNM             7
RCD             5
SNMI            1
p               1
ICD10/CM        1
e               1
                1
Name: coding_system, dtype: int64

In [10]:
# Show which files contain the stray coding-system values "p", "e" or "".
norm.loc[
    norm["coding_system"].isin(["p", "e", ""]),
    ["file", "coding_system"],
].drop_duplicates()

Unnamed: 0,file,coding_system
227,B_COAGDEF_AESI_Coagulation deficiencies,e
261,B_COAGDEF_AESI_Coagulation deficiencies,p
1573,C_CARDIOMYOPATHY_COV_Cardiomyopathy excl. myo ...,


## Coding systems

In [6]:
def coding_systems(indir):
    """Return the sorted, distinct non-empty ``coding_system`` values in ``indir``.

    Every ``*.csv`` directly inside ``indir`` is read with all columns as
    strings and empty cells kept as empty strings; empty values are ignored.
    The file whose base name is ``index`` is skipped.

    Parameters:
        indir: directory containing the CSV files (a trailing slash is fine).

    Returns:
        A sorted list of the distinct coding-system strings.
    """
    res = set()
    for infile in sorted(glob(f"{indir}/*.csv")):
        # Identify index.csv by base name rather than by the full path string:
        # glob normalises the directory part, so a path comparison misses it
        # when `indir` has a trailing slash or the OS uses another separator.
        if path.splitext(path.basename(infile))[0] == "index":
            continue
        df = pd.read_csv(infile, dtype=str, na_filter=False)
        res.update(sab for sab in df.coding_system if sab)
    return sorted(res)

# Print the coding systems found in AESI-norm as a Markdown bullet list.
for sab in coding_systems("AESI-norm"):
    print("-", sab)

- CVB
- EXCLU
- Free_text
- HCPCS
- HCPT
- HSD_ACCERT
- IC9CM
- ICD-10-CM
- ICD-10-PCS
- ICD-9-CM
- ICD-9CM
- ICD10
- ICD10-PCS
- ICD10/CM
- ICD10CM
- ICD10DA
- ICD9-CM
- ICD9CM
- ICD9CM_HSD
- ICPC
- ICPC2
- ICPC2EENG
- ICPC2P
- MDR
- MEDCODEID
- MTHICD9
- NCMP-NCSP-NCRP
- PROCDA
- RCD
- RCD2
- SCTSPA
- SCTSPA_SNS
- SNM
- SNMI
- SNOMED
- SNOMEDCT_US
- SNOMED_US
- SPA_EXT
- ZA


# Count how often each coding system is used across all case definitions.
# psql -A -t prints bare rows (no header, alignment padding or "(N rows)"
# footer), so everything reaching jq is JSON. jq -r with []? emits one raw
# coding-system name per line and skips states whose codingSystems is
# missing or null.
echo 'select state from case_definitions ' \
| sudo -u postgres psql -At codemapper \
| jq -r '.codingSystems[]?' \
| sort | uniq -c | sort -n

      1 ICPCNOR
      1 KCD5
      1 RCDAE
      1 WHOSPA
      2 CCSR_ICD10PCS
      2 ICPC2ICD10DUT
      2 ICPC2ICD10ENG
      2 LNC
      3 ICPCDUT
      4 SNMI
      9 HCPT
     10 HCPCS
     15 ICD10PCS
     15 ICPC2EDUT
     27 MSH
     65 SNM
     76 MDR
    189 ICD10/CM
    227 RCD
    326 ICPC2EENG
    364 ICPC
    416 SCTSPA
    446 MTHICD9
    501 SNOMEDCT_US
    583 ICPC2P
    607 ICD10
    623 RCD2
    632 ICD10CM
    731 ICD9CM
