In [1]:
import sys
import polars as pl
import plotly.express as px
sys.path.insert(0, '..')
from fs_thesis import sql, show

# Diagnoses

The "Big Three" Categories:

1. Cardiovascular/Circulatory: Hypertension and Chest Pain.
2. Metabolic: Diabetes (Type 2 and Uncomplicated).
3. Trauma/Acute: Falls and Abdominal Pain.

With a dominance of Hypertension. The Prevalence of Symptomatic vs Chronic the list is a mix of chronic conditions (Hypertension, Diabetes) and acute symptomps (Chest pain, Abdominal pain).


 

Lets have another look into the data. But this time using the ICD-Codes https://icdlist.com/icd-10/look-up

## Conclusion
So the idea is that a lot of infections are difficult to detect. lets have a look into this data. If the icd_code starts with A0* its a infection.

In [2]:
# sql query to get diagnosis data
df = sql("""SELECT distinct icd_code, icd_title 
         from ed.diagnosis
         where icd_code like 'A0%'
         order by icd_code
         """)

show(df)

Unnamed: 0,icd_code,icd_title
0,A029,"Salmonella infection, unspecified"
1,A039,"Shigellosis, unspecified"
2,A045,Campylobacter enteritis
3,A047,Enterocolitis due to Clostridium difficile
4,A0471,"Enterocolitis due to Clostridium difficile, recurrent"
5,A0472,"Enterocolitis d/t Clostridium difficile, not spcf as recur"
6,A048,Other specified bacterial intestinal infections
7,A049,"Bacterial intestinal infection, unspecified"
8,A059,"Bacterial foodborne intoxication, unspecified"
9,A071,Giardiasis [lambliasis]


In [3]:
# first 5 rows of diagnosis table
df = sql("""SELECT *
         from ed.diagnosis order by icd_code
         """)
show(df,limit=True)

Unnamed: 0,subject_id,stay_id,seq_num,icd_code,icd_version,icd_title
0,10427677,35873964,1,0020,9,TYPHOID FEVER
1,11545281,36308244,1,0030,9,SALMONELLA ENTERITIS
2,19906243,39205868,1,0059,9,FOOD POISONING NOS
3,19732857,35260270,1,0059,9,FOOD POISONING NOS
4,10271383,31799013,1,0059,9,FOOD POISONING NOS
...,...,...,...,...,...,...
899045,18743637,38062620,2,Z9981,10,Dependence on supplemental oxygen
899046,13725152,36543742,4,Z9981,10,Dependence on supplemental oxygen
899047,14533823,34488255,7,Z9981,10,Dependence on supplemental oxygen
899048,11548266,37524655,3,Z9981,10,Dependence on supplemental oxygen


In [4]:
# count of distinct icd titles (diagnoses)
df = sql("""SELECT count(icd_title), icd_title 
         from ed.diagnosis
         group by 2
         order by 1 desc""")
show(df, limit=True)

Unnamed: 0,count(icd_title),icd_title
0,26816,HYPERTENSION NOS
1,21264,Essential (primary) hypertension
2,13016,"Chest pain, unspecified"
3,12398,CHEST PAIN NOS
4,12026,DIABETES UNCOMPL ADULT
...,...,...
13167,1,"Vesical fistula, not elsewhere classified"
13168,1,Dislocation of proximal interphaln joint of r ...
13169,1,"Oth intraoperative complications of skin, subcu"
13170,1,"Breakdown (mechanical) of int fix of bones, init"


In [5]:
sql("""SELECT *
         from hosp.d_icd_diagnoses
         -- group by 2
         -- order by 1 desc
         where icd_code like '%0'
         """)
show(df, limit=True)

Unnamed: 0,count(icd_title),icd_title
0,26816,HYPERTENSION NOS
1,21264,Essential (primary) hypertension
2,13016,"Chest pain, unspecified"
3,12398,CHEST PAIN NOS
4,12026,DIABETES UNCOMPL ADULT
...,...,...
13167,1,"Vesical fistula, not elsewhere classified"
13168,1,Dislocation of proximal interphaln joint of r ...
13169,1,"Oth intraoperative complications of skin, subcu"
13170,1,"Breakdown (mechanical) of int fix of bones, init"


In [6]:
# count of distinct icd titles (diagnoses)
df_diagnoses = sql("""SELECT *
         from hosp.diagnoses_icd""")
show(df_diagnoses,limit=True)

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version
0,10000032,22595853,1,5723,9
1,10000032,22595853,2,78959,9
2,10000032,22595853,3,5715,9
3,10000032,22595853,4,07070,9
4,10000032,22595853,5,496,9
...,...,...,...,...,...
6364483,19999987,23865745,7,41401,9
6364484,19999987,23865745,8,78039,9
6364485,19999987,23865745,9,0413,9
6364486,19999987,23865745,10,36846,9


In [7]:
# avg length of stay in days
sql("""
SELECT ROUND(AVG(EXTRACT(EPOCH FROM (dischtime - admittime))/3600/24), 2) AS avg_los_days
FROM hosp.admissions;
""")

avg_los_days
f64
4.76


In [8]:
# diagnoses per admission
sql("""
SELECT COUNT(*) / COUNT(DISTINCT hadm_id) AS avg_diagnoses_per_admission
FROM hosp.diagnoses_icd;
""")

avg_diagnoses_per_admission
f64
11.66732


In [9]:
# create a column 'has_A0' indicating if patient has any diagnosis code starting with 'A0'
df_diagnoses = sql("""SELECT *
         from hosp.diagnoses_icd""")
show(df_diagnoses,limit=True)

has_A0 = (
    df_diagnoses
    .with_columns(pl.col("icd_code").str.starts_with("A0").alias("has_A0"))
    .group_by("subject_id")
    .agg(pl.col("has_A0").max().alias("has_A0"))
)
#
#df = df.with_columns(pl.col("has_A0").cast(pl.Int8))

df_lr= sql("""
    SELECT 
        p.subject_id,
        a.hadm_id,
        p.gender,
        p.anchor_age,
        a.race,
        a.insurance,
        a.marital_status,
        a.language
    FROM hosp.patients p
    JOIN hosp.admissions a 
           ON p.subject_id = a.subject_id
    
    
""")

df_lr = (df_lr.
         join(has_A0, on="subject_id", how="left")
         .with_columns(pl.col("has_A0").fill_null(False).cast(pl.Int8)))

show(df_lr, limit=True)


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version
0,10000032,22595853,1,5723,9
1,10000032,22595853,2,78959,9
2,10000032,22595853,3,5715,9
3,10000032,22595853,4,07070,9
4,10000032,22595853,5,496,9
...,...,...,...,...,...
6364483,19999987,23865745,7,41401,9
6364484,19999987,23865745,8,78039,9
6364485,19999987,23865745,9,0413,9
6364486,19999987,23865745,10,36846,9


Unnamed: 0,subject_id,hadm_id,gender,anchor_age,race,insurance,marital_status,language,has_A0
0,10000032,22595853,F,52,WHITE,Medicaid,WIDOWED,English,0
1,10000032,22841357,F,52,WHITE,Medicaid,WIDOWED,English,0
2,10000032,25742920,F,52,WHITE,Medicaid,WIDOWED,English,0
3,10000032,29079034,F,52,WHITE,Medicaid,WIDOWED,English,0
4,10000068,25022803,F,19,WHITE,,SINGLE,English,0
...,...,...,...,...,...,...,...,...,...
546023,19341569,20751610,M,50,BLACK/AFRICAN AMERICAN,Medicare,SINGLE,English,0
546024,19341569,21024053,M,50,BLACK/AFRICAN AMERICAN,Medicare,SINGLE,English,0
546025,19341569,21395041,M,50,BLACK/AFRICAN AMERICAN,Medicare,SINGLE,English,0
546026,19341569,21855673,M,50,BLACK/AFRICAN AMERICAN,Medicare,SINGLE,English,0
