# Teil 5 Demo 1: Aufwertung von Daten mit Terminologien

## Bibliotheken & Konfiguration

In [1]:
# Packages this notebook depends on.
packages <- c("readr", "dplyr", "stringr", "tidyr", "icd.data")

# Install only the packages that are not present yet. The call is skipped
# entirely when nothing is missing, so install.packages() is never invoked
# with an empty vector.
missing_packages <- setdiff(packages, rownames(installed.packages()))
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach every package. library() raises an error when a package cannot be
# loaded, whereas require() would only return FALSE and let the notebook
# continue into later, harder-to-diagnose failures. invisible() keeps the
# list returned by lapply() out of the cell output.
invisible(lapply(packages, library, character.only = TRUE))

# Base URL of the raw CSV files; file names are appended to it when loading.
base_url <- "https://raw.githubusercontent.com/ganslats/TMF-School-2022-Block-4/master/Rohdaten/mimic-iii-demo/"

Loading required package: readr

Loading required package: dplyr


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union


Loading required package: stringr

Loading required package: tidyr

Loading required package: icd.data



## Ausgewählte MIMIC III-Rohdaten laden

In [2]:
# Load the two raw tables used in this demo from base_url. Column types are
# declared explicitly; icd9_code is read as character in both tables and is
# the column the later joins match on.

# Diagnosis rows from DIAGNOSES_ICD.csv.
mimic.diagnoses.raw <- read_csv(
  paste0(base_url, "DIAGNOSES_ICD.csv"),
  col_types = cols(
    row_id     = col_double(),
    subject_id = col_double(),
    hadm_id    = col_double(),
    seq_num    = col_double(),
    icd9_code  = col_character()
  )
)

# ICD-9 dictionary from D_ICD_DIAGNOSES.csv (code plus short and long title).
mimic.d_icd.raw <- read_csv(
  paste0(base_url, "D_ICD_DIAGNOSES.csv"),
  col_types = cols(
    row_id      = col_double(),
    icd9_code   = col_character(),
    short_title = col_character(),
    long_title  = col_character()
  )
)

# Optional previews, left disabled. NOTE(review): mimic.patients.raw and
# mimic.admissions.raw are not loaded in this cell.
#head(mimic.patients.raw)
#head(mimic.admissions.raw)
#head(mimic.diagnoses.raw)
#head(mimic.d_icd.raw)

## Häufigste Diagnosen ermitteln

In [3]:
# Count the diagnosis rows per ICD-9 code and show the ten most frequent.
# .groups = "keep" leaves the result grouped by icd9_code; arrange() is
# called without .by_group, so the sort runs over the whole table.
mimic.diagnoses.raw %>%
  group_by(icd9_code) %>%
  summarize(n = n(), .groups = "keep") %>%
  arrange(desc(n)) %>%
  head(10)

icd9_code,n
<chr>,<int>
4019,53
42731,48
5849,45
4280,39
25000,31
51881,31
2724,29
5990,27
486,26
2859,25


## Bezeichner aus dem ICD9-Katalog ergänzen

In [4]:
# Attach the short title from the ICD-9 dictionary to every diagnosis row,
# then count per (code, title) pair and show the ten most frequent.
# The inner join keeps only diagnosis rows whose icd9_code has a dictionary
# entry; only icd9_code and short_title are taken from the dictionary.
mimic.diagnoses.raw %>%
  inner_join(
    select(mimic.d_icd.raw, icd9_code, short_title),
    by = "icd9_code"
  ) %>%
  group_by(icd9_code, short_title) %>%
  summarize(n = n(), .groups = "keep") %>%
  arrange(desc(n)) %>%
  head(10)

icd9_code,short_title,n
<chr>,<chr>,<int>
4019,Hypertension NOS,53
42731,Atrial fibrillation,48
5849,Acute kidney failure NOS,45
4280,CHF NOS,39
25000,DMII wo cmp nt st uncntr,31
51881,Acute respiratry failure,31
2724,Hyperlipidemia NEC/NOS,29
5990,Urin tract infection NOS,27
486,"Pneumonia, organism NOS",26
2859,Anemia NOS,25


In [5]:
# Take a working copy of icd9cm_hierarchy and add icd9_code, a character
# version of its code column, so the table shares a column name with the
# MIMIC diagnosis table. The same load is repeated in the next section.
icd9.hierarchy.raw <- icd9cm_hierarchy
icd9.hierarchy.raw[["icd9_code"]] <- as.character(icd9.hierarchy.raw[["code"]])

# Preview the first rows, including the new column.
head(icd9.hierarchy.raw)

Unnamed: 0_level_0,code,billable,short_desc,long_desc,three_digit,major,sub_chapter,chapter,icd9_code
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<chr>,<fct>,<fct>,<fct>,<fct>,<chr>
1,1,False,Cholera,Cholera,1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,1
2,10,True,Cholera d/t vib cholerae,Cholera due to vibrio cholerae,1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,10
3,11,True,Cholera d/t vib el tor,Cholera due to vibrio cholerae el tor,1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,11
4,19,True,Cholera NOS,"Cholera, unspecified",1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,19
5,2,False,Typhoid and paratyphoid fevers,Typhoid and paratyphoid fevers,2,Typhoid and paratyphoid fevers,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,2
6,20,True,Typhoid fever,Typhoid fever,2,Typhoid and paratyphoid fevers,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,20


## ICD9-Hierarchie ergänzen

### ICD9-Hierarchie aus dem R-Paket "icd.data" laden und String-Version des ICD-Codes ergänzen

In [6]:
# Copy the ICD-9-CM hierarchy table into a working variable.
icd9.hierarchy.raw <- icd9cm_hierarchy

# Add the code as a character column named icd9_code; this is the key the
# sub-chapter aggregation joins on.
icd9.hierarchy.raw[["icd9_code"]] <- as.character(icd9.hierarchy.raw[["code"]])

# Show the first rows of the extended table.
head(icd9.hierarchy.raw)

Unnamed: 0_level_0,code,billable,short_desc,long_desc,three_digit,major,sub_chapter,chapter,icd9_code
Unnamed: 0_level_1,<chr>,<lgl>,<chr>,<chr>,<fct>,<fct>,<fct>,<fct>,<chr>
1,1,False,Cholera,Cholera,1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,1
2,10,True,Cholera d/t vib cholerae,Cholera due to vibrio cholerae,1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,10
3,11,True,Cholera d/t vib el tor,Cholera due to vibrio cholerae el tor,1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,11
4,19,True,Cholera NOS,"Cholera, unspecified",1,Cholera,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,19
5,2,False,Typhoid and paratyphoid fevers,Typhoid and paratyphoid fevers,2,Typhoid and paratyphoid fevers,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,2
6,20,True,Typhoid fever,Typhoid fever,2,Typhoid and paratyphoid fevers,Intestinal Infectious Diseases,Infectious And Parasitic Diseases,20


## Aggregation auf Ebene der Unterkapitel des ICD9-Katalogs

In [7]:
# Map every diagnosis row to its ICD-9 sub-chapter through the hierarchy
# table, then count rows per sub-chapter and show the ten largest.
# The inner join keeps only diagnosis rows whose icd9_code appears in the
# hierarchy table.
mimic.diagnoses.raw %>%
  inner_join(icd9.hierarchy.raw, by = "icd9_code") %>%
  group_by(sub_chapter) %>%
  summarize(n = n(), .groups = "keep") %>%
  arrange(desc(n)) %>%
  head(10)

sub_chapter,n
<fct>,<int>
Other Forms Of Heart Disease,152
Other Metabolic And Immunity Disorders,134
Diseases Of The Blood And Blood-Forming Organs,82
"Nephritis, Nephrotic Syndrome, And Nephrosis",74
Hypertensive Disease,73
Other Diseases Of Respiratory System,66
Symptoms,63
Diseases Of Other Endocrine Glands,58
Other Bacterial Diseases,57
Persons With Potential Health Hazards Related To Personal And Family History,54
