In [None]:
######################
## ICD descriptions ##
######################
# Builds `icd_mapping`: the distinct (icd_code, icd_version, ACT_NAME_CHAR,
# Category) rows derived from the NCATS ICD-10/ICD-9 diagnosis map CSV.
ncats <- read.csv("./data_A_NCATS_ICD10_ICD9_DX_V1_MAP.csv")

# ACT_CONCEPT_CD is split on ":" once; piece 1 is kept as the coding-system
# label and piece 2 as the code.
# presumably values look like "ICD10CM:A00.0" -- TODO confirm against the file
conceptParts <- strsplit(as.character(ncats$ACT_CONCEPT_CD), "[:]")

# vapply() guarantees a character vector of one value per row (NA when the
# piece is missing), whereas sapply() may silently return a list.
ncats$icd_code <- vapply(conceptParts, `[`, character(1), 2)
ncats$Version <- vapply(conceptParts, `[`, character(1), 1)

# The version is whatever follows the first three characters of the label,
# with any "CM" removed.
ncats$icd_version <- substr(x = ncats$Version, 4, nchar(ncats$Version))
ncats$icd_version <- gsub("CM", "", ncats$icd_version)

# Category is the third backslash-separated piece of ACT_CONCEPT_PATH, cut at
# the first "(". Text before the "(" is kept verbatim, including any trailing
# space.
pathParts <- strsplit(as.character(ncats$ACT_CONCEPT_PATH), "[\\]")
ncats$Category <- vapply(pathParts, `[`, character(1), 3)
categoryParts <- strsplit(as.character(ncats$Category), "[(]")
ncats$Category <- vapply(categoryParts, `[`, character(1), 1)

# Version-9 codes have their dots removed; other versions are left as they are.
# NOTE(review): presumably this matches the undotted ICD-9 codes in the
# diagnosis files -- confirm.
ncats$icd_code <- ifelse(ncats$icd_version == "9",
                         gsub("[.]", "", ncats$icd_code),
                         ncats$icd_code)

# Keep only the lookup columns and drop exact duplicate rows.
icd_mapping <- ncats[, c("icd_code", "icd_version", "ACT_NAME_CHAR", "Category")]
icd_mapping <- icd_mapping[!duplicated(icd_mapping), ]

In [None]:
#####################
## Diagnosis files ##
#####################
# Assembles `diagnosis`: ICD code counts per siteid together with the
# percentage of patients they represent, first for the combined file and then
# for the per-country file, stacked into one data frame.

diagnosisColumns <- c("siteid", "icd_code", "icd_version", "num_patients")

# --- Combined file: the percentage uses a fixed denominator of 15637. ---
diagnosisCombined <- read.csv(
  "./Diagnoses-Combined200405.csv",
  sep = ",", header = TRUE, colClasses = "character"
)
diagnosisCombined <- diagnosisCombined[, diagnosisColumns]
combinedCounts <- as.numeric(diagnosisCombined$num_patients)
diagnosisCombined$perc_patients <- round(100 * (combinedCounts / 15637), 2)

# --- Per-country file: the denominator comes from the demographics file. ---
diagnosisPerCountry <- read.csv(
  "./Diagnoses-CombinedByCountry200405.csv",
  sep = ",", header = TRUE, colClasses = "character"
)
diagnosisPerCountry <- diagnosisPerCountry[, diagnosisColumns]

demographicsPerCountry <- read.csv(
  "./Demographics-CombinedByCountry200405.csv",
  sep = ",", header = TRUE, colClasses = "character"
)
# Only the rows with sex == "All" are used as totals.
# NOTE(review): assumes exactly one such row per siteid; more than one would
# duplicate diagnosis rows in the merge below -- confirm.
isAllSexes <- demographicsPerCountry$sex == "All"
demographicsPerCountry <- demographicsPerCountry[isAllSexes, c("siteid", "total_patients")]

# Every column was read as character, so siteid joins as text on both sides.
diagnosisPerCountry <- merge(diagnosisPerCountry, demographicsPerCountry, by = "siteid")
countryCounts <- as.numeric(diagnosisPerCountry$num_patients)
countryTotals <- as.numeric(diagnosisPerCountry$total_patients)
diagnosisPerCountry$perc_patients <- round(100 * (countryCounts / countryTotals), 2)
diagnosisPerCountry <- diagnosisPerCountry[, c(diagnosisColumns, "perc_patients")]

# Stack the combined rows on top of the per-country rows.
diagnosis <- rbind(diagnosisCombined, diagnosisPerCountry)

In [None]:
#########################################################
### Filter the ICD by percentage of affected patients ###
#########################################################
# Keeps the rows of `diagnosis` whose perc_patients is strictly greater than
# `percentageFilter` (a percentage, so 0.5 means 0.5 %).
percentageFilter <- 0.5

# which() keeps only rows where the comparison is TRUE. A bare logical index
# would turn every NA percentage into an all-NA row in `selection`, and those
# rows would then show up in the plots.
aboveThreshold <- which(as.numeric(diagnosis$perc_patients) > percentageFilter)
selection <- diagnosis[aboveThreshold, ]

In [None]:
#################################################
## Heatmap with ICD description and categories ##
#################################################
# Adds a description and a category to every selected code, then draws one
# tile per (description, siteid), faceted by category. Tile colour encodes the
# site and tile opacity encodes perc_patients.

# Left join: selected codes with no match in icd_mapping are kept, with NA
# description and category.
# NOTE(review): the join key is icd_code only. Both inputs carry icd_version,
# so the result has icd_version.x / icd_version.y, and a code string present
# under more than one version would match several mapping rows -- confirm
# this is intended before changing it (later code reads icd_version.x).
selectionDesc <- merge(selection, icd_mapping, all.x = TRUE, by = "icd_code")

# Fall back to the raw code when no description is available.
selectionDesc$description <- ifelse(is.na(selectionDesc$ACT_NAME_CHAR),
                                    as.character(selectionDesc$icd_code),
                                    as.character(selectionDesc$ACT_NAME_CHAR))

# Full category name (as found in the mapping, trailing space included) paired
# with the short label used on the facet strips. Keeping each pair on one line
# prevents levels and labels from drifting out of step. The order defines the
# facet order. Categories not listed here become NA.
categoryLabels <- c(
  "Certain conditions originating in the perinatal period " = "Perinatal",
  "Congenital malformations, deformations and chromosomal abnormalities " = "Congenital&Chromosomal",
  "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified " = "Symptoms",
  "Certain infectious and parasitic diseases " = "Infectious",
  "Factors influencing health status and contact with health services " = "Factors influencing health status",
  "Pregnancy, childbirth and the puerperium " = "Pregnancy, childbirth",
  "Diseases of the nervous system " = "Nervous",
  "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism " = "Blood",
  "Diseases of the circulatory system " = "Circulatory",
  "Neoplasms " = "Neoplasms",
  "Diseases of the musculoskeletal system and connective tissue " = "Musculoskeletal",
  "Injury, poisoning and certain other consequences of external causes " = "Injury,poisoning",
  "Diseases of the digestive system " = "Digestive",
  "Diseases of the respiratory system " = "Respiratory",
  "Diseases of the genitourinary system " = "Genitourinary",
  "Endocrine, nutritional and metabolic diseases " = "Endocrine/metabolic",
  "Diseases of the ear and mastoid process " = "Ear and mastoid",
  "Diseases of the eye and adnexa " = "Eye and adnexa",
  "Mental and behavioral disorders " = "Mental",
  "Diseases of the skin and subcutaneous tissue " = "Skin",
  "External causes of morbidity " = "External causes"
)
selectionDesc$Category <- factor(selectionDesc$Category,
                                 levels = names(categoryLabels),
                                 labels = unname(categoryLabels))

ggplot(selectionDesc, aes(y=description, x=siteid, fill=siteid, alpha=perc_patients)) +
  geom_tile() + 
  scale_fill_manual(values = c("Italy" = "#009E73", "France" = "#0072B2", "Germany" = "#E69F00", "USA" = "#D55E00", "Combined" = "#444444")) +
  theme_bw() +
  theme(panel.grid=element_blank(), axis.text.y  = element_text(size=5),plot.title = element_text(size=7)) +
  coord_cartesian(expand=FALSE)+
  # One facet row per category; free scales so each row lists only its own
  # descriptions.
  facet_grid(Category ~ ., scales = "free")+theme(
    strip.text.y = element_text(
      size = 5))+
  labs(title = "Diagnoses (Date 2020-04-05) | 12 sites | 15,637 patients ")

In [None]:
################################################
## Split the heatmap in 3 different heatmaps ##
###############################################
# The annotated selection is cut into three pieces that are plotted separately
# with the same layout: categories outside the focus list, categories inside
# it, and codes that received no category.

# Shared heatmap layout: description by siteid, faceted by Category, with the
# site as fill colour and perc_patients as opacity.
plotCategoryHeatmap <- function(data) {
  siteColours <- c("Italy" = "#009E73", "France" = "#0072B2", "Germany" = "#E69F00",
                   "USA" = "#D55E00", "Combined" = "#444444")
  ggplot(data, aes(y = description, x = siteid, fill = siteid, alpha = perc_patients)) +
    geom_tile() +
    scale_fill_manual(values = siteColours) +
    theme_bw() +
    theme(panel.grid = element_blank(),
          axis.text.y = element_text(size = 5),
          plot.title = element_text(size = 7)) +
    coord_cartesian(expand = FALSE) +
    facet_grid(Category ~ ., scales = "free") +
    theme(strip.text.y = element_text(size = 5)) +
    labs(title = "Diagnoses (Date 2020-04-05) | 12 sites | 15,637 patients ")
}

# Codes without a category (no match in the mapping, or a category outside
# the known levels), and the version-9 subset of those.
hasNoCategory <- is.na(selectionDesc$Category)
subsetNa <- selectionDesc[hasNoCategory, ]
subsetNaIcd9 <- subsetNa[subsetNa$icd_version.x == 9, ]

# Codes with a category, split by membership in the focus list.
subsetNoNa <- selectionDesc[!hasNoCategory, ]
focusCategories <- c("Respiratory",
                     "Infectious",
                     "Symptoms",
                     "Factors influencing health status")
isFocus <- subsetNoNa$Category %in% focusCategories
subset1 <- subsetNoNa[!isFocus, ]
subset2 <- subsetNoNa[isFocus, ]

# Called at top level so each plot is printed, in this order.
plotCategoryHeatmap(subset1)

plotCategoryHeatmap(subset2)

plotCategoryHeatmap(subsetNa)

In [None]:
######################
## Barplot with ICD ##
######################
# Horizontal dodged bars: one group per ICD code (ordered by perc_patients),
# one bar per site, bar length = percentage of that site's patients.

# num_patients is stored as text, so it must be converted before taking the
# minimum; min() on strings compares them alphabetically ("100" < "99").
minPatients <- min(as.numeric(selection$num_patients), na.rm = TRUE)

ggplot(data=selection, aes(x=reorder(icd_code,perc_patients), y=perc_patients)) +
  geom_bar(aes(fill= siteid), stat="identity", position=position_dodge()) + 
  theme_bw()+
  theme(axis.text.x = element_text(angle =45, hjust = 1), axis.text.y = element_text(size=5))+
  # The bars show perc_patients, so the title and y label name a percentage.
  # x/y in labs() refer to the aesthetics, before coord_flip() swaps the axes.
  labs(title = paste0("Percentage of patients by ICD9/ICD10 code (>= ", minPatients, " patients)"),
       x = "ICD9/ICD10 code", y = "% of patients")+ coord_flip()+
  scale_fill_manual("legend", values = c("Italy" = "#009E73", "France" = "#0072B2", "Germany" = "#E69F00", "USA" = "#D55E00", "Combined" = "#444444"))


In [None]:
######################
## Heatmap with ICD ##
######################
# One tile per (icd_code, siteid) of the filtered selection: the site sets the
# fill colour and perc_patients sets the opacity. White tile borders separate
# neighbouring cells.
siteColours <- c(
  "Italy" = "#009E73",
  "France" = "#0072B2",
  "Germany" = "#E69F00",
  "USA" = "#D55E00",
  "Combined" = "#444444"
)

icdHeatmap <- ggplot(
  selection,
  aes(y = icd_code, x = siteid, fill = siteid, alpha = perc_patients)
)
icdHeatmap <- icdHeatmap +
  geom_tile(colour = "white", size = 1) +
  scale_fill_manual(values = siteColours)
icdHeatmap <- icdHeatmap +
  theme_bw() +
  theme(panel.grid = element_blank()) +
  coord_cartesian(expand = FALSE)

# Evaluated at top level so the plot is printed.
icdHeatmap