# Clean up ICD code annotations

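To make the ICD code-related files easier to interpret, this script combines the ICD-9 and ICD-10-CM phecode maps into a single cleaned-up table with the fields id_one (the ICD code itself), Code (the coding system, `ICD9` or `ICD10CM`), Code_Description, Phecode_Description, and Excl_Phenotypes.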

In [None]:
# Attach the packages used throughout, silencing their start-up messages
suppressMessages({
  library(data.table)
  library(dplyr)
})
# Relative file paths used below resolve against this directory
setwd("/home/jupyter/workspaces/ebvphewas")

Load in the files (queried from `01_Query_PheWAS_inputs`):

In [None]:
# Read the ICD-10-CM and ICD-9 phecode mapping tables from the working directory
ICD10_Phecodes <- data.table::fread(file = "Phecode_map_v1_2_icd10cm_beta.csv")
ICD9_Phecodes <- data.table::fread(file = "ICDPhecodes")

Check that there are no codes overlapping between the two:

In [None]:
# Codes that appear in both maps; an empty result means the code sets are
# disjoint. The value is displayed as the cell output.
shared_codes <- intersect(
  ICD10_Phecodes[["ICD10CM"]],
  ICD9_Phecodes[["ICD9"]]
)
shared_codes

Harmonize the column names of both data frames, then collapse them into one:

In [None]:
# Bring the ICD-10-CM map into the shared output layout:
#   1. give the kept columns their common names,
#   2. drop the PHECODE and `Excl. Phecodes` columns,
#   3. tag every row with the coding system it came from.
ICD10_Phecodes <- ICD10_Phecodes %>%
  dplyr::rename(
    id_one = ICD10CM,
    Code_Description = ICD10CM_DESCRIPTION,
    Phecode_Description = PHECODE_DESCRIPTION,
    Excl_Phenotypes = `Excl. Phenotypes`
  ) %>%
  dplyr::select(-c(PHECODE, `Excl. Phecodes`)) %>%
  dplyr::mutate(Code = "ICD10CM")

# Preview the first rows of the reshaped table
head(ICD10_Phecodes)

In [None]:
# Bring the ICD-9 map into the shared output layout:
#   1. give the kept columns their common names,
#   2. drop the PheCode, `Excl. Phecodes`, Rollup, Leaf and `Ignore Bool`
#      columns,
#   3. tag every row with the coding system it came from.
ICD9_Phecodes <- ICD9_Phecodes %>%
  dplyr::rename(
    id_one = ICD9,
    Code_Description = `ICD9 String`,
    Phecode_Description = Phenotype,
    Excl_Phenotypes = `Excl. Phenotypes`
  ) %>%
  dplyr::select(
    -c(PheCode, `Excl. Phecodes`, Rollup, Leaf, `Ignore Bool`)
  ) %>%
  dplyr::mutate(Code = "ICD9")

# Preview the first rows of the reshaped table
head(ICD9_Phecodes)

In [None]:
# Stack the two harmonised tables (ICD-10-CM rows first, then ICD-9 rows).
# Columns are matched by name (use.names = TRUE) rather than by position, so
# values cannot end up under the wrong header if the two inputs ever list
# their columns in a different order; a column missing from one input is
# reported as an error instead of being bound silently.
ICD_Phecodes <- data.table::rbindlist(
  list(ICD10_Phecodes, ICD9_Phecodes),
  use.names = TRUE
)

# Preview the first rows of the combined table
head(ICD_Phecodes)

In [None]:
# Save the combined annotation table as CSV in the working directory
data.table::fwrite(x = ICD_Phecodes, file = "ICD_Phecodes_All.csv")