In [None]:
#### Load required packages
library(dplyr)
library(tidyverse)
library(ggplot2)

In [None]:
# Read the medication table from a comma-separated text file with a header
# row. read.csv() is read.delim() with the separator already set to ",".
meds <- read.csv(file = "./tblMedNonHispF_codes.txt", header = TRUE)


In [None]:
# Quick look at the medication table: its dimensions and its first rows.
dim(meds)
head(meds)

In [None]:
# Read the diagnosis table from a comma-separated text file with a header
# row. read.csv() is read.delim() with the separator already set to ",".
icd <- read.csv(file = "tblICD10_date_non_hispanics.txt", header = TRUE)

In [None]:
# Dimensions (rows, columns) of the diagnosis table.
dim(icd)

In [None]:
# Number of distinct PATIENT_NUM values in the diagnosis table.
length(unique(icd$PATIENT_NUM))

In [None]:
# First rows and dimensions of the diagnosis table.
head(icd)
dim(icd)

In [None]:
### Clean and format the medication table
# Keep observations recorded at age 65 or older, reduce each row to
# (patient, RXNORM code, medication date) and drop duplicated rows.
# vapply() with character(1) replaces sapply() so the derived columns are
# always character vectors, even when no row passes the age filter (sapply()
# would return an empty list in that case).
meds_formatted <- meds %>%
  filter(age_at_observation >= 65) %>%
  mutate(
    # second ":"-separated piece of concept_cd
    RXNORM = vapply(
      strsplit(as.character(concept_cd), "[:]"),
      function(parts) parts[2],
      character(1)
    ),
    # text before the first space of start_date
    med_date = vapply(
      strsplit(as.character(start_date), " "),
      function(parts) parts[1],
      character(1)
    )
  ) %>%
  select(PATIENT_NUM, RXNORM, med_date) %>%
  unique()

head(meds_formatted)

In [None]:
# Clean and format the diagnosis table: reduce each row to
# (patient, ICD category, diagnosis date, demographic code) and drop
# duplicated rows.
# vapply() with character(1) replaces sapply() so the derived columns are
# always character vectors, even for an empty input table.
icd_formatted <- icd %>%
  mutate(
    # text before the first "." of CONCEPT_CD ...
    ICD = vapply(
      strsplit(as.character(CONCEPT_CD), "[.]"),
      function(parts) parts[1],
      character(1)
    ),
    # ... with the "ICD10CM:" prefix removed
    ICD = gsub("ICD10CM:", "", ICD),
    # text before the first space of START_DATE
    diag_date = vapply(
      strsplit(as.character(START_DATE), " "),
      function(parts) parts[1],
      character(1)
    ),
    # second ":"-separated piece of HISPANIC_CD
    demog = vapply(
      strsplit(as.character(HISPANIC_CD), "[:]"),
      function(parts) parts[2],
      character(1)
    )
  ) %>%
  select(PATIENT_NUM, ICD, diag_date, demog) %>%
  unique()

head(icd_formatted)

In [None]:
### Pair every diagnosis row with every medication row of the same patient
# The join key is PATIENT_NUM only; the dates are compared in a later step.
ICD_med <- merge(x = icd_formatted, y = meds_formatted, by = "PATIENT_NUM")
head(ICD_med)


In [None]:
### Day gap between the diagnosis date and the medication date
### Keep the pairs where the medication date is 0 to 7 days after the diagnosis
ICD_med_filtered <- ICD_med %>%
  mutate(days = as.Date(med_date) - as.Date(diag_date)) %>%
  filter(days >= 0, days <= 7)

In [None]:
# Preview the diagnosis/medication pairs kept by the 0-7 day filter.
head(ICD_med_filtered)

In [None]:
# Distribution of the day gap, then the number of distinct patients and of
# distinct RXNORM codes among the kept pairs.
summary(as.numeric(ICD_med_filtered$days))
length(unique(ICD_med_filtered$PATIENT_NUM))
length(unique(ICD_med_filtered$RXNORM))

In [None]:
### read the concept dimension table
### filter by the RXNORM concepts, and use the concept path to aggregate the results in a meaningful
### way without losing information

In [None]:
# Read the concept dimension table from a comma-separated text file with a
# header row. read.csv() is read.delim() with the separator already set to ",".
concept_dimension <- read.csv(file = "concept_dimension.txt", header = TRUE)

In [None]:
# Filter by medications
# concept_type is the third backslash-separated piece of CONCEPT_PATH; only
# rows whose concept_type is 'MedicationsByVaClassV2_09302018' are kept.
# vapply() with character(1) replaces sapply() so concept_type is always a
# character vector, even for an empty table.
concept_dimension <- concept_dimension %>%
  mutate(
    concept_type = vapply(
      strsplit(as.character(CONCEPT_PATH), "\\\\"),
      function(parts) parts[3],
      character(1)
    )
  ) %>%
  filter(concept_type == 'MedicationsByVaClassV2_09302018')

In [None]:
# Preview the current contents of concept_dimension.
head(concept_dimension)

In [None]:
# Extract the codes at two levels of the concept path:
# concept_type_l1 is the 5th and concept_type_l2 the 6th backslash-separated
# piece of CONCEPT_PATH (NA when the path has fewer pieces).
# vapply() with character(1) replaces sapply() so both columns are always
# character vectors, even when concept_dimension has no rows.
concept_dimension <- concept_dimension %>%
  mutate(
    concept_type_l1 = vapply(
      strsplit(as.character(CONCEPT_PATH), "\\\\"),
      function(parts) parts[5],
      character(1)
    ),
    concept_type_l2 = vapply(
      strsplit(as.character(CONCEPT_PATH), "\\\\"),
      function(parts) parts[6],
      character(1)
    )
  )

head(concept_dimension)

In [None]:
### Select the RXNORM concepts
# CONCEPT_CD is split on ":" into a code type (first piece) and a code
# (second piece); only rows whose code type is "RXNORM" are kept.
# vapply() with character(1) replaces sapply() so both derived columns are
# always character vectors, even when concept_dimension has no rows.
rxnorms <- concept_dimension %>%
  mutate(
    code_type = vapply(
      strsplit(as.character(CONCEPT_CD), "[:]"),
      function(parts) parts[1],
      character(1)
    ),
    code = vapply(
      strsplit(as.character(CONCEPT_CD), "[:]"),
      function(parts) parts[2],
      character(1)
    )
  ) %>%
  filter(code_type == "RXNORM") %>%
  select(
    code_type,
    code,
    description = NAME_CHAR,
    concept_type_l1,
    concept_type_l2
  ) %>%
  unique()

head(rxnorms)


In [None]:
# Level-1 descriptions: rows that have a level-1 code but no level-2 code.
# description_l1 is the NAME_CHAR text before the first "("; whitespace that
# precedes the parenthesis is kept as is.
# vapply() with character(1) replaces sapply() so description_l1 is always a
# character vector, even when no row passes the filter.
level1_description <- concept_dimension %>%
  filter(is.na(concept_type_l2), !is.na(concept_type_l1)) %>%
  mutate(
    description_l1 = vapply(
      strsplit(as.character(NAME_CHAR), "[(]"),
      function(parts) parts[1],
      character(1)
    )
  ) %>%
  select(concept_type_l1, description_l1) %>%
  unique()

dim(level1_description)
head(level1_description)

In [None]:
# Level-2 descriptions: rows whose CONCEPT_CD prefix (text before ":") is
# "VANDF" and that have a level-2 code.
# description_l2 is the NAME_CHAR text before the first "("; whitespace that
# precedes the parenthesis is kept as is.
# vapply() with character(1) replaces sapply() so the derived columns are
# always character vectors, even when no row passes the filter.
level2_description <- concept_dimension %>%
  mutate(
    code_type = vapply(
      strsplit(as.character(CONCEPT_CD), "[:]"),
      function(parts) parts[1],
      character(1)
    )
  ) %>%
  filter(code_type == "VANDF" & !is.na(concept_type_l2)) %>%
  mutate(
    description_l2 = vapply(
      strsplit(as.character(NAME_CHAR), "[(]"),
      function(parts) parts[1],
      character(1)
    )
  ) %>%
  select(concept_type_l1, concept_type_l2, description_l2) %>%
  unique()

head(level2_description)

In [None]:
### Combine the level-1 and level-2 description tables on concept_type_l1
aggregate_codes <- merge(
  x = level1_description,
  y = level2_description,
  by = "concept_type_l1"
)
head(aggregate_codes)

In [None]:
## Attach the level-1 / level-2 descriptions to the RXNORM table
# Both inputs carry a concept_type_l1 column, so merge() suffixes them with
# .x / .y; the copy coming from rxnorms (.x) is the one kept.
rxnorms_complete <- merge(
  x = rxnorms,
  y = aggregate_codes,
  by = "concept_type_l2"
) %>%
  select(
    code_type,
    RXNORM = code,
    description,
    concept_type_l1 = concept_type_l1.x,
    concept_type_l2,
    description_l1,
    description_l2
  )

In [None]:
# Preview the RXNORM table with its level-1 / level-2 descriptions.
head(rxnorms_complete)

In [None]:
# Copy of rxnorms_complete under the name rxnorm_dictionary.
rxnorm_dictionary <- rxnorms_complete

In [None]:
# Write the rxnorm_dictionary object to an .RData file.
save(list = "rxnorm_dictionary", file = "rxnorm_dictionary.RData")

In [None]:
# Spot check: show the dictionary rows for RXNORM code "198051".
rxnorms_complete %>% filter( RXNORM == "198051")

In [None]:
### Attach the RXNORM dictionary to the diagnosis/medication pairs
# Row counts are printed before and after the merge so the effect of the
# join on the number of rows can be compared.
dim(ICD_med_filtered)

ICD_med_filtered_codes_agg <- merge(
  x = ICD_med_filtered,
  y = rxnorms_complete,
  by = "RXNORM"
)

dim(ICD_med_filtered_codes_agg)
head(ICD_med_filtered_codes_agg)

In [None]:
# Split the RXNORM codes of the diagnosis/medication pairs by whether the
# RXNORM dictionary (rxnorms_complete) covers them.
# The codes are read from ICD_med_filtered, i.e. the table before the
# dictionary merge: the merged table only keeps rows whose RXNORM matched the
# dictionary, so searching it for unmatched codes always returns nothing.
missingCodes <- setdiff(ICD_med_filtered$RXNORM, rxnorms_complete$RXNORM)
presentCodes <- intersect(ICD_med_filtered$RXNORM, rxnorms_complete$RXNORM)


In [None]:
# Preview the diagnosis/medication pairs with the dictionary columns attached.
head(ICD_med_filtered_codes_agg)

In [None]:
## Replace the ICD codes with their descriptions
## RD = Related Disorders
# The replacements are applied one after another, in the order listed, to
# every occurrence of the code inside the ICD column.
ICD_med_filtered_codes_agg$ICD <- local({
  icd_labels <- c(
    F10 = "Alcohol RD",
    F11 = "Opioid RD",
    F12 = "Cannabis RD",
    F13 = "Sedative, hypnotic, or anxiolytic RD",
    F14 = "Cocaine RD",
    F15 = "Other stimulant RD",
    F16 = "Hallucinogen RD",
    F17 = "Nicotine dependence",
    F18 = "Inhalant RD",
    F19 = "Other psychoactive substance RD"
  )
  relabelled <- ICD_med_filtered_codes_agg$ICD
  for (icd_code in names(icd_labels)) {
    relabelled <- gsub(icd_code, icd_labels[[icd_code]], relabelled)
  }
  relabelled
})

In [None]:
### Patient-level tables for the two aggregation levels
# Level 1: unique (patient, diagnosis label, level-1 description) rows
level1_analysis <- unique(
  select(ICD_med_filtered_codes_agg, PATIENT_NUM, ICD, description_l1)
)

dim(level1_analysis)

# Level 2: same, with the level-2 description added
level2_analysis <- unique(
  select(
    ICD_med_filtered_codes_agg,
    PATIENT_NUM, ICD, description_l1, description_l2
  )
)

dim(level2_analysis)

In [None]:
# Preview the level-1 patient table.
head(level1_analysis)

In [None]:
# Preview the level-2 patient table.
head(level2_analysis)

In [None]:
### Count distinct patients per diagnosis label and medication description
# ungroup() follows each summarise(): summarising a table grouped by several
# keys leaves the result grouped by the remaining keys, and that leftover
# grouping would otherwise be carried into every table derived from these.

#### Level 1
counts_l1 <- level1_analysis %>%
  group_by(ICD, description_l1) %>%
  summarise(dist_pat = n_distinct(PATIENT_NUM)) %>%
  ungroup() %>%
  arrange(desc(dist_pat))

head(counts_l1)

#### Level 2
counts_l2 <- level2_analysis %>%
  group_by(ICD, description_l2, description_l1) %>%
  summarise(dist_pat = n_distinct(PATIENT_NUM)) %>%
  ungroup() %>%
  arrange(desc(dist_pat))

head(counts_l2)

In [None]:
### Heatmap of patient counts
### LEVEL 1: diagnosis label (x) against level-1 description (y)
options(repr.plot.width = 12, repr.plot.height = 8)

counts_l1 %>%
  ggplot(aes(x = ICD, y = description_l1, fill = dist_pat)) +
  geom_tile() +
  scale_fill_gradient(high = "blue", low = "white") +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

In [None]:
# Preview the level-2 patient counts.
head(counts_l2)

In [None]:
### LEVEL 2: patient counts per level-2 description
options(repr.plot.width = 12, repr.plot.height = 8)

# Restricted to one level-1 description; the trailing space inside the label
# is part of the value being matched.
ggplot(
  filter(counts_l2, description_l1 == "Central Nervous System Medications "),
  aes(x = ICD, y = description_l2, fill = dist_pat)
) +
  geom_tile() +
  scale_fill_gradient(high = "orange", low = "white") +
  facet_wrap(~description_l1) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

In [None]:
# Level-2 heatmap of patient counts, restricted to one level-1 description;
# the trailing space inside the label is part of the value being matched.
options(repr.plot.width = 12, repr.plot.height = 8)

ggplot(
  filter(counts_l2, description_l1 == "Cardiovascular Medications "),
  aes(x = ICD, y = description_l2, fill = dist_pat)
) +
  geom_tile() +
  scale_fill_gradient(high = "darkgreen", low = "white") +
  facet_wrap(~description_l1) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

In [None]:
### Denominator for the percentages: distinct patients per diagnosis label
# NOTE(review): level1_analysis is built from the merged diagnosis/medication
# table, so this counts only patients present in that table, not every
# patient with the diagnosis - confirm this is the intended denominator.
counts_total <- level1_analysis %>%
  group_by(ICD) %>%
  summarise(total_pat = n_distinct(PATIENT_NUM)) %>%
  arrange(desc(total_pat))

head(counts_total)


In [None]:
# Preview the level-1 patient counts.
head(counts_l1)

In [None]:
### Heatmap of percentages
### LEVEL 1
# perc_patients = 100 * dist_pat / total_pat, rounded to two decimals, where
# total_pat is joined in from counts_total by diagnosis label.
perc_l1 <- left_join(counts_l1, counts_total, by = "ICD") %>%
  mutate(perc_patients = round(100 * dist_pat / total_pat, digits = 2))
head(perc_l1)

options(repr.plot.width = 12, repr.plot.height = 8)

perc_l1 %>%
  ggplot(aes(x = ICD, y = description_l1, fill = perc_patients)) +
  geom_tile() +
  scale_fill_gradient(low = "white", high = "blue", limits = c(0, 100)) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

In [None]:
### LEVEL 2
# perc_patients = 100 * dist_pat / total_pat, rounded to two decimals, where
# total_pat is joined in from counts_total by diagnosis label.
perc_l2 <- left_join(counts_l2, counts_total, by = "ICD") %>%
  mutate(perc_patients = round(100 * dist_pat / total_pat, digits = 2))
head(perc_l2)

options(repr.plot.width = 12, repr.plot.height = 8)

# Heatmap restricted to one level-1 description; the trailing space inside the
# label is part of the value being matched.
ggplot(
  filter(perc_l2, description_l1 == "Central Nervous System Medications "),
  aes(x = ICD, y = description_l2, fill = perc_patients)
) +
  geom_tile() +
  scale_fill_gradient(high = "orange", low = "white") +
  facet_wrap(~description_l1) +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

#### Temporal trend

In [None]:
# Unique (patient, year, medication description, level-1, level-2) rows.
# year is the text before the first "-" of med_date.
# vapply() with character(1) replaces sapply() so year is always a character
# vector, even when the input table has no rows.
med_temp_trend <- ICD_med_filtered_codes_agg %>%
  mutate(
    year = vapply(
      strsplit(as.character(med_date), "-"),
      function(parts) parts[1],
      character(1)
    )
  ) %>%
  select(PATIENT_NUM, year, description, description_l1, description_l2) %>%
  unique()

head(med_temp_trend)

In [None]:
# Distinct patients per year and medication description, at both levels.
# ungroup() follows each summarize(): summarising a table grouped by two keys
# leaves the result grouped by the first key (year), and that leftover
# grouping would otherwise travel with the tables wherever they are used.
counts_med_year_l1 <- med_temp_trend %>%
  group_by(year, description_l1) %>%
  summarize(pat = n_distinct(PATIENT_NUM)) %>%
  ungroup()

counts_med_year_l2 <- med_temp_trend %>%
  group_by(year, description_l2) %>%
  summarize(pat = n_distinct(PATIENT_NUM)) %>%
  ungroup()


In [None]:
# Preview the yearly level-2 patient counts.
head(counts_med_year_l2)

In [None]:
# Heatmap of yearly patient counts: year (x) against level-1 description (y).
options(repr.plot.width = 12, repr.plot.height = 8)

counts_med_year_l1 %>%
  ggplot(aes(x = year, y = description_l1, fill = pat)) +
  geom_tile() +
  scale_fill_gradient(high = "blue", low = "white") +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

In [None]:
# Heatmap of yearly patient counts at level 2, limited to the year /
# description cells with more than 40 patients.
options(repr.plot.width = 12, repr.plot.height = 8)

ggplot(
  filter(counts_med_year_l2, pat > 40),
  aes(x = year, y = description_l2, fill = pat)
) +
  geom_tile() +
  scale_fill_gradient(high = "blue", low = "white") +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
  )

In [None]:
# Preview the level-1 percentage table.
head(perc_l1)

In [None]:
# Save the yearly count tables and the percentage tables to one .RData file.
# Only the names that currently exist in the workspace are kept, so an object
# that was never created is skipped rather than causing an error.
nonHispanic <- intersect(
  ls(),
  c("counts_med_year_l2", "counts_med_year_l1", "perc_l2", "perc_l1")
)

save(list = nonHispanic, file = "non_hispanic_trendsInMedicationUse.RData")
