# Medication Summary Pediatrics

This notebook contains the R code to generate a table that summarizes, for each drug class:
- Number of sites treating at least 3 patients with the drug class during hospitalization
- Countries treating at least 3 patients with the drug class
- Total number of patients treated with the drug class during hospitalization at those sites

First we determine the path where all the files are located and load the R libraries required. 

In [None]:
# Start from a clean workspace: remove every object in the current environment.
rm(list=ls())
# Set working directory where the files are
# (relative paths used after this call are resolved against this directory)
setwd("./4CE/phase1.1/latest/")

#############
# LIBRARIES #
#############
library(dplyr)
library(tidyr)

Then we create a function to define the list with all the files to analyze.

In [None]:
# List the files in `path` that belong to the requested cohort.
#
# Arguments:
#   path      - directory to scan.
#   pattern   - regular expression handed to list.files().
#   pediatric - when TRUE, keep only files whose name matches "PED" or
#               "UNCCH", except "APHPPED.csv"; otherwise keep the files
#               that do NOT match "PED" or "UNCCH".
#
# Files whose name matches one of the excluded-site patterns are always
# dropped, whatever the cohort.
#
# Returns a character vector of file names (possibly empty).
fileList <- function( path, pattern, pediatric ){

  excludedPattern  <- paste( c("FICHOS","VA.csv","BCH.csv","CHOP.csv",
                               "RP401.csv") , collapse = "|")
  pediatricPattern <- paste( c("PED", "UNCCH"), collapse = "|")

  candidates <- list.files( path = path, pattern = pattern )
  candidates <- candidates[ !grepl( excludedPattern, x = candidates ) ]

  # Flag the pediatric files once; both branches select on this mask.
  isPediatric <- grepl( pediatricPattern, x = candidates )

  if( pediatric == TRUE ){
    keep <- isPediatric & !grepl( "APHPPED.csv", x = candidates )
    return( candidates[ keep ] )
  }

  candidates[ !isPediatric ]
}

We create the list with all the pediatric files.

In [None]:
# Cohort switch: TRUE selects the pediatric medication files.
select_pediatric <- TRUE

# Collect the medication file names for the chosen cohort.
# NOTE(review): the result is stored under the same name as the helper
# function, so the function is no longer callable afterwards in this session;
# re-running this cell requires re-defining the function first.
fileList <- fileList( pattern   = "Med",
                      path      = "./phase1.1/latest/",
                      pediatric = select_pediatric )

### Summary table

In [None]:
# Build a per-site overview (`summaryTable`) and stack the medication files of
# all sites into a single data frame (`allMedication`).
#
# Relies on `fileList` (character vector of file names) having been created
# earlier in the notebook. Each site file is read from "./phase1.1/latest/";
# the per-site obfuscation thresholds are read from
# "./phase1.1_pediatric/pediatric_obfuscation.txt" and matched on `siteid`.
summaryTable <- as.data.frame( matrix( ncol= 3, nrow=length( fileList )))
colnames(summaryTable) <- c("Site", "DistinctNumberOfMedications",
                            "medication_list"
)


obfuscation <- read.delim( file   = "./phase1.1_pediatric/pediatric_obfuscation.txt", 
                           header = TRUE, sep = "\t")

# One slot per input file. Collecting the per-site tables and binding them once
# after the loop avoids depending on the first file being non-empty and avoids
# growing a data frame inside the loop.
siteTables <- vector( "list", length( fileList ))

# seq_along() iterates zero times when no file was found (1:length() would
# iterate over c(1, 0)).
for( i in seq_along( fileList ) ){
  print(i)
  selection <- read.delim( paste0( "./phase1.1/latest/", fileList[i]), sep = ",", colClasses = "character")

  # Nothing to summarise or merge for an empty file; its summaryTable row
  # stays NA.
  if( nrow(selection) == 0 ){
    next
  }

  # For UNC files every cell equal to 10 is recoded to the -99 flag that is
  # handled as an obfuscated count further down.
  # NOTE(review): presumably UNC reports small counts as 10 - confirm.
  if( grepl( "UNC", x=fileList[i], fixed = TRUE) ){
    selection[ selection == 10] <- -99 
  }
  colnames(selection) <- tolower( colnames( selection ) )

  # Obfuscation threshold declared for this site.
  obf <- obfuscation[ tolower(obfuscation$siteid) == tolower(selection$siteid[1]), ]
  if( nrow(obf) == 0 ){
    stop( "No obfuscation entry found for site '", selection$siteid[1],
          "' (file '", fileList[i], "')", call. = FALSE)
  }
  if( nrow(obf) > 1 ){
    warning( "Several obfuscation entries found for site '", selection$siteid[1],
             "'; using the first one", call. = FALSE)
  }
  # as.character() first: if the column was read as a factor, as.numeric()
  # would otherwise return the level code instead of the threshold.
  threshold <- as.character( obf$obfuscation[1] )

  # name of the site
  summaryTable$Site[i] <-  as.character(selection$siteid[1])

  # number of rows in the site file, used as the number of distinct medications
  summaryTable$DistinctNumberOfMedications[i] <- nrow( selection )

  # list of distinct medications
  summaryTable$medication_list[i] <- paste( unique(selection$med_class),
                                            collapse = ",")

  # Replace obfuscated cells (-99) by half of the site's obfuscation threshold.
  if( threshold != "none"){
    selection[ selection == -99 ] <- 0.5 * as.numeric( threshold )
  }

  siteTables[[i]] <- selection
}

# merge all the files in 1 (slots of empty files are NULL and are dropped)
siteTables <- Filter( Negate( is.null ), siteTables )
if( length( siteTables ) == 0 ){
  stop( "None of the medication files contained any rows", call. = FALSE)
}
allMedication <- do.call( rbind, siteTables )

# Cells equal to -999 are turned into missing values.
allMedication[ allMedication == -999 ] <- NA
# lapply() always returns one vector per column, whatever the number of rows
# (sapply() changes its return shape with the input size).
allMedication[,c(3:6)] <- lapply(allMedication[,c(3:6)],as.numeric)

We are only interested in those medications that were:
- given during the hospitalization (after admission)
- used to treat at least 3 patients

In [None]:
# Keep columns 1, 2 and 4 only, then retain the rows whose
# since-admission patient count is known and is at least 3.
allMedication <- allMedication[, c(1,2,4)]
sinceAdmission <- allMedication$num_patients_all_since_admission
allMedication <- allMedication[ !is.na(sinceAdmission) & sinceAdmission >= 3, ]

We compute the total number of patients who were prescribed each drug class.

In [None]:
# Total patients per drug class, summed over sites.
# The first column is dropped before grouping; every remaining column is
# summed within each med_class. Missing values are ignored as long as the
# group holds at least one non-missing value; a group made only of missing
# values yields NA.
byDrugClass <- allMedication[-1] %>%
  group_by(med_class) %>%
  dplyr::summarise_all(function(counts) sum(counts, na.rm = any(!is.na(counts)))) %>%
  as.data.frame()
names(byDrugClass) <- c("DrugClass", "TotalPatients")

We count how many sites report each drug class under the previous conditions.

In [None]:
# Number of rows of allMedication per drug class; reported as the number of
# sites for that drug class.
classFrequency <- table( allMedication$med_class )
medCounts <- setNames( as.data.frame( classFrequency ),
                       c("DrugClass", "DistincSites"))

We determine the countries of those sites.

In [None]:
# Attach the country of each site to allMedication and build, per drug class,
# the number of distinct (drug class, country) pairs and a comma-separated,
# alphabetically sorted list of the countries.
siteMaping <- read.delim("./phase1.1/mappingFiles/SiteID_Map_Pediatric_07-23-20.csv", colClasses = "character", sep=",")
siteMaping <- siteMaping[,c("Acronym", "Country")]
# all.x = TRUE keeps sites without a mapping entry; their Country is NA.
allMedication <- merge( allMedication, siteMaping, by.x = "siteid", by.y="Acronym", all.x = TRUE)

allMedCountry <- unique( allMedication[ , c("med_class", "Country")] )
allMedCountry <- allMedCountry[ order( allMedCountry$med_class, decreasing = TRUE),]

medOutput <- as.data.frame( table( allMedCountry$med_class ))

# One country string per drug class. vapply() returns character(0) when there
# is no drug class at all, so this also works on an empty table (a 1:nrow()
# loop would iterate over c(1, 0) in that case).
# NOTE(review): sort() drops NA countries, so a site without a mapping entry is
# counted in the frequency column but is absent from the country string.
medOutput$countries <- vapply(
  as.character( medOutput$Var1 ),
  function( drugClass ){
    classCountries <- allMedCountry[ which( allMedCountry$med_class == drugClass ), "Country"]
    paste( unique( sort( classCountries )), collapse = ",")
  },
  character(1),
  USE.NAMES = FALSE
)
colnames(medOutput) <- c("DrugClass", "DistincCountries", "Countries")

We put all the information together in a table. 

In [None]:
# Join the site counts, the country lists and the patient totals on their
# shared column(s), keep the reported columns in display order, and write the
# result as a tab-separated file.
finalMed <- Reduce( merge, list( medCounts, medOutput, byDrugClass ))
finalMed <- finalMed[, c("DrugClass", "DistincSites", "Countries", "TotalPatients")]
write.table( finalMed,
             file      = "./table2.txt",
             sep       = "\t",
             quote     = FALSE,
             row.names = FALSE,
             col.names = TRUE)