## Sex differences in Autism Spectrum Disorder, a Comorbidity Pattern Analysis in National Scale Data:  (iii) results comparison

#### Results comparison
We create a function that selects the PheCodes that are statistically significant and more likely in ASD females in both the "ASD females vs. ASD males" and the "ASD females vs. females without ASD" comparisons, and that are not also more likely in females without ASD in the "females without ASD vs. males without ASD" comparison.

In [None]:
# Build the table of PheCodes that characterise one group.
#
# A PheCode is kept when it is statistically significant (adjusted p-value
# below `statSignificance`) with a finite odds ratio above `oddsRatio` in BOTH
# include comparisons, and is NOT significant under the same thresholds in the
# exclude comparison.
#
# Arguments:
#   includeGroup1    data.frame of the first comparison; needs the columns
#                    phecode, OR, pAdjust, confIntL, confIntH, femYes, maleYes,
#                    femNo, maleNo, femExclude and maleExclude.
#   includeGroup2    data.frame of the second comparison; needs the columns
#                    phecode, OR, pAdjust, confIntL, confIntH, femYes, femNo,
#                    femExclude, controlFemaleYes, controlFemaleNo and
#                    controlFemaleExclude.
#   excludeGroup1    data.frame of the control comparison; needs the columns
#                    phecode, OR and pAdjust.
#   statSignificance upper bound (exclusive) for the adjusted p-value.
#   oddsRatio        lower bound (exclusive) for the odds ratio.
#   ageGroup         label stored in the `ageGroup` column of the result.
#   extractExcluded  when TRUE, return the phenotypes that passed both include
#                    filters but were removed by the exclude filter.
#
# Returns:
#   extractExcluded = FALSE: a data.frame with one row per selected PheCode,
#   sorted by decreasing Gender_OR, with the columns ageGroup, group,
#   description, Gender_* (from includeGroup1) and Dis_* (from includeGroup2).
#   extractExcluded = TRUE: the phecode, description and group of the
#   excluded phenotypes.
#
# Side effects: prints the size of every intermediate selection and reads
# "pheinfoComplete.rdata" from the working directory.
createCompleteTable <- function( includeGroup1, includeGroup2, 
                                 excludeGroup1,
                                 statSignificance, oddsRatio, ageGroup, 
                                 extractExcluded = FALSE){

  # Rows that are significant and have a finite odds ratio above the cut-off.
  # Rows with a missing p-value or odds ratio are dropped rather than being
  # turned into all-NA rows that would inflate the reported counts.
  selectSignificant <- function(results) {
    orValues <- as.numeric(as.character(results$OR))
    keep <- !is.na(results$pAdjust) & results$pAdjust < statSignificance &
      is.finite(orValues) & orValues > oddsRatio
    results[keep, , drop = FALSE]
  }

  # "total(first/second)" summary of two count vectors.
  formatCounts <- function(first, second) {
    paste0(first + second, "(", first, "/", second, ")")
  }

  # "(low, high)" confidence interval rounded to three decimals.
  formatCi <- function(low, high) {
    paste0("(", round(as.numeric(as.character(low)), 3), ", ",
           round(as.numeric(as.character(high)), 3), ")")
  }

  dataSet1 <- selectSignificant(includeGroup1)
  print(paste0("Include group1 contains ", nrow( dataSet1), " statistically significant phenotypes"))

  dataSet2 <- selectSignificant(includeGroup2)
  print(paste0("Include group2 contains ", nrow( dataSet2), " statistically significant phenotypes"))

  dataSetExclude1 <- selectSignificant(excludeGroup1)
  print(paste0("Control group 1 contains ", nrow( dataSetExclude1), " statistically significant phenotypes"))

  excludePhenotypes <- dataSetExclude1$phecode

  # Phenotypes significant in both include comparisons, minus the ones that
  # are also significant in the control comparison.
  commonPhenos <- dataSet1$phecode[dataSet1$phecode %in% dataSet2$phecode]
  finalPhenos <- commonPhenos[!commonPhenos %in% excludePhenotypes]

  print(paste0("There are ", length( unique( finalPhenos )), " statistically significant phenotypes ASD- female characteristics"))

  # Phenotype annotation; load() creates `pheinfoComplete` in this function's
  # environment, so nothing leaks into the caller's workspace.
  load("pheinfoComplete.rdata")
  pheinfoComplete <- pheinfoComplete[, c("phecode", "description", "group")]

  if (isTRUE(extractExcluded)) {
    exclusion <- excludePhenotypes[excludePhenotypes %in% commonPhenos]
    return(pheinfoComplete[pheinfoComplete$phecode %in% exclusion, ])
  }

  outputColumns <- c("phecode", "OR", "ci", "pAdjust",
                     "phenoPresent", "phenoAbsent", "phenoExclude")
  outputSuffixes <- c("OR", "ci", "adjPvalue",
                      "phenoPresent", "phenoAbsent", "phenoExclude")

  # First comparison: counts are reported as total(female/male).
  dataSet1$phenoPresent <- formatCounts(dataSet1$femYes, dataSet1$maleYes)
  dataSet1$phenoAbsent <- formatCounts(dataSet1$femNo, dataSet1$maleNo)
  dataSet1$phenoExclude <- formatCounts(dataSet1$femExclude, dataSet1$maleExclude)
  dataSet1$ci <- formatCi(dataSet1$confIntL, dataSet1$confIntH)
  dataSet1$OR <- round(as.numeric(as.character(dataSet1$OR)), 3)

  finalResults1 <- dataSet1[dataSet1$phecode %in% finalPhenos, outputColumns]
  colnames(finalResults1) <- c("phecode", paste0("Gender_", outputSuffixes))

  # Second comparison: counts are reported as total(female/control female).
  dataSet2$phenoPresent <- formatCounts(as.numeric(dataSet2$femYes),
                                        as.numeric(dataSet2$controlFemaleYes))
  dataSet2$phenoAbsent <- formatCounts(as.numeric(dataSet2$femNo),
                                       as.numeric(dataSet2$controlFemaleNo))
  dataSet2$phenoExclude <- formatCounts(as.numeric(dataSet2$femExclude),
                                        as.numeric(dataSet2$controlFemaleExclude))
  dataSet2$ci <- formatCi(dataSet2$confIntL, dataSet2$confIntH)
  dataSet2$OR <- round(as.numeric(as.character(dataSet2$OR)), 3)

  finalResults2 <- dataSet2[dataSet2$phecode %in% finalPhenos, outputColumns]
  colnames(finalResults2) <- c("phecode", paste0("Dis_", outputSuffixes))

  # "phecode" is the only column the merged tables share; naming it makes the
  # join key explicit.
  finalResults <- merge(finalResults1, finalResults2, by = "phecode")
  finalResults <- merge(finalResults, pheinfoComplete, by = "phecode")
  finalResults$ageGroup <- ageGroup
  finalResults <- finalResults[, c("ageGroup", "group", "description",
                                   paste0("Gender_", outputSuffixes),
                                   paste0("Dis_", outputSuffixes))]

  finalResults[order(finalResults$Gender_OR, decreasing = TRUE), ]
}

Apply the previous function to extract the PheCodes for one of the age groups, for example, from 12 to 18 years old.
- First we load the result data obtained (Notebook 2.ComorbidityAnalysis)
- Then we apply the "createCompleteTable" function 

In [None]:
# Every result file stores its table under the same object name, `finalTable`.
# After each load the table is copied to a descriptive name and `finalTable`
# is removed so that the next load starts from a clean slate.

# ASD comparison
load(file = "phewasResultASD12to18.RData")
ASD12to18 <- finalTable
rm(list = "finalTable")

# Non-ASD comparison
load(file = "phewasResultNonASD12to18.RData")
nonASD12to18 <- finalTable
rm(list = "finalTable")

# Female comparison
load(file = "phewasResultFemaleCompare12to18.RData")
femalesCompare <- finalTable
rm(list = "finalTable")

In [None]:
# Build the results table for the 12-to-18 age group and store it on disk.
# `ageGroup` has no default in createCompleteTable and is used to label the
# rows of the table, so it must be supplied or the call stops with an
# "argument is missing" error.
from12to18 <- createCompleteTable( includeGroup1 = ASD12to18,
                              includeGroup2 = femalesCompare,
                              excludeGroup1 = nonASD12to18,
                              statSignificance = 0.01,
                              oddsRatio = 1.5,
                              ageGroup = "12 to 18")

save( from12to18, file = "from12to18.RData")

The "createCompleteTable" function also allows us to extract the phenotype categories that were excluded: those that were statistically significant with OR > 1.5 both when comparing ASD females vs. ASD males and when comparing ASD females vs. non-ASD females, but that were also significant when comparing non-ASD females vs. non-ASD males. To extract the excluded phenotypes, set the argument "extractExcluded" to TRUE.

In [None]:
# With extractExcluded = TRUE the function returns the phenotypes that passed
# both include filters but were removed by the exclude filter, instead of the
# results table.
# NOTE(review): ASD0to2 and nonASD0to2 are not created in the cells shown
# above; presumably the 0-to-2 result files must be loaded first — confirm.
# NOTE(review): femalesCompare, as loaded above, comes from
# "phewasResultFemaleCompare12to18.RData" — confirm it is the intended input
# for the "0 to 2" age group.
createCompleteTable( includeGroup1 = ASD0to2,
                     includeGroup2 = femalesCompare,
                     excludeGroup1 = nonASD0to2,
                     statSignificance = 0.01,
                     oddsRatio = 1.5, 
                     ageGroup  = "0 to 2", 
                     extractExcluded = TRUE)