## Sex differences in Autism Spectrum Disorder, a Comorbidity Pattern Analysis in National Scale Data:  (iii) results comparison

#### Results comparison
We create a function that selects the PheCodes that are statistically significant and more likely in ASD females in both the "ASD females vs. ASD males" and the "ASD females vs. females without ASD" comparisons, and that are not also more likely in females without ASD in the "females without ASD vs. males without ASD" comparison.

In [None]:
# Select the PheCodes that are significantly enriched in BOTH include groups
# and NOT significantly enriched in the exclude group, and annotate them.
#
# Arguments:
#   includeGroup1, includeGroup2, excludeGroup1
#       data.frames with (at least) the columns "phecode", "OR" and "pAdjust".
#       "OR" may be numeric, character or factor; it is converted to numeric.
#   statSignificance
#       a row is kept only when pAdjust is strictly below this threshold.
#   oddsRatio
#       a row is kept only when OR is finite and strictly above this threshold.
#   phecodeDefinitionsFile
#       path to the PheCode definitions CSV (columns "phecode", "phenotype",
#       "category"). Defaults to the file name used originally.
#       PheCode description and category extracted from
#       https://phewascatalog.org/files/phecode_definitions1.2.csv.zip
#
# Value:
#   data.frame with columns "group", "description", "Gender_OR",
#   "Gender_pvalue" (OR / pAdjust from includeGroup1), "OR", "pAdjust"
#   (from includeGroup2) and "phecode". PheCodes without an entry in the
#   definitions file are dropped by the final merge.
#
# Side effects: prints the number of phenotypes retained at each step and
# reads phecodeDefinitionsFile from disk.
extractResults <- function( includeGroup1, includeGroup2, excludeGroup1, statSignificance, oddsRatio,
                            phecodeDefinitionsFile = "phecode_definitions1.2.csv" ){

    # Shared filter for the three input tables: significant, finite OR above
    # the requested threshold.
    selectSignificant <- function( resultTable, tableName ){

        missingColumns <- setdiff( c( "phecode", "OR", "pAdjust" ), colnames( resultTable ) )
        if( length( missingColumns ) > 0 ){
            stop( tableName, " is missing required column(s): ",
                  paste( missingColumns, collapse = ", " ), call. = FALSE )
        }

        # OR may be stored as factor/character, so go through as.character first
        oddsRatioValues <- as.numeric( as.character( resultTable$OR ) )

        keepRow <- resultTable$pAdjust < statSignificance &
                   is.finite( oddsRatioValues ) &
                   oddsRatioValues > oddsRatio

        # which() discards NA conditions; indexing a data.frame with a logical
        # NA would otherwise insert rows made entirely of NA
        resultTable[ which( keepRow ), , drop = FALSE ]
    }

    #include group 1
    dataSet1 <- selectSignificant( includeGroup1, "includeGroup1" )
    print(paste0("Include group1 contains ", nrow( dataSet1), " statistically significant phenotypes" ) )

    #include group 2
    dataSet2 <- selectSignificant( includeGroup2, "includeGroup2" )
    print(paste0("Include group2 contains ", nrow( dataSet2 ), " statistically significant phenotypes" ) )

    #exclude group
    dataSetExclude1 <- selectSignificant( excludeGroup1, "excludeGroup1" )
    print(paste0("Control group 1 contains ", nrow( dataSetExclude1), " statistically significant phenotypes" ) )

    #phecodes significant in both include groups and not in the exclude group
    excludePhenotypes <- dataSetExclude1$phecode
    commonPhenos      <- dataSet1$phecode[ dataSet1$phecode %in% dataSet2$phecode ]
    finalPhenos       <- commonPhenos[ ! commonPhenos %in% excludePhenotypes ]

    print(paste0("There are ",
                 length( unique( finalPhenos ) ),
                 " statistically significant phenotypes ASD- female characteristics"))

    #add the PheCode description of the selected PheCodes
    pheinfoComplete <- read.csv( phecodeDefinitionsFile )
    pheinfoComplete <- pheinfoComplete[ , c( "phecode", "phenotype", "category" ) ]
    colnames( pheinfoComplete ) <- c( "phecode", "description", "group" )

    # for the PheCodes of interest, select the OR and p-value from the include group 1
    finalResults1 <- dataSet1[ dataSet1$phecode %in% finalPhenos, c( "phecode", "OR", "pAdjust" ) ]
    colnames( finalResults1 ) <- c( "phecode", "Gender_OR", "Gender_pvalue" )

    # for the PheCodes of interest, select the OR and p-value from the include group 2
    finalResults2 <- dataSet2[ dataSet2$phecode %in% finalPhenos, c( "phecode", "OR", "pAdjust" ) ]

    # merge both data.frames (and the annotation) on the PheCode
    finalResults <- merge( finalResults1, finalResults2, by = "phecode" )
    finalResults <- merge( finalResults, pheinfoComplete, by = "phecode" )
    finalResults <- finalResults[ , c( "group", "description", "Gender_OR", "Gender_pvalue", "OR", "pAdjust", "phecode" ) ]

    return( finalResults )

}

Apply the previous function to extract the PheCodes for one of the age groups, for example, from 12 to 18 years old.
- First, we load the result data obtained in Notebook 2.ComorbidityAnalysis
- Then we apply the "extractResults" function 

In [None]:
# Every result file is expected to provide an object called "finalTable".
# After each load it is copied to a comparison-specific name and removed,
# so the next load cannot be confused with a stale copy.

# PheWAS result stored in phewasResultASD12to18.RData
load( file = "phewasResultASD12to18.RData" )
ASD12to18 <- finalTable
rm( list = "finalTable" )

# PheWAS result stored in phewasResultNonASD12to18.RData
load( file = "phewasResultNonASD12to18.RData" )
nonASD12to18 <- finalTable
rm( list = "finalTable" )

# PheWAS result stored in phewasResultFemaleCompare12to18.RData
load( file = "phewasResultFemaleCompare12to18.RData" )
femalesCompare <- finalTable
rm( list = "finalTable" )

In [None]:
# Keep the PheCodes with adjusted p-value < 0.01 and OR > 1.5 in both
# include tables (ASD12to18, femalesCompare) that do not pass the same
# thresholds in the exclude table (nonASD12to18).
from12to18 <- extractResults(
    includeGroup1    = ASD12to18,
    includeGroup2    = femalesCompare,
    excludeGroup1    = nonASD12to18,
    statSignificance = 0.01,
    oddsRatio        = 1.5
)

# Store the selection on disk under the object name "from12to18"
save( list = "from12to18", file = "from12to18.RData" )