## Sex differences in Autism Spectrum Disorder, a Comorbidity Pattern Analysis in National Scale Data:  (ii) comorbidity analysis

### Example: ASD females vs ASD males

#### Extract the list of phenotypes present in the patients of each group

In [None]:
# Distinct phecodes recorded in the female ASD comorbidity table.
asdPhenotypesF <- dbGetQuery(
  cn,
  "
           SELECT
           DISTINCT  (PheCode) FROM
           ASDfem12to18Comorbidities"
)

# Distinct phecodes recorded in the male ASD comorbidity table.
asdPhenotypesM <- dbGetQuery(
  cn,
  "
           SELECT
           DISTINCT  (PheCode)  FROM
           ASDmales12to18Comorbidities"
)

#### Count how many times each phecode is assigned to each patient

- Create a table where each row contains one of the phenotypes, and in the columns we will have the counts: how many patients present the phecode 3 or more times, how many present the phecode 1 or 2 times, and how many do not present the phecode.

- To do this, we query the tables created in the database and count those patients directly.

In [None]:
# Build one row per phecode seen in either cohort. For each sex the columns
# hold distinct-patient counts taken from the cohort's comorbidity table:
#   <sex>Yes     - patients with the phecode and Count >= 3
#   <sex>Exclude - patients with the phecode and Count of 1 or 2
#   <sex>No      - all distinct patients in the table minus the two above
totalPhewasCodes <- unique(c(asdPhenotypesF$PheCode, asdPhenotypesM$PheCode))
myPhewasResults <- as.data.frame(matrix(ncol = 7, nrow = length(totalPhewasCodes)))
colnames(myPhewasResults) <- c(
  "phecode", "femYes", "femNo", "femExclude", "maleYes", "maleNo", "maleExclude"
)
myPhewasResults$phecode <- as.character(totalPhewasCodes)

# Run a single-value COUNT query against `tableName` and return the result as
# a plain numeric scalar. dbGetQuery() returns a data frame; storing that
# directly in a column element would silently turn the column into a list.
countDistinctPatients <- function(tableName, whereClause = "") {
  sql <- paste0(
    "SELECT COUNT ( DISTINCT  MemberId ) FROM ", tableName, " ", whereClause
  )
  as.numeric(dbGetQuery(cn, sql)[[1]][1])
}

# Build the WHERE clause selecting one phecode restricted by a condition on
# the Count column.
phecodeFilter <- function(phecode, countCondition) {
  # Double any single quote so the value cannot terminate the SQL literal.
  quoted <- gsub("'", "''", phecode, fixed = TRUE)
  paste0("WHERE PheCode = '", quoted, "' AND ", countCondition)
}

# The cohort totals do not depend on the phecode, so query them only once
# instead of once per loop iteration.
femTotal <- countDistinctPatients("ASDfem12to18Comorbidities")
maleTotal <- countDistinctPatients("ASDmales12to18Comorbidities")

# seq_len() yields an empty sequence when there are no phecodes, whereas
# 1:nrow() would iterate over c(1, 0).
for (i in seq_len(nrow(myPhewasResults))) {
  print(i)  # progress indicator
  code <- myPhewasResults$phecode[i]

  myPhewasResults$femYes[i] <- countDistinctPatients(
    "ASDfem12to18Comorbidities", phecodeFilter(code, "Count >= 3")
  )
  myPhewasResults$femExclude[i] <- countDistinctPatients(
    "ASDfem12to18Comorbidities",
    phecodeFilter(code, "( Count = 1 OR Count = 2 )")
  )

  myPhewasResults$maleYes[i] <- countDistinctPatients(
    "ASDmales12to18Comorbidities", phecodeFilter(code, "Count >= 3")
  )
  myPhewasResults$maleExclude[i] <- countDistinctPatients(
    "ASDmales12to18Comorbidities",
    phecodeFilter(code, "( Count = 1 OR Count = 2 )")
  )
}

# "No" counts are the remainder of each cohort; computed for all rows at once.
myPhewasResults$femNo <- femTotal -
  myPhewasResults$femExclude - myPhewasResults$femYes
myPhewasResults$maleNo <- maleTotal -
  myPhewasResults$maleExclude - myPhewasResults$maleYes

#### Statistically significant results 

- Once we have the table with all the counts, we perform Fisher's exact test for each phenotype and look for the statistically significant results.

In [None]:
# Run a two-sided Fisher's exact test on one row of the phecode counts table.
#
# df: a single row (named vector, list or one-row data frame) whose 1st
#     element is the phecode and whose 2nd, 3rd, 5th and 6th elements are the
#     femYes, femNo, maleYes and maleNo counts. The counts may arrive as
#     character strings (e.g. after apply() coerced the table to a matrix).
#
# Returns a length-5 vector: phecode, p-value, lower and upper bound of the
# odds-ratio confidence interval, and the odds-ratio estimate. Because the
# values are combined with c(), a character phecode makes the whole result
# character.
get_fisher <- function(df) {
  # Filled column-wise: column 1 = female (yes, no), column 2 = male (yes, no).
  counts <- matrix(as.numeric(unlist(df[c(2, 3, 5, 6)])), ncol = 2)
  # Spell out `alternative` rather than relying on partial argument matching.
  f <- fisher.test(counts, alternative = "two.sided")
  c(unlist(df[1]), f$p.value, f$conf.int, f$estimate)
}

# Run Fisher's exact test on every phecode row. Rows are passed one at a time
# rather than through apply(), which would first coerce the whole data frame
# to a matrix.
fisherRows <- lapply(
  seq_len(nrow(myPhewasResults)),
  function(i) get_fisher(myPhewasResults[i, ])
)
fishers <- do.call(rbind, fisherRows)
if (is.null(fishers)) {
  # No phecodes at all: keep a 0-row matrix so the column names still apply.
  fishers <- matrix(character(0), ncol = 5)
}
colnames(fishers) <- c("phecode", "pValue", "confIntL", "confIntH", "OR")

finalTable <- merge(myPhewasResults, fishers, by = "phecode")

# The test results come back as text (or factors), so convert through
# character before doing arithmetic on them.
finalTable$pAdjust <- p.adjust(
  as.numeric(as.character(finalTable$pValue)),
  method = "bonferroni"
)
oddsRatio <- as.numeric(as.character(finalTable$OR))

# Keep phenotypes that are significant after Bonferroni correction and have a
# finite odds ratio above 1.5. is.finite() rejects Inf and NA in one step, and
# the explicit NA check stops NA comparisons from injecting all-NA rows.
isSignificant <- !is.na(finalTable$pAdjust) & finalTable$pAdjust < 0.01
isEnriched <- is.finite(oddsRatio) & oddsRatio > 1.5
finalTableStat <- finalTable[isSignificant & isEnriched, ]