# Generate phenotypes

In [1]:
suppressMessages({library(SummarizedExperiment)
                  library(dplyr)
                  library(sva)})

## Function

In [2]:
# Function from jaffelab github
#' Collapse multi-valued sequencing metrics to a single value per sample.
#'
#' Each element of a metric column may hold several values (presumably one
#' per sequencing run that was merged into the sample -- confirm against the
#' upstream pipeline). Rate columns become a count-weighted mean and count
#' columns become a plain sum.
#'
#' @param rse A RangedSummarizedExperiment with the columns overallMapRate,
#'   mitoRate, rRNA_rate, totalAssignedGene, numReads, numMapped,
#'   numUnmapped, mitoMapped and totalMapped.
#' @return `rse` with those columns replaced by their collapsed values.
merge_rse_metrics <- function(rse) {
    stopifnot(is(rse, "RangedSummarizedExperiment"))

    # Element-wise weighted mean: sum(rate * count) / sum(count).
    pooled_rate <- function(rates, weights) {
        mapply(function(rate, count) sum(rate * count) / sum(count),
               rates, weights)
    }
    # Element-wise total of a count column.
    run_total <- function(per_run) sapply(per_run, sum)

    # Rates have to be pooled first: they use numReads / numMapped as
    # weights, and those columns are overwritten with their totals below.
    rse$overallMapRate <- pooled_rate(rse$overallMapRate, rse$numReads)
    rse$mitoRate <- pooled_rate(rse$mitoRate, rse$numMapped)
    rse$rRNA_rate <- pooled_rate(rse$rRNA_rate, rse$numMapped)
    rse$totalAssignedGene <- pooled_rate(rse$totalAssignedGene, rse$numMapped)

    rse$numMapped <- run_total(rse$numMapped)
    rse$numReads <- run_total(rse$numReads)
    rse$numUnmapped <- run_total(rse$numUnmapped)
    rse$mitoMapped <- run_total(rse$mitoMapped)
    rse$totalMapped <- run_total(rse$totalMapped)

    rse
}

## Main

In [3]:
# Phenotype columns selected from colData() when writing each phenotype CSV.
fields <- c(
    "BrNum", "RNum", "Region",
    "RIN", "Age", "Sex", "Race", "Dx",
    "mitoRate", "rRNA_rate", "overallMapRate"
)

In [4]:
# Caudate: compute quality surrogate variables (qSVs) from the degradation
# counts and export them, then export the sample phenotype table.
# NOTE(review): the .rda is presumed to define `cov_rse_caudate` -- confirm.
load("../../counts/_m/degradation_rse_phase3_caudate.rda")
dm <- assays(cov_rse_caudate)$counts
qSV <- qsva(dm)

# Keep only the part of each row name before the first underscore.
# any() tests the logical vector directly instead of matching the string
# "TRUE" against its character coercion.
if (any(grepl("_", rownames(qSV), fixed = TRUE))) {
    rownames(qSV) <- vapply(strsplit(rownames(qSV), "_"), "[",
                            character(1), 1)
}
qSV %>% as.data.frame() %>% write.csv("qSV_caudate.csv")

# NOTE(review): the .rda is presumed to define `rse_gene` -- confirm.
load('../../counts/_m/caudate_brainseq_phase3_hg38_rseGene_merged_n464.rda')

rse_gene <- merge_rse_metrics(rse_gene)
# Keep the first RIN value of each sample.
colData(rse_gene)$RIN <- sapply(colData(rse_gene)$RIN, "[", 1)

colData(rse_gene)[, fields] %>%
    as.data.frame() %>%
    write.csv(file = 'caudate_phenotypes.csv')

In [5]:
# DLPFC: compute quality surrogate variables (qSVs) from the degradation
# counts and export them, then export the sample phenotype table.
# NOTE(review): the .rda is presumed to define `cov_rse_dlpfc` -- confirm.
load("../../counts/_m/degradation_rse_phase2_dlpfc.rda")
dm <- assays(cov_rse_dlpfc)$counts
qSV <- qsva(dm)

# Keep only the part of each row name before the first underscore.
# any() tests the logical vector directly instead of matching the string
# "TRUE" against its character coercion.
if (any(grepl("_", rownames(qSV), fixed = TRUE))) {
    rownames(qSV) <- vapply(strsplit(rownames(qSV), "_"), "[",
                            character(1), 1)
}
qSV %>% as.data.frame() %>% write.csv("qSV_dlpfc.csv")

# NOTE(review): the .rda is presumed to define `rse_gene` -- confirm.
load('../../counts/_m/dlpfc_ribozero_brainseq_phase2_hg38_rseGene_merged_n453.rda')

rse_gene <- merge_rse_metrics(rse_gene)
# Keep the first RIN value of each sample.
colData(rse_gene)$RIN <- sapply(colData(rse_gene)$RIN, "[", 1)

colData(rse_gene)[, fields] %>%
    as.data.frame() %>%
    write.csv(file = 'dlpfc_phenotypes.csv')

In [6]:
# Hippocampus: compute quality surrogate variables (qSVs) from the
# degradation counts and export them, then export the sample phenotype table.
# NOTE(review): the .rda is presumed to define `cov_rse_hippo` -- confirm.
load("../../counts/_m/degradation_rse_phase2_hippo.rda")
dm <- assays(cov_rse_hippo)$counts
qSV <- qsva(dm)

# Keep only the part of each row name before the first underscore.
# any() tests the logical vector directly instead of matching the string
# "TRUE" against its character coercion.
if (any(grepl("_", rownames(qSV), fixed = TRUE))) {
    rownames(qSV) <- vapply(strsplit(rownames(qSV), "_"), "[",
                            character(1), 1)
}
qSV %>% as.data.frame() %>% write.csv("qSV_hippo.csv")

# NOTE(review): the .rda is presumed to define `rse_gene` -- confirm.
load("../../counts/_m/hippo_brainseq_phase2_hg38_rseGene_merged_n447.rda")

rse_gene <- merge_rse_metrics(rse_gene)
# Keep the first RIN value of each sample.
colData(rse_gene)$RIN <- sapply(colData(rse_gene)$RIN, "[", 1)

colData(rse_gene)[, fields] %>%
    as.data.frame() %>%
    write.csv(file = 'hippo_phenotypes.csv')

In [7]:
# Dentate gyrus: compute quality surrogate variables (qSVs) from the
# degradation counts and export them, then export the sample phenotype table.
# NOTE(review): the .rda is presumed to define `cov_rse` -- confirm.
load("../../counts/_m/degradation_rse_dg_hippo_n263.rda")

dm <- assays(cov_rse)$counts
qSV <- qsva(dm)

# Keep only the part of each row name before the first underscore.
# any() tests the logical vector directly instead of matching the string
# "TRUE" against its character coercion.
if (any(grepl("_", rownames(qSV), fixed = TRUE))) {
    rownames(qSV) <- vapply(strsplit(rownames(qSV), "_"), "[",
                            character(1), 1)
}
qSV %>% as.data.frame() %>% write.csv("qSV_dg.csv")

# NOTE(review): the .rda is presumed to define `rse_gene` -- confirm.
load("../../counts/_m/astellas_dg_hg38_rseGene_n263.rda")

rse_gene <- merge_rse_metrics(rse_gene)
# Keep the first RIN value of each sample.
colData(rse_gene)$RIN <- sapply(colData(rse_gene)$RIN, "[", 1)
# Trim the phenotype row names the same way as the qSV row names above.
rownames(colData(rse_gene)) <- sapply(strsplit(rownames(colData(rse_gene)), "_"), "[", 1)

# write.csv() returns NULL, so assigning the end of the pipe would leave
# dg_hippo empty; keep the data frame and write it in a separate step.
dg_hippo <- colData(rse_gene)[, fields] %>% as.data.frame()
write.csv(dg_hippo, file = 'dg_phenotypes.csv')

## Reproducibility Information

In [8]:
# Print the current time and the CPU / elapsed time used by this R session.
Sys.time()
proc.time()
# Set the console width to 120 characters before printing the session tables.
options(width = 120)
# Print the R version, platform settings, and loaded package versions.
sessioninfo::session_info()

[1] "2021-07-10 11:06:34 EDT"

    user   system  elapsed 
8797.312 4130.982  846.486 

─ Session info ───────────────────────────────────────────────────────────────────────────────────────────────────────
 setting  value                       
 version  R version 4.0.3 (2020-10-10)
 os       Arch Linux                  
 system   x86_64, linux-gnu           
 ui       X11                         
 language (EN)                        
 collate  en_US.UTF-8                 
 ctype    en_US.UTF-8                 
 tz       America/New_York            
 date     2021-07-10                  

─ Packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────
 package              * version  date       lib source        
 annotate               1.68.0   2020-10-27 [1] Bioconductor  
 AnnotationDbi          1.52.0   2020-10-27 [1] Bioconductor  
 assertthat             0.2.1    2019-03-21 [1] CRAN (R 4.0.2)
 base64enc              0.1-3    2015-07-28 [1] CRAN (R 4.0.2)
 Biobase              * 2.50.0   2020-10-27 [1] Biocondu