In [1]:
library(dplyr)
library(tidyr)
library(plyr)
library(GenomicRanges)
library(MultiAssayExperiment)
library(SummarizedExperiment)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


------------------------------------------------------------------------------

You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)

------------------------------------------------------------------------------


Attaching package: ‘plyr’


The following objects are masked from ‘package:dplyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize


Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, 

## Import Data

In [2]:
# Load the tab-separated table (header row expected) into `df`
df <- read.delim('fig-4g.tsv')

In [3]:
# Show the first six rows of the grouping, identifier and measurement columns
head(df[c('Group','ID','DNA.average','DNA.rank','RNA.average','RNA.DNA.average')], n = 6L)

Unnamed: 0_level_0,Group,ID,DNA.average,DNA.rank,RNA.average,RNA.DNA.average
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<int>,<dbl>,<dbl>
1,BNA_TF_H3K27ac,1243,409.1824,197,745.1271,1.8210143
2,BNA_TF_H3K27ac,870,364.0982,83,1075.5732,2.9540741
3,BNA_TF_H3K27ac,1088,2077.3091,606,1360.1459,0.6547633
4,BNA_TF_H3K27ac,792,650.6394,1469,166.0715,0.2552435
5,BNA_TF_H3K27ac,837,2032.0055,412,1920.5287,0.9451395
6,BNA_TF_H3K27ac,1124,2094.0144,37,8243.8435,3.9368609


In [4]:
# Count how many rows fall into each Group
table(df[['Group']])


   BNA_H3K27ac         BNA_TF BNA_TF_H3K27ac             ES       negative 
           218             99            538            947            180 

## Construct Summarized Experiment Container Object

### colData

In [5]:
# One row per distinct Group value, stored in a `sample` column that also
# serves as the row names
colData <- data.frame(sample = unique(df$Group))
rownames(colData) <- colData[['sample']]

### rowRanges

In [6]:
#' Build per-group genomic ranges for constructing a Summarized Experiment object
#'
#' For every named entry of `df_group_list`, the `chr`, `start` and `end`
#' columns are converted with `makeGRangesFromDataFrame()` and the entry's name
#' is attached to the result as a `sample` column.
#'
#' @param df_group_list A list of dataframes split by a user defined group
#' @return A list, named by group, holding one ranges object per group
#' @export
wrangle.mpra.rowRange.data <- function(df_group_list){
    interval_columns <- c('chr','start','end')

    # Convert a single group's interval columns and tag them with the group name
    build_group_ranges <- function(group_name){
        intervals <- data.frame(df_group_list[[group_name]][, interval_columns])
        group_ranges <- makeGRangesFromDataFrame(intervals)
        group_ranges$sample <- group_name
        group_ranges
    }

    group_names <- unique(names(df_group_list))
    setNames(lapply(group_names, build_group_ranges), group_names)
}

In [7]:
# Partition the table into one data frame per Group value, then derive the
# genomic ranges of each partition
df_group_list <- split(df, f = df[['Group']])
rowRange_list <- wrangle.mpra.rowRange.data(df_group_list = df_group_list)

### Assays

In [8]:
#' Wrangle MPRA assay data for constructing a Summarized Experiment object
#'
#' Pulls one measurement column out of every group and places the groups side
#' by side in a single matrix. Groups with fewer rows than the largest group
#' are padded at the bottom with NA. Values keep their numeric type and full
#' precision (they are never routed through a character conversion), and the
#' result is always a matrix, even for a single group or a single row.
#'
#' @param df_group_list A named list of dataframes split by a user defined group
#' @param assay_type A column (string) identifier for subsetting the MPRA assay type
#' @return A matrix with one column per group (column names are the group
#'   names), no row names, and as many rows as the largest group. An empty
#'   `df_group_list` gives a 0 x 0 matrix.
#' @export
wrangle.mpra.assay.data <- function(df_group_list, assay_type){

    stopifnot(is.character(assay_type), length(assay_type) == 1L)

    groups <- names(df_group_list)
    if (length(df_group_list) > 0 && is.null(groups)){
        stop("`df_group_list` must be a named list of data frames", call. = FALSE)
    }

    # Iterate over user defined groups to extract the requested column,
    # failing loudly instead of silently producing an all-NA column
    values_by_group <- lapply(groups, function(group){
        group_df <- df_group_list[[group]]
        if (!assay_type %in% colnames(group_df)){
            stop("column '", assay_type, "' not found in group '", group, "'",
                 call. = FALSE)
        }
        group_df[[assay_type]]
    })

    # Combine user defined groups into one matrix (fills missing values with NA)
    n_rows <- max(0L, lengths(values_by_group))
    assay_mtx <- matrix(NA_real_, nrow = n_rows, ncol = length(groups),
                        dimnames = list(NULL, groups))
    for (j in seq_along(values_by_group)){
        values <- values_by_group[[j]]
        # Each group occupies the top of its column; the remainder stays NA
        assay_mtx[seq_along(values), j] <- values
    }

    return(assay_mtx)
}



In [9]:
# Partition the table into one data frame per Group value
df_group_list <- split(df, f = df[['Group']])

In [10]:
# One group-by-column matrix per measurement type
assay_dna.avg_mtx     <- wrangle.mpra.assay.data(df_group_list, assay_type = 'DNA.average')
assay_rna.avg_mtx     <- wrangle.mpra.assay.data(df_group_list, assay_type = 'RNA.average')
assay_rna_dna.avg_mtx <- wrangle.mpra.assay.data(df_group_list, assay_type = 'RNA.DNA.average')

### Assemble Summarized Experiment

In [25]:
# Extract one group's measurements from a padded assay matrix as a one-column
# numeric data frame. The first `n_rows` rows of the group's column are taken,
# so padding below them is excluded while NA values inside the group are kept
# (dropping them would leave the assay shorter than the group's ranges).
# as.numeric() makes the values numeric even when the matrix holds them as text.
get_group_assay <- function(assay_mtx, group, n_rows){
    group_assay <- data.frame(as.numeric(assay_mtx[seq_len(n_rows), group]))
    colnames(group_assay) <- group
    return(group_assay)
}

# Build one SummarizedExperiment per group, keyed by group name in `se_list`
se_list <- list()
for (sample in colData$sample){

    ### rowRanges ###
    rowRanges <- rowRange_list[[sample]]
    if (is.null(rowRanges)){
        stop("no row ranges found for group '", sample, "'", call. = FALSE)
    }
    # The number of ranges defines how many assay rows belong to this group
    n_elements <- length(rowRanges)

    ### Assay ###
    # DNA.avg
    assay_subset_dna.avg_mtx <- get_group_assay(assay_dna.avg_mtx, sample, n_elements)

    # RNA.avg
    assay_subset_rna.avg_mtx <- get_group_assay(assay_rna.avg_mtx, sample, n_elements)

    # RNA_DNA.avg
    assay_subset_rna_dna.avg_mtx <- get_group_assay(assay_rna_dna.avg_mtx, sample, n_elements)

    # Give the ranges the same row names as the assays
    names(rowRanges) <- rownames(assay_subset_dna.avg_mtx)

    ### colData ###
    colData_subset <- data.frame(sample=sample)

    se <- SummarizedExperiment(assays=list(dna.avg=assay_subset_dna.avg_mtx,
                                           rna.avg=assay_subset_rna.avg_mtx,
                                           rna_dna.avg = assay_subset_rna_dna.avg_mtx),
                               rowRanges=rowRanges,
                               colData=colData_subset)
    se_list[[sample]] <- se
}

In [26]:
# Print the per-group list of SummarizedExperiment objects
se_list

$BNA_TF_H3K27ac
class: RangedSummarizedExperiment 
dim: 538 1 
metadata(0):
assays(3): dna.avg rna.avg rna_dna.avg
rownames(538): 1 2 ... 537 538
rowData names(1): sample
colnames(1): BNA_TF_H3K27ac
colData names(1): sample

$BNA_TF
class: RangedSummarizedExperiment 
dim: 99 1 
metadata(0):
assays(3): dna.avg rna.avg rna_dna.avg
rownames(99): 1 2 ... 98 99
rowData names(1): sample
colnames(1): BNA_TF
colData names(1): sample

$BNA_H3K27ac
class: RangedSummarizedExperiment 
dim: 218 1 
metadata(0):
assays(3): dna.avg rna.avg rna_dna.avg
rownames(218): 1 2 ... 217 218
rowData names(1): sample
colnames(1): BNA_H3K27ac
colData names(1): sample

$negative
class: RangedSummarizedExperiment 
dim: 180 1 
metadata(0):
assays(3): dna.avg rna.avg rna_dna.avg
rownames(180): 1 2 ... 179 180
rowData names(1): sample
colnames(1): negative
colData names(1): sample

$ES
class: RangedSummarizedExperiment 
dim: 947 1 
metadata(0):
assays(3): dna.avg rna.avg rna_dna.avg
rownames(947): 1 2 ... 946 947
rowD