Skip to content

Commit

Permalink
Merge pull request BIMSBbioinfo#112 from frenkiboy/master
Browse files Browse the repository at this point in the history
gffToGRanges update 2
  • Loading branch information
al2na committed Aug 14, 2015
2 parents 53e94b2 + ca8a702 commit c115dce
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 59 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Depends:
R (>= 3.0.0),grid
Imports:
data.table,
GenomeInfoDb,
GenomicRanges,
GenomicAlignments,
ggplot2,
Expand Down
15 changes: 9 additions & 6 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,19 @@ importMethodsFrom("rtracklayer", import)

#importMethodsFrom("IRanges",nearest,as.data.frame,values,length,elementLengths,width,start,end)

importFrom("readr", read_delim)
importFrom("data.table",data.table)
importFrom("plyr",rbind.fill)
importFrom("reshape2", melt)
importFrom("parallel", mclapply)
importFrom("plotrix", dispersion)
importFrom("plotrix", std.error)
importFrom("GenomeInfoDb",seqlevels)
importFrom("matrixStats",colMedians)
importFrom("matrixStats",colSds)
importFrom("matrixStats",colQuantiles)
importFrom("parallel", mclapply)
importFrom("plotrix", dispersion)
importFrom("plotrix", std.error)
importFrom("plyr",rbind.fill)
importFrom("readr", read_delim)
importFrom("reshape2", melt)



exportClasses(AnnotationByFeature)
exportClasses(AnnotationByGeneParts)
Expand Down
2 changes: 1 addition & 1 deletion NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ genomation 1.1.12

IMPROVEMENTS AND BUG FIXES

* gffToGRanges parses column 9 of the gff file correctly; added ensembl=TRUE to prepend chr to seqlevels
* gffToGRanges is now a wrapper for import from rtracklayer

genomation 1.1.11
--------------
Expand Down
56 changes: 10 additions & 46 deletions R/readData.R
Original file line number Diff line number Diff line change
Expand Up @@ -466,12 +466,6 @@ setMethod("readTranscriptFeatures",
#' The file can end in \code{.gz}, \code{.bz2}, \code{.xz}, or \code{.zip}
#' and/or start with \code{http://} or \code{ftp://}. If the file is not compressed
#' it can also start with \code{https://} or \code{ftps://}.
#' @param track.line Can be an integer specifying the number of track lines to skip,
#' "auto" to detect the header lines automatically
#' or FALSE(default) if the bed file doesn't have track lines.
#' "auto" detects both UCSC header lines and lines starting with #
#' @param split.group boolean, whether to split the 9th column of the file
#' @param split.char character that is used as a separator of the 9th column. ';' by default
#' @param filter a character designating which elements to retain from the gff file (e.g. exon, CDS, ...)
#' @param zero.based \code{boolean} whether the coordinates in the file are 0-based (TRUE) or 1-based (FALSE). FALSE by default
#' @param ensembl \code{boolean} if TRUE, add the chr prefix to seqlevels. FALSE by default
Expand All @@ -484,50 +478,20 @@ setMethod("readTranscriptFeatures",
#'
#' @docType methods
#' @export
gffToGRanges = function(gff.file, filter=NULL, zero.based=FALSE, ensembl=FALSE){

  # Reads a gff/gtf file through rtracklayer::import and post-processes it.
  #
  # gff.file:   location of the gff file; handed unchanged to rtracklayer::import
  # filter:     NULL (keep every record) or a character vector with the
  #             feature types (values of the 'type' column, e.g. 'exon')
  #             that should be retained
  # zero.based: if TRUE, 1 is added to the start coordinates
  # ensembl:    if TRUE, 'chr' is prepended to every seqlevel
  #
  # Returns the imported object after the optional coordinate shift,
  # seqlevel renaming and feature filtering.
  # Stops with an error if none of the requested feature types is present.

  gff = rtracklayer::import(gff.file)

  # `$` on a GRanges addresses the metadata columns, not the ranges, so the
  # start coordinates have to be modified through the start() accessor
  if(zero.based)
    GenomicRanges::start(gff) = GenomicRanges::start(gff) + 1L

  if(ensembl)
    seqlevels(gff) = paste0('chr', seqlevels(gff))

  if(!is.null(filter)){
    # the same exact-match mask is used for the existence check and for the
    # subsetting; grepl() would interpret filter as a regular expression and
    # also keep partially matching feature types
    keep = gff$type %in% filter
    if(!any(keep))
      stop(paste(paste(filter, collapse=', '), 'category does not exist in the gff file'))
    gff = gff[keep]
  }

  return(gff)
}
11 changes: 5 additions & 6 deletions inst/unitTests/test_readData.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,14 +122,13 @@ test_gffToGRanges = function()
{
library(GenomicRanges)
tab.test = system.file('unitTests/test.gtf', package='genomation')
gff1 = gffToGRanges(tab.test, track.line='auto')
gff1 = gffToGRanges(tab.test)
checkIdentical(length(gff1), 3L)
checkIdentical(ncol(values(gff1)), 5L)

gff2 = gffToGRanges(tab.test, filter='exon')
checkIdentical(length(gff2), 1L)

gff2 = gffToGRanges(tab.test, track.line='auto', split.group=TRUE)
checkIdentical(ncol(values(gff2)), 13L)

gff3 = gffToGRanges(tab.test, track.line='auto',ensembl=TRUE)
gff3 = gffToGRanges(tab.test, ensembl=TRUE)
checkIdentical(as.character(seqlevels(gff3)), 'chr1')
}

0 comments on commit c115dce

Please sign in to comment.