Skip to content

Commit

Permalink
Merge pull request #9 from hpages/master
Browse files Browse the repository at this point in the history
Migrate QuasR to Rhtslib and Rsamtools 1.99.1
  • Loading branch information
mbstadler committed Feb 12, 2019
2 parents abaa0db + f1f916b commit 02b7cb1
Show file tree
Hide file tree
Showing 25 changed files with 85 additions and 272 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
Package: QuasR
Type: Package
Title: Quantify and Annotate Short Reads in R
Version: 1.23.12
Version: 1.23.13
Date: 2019-01-24
Author: Anita Lerch, Charlotte Soneson, Dimos Gaiditzis and Michael Stadler
Maintainer: Michael Stadler <michael.stadler@fmi.ch>
Depends: parallel, GenomicRanges (>= 1.13.3), Rbowtie
Imports: methods, grDevices, graphics, utils, zlibbioc, BiocGenerics,
Imports: methods, grDevices, graphics, utils, BiocGenerics,
S4Vectors (>= 0.9.25), IRanges, BiocManager, Biobase,
Biostrings, BSgenome, Rsamtools (>= 1.19.38),
Biostrings, BSgenome, Rsamtools (>= 1.99.1),
GenomicFeatures (>= 1.17.13), ShortRead (>= 1.19.1),
GenomicAlignments, BiocParallel, GenomeInfoDb,
rtracklayer, GenomicFiles
Expand All @@ -19,7 +19,7 @@ Suggests:
rmarkdown,
covr,
testthat
LinkingTo: Rsamtools
LinkingTo: Rhtslib (>= 1.15.3)
Description: This package provides a framework for the quantification
and analysis of Short Reads. It covers a complete workflow
starting from raw sequence reads, over creation of alignments
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
useDynLib("QuasR", .registration=TRUE)

import(methods)
import(zlibbioc)

import(BiocGenerics)
importFrom(GenomeInfoDb, seqlengths, bsgenomeName, seqlevelsInUse, seqlevels, genome)
Expand Down
17 changes: 7 additions & 10 deletions src/Makevars
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
SAMTOOLS_PATH=\
`echo 'cat(system.file("usrlib", .Platform[["r_arch"]], \
package="Rsamtools", mustWork=TRUE))' |\
"${R_HOME}/bin/R" --vanilla --slave`
SAMTOOLS_LIBS="$(SAMTOOLS_PATH)/libbam.a" "$(SAMTOOLS_PATH)/libbcf.a"\
"$(SAMTOOLS_PATH)/libtabix.a" -lz -pthread
SAMTOOLS_CPPFLAGS=-D_USE_KNETFILE -DBGZF_CACHE -D_FILE_OFFSET_BITS=64 \
-D_LARGEFILE64_SOURCE
RHTSLIB_LIBS=`echo 'Rhtslib::pkgconfig("PKG_LIBS")'|\
"${R_HOME}/bin/R" --vanilla --slave`
RHTSLIB_CPPFLAGS=`echo 'Rhtslib::pkgconfig("PKG_CPPFLAGS")'|\
"${R_HOME}/bin/R" --vanilla --slave`

PKG_LIBS=$(RHTSLIB_LIBS)
PKG_CPPFLAGS=$(RHTSLIB_CPPFLAGS)

PKG_LIBS=$(SAMTOOLS_LIBS)
PKG_CPPFLAGS=$(SAMTOOLS_CPPFLAGS)
11 changes: 6 additions & 5 deletions src/Makevars.win
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
SAMVARS=$(shell echo 'cat(system.file("usretc", .Platform[["r_arch"]],\
"Rsamtools.mk", package="Rsamtools", mustWork=TRUE))' |\
RHTSLIB_LIBS=$(shell echo 'Rhtslib::pkgconfig("PKG_LIBS")'|\
"${R_HOME}/bin/R" --vanilla --slave)
include $(SAMVARS)
RHTSLIB_CPPFLAGS=$(shell echo 'Rhtslib::pkgconfig("PKG_CPPFLAGS")'|\
"${R_HOME}/bin/R" --vanilla --slave)

PKG_LIBS=$(RHTSLIB_LIBS)
PKG_CPPFLAGS=$(RHTSLIB_CPPFLAGS)

PKG_LIBS=$(SAMTOOLS_LIBS)
PKG_CPPFLAGS=$(SAMTOOLS_CPPFLAGS)
1 change: 1 addition & 0 deletions src/bam.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include <bam.c>
1 change: 1 addition & 0 deletions src/bam_cat.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include <bam_cat.c>
1 change: 1 addition & 0 deletions src/bam_plbuf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include <bam_plbuf.c>
169 changes: 8 additions & 161 deletions src/cat_bam.c
Original file line number Diff line number Diff line change
@@ -1,164 +1,11 @@
/*
bam_cat -- efficiently concatenates bam files
bam_cat can be used to concatenate BAM files. Under special
circumstances, it can be used as an alternative to 'samtools merge' to
concatenate multiple sorted files into a single sorted file. For this
to work each file must be sorted, and the sorted files must be given
as command line arguments in order such that the final read in file i
is less than or equal to the first read in file i+1.
This code is derived from the bam_reheader function in samtools 0.1.8
and modified to perform concatenation by Chris Saunders on behalf of
Illumina.
########## License:
The MIT License
Original SAMtools work copyright (c) 2008-2009 Genome Research Ltd.
Modified SAMtools work copyright (c) 2010 Illumina, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/


/*
makefile:
"""
CC=gcc
CFLAGS+=-g -Wall -O2 -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -I$(SAMTOOLS_DIR)
LDFLAGS+=-L$(SAMTOOLS_DIR)
LDLIBS+=-lbam -lz
all:bam_cat
"""
*/


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#include "samtools/knetfile.h"
#include "samtools/bgzf.h"
#include "samtools/bam.h"
#include "cat_bam.h"
#include "bam.h"

#define BUF_SIZE 0x10000

#define GZIPID1 31
#define GZIPID2 139

#define BGZF_EMPTY_BLOCK_SIZE 28

// copied bam_cat from samtools
int _bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
{
BGZF *fp;
FILE* fp_file;
uint8_t *buf;
uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
const int es=BGZF_EMPTY_BLOCK_SIZE;
int i;

// fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
fp = bgzf_open(outbam, "w");
if (fp == 0) {
Rf_error("[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
return 1;
}
if (h) bam_header_write(fp, h);

buf = (uint8_t*) malloc(BUF_SIZE);
for(i = 0; i < nfn; ++i){
BGZF *in;
bam_header_t *old;
int len,j;

in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(fileno(stdin), "r");
if (in == 0) {
Rf_error("[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
return -1;
}
if (in->is_write) return -1;

old = bam_header_read(in);
if (h == 0 && i == 0) bam_header_write(fp, old);

if (in->block_offset < in->block_length) {
bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
bgzf_flush(fp);
}

j=0;
fp_file=fp->fp;
#ifdef _USE_KNETFILE
while ((len = (int)knet_read(in->fp, buf, BUF_SIZE)) > 0) {
#else
while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) {
#endif
if(len<es){
int diff=es-len;
if(j==0) {
Rf_error("[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
return -1;
}
fwrite(ebuf, 1, (size_t)len, fp_file);
memcpy(ebuf,ebuf+len,diff);
memcpy(ebuf+diff,buf,len);
} else {
if(j!=0) fwrite(ebuf, 1, es, fp_file);
len-= es;
memcpy(ebuf,buf+len,es);
fwrite(buf, 1, (size_t)len, fp_file);
}
j=1;
}

/* check final gzip block */
{
const uint8_t gzip1=ebuf[0];
const uint8_t gzip2=ebuf[1];
const uint32_t isize=*((uint32_t*)(ebuf+es-4));
if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
Rprintf("[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
Rf_error(" Possible output corruption.\n");
fwrite(ebuf, 1, es, fp_file);
}
}
bam_header_destroy(old);
bgzf_close(in);
}
free(buf);
bgzf_close(fp);
return 0;
}


int bam_cat(int, char * const *, const bam_hdr_t *, const char *);

SEXP cat_bam(SEXP inbam, SEXP outbam)
{
bam_header_t *h = 0;
bam_hdr_t *h = 0;
int ret = 0;

// check parameter inbam
Expand All @@ -179,10 +26,10 @@ SEXP cat_bam(SEXP inbam, SEXP outbam)
}
const char * fout = Rf_translateChar(STRING_ELT(outbam, 0));

// execute bam_cat copied from samtools
ret = _bam_cat(nfin, (char * const *)fin, h, fout);

R_Free(fin);
ret = bam_cat(nfin, (char * const *)fin, h, fout);
if (ret != 0)
Rf_error("call to bam_cat() returned a non-zero value");

return Rf_ScalarInteger(ret);
R_Free(fin);
return Rf_ScalarInteger(ret);
}
2 changes: 1 addition & 1 deletion src/cat_bam.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
// include Boolean.h early, will define TRUE/FALSE enum prefent Rdefines.h from defining them as int constants
#include <R_ext/Boolean.h>
#include <Rdefines.h>
#include "samtools/sam.h"
#include "htslib/sam.h"

SEXP cat_bam(SEXP inbam, SEXP outbam);
2 changes: 1 addition & 1 deletion src/convert_reads_id_bis_rc.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#include "samtools/kseq.h"
#include "htslib/kseq.h"
// include Boolean.h early, will define TRUE/FALSE enum prefent Rdefines.h from defining them as int constants
#include <R_ext/Boolean.h>
#include <Rdefines.h>
Expand Down
2 changes: 1 addition & 1 deletion src/count_alignments.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// include Boolean.h early, will define TRUE/FALSE enum prefent Rdefines.h from defining them as int constants
#include <R_ext/Boolean.h>
#include <Rdefines.h>
#include "samtools/sam.h"
#include "htslib/sam.h"
#include "utilities.h"

SEXP count_alignments_non_allelic(SEXP bamfile, SEXP tid, SEXP start, SEXP end, SEXP strand,
Expand Down
2 changes: 1 addition & 1 deletion src/count_junctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@
// include Boolean.h early, will define TRUE/FALSE enum prefent Rdefines.h from defining them as int constants
#include <R_ext/Boolean.h>
#include <Rdefines.h>
#include "samtools/sam.h"
#include "sam.h"

SEXP count_junctions(SEXP bamfile, SEXP tid, SEXP start, SEXP end, SEXP allelic, SEXP includeSecondary, SEXP mapqMin, SEXP mapqMax);
2 changes: 1 addition & 1 deletion src/extract_unmapped_reads.h
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// include Boolean.h early, will define TRUE/FALSE enum prefent Rdefines.h from defining them as int constants
#include <R_ext/Boolean.h>
#include <Rdefines.h>
#include "samtools/sam.h"
#include "sam.h"
#include <stdio.h>

SEXP extract_unmapped_reads(SEXP inBam, SEXP outFile, SEXP fastq, SEXP rcRead2);
Expand Down
56 changes: 10 additions & 46 deletions src/idxstats_bam.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,6 @@ This code is derived from the bam_idxstats function in samtools.

#include "idxstats_bam.h"

#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1

typedef struct {
uint64_t u, v;
} pair64_t;

#define pair64_lt(a,b) ((a).u < (b).u)
//KSORT_INIT(off, pair64_t, pair64_lt)

typedef struct {
uint32_t m, n;
pair64_t *list;
} bam_binlist_t;

typedef struct {
int32_t n, m;
uint64_t *offset;
} bam_lidx_t;


KHASH_MAP_INIT_INT(i, bam_binlist_t)

struct __bam_index_t {
int32_t n;
uint64_t n_no_coor; // unmapped reads without coordinate
khash_t(i) **index;
bam_lidx_t *index2;
};


SEXP idxstats_bam(SEXP inBam)
{
if (!Rf_isString(inBam) || 1 != Rf_length(inBam)){
Expand Down Expand Up @@ -64,30 +34,24 @@ SEXP idxstats_bam(SEXP inBam)

SEXP stats, attrib, refnames, reflengths, mapped, unmapped;
PROTECT(stats = allocVector(VECSXP, 4));
PROTECT(refnames = allocVector(STRSXP, idx->n+1));
PROTECT(reflengths = allocVector(INTSXP, idx->n+1));
PROTECT(mapped = allocVector(INTSXP, idx->n+1));
PROTECT(unmapped = allocVector(INTSXP, idx->n+1));
PROTECT(refnames = allocVector(STRSXP, hts_idx_get_n(idx)+1));
PROTECT(reflengths = allocVector(INTSXP, hts_idx_get_n(idx)+1));
PROTECT(mapped = allocVector(INTSXP, hts_idx_get_n(idx)+1));
PROTECT(unmapped = allocVector(INTSXP, hts_idx_get_n(idx)+1));
PROTECT(attrib = allocVector(STRSXP, 4));

for (i = 0; i < idx->n; ++i) {
khint_t k;
khash_t(i) *h = idx->index[i];
for (i = 0; i < hts_idx_get_n(idx); ++i) {
uint64_t mapped_i, unmapped_i;
SET_STRING_ELT(refnames, i, mkChar(header->target_name[i]));
INTEGER(reflengths)[i] = (int)(header->target_len[i]);
k = kh_get(i, h, BAM_MAX_BIN);
if (k != kh_end(h)){
INTEGER(mapped)[i] = (int)kh_val(h, k).list[1].u;
INTEGER(unmapped)[i] = (int)kh_val(h, k).list[1].v;
} else {
INTEGER(mapped)[i] = 0;
INTEGER(unmapped)[i] = 0;
}
hts_idx_get_stat(idx, i, &mapped_i, &unmapped_i);
INTEGER(mapped)[i] = (int) mapped_i;
INTEGER(unmapped)[i] = (int) unmapped_i;
}
SET_STRING_ELT(refnames, i, mkChar("*"));
INTEGER(reflengths)[i] = 0;
INTEGER(mapped)[i] = 0;
INTEGER(unmapped)[i] = (int)idx->n_no_coor;
INTEGER(unmapped)[i] = (int) hts_idx_get_n_no_coor(idx);

SET_STRING_ELT(attrib, 0, mkChar("seqname"));
SET_STRING_ELT(attrib, 1, mkChar("seqlength"));
Expand Down
10 changes: 5 additions & 5 deletions src/idxstats_bam.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,12 @@
// include Boolean.h early, will define TRUE/FALSE enum prefent Rdefines.h from defining them as int constants
#include <R_ext/Boolean.h>
#include <Rdefines.h>
#include "samtools/bam.h"
#include "samtools/khash.h"
#include "samtools/ksort.h"
#include "samtools/bam_endian.h"
#include "sam.h"
#include "htslib/khash.h"
#include "htslib/ksort.h"
#include "bam_endian.h"
#ifdef _USE_KNETFILE
#include "samtools/knetfile.h"
#include "htslib/knetfile.h"
#endif

SEXP idxstats_bam(SEXP inBam);
Expand Down
Loading

0 comments on commit 02b7cb1

Please sign in to comment.