Skip to content

Commit

Permalink
Replace bcftools index with pysam.bcftools.index or pysam.tabix_index
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Dec 8, 2017
1 parent df35d08 commit 3f6a59e
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 35 deletions.
22 changes: 4 additions & 18 deletions lib/galaxy/datatypes/binary.py
Expand Up @@ -16,6 +16,7 @@

import h5py
import pysam
import pysam.bcftools
from bx.seq.twobit import TWOBIT_MAGIC_NUMBER, TWOBIT_MAGIC_NUMBER_SWAP, TWOBIT_MAGIC_SIZE

from galaxy import util
Expand Down Expand Up @@ -516,13 +517,6 @@ class Bcf(BaseBcf):
"""
Class describing a (BGZF-compressed) BCF file
>>> from galaxy.datatypes.sniff import get_test_fname
>>> fname = get_test_fname('1.bcf')
>>> Bcf().sniff(fname)
True
>>> fname = get_test_fname('1.bcf_uncompressed')
>>> Bcf().sniff(fname)
False
"""
file_ext = "bcf"

Expand All @@ -546,24 +540,16 @@ def set_meta(self, dataset, overwrite=True, **kwd):
if not index_file:
index_file = dataset.metadata.spec['bcf_index'].param.new_file(dataset=dataset)
# Create the bcf index
# $ bcftools index
# Usage: bcftools index <in.bcf>

dataset_symlink = os.path.join(os.path.dirname(index_file.file_name),
'__dataset_%d_%s' % (dataset.id, os.path.basename(index_file.file_name)))
os.symlink(dataset.file_name, dataset_symlink)

stderr_name = tempfile.NamedTemporaryFile(prefix="bcf_index_stderr").name
command = ['bcftools', 'index', dataset_symlink]
try:
subprocess.check_call(args=command, stderr=open(stderr_name, 'wb'))
shutil.move(dataset_symlink + '.csi', index_file.file_name) # this will fail if bcftools < 1.0 is used, because it creates a .bci index file instead of .csi
pysam.bcftools.index(dataset_symlink)
shutil.move(dataset_symlink + '.csi', index_file.file_name)
except Exception as e:
stderr = open(stderr_name).read().strip()
raise Exception('Error setting BCF metadata: %s' % (stderr or str(e)))
raise Exception('Error setting BCF metadata: %s' % (str(e)))
finally:
# Remove temp file and symlink
os.remove(stderr_name)
os.remove(dataset_symlink)
dataset.metadata.bcf_index = index_file

Expand Down
34 changes: 17 additions & 17 deletions lib/galaxy/datatypes/tabular.py
Expand Up @@ -15,6 +15,9 @@
from cgi import escape
from json import dumps

import pysam
import pysam.bcftools

from galaxy import util
from galaxy.datatypes import binary, data, metadata
from galaxy.datatypes.metadata import MetadataElement
Expand All @@ -23,6 +26,8 @@
iter_headers
)
from galaxy.util import compression_utils
from galaxy.util.checkers import is_gzip

from . import dataproviders

if sys.version_info > (3,):
Expand Down Expand Up @@ -733,40 +738,35 @@ def genomic_region_dict_dataprovider(self, dataset, **settings):
class Vcf(BaseVcf):
file_ext = 'vcf'

def sniff(self, filename):
if is_gzip(filename):
return False
return BaseVcf.sniff(self, filename)


class VcfGz(BaseVcf, binary.Binary):
file_ext = 'vcf_bgzip'
compressed = True

MetadataElement(name="tabix_index", desc="Vcf Index File", param=metadata.FileParameter, file_ext="tbi", readonly=True, no_value=None, visible=False, optional=True)

def sniff(self, filename):
if not is_gzip(filename):
return False
return BaseVcf.sniff(self, filename)

def set_meta(self, dataset, **kwd):
super(BaseVcf, self).set_meta(dataset, **kwd)
""" Creates the index for the VCF file. """
# These metadata values are not accessible by users, always overwrite
index_file = dataset.metadata.bcf_index
if not index_file:
index_file = dataset.metadata.spec['tabix_index'].param.new_file(dataset=dataset)
# Create the bcf index
# $ bcftools index
# Usage: bcftools index <in.bcf>

dataset_symlink = os.path.join(os.path.dirname(index_file.file_name),
'__dataset_%d_%s' % (dataset.id, os.path.basename(index_file.file_name))) + ".vcf.gz"
os.symlink(dataset.file_name, dataset_symlink)

stderr_name = tempfile.NamedTemporaryFile(prefix="bcf_index_stderr").name
command = ['bcftools', 'index', '-t', dataset_symlink]
try:
subprocess.check_call(args=command, stderr=open(stderr_name, 'wb'))
shutil.move(dataset_symlink + '.tbi', index_file.file_name) # this will fail if bcftools < 1.0 is used, because it creates a .bci index file instead of .csi
pysam.tabix_index(dataset.file_name, index_file)
except Exception as e:
stderr = open(stderr_name).read().strip()
raise Exception('Error setting BCF metadata: %s' % (stderr or str(e)))
finally:
# Remove temp file and symlink
os.remove(stderr_name)
os.remove(dataset_symlink)
raise Exception('Error setting VCF.gz metadata: %s' % (str(e)))
dataset.metadata.tabix_index = index_file


Expand Down

0 comments on commit 3f6a59e

Please sign in to comment.