Skip to content

Commit

Permalink
added fasta.gz
Browse files Browse the repository at this point in the history
  • Loading branch information
nekrut authored and natefoo committed Feb 19, 2018
1 parent a63b0e7 commit 39f2454
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 1 deletion.
4 changes: 4 additions & 0 deletions config/datatypes_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@
<converter file="fasta_to_fai.xml" target_datatype="fai"/>
<display file="igv/genome_fasta.xml" inherit="true"/>
</datatype>
<!-- fasta.gz: handled by galaxy.datatypes.sequence:FastaGz; the converter below produces a plain "fasta" dataset. -->
<datatype extension="fasta.gz" type="galaxy.datatypes.sequence:FastaGz" display_in_upload="true">
<converter file="fastagz_to_fasta.xml" target_datatype="fasta"/>
</datatype>
<datatype extension="fastq" type="galaxy.datatypes.sequence:Fastq" display_in_upload="true" description="FASTQ format is a text-based format for storing both a biological sequence (usually nucleotide sequence) and its corresponding quality scores." description_url="https://wiki.galaxyproject.org/Learn/Datatypes#Fastq">
<converter file="fastq_to_fqtoc.xml" target_datatype="fqtoc"/>
</datatype>
Expand Down Expand Up @@ -766,6 +769,7 @@
<!-- TODO: see molecules.py <sniffer type="galaxy.datatypes.molecules:SMILES"/>-->
<sniffer type="galaxy.datatypes.phylip:Phylip"/>
<sniffer type="galaxy.datatypes.sequence:Fasta"/>
<sniffer type="galaxy.datatypes.sequence:FastaGz"/>
<sniffer type="galaxy.datatypes.sequence:FastqSanger"/>
<sniffer type="galaxy.datatypes.sequence:FastqSangerGz"/>
<sniffer type="galaxy.datatypes.sequence:FastqSangerBz2"/>
Expand Down
11 changes: 11 additions & 0 deletions lib/galaxy/datatypes/converters/fastagz_to_fasta.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!--
    Hidden converter: decompresses a fasta.gz dataset into a plain fasta dataset.
    The id/name previously read "fastqgz_to_fastq" / "fastq.gz ... fastq", copied
    from the FASTQ converter; they now match this file's actual formats.
-->
<tool id="CONVERTER_fastagz_to_fasta" name="Convert fasta.gz files to fasta" version="1.0.0" hidden="true">
    <!-- gzip flags: -d decompress, -c write to stdout, -f force. -->
    <command>gzip -dcf '$input1' > '$output1'</command>
    <inputs>
        <param format="fasta.gz" name="input1" type="data" label="Choose FASTA file"/>
    </inputs>
    <outputs>
        <data format="fasta" name="output1"/>
    </outputs>
    <help>
    </help>
</tool>
13 changes: 13 additions & 0 deletions lib/galaxy/datatypes/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
log = logging.getLogger(__name__)

# Opt-in (beta) switches for sniffing compressed sequence files. Each flag is
# True only when the corresponding environment variable is set to exactly "1";
# when unset it defaults to "0", i.e. disabled.
SNIFF_COMPRESSED_FASTQS = os.environ.get("GALAXY_ENABLE_BETA_COMPRESSED_FASTQ_SNIFFING", "0") == "1"
SNIFF_COMPRESSED_FASTAS = os.environ.get("GALAXY_ENABLE_BETA_COMPRESSED_FASTA_SNIFFING", "0") == "1"


class SequenceSplitLocations(data.Text):
Expand Down Expand Up @@ -310,6 +311,18 @@ def split(cls, input_datasets, subdir_generator_function, split_params):
return None
raise NotImplementedError("Can't split generic alignment files")

class FastaGz(Sequence, Binary):
    """Class representing a generic gzip-compressed FASTA sequence"""
    edam_format = "format_1929"
    file_ext = "fasta.gz"
    compressed = True

    def sniff(self, filename):
        """
        Determines whether the file is in gzip-compressed FASTA format.

        Returns False immediately unless compressed-FASTA sniffing has been
        enabled via ``SNIFF_COMPRESSED_FASTAS`` and ``filename`` passes
        ``is_gzip``; otherwise the decision is delegated to ``Sequence.sniff``.
        """
        # Sniffing of compressed FASTA is opt-in; stay silent when disabled.
        if not SNIFF_COMPRESSED_FASTAS:
            return False
        if not is_gzip(filename):
            return False
        # NOTE(review): the still-compressed path is handed to Sequence.sniff
        # as-is -- confirm that it copes with gzip input.
        return Sequence.sniff(self, filename)

class Fasta(Sequence):
"""Class representing a FASTA sequence"""
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/datatypes/sniff.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def handle_uploaded_dataset_file(filename, datatypes_registry, ext='auto'):
AUTO_DETECT_EXTENSIONS = ['auto'] # should 'data' also cause auto detect?
DECOMPRESSION_FUNCTIONS = dict(gzip=gzip.GzipFile, bz2=bz2.BZ2File)
COMPRESSION_CHECK_FUNCTIONS = [('gzip', is_gzip), ('bz2', is_bz2)]
COMPRESSION_DATATYPES = dict(gzip=['bam', 'fastq.gz', 'fastqsanger.gz', 'fastqillumina.gz', 'fastqsolexa.gz', 'fastqcssanger.gz'], bz2=['fastq.bz2', 'fastqsanger.bz2', 'fastqillumina.bz2', 'fastqsolexa.bz2', 'fastqcssanger.bz2'])
COMPRESSION_DATATYPES = dict(gzip=['bam', 'fasta.gz', 'fastq.gz', 'fastqsanger.gz', 'fastqillumina.gz', 'fastqsolexa.gz', 'fastqcssanger.gz'], bz2=['fastq.bz2', 'fastqsanger.bz2', 'fastqillumina.bz2', 'fastqsolexa.bz2', 'fastqcssanger.bz2'])
COMPRESSED_EXTENSIONS = []
for exts in COMPRESSION_DATATYPES.values():
COMPRESSED_EXTENSIONS.extend(exts)
Expand Down

0 comments on commit 39f2454

Please sign in to comment.