def trie_match(string, trie):
    """Return the longest key of *trie* that is a prefix of *string*.

    Args:
        string: The text whose leading characters are matched against the
            stored keys (called with ``str(record.seq)`` in this module).
        trie: A mapping-like object exposing ``keys()`` that yields string
            keys. NOTE: this parameter name shadows the module-level
            ``trie`` alias inside the function; it is kept unchanged for
            backward compatibility with existing callers.

    Returns:
        The longest non-empty key ``k`` for which ``string.startswith(k)``
        holds, or ``None`` when no key matches (including when *string* is
        empty). An empty key is never reported as a match.
    """
    # A single pass over the keys replaces the earlier approach of
    # re-listing and re-scanning every key once per prefix length of
    # *string*; the selected key is the same.
    #
    # NOTE(review): the barcode mapping in this module is built with
    # pygtrie.StringTrie, whose native prefix queries presumably operate on
    # separator-delimited components rather than single characters --
    # confirm before swapping this for a trie-native prefix lookup.
    return max(
        (key for key in trie.keys() if key and string.startswith(key)),
        key=len,
        default=None,
    )
+659,7 @@ def parse_barcode_file(fp, primer=None, header=False): Any additional columns are ignored """ - tr = trie.trie() + tr = trie.StringTrie() reader = csv.reader(fp) if header: @@ -680,10 +693,6 @@ def action(arguments): raise ValueError("--quality-window-mean-qual specified without " "--quality-window") - if trie is None or triefind is None: - raise ValueError( - 'Missing Bio.trie and/or Bio.triefind modules. Cannot continue') - filters = [] input_type = fileformat.from_handle(arguments.sequence_file) output_type = fileformat.from_handle(arguments.output_file) diff --git a/seqmagick/test/test_subcommands_quality_filter.py b/seqmagick/test/test_subcommands_quality_filter.py index db1ffef..c0becb0 100644 --- a/seqmagick/test/test_subcommands_quality_filter.py +++ b/seqmagick/test/test_subcommands_quality_filter.py @@ -7,8 +7,6 @@ from seqmagick.subcommands import quality_filter -from Bio import triefind - IS_PYPY = hasattr(sys, 'pypy_version_info') @@ -194,7 +192,6 @@ def test_all_truncated(self): [i.id for i in actual]) -@unittest.skipIf(IS_PYPY, "Bio.trie not available on pypy.") class PrimerBarcodeFilterTestCase(unittest.TestCase): def setUp(self): self.sequences = [ @@ -240,7 +237,6 @@ def e_handler(record, n=1): self.assertEqual(events, [1, 5]) -@unittest.skipIf(IS_PYPY, "Bio.trie not available on pypy.") class BarcodePrimerTrieTestCase(unittest.TestCase): def setUp(self): self.barcode_str = """p1d1bc205,TACTAGCG,CATTGCCTATG @@ -258,9 +254,9 @@ def test_primer_provided(self): res = quality_filter.parse_barcode_file(self.fp, primer='CATTGCCTATG') self.assertEqual(9, len(list(res.keys()))) self.assertEqual('p1d1bc210', res['TACAGTCGCATTGCCTATG']) - self.assertEqual(None, triefind.match('TACAGTCGCATTGCCTAT', res)) + self.assertEqual(None, quality_filter.trie_match('TACAGTCGCATTGCCTAT', res)) self.assertEqual('TACAGTCGCATTGCCTATG', - triefind.match('TACAGTCGCATTGCCTATGCTACCTA', res)) + quality_filter.trie_match('TACAGTCGCATTGCCTATGCTACCTA', res)) def 
test_primer_in_file(self): res = quality_filter.parse_barcode_file(self.fp, primer=None)