Skip to content

Commit

Permalink
Merge pull request #86 from nileshpatra/switch-to-pygtrie
Browse files Browse the repository at this point in the history
Replace Biopython trie with pygtrie
  • Loading branch information
jgallowa07 committed Aug 25, 2020
2 parents 4624039 + 9ff13b8 commit 11ae5ab
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 19 deletions.
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ python:
# - "2.7"
# - "pypy"
# - "3.4"
- "3.5"
# - "3.5"
- "3.6"
- "3.7"
- "3.8"
Expand All @@ -12,7 +12,7 @@ python:
# BioPython doesn't always play well with pip install.
install:
- "if [[ $TRAVIS_PYTHON_VERSION != 'pypy' ]]; then pip install -q numpy; fi"
- "pip install -q biopython nose"
- "pip install -q biopython nose pygtrie"
- "pip install ."

script:
Expand Down
1 change: 1 addition & 0 deletions requirements-rtd.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
biopython
sphinx
pygtrie
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
biopython>=1.70
pygtrie

# for development
wheel
Expand Down
31 changes: 20 additions & 11 deletions seqmagick/subcommands/quality_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,29 @@
import time

from Bio import SeqIO
try:
from Bio import trie, triefind
except ImportError:
trie = None
triefind = None
import pygtrie as trie
from Bio.SeqIO import QualityIO

from seqmagick import fileformat, __version__
from .common import typed_range, FileType


def trie_match(string, trie):
    """Return the longest key of ``trie`` that is a prefix of ``string``.

    Only ``trie.keys()`` is consulted, so any mapping-like object whose
    keys are strings can be passed in.

    :param string: text whose leading characters are matched against the
        stored keys.
    :param trie: container exposing ``keys()`` that yields string keys.
    :returns: the longest non-empty key that ``string`` starts with, or
        ``None`` when no key is a prefix of ``string``.
    """
    longest = None
    # A single pass over the keys is enough: a key matches exactly when
    # ``string`` starts with it, so there is no need to rescan every key
    # for each successively longer prefix of ``string``.
    for key in trie.keys():
        # Empty keys are never reported as a match.
        if not key or not string.startswith(key):
            continue
        if longest is None or len(key) > len(longest):
            longest = key
    return longest


# Default minimum mean quality score
DEFAULT_MEAN_SCORE = 25.0

Expand Down Expand Up @@ -626,7 +639,7 @@ def __init__(self,
self.trie = trie

def filter_record(self, record):
m = triefind.match(str(record.seq), self.trie)
m = trie_match(str(record.seq), self.trie)
if m:
if self.listener:
self.listener(
Expand All @@ -646,7 +659,7 @@ def parse_barcode_file(fp, primer=None, header=False):
Any additional columns are ignored
"""
tr = trie.trie()
tr = trie.StringTrie()
reader = csv.reader(fp)

if header:
Expand Down Expand Up @@ -680,10 +693,6 @@ def action(arguments):
raise ValueError("--quality-window-mean-qual specified without "
"--quality-window")

if trie is None or triefind is None:
raise ValueError(
'Missing Bio.trie and/or Bio.triefind modules. Cannot continue')

filters = []
input_type = fileformat.from_handle(arguments.sequence_file)
output_type = fileformat.from_handle(arguments.output_file)
Expand Down
8 changes: 2 additions & 6 deletions seqmagick/test/test_subcommands_quality_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@

from seqmagick.subcommands import quality_filter

from Bio import triefind

IS_PYPY = hasattr(sys, 'pypy_version_info')


Expand Down Expand Up @@ -194,7 +192,6 @@ def test_all_truncated(self):
[i.id for i in actual])


@unittest.skipIf(IS_PYPY, "Bio.trie not available on pypy.")
class PrimerBarcodeFilterTestCase(unittest.TestCase):
def setUp(self):
self.sequences = [
Expand Down Expand Up @@ -240,7 +237,6 @@ def e_handler(record, n=1):
self.assertEqual(events, [1, 5])


@unittest.skipIf(IS_PYPY, "Bio.trie not available on pypy.")
class BarcodePrimerTrieTestCase(unittest.TestCase):
def setUp(self):
self.barcode_str = """p1d1bc205,TACTAGCG,CATTGCCTATG
Expand All @@ -258,9 +254,9 @@ def test_primer_provided(self):
res = quality_filter.parse_barcode_file(self.fp, primer='CATTGCCTATG')
self.assertEqual(9, len(list(res.keys())))
self.assertEqual('p1d1bc210', res['TACAGTCGCATTGCCTATG'])
self.assertEqual(None, triefind.match('TACAGTCGCATTGCCTAT', res))
self.assertEqual(None, quality_filter.trie_match('TACAGTCGCATTGCCTAT', res))
self.assertEqual('TACAGTCGCATTGCCTATG',
triefind.match('TACAGTCGCATTGCCTATGCTACCTA', res))
quality_filter.trie_match('TACAGTCGCATTGCCTATGCTACCTA', res))

def test_primer_in_file(self):
res = quality_filter.parse_barcode_file(self.fp, primer=None)
Expand Down

0 comments on commit 11ae5ab

Please sign in to comment.