Skip to content

Commit

Permalink
Merge pull request #384 from ChristopherBradley/377
Browse files Browse the repository at this point in the history
Refactored parse.gff
  • Loading branch information
GavinHuttley committed Nov 14, 2019
2 parents 9567522 + b20a2b9 commit a30e285
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 7 deletions.
4 changes: 2 additions & 2 deletions src/cogent3/core/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@
from cogent3.format.nexus import nexus_from_alignment
from cogent3.format.phylip import alignment_to_phylip
from cogent3.maths.stats.number import CategoryCounter
from cogent3.parse.gff import GffParser, parse_attributes
from cogent3.parse.gff import gff2_parser, parse_attributes
from cogent3.util import progress_display as UI
from cogent3.util.dict_array import DictArrayTemplate
from cogent3.util.misc import (
Expand Down Expand Up @@ -1135,7 +1135,7 @@ def annotate_from_gff(self, f):
frame,
attributes,
comments,
) in GffParser(f):
) in gff2_parser(f):
if name in self.named_seqs:
self.named_seqs[name].add_feature(
feature, parse_attributes(attributes), [(start, end)]
Expand Down
2 changes: 1 addition & 1 deletion src/cogent3/core/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,7 @@ def annotate_from_gff(self, f):
frame,
attributes,
comments,
) in gff.GffParser(f):
) in gff.gff2_parser(f):
if first_seqname is None:
first_seqname = seqname
else:
Expand Down
26 changes: 24 additions & 2 deletions src/cogent3/parse/gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,37 @@

__author__ = "Peter Maxwell"
__copyright__ = "Copyright 2007-2019, The Cogent Project"
__credits__ = ["Peter Maxwell", "Matthew Wakefield", "Gavin Huttley"]
__credits__ = [
"Peter Maxwell",
"Matthew Wakefield",
"Gavin Huttley",
"Christopher Bradley",
]
__license__ = "BSD-3"
__version__ = "2019.10.24a"
__maintainer__ = "Peter Maxwell"
__email__ = "pm67nz@gmail.com"
__status__ = "Production"

from io import StringIO
from pathlib import Path

def GffParser(f):
from cogent3.util.misc import open_


def gff_parser(f):
    """Yield the records parsed from a GFF source.

    Parameters
    ----------
    f
        Either a file path (``str`` or ``pathlib.Path``) or an
        ``io.StringIO`` instance. A path is opened with ``open_`` inside a
        ``with`` block; a ``StringIO`` is passed through unchanged.

    Yields
    ------
    Each item produced by ``gff2_parser`` for the given source.

    Raises
    ------
    TypeError
        If ``f`` is not a ``str``, ``Path`` or ``StringIO``. Because this is
        a generator, the error surfaces when iteration starts, not when the
        function is called.

    Notes
    -----
    Every supported input is currently delegated to ``gff2_parser``; no
    inspection of the GFF version is performed here.
    """
    # normalise Path objects so a single branch handles all file paths
    if isinstance(f, Path):
        f = str(f)

    if isinstance(f, str):
        with open_(f) as infile:
            yield from gff2_parser(infile)
    elif isinstance(f, StringIO):
        yield from gff2_parser(f)
    else:
        # tell the caller what was received and what is accepted
        raise TypeError(
            "gff_parser expected a str, pathlib.Path or io.StringIO, got %s"
            % type(f).__name__
        )


def gff2_parser(f):
assert not isinstance(f, str)
for line in f:
# comments and blank lines
Expand Down
12 changes: 12 additions & 0 deletions tests/data/gff2_test.gff
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
##gff-version 2
##source-version <source> <version text>
##date <date>
##Type <type> [<seqname>]
##DNA <seqname>
##acggctcggattggcgctggatgatagatcagacgac
##...
##end-DNA
seq1 BLASTX similarity 101 235 87.1 + 0 Target "HBA_HUMAN" 11 55 ; E_value 0.0003
dJ102G20 GD_mRNA coding_exon 7105 7201 . - 2 Sequence "dJ102G20.C1.1"
dJ102G20 GD_mRNA coding_exon 7105 7201 . - 2
12345 Source with spaces feature with spaces -100 3600000000 1e-5 - . Sequence "BROADO5" ; Note "This is a \t tab containing \n multi line comment"
17 changes: 15 additions & 2 deletions tests/test_parse/test_gff.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
"""Unit tests for GFF and related parsers.
"""
from io import StringIO
from pathlib import Path
from unittest import TestCase, main

from cogent3.parse.gff import *
Expand Down Expand Up @@ -92,14 +93,14 @@ class GffTest(TestCase):
def testGffParserData(self):
"""Test GffParser with valid data lines"""
for (line, canned_result) in data_lines:
result = next(GffParser(StringIO(line)))
result = next(gff_parser(StringIO(line)))
self.assertEqual(result, canned_result)

def testGffParserHeaders(self):
"""Test GffParser with valid data headers"""
data = "".join([x[0] for x in data_lines])
for header in headers:
result = list(GffParser(StringIO(header + data)))
result = list(gff_parser(StringIO(header + data)))
self.assertEqual(result, [x[1] for x in data_lines])

def test_parse_attributes(self):
Expand All @@ -109,6 +110,18 @@ def test_parse_attributes(self):
["HBA_HUMAN", "dJ102G20.C1.1", "", "BROADO5"],
)

def test_gff2_parser_string(self):
    """gff_parser should accept a file path supplied as a plain string"""
    gff_path = "data/gff2_test.gff"
    records = gff_parser(gff_path)
    # each parsed record is compared with the canned result at the same index
    for index, record in enumerate(records):
        expected = data_lines[index][1]
        self.assertEqual(record, expected)

def test_gff2_parser_path(self):
    """gff_parser should accept a file path supplied as a pathlib.Path"""
    gff_path = Path("data/gff2_test.gff")
    records = gff_parser(gff_path)
    # each parsed record is compared with the canned result at the same index
    for index, record in enumerate(records):
        expected = data_lines[index][1]
        self.assertEqual(record, expected)


if __name__ == "__main__":
main()

0 comments on commit a30e285

Please sign in to comment.