Skip to content

Commit

Permalink
Merge a32be51 into 5dbd9fc
Browse files Browse the repository at this point in the history
  • Loading branch information
dpark01 committed Nov 27, 2018
2 parents 5dbd9fc + a32be51 commit 255e014
Show file tree
Hide file tree
Showing 9 changed files with 116 additions and 1 deletion.
14 changes: 14 additions & 0 deletions test/input/TestFeatureReader/GU481072.1.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>Feature gb|GU481072.1|
52 1524 gene
gene_desc GPC
52 1524 CDS
product glycoprotein precursor
transl_table 1
protein_id gb|ADU56618.1|
3294 1585 gene
gene_desc NP
3294 1585 CDS
product nucleoprotein
transl_table 1
protein_id gb|ADU56619.1|

14 changes: 14 additions & 0 deletions test/input/TestFeatureReader/GU481073.1.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>Feature gb|GU481073.1|
46 345 gene
gene_desc Z
46 345 CDS
product Z protein
transl_table 1
protein_id gb|ADU56620.1|
7116 448 gene
gene_desc L
7116 448 CDS
product polymerase
transl_table 1
protein_id gb|ADU56621.1|

14 changes: 14 additions & 0 deletions test/input/TestFeatureReader/KM821772.1.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>Feature gb|KM821772.1|
57 356 gene
gene Z
57 356 CDS
product Z protein
transl_table 1
protein_id gb|AIT17124.1|
7118 456 gene
gene L
7118 456 CDS
product polymerase
transl_table 1
protein_id gb|AIT17125.1|

14 changes: 14 additions & 0 deletions test/input/TestFeatureReader/KM821773.1.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>Feature gb|KM821773.1|
48 1523 gene
gene GPC
48 1523 CDS
product glycoprotein precursor
transl_table 1
protein_id gb|AIT17126.1|
3294 1585 gene
gene NP
3294 1585 CDS
product nucleoprotein
transl_table 1
protein_id gb|AIT17127.1|

9 changes: 9 additions & 0 deletions test/input/TestFeatureReader/NC_026438.1.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
>Feature ref|NC_026438.1|
1 2280 gene
gene PB2
locus_tag UJ99_s1gp1
db_xref GeneID:23308131
1 2280 CDS
product polymerase PB2
protein_id ref|YP_009118631.1|

14 changes: 14 additions & 0 deletions test/input/TestFeatureReader/test1-S.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>Feature gb|KM821998.1|
59 1531 gene
gene GPC
59 1531 CDS
product glycoprotein precursor
transl_table 1
protein_id gb|AIT17576.1|
3308 1599 gene
gene NP
3308 1599 CDS
product nucleoprotein
transl_table 1
protein_id gb|AIT17577.1|

14 changes: 14 additions & 0 deletions test/input/TestFeatureReader/test2-L.tbl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
>Feature gb|KM821997.1|
76 375 gene
gene Z
76 375 CDS
product Z protein
transl_table 1
protein_id gb|AIT17574.1|
7132 470 gene
gene L
7132 470 CDS
product polymerase
transl_table 1
protein_id gb|AIT17575.1|

22 changes: 22 additions & 0 deletions test/unit/test_ncbi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# module-specific
import ncbi
import util.file
import util.genbank
from test import assert_equal_bam_reads, TestCaseWithTmp, assert_equal_contents, assert_md5_equal_to_line_in_file


Expand All @@ -21,6 +22,27 @@ def test_help_parser_for_each_command(self):
parser = parser_fun(argparse.ArgumentParser())
helpstring = parser.format_help()

class TestFeatureReader(TestCaseWithTmp):
    """Exercise util.genbank.get_feature_table_id against .tbl fixture files.

    Each test passes the path of a feature table in ``self.input_dir`` and
    checks that the returned ID equals the bare accession (with version),
    i.e. without the ``gb|`` / ``ref|`` prefix or trailing ``|`` seen in the
    fixture headers.
    """

    def setUp(self):
        super(TestFeatureReader, self).setUp()
        # presumably resolves to the fixture directory for this test class
        # (test/input/TestFeatureReader) -- confirm against util.file
        self.input_dir = util.file.get_test_input_path(self)

    def _feature_table_id(self, table_name):
        """Return the ID reported for the fixture file named ``table_name``."""
        table_path = os.path.join(self.input_dir, table_name)
        return util.genbank.get_feature_table_id(table_path)

    def test_read_seq_id_simple(self):
        # Fixtures whose file name is the accession followed by '.tbl'.
        for expected_id in ('GU481072.1', 'GU481073.1', 'KM821772.1', 'KM821773.1'):
            self.assertEqual(expected_id, self._feature_table_id(expected_id + '.tbl'))

    def test_read_seq_id_different_fnames(self):
        # File names here do not contain the accession, so a passing result
        # cannot have been derived from the file name.
        self.assertEqual('KM821998.1', self._feature_table_id('test1-S.tbl'))
        self.assertEqual('KM821997.1', self._feature_table_id('test2-L.tbl'))

    def test_read_seq_id_refseq(self):
        # The fixture header uses the 'ref|' prefix and the accession
        # contains an underscore.
        self.assertEqual('NC_026438.1', self._feature_table_id('NC_026438.1.tbl'))

class TestFeatureTransfer(TestCaseWithTmp):
def setUp(self):
super(TestFeatureTransfer, self).setUp()
Expand Down
2 changes: 1 addition & 1 deletion util/genbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def get_feature_table_id(featureTableFile):
if not (
(seqid.startswith('gb|') or seqid.startswith('ref|'))):
raise Exception("reference annotation ID does not appear to refer to a GenBank or RefSeq accession: %s" % seqid)
m = re.search(r"(?P<db>(?:gb|ref))\|(?:(?P<accession>[a-zA-Z0-9\.]+))+.*", seqid)
m = re.search(r"(?P<db>(?:gb|ref|dbj))\|(?:(?P<accession>[a-zA-Z0-9\._]+))+.*", seqid)
if m:
seqid = m.group("accession")
else:
Expand Down

0 comments on commit 255e014

Please sign in to comment.