Skip to content
Browse files

FTLocationFactory.pm: Adjusted from_string() subroutine to

comply with the current GenBank Feature Table Definition, so now
"join(complement(C..D),complement(A..B))" produces the same
result as "complement(join(A..B,C..D))". Added more tests,
and cleaned up the "LocationFactory.t" format. Also updated the outdated
genome test file U58726.gb to its latest version (which complies
with this change), and "Splicedseq.t" (which uses it).
  • Loading branch information...
1 parent bb140e3 commit a986017d886e59566090d861b811ef8c790e1055 @fjossandon fjossandon committed Aug 7, 2014
Showing with 554 additions and 303 deletions.
  1. +8 −22 Bio/Factory/FTLocationFactory.pm
  2. +195 −123 t/SeqFeature/LocationFactory.t
  3. +1 −1 t/SeqIO/Splicedseq.t
  4. +350 −157 t/data/U58726.gb
View
30 Bio/Factory/FTLocationFactory.pm
@@ -163,7 +163,6 @@ sub from_string {
my @loc_objs;
my $loc_obj;
- my @gl_subloc_strands;
SUBLOCS:
while (@sublocs) {
@@ -190,12 +189,10 @@ sub from_string {
my $comploc = $1;
$sobj = $self->_parse_location($comploc);
$sobj->strand(-1);
- push @subloc_strands, -1;
- push @gl_subloc_strands, -1;
+ push @subloc_strands, -1;
} else {
$sobj = $self->_parse_location($splitloc);
- push @subloc_strands, 1;
- push @gl_subloc_strands, 1;
+ push @subloc_strands, 1;
}
push @s_objs, $sobj;
}
@@ -204,29 +201,22 @@ sub from_string {
# Guide Strand and sublocations adding order
if (scalar @s_objs > 0) {
my $identical = 0;
- my $gl_identical = 0;
my $first_value = $subloc_strands[0];
foreach my $strand (@subloc_strands) {
$identical++ if ($strand == $first_value);
}
- my $first_gl_value = $gl_subloc_strands[0];
- foreach my $gl_strand (@gl_subloc_strands) {
- $gl_identical++ if ($gl_strand == $first_gl_value);
- }
-
if ($identical == scalar @subloc_strands) {
# Set guide_strand if all sublocations have the same strand
$loc_obj->guide_strand($first_value);
- # Reverse sublocation order for negative strand locations in cases like this:
- # join(1..11,join(complement(40..50),complement(60..70)))
- # But not this:
- # join(complement(10..20),complement(30..40))
- if ( $gl_identical != scalar @gl_subloc_strands
- and $first_value == -1
- ) {
+ # Reverse sublocation order for negative strand locations, e.g.:
+ # Common (CAA24672.1):
+ # join(complement(4918..5163),complement(2691..4571))
+ # Trans-splicing (NP_958375.1):
+ # join(32737..32825,complement(174205..174384),complement(69520..71506))
+ if ($first_value == -1) {
@s_objs = reverse @s_objs;
}
}
@@ -253,10 +243,6 @@ sub from_string {
}
if ($op && $op eq 'complement') {
$loc_obj->strand(-1);
- push @gl_subloc_strands, -1;
- }
- else {
- push @gl_subloc_strands, 1;
}
push @loc_objs, $loc_obj;
View
318 t/SeqFeature/LocationFactory.t
@@ -6,148 +6,220 @@ use strict;
BEGIN {
use lib '.';
use Bio::Root::Test;
-
- test_begin(-tests => 272);
-
+
+ test_begin(-tests => 307);
+
use_ok('Bio::Factory::FTLocationFactory');
}
my $simple_impl = "Bio::Location::Simple";
-my $fuzzy_impl = "Bio::Location::Fuzzy";
-my $split_impl = "Bio::Location::Split";
+my $fuzzy_impl = "Bio::Location::Fuzzy";
+my $split_impl = "Bio::Location::Split";
# Holds strings and results. The latter is an array of expected class name,
# min/max start position and position type, min/max end position and position
# type, location type, the number of locations, and the strand.
#
my %testcases = (
- # note: the following are directly taken from
- # http://www.ncbi.nlm.nih.gov/collab/FT/#location
- "467" => [$simple_impl,
- 467, 467, "EXACT", 467, 467, "EXACT", "EXACT", 1, 1],
- "340..565" => [$simple_impl,
- 340, 340, "EXACT", 565, 565, "EXACT", "EXACT", 1, 1],
- "<345..500" => [$fuzzy_impl,
- undef, 345, "BEFORE", 500, 500, "EXACT", "EXACT", 1, 1],
- "<1..888" => [$fuzzy_impl,
- undef, 1, "BEFORE", 888, 888, "EXACT", "EXACT", 1, 1],
- "(102.110)" => [$fuzzy_impl,
- 102, 102, "EXACT", 110, 110, "EXACT", "WITHIN", 1, 1],
- "(23.45)..600" => [$fuzzy_impl,
- 23, 45, "WITHIN", 600, 600, "EXACT", "EXACT", 1, 1],
- "(122.133)..(204.221)" => [$fuzzy_impl,
- 122, 133, "WITHIN", 204, 221, "WITHIN", "EXACT", 1, 1],
- "123^124" => [$simple_impl,
- 123, 123, "EXACT", 124, 124, "EXACT", "IN-BETWEEN", 1, 1],
- "145^177" => [$fuzzy_impl,
- 145, 145, "EXACT", 177, 177, "EXACT", "IN-BETWEEN", 1, 1],
- "join(12..78,134..202)" => [$split_impl,
- 12, 12, "EXACT", 202, 202, "EXACT", "EXACT", 2, 1],
- "complement(join(4918..5163,2691..4571))" => [$split_impl,
- 2691, 2691, "EXACT", 5163, 5163, "EXACT", "EXACT", 2, -1],
- "complement(34..(122.126))" => [$fuzzy_impl,
- 34, 34, "EXACT", 122, 126, "WITHIN", "EXACT", 1, -1],
- "J00194:100..202" => [$simple_impl,
- 100, 100, "EXACT", 202, 202, "EXACT", "EXACT", 1, 1],
- # this variant is not really allowed by the FT definition
- # document but we want to be able to cope with it
- "J00194:(100..202)" => [$simple_impl,
- 100, 100, "EXACT", 202, 202, "EXACT", "EXACT", 1, 1],
- "((122.133)..(204.221))" => [$fuzzy_impl,
- 122, 133, "WITHIN", 204, 221, "WITHIN", "EXACT", 1, 1],
- "join(AY016290.1:108..185,AY016291.1:1546..1599)"=> [$split_impl,
- 108, 108, "EXACT", 185, 185, "EXACT", "EXACT", 2, undef],
+ # note: the following are directly taken from
+ # http://www.insdc.org/documents/feature_table.html#3.4.3
+ "467"
+ => [$simple_impl, 467, 467, "EXACT",
+ 467, 467, "EXACT",
+ "EXACT", 1, 1],
+ "340..565"
+ => [$simple_impl, 340, 340, "EXACT",
+ 565, 565, "EXACT",
+ "EXACT", 1, 1],
+ "<345..500"
+ => [$fuzzy_impl, undef, 345, "BEFORE",
+ 500, 500, "EXACT",
+ "EXACT", 1, 1],
+ "<1..888"
+ => [$fuzzy_impl, undef, 1, "BEFORE",
+ 888, 888, "EXACT",
+ "EXACT", 1, 1],
+ "1..>888"
+ => [$fuzzy_impl, 1, 1, "EXACT",
+ 888, undef, "AFTER",
+ "EXACT", 1, 1],
+ "(102.110)"
+ => [$fuzzy_impl, 102, 102, "EXACT",
+ 110, 110, "EXACT",
+ "WITHIN", 1, 1],
+ "(23.45)..600"
+ => [$fuzzy_impl, 23, 45, "WITHIN",
+ 600, 600, "EXACT",
+ "EXACT", 1, 1],
+ "(122.133)..(204.221)"
+ => [$fuzzy_impl, 122, 133, "WITHIN",
+ 204, 221, "WITHIN",
+ "EXACT", 1, 1],
+ "123^124"
+ => [$simple_impl, 123, 123, "EXACT",
+ 124, 124, "EXACT",
+ "IN-BETWEEN", 1, 1],
+ "145^177"
+ => [$fuzzy_impl, 145, 145, "EXACT",
+ 177, 177, "EXACT",
+ "IN-BETWEEN", 1, 1],
+ "join(12..78,134..202)"
+ => [$split_impl, 12, 12, "EXACT",
+ 202, 202, "EXACT",
+ "EXACT", 2, 1],
+ "complement(join(2691..4571,4918..5163))"
+ => [$split_impl, 2691, 2691, "EXACT",
+ 5163, 5163, "EXACT",
+ "EXACT", 2, -1],
+ "complement(34..(122.126))"
+ => [$fuzzy_impl, 34, 34, "EXACT",
+ 122, 126, "WITHIN",
+ "EXACT", 1, -1],
+ "J00194:100..202"
+ => [$simple_impl, 100, 100, "EXACT",
+ 202, 202, "EXACT",
+ "EXACT", 1, 1],
+ "join(1..100,J00194.1:100..202)"
+ => [$split_impl, 1, 1, "EXACT",
+ 100, 100, "EXACT",
+ "EXACT", 2, undef],
+
+ # this variant is not really allowed by the FT definition
+ # document but we want to be able to cope with it
+ "J00194:(100..202)"
+ => [$simple_impl, 100, 100, "EXACT",
+ 202, 202, "EXACT",
+ "EXACT", 1, 1],
+ "((122.133)..(204.221))"
+ => [$fuzzy_impl, 122, 133, "WITHIN",
+ 204, 221, "WITHIN",
+ "EXACT", 1, 1],
+ "join(AY016290.1:108..185,AY016291.1:1546..1599)"
+ => [$split_impl, 108, 108, "EXACT",
+ 185, 185, "EXACT",
+ "EXACT", 2, undef],
- # UNCERTAIN locations and positions (Swissprot)
- "?2465..2774" => [$fuzzy_impl,
- 2465, 2465, "UNCERTAIN", 2774, 2774, "EXACT", "EXACT", 1, 1],
- "22..?64" => [$fuzzy_impl,
- 22, 22, "EXACT", 64, 64, "UNCERTAIN", "EXACT", 1, 1],
- "?22..?64" => [$fuzzy_impl,
- 22, 22, "UNCERTAIN", 64, 64, "UNCERTAIN", "EXACT", 1, 1],
- "?..>393" => [$fuzzy_impl,
- undef, undef, "UNCERTAIN", 393, undef, "AFTER", "UNCERTAIN", 1, 1],
- "<1..?" => [$fuzzy_impl,
- undef, 1, "BEFORE", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
- "?..536" => [$fuzzy_impl,
- undef, undef, "UNCERTAIN", 536, 536, "EXACT", "UNCERTAIN", 1, 1],
- "1..?" => [$fuzzy_impl,
- 1, 1, "EXACT", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
- "?..?" => [$fuzzy_impl,
- undef, undef, "UNCERTAIN", undef, undef, "UNCERTAIN", "UNCERTAIN", 1, 1],
- # Not working yet:
- #"12..?1" => [$fuzzy_impl,
- # 1, 1, "UNCERTAIN", 12, 12, "EXACT", "EXACT", 1, 1]
- );
+ # UNCERTAIN locations and positions (Swissprot)
+ "?2465..2774"
+ => [$fuzzy_impl, 2465, 2465, "UNCERTAIN",
+ 2774, 2774, "EXACT",
+ "EXACT", 1, 1],
+ "22..?64"
+ => [$fuzzy_impl, 22, 22, "EXACT",
+ 64, 64, "UNCERTAIN",
+ "EXACT", 1, 1],
+ "?22..?64"
+ => [$fuzzy_impl, 22, 22, "UNCERTAIN",
+ 64, 64, "UNCERTAIN",
+ "EXACT", 1, 1],
+ "?..>393"
+ => [$fuzzy_impl, undef, undef, "UNCERTAIN",
+ 393, undef, "AFTER",
+ "UNCERTAIN", 1, 1],
+ "<1..?"
+ => [$fuzzy_impl, undef, 1, "BEFORE",
+ undef, undef, "UNCERTAIN",
+ "UNCERTAIN", 1, 1],
+ "?..536"
+ => [$fuzzy_impl, undef, undef, "UNCERTAIN",
+ 536, 536, "EXACT",
+ "UNCERTAIN", 1, 1],
+ "1..?"
+ => [$fuzzy_impl, 1, 1, "EXACT",
+ undef, undef, "UNCERTAIN",
+ "UNCERTAIN", 1, 1],
+ "?..?"
+ => [$fuzzy_impl, undef, undef, "UNCERTAIN",
+ undef, undef, "UNCERTAIN",
+ "UNCERTAIN", 1, 1],
+ "?1..12"
+ => [$fuzzy_impl, 1, 1, "UNCERTAIN",
+ 12, 12, "EXACT",
+ "EXACT", 1, 1]
+);
my $locfac = Bio::Factory::FTLocationFactory->new();
isa_ok($locfac,'Bio::Factory::LocationFactoryI');
# sorting is to keep the order constant from one run to the next
-foreach my $locstr (keys %testcases) {
- my $loc = $locfac->from_string($locstr);
- if($locstr eq "join(AY016290.1:108..185,AY016291.1:1546..1599)") {
- $loc->seq_id("AY016295.1");
- }
- my @res = @{$testcases{$locstr}};
- is(ref($loc), $res[0], $res[0]);
- is($loc->min_start(), $res[1]);
- is($loc->max_start(), $res[2]);
- is($loc->start_pos_type(), $res[3]);
- is($loc->min_end(), $res[4]);
- is($loc->max_end(), $res[5]);
- is($loc->end_pos_type(), $res[6]);
- is($loc->location_type(), $res[7]);
- my @locs = $loc->each_Location();
- is(@locs, $res[8]);
- my $ftstr = $loc->to_FTstring();
- # this is a somewhat ugly hack, but we want clean output from to_FTstring()
- # Umm, then these should really fail, correct?
- # Should we be engineering workarounds for tests?
- $locstr = "J00194:100..202" if $locstr eq "J00194:(100..202)";
- $locstr = "(122.133)..(204.221)" if $locstr eq "((122.133)..(204.221))";
- # now test
- is($ftstr, $locstr, "Location String: $locstr");
- # test strand production
- is($loc->strand(), $res[9]);
+foreach my $locstr (keys %testcases) {
+ my $loc = $locfac->from_string($locstr);
+ if($locstr eq "join(AY016290.1:108..185,AY016291.1:1546..1599)") {
+ $loc->seq_id("AY016295.1");
+ }
+ if($locstr eq "join(1..100,J00194.1:100..202)") {
+ $loc->seq_id("unknown");
+ }
+ my @res = @{$testcases{$locstr}};
+ is(ref($loc), $res[0], $res[0]);
+ is($loc->min_start(), $res[1]);
+ is($loc->max_start(), $res[2]);
+ is($loc->start_pos_type(), $res[3]);
+ is($loc->min_end(), $res[4]);
+ is($loc->max_end(), $res[5]);
+ is($loc->end_pos_type(), $res[6]);
+ is($loc->location_type(), $res[7]);
+ my @locs = $loc->each_Location();
+ is(@locs, $res[8]);
+ my $ftstr = $loc->to_FTstring();
+ # this is a somewhat ugly hack, but we want clean output from to_FTstring()
+ # Umm, then these should really fail, correct?
+ # Should we be engineering workarounds for tests?
+ $locstr = "J00194:100..202" if $locstr eq "J00194:(100..202)";
+ $locstr = "(122.133)..(204.221)" if $locstr eq "((122.133)..(204.221))";
+ # now test
+ is($ftstr, $locstr, "Location String: $locstr");
+ # test strand production
+ is($loc->strand(), $res[9]);
}
SKIP: {
- skip('nested matches in regex only supported in v5.6.1 and higher', 5) unless $^V gt v5.6.0;
-
- # bug #1674, #1765, 2101
- # EMBL-like
- # join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))
- # GenBank-like
- # join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))
- # Note that
- # join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)
- # is the same as
- # join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)
- # But I don't want to bother with it at this point
- my @expected = (# intentionally testing same expected string twice
- # as I am providing two different encodings
- # that should mean the same thing
- 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
- 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
- # ditto
- 'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
- 'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
- # this is just seen once
- 'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
- 'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
+ skip('nested matches in regex only supported in v5.6.1 and higher', 8) unless $^V gt v5.6.0;
+
+ # Tests based on location definition (http://www.insdc.org/documents/feature_table.html#3.4)
+ my $string1 = 'complement(join(2691..4571,4918..5163))';
+ my $string2 = 'join(complement(4918..5163),complement(2691..4571))';
+ my $loc1 = $locfac->from_string($string1);
+ my $loc2 = $locfac->from_string($string2);
+ my $loc1_str = $loc1->to_FTstring;
+ my $loc2_str = $loc2->to_FTstring;
+ is($loc2_str, 'complement(join(2691..4571,4918..5163))', 'join(complement(4918..5163),complement(2691..4571))');
+ is($loc1_str, $loc2_str, 'equivalent location strings');
+
+ # bug #1674, #1765, 2101
+ # EMBL-like (BAC19856.3 protein)
+ # join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))
+ # GenBank-like
+ # join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))
+ # Note that
+ # join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)
+ # is the same as
+ # join(1000..2000,3000..4000,5000..6000,7000..8000,9000..10000)
+ # But I don't want to bother with it at this point
+
+ my @expected = (# intentionally testing same expected string twice
+ # as I am providing two different encodings
+ # that should mean the same thing
+ 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
+ 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
+ # ditto
+ 'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
+ 'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
+ # this is just seen once
+ 'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
+ 'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
);
- for my $locstr (
- 'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))',
- 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
- 'join(20464..20694,21548..22763,complement(join(314652..314672,232596..232990,231520..231669)))',
- 'join(20464..20694,21548..22763,join(complement(231520..231669),complement(232596..232990),complement(314652..314672)))',
- 'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
- 'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
- ) {
- my $loc = $locfac->from_string($locstr);
- my $ftstr = $loc->to_FTstring();
- is($ftstr, shift @expected, $locstr);
- }
+ for my $locstr (
+ 'join(11025..11049,join(complement(239890..240081),complement(241499..241580),complement(251354..251412),complement(315036..315294)))',
+ 'join(11025..11049,complement(join(315036..315294,251354..251412,241499..241580,239890..240081)))',
+ 'join(20464..20694,21548..22763,complement(join(231520..231669,232596..232990,314652..314672)))',
+ 'join(20464..20694,21548..22763,join(complement(314652..314672),complement(232596..232990),complement(231520..231669)))',
+ 'join(1000..2000,join(3000..4000,join(5000..6000,7000..8000)),9000..10000)',
+ 'order(S67862.1:72..75,join(S67863.1:1..788,1..19))'
+ ) {
+ my $loc = $locfac->from_string($locstr);
+ my $ftstr = $loc->to_FTstring();
+ is($ftstr, shift @expected, $locstr);
+ }
}
View
2 t/SeqIO/Splicedseq.t
@@ -7,7 +7,7 @@ BEGIN {
use lib '.';
use Bio::Root::Test;
- test_begin(-tests => 25);
+ test_begin(-tests => 26);
use_ok('Bio::Seq');
use_ok('Bio::SeqIO');
View
507 t/data/U58726.gb
@@ -1,22 +1,23 @@
-LOCUS U58726 31931 bp DNA linear INV
+LOCUS U58726 31931 bp DNA linear INV 29-APR-2011
DEFINITION Caenorhabditis elegans cosmid T01C8, complete sequence.
ACCESSION U58726
+VERSION U58726.1 GI:1326250
KEYWORDS HTG.
-SOURCE Caenorhabditis elegans.
+SOURCE Caenorhabditis elegans
ORGANISM Caenorhabditis elegans
Eukaryota; Metazoa; Nematoda; Chromadorea; Rhabditida;
Rhabditoidea; Rhabditidae; Peloderinae; Caenorhabditis.
REFERENCE 1 (bases 1 to 31931)
- AUTHORS Waterston,R.
+ CONSRTM C. elegans Sequencing Consortium
TITLE Genome sequence of the nematode C. elegans: a platform for
- investigating biology. The C. elegans Sequencing Consortium
+ investigating biology
JOURNAL Science 282 (5396), 2012-2018 (1998)
- MEDLINE 99069613
PUBMED 9851916
+ REMARK Erratum:[Science 1999 Jan 1;283(5398):35]
REFERENCE 2 (bases 1 to 31931)
AUTHORS Wohldmann,P. and Hawkins,J.
TITLE The sequence of C. elegans cosmid T01C8
- JOURNAL Unpublished (2001)
+ JOURNAL Unpublished
REFERENCE 3 (bases 1 to 31931)
AUTHORS Waterston,R.
TITLE Direct Submission
@@ -39,113 +40,209 @@ REFERENCE 6 (bases 1 to 31931)
JOURNAL Submitted (29-JUN-2002) Department of Genetics, Washington
University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
Louis, MO 63110, USA
-COMMENT Submitted by: Genome Sequencing Center Department of Genetics,
- Washington University St. Louis , MO 63110, USA, and Sanger
- Centre, Hinxton Hall Cambridge CB10 IRQ, England email:
- rw@nematode.wustl.edu and jes@sanger.ac.uk NOTICE: This sequence
- may not be the entire insert of this clone. It may be shorter
- because we only sequence overlapping sections once, or longer
- because we provide a small overlap between neighboring
- submissions. This sequence was finished as follows unless
- otherwise noted: all regions were double stranded, sequenced with
- an alternate chemistry or covered by high quality data (i.e.,
- phred quality >= 30); an attempt was made to resolve all
- sequencing problems, such as compressions and repeats; all regions
- were covered by sequence from more than one m13 subclone. For a
- graphical representation of this cosmid sequence and its analysis
- see:
- http://www.wormbase.org/db/seq/sequence?name=T01C8;class=Sequence
- NEIGHBORING COSMID INFORMATION The 5' cosmid is EGAP8, 200 bp
- overlap; the 3' cosmid is F41G4, 2000 bp overlap. Actual start of
- this cosmid is at base position 1 of T01C8; actual end is at 10488
- of F41G4. NOTES: Coding seqences below are the result of
- integration and manual review of the following data : computer
- analysis using the program Genefinder (P. Green and L. Hillier,
- personal communication), the large scale EST projects of Yuji
- Kohara (http://www.ddbj.nig.ac.jp/c-elegans/html/CE_INDEX.html)
- and The C. elegans ORFeome cloning project
- (http://worfdb.dfci.harvard.edu/), similarity to other proteins
- from BlastX analyses (http://blast.wustl.edu/), sequence
- conservation with C. briggsae using Jim Kent's WABA alignment
- program (Genome Research 10:1115-1125, 2000), individual C.
- elegans GenBank submissions, and personal communications with C.
- elegans researchers. tRNAs are predicted using the program
- tRNAscan-SE (Lowe, T.M. and Eddy, S.R., 1997, Nucl. Acids. Res.,
- 25, 955-964).
+REFERENCE 7 (bases 1 to 31931)
+ AUTHORS Waterston,R.
+ TITLE Direct Submission
+ JOURNAL Submitted (06-NOV-2002) Department of Genetics, Washington
+ University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+ Louis, MO 63110, USA
+REFERENCE 8 (bases 1 to 31931)
+ AUTHORS Waterston,R.
+ TITLE Direct Submission
+ JOURNAL Submitted (21-NOV-2002) Department of Genetics, Washington
+ University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+ Louis, MO 63110, USA
+REFERENCE 9 (bases 1 to 31931)
+ AUTHORS Waterston,R.
+ TITLE Direct Submission
+ JOURNAL Submitted (19-APR-2003) Department of Genetics, Washington
+ University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+ Louis, MO 63110, USA
+REFERENCE 10 (bases 1 to 31931)
+ CONSRTM WormBase Consortium
+ TITLE Direct Submission
+ JOURNAL Submitted (21-SEP-2004) Department of Genetics, Washington
+ University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+ Louis, MO 63110, USA
+REFERENCE 11 (bases 1 to 31931)
+ CONSRTM WormBase Consortium
+ TITLE Direct Submission
+ JOURNAL Submitted (10-SEP-2005) Department of Genetics, Washington
+ University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+ Louis, MO 63110, USA
+REFERENCE 12 (bases 1 to 31931)
+ CONSRTM WormBase Consortium
+ TITLE Direct Submission
+ JOURNAL Submitted (06-MAY-2006) Department of Genetics, Washington
+ University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+ Louis, MO 63110, USA
+REFERENCE 13 (bases 1 to 31931)
+ CONSRTM WormBase Consortium
+ TITLE Direct Submission
+ JOURNAL Submitted (29-APR-2011) Department of Genetics, Washington
+ University, Genome Sequencing Center, 4444 Forest Park Avenue, St.
+ Louis, MO 63110, USA
+COMMENT [WARNING] On Oct 13, 2011 this sequence was replaced by
+ gi:351061076.
+ Submitted by:
+ Genome Sequencing Center
+ Department of Genetics, Washington University
+ St. Louis , MO 63110, USA, and
+ Sanger Centre, Hinxton Hall
+ Cambridge CB10 IRQ, England
+ email: stlouis@wormbase.org and sanger@wormbase.org
+
+ NOTICE: This sequence may not be the entire insert of this clone.
+ It may be shorter because we only sequence overlapping sections
+ once, or longer because we provide a small overlap between
+ neighboring submissions.
+
+ This sequence was finished as follows unless otherwise noted: all
+ regions were double stranded, sequenced with an alternate chemistry
+ or covered by high quality data (i.e., phred quality >= 30); an
+ attempt was made to resolve all sequencing problems, such as
+ compressions and repeats; all regions were covered by sequence from
+ more than one m13 subclone.
+
+ For a graphical representation of this clone sequence and its
+ analysis see:
+ http://www.wormbase.org/db/seq/sequence?name=T01C8
+
+ NEIGHBORING CLONE INFORMATION
+
+ The 5' clone is EGAP8, 200 bp overlap; the 3' clone is F41G4, 2000
+ bp overlap. Actual start of this clone is at base position 1 of
+ T01C8; actual end is at 10488 of F41G4.
+
+ NOTES:
+
+ Coding seqences below are the result of integration and manual
+ review of the following data : computer analysis using the program
+ Genefinder (P. Green and L. Hillier, personal communication), the
+ large scale EST projects of Yuji Kohara
+ (http://www.ddbj.nig.ac.jp/c-elegans/html/CE_INDEX.html) and The C.
+ elegans ORFeome cloning project (http://worfdb.dfci.harvard.edu/),
+ similarity to other proteins from BlastX analyses
+ (http://blast.wustl.edu/), sequence conservation with C. briggsae
+ using Jim Kent's WABA alignment program (Genome Research
+ 10:1115-1125, 2000), individual C. elegans GenBank submissions,
+ and personal communications with C. elegans researchers. tRNAs
+ are predicted using the program tRNAscan-SE (Lowe, T.M. and
+ Eddy, S.R., 1997, Nucl. Acids. Res., 25, 955-964).
FEATURES Location/Qualifiers
source 1..31931
- /chromosome="X"
- /clone="T01C8"
- /strain="Bristol N2"
/organism="Caenorhabditis elegans"
+ /mol_type="genomic DNA"
+ /strain="Bristol N2"
/db_xref="taxon:6239"
- gene complement(5778..6881)
- /gene="T01C8.3"
- /note="for a graphical representation of this gene see:
- ttp://www.wormbase.org/db/seq/sequence?name=T01C8.3;class
- =Sequence"
- CDS join(complement(5778..5951),complement(5996..6076),
- complement(6123..6219),complement(6268..6317),
- complement(6359..6509),complement(6555..6631),
- complement(6783..6881))
- /product="Hypothetical protein T01C8.3"
- /gene="T01C8.3"
- /protein_id="AAB00575.1"
+ /chromosome="X"
+ /clone="T01C8"
+ gene 1698..1819
+ /locus_tag="T01C8.8"
+ /db_xref="WormBase:WBGene00196318"
+ ncRNA 1698..1819
+ /locus_tag="T01C8.8"
+ /ncRNA_class="other"
+ /product="T01C8.8"
+ /db_xref="WormBase:T01C8.8"
+ /db_xref="WormBase:WBGene00196318"
+ gene complement(5778..7678)
+ /locus_tag="T01C8.3"
+ /db_xref="WormBase:WBGene00020144"
+ CDS complement(join(5778..5951,5996..6076,6123..6219,
+ 6268..6317,6359..6509,6555..6631,7562..7678))
+ /locus_tag="T01C8.3"
+ /standard_name="T01C8.3"
+ /note="contains similarity to Paramecium tetraurelia
+ Chromosome undetermined scaffold_129, whole genome shotgun
+ sequence.; TR:A0BUB6; coded for by the following C.
+ elegans cDNAs: FM864997"
/codon_start=1
- /translation="MLNHSLPEVWKKTAVSEKNGLQFQKIIVFCPTHGCFTSPTDLPL
- GCSTNSRGSIFCICNSTDYCNEMTNVKEEKNITYLICEYAKDSMFRGADCVQPWCVKT
- ASSYMDEMVECGEGTYEMEMYDIGFVYSGMLLPINSCYAVADDSRYDKSQICTYKVNK
- TTPYKLKVPGSTKCFAPGEVMTRMKNSTCIGQFCYSASAVFGCISQFNREGAILKVTI
- FHFEILNKNNNICLTIRNFCKKKL"
- /db_xref="GI:1326251"
+ /product="Hypothetical protein T01C8.3"
+ /protein_id="AAB00575.3"
+ /db_xref="GI:331028906"
+ /db_xref="WormBase:T01C8.3"
+ /db_xref="WormBase:WBGene00020144"
+ /translation="MQLLENCNFRCTNCTGDYCYAVNYKHLNPSLKNEQSYYQGCFTS
+ PTDLPLGCSTNSRGSIFCICNSTDYCNEMTNVKEEKNITYLICEYAKDSMFRGADCVQ
+ PWCVKTASSYMDEMVECGEGTYEMEMYDIGFVYSGMLLPINSCYAVADDSRYDKSQIC
+ TYKVNKTTPYKLKVPGSTKCFAPGEVMTRMKNSTCIGQFCYSASAVFGCISQFNREGA
+ ILKVTIFHFEILNKNNNICLTIRNFCKKKL"
+ misc_feature 11227..11228
+ /note="SL1 trans-splice acceptor; see yk1591b07.5"
+ misc_feature 11227..11228
+ /note="SL2 defined by RNASeq short reads (Hillier et al.);
+ see RNASeq_Hillier.dauer_daf-2"
gene 11238..12262
- /gene="T01C8.2"
- /note="for a graphical representation of this gene see:
- ttp://www.wormbase.org/db/seq/sequence?name=T01C8.2;class
- =Sequence"
+ /locus_tag="T01C8.2"
+ /db_xref="WormBase:WBGene00020143"
CDS join(11238..11384,12167..12262)
+ /locus_tag="T01C8.2"
+ /standard_name="T01C8.2"
+ /note="contains similarity to Escherichia coli Type 4
+ prepilin-like proteins leader peptide-processing
+ enzymes[Includes: Leader peptidase (EC 3.4.23.43)
+ (Prepilin peptidase); N-smethyltransferase (EC 2.1.1.-)].;
+ SW:P25960; coded for by the following C. elegans cDNAs:
+ OSTF201H6_1, OSTR201H6_1, yk725a4.3, yk1428d07.3,
+ yk1428d07.5, yk1591b07.3, yk1591b07.5,
+ MM454_FPK17YK01E1TJO"
+ /codon_start=1
/product="Hypothetical protein T01C8.2"
- /gene="T01C8.2"
/protein_id="AAB00576.1"
- /codon_start=1
+ /db_xref="GI:1326252"
+ /db_xref="WormBase:T01C8.2"
+ /db_xref="WormBase:WBGene00020143"
/translation="MPKQEFNPLDYTGPLIVGAIFCVFLFVISFFVINFFCITKYDDI
TKFELMGGKYGWRLGPHPLIVVKKGGFVAEEEVDDA"
- /db_xref="GI:1326252"
- /note="coded for by the following C. elegans cDNAs:
- yk725a4.3"
- gene complement(12416..14009)
- /gene="T01C8.4"
- /note="for a graphical representation of this gene see:
- ttp://www.wormbase.org/db/seq/sequence?name=T01C8.4;class
- =Sequence"
- CDS join(complement(12416..12555),complement(12785..13093),
- complement(13138..13358),complement(13606..14009))
- /product="Hypothetical protein T01C8.4"
- /gene="T01C8.4"
- /protein_id="AAB00577.1"
+ gene complement(12416..13336)
+ /locus_tag="T01C8.4"
+ /db_xref="WormBase:WBGene00020145"
+ CDS complement(join(12416..12555,12785..13093,13138..13336))
+ /locus_tag="T01C8.4"
+ /standard_name="T01C8.4"
+ /note="contains similarity to Pfam domain PF00155
+ (Aminotransferase class I and II); coded for by the
+ following C. elegans cDNAs: FN888767, FN888768, FN888770,
+ FM864682"
/codon_start=1
- /translation="MRLSFFDGIHVASPIKELHTSELFQKEICPVKINLAIEAYRTED
- GEPWVLPVVREIELKFPHEPHHNHEYLPILGHDGFCKSATALLLGNDSLAIKEGRSFS
- VQCISGTGAICVGAEFLAQVLSMKTIYVSNPCCLCYNPTGMDPTREQWIQMAQVIKQK
- NLFTFFHIADQGLASGDADADAWAVRFFVEQGLEMIVSQSFSKNFGLYNDRVGSLTVI
- VNKPSHIANLKSQLTLVNVSNFSNPPAYGARIVHEILKSPKYREQWQNSIKMMAFRIK
- KTRQELIRELNMLQTSGKWDRITQQSGLFSYTGLTPCQVDHLIAHHKIYLLSDGRINI
- CGLNMSNLDYVARAIDDTVRTIH"
- /db_xref="GI:1326253"
- /note="strong similarity to cytoplasmic aspartate
- aminotransferase; coded for by the following C. elegans
- cDNAs: cm20g8"
+ /product="Hypothetical protein T01C8.4"
+ /protein_id="AAB00577.2"
+ /db_xref="GI:331028907"
+ /db_xref="WormBase:T01C8.4"
+ /db_xref="WormBase:WBGene00020145"
+ /translation="MDPTREQWIQMAQVIKQKNLFTFFHIADQGLASGDADADAWAVR
+ FFVEQGLEMIVSQSFSKNFGLYNDRVGSLTVIVNKPSHIANLKSQLTLVNVSNFSNPP
+ AYGARIVHEILKSPKYREQWQNSIKMMAFRIKKTRQELIRELNMLQTSGKWDRITQQS
+ GLFSYTGLTPCQVDHLIAHHKIYLLSDGRINICGLNMSNLDYVARAIDDTVRTIH"
gene complement(16309..17896)
- /gene="T01C8.5"
- /note="for a graphical representation of this gene see:
- ttp://www.wormbase.org/db/seq/sequence?name=T01C8.5;class
- =Sequence"
- CDS join(complement(16309..16457),complement(16507..16815),
- complement(16863..17233),complement(17499..17896))
+ /locus_tag="T01C8.5"
+ /db_xref="WormBase:WBGene00020146"
+ CDS complement(join(16309..16457,16507..16815,16863..17233,
+ 17499..17896))
+ /locus_tag="T01C8.5"
+ /standard_name="T01C8.5"
+ /note="contains similarity to Pfam domain PF00155
+ (Aminotransferase class I and II); coded for by the
+ following C. elegans cDNAs: OSTR040A9_1, OSTR040A9_2,
+ yk204c10.5, yk327b7.3, yk327b7.5, yk468h3.5, yk503d6.3,
+ yk503d6.5, yk512b9.3, yk512b9.5, yk597a11.3, yk597a11.5,
+ yk829h10.3, yk829h10.5, yk899e07.3, yk1020c11.5,
+ yk1023a07.3, yk1023a07.5, yk1070e06.3, yk1070e06.5,
+ yk1318d04.3, yk1318d04.5, cm20g8.5, yk1400g10.3,
+ yk1400g10.5, yk1444g09.3, yk1444g09.5, yk1452e06.3,
+ yk1452e06.5, yk1471h11.3, yk1471h11.5, yk1500a04.3,
+ yk1546b06.3, yk1546b06.5, yk1560g10.5, yk1611e03.3,
+ yk1611e03.5, yk1613g08.3, yk1630a11.5, yk1672d12.3,
+ yk1672d12.5, yk1675b02.3, yk1675b02.5, yk1755e02.3,
+ yk1755e02.5, yk899b12.3, yk899b12.5, EC036254, EC029162,
+ MM454_contig05811, FN906088, FN906090, FN906089, FN906091"
+ /codon_start=1
/product="Hypothetical protein T01C8.5"
- /gene="T01C8.5"
/protein_id="AAB00578.1"
- /codon_start=1
+ /db_xref="GI:1326254"
+ /db_xref="WormBase:T01C8.5"
+ /db_xref="WormBase:WBGene00020146"
/translation="MSFFDGIPVAPPIEVFHKNKMYLDETAPVKVNLTIGAYRTEEGQ
PWVLPVVHETEVEIANDTSLNHEYLPVLGHEGFRKAATELVLGAESPAIKEERSFGVQ
CLSGTGALRAGAEFLASVCNMKTVYVSNPTWGNHKLVFKKAGFTTVADYTFWDYDNKR
@@ -154,23 +251,60 @@ FEATURES Location/Qualifiers
QSQMSLVIRANWSNPPAHGARIVHKVLTTPARREQWNQSIQAMSSRIKQMRAALLRHL
MDLGTPGTWDHIIQQIGMFSYTGLTSAQVDHLIANHKVFLLRDGRINICGLNTKNVEY
VAKAIDETVRAVKSNI"
- /db_xref="GI:1326254"
- /note="strong similarity to cytoplasmic aspartate
- aminotransferase; coded for by the following C. elegans
- cDNAs: yk204c10.3, yk327b7.3, yk503d6.3, cm20g8,
- yk204c10.5, yk327b7.5, yk468h3.3, yk468h3.5"
+ gene 16755..16901
+ /locus_tag="T01C8.10"
+ /db_xref="WormBase:WBGene00201366"
+ ncRNA 16755..16901
+ /locus_tag="T01C8.10"
+ /ncRNA_class="other"
+ /product="T01C8.10"
+ /db_xref="WormBase:T01C8.10"
+ /db_xref="WormBase:WBGene00201366"
+ gene 17202..17310
+ /locus_tag="T01C8.11"
+ /db_xref="WormBase:WBGene00202374"
+ ncRNA 17202..17310
+ /locus_tag="T01C8.11"
+ /ncRNA_class="other"
+ /product="T01C8.11"
+ /db_xref="WormBase:T01C8.11"
+ /db_xref="WormBase:WBGene00202374"
+ misc_feature complement(17910..17911)
+ /note="SL1 trans-splice acceptor; see cm20g8.5"
+ misc_feature complement(17910..17911)
+ /note="SL2 defined by RNASeq short reads (Hillier et al.);
+ see RNASeq_Hillier.L1_larva"
+ misc_feature 20870..20871
+ /note="SL1 trans-splice acceptor; see yk1146c08.5"
+ misc_feature 20870..20871
+ /note="SL2 defined by RNASeq short reads (Hillier et al.);
+ see RNASeq_Hillier.dauer_entry_daf-2"
gene 20873..27634
- /gene="T01C8.1"
- /note="for a graphical representation of this gene see:
- ttp://www.wormbase.org/db/seq/sequence?name=T01C8.1a;clas
- s=Sequence"
+ /gene="aak-2"
+ /locus_tag="T01C8.1"
+ /db_xref="WormBase:WBGene00020142"
CDS join(20873..21015,24491..24654,24830..25298,25473..25591,
25817..25922,26012..26117,26167..26374,26476..26787,
27198..27349,27533..27634)
- /product="Hypothetical protein T01C8.1b"
- /gene="T01C8.1"
- /protein_id="AAM69096.1"
+ /gene="aak-2"
+ /locus_tag="T01C8.1"
+ /standard_name="T01C8.1b"
+ /note="contains similarity to Pfam domains PF07714
+ (Protein tyrosine kinase), PF00069 (Protein kinase
+ domain); coded for by the following C. elegans cDNAs:
+ OSTF004G3_1, OSTR004G3_1, yk114d1.5, yk133b5.5, yk222d7.5,
+ yk273e10.5, yk298g7.5, yk390b1.5, yk443g12.5, yk507b7.5,
+ yk607a4.5, yk652d12.5, yk677a2.5, yk848d05.3, yk1005g04.3,
+ yk1005g04.5, yk1146c08.3, yk1146c08.5, yk1235g04.3,
+ yk1235g04.5, yk1251g04.3, yk1251g04.5, yk848d05.5,
+ yk1552h06.3, yk1625b07.3, yk237e11.5, MM454_contig01620"
/codon_start=1
+ /product="Amp-activated kinase protein 2, isoform b,
+ confirmed by transcript evidence"
+ /protein_id="AAM69096.1"
+ /db_xref="GI:21629470"
+ /db_xref="WormBase:T01C8.1b"
+ /db_xref="WormBase:WBGene00020142"
/translation="MFSHQDRDRDRKEDGGGDGTEMKSKSRSQPSGLNRVKNLSRKLS
AKSRKERKDRDSTDNSSKMSSPGGETSTKQQQELKAQIKIGHYILKETLGVGTFGKVK
VGIHETTQYKVAVKILNRQKIKSLDVVGKIRREIQNLSLFRHPHIIRLYQVISTPSDI
@@ -183,16 +317,31 @@ FEATURES Location/Qualifiers
APAADPPKMSLQLYQVDQRSYLLDFKSLADEESGSASASSSRHASMSMPQKPAGIRGT
RTSSMPQAMSMEASIEKMEVHDFSDMSCDVTPPPSPGGAKLSQTMQFFEICAALIGTL
AR"
- /db_xref="GI:21629470"
- /note="coded for by the following C. elegans cDNAs:
- yk237e11.5"
CDS join(20873..21015,24491..24654,24830..25298,25473..25591,
25817..25922,26012..26117,26173..26374,26476..26787,
27198..27349,27533..27634)
- /product="Hypothetical protein T01C8.1a"
- /gene="T01C8.1"
- /protein_id="AAM69095.1"
+ /gene="aak-2"
+ /locus_tag="T01C8.1"
+ /standard_name="T01C8.1a"
+ /note="contains similarity to Pfam domains PF07714
+ (Protein tyrosine kinase), PF00069 (Protein kinase
+ domain); coded for by the following C. elegans cDNAs:
+ OSTF004G3_1, OSTR004G3_1, yk114d1.5, yk133b5.5, yk222d7.5,
+ yk273e10.5, yk298g7.5, yk390b1.5, yk402f12.5, yk443g12.5,
+ yk507b7.5, yk607a4.5, yk617g6.5, yk652d12.5, yk677a2.5,
+ yk848d05.3, yk1005g04.3, yk1005g04.5, yk1146c08.3,
+ yk1146c08.5, yk1235g04.3, yk1235g04.5, yk1251g04.3,
+ yk1251g04.5, yk848d05.5, yk1552h06.3, yk1625b07.3,
+ yk114d1.3, yk133b5.3, yk222d7.3, yk273e10.3, yk390b1.3,
+ yk402f12.3, yk607a4.3, yk617g6.3, yk652d12.3, yk677a2.3,
+ yk1552h06.5, yk1625b07.5, AY347273, MM454_contig01620"
/codon_start=1
+ /product="Amp-activated kinase protein 2, isoform a,
+ confirmed by transcript evidence"
+ /protein_id="AAM69095.1"
+ /db_xref="GI:21629469"
+ /db_xref="WormBase:T01C8.1a"
+ /db_xref="WormBase:WBGene00020142"
/translation="MFSHQDRDRDRKEDGGGDGTEMKSKSRSQPSGLNRVKNLSRKLS
AKSRKERKDRDSTDNSSKMSSPGGETSTKQQQELKAQIKIGHYILKETLGVGTFGKVK
VGIHETTQYKVAVKILNRQKIKSLDVVGKIRREIQNLSLFRHPHIIRLYQVISTPSDI
@@ -205,49 +354,93 @@ FEATURES Location/Qualifiers
AADPPKMSLQLYQVDQRSYLLDFKSLADEESGSASASSSRHASMSMPQKPAGIRGTRT
SSMPQAMSMEASIEKMEVHDFSDMSCDVTPPPSPGGAKLSQTMQFFEICAALIGTLAR
"
- /db_xref="GI:21629469"
- /note="strong similarity to 5'-AMP-activated protein
- kinase; coded for by the following C. elegans cDNAs:
- yk443g12.5, yk298g7.5, yk273e10.5, yk390b1.5, yk114d1.3,
- yk114d1.5, yk133b5.3, yk133b5.5, yk222d7.3, yk222d7.5,
- yk273e10.3, yk390b1.3, yk402f12.3, yk402f12.5"
- gene complement(28176..31889)
+ gene complement(21899..21958)
+ /locus_tag="T01C8.9"
+ /db_xref="WormBase:WBGene00199326"
+ ncRNA complement(21899..21958)
+ /locus_tag="T01C8.9"
+ /ncRNA_class="other"
+ /product="T01C8.9"
+ /db_xref="WormBase:T01C8.9"
+ /db_xref="WormBase:WBGene00199326"
+ misc_feature 24490..24491
+ /gene="aak-2"
+ /locus_tag="T01C8.1"
+ /note="SL1 trans-splice acceptor; see yk848d05.5"
+ misc_feature 24490..24491
+ /gene="aak-2"
+ /locus_tag="T01C8.1"
+ /note="SL2 defined by RNASeq short reads (Hillier et al.);
+ see RNASeq_Hillier.L1_larva"
+ CDS join(24534..24654,24830..25298,25473..25591,25817..25922,
+ 26012..26117,26173..26374,26476..26787,27198..27349,
+ 27533..27634)
+ /gene="aak-2"
+ /locus_tag="T01C8.1"
+ /standard_name="T01C8.1c"
+ /note="contains similarity to Pfam domains PF07714
+ (Protein tyrosine kinase), PF00069 (Protein kinase
+ domain); coded for by the following C. elegans cDNAs:
+ OSTF004G3_1, OSTR004G3_1, yk114d1.5, yk133b5.5, yk222d7.5,
+ yk273e10.5, yk298g7.5, yk390b1.5, yk402f12.5, yk443g12.5,
+ yk507b7.5, yk607a4.5, yk617g6.5, yk652d12.5, yk677a2.5,
+ yk848d05.3, yk848d05.5, yk1005g04.3, yk1005g04.5,
+ yk1146c08.3, yk1146c08.5, yk1235g04.3, yk1235g04.5,
+ yk1251g04.3, yk1251g04.5, yk1552h06.3, yk1552h06.5,
+ yk1625b07.3, yk1625b07.5, FN880253, MM454_contig01620"
+ /codon_start=1
+ /product="Amp-activated kinase protein 2, isoform c,
+ confirmed by transcript evidence"
+ /protein_id="AAP13770.1"
+ /db_xref="GI:30025106"
+ /db_xref="WormBase:T01C8.1c"
+ /db_xref="WormBase:WBGene00020142"
+ /translation="MSSPGGETSTKQQQELKAQIKIGHYILKETLGVGTFGKVKVGIH
+ ETTQYKVAVKILNRQKIKSLDVVGKIRREIQNLSLFRHPHIIRLYQVISTPSDIFMIM
+ EHVSGGELFDYIVKHGRLKTAEARRFFQQIISGVDYCHRHMVVHRDLKPENLLLDEQN
+ NVKIADFGLSNIMTDGDFLRTSCGSPNYAAPEVISGKLYAGPEVDVWSCGVILYALLC
+ GTLPFDDEHVPSLFRKIKSGVFPTPDFLERPIVNLLHHMLCVDPMKRATIKDVIAHEW
+ FQKDLPNYLFPPINESEASIVDIEAVREVTERYHVAEEEVTSALLGDDPHHHLSIAYN
+ LIVDNKRIADETAKLSIEEFYQVTPNKGPGPVHRHPERIAASVSSKITPTLDNTEASG
+ ANRNKRAKWHLGIRSQSRPEDIMFEVFRAMKQLDMEWKVLNPYHVIVRRKPDAPAADP
+ PKMSLQLYQVDQRSYLLDFKSLADEESGSASASSSRHASMSMPQKPAGIRGTRTSSMP
+ QAMSMEASIEKMEVHDFSDMSCDVTPPPSPGGAKLSQTMQFFEICAALIGTLAR"
+ gene complement(28176..31898)
/gene="mec-4"
- /note="for a graphical representation of this gene see:
- ttp://www.wormbase.org/db/seq/sequence?name=T01C8.7;class
- =Sequence"
- CDS join(complement(28176..28361),complement(28501..28576),
- complement(28625..28753),complement(28811..28871),
- complement(28916..29030),complement(29154..29249),
- complement(29301..29473),complement(29567..29655),
- complement(29698..29855),complement(29903..30051),
- complement(30149..30309),complement(30526..30637),
- complement(30690..31052),complement(31100..31355),
- complement(31716..31889))
- /product="C. elegans MEC-4 protein (corresponding
- sequence T01C8.7)"
+ /locus_tag="T01C8.7"
+ /db_xref="WormBase:WBGene00003168"
+ CDS complement(join(28176..28361,28501..28576,28625..28753,
+ 28811..28871,28916..29030,29154..29249,29301..29473,
+ 29567..29655,29698..29855,29903..30051,30149..30309,
+ 30526..30637,30690..31052,31100..31355,31716..31898))
/gene="mec-4"
- /protein_id="AAB00580.2"
+ /locus_tag="T01C8.7"
+ /standard_name="T01C8.7"
+ /note="contains similarity to Pfam domain PF00858
+ (Amiloride-sensitive sodium channel); coded for by the
+ following C. elegans cDNAs: yk411c2.3, yk411c2.5,
+ yk1105a08.3, yk1105a08.5, EC002941, RST5_376858, FM864873"
/codon_start=1
- /translation="MQNLKNYQHLRDPSEYMSQVYGDPLAYLQETTKFVTEREYYEDF
- GYGECFNSTESEVQCELITGEFDPKLLPYDKRLAWHFKEFCYKTSAHGIPMIGEAPNV
- YYRAVWVVLFLGCMIMLYLNAQSVLDKYNRNEKIVDIQLKFDTAPFPAITLCNLNPYK
- ASLATSVDLVKRTLSAFDGAMGKAGGNKDHEEEREVVTEPPTTPAPTTKPARRRGKRD
- LSGAFFEPGFARCLCGSQGSSEQEDKDEEKEEELLETTTKKVFNINDADEEWDGMEEY
- DNEHYENYDVEATTGMNMMEECQSERTKFDEPTGFDDRCICAFDRSTHDAWPCFLNGT
- WETTECDTCNEHAFCTKDNKTAKGHRSPCICAPSRFCVAYNGKTPPIEIWTYLQGGTP
- TEDPNFLEAMGFQGMTDEVAIVTKAKENIMFAMATLSMQDRERLSTTKRELVHKCSFN
- GKACDIEADFLTHIDPAFGSCFTFNHNRTVNLTSIRAGPMYGLRMLVYVNASDYMPTT
- EATGVRLTIHDKEDFPFPDTFGYSAPTGYVSSFGLRLRKMSRLPAPYGDCVPDGKTSD
- YIYSNYEYSVEGCYRSCFQQLVLKECRCGDPRFPVPENARHCDAADPIARKCLDARMN
- DLGGLHGSFRCRCQQPCRQSIYSVTYSPAKWPSLSLQIQLGSCNGTAVECNKHYKENG
- AMVEVFYEQLNFEMLTESEAYGFVNLLADFGGQLGLWCGISFLTCCEFVFLFLETAYM
- SAEHNYSLYKKKKAEKAKKIASGSF"
- /db_xref="GI:15011790"
- /note="C. elegans mechanosensory protein 4 (PIR:S13645);
- coded for by the following C. elegans cDNAs: yk411c2.3,
- yk411c2.5"
-BASE COUNT 10115 a 5759 c 5657 g 10400 t
+ /product="Mechanosensory abnormality protein 4, partially
+ confirmed by transcript evidence"
+ /protein_id="AAB00580.3"
+ /db_xref="GI:74356167"
+ /db_xref="WormBase:T01C8.7"
+ /db_xref="WormBase:WBGene00003168"
+ /translation="MSWMQNLKNYQHLRDPSEYMSQVYGDPLAYLQETTKFVTEREYY
+ EDFGYGECFNSTESEVQCELITGEFDPKLLPYDKRLAWHFKEFCYKTSAHGIPMIGEA
+ PNVYYRAVWVVLFLGCMIMLYLNAQSVLDKYNRNEKIVDIQLKFDTAPFPAITLCNLN
+ PYKASLATSVDLVKRTLSAFDGAMGKAGGNKDHEEEREVVTEPPTTPAPTTKPARRRG
+ KRDLSGAFFEPGFARCLCGSQGSSEQEDKDEEKEEELLETTTKKVFNINDADEEWDGM
+ EEYDNEHYENYDVEATTGMNMMEECQSERTKFDEPTGFDDRCICAFDRSTHDAWPCFL
+ NGTWETTECDTCNEHAFCTKDNKTAKGHRSPCICAPSRFCVAYNGKTPPIEIWTYLQG
+ GTPTEDPNFLEAMGFQGMTDEVAIVTKAKENIMFAMATLSMQDRERLSTTKRELVHKC
+ SFNGKACDIEADFLTHIDPAFGSCFTFNHNRTVNLTSIRAGPMYGLRMLVYVNASDYM
+ PTTEATGVRLTIHDKEDFPFPDTFGYSAPTGYVSSFGLRLRKMSRLPAPYGDCVPDGK
+ TSDYIYSNYEYSVEGCYRSCFQQLVLKECRCGDPRFPVPENARHCDAADPIARKCLDA
+ RMNDLGGLHGSFRCRCQQPCRQSIYSVTYSPAKWPSLSLQIQLGSCNGTAVECNKHYK
+ ENGAMVEVFYEQLNFEMLTESEAYGFVNLLADFGGQLGLWCGISFLTCCEFVFLFLET
+ AYMSAEHNYSLYKKKKAEKAKKIASGSF"
ORIGIN
1 gatcaaaacc caaaaaaaaa tttcattcaa aaatttggtt ctttcttaca agcaaaatgc
61 gaggaatagg tgagaaggaa ctgagactga aactaaaaat ttgtgaatag aaactaaaaa
@@ -781,5 +974,5 @@ ORIGIN
31741 aaacattcgc cataaccaaa atcttcataa tattctcttt ctgtcacaaa tttagtcgtc
31801 tcttgtaagt acgctaacgg gtctccataa acctgggaca tgtactcgga tgggtcccga
31861 aggtgttggt agtttttcag gttttgcatc catgacattc tataacttga tagcgataaa
- 31921 aaaaatagca t
+ 31921 aaaaatagca t
//

0 comments on commit a986017

Please sign in to comment.
Something went wrong with that request. Please try again.