From b28cef3987873b717019881eae413bd03651c9ee Mon Sep 17 00:00:00 2001 From: Brian Osborne Date: Tue, 29 Jan 2013 08:33:49 -0500 Subject: [PATCH] In progress --- lib/diya/BDRD/rpsblastCDS.pm | 14 +++++------ scripts/extractCDS.pl | 48 ++++++++++++++++++++++-------------- 2 files changed, 37 insertions(+), 25 deletions(-) diff --git a/lib/diya/BDRD/rpsblastCDS.pm b/lib/diya/BDRD/rpsblastCDS.pm index b77d5c7..e35cfab 100644 --- a/lib/diya/BDRD/rpsblastCDS.pm +++ b/lib/diya/BDRD/rpsblastCDS.pm @@ -109,15 +109,15 @@ sub parse { if ( $feat->primary_tag eq 'CDS' ) { - my @locus = $feat->get_tag_values('locus_tag'); + my ($locus) = $feat->get_tag_values('locus_tag'); my $fh; - eval { $fh = $index->get_stream( $locus[0] ); }; + eval { $fh = $index->get_stream( $locus ); }; if ($@) { print "Problem retrieving " - . $locus[0] + . $locus . " from $blastout.index\n"; next; } @@ -141,7 +141,7 @@ sub parse { print "Found rpsblast hit " . $hit->description - . " for $locus[0]\n" + . " for $locus\n" if $diya->verbose; # get domain id from hit @@ -166,11 +166,11 @@ sub parse { } - my @products = $feat->get_tag_values('product') + my ($product) = $feat->get_tag_values('product') if ( $feat->has_tag('product') ); # If this feature is already annotated with a UniRef match - if ( $products[0] =~ /RepID=/ && defined $tags->{'product'} ) { + if ( $product =~ /RepID=/ && defined $tags->{'product'} ) { my $note = "similar to " . $tags->{'product'}; $note .= " " . $tags->{'cluster'}; @@ -332,7 +332,7 @@ sub load_cdd_table { $cddmap{$id} = \%cluster; } -# 103623 PRK08948 PRK08948 2-octaprenyl-6-methoxyphenyl hydroxylase; Validated 392 +# 103623 PRK08948 PRK08948 2-octaprenyl-6-methoxyphenyl hydroxylase; Validated 392 if ( $line =~ /^\d+\s+(PRK\d+)\s+\w+\s+([^;]+)/ ) { my ( $id, $def ) = ( $1, $2 ); $def =~ s/\s+$//; diff --git a/scripts/extractCDS.pl b/scripts/extractCDS.pl index 47dec73..81ff275 100755 --- a/scripts/extractCDS.pl +++ b/scripts/extractCDS.pl @@ -7,7 +7,7 @@ =head1 NAME =head1 DESCRIPTION Extracts the CDS features from a GenBank file and creates nucleotide and -peptide fasta files containing all genes and translations. +peptide fasta files containing them. gene complement(32855..33520) /locus_tag="test0001_380" @@ -23,32 +23,44 @@ =head1 DESCRIPTION use Bio::Seq; use Data::Dumper; -my ($inputfile,$outputfile) = @ARGV; +my ( $inputfile, $outputfile ) = @ARGV; -die "Need both input file name and output file name" +die "Need both input file name and output file name" unless ( $inputfile && $outputfile ); -my $in = Bio::SeqIO->new( -file => "$inputfile.gbk", - -format => 'genbank' ); +my $in = Bio::SeqIO->new( + -file => "$inputfile.gbk", + -format => 'genbank' +); -my $ntout = Bio::SeqIO->new( -file => ">>$outputfile", - -format => 'fasta' ); +my $out = Bio::SeqIO->new( + -file => ">>$outputfile", + -format => 'fasta' +); -my $pepout = Bio::SeqIO->new( -file => ">>$outputfile.pep", - -format => 'fasta' ); +my $pepout = Bio::SeqIO->new( + -file => ">>$outputfile.pep", + -format => 'fasta' +); my $gbk = $in->next_seq; for my $feature ( $gbk->get_SeqFeatures ) { if ( $feature->primary_tag eq "gene" ) { - my @ids = $feature->get_tag_values("locus_tag"); - my $str = $feature->seq->seq; - my $seq = Bio::Seq->new( -seq => $str, - -display_id => "@ids" ); - $ntout->write_seq($seq); - - $seq = $seq->revcom if ( $feature->location->start > $feature->location->end ); - my $pep = $seq->translate(-complete => 1); - $pepout->write_seq($pep); + my @ids = $feature->get_tag_values("locus_tag"); + my $str = $feature->seq->seq; + my $seq = Bio::Seq->new( + -seq => $str, + -display_id => "@ids" + ); + $out->write_seq($seq); + + $seq = $seq->revcom + if ( $feature->location->start > $feature->location->end ); + my $pep = $seq->translate( -complete => 1 ); + $pepout->write_seq($pep); } } + +__END__ +