Permalink
Switch branches/tags
tag-ensembl-stable-061 start snapshot-at-head-of-07-branch release-ensembl-06 release-06 release-06-2 release-1_01 release-1-7-1 release-1-7-0 release-1-7-0-RC6 release-1-7-0-RC5 release-1-7-0-RC4 release-1-6-zenodo release-1-6-924 release-1-6-923 release-1-6-922 release-1-6-921 release-1-6-920 release-1-6-910 release-0-9-3 release-0-9-2 release-0-9-0 release-0-7-2 release-0-7-1 release-0-7-0 release-0-05 release-0-05-1 release-0-04-4 release-0-04-3 release-0-04-2 release-0-04-1 prerelease-06 ontology-overhaul-start ontology-overhaul-end ontology-fix1 lightweight_feature join-0-04-to-0-05 gbrowse_1_65 for_gmod_0_003 bioperl-run-release-1-2-0 bioperl-release-1-6 bioperl-release-1-6-901 bioperl-release-1-6-9 bioperl-release-1-6-1 bioperl-release-1-5-2 bioperl-release-1-5-2-patch2 bioperl-release-1-5-2-patch1 bioperl-release-1-5-1 bioperl-release-1-5-1-rc4 bioperl-release-1-5-0 bioperl-release-1-5-0-rc2 bioperl-release-1-5-0-rc1 bioperl-release-1-4-0 bioperl-release-1-2-3 bioperl-release-1-2-2 bioperl-release-1-2-1 bioperl-release-1-2-0 bioperl-release-1-1-0 bioperl-release-1-0-2 bioperl-release-1-0-1 bioperl-release-1-0-0 bioperl-devel-1-3-04 bioperl-devel-1-3-03 bioperl-devel-1-3-02 bioperl-devel-1-3-01 bioperl-devel-1-1-1 bioperl-061-pre1 bioperl-06-1 bioperl-1-6-RC4 bioperl-1-6-RC3_15392 bioperl-1-6-RC3 bioperl-1-6-RC2_15306 bioperl-1-6-RC2 bioperl-1-6-RC1 bioperl-1-6-0_006 bioperl-1-6-0_005 bioperl-1-6-0_004 bioperl-1-6-0_003 bioperl-1-6-0_002 bioperl-1-6-0_001 bioperl-1-2-1-rc1 bioperl-1-0-alpha2-rc bioperl-1-0-alpha bioperl-1-0-0 before-05-to-06-trunk before-05-to-06-merge after004 after-05-06-merge after-05-06-merge-2
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 86 lines (69 sloc) 2.33 KB
#!/usr/bin/perl
# Brian Osborne
# script to run genscan on all nucleotide sequences in a fasta file
# and save results as the fasta files <file>.gs.pept and <file>.gs.cds,
# and <file>.gs.exons
use Bio::SeqIO;
use Bio::Seq;
use Getopt::Long;
use Bio::Tools::Genscan;
use strict;
# GENSCAN matrix
my $matrix = "/home/bosborne/src/genscan/HumanIso.smat";
my ($file,$i);
GetOptions( "f|file=s" => \$file );
usage() if ( !$file );
my $pept_out = Bio::SeqIO->new(-file => ">$file.gs.pept",
-format => "fasta");
my $cds_out = Bio::SeqIO->new(-file => ">$file.gs.cds",
-format => "fasta");
my $exons_out = Bio::SeqIO->new(-file => ">$file.gs.exons",
-format => "fasta");
my $in = Bio::SeqIO->new(-file => $file , -format => 'Fasta');
while ( my $seq = $in->next_seq() ) {
die "Input sequence is protein\n" if ( $seq->alphabet eq 'protein' );
# create temp file, input to GENSCAN
my $temp_out = Bio::SeqIO->new(-file => ">temp.fa",
-format => "fasta");
$temp_out->write_seq($seq);
my $file_id = $seq->display_id;
$file_id =~ s/\|/-/g;
system "genscan $matrix temp.fa -cds > $file_id.gs.raw";
unlink "temp.fa";
my $genscan = Bio::Tools::Genscan->new( -file => "$file_id.gs.raw");
while ( my $gene = $genscan->next_prediction() ) {
$i++;
my $pept = $gene->predicted_protein;
my $cds = $gene->predicted_cds;
my @exon_arr = $gene->exons;
if ( defined $cds ) {
my $cds_seq = Bio::Seq->new(-seq => $cds->seq,
-display_id => $cds->display_id);
$cds_out->write_seq($cds_seq);
}
if ( defined $pept ) {
my $pept_seq = Bio::Seq->new(-seq => $pept->seq,
-display_id => $pept->display_id);
$pept_out->write_seq($pept_seq);
}
for my $exon (@exon_arr) {
my $desc = $exon->strand . " " . $exon->start . "-" . $exon->end .
" " . $exon->primary_tag . " " . "GENSCAN_predicted_$i";
my $exon_seq = Bio::Seq->new(-seq => $seq->subseq($exon->start,
$exon->end),
-display_id => $seq->display_id,
-desc => $desc );
$exons_out->write_seq($exon_seq);
}
}
$genscan->close();
unlink "$file_id.gs.raw";
}
sub usage {
print "
Usage : $0 -f <file>
Function : run genscan on all nucleotide sequences in a multiple fasta file
Output : <file>.gs.pept, <file>.gs.cds, <file>.gs.exons
";
exit;
}