Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
executable file 147 lines (109 sloc) 3.87 KB
# Synopsis: Given an input multi-sequence fasta-format file, run psipred_single
# on each individual sequence, then merge the results into a file
# with the format required by Opal
# Incept: TJW, Mon Jan 10 11:33:23 EST 2011 [Janelia]
use strict;
use Getopt::Long;
use Pod::Usage;
use Cwd;
use File::Temp qw/ tempfile tempdir /; # for mafft polishing routine
# The next line may be edit
#my $psipred_dir ; # if empty, use $PATH
my $psipred_dir = "/Volumes/Portable_2TB/ParamAdvising/psipred35" ;
# You shouldn't need to edit the script below this point
my $man = 0;
my $help = 0;
my $seq_file;
my $struct_file;
my $cwd = getcwd;
my $tmp_dir = $cwd;
my $verbose = 0;
my $psipred_cmd="runpsipred_single";
my $result = GetOptions (
'help!' => \$help,
"man!" => \$man,
"in=s" => \$seq_file,
"out=s" => \$struct_file,
"tmpdir=s" => \$tmp_dir,
"verbose" => \$verbose
or pod2usage(1);
pod2usage(-verbose => 2) if $man;
pod2usage(0) if ($help || !$seq_file);
unless ($struct_file) {
$struct_file = $seq_file;
if ($struct_file =~ /(fa|fasta)$/) {
$struct_file =~ s/$1/ss/;
} else {
$struct_file .= ".ss";
if ($psipred_dir) {
$psipred_dir .= "/";
my $tmpdir = tempdir( DIR => $tmp_dir , CLEANUP => 1 );
open (OUT, ">$struct_file");
$/ = ">";
open (IN, "<$seq_file");
<IN>; # throw out the first empty entry
print "using tmpdir: $tmpdir\n" if $verbose;
while (my $entry = <IN>) {
my ($name, $seq) = ($entry =~ /^(\S+?)\n(.+?)(>|$)/s);
open (TMPOUT, ">$name.fasta");
print TMPOUT ">$name\n$seq";
close TMPOUT;
do_cmd( "$psipred_dir$psipred_cmd $name.fasta 2&>1");
#secondary structure is computed; now extract data needed by Opal
print OUT ">$name\n";
$/ = "\n"; # read one line at a time
open (SS, "<$name.ss2");
while (my $line = <SS>) {
if ($line =~ /(\d+)\s+(\S)\s+(\S)\s+(\d\.\d+)\s+(\d\.\d+)\s+(\d\.\d+)/) {
print OUT "$4 $5 $6\n";
$/ = ">"; # going back to the input sequence file, read one block at a time
chdir($cwd); # required to allow tmpdir to be removed
sub do_cmd {
my $cmd = shift;
print "$cmd\n" if $verbose;
return `$cmd`;
=head1 SYNOPSIS --in <sequences.fasta> (others)
-help brief help message
-man full documentation
-in input fasta file
-out output structure file (Opal input format)
-tmpdir directory in which temporary directory is formed
=head1 OPTIONS
=over 8
=item B<--out <structure_file>>
File in which the Opal-formatted secondary structure is stored. If none
is entered, the default is to use a modification of the name of the
fasta file containing the input sequences. The new file will have the
extension .ss. If the input file contains the extension "fasta" or "fa",
that extension will be removed.
(e.g. filename.fa -->; filename.seqs -->
=item B<--tmpdir <temporary_directory>>
Directory (x) in which a temporary directory (y) will be placed.
Temporary psipred files will be stored in directory x/y, which
is removed at the completion of processing. If none is chosen, a
random one will be generated in the current working directory
B<> takes as input a sequence file in fasta format.
For each sequence in the file, psipred_single is run. The results of these
psipred runs are merged into a single file with the format required by Opal