Skip to content

Commit

Permalink
submit blast script for creating submissions that are split
Browse files Browse the repository at this point in the history
  • Loading branch information
hyphaltip committed Sep 3, 2010
0 parents commit 02ab0c6
Show file tree
Hide file tree
Showing 2 changed files with 137 additions and 0 deletions.
2 changes: 2 additions & 0 deletions README
@@ -0,0 +1,2 @@
The scripts in here are intended as either standalone or examples for job submissions on the UCR bioinformatics cluster (biocluster.ucr.edu)

135 changes: 135 additions & 0 deletions make-jobs/submit_blast_simple.pl
@@ -0,0 +1,135 @@
#!/usr/bin/perl -w
use strict;
use File::Spec;
use Bio::SeqIO;
use Getopt::Long;


my $home = $ENV{'HOME'};
my $jobdir = File::Spec->catfile($home,'jobs');
my $input = File::Spec->catfile($home,'input');
my $outdir = File::Spec->catfile($home,'output');
my $tmpdir = File::Spec->catfile($home, 'tmp');
my $homebase = 'head3';
my $qsize = 5000;
my ($query,$dbin,$params,$exe);
$exe = 'blastall'; # default
my $formatdb = 'formatdb';
my $prefix = 'blast';
my $usescript = 0;
$params = '-p blastp -e 1e-3 -m 8';
GetOptions(
'i|q|query:s' => \$query,
'd|db:s' => \$dbin,
'p|params:s' => \$params,
's|size:i' => \$qsize,
'usescript!' => \$usescript,
'e|exe:s' => \$exe,
'indir|input:s' => \$input,
'o|out|outdir:s' => \$outdir,
'j|job|jobdir:s' => \$jobdir,
't|tmp|tmpdir:s' => \$tmpdir,
);
die( "need a dbname\n" ) unless (defined $dbin );
my @dblst;
for my $dbpart ( split(/\s+/,$dbin) ) {
my $rel = File::Spec->rel2abs($dbpart);
my (undef,undef,$dbname) = File::Spec->splitpath($rel);
push @dblst, $rel;# File::Spec->catfile($scratchdir,$prefix,$dbname);
}
my $db = join(" ", @dblst);
warn("db is $db\n");
die( "need an inputfile\n" ) unless defined $query && -e $query;
die( "need some params\n" ) unless defined $params;


for my $dir( $jobdir,$input,$outdir,$tmpdir ) {
mkdir $dir unless -d $dir;
}

$query = File::Spec->rel2abs($query);
my (undef,undef,$query_file) = File::Spec->splitpath($query);
my $uid = $query_file;
my $in = Bio::SeqIO->new(-file => $query,
-format => "fasta");

my ($ct,$setct) = (0,0);
my $infile = File::Spec->catfile($input,"$uid.$setct.fa");
my $input_seqfh = Bio::SeqIO->new(-format => 'fasta',
-file => ">$infile");

my @infiles = ($infile);
my @infilenames = ("$uid.$setct.fa");

while( my $s = $in->next_seq ) {
next if $s->seq =~ /^[Xx]+$/;
$input_seqfh->write_seq($s);
if( ++$ct >= $qsize ) {
$input_seqfh->close();
$setct++;
$infile = File::Spec->catfile($input,"$uid.$setct.fa");
$input_seqfh = Bio::SeqIO->new(-format => 'fasta',
-file => ">$infile");
push @infiles, $infile;
push @infilenames, "$uid.$setct.fa";
$ct = 0;
}
}


for( my $i = 0; $i <= $setct; $i++ ) {
my $jobfile = File::Spec->catfile($jobdir,"$uid.$i.sh");
open my $fh => ">$jobfile" || die $!;
chmod(0700,$jobfile);
my $ofile = File::Spec->catfile($outdir,"$uid.$i.output");
my $out = File::Spec->catfile($tmpdir,"$uid.$i.out");
my $ifile = $infiles[$i];#File::Spec->catfile($wkdir,$infilenames[$i]);

print $fh ( "#!/bin/bash\n",
'#PBS'." -N $prefix.$i\n",
"source ~/.bash_profile\n",
);

print $fh "$exe $params -i $ifile -d \"$db\" -o $ofile\n";

}

__END__
=head1 NAME
submit_blast_simple.pl - a script for submitting BLAST jobs for parallel submission by splitting the input into chunks
=head1 SYNOPSIS
submit_blast_simple.pl -i peptides.fa -d blast_database -s 1000
submit_blast_simple.pl -i queryscaffolds.fa -d genome -s 1000 --params "-p blastn -e 1e-5 -m 8"
=head1 DESCRIPTION
Assumes you will be running NCBI blast (blastall) and the default
parameter options are:
-p blastp -e 1e-3 -m 8
It will create the directories
~/jobs ~/output ~/input
The job files will be saved in ~/jobs and you will submit them like
for job in jobs/*.sh
do
qsub jobs/$job
done
The output will be written to ~/output. When your jobs are done you
can safely remove the job files and input files saved in ~/jobs and
~/input respectively.
=head1 FEEDBACK
jason.stajich [at] ucr.edu
=cut

0 comments on commit 02ab0c6

Please sign in to comment.