Permalink
Switch branches/tags
tag-ensembl-stable-061 start snapshot-at-head-of-07-branch release-ensembl-06 release-06 release-06-2 release-1_01 release-1-7-1 release-1-7-0 release-1-7-0-RC6 release-1-7-0-RC5 release-1-7-0-RC4 release-1-6-zenodo release-1-6-924 release-1-6-923 release-1-6-922 release-1-6-921 release-1-6-920 release-1-6-910 release-0-9-3 release-0-9-2 release-0-9-0 release-0-7-2 release-0-7-1 release-0-7-0 release-0-05 release-0-05-1 release-0-04-4 release-0-04-3 release-0-04-2 release-0-04-1 prerelease-06 ontology-overhaul-start ontology-overhaul-end ontology-fix1 lightweight_feature join-0-04-to-0-05 gbrowse_1_65 for_gmod_0_003 bioperl-run-release-1-2-0 bioperl-release-1-6 bioperl-release-1-6-901 bioperl-release-1-6-9 bioperl-release-1-6-1 bioperl-release-1-5-2 bioperl-release-1-5-2-patch2 bioperl-release-1-5-2-patch1 bioperl-release-1-5-1 bioperl-release-1-5-1-rc4 bioperl-release-1-5-0 bioperl-release-1-5-0-rc2 bioperl-release-1-5-0-rc1 bioperl-release-1-4-0 bioperl-release-1-2-3 bioperl-release-1-2-2 bioperl-release-1-2-1 bioperl-release-1-2-0 bioperl-release-1-1-0 bioperl-release-1-0-2 bioperl-release-1-0-1 bioperl-release-1-0-0 bioperl-devel-1-3-04 bioperl-devel-1-3-03 bioperl-devel-1-3-02 bioperl-devel-1-3-01 bioperl-devel-1-1-1 bioperl-061-pre1 bioperl-06-1 bioperl-1-6-RC4 bioperl-1-6-RC3_15392 bioperl-1-6-RC3 bioperl-1-6-RC2_15306 bioperl-1-6-RC2 bioperl-1-6-RC1 bioperl-1-6-0_006 bioperl-1-6-0_005 bioperl-1-6-0_004 bioperl-1-6-0_003 bioperl-1-6-0_002 bioperl-1-6-0_001 bioperl-1-2-1-rc1 bioperl-1-0-alpha2-rc bioperl-1-0-alpha bioperl-1-0-0 before-05-to-06-trunk before-05-to-06-merge after004 after-05-06-merge after-05-06-merge-2
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 552 lines (446 sloc) 14.8 KB
#!/usr/bin/perl
=pod
=head1 NAME
rnai_finder.cgi
=head1 DESCRIPTION
A CGI script using the Bio::Tools::SiRNA package to design RNAi reagents.
Retrieves sequences from NCBI and generates output in graphic and tabular form.
=head1 INSTALLATION
To use this script, place it in an appropriate cgi-bin directory on a web server.
The script needs to write its graphic maps to a temporary directory. Please update
$TMPDIR and $TMPURL to suit your local configuation.
=head1 AUTHOR
Donald Jackson (donald.jackson@bms.com)
=head1 SEE ALSO
L<Bio::Tools::SIRNA>, L<Bio::Graphics::Panel>, L<Bio::DB::NCBIHelper>, L<CGI>
=cut
use Bio::Tools::SiRNA;
use Bio::Graphics::Panel;
use Bio::DB::NCBIHelper;
use Bio::Seq::RichSeq; # for hand-entry
use Bio::SeqFeature::Generic;
use GD::Text::Align;
use Clone qw(clone);
use CGI;
use CGI::Carp qw (fatalsToBrowser carpout);
my $q = CGI->new;
# define a bunch of constants
my %COLORRANKS = ( 1 => 'red',
2 => 'orchid',
3 => 'blue' );
my $TMPDIR = '/var/www/htdocs/tmp/';
my $TMPURL = '/tmp/';
my $ATGPAD = 75; # how far from start do we wait?
my $NOLIGOS = 3;
my $log = $TMPDIR . 'RNAiFinder.log';
open (LOG, ">>$log") or die $!;
carpout(LOG);
print $q->header,
$q->start_html;
print $q->h1('RNAi Finder');
if ($q->param('Design')) {
if ($q->param('accession') and !$q->param('seq')) {
$target = get_target();
}
else {
$target = make_target();
}
get_rnai($target);
}
else {
get_settings();
}
sub get_settings {
print <<EOM1;
<P>Oligos are designed as described on the <A HREF="http://www.mpibpc.gwdg.de/abteilungen/100/105/sirna.html" TARGET="Tuschl">Tuschl lab web page</A> and are ranked as follows:
<UL>
<LI><B>New:</B> Selecting 'Pol3-compatible targets' looks for oligos with the
pattern NAR(N17)YNN which can be synthesized or expressed from a Pol3 promoter.
<br>This selection <b>overrides</b> the 'Cutoff' rank.
<LI>Oligos with Rank = 1 (best) match the AAN(19)TT rule.
<LI>Oligos with Rank = 2 match the AAN(21) rule
<LI>Oligos with Rank = 3 match the NAN(21) rule.
</UL>
<P>If percent GC and specificity are similar, Rank 1 oligos are better. All 3 prime overhangs are converted to TT; the rest of the sequence is transcribed into RNA</P>
<h3>Modifications to published rules:</h3>
<ul>
<li>
Runs of 3 or more consecutive Gs on either strand are skipped - these can cause problems in synthesis.
<li>Users may choose to exclude oligos that overlap single nucleotide polymorphisms (ON by default). SNP data comes from the NCBI dbSNP database.
<li>'Low-complexity' regions (such as runs of a single nucleotide) are also excluded.
</ul>
EOM1
print $q->start_form;
print $q->h2('Enter your sequence and other parameters:'), "\n";
print $q->p('The values already here are DEFAULTS - you should change them to suit YOUR sequence');
print $q->start_table();
print $q->TR( $q->td({-align=> 'left'},
[
$q->textfield(-name => 'mingc', -default => '0.40'),
$q->textfield(-name => 'maxgc', -default => '0.60'),
]
),
$q->td({-align=> 'left'},
$q->popup_menu(-name => 'worstrank',
-values => [1,2,3],
-default => 2,
),
$q->b('OR'),
$q->checkbox(-name => 'pol3',
-label => 'Pol3 compatible',
-default => 0,
),
),
);
print $q->TR( $q->th({-align=> 'left'}, 'Exclude oligos with SNPs?'),
$q->td($q->radio_group(-name => 'avoid_snps',
-values => [1,0],
-default => 1,
-labels => {1 => 'Yes', 0 => 'No'}
)),
);
print $q->TR( $q->th({-align=> 'left'}, 'Sequence Name:'),
$q->td({-align=> 'left'},$q->textfield('accession')),
$q->td({-align=> 'left'},
$q->em( q(Enter an accession and you won't have to enter the <br>sequence or start/stop. Use accessions beginning with NM_ if possible.))),
);
print $q->TR( $q->th({-align=> 'left'}, ['Position of initiator ATG:',
'NT after start to exclude:',
'Position of Stop codon:' ]));
print $q->TR( $q->td({-align=> 'left'},
[$q->textfield(-name => 'cdstart', -default => 1),
$q->textfield(-name => 'atgpad', -default => $ATGPAD),
$q->textfield('cdend'), ]));
print $q->TR( $q->th({-align=> 'left'}, ['Minimum Fraction GC:',
'Maximum Fraction GC:',
'Rank cutoff',
]));
print $q->TR($q->th({-align=> 'left', -colspan=>2},'cDNA Sequence in plain text or FASTA format'),
$q->td( $q->a({-href =>'Fasta_format.html', -target => 'Fasta_desc'}, 'What is FASTA format?')),
);
print $q->TR($q->td({-align => 'left', -colspan=>3},
$q->textarea( -name =>'seq',
-rows => 4,
-columns => 80,
-wrap => 'virtual',
)));
print $q->TR( $q->th({-align => 'left', -colspan=>3},
'Output options: '));
print $q->TR( $q->td({-align=> 'left'},
[ $q->checkbox(-name => 'Graphic', -checked => 'checked'),
$q->checkbox(-name => 'Table', -checked => 'checked'),
]));
print $q->TR($q->td({-align=> 'left', -colspan=>3}, $q->submit('Design')));
print $q->end_table();
print $q->end_form;
}
sub get_rnai {
# design and output RNAi reagents
my ($gene) = @_;
my $factory = Bio::Tools::SiRNA->new( -target => $gene,
-tmpdir => $TMPDIR,
-cutoff => $q->param('worstrank') || 2,
-avoid_snps => $q->param('avoid_snps') || 1,
-min_gc => $q->param('min_gc') || 0.40,
-max_gc => $q->param('max_gc') || 0.60,
-pol3 => $q->param('pol3') || 0,
);
print $q->p('Designing Pol3-compatible oligos') if ($q->param('pol3'));
my @pairs = $factory->design;
draw_gene($gene) if ($q->param('Graphic'));
print_table($gene->accession, \@pairs) if ($q->param('Table'));
print_text($gene->accession, \@pairs) if ($q->param('Text'));
}
sub get_target {
my ($acc) = $q->param('accession');
my $gb = Bio::DB::NCBIHelper->new();
my $seq = $gb->get_Seq_by_acc($acc);
if ($seq) {
return $seq;
}
else {
print_error("Unable to retrieve sequence from GenBank using accession $acc");
return;
}
}
sub make_target {
# sanity chex - do we have the necessary info?
$q->param('seq') or print_error("Please supply a sequence", 1);
my $seq = $q->param('seq');
my $name;
# is sequence in fasta format?
if ($seq =~ /^>/) {
my ($head, $realseq) = split (/\n/, $seq, 2);
$head =~ /^>(.+?) /;
$name = $1;
$realseq =~ s/[\n|\r|\s]//g;
$seq = $realseq;
}
elsif ($q->param('accession')) {
$name = $q->param('accession');
$seq =~ s/[\n|\r|\s]//g;
}
else {
print_error('Please supply a sequence name!');
return;
}
$cds_start = $q->param('cds_start') || 1;
$cds_end = $q->param('cds_end') || length($seq);
# create a new Bio::Seq::RichSeq object from parameters
my $seqobj = Bio::Seq::RichSeq->new( -seq => $seq,
-accession_number => $name,
-molecule => 'DNA',
);
my $cds = Bio::SeqFeature::Generic->new( -start => $cds_start,
-end => $cds_end,
);
$cds->primary_tag('CDS');
$seqobj->add_SeqFeature($cds);
return $seqobj;
}
sub draw_gene {
# now draw a pretty picture
my ($gene) = @_;
my $panel = Bio::Graphics::Panel->new( -segment => $gene,
-width => 600,
-pad_top => 100,
-pad_bottom => 20,
-pad_left => 50,
-pad_right => 50,
-fontcolor => 'black',
-fontcolor2 => 'black',
-key_color => 'white',
-grid => 1,
-key_style => 'between',
#-gridcolor => 'lightgray',
);
my $genefeat = Bio::SeqFeature::Generic->new( -start => 1,
-end => $gene->length);
$panel->add_track( arrow => $genefeat,
-bump => 0,
-tick => 2,
-label => 1,
);
my %feature_classes;
foreach $feat($gene->top_SeqFeatures) {
$feature_classes{ $feat->primary_tag } ||= [];
push(@{ $feature_classes{ $feat->primary_tag } }, $feat);
}
# for some reason, Bio::Graphics insists on drawing subfeatures for SiRNA::Pair objects...
$cleanpairs = cleanup_feature($feature_classes{'SiRNA::Pair'});
# draw
$panel->add_track( transcript => $feature_classes{'gene'},
-bgcolor => 'green',
-fgcolor => 'black',
-fontcolor2 => 'black',
-key => 'Gene',
-bump => +1,
-height => 8,
-label => \&feature_label,
-description => 1,
);
$panel->add_track( transcript2 => $feature_classes{'CDS'},
-bgcolor => 'blue',
-fontcolor2 => 'black',
-fgcolor => 'black',
-key => 'CDS',
-bump => +1,
-height => 8,
-label => \&feature_label,
-description => \&feature_desc,
);
$panel->add_track( $feature_classes{'variation'},
-bgcolor => 'black',
-fgcolor => 'black',
-fontcolor2 => 'black',
-key => 'SNPs',
-bump => +1,
-height => 8,
-label => \&snp_label,
#-glyph => 'triangle',
-glyph => 'diamond',
-description => \&feature_desc,
);
$panel->add_track( generic => $feature_classes{'Excluded'},
-bgcolor => 'silver',
-fgcolor => 'black',
-fontcolor => 'black',
-fontcolor2 => 'black',
-key => 'Excluded Regions',
-bump => +1,
-height => 6,
-label => \&feature_label,
-description => \&feature_desc,
);
$panel->add_track(
generic => $cleanpairs,
-bgcolor => \&feature_color,
-fgcolor => \&feature_color,
-fontcolor => 'black',
-fontcolor2 => 'black',
-key => 'SiRNA Reagents',
-bump => +1,
-height => 8,
-label => \&feature_label,
-glyph => 'generic',
-description => \&feature_desc,
);
my $gd = $panel->gd;
my $black = $gd->colorAllocate(0,0,0);
my $txt = GD::Text::Align->new($gd);
$txt->set( valign => 'center', align => 'center', color => $black);
#$txt->set_font(['/usr/share/fonts/truetype/VERDANA.TTF',gdGiantFont ], 10);
$txt->set_font(gdGiantFont);
$txt->set_text("RNAi Reagents for ".$gene->accession );
$txt->draw(200, 50, 0);
my $pngfile = $TMPDIR . $gene->accession . '.png';
my $pngurl = $TMPURL . $gene->accession . '.png';
open (IMG, ">$pngfile") or die $!;
binmode IMG;
print IMG $gd->png;
close IMG;
# also get the imagemap boxes
my @pairboxes = extract_pairs($panel->boxes);
print $q->img({-src => $pngurl, -usemap=>"#MAP"});
print $q->p('Oligos are color coded: rank 1 in ',
$q->font({-color => $COLORRANKS{1}}, $COLORRANKS{1}),
', rank 2 in ',
$q->font({-color => $COLORRANKS{2}}, $COLORRANKS{2}),
' and rank 3 in ',
$q->font({-color => $COLORRANKS{3}}, $COLORRANKS{3}),
'. Click on an oligo to bring it up in the table below');
print_imagemap(@pairboxes);
}
sub feature_label {
my ($feature) = @_;
my (@notes, @label);
#$label = ucfirst($feature->primary_tag);
foreach (qw(note name product gene)) {
if ($feature->has_tag($_)) {
@notes = $feature->each_tag_value($_);
#$label .= ': ' . $notes[0];
push(@label, $notes[0]);
last;
}
}
return join(': ', @label);
#return $label;
}
sub feature_color {
my ($feature) = @_;
my ($rank) = $feature->each_tag_value('rank');
#print STDERR "Feature rank: $rank COLOR $COLORRANKS{$rank}\n";
return $COLORRANKS{$rank};
#return 'red';
}
sub print_table {
my ($accession, $pairs) = @_;
print $q->h2("RNAi Reagents for $accession");
print $q->start_table({-border => 1, -cellpadding => 2});
print $q->TR( $q->th(['Reagent #', 'Start', 'Stop', 'Rank', 'Fxn GC', 'Sense Oligo', 'Antisense Oligo', 'Target' ]) ), "\n";
my $i = 1;
foreach $pair ( sort { $a->start <=> $b->start } @$pairs ) {
my $sense = $pair->sense;
my $anti = $pair->antisense;
my $color = feature_color($pair);
# my $blasturl = "http://nunu.hpw.pri.bms.com/biocgi/versablast.pl?p=blastn&sequence=";
# $blasturl .= $pair->seq->seq;
# $blasturl .= "&action=Nucleotide Databases";
print
$q->TR( $q->td( [ $q->a({-name => 'RNAi' . $pair->start}) . $i,
$pair->start,
$pair->end,
$q->font({-color => $color},$pair->rank),
$pair->fxGC,
$q->tt($sense->seq),
$q->tt($anti->seq),
$q->tt($pair->seq->seq),
# $q->a({-href=>$blasturl,
# -target=>"blastn"},
# "BLAST this target"),
] ) ),
"\n";
$i++;
}
print $q->end_table;
}
sub print_text {
my ($accession, $pairs ) = @_;
my ($pair);
print "RNAi reagents for $accession \n";
print join("\t", qw(Start Stop Rank Sense Antisense)), "\n";
foreach $pair (@$pairs ) {
my $sense = $pair->sense;
my $anti = $pair->antisense;
print join("\t", $pair->start, $pair->end, $pair->rank, $sense->seq, $anti->seq), "\n";
}
}
sub cleanup_feature {
my ($flist) = @_;
my ($feat, @clean, $cfeat);
foreach $feat(@$flist) {
$cfeat = clone($feat);
# $cfeat = $feat->clone;
$cfeat->flush_sub_SeqFeature;
push (@clean, $cfeat); # will they
}
return \@clean;
}
sub extract_pairs {
# get SiRNA::Pair features ONLY for imagemap
return ( grep {ref($_->[0]) eq "Bio::SeqFeature::SiRNA::Pair"} @_ );
}
sub print_imagemap {
my @items = @_;
print q(<MAP NAME="MAP">), "\n";
my $i = 1;
foreach $item (@items) {
my ($feature, $x1, $y1, $x2, $y2) = @$item;
my $fstart = $feature->start; # should be unique
my $text = 'RNAi #' . $i. ' Start=' . $feature->start . ' Rank='.$feature->rank;
print qq(<AREA SHAPE="RECT" COORDS="$x1,$y1,$x2,$y2" TITLE="$text" HREF="#RNAi$fstart">), "\n";
warn "Mouseover text: $text\n";
$i++;
}
print "</MAP>\n";
}
sub print_error {
# print error messages in big red type. Provide more graceful die/warn to end user
my ($msg, $fatal) = @_;
print $q->h3($q->font({-color=>'RED'}, $msg));
if ($fatal) {
print $q->end_html;
die "$msg \n";
}
else {
warn $msg;
}
}
sub dump {
print $q->start_ul;
foreach ($q->param) {
print $q->li($_),
$q->ul($q->li([ $q->param($_) ]));
}
}
sub snp_label {
# special format for SNPs
my ($feature) = @_;
my $label;
if ( $feature->has_tag('db_xref') ) {
my @notes = $feature->each_tag_value('db_xref');
$label .= $notes[0];
$label .= ' ';
}
if ( $feature->has_tag('allele') ) {
my ($nt1, $nt2) = $feature->each_tag_value('allele');
$label .= $nt1 . '->' . $nt2;
}
return $label;
}
sub feature_desc {
my ($feature) = @_;
my $desc = $feature->start;
$desc .= '-' . $feature->end unless ($feature->start == $feature->end);
return $desc;
}