Permalink
Switch branches/tags
tag-ensembl-stable-061 start snapshot-at-head-of-07-branch release-ensembl-06 release-06 release-06-2 release-1_01 release-1-7-1 release-1-7-0 release-1-7-0-RC6 release-1-7-0-RC5 release-1-7-0-RC4 release-1-6-zenodo release-1-6-924 release-1-6-923 release-1-6-922 release-1-6-921 release-1-6-920 release-1-6-910 release-0-9-3 release-0-9-2 release-0-9-0 release-0-7-2 release-0-7-1 release-0-7-0 release-0-05 release-0-05-1 release-0-04-4 release-0-04-3 release-0-04-2 release-0-04-1 prerelease-06 ontology-overhaul-start ontology-overhaul-end ontology-fix1 lightweight_feature join-0-04-to-0-05 gbrowse_1_65 for_gmod_0_003 bioperl-run-release-1-2-0 bioperl-release-1-6 bioperl-release-1-6-901 bioperl-release-1-6-9 bioperl-release-1-6-1 bioperl-release-1-5-2 bioperl-release-1-5-2-patch2 bioperl-release-1-5-2-patch1 bioperl-release-1-5-1 bioperl-release-1-5-1-rc4 bioperl-release-1-5-0 bioperl-release-1-5-0-rc2 bioperl-release-1-5-0-rc1 bioperl-release-1-4-0 bioperl-release-1-2-3 bioperl-release-1-2-2 bioperl-release-1-2-1 bioperl-release-1-2-0 bioperl-release-1-1-0 bioperl-release-1-0-2 bioperl-release-1-0-1 bioperl-release-1-0-0 bioperl-devel-1-3-04 bioperl-devel-1-3-03 bioperl-devel-1-3-02 bioperl-devel-1-3-01 bioperl-devel-1-1-1 bioperl-061-pre1 bioperl-06-1 bioperl-1-6-RC4 bioperl-1-6-RC3_15392 bioperl-1-6-RC3 bioperl-1-6-RC2_15306 bioperl-1-6-RC2 bioperl-1-6-RC1 bioperl-1-6-0_006 bioperl-1-6-0_005 bioperl-1-6-0_004 bioperl-1-6-0_003 bioperl-1-6-0_002 bioperl-1-6-0_001 bioperl-1-2-1-rc1 bioperl-1-0-alpha2-rc bioperl-1-0-alpha bioperl-1-0-0 before-05-to-06-trunk before-05-to-06-merge after004 after-05-06-merge after-05-06-merge-2
Nothing to show
Find file
Fetching contributors…
Cannot retrieve contributors at this time
executable file 82 lines (72 sloc) 2.43 KB
#!/usr/bin/perl
use strict;
use vars qw($USAGE);
# random sequence generator #
# -c=1 option will cause prot sequences to be built
# using vertebrate aa frequencies,
# with option -a putting a 1st methionine residues on. Frequencies are
# calculated from the NCBI human RefSeq protein sequences
# -c and -a only affect protein sequences
# -a only works in conjunction with -c
# -n number of random sequences, default = 1
use Bio::PrimarySeq;
use Bio::SeqIO;
use Getopt::Long;
my ($length,$type,$filename,$comp,$met);
$USAGE = 'usage: generate_random_seq.pl --length=1000 --type=dna --filename=/tmp/test.seq --number=50';
my %alphabets = ( 'dna' => [qw(C A G T)],
'rna' => [qw(C A G U)],
'prot'=> [qw( A C D E F G H I K L M N P Q R S T V W Y)],
);
# make random num from 1-10000. numbers in this array reflect the frequency,
# e.g., a random number from 1.744 = A, 745-991 = C etc;
my @aa_frequencies = qw(744 991 1398 2017 2378 3104 3349 3726 4239 5273 5443
5749 6410 6848 7455 8263 8760 9340 9488 9713 10000);
my $number = 1;
&GetOptions
(
'l|length:s' => \$length,
't|type|m|alphabet:s' => \$type,
'f|file|filename:s' => \$filename,
'c|composition:s' => \$comp,
'a|methionine:s' => \$met,
'n|number:s' => \$number
);
assert ( $type && defined ($alphabets{lc $type}),
$USAGE);
assert ( $length && $length =~ /^\d+$/, $USAGE );
foreach my $num (1..$number) {
my $sequence = "";
my $alphabet = $alphabets{lc $type};
my $sspace = scalar @$alphabet;
if (!$comp || $type ne 'prot') {
foreach ( 1..$length ) {
$sequence .= $alphabet->[ int rand($sspace) ];
}
}elsif ($type eq 'prot') {
$sequence = build_seq($length, \@aa_frequencies);
}
my $seq = Bio::PrimarySeq->new(-seq => $sequence,
-display_id => 'randomseq'.$num);
my %args = (-format => 'fasta');
if( $filename ) { $args{-file} = ">>$filename" }
my $seqio = Bio::SeqIO->new(%args);
$seqio->write_seq($seq);
}
sub assert { die $_[1] unless( $_[0] ); }
sub build_seq {
#takes seqlen and ref to frequency data as parameters
my ($len, $pf) = @_;
my $str = ($met)?'M':'';
my $i = ($met)?1:0;
for ($i..$len-1) {
my $aa = int(rand (10000)) ;
my $j = 0;
while ($pf->[$j] < $aa && $j <19) {
$j++;
}
$str .= $alphabets{'prot'}[$j];
}
print "str is $str\n";
return $str;
}