Skip to content
Browse files

More normalizer work

  • Loading branch information...
1 parent fb4c0ec commit 12525a340ec522851e5be6e8931ba9c84e1ee2c2 @fangly fangly committed
Showing with 55 additions and 39 deletions.
  1. +33 −36 lib/Bio/Community/Tools/CountNormalizer.pm
  2. +22 −3 t/Tools/CountNormalizer.t
View
69 lib/Bio/Community/Tools/CountNormalizer.pm
@@ -98,6 +98,7 @@ use MooseX::Method::Signatures;
use namespace::autoclean;
use Bio::Community::Tools::Sampler;
use Bio::Community::Tools::Distance;
+use POSIX;
use List::Util qw(min);
extends 'Bio::Root::Root';
@@ -196,7 +197,6 @@ has repetitions => (
default => undef,
lazy => 1,
init_arg => '-repetitions',
- predicate => '_has_repetitions',
);
@@ -283,36 +283,37 @@ method _count_normalize () {
# Bootstrap now
my $average_communities = [];
- my $min_repetitions;
- my $max_threshold;
+ my $min_repetitions = POSIX::DBL_MAX;
+ my $max_threshold = 0;
for my $community ( @{$self->communities} ) {
my ($average, $repetitions, $dist);
if ($community->total_count == $sample_size) {
- ($average, $repetitions, $dist) = ($community->clone, 0, 0); # Nothing to normalize
- $repetitions = 0;
- $dist = 0;
+ ($average, $repetitions, $dist) = ($community->clone, undef, undef);
} else {
($average, $repetitions, $dist) = $self->_bootstrap($community);
}
- if ( (not defined $min_repetitions) || ($repetitions < $min_repetitions) ) {
- $min_repetitions = $repetitions;
- }
- if ( (not defined $max_threshold) || ($dist > $max_threshold) ) {
- $max_threshold = $dist;
+
+ if (defined $self->repetitions) {
+ $max_threshold = $dist if (defined $dist) && ($dist > $max_threshold);
+ } else {
+ $min_repetitions = $repetitions if (defined $repetitions) && ($repetitions < $min_repetitions);
}
+
push @$average_communities, $average;
}
$self->_set_average_communities($average_communities);
- ####
- print "min_repetitions = $min_repetitions\n";
- print "max_threshold = $max_threshold\n";
- ####
-
- if ($self->_has_repetitions) {
+ if (defined $self->repetitions) {
$self->threshold($max_threshold);
+ ####
+ print "set max_threshold to $max_threshold\n";
+ ####
} else {
$self->repetitions($min_repetitions);
+ ####
+ print "set min_repetitions to $min_repetitions\n";
+ ####
+
}
return 1;
@@ -332,46 +333,42 @@ method _bootstrap (Bio::Community $community) {
my $iteration = 0;
my $dist;
while (1) {
+
+ # Get a random community and add it to the overall community
$iteration++;
my $random = $sampler->get_rand_community($sample_size);
$overall = $self->_add( $overall, $random );
### divide here??
- # Exit conditions
if (not defined $repetitions) {
+ # Exit if distance with last average community is small
$dist = Bio::Community::Tools::Distance->new(
-type => 'euclidean',
-communities => [$overall, $prev_overall],
)->get_distance;
-
- ####
- print "$iteration\t$dist\n";
- ####
-
last if $dist < $threshold;
$prev_overall = $overall->clone;
} else {
-
- ####
- print "$iteration\n";
- ####
-
- last if $iteration >= $repetitions;
+ # Exit if all repetitions have been done
+ if ($iteration == $repetitions - 1) {
+ $prev_overall = $overall->clone;
+ } elsif ($iteration >= $repetitions) {
+ $dist = Bio::Community::Tools::Distance->new(
+ -type => 'euclidean',
+ -communities => [$overall, $prev_overall],
+ )->get_distance;
+ last;
+ }
}
- }
- if (defined $repetitions) {
- $dist = Bio::Community::Tools::Distance->new(
- -type => 'euclidean',
- -communities => [$overall, $prev_overall],
- )->get_distance;
}
my $average = $self->_divide($overall, $iteration);
####
- #print " ...did $iteration repetitions...\n";
+ print "Effective repetitions: $iteration\n";
+ print "Effective threshold : $dist\n";
####
return $overall, $iteration, $dist;
View
25 t/Tools/CountNormalizer.t
@@ -12,9 +12,11 @@ use_ok($_) for qw(
my ($normalizer, $community1, $community2, $average, $representative, $member1,
$member2, $member3, $member4, $member5, $member6);
-my $epsilon = 12;
+my $epsilon = 15;
+
# Community with 1500 counts
+
$community1 = Bio::Community->new( -name => 'community1' );
$member1 = Bio::Community::Member->new( -id => 1 );
$member2 = Bio::Community::Member->new( -id => 2 );
@@ -27,7 +29,9 @@ $community1->add_member( $member3, 300);
$community1->add_member( $member4, 300);
$community1->add_member( $member5, 300);
+
# Community with 5585 counts
+
$community2 = Bio::Community->new( -name => 'community1' );
$member1 = Bio::Community::Member->new( -id => 1 );
$member3 = Bio::Community::Member->new( -id => 3 );
@@ -36,11 +40,15 @@ $community2->add_member( $member1, 2014);
$community2->add_member( $member3, 1057);
$community2->add_member( $member6, 2514);
+
# Basic normalizer object
+
ok $normalizer = Bio::Community::Tools::CountNormalizer->new( );
isa_ok $normalizer, 'Bio::Community::Tools::CountNormalizer';
+
# Normalizer with specified settings
+
ok $normalizer = Bio::Community::Tools::CountNormalizer->new(
-communities => [ $community1, $community2 ],
-repetitions => 10,
@@ -51,6 +59,7 @@ is scalar(@{$normalizer->get_representative_communities}), 2;
is $normalizer->repetitions, 10;
isnt $normalizer->threshold, 0.1;
+cmp_ok $normalizer->threshold, '<', 1;
is $normalizer->sample_size, 1000;
$average = $normalizer->get_average_communities->[0];
@@ -87,6 +96,7 @@ delta_within $representative->get_count($member6), $representative->get_count($m
# Normalizer with manually specified threshold
+
ok $normalizer = Bio::Community::Tools::CountNormalizer->new(
-communities => [ $community1, $community2 ],
-threshold => 1E-1,
@@ -133,6 +143,7 @@ delta_within $representative->get_count($member6), $average->get_count($member6)
# Normalizer with automatic sample size and repetitions overriding threshold
+
ok $normalizer = Bio::Community::Tools::CountNormalizer->new(
-communities => [ $community1, $community2 ],
-threshold => 1E-1,
@@ -143,6 +154,7 @@ is scalar(@{$normalizer->get_representative_communities}), 2;
is $normalizer->repetitions, 10;
isnt $normalizer->threshold, 0.1;
+cmp_ok $normalizer->threshold, '<', 1;
is $normalizer->sample_size, 1500;
$average = $normalizer->get_average_communities->[0];
@@ -178,9 +190,16 @@ delta_within $representative->get_count($member3), $average->get_count($member3)
delta_within $representative->get_count($member6), $average->get_count($member6), 1;
-# special case where repetitions = 0
+# Special case where repetitions = 0
+
+##ok $normalizer = Bio::Community::Tools::CountNormalizer->new(
+## -communities => [ $community1, $community2 ],
+## -threshold => 1E-1,
+## -repetitions => 0,
+##);
+
-# Test with some weights
+### Test with some weights
## '_counts' => {

0 comments on commit 12525a3

Please sign in to comment.
Something went wrong with that request. Please try again.