We now pass a basic test of decoding.

arnsholt · Jun 8, 2010 · 87a1698
1 parent f6a791e
commit 87a1698
Show file tree

Hide file tree

Showing 2 changed files with 54 additions and 15 deletions.
diff --git a/lib/Algorithm/Viterbi.pm b/lib/Algorithm/Viterbi.pm
@@ -62,43 +62,78 @@ method decode($hmm: @input) {
     # We represent the trellis as a 2D list. The first dimension is the "tick"
     # along the input, the second the state space. @trellis contains the
     # accumulated probabilities, @trace the state we came from.
-    my @trellis;
-    my @trace;
+    my @trellis = [];
+    my @trace = [];
 
+    # Initialise the matrices, to keep Rakudo happy.
+    for ^@input -> $i {
+        @trellis[$i] = 0 xx +@!alphabet;
+        @trace[$i] = 0 xx + @!alphabet;
+    }
+
+    # Initialise the first row of the matrix.
     my $first = @input.shift; # Shift the first observation off the input.
+    @trellis[0][0] = 0;
     for ^@!alphabet -> $state {
-        @trellis[0][$state] = %!p-transition{Start}{$state}
-                            * %!p-emission{$state}{$first};
-        @trace[0][$state] = $!initial-state;
+        my $tag = @!alphabet[$state];
+        @trellis[0][$state] = %!p-transition{Start}{$tag}
+                            * %!p-emission{$tag}{$first};
+        @trace[0][$state] = Start;
     }
 
-    # TODO: Iterate over the input, calculating probabilities as we go.
+    # Iterate over the input, calculating probabilities as we go.
     for @input.kv -> $index, $observation {
         for ^@!alphabet -> $state {
-            # TODO: Get argmax here.
             my ($max-p, $i) = (0, 0);
+            my $tag = @!alphabet[$state];
+
+            # Do the argmax to figure out which previous state is the optimal
+            # fit for this current state.
             for ^@!alphabet -> $prev-state {
-                my $new-p = @trellis[$index][$prev-state] *
-                %!p-transition{$prev-state}{$state};
+                my $prev-tag = @!alphabet[$prev-state];
+                my $new-p = @trellis[$index][$prev-state]
+                          * %!p-transition{$prev-tag}{$tag}
+                          * %!p-emission{$tag}{$observation};
 
                 if $new-p > $max-p {
                     $max-p = $new-p;
                     $i = $prev-state;
                 }
             }
 
+            # Update the trellis and the trace.
             @trellis[$index+1][$state] = $max-p;
             @trace[$index+1][$state] = $i;
         }
     }
 
-    # TODO: Calculate the final transition probabilities, finding the optimal
-    # path through the HMM.
-    my $index = @input.end + 2;
-    for @!states -> $state {
+    # Finalisation.
+    my $index = @input.end + 1;
+    my ($max-p, $i) = (0, 0);
+    # Do the argmax to find the optimal previous state before the End state.
+    for ^@!alphabet -> $prev-state {
+        my $prev-tag = @!alphabet[$prev-state];
+        my $new-p = @trellis[$index][$prev-state]
+                  * %!p-transition{$prev-tag}{End};
+        @trellis[$index][$prev-state].perl.say;
+        %!p-transition{$prev-tag}{End}.perl.say;
+
+        if $new-p > $max-p {
+            $max-p = $new-p;
+            $i = $prev-state;
+        }
+    }
+
+    # Compute the resulting list of tags by unshifting tags onto @result from
+    # the reversed trace.
+    my $final-tag = $i;
+    my @result;
+    for @trace.reverse -> @arr {
+        @result.unshift: @!alphabet[$final-tag];
+        $final-tag = @arr[$final-tag];
     }
 
-    # TODO: Get the best list of events from the trellis and return it.
+    return @result;
 }
 
 # Compute unsmoothed bigram probabilities from an input file.

diff --git a/t/00-basic.t b/t/00-basic.t
@@ -4,11 +4,15 @@ use Test;
 
 use Algorithm::Viterbi;
 
+plan 4;
+
 my Algorithm::Viterbi $hmm .= new(:alphabet<H C>);
 pass("creating new decoder");
 
 $hmm.train("t/eisner.tt");
 ok($hmm.p-transition<C><H> == 13/68, "C -> H == 13/68?");
 ok($hmm.p-emission<C><3> == 5/34, "C -> 3 == 5/34?");
 
-done_testing;
+my $result = $hmm.decode(<1 1 3 3 3 3 1 1 1 1>);
+is_deeply($result, ["H", "H", "H", "H", "H", "H", "C", "C", "C", "C"],
+    "correctly decodes <1 1 3 3 3 3 1 1 1 1>");