Initial commit.

arnsholt · Jun 7, 2010 · 32e90de · 32e90de
commit 32e90de
Show file tree

Hide file tree

Showing 4 changed files with 88 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+.*.swp
diff --git a/README b/README
@@ -0,0 +1,12 @@
+Algorithm::Viterbi - Decoding Hidden Markov Models
+==============================================================================
+
+This module contains an implementation of the Viterbi algorithm for decoding
+Hidden Markov Models (HMMs). It contains a class Algorithm::Viterbi, which
+handles the actual decoding of the HMM, and Algorithm::Viterbi::HMM which
+represents the HMM.
+
+XXX: An implementation of the Forward algorithm would probably use the same
+data structure, so it would make sense to share the HMM class between them.
+How best to do this? Invert the hierarchy, and have Algorithm::HMM contain all
+things HMM, or some other form of cleverness? See how CPAN handles this.
diff --git a/lib/Algorithm/Viterbi.pm b/lib/Algorithm/Viterbi.pm
@@ -0,0 +1,70 @@
+use v6;
+
+unit class Algorithm::Viterbi;
+
+# Marker types for the virtual states; decode reads Start as a transition key.
+our class Start {};
+our class End {};
+# TODO: placeholder role so that domain objects can serve as observations.
+our role Observation {};
+
+has @!alphabet;       # The HMM's alphabet (state names, in index order)
+has %!name-to-index;  # State name => its position in @!alphabet
+has %!p-transition;   # Transition probabilities, read as {from}{to}
+has %!p-emission;     # Emission probabilities, read as {state}{observation}
+
+# Constructor hook: store the alphabet and index each state name by position.
+# Declared as a submethod, the documented form for BUILD, so it is not inherited.
+submethod BUILD(:@alphabet) {
+    @!alphabet = @alphabet;
+    # antipairs yields (name => index) pairs, i.e. the inverse of the list.
+    %!name-to-index = @!alphabet.antipairs;
+}
+
+# Decode @input (a list of observations) with the Viterbi algorithm, returning
+# the most probable list of state names (empty for empty input). States are
+# keyed by their index in @!alphabet; a missing probability counts as 0.
+# TODO: Algorithm::Viterbi on CPAN also computes the Forward probability of
+# the sequence. An Observation role would let domain objects be passed in.
+#method decode($hmm: Array of Observation @input) {
+method decode($hmm: @input) {
+    return () unless @input && @!alphabet;
+
+    # @trellis[t][s]: probability of the best path ending in state s at tick
+    # t. @trace[t][s]: the state that path came from at tick t - 1.
+    my (@trellis, @trace);
+
+    # First tick: leave the virtual Start state and emit the first symbol.
+    for ^@!alphabet -> $state {
+        @trellis[0][$state] = (%!p-transition{Start}{$state} // 0)
+                            * (%!p-emission{$state}{@input[0]} // 0);
+        @trace[0][$state] = Start;
+    }
+
+    # Remaining ticks: extend the best predecessor path into each state.
+    for 1 .. @input.end -> $t {
+        for ^@!alphabet -> $state {
+            my &score = { @trellis[$t - 1][$_] * (%!p-transition{$_}{$state} // 0) };
+            my $from = @!alphabet.keys.max(&score);
+            @trellis[$t][$state] = score($from) * (%!p-emission{$state}{@input[$t]} // 0);
+            @trace[$t][$state] = $from;
+        }
+    }
+
+    # Last tick: weigh in the move to End (taken as 1 when absent), then walk
+    # @trace backwards to recover the path.
+    my &final = { @trellis[@input.end][$_] * (%!p-transition{$_}{End} // 1) };
+    my @path = @!alphabet.keys.max(&final);
+    @path.unshift(@trace[$_][@path[0]]) for reverse 1 .. @input.end;
+    return @!alphabet[@path];
+}
+
+# Not yet implemented: intended to compute unsmoothed bigram probabilities from
+# training input. The input shape is undecided (an array of arrays, perhaps?).
+#multi method train($hmm: Array of Array of Observation @inputs) {
+multi method train($hmm: @inputs) {
+}
+
+# TODO: How does file IO work in P6?
+#multi method train($hmm: $file) {
+#}
diff --git a/t/00-basic.t b/t/00-basic.t
@@ -0,0 +1,5 @@
+use v6;
+use Test;
+use Algorithm::Viterbi;
+# Smoke test: the class loads and can be constructed from an alphabet.
+isa-ok Algorithm::Viterbi.new(:alphabet<H C>), Algorithm::Viterbi; done-testing;