The code below originally comes from [this gist](https://gist.github.com/awni/56369a90d03953e370f3964c826ed4b0). It implements a beam-search decoder for CTC output probabilities.

We start out by doing the imports:

In [42]:
import numpy as np
import math
import sys
import collections

In [83]:
class CTCDecoder:
    """
    Beam search decoder for CTC output probabilities.

    The decoder keeps, for every candidate label prefix, two log
    probabilities: one for alignments that currently end in the blank
    symbol and one for alignments that end in a non-blank symbol.

    Attributes:
        alphabet: Indexable collection mapping a label index to the
            string printed for it by run() / test().
        NEG_INF: Log-space representation of probability zero.
        trace: When true, decode() writes the index of every time step
            to stderr.
    """

    def __init__(self, alphabet):
        self.alphabet = alphabet
        self.NEG_INF = -float("inf")
        self.trace = False

    def make_new_beam(self):
        """
        Returns an empty beam: a mapping from prefix to
        (p_blank, p_no_blank) in which unseen prefixes start out with
        probability zero (NEG_INF in log space) for both entries.
        """
        return collections.defaultdict(lambda: (self.NEG_INF, self.NEG_INF))

    def logsumexp(self, *args):
        """
        Stable log sum exp.

        Returns log(sum(exp(a) for a in args)). The maximum is factored
        out before exponentiating. When every argument is NEG_INF
        (including the case of no arguments at all) the result is
        NEG_INF, which also avoids computing inf - inf.
        """
        if all(a == self.NEG_INF for a in args):
            return self.NEG_INF
        a_max = max(args)
        lsp = math.log(sum(math.exp(a - a_max) for a in args))
        return a_max + lsp

    def decode(self, probs, beam_size=100, blank=0):
        """
        Performs inference for the given output probabilities.

        Arguments:
                probs: The output probabilities (e.g. post-softmax) for each
                    time step. Anything numpy can turn into a 2-D array of
                    shape (time x output dim), e.g. an ndarray or a list
                    of rows.
                beam_size (int): Size of the beam to use during inference.
                    Must be at least 1.
                blank (int): Index of the CTC blank label. An index outside
                    range(output dim) never matches any symbol.

        Returns the output label sequence (a tuple of label indices) and
        the corresponding negative log-likelihood estimated by the decoder.

        Raises:
                ValueError: if probs is not 2-D or beam_size is below 1.
        """
        probs = np.asarray(probs)
        if probs.ndim != 2:
            raise ValueError(
                "probs must be 2-D (time x output dim), got shape {}".format(
                    probs.shape))
        if beam_size < 1:
            raise ValueError(
                "beam_size must be at least 1, got {}".format(beam_size))

        T, S = probs.shape
        # A probability of exactly zero is legitimate input; its log is
        # -inf, which is exactly NEG_INF, so silence numpy's
        # divide-by-zero warning for this one call only.
        with np.errstate(divide="ignore"):
            log_probs = np.log(probs)

        # Elements in the beam are (prefix, (p_blank, p_no_blank))
        # Initialize the beam with the empty sequence, a probability of
        # 1 for ending in blank and zero for ending in non-blank
        # (in log space).
        beam = [(tuple(), (0.0, self.NEG_INF))]

        for t in range(T):  # Loop over time
            if self.trace:
                print('t:', t, file=sys.stderr)
            # A default dictionary to store the next step candidates.
            next_beam = self.make_new_beam()

            # NOTE: vocab is the outer loop and the beam the inner one.
            # This fixes the insertion order of next_beam and therefore
            # how the stable sort below breaks ties between equal scores.
            for s in range(S):  # Loop over vocab
                p = log_probs[t, s]

                # The variables p_b and p_nb are respectively the
                # probabilities for the prefix given that it ends in a
                # blank and does not end in a blank at this time step.
                for prefix, (p_b, p_nb) in beam:  # Loop over beam

                    # If we propose a blank the prefix doesn't change.
                    # Only the probability of ending in blank gets updated.
                    if s == blank:
                        n_p_b, n_p_nb = next_beam[prefix]
                        n_p_b = self.logsumexp(n_p_b, p_b + p, p_nb + p)
                        next_beam[prefix] = (n_p_b, n_p_nb)
                        continue

                    # Extend the prefix by the new character s and add it to
                    # the beam. Only the probability of not ending in blank
                    # gets updated.
                    end_t = prefix[-1] if prefix else None
                    n_prefix = prefix + (s,)
                    n_p_b, n_p_nb = next_beam[n_prefix]
                    if s != end_t:
                        n_p_nb = self.logsumexp(n_p_nb, p_b + p, p_nb + p)
                    else:
                        # We don't include the previous probability of not
                        # ending in blank (p_nb) if s is repeated at the
                        # end. The CTC algorithm merges characters not
                        # separated by a blank.
                        n_p_nb = self.logsumexp(n_p_nb, p_b + p)

                    # *NB* this would be a good place to include an LM score.
                    next_beam[n_prefix] = (n_p_b, n_p_nb)

                    # If s is repeated at the end we also update the unchanged
                    # prefix. This is the merging case.
                    if s == end_t:
                        n_p_b, n_p_nb = next_beam[prefix]
                        n_p_nb = self.logsumexp(n_p_nb, p_nb + p)
                        next_beam[prefix] = (n_p_b, n_p_nb)

            # Sort and trim the beam before moving on to the
            # next time-step.
            beam = sorted(next_beam.items(),
                          key=lambda x: self.logsumexp(*x[1]),
                          reverse=True)
            beam = beam[:beam_size]

        best_prefix, best_scores = beam[0]
        # "0.0 - x" equals "-x" for every non-zero x but yields +0.0
        # rather than -0.0 when the likelihood is exactly 1, so the
        # score never prints as "-0.000".
        return best_prefix, 0.0 - self.logsumexp(*best_scores)

    def test(self):
        """
        Decodes a fixed pseudo-random (6 x len(alphabet)) probability
        matrix and prints the result via run().
        """
        # A private generator seeded with 3 draws the same numbers as
        # seeding the global numpy generator with 3 would, without
        # resetting the global random state as a side effect.
        rng = np.random.RandomState(3)

        time = 6
        output_dim = len(self.alphabet)

        probs = rng.rand(time, output_dim)
        probs = probs / np.sum(probs, axis=1, keepdims=True)

        self.run(probs)

    def run(self, probs):
        """
        Decodes probs with the default beam size and blank index, then
        prints the label indices, the labels mapped through the
        alphabet, and the negative log-likelihood to three decimals.
        """
        labels, score = self.decode(probs)
        print(labels)
        print(''.join([self.alphabet[i] for i in labels]))
        print("Score {:.3f}".format(score))



In [84]:
# Alphabet: index 0 is the space character (which lines up with the
# decoder's default blank index of 0), followed by the letters a-z.
V = list(' abcdefghijklmnopqrstuvwxyz')
dec = CTCDecoder(V)
dec.test()


(5, 14, 7)
eng
Score 13.048


Here we make an input matrix, emulating the output of the acoustic model.

In [85]:
import random, sys

C = ['c', 'c', 'a', 'a', 't', 't']  # symbol we want to dominate at each timestep
TARGET_WEIGHT = 10       # unnormalised weight given to the wanted symbol
NOISE_WEIGHTS = (1, 5)   # inclusive range of the weight drawn for every other symbol

# Dedicated, seeded generator: re-running the cell rebuilds the same matrix.
# The seed value itself is arbitrary.
rng = random.Random(0)

M = []  # one normalised row per timestep
for c in C:
    row = [TARGET_WEIGHT if v == c else rng.randint(*NOISE_WEIGHTS) for v in V]
    total = sum(row)  # computed once per row instead of once per element
    M.append([w / total for w in row])  # normalise so the row sums to 1

M = np.array(M)  # numpy-ise it: shape (len(C), len(V))

This is our $T \times V$ matrix (timesteps by vocabulary/alphabet size):

In [86]:
# Show the matrix as plain text: one row per entry of C, one column per symbol in V.
print(M)

[[0.04651163 0.02325581 0.01162791 0.11627907 0.04651163 0.03488372
  0.02325581 0.05813953 0.02325581 0.03488372 0.01162791 0.05813953
  0.04651163 0.02325581 0.02325581 0.01162791 0.02325581 0.04651163
  0.04651163 0.03488372 0.03488372 0.03488372 0.03488372 0.04651163
  0.02325581 0.04651163 0.03488372]
 [0.01123596 0.03370787 0.04494382 0.11235955 0.03370787 0.04494382
  0.05617978 0.02247191 0.03370787 0.03370787 0.05617978 0.01123596
  0.03370787 0.05617978 0.05617978 0.02247191 0.01123596 0.03370787
  0.02247191 0.02247191 0.02247191 0.01123596 0.04494382 0.05617978
  0.02247191 0.04494382 0.04494382]
 [0.01162791 0.11627907 0.04651163 0.03488372 0.04651163 0.04651163
  0.01162791 0.02325581 0.04651163 0.03488372 0.04651163 0.02325581
  0.03488372 0.05813953 0.05813953 0.01162791 0.01162791 0.04651163
  0.04651163 0.01162791 0.01162791 0.03488372 0.01162791 0.03488372
  0.04651163 0.04651163 0.04651163]
 [0.0625     0.125      0.025      0.0625     0.0125     0.0625
  0.0125    

In [87]:
# Decode the synthetic matrix; run() prints the label indices, the decoded string and the score.
dec.run(M)

(3, 1, 20)
cat
Score 10.810


In [90]:
# Turn tracing on so decode() writes each timestep index to stderr, then decode again.
dec.trace = True
dec.run(M)

(3, 1, 20)
cat
Score 10.810


t: 0
t: 1
t: 2
t: 3
t: 4
t: 5
