In [1]:
import numpy as np


def compute_f(A, B, observed):
    n_nodes = len(observed)
    n_states = A.shape[0]
    f = np.zeros((n_nodes - 1, n_states, n_states))

    for i in range(n_nodes - 1):
        tmp = np.zeros((n_states, n_states))
        for j in range(n_states):
            for k in range(n_states):
                if B[k, observed[i + 1]] == 0:
                    tmp_B = np.min(B[B != 0])
                else:
                    tmp_B = np.log(B[k, observed[i + 1]])

                tmp[j, k] = np.log(A[j, k]) + tmp_B
        f[i] = tmp

    return f

In [2]:
A = np.array([[0.3, 0.7], [0.9, 0.1]])
B = np.array([[0.2, 0.8], [0.5, 0.5]])
observed = np.array([0, 1, 1])
compute_f(A, B, observed)

array([[[-1.42711636, -1.04982212],
        [-0.32850407, -2.99573227]],

       [[-1.42711636, -1.04982212],
        [-0.32850407, -2.99573227]]])

In [3]:
def Viterbi(A, B, observed):
    n_nodes = len(observed)
    n_states = A.shape[0]

    pmax = np.zeros((n_nodes - 1, n_states))
    phi = np.zeros((n_nodes - 1, n_states))

    f = compute_f(A, B, observed)

    pmax[0] = np.max((f[0]), axis=0)
    phi[0] = np.argmax(f[0], axis=0)

    for i in range(1, n_nodes - 1):
        tmp = ((f[i]).T + pmax[i - 1]).T

        pmax[i] = np.max(tmp, axis=0)

        phi[i] = np.argmax(tmp, axis=0)

    return pmax, phi

In [7]:
pmax, phi = Viterbi(A, B, observed)

In [17]:
print("pmax:\n", pmax, "\nphi\n", phi)

pmax:
 [[-0.32850407 -1.04982212]
 [-1.37832619 -1.37832619]] 
phi
 [[1. 0.]
 [1. 0.]]


In [25]:
first = np.argmax(pmax[-1])
a = int(phi[-1, first])
b = int(phi[-2, a])

In [26]:
print(first, a, b)

0 1 0


In [46]:
def reconstruct(pmax, phi):
    """
    Ricostruisce tutti i most probable, per ora fa schifo ma sembra funzionare, da ricontrollare
    """
    reconstruction = np.empty(len(phi)+1)

    curr = np.argmax(pmax[-1])
    reconstruction[-1] = curr

    for i in range(len(phi)-1, -1, -1):
        curr = int(phi[i, curr])
        reconstruction[i] = curr
    
    return reconstruction

In [47]:
reconstruct(pmax, phi)

array([0., 1., 0.])

### Experiement with matrix like the one we have (last row of spaces has 1)

In [102]:
A = np.array([[0.1, 0.8, 0.1],
              [0.8, 0.1, 0.1],
              [0.5, 0.5, 0]])

B = np.array([[0.8, 0.2, 0],
              [0.2, 0.8, 0],
              [0, 0, 1]])

observed = np.array([0, 2, 1])

In [103]:
def compute_f(A, B, observed):
    """
    Careful when handling
    - f0 is the first message (from first factor to node, it is just a vector) 
    - f contains all other factors evaluated
    """
    pi = A[-1]
    n_nodes = len(observed)
    n_states = A.shape[0]
    f = np.zeros((n_nodes-1, n_states, n_states))
    
    tmp = np.zeros((n_states, 1))
    for k in range(n_states):
        tmp[k] = pi[k] * B[k, observed[0]]
    
    f0 = tmp

    for i in range(1, n_nodes):
        tmp = np.zeros((n_states, n_states))
        
        for j in range(n_states): # over z1
            
            for k in range(n_states): # over z2
                tmp[j, k] = A[j, k] * B[k, observed[i]]
        
        f[i-1] = tmp
    

    return f0, f

In [104]:
f0, f = compute_f(A, B, observed)
print("f0:\n", f0, "\nf:\n", f)

f0:
 [[0.4]
 [0.1]
 [0. ]] 
f:
 [[[0.   0.   0.1 ]
  [0.   0.   0.1 ]
  [0.   0.   0.  ]]

 [[0.02 0.64 0.  ]
  [0.16 0.08 0.  ]
  [0.1  0.4  0.  ]]]


In [105]:
def Viterbi(f0, f):
    n_nodes = f.shape[0] + 1
    n_states = f.shape[1]

    pmax = np.zeros((n_nodes, n_states))    # Need one for every node
    phi = np.zeros((n_nodes - 1, n_states)) # Need one for every node other than the first one (no need to reconstruct it)

    pmax[0] = f0.flatten()

    for i in range(1, n_nodes):
        tmp = ((f[i-1]).T * pmax[i - 1]).T

        pmax[i] = np.max(tmp, axis=0) # by column

        phi[i-1] = np.argmax(tmp, axis=0) # i-1 cause this contains the reconstruction about the (i-1)th element

    return pmax, phi

In [106]:
pmax, phi = Viterbi(f0, f)
print("pmax:\n", pmax, "\nphi:\n", phi)

print("\nReconstruction using Viterbi:\n", reconstruct(pmax, phi))

pmax:
 [[0.4   0.1   0.   ]
 [0.    0.    0.04 ]
 [0.004 0.016 0.   ]] 
phi:
 [[0. 0. 0.]
 [2. 2. 0.]]

Reconstruction using Viterbi:
 [0. 2. 1.]


In [120]:
observed = np.array([0, 2, 1, 0, 2, 0, 1, 0, 2, 1, 0, 1, 0]) # a _ b a
f0, f = compute_f(A, B, observed)
pmax, phi = Viterbi(f0, f)
print("pmax:\n", pmax, "\nphi:\n", phi)

print("\nReconstruction using Viterbi:\n", reconstruct(pmax, phi))

pmax:
 [[4.00000000e-01 1.00000000e-01 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 4.00000000e-02]
 [4.00000000e-03 1.60000000e-02 0.00000000e+00]
 [1.02400000e-02 6.40000000e-04 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.02400000e-03]
 [4.09600000e-04 1.02400000e-04 0.00000000e+00]
 [1.63840000e-05 2.62144000e-04 0.00000000e+00]
 [1.67772160e-04 5.24288000e-06 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.67772160e-05]
 [1.67772160e-06 6.71088640e-06 0.00000000e+00]
 [4.29496730e-06 2.68435456e-07 0.00000000e+00]
 [8.58993459e-08 2.74877907e-06 0.00000000e+00]
 [1.75921860e-06 5.49755814e-08 0.00000000e+00]
 [3.51843721e-08 1.12589991e-06 0.00000000e+00]
 [0.00000000e+00 0.00000000e+00 1.12589991e-07]] 
phi:
 [[0. 0. 0.]
 [2. 2. 0.]
 [1. 0. 0.]
 [0. 0. 0.]
 [2. 2. 0.]
 [1. 0. 0.]
 [1. 1. 0.]
 [0. 0. 0.]
 [2. 2. 0.]
 [1. 0. 0.]
 [0. 0. 0.]
 [1. 1. 0.]
 [0. 0. 0.]
 [0. 0. 1.]]

Reconstruction using Viterbi:
 [0. 2. 1. 0. 2. 0. 1. 0. 2. 1. 0. 1. 0. 1. 2.]


# Log computations
Now we need to find a way for this to hold even when working with logs.
I suspect that replacing with the minimum to account for the 0 is too big, we need something smaller.

In [122]:
A = np.array([[0.1, 0.8, 0.1],
              [0.8, 0.1, 0.1],
              [0.5, 0.5, 0]])

B = np.array([[0.8, 0.2, 0],
              [0.2, 0.8, 0],
              [0, 0, 1]])

observed = np.array([0, 2, 1])

In [142]:
def compute_f_log(A, B, observed):
    """
    Even though it contains the -inf, this is fine, as that means we should not reconstruct using those (?)
    """
    pi = A[-1]
    n_nodes = len(observed)
    n_states = A.shape[0]
    f = np.zeros((n_nodes-1, n_states, n_states))
    
    tmp = np.zeros((n_states, 1))
    for k in range(n_states):
        tmp[k] = np.log(pi[k]) + np.log(B[k, observed[0]])
    
    f0 = tmp

    for i in range(1, n_nodes):
        tmp = np.zeros((n_states, n_states))
        
        for j in range(n_states): # over z1
            
            for k in range(n_states): # over z2
                tmp[j, k] = np.log(A[j, k]) + np.log(B[k, observed[i]])
    
        f[i-1] = tmp
    

    return f0, f

In [143]:
def Viterbi_log(f0, f):
    n_nodes = f.shape[0] + 1
    n_states = f.shape[1]

    pmax = np.zeros((n_nodes, n_states))    # Need one for every node
    phi = np.zeros((n_nodes - 1, n_states)) # Need one for every node other than the first one (no need to reconstruct it)

    pmax[0] = f0.flatten()

    for i in range(1, n_nodes):
        tmp = ((f[i-1]).T + pmax[i - 1]).T

        pmax[i] = np.max(tmp, axis=0) # by column

        phi[i-1] = np.argmax(tmp, axis=0) # i-1 cause this contains the reconstruction about the (i-1)th element

    return pmax, phi

In [144]:
f0, f = compute_f_log(A, B, observed)
print("f0:\n", f0, "\nf:\n", f)


pmax, phi = Viterbi_log(f0, f)
print("\n\npmax:\n", pmax, "\nphi:\n", phi)

print("\n\n reconstruction:", reconstruct(pmax, phi))

f0:
 [[-0.91629073]
 [-2.30258509]
 [       -inf]] 
f:
 [[[       -inf        -inf -2.30258509]
  [       -inf        -inf -2.30258509]
  [       -inf        -inf        -inf]]

 [[-3.91202301 -0.4462871         -inf]
  [-1.83258146 -2.52572864        -inf]
  [-2.30258509 -0.91629073        -inf]]]


pmax:
 [[-0.91629073 -2.30258509        -inf]
 [       -inf        -inf -3.21887582]
 [-5.52146092 -4.13516656        -inf]] 
phi:
 [[0. 0. 0.]
 [2. 2. 0.]]


 reconstruction: [0. 2. 1.]


  tmp[k] = np.log(pi[k]) + np.log(B[k, observed[0]])
  tmp[j, k] = np.log(A[j, k]) + np.log(B[k, observed[i]])


In [147]:
observed = np.array([0, 2, 1, 2, 1, 0, 1]) # a = 0; b = 1; _ = 2
f0, f = compute_f_log(A, B, observed)
print("f0:\n", f0, "\nf:\n", f)


pmax, phi = Viterbi_log(f0, f)
print("\n\npmax:\n", pmax, "\nphi:\n", phi)

print("\n\n reconstruction:", reconstruct(pmax, phi))

f0:
 [[-0.91629073]
 [-2.30258509]
 [       -inf]] 
f:
 [[[       -inf        -inf -2.30258509]
  [       -inf        -inf -2.30258509]
  [       -inf        -inf        -inf]]

 [[-3.91202301 -0.4462871         -inf]
  [-1.83258146 -2.52572864        -inf]
  [-2.30258509 -0.91629073        -inf]]

 [[       -inf        -inf -2.30258509]
  [       -inf        -inf -2.30258509]
  [       -inf        -inf        -inf]]

 [[-3.91202301 -0.4462871         -inf]
  [-1.83258146 -2.52572864        -inf]
  [-2.30258509 -0.91629073        -inf]]

 [[-2.52572864 -1.83258146        -inf]
  [-0.4462871  -3.91202301        -inf]
  [-0.91629073 -2.30258509        -inf]]

 [[-3.91202301 -0.4462871         -inf]
  [-1.83258146 -2.52572864        -inf]
  [-2.30258509 -0.91629073        -inf]]]


pmax:
 [[ -0.91629073  -2.30258509         -inf]
 [        -inf         -inf  -3.21887582]
 [ -5.52146092  -4.13516656         -inf]
 [        -inf         -inf  -6.43775165]
 [ -8.74033674  -7.35404238        

  tmp[k] = np.log(pi[k]) + np.log(B[k, observed[0]])
  tmp[j, k] = np.log(A[j, k]) + np.log(B[k, observed[i]])


### Try using our words example
Now using the matrix and our phrase we try and see if it works