In [1]:
import numpy as np

## Markov Models, Sequential Data


The likelihood of a sequence $(x_1, x_2,\ldots,x_N)$ can be defined as a markov model of $M^{\mathrm{th}}$ order by

\begin{equation}
p(x_1,\ldots,x_N) = p(x_1) p(x_2|x_1)\ldots p(x_{M}|x_{M-1},\ldots x_1)\prod_{n=M}^N p(x_n| x_{n-1},\ldots x_{n-M}).
\end{equation}

The simplest order model where $M=1$ is defined as

\begin{equation}
p(x_1,\ldots,x_N) = p(x_1) \prod_{n=2}^N p(x_n | x_{n-1}).
\end{equation}

For this model, if $x\in\mathcal{N}$ is discrete with $K$ possible values, we can define $p(x_n|x_{n-1})$ by the transition matrix $\mathbf{A}: A_{ij}=p(x_n=i|x_{n-1}=j)$. This is a $K\times K$ rank 2 tensor but because we know that

\begin{equation}
\sum_i p(x_n=i|x_{n-1}=j) = 1, \forall j
\end{equation}

there are $K-1$ free variables per row of $\mathbf{A}$ and $K(K-1)$ free variables in total. For $M=1$,

\begin{equation}
\mathbf{A} =
\begin{bmatrix}
a_0 & 1-a_0 \\
a_1 & 1-a_1
\end{bmatrix}
\end{equation}

In [11]:
class ConditionalDistribution:
    """
    Conditional probability distribution p(x_n | x_n-1, .., x_n-M) for 
    a M^th order Markox sequence.
    """
    def __init__(self, K: int, M: int):
        # number of possible categorical values
        self.K = K
        
        # order of Markovian dependence
        self.M = M
        
        # uniform initial values for transition probabilities
        self.A = np.ones([self.K]*(M+1))/K
        
    def prob(self, *args) -> float:
        """
        Return p(args[0]|args[1],args[2],..,args[M])
        
        args[0] = x_n
        args[1] = x_n-1
        .
        .
        .
        args[M] = x_n-M
        
        Assume that elements of *args are integers in the range 0 to K-1
        inclusive.
        """
        if len(args) != self.M+1:
            raise Exception('Input shape doesnt match expected shape')
            
        return self.A[tuple(args)]
    
    def log_prob(self, *args) -> float:
        return np.log(self.prob(*args))
            
    def grad_log_prob(self, *args) -> np.ndarray:
        """
        Returns the gradient of ln p(x_n|x_n-1..) with respect to A
        """
        out = np.zeros(self.A.shape)
        out[tuple(args)] = 1.0/self.A[tuple(args)]
        return out
    
    def get_params(self) -> np.ndarray:
        """
        Returns 1-d array of free parameters. Order of indices is
        
        for 
        """
        # p(x_n=K-1 | x_n-1 = j) = 1 - sum_k=0^K-2 p(x_n=k | x_n-1 = j)
        return np.reshape(self.A[:-1, ...], (-1, ))
    
    def set_params(self, A_small: np.ndarray):
        """
        Set self.A, the (K,...,K) rank M tensor for conditional 
        transition probabilities from the (K-1, K, ..., K) rank M tensor
        A_small.
        """
        # rank M tensor, shape = (K-1, K, ...,K)
        A = np.reshape(A_small, tuple([K-1]+[K]*M))
        
        # rank M tensor, shape = (1, K, ..., K)
        A_ = 1.0 - np.reshape(np.sum(A, axis=0), tuple([1]+[K]*M))
        
        # rank M tensor, shape = (K,...,K)
        self.A = np.vstack((A, A_))
        
            
class MarkovModel:
    def __init__(self, K: int, M: int):
        self.K = k
        self.M = M
        
        self.models = [ConditionalDistribution(K=K, M=_m) for _m in range(M+1)]
        
        
    def log_likelihood(self, x: np.ndarray) -> float:
        """
        Returns ln p(x0) + ln p(x1|x0) ... + ln p(x_M-1|...x0) + sum_n p(x_n | x_n-1...)
        """
        # ln p(x0) +...+ ln p(x_M-1 | ...x0)
        out = sum([self.models[_m].log_prob(*x[0:_m+1][::-1]) for _m in range(self.M)])
        
        for _n in range(self.M, x.shape[1]):
            out += self.modles[self.M].log_prob(*x[_n-self.M:_n+1][::-1])
        
inst = ConditionalDistribution(K=2, M=0)
inst.A.shape

(2,)

In [20]:
from copy import deepcopy

inst = ConditionalDistribution(K=4, M=2)

Aorig = deepcopy(inst.A)

inst.set_params(inst.get_params())
Arecon = deepcopy(inst.A)

np.allclose(Aorig, Arecon)

ValueError: cannot reshape array of size 48 into shape (1,)

In [10]:
K = 2
M = 0

size = tuple([K-1]+[K]*M)

print(size)

(1,)
