Prova con scaling factor ( da rvidere)


In [2]:
import numpy as np


# We need an algorithm to perform belief propagation on our hmm
def forward_HMM(A, B, pi, observed):
    """
    A: transition
    B: emission
    pi: initial
    n_nodes: number of nodes in the chain
    observed: list containing observed ones.
    """
    n_nodes = len(observed)
    n_states = A.shape[0]
    alpha = np.zeros((n_nodes, n_states))
    c = np.zeros(n_nodes)

    for j in range(n_states):
        alpha[0, j] = pi[j] * B[j, observed[0]]

    c[0] = 1.0 / np.sum(alpha[0])

    for i in range(1, n_nodes):
        for j in range(n_states):
            for k in range(n_states):
                alpha[i, j] = (
                    alpha[i, j] + A[k, j] * B[j, observed[i]] * alpha[i - 1, k]
                )

            c[i] = 1.0 / np.sum(alpha[i])

    return alpha, c

In [3]:
# We need an algorithm to perform belief propagation on our hmm
def backward_HMM(A, B, observed, c):
    """
    A: transition
    B: emission
    n_nodes: number of nodes in the chain
    observed: list containing observed ones.
    """
    n_nodes = len(observed)
    n_states = A.shape[0]
    beta = np.zeros((n_nodes - 1, n_states))

    for j in range(n_states):
        for k in range(n_states):
            beta[-1, j] = (beta[-1, j] + A[j, k] * B[k, observed[n_nodes - 1]]) * c[-1]

    for i in range(n_nodes - 3, -1, -1):
        for j in range(n_states):
            for k in range(n_states):
                beta[i, j] = (
                    beta[i, j] + A[j, k] * B[k, observed[i + 1]] * beta[i + 1, k]
                ) * c[i]

    return beta

In [4]:
def compute_conditional(alpha, beta, i):
    """
    alpha: list containing forward messages
    beta: list containing backward messages
    i : hidden element for which you want the conditional on the observed variables (i = 1, ..., M)
    """
    if i == 0:
        raise ValueError("no zio serve il numero di variabile")

    if i == alpha.shape[0]:
        return alpha[i - 1] / np.sum(alpha[i - 1])

    gamma = alpha[i - 1] * beta[i - 1]
    gamma = gamma / np.sum(gamma)

    return gamma

In [5]:
def compute_all_conditional(alpha, beta):
    """
    alpha: list containing forward messages
    beta: list containing backward messages
    """
    n_nodes = alpha.shape[0]
    n_states = alpha.shape[1]

    gamma = np.zeros((n_nodes, n_states))

    gamma[n_nodes - 1] = alpha[n_nodes - 1] / np.sum(alpha[n_nodes - 1])

    for i in range(n_nodes - 1):
        gamma[i] = alpha[i] * beta[i] / np.sum(alpha[i] * beta[i])

    return gamma

In [6]:
def divide_row_by_sum(matrix):
    row_sums = np.sum(matrix, axis=1)  # Calculate the sum of each row
    divided_matrix = (
        matrix / row_sums[:, np.newaxis]
    )  # Divide each element by the corresponding row sum
    return divided_matrix

In [7]:
def update_B(gamma, observed):
    # n_nodes = gamma.shape[0]
    n_states = gamma.shape[1]

    B = np.zeros((n_states, n_states))

    for i in range(n_states):
        for j in range(n_states):
            for k in range(len(observed)):
                if observed[k] == j:
                    B[i, j] += gamma[k, i]

    return divide_row_by_sum(B)

In [8]:
def Baum_Welch(A, B_start, pi, observed, maxIter=100):
    B = np.copy(B_start)
    for it in range(maxIter):
        alpha, c = forward_HMM(A, B, pi, observed)
        beta = backward_HMM(A, B, observed, c)
        gamma = compute_all_conditional(alpha, beta)
        B = update_B(gamma, observed)
    return B

In [11]:
# gpt version of scaling


def forward_HMM_scaled(A, B, pi, observed):
    n_nodes = len(observed)
    n_states = A.shape[0]
    alpha = np.zeros((n_nodes, n_states))
    scaling_factors = np.zeros(n_nodes)

    # Initialization
    scaling_factors[0] = 1.0 / np.sum(pi * B[:, observed[0]])
    alpha[0, :] = pi * B[:, observed[0]] * scaling_factors[0]

    # Induction
    for i in range(1, n_nodes):
        scaling_factors[i] = 1.0 / np.sum(A @ (alpha[i - 1, :] * B[:, observed[i - 1]]))
        alpha[i, :] = (
            A
            @ (alpha[i - 1, :] * B[:, observed[i - 1]])
            * B[:, observed[i]]
            * scaling_factors[i]
        )

    return alpha, scaling_factors


def backward_HMM_scaled(A, B, observed, scaling_factors):
    n_nodes = len(observed)
    n_states = A.shape[0]
    beta = np.zeros((n_nodes, n_states))

    # Initialization
    beta[n_nodes - 1, :] = scaling_factors[n_nodes - 1]

    # Induction
    for i in range(n_nodes - 2, -1, -1):
        beta[i, :] = A.T @ (B[:, observed[i + 1]] * beta[i + 1, :]) * scaling_factors[i]

    return beta


def Baum_Welch_scaled(A, B_start, pi, observed, maxIter=100):
    B = np.copy(B_start)
    for it in range(maxIter):
        alpha, scaling_factors = forward_HMM_scaled(A, B, pi, observed)
        beta = backward_HMM_scaled(A, B, observed, scaling_factors)
        gamma = compute_all_conditional(alpha, beta)
        B = update_B(gamma, observed)
    return B

In [16]:
# questo non so cosa sia


def baum_welch_gpt_not_scaling(A, B_start, pi, observed, maxiter):
    N = A.shape[0]  # Number of states
    M = A.shape[1]  # Number of possible emissions
    T = len(observed)  # Length of observed chain

    B = B_start.copy()  # Make a copy of initial emission probabilities

    for _ in range(maxiter):
        # Forward-Backward algorithm (Expectation step)
        alpha = np.zeros((T, N))
        beta = np.zeros((T, N))
        c = np.zeros(T)

        # Forward pass
        alpha[0] = pi * B[:, observed[0]]
        c[0] = 1.0 / np.sum(alpha[0])
        alpha[0] *= c[0]
        for t in range(1, T):
            alpha[t] = np.dot(alpha[t - 1], A) * B[:, observed[t]]
            c[t] = 1.0 / np.sum(alpha[t])
            alpha[t] *= c[t]

        # Backward pass
        beta[T - 1] = 1
        beta[T - 1] *= c[T - 1]
        for t in range(T - 2, -1, -1):
            beta[t] = np.dot(A, beta[t + 1] * B[:, observed[t + 1]])
            beta[t] *= c[t]

        # Compute gamma and xi matrices
        gamma = alpha * beta
        xi = np.zeros((T - 1, N, N))
        for t in range(T - 1):
            xi[t] = (
                alpha[t][:, np.newaxis] * A * B[:, observed[t + 1]] * beta[t + 1]
            ) * c[t]

        # Maximization step
        B_new = np.zeros((N, M))
        for i in range(N):
            for j in range(M):
                B_new[i, j] = np.sum(gamma[:, i] * (observed == j)) / np.sum(
                    gamma[:, i]
                )

        # Check for convergence
        if np.allclose(B, B_new):
            break

        B = B_new

    return B

In [17]:
import numpy as np


def baum_welch_gpt_scaling(A, B_start, pi, observed, maxiter=100):
    N = A.shape[0]  # Number of states
    M = A.shape[1]  # Number of possible emissions
    T = len(observed)  # Length of observed chain

    B = B_start.copy()  # Make a copy of initial emission probabilities

    for _ in range(maxiter):
        # Forward-Backward algorithm (Expectation step)
        alpha = np.zeros((T, N))
        beta = np.zeros((T, N))
        c = np.zeros(T)

        # Forward pass
        alpha[0] = pi * B[:, observed[0]]
        c[0] = 1.0 / np.sum(alpha[0])
        alpha[0] *= c[0]
        for t in range(1, T):
            alpha[t] = np.dot(alpha[t - 1], A) * B[:, observed[t]]
            c[t] = 1.0 / np.sum(alpha[t])
            alpha[t] *= c[t]

        # Backward pass
        beta[T - 1] = 1
        beta[T - 1] *= c[T - 1]
        for t in range(T - 2, -1, -1):
            beta[t] = np.dot(A, beta[t + 1] * B[:, observed[t + 1]])
            beta[t] *= c[t]

        # Scaling factors
        scale = np.cumprod(c)
        alpha *= scale[:, np.newaxis]
        beta *= scale[:, np.newaxis]

        # Compute gamma and xi matrices
        gamma = alpha * beta
        xi = np.zeros((T - 1, N, N))
        for t in range(T - 1):
            xi[t] = alpha[t][:, np.newaxis] * A * B[:, observed[t + 1]] * beta[t + 1]

        # Maximization step
        B_new = np.zeros((N, M))
        for i in range(N):
            for j in range(M):
                B_new[i, j] = np.sum(gamma[:, i] * (observed == j)) / np.sum(
                    gamma[:, i]
                )

        # Check for convergence
        if np.allclose(B, B_new):
            break

        B = B_new

    return B

# Test


In [18]:
A = np.array([[0.6, 0.4], [0.3, 0.7]])
B = np.array([[0.5, 0.5], [0.1, 0.9]])
pi = np.array([0.2, 0.8])
observed = np.array([1, 0, 1])
B_start = np.zeros((2, 2)) + 0.5

alpha, c = forward_HMM(A, B, pi, observed)
beta = backward_HMM(A, B, observed, c)
gamma = compute_all_conditional(alpha, beta)

In [19]:
Baum_Welch(A, B_start, pi, observed)

array([[0.61345426, 0.38654574],
       [0.14035149, 0.85964851]])

In [20]:
baum_welch_gpt_not_scaling(A, B_start, pi, observed, maxiter=100)

array([[0.52062804, 0.47937196],
       [0.36155231, 0.63844769]])

In [22]:
baum_welch_gpt_scaling(A, B_start, pi, observed, maxiter=100)

  gamma = alpha * beta
  B_new[i, j] = np.sum(gamma[:, i] * (observed == j)) / np.sum(
  B_new[i, j] = np.sum(gamma[:, i] * (observed == j)) / np.sum(


array([[nan, nan],
       [nan, nan]])

In [24]:
pi = [0.5, 0.5]
N = 10000

A = np.array([[0.2, 0.8], [0.6, 0.4]])

chain = np.zeros(N)
chain[0] = np.random.binomial(1, 0.5)
for i in range(1, N):
    if chain[i - 1] == 0:
        chain[i] = np.random.binomial(1, 0.8)
    else:
        chain[i] = np.random.binomial(1, 0.4)

chain = chain.astype(int)

In [25]:
B_true = np.array([[0.8, 0.2], [0.1, 0.9]])

viewed_chain = np.zeros(N)
for i in range(N):
    if chain[i] == 0:
        viewed_chain[i] = np.random.binomial(1, B[0, 1])
    else:
        viewed_chain[i] = np.random.binomial(1, B[1, 1])

viewed_chain = viewed_chain.astype(int)

In [26]:
B_start = np.zeros((2, 2)) + 0.5

In [27]:
Baum_Welch(A, B_start, pi, observed=viewed_chain, maxIter=10000)

  c[i] = 1.0 / np.sum(alpha[i])
  c[i] = 1.0 / np.sum(alpha[i])
  gamma[n_nodes - 1] = alpha[n_nodes - 1] / np.sum(alpha[n_nodes - 1])
  gamma[i] = alpha[i] * beta[i] / np.sum(alpha[i] * beta[i])
  gamma[i] = alpha[i] * beta[i] / np.sum(alpha[i] * beta[i])


KeyboardInterrupt: 

In [17]:
Baum_Welch_scaled(A, B_start, pi, observed=viewed_chain, maxIter=10000)

array([[0.01750132, 0.98249868],
       [0.42631791, 0.57368209]])

In [18]:
baum_welch(A, B_start, pi, observed=viewed_chain, maxiter=1000000)

array([[0.11243579, 0.88756421],
       [0.54824586, 0.45175414]])

cancella sotto


In [34]:
import numpy as np


# We need an algorithm to perform belief propagation on our hmm
def forward_HMM(A, B, pi, observed):
    """
    A: transition
    B: emission
    pi: initial
    observed: list containing observed ones.
    """
    n_nodes = len(observed)
    n_states = A.shape[0]
    alpha = np.zeros((n_nodes, n_states))
    scaling_factors = np.zeros(n_nodes)

    # Initialization
    scaling_factors[0] = 1.0 / np.sum(pi * B[:, observed[0]])
    alpha[0, :] = scaling_factors[0] * pi * B[:, observed[0]]

    # Induction
    for i in range(1, n_nodes):
        scaling_factors[i] = 1.0 / np.sum(
            alpha[i - 1, :] * A[:, :].T * B[:, observed[i]]
        )
        alpha[i, :] = (
            scaling_factors[i] * (alpha[i - 1, :] @ A[:, :]) * B[:, observed[i]]
        )

    return alpha, scaling_factors


# We need an algorithm to perform belief propagation on our hmm
def backward_HMM(A, B, observed, scaling_factors):
    """
    A: transition
    B: emission
    observed: list containing observed ones.
    scaling_factors: scaling factors computed during forward pass
    """
    n_nodes = len(observed)
    n_states = A.shape[0]
    beta = np.zeros((n_nodes - 1, n_states))

    # Initialization
    beta[-1, :] = scaling_factors[-1] * np.ones(n_states)

    # Induction
    for i in range(n_nodes - 3, -1, -1):
        beta[i, :] = scaling_factors[i + 1] * (
            beta[i + 1, :] * B[:, observed[i + 1]] @ A[:, :].T
        )

    return beta


def compute_all_conditional(alpha, beta, scaling_factors):
    """
    alpha: forward messages
    beta: backward messages
    scaling_factors: scaling factors computed during forward pass
    """
    n_nodes = alpha.shape[0]
    n_states = alpha.shape[1]

    gamma = np.zeros((n_nodes, n_states))

    gamma[-1, :] = alpha[-1, :] / np.sum(alpha[-1, :])

    for i in range(n_nodes - 1):
        gamma[i, :] = (alpha[i, :] * beta[i, :]) / np.sum(alpha[i, :] * beta[i, :])
        gamma[i, :] /= scaling_factors[i]

    return gamma


def divide_row_by_sum(matrix):
    row_sums = np.sum(matrix, axis=1)  # Calculate the sum of each row
    divided_matrix = (
        matrix / row_sums[:, np.newaxis]
    )  # Divide each element by the corresponding row sum
    return divided_matrix


def update_B(gamma, observed):
    n_states = gamma.shape[1]
    B = np.zeros((n_states, n_states))

    for i in range(n_states):
        for j in range(n_states):
            for k in range(len(observed)):
                if observed[k] == j:
                    B[i, j] += gamma[k, i]

    return divide_row_by_sum(B)


def Baum_Welch(A, B_start, pi, observed, maxIter=100):
    B = np.copy(B_start)
    for it in range(maxIter):
        alpha, scaling_factors = forward_HMM(A, B, pi, observed)
        beta = backward_HMM(A, B, observed, scaling_factors)
        gamma = compute_all_conditional(alpha, beta, scaling_factors)
        B = update_B(gamma, observed)
    return B

In [35]:
Baum_Welch(A, B_start, pi, observed=viewed_chain, maxIter=1000000)

TypeError: can't multiply sequence by non-int of type 'numpy.float64'

In [39]:
B.shape

(2, 2)