In [37]:
import torch
import numpy as np

In [85]:
class CHMM:
    """Parameter container for a discrete-emission hidden-state model.

    Attributes
    ----------
    num_states : int
        Number of hidden states.
    emission_to_index_map : dict
        Maps each emission symbol to its column index in ``emission_probs``.
    num_emissions : int
        Number of distinct emission symbols.
    transition_probs : np.ndarray
        ``(num_states, num_states)`` matrix, initialised uniformly.
    emission_probs : np.ndarray
        ``(num_states, num_emissions)`` matrix, randomly initialised with
        every row normalised to sum to 1.
    """

    def __init__(self, num_states, unique_emissions):
        """Build a model with uniform transitions and random emissions.

        Parameters
        ----------
        num_states : int
            Number of hidden states; must be at least 1.
        unique_emissions : iterable of hashable
            Emission symbols. Repeated symbols are collapsed (first
            occurrence wins) so indices stay contiguous.

        Raises
        ------
        ValueError
            If ``num_states`` is smaller than 1.
        """
        if num_states < 1:
            raise ValueError(f"num_states must be at least 1, got {num_states!r}.")

        # De-duplicate while preserving iteration order; this also accepts
        # iterables without ``len`` (e.g. generators).
        symbols = list(dict.fromkeys(unique_emissions))

        self.num_states = num_states
        self.emission_to_index_map = {symbol: idx for idx, symbol in enumerate(symbols)}
        self.num_emissions = len(symbols)
        self.transition_probs = np.full((num_states, num_states), 1.0 / num_states)

        # Random initialisation, normalised so each state's emission row is a
        # proper probability distribution.
        raw = np.random.rand(num_states, self.num_emissions)
        self.emission_probs = raw / raw.sum(axis=1, keepdims=True)

    def observation_to_indices(self, observation):
        """Translate a sequence of emission symbols into column indices.

        Parameters
        ----------
        observation : iterable
            Emission symbols, each of which must be a key of
            ``emission_to_index_map``.

        Returns
        -------
        list of int
            Index of every symbol, in order.

        Raises
        ------
        ValueError
            If a symbol is not known to the model.
        """
        indices = []
        for emission in observation:
            try:
                indices.append(self.emission_to_index_map[emission])
            except KeyError:
                raise ValueError(f"Unexpected emission '{emission}' found in observation.") from None
        return indices


In [86]:
def logsumexp(a):
    """Compute ``log(sum(exp(a)))`` over all elements of ``a``.

    The maximum element is subtracted before exponentiating so that large
    magnitudes do not overflow or underflow.

    Parameters
    ----------
    a : array_like
        Input values; must contain at least one element.

    Returns
    -------
    float
        The log of the sum of exponentials. ``-inf`` if every element is
        ``-inf``; ``+inf`` if the largest element is ``+inf``.
    """
    a = np.asarray(a)
    a_max = np.max(a)
    # A non-finite maximum is already the result. Shifting by it would
    # evaluate inf - inf and yield NaN.
    if not np.isfinite(a_max):
        return a_max
    return np.log(np.sum(np.exp(a - a_max))) + a_max

def forward_pass(chmm, observation_indices):
    """Run the forward recursion in log space.

    The initial state distribution is uniform over ``chmm.num_states``.

    Parameters
    ----------
    chmm : object
        Model exposing ``num_states``, ``transition_probs`` (indexed
        ``[from_state, to_state]``) and ``emission_probs`` (indexed
        ``[state, emission_index]``).
    observation_indices : sequence of int
        Emission indices, one per time step.

    Returns
    -------
    np.ndarray
        ``(T, num_states)`` array whose entry ``[t, j]`` is the log of the
        joint probability of the first ``t + 1`` observations and being in
        state ``j`` at time ``t``. Has zero rows for an empty sequence.
    """
    num_states = chmm.num_states
    T = len(observation_indices)

    # -inf represents log(0).
    forward_probs = np.full((T, num_states), -np.inf)
    if T == 0:
        return forward_probs

    # Take the logs once. log(0) = -inf is a legitimate value here, so the
    # divide-by-zero warning is suppressed.
    with np.errstate(divide="ignore"):
        log_trans = np.log(np.asarray(chmm.transition_probs))
        log_emis = np.log(np.asarray(chmm.emission_probs)[:num_states])

    # Initialisation: uniform prior times the emission probability of the
    # first observation (the first observation must be accounted for, just
    # like every later one).
    forward_probs[0, :] = np.log(1.0 / num_states) + log_emis[:, observation_indices[0]]

    for t in range(1, T):
        # scores[i, j] = forward[t-1, i] + log P(i -> j)
        scores = forward_probs[t - 1][:, np.newaxis] + log_trans
        forward_probs[t, :] = (
            np.logaddexp.reduce(scores, axis=0) + log_emis[:, observation_indices[t]]
        )

    return forward_probs

def backward_pass(chmm, observation_indices):
    """Run the backward recursion in log space.

    Parameters
    ----------
    chmm : object
        Model exposing ``num_states``, ``transition_probs`` (indexed
        ``[from_state, to_state]``) and ``emission_probs`` (indexed
        ``[state, emission_index]``).
    observation_indices : sequence of int
        Emission indices, one per time step.

    Returns
    -------
    np.ndarray
        ``(T, num_states)`` array whose entry ``[t, i]`` is the log
        probability of the observations after time ``t`` given state ``i``
        at time ``t``. The last row is all zeros (log 1). Has zero rows for
        an empty sequence.
    """
    num_states = chmm.num_states
    T = len(observation_indices)

    # -inf represents log(0).
    backward_probs = np.full((T, num_states), -np.inf)
    if T == 0:
        # Nothing to recurse over; indexing row T - 1 would fail.
        return backward_probs
    backward_probs[T - 1, :] = 0  # log(1)

    # Take the logs once instead of once per (t, i) pair. log(0) = -inf is a
    # legitimate value here, so the divide-by-zero warning is suppressed.
    with np.errstate(divide="ignore"):
        log_trans = np.log(np.asarray(chmm.transition_probs))
        log_emis = np.log(np.asarray(chmm.emission_probs))

    for t in range(T - 2, -1, -1):
        # future[j] = log b_j(o_{t+1}) + backward[t+1, j]
        future = log_emis[:, observation_indices[t + 1]] + backward_probs[t + 1, :]
        # Row i of the sum is log P(i -> j) + future[j]; reduce over j.
        backward_probs[t, :] = np.logaddexp.reduce(
            log_trans[:num_states] + future[np.newaxis, :], axis=1
        )

    return backward_probs

def forward_backward_algorithm(chmm, observation):
    """Compute per-time-step state probabilities for an observation.

    Parameters
    ----------
    chmm : CHMM
        Model providing ``observation_to_indices`` plus the attributes used
        by ``forward_pass`` and ``backward_pass``.
    observation : iterable
        Emission symbols known to ``chmm``.

    Returns
    -------
    np.ndarray
        ``(T, num_states)`` array in which every row is normalised to sum
        to 1.

    Raises
    ------
    ValueError
        Propagated from ``chmm.observation_to_indices`` for unknown symbols.
    """
    observation_indices = chmm.observation_to_indices(observation)
    forward_probs = forward_pass(chmm, observation_indices)
    backward_probs = backward_pass(chmm, observation_indices)

    log_joint = forward_probs + backward_probs

    # Normalise while still in log space. Exponentiating first would
    # underflow to 0 for long sequences and turn the division into 0 / 0.
    log_norm = np.logaddexp.reduce(log_joint, axis=1, keepdims=True)
    state_probs = np.exp(log_joint - log_norm)

    return state_probs


In [87]:
def calculate_nk(chmm, observation):
    """Placeholder for the ``nk`` statistic.

    ``observation`` is currently ignored; the result is a freshly drawn
    random ``(chmm.num_states, chmm.num_emissions)`` array.
    """
    # TODO: replace the random draw with the real nk computation.
    shape = (chmm.num_states, chmm.num_emissions)
    return np.random.rand(*shape)

def calculate_mk(chmm, observation, labels):
    """Placeholder for the ``mk`` statistic.

    ``observation`` and ``labels`` are currently ignored; the result is a
    freshly drawn random ``(chmm.num_states, chmm.num_emissions)`` array.
    """
    # TODO: replace the random draw with the real mk computation.
    n_rows = chmm.num_states
    n_cols = chmm.num_emissions
    return np.random.rand(n_rows, n_cols)


In [88]:
def update_theta(chmm, nk, mk, learning_rate, epsilon=1e-8):
    """Apply one gradient step to ``chmm.emission_probs`` in place.

    For every entry the gradient is ``-(mk - nk) / max(theta, epsilon)``,
    where ``theta`` is the current emission probability, and the entry is
    decreased by ``learning_rate * gradient``.

    Parameters
    ----------
    chmm : object
        Model exposing ``num_states``, ``num_emissions`` and the array
        ``emission_probs``.
    nk, mk : array_like
        Statistics indexed ``[state][emission]``.
    learning_rate : float
        Step size.
    epsilon : float, optional
        Lower bound applied to ``theta`` in the denominator only.
    """
    n_rows, n_cols = chmm.num_states, chmm.num_emissions

    # Basic slicing yields a view, so the in-place subtraction below writes
    # straight into chmm.emission_probs.
    theta = chmm.emission_probs[:n_rows, :n_cols]
    nk_block = np.asarray(nk)[:n_rows, :n_cols]
    mk_block = np.asarray(mk)[:n_rows, :n_cols]

    denominator = np.maximum(theta, epsilon)
    gradient = -(mk_block - nk_block) / denominator
    theta -= learning_rate * gradient


In [89]:
def train_chmm(chmm, observation_label_pairs, learning_rate, num_iterations, batch_size, epsilon=1e-8):
    """Train ``chmm`` with shuffled mini-batches.

    Each iteration reshuffles the data and feeds consecutive slices of
    ``batch_size`` pairs to ``update_parameters_from_batch``.

    Parameters
    ----------
    chmm : CHMM
        Model updated in place.
    observation_label_pairs : iterable of (observation, labels)
        Training data. It is copied once, so the caller's container is never
        reordered.
    learning_rate : float
        Step size forwarded to the batch update.
    num_iterations : int
        Number of passes over the data.
    batch_size : int
        Number of pairs per batch; must be at least 1.
    epsilon : float, optional
        Forwarded to the batch update.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        # A zero step makes range() fail with an unrelated message and a
        # negative step would silently skip all training.
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}.")

    # Shuffle a private copy so the caller's list keeps its order.
    pairs = list(observation_label_pairs)

    for _ in range(num_iterations):
        np.random.shuffle(pairs)
        for start in range(0, len(pairs), batch_size):
            batch = pairs[start:start + batch_size]
            update_parameters_from_batch(chmm, batch, learning_rate, epsilon)

def update_parameters_from_batch(chmm, batch, learning_rate, epsilon=1e-8):
    """Apply one gradient step computed from a batch of training pairs.

    ``calculate_nk`` and ``calculate_mk`` are evaluated for every pair, the
    results are averaged over the batch, and ``chmm.emission_probs`` is
    decreased in place by
    ``learning_rate * -(avg_mk - avg_nk) / max(theta, epsilon)``.

    Parameters
    ----------
    chmm : CHMM
        Model updated in place.
    batch : iterable of (observation, labels)
        Training pairs. An empty batch leaves the model untouched.
    learning_rate : float
        Step size.
    epsilon : float, optional
        Lower bound applied to the current probabilities in the denominator.

    Raises
    ------
    ValueError
        Propagated from ``chmm.observation_to_indices`` for unknown symbols.
    """
    batch = list(batch)
    if not batch:
        # Averaging zero samples would give NaN; there is nothing to learn.
        return

    batch_nk = []
    batch_mk = []
    for observation, labels in batch:
        observation_indices = chmm.observation_to_indices(observation)
        batch_nk.append(calculate_nk(chmm, observation_indices))
        batch_mk.append(calculate_mk(chmm, observation_indices, labels))

    # Average nk and mk over the batch.
    avg_nk = np.mean(batch_nk, axis=0)
    avg_mk = np.mean(batch_mk, axis=0)

    # Every entry's update depends only on that entry, so the whole matrix
    # can be updated at once.
    theta = np.maximum(chmm.emission_probs, epsilon)
    gradient = -(avg_mk - avg_nk) / theta
    chmm.emission_probs -= learning_rate * gradient


In [90]:
def update_theta_sgd(chmm, avg_nk, avg_mk, learning_rate, epsilon=1e-8):
    """Plain gradient-descent step on ``chmm.emission_probs`` (in place).

    Each entry is decreased by
    ``learning_rate * -(avg_mk - avg_nk) / max(theta, epsilon)``, where
    ``theta`` is the entry's current value.

    Parameters
    ----------
    chmm : object
        Model exposing ``num_states``, ``num_emissions`` and
        ``emission_probs``.
    avg_nk, avg_mk : indexable
        Batch-averaged statistics indexed ``[state][emission]``.
    learning_rate : float
        Step size.
    epsilon : float, optional
        Lower bound applied to ``theta`` in the denominator only.
    """
    probs = chmm.emission_probs
    for row, col in np.ndindex(chmm.num_states, chmm.num_emissions):
        denominator = max(probs[row][col], epsilon)
        slope = -(avg_mk[row][col] - avg_nk[row][col]) / denominator
        probs[row][col] -= learning_rate * slope

            
def initialize_adam_parameters(chmm):
    """Create the optimiser state consumed by ``update_theta_adam``.

    Returns
    -------
    dict
        ``"m"`` and ``"v"`` are separate zero arrays of shape
        ``(chmm.num_states, chmm.num_emissions)``; ``"t"`` is the step
        counter, starting at 0.
    """
    shape = (chmm.num_states, chmm.num_emissions)
    return dict(m=np.zeros(shape), v=np.zeros(shape), t=0)

def update_theta_adam(chmm, avg_nk, avg_mk, learning_rate, adam_params, epsilon=1e-8, beta1=0.9, beta2=0.999):
    """Adam-style step on ``chmm.emission_probs`` (in place).

    The per-entry gradient is ``-(avg_mk - avg_nk) / max(theta, epsilon)``.
    ``adam_params`` is mutated: ``"t"`` is incremented and the moment arrays
    ``"m"`` and ``"v"`` are updated element by element.

    Parameters
    ----------
    chmm : object
        Model exposing ``num_states``, ``num_emissions`` and
        ``emission_probs``.
    avg_nk, avg_mk : indexable
        Batch-averaged statistics indexed ``[state][emission]``.
    learning_rate : float
        Step size.
    adam_params : dict
        State with keys ``"m"``, ``"v"`` and ``"t"``.
    epsilon : float, optional
        Used both as the lower bound on ``theta`` in the gradient and as the
        stabiliser added to ``sqrt(v_hat)``.
    beta1, beta2 : float, optional
        Decay rates of the first and second moment estimates.
    """
    adam_params["t"] += 1
    step = adam_params["t"]
    first_moment = adam_params["m"]
    second_moment = adam_params["v"]

    # The bias-correction denominators depend only on the step count.
    first_correction = 1 - beta1 ** step
    second_correction = 1 - beta2 ** step

    for row, col in np.ndindex(chmm.num_states, chmm.num_emissions):
        denominator = max(chmm.emission_probs[row][col], epsilon)
        grad = -(avg_mk[row][col] - avg_nk[row][col]) / denominator

        # Biased first and second raw moment estimates.
        first_moment[row][col] = beta1 * first_moment[row][col] + (1 - beta1) * grad
        second_moment[row][col] = beta2 * second_moment[row][col] + (1 - beta2) * (grad ** 2)

        # Bias-corrected estimates.
        m_hat = first_moment[row][col] / first_correction
        v_hat = second_moment[row][col] / second_correction

        chmm.emission_probs[row][col] -= learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

    adam_params["m"] = first_moment
    adam_params["v"] = second_moment

{'E', 'M', 'N'}
{0, 2}
[([1, 0, 2], [0, 0, 2])]


ValueError: Unexpected emission '1' found in observation.