In [1]:
import numpy as np

In [2]:
def read_data(file_path):
    """Read a text file and return it as a single observation string.

    Leading and trailing whitespace is stripped, then every space character
    is replaced by '#'. Other whitespace inside the text (e.g. newlines) is
    left untouched.

    Args:
        file_path: path of the text file to read.

    Returns:
        The processed file content as a ``str``.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # The context manager closes the handle even if read() raises.
    with open(file_path, 'r') as handle:
        text = handle.read()
    return text.strip().replace(' ', '#')

# Load the training corpus; read_data returns it stripped, with spaces encoded as '#'.
text = read_data('textA.txt')


In [7]:
class simpleHMM:
    """Discrete hidden Markov model over characters, trained with Baum-Welch.

    Characters are mapped to integer symbols by ``get_character_index``. The
    initial state distribution is fixed to uniform.

    Attributes:
        states: number of hidden states.
        observations: size of the observation alphabet.
        transitions: ``(states, states)`` array; ``transitions[l, r]`` is used
            as the probability of moving from state ``l`` to state ``r``.
        emissions: ``(states, observations)`` array; ``emissions[r, k]`` is
            used as the probability that state ``r`` emits symbol ``k``.
        alpha_normalized, q_values, beta_normalized: results of the most
            recent pass, set by ``forward``, ``backward`` and ``BW``.
    """

    def __init__(self, states=2, observations=27, seed=None):
        """Create a model with uniform or randomly drawn parameters.

        Args:
            states: number of hidden states.
            observations: number of distinct observation symbols. The last
                index is the catch-all symbol (see ``get_character_index``).
            seed: ``None`` (default) gives uniform transition and emission
                rows. With uniform rows all states are interchangeable, so a
                Baum-Welch update leaves the transitions uniform and gives
                every state the same emission row. Any value accepted by
                ``np.random.default_rng`` draws each row from a flat
                Dirichlet instead, which breaks that symmetry reproducibly.
        """
        self.states = states
        self.observations = observations
        if seed is None:
            self.transitions = np.ones((states, states)) / states
            self.emissions = np.ones((states, observations)) / observations
        else:
            rng = np.random.default_rng(seed)
            # Each Dirichlet sample is one row that already sums to 1.
            self.transitions = rng.dirichlet(np.ones(states), size=states)
            self.emissions = rng.dirichlet(np.ones(observations), size=states)

    def _encode(self, sequence):
        """Return the symbol index of every character as a 1-D integer array."""
        return np.fromiter(
            (self.get_character_index(character) for character in sequence),
            dtype=np.intp,
            count=len(sequence),
        )

    def _forward_pass(self, symbols):
        """Scaled forward recursion over already-encoded symbols."""
        steps = len(symbols)
        alpha = np.zeros((steps + 1, self.states))
        q_values = np.zeros(steps + 1)

        # Row 0 is the (uniform) initial state distribution, before any symbol.
        alpha[0] = np.ones(self.states) / self.states
        q_values[0] = 1

        for t in range(1, steps + 1):
            unnormalized = (alpha[t - 1] @ self.transitions) * self.emissions[:, symbols[t - 1]]
            q_values[t] = np.sum(unnormalized)
            alpha[t] = unnormalized / q_values[t]

        return alpha, q_values

    def _backward_pass(self, symbols, q_values):
        """Scaled backward recursion over already-encoded symbols."""
        steps = len(symbols)
        beta = np.zeros((steps + 1, self.states))
        beta[-1] = np.ones(self.states) / self.states

        for t in range(steps - 1, -1, -1):
            # `transitions * e` scales column r by e[r], so the product with
            # beta[t + 1] sums over the destination state r.
            beta[t] = (self.transitions * self.emissions[:, symbols[t]]) @ beta[t + 1] / q_values[t]

        return beta

    def forward(self, sequence):
        """Run the scaled forward pass.

        Args:
            sequence: iterable of single characters with a length (e.g. ``str``).

        Returns:
            ``(alpha_normalized, q_values)``. ``alpha_normalized`` has shape
            ``(len(sequence) + 1, states)``; row ``t`` is the normalised
            forward vector after the first ``t`` symbols. ``q_values[t]`` is
            the normaliser applied at step ``t`` (``q_values[0]`` is 1).
            Both are also stored on the instance.
        """
        alpha_normalized, q_values = self._forward_pass(self._encode(sequence))
        self.alpha_normalized = alpha_normalized
        self.q_values = q_values
        return alpha_normalized, q_values

    def backward(self, sequence):
        """Run the scaled backward pass.

        The forward pass is recomputed here so the scaling factors always
        belong to ``sequence`` and to the current parameters; no prior call
        to ``forward`` is required.

        Args:
            sequence: iterable of single characters with a length (e.g. ``str``).

        Returns:
            ``beta_normalized`` of shape ``(len(sequence) + 1, states)``. The
            last row is uniform and row ``t`` is divided by ``q_values[t]``.
            The result (and the refreshed forward quantities) are stored on
            the instance.
        """
        symbols = self._encode(sequence)
        self.alpha_normalized, self.q_values = self._forward_pass(symbols)
        beta_normalized = self._backward_pass(symbols, self.q_values)
        self.beta_normalized = beta_normalized
        return beta_normalized

    def BW(self, sequence):
        """Perform one Baum-Welch (EM) update on ``sequence``.

        Overwrites ``self.transitions`` and ``self.emissions`` with the
        re-estimated matrices and also stores the forward/backward results
        on the instance.

        Args:
            sequence: non-empty iterable of single characters with a length.

        Returns:
            ``(new_transition_prob, new_emission_prob)``.

        Raises:
            ValueError: if ``sequence`` is empty (there is nothing to
                estimate from, and the update would produce NaN parameters).
        """
        if len(sequence) == 0:
            raise ValueError("BW requires a non-empty sequence")

        symbols = self._encode(sequence)
        alpha, q_values = self._forward_pass(symbols)
        beta = self._backward_pass(symbols, q_values)
        self.alpha_normalized = alpha
        self.q_values = q_values
        self.beta_normalized = beta

        # pairwise[i, l, r] = alpha[i, l] * A[l, r] * e_r(x_i) * beta[i + 1, r].
        # With the scaling used in the backward pass this equals the pairwise
        # state posterior up to a factor that is the same for every i, so the
        # row normalisations below remove it.
        emitted = self.emissions[:, symbols].T
        pairwise = (
            alpha[:-1, :, np.newaxis]
            * self.transitions[np.newaxis, :, :]
            * emitted[:, np.newaxis, :]
            * beta[1:, np.newaxis, :]
        )

        new_transition_prob = np.sum(pairwise, axis=0)
        new_transition_prob = new_transition_prob / np.sum(new_transition_prob, axis=1, keepdims=True)

        # Accumulate, per symbol, the mass of being in destination state r.
        # np.add.at is unbuffered, so repeated symbols are all added.
        symbol_mass = np.zeros((self.observations, self.states))
        np.add.at(symbol_mass, symbols, np.sum(pairwise, axis=1))
        new_emission_probs = symbol_mass.T
        new_emission_prob = new_emission_probs / np.sum(new_emission_probs, axis=1, keepdims=True)

        self.transitions = new_transition_prob
        self.emissions = new_emission_prob

        return new_transition_prob, new_emission_prob

    def get_character_index(self, character):
        """Map a single character to its observation symbol index.

        Lowercase letters map to their alphabet position (``'a'`` -> 0) as
        long as that position lies below the last index. Every other
        character (including the space marker ``'#'``) maps to the last
        index, ``observations - 1``, so the result is always a valid column
        of ``emissions``. With the default 27 symbols this is ``a``-``z`` ->
        0-25 and everything else -> 26.
        """
        catch_all = self.observations - 1
        offset = ord(character) - ord('a')
        if 0 <= offset < min(26, catch_all):
            return offset
        return catch_all


In [8]:
# Build a model with the default arguments and apply one Baum-Welch update to the corpus.
# The cell's displayed value is the (transitions, emissions) tuple returned by BW.
# NOTE(review): in the recorded output the transitions are still all 0.5 and both
# emission rows are identical, i.e. the two states are not distinguished.
a = simpleHMM()
a.BW(text)



(array([[0.5, 0.5],
        [0.5, 0.5]]),
 array([[0.06773333, 0.01243333, 0.02866667, 0.03593333, 0.105     ,
         0.0182    , 0.0146    , 0.03723333, 0.0579    , 0.00273333,
         0.0045    , 0.0391    , 0.0192    , 0.05413333, 0.06503333,
         0.0193    , 0.00083333, 0.05333333, 0.05616667, 0.0779    ,
         0.02273333, 0.00806667, 0.01323333, 0.0028    , 0.01396667,
         0.00046667, 0.1688    ],
        [0.06773333, 0.01243333, 0.02866667, 0.03593333, 0.105     ,
         0.0182    , 0.0146    , 0.03723333, 0.0579    , 0.00273333,
         0.0045    , 0.0391    , 0.0192    , 0.05413333, 0.06503333,
         0.0193    , 0.00083333, 0.05333333, 0.05616667, 0.0779    ,
         0.02273333, 0.00806667, 0.01323333, 0.0028    , 0.01396667,
         0.00046667, 0.1688    ]]))

In [39]:
import numpy as np

matrix = np.array([[1, 2], [3, 4]])
vector = np.array([10, 20])

# Broadcasting check: a 1-D vector is aligned with the LAST axis, so column j of
# `matrix` is multiplied by vector[j]. (Scaling row i by vector[i] would instead
# need `matrix * vector[:, None]`.)
result = matrix * vector

print(result)
# Output:
# [[10 40]
#  [30 80]]

[[10 40]
 [30 80]]
