In [1]:
import numpy as np


def baum_welch(observed_sequence, num_states, num_emissions, num_iterations):
    # Initialize emission probabilities randomly
    # emission_probs = np.random.rand(num_states, num_emissions)
    emission_probs = np.full((num_states, num_emissions), 1 / 27)
    emission_probs /= np.sum(emission_probs, axis=1, keepdims=True)

    # Initialize the observed sequence and its length
    observed_sequence = np.array(observed_sequence)
    T = len(observed_sequence)

    for iteration in range(num_iterations):
        # Step 1: Forward-Backward Algorithm

        # Forward variables
        alpha = np.zeros((T, num_states))

        # Calculate alpha(1, i)
        alpha[0] = emission_probs[:, observed_sequence[0]]

        # Calculate alpha(t, i) for t > 1
        for t in range(1, T):
            for i in range(num_states):
                alpha[t, i] = (
                    np.sum(alpha[t - 1] * transition_probs[:, i])
                    * emission_probs[i, observed_sequence[t]]
                )

        # Backward variables
        beta = np.zeros((T, num_states))

        # Set beta(T, i) = 1
        beta[T - 1] = 1

        # Calculate beta(t, i) for t < T
        for t in range(T - 2, -1, -1):
            for i in range(num_states):
                beta[t, i] = np.sum(
                    beta[t + 1]
                    * transition_probs[i, :]
                    * emission_probs[:, observed_sequence[t + 1]]
                )

        # Step 2: Estimation Step

        # Compute gamma variables
        gamma = alpha * beta / np.sum(alpha * beta, axis=1, keepdims=True)

        # Compute xi variables
        xi = np.zeros((T - 1, num_states, num_states))
        for t in range(T - 1):
            for i in range(num_states):
                for j in range(num_states):
                    xi[t, i, j] = (
                        alpha[t, i]
                        * transition_probs[i, j]
                        * emission_probs[j, observed_sequence[t + 1]]
                        * beta[t + 1, j]
                    ) / np.sum(
                        alpha[t, :]
                        * transition_probs[:, j]
                        * emission_probs[j, observed_sequence[t + 1]]
                        * beta[t + 1, j]
                    )

        # Step 3: Maximization Step

        # Update emission probabilities
        for i in range(num_states):
            for k in range(num_emissions):
                emission_probs[i, k] = np.sum(
                    gamma[:, i] * (observed_sequence == k)
                ) / np.sum(gamma[:, i])

    return emission_probs

In [2]:
import random
from src.CipherUtils import CipherGenerator
from src.CipherUtils import TextDecoder, TextEncoder


hidden_sequence = "people of western europe a landing was made this morning on the coast of france by troops kangaroo jokes quasi vile xilophone zenit "
hidden_sequence = "in germany it seems to be pretty much automatic pretty much all the time in france and spain it all depends presumably on social subtleties that you have to be french or spanish to understand in italy why would you even bother when and how much to tip is a question that has been vexing visitors to europe for as long as people have been travelling around the continent outside their own country it seems even europeans don t know the answer according to new polling by yougov in six eu countries britain and the us where as most visitors know but may be reluctant to acknowledge gratuities may make up more than half your waitperson s income europeans are deeply divided on tipping in restaurants for example of respondents in germany told the pollster they typically tipped almost the same as the us in the uk where an optional service charge of about is usually included said they left a gratuity the figure in spain where service is often included in restaurant bills but diners can leave optional tips was while in france where every price on a restaurant menu already includes for service of people said they generally tipped on top even in sweden where tips are generally not expected the figure was but only of italians said they would typically leave a gratuity after a meal out with a rather greater proportion admitting they never left a cent a startling of respondents in the us however and of germans by far the most in europe confessed they would tip sometimes or often even if the service was terrible indicating that for some tipping is not about quality of service at all the findings of the survey will come as a surprise in germany a country that does not generally think of itself as a nation of happy distributors of trinkgeld the word in a similar vein to pourboire in french means drinking money nor is germany a country whose service staff are particularly customer friendly in berlin a glass of beer or a plate of food is often served in a huff rather than with a smile not for nothing is servicew ste deutschland germany is a service desert a thing yet the data reflects a clear trend germans tend to hand over a few extra coins irrespective of how they have been treated they are the nation in europe least likely to not tip even for poor service and the most willing to pay extra for average service a continuing fondness for cash may be a factor with many taxi drivers and bars still refusing payment by card a growing awareness of labour shortages in the catering sector could also explain a change in attitude with many bars and cafes even in big cities reducing their opening hours due to a post pandemic lack of staff some barkeepers say their customers have recently become more generous tippers but if german tipping habits seem ingrained they are"


cipher_generator = CipherGenerator()
cipher = cipher_generator.generate_cipher()
encoder = TextEncoder()
observed_sequence = encoder.encode_text(hidden_sequence, cipher=cipher)


print(observed_sequence)

jt yinwctp je riiwr ed fi mnieep wbqx cbedwcejq mnieep wbqx ckk exi ejwi jt znctqi ctg rmcjt je ckk gimitgr mnirbwcfkp dt rdqjck rbfekiejir exce pdb xcvi ed fi znitqx dn rmctjrx ed btginrectg jt jeckp sxp sdbkg pdb ivit fdexin sxit ctg xds wbqx ed ejm jr c hbirejdt exce xcr fiit viajty vjrjednr ed ibndmi zdn cr kdty cr midmki xcvi fiit encvikkjty cndbtg exi qdtejtite dberjgi exijn dst qdbtenp je riiwr ivit ibndmictr gdt e ltds exi ctrsin cqqdngjty ed tis mdkkjty fp pdbydv jt rja ib qdbtenjir fnjecjt ctg exi br sxini cr wdre vjrjednr ltds fbe wcp fi nikbqecte ed cqltdskigyi yncebjejir wcp wcli bm wdni exct xckz pdbn scjeminrdt r jtqdwi ibndmictr cni giimkp gjvjgig dt ejmmjty jt nirecbncter zdn iacwmki dz nirmdtgiter jt yinwctp edkg exi mdkkrein exip epmjqckkp ejmmig ckwdre exi rcwi cr exi br jt exi bl sxini ct dmejdtck rinvjqi qxcnyi dz cfdbe jr brbckkp jtqkbgig rcjg exip kize c yncebjep exi zjybni jt rmcjt sxini rinvjqi jr dzeit jtqkbgig jt nirecbncte fjkkr fbe gjtinr qct kicvi dmejdtc

In [3]:
from prove.src.Probability import ProbabilityMatrix

with open("texts/moby_dick.txt", "r") as file:
    text = file.read()

alphabet = list("abcdefghijklmnopqrstuvwxyz ")


pm = ProbabilityMatrix(text=text, alphabet=alphabet)
unknown_chars = pm.unknown_chars()
pm.preprocess_text(unknown_chars=unknown_chars)

# compute probabilities
pm.compute_matrix_spaces()

In [4]:
transition_probs = pm.matrix

In [5]:
def map_alphabet_to_numbers():
    alphabet = "abcdefghijklmnopqrstuvwxyz "
    mapping = {char: i for i, char in enumerate(alphabet)}
    return mapping

In [6]:
def translate_string_to_numbers(text, mapping):
    numbers = [mapping[char] for char in text]
    return numbers

In [7]:
hidden_ = translate_string_to_numbers(
    hidden_sequence, mapping=map_alphabet_to_numbers()
)
observed_ = translate_string_to_numbers(
    observed_sequence, mapping=map_alphabet_to_numbers()
)

In [8]:
for i in range(27):
    print(i, hidden_.count(i))

0 188
1 33
2 65
3 69
4 295
5 57
6 51
7 82
8 173
9 0
10 11
11 88
12 49
13 178
14 152
15 65
16 2
17 162
18 144
19 203
20 73
21 38
22 37
23 8
24 60
25 0
26 499


In [9]:
emission = baum_welch(
    observed_sequence=observed_,
    num_emissions=27,
    num_states=27,
    num_iterations=100,
)

  gamma = alpha * beta / np.sum(alpha * beta, axis=1, keepdims=True)
  xi[t, i, j] = (


In [10]:
emission

array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan],
       [nan, nan, nan, nan, nan, nan, na

In [None]:
emission.argmax(axis=1)

In [None]:
print(sorted(list(emission.argmax(axis=1))))

In [None]:
import numpy as np

# Define the fixed transition probabilities
transition_probs = np.array([[0.7, 0.3], [0.2, 0.8]])

# Define the observed sequence
observed_sequence = [0, 1, 0, 1, 0, 1]  # Example sequence of emissions

# Set the number of states and emissions
num_states = transition_probs.shape[0]
num_emissions = (
    2  # In this example, we assume there are only two possible emissions: 0 and 1
)

# Set the number of iterations for Baum-Welch algorithm
num_iterations = 100

# Estimate the emission probabilities using Baum-Welch algorithm
emission_probs = baum_welch(
    observed_sequence, num_states, num_emissions, num_iterations
)

print("Estimated emission probabilities:")
print(emission_probs)