In [1]:
from src.CipherBreaker import CipherBreaker
from src.CipherUtils import (
    TextDecoder,
    TextEncoder,
    CipherGenerator,
    TextPreProcessor,
)
from src.ProbabilityMatrix import ProbabilityMatrix

In [2]:
# Instantiate the cipher helpers imported from src.CipherUtils, each with its
# default constructor arguments. Later cells use `preprocess`,
# `cipher_generator`, `text_encoder` and `text_decoder` by these names.
cipher_generator = CipherGenerator()
preprocess = TextPreProcessor()
text_encoder = TextEncoder()
text_decoder = TextDecoder()

In [3]:
# Plain-text books that are concatenated into the training corpus.
file_paths = [
    "texts/moby_dick.txt",
    "texts/shakespeare.txt",
    "texts/james-joyce-a-portrait-of-the-artist-as-a-young-man.txt",
    "texts/james-joyce-dubliners.txt",
    "texts/james-joyce-ulysses.txt",
]

# Read every book with an explicit encoding so the corpus does not depend on
# the platform's default locale (which is not UTF-8 everywhere).
# NOTE(review): assumes the text files are UTF-8 encoded — confirm.
texts = []
for file_path in file_paths:
    with open(file_path, "r", encoding="utf-8") as file:
        texts.append(file.read())

# The books are joined back to back, without any separator between them.
corpus = "".join(texts)

In [4]:
# Preprocess the corpus: lowercase it, strip the characters reported by
# `preprocess.unknown_chars`, then collapse additional spaces.
# Note that `corpus` is overwritten in place at every step.

corpus = preprocess.lower(corpus)
unknown_chars = preprocess.unknown_chars(corpus)
# print(unknown_chars)
corpus = preprocess.remove_unknown_chars(corpus, unknown_chars=unknown_chars)
corpus = preprocess.remove_additional_spaces(corpus)

preprocess.save_text(
    corpus
)  # persist the preprocessed text (text_preprocessed.txt, per the original note)


# Compute the transition probabilities from the preprocessed corpus and
# persist them through the ProbabilityMatrix save helpers.

probability_matrix = ProbabilityMatrix(corpus)
probability_matrix.compute_probability_table()

probability_matrix.save_all_2_chars()
probability_matrix.save_probability_table()

# Sample plaintext used by the next cell for the encode/decode demo.
# The commented-out assignments below are alternative samples to try.

text = "There were better sense in the sad mechanic exercise of determining the reason of its absence where it is not. In the novels of the last hundred years there are vast numbers of young ladies with whom it might be a pleasure to fall in love; there are at least five with whom, as it seems to me, no man of taste and spirit can help doing so."
# text = "I do not believe a word of it, my dear. If he had been so very agreeable, he would have talked to Mrs. Long. But I can guess how it was"
# text = "Your plan is a good one,” replied Elizabeth, “where nothing is in question but the desire of being well married; and if I were determined to get a rich husband, or any husband, I dare say I should adopt it. But these are not Jane’s feelings"
# text = "she is not acting by design. As yet she cannot even be certain of the degree of her own regard, nor of its reasonableness. She has known him only a fortnight. She danced four dances with him at Meryton; she saw him one morning at his own house, and has since dined in company with him four times."

In [5]:
# Encode and decode a sample text with a freshly generated cipher.

# Apply the same three preprocessing steps used for the corpus
# (lowercase, drop unknown characters, collapse additional spaces).
text = preprocess.lower(text)
text = preprocess.remove_unknown_chars(
    text, unknown_chars=preprocess.unknown_chars(text)
)
text = preprocess.remove_additional_spaces(text)


cipher = cipher_generator.generate_cipher()

# Round trip: encode with the cipher, then decode the result with the same cipher.
encoded_text = text_encoder.encode_text(text, cipher)
decoded_text = text_decoder.decode_text(encoded_text, cipher)

# NOTE(review): `decoded_text` is computed above but never displayed or checked.
print("Original Text:", text)
print("Encoded Text:", encoded_text)

Original Text: there were better sense in the sad mechanic exercise of determining the reason of its absence where it is not in the novels of the last hundred years there are vast numbers of young ladies with whom it might be a pleasure to fall in love there are at least five with whom as it seems to me no man of taste and spirit can help doing so
Encoded Text: zswqw gwqw bwzzwq xwuxw yu zsw xmf ewasmuya whwqayxw kt fwzwqeyuyuc zsw qwmxku kt yzx mbxwuaw gswqw yz yx ukz yu zsw ukvwpx kt zsw pmxz siufqwf owmqx zswqw mqw vmxz uiebwqx kt okiuc pmfywx gyzs gske yz eycsz bw m lpwmxiqw zk tmpp yu pkvw zswqw mqw mz pwmxz tyvw gyzs gske mx yz xwwex zk ew uk emu kt zmxzw muf xlyqyz amu swpl fkyuc xk


In [6]:
from prove.src.Probability import ProbabilityMatrix

# NOTE(review): the import just above rebinds `ProbabilityMatrix`, shadowing
# the class imported from src.ProbabilityMatrix in the first cell.

# Read the book with an explicit encoding so the result does not depend on
# the platform's default locale.
# NOTE(review): assumes the text file is UTF-8 encoded — confirm.
with open("texts/moby_dick.txt", "r", encoding="utf-8") as file:
    text = file.read()

# Lowercase Latin letters plus the space character: 27 symbols in total.
alphabet = list("abcdefghijklmnopqrstuvwxyz ")

pm = ProbabilityMatrix(text=text, alphabet=alphabet)
unknown_chars = pm.unknown_chars()
pm.preprocess_text(unknown_chars=unknown_chars)
pm.compute_matrix_spaces()

In [7]:
import numpy as np

# Initialize the emission matrix to uniform. Each row is a probability
# distribution over the observable symbols, so every entry is 1 / n_symbols
# and every row sums to 1. The size is taken from `pm.matrix` rather than
# hard-coded, and the dtype is forced to float.
B = np.full(pm.matrix.shape, 1.0 / pm.matrix.shape[1], dtype=float)

In [8]:
# Display the shape of the emission matrix built in the previous cell.
B.shape

(27, 27)

In [7]:
import numpy as np


# Forward pass of belief propagation on our HMM (alpha messages).
def forward_HMM(A, B, pi, observed, normalize=False):
    """
    Forward (alpha) pass of belief propagation on a discrete HMM.

    A: transition matrix, A[k, j] weights the move from state k to state j
    B: emission matrix, B[j, o] weights observing symbol o in state j
    pi: initial state distribution, pi[j] is the weight of starting in state j
    observed: sequence of observed symbol indices (column indices of B)
    normalize: when True, every row of the result is rescaled to sum to 1
        right after it is computed. Unscaled messages shrink with every step
        and underflow to 0 on long sequences; rescaling avoids that and does
        not change any quantity that is later normalized per node.
        Defaults to False, which returns the unscaled messages.

    Returns an array of shape (len(observed), n_states) whose row i holds the
    forward message of node i. An empty `observed` yields an empty
    (0, n_states) array instead of raising.
    """
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)
    pi = np.asarray(pi, dtype=float)

    n_nodes = len(observed)
    n_states = A.shape[0]
    alpha = np.zeros((n_nodes, n_states))

    # Nothing observed: there is no message to compute.
    if n_nodes == 0:
        return alpha

    def _rescale(row):
        # Leave an all-zero row untouched rather than dividing by zero.
        total = row.sum()
        return row / total if total > 0 else row

    alpha[0] = pi * B[:, observed[0]]
    if normalize:
        alpha[0] = _rescale(alpha[0])

    for i in range(1, n_nodes):
        # (alpha[i - 1] @ A)[j] = sum_k alpha[i - 1, k] * A[k, j]
        alpha[i] = (alpha[i - 1] @ A) * B[:, observed[i]]
        if normalize:
            alpha[i] = _rescale(alpha[i])

    return alpha

In [9]:
# Toy two-state, two-symbol HMM used to exercise the forward pass.
# NOTE: `B` is rebound here, replacing the uniform matrix created earlier.
pi = np.array([0.2, 0.8])
A = np.array([[0.6, 0.4], [0.3, 0.7]])
B = np.array([[0.5, 0.5], [0.1, 0.9]])
observed = np.array([1, 0])

In [10]:
# Forward messages for the two-step toy observation sequence.
alpha = forward_HMM(A, B, pi, observed)

In [11]:
import numpy as np


# Backward pass of belief propagation on our HMM (beta messages).
def backward_HMM(A, B, observed, normalize=False):
    """
    Backward (beta) pass of belief propagation on a discrete HMM.

    A: transition matrix, A[j, k] weights the move from state j to state k
    B: emission matrix, B[k, o] weights observing symbol o in state k
    observed: sequence of observed symbol indices (column indices of B)
    normalize: when True, every row of the result is rescaled to sum to 1
        right after it is computed. Unscaled messages shrink with every step
        and underflow to 0 on long sequences; rescaling avoids that and does
        not change any quantity that is later normalized per node.
        Defaults to False, which returns the unscaled messages.

    Returns an array of shape (len(observed) - 1, n_states): row i is the
    backward message of node i. The last node has no row because its message
    is implicitly all ones. With fewer than two observations there is no
    message to compute and an empty (0, n_states) array is returned instead
    of raising.
    """
    A = np.asarray(A, dtype=float)
    B = np.asarray(B, dtype=float)

    n_nodes = len(observed)
    n_states = A.shape[0]
    beta = np.zeros((max(n_nodes - 1, 0), n_states))

    # Zero or one observation: no backward message exists.
    if n_nodes < 2:
        return beta

    def _rescale(row):
        # Leave an all-zero row untouched rather than dividing by zero.
        total = row.sum()
        return row / total if total > 0 else row

    # Message into the second-to-last node: the last node contributes only
    # its emission term.
    beta[-1] = A @ B[:, observed[n_nodes - 1]]
    if normalize:
        beta[-1] = _rescale(beta[-1])

    for i in range(n_nodes - 3, -1, -1):
        # (A @ v)[j] = sum_k A[j, k] * v[k], with v = emission * next message
        beta[i] = A @ (B[:, observed[i + 1]] * beta[i + 1])
        if normalize:
            beta[i] = _rescale(beta[i])

    return beta

In [13]:
# Same toy transition and emission matrices, now with three observations.
A = np.array([[0.6, 0.4], [0.3, 0.7]])
B = np.array([[0.5, 0.5], [0.1, 0.9]])
observed = np.array([1, 0, 1])

In [14]:
# Backward messages for the three-step toy observation sequence.
beta = backward_HMM(A, B, observed)

In [15]:
# Forward messages for the same sequence (`pi` comes from an earlier cell).
alpha = forward_HMM(A, B, pi, observed)

In [16]:
def compute_conditional(alpha, beta, i):
    """
    Posterior distribution of one hidden node given all the observations.

    alpha: array of forward messages, one row per node
    beta: array of backward messages, one row per node except the last
    i: 1-based index of the hidden node (i = 1, ..., M with M = alpha.shape[0])

    Returns a 1-D array over the states, normalized to sum to 1.

    Raises ValueError when i is smaller than 1.
    """
    # Reject every i below 1, not just 0: a negative i would otherwise wrap
    # around through negative indexing and silently return another node.
    if i < 1:
        # Message is Italian, roughly: "no dude, the variable number is needed".
        raise ValueError("no zio serve il numero di variabile")

    # The last node has no backward message, so only alpha is used.
    if i == alpha.shape[0]:
        return alpha[i - 1] / np.sum(alpha[i - 1])

    gamma = alpha[i - 1] * beta[i - 1]
    gamma = gamma / np.sum(gamma)

    return gamma

In [17]:
def compute_all_conditional(alpha, beta):
    """
    Posterior distribution of every hidden node given the observations.

    alpha: array of forward messages, one row per node
    beta: array of backward messages, one row per node except the last

    Returns an array with the same number of rows and columns as alpha, where
    each row is normalized to sum to 1.
    """
    node_count, state_count = alpha.shape[:2]
    final = node_count - 1

    posteriors = np.zeros((node_count, state_count))

    # The final node has no backward message: normalize alpha alone.
    final_alpha = alpha[final]
    posteriors[final] = final_alpha / np.sum(final_alpha)

    # Every other node combines its forward and backward messages.
    for node in range(final):
        joint = alpha[node] * beta[node]
        posteriors[node] = joint / np.sum(joint)

    return posteriors

In [18]:
# Full pipeline on the toy sequence: forward pass, backward pass, then the
# per-node posteriors obtained by combining the two.
alpha = forward_HMM(A, B, pi, observed)
beta = backward_HMM(A, B, observed)
gamma = compute_all_conditional(alpha, beta)

In [29]:
def divide_row_by_sum(matrix):
    """
    Divide every element of `matrix` by the sum of its row.

    The sums are taken along axis 1 and kept as a column so they broadcast
    across the entries of each row.
    """
    totals = np.sum(matrix, axis=1, keepdims=True)
    return matrix / totals

In [30]:
def update_B(gamma, observed):
    """
    Re-estimate the emission matrix from the per-node posteriors.

    gamma: array of posteriors, gamma[k, i] is the weight of state i at node k
    observed: sequence of observed symbol indices, one per node

    Entry [i, j] of the result accumulates gamma[k, i] over every node k whose
    observation equals j; each row is then divided by its sum. The result is
    (n_states, n_states), so only symbols in range(n_states) contribute and
    any other observed value is ignored.
    """
    gamma = np.asarray(gamma, dtype=float)
    observed = np.asarray(observed)
    n_states = gamma.shape[1]

    # Only the nodes that have an observation take part in the update.
    posteriors = gamma[: len(observed)]

    B = np.zeros((n_states, n_states))
    for symbol in range(n_states):
        # Sum the posteriors of all nodes that emitted `symbol` in one shot
        # instead of looping over states and nodes in Python.
        B[:, symbol] = posteriors[observed == symbol].sum(axis=0)

    return divide_row_by_sum(B)

In [31]:
def Baum_Welch(A, B_start, pi, observed, maxIter=100):
    """
    Iteratively re-estimate the emission matrix of an HMM.

    A: transition matrix, kept fixed across iterations
    B_start: initial emission matrix (copied, never modified)
    pi: initial state distribution, kept fixed across iterations
    observed: sequence of observed symbol indices
    maxIter: number of update rounds; always run in full, there is no
        convergence check

    Returns the emission matrix after the last round.
    """
    emission = np.copy(B_start)
    for _ in range(maxIter):
        forward_msgs = forward_HMM(A, emission, pi, observed)
        backward_msgs = backward_HMM(A, emission, observed)
        posteriors = compute_all_conditional(forward_msgs, backward_msgs)
        emission = update_B(posteriors, observed)
    return emission

In [33]:
# Re-estimate the emission matrix on the toy sequence, starting from `B`.
Baum_Welch(A, B, pi, observed)

array([[0.44294088, 0.55705912],
       [0.27897937, 0.72102063]])

In [100]:
pi = [0.5, 0.5]
N = 10000

A = np.array([[0.2, 0.8], [0.6, 0.4]])

# Seeded local generator so the synthetic chain is identical on every run.
chain_rng = np.random.default_rng(0)

# Simulate a two-state Markov chain of length N. The probability of landing
# in state 1 is read from `pi` (first step) and from column 1 of `A` (later
# steps), so the simulation cannot drift from the parameters above.
chain = np.zeros(N, dtype=int)
chain[0] = chain_rng.binomial(1, pi[1])
for i in range(1, N):
    chain[i] = chain_rng.binomial(1, A[chain[i - 1], 1])

In [102]:
# Ground-truth emission matrix used to generate the observations; with the
# identity matrix every observation equals its hidden state.
B_true = np.array([[1, 0], [0, 1]])

# Emit one observation per hidden state. The probability of observing
# symbol 1 comes from `B_true`, not from the unrelated toy matrix `B`
# left over from earlier cells.
viewed_chain = np.zeros(N)
for i in range(N):
    viewed_chain[i] = np.random.binomial(1, B_true[chain[i], 1])

viewed_chain = viewed_chain.astype(int)

In [103]:
# Uninformative starting point: every emission entry is 0.5.
B_start = np.full((2, 2), 0.5)

In [104]:
# Re-estimate the emission matrix from the simulated observations.
# NOTE(review): the recorded output below is all NaN — presumably the unscaled
# forward/backward messages underflow to 0 over 10000 steps, making the
# per-node normalization a 0 / 0 division. Confirm and rescale the messages.
Baum_Welch(A, B_start, pi, observed=viewed_chain)

  gamma[n_nodes-1] = alpha[n_nodes-1] / np.sum(alpha[n_nodes-1])
  gamma[i] = alpha[i] * beta[i] / np.sum(alpha[i] * beta[i])


array([[nan, nan],
       [nan, nan]])