In [1]:
from src.CipherBreaker import CipherBreaker
from src.CipherUtils import (
    TextDecoder,
    TextEncoder,
    CipherGenerator,
    TextPreProcessor,
)
from src.ProbabilityMatrix import ProbabilityMatrix

In [2]:
# Instantiate the helper classes imported from src.CipherUtils once; the
# cells below reuse these same four objects.

cipher_generator = CipherGenerator()  # used below via generate_cipher() and handed to CipherBreaker
preprocess = TextPreProcessor()  # used below via lower / unknown_chars / remove_unknown_chars / remove_additional_spaces / save_text
text_encoder = TextEncoder()  # used below via encode_text(text, cipher)
text_decoder = TextDecoder()  # used below via decode_text(encoded_text, cipher)

In [3]:
# Text files that make up the training corpus (the transition probabilities
# are learned from this text further down).

file_paths = [
    "texts/moby_dick.txt",
    "texts/shakespeare.txt",
    "texts/james-joyce-a-portrait-of-the-artist-as-a-young-man.txt",
    "texts/james-joyce-dubliners.txt",
    "texts/james-joyce-ulysses.txt",
]

# Read every file with an explicit encoding so the resulting corpus does not
# depend on the platform's default locale encoding (e.g. cp1252 on Windows,
# where non-ASCII bytes would fail to decode or decode to the wrong characters).
# NOTE(review): assumes the text files are stored as UTF-8 — confirm.
texts = []
for file_path in file_paths:
    with open(file_path, "r", encoding="utf-8") as file:
        texts.append(file.read())

# Files are concatenated back to back, in the order listed above.
corpus = "".join(texts)

In [4]:
# Clean the corpus in three steps: lower(), remove_unknown_chars() for the
# characters reported by unknown_chars(), then remove_additional_spaces().

corpus = preprocess.lower(corpus)

# Computed on the lower-cased corpus; print this value to inspect what is dropped.
unknown_chars = preprocess.unknown_chars(corpus)

corpus = preprocess.remove_additional_spaces(
    preprocess.remove_unknown_chars(corpus, unknown_chars=unknown_chars)
)

# Persist the cleaned corpus (per the original note: inside text_preprocessed.txt).
preprocess.save_text(corpus)

In [9]:
# Build the transition-probability model from the preprocessed corpus.
# NOTE(review): compute_probability_table() presumably populates
# `probability_matrix.probability_table`, which the CipherBreaker cells
# below read — confirm in src.ProbabilityMatrix.

probability_matrix = ProbabilityMatrix(corpus)
probability_matrix.compute_probability_table()

In [6]:
# Persist the model artefacts through ProbabilityMatrix's own save helpers.
# Requires the cell that creates `probability_matrix` to have been run first.
# NOTE(review): output locations/formats are decided inside ProbabilityMatrix
# and are not visible from this notebook.
probability_matrix.save_all_2_chars()
probability_matrix.save_probability_table()

In [10]:
# Sample plaintext that will be encrypted and then recovered.
#
# The sample is assembled from named pieces instead of one hand-duplicated
# literal; the resulting `text` is character-for-character the same string as
# before. The previous stand-alone assignment of the first passage was dead
# code (it was overwritten immediately by the combined sample) and is gone.

# Five space-separated repetitions of the closing-slide phrase.
attention_banner = " ".join(["thanks for your attention"] * 5)

passage_mechanic = "There were better sense in the sad mechanic exercise of determining the reason of its absence where it is not. In the novels of the last hundred years there are vast numbers of young ladies with whom it might be a pleasure to fall in love; there are at least five with whom, as it seems to me, no man of taste and spirit can help doing so."
passage_design = "she is not acting by design. As yet she cannot even be certain of the degree of her own regard, nor of its reasonableness. She has known him only a fortnight. She danced four dances with him at Meryton; she saw him one morning at his own house, and has since dined in company with him four times."

# Alternative samples (swap one in for `text` to try it):
# text = "I do not believe a word of it, my dear. If he had been so very agreeable, he would have talked to Mrs. Long. But I can guess how it was"
# text = "Your plan is a good one,” replied Elizabeth, “where nothing is in question but the desire of being well married; and if I were determined to get a rich husband, or any husband, I dare say I should adopt it. But these are not Jane’s feelings"

text = " ".join([attention_banner, passage_mechanic, passage_design])

In [11]:
# Round-trip demo: run the sample through the same three preprocessing calls
# as the corpus, encode it with a newly generated cipher, decode it again with
# that same cipher, and print all three strings.

text = preprocess.lower(text)
text = preprocess.remove_additional_spaces(
    preprocess.remove_unknown_chars(
        text, unknown_chars=preprocess.unknown_chars(text)
    )
)

cipher = cipher_generator.generate_cipher()
encoded_text = text_encoder.encode_text(text, cipher)
decoded_text = text_decoder.decode_text(encoded_text, cipher)

for label, shown in (
    ("Original Text:", text),
    ("Encoded Text:", encoded_text),
    ("Decoded Text:", decoded_text),
):
    print(label, shown)

Original Text: thanks for your attention thanks for your attention thanks for your attention thanks for your attention thanks for your attention there were better sense in the sad mechanic exercise of determining the reason of its absence where it is not in the novels of the last hundred years there are vast numbers of young ladies with whom it might be a pleasure to fall in love there are at least five with whom as it seems to me no man of taste and spirit can help doing so she is not acting by design as yet she cannot even be certain of the degree of her own regard nor of its reasonableness she has known him only a fortnight she danced four dances with him at meryton she saw him one morning at his own house and has since dined in company with him four times
Encoded Text: szriwj gly ulcy rssdisfli szriwj gly ulcy rssdisfli szriwj gly ulcy rssdisfli szriwj gly ulcy rssdisfli szriwj gly ulcy rssdisfli szdyd edyd vdssdy jdijd fi szd jra bdozrifo dxdyofjd lg adsdybfifih szd ydrjli lg fsj 

In [None]:
# Break the cipher with MCMC using only one starting point.
# The breaker receives the shared cipher generator, the ciphertext produced in
# the encode/decode cell, and the transition table held by `probability_matrix`.
cipher_breaker = CipherBreaker(
    cipher_generator=cipher_generator,
    ciphered_text=encoded_text,
    probability_table=probability_matrix.probability_table,
)

# NOTE(review): print_interval=100 presumably prints the current decoding every
# 100 iterations (the recorded multi-start output shows "Iter 0", "Iter 100"),
# i.e. roughly 1000 long lines over 100000 iterations — confirm, and consider a
# larger interval to keep the cell output readable.
cipher_breaker.break_cipher(iterations=100000, print_interval=100)

In [None]:
# Inspect the outcome of the single-start run; requires the break_cipher cell
# above to have been executed first.
cipher_breaker.extract_best(
    n_extract=10, return_likelihood=True
)  # Extract the best 10 decoded messages along with their likelihood

In [12]:
# Break the cipher with MCMC using 5 starting points (nstart=5).
# A separate CipherBreaker instance is built with the same inputs as the
# single-start breaker, so the two runs do not share an object.
cipher_breaker_nstart = CipherBreaker(
    cipher_generator=cipher_generator,
    ciphered_text=encoded_text,
    probability_table=probability_matrix.probability_table,
)
# The recorded output below shows progress lines of the form
# "Iter <k> of start <s>: <current decoding>" at k = 0 and k = 100.
# NOTE(review): the recorded run ended in a KeyboardInterrupt, so the saved
# output is from an interrupted run, not a completed one.
cipher_breaker_nstart.break_cipher_nstart(
    iterations=100000, print_interval=100, nstart=5
)

Iter 0 of start 1: rhtvin ekp ukcp trrmvrbkv rhtvin ekp ukcp trrmvrbkv rhtvin ekp ukcp trrmvrbkv rhtvin ekp ukcp trrmvrbkv rhtvin ekp ukcp trrmvrbkv rhmpm zmpm amrrmp nmvnm bv rhm ntx omghtvbg mympgbnm ke xmrmpobvbvd rhm pmtnkv ke brn tanmvgm zhmpm br bn vkr bv rhm vksmln ke rhm ltnr hcvxpmx umtpn rhmpm tpm stnr vcoampn ke ukcvd ltxbmn zbrh zhko br obdhr am t jlmtncpm rk etll bv lksm rhmpm tpm tr lmtnr ebsm zbrh zhko tn br nmmon rk om vk otv ke rtnrm tvx njbpbr gtv hmlj xkbvd nk nhm bn vkr tgrbvd au xmnbdv tn umr nhm gtvvkr msmv am gmprtbv ke rhm xmdpmm ke hmp kzv pmdtpx vkp ke brn pmtnkvtalmvmnn nhm htn ivkzv hbo kvlu t ekprvbdhr nhm xtvgmx ekcp xtvgmn zbrh hbo tr ompurkv nhm ntz hbo kvm okpvbvd tr hbn kzv hkcnm tvx htn nbvgm xbvmx bv gkojtvu zbrh hbo ekcp rbomn
Iter 100 of start 1: thands for your attention thands for your attention thands for your attention thands for your attention thands for your attention there were better sense in the sap mechanic exercise of petermining the rea

KeyboardInterrupt: 

In [None]:
# Inspect the outcome of the multi-start run; requires the break_cipher_nstart
# cell above to have been executed first.
cipher_breaker_nstart.extract_best(
    n_extract=10, return_likelihood=True
)  # Extract the best 10 decoded messages along with their likelihood

In [None]:
# Scratch check: length of the five-fold "thanks for your attention" prefix of
# the sample text (5 * 25 characters + 4 separating spaces = 129).
# NOTE(review): presumably related to the "130" in generate_animation_130
# below — confirm.
len(
    "thanks for your attention thanks for your attention thanks for your attention thanks for your attention thanks for your attention"
)

In [None]:
# Call CipherBreaker.generate_animation_130 on the multi-start breaker; requires
# the break_cipher_nstart cell to have been run first.
# NOTE(review): what the "130" suffix refers to is not visible from this
# notebook (possibly the first 130 characters of the text) — confirm in
# src.CipherBreaker.
cipher_breaker_nstart.generate_animation_130()