In [2]:
import random
import math
from collections import Counter


def generate_cipher():
    """
    Build a fresh substitution key.

    Returns:
        list[str]: the 26 lowercase letters "a".."z" in a random order.
        Position k of the list is the letter that replaces the k-th
        letter of the alphabet.
    """
    letters = [chr(code) for code in range(ord("a"), ord("z") + 1)]
    random.shuffle(letters)  # in-place shuffle driven by the global RNG
    return letters


def encode_text(text, cipher):
    """
    Encrypt text with a substitution cipher.

    Args:
        text: the string to encrypt.
        cipher: sequence of substitute letters; cipher[k] replaces the k-th
            letter of the alphabet ("a" -> cipher[0], "b" -> cipher[1], ...).

    Returns:
        str: the encrypted text. Upper-case input letters produce upper-case
        output letters. Every character that has no entry in the cipher
        (digits, punctuation, whitespace, and letters outside a-z such as
        "é" or "ß") is copied through unchanged.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    # Lookup table built once per call: plain letter -> substitute letter.
    substitution = dict(zip(alphabet, cipher))

    encoded_text = []
    for char in text:
        encoded_char = substitution.get(char.lower())
        if encoded_char is None:
            # Not one of the 26 mapped letters. Indexing the cipher with
            # ord(char) - ord("a") here would run past the end of the key
            # for non-ASCII letters, so pass the character through instead.
            encoded_text.append(char)
        elif char.isupper():
            encoded_text.append(encoded_char.upper())
        else:
            encoded_text.append(encoded_char)
    return "".join(encoded_text)


def decode_text(text, cipher):
    """
    Decrypt text that was encrypted with the given substitution cipher.

    This is the inverse of the encryption mapping: a ciphertext letter found
    at position k of the cipher decodes to the k-th letter of the alphabet.

    Args:
        text: the ciphertext string.
        cipher: sequence of substitute letters, as used for encryption.

    Returns:
        str: the decrypted text. Upper-case input letters produce upper-case
        output letters. Characters that do not occur in the cipher (digits,
        punctuation, whitespace, letters outside a-z) are copied unchanged.
    """
    # Invert the key once instead of scanning the cipher for every character.
    # setdefault keeps the first position of a letter, like cipher.index().
    inverse = {}
    for index, encoded_char in enumerate(cipher):
        inverse.setdefault(encoded_char, chr(index + ord("a")))

    decoded_text = []
    for char in text:
        decoded_char = inverse.get(char.lower())
        if decoded_char is None:
            # Not a letter of the key (e.g. "é", "1", " "): leave as is
            # rather than failing the lookup.
            decoded_text.append(char)
        elif char.isupper():
            decoded_text.append(decoded_char.upper())
        else:
            decoded_text.append(decoded_char)
    return "".join(decoded_text)


def break_into_two_chars(text):
    """
    Slide a two-character window over the text and collect every window.

    Windows overlap: "abc" yields ["ab", "bc"]. Inputs shorter than two
    characters yield an empty list.
    """
    bigrams = []
    for start in range(len(text) - 1):
        bigrams.append(text[start : start + 2])
    return bigrams


def get_prob_two_char(two_char):
    """
    Look up the estimated probability of a bigram.

    Reads the module-level probability_table. A bigram that is missing from
    the table gets the fallback 1 / len(war_and_peace_2_characters), i.e. one
    over the number of bigrams in the module-level bigram list.
    """
    known_probability = probability_table.get(two_char)
    return (
        known_probability
        if known_probability is not None
        else 1 / len(war_and_peace_2_characters)
    )


def get_log_lik_text(text, table=None):
    """
    Return the bigram log-likelihood of a text.

    A single space is prepended so that the first character is also scored
    as the second half of a bigram. The result is the sum of the natural
    logs of the probabilities of all overlapping bigrams.

    Args:
        text: the string to score.
        table: optional mapping bigram -> probability. Defaults to the
            module-level probability_table, which keeps existing
            one-argument calls working unchanged.

    Returns:
        The summed log-probability (0 for an empty text).

    Raises:
        ZeroDivisionError: if the table is empty and the text is not.
    """
    if table is None:
        table = probability_table

    padded = " " + text
    two_char_list = [padded[i : i + 2] for i in range(len(padded) - 1)]
    if not two_char_list:
        return 0

    # Probability assigned to bigrams that are missing from the table;
    # computed once here instead of once per bigram.
    # NOTE(review): get_prob_two_char falls back to
    # 1 / len(war_and_peace_2_characters) instead — confirm which floor
    # is intended.
    unseen_probability = 1 / len(table)

    return sum(
        math.log(table.get(two_char, unseen_probability))
        for two_char in two_char_list
    )


def swap(x):
    """
    Exchange the entries at two distinct, randomly chosen positions of x.

    The list is modified in place and the same list object is returned, so
    callers that want to keep the original must pass a copy.
    """
    first, second = random.sample(range(len(x)), k=2)
    held = x[first]
    x[first] = x[second]
    x[second] = held
    return x


# Training corpus: the books whose bigram counts are used to estimate
# bigram probabilities.
corpus_paths = [
    "texts/moby_dick.txt",
    "texts/shakespeare.txt",
    "texts/james-joyce-a-portrait-of-the-artist-as-a-young-man.txt",
    "texts/james-joyce-dubliners.txt",
    "texts/james-joyce-ulysses.txt",
]

corpus_texts = []
for corpus_path in corpus_paths:
    # Explicit encoding so the result does not depend on the platform default.
    # NOTE(review): assumes the corpus files are UTF-8 — confirm.
    with open(corpus_path, "r", encoding="utf-8") as file:
        corpus_texts.append(file.read())

# Keep the per-book names bound for any other cell that refers to them.
text0, text1, text2, text3, text4 = corpus_texts

# Collate all five texts
text = "".join(corpus_texts)

war_and_peace_2_characters = break_into_two_chars(text)

# Count each bigram once and reuse the counts for both the total and the table.
bigram_counts = Counter(war_and_peace_2_characters)

total_count = sum(bigram_counts.values())  # Total number of bigrams in the corpus

# Create the probability table: relative frequency of each bigram
probability_table = {
    two_char: count / total_count for two_char, count in bigram_counts.items()
}

# The table keys can contain any character of the corpus, so write as UTF-8.
with open("outputs/prob_table_wrong.txt", "w", encoding="utf-8") as file:
    print(probability_table, file=file)


In [7]:
#plaintext = "to be or not to be that is the question whether tis nobler in the mind to suffer the slings and arrows of outrageous fortune or to take arms against a sea of troubles"

#plaintext = "It s a blue world without you It s a blue world alone Yeah, well, this mad world made me crazy Might just turn around, do one-eighty I aint politickin, I ain t kissin no babies The devil on my doorstep bein so shady  don t trip We don t gotta let him in, don t trip Yeah, yeah	"

# plaintext = "Look, I was gonna go easy on you not to hurt your feelings But I'm only going to get this one chance (six minutes-, six minutes-) Something's wrong, I can feel it (six minutes, Slim Shady, you're on!) Just a feeling I've got, like something's about to happen, but I don't know what If that means what I think it means, we're in trouble, big trouble And if he is as bananas as you say, I'm not taking any chances You are just what the doc ordered I'm beginnin' to feel like a Rap God, Rap God All my people from the front to the back nod, back nod Now, who thinks their arms are long enough to slap box, slap box? They said I rap like a robot, so call me Rap-bot"

# Sentence to encrypt and then recover. It is bound to `plaintext` only:
# `text` holds the training corpus and must not be overwritten here.
plaintext = "she is not acting by design. As yet she cannot even be certain of the degree of her own regard, nor of its reasonableness. She has known him only a fortnight. She danced four dances with him at Meryton; she saw him one morning at his own house, and has since dined in company with him four times."

# The key the sampler has to recover, and the ciphertext it produces.
true_cipher = generate_cipher()
ciphered_text = encode_text(plaintext, true_cipher)
print("ciphered_text", ciphered_text)

# Random starting key for the chain, scored once up front. The current
# decode and its log-likelihood only change when a proposal is accepted,
# so they are carried along instead of being recomputed every iteration.
current_cipher = generate_cipher()
decoded_text_current = decode_text(ciphered_text, current_cipher)
current_log_lik = get_log_lik_text(decoded_text_current)


i = 0  # number of accepted proposals so far (not the loop iteration)


for _ in range(50000):
    # Propose a neighbouring key: the current key with two letters swapped.
    proposed_cipher = swap(current_cipher.copy())

    decoded_text_proposed = decode_text(ciphered_text, proposed_cipher)
    proposed_log_lik = get_log_lik_text(decoded_text_proposed)

    # Metropolis acceptance probability min(1, exp(difference)). The
    # non-negative case is handled without calling exp, which raises
    # OverflowError for large positive arguments.
    log_ratio = proposed_log_lik - current_log_lik
    if log_ratio >= 0:
        acceptance_probability = 1.0
    else:
        acceptance_probability = math.exp(log_ratio)

    # random.random() is in [0, 1), so probability 1.0 always accepts.
    accept = random.random() < acceptance_probability

    if accept:
        current_cipher = proposed_cipher
        decoded_text_current = decoded_text_proposed
        current_log_lik = proposed_log_lik
        # Progress report on every 100th accepted proposal.
        if i % 100 == 0:
            print(f"Iter {i}: {decoded_text_proposed}")
        i += 1

ciphered_text plj dp iqm zkmdia fr wjpdai. Zp rjm plj kziiqm jnji fj kjumzdi qo mlj wjaujj qo lju qei ujazuw, iqu qo dmp ujzpqizfxjijpp. Plj lzp siqei ldb qixr z oqumidalm. Plj wzikjw oqvu wzikjp edml ldb zm Bjurmqi; plj pze ldb qij bquidia zm ldp qei lqvpj, ziw lzp pdikj wdijw di kqbgzir edml ldb oqvu mdbjp.
Iter 0: tnb wt feg xigwfr cp ubtwrf. Xt pbg tnb ixffeg bhbf cb ibjgxwf el gnb ubrjbb el nbj eqf jbrxju, fej el wgt jbxtefxckbfbtt. Tnb nxt zfeqf nws efkp x lejgfwrng. Tnb uxfibu lemj uxfibt qwgn nws xg Sbjpgef; tnb txq nws efb sejfwfr xg nwt eqf nemtb, xfu nxt twfib uwfbu wf iesaxfp qwgn nws lemj gwsbt.
Iter 100: she is rot actirl py mesilr. As yet she carrot ever pe centair of the melnee of hen our nelanm, ron of its neasorapberess. She has wrour hid orby a fontrilht. She marcem fokn marces uith hid at Denytor; she sau hid ore donrirl at his our hokse, arm has sirce mirem ir codgary uith hid fokn tides.
Iter 200: the it nos adsing by metign. At yes the dannos epen be dersain of s

In [2]:
from src2.ProbabilityMatrix import ProbabilityMatrix

# Build the bigram table with the ProbabilityMatrix class from `text`
# and dump it to a file.
lik = ProbabilityMatrix(text=text)
lik.compute_probability_table()

# Explicit encoding so the dump does not depend on the platform default
# (the table may contain non-ASCII characters from the input text).
with open("pt_class.txt", "w", encoding="utf-8") as file:
    print(lik.probability_table, file=file)