# 2.5 Disguising the Frequencies

## Exercises 2.5

In [1]:
%%capture
%run ./1_2_primes_gcd.ipynb
import random

from src.polyalpabetic_ciphers import TABLE_2_1, display_frequency_tables, LETTER_PROBABILITIES
from src.helpers import strip_text, strip_number_text, format_plaintext, format_ciphertext, \
                        format_number_ciphertext, CHARACTERS, pos, char_at

###  3. Write a program to encipher and decipher messages by means of the Letter-Number scheme appearing in Table 2.1.

For reference, here is the letter-number table being referenced.


 | Letter | Subset of S                                        |
 |:------:|:---------------------------------------------------|
 |   a    | 15, 33, 37, 55, 57, 72, 91, 96                     |
 |   b    | 24                                                 |
 |   c    | 03, 39, 67                                         |
 |   d    | 04, 43, 61, 88                                     |
 |   e    | 08, 12, 20, 46, 47, 59, 64, 79, 81, 85, 90, 94, 97 |
 |   f    | 40, 48                                             |
 |   g    | 29, 53                                             |
 |   h    | 05, 16, 30, 42, 69, 99                             |
 |   i    | 14, 45, 50, 60, 73, 82, 93                         |
 |   j    | 11                                                 |
 |   k    | 77                                                 |
 |   l    | 01, 26, 71, 98                                     |
 |   m    | 34, 87                                             |
 |   n    | 06, 17, 22, 31, 49, 58                             |
 |   o    | 02, 10, 41, 51, 66, 75, 83                         |
 |   p    | 13, 18                                             |
 |   q    | 36                                                 |
 |   r    | 21, 25, 65, 68, 92, 95                             |
 |   s    | 00, 28, 52, 63, 74, 78                             |
 |   t    | 07, 19, 23, 35, 38, 54, 70, 84, 89                 |
 |   u    | 09, 32                                             |
 |   v    | 44                                                 |
 |   w    | 56, 80                                             |
 |   x    | 86                                                 |
 |   У    | 62, 76                                             |
 |   z    | 27                                                 |

For brevity, it is defined in a [separate Python module](https://github.com/dandoug/cryptomath-book/blob/main/src/polyalpabetic_ciphers.py) and referenced here.

In [2]:
class LetterNumber:
    def __init__(self, table: dict[str, list[int]]):
        """
        Construct a LetterNumber object for enciphering and deciphering messages.
        :param table: a dictionary mapping letters to lists of numbers as shown in Table 2.1 (above)
        :raise ValueError if the input table is not valid (see below)
        """
        self._letter_to_numbers = table
        # validate that all the letters a-z are in the table
        if len(table) != 26:
            raise ValueError(f"wrong number of letters in table: {len(table)} != 26")
        for l in CHARACTERS.lower():
            if l not in table:
                raise ValueError(f"letter {l} not in table")
        # Build and validate the numbers_to_letters table
        number_to_letters = {}
        for p, nums in table.items():
            for n in nums:
                # convert n to a two-character string, padded with leading zero if needed
                n_str = f"{n:02}"
                if n_str in number_to_letters:
                    raise ValueError(f"duplicate number {n_str} in table")
                number_to_letters[n_str] = p
        # validate that number_to_letters has the right number of entries
        if len(number_to_letters) != 100:
            raise ValueError(f"wrong number of numbers in table: {len(number_to_letters)} != 100")
        # need to validate that all numbers 00-99 are covered
        for n in range(100):
            n_str = f"{n:02}"
            if n_str not in number_to_letters:
                raise ValueError(f"number {n_str} not in table")
        self._numbers_to_letters = number_to_letters

    def encipher(self, plaintext: str) -> str:
        """
        Encipher a plaintext
        """
        plaintext = strip_text(plaintext)
        ciphertext = ""
        for char in plaintext:
            # randomly select a number from the list for this letter
            nums = self._letter_to_numbers[char]
            index = random.randint(0, len(nums) - 1)
            ciphertext += f"{nums[index]:02}"
        return ciphertext

    def decipher(self, ciphertext: str) -> str:
        """
        Decipher a ciphertext
        """
        ciphertext = strip_number_text(ciphertext)
        if len(ciphertext) % 2 != 0:
            raise ValueError("ciphertext does not contain an even number of digits")
        return ''.join(self._numbers_to_letters[ciphertext[i:i+2]] for i in range(0, len(ciphertext), 2))

encoder = LetterNumber(TABLE_2_1)

### 1. Using the Letter-Number scheme appearing in Table 2.1, encipher each of the following messages:

#### (a) Why is it that when we talk to god we are praying, but when god talks to us we are schizophrenic?

In [3]:
plaintext_1a = "Why is it that when we talk to god we are praying, but when god talks to us we are schizophrenic?"
ciphertext_1a = encoder.encipher(plaintext_1a)
print(format_number_ciphertext(ciphertext_1a))

563076455260237005728480998122568519332677195153108880085721
941365576250225324093580692058296661383771772823750952567933
211200673093271013309285585067


In [4]:
# validation
print(format_plaintext(encoder.decipher(ciphertext_1a)))

whyisitthatwhenwetalktogodweareprayingbutwhengodtalkstouswea
reschizophrenic



#### (b) My grandmother started walking five miles a day when she was sixty. She's ninety-seven today and we don't know where she is.

In [5]:
plaintext_1b = "My grandmother started walking five miles a day when she was sixty. She's ninety-seven today and we don't know where she is."
ciphertext_1b = encoder.encipher(plaintext_1b)
print(format_number_ciphertext(ciphertext_1b))

346253923731438710896981957435916570944380729877821729407344
208745719778558857768005850600695980556374738689625299905258
600608706278904459492310617262334943562043025838770666808069
1265907499641452


In [6]:
# validation
print(format_plaintext(encoder.decipher(ciphertext_1b)))

mygrandmotherstartedwalkingfivemilesadaywhenshewassixtyshesn
inetyseventodayandwedontknowwheresheis


###  2. Using the Letter-Number scheme appearing in Table 2.1, decipher each of the following messages:

#### (a) 50378706411991441253083896254555492420391532006473011044857
 24993871598744596345544812994847295143758249467720963469305
 915408137172498978

In [7]:
ciphertext_2a = "50378706411991441253083896254555492420391532006473011044857" + \
                "24993871598744596345544812994847295143758249467720963469305" + \
                "915408137172498978"
plaintext_2a = encoder.decipher(ciphertext_2a)
print(format_plaintext(plaintext_2a))

iamnotavegetarianbecauseiloveanimalsiamavegetarianbecauseiha
teplants


yields
```
  i am not a vegetarian because i love animals i am a vegetarian because i
  hate plants
```


#### (b) 80167661508877723414773327201893011035745690579530127187818
 900


In [8]:
ciphertext_2b = "80167661508877723414773327201893011035745690579530127187818" + \
                "900"
plaintext_2b = encoder.decipher(ciphertext_2b)
print(format_plaintext(plaintext_2b))

whydidkamikazepilotswearhelmets


yields
```
  why did kamikaze pilots wear helmets
```

### 4. Write a program that accepts a text message and outputs a count of each digraph, trigraph, quadgraph, and quintgraph. (And don't pretend you don't know what these terms mean.)

We did this in [Exercises 1.6](https://dandoug.github.io/cryptomath-book/notebooks/1_6_cryptoanalysis_monoalphabetic_substitution_ciphers.html#write-a-program-that-accepts-a-text-message-and-outputs-the-frequency-of-each-letter-appearing-in-the-message) but that was just for letters, digraphs and trigraphs.  We generalize the code here to take an additional parameter, $n$, $1 \le n \le 7$, and return as list of of maps that count the frequency of i-graphs for $i$ from 1 to $n$.

In [9]:
def frequency_tabulator2(message: str, n: int) -> list[dict[str, int]]:
    """
    Given a message, and a value of n (1 <= n <= 7), return a list of
    dictionaries that count the frequency of i-gram for i from 1 to n.
    """
    if n < 1 or n > 7:
        raise ValueError(f"n must be between 1 and 7, got {n}")
    # initialize the dictionaries
    result: list[dict[str, int]] = [dict() for _ in range(n)]
    current_i_gram: list[str] = ['' for _ in range(n)]
    message = strip_text(message)
    for c in message:
        for i in range(n):
            current_i_gram[i] = (current_i_gram[i] + c)[-(i+1):]
            if len(current_i_gram[i]) == i+1:
                result[i][current_i_gram[i]] = result[i].get(current_i_gram[i], 0) + 1
    for i in range(n):
        result[i] = dict(sorted(result[i].items(), key=lambda item: item[1], reverse=True))
    return result

The function above computes the complete frequency tables.  We choose to limit our display function to the entries that have 2 or more occurances because singleton occurrences aren't much help for cryptoanalysis.

In [10]:
frequencies = frequency_tabulator2(plaintext_2a, 5)
display_frequency_tables(frequencies)

<IPython.core.display.Math object>

### 7. Write a program that enciphers and deciphers messages using the Vigenère Square. The input to your program should be both a message and the keyword.

In [11]:
class VigenereSq:
    """
    Represents a Vigenere square, used in cryptography for encoding and decoding
    text with a cipher key. It houses the key and provides the functionality needed
    to work with the Vigenere cipher.
    """
    def __init__(self, key: str):
        """
        Construct a VigenereSq object using the given key.
        """
        self._key = strip_text(key) # clean up the key
        if len(self._key) == 0:
            raise ValueError("key cannot be empty")
        # precompute the key shift values
        shifts = []
        for k in self._key:
            shifts.append(pos(k)-1)
        self._shifts = shifts

    def encipher(self, plaintext: str) -> str:
        """
        Encode a text message using the Vigenere cipher.
        """
        plaintext = strip_text(plaintext)
        ciphertext = ""
        for i, char in enumerate(plaintext):
            ciphertext += char_at(pos(char)+self._shifts[i % len(self._key)])
        return ciphertext.upper()
    def decipher(self, ciphertext: str) -> str:
        """"
        Decode a text message using the Vigenere cipher.
        """
        ciphertext = strip_text(ciphertext)
        plaintext = ""
        for i, char in enumerate(ciphertext):
            plaintext += char_at(pos(char)-self._shifts[i % len(self._key)])
        return plaintext

### 5. Encipher the following messages using the Vigenère Square:

#### (a) Though the Cold War has passed, decoded Soviet cables have intensified the long-running debate on whether the Soviet Union employed American citizens as spies. (Use keyword venom.)

In [12]:
plaintext_5a = "Though the Cold War has passed, decoded Soviet cables have intensified " + \
               "the long-running debate on whether the Soviet Union employed American " + \
               "citizens as spies."
coder5a = VigenereSq("venom")
ciphertext_5a = coder5a.encipher(plaintext_5a)
print(format_ciphertext(ciphertext_5a))

OLBIS CXUSO JPQKM MLNGB VWFSP YIPCP ZHFCH DIGQM WPRGT VZRWZ
OIAGU AMRRF CIYCZ BVHBZ DRTRQ WEGSA IAUSF CIEHT ZWBJU ZXHBU
JRRAB GSLSP VQRFU XEAQU OMMSZ NEFGB DIF



#### (b) I really do hope that Molly intercepts this message, Stef, because there's no way she could decrypt it. (Use keyword pride.)

In [13]:
plaintext_5b = "I really do hope that Molly intercepts this message, Stef, because there's no " + \
               "way she could decrypt it."
coder5b = VigenereSq("pride")
ciphertext_5b = coder5b.encipher(plaintext_5b)
print(format_ciphertext(ciphertext_5b))

XIMDP APLRL DGMWL PKURP APQQX TIKHT IJBKM HDMVW PXMVX TWJHG
PLAHX WVZHW CFEDC HYMFS JCLGI RIGSX XK


### 6. Decipher the following messages using the Vigenère Square:

#### (a) (Use keyword gift.)

In [14]:
ciphertext_6a = "CPJGO EFLGJ TRCMB XXMXH VWTKZ PFMLW " + \
                "WVNZN LZUFL SGKTS QQRPC XMKFH AGVLX " + \
                "JOQTT KJL"
coder6a = VigenereSq("gift")
print(format_plaintext(coder6a.decipher(ciphertext_6a)))

wheniwasaboyweweresopoorthatforchristmasmyfamilyjustexchange
dglances


yields
```
  when i was a boy we were so poor that for christmas my family just
  exchanged glances
```

#### (b) (Use keyword dispute.)

In [15]:
ciphertext_6b = "VWETW EELUL WUMXK METNA SGKGB GHROG " + \
                "SINKM ECLTX MSWPW ELNWV QSCZK MHLJX " + \
                "WAALT ZTFFO DAAHE BADAS RNNEO TQXHO " + \
                "IQBWS VRXKM WCAEM VPEPN AIPIL XWBEQ " + \
                "KZPLE IVJSQ VTKHV ACYRI DZKEL BSUBG " + \
                "IBXTX JDXWT XLWFD ZDEVQ KZCLA RZC"
encoder6b = VigenereSq("dispute")
print(format_plaintext(encoder6b.decipher(ciphertext_6b)))

someclaimthatthemethodcommonlyattributedtotheprussianfriedri
chwilhelmkasiskiwasactuallyinventedbytheenglishmathematician
charlesbabbagenineyearspriortothepublicationofkasiskiswork


yields
```
 some claim that the method commonly attributed to the prussian friedrich
 wilhelm kasiski was actually invented by the english mathematician
 charles babbage nine years prior to the publication of kasiski s work
```

### 8. Write a program that computes the Index of Coincidence of a message and then approximates the length of the keyword.

Recall the formula

$$
  IC = \sum_{i=1}^{26} \frac{n_i(n_i-1)}{n(n-1)}
$$

where $n$ is the number of characters in the message and $n_i$ is the number of times each letter appears in message.

Recall also that $r$, an approximation of the length of the keyword can be found using this formula

$$
  r \approx \frac{0.027n}{(n-1)IC - 0.038n +0.065}
$$

In [16]:
def ic(message: str) -> (float, float):
    """
    Use the formula above to compute the Index of Coincidence of a message and
    an approximation of the length of the keyword.
    """
    # First, get the letter frequencies
    letter_frequencies = frequency_tabulator2(message, 1)[0]
    chars_in_message = sum(letter_frequencies.values())
    numerator = 0.0
    for n_sub_i in letter_frequencies.values():
        numerator += n_sub_i * (n_sub_i - 1)
    i_of_c = numerator / (chars_in_message * (chars_in_message - 1))
    # now compute the r approximation of the length of the keyword
    r = (0.027 * chars_in_message) / ((chars_in_message - 1) * i_of_c - 0.038 * chars_in_message + 0.065)
    return i_of_c, r

In [17]:
ic(ciphertext_5a)

(0.03827751196172249, 56.434243176179145)

The $IC$ correctly indicates that the encoding was polyalphabetic (since it is close to $0.038$).  However the estimated keyword length is way off ($56.4$ instead of $5$).  I'm going to assume that this is because the cipher text is relatively short (only $133$ characters).  Hopefully, the values yielded during the decoding examples in the next problem will be more helpful.

Before moving onto the next problem, I'm going to implement another keyword length estimate using a slightly [different technique](https://pages.mtu.edu/~shene/NSF-4/Tutorial/VIG/Vig-IOC-Len.html).  Rather than solving for $r$, this technique breaks the ciphertext into groups assuming several different keyword lengths, computes the average $IC$ of the groups based on that keyword length.  The length that gives an average $IC$ closest to $0.065$, the $IC$ of natural English, is assumed to be keyword length.

In [18]:
def cosets_of_size_l(message: str, l: int) -> list[str]:
    """
    partition a message into l cosets
    """
    message = strip_text(message)
    cosets = ['' for _ in range(l)]
    for i, c in enumerate(message):
        cosets[i % l] += c
    return cosets

def ic_avg_diffent_keywod_len(message: str, upper_limit_len: int) -> dict[str, float]:
    """"
    Given a message and a upper bound on the length of the keyword, return a dictionary
    that contains the average IC of the cosets of the message using keyword lengths from
    2 to upper_limit_len.
    """
    if upper_limit_len < 2 or upper_limit_len > 24:
        raise ValueError(f"upper_limit_len must be between 2 and 24, got {upper_limit_len}")
    message = strip_text(message)
    answer: dict[str, float] = {}
    for l in range(2, upper_limit_len+1):
        cosets =cosets_of_size_l(message, l)
        ic_sum = 0.0
        for coset in cosets:
            ic_sum += ic(coset)[0]
        answer[str(l)] = ic_sum / l
    return answer

In [19]:
ic_avg_diffent_keywod_len(ciphertext_5a, 12)

{'2': 0.036273179556761646,
 '3': 0.03894761569180174,
 '4': 0.03459224598930481,
 '5': 0.06732763532763533,
 '6': 0.039964866051822576,
 '7': 0.034252297410192145,
 '8': 0.031127450980392157,
 '9': 0.03817663817663817,
 '10': 0.06043956043956045,
 '11': 0.03676626403899131,
 '12': 0.04040404040404041}

This gives a strong indication that the keywork is length $5$ since $0.0673$ is much closer to $0.065$ than the other values (except $10$ which is a multiple of $5$).

### 9. For each of the following messages enciphered by means of the Vigenère Square,
#### (a) Perform the Kasiski Test to get an idea of the length of the key word.
#### (b) Compute the Index of Coincidence to verify that the encipherment scheme is polyalphabetic.
#### (c) Compute the approximation to the keyword's length using the Index of Coincidence.
#### (d) Figure out the keyword.
#### (e) Decrypt the message. Good luck. You'll need it.

Before diving into this, I'm going to define something to compute the distance between pairs of repeated i-grams in a cipher text (Kasiski Test) to help with keyword length estimation.  The plan is to feed a repeated i-gram detected by the tabulator above and find candidates for keyword length.

In [20]:
def distance_between_repeated_i_grams(i_gram: str, message: str) -> list[int]:
    """
    Given an i-gram and a message, return the character distances between the occurances
    of that i-gram in the message.
    """
    message = strip_text(message)
    i_gram = strip_text(i_gram)
    distances = []
    last_start = -1
    for i, c in enumerate(message):
        if c == i_gram[0] and i + len(i_gram) <= len(message):
            if message[i:i+len(i_gram)] == i_gram:
                if last_start != -1:
                    distances.append(i - last_start)
                last_start = i
    return distances

In [21]:
frequencies = frequency_tabulator2(ciphertext_5a, 7)
display_frequency_tables(frequencies)

<IPython.core.display.Math object>

In [22]:
distances = distance_between_repeated_i_grams("FCI", ciphertext_5a)
factors = [format_factors(prime_factors(n)) for n in distances]
display_prime_factors_table(distances, factors)

<IPython.core.display.Math object>

The trigram, "FCI", occurs twice and is $25$ characters apart.  $25$ is a multiple of $5$ which is also the keyword length we found most likely above.

Once a keyword length is obtained and the cosets creating by partitioning the ciphertext based on that key length are created.  We can perform a chi-squared analysis to determine the most likely shift (and thus keyword letter) for that coset.  Refer to [this page](http://practicalcryptography.com/cryptanalysis/text-characterisation/chi-squared-statistic/) for more details.  The chi-squared calculation yields us measure of "distance" of an observed set of characters from what we would expect to find in English text.   The formula is given by

$$
  \chi^2(C, E) = \sum_{i=A}^{Z} \frac{(C_i - E_i)^2}{E_i}
$$

where $C_i$ is the count of each letter in the coset, and $E_i$ is the expected count which is given by $p_in$ where $p_i$ is the probability of occurance of each letter in English text and $n$ is the length of the coset.  We compute this metric for each possible shift of the coset ($0$...$25$) and the one that has the lowest chi-squared distance probably corresponds to the keyword letter for the respective position.

In [23]:
def keyword_letter_using_chi_squared(coset: str) -> list[tuple[str,float]]:
    coset = strip_text(coset)
    coset_len = len(coset)
    chi_squared_distances = {}
    for shift in range(26):
        shifted_coset = ''.join([char_at(pos(c)-shift) for c in coset])
        shifted_coset_frequencies = frequency_tabulator2(shifted_coset, 1)[0]
        shifted_chi_squared_distance = sum([((cnt - coset_len*LETTER_PROBABILITIES[char])**2)/(coset_len*LETTER_PROBABILITIES[char])
                                            for char, cnt in shifted_coset_frequencies.items()])
        chi_squared_distances[char_at(shift+1)] = shifted_chi_squared_distance
    # It's not an exact science, return the lowest 5 chi-squared distances, sorted, so we can try alternatives if the first guess is wrong
    sorted_pairs = sorted(chi_squared_distances.items(), key=lambda x: x[1])[:5]
    return [(k, v) for k, v in sorted_pairs]

#### Message 1

In [24]:
ciphertext_9_1 = "PVVWA MKAZS TDUDK UJURB " + \
                 "KZPMH DYMXS KUYQZ BDSIU " + \
                 "OHFPA HVDIW EQPVV WAMNA " + \
                 "NRKTE BXEYC LXZUV FWPZF " + \
                 "YCDBH WTMPS UTKKV HAFZR " + \
                 "PVVWA MNANR ZEHCE SAFKT " + \
                 "WBKTA AVEOO XQEHJ QHTWA " + \
                 "NHYQJ HYQPS TTJWH GARVE " + \
                 "YEZNA RZZPV ZEYVR BPSIN " + \
                 "NSRWO RFIJP LFDCN OKICP " + \
                 "PKFOK FIQOD FZZSE FOFVY " + \
                 "AASQN GLODO CAJUB QUKFD " + \
                 "ZKVXH CEQSO PFKRF UPAZS " + \
                 "DHSQP CRSNS VFDOK FDSBQ " + \
                 "UKFDZ WJFDS VZPWI QPSOF " + \
                 "KTKTA RVOHO IMPWF ZKTZZ " + \
                 " " + \
                 "ZSGQJ RVZYS FDPVV OKBJF " + \
                 "EHLFE CEANH YQIWC IWIBQ " + \
                 "AKYUP SGMCS JSAHK TAWUQ " + \
                 "W"

In [25]:
frequencies = frequency_tabulator2(ciphertext_9_1, 7)
display_frequency_tables(frequencies)

<IPython.core.display.Math object>

In [26]:
d1 = distance_between_repeated_i_grams('vvwamna', ciphertext_9_1)
d2 = distance_between_repeated_i_grams('vwamnan', ciphertext_9_1)
d3 = distance_between_repeated_i_grams('wamnanr', ciphertext_9_1)
d4 = distance_between_repeated_i_grams('bqukfdz', ciphertext_9_1)
distances = d1 + d2 + d3 + d4
factors = [format_factors(prime_factors(n)) for n in distances]
display_prime_factors_table(distances, factors)

<IPython.core.display.Math object>

Since the 4 repeated septgrams are separated by distances that are all multiples of $4$, this suggests a keyword length of 4.  Checking the $IC$ next.

In [27]:
ic(ciphertext_9_1)

(0.04220196159690565, 6.335352794081267)

The $IC$ is low and relatively closer to $0.038$ than $0.065$ which suggests a polyalphabetic encipherment.  Using the average of cosets $IC$ method next to confirm $4$ as a good keyword length choice.

In [28]:
ic_avg_diffent_keywod_len(ciphertext_9_1, 10)

{'2': 0.05088564925985703,
 '3': 0.04049493813273341,
 '4': 0.06829040686823443,
 '5': 0.0427760309865573,
 '6': 0.04896046680320874,
 '7': 0.04356775300171527,
 '8': 0.07026672833795869,
 '9': 0.04065340764149613,
 '10': 0.053131268920742616}

This shows that $4$ (and multiples of $4$) are good keyword choices since cosets derived from that length have high $IC$'s.  Let's divide the message into cosets based on a keyword of size 4 and compute the letter frequencies for each coset and try to determine the keyword.

In [29]:
cosets = cosets_of_size_l(ciphertext_9_1, 4)
lets =  [keyword_letter_using_chi_squared(coset) for coset in cosets]
for guess in lets:
    print(guess)

[('w', 16.82023726145013), ('h', 164.22913991201565), ('m', 379.29943024997533), ('c', 385.31965359602304), ('v', 395.3786354653953)]
[('o', 34.542042235258606), ('a', 153.27339567391564), ('h', 211.52722218442582), ('q', 245.66722336520712), ('e', 363.52715359660147)]
[('r', 30.227174345159902), ('x', 295.9677084580993), ('d', 338.4366969446811), ('e', 346.40611723303437), ('h', 355.61740948834864)]
[('m', 30.011454061271554), ('z', 316.61068415902463), ('s', 336.65038716442376), ('y', 443.1694021480859), ('q', 447.3536441390748)]


In [30]:
keyword_guesses = [
    'worm'
]

In [31]:
# Let's try some and see what we get
for i, keyword in enumerate(keyword_guesses):
    print(f"Keyword: {keyword}")
    coder = VigenereSq(keyword)
    print(format_plaintext(coder.decipher(ciphertext_9_1)))
    print("\n")

Keyword: worm
thekeytodecryptingapolyalphabeticcipheristodeterminethekeywo
rdthingscouldgetabitcomplicatedhoweverifthekeywordislongerth
anthemessageitselfforthenthetechniquedescqibedinthischapterb
reaksdownbuthowcouldtwocorrespondentsremembersuchalongkeywor
dwellonewaytodoitmightbetoagreethatthekeywordistheentiretext
ofthedeclarationofindependenceortheconstitutionorthemilwauke
ewhitepagesgettheidea




yields
```
    the key to decrypting a polyalphabetic cipher is to determine the keyword
    things could get a bit complicated however if the keyword is longer than
    the message itself for then the technique descqibed in this chapter breaks
    down but how could two correspondents remember such a long keyword
    well one way to do it might be to agree that the keyword is the entire text
    of the declaration of independence or the constitution or the milwaukee
    whitepages get the idea
```


#### Message 2

In [32]:
ciphertext_9_2 = "XPERG POEVO WIMPF EVOBE " + \
                 "VODLS EWMQS MJQAF LBASI " + \
                 "PAPQG OOPOR YPVVN PXOVL " + \
                 "MWQEV OFUSK GHCZY TVOFA " + \
                 "VIOLO FQBGY TDLEY HTDSO " + \
                 "DUPIM OWZTZ QEJVX THOPA " + \
                 "LYSWJ VXDLE OHOLK EFAPQ " + \
                 "MGWYF ZYTVO BEXHG TFDWA " + \
                 "ILXLW PQSZL PPJHC PXIEH " + \
                 "OOXHC AUSIK GFXZV DYHTU " + \
                 "SKGDF YIEFH OZXHG YTNLE " + \
                 "OLCPX HOHZM IARVM JKRCW " + \
                 "ITGSE OFXIL GATCI AFJIL " + \
                 "TTGYU SVEGV GELIU IPZOT " + \
                 "QMJYH OWAIZ ATQWS ZGEGK"

In [33]:
frequencies = frequency_tabulator2(ciphertext_9_2, 7)
display_frequency_tables(frequencies)

<IPython.core.display.Math object>

In [34]:
d1 = distance_between_repeated_i_grams('zytvo', ciphertext_9_2)
d2 = distance_between_repeated_i_grams('vobe', ciphertext_9_2)
distances = d1 + d2
factors = [format_factors(prime_factors(n)) for n in distances]
display_prime_factors_table(distances, factors)

<IPython.core.display.Math object>

This suggests a keyword of length $3$, $4$, or $6$.  Let's do some $IC$ analysis.

In [35]:
ic(ciphertext_9_2)

(0.043299888517279825, 5.025853154084798)

Polyalphabetic and a keyword that is more likely $4$ or $6$.  Let's look at the $IC$ averages for different keyword lengths.

In [36]:
ic_avg_diffent_keywod_len(ciphertext_9_2, 14)

{'2': 0.05114093959731544,
 '3': 0.05528619528619529,
 '4': 0.054324324324324325,
 '5': 0.043954802259887,
 '6': 0.07061224489795918,
 '7': 0.03901435787020424,
 '8': 0.054883831199620675,
 '9': 0.05158199643493762,
 '10': 0.0496551724137931,
 '11': 0.04155104155104155,
 '12': 0.07722222222222222,
 '13': 0.042515455558933817,
 '14': 0.04703153988868274}

A keyword length of $6$ looks like a good choice since $0.0706$ is close to the value we'd look for.  Let's see what keywords might make sense.

In [37]:
cosets = cosets_of_size_l(ciphertext_9_2, 6)
lets =  [keyword_letter_using_chi_squared(coset) for coset in cosets]
for guess in lets:
    print(guess)

[('b', 31.62295516391733), ('x', 126.59950853488178), ('m', 254.15534768162752), ('v', 305.89975130391167), ('t', 334.8963646566901)]
[('l', 71.79356500060987), ('w', 119.52250775722173), ('x', 156.24472429446652), ('y', 157.5910950844078), ('r', 207.9286251761042)]
[('e', 18.64741663821346), ('k', 138.32421613117788), ('d', 143.90553697642588), ('u', 169.86943266389792), ('p', 260.2466807606641)]
[('a', 32.38807889439707), ('m', 114.36244405037672), ('x', 117.66176425779129), ('c', 180.80137321696182), ('t', 195.2973238427556)]
[('c', 28.563422593564866), ('o', 132.42897802089934), ('s', 145.53314675283596), ('d', 168.88941423259183), ('v', 190.88077485266774)]
[('h', 14.040243397097244), ('a', 111.50256560556956), ('u', 135.17313055007352), ('e', 151.04841745256454), ('n', 198.66834084170435)]


Well, `bleach` jumps right out.  Let's try that.

In [38]:
coder = VigenereSq('bleach')
print(format_plaintext(coder.decipher(ciphertext_9_2)))

weareintroublebethatrobertsmollymadeapointofcominguptometote
llmethejokeabouttheprimenumbersshewassoobnoxioussomehowidont
knowhowshemanagedtofigureoutthatthemessagewaspolyalphabetica
ndthatthekeywordwasjokeweneedanotherschemebethmaybeapolygrap
hicschemeletsreadchapterthreeofthisbooktofindouthowtoproceed


yields
```
    we are in trouble beth at roberts molly made a point of coming up to me to tell
    me the joke about the prime numbers she was so obnoxious somehow i dont
    know how she managed to figure out that the message was polyalphabetic and
    that the keyword was joke we need another scheme beth maybe a polygraphic
    scheme lets read chapter three of this book to find out how to proceed
```
