**Import packages needed in this notebook**

In [1]:
# Cell 1
from collections import Counter
from pathlib import Path

from google.colab import drive

**Mount your Google Drive and determine the path to this notebook**

In [2]:
# Cell 2
# Mount Google Drive inside the Colab VM (forcing a fresh mount on every run)
drive.mount("/content/gdrive", force_remount=True)
# Folder that holds this session's notebook and its data files
notebook_path = Path("/content/gdrive/MyDrive/wdts-psi") / "Session 04 - Algorithms"
notebook_path

Mounted at /content/gdrive


PosixPath('/content/gdrive/MyDrive/wdts-psi/Session 04 - Algorithms')

**Open the file "bigram_ciphertext.txt" for binary reading**

In [3]:
# Cell 3
# Load the whole file as raw bytes; a bytearray yields one int per byte
# when indexed or iterated.
file_name = "bigram_ciphertext.txt"
file_path = notebook_path / file_name
with file_path.open(mode="rb") as f_in:
    f_bytes = bytearray(f_in.read())
print(f'Bigram analysis of file "{file_name}"')

Bigram analysis of file "bigram_ciphertext.txt"


**Create a Counter that tallies each pair of successive bytes (bigram) in the file**

In [4]:
# Cell 4
# Tally every pair of adjacent bytes. A sequence of n bytes contains n - 1
# bigrams; pairing each byte with its successor via zip covers all of them,
# including the final pair, and yields nothing for files shorter than 2 bytes.
# Keys are (first_byte, second_byte) tuples of ints; values are occurrence counts.
bigrams = Counter(zip(f_bytes, f_bytes[1:]))
# Preview the first ten (bigram, count) entries in first-seen order
list(bigrams.items())[:10]


[((203, 193), 39),
 ((193, 208), 13),
 ((208, 201), 16),
 ((201, 212), 133),
 ((212, 197), 497),
 ((197, 204), 142),
 ((204, 197), 210),
 ((197, 201), 608),
 ((201, 206), 560),
 ((206, 211), 153)]

**Sort the bigrams in descending order of their count (the Counter's item value),\
so the bigrams with the highest frequency appear first**

In [5]:
# Cell 5
# most_common() with no argument returns every (bigram, count) pair ordered
# from the highest count to the lowest; equal counts keep first-seen order.
sorted_bigrams = bigrams.most_common()
# Show the ten most frequent entries
sorted_bigrams[:10]

[((197, 206), 950),
 ((195, 200), 889),
 ((197, 210), 729),
 ((197, 201), 608),
 ((201, 206), 560),
 ((201, 195), 513),
 ((212, 197), 497),
 ((201, 197), 449),
 ((196, 197), 418),
 ((206, 197), 392)]

**Print the top 10 most frequently occurring bigrams in the text file**

In [None]:
# Cell 6
# Total number of bigram occurrences, used to turn counts into relative frequencies
num_bigrams = sum(bigrams.values())
top_entries = sorted_bigrams[:10]
for k, v in top_entries:
    # Render the bigram as text: chr() maps each byte value in the key tuple
    # to the Unicode character with the same code point
    s = "".join(chr(code) for code in k)
    print(f'Bigram {k}, "{s}": freq = {v/num_bigrams:>5.2%}')