In [1]:
import re
from collections import defaultdict

In [2]:
def preprocess_text(text):
    """Return ``text`` reduced to its ASCII letters, all in lowercase.

    Every character outside ``a-z`` / ``A-Z`` (spaces, digits, punctuation,
    newlines, non-ASCII characters) is dropped, so the result is a single
    unbroken run of letters.
    """
    non_letters = re.compile(r'[^a-zA-Z]')
    letters_only = non_letters.sub('', text)
    return letters_only.lower()

In [3]:
def count_frequencies(text, n):
    """Count how often each n-gram (substring of length ``n``) occurs in ``text``.

    A window of width ``n`` is slid over ``text`` one character at a time, so
    overlapping occurrences are all counted.

    Args:
        text: The string to scan.
        n: Length of the n-grams; must be at least 1.

    Returns:
        A ``defaultdict(int)`` mapping each n-gram to its number of
        occurrences. It is empty when ``text`` is shorter than ``n``.

    Raises:
        ValueError: If ``n`` is smaller than 1. Without this check ``n == 0``
            would count the empty string ``len(text) + 1`` times and a
            negative ``n`` would produce meaningless slices.
    """
    if n < 1:
        raise ValueError(f"n-gram size must be at least 1, got {n}")

    freq_dict = defaultdict(int)
    # The last valid window starts at index len(text) - n.
    for start in range(len(text) - n + 1):
        freq_dict[text[start:start + n]] += 1
    return freq_dict

In [4]:
def sort_frequencies(freq_dict):
    """Return the ``(ngram, count)`` pairs of ``freq_dict``, highest count first.

    Pairs with equal counts keep the relative order in which they appear in
    ``freq_dict``.
    """
    def by_count(pair):
        return pair[1]

    ranked = list(freq_dict.items())
    ranked.sort(key=by_count, reverse=True)
    return ranked

In [5]:
def print_frequencies(freq_list, n=None):
    """Print one ``ngram: count`` line for each of the first ``n`` entries.

    ``freq_list`` is a sequence of ``(ngram, count)`` pairs. When ``n`` is
    ``None`` the whole sequence is printed.
    """
    shown = freq_list[:n]
    for entry in shown:
        ngram, count = entry
        print(f"{ngram}: {count}")

In [6]:
def analyze_text(text, ngram_sizes=(1, 2, 3), top_n=30):
    """Print n-gram frequency tables for ``text``.

    The text is first reduced to lowercase letters by ``preprocess_text``.
    For every size in ``ngram_sizes`` the ``top_n`` most frequent n-grams are
    printed; for size 1 the complete single-letter table is printed as well.

    Args:
        text: Raw text to analyse.
        ngram_sizes: Iterable of n-gram lengths to report. The default is an
            immutable tuple so the default object cannot be mutated between
            calls.
        top_n: Number of entries shown under each "Top ..." heading.
    """
    preprocessed_text = preprocess_text(text)

    for n in ngram_sizes:
        print(f"\nTop {top_n} {n}-grams:")
        freq_dict = count_frequencies(preprocessed_text, n)
        sorted_freq_list = sort_frequencies(freq_dict)
        # Honour top_n for every size, so the heading always matches what is
        # listed beneath it (previously size 1 ignored top_n here).
        print_frequencies(sorted_freq_list, top_n)
        if n == 1:
            # The full single-letter table is what a substitution-cipher
            # mapping is built from, so it is always shown in full.
            print("\nAll single-letter frequencies:")
            print_frequencies(sorted_freq_list)

In [7]:
def decrypt(text, mapping):
    """Apply the letter-substitution ``mapping`` to ``text``.

    ``mapping`` is looked up by the lowercase form of each character. A
    character whose lowercase form is a key is replaced by the mapped value,
    upper-cased when the original character was uppercase. All other
    characters are copied through unchanged.
    """
    def substitute(symbol):
        key = symbol.lower()
        if key not in mapping:
            return symbol
        replacement = mapping[key]
        return replacement.upper() if symbol.isupper() else replacement

    return ''.join(map(substitute, text))

In [8]:
def main():
    """Run the demo: n-gram analysis of two sample texts, then of a ciphertext,
    followed by decryption of that ciphertext with a hand-built substitution
    mapping. All results are printed to standard output.
    """
    # Part (a): Analyze two different texts
    text1 = """To be, or not to be, that is the question: Whether 'tis nobler in the mind to suffer
    The slings and arrows of outrageous fortune, Or to take arms against a sea of troubles
    And by opposing end them."""

    text2 = """In the beginning God created the heaven and the earth. And the earth was without form, and void;
    and darkness was upon the face of the deep. And the Spirit of God moved upon the face of the waters."""

    print("Text 1 Analysis:")
    analyze_text(text1)

    print("\n\nText 2 Analysis:")
    analyze_text(text2)

    # Part (d): Decrypt the given ciphertext using frequency analysis
    ciphertext = """bt jpx rmlx pcuv amlx icvjp ibtwxvr ci m lmt'r pmtn, mtn yvcjx cdxv mwmbtrj jpx amtngxrjbah uqct
    jpx qgmrjxv ci jpx ymgg ci jpx hbtw'r qmgmax; mtn jpx hbtw rmy jpx qmvj ci jpx pmtn jpmj yvcjx.
    jpxt jpx hbtw'r acutjxtmtax ymr apmtwxn, mtn pbr jpcuwpjr jvcufgxn pbl, rc jpmj jpx scbtjr ci pbr
    gcbtr yxvx gccrxn, mtn pbr htxxr rlcjx ctx mwmbtrj mtcjpxv. jpx hbtw avbxn mgcun jc fvbtw bt jpx
    mrjvcgcwxvr, jpx apmgnxmtr, mtn jpx rccjprmexvr. mtn jpx hbtw rqmhx, mtn rmbn jc jpx ybrx lxt
    ci fmfegct, ypcrcxdxv rpmgg vxmn jpbr yvbjbtw, mtn rpcy lx jpx btjxvqvxjmjbct jpxvxci, rpmgg fx
    agcjpxn ybjp ramvgxj, mtn pmdx m apmbt ci wcgn mfcuj pbr txah, mtn rpmgg fx jpx jpbvn vugxv
    bt jpx hbtwncl. jpxt amlx bt mgg jpx hbtw'r ybrx lxt; fuj jpxe acugn tcj vxmn jpx yvbjbtw, tcv lmhx
    htcyt jc jpx hbtw jpx btjxvqvxjmjbct jpxvxci. jpxt ymr hbtw fxgrpmoomv wvxmjge jvcufgxn, mtn
    pbr acutjxtmtax ymr apmtwxn bt pbl, mtn pbr gcvnr yxvx mrjctbrpxn. tcy jpx kuxxt, fe vxmrct ci
    jpx ycvnr ci jpx hbtw mtn pbr gcvnr, amlx btjc jpx fmtkuxj pcurx; mtn jpx kuxxt rqmhx mtn rmbn, c
    hbtw, gbdx icvxdxv; gxj tcj jpe jpcuwpjr jvcufgx jpxx, tcv gxj jpe acutjxtmtax fx apmtwxn; jpxvx br
    m lmt bt jpe hbtwncl, bt ypcl br jpx rqbvbj ci jpx pcge wcnr; mtn bt jpx nmer ci jpe ybrncl ci jpx
    wcnr, ymr icutn bt pbl; ypcl jpx hbtw txfuapmntxoomv jpe imjpxv, jpx hbtw, b rme, jpe imjpxv,
    lmnx lmrjxv ci jpx lmwbabmtr, mrjvcgcwxvr, apmgnxmtr, mtn rccjprmexvr; icvmrluap mr mt
    xzaxggxtj rqbvbj, mtn htcygxnwx, mtn utnxvrjmtnbtw, btjxvqvxjbtw ci nvxmlr, mtn rpcybtw ci
    pmvn rxtjxtaxr, mtn nbrrcgdbtw ci ncufjr, yxvx icutn bt jpx rmlx nmtbxg, ypcl jpx hbtw tmlxn
    fxgjxrpmoomv; tcy gxj nmtbxg fx amggxn, mtn px ybgg rpcy jpx btjxvqvxjmjbct"""

    print("\n\nAnalyzing Ciphertext:")
    # analyze_text only prints its tables and returns nothing, so there is no
    # result to keep here.
    analyze_text(ciphertext)

    # Example of a mapping based on frequency analysis
    # (cipher letter -> plaintext letter). 'u' has no entry: decrypt() copies
    # unmapped characters through unchanged, and in this cipher 'u' stands
    # for itself.
    mapping = {
        'm': 'a',
        'f': 'b',
        'a': 'c',
        'n': 'd',
        'x': 'e',
        'i': 'f',
        'w': 'g',
        'p': 'h',
        'b': 'i',
        's': 'j',
        'h': 'k',
        'g': 'l',
        'l': 'm',
        't': 'n',
        'c': 'o',
        'q': 'p',
        'k': 'q',
        'v': 'r',
        'r': 's',
        'j': 't',
        'd': 'v',
        'y': 'w',
        'z': 'x',
        'e': 'y',
        'o': 'z',
    }

    # Decrypt the ciphertext using the mapping
    decrypted_text = decrypt(ciphertext, mapping)

    print("\nDecrypted Text:\n")
    print(decrypted_text)

In [9]:
# Entry-point guard: run the demo only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Text 1 Analysis:

Top 30 1-grams:
t: 19
o: 18
e: 17
s: 13
n: 12
r: 11
a: 11
i: 8
h: 7
u: 6
b: 5
f: 5
d: 4
g: 4
l: 3
m: 3
w: 2
p: 2
q: 1
k: 1
y: 1

All single-letter frequencies:
t: 19
o: 18
e: 17
s: 13
n: 12
r: 11
a: 11
i: 8
h: 7
u: 6
b: 5
f: 5
d: 4
g: 4
l: 3
m: 3
w: 2
p: 2
q: 1
k: 1
y: 1

Top 30 2-grams:
th: 6
he: 6
in: 5
to: 4
rt: 4
nd: 4
ob: 3
eo: 3
or: 3
ti: 3
st: 3
es: 3
er: 3
sa: 3
ou: 3
be: 2
no: 2
ot: 2
et: 2
is: 2
bl: 2
le: 2
em: 2
dt: 2
os: 2
ng: 2
an: 2
ar: 2
ro: 2
of: 2

Top 30 3-grams:
the: 5
tob: 2
obe: 2
eor: 2
eth: 2
tis: 2
ert: 2
ble: 2
hem: 2
ndt: 2
ing: 2
san: 2
and: 2
ort: 2
beo: 1
orn: 1
rno: 1
not: 1
ott: 1
tto: 1
bet: 1
tha: 1
hat: 1
ati: 1
ist: 1
sth: 1
heq: 1
equ: 1
que: 1
ues: 1


Text 2 Analysis:

Top 30 1-grams:
e: 23
t: 16
a: 15
n: 13
h: 13
d: 12
o: 11
i: 7
r: 7
s: 6
f: 6
g: 4
w: 4
p: 4
c: 3
v: 3
u: 3
m: 2
b: 1
k: 1

All single-letter frequencies:
e: 23
t: 16
a: 15
n: 13
h: 13
d: 12
o: 11
i: 7
r: 7
s: 6
f: 6
g: 4
w: 4
p: 4
c: 3
v: 3
u: 3
m: 2
b: 1
k: 1

Top