# Simple scripts to analyze and decrypt the cipher

Author: Daniel Konecny (xkonec75)

## Load initial data

In [91]:
# We are working with digrams.
n = 2

# Read the first line of the ciphertext file. readline() keeps the trailing
# newline, so surrounding whitespace is stripped; otherwise the newline would
# become part of (or all of) the last n-gram.
with open('cipher.txt', 'r') as cipher_file:
    ciphertext = cipher_file.readline().strip()

# Split cipher into lowercase n-grams.
ciphertext = [ciphertext[i:i + n].lower() for i in range(0, len(ciphertext), n)]

print(ciphertext)

['bp', 'gq', 'af', 'my', 'ay', 'ye', 'ui', 'xe', 'af', 'xk', 'hm', 'fq', 'vl', 'uq', 'gb', 'nb', 'xa', 'nm', 'mp', 'my', 'uc', 'hq', 'pl', 'yo', 'wc', 'km', 'qc', 'vl', 'gk', 'yb', 'ye', 'pb', 'uv', 'ag', 'cx', 'bg', 'bg', 'da', 'xz', 'wz', 'ce', 'xz', 'uc', 'xf', 'km', 'cp', 'ol', 'gm', 'am', 'cx', 'yt', 'ce', 'am', 'kb', 'ya', 'ri', 'ch', 'ex', 'xz', 'wz', 'ai', 'qn', 'zd', 'my', 'gl', 'nc', 'hy', 'pc', 'vn', 'km', 'ai', 'gc', 'ce', 'mk', 'kb', 'ai', 'ax', 'vh', 'zg', 'en', 'vl', 'bp', 'ya', 'ye', 'pb', 'eq', 'ir', 'ug', 'bp', 'ai', 'gs', 'af', 'my', 'hy', 'ay', 'dx', 'yl', 'qr', 'ae', 'wz', 'me', 'po', 'ry', 'zk', 'bp', 'io', 'nq', 'xe', 'gb', 'rq', 'ar', 'ay', 'xd', 'ai', 'ie', 'qe', 'mf', 'zv', 'bp', 'yo', 'me', 'yh', 'cp', 'zw', 'hg', 'iy', 'bp', 'kq', 'yq', 'ri', 'bp', 'pm', 'nc', 'vh', 'zw', 'bp', 'ru', 'ya', 'hn', 'cp', 'mg', 'xy', 're', 'ef', 'pc', 'vl', 'qr', 'ae', 'wz', 'fm', 'ra', 'me', 'eq', 'vl', 'ri', 'yh', 'hn', 'yr', 'dx', 'oh', 'zd', 'tm', 'ai', 'ak', 'qg', 'cx', 'xd

In [92]:
import re

# We are working with digrams.
n = 2

# Only the first line of the known-plaintext file is used.
with open('text.txt', 'r') as text_file:
    plaintext = text_file.readline()

# Delete everything that is not a letter. The text is lowercased first so one
# character class covers all letters. A raw string is used for the pattern, and
# unlike the previous \W+ class this also removes digits and underscores.
isalpha_pattern = re.compile(r'[^a-z]+')
plaintext = isalpha_pattern.sub('', plaintext.lower())

# Replace all 'j's with 'i's.
j_pattern = re.compile('j')
plaintext = j_pattern.sub("i", plaintext)

# Whenever n-gram should contain two identical letters, insert 'x' between them.
# `indices` stores insertion positions already expressed in the coordinates of
# the final string (original index + number of 'x's inserted before it), so
# they can be applied in ascending order without further shifting.
previous = ''
indices = []
for index, letter in enumerate(plaintext):
    position = index + len(indices)
    # A repeat only matters when the letter is not the first one of its n-gram.
    if letter == previous and position % n != 0:
        indices.append(position)
    previous = letter
for index in indices:
    plaintext = plaintext[:index] + 'x' + plaintext[index:]

# Place 'x's at the end to match the needed n-gram length.
if len(plaintext) % n != 0:
    plaintext += (n - len(plaintext) % n) * 'x'

# Split text into n-grams.
plaintext = [plaintext[i:i + n] for i in range(0, len(plaintext), n)]

print(plaintext)

['th', 'ew', 'ho', 'le', 'ne', 'ig', 'hb', 'or', 'ho', 'od', 'ab', 'ou', 'nd', 'sw', 'it', 'hl', 'oc', 'al', 'ta', 'le', 'sh', 'au', 'nt', 'ed', 'sp', 'ot', 'sa', 'nd', 'tw', 'il', 'ig', 'ht', 'su', 'pe', 'rs', 'ti', 'ti', 'on', 'sx', 'st', 'ar', 'sx', 'sh', 'ox', 'ot', 'an', 'dm', 'et', 'eo', 'rs', 'gl', 'ar', 'eo', 'ft', 'en', 'er', 'ac', 'ro', 'sx', 'st', 'he', 'va', 'lx', 'le', 'yt', 'ha', 'ni', 'na', 'ny', 'ot', 'he', 'rp', 'ar', 'to', 'ft', 'he', 'co', 'un', 'tr', 'ya', 'nd', 'th', 'en', 'ig', 'ht']


## Test the occurrence of letters

Count how each plaintext digram is encoded in the ciphertext to understand how the cipher works.

In [93]:
import pprint

# For every plaintext n-gram, tally how often each ciphertext n-gram is
# aligned with it.
occurrences = {}
for cipher, plain in zip(ciphertext, plaintext):
    counts_for_plain = occurrences.setdefault(plain, {})
    counts_for_plain[cipher] = counts_for_plain.get(cipher, 0) + 1

# Collect the n-grams that show up among their own encodings.
encoded_into_themself = [
    plain_ngram
    for plain_ngram, cipher_counts in occurrences.items()
    for cipher_ngram in cipher_counts
    if plain_ngram == cipher_ngram
]
if encoded_into_themself:
    print(f"N-grams encoded into themself: {encoded_into_themself}")
else:
    print("No n-grams encoded into themself.")

print("Occurrences (letter: encoding)")
pprint.pprint(occurrences)

No n-grams encoded into themself.
Occurrences (letter: encoding)
{'ab': {'hm': 1},
 'ac': {'ch': 1},
 'al': {'nm': 1},
 'an': {'cp': 1},
 'ar': {'ce': 3},
 'au': {'hq': 1},
 'co': {'ax': 1},
 'dm': {'ol': 1},
 'ed': {'yo': 1},
 'en': {'ya': 2},
 'eo': {'am': 2},
 'er': {'ri': 1},
 'et': {'gm': 1},
 'ew': {'gq': 1},
 'ft': {'kb': 2},
 'gl': {'yt': 1},
 'ha': {'nc': 1},
 'hb': {'ui': 1},
 'he': {'ai': 3},
 'hl': {'nb': 1},
 'ho': {'af': 2},
 'ht': {'pb': 2},
 'ig': {'ye': 3},
 'il': {'yb': 1},
 'it': {'gb': 1},
 'le': {'my': 3},
 'lx': {'zd': 1},
 'na': {'pc': 1},
 'nd': {'vl': 3},
 'ne': {'ay': 1},
 'ni': {'hy': 1},
 'nt': {'pl': 1},
 'ny': {'vn': 1},
 'oc': {'xa': 1},
 'od': {'xk': 1},
 'on': {'da': 1},
 'or': {'xe': 1},
 'ot': {'km': 3},
 'ou': {'fq': 1},
 'ox': {'xf': 1},
 'pe': {'ag': 1},
 'ro': {'ex': 1},
 'rp': {'gc': 1},
 'rs': {'cx': 2},
 'sa': {'qc': 1},
 'sh': {'uc': 2},
 'sp': {'wc': 1},
 'st': {'wz': 2},
 'su': {'uv': 1},
 'sw': {'uq': 1},
 'sx': {'xz': 3},
 'ta': {'mp': 1},

## Decode from list of n-grams

Although the digram list is not complete, this can still provide some information.

In [101]:
# Delete 'x's at the end used as padding.
def delete_xs_at_the_end(text):
    """Return ``text`` without its trailing run of 'x' characters.

    Every 'x' at the end of the string is removed, not just a single padding
    character, so a text that genuinely ends in 'x' loses it as well.

    Args:
        text: The decoded string to clean up.

    Returns:
        The string with all trailing 'x' characters removed; an empty or
        all-'x' string yields ''.
    """
    return text.rstrip('x')

# Delete additional 'x's added to digrams.
def delete_xs_from_doubled(text):
    """Remove every 'x' that sits between two identical characters.

    All decisions are made against the original ``text``, so 'axaxa' becomes
    'aaa'. An 'x' at the very start or very end of the text has only one
    neighbour and is therefore always kept; this avoids both the IndexError
    for a trailing 'x' and the wrap-around comparison with the last character
    for a leading 'x'.

    Args:
        text: The decoded string, possibly containing separator 'x's.

    Returns:
        The string with the separating 'x' characters removed.
    """
    last = len(text) - 1
    return ''.join(
        character
        for idx, character in enumerate(text)
        if not (
            character == 'x'
            and 0 < idx < last
            and text[idx - 1] == text[idx + 1]
        )
    )

# Map each ciphertext n-gram to the plaintext n-gram aligned with it
# (for a repeated ciphertext n-gram the last pairing wins).
decodings = dict(zip(ciphertext, plaintext))

# Substitute the known digrams; unknown ones are kept in upper case so they
# stand out in the output.
decoded = "".join(
    decodings.get(digram, digram.upper()) for digram in ciphertext
)

# Strip the padding first, then the 'x's separating doubled letters.
decoded = delete_xs_from_doubled(delete_xs_at_the_end(decoded))
print(decoded)

thewholeneighborhoodaboundswithlocaltaleshauntedspotsandtwilightsuperstitionsstarsshootandmeteorsglareofteneracrossthevalleythaninanyotherpartofthecountryandthenightEQIRUGthheGSholenineDXYLQRAEstMEPORYZKthIONQoritRQARneXDheIEQEMFZVthedMEYHanZWHGIYthKQYQerthPMhaunZWthRUenHNanMGXYREEFnandQRAEstFMRAMEEQnderYHHNYRDXOHlxTMheAKQGrsXDthAQYIRUthAQCKACIYtionXDHOigSIeoPNorQRMHPXUGthoutaheNOitRUsaYFLIQXEAtoMIthREhostXDCNRQURantroxKAerUPXQIAAQFNNOMIenHCCZIYedPQNELIacanADHLalBYCVMEenQEYMRQUZPMMTedSIYHPGheIRQDBVtionarGVaranKVhoRUYQeranONADCVROenLIthRAountIGDXTDUFCZIGYHEPMDPYYHthREMDMEXDniIPtaURDXntheUGPYQXftheUGndUHshauntsaIRADZPonBHneKLothevalleILWBROMGndPMtiEAstotheNOEHARntroNOsandRQpeHRalYNtothYQRHYHitEDOHHNSIHNPMADEIAQLKRUtaPHRO


## Helpful information from digram pairs

Additional information about these digram pair patterns can be found in [this Cryptography Stack Exchange answer](https://crypto.stackexchange.com/a/35748).

In [102]:
# Map each plaintext n-gram to the ciphertext n-gram aligned with it
# (for a repeated plaintext n-gram the last pairing wins).
encodings = dict(zip(plaintext, ciphertext))

# Plaintext n-grams already reported by one of the pattern checks below.
used_plains = []

print("Pairs of form AB => CD and CD => AB:")
for plain, cipher in encodings.items():
    # .get() yields None for an unknown n-gram, which never equals `plain`.
    if encodings.get(cipher) == plain:
        print(f"- {plain} <=> {cipher}")
        used_plains.append(plain)

print("Pairs of form AB => BC.")
for plain, cipher in encodings.items():
    if cipher[0] == plain[1]:
        print(f"- {plain} => {cipher}")
        used_plains.append(plain)

print("Pairs of form AB => CD and CD => EA.")
for plain, cipher in encodings.items():
    if cipher not in encodings:
        continue
    if encodings[cipher][1] == plain[0]:
        print(f"- {plain} => {cipher} and {cipher} => {encodings[cipher]}")
        used_plains.append(plain)

print("Pairs of form AB => CD and CD => BE.")
for plain, cipher in encodings.items():
    if cipher not in encodings:
        continue
    if encodings[cipher][0] == plain[1]:
        print(f"- {plain} => {cipher} and {cipher} => {encodings[cipher]}")
        used_plains.append(plain)

print("Pairs of form AB => CD and AD => CE.")
for plain, cipher in encodings.items():
    # The n-gram "AD": first plaintext letter followed by second cipher letter.
    mixed = plain[0] + cipher[1]
    if mixed not in encodings:
        continue
    if encodings[mixed][0] == cipher[0]:
        print(f"- {plain} => {cipher} and {plain[0] + cipher[1]} => {encodings[plain[0] + cipher[1]]}")
        used_plains.append(plain)

print("Remaining pairs:")
for plain, cipher in encodings.items():
    if plain in used_plains:
        continue
    print(f"- {plain} => {cipher}")

Pairs of form AB => CD and CD => AB:
- gl <=> yt
- en <=> ya
- yt <=> gl
- ya <=> en
Pairs of form AB => BC.
- su => uv
- sx => xz
- ox => xf
- er => ri
- ac => ch
Pairs of form AB => CD and CD => EA.
Pairs of form AB => CD and CD => BE.
Pairs of form AB => CD and AD => CE.
Remaining pairs:
- th => bp
- ew => gq
- ho => af
- le => my
- ne => ay
- ig => ye
- hb => ui
- or => xe
- od => xk
- ab => hm
- ou => fq
- nd => vl
- sw => uq
- it => gb
- hl => nb
- oc => xa
- al => nm
- ta => mp
- sh => uc
- au => hq
- nt => pl
- ed => yo
- sp => wc
- ot => km
- sa => qc
- tw => gk
- il => yb
- ht => pb
- pe => ag
- rs => cx
- ti => bg
- on => da
- st => wz
- ar => ce
- an => cp
- dm => ol
- et => gm
- eo => am
- ft => kb
- ro => ex
- he => ai
- va => qn
- lx => zd
- ha => nc
- ni => hy
- na => pc
- ny => vn
- rp => gc
- to => mk
- co => ax
- un => vh
- tr => zg


In [103]:
# Set key as was determined by hand with help of previous code.
key = [
    ['g', 'e', 'r', 'i', 'y'],
    ['p', 'a', 'c', 'h', 'n'],
    ['w', 'q', 's', 'u', 'v'],
    ['k', 'o', 'x', 'f', 'd'],
    ['t', 'm', 'z', 'b', 'l']
]


def decode_digram(encoded_digram):
    """Decode one ciphertext digram with the module-level ``key`` square.

    The rules applied are those of a Playfair-style cipher:

    * both letters in the same row: each is replaced by the letter to its
      left, wrapping around the row;
    * both letters in the same column: each is replaced by the letter above
      it, wrapping around the column;
    * otherwise: each letter keeps its row and takes the other letter's
      column.

    Args:
        encoded_digram: A string of exactly two letters contained in ``key``.

    Returns:
        The decoded two-letter string.

    Raises:
        ValueError: If ``encoded_digram`` is not two characters long or holds
            a character that is not present in ``key``.
    """
    if len(encoded_digram) != 2:
        raise ValueError(
            f"expected a digram of 2 letters, got {encoded_digram!r}"
        )

    # Rebuilt on every call so the lookup always reflects the current `key`;
    # with a 5x5 square this is negligible work.
    positions = {
        letter: (row_index, column_index)
        for row_index, row in enumerate(key)
        for column_index, letter in enumerate(row)
    }
    try:
        row1, column1 = positions[encoded_digram[0]]
        row2, column2 = positions[encoded_digram[1]]
    except KeyError as error:
        raise ValueError(
            f"letter {error.args[0]!r} of digram {encoded_digram!r} "
            "is not in the key"
        ) from None

    if row1 == row2:
        width = len(key[row1])
        return key[row1][(column1 - 1) % width] + key[row2][(column2 - 1) % width]
    if column1 == column2:
        height = len(key)
        return key[(row1 - 1) % height][column1] + key[(row2 - 1) % height][column2]
    return key[row1][column2] + key[row2][column1]

In [105]:
# Run every ciphertext digram through the key square and concatenate the results.
decoded = ''.join(map(decode_digram, ciphertext))

# Strip the padding first, then the 'x's separating doubled letters.
decoded = delete_xs_from_doubled(delete_xs_at_the_end(decoded))
print(f"{decoded}\n")

thewholeneighborhoodaboundswithlocaltaleshauntedspotsandtwilightsuperstitionsstarsshootandmeteorsglareofteneracrossthevalleythaninanyotherpartofthecountryandthenightmarewithherwholeninefoldseemstomakeitthefavoritesceneofhergambolsthedominantspirithoweverthathauntsthisenchantedregionandseemstobecommanderinchiefofallthepowersoftheairistheapparitionofafigureonhorsebackwithoutaheaditissaidbysometobetheghostofahessiantrooperwhoseheadhadbeencarriedawaybyacannonballinsomenamelessbattleduringtherevolutionarywarandwhoiseverandanonseenbythecountryfolkhurryingalonginthegloomofnightasifonthewingsofthewindhishauntsarenotconfinedtothevalleybutextendattimestotheadiacentroadsandespeciallytothevicinityofachurchatnogreatdistance

