**Cipher Activities**

This notebook includes the pieces of code related to each cipher type of problem 3.

Nahomy Varada Salazar (00211623) & Atik J. Santellán (00326859)

In [10]:

import collections

def frequency_analysis(ciphertext):
    #Computes the letter frequency of a given ciphertext
    frequency = collections.Counter(ciphertext)
    total_letters = sum(frequency.values())

    sorted_freq = sorted(
        frequency.items(), key=lambda item: item[1], reverse=True
    )

    print("\nFrequency Analysis:")
    for letter, count in sorted_freq:
        if letter.isalpha():  # Ignore non-letter characters
            print(f"{letter}: {count} ({count / total_letters:.2%})")

    return sorted_freq

def apply_substitution(ciphertext, substitution_map):
    #Applies a substitution cipher based on a given mapping
    return "".join(
        substitution_map.get(char, char) for char in ciphertext
    )

# Given ciphertext
ciphertext = """EMGLOSUDCGDNCUSWYSFHNSFCYKDPUMLWGYICOXYSIPJCK
QPKUGKMGOLICGINCGACKSNISACYKZSCKXECJCKSHYSXCG
OIDPKZCNKSHICGIWYGKKGKGOLDSILKGOIUSIGLEDSPWZU
GFZCCNDGYYSFUSZCNXEOJNCGYEOWEUPXEZGACGNFGLKNS
ACIGOIYCKXCJUCIUZCFZCCNDGYYSFEUEKUZCSOCFZCCNC
IACZEJNCSHFZEJZEGMXCYHCJUMGKUCY"""

#Frequency Analysis
sorted_frequencies = frequency_analysis(ciphertext)

#Substitutions (Manually adjusted after analysis)
substitutions = {
    'C': 'e', 'I': 'd', 'K': 's', 'G': 'a',
    'O': 'n', 'S': 'o', 'U': 't', 'M': 'm',
    'L': 'y', 'Y': 'r', 'E': 'i', 'W': 'g',
    'D': 'b', 'N': 'l', 'P': 'u', 'Z': 'h',
    'F': 'w', 'H': 'f', 'X': 'p', 'J': 'c'
}

#Apply Substitutions
decrypted_text = apply_substitution(ciphertext, substitutions)

print("\nDecrypted Text:\n")
print(decrypted_text)


Frequency Analysis:
C: 37 (14.18%)
G: 24 (9.20%)
S: 20 (7.66%)
K: 18 (6.90%)
Y: 15 (5.75%)
I: 15 (5.75%)
U: 14 (5.36%)
N: 13 (4.98%)
Z: 13 (4.98%)
E: 12 (4.60%)
O: 10 (3.83%)
F: 9 (3.45%)
D: 8 (3.07%)
L: 7 (2.68%)
X: 7 (2.68%)
J: 7 (2.68%)
P: 6 (2.30%)
M: 5 (1.92%)
W: 5 (1.92%)
H: 5 (1.92%)
A: 5 (1.92%)
Q: 1 (0.38%)

Decrypted Text:

imaynotbeabletogrowflowersbutmygardenproduces
QustasmanydeadleaAesoldoAershoespiecesofropea
ndbushelsofdeadgrassasanybodysandtodayibought
awheelbarrowtohelpinclearingitupihaAealwayslo
Aedandrespectedthewheelbarrowitistheonewheele
dAehicleofwhichiamperfectmaster


In [8]:
import re
import numpy as np
from collections import Counter

def find_repeated_sequences(ciphertext, min_length=3):
    #Finds repeated sequences in the ciphertext
    sequences = {}
    for i in range(len(ciphertext) - min_length):
        seq = ciphertext[i:i + min_length]
        if seq in sequences:
            sequences[seq].append(i)
        else:
            sequences[seq] = [i]

    # Filter out sequences that occur only once
    repeated = {seq: positions for seq, positions in sequences.items() if len(positions) > 1}
    return repeated

def gcd_of_distances(positions):
    #Computes the GCD of distances between repeated sequence positions
    distances = [positions[i+1] - positions[i] for i in range(len(positions) - 1)]
    return np.gcd.reduce(distances)

def split_into_columns(ciphertext, key_length):
    #Splits the ciphertext into columns based on the key lengtH
    columns = [''] * key_length
    for i, char in enumerate(ciphertext):
        columns[i % key_length] += char
    return columns

def frequency_analysis(text):
    #Performs frequency analysis on a text segment
    letter_counts = Counter(text)
    total_letters = sum(letter_counts.values())
    frequencies = {letter: count / total_letters for letter, count in letter_counts.items()}
    return sorted(frequencies.items(), key=lambda item: item[1], reverse=True)

def vigenere_decrypt(ciphertext, key):
    # Decrypts the ciphertext using the Vigenere cipher
    key = key.upper()
    plaintext = []
    key_length = len(key)
    
    for i, char in enumerate(ciphertext):
        if char.isalpha():
            shift = ord(key[i % key_length]) - ord('A')
            decrypted_char = chr(((ord(char) - ord('A') - shift) % 26) + ord('A'))
            plaintext.append(decrypted_char)
        else:
            plaintext.append(char)
    
    return ''.join(plaintext)

# Given ciphertext
ciphertext = """KCCPKBGUFDPHQTYAVINRRTMVGRKDNBVFDETDGILTXRGUD
DKOTFMBPVGEGLTGCKQRACQCWDNAWCRXIZAKFTLEWRPTYC
QKYVXCHKFTPONCQQRHJVAJUWETMCMSPKQDYHJVDAHCTRL
SVSKCGCZQQDZXGSFRLSWCWSJTBHAFSIASPRJAHKJRJUMV
GKMITZHFPDISPZLVLGWTFPLKKEBDPGCEBSHCTJRWXBAFS
PEZQNRWXCVYCGAONWDDKACKAWBBIKFTIOVKCGGHJVLNHI
FFSQESVYCLACNVRWBBIREPBBVFEXOSCDYGZWPFDTKFQIY
CWHJVLNHIQIBTKHJVNPIST""".replace("\n", "")

# Finding Repeated Sequences
repeated_sequences = find_repeated_sequences(ciphertext)
most_common_seq = max(repeated_sequences, key=lambda seq: len(repeated_sequences[seq]))
positions = repeated_sequences[most_common_seq]
gcd_length = gcd_of_distances(positions)

print(f"\nMost Common Repeated Sequence: {most_common_seq}")
print(f"Positions: {positions}")
print(f"Estimated Key Length (GCD): {gcd_length}")

# Splitting the Ciphertext into Groups
columns = split_into_columns(ciphertext, gcd_length)

print("\nSeparated Groups:")
for i, col in enumerate(columns):
    print(f"Column {i + 1}: {col[:50]}...")  # Print only the first 50 characters

# Frequency Analysis for Each Column
print("\nFrequency Analysis Per Column:")
for i, col in enumerate(columns):
    freqs = frequency_analysis(col)
    print(f"Column {i + 1}: {freqs[:5]}")  # Show top 5 most frequent letters

# Determining the Key (Based on frequency analysis, we found "CRYPTO")
key = "CRYPTO"

# Decrypting the Text
decrypted_text = vigenere_decrypt(ciphertext, key)

print("\nDecrypted Text:")
print(decrypted_text)



Most Common Repeated Sequence: HJV
Positions: [107, 125, 263, 317, 329]
Estimated Key Length (GCD): 6

Separated Groups:
Column 1: KGQNGVGGTGCQWAWQHNJEPJTKQFWAPJGHPWKCTAQVNCIVJFVNIV...
Column 2: CUTRRFIUFEKCCKRKKCVTKVRCDRSFRRKFZTEEJFNYWKKKVFYVRF...
Column 3: CFYRKDLDMGQWRFPYFQAMQDLGZLJSJJMPLFBBRSRCDAFCLSCREE...
Column 4: PDATDETDBLRDXTTVTQJCDASCXSTIAUIDVPDSWPWGDWTGNQLWPX...
Column 5: KPVMNTXKPTANILYXPRUMYHVZGWBAHMTILLPHXEXAKBIGHEABBO...
Column 6: BHIVBDROVGCAZECCOHWSHCSQSCHSKVZSGKGCBZCOABOHISCBBS...

Frequency Analysis Per Column:
Column 1: [('Q', 0.12280701754385964), ('G', 0.10526315789473684), ('J', 0.10526315789473684), ('V', 0.08771929824561403), ('N', 0.07017543859649122)]
Column 2: [('K', 0.16071428571428573), ('R', 0.14285714285714285), ('F', 0.14285714285714285), ('V', 0.10714285714285714), ('C', 0.08928571428571429)]
Column 3: [('L', 0.10714285714285714), ('F', 0.08928571428571429), ('R', 0.08928571428571429), ('D', 0.08928571428571429), ('C', 0.07142857142857142)]
Column 4:

In [11]:

def frequency_analysis(ciphertext):
    #Computes the letter frequency of a given ciphertext
    frequency = collections.Counter(ciphertext)
    total_letters = sum(frequency.values())

    # Normalize frequencies
    sorted_freq = sorted(
        frequency.items(), key=lambda item: item[1], reverse=True
    )

    print("\nFrequency Analysis:")
    for letter, count in sorted_freq:
        if letter.isalpha():  # Ignore non-letter characters
            print(f"{letter}: {count} ({count / total_letters:.2%})")

    return sorted_freq

def modular_inverse(a, m):
    # Finds the modular inverse of 'a' under modulo 'm' using the extended Euclidean algorithm
    for x in range(1, m):
        if (a * x) % m == 1:
            return x
    raise ValueError(f"No modular inverse for a = {a} mod {m}")

def affine_decrypt(ciphertext, a, b):
    #Decrypts an Affine Cipher text given 'a' and 'b' values
    a_inv = modular_inverse(a, 26)  # Compute a^(-1) mod 26
    plaintext = ""

    for char in ciphertext:
        if char.isalpha():
            C = ord(char) - ord('A')
            P = (a_inv * (C - b)) % 26
            plaintext += chr(P + ord('A'))
        else:
            plaintext += char  # Preserve non-alphabetic characters

    return plaintext

# Given ciphertext
ciphertext = """KQEREJEBCPPCJCRKIEACUZBKRVPKRBCIBQCARBJCVFCUP
KRIOFKPACUZQEPBKRXPEIIEABDKPBCPFCDCCAFIEABDKP
BCPFEQPKAZBKRHAIBKAPCCIBURCCDKDCCJCIDFUIXPAFF
ERBICZDFKABICBBENEFCUPJCVKABPCYDCCDPKBCOCPERK
IVKSCPICBRKIJPKABI""".replace("\n", "")

# Frequency Analysis
sorted_frequencies = frequency_analysis(ciphertext)

# Setting Affine Cipher decryption parameters (determined manually)
a = 19  # Encryption coefficient
b = 4   # Shift
a_inv = modular_inverse(a, 26)  # Compute modular inverse of 'a'

# Apply Affine Decryption
decrypted_text = affine_decrypt(ciphertext, a, b)

print("\nDecrypted Text:")
print(decrypted_text)

# Formatting the Plaintext
formatted_text = """
O CANADA! TERRE DE NOS AIEUX,
TON FRONT EST CEINT DE FLEURONS GLORIEUX!
CAR TON BRAS SAIT PORTER L'EPEE,
IL SAIT PORTER LA CROIX!
TON HISTOIRE EST UNE EPOPEE
DES PLUS BRILLANTS EXPLOITS.
ET TA VALEUR, DE FOI TREMPEE,
PROTEGERA NOS FOYERS ET NOS DROITS.
"""

print("\nRecognized Plaintext:")
print(formatted_text)


Frequency Analysis:
C: 32 (16.16%)
B: 21 (10.61%)
K: 20 (10.10%)
P: 20 (10.10%)
I: 16 (8.08%)
E: 13 (6.57%)
A: 13 (6.57%)
R: 12 (6.06%)
F: 10 (5.05%)
D: 9 (4.55%)
J: 6 (3.03%)
U: 6 (3.03%)
Q: 4 (2.02%)
Z: 4 (2.02%)
V: 4 (2.02%)
O: 2 (1.01%)
X: 2 (1.01%)
H: 1 (0.51%)
N: 1 (0.51%)
Y: 1 (0.51%)
S: 1 (0.51%)

Decrypted Text:
OCANADATERREDENOSAIEUXTONFRONTESTCEINTDEFLEURONSGLORIEUXCARTONBRASSAITPORTERLEPEEILSAITPORTERLACROIXTONHISTOIREESTUNEEPOPEEDESPLUSBRILLANTSEXPLOITSETTAVALEURDEFOITREMPEEPROTEGERANOSFOYERSETNOSDROITS

Recognized Plaintext:

O CANADA! TERRE DE NOS AIEUX,
TON FRONT EST CEINT DE FLEURONS GLORIEUX!
CAR TON BRAS SAIT PORTER L'EPEE,
IL SAIT PORTER LA CROIX!
TON HISTOIRE EST UNE EPOPEE
DES PLUS BRILLANTS EXPLOITS.
ET TA VALEUR, DE FOI TREMPEE,
PROTEGERA NOS FOYERS ET NOS DROITS.



In [3]:
import numpy as np
import collections

def find_repeated_sequences(ciphertext, min_length=3):
    sequences = {}
    for i in range(len(ciphertext) - min_length):
        seq = ciphertext[i:i + min_length]
        sequences.setdefault(seq, []).append(i)
    return {seq: positions for seq, positions in sequences.items() if len(positions) > 1}

def gcd_of_distances(positions):
    if len(positions) < 2:
        return None
    distances = [positions[i+1] - positions[i] for i in range(len(positions) - 1)]
    return np.gcd.reduce(distances)

def filter_gcd_values(gcd_values):
    filtered = [g for g in gcd_values if 2 <= g <= 100]
    return max(set(filtered), key=filtered.count) if filtered else None

def split_into_columns(ciphertext, key_length):
    return [''.join(ciphertext[i::key_length]) for i in range(key_length)]

def frequency_analysis(text):
    letter_counts = collections.Counter(text)
    total_letters = sum(letter_counts.values())
    return sorted(
        {letter: count / total_letters for letter, count in letter_counts.items()}.items(), 
        key=lambda item: item[1], 
        reverse=True
    )

def vigenere_decrypt(ciphertext, key):
    key = key.upper()
    plaintext = []
    key_length = len(key)
    
    for i, char in enumerate(ciphertext):
        if char.isalpha():
            shift = ord(key[i % key_length]) - ord('A')
            decrypted_char = chr(((ord(char) - ord('A') - shift) % 26) + ord('A'))
            plaintext.append(decrypted_char)
        else:
            plaintext.append(char)
    
    return ''.join(plaintext)

ciphertext = """BNVSNSIHQCEELSSKKYERIFJKXUMBGYKAMQLJTYAVFBKVTDVBPVVRJYYLAOKYMPQS
CGDLFSRLLPROYGESEBUUALRWXMMASAZLGLEDFJBZAVVPXWICGJXASCBYEHOSNMUL
KCEAHTQOKMFLEBKFXLRRFDTZXCIWBJSICBGAWDVYDHAVFJXZIBKCGJIWEAHTTOEW
TUHKRQVVRGZBXYIREMMASCSPBHLHJMBLRFFJELHWEYLWISTFVVYEJCMHYUYRUFSF
MGESIGRLWALSWMNUHSIMYYITCCQPZSICEHBCCMZFEGVJYOCDEMMPGHVAAUMELCMO
EHVLTIPSUYILVGFLMVWDVYDBTHFRAYISYSGKVSUUHYHGGCKTMBLRX""".replace("\n", "")

repeated_sequences = find_repeated_sequences(ciphertext)

gcd_values = [gcd_of_distances(positions) for positions in repeated_sequences.values()]
gcd_values = [g for g in gcd_values if g]

key_length = filter_gcd_values(gcd_values)

if key_length is None:
    print("Could not determine a valid key length.")
else:
    print(f"Estimated Key Length: {key_length}")

    columns = split_into_columns(ciphertext, key_length)

    print("\nFrequency Analysis Per Column:")
    for i, col in enumerate(columns):
        freqs = frequency_analysis(col)
        print(f"Column {i + 1}: {freqs[:5]}")

    key = "THEORY"
    decrypted_text = vigenere_decrypt(ciphertext, key)

    print("\nDecrypted Text:")
    print(decrypted_text)

    if "I GREW UP" in decrypted_text or "THE" in decrypted_text:
        print("\nThe decryption appears successful.")
    else:
        print("\nThe decryption result does not match expectations.")


Estimated Key Length: 96

Frequency Analysis Per Column:
Column 1: [('B', 0.25), ('G', 0.25), ('T', 0.25), ('E', 0.25)]
Column 2: [('N', 0.25), ('L', 0.25), ('U', 0.25), ('H', 0.25)]
Column 3: [('V', 0.25), ('E', 0.25), ('H', 0.25), ('B', 0.25)]
Column 4: [('S', 0.25), ('D', 0.25), ('K', 0.25), ('C', 0.25)]
Column 5: [('N', 0.25), ('F', 0.25), ('R', 0.25), ('C', 0.25)]
Column 6: [('S', 0.25), ('J', 0.25), ('Q', 0.25), ('M', 0.25)]
Column 7: [('I', 0.25), ('B', 0.25), ('V', 0.25), ('Z', 0.25)]
Column 8: [('H', 0.25), ('Z', 0.25), ('V', 0.25), ('F', 0.25)]
Column 9: [('Q', 0.25), ('A', 0.25), ('R', 0.25), ('E', 0.25)]
Column 10: [('G', 0.5), ('C', 0.25), ('V', 0.25)]
Column 11: [('V', 0.5), ('E', 0.25), ('Z', 0.25)]
Column 12: [('E', 0.25), ('P', 0.25), ('B', 0.25), ('J', 0.25)]
Column 13: [('X', 0.5), ('L', 0.25), ('Y', 0.25)]
Column 14: [('S', 0.25), ('W', 0.25), ('Y', 0.25), ('O', 0.25)]
Column 15: [('I', 0.5), ('S', 0.25), ('C', 0.25)]
Column 16: [('K', 0.25), ('C', 0.25), ('R', 0.25