In [1]:
"""
    Index of Coincidence Attack using Python
    Author: Dimitrios Mamakas (f3322209)
"""

import string

In [2]:
# ------------------------ Function definitions ------------------------

In [3]:
"""
    Calculates the Index of Coincidence score for a given sequence of characters of the English alphabet.
"""
def calculate_ioc(group):
    if len(group) == 1 or len(group) == 0:
        # Return 0 if the sequence contains less than a character
        return 0
    else:
        # Otherwise, perform the analysis
        ioc = 0
        char_freq = {}

        # Create character frequencies
        for char in group:
            if char not in char_freq:
                char_freq[char] = group.count(char)

        # Loop through all 26 letters of the english alphabet
        for char in string.ascii_uppercase:
            if char in char_freq:
                # Add this term to the index of coincidence
                ioc = ioc + ((char_freq[char]/len(group)) * ((char_freq[char] - 1)/(len(group) - 1)))

        # Return the result
        return ioc

In [4]:
"""
    Performs the decription of the Vigenère algorithm with a known key.
"""
def decrypt_vigenere(key, cipher):
    # Define our keystream
    keystream = (round(len(cipher) / len(key)) + 1) * key
    keystream = keystream[:len(cipher)]
    
    # Initial original text
    original = ''
    for x in range(0, len(cipher)):
        if string.ascii_uppercase.index(cipher[x]) - string.ascii_uppercase.index(keystream[x]) >= 0:
            letter = string.ascii_uppercase[string.ascii_uppercase.index(cipher[x]) - \
                                            string.ascii_uppercase.index(keystream[x])]
            original = original + letter
        else:
            letter = string.ascii_uppercase[len(string.ascii_uppercase) - abs(string.ascii_uppercase.index(cipher[x]) \
                                                                              - string.ascii_uppercase.index(keystream[x]))]
            original = original + letter
    
    # Return the result
    return original

In [5]:
"""
    Shifts the text (group) to a specific ammount of spaces (characters).
    
    Returns the text in the format of a list.
"""
def shift(text, amount):
    shifted = ''
    letters = string.ascii_uppercase
    for letter in text:
        shifted += letters[(letters.index(letter)-amount) % len(letters)]
    return [x for x in shifted]

In [6]:
"""
    Calculates the score (matching) given a sequence and a specific language.
"""
def calculate_score(freqs, english):
    score = 0
    for letter in string.ascii_uppercase:
        if letter in freqs:
            score = score + (freqs[letter] * english[letter])
    return score

In [7]:
# ------------------------ Index of Coincidence Attack ------------------------

In [8]:
# Load the contents
with open('ciphertext.txt') as f:
    contents = f.readlines()

In [9]:
# Load the cipher
cipher = contents[0]

In [10]:
# Print the cipher
cipher

'CXSFBOIKLEAIBKMMCETMUBAOJSSLUFXDJHHAZBBZYXCKVPBGNHSYBOMRXCSPUXBOJHOWBUQBJBWMPXASBXIFNKAZBKBZEXBDOKZYBOOHOJGPUFKGRDFXNIQSHJVXLKMUNHFXPBQUNTOGQFBBJIHLNITNCXSKFMMBRUGBAJWQJBZRCLAHCYJXELTDBYBEVCMRDFDHEQAXBJSFJFBGQKATAPIRCXSHAIGMNWOMVSMZLJCKFYCSFUOKRMIQCETMUBJHXIDAROMZWTHARPCOYEGXQIGHVCCKNIJDQQJBBRZHBYRXAQQBJBHHJEISJBZHGEMQBFSVVBACXMVXAQQLNIOKRDWNMULVRMBSQQHAHJIMBQZHABBQHJCFVQQFJJSMUBMEOUQMBCBGJJFXFMWMBUCGGEMHATSLPBVCJDHLNKLNWEHAROAONSWXF'

In [11]:
# Canditate keys based on this certain scenario
canditate_keys = [4, 5, 6, 7, 8]

In [12]:
# Initialize the IOC score dictionary
ioc_scores = {
    4: 0,
    5: 0,
    6: 0,
    7: 0,
    8: 0,
}

# Loop through the canditate keys
for canditate_key in canditate_keys:
    # Split into groups
    groups = []
    for group in range(0, canditate_key):
        # Loop through the cipher
        inner_group = []
        for letter in range(group, len(cipher), canditate_key):
            inner_group.append(cipher[letter])
        groups.append(inner_group)
    
    # Find the average IOC score of all groups
    acg_ioc_score = 0
    for group in groups:
        acg_ioc_score = acg_ioc_score + calculate_ioc(group)
    
    # Update the dictionary
    ioc_scores[canditate_key] = acg_ioc_score

In [13]:
# Print the IOC distribution
print(ioc_scores)

{4: 0.21816328421833006, 5: 0.22097275886369708, 6: 0.28190451690355217, 7: 0.3077065128889561, 8: 0.5535734705546026}


In [14]:
# Chose the key length with the highest value
key_length = list(ioc_scores.keys())[list(ioc_scores.values()).index(max(list(ioc_scores.values())))]

In [15]:
# And print it
print('Index of Coincidence analysis decided that the key length is equal to: ' + str(key_length))

Index of Coincidence analysis decided that the key length is equal to: 8


In [16]:
# English letter frequencies
english_letter_freqs = {
    'A': 0.08167, 
    'B': 0.01492, 
    'C': 0.02782, 
    'D': 0.04253, 
    'E': 0.12702, 
    'F': 0.02228, 
    'G': 0.02015,
    'H': 0.06094, 
    'I': 0.06966, 
    'J': 0.00153, 
    'K': 0.00772, 
    'L': 0.04025, 
    'M': 0.02406, 
    'N': 0.06749,
    'O': 0.07507, 
    'P': 0.01929, 
    'Q': 0.00095, 
    'R': 0.05987, 
    'S': 0.06327, 
    'T': 0.09056, 
    'U': 0.02758,
    'V': 0.00978, 
    'W': 0.02360, 
    'X': 0.00150, 
    'Y': 0.01974, 
    'Z': 0.00074
}

In [17]:
# Decrypt the message
groups = []
for group in range(0, key_length):
    # Loop through the cipher
    inner_group = []
    for letter in range(group, len(cipher), key_length):
        inner_group.append(cipher[letter])
    groups.append(inner_group)

# Start with an empty key
key_ = ''

# Loop each group
for group in groups:
    scores = {}

    # For each group, we need to perform exactly 25 shifts
    for shift_ in range(0, 26):
        # Group's letter frequencies
        group_freqs = {}

        # Perform a shift
        grp = shift(''.join(group), shift_)

        # For each letter of the new group
        for letter in grp:
            # Count the frequencies of this group
            group_freqs[letter] = grp.count(letter) / len(grp)

        # Calculate the score for this shift
        score = calculate_score(group_freqs, english_letter_freqs)

        scores[shift_] = score

    # Find the correct letter for this group
    letter = string.ascii_uppercase[list(scores)[list(scores.values()).index(max(list(scores.values())))]]
    key_ = key_ + letter

# Prints
print('- Decrypted message: ')
print(decrypt_vigenere(key_, cipher))

- Decrypted message: 
THEMORALCOMPONENTOFTHESPACESHIPEARTHMETAPHORISTHEREFORESOMEWHATPARADOXICALITCASTSHUMANSASUNGRATEFULFORGIFTSWHICHINREALITYTHEYNEVERRECEIVEDANDITCASTSALLOTHERSPECIESINMORALLYPOSITIVEROLESINLIFESUPPORTSYSTEMWITHHUMANSASTHEONLYNEGATIVEACTORSBUTWEAREPARTOFTHEBIOSPHEREANDTHESUPPOSEDLYIMMORALBEHAVIOURISIDENTICALTOWHATALLOTHERSPECIESDOWHENTIMESAREGOODEXCEPTTHATHUMANSALONETRYTOMITIGATETHEEFFECTOFTHATRESPONSEONTHEIRDESCENDANTSANDONOTHERSPECIES
