In [1]:
import pandas as pd 
import numpy as np
import csv

In [2]:
# Vigenere Cipher
def get_distances_between_trigrams(ciphertext):
    # Scan trigrams in the given `ciphertext` and find the distances
    # between repeated trigrams.
    # Returns:
    #  - distances: a dictionary in which keys are trigrams and values
    #               are lists of distances (i.e., number of letters)
    #               between repeated trigrams
    #               e.g., distances={ 'sys': [265], 'vwf': [230] ...} 
    distances = {}
    
    for i in range(len(ciphertext) - 2):
        trigram = ciphertext[i:i + 3]
        if trigram in distances:
            distances[trigram].append(i)
        else:
            distances[trigram] = [i]
    return distances

In [3]:
def get_all_factors(repeated_distances, max_key_len=20):
    """
    Returns all the factors of observed distances.
    For each distance value in `repeated_distances`, find its factors
    and then compute their frequencies.
    Parameters:
    --------------------
    repeated_distance: a dictionary containing repeated trigrams and the
                       distances between the repeats
    max_key_len: an integer, the largest key length to consider (i.e., the
                 largest factor to consider)
    Return:
    --------------------
    factors_histo: a list of tuples consisting of (factor, freqeuncy),
                   sorted in decreasing order of frequency
    Example:
    --------------------
    Assume `repeated_distance` = {'sys': [265], 'vwf': [230]}.
    Then factors are [
           5, 53                       # factors of 265
           2, 10, 23, 46, 115, 230     # factors of 230
    ]
    and your output should be [(5, 1), (2, 1), (10, 1)]
    """
    factor_freq = []

    for dis in repeated_distances.values():
        for num in dis:
            factor = find_factors(num, max_key_len)
            factor_freq.extend(factor)

    # looked up google and trying to figure out in to correct format
    factor_freq = [(factor, factor_freq.count(factor)) for factor in set(factor_freq)]

    factor_freq.sort(key=lambda x: x[1], reverse=True)

    print(factor_freq)
    
    return factor_freq


In [4]:
def find_factors(num, max_key_len):
    """
    finds the factors of `num`, excluding 1.
    You don't have to consider the factors greater than `max_key_len`
    Return
    ------------------
    factors: a list containing factors of `num` smaller than or
             equal to `max_key_len`
    """
    factors = []
    for i in range(2, min(num, max_key_len) + 1):
        if num % i == 0:
            factors.append(i)
    return factors

In [5]:
def kasiski_test(ciphertext):
    """
    A driver program
    """
    repeated_distances = get_distances_between_trigrams(ciphertext)
    
    key_len_to_try = get_all_factors(repeated_distances, max_key_len=20)
    
    for i, candidate in enumerate(key_len_to_try):
        print(f"Candidate [{i}]: {candidate[0]}")
        

In [6]:
# Stream Cipher
def compute_frequency(path_to_input, n):
    """
    compute the frequency of each n-gram
    and return it as a list of tuples.
    Parameters:
    ----------------
    path_to_input: string containing the path to the input file,
                   i.e., 'ciphertext.txt'
    n: an integer corresponding to the number of characters to consider
    """
    with open(path_to_input, 'r') as file:
        ciphertext = file.read()

    n_grams = []
    freq_dict = {}

    for i in range(len(ciphertext)-n+1):
        ngram = ""
        for j in range(n):
            ngram += ciphertext[i+j]
        n_grams.append(ngram)
        
    for i in n_grams:
        if i in freq_dict:
            freq_dict[i] += 1
        else:
            freq_dict[i] = 1

    freq_list = list(freq_dict.items())
    freq_list.sort(key=lambda x: x[1], reverse=True)
    
    filename = f"frequency_{n}.txt"
    with open(filename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerows(freq_list)
    
    return freq_list

In [7]:
def count_doubles(path_to_input):
    """
    This function returns the frequencies of double characters as a
    list of tuples.

    Parameters:
    ----------------
    path_to_input: string containing the path to the input file,
                   i.e., 'ciphertext.txt'
    """
    with open(path_to_input, 'r') as file:
        ciphertext = file.read()
        
    double_chars = []
    freq_double = {}
    
    for i in range(len(ciphertext) - 1):
        if ciphertext[i] == ciphertext[i + 1]:
            curr = ""
            for j in range(2):
                curr += ciphertext[i + j]
            double_chars.append(curr)
            
    for i in double_chars:
        freq_double[i] = freq_double.get(i, 0) + 1

    freq_list = sorted(freq_double.items(), key=lambda x: x[1], reverse=True)

    return freq_list

In [8]:
def make_substitution(ciphertext, guess):
    """
    Parameters:
    -------------------------
    ciphertext: input ciphertext, string
    guess: a dictionary in which key is a letter in the ciphertext and
           the value is the letter in plaintext
    Return:
    -------------------------
    the output string is generated by replacing all the letters in the
    dictionary with its corresponding plaintext. All the letters which
    do not have corresponding entries in the dictionary are replaced
    with '_' symbol.
    """
    result = ''
    for i in ciphertext:
        if i in guess:
            result += guess[i]
        else:
            result += ' _ '
    return result

In [9]:
def main():
    
    #Vigenere Cipher

    ciphertext = "nenusyegjlegnpwzealpffgzcohojvvsjkwoddirsaoomyaoevzwvoztwjvwfsxldyuselxmngoksvzfyifwcaxouevcnxgqpvrwjtbumuofvdcllusmhzpletrusepwejrtgkshafpovafwmaqocojhbxzpccjhvizlhmpwfqxazhcdnsrgkhrfvmlhvuzngksokrvefoxdgkkinhclxqcietprlaehfqwrzqxtulgfdwjzrsrqqdudkhuuflbspvvyzgrqsdaqzsyeelvalsprlwrusmxzvwfuglzuvsdxkunowzzsorwcblboervqtebuamupvbfusrizzoihgenwspscigkhnwweebjbecjlhtpvvnvyjrfphsejkhrlhtafndphbsskkixhktbusmzhyhdefvosapvifrrwvqdcdhnoenweziv"
    # prob 9
    print("\n\n\n PROBLEM 9: \n")
    dis_trigrams = get_distances_between_trigrams(ciphertext)
    print(dis_trigrams)
    
    # prob 10
    print("\n\n\n PROBLEM 10: \n")
    kasiski_test(ciphertext)

    #Stream Cipher
    
    print("\n\n\n PROBLEM 11: \n")
    n = 1 
    freq_list_single = compute_frequency("ciphertext.txt", n)
    print("\n UNIGRAM: \n")
    print(freq_list_single)
    
    n = 2
    freq_list_doubles = compute_frequency("ciphertext.txt", n)
    print("\n BIGRAM: \n")
    print(freq_list_doubles)
    
    n = 3 
    freq_list_tri = compute_frequency("ciphertext.txt", n)
    print("\n TRIGRAM: \n")
    print(freq_list_tri)
    
    print("\n\n\n PROBLEM 12: \n")
    double = count_doubles("ciphertext.txt")
    print(double)
    
    print("\n\n\n PROBLEM 13: \n")
    ciphertext_13 = "gsrhrhgsvkozrmgvcggszgdzhvmxibkgvwf"
    guess_13 = {'g': 'a', 'm': 'b'}
    
    print(make_substitution(ciphertext_13, guess_13))     
    
    
if __name__ == "__main__":
    main()




 PROBLEM 9: 

{'nen': [0], 'enu': [1], 'nus': [2], 'usy': [3], 'sye': [4, 269], 'yeg': [5], 'egj': [6], 'gjl': [7], 'jle': [8], 'leg': [9], 'egn': [10], 'gnp': [11], 'npw': [12], 'pwz': [13], 'wze': [14], 'zea': [15], 'eal': [16], 'alp': [17], 'lpf': [18], 'pff': [19], 'ffg': [20], 'fgz': [21], 'gzc': [22], 'zco': [23], 'coh': [24], 'oho': [25], 'hoj': [26], 'ojv': [27], 'jvv': [28], 'vvs': [29], 'vsj': [30], 'sjk': [31], 'jkw': [32], 'kwo': [33], 'wod': [34], 'odd': [35], 'ddi': [36], 'dir': [37], 'irs': [38], 'rsa': [39], 'sao': [40], 'aoo': [41], 'oom': [42], 'omy': [43], 'mya': [44], 'yao': [45], 'aoe': [46], 'oev': [47], 'evz': [48], 'vzw': [49], 'zwv': [50], 'wvo': [51], 'voz': [52], 'ozt': [53], 'ztw': [54], 'twj': [55], 'wjv': [56], 'jvw': [57], 'vwf': [58, 288], 'wfs': [59], 'fsx': [60], 'sxl': [61], 'xld': [62], 'ldy': [63], 'dyu': [64], 'yus': [65], 'use': [66, 123], 'sel': [67], 'elx': [68], 'lxm': [69], 'xmn': [70], 'mng': [71], 'ngo': [72], 'gok': [73], 'oks': [74], 'k

In [10]:
n = 1 
freq_list_single = compute_frequency("ciphertext.txt", n)
print("\n UNIGRAM: \n")
print(freq_list_single)
    
n = 2
freq_list_doubles = compute_frequency("ciphertext.txt", n)
print("\n BIGRAM: \n")
print(freq_list_doubles)
    
n = 3 
freq_list_tri = compute_frequency("ciphertext.txt", n)
print("\n TRIGRAM: \n")
print(freq_list_tri)
    
double = count_doubles("ciphertext.txt")
print("\n DOUBLES: \n")
print(double)


 UNIGRAM: 

[('v', 50), ('g', 36), ('h', 28), ('r', 27), ('m', 27), ('z', 24), ('i', 20), ('l', 20), ('s', 19), ('o', 14), ('x', 13), ('f', 13), ('w', 10), ('t', 8), ('k', 7), ('d', 7), ('y', 7), ('n', 6), ('b', 5), ('u', 4), ('e', 3), ('p', 3), ('c', 1), ('j', 1)]

 BIGRAM: 

[('sv', 13), ('gs', 12), ('vi', 11), ('rm', 9), ('vh', 9), ('mt', 7), ('lm', 7), ('hg', 6), ('gv', 6), ('gr', 5), ('zm', 5), ('mw', 5), ('xv', 5), ('hh', 5), ('vg', 5), ('hr', 4), ('oz', 4), ('mg', 4), ('sz', 4), ('zg', 4), ('zh', 4), ('vm', 4), ('mx', 4), ('wv', 4), ('iv', 4), ('vz', 4), ('gg', 3), ('vw', 3), ('hf', 3), ('gf', 3), ('fg', 3), ('rl', 3), ('ev', 3), ('oo', 3), ('xr', 3), ('rk', 3), ('ks', 3), ('vn', 3), ('nv', 3), ('li', 3), ('ir', 3), ('il', 3), ('gl', 3), ('rh', 2), ('vk', 2), ('ko', 2), ('zr', 2), ('dz', 2), ('hv', 2), ('tg', 2), ('fy', 2), ('yh', 2), ('rg', 2), ('fo', 2), ('bl', 2), ('lf', 2), ('ze', 2), ('hu', 2), ('ob', 2), ('vx', 2), ('wr', 2), ('yv', 2), ('or', 2), ('rv', 2), ('fm', 2), ('

In [11]:
# Problem 14

# Output file compare to Reference
# E,  T ,  A ,  O , I, N
# v 50, g 36, h 28, r 27, m 27

# th, he, in, er
# sv 13,   gs 12,  vi 11 

# the , and, tha , ent
# gsv 9, rmt 6, svi 5, zmw 4

# SS, EE, TT, FF 
# hh, gg, oo, xx, ii

# Make substition

with open("ciphertext.txt", 'r') as file:
    ciphertext = file.read()

print("\n 1: \n")
guess = {'v': 'e', 'g': 't', 'h': 'a'}
print(make_substitution(ciphertext, guess))    

print("\n 2: \n")
guess = {'g': 't', 's': 'h', 'v': 'e', 'i': 'r'}
print(make_substitution(ciphertext, guess))    

print("\n 3: \n")
guess = {'g': 't', 's': 'h', 'v': 'e', 
         'r': 'a', 'm': 'n', 't': 'd'}
print(make_substitution(ciphertext, guess))    

print("\n 4: \n")
guess = {'g': 't', 's': 'h', 'v': 'e', 
         'r': 'a', 'm': 'n', 't': 'd',
        'r' : 'o' }
print(make_substitution(ciphertext, guess))    

print("\n 5: \n")
guess = {'g': 't', 's': 'h', 'v': 'e', 
         'r': 'a', 'm': 'n', 't': 'd',
        'r' : 'i', 'h': 's'}
print(make_substitution(ciphertext, guess))    

print("\n 6: \n")
# gsrh rh gsv = THIS IS THE
guess = {'g': 't', 's': 'h', 'r': 'i', 'h': 's', 'v': 'e'}
print(make_substitution(ciphertext, guess))    

print("\n 7: \n")
guess = {'g': 't', 
         's': 'h', 
         'r': 'i', 
         'h': 's', 
         'v': 'e',
         'm': 'a',
         'm': 'n'
        }
print(make_substitution(ciphertext, guess))

print("\n 8: \n")
# th_t try that
guess = {'g': 't', 
         's': 'h', 
         'r': 'i', 
         'h': 's', 
         'v': 'e',
         'm': 'n',
         'z': 'a'
        }
print(make_substitution(ciphertext, guess))

print("\n 9: \n")
guess = {'g': 't', 
         's': 'h', 
         'r': 'i', 
         'h': 's', 
         'v': 'e',
         'm': 'n',
         'z': 'a',
         'i': 'r',
         'x': 'c',
         't': 'g',
         'n': 'm',
        }
print(make_substitution(ciphertext, guess))

print("\n 10: \n")
guess = {'g': 't', 
         's': 'h', 
         'r': 'i', 
         'h': 's', 
         'v': 'e',
         'm': 'n',
         'z': 'a',
         'i': 'r',
         'x': 'c',
         't': 'g',
         'n': 'm',
         'l': 'o',
         'p': 'k',
         'w': 'd',
         'k': 'p',
         'o': 'l',
         'c': 'x',
         'b': 'y',
         'd': 'w',
         'j':'q',
         'f':'u',
         'u': 'f',
         'e': 'v',
         'y': 'b'
        }
print(make_substitution(ciphertext, guess))

# add r to end of e: easie_ and x as c sentence
# I also wrote down on a piece of paper and see what are some english words that would make sense
# I tried them out and after a little bit I relized that this is homework related so I tired terms
# that we learned from the class 
# meaning, plain, text, decipher, encrypted, quickly, have, substitution, ...


 1: 

t _  _ a _ at _ e _  _  _  _  _ te _ tt _  _ t _  _ ae _  _  _  _  _ te _  _ a _  _  _ t _ ea _  _ at _ t _ t _  _  _  _  _  _  _  _  _  _  _  _ t _  _  _ t _  _  _ a _  _  _  _  _  _ ea _  _  _ eaa _  _  _  _  _  _ e _  _  _  _ e _ e _ t _ e _ eaa _  _ e _  _  _  _  _ e _  _ e _ et _  _ t _  _  _  _  _  _  _  _  _ e _ ette _  _  _  _ e _ t _  _  _  _  _  _  _  _  _  _  _ t _ e _  _ a _  _ a _  _ at _ t _ t _  _  _  _  _  _  _ e _  _  _  _  _ at _ et _  _  _  _  _  _  _ e _  _  _  _  _  _  _  _ eaa _  _ a _  _  _  _  _  _  _  _ t _  _  _ e _  _ e _  _ ette _ a _  _  _  _  _  _ t _ t _ e _  _  _  _ eat _ e _ e _  _  _  _  _  _  _  _  _ ea _  _  _  _  _  _ a _  _  _  _  _  _  _  _  _  _  _  _  _ e _ e _  _  _ e _ te _  _  _  _ t _ e _ e _  _  _  _  _  _ eaaae _ te _  _ eat _  _  _  _ et _ e _ e _  _  _  _ e _ e _ a _ e _ 

 2: 

th _  _  _  _ the _  _  _  _  _ te _ tth _ t _  _  _ e _  _ r _  _ te _  _  _  _  _  _ the _  _  _  _ t _ t _ t _  _  _  _  _  _  _  _  _  _ r _ t _  _  _