# Challenge 

## Vigenère cipher

In [1]:
import numpy as np
import collections
import operator
import pandas as pd

# Reference alphabet: shift_alpha() rotates it to build the shifted alphabets,
# and it is passed to decrypt() as the alphabet for looking up cipher letters.
PLAIN_ALPHABET = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [2]:
def load(fname):
    """Read a text file and return its non-empty lines.

    Parameters
    ----------
    fname : str
        Path of the file to read.

    Returns
    -------
    list of str
        The lines of the file, without line terminators and with empty
        lines dropped.
    """
    # The context manager closes the file even if read() raises.
    with open(fname, 'r') as f:
        contents = f.read()

    return [line for line in contents.splitlines() if line != ""]

In [3]:
def shift_alpha(shift):
    """Return PLAIN_ALPHABET rotated left by ``shift`` positions.

    Parameters
    ----------
    shift : int
        Number of positions to rotate by. Any integer is accepted; it is
        reduced modulo the alphabet length, so negative values rotate right.

    Returns
    -------
    str
        The rotated alphabet, e.g. ``shift_alpha(1)`` starts with ``'B'`` and
        ends with ``'A'``.
    """
    # Plain slicing clips out-of-range shifts and would silently return the
    # unshifted alphabet; reducing modulo the length makes them wrap instead.
    offset = shift % len(PLAIN_ALPHABET)
    return PLAIN_ALPHABET[offset:] + PLAIN_ALPHABET[:offset]

In [4]:
def count_frequencies(cipher, substring_size = 1):
    """Count how often each substring of a given length occurs in the cipher.

    Parameters
    ----------
    cipher : iterable of str
        Pieces of cipher text; they are concatenated and all spaces removed
        before counting.
    substring_size : int, optional
        Length of the substrings to count (1 for single letters, 2 for
        digrams, ...). Overlapping substrings are all counted.

    Returns
    -------
    pandas.DataFrame
        Columns ``letter`` (the substring), ``count`` and ``percentage``
        (``count`` divided by the number of characters in the cipher text),
        sorted by ``percentage`` in descending order.
    """
    cipher = ' '.join(cipher).replace(" ", "")

    # Stop at the last full-length substring: sliding any further would count
    # truncated tail fragments (e.g. a lone final letter) as if they were
    # substrings of the requested size.
    window_count = len(cipher) - substring_size + 1
    count_dict = collections.Counter(
        cipher[position:position + substring_size]
        for position in range(window_count)
    )

    dataframe = pd.DataFrame(sorted(count_dict.items(), key=operator.itemgetter(1)),
                             columns=["letter", "count"])
    dataframe["percentage"] = dataframe["count"]/len(cipher)
    return dataframe.sort_values(by="percentage", ascending=False)

In [5]:
def find_all(string,substring):
    """Return every index at which ``substring`` starts within ``string``.

    Each start position is tested independently, so overlapping occurrences
    are all reported. The result is a list of ints in ascending order, empty
    when there is no match.
    """
    width = len(substring)
    return [start for start in range(len(string))
            if string[start:start + width] == substring]

def get_digram_ocurrences(digrams,cipher):
    """Print the gaps between successive occurrences of each digram.

    ``cipher`` is an iterable of strings; its pieces are concatenated and
    stripped of spaces. For every entry of ``digrams`` one line is printed:
    the array of differences between consecutive start positions of that
    entry in the text. Nothing is returned.
    """
    text = ' '.join(cipher).replace(" ", "")

    for digram in digrams:
        # np.diff turns the sorted start positions into distances between repeats.
        print(np.diff(find_all(text, digram)))

In [6]:
def decrypt(cipher, alpha, subs=None):
    """Print the cipher text after applying a repeating sequence of shifts.

    Parameters
    ----------
    cipher : iterable of str
        Pieces of cipher text; they are concatenated and all spaces removed.
    alpha : str
        Alphabet in which each cipher character is looked up to obtain its
        index.
    subs : sequence of int, optional
        One alphabet shift per key position. The shifts are applied
        cyclically, so the key period is ``len(subs)``. When ``None`` or
        empty, nothing is decrypted and an empty line is printed.

    Returns
    -------
    None
        The result is printed in lower case rather than returned.

    Raises
    ------
    ValueError
        If a cipher character does not occur in ``alpha``.
    """
    cipher = ' '.join(cipher).replace(" ", "")

    decrypted_msg = []

    if subs:
        # One shifted alphabet per key position.
        key = [shift_alpha(i) for i in subs]
        # Cycle through the key by its real length instead of a hard-coded 5,
        # which dropped letters for shorter keys and ignored the tail of
        # longer ones.
        for position, letter in enumerate(cipher):
            shifted = key[position % len(key)]
            decrypted_msg.append(shifted[alpha.index(letter)])

    print(''.join(decrypted_msg).lower())

In [7]:
# Load the cipher text: load() returns one string per non-empty line of the file.
PATH = "./cipher.txt"

CIPHER = load(PATH)

# Display the loaded lines.
CIPHER

['K Q O W E F V J P U J U U N U K G L M E K J I N M W U X F Q M K J B',
 'G W R L F N F G H U D W U U M B S V L P S N C M U E K Q C T E S W R',
 'E E K O Y S S I W C T U A X Y O T A P X P L W P N T C G O J B G F Q',
 'H T D W X I Z A Y G F F N S X C S E Y N C T S S P N T U J N Y T G G',
 'W Z G R W U U N E J U U Q E A P Y M E K Q H U I D U X F P G U Y T S',
 'M T F F S H N U O C Z G M R U W E Y T R G K M E E D C T V R E C F B',
 'D J Q C U S W V B P N L G O Y L S K M T E F V J J T W W M F M W P N',
 'M E M T M H R S P X F S S K F F S T N U O C Z G M D O E O Y E E K C',
 'P J R G P M U R S K H F R S E I U E V G O Y C W X I Z A Y G O S A A',
 'N Y D O E O Y J L W U N H A M E B F E L X Y V L W N O J N S I O F R',
 'W U C C E S W K V I D G M U C G O C R U W G N M A A F F V N S I U D',
 'E K Q H C E U C P F C M P V S U D G A V E M N Y M A M V L F M A O Y',
 'F N T Q C U A F V F J N X K L N E I W C W O D C C U L W R I F T W G',
 'M U S W O V M A T N Y B U H T C O C W F Y T N M G Y T Q M K B 

First, let's try to find the key length:

In [8]:
# Count every two-letter substring of the cipher text and show the ten most frequent.
digrams_frequencies = count_frequencies(CIPHER, substring_size = 2)

digrams_frequencies.head(10)

Unnamed: 0,letter,count,percentage
316,GM,9,0.014901
315,EK,8,0.013245
314,DG,6,0.009934
313,ME,6,0.009934
312,EE,6,0.009934
311,OY,6,0.009934
310,GO,6,0.009934
309,WU,6,0.009934
305,UU,5,0.008278
303,WG,5,0.008278


In [9]:
# Print the gaps between successive occurrences of the first three digrams in the table.
get_digram_ocurrences(digrams_frequencies["letter"][:3], CIPHER)

[80 90 90 75  5 40 10  5]
[ 40  10  85 115 105 160   5]
[ 40 125   5  55  10]


Every distance between repeated digrams is a multiple of 5, so the common factor 5 is the key size :)

Now that we have found the key size, let's do some frequency analysis: every fifth letter is encrypted with the same letter of the key.

In [10]:
# Trial run: apply the shifts 0..4 to the five key positions and print the result.
decrypt(CIPHER, PLAIN_ALPHABET, subs = [0,1,2,3,4])

krqzifwlsyjvwqykhnpikkkqqwvziumllekwsnirfhjxhwvwpfswnswndoxikrewisxthikpavwixewyayarxaqzspwqpwggplekfrjwhwykceyhhirsyeviyoewwsqpwyjoawkgxbjvwvwqijvwtiaqapikrjxmdvzitgvawwmuhiwhowrgzhouywfawvglohiddvyvedhehjrexwwwdsrlhqbpslowifwlmxwxoiqwqppimuokvsqziwslhiwtowrgzhogsepahikdrmvgqoxvsljivsfkxivhqbgwykceyhqveaoagsepampwvpkemfdiilyaypwoqmrsjqivwvefisxmymdhoxggpeuywhppeaghyrsjwgikrjfiudrigmqxvydhcyimoapemwniqapairtrexefwhmrxlnqiixezsddexpwskixwhoxwwpxpetoaeyhuergwgawrmhawumlderlhhexwplixwhpwijlphiddnglwuxeyvhhemjhabmdhoyvdhospsximpahqhikkqiikoaqslskyvwwwkiixwxvwhoxxjdferkhoemdhohiyhwrxdhitiukarxvhievukav
