In [100]:
import pickle
import random
import re
import time
import timeit
from collections import defaultdict
from functools import partial
from itertools import permutations, combinations
from multiprocessing import Pool
from statistics import mean

import matplotlib.pyplot as plt
from tqdm import tqdm
from tqdm.auto import trange

from enigma.enigma import Enigma
from enigma.rotors.rotor_with_mapping_and_notches import RotorWithMappingAndNotches
from enigma.rotors.rotor_I import RotorI
from enigma.rotors.rotor_II import RotorII
from enigma.rotors.rotor_III import RotorIII
from enigma.rotors.rotor_IV import RotorIV
from enigma.rotors.rotor_V import RotorV
from enigma.reflectors.reflector_b import ReflectorB
from enigma.plugboard import Plugboard
from language_models.character_frequency_kld_language_model import CharacterFrequencyKLDLanguageModel
from language_models.markov_chain_model import MarkovChainModel
from language_models.character_frequency_ic_language_model import CharacterFrequencyICLanguageModel

In [115]:
# Rotor classes a random configuration may draw its three rotors from.
POSSIBLE_ROTORS = {RotorI, RotorII, RotorIII, RotorIV, RotorV}
# Alphabet used both to sample plugboard letters and to filter input text.
POSSIBLE_LETTERS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

def get_random_plugboad(plugboard_size: int):
    """Draw `plugboard_size` random, non-overlapping letter pairs.

    Args:
        plugboard_size: number of letter pairs to generate.

    Returns:
        A list of `(letter, letter)` tuples ordered by the first letter of
        each pair. No letter appears in more than one pair.

    Raises:
        ValueError: propagated from `random.sample` when
            `plugboard_size * 2` exceeds the number of available letters.
    """
    drawn = random.sample(list(POSSIBLE_LETTERS), plugboard_size * 2)
    # Consecutive draws form a pair: (drawn[0], drawn[1]), (drawn[2], drawn[3]), ...
    pairs = list(zip(drawn[0::2], drawn[1::2]))
    pairs.sort(key=lambda pair: pair[0])
    return pairs

def get_random_config(plugboard_size: int):
    """Generate a random Enigma configuration.

    Args:
        plugboard_size: number of letter pairs to put on the plugboard.

    Returns:
        A tuple `(rotors_to_use, offsets, plugboard_tuples)`:
        three distinct rotor classes taken from POSSIBLE_ROTORS, three
        offsets in the inclusive range 0..25, and the plugboard pairs
        produced by `get_random_plugboad`.
    """
    # random.sample() needs a sequence: sampling from a set was deprecated in
    # Python 3.9 and raises TypeError from 3.11 on. Sorting by class name also
    # gives the population a stable order, so a seeded RNG picks the same
    # rotors on every run.
    rotor_pool = sorted(POSSIBLE_ROTORS, key=lambda rotor: rotor.__name__)
    rotors_to_use = random.sample(rotor_pool, 3)
    offsets = [random.randint(0, 25) for _ in range(3)]
    plugboard_tuples = get_random_plugboad(plugboard_size)
    return rotors_to_use, offsets, plugboard_tuples

def string_to_config(string: str):
    """Parse a configuration string back into its components.

    The expected layout is three rotor sections, each written as
    ``<RotorClassName>(o:<offset>,rs:<ring>)|``, followed by the plugboard
    letters written back to back (``AGDL`` means pairs A-G and D-L).
    The ``,rs:<ring>`` part is optional so that strings produced by
    `config_to_string` without ring settings can be parsed as well.

    Args:
        string: the serialized configuration.

    Returns:
        A tuple `(rotors_classes, offsets, ringstellungs, plugboard_tuples)`.
        A rotor section without a ring setting yields 0 in `ringstellungs`.
        NOTE(review): 0 is assumed to be the neutral ring setting of the
        rotor classes - confirm against the rotor implementation.
        A trailing unpaired plugboard letter is ignored.

    Raises:
        IndexError: if `string` does not match the expected layout.
        KeyError: if a rotor name is not a subclass of
            RotorWithMappingAndNotches.
    """
    # Raw strings keep the regex escapes (\( and \d) from being treated as
    # (invalid) Python string escapes.
    rotor_section = r"(.+)\(o:(\d+)(?:,rs:(\d+))?\)\|"
    pattern = rotor_section * 3 + r"(.*)"
    match = re.findall(pattern, string, re.IGNORECASE)[0]

    available_rotors = {rot.__name__: rot for rot in RotorWithMappingAndNotches.__subclasses__()}
    # Groups come in triples (name, offset, ring) per rotor; group 9 is the plugboard.
    rotors_classes = [available_rotors[match[i]] for i in (0, 3, 6)]
    offsets = [int(match[i]) for i in (1, 4, 7)]
    # findall() reports an unmatched optional group as '', hence the fallback.
    ringstellungs = [int(match[i]) if match[i] else 0 for i in (2, 5, 8)]

    letters = match[9]
    plugboard_tuples = [(letters[i], letters[i + 1]) for i in range(0, len(letters) - 1, 2)]
    return rotors_classes, offsets, ringstellungs, plugboard_tuples

def config_to_string(rotors_classes, offsets, plugboard_tuples, ringstellungs=None):
    """Serialize an Enigma configuration into a single string.

    Each rotor is written as ``<RotorClassName>(o:<offset>)|`` and the
    plugboard pairs are appended back to back, e.g.
    ``RotorII(o:20)|RotorIV(o:2)|RotorI(o:21)|AGDL``.

    Args:
        rotors_classes: rotor classes; only their `__name__` is used.
        offsets: one integer offset per rotor.
        plugboard_tuples: iterable of two-string pairs.
        ringstellungs: optional integer ring setting per rotor. When given,
            each rotor is written as ``<RotorClassName>(o:<offset>,rs:<ring>)|``,
            the layout `string_to_config` reads ring settings from. When
            omitted the output is the offsets-only layout shown above.

    Returns:
        The serialized configuration string.
    """
    if ringstellungs is None:
        rotor_parts = ["%s(o:%d)|" % (rot.__name__, off)
                       for rot, off in zip(rotors_classes, offsets)]
    else:
        rotor_parts = ["%s(o:%d,rs:%d)|" % (rot.__name__, off, ring)
                       for rot, off, ring in zip(rotors_classes, offsets, ringstellungs)]
    plugboard_part = "".join(l1 + l2 for l1, l2 in plugboard_tuples)
    return "".join(rotor_parts) + plugboard_part

def transform_to_valid_chars(text: str):
    """Upper-case `text` and drop every character not in POSSIBLE_LETTERS.

    Args:
        text: arbitrary input text.

    Returns:
        The upper-cased text with only the characters found in
        POSSIBLE_LETTERS kept, in their original order.
    """
    kept = []
    for char in text.upper():
        if char in POSSIBLE_LETTERS:
            kept.append(char)
    return "".join(kept)

In [116]:
# Build the Enigma machine that encrypts the sample messages.
# NOTE(review): no random seed is set in the visible cells, so every run
# draws a different configuration.
PLUGBOARD_SIZE = 10  # Size used by Germany in WW2
rotors, offsets, plugboard_tuples = get_random_config(PLUGBOARD_SIZE)

# Instantiate each drawn rotor class at its drawn offset.
rotors_to_use = [rotor_class(offset=rotor_offset)
                 for rotor_class, rotor_offset in zip(rotors, offsets)]

enigma = Enigma(
    reflector=ReflectorB(),
    plugboard=Plugboard(plugboard_tuples),
    rotors=rotors_to_use,
)

# Last expression of the cell: show the drawn configuration.
config_to_string(rotors, offsets, plugboard_tuples)

'RotorII(o:20)|RotorIV(o:2)|RotorI(o:21)|AGDLFQIZKSMBNXOEUWVP'

In [117]:
# Language models based on the text of a book in English.
# All three models are constructed from the same training text.
with open('books/Alices_Adventures_in_Wonderland.txt', 'r') as book:
    train_book = book.read()
freq_kld_model = CharacterFrequencyKLDLanguageModel(train_book)
markov_1_model = MarkovChainModel(train_book)
ic_model = CharacterFrequencyICLanguageModel(train_book)

# Second book, upper-cased and stripped to the letters in POSSIBLE_LETTERS
# by transform_to_valid_chars.
with open('books/Frankenstein.txt', 'r') as book:
    frankestein_book = transform_to_valid_chars(book.read())

In [118]:
# Todos los datos referidos a tiempos
time_kl, time_markov_1, time_ic = defaultdict(list), defaultdict(list), defaultdict(list)
time_enigma = defaultdict(list) # Tambien me voy a fijar cuanto tarda la enigma

In [119]:
# Try every combination of rotor offsets and score each candidate decryption
# with the three language models.
def crack(encrypted_message):
    """Brute-force the rotor offsets used to encrypt `encrypted_message`.

    Each of the 26**3 offset triples is tried with the notebook-level
    `rotors` and `plugboard_tuples`. Every candidate decryption is scored by
    `freq_kld_model`, `markov_1_model` and `ic_model`. The duration of each
    decryption and of each scoring call is appended to `time_enigma`,
    `time_kl`, `time_markov_1` and `time_ic` under the message length.

    Args:
        encrypted_message: the ciphertext to attack.

    Returns:
        Three lists of `((off1, off2, off3), fitness)` pairs, in the order
        KLD model, Markov model, IC model, each sorted by descending fitness.
    """
    message_length = len(encrypted_message)

    # One (model, scores-by-offsets, timing container) entry per language model.
    scorers = (
        (freq_kld_model, {}, time_kl),
        (markov_1_model, {}, time_markov_1),
        (ic_model, {}, time_ic),
    )

    # off1 varies fastest, i.e. the triples are visited in base-26 counting order.
    for off3 in range(26):
        for off2 in range(26):
            for off1 in range(26):
                candidate = (off1, off2, off3)
                machine = Enigma(ReflectorB(),
                                 plugboard=Plugboard(plugboard_tuples),
                                 rotors=[rot_c(offset=off)
                                         for rot_c, off in zip(rotors, candidate)])

                tic = timeit.default_timer()
                decrypted_message = machine.decrypt(encrypted_message)
                time_enigma[message_length].append(timeit.default_timer() - tic)

                for model, scores, timings in scorers:
                    tic = timeit.default_timer()
                    scores[candidate] = model.fitness(decrypted_message)
                    timings[message_length].append(timeit.default_timer() - tic)

    def by_fitness(entry):
        return entry[1]

    kl_ranked, markov_1_ranked, ic_ranked = (
        sorted(scores.items(), key=by_fitness, reverse=True)
        for _, scores, _ in scorers
    )
    return kl_ranked, markov_1_ranked, ic_ranked

In [128]:
%%time
# Para cada longitud, pruebo los diferentes modelos para los diferentes tipos de rotores

MIN_LEN, MAX_LEN, STEP_LEN = 50, 105, 5
NUMBER_OF_SAMPLES = 10

crack_time_given_length = defaultdict(list)
rankings_freq, rankings_markov_1, rankings_ic = defaultdict(list), defaultdict(list), defaultdict(list)

for length in tqdm(range(MIN_LEN, MAX_LEN, STEP_LEN)):
    print(length)
#for length in tqdm(range(50, 51)):
    #for _ in tqdm(range(1)):
    for _ in tqdm(range(NUMBER_OF_SAMPLES)):
        
        start = random.randint(0, len(frankestein_book)-length) #Para que si o si tenga lenght 
        encrypted_message = enigma.encrypt(frankestein_book[start: start + length])
        #print("Original: ", frankestein_book[start: start + length])
        start_time_of_crack = timeit.default_timer()
        kl_divs, markov_1_divs, ic_divs = crack(encrypted_message)
        
        first = lambda a: [x[0] for x in a] #Saca la primer componente de una lista de pares
        
        kl_divs_rots, markov_1_rots, ic_rots = first(kl_divs), first(markov_1_divs), first(ic_divs)
        
        crack_time_given_length[length].append(timeit.default_timer() - start_time_of_crack)
        #print("A dormir")
        time.sleep(5) # Sleep de 5 segundos para poder bajarle la temperatura al procesador
        
        rankings_freq[length].append(kl_divs_rots.index(tuple(offsets)))
        rankings_markov_1[length].append(markov_1_rots.index(tuple(offsets)))
        rankings_ic[length].append(ic_rots.index(tuple(offsets)))


  0%|          | 0/11 [00:00<?, ?it/s]
  0%|          | 0/10 [00:00<?, ?it/s][A

50



 10%|█         | 1/10 [00:27<04:10, 27.85s/it][A
 20%|██        | 2/10 [00:56<03:45, 28.14s/it][A
 30%|███       | 3/10 [01:23<03:15, 27.98s/it][A
 40%|████      | 4/10 [01:51<02:47, 27.96s/it][A
 50%|█████     | 5/10 [02:19<02:19, 27.88s/it][A
 60%|██████    | 6/10 [02:47<01:51, 27.87s/it][A
 70%|███████   | 7/10 [03:15<01:23, 27.80s/it][A
 80%|████████  | 8/10 [03:42<00:55, 27.79s/it][A
 90%|█████████ | 9/10 [04:10<00:27, 27.81s/it][A
100%|██████████| 10/10 [04:38<00:00, 27.85s/it][A
  9%|▉         | 1/11 [04:38<46:25, 278.53s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

55



 10%|█         | 1/10 [00:28<04:13, 28.14s/it][A
 20%|██        | 2/10 [00:56<03:45, 28.22s/it][A
 30%|███       | 3/10 [01:24<03:17, 28.20s/it][A
 40%|████      | 4/10 [01:52<02:49, 28.20s/it][A
 50%|█████     | 5/10 [02:21<02:21, 28.21s/it][A
 60%|██████    | 6/10 [02:49<01:52, 28.18s/it][A
 70%|███████   | 7/10 [03:17<01:24, 28.26s/it][A
 80%|████████  | 8/10 [03:45<00:56, 28.24s/it][A
 90%|█████████ | 9/10 [04:13<00:28, 28.22s/it][A
100%|██████████| 10/10 [04:42<00:00, 28.21s/it][A
 18%|█▊        | 2/11 [09:20<42:05, 280.66s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

60



 10%|█         | 1/10 [00:28<04:20, 28.90s/it][A
 20%|██        | 2/10 [00:57<03:50, 28.87s/it][A
 30%|███       | 3/10 [01:26<03:22, 28.88s/it][A
 40%|████      | 4/10 [01:55<02:52, 28.82s/it][A
 50%|█████     | 5/10 [02:24<02:24, 28.82s/it][A
 60%|██████    | 6/10 [02:53<01:55, 28.82s/it][A
 70%|███████   | 7/10 [03:21<01:26, 28.80s/it][A
 80%|████████  | 8/10 [03:50<00:57, 28.80s/it][A
 90%|█████████ | 9/10 [04:19<00:28, 28.78s/it][A
100%|██████████| 10/10 [04:48<00:00, 28.83s/it][A
 27%|██▋       | 3/11 [14:08<37:53, 284.14s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

65



 10%|█         | 1/10 [00:29<04:22, 29.18s/it][A
 20%|██        | 2/10 [00:58<03:53, 29.19s/it][A
 30%|███       | 3/10 [01:27<03:24, 29.26s/it][A
 40%|████      | 4/10 [01:57<02:56, 29.39s/it][A
 50%|█████     | 5/10 [02:26<02:26, 29.35s/it][A
 60%|██████    | 6/10 [02:55<01:57, 29.31s/it][A
 70%|███████   | 7/10 [03:24<01:27, 29.26s/it][A
 80%|████████  | 8/10 [03:54<00:58, 29.24s/it][A
 90%|█████████ | 9/10 [04:23<00:29, 29.27s/it][A
100%|██████████| 10/10 [04:52<00:00, 29.27s/it][A
 36%|███▋      | 4/11 [19:01<33:32, 287.55s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

70



 10%|█         | 1/10 [00:29<04:27, 29.73s/it][A
 20%|██        | 2/10 [00:59<03:58, 29.84s/it][A
 30%|███       | 3/10 [01:29<03:28, 29.77s/it][A
 40%|████      | 4/10 [01:59<02:58, 29.77s/it][A
 50%|█████     | 5/10 [02:28<02:28, 29.77s/it][A
 60%|██████    | 6/10 [02:58<01:59, 29.75s/it][A
 70%|███████   | 7/10 [03:28<01:29, 29.77s/it][A
 80%|████████  | 8/10 [03:58<00:59, 29.73s/it][A
 90%|█████████ | 9/10 [04:27<00:29, 29.75s/it][A
100%|██████████| 10/10 [04:57<00:00, 29.78s/it][A
 45%|████▌     | 5/11 [23:59<29:07, 291.24s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

75



 10%|█         | 1/10 [00:30<04:31, 30.21s/it][A
 20%|██        | 2/10 [01:00<04:02, 30.25s/it][A
 30%|███       | 3/10 [01:30<03:32, 30.33s/it][A
 40%|████      | 4/10 [02:01<03:02, 30.38s/it][A
 50%|█████     | 5/10 [02:31<02:31, 30.40s/it][A
 60%|██████    | 6/10 [03:02<02:01, 30.43s/it][A
 70%|███████   | 7/10 [03:32<01:31, 30.48s/it][A
 80%|████████  | 8/10 [04:03<01:00, 30.48s/it][A
 90%|█████████ | 9/10 [04:33<00:30, 30.41s/it][A
100%|██████████| 10/10 [05:03<00:00, 30.39s/it][A
 55%|█████▍    | 6/11 [29:03<24:37, 295.56s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

80



 10%|█         | 1/10 [00:30<04:37, 30.81s/it][A
 20%|██        | 2/10 [01:01<04:07, 30.94s/it][A
 30%|███       | 3/10 [01:32<03:36, 30.86s/it][A
 40%|████      | 4/10 [02:03<03:05, 30.89s/it][A
 50%|█████     | 5/10 [02:34<02:34, 30.84s/it][A
 60%|██████    | 6/10 [03:05<02:04, 31.10s/it][A
 70%|███████   | 7/10 [03:36<01:33, 31.03s/it][A
 80%|████████  | 8/10 [04:07<01:01, 30.94s/it][A
 90%|█████████ | 9/10 [04:41<00:31, 31.77s/it][A
100%|██████████| 10/10 [05:16<00:00, 31.65s/it][A
 64%|██████▎   | 7/11 [34:20<20:09, 302.43s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

85



 10%|█         | 1/10 [00:35<05:20, 35.63s/it][A
 20%|██        | 2/10 [01:08<04:31, 33.90s/it][A
 30%|███       | 3/10 [01:50<04:23, 37.68s/it][A
 40%|████      | 4/10 [02:24<03:36, 36.07s/it][A
 50%|█████     | 5/10 [02:59<02:58, 35.71s/it][A
 60%|██████    | 6/10 [03:34<02:22, 35.70s/it][A
 70%|███████   | 7/10 [04:08<01:45, 35.07s/it][A
 80%|████████  | 8/10 [04:40<01:08, 34.07s/it][A
 90%|█████████ | 9/10 [05:11<00:33, 33.23s/it][A
100%|██████████| 10/10 [05:43<00:00, 34.38s/it][A
 73%|███████▎  | 8/11 [40:03<15:46, 315.60s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

90



 10%|█         | 1/10 [00:32<04:50, 32.23s/it][A
 20%|██        | 2/10 [01:04<04:16, 32.10s/it][A
 30%|███       | 3/10 [01:36<03:44, 32.06s/it][A
 40%|████      | 4/10 [02:08<03:12, 32.14s/it][A
 50%|█████     | 5/10 [02:40<02:40, 32.19s/it][A
 60%|██████    | 6/10 [03:12<02:08, 32.08s/it][A
 70%|███████   | 7/10 [03:44<01:36, 32.10s/it][A
 80%|████████  | 8/10 [04:16<01:04, 32.08s/it][A
 90%|█████████ | 9/10 [04:48<00:32, 32.03s/it][A
100%|██████████| 10/10 [05:20<00:00, 32.05s/it][A
 82%|████████▏ | 9/11 [45:24<10:34, 317.14s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

95



 10%|█         | 1/10 [00:32<04:53, 32.58s/it][A
 20%|██        | 2/10 [01:04<04:19, 32.45s/it][A
 30%|███       | 3/10 [01:44<04:08, 35.48s/it][A
 40%|████      | 4/10 [02:29<03:57, 39.58s/it][A
 50%|█████     | 5/10 [03:07<03:14, 38.97s/it][A
 60%|██████    | 6/10 [03:42<02:30, 37.56s/it][A
 70%|███████   | 7/10 [04:14<01:47, 35.86s/it][A
 80%|████████  | 8/10 [04:47<01:09, 34.67s/it][A
 90%|█████████ | 9/10 [05:19<00:33, 33.95s/it][A
100%|██████████| 10/10 [05:51<00:00, 35.17s/it][A
 91%|█████████ | 10/11 [51:16<05:27, 327.79s/it]
  0%|          | 0/10 [00:00<?, ?it/s][A

100



 10%|█         | 1/10 [00:32<04:54, 32.77s/it][A
 20%|██        | 2/10 [01:05<04:22, 32.85s/it][A
 30%|███       | 3/10 [01:38<03:50, 32.86s/it][A
 40%|████      | 4/10 [02:11<03:16, 32.81s/it][A
 50%|█████     | 5/10 [02:44<02:44, 32.82s/it][A
 60%|██████    | 6/10 [03:16<02:11, 32.78s/it][A
 70%|███████   | 7/10 [03:49<01:38, 32.81s/it][A
 80%|████████  | 8/10 [04:23<01:06, 33.24s/it][A
 90%|█████████ | 9/10 [05:00<00:34, 34.34s/it][A
100%|██████████| 10/10 [05:36<00:00, 33.69s/it][A
100%|██████████| 11/11 [56:52<00:00, 310.27s/it]

CPU times: user 47min 34s, sys: 3.46 s, total: 47min 37s
Wall time: 56min 52s





In [129]:
# Mean measured duration per key for each language model and for the Enigma decryption.
avg_time_kl = {length: mean(samples) for length, samples in time_kl.items()}
avg_time_markov_1 = {length: mean(samples) for length, samples in time_markov_1.items()}
avg_time_ic = {length: mean(samples) for length, samples in time_ic.items()}
avg_time_enigma = {length: mean(samples) for length, samples in time_enigma.items()}

In [130]:
# Persist the experiment results to data_pickle.pkl.
# Tuple order: the three ranking dicts, the crack times, then the four average-timing dicts.
to_dump = (rankings_freq, rankings_markov_1, rankings_ic, crack_time_given_length, avg_time_kl, avg_time_markov_1, avg_time_ic, avg_time_enigma)
with open('data_pickle.pkl', 'wb') as pickle_file:
    pickle.dump(to_dump, pickle_file)

In [143]:
# Reload the saved results and summarize them, so this cell also works on a
# fresh kernel without re-running the experiment.
# NOTE: pickle.load can execute arbitrary code - only load a data_pickle.pkl
# that this notebook wrote.
with open('data_pickle.pkl', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

# The first four entries of the dumped tuple are the three ranking dicts and
# the crack times. They are bound to separate names so the in-memory results
# of a just-finished experiment are not overwritten.
saved_rankings_freq, saved_rankings_markov_1, saved_rankings_ic, saved_crack_times = data[:4]

avg_ranking_freq = {k: mean(v) for k, v in saved_rankings_freq.items()}
avg_ranking_markov_1 = {k: mean(v) for k, v in saved_rankings_markov_1.items()}
avg_ranking_ic = {k: mean(v) for k, v in saved_rankings_ic.items()}
avg_crack_time = {k: mean(v) for k, v in saved_crack_times.items()}

# Iterate over the lengths actually present in the file rather than over the
# experiment cell's MIN_LEN/MAX_LEN/STEP_LEN constants.
for i in sorted(avg_ranking_freq):
    print("len: {}\n kl: {}\n markov_1: {}\n ic: {}\n crack_time: {}\n".format(i, avg_ranking_freq[i], avg_ranking_markov_1[i], avg_ranking_ic[i], avg_crack_time[i]))

len: 50
 kl: 1.2
 markov_1: 83.1
 ic: 162.5
 crack_time: 22.83364073540015

len: 55
 kl: 22.3
 markov_1: 1008.2
 ic: 738.3
 crack_time: 23.19803240729998

len: 60
 kl: 1.3
 markov_1: 233.5
 ic: 62.3
 crack_time: 23.813124938700277

len: 65
 kl: 0.7
 markov_1: 1593.8
 ic: 236.8
 crack_time: 24.25671641199988

len: 70
 kl: 0.9
 markov_1: 617
 ic: 13.8
 crack_time: 24.75974554289969

len: 75
 kl: 0.1
 markov_1: 610.9
 ic: 15.8
 crack_time: 25.378236897399983

len: 80
 kl: 0
 markov_1: 299.9
 ic: 2.2
 crack_time: 26.64112117610002

len: 85
 kl: 0
 markov_1: 155.6
 ic: 1.5
 crack_time: 29.36585634280018

len: 90
 kl: 0.4
 markov_1: 437.4
 ic: 0.1
 crack_time: 27.034849198099984

len: 95
 kl: 0
 markov_1: 58
 ic: 0.2
 crack_time: 30.14704999609985

len: 100
 kl: 0.3
 markov_1: 1447.7
 ic: 0
 crack_time: 28.66607158820043



In [None]:
# Display the raw per-sample rankings of each model and the raw crack times.
rankings_freq, rankings_markov_1, rankings_ic, crack_time_given_length