In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Prepare data

In [None]:
# Read the whole source text into memory as one string.
# NOTE(review): no encoding is passed, so open() falls back to the platform
# default — confirm it matches the encoding of data/tom_sawyer.txt.
with open('data/tom_sawyer.txt') as file:
    text = file.read()

In [None]:
# Show only the first 500 characters as a sanity check instead of dumping
# the entire file contents into the cell output.
text[:500]

In [None]:
# Split on any run of whitespace so every token is exactly one word.
# split(' ') only breaks on single spaces: line breaks stay embedded inside
# tokens and consecutive spaces yield empty tokens, so a sample built from
# N tokens would neither contain N words nor fit on a single line.
text_splited=text.split()
len(text_splited)

In [None]:
# Experiment grid.
# Cipher keys under test, ordered by length (3 to 10 letters).
key_words=[
    'sun',
    'heat',
    'beast',
    'thinks',
    'predict',
    'payloads',
    'character',
    'unscramble',
]
# Sample lengths, counted in tokens of text_splited: 100, 150, ..., 450.
texts_lens=list(range(100,451,50))
# Number of samples drawn for every sample length.
num_texts=10

# Reduced grid, kept for quick manual runs:
# key_words=['sun','heat']
# texts_lens=[200,300]
# num_texts=3

In [None]:
def get_text_part(i, text_len, text_splited):
    """Return the i-th consecutive chunk of `text_len` tokens as one string.

    Parameters
    ----------
    i : int
        Zero-based chunk index; the chunk starts at token `i * text_len`.
    text_len : int
        Number of tokens in the chunk.
    text_splited : list of str
        Token list to take the chunk from.

    Returns
    -------
    str
        The selected tokens joined by single spaces (empty string when
        `text_len` is 0).

    Raises
    ------
    IndexError
        If the chunk reaches past the end of `text_splited`.
    """
    start = i * text_len
    # Index token by token (rather than slicing) so that running off the end
    # of the token list raises IndexError instead of silently returning a
    # shorter chunk.
    return ' '.join(text_splited[start + j] for j in range(text_len))

In [None]:
# texts[sample_index][length_index]: for every sample index, one chunk per
# configured length. Chunk i of length L starts at token i*L, so chunks of
# different lengths overlap in the source text.
texts=[
    [get_text_part(sample_idx,sample_len,text_splited) for sample_len in texts_lens]
    for sample_idx in range(num_texts)
]

# Test text encryption and decryption

In [None]:
# One fixed plaintext sample and key for the round-trip check below.
test_text, test_key = texts[1][1], key_words[1]
print(test_key)

In [None]:
# Write the plaintext from Python instead of `!echo "{test_text}"`: the text
# is interpolated into a shell command there, so double quotes, `$` or
# backticks in it are interpreted by the shell and can corrupt the file or
# make the command fail (leaving stale cache.txt contents behind).
# The trailing newline mirrors what echo appended.
with open('cache.txt', 'w') as cache_file:
    cache_file.write('{}\n'.format(test_text))

In [None]:
# Run the external encryption script on cache.txt with the test key;
# IPython captures the command's output as a list of lines.
test_text_enc=!./encrypt.sh {test_key} cache.txt
# Display the first output line, which the following cells use as the ciphertext.
test_text_enc[0]

In [None]:
# Write the ciphertext from Python instead of `!echo "{...}"` so that no
# character of it is interpreted by the shell. The trailing newline mirrors
# what echo appended.
with open('cache.txt', 'w') as cache_file:
    cache_file.write('{}\n'.format(test_text_enc[0]))

In [None]:
# Run the external decryption script on cache.txt with the same key;
# the output is captured as a list of lines.
test_text_dec=!./decrypt.sh {test_key} cache.txt
# Display the first output line for a visual comparison with the plaintext.
test_text_dec[0]

# Test Kasiski 

In [None]:
# NOTE(review): get_prob_key_len and get_prob_key are defined further down,
# in the "Perform predictions" section, so this cell raises NameError on a
# fresh kernel with Run All. Run those definition cells first (or move them
# above this cell).
# Estimate the key length from trigram analysis of the test ciphertext ...
test_prob_key_len=get_prob_key_len(test_text_enc[0],3)
print(test_prob_key_len)
# ... then recover a key of that length.
test_key_word=get_prob_key(test_text_enc[0],test_prob_key_len)
print(test_key_word)

# Encrypt all texts

In [None]:
%%time
enc_texts=[[[] for text_let in texts_lens] for key in key_words]
for i in range(len(key_words)):
    for j in range(len(texts_lens)):
        for k in range(num_texts):
            !echo "{texts[k][j]}" > cache.txt
            enc_text=!./encrypt.sh {key_words[i]} cache.txt
            enc_texts[i][j].append(enc_text[0])

In [None]:
# Indices (key word, text length, sample) of the encrypted sample used for
# the decryptor spot check.
test_key_words_num, test_len_num, test_text_num = 5, 1, 1

In [None]:
# Display the ciphertext selected by the three test indices.
enc_texts[test_key_words_num][test_len_num][test_text_num]

# Test decryptor

In [None]:
# Write the selected ciphertext from Python instead of `!echo "{...}"` so
# that no character of it is interpreted by the shell. The trailing newline
# mirrors what echo appended.
with open('cache.txt', 'w') as cache_file:
    cache_file.write('{}\n'.format(enc_texts[test_key_words_num][test_len_num][test_text_num]))

In [None]:
# Decrypt cache.txt with the key word that was used to encrypt the selected
# sample; the script output is captured as a list of lines.
test_text_dec=!./decrypt.sh {key_words[test_key_words_num]} cache.txt
# Display the first output line of the decryption.
test_text_dec[0]

# Perform predictions

In [None]:
def get_prob_key_len(text, lgram, top=1):
    !echo "{text}" > cache.txt
    res_lst=!python3 kasiski_analyze.py cache.txt {lgram}
    if(len(res_lst)!=0):
        return res_lst[top-1].split(': ')[0]
    else:
        return 1

In [None]:
def get_prob_key(text, prob_key_len):
    !echo "{text}" > cache.txt
    res_lst=!python3 kasiski_attack.py cache.txt {prob_key_len} 
    return res_lst[0]

In [None]:
def get_hit_prob(enc_texts,texts_lens,key_words,num_texts, lgram,
                 key_len_fn=None, key_fn=None):
    """Compute the fraction of ciphertexts whose key is recovered exactly.

    For every (key word, text length) cell, each of the `num_texts`
    ciphertexts is analyzed for a probable key length, attacked with that
    length, and counted as a hit when the recovered key equals the key word.

    Parameters
    ----------
    enc_texts : nested list of str
        Ciphertexts indexed as enc_texts[key_index][length_index][sample_index].
    texts_lens : list
        One entry per text-length column; only its length is used here.
    key_words : list of str
        True key for each row of `enc_texts`.
    num_texts : int
        Number of samples per cell to evaluate.
    lgram : int
        Forwarded to the key-length estimator.
    key_len_fn : callable, optional
        ``key_len_fn(text, lgram)`` returning the probable key length.
        Defaults to `get_prob_key_len`.
    key_fn : callable, optional
        ``key_fn(text, key_len)`` returning the probable key.
        Defaults to `get_prob_key`.

    Returns
    -------
    list of list of float
        hit_prob[key_index][length_index], each value in [0, 1].
    """
    # Resolve the defaults at call time so the function can be defined before
    # the helper functions exist.
    if key_len_fn is None:
        key_len_fn = get_prob_key_len
    if key_fn is None:
        key_fn = get_prob_key

    hit_prob = []
    for i, key_word in enumerate(key_words):
        row = []
        for j in range(len(texts_lens)):
            hits = 0
            for k in range(num_texts):
                enc_text = enc_texts[i][j][k]
                prob_key_len = key_len_fn(enc_text, lgram)
                if key_fn(enc_text, prob_key_len) == key_word:
                    hits += 1
            # Normalize by the actual number of samples. The previous
            # hard-coded divisor of 10 under-reported the rate whenever
            # num_texts != 10 (e.g. the 2-sample sub-grids).
            row.append(hits / num_texts if num_texts > 0 else 0.0)
        hit_prob.append(row)
    return hit_prob

## Use bigrams

### Analyze and Attack

In [None]:
%%time
# Bigram run (lgram=2) over the full grid: every key word, every text length,
# num_texts samples per cell.
hit_prob_bi=get_hit_prob(enc_texts,texts_lens,key_words,num_texts, 2)

In [None]:
# Heat map of the bigram hit rate: rows are key lengths, columns text lengths.
fig, ax = plt.subplots()
cax = ax.matshow(hit_prob_bi, interpolation='nearest')
fig.colorbar(cax, label='hit rate')

# Pin one tick per matrix row/column before labelling. Setting labels alone
# relies on wherever the default locator happens to put ticks (hence the old
# ''-prefixed label lists) and mislabels cells when that placement changes.
ax.set_xticks(range(len(texts_lens)))
ax.set_xticklabels(texts_lens)
ax.set_yticks(range(len(key_words)))
ax.set_yticklabels([len(key_word) for key_word in key_words])
ax.set_xlabel('text length (words)')
ax.set_ylabel('key length (letters)')
ax.set_title('Key recovery rate, bigrams')
plt.show()
hit_prob_bi

## Use trigrams

### Analyze and Attack

In [None]:
%%time
# Trigram run (lgram=3) over the full grid: every key word, every text length,
# num_texts samples per cell.
hit_prob_tri=get_hit_prob(enc_texts,texts_lens,key_words,num_texts, 3)

In [None]:
# Heat map of the trigram hit rate: rows are key lengths, columns text lengths.
fig, ax = plt.subplots()
cax = ax.matshow(hit_prob_tri, interpolation='nearest')
fig.colorbar(cax, label='hit rate')

# Pin one tick per matrix row/column before labelling. Setting labels alone
# relies on wherever the default locator happens to put ticks (hence the old
# ''-prefixed label lists) and mislabels cells when that placement changes.
ax.set_xticks(range(len(texts_lens)))
ax.set_xticklabels(texts_lens)
ax.set_yticks(range(len(key_words)))
ax.set_yticklabels([len(key_word) for key_word in key_words])
ax.set_xlabel('text length (words)')
ax.set_ylabel('key length (letters)')
ax.set_title('Key recovery rate, trigrams')
plt.show()

## Use four-grams

### Analyze and Attack

In [None]:
# Restrict the four-gram run to a sub-grid: key words 3..6, text lengths
# 3..6 (by index) and the first sub_num_texts samples of each cell.
sub_num_texts=2
sub_key_words=key_words[3:7]
sub_text_lens=texts_lens[3:7]
np_enc_texts=np.asarray(enc_texts)
sub_enc_texts=np_enc_texts[3:7,3:7,:sub_num_texts].tolist()

In [None]:
%%time
# Four-gram run (lgram=4) on the sub-grid selected in the previous cell.
hit_prob_four=get_hit_prob(sub_enc_texts,sub_text_lens,sub_key_words,sub_num_texts, 4)

In [None]:
# Heat map of the four-gram hit rate on the current sub-grid.
fig, ax = plt.subplots()
cax = ax.matshow(hit_prob_four, interpolation='nearest')
fig.colorbar(cax, label='hit rate')

# Pin one tick per matrix row/column before labelling. Setting labels alone
# relies on wherever the default locator happens to put ticks (hence the old
# ''-prefixed label lists) and mislabels cells when that placement changes.
ax.set_xticks(range(len(sub_text_lens)))
ax.set_xticklabels(sub_text_lens)
ax.set_yticks(range(len(sub_key_words)))
ax.set_yticklabels([len(key_word) for key_word in sub_key_words])
ax.set_xlabel('text length (words)')
ax.set_ylabel('key length (letters)')
ax.set_title('Key recovery rate, four-grams')
plt.show()

In [None]:
# Second four-gram sub-grid: key words 1..2, text lengths 3..6 (by index)
# and the first sub_num_texts samples of each cell.
sub_num_texts=2
sub_key_words=key_words[1:3]
sub_text_lens=texts_lens[3:7]
np_enc_texts=np.asarray(enc_texts)
sub_enc_texts=np_enc_texts[1:3,3:7,:sub_num_texts].tolist()

In [None]:
%%time
# Four-gram run (lgram=4) on the second sub-grid selected in the previous cell.
hit_prob_four_2=get_hit_prob(sub_enc_texts,sub_text_lens,sub_key_words,sub_num_texts, 4)

In [None]:
# Heat map of the four-gram hit rate on the second sub-grid.
fig, ax = plt.subplots()
cax = ax.matshow(hit_prob_four_2, interpolation='nearest')
fig.colorbar(cax, label='hit rate')

# Pin one tick per matrix row/column before labelling. Setting labels alone
# relies on wherever the default locator happens to put ticks (hence the old
# ''-prefixed label lists) and mislabels cells when that placement changes.
ax.set_xticks(range(len(sub_text_lens)))
ax.set_xticklabels(sub_text_lens)
ax.set_yticks(range(len(sub_key_words)))
ax.set_yticklabels([len(key_word) for key_word in sub_key_words])
ax.set_xlabel('text length (words)')
ax.set_ylabel('key length (letters)')
ax.set_title('Key recovery rate, four-grams (short keys)')
plt.show()

## Use five-grams

### Analyze and Attack

In [None]:
# Sub-grid for the five-gram run: key words 3..6, text lengths 3..6 (by
# index) and the first sub_num_texts samples of each cell. This resets the
# sub_* variables after the previous section pointed them at key words 1..2.
sub_num_texts=2
sub_key_words=key_words[3:7]
sub_text_lens=texts_lens[3:7]
np_enc_texts=np.asarray(enc_texts)
sub_enc_texts=np_enc_texts[3:7,3:7,:sub_num_texts].tolist()

In [None]:
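# Disabled alternative visualization: filled contour plot of hit_prob_bi with
# text length on the x axis and key length on the y axis.
# plt.contourf([text_len for text_len in texts_lens],[len(key_word) for key_word in key_words],hit_prob_bi,
#              levels=[i/10 for i in range(11)],corner_mask=False)
# plt.colorbar()
# plt.grid(True,color='white')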

In [None]:
%%time
# Five-gram run (lgram=5) on the sub-grid selected above.
hit_prob_five=get_hit_prob(sub_enc_texts,sub_text_lens,sub_key_words,sub_num_texts, 5)

In [None]:
# Heat map of the five-gram hit rate on the current sub-grid.
fig, ax = plt.subplots()
cax = ax.matshow(hit_prob_five, interpolation='nearest')
fig.colorbar(cax, label='hit rate')

# Pin one tick per matrix row/column before labelling. Setting labels alone
# relies on wherever the default locator happens to put ticks (hence the old
# ''-prefixed label lists) and mislabels cells when that placement changes.
ax.set_xticks(range(len(sub_text_lens)))
ax.set_xticklabels(sub_text_lens)
ax.set_yticks(range(len(sub_key_words)))
ax.set_yticklabels([len(key_word) for key_word in sub_key_words])
ax.set_xlabel('text length (words)')
ax.set_ylabel('key length (letters)')
ax.set_title('Key recovery rate, five-grams')
plt.show()