In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import binascii as ba

In [3]:
import numpy as np

In [4]:
import pandas as pd

In [5]:
import itertools as it

In [6]:
import set1

In [7]:
import operator

In [8]:
import pprint

In [11]:
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend

# 1. Convert hex to base64

The string:

    49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d

Should produce:

    SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t

So go ahead and make that happen. You'll need to use this code for the rest of the exercises. 

In [None]:
q1_input = "49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d"
q1_output = "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t"

In [None]:
q1_ans = set1.hex_to_b64(q1_input)
print(ba.a2b_hex(q1_input))
print(q1_ans)
print("It is %s that answer matches desired output" % str(set1.byte_to_str(q1_ans) == q1_output))

# 2. Fixed XOR

Write a function that takes two equal-length buffers and produces their XOR combination.

If your function works properly, then when you feed it the string:

    1c0111001f010100061a024b53535009181c

... after hex decoding, and when XOR'd against:

    686974207468652062756c6c277320657965

... should produce:

    746865206b696420646f6e277420706c6179



In [None]:
q2_input1 = "1c0111001f010100061a024b53535009181c"
q2_input2 = "686974207468652062756c6c277320657965"
q2_output = "746865206b696420646f6e277420706c6179"

In [None]:
q2_ans = set1.fixed_xor(ba.a2b_hex(q2_input1), ba.a2b_hex(q2_input2))
print(q2_ans)
print(ba.b2a_hex(q2_ans))
print("It is %s that answer matches desired output" % str(set1.byte_to_str(ba.b2a_hex(q2_ans)) == q2_output))

# 3. Single-byte XOR cipher
 The hex encoded string:

     1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736

... has been XOR'd against a single character. Find the key, decrypt the message.

You can do this by hand. But don't: write code to do it for you.

How? Devise some method for "scoring" a piece of English plaintext. Character frequency is a good metric. Evaluate each output and choose the one with the best score. 

In [None]:
q3_input = "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736"

In [None]:
set1.best_eng_onebyte(ba.a2b_hex(q3_input), 1).squeeze()

# 4. Detect single-character XOR

One of the 60-character strings in [this file](https://cryptopals.com/static/challenge-df/4.txt) has been encrypted by single-character XOR.

Find it.

(Your code from #3 should help.)


In [None]:
with open("data/q4.txt") as q4_file:
    q4_text = pd.Series(q4_file.read().split())
    
q4_text.head()

In [None]:
q4_output = q4_text.apply(ba.a2b_hex).apply(lambda x: set1.best_eng_onebyte(x, 1).squeeze())

In [None]:
# q4_output.sort_values("xor_bcoef", ascending=False)
# q4_output.iloc[q4_output.xor_bcoef.idxmax()]
q4_output.nlargest(1, 'xor_bcoef')

# 5. Implement repeating-key XOR

Here is the opening stanza of an important work of the English language:

    Burning 'em, if you ain't quick and nimble
    I go crazy when I hear a cymbal

Encrypt it, under the key "ICE", using repeating-key XOR.

In repeating-key XOR, you'll sequentially apply each byte of the key; the first byte of plaintext will be XOR'd against I, the next C, the next E, then I again for the 4th byte, and so on.

It should come out to:

    0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272
    a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f

Encrypt a bunch of stuff using your repeating-key XOR function. Encrypt your mail. Encrypt your password file. Your .sig file. Get a feel for it. I promise, we aren't wasting your time with this.


In [None]:
q5_input = "Burning 'em, if you ain't quick and nimble\nI go crazy when I hear a cymbal"
q5_output = "0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f"

In [None]:
# q5_ans = [set1.repeat_xor(s, "ICE") for s in q5_input[0]]
q5_ans = set1.repeat_xor(bytes(q5_input, "ascii"), "ICE")
print(q5_ans)
print(ba.b2a_hex(q5_ans))
print("It is %s that answer matches desired output" % str(set1.byte_to_str(ba.b2a_hex(q5_ans)) == q5_output))

# 6. Break repeating-key XOR
>It is officially on, now.

>This challenge isn't conceptually hard, but it involves actual error-prone coding. The other challenges in this set are there to bring you up to speed. This one is there to qualify you. If you can do this one, you're probably just fine up to Set 6.

[There's a file here](https://cryptopals.com/static/challenge-data/6.txt). It's been base64'd after being encrypted with repeating-key XOR.

Decrypt it.

Here's how:

1. Let KEYSIZE be the guessed length of the key; try values from 2 to (say) 40.

2. Write a function to compute the edit distance/Hamming distance between two strings. *The Hamming distance is just the number of differing bits.* The distance between:

        this is a test

    and

        wokka wokka!!!

    is 37. *Make sure your code agrees before you proceed.*

3. For each KEYSIZE, take the *first* KEYSIZE worth of bytes, and the *second* KEYSIZE worth of bytes, and find the edit distance between them. Normalize this result by dividing by KEYSIZE.

4. The KEYSIZE with the smallest normalized edit distance is probably the key. You could proceed perhaps with the smallest 2-3 KEYSIZE values. Or take 4 KEYSIZE blocks instead of 2 and average the distances.

5. Now that you probably know the KEYSIZE: break the ciphertext into blocks of KEYSIZE length.

6. Now transpose the blocks: make a block that is the first byte of every block, and a block that is the second byte of every block, and so on.

7. Solve each block as if it was single-character XOR. You already have code to do this.

8. For each block, the single-byte XOR key that produces the best looking histogram is the repeating-key XOR key byte for that block. Put them together and you have the key.

This code is going to turn out to be surprisingly useful later on. Breaking repeating-key XOR ("Vigenere") statistically is obviously an academic exercise, a "Crypto 101" thing. But more people "know how" to break it than can *actually break it*, and a similar technique breaks something much more important. 

In [None]:
q6b_ans = set1.hamming_dist("this is a test", "wokka wokka!!!")
print(q6b_ans)
print("It is %s that answer matches desired output" % str(q6b_ans == 37))

In [None]:
with open("data/q6.txt") as q6_file:
    q6_text = q6_file.read().replace("\n", "")
    
print(q6_text[:32])

In [None]:
q6_ints = list(ba.a2b_base64(q6_text))
print(q6_ints[:10])

In [None]:
q6c_ans = pd.DataFrame({"keysize": range(2, 40)})
q6c_ans = q6c_ans.assign(norm_dist=q6c_ans.keysize.apply(lambda b: set1.norm_keysize(q6_ints, b, 4)))
q6c_ans.nsmallest(5, "norm_dist")
# print("The best fitting key size is %s" % str(int(q6c_ans.nsmallest(1, "norm_dist").keysize)))

In [None]:
q6_chunks = pd.DataFrame({"chunk_num": range(29)})
q6_chunks = q6_chunks.assign(chunk=q6_chunks.chunk_num.apply(lambda x: q6_ints[x::29]))
q6_chunks = q6_chunks.assign(xor_chr=q6_chunks.chunk.apply(
    lambda x: set1.best_eng_onebyte(bytes(x), 1).squeeze().xor_chr
))
q6_key = "".join(q6_chunks.xor_chr)
q6_key

In [None]:
q6_ans = set1.repeat_xor(q6_ints, "Terminator X: Bring the noise")
print(set1.byte_to_str(q6_ans))

# 7. AES in ECB mode

The Base64-encoded content [in this file](https://cryptopals.com/static/challenge-data/7.txt) has been encrypted via AES-128 in ECB mode under the key

    "YELLOW SUBMARINE".

(case-sensitive, without the quotes; exactly 16 characters; I like "YELLOW SUBMARINE" because it's exactly 16 bytes long, and now you do too).

Decrypt it. You know the key, after all.

Easiest way: use OpenSSL::Cipher and give it AES-128-ECB as the cipher. 

In [8]:
with open("data/q7.txt") as q7_file:
    q7_text = q7_file.read()
    
print(q7_text[:180])

CRIwqt4+szDbqkNY+I0qbDe3LQz0wiw0SuxBQtAM5TDdMbjCMD/venUDW9BL
PEXODbk6a48oMbAY6DDZsuLbc0uR9cp9hQ0QQGATyyCESq2NSsvhx5zKlLtz
dsnfK5ED5srKjK7Fz4Q38/ttd+stL/9WnDzlJvAo7WBsjI5YJc2gmAYayN


In [17]:
q7_cipher = Cipher(algorithms.AES("YELLOW SUBMARINE"), modes.ECB backend = default_backend())

In [22]:
q7_decryptor = q7_cipher.encryptor()

TypeError: initializer for ctype 'unsigned char *' must be a bytes or list or tuple, not str

# 8. Detect AES in ECB mode

[In this file](https://cryptopals.com/static/challenge-data/8.txt) are a bunch of hex-encoded ciphertexts.

One of them has been encrypted with ECB.

Detect it.

Remember that the problem with ECB is that it is stateless and deterministic; the same 16 byte plaintext block will always produce the same 16 byte ciphertext.
