# Set 1 - The Cryptopals Crypto Challenges

Cryptopals Rule:
>Always operate on raw bytes, never on encoded strings. Only use hex and base64 for pretty-printing.

## Challenge 1 - Convert hex to base64

In [48]:
import base64

def convert_hex_to_base64(str):
  raw_bytes = bytearray.fromhex(str)
  b64_bytes = base64.b64encode(raw_bytes)

  return b64_bytes

## Challenge 2 - Fixed XOR

>In cryptography, the simple XOR cipher is a type of additive cipher, an encryption algorithm that operates according to the principles:

```
A ^ 0 = A
A ^ A = 0
A ^ B = B ^ A
(A ^ B) ^ C = A ^ (B ^ C)
(B ^ A) ^ A = B ^ 0 = B
```

>where `^` denotes the exclusive disjunction (XOR) operation. This operation is sometimes called modulus 2 addition (or subtraction, which is identical). With this logic, a string of text can be encrypted by applying the bitwise XOR operator to every character using a given key. To decrypt the output, merely reapplying the XOR function with the key will remove the cipher.

*[Source](https://en.wikipedia.org/wiki/XOR_cipher)*

### XOR Cipher Trace Table

| Plaintext | Key | Ciphertext |
| - | - | - |
| 0 | 0 | 0 |
| 0 | 1 | 1 |
| 1 | 0 | 1 |
| 1 | 1 | 0 |

In [2]:
def xor(bytes1, bytes2):
  if len(bytes1) != len(bytes2): return
  return bytes([b1 ^ b2 for b1, b2 in zip(bytes1, bytes2)])

def fixed_xor(str1, str2):
  bytes1 = bytearray.fromhex(str1)
  bytes2 = bytearray.fromhex(str2)

  return xor(bytes1, bytes2)

## Challenge 3 - Single-byte XOR cipher

In [238]:
from string import ascii_lowercase
from collections.abc import Iterable

letter_frequency = { 'e': 12.70, 't': 9.05, 'a': 8.16, 'o': 7.50, 'i': 6.96, 'n': 6.74, 's': 6.32, 'h': 6.09, 'r': 5.98, 'd': 4.25, 'l': 4.02, 'c': 2.78, 'u': 2.75, 'm': 2.40, 'w': 2.36, 'f': 2.22, 'g': 2.01, 'y': 1.97, 'p': 1.92, 'b': 1.49, 'v': 0.97, 'k': 0.77, 'j': 0.15, 'x': 0.15, 'q': 0.09, 'z': 0.07 }
allowed_characters = ['.', ',', '\'', '"', ' ', '!', '?', '-']

def charater_frequency_score(str):
  score = 0
  for c in str:
    if c < 0 or c > 127:
      return -1000
    c = chr(c)
    if not c.isalnum() and c not in allowed_characters:
        score -= 50
    elif c.isalpha():
        score += letter_frequency[c.lower()] * 10

  return score

def xor_byte(bs, i):
    if isinstance(bs, Iterable):
      return bytes(
          [b ^ i for b in bs]
      )
    return bytes(bs ^ i)

def single_byte_xor_cipher(cipher_str):
  if isinstance(cipher_str, str): cipher_str = bytearray.fromhex(cipher_str)
  char_scores = []

  for n in range(0, 256):
    xored_bytes = xor_byte(cipher_str, n)
    score = charater_frequency_score(xored_bytes)
    char_scores.append((n, score))

  return sorted(char_scores, key = lambda t: t[1], reverse=True)

cipher = "1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736"
xor_byte(bytearray.fromhex(cipher), 88)

b"Cooking MC's like a pound of bacon"

## Challenge 4 - Detect single-character XOR

In [132]:
def detect_single_character_xor():
  file = open('4.txt', 'r')
  lines = file.readlines()
  highest_score = (None, None, -1)
  for line in lines:
      score_tuple = single_byte_xor_cipher(line)[0]
      if (highest_score[2] < score_tuple[1]): highest_score = (line, score_tuple[0], score_tuple[1])

  return highest_score

# cipher = "7b5a4215415d544115415d5015455447414c155c46155f4058455c5b523f"
# xor_byte(bytearray.fromhex(cipher), 53)

('7b5a4215415d544115415d5015455447414c155c46155f4058455c5b523f\n', 53, 1350.8)

## Challenge 5 - Implement repeating-key XOR

In [246]:
import binascii

def encode_repeating_key_xor(s, key):
    xored = [s[i] ^ key[i % len(key)] for i in range(len(s))]
    return bytes(xored)

def repeating_key_xor(s, key):
  if isinstance(s, str): s = bytes(s, "utf-8")
  if isinstance(key, str): key = bytes(key, "utf-8")
  cipher = []

  for i in range(len(s)):
    b = s[i]
    c = key[i % len(key)]
    cipher.append(b ^ c)

  pp = binascii.hexlify(bytes(cipher)).decode('ascii')
  # print(pp)
  return pp

# cipher_array = repeating_key_xor("Burning 'em, if you ain't quick and nimble", "ICE")
# binascii.hexlify(cipher_array).decode('ascii')

## Challenge 6 - Break repeating-key XOR

In [248]:
def hamming_distance(bytes1, bytes2):
  distance = 0
  for b1, b2 in zip(bytes1, bytes2):
    diff = bin(b1 ^ b2)
    count = diff.count('1')
    distance += count

  return distance

assert hamming_distance(b'''this is a test''', b'''wokka wokka!!!''') == 37

def to_blocks(lst, n):
  for i in range(0, len(lst), n):
      yield lst[i:i + n]

def break_repeating_key_xor():
  # KEYSIZE = 2 # 2–40
  file = open('6.txt', 'r')
  decoded = base64.b64decode(file.read())
  distances = []
  for KEYSIZE in range(2, 40):
    first = decoded[0:KEYSIZE]
    second = decoded[KEYSIZE:KEYSIZE*2]
    distance = hamming_distance(first, second) / KEYSIZE
    distances.append((distance, KEYSIZE))
  
  plaintext = []
  distances.sort()
  for i in range(0, 3):
    key = b''
    KEYSIZE = distances[i][1]
    blocks = to_blocks(decoded, KEYSIZE)
    block = b''
    for b in range(i, len(decoded), KEYSIZE):
      block += bytes([decoded[b]])
    key += bytes([single_byte_xor_cipher(block)[0][0]])
    plaintext.append((repeating_key_xor(decoded, key), key))

  return plaintext

print(break_repeating_key_xor())

[('742b762462666b76267a275573000c76207567277a68626e2772687f2c5f697768200d493d74742a5a3a270c3b6f692e3d75642c246e6d653a7b5565776120736078267d2548732e42696c742e30786d60690d4f6e3a695e7f6f65737e2874683b3d593669497a636c2e267b21612c270c577f60512c6f6c61732f6e2676301117030b5f6574662864216f3c737269742b1d6d7768207360786b3b13162069215369737a2073262c216674643a6d53683b706862287a6f692558363a0b7c6f6e606e3d627e287d7f20105a5c6272686c662f6e2674271127214e3b6d6e652c312161286926493d611d627470206b69677f35693b59000c76206b6b3d6968626e276b793a684f797c246b6e6b762672271159005f3b6368603d6f6e603a276b793a6152796f6c2066667926526953362e4275200d5a263d6b793a73266c7f781d656f24666b676a2a3b255427694662206461277e647c3d742667752c3741622470687b6e633c3a1127260b6f68622e3a746569697e636c7665532b37244768284b6775205d3f280b5c6f262e43175261266872683a2b5e6d6e7765277c75676f6e42733d437e20706f303d482c3e6e6a6c3a6e582c11456e632874603b305e26694f746e207a697a687a2c2767207e6d50623724746f6d7326111e592a69527475277d3d7c7365272026616e2c50693b0e536

In [202]:
def run_tests():
  print(f'Set 1 Challenge 1 passing')
  # https://docs.python.org/3/library/stdtypes.html#bytes.decode
  assert convert_hex_to_base64("49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d").decode() == "SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t"
  print(f'Set 1 Challenge 2 passing')
  assert fixed_xor("1c0111001f010100061a024b53535009181c", "686974207468652062756c6c277320657965").hex() == "746865206b696420646f6e277420706c6179"
  print(f'Set 1 Challenge 3 passing')
  # print(single_byte_xor_cipher("1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736")[0])
  print(f'Set 1 Challenge 4 passing')
  # print(detect_single_character_xor())
  print(f'Set 1 Challenge 5 passing')
  input5 = """Burning 'em, if you ain't quick and nimble
I go crazy when I hear a cymbal"""
  output5 = """0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f"""
  assert repeating_key_xor(input5, "ICE") == output5 

run_tests()

Set 1 Challenge 1 passing
Set 1 Challenge 2 passing
Set 1 Challenge 3 passing
Set 1 Challenge 4 passing
Set 1 Challenge 5 passing
