In [577]:
# First set of the cryptopals challenges:
# https://cryptopals.com/sets/1

In [578]:
# Convert hex to base 64
import base64

ts = '49276d206b696c6c696e6720796f757220627261696e206c696b65206120706f69736f6e6f7573206d757368726f6f6d'


def hex_to_base64(s):
    '''
    convert hex to base64
    input: hex string
    output: base64 string
    '''
    try:
        decode = bytes.fromhex(s)
        b64 = base64.b64encode(decode)

        return b64.decode()
    except Exception as e:
        return 'not a valid strring', e

# check if true:
h = 'SSdtIGtpbGxpbmcgeW91ciBicmFpbiBsaWtlIGEgcG9pc29ub3VzIG11c2hyb29t'
hex_to_base64(ts) == h

True

In [579]:
# Explanation:
#https://www.codechef.com/PRJRF14/problems/XORSN

# XOR is Exlusive Or. 

ts1 = '1c0111001f010100061a024b53535009181c'
ts2 = '686974207468652062756c6c277320657965'

import codecs

def fixed_XOR(s1, s2):
    '''
    input: two equal length buffers
    output: XOR combination.
    '''

    #check if two strings are of equal length:
    if len(s1) == len(s2):

        # convert strings to int
        s1 = bytes.fromhex(s1)
        s2 = bytes.fromhex(s2)

        # xor both strings as hex numbers
        xord = [x^y for x,y in zip(s1, s2)]
        sent = bytes(xord)

        xord = codecs.encode(sent, 'hex')
        return xord.decode("utf-8")        

    else:
        return "Strings are not of equal length"


h = '746865206b696420646f6e277420706c6179'

# check if true: 
fixed_XOR(ts1, ts2) == h

True

In [580]:
# single byte XOR cipher

# the hex encoded string:
ts1 = '1b37373331363f78151b7f2b783431333d78397828372d363c78373e783a393b3736'

# http://en.algoritmy.net/article/40379/Letter-frequency-English
freqs = {
'A' : 0.0834,
'B' : 0.0154,
'C' : 0.0273,
'D' : 0.0414,
'E' : 0.1260,
'F' : 0.0203,
'G' : 0.0192,
'H' : 0.0611,
'I' : 0.0671,
'J' : 0.0023,
'K' : 0.0087,
'L' : 0.0424,
'M' : 0.0253,
'N' : 0.0680,
'O' : 0.0770,
'P' : 0.0166,
'Q' : 0.0009,
'R' : 0.0568,
'S' : 0.0611,
'T' : 0.0937,
'U' : 0.0285,
'V' : 0.0106,
'W' : 0.0234,
'X' : 0.0020,
'Y' : 0.0204,
'Z' : 0.0006,
}


# Evaluate each output and chose the one with the best score. 
# done using Bhattacharyya distance
# https://en.wikipedia.org/wiki/Bhattacharyya_distance

def bhatt_dist(s):
    '''
    input: sentence
    output: Bhattacharyya distance of a sentence. Value is between 0 and 1, where 0 is not English and 1 is English.
    '''

    total_chars = len(s)
    count = Counter(s)  

    coff = np.sum(np.sqrt(freqs.get(char, 0) * y/total_chars) for char, y in count.items())
    return coff

# iterate over each of the 256 bytes in combination with the string, and see which byte performs best.
def decode_hex(s):
    '''
    input: hex encoded string
    output: best score and sentence combination
    '''
    s1 = bytes.fromhex(s)
    final_score = 0
    final_sent = ''
    for i in range(256):
        keys = [i] * len(s)

        xord  = [ x ^ y for x,y in zip(s1, keys)]
        sent = bytes(xord)#.decode("utf-8")

        sent = sent.decode("utf-8", errors='ignore')
       


        if bhatt_dist(sent) > final_score:
            final_score = bhatt_dist(sent)
            final_sent = sent

    return final_score, final_sent


decode_hex(ts1)

(0.6617030935065874, 'cOOKING\x00mc\x07S\x00LIKE\x00A\x00POUND\x00OF\x00BACON')

In [581]:
import pandas as pd

# One of the 60-character strings in this file[4.txt] has been encrypted by single-character XOR.
data = pd.read_fwf('4.txt', header=None)

data['score'] = data[0].apply(lambda x: decode_hex(x)[0])
data['sent'] = data[0].apply(lambda x: decode_hex(x)[1])


winner = data.iloc[data['score'].argmax()]
print(winner)

0        7b5a4215415d544115415d5015455447414c155c46155f...
score                                             0.749954
sent                        nOW THAT THE PARTY IS JUMPING*
Name: 170, dtype: object


In [584]:
lines = ["Burning 'em, if you ain't quick and nimble\n", "I go crazy when I hear a cymbal"]
sent = str("".join(lines))


# Encrypt it, under the key "ICE", using repeating-key XOR.
key = "ICE"
# In repeating-key XOR, you'll sequentially apply each byte of the key; the first byte of plaintext will be XOR'd against I, the next C, the next E, then I again for the 4th byte, and so on.
rep_key = key * len(sent)


def repeat_key_XOR(s, keystring):
    '''
    input: sentence as string and a keystring 
    output: encrypted sentence
    '''
    s = bytearray(s, 'utf-8')
    
    keystring = bytes(keystring, 'utf-8')
    sent = bytes([x^y for x,y in zip(s, keystring)])


    return sent.hex()


# It should come out to:
result = "0b3637272a2b2e63622c2e69692a23693a2a3c6324202d623d63343c2a26226324272765272a282b2f20430a652e2c652a3124333a653e2b2027630c692b20283165286326302e27282f"

# check if code works: 
repeat_key_XOR(sent, rep_key) == result

True

In [583]:
# There's a file[6.txt] here. It's been base64'd after being encrypted with repeating-key XOR.
# Decrypt it. Here's how:

# 1. let KEYSIZE be the length of the key; try values from 2 to (say) 40
"""for key in range(2, 40):
    KEYSIZE = len(key)"""

# 2. Write a function to compute the edit distance / Hamming distance between two strings. (hamming distance is the number of differing bits)
# The distance between

d_test1 = 'this is a test'
# and 
d_test2 = 'wokka wokka!'
# is 37. Make sure this is correct before you proceed. 
