# MD5 algorithm

In [136]:
import numpy as np
import binascii


In [137]:
# Width, in bits, of the message-length field that follows the padding.
LENGTH_BITS = 64
# Block width in bits; add_padding pads relative to a multiple of this.
BLOCK_SIZE = 512

In [138]:
def str_to_bits(string: str) -> np.ndarray:
    """Encode *string* as UTF-8 and expand it into a flat uint8 array of bits.

    Every byte contributes eight entries (each 0 or 1), most significant
    bit first, which is the default bit order of ``np.unpackbits``.
    """
    encoded = bytes(string, 'utf-8')
    return np.unpackbits(np.frombuffer(encoded, dtype=np.uint8))

def bits_to_str(bits: np.ndarray) -> str:
    """Pack a 0/1 bit array into bytes and decode those bytes as UTF-8.

    Bits are grouped eight at a time, most significant bit first (the
    ``np.packbits`` default).
    """
    packed = np.packbits(bits)
    raw = packed.tobytes()
    return str(raw, 'utf-8')


def int_to_x_bits(number: int, bits) -> np.ndarray:
    """Return *number* as a uint8 array of binary digits, *bits* entries wide.

    The digits come from ``np.binary_repr(number, width=bits)``, so the
    most significant bit is first and the value is left-padded with zeros.
    """
    digits = np.binary_repr(number, width=bits)
    return np.array([int(digit) for digit in digits], dtype=np.uint8)

def int_to_64_bits(number: int) -> np.ndarray:
    """Return *number* as a 64-entry bit array, most significant bit first."""
    return int_to_x_bits(number, bits=64)

def int_to_32_bits(number: int) -> np.ndarray:
    """Return *number* as a 32-entry bit array, most significant bit first."""
    return int_to_x_bits(number, bits=32)

def bits_to_int(bits_array):
    """Interpret a 0/1 array as a binary number, first element most significant.

    Each element is rendered as text and the joined digits are parsed in
    base 2, so the array must hold integer 0/1 values.
    """
    digit_strings = bits_array.astype(str).tolist()
    return int(''.join(digit_strings), base=2)

def binary_negation(bits: np.ndarray) -> np.ndarray:
    """Return the element-wise complement of a 0/1 bit array.

    Each element ``x`` maps to ``(x + 1) % 2``, so 0 becomes 1 and 1
    becomes 0. The arithmetic is done on the whole array at once rather
    than through ``np.vectorize``, which calls a Python function per
    element and raises ``ValueError`` on empty input.

    Args:
        bits: array (or array-like) of 0/1 values; may be empty.

    Returns:
        A new array of the same shape holding the flipped bits.
    """
    return (np.asarray(bits) + 1) % 2



In [139]:
def add_padding(bits: np.ndarray) -> np.ndarray:
    """Append the 1-bit marker and the zero fill that precede the length field.

    After padding, ``len(result) + LENGTH_BITS`` is a multiple of
    ``BLOCK_SIZE``, so appending the length field completes the last block.

    Args:
        bits: the message as a flat array of 0/1 values.

    Returns:
        A new uint8 array: the message bits, a single 1, then zeros.
        (Previously the result was upcast to float64 by the float padding.)
    """
    occupied = len(bits) + 1 + LENGTH_BITS
    # Negating before the modulo gives 0 when `occupied` is already a
    # multiple of BLOCK_SIZE; `BLOCK_SIZE - occupied % BLOCK_SIZE` would add
    # a whole extra block of zeros in that case.
    zero_count = (-occupied) % BLOCK_SIZE
    marker = np.ones(1, dtype=np.uint8)
    zero_fill = np.zeros(zero_count, dtype=np.uint8)
    return np.concatenate((np.asarray(bits, dtype=np.uint8), marker, zero_fill))

def add_length_bits(bits: np.ndarray, bits_len: int) -> np.ndarray:
    """Append *bits_len* to *bits* as a 64-bit field with its bytes reversed.

    The length is first written as 64 bits, most significant first, and
    then its eight bytes are put in reverse order by ``make_little_endian``.
    """
    length_field = make_little_endian(int_to_64_bits(bits_len))
    return np.append(bits, length_field)

def split_x_blocks(bits: np.ndarray, size: int) -> np.ndarray:
    """Cut a flat bit array into consecutive rows of *size* bits each.

    Args:
        bits: flat array of 0/1 values whose length is a multiple of *size*.
        size: number of bits per block.

    Returns:
        A uint8 array of shape ``(bits.size // size, size)``. Empty input
        yields an empty ``(0, size)`` array instead of raising.
    """
    flat = np.asarray(bits)
    assert flat.size % size == 0, "bit count must be a multiple of the block size"
    # A reshape gives the same rows as array_split without building a list
    # of sub-arrays, and it also copes with zero blocks.
    return flat.reshape(flat.size // size, size).astype(np.uint8)

def split_512_blocks(bits: np.ndarray) -> np.ndarray:
    """Cut *bits* into consecutive 512-bit blocks, one per row."""
    return split_x_blocks(bits, size=512)

def split_32_blocks(bits: np.ndarray) -> np.ndarray:
    """Cut *bits* into consecutive 32-bit words, one per row."""
    return split_x_blocks(bits, size=32)

def rotate_x_parts(bits: np.ndarray, x: int) -> np.ndarray:
    """Split *bits* into *x* consecutive chunks and return them in reverse order.

    The order of elements inside each chunk is kept; only the chunks are
    swapped around (the last chunk comes first).

    Args:
        bits: flat array to rearrange.
        x: number of chunks, split the way ``np.array_split`` does it.

    Returns:
        A new array with the same elements and the chunk order reversed.
    """
    chunks = np.array_split(bits, x)
    # Reverse the Python list instead of calling np.flip on it: np.flip has
    # to build a 2-D array first, which fails when the chunks are ragged
    # (len(bits) not divisible by x).
    return np.concatenate(chunks[::-1])

def rotate_4_parts(bits: np.ndarray) -> np.ndarray:
    """Reverse the order of the four consecutive chunks of *bits*."""
    return rotate_x_parts(bits, x=4)

def rotate_2_parts(bits: np.ndarray) -> np.ndarray:
    """Swap the two halves of *bits*."""
    return rotate_x_parts(bits, x=2)

def make_little_endian(bits: np.ndarray) -> np.ndarray:
    """Reverse the byte order of a bit array, keeping the bits inside each byte.

    Args:
        bits: flat array of 0/1 values whose length is a multiple of 8.

    Returns:
        A new array in which the 8-bit groups appear in reverse order.
        Empty input yields an empty array instead of raising.
    """
    flat = np.asarray(bits)
    assert flat.size % 8 == 0, "bit count must be a multiple of 8"
    byte_rows = flat.reshape(flat.size // 8, 8)
    # flatten() always copies, so the caller never gets a view of `bits`.
    return byte_rows[::-1].flatten()


In [140]:
# Pad a short sample message and confirm it fills exactly one 512-bit block.
bits = str_to_bits("plaintext")
# The length field records the message length in bits, not in characters.
# `bits.size` is read before `bits` is rebound, so it is still the
# unpadded message length here.
bits = add_length_bits(add_padding(bits), bits.size)
print(bits.size)

512


In [141]:
def F(K, L, M) -> np.ndarray:
    """Bitwise select: take the bit of L where K is 1, otherwise the bit of M."""
    picked_from_l = K & L
    picked_from_m = binary_negation(K) & M
    return picked_from_l | picked_from_m

def G(K, L, M) -> np.ndarray:
    """Bitwise select: take the bit of K where M is 1, otherwise the bit of L."""
    picked_from_k = M & K
    picked_from_l = binary_negation(M) & L
    return picked_from_k | picked_from_l

def H(K, L, M) -> np.ndarray:
    """Return the bitwise XOR (parity) of the three inputs."""
    partial = K ^ L
    return partial ^ M

def I(K, L, M) -> np.ndarray:
    """Return ``L XOR (K OR NOT M)``, computed bit by bit."""
    k_or_not_m = K | binary_negation(M)
    return L ^ k_or_not_m


In [142]:
# Left-rotation amount for each of the 64 steps: shift[i] is used by step i.
# Each group of 16 steps cycles through the same four amounts.
shift = (
    [7, 12, 17, 22] * 4
    + [5, 9, 14, 20] * 4
    + [4, 11, 16, 23] * 4
    + [6, 10, 15, 21] * 4
)

def left_rotate(arr, rotations):
    """Cyclically shift *arr* towards index 0 by *rotations* positions.

    Elements pushed off the front reappear at the end. *rotations* is
    reduced modulo ``arr.size`` first.
    """
    offset = rotations % arr.size
    return np.roll(arr, shift=-offset)

def add(a_bits, b_bits):
    """Add two bit arrays as unsigned integers and format the sum as 32 bits.

    The sum is NOT reduced modulo 2**32, so a carry out of the top bit
    hands ``int_to_32_bits`` a value that needs more than 32 bits; use
    ``add_modulo`` when wrap-around is wanted.
    """
    total = bits_to_int(a_bits) + bits_to_int(b_bits)
    return int_to_32_bits(total)

def add_modulo(a_bits, b_bits):
    """Add two bit arrays as unsigned integers modulo 2**32.

    Returns the wrapped sum as a 32-entry bit array, most significant
    bit first.
    """
    total = bits_to_int(a_bits) + bits_to_int(b_bits)
    # Both operands are non-negative, so this modulo keeps the low 32 bits.
    wrapped = total % (1 << 32)
    return int_to_32_bits(wrapped)

# Per-step additive constants: K[i] is added to the accumulator in step i of
# process_block (64 entries, one per step).
# NOTE(review): presumably the standard MD5 table floor(2**32 * abs(sin(i + 1)))
# -- verify against RFC 1321 before editing any value.
K = [ 
    0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee,
    0xf57c0faf, 0x4787c62a, 0xa8304613, 0xfd469501,
    0x698098d8, 0x8b44f7af, 0xffff5bb1, 0x895cd7be,
    0x6b901122, 0xfd987193, 0xa679438e, 0x49b40821,
    0xf61e2562, 0xc040b340, 0x265e5a51, 0xe9b6c7aa,
    0xd62f105d, 0x02441453, 0xd8a1e681, 0xe7d3fbc8,
    0x21e1cde6, 0xc33707d6, 0xf4d50d87, 0x455a14ed,
    0xa9e3e905, 0xfcefa3f8, 0x676f02d9, 0x8d2a4c8a,
    0xfffa3942, 0x8771f681, 0x6d9d6122, 0xfde5380c,
    0xa4beea44, 0x4bdecfa9, 0xf6bb4b60, 0xbebfbc70,
    0x289b7ec6, 0xeaa127fa, 0xd4ef3085, 0x04881d05,
    0xd9d4d039, 0xe6db99e5, 0x1fa27cf8, 0xc4ac5665,
    0xf4292244, 0x432aff97, 0xab9423a7, 0xfc93a039,
    0x655b59c3, 0x8f0ccc92, 0xffeff47d, 0x85845dd1,
    0x6fa87e4f, 0xfe2ce6e0, 0xa3014314, 0x4e0811a1,
    0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 
]

In [151]:
def md5(data):
    """Return the MD5 digest of the string *data* as a lowercase hex string.

    The message is UTF-8 encoded, padded, extended with its bit length and
    processed one 512-bit block at a time, carrying four 32-bit registers
    from block to block.

    Args:
        data: the text to hash.

    Returns:
        The 32-character hexadecimal digest.
    """
    message_bits = str_to_bits(data)
    padded = add_length_bits(add_padding(message_bits), len(message_bits))
    blocks = split_512_blocks(padded)

    # Initial register values, in (a, b, c, d) order.
    state = (
        int_to_32_bits(0x67452301),
        int_to_32_bits(0xefcdab89),
        int_to_32_bits(0x98badcfe),
        int_to_32_bits(0x10325476),
    )

    for block in blocks:
        # process_block expects (a, b, c, d) but returns (d, c, b, a).
        # Reverse its result before chaining; otherwise the registers are
        # permuted on every block after the first and any message that
        # spans two or more blocks hashes incorrectly.
        state = process_block(block, state)[::-1]

    # The digest is a, b, c, d in that order, each word emitted least
    # significant byte first.
    digest = b"".join(np.packbits(word)[::-1].tobytes() for word in state)
    return digest.hex()



def process_block(block_512, var):
    """Run the 64-step mixing loop over one 512-bit block.

    Args:
        block_512: the 512 bits of the block, split here into sixteen
            32-bit words.
        var: the four incoming 32-bit registers ``(a, b, c, d)`` as bit
            arrays.

    Returns:
        A 4-tuple of updated registers, each the modulo-2**32 sum of its
        incoming value and its value after the 64 steps. The tuple is in
        REVERSE order, ``(d, c, b, a)``; a caller that chains blocks must
        reverse it before passing it back in as ``var``.
    """
    A, B, C, D = var
    entry_a, entry_b, entry_c, entry_d = A, B, C, D
    words = split_32_blocks(block_512)

    for step in range(64):
        # Each run of 16 steps uses its own mixing function and its own
        # schedule for picking the message word.
        stage = step // 16
        if stage == 0:
            mixed, word_index = F(B, C, D), step
        elif stage == 1:
            mixed, word_index = G(B, C, D), (5 * step + 1) % 16
        elif stage == 2:
            mixed, word_index = H(B, C, D), (3 * step + 5) % 16
        else:
            mixed, word_index = I(B, C, D), (7 * step) % 16

        # Accumulate A + mix + constant + message word (byte-reversed so
        # the word is most significant bit first), all modulo 2**32.
        acc = add_modulo(A, mixed)
        acc = add_modulo(acc, int_to_32_bits(K[step]))
        acc = add_modulo(acc, make_little_endian(words[word_index]))
        rotated = left_rotate(acc, shift[step])

        # Register shuffle: A <- D, B <- B + rotated, C <- B, D <- C. The
        # right-hand side is evaluated with the old values.
        A, B, C, D = D, add_modulo(B, rotated), B, C

    return (
        add_modulo(entry_d, D),
        add_modulo(entry_c, C),
        add_modulo(entry_b, B),
        add_modulo(entry_a, A),
    )
    

md5("")  # expected digest of the empty string: d41d8cd98f00b204e9800998ecf8427e

'd41d8cd98f00b204e9800998ecf8427e'

In [152]:
# Known-answer checks. Every message here is short enough to fit in a
# single 512-bit block after padding.
for _message, _expected in (
    ("", "d41d8cd98f00b204e9800998ecf8427e"),
    ("abc", "900150983cd24fb0d6963f7d28e17f72"),
    ("password", "5f4dcc3b5aa765d61d8327deb882cf99"),
    ("verrryyylooooooooooooong", "1ac88bd0b7231d80ade9f7f83df9c2ee"),
    ("DiFr33333N!", "11b763ea85628ab6ce6eed4eedb2f7d3"),
):
    assert md5(_message) == _expected
