In [178]:
import numpy as np
import base64

In [259]:
with open('../../data/repeating_xor_cipher.txt', 'r') as file:
    data = file.read().replace('\n', '')
key_sizes = [i for i in range(2, 41)]
data = base64.b64decode(data).hex()

In [262]:
def get_bits(str, encoding='utf-8'):
    bytes = str.encode(encoding)
    res = []
    
    for b in bytes:
        for i in range(8):
            res.append((b >> i) & 1)
    return np.array(res)

In [263]:
def calculate_hamming_distance(left, right, encoding='utf-8'):
    hamming_weight_left = get_bits(left, encoding)
    hamming_weight_right = get_bits(right, encoding)
    
    return np.sum(np.logical_xor(hamming_weight_left, hamming_weight_right))

In [264]:
def find_keys(cipher):
    bs = bytes.fromhex(cipher)
    e = ord('e')
    keys = bytes([b ^ e for b in bs])
    return keys

In [265]:
def decode(keys, cipher):
    bs = bytes.fromhex(cipher)
    for key in keys:
        try:
            xored_plain = bytes([key ^ b for b in bs])
            return xored_plain.decode('ascii', errors='strict'), chr(key)
        except UnicodeDecodeError:
            pass

In [266]:
def get_diff_len(arr):
    lengths = []
    for str in arr:
        if len(str) not in lengths:
            lengths.append(len(str))
    return lengths

In [306]:
def num_occ(bstr):
    symbol_counts = {}

    # Count occurrences of each symbol
    for byte in bstr:
        byte = chr(byte)
        if byte in symbol_counts:
            symbol_counts[byte] += 1
        else:
            symbol_counts[byte] = 1

    sorted_counts = {k: v for k, v in sorted(symbol_counts.items(), key=lambda item: item[1], reverse=True)}
    
    return sorted_counts

In [267]:
key_distances = []

for key_size in key_sizes:
    key_distances.append((key_size,
        calculate_hamming_distance(
            data[:key_size], 
            data[key_size : 2 * key_size]
        ) / key_size)
    )
check_key_length = sorted(key_distances, key=lambda x: x[1])[:10]

In [245]:
check_key_length

[(3, 2.3333333333333335),
 (2, 2.5),
 (7, 2.5714285714285716),
 (20, 2.65),
 (16, 2.6875),
 (28, 2.8214285714285716),
 (6, 2.8333333333333335),
 (29, 2.8620689655172415),
 (24, 2.875),
 (15, 2.933333333333333)]

In [325]:
length = 2
splits = [list(data[i : i + length]) for i in range(0, len(data), length)]
filtered = np.array([block for block in splits if len(block) == length]).T
strings = [''.join(str) for str in filtered]

for i in range(len(strings)):
    for j in range(len(strings)):
        res = decode(find_keys(strings[j]), strings[i])
        if res is not None:
            print(f'###########{res[1]}')
            print(res[0])

###########q
eeqp02g0uaq0p2pu eD7!u u1q Aa0p03eQp4aqtrapea$Dsau015aEqp10&puut`p6puaa&uA`051a!A q0q2$qq!e`6Bpquue3!q4113!`1qdtG1tqq1ds`1qee!q!q%p!@q1p2r%P7E!depfq1qu%Esuq0p 1e#uaat&5q4aqapr43uQeD7A5VA1a%Apu`qsdP0t#4qtr6Qua$Esa0pp5uaA%ap61@ed`u5q2tuaqD7!p 3qqpA q1`2c`pquu1qbt1q`eeqpe0uq@ep03%Q0quep0stq1a$D'`1413!A!qp035Dau!t1p24ap'T3$5tA5aeAea4`2!P`u qdfpup!e@#!pqqqquQ`0qq2c`35a5ats1eqeeDsua u1q'5%@4vPuu uu`r6Qeae#e3pq5p @aeq`2$Dd`uqqrppaatD3aq1eS1!Eaqq02dPeeuuupf1q e@7ae0aqq!Aeqd`&c215aqAqr45A`e7Aq0uRqq !upq2ap5a4`1vpq7@qT'a10e ae eqdqs4P7T`5q0bd0a`e@3ae a1eP!p0t"1@15aue6q1uaqD#`upu!qeeap42eQp3Qu54R0eQe%D3aeqppaaAuqd"%Qa4`quart3qee3 ppewBQqpt!@e4`1Utrpu1`aD3!00qwQqQaap165D`uaaqp21quaU3u`10 qeAeq`1"1Qaq'EqpvPup eD7!qqppqqE`uq`6cpe t0p2`qe`aD3a50qsap`uq2aPutaeu1cpua e@3auRqpp`A q0p#!Qqee4qdr`0qeEr`04q0q!UquBD"4T7qdq4b0puaeq`q4Q!7P@qq0d"!Paaadq4sqq7Qd@'a0 `15aau20r4Dqda1q`4Apqau@q!10q50a@!qtt2%@u`4aar0uqa%Dq q ppqu"`1pqr$Tp3Q4adr`1q!e@3 p$q%4

In [322]:
splits

[['1', 'd'],
 ['4', '2'],
 ['1', 'f'],
 ['4', 'd'],
 ['0', 'b'],
 ['0', 'f'],
 ['0', '2'],
 ['1', 'f'],
 ['4', 'f'],
 ['1', '3'],
 ['4', 'e'],
 ['3', 'c'],
 ['1', 'a'],
 ['6', '9'],
 ['6', '5'],
 ['1', 'f'],
 ['4', '9'],
 ['1', 'c'],
 ['0', 'e'],
 ['4', 'e'],
 ['1', '3'],
 ['0', '1'],
 ['0', 'b'],
 ['0', '7'],
 ['4', 'e'],
 ['1', 'b'],
 ['0', '1'],
 ['1', '6'],
 ['4', '5'],
 ['3', '6'],
 ['0', '0'],
 ['1', 'e'],
 ['0', '1'],
 ['4', '9'],
 ['6', '4'],
 ['2', '0'],
 ['5', '4'],
 ['1', 'd'],
 ['1', 'd'],
 ['4', '3'],
 ['3', '3'],
 ['5', '3'],
 ['4', 'e'],
 ['6', '5'],
 ['5', '2'],
 ['0', '6'],
 ['0', '0'],
 ['4', '7'],
 ['5', '4'],
 ['1', 'c'],
 ['0', 'd'],
 ['4', '5'],
 ['4', 'd'],
 ['0', '7'],
 ['0', '4'],
 ['0', 'c'],
 ['5', '3'],
 ['1', '2'],
 ['3', 'c'],
 ['0', 'c'],
 ['1', 'e'],
 ['0', '8'],
 ['4', '9'],
 ['1', 'a'],
 ['0', '9'],
 ['1', '1'],
 ['4', 'f'],
 ['1', '4'],
 ['4', 'c'],
 ['2', '1'],
 ['1', 'a'],
 ['4', '7'],
 ['2', 'b'],
 ['0', '0'],
 ['0', '5'],
 ['1', 'd'],
 ['4', '7'],

In [None]:
filtered