In [20]:
from pathlib import Path
import itertools

In [4]:
list(Path('test').glob('*.txt'))

[WindowsPath('test/american-english-huge.big.txt'),
 WindowsPath('test/american-english-insane.big.txt'),
 WindowsPath('test/american-english-large.big.txt'),
 WindowsPath('test/american-english-small.big.txt'),
 WindowsPath('test/american-english.big.txt'),
 WindowsPath('test/british-english-huge.big.txt'),
 WindowsPath('test/british-english-insane.big.txt'),
 WindowsPath('test/british-english-large.big.txt'),
 WindowsPath('test/british-english-small.big.txt'),
 WindowsPath('test/british-english.big.txt'),
 WindowsPath('test/canadian-english-huge.big.txt'),
 WindowsPath('test/canadian-english-insane.big.txt'),
 WindowsPath('test/canadian-english-large.big.txt'),
 WindowsPath('test/canadian-english-small.big.txt'),
 WindowsPath('test/canadian-english.big.txt'),
 WindowsPath('test/words_en.txt'),
 WindowsPath('test/words_ms.txt')]

In [5]:
# Alphabet the plaintext is assumed to be drawn from: ASCII digits, letters,
# punctuation and whitespace. Both views below are derived from this single
# literal so the str and bytes variants cannot drift apart.
printable_ascii = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\v\f'
charset = set(printable_ascii)                  # characters, used to filter str words
byteset = set(printable_ascii.encode('ascii'))  # byte values, used to filter XOR candidates
len(charset)

100

In [6]:
# Build the dictionary from test/words_en.txt: one entry per line, stripped
# and lower-cased, kept only when longer than two characters and made up
# entirely of characters from `charset`.
with Path('test/words_en.txt').open() as f:
    normalized_lines = (line.strip().lower() for line in f)
    words = {
        word
        for word in normalized_lines
        if len(word) > 2 and set(word).issubset(charset)
    }
len(words)

41431

In [7]:
from find_replace import Trie

In [8]:
trie = Trie.fromkeys(words, case_sensitive=False)

In [9]:
len(trie)

41431

In [10]:
list(trie.find_iter('thequickfbrownsfoxes'))

[<Match object; span=(0, 3), match='the'>,
 <Match object; span=(3, 8), match='quick'>,
 <Match object; span=(9, 15), match='browns'>,
 <Match object; span=(15, 20), match='foxes'>]

In [9]:
trie.findAll('thequickfbrownsfoxes')

['the', 'quick', 'browns', 'foxes']

In [10]:
'rounds' in trie

True

In [11]:
trie.find_longest('togethere')

['together']

In [12]:
for _ in range(1000):
    trie.find_longest('Braised lamb shanks with rosemary and port wine sauce | braising cooking method')

In [13]:
for _ in range(1000):
    sum(map(len, trie.findall('Braised lamb shanks with rosemary and port wine sauce | braising cooking method'*20)))

In [14]:
for _ in range(1000):
    c = 0
    for m in trie.finditer('Braised lamb shanks with rosemary and port wine sauce | braising cooking method'*20):
        c += m.end() - m.start()

In [15]:
400/60

6.666666666666667

In [9]:
len('Braised lamb shanks with rosemary and port wine sauce | braising cooking method'*2)

158

In [11]:
def xor(text, key):
    """Apply a repeating-key XOR to ``text`` and return a new ``bytearray``.

    ``text`` may be a ``str`` (encoded as ASCII first) or anything accepted by
    ``bytearray(...)``. ``key`` may be a ``str`` (encoded as Latin-1 first) or
    any indexable sequence of byte values such as ``bytes`` or a tuple of ints.
    The key is repeated cyclically over the input; the caller's ``text`` object
    is never modified. XOR-ing the result with the same key restores the input.
    """
    plain = bytearray(text.encode('ascii') if isinstance(text, str) else text)
    key_bytes = key.encode('latin1') if isinstance(key, str) else key
    return bytearray(
        byte ^ key_bytes[pos % len(key_bytes)]
        for pos, byte in enumerate(plain)
    )

In [15]:
import itertools

In [17]:
def xor_detect(text, key_length, charset=byteset):
    """Enumerate every repeating XOR key whose plaintext stays inside ``charset``.

    For each key position, a key byte survives only if XOR-ing it with every
    ciphertext byte at that position (indices ``i`` where
    ``i % key_length`` equals the position) gives a value in ``charset``.

    Args:
        text: Ciphertext as an iterable of byte values (e.g. ``bytes`` or
            ``bytearray``).
        key_length: Assumed key length in bytes.
        charset: Byte values the plaintext is allowed to contain.

    Returns:
        A list of candidate keys, each a tuple of ``key_length`` ints: the
        Cartesian product of the surviving bytes per position. The list is
        fully materialised, so it can be very large.
    """
    out = [set(range(256)) for _ in range(key_length)]
    for i, cipher_byte in enumerate(text):
        # Key bytes that map this ciphertext byte back into the charset.
        possible = {cipher_byte ^ plain_byte for plain_byte in charset}
        out[i % key_length].intersection_update(possible)
    # An unreachable second `return` (a per-position list of Latin-1
    # characters) used to follow this line; it was dead code and is removed.
    return list(itertools.product(*out))

In [18]:
encoded = xor('The multi-armed bandit setting is the cleanest paradigm to capture the tension between exploring information about the underlying system and exploiting the most profitable actions based on the current information. A decision-maker (or learner ) repeatedly selects among a set of k actions also referred to as arms, earns the reward of the selected arm, and obtains feedback only about it.', 'TEST')
len(encoded)

388

In [21]:
keys = xor_detect(encoded, 4)
len(keys)

638894

In [22]:
# Score the first 10,000 candidate keys against the first 65 ciphertext bytes.
# A key's score is the summed length of everything trie.findall() returns for
# the text decoded with that key.
test_str = encoded[:65]
results = [
    sum(len(match) for match in trie.findall(xor(test_str, key).decode('ascii')))
    for key in keys[:10000]
]

* Tuning the numbers:
* Scoring a 65-character prefix allows processing at a rate of about 10k keys per second,
* meaning that 600k keys take about 1 minute, and 2.5M keys take about 4 minutes.

In [23]:
# test_str = encoded[:65]
# results = []
# for key in keys:
#     decoded = xor(test_str, key).decode('ascii')
#     score = sum(map(len, trie.findall(decoded)))
#     results.append(score)

In [24]:
out = sorted(zip(results, keys), reverse=True)
[(x, bytes(y).decode('latin1'), xor(test_str, y)) for x,y in out[:10]]

[(9,
  'A\tRW',
  bytearray(b'A$d#x9mw|a`qx)e#w-og|8!pp8uj{+!jflukplbop-off8!st>`g|+l#a#!`t<uvg')),
 (7,
  'A\n\x7fT',
  bytearray(b"A\'I x:@t|bMrx*H w.Bd|;\x0csp;Xi{(\x0cifoXhpoOlp.Bef;\x0cpt=Md|(A a \x0cct?Xug")),
 (7,
  'A\nRP',
  bytearray(b"A\'d$x:mp|b`vx*e$w.o`|;!wp;um{(!mfoulpobhp.oaf;!tt=``|(l$a !gt?uqg")),
 (7,
  'A\nFQ',
  bytearray(b"A\'p%x:yq|btwx*q%w.{a|;5vp;al{(5lfoampovip.{`f;5ut=ta|(x%a 5ft?apg")),
 (7,
  'A\tQU',
  bytearray(b'A$g!x9nu|acsx)f!w-le|8"rp8vh{+"hflviplamp-ldf8"qt>ce|+o!a#"bt<vtg')),
 (7,
  'A\tFQ',
  bytearray(b'A$p%x9yq|atwx)q%w-{a|85vp8al{+5lflamplvip-{`f85ut>ta|+x%a#5ft<apg')),
 (7,
  'A\x06QU',
  bytearray(b'A+g!x6nu|ncsx&f!w"le|7"rp7vh{$"hfcvipcamp"ldf7"qt1ce|$o!a,"bt3vtg')),
 (7,
  'A\x06FQ',
  bytearray(b'A+p%x6yq|ntwx&q%w"{a|75vp7al{$5lfcampcvip"{`f75ut1ta|$x%a,5ft3apg')),
 (7,
  'A\x04S]',
  bytearray(b'A)e)x4l}|la{x$d)w nm|5 zp5t`{& `fatapacep nlf5 yt3am|&m)a. jt1t|g')),
 (7,
  'A\x04SS',
  bytearray(b"A)e\'x4ls|laux$d\'w nc|5 tp5tn{& nfatopackp 

In [25]:
encoded = xor('Learn to braised lamb shanks the easy way and serve them with a delicious rosemary and port wine sauce. in tis video we will explain what is the braising cooking method and how to use this cooking technique at home to make lots of delicious recipes.', 'TEST')
len(encoded)

249

In [26]:
keys = xor_detect(encoded, 4)
len(keys)

1988028

In [27]:
encoded = xor('Braised lamb shanks with rosemary and port wine sauce | braising cooking method', 'TEST')
len(encoded)

79

In [28]:
keys = xor_detect(encoded, 4)
len(keys)

5362560

Not too bad: 5M keys should take about 10 minutes to brute-force.

In [29]:
keys.index(tuple('TEST'.encode('latin1')))

2975323

In [30]:
# Score a 50,000-key window of the candidate list against the first 65
# ciphertext bytes. Entries of `results` outside the window stay 0.
test_str = encoded[:65]
results = [0] * len(keys)
# Index the window directly rather than materialising list(enumerate(keys))
# for every candidate; clamping the stop keeps slice-like behaviour when the
# key list is shorter than the window.
for i in range(2950000, min(3000000, len(keys))):
    key = keys[i]
    decoded = xor(test_str, key).decode('ascii')
    score = sum(map(len, trie.findall(decoded)))
    results[i] = score
    if (i + 1) % 100000 == 0:
        # max() picks the same (score, key) pair as sorting in descending
        # order and taking the first element, without sorting every entry.
        best_score, best_key = max(zip(results, keys))
        print(f'[{i + 1}/{len(keys)}] best key so far: {bytes(best_key).decode("latin1")} ({best_score}/{len(test_str)})')

[3000000/5362560] best key so far: TEST (47/65)


In [48]:
out = sorted(zip(results, keys), reverse=True)
out2 = [(x, bytes(y).decode('latin1'), xor(test_str, y)) for x,y in out[:100]]
out2[:10]

[(47,
  'TEST',
  bytearray(b'Braised lamb shanks with rosemary and port wine sauce | braising ')),
 (29,
  'TESS',
  bytearray(b"Bransed\'lame shfnks\'wito rotemauy aid phrt pine\'saude |\'bransin` ")),
 (25,
  'TESP',
  bytearray(b'Bramsed$lamf shenks$witl rowemavy ajd pkrt sine$sauge |$bramsinc ')),
 (22,
  'TESZ',
  bytearray(b'Bragsed.laml shonks.witf ro}ema|y a`d part yine.saume |.bragsini ')),
 (22,
  'TESU',
  bytearray(b'Brahsed!lamc sh`nks!witi roremasy aod pnrt vine!saube |!brahsinf ')),
 (22,
  'TEET',
  bytearray(b'Brwiser la{b s~anke wibh rysemwry wnd fort6wins sacce j brwisixg ')),
 (20,
  'TESO',
  bytearray(b'Brarsed;lamy shznks;wits rohemaiy aud ptrt line;sauxe |;brarsin| ')),
 (20,
  'TES@',
  bytearray(b'Bra}sed4lamv shunks4wit| rogemafy azd p{rt cine4sauwe |4bra}sins ')),
 (20,
  'T@SS',
  bytearray(b"Bwans`d\'ldme vhfnns\'wlto wotehauy%aid%phrq pike\'sdude%|\'bwansln` ")),
 (19,
  'TES~',
  bytearray(b'BraCsed\nlamH shKnks\nwitB roYemaXy aDd pErt ]ine\nsauIe |\nbr

In [31]:
def test(key):
    """Score ``key`` against the global ``test_str``.

    The ciphertext is decoded with ``xor`` and interpreted as ASCII; the score
    is the summed length of the items returned by ``trie.findall`` for that
    text. Relies on the notebook globals ``test_str``, ``xor`` and ``trie``.
    """
    candidate_text = xor(test_str, key).decode('ascii')
    matches = trie.findall(candidate_text)
    return sum(len(match) for match in matches)

In [37]:
keys2 = keys[2900000:3000000]

In [38]:
res2 = list(map(test, keys2))

In [35]:
from multiprocessing.pool import ThreadPool

In [39]:
pool = ThreadPool(10)
res3 = pool.map(test, keys2)

In [50]:
res2==res3

True

In [45]:
# Byte values of the ASCII letters (both cases).
text_chars = set(
    b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    b'abcdefghijklmnopqrstuvwxyz'
)
def num_chars(text):
    """Return how many items of ``text`` are ASCII letter byte values.

    ``text`` is iterated item by item, so it is expected to yield ints
    (``bytes`` or ``bytearray``).
    """
    return sum(1 for byte in text if byte in text_chars)

In [57]:
counts = [num_chars(xor(test_str, key)) for key in keys2]

In [58]:
max(counts)

54

In [59]:
out = sorted(zip(res2, keys2,counts), reverse=True)
out2 = [(x, z, bytes(y).decode('latin1'), xor(test_str, y)) for x,y,z in out[:100]]
out2[:10]

[(47,
  53,
  'TEST',
  bytearray(b'Braised lamb shanks with rosemary and port wine sauce | braising ')),
 (29,
  52,
  'TESS',
  bytearray(b"Bransed\'lame shfnks\'wito rotemauy aid phrt pine\'saude |\'bransin` ")),
 (25,
  53,
  'TESP',
  bytearray(b'Bramsed$lamf shenks$witl rowemavy ajd pkrt sine$sauge |$bramsinc ')),
 (22,
  50,
  'TESZ',
  bytearray(b'Bragsed.laml shonks.witf ro}ema|y a`d part yine.saume |.bragsini ')),
 (22,
  52,
  'TESU',
  bytearray(b'Brahsed!lamc sh`nks!witi roremasy aod pnrt vine!saube |!brahsinf ')),
 (22,
  52,
  'TEET',
  bytearray(b'Brwiser la{b s~anke wibh rysemwry wnd fort6wins sacce j brwisixg ')),
 (20,
  52,
  'TESO',
  bytearray(b'Brarsed;lamy shznks;wits rohemaiy aud ptrt line;sauxe |;brarsin| ')),
 (20,
  49,
  'TES@',
  bytearray(b'Bra}sed4lamv shunks4wit| rogemafy azd p{rt cine4sauwe |4bra}sins ')),
 (20,
  51,
  'T@SS',
  bytearray(b"Bwans`d\'ldme vhfnns\'wlto wotehauy%aid%phrq pike\'sdude%|\'bwansln` ")),
 (19,
  52,
  'TES~',
  bytearray(b'Br

In [60]:
len(test_str)

65

In [61]:
from collections import Counter

In [62]:
sorted(Counter(counts).most_common())

[(14, 100),
 (15, 520),
 (17, 350),
 (18, 2340),
 (19, 2704),
 (21, 1865),
 (22, 9848),
 (23, 885),
 (24, 756),
 (25, 1426),
 (26, 4358),
 (27, 4394),
 (28, 3849),
 (29, 4210),
 (30, 4355),
 (31, 5192),
 (32, 6458),
 (33, 2901),
 (34, 4127),
 (35, 4183),
 (36, 3245),
 (37, 3132),
 (38, 3871),
 (39, 4506),
 (40, 3221),
 (41, 2791),
 (42, 2394),
 (43, 2199),
 (44, 1943),
 (45, 1459),
 (46, 966),
 (47, 942),
 (48, 1084),
 (49, 1048),
 (50, 920),
 (51, 768),
 (52, 490),
 (53, 176),
 (54, 24)]

In [66]:
b'0' == 48

False

In [12]:
# Per-byte weights used to rank candidate plaintexts. The keys are the five
# whitespace control bytes (\t \n \v \f \r) followed by every printable ASCII
# byte from space to '~'. ASCII letters and the space character weigh 1;
# every other listed byte weighs 0. To tune a single weight, assign to
# weighted_chars[...] after this statement.
weighted_chars = {
    byte: int(byte in b' ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz')
    for byte in (*range(9, 14), *range(32, 127))
}

In [27]:
def xor_detect_2(text, key_length, weighted_charset=weighted_chars):
    """Rank every charset-consistent repeating XOR key by plaintext weight.

    Args:
        text: Ciphertext as an iterable of byte values.
        key_length: Assumed key length in bytes.
        weighted_charset: Mapping from allowed plaintext byte value to its
            weight. A key byte is discarded as soon as it decodes any
            ciphertext byte at its position to a value missing from the map.

    Returns:
        A list of ``(score, key)`` pairs sorted by descending score, where
        ``key`` is a list of ``key_length`` ints and ``score`` is the sum of
        the weights of all decoded bytes. Equal scores keep the order in which
        the combinations were generated (the sort is stable).
    """
    # Phase 1: per key position, accumulate a score for each surviving byte.
    per_position = [dict.fromkeys(range(256), 0) for _ in range(key_length)]
    for index, cipher_byte in enumerate(text):
        slot = index % key_length
        per_position[slot] = {
            key_byte: total + weighted_charset[cipher_byte ^ key_byte]
            for key_byte, total in per_position[slot].items()
            if cipher_byte ^ key_byte in weighted_charset
        }

    # Phase 2: combine the positions into full keys, one position at a time.
    candidates = [(0, [])]
    for byte_scores in per_position:
        print('!')  # progress marker: starting a key position
        extended = [
            (prefix_score + byte_score, prefix + [key_byte])
            for key_byte, byte_score in byte_scores.items()
            for prefix_score, prefix in candidates
        ]
        candidates = extended
        print('!!')  # progress marker: key position finished
    candidates.sort(key=lambda entry: entry[0], reverse=True)
    return candidates


In [89]:
results = xor_detect_2(encoded, 4)

!
!
!
!


In [90]:
len(results)

5362560

In [94]:
keys3 = set(bytes(key).decode('latin1') for score, key in results[:10000])

In [95]:
'TEST' in keys3

True

In [66]:
plaintext = '''
Windows IP Configuration


Wireless LAN adapter Local Area Connection* 3:

   Media State . . . . . . . . . . . : Media disconnected
   Connection-specific DNS Suffix  . : 

Wireless LAN adapter Local Area Connection* 4:

   Media State . . . . . . . . . . . : Media disconnected
   Connection-specific DNS Suffix  . : 

Wireless LAN adapter Wi-Fi:

   Connection-specific DNS Suffix  . : lan
   Link-local IPv6 Address . . . . . : fe80::acf3:5c56:a7f8:e2bd%2
   IPv4 Address. . . . . . . . . . . : 192.168.1.144
   Subnet Mask . . . . . . . . . . . : 255.255.255.0
   Default Gateway . . . . . . . . . : 192.168.1.1

Tunnel adapter Teredo Tunneling Pseudo-Interface:

   Connection-specific DNS Suffix  . : 
   IPv6 Address. . . . . . . . . . . : 2001:0:2851:fcb0:2480:ca0:357a:3deb
   Link-local IPv6 Address . . . . . : fe80::2480:ca0:357a:3deb%16
   Default Gateway . . . . . . . . . : ::

'''[600:]

In [67]:
encoded = xor(plaintext, 'hello')

In [68]:
results = xor_detect_2(encoded, 5)

!
!
!
!
!


In [69]:
[bytes(key).decode('latin1') for score, key in results[:10]]

['he5lo',
 'hello',
 'he4lo',
 'he6lo',
 'he5ll',
 'helll',
 'he4ll',
 'he5lh',
 'hellh',
 'he4lh']

In [70]:
len(results)

22400

In [79]:
results

[(189, [104, 101, 53, 108, 111]),
 (189, [104, 101, 108, 108, 111]),
 (188, [104, 101, 52, 108, 111]),
 (185, [104, 101, 54, 108, 111]),
 (182, [104, 101, 53, 108, 108]),
 (182, [104, 101, 108, 108, 108]),
 (181, [104, 101, 52, 108, 108]),
 (180, [104, 101, 53, 108, 104]),
 (180, [104, 101, 108, 108, 104]),
 (179, [104, 101, 52, 108, 104]),
 (179, [104, 101, 53, 108, 110]),
 (179, [104, 101, 108, 108, 110]),
 (179, [104, 60, 53, 108, 111]),
 (179, [104, 60, 108, 108, 111]),
 (178, [104, 101, 54, 108, 108]),
 (178, [104, 101, 52, 108, 110]),
 (178, [104, 101, 50, 108, 111]),
 (178, [104, 60, 52, 108, 111]),
 (178, [102, 101, 53, 108, 111]),
 (178, [102, 101, 108, 108, 111]),
 (177, [102, 101, 52, 108, 111]),
 (176, [104, 101, 54, 108, 104]),
 (176, [104, 98, 53, 108, 111]),
 (176, [105, 101, 53, 108, 111]),
 (176, [106, 101, 53, 108, 111]),
 (176, [111, 101, 53, 108, 111]),
 (176, [104, 98, 108, 108, 111]),
 (176, [105, 101, 108, 108, 111]),
 (176, [106, 101, 108, 108, 111]),
 (176, [11

In [72]:
len(plaintext)

295

In [73]:
895-646

249

In [74]:
646-249

397

In [75]:
test_str = encoded
# results = [0] * len(keys)
for i, (x, key) in enumerate(results[:10]):
    decoded = xor(test_str, key).decode('ascii')
    score = sum(map(len, trie.findall(decoded)))
    print(score, bytes(key).decode('latin1'), x, repr(decoded[:100]))
#     results[i] = score
#     if (i + 1) % 100000 == 0:
#         score, key = sorted(zip(results, keys), reverse=True)[0]
#         print(f'[{i + 1}/{len(keys)}] best key so far: {bytes(key).decode("latin1")} ({score}/{len(test_str)})')

30 he5lo 189 '. w : 1`2.16a.1.1S\nTun7el a=apte+ Ter<do T,nnel0ng P*eudotInte+facec\n\n  yConn<ctio7-spe:ificyDNS \nuf'
102 hello 189 '. . : 192.168.1.1\n\nTunnel adapter Teredo Tunneling Pseudo-Interface:\n\n   Connection-specific DNS Suf'
24 he4lo 188 '. v : 1a2.16`.1.1R\nTun6el a<apte* Ter=do T-nnel1ng P+eudouInte*faceb\n\n  xConn=ctio6-spe;ificxDNS \x0buf'
30 he6lo 185 '. t : 1c2.16b.1.1P\nTun4el a>apte( Ter?do T/nnel3ng P)eudowInte(face`\n\n  zConn?ctio4-spe9ificzDNS \tuf'
6 he5ll 182 '. w 9 1`2-16a.2.1S\nWun7eo a=aste+ Wer<dl T,nmel0nd P*evdotImte+fbcec\n\t  yClnn<cwio7-ppe:ieicyDMS \nue'
13 helll 182 '. . 9 192-168.2.1\n\nWunneo adaster Weredl Tunmelind Psevdo-Imterfbce:\n\t   Clnnecwion-ppecieic DMS Sue'
0 he4ll 181 '. v 9 1a2-16`.2.1R\nWun6eo a<aste* Wer=dl T-nmel1nd P+evdouImte*fbceb\n\t  xClnn=cwio6-ppe;ieicxDMS \x0bue'
9 he5lh 180 '. w = 1`2)16a.6.1S\nSun7ek a=awte+ Ser<dh T,niel0n` P*erdotIite+ffcec\n\r  yChnn<csio7-tpe:iaicyDIS \nua'
22 hellh 180 '. . = 192)168.6.1\n\nSun

In [76]:
sum(map(len, trie.findall(plaintext)))

102

In [77]:
len(plaintext)

295

In [58]:
plaintext = 'Tax prep volunteering - a window into society  (Read 6065 times)'

In [59]:
encoded = xor(plaintext, 'test')

In [60]:
len(xor_detect(encoded, 4))

3646500

In [61]:
tmp = {bytes(key).decode('latin1'): score for score, key in results}
tmp['test']

63

In [91]:
results[:10]

[(63, [116, 101, 115, 116]),
 (62, [116, 103, 115, 116]),
 (61, [116, 97, 115, 116]),
 (61, [116, 99, 115, 116]),
 (61, [116, 100, 115, 116]),
 (61, [116, 102, 115, 116]),
 (61, [116, 104, 115, 116]),
 (61, [116, 120, 115, 116]),
 (61, [116, 121, 115, 116]),
 (61, [116, 122, 115, 116])]

In [71]:
def xor_detect_3(text, key_length, weighted_charset=weighted_chars):
    """Rank charset-consistent XOR keys, storing each key as nested pairs.

    Scoring is per key position: a key byte survives only while every
    ciphertext byte at its position decodes to a value present in
    ``weighted_charset``, and its score is the sum of those weights.

    Args:
        text: Ciphertext as an iterable of byte values.
        key_length: Assumed key length in bytes.
        weighted_charset: Mapping from allowed plaintext byte value to weight.

    Returns:
        A list of ``(score, key)`` pairs sorted by descending score (stable
        for ties). Each key is a chain of ``(prefix, byte)`` pairs rooted at
        ``None``; for a 4-byte key that is ``((((None, k0), k1), k2), k3)``.
    """
    position_scores = [dict.fromkeys(range(256), 0) for _ in range(key_length)]
    for index, cipher_byte in enumerate(text):
        survivors = {}
        for key_byte, total in position_scores[index % key_length].items():
            plain_byte = cipher_byte ^ key_byte
            if plain_byte in weighted_charset:
                survivors[key_byte] = total + weighted_charset[plain_byte]
        position_scores[index % key_length] = survivors

    # Two list objects are reused for every position: `frontier` holds the
    # key prefixes built so far, `scratch` receives their extensions.
    frontier = [(0, None)]
    scratch = []
    for byte_scores in position_scores:
        print('!')  # progress marker: starting a key position
        scratch.extend(
            (prefix_score + byte_score, (prefix, key_byte))
            for key_byte, byte_score in byte_scores.items()
            for prefix_score, prefix in frontier
        )
        frontier.clear()
        frontier, scratch = scratch, frontier
        print('!!')  # progress marker: key position finished
    frontier.sort(key=lambda entry: entry[0], reverse=True)
    return frontier


In [73]:
results = xor_detect_3(encoded, 4)
len(results)

!
!!
!
!!
!
!!
!
!!


3646500

In [63]:
def xor_detect_4(text, key_length, weighted_charset=weighted_chars):
    """Rank charset-consistent repeating XOR keys, returning keys as tuples.

    Args:
        text: Ciphertext as an iterable of byte values.
        key_length: Assumed key length in bytes.
        weighted_charset: Mapping from allowed plaintext byte value to weight.
            Key bytes that decode any ciphertext byte at their position to a
            value outside this mapping are dropped.

    Returns:
        A list of ``(score, key)`` pairs sorted by descending score (stable
        for ties), where ``key`` is a tuple of ``key_length`` ints and
        ``score`` is the summed weight of every decoded byte.
    """
    # Score each surviving key byte independently for every key position.
    scores_by_position = [dict.fromkeys(range(256), 0) for _ in range(key_length)]
    for index, cipher_byte in enumerate(text):
        position = index % key_length
        previous = scores_by_position[position]
        scores_by_position[position] = {
            key_byte: previous[key_byte] + weighted_charset[cipher_byte ^ key_byte]
            for key_byte in previous
            if cipher_byte ^ key_byte in weighted_charset
        }

    # Extend every partial key with every surviving byte of the next position.
    ranked = [(0, ())]
    for byte_scores in scores_by_position:
        print('!')  # progress marker: starting a key position
        grown = []
        for key_byte, byte_score in byte_scores.items():
            grown.extend(
                (prefix_score + byte_score, prefix + (key_byte,))
                for prefix_score, prefix in ranked
            )
        ranked = grown
        print('!!')  # progress marker: key position finished
    ranked.sort(key=lambda entry: entry[0], reverse=True)
    return ranked


In [67]:
results = xor_detect_4(encoded, 4)
len(results)

!
!!
!
!!
!
!!
!
!!


3646500

In [57]:
results[:5]

[(63, (116, 101, 115, 116)),
 (62, (116, 103, 115, 116)),
 (61, (116, 97, 115, 116)),
 (61, (116, 99, 115, 116)),
 (61, (116, 100, 115, 116))]