/
baseZ2a
63 lines (55 loc) · 2.78 KB
/
baseZ2a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import base64
import math
import random
from collections import Counter
def encode_baseX(string, alphabet, alphabet_multiplier, baseX_multiplier):
encoded = ''
number = string
while number > 0:
encoded = alphabet[(number * alphabet_multiplier) % len(alphabet)].decode() + encoded
number = math.floor(number * baseX_multiplier / len(alphabet))
return encoded
def decode_baseX(encoded, alphabet, alphabet_multiplier, baseX_multiplier):
decoded = 0
for i, c in enumerate(reversed(encoded)):
decoded += (alphabet.index(c.encode()) / alphabet_multiplier) * (len(alphabet)**i) / baseX_multiplier
return decoded
def fitness(string, encoded, alphabet, alphabet_multiplier, baseX_multiplier):
return abs(len(encoded) - len(string)) + abs(entropy(string) - entropy(encoded))
def entropy(string):
p, lns = Counter(string), float(len(string))
return -sum( count/lns * math.log(count/lns, 2) for count in p.values())
def get_optimal_baseX(string):
min_count = float('inf')
optimal_code = 0
char_count = dict()
for index, char in enumerate(string):
if char in char_count:
char_count[char]['count'] += 1
char_count[char]['positions'].append(index)
else:
char_count[char] = {'count': 1, 'positions': [index]}
sorted_characters = sorted(char_count.items(), key=lambda x: (x[1]['count'], x[1]['positions'][0]), reverse=True)
alphabet = [x[0] for x in sorted_characters]
passes = 0
while True:
for alphabet_multiplier in range(1, len(alphabet) + 1):
for baseX_multiplier in range(1, len(alphabet) + 1):
for baseX in range(2, len(alphabet) + 1):
encoded = encode_baseX(string, alphabet, alphabet_multiplier, baseX_multiplier)
count = len(encoded) - len(string)
if abs(count) < min_count:
min_count = abs(count)
optimal_code = (alphabet, baseX, alphabet_multiplier, baseX_multiplier, passes)
optimal_encoded = encoded
passes += 1
# check if the new encoded string has the same entropy as the original string or the encoded string is not getting smaller
if entropy(string) == entropy(optimal_encoded) or min_count == 0:
break
return optimal_encoded, optimal_code
def encode_code(alphabet, baseX, alphabet_multiplier, baseX_multiplier, number_of_passes):
code = f"{alphabet}:{baseX}:{alphabet_multiplier}:{baseX_multiplier}:{number_of_passes}"
return code
def decode_code(code):
alphabet, baseX, alphabet_multiplier, baseX_multiplier, number_of_passes = code.split(':')
return alphabet, int(baseX), int(alphabet_multiplier), int(baseX_multiplier), int(number_of_passes)