-
Notifications
You must be signed in to change notification settings - Fork 0
/
BaseZ2
89 lines (77 loc) · 4.36 KB
/
BaseZ2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# This script encodes and decodes strings using a custom base-X representation.
#
# It includes:
#   - functions for encoding and decoding using an alphabet, a multiplier for
#     the alphabet, and a multiplier for the base-X representation;
#   - a fitness function that calculates the difference in length and the
#     difference in entropy between the original string and the encoded string;
#   - get_optimal_baseX, which finds the optimal values for alphabet, baseX,
#     alphabet multiplier, and baseX multiplier for an input string and returns
#     the encoded string along with a unique code that represents the values
#     used for encoding.
import base64
import math
import random
from collections import Counter
def encode_baseX(number, alphabet):
    """Encode an integer as a string of symbols drawn from ``alphabet``.

    This is plain positional conversion in base ``len(alphabet)`` with the
    most significant digit first.

    Args:
        number: Integer to encode. Zero and negative values produce ``''``
            because the digit loop only runs while ``number > 0``.
        alphabet: Indexable sequence whose items provide ``.decode()``
            (for example a list of single-character ``bytes`` objects).

    Returns:
        The encoded string.

    Raises:
        ValueError: If ``number`` is positive and ``alphabet`` holds exactly
            one symbol; dividing by 1 never shrinks the value, so encoding
            would loop forever.
        ZeroDivisionError: If ``number`` is positive and ``alphabet`` is empty.

    Note:
        A later definition of ``encode_baseX`` in this file (taking two extra
        multiplier arguments) rebinds this name.
    """
    base = len(alphabet)
    if number > 0 and base == 1:
        raise ValueError(
            'alphabet must contain at least two symbols to encode a positive number'
        )

    # Collect digits least-significant first and reverse once at the end,
    # instead of prepending to a string on every iteration.
    digits = []
    while number > 0:
        number, remainder = divmod(number, base)
        digits.append(alphabet[remainder].decode())
    return ''.join(reversed(digits))
def decode_baseX(encoded, alphabet, multiplier):
    """Turn a string of alphabet symbols back into a number.

    Each character of ``encoded`` is looked up in ``alphabet`` (after being
    encoded to ``bytes``); ``multiplier`` is added to that index, and the sum
    is weighted by ``len(alphabet)`` raised to the character's position
    counted from the right-hand end.

    Args:
        encoded: String to decode; an empty string decodes to ``0``.
        alphabet: Sequence supporting ``.index()`` with ``bytes`` items.
        multiplier: Offset added to every digit value before weighting.

    Returns:
        The accumulated value.

    Raises:
        ValueError: If a character of ``encoded`` is not found in ``alphabet``.

    Note:
        A later definition of ``decode_baseX`` in this file (taking four
        arguments) rebinds this name.
    """
    base = len(alphabet)
    return sum(
        (alphabet.index(symbol.encode()) + multiplier) * base**place
        for place, symbol in enumerate(reversed(encoded))
    )
def encode_baseX(number, alphabet, alphabet_multiplier, baseX_multiplier):
    """Encode an integer over ``alphabet`` using two tuning multipliers.

    On every step the emitted symbol is
    ``alphabet[(number * alphabet_multiplier) % len(alphabet)]`` and the
    number is replaced by ``floor(number * baseX_multiplier / len(alphabet))``.
    With both multipliers equal to 1 this is ordinary positional conversion
    in base ``len(alphabet)``, most significant digit first.

    Args:
        number: Integer to encode. Zero and negative values produce ``''``.
        alphabet: Indexable sequence whose items provide ``.decode()``
            (for example a list of single-character ``bytes`` objects).
        alphabet_multiplier: Factor applied to ``number`` before it is reduced
            modulo the alphabet size to pick a symbol.
        baseX_multiplier: Factor applied to ``number`` before it is divided by
            the alphabet size for the next step.

    Returns:
        The encoded string.

    Raises:
        ValueError: If ``number`` is positive and ``baseX_multiplier`` is not
            smaller than ``len(alphabet)``; the number would never shrink and
            the loop would not terminate.
        ZeroDivisionError: If ``number`` is positive and ``alphabet`` is empty.
    """
    base = len(alphabet)
    if number > 0 and 0 < base <= baseX_multiplier:
        raise ValueError(
            'baseX_multiplier must be smaller than len(alphabet), '
            'otherwise encoding never terminates'
        )

    digits = []
    while number > 0:
        digits.append(alphabet[(number * alphabet_multiplier) % base].decode())
        # Floor-divide instead of true-dividing then flooring: the result stays
        # exact for integers beyond 2**53. math.floor() turns the float that
        # ``//`` yields for a float multiplier back into an int.
        number = math.floor(number * baseX_multiplier // base)
    return ''.join(reversed(digits))
def decode_baseX(encoded, alphabet, alphabet_multiplier, baseX_multiplier):
    """Decode a string of alphabet symbols, undoing the two multipliers.

    The result equals the sum over all characters of
    ``index / alphabet_multiplier * len(alphabet)**position / baseX_multiplier``
    where ``position`` counts from the right-hand end. With both multipliers
    equal to 1 this is ordinary positional decoding.

    The digits are accumulated as an exact integer and divided once at the
    end, so large values are not rounded through floats along the way.

    Args:
        encoded: String to decode; an empty string decodes to ``0``.
        alphabet: Sequence supporting ``.index()`` with ``bytes`` items.
        alphabet_multiplier: Divisor applied to every digit value.
        baseX_multiplier: Divisor applied to every positional weight.

    Returns:
        An ``int`` when the accumulated value is exactly divisible by the
        product of two integer multipliers, otherwise a ``float``.

    Raises:
        ValueError: If a character of ``encoded`` is not found in ``alphabet``.
        ZeroDivisionError: If ``encoded`` is non-empty and either multiplier
            is zero.

    Note:
        This definition rebinds ``decode_baseX``, replacing the earlier
        three-argument definition in this file.
    """
    if not encoded:
        return 0

    base = len(alphabet)
    total = 0
    # Horner's scheme, most significant digit first.
    for symbol in encoded:
        total = total * base + alphabet.index(symbol.encode())

    divisor = alphabet_multiplier * baseX_multiplier
    if isinstance(divisor, int) and total % divisor == 0:
        return total // divisor
    return total / divisor
def fitness(string, encoded, alphabet, alphabet_multiplier, baseX_multiplier):
    """Score how far ``encoded`` departs from ``string``; lower is closer.

    The score is the absolute difference of the two lengths plus the absolute
    difference of their entropies as computed by ``entropy``.

    Args:
        string: The original text.
        encoded: The encoded text.
        alphabet: Not used by the computation.
        alphabet_multiplier: Not used by the computation.
        baseX_multiplier: Not used by the computation.

    Returns:
        The combined length and entropy distance.
    """
    length_gap = abs(len(encoded) - len(string))
    entropy_gap = abs(entropy(string) - entropy(encoded))
    return length_gap + entropy_gap
def entropy(string):
    """Return the Shannon entropy of ``string`` in bits per symbol.

    Args:
        string: Sized iterable of hashable symbols, typically a ``str``.

    Returns:
        ``sum(p * log2(1 / p))`` over the relative frequency ``p`` of each
        distinct symbol. An empty input yields ``0``.
    """
    total = len(string)
    counts = Counter(string)
    # log2(total / count) is -log2(p) and is never negative, so every term is
    # non-negative and a single-symbol input gives 0.0 rather than -0.0.
    # math.log2 is also more accurate than math.log(x, 2).
    return sum(
        (count / total) * math.log2(total / count)
        for count in counts.values()
    )
def get_optimal_baseX(string):
    """Search for the multiplier pair whose encoding length is closest to ``len(string)``.

    The alphabet consists of the distinct characters of ``string`` ordered by
    descending frequency, ties broken by descending first position. Each
    character is stored as ``bytes`` because ``encode_baseX`` calls
    ``.decode()`` on alphabet items. ``encode_baseX`` also needs an integer,
    so ``string`` is converted to one from its UTF-8 bytes (big-endian).

    NOTE(review): the string-to-integer conversion is a choice made here, not
    something the rest of the file prescribes; leading NUL characters do not
    survive it. Confirm it matches the intended scheme.

    Args:
        string: Text to analyse; must contain at least two distinct characters.

    Returns:
        A tuple ``(alphabet, baseX, alphabet_multiplier, baseX_multiplier)``
        for the first combination, in search order, with the smallest absolute
        difference between encoded length and ``len(string)``.

    Raises:
        ValueError: If ``string`` has fewer than two distinct characters, in
            which case there is no candidate to evaluate.

    Note:
        A later definition of ``get_optimal_baseX`` in this file rebinds this
        name.
    """
    # Count the frequency and first position of characters in the input string.
    counts = {}
    first_seen = {}
    for index, char in enumerate(string):
        counts[char] = counts.get(char, 0) + 1
        first_seen.setdefault(char, index)

    # Sort the characters based on their frequency and position.
    ordered = sorted(
        counts,
        key=lambda char: (counts[char], first_seen[char]),
        reverse=True,
    )
    alphabet = [char.encode() for char in ordered]
    size = len(alphabet)
    if size < 2:
        raise ValueError('string must contain at least two distinct characters')

    number = int.from_bytes(string.encode(), 'big')
    target_length = len(string)

    # The encoder never reads baseX, so every candidate in 2..size produces the
    # same encoding; a strict "<" comparison keeps the first one, which is 2.
    baseX = 2

    best = None
    min_count = float('inf')
    for alphabet_multiplier in range(1, size + 1):
        # baseX_multiplier == size is skipped: the encoder's number would not
        # shrink from one step to the next, so encoding could not finish.
        for baseX_multiplier in range(1, size):
            encoded = encode_baseX(
                number, alphabet, alphabet_multiplier, baseX_multiplier
            )
            count = abs(len(encoded) - target_length)
            if count < min_count:
                min_count = count
                # Keep the winning parameters directly rather than packing
                # them into a numeric code and unpacking it again.
                best = (alphabet, baseX, alphabet_multiplier, baseX_multiplier)
    return best
def get_optimal_baseX(string):
    """Return the encoded text with its parameter code appended.

    This definition rebinds ``get_optimal_baseX``, replacing the earlier
    definition of the same name in this file.

    NOTE(review): unfinished as written. The right-hand side of the first
    statement is the ``...`` (Ellipsis) literal, which cannot be unpacked into
    five names, so calling this function raises ``TypeError`` before any later
    line runs. Presumably the placeholder stands for the parameter search plus
    the resulting encoding -- confirm and fill in.

    Args:
        string: Not read by the body as written.

    Returns:
        ``encoded + code`` once the placeholder is replaced.
        NOTE(review): ``encode_code`` as defined in this file returns the
        result of ``*`` and ``+`` on its arguments, so whether ``+`` with
        ``encoded`` is valid depends on the operand types -- verify.
    """
    # Placeholder for the values produced by the search step (see NOTE above).
    baseX, alphabet, alphabet_multiplier, baseX_multiplier, encoded = ...
    # Generate a unique code for the values.
    code = encode_code(alphabet, baseX, alphabet_multiplier, baseX_multiplier)
    # Use the code to represent the values in the output.
    output = encoded + code
    return output
def encode_code(alphabet, baseX, alphabet_multiplier, baseX_multiplier):
    """Fold the four encoding parameters into a single value.

    Args:
        alphabet: Left operand of the first product.
        baseX: Left operand of the second product.
        alphabet_multiplier: Factor applied to ``alphabet``.
        baseX_multiplier: Factor applied to ``baseX``.

    Returns:
        ``alphabet * alphabet_multiplier + baseX * baseX_multiplier``.
    """
    alphabet_term = alphabet * alphabet_multiplier
    baseX_term = baseX * baseX_multiplier
    return alphabet_term + baseX_term
def decode_code(code, max_baseX=None):
    """Split ``code`` into four values using ``max_baseX`` as the radix.

    The quotient and remainder of ``code`` by ``max_baseX`` become
    ``alphabet_multiplier`` and ``baseX_multiplier``; ``alphabet`` is then
    ``code // alphabet_multiplier`` and ``baseX`` is
    ``code % baseX_multiplier``.

    NOTE(review): this arithmetic is not an inverse of ``encode_code`` as that
    function is written in this file -- confirm the intended code layout.

    Args:
        code: Integer produced for a parameter combination.
        max_baseX: Radix separating the two multipliers. When omitted, a
            module-level ``max_baseX`` is used if one exists; the file itself
            does not define one.

    Returns:
        A tuple ``(alphabet, baseX, alphabet_multiplier, baseX_multiplier)``.

    Raises:
        NameError: If ``max_baseX`` is neither passed nor defined at module
            level.
        ZeroDivisionError: If ``max_baseX`` is zero, or if either derived
            multiplier is zero.
    """
    if max_baseX is None:
        # Backward-compatible fallback for callers that rely on a global.
        try:
            max_baseX = globals()['max_baseX']
        except KeyError:
            raise NameError(
                "name 'max_baseX' is not defined; pass max_baseX explicitly"
            ) from None

    alphabet_multiplier, baseX_multiplier = divmod(code, max_baseX)
    alphabet = code // alphabet_multiplier
    baseX = code % baseX_multiplier
    return alphabet, baseX, alphabet_multiplier, baseX_multiplier
# By Josef Kulovany - https://zchg.org - DO NOT REMOVE PER LICENSE AND COPYRIGHT LAW