forked from matthiasbock/OpenSkype
-
Notifications
You must be signed in to change notification settings - Fork 0
/
arithmetic.py
executable file
·104 lines (94 loc) · 2.49 KB
/
arithmetic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/python
#
# Skype uses arithmetic coding for compression
#
# http://en.wikipedia.org/wiki/Arithmetic_coding
# http://en.wikipedia.org/wiki/Huffman_coding
#
# Note: the picture presented in "Silver Needle in the Skype" does not show
# the frequency dictionary of "ACAB"; the string it depicts should read
# "ACABCACA" or something similar.
#
def getfrequencies(s, debug=False):
    """Count how often each symbol occurs in *s*.

    Parameters:
        s: iterable of hashable symbols (a string in this module's usage).
        debug: when true, print the resulting frequency table.

    Returns:
        dict mapping every distinct symbol to its number of occurrences;
        empty when *s* is empty.
    """
    frequencies = {}
    for c in s:
        # dict.get is a single hash lookup; testing membership against
        # frequencies.keys() rebuilds the key list on every iteration
        # under Python 2.
        frequencies[c] = frequencies.get(c, 0) + 1
    if debug:
        # Single-argument print(...) emits the same text whether print is a
        # statement (Python 2) or a function (Python 3).
        print('\tsymbol frequencies: ' + str(frequencies))
    return frequencies
def frequencies2dictionary(length, frequencies, debug=False):
    """Assign every symbol a slice of the unit interval.

    Symbols are laid out in sorted order starting at 0.0; each slice is
    ``frequencies[symbol] * (1.0 / length)`` wide.

    Parameters:
        length: divisor used to scale the counts; makedictionary() passes
            the length of the source string.
        frequencies: dict mapping symbol -> occurrence count.
        debug: when true, print the resulting dictionary.

    Returns:
        dict mapping symbol -> [start, stop]; empty when *frequencies* is
        empty.
    """
    dictionary = {}
    if frequencies:
        # Divide only when there is something to lay out: an empty input
        # arrives with length 0 and would raise ZeroDivisionError here.
        interval = 1.0 / length
        start = 0.0
        for k in sorted(frequencies):
            stop = start + frequencies[k] * interval
            dictionary[k] = [start, stop]
            # The next symbol begins exactly where this one ends.
            start = stop
    if debug:
        # Single-argument print(...) emits the same text whether print is a
        # statement (Python 2) or a function (Python 3).
        print('\tsymbol dictionary: ' + str(dictionary))
    return dictionary
def makedictionary(s, debug=False):
    """Build the symbol -> [start, stop] dictionary for *s* in one call.

    Counts the symbols of *s* with getfrequencies() and turns the counts
    into intervals with frequencies2dictionary(), scaled by ``len(s)``.
    *debug* is forwarded to both helpers.
    """
    # len(s) is taken first so the evaluation order matches the one-line
    # form frequencies2dictionary(len(s), getfrequencies(s, debug), debug).
    total = len(s)
    counts = getfrequencies(s, debug)
    return frequencies2dictionary(total, counts, debug)
#
# Example usage:
# interval = [0.5, 0.75]
# subinterval = [0.4, 0.6]
# return [0.5+0.4*0.25, 0.5+0.6*0.25]
#
def subdivide(interval, subinterval):
    """Project *subinterval* into *interval*.

    *subinterval* holds two fractions of the width of *interval*; the
    result is the matching absolute range.

    Parameters:
        interval: [start, stop] of the enclosing range.
        subinterval: [start, stop] expressed as fractions of that range.

    Returns:
        list [start + f0 * width, start + f1 * width], where width is
        ``interval[1] - interval[0]`` and f0, f1 are the two fractions.
    """
    lower = interval[0]
    upper = interval[1]
    span = upper - lower
    fraction_lower = subinterval[0]
    fraction_upper = subinterval[1]
    return [lower + (fraction_lower * span), lower + (fraction_upper * span)]
#
# compress string to real
#
def compress(s, debug=False):
    """Encode the string *s* as a single real number.

    Starting from [0, 1], the current interval is narrowed once per symbol
    to that symbol's slice, using subdivide() and the dictionary built by
    makedictionary().

    Parameters:
        s: string to encode.
        debug: when true, print the length, the dictionary and the interval
            after every symbol.

    Returns:
        tuple (lower bound of the final interval, symbol dictionary,
        length of *s*).
    """
    length = len(s)
    if debug:
        # Single-argument print(...) emits the same text whether print is a
        # statement (Python 2) or a function (Python 3).
        print('compressing "' + s + '" ...')
        print('\tstring length: ' + str(length))
    table = makedictionary(s, debug)
    current = [0, 1]
    for symbol in s:
        current = subdivide(current, table[symbol])
        if debug:
            print('\t' + symbol + ': ' + str(current))
    return current[0], table, length
#
# decompress real to string
#
def decompress(r, dictionary, length, debug=False):
    """Decode the real number *r* back into a string of *length* symbols.

    At every position the symbol whose current interval contains *r* is
    emitted, and all symbol intervals are then re-projected into that
    interval with subdivide().

    Parameters:
        r: number to decode, as returned by compress().
        dictionary: dict mapping symbol -> [start, stop], as returned by
            compress(); it is not modified.
        length: number of symbols to decode.
        debug: when true, print the inputs and every decoding step.

    Returns:
        the decoded string ('' when *length* is 0).

    Raises:
        ValueError: if at some position no symbol interval contains *r*.
    """
    if debug:
        # Single-argument print(...) emits the same text whether print is a
        # statement (Python 2) or a function (Python 3).
        print('decompressing "' + str(r) + '" ...')
        # Report the `length` parameter: the name `l` is not defined in this
        # function and raises NameError unless a global `l` happens to exist.
        print('\tstring length: ' + str(length))
        print('\tsymbol dictionary: ' + str(dictionary))

    decoded = []
    # Interval of every symbol at the position being decoded. For the first
    # position these are the dictionary entries themselves; the mapping is
    # replaced rather than mutated, so no copy of `dictionary` is needed.
    candidates = dictionary
    for position in range(length):
        for symbol, bounds in candidates.items():
            # Intervals are half-open, except that r == 1 is accepted by an
            # interval whose upper bound is 1.
            if r >= bounds[0] and (r < bounds[1] or (r == 1 and bounds[1] == 1)):
                break
        else:
            # Without a match there is no interval to refine; continuing
            # would silently produce a wrong or truncated string.
            raise ValueError(
                'value %r lies outside every symbol interval at position %d'
                % (r, position)
            )
        decoded.append(symbol)
        if debug:
            print('\t' + str(r) + ' is in ' + str(bounds) + ' -> ' + symbol)
        # Nothing is left to decode after the last symbol, so the intervals
        # only need refining while more positions remain.
        if position + 1 < length:
            candidates = {
                k: subdivide(bounds, dictionary[k]) for k in dictionary
            }
    return ''.join(decoded)
if __name__ == '__main__':
    import sys

    # Round-trip demo: compress the first command-line argument, then decode
    # it again with tracing enabled. A missing or empty argument falls back
    # to the sample string. The argument is checked explicitly because a
    # bare `except:` around compress() would also hide real failures and
    # KeyboardInterrupt.
    args = sys.argv[1:]
    text = args[0] if args and args[0] else 'ACABCACA'
    r, d, l = compress(text, debug=True)
    decompress(r, d, l, debug=True)