/
compressors.py
221 lines (180 loc) · 10.3 KB
/
compressors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import torch
import numpy as np
import random, math
class CompressorType:
    """Integer tags identifying the available compression operators."""
    IDENTICAL               = 1   # identity (no compression)
    LAZY_COMPRESSOR         = 2   # lazy / Bernoulli compressor
    RANDK_COMPRESSOR        = 3   # Rand-K sparsifier
    NATURAL_COMPRESSOR_FP64 = 4   # natural compressor for fp64 components
    NATURAL_COMPRESSOR_FP32 = 5   # natural compressor for fp32 components
    STANDARD_DITHERING_FP64 = 6   # standard dithering for fp64 vectors
    STANDARD_DITHERING_FP32 = 7   # standard dithering for fp32 vectors
    NATURAL_DITHERING_FP32  = 8   # natural dithering for fp32 component vectors
    NATURAL_DITHERING_FP64  = 9   # natural dithering for fp64 component vectors
    TOPK_COMPRESSOR         = 10  # Top-K sparsifier
    SIGN_COMPRESSOR         = 11  # sign compressor
    ONEBIT_SIGN_COMPRESSOR  = 12  # one-bit sign compressor
class Compressor:
def __init__(self, compressorName = ""):
    """Create a compressor; defaults to the identity (no-compression) operator."""
    self.compressorName = compressorName            # free-form label for this instance
    self.compressorType = CompressorType.IDENTICAL  # active operator; changed by the make*() methods
    self.w = 0.0                                    # omega: variance parameter of the active operator
    self.last_need_to_send_advance = 0              # bits the last compressVector() call would transmit
    self.component_bits_size = 32                   # bits per uncompressed scalar component
def name(self):
    """Return a short human-readable label for the configured compressor.

    Used for display (e.g. plot legends); falls back to "?" for an
    unrecognized compressor type.
    """
    omega = r'$\omega$'
    ct = self.compressorType
    if ct == CompressorType.IDENTICAL: return f"Identical"
    if ct == CompressorType.LAZY_COMPRESSOR: return f"Bernoulli(Lazy) [p={self.P:g},{omega}={self.getW():.1f}]"
    if ct == CompressorType.RANDK_COMPRESSOR: return f" (K={self.K})"
    if ct == CompressorType.NATURAL_COMPRESSOR_FP64: return f"Natural for fp64 [{omega}={self.getW():.1f}]"
    if ct == CompressorType.NATURAL_COMPRESSOR_FP32: return f"Natural for fp32 [{omega}={self.getW():.1f}]"
    if ct == CompressorType.STANDARD_DITHERING_FP64: return f"Standard Dithering for fp64[s={self.s}]"
    # BUG FIX: this branch previously re-tested STANDARD_DITHERING_FP64,
    # which made the fp32 label unreachable.
    if ct == CompressorType.STANDARD_DITHERING_FP32: return f"Standard Dithering for fp32[s={self.s}]"
    if ct == CompressorType.NATURAL_DITHERING_FP32: return f"Natural Dithering for fp32[s={self.s},{omega}={self.getW():.1f}]"
    if ct == CompressorType.NATURAL_DITHERING_FP64: return f"Natural Dithering for fp64[s={self.s},{omega}={self.getW():.1f}]"
    if ct == CompressorType.TOPK_COMPRESSOR: return f" Top (K={self.K})"
    if ct == CompressorType.SIGN_COMPRESSOR: return f"Sign"
    if ct == CompressorType.ONEBIT_SIGN_COMPRESSOR: return f"One Bit Sign"
    return "?"
def fullName(self):
    """Return a full human-readable label for the configured compressor.

    Differs from name() only in the Rand-K / Top-K label formats; falls
    back to "?" for an unrecognized compressor type.
    """
    omega = r'$\omega$'
    ct = self.compressorType
    if ct == CompressorType.IDENTICAL: return f"Identical"
    if ct == CompressorType.LAZY_COMPRESSOR: return f"Bernoulli(Lazy) [p={self.P:g},{omega}={self.getW():.1f}]"
    if ct == CompressorType.RANDK_COMPRESSOR: return f"Rand [K={self.K}]"
    if ct == CompressorType.NATURAL_COMPRESSOR_FP64: return f"Natural for fp64 [{omega}={self.getW():.1f}]"
    if ct == CompressorType.NATURAL_COMPRESSOR_FP32: return f"Natural for fp32 [{omega}={self.getW():.1f}]"
    if ct == CompressorType.STANDARD_DITHERING_FP64: return f"Standard Dithering for fp64[s={self.s}]"
    # BUG FIX: this branch previously re-tested STANDARD_DITHERING_FP64,
    # which made the fp32 label unreachable.
    if ct == CompressorType.STANDARD_DITHERING_FP32: return f"Standard Dithering for fp32[s={self.s}]"
    if ct == CompressorType.NATURAL_DITHERING_FP32: return f"Natural Dithering for fp32[s={self.s},{omega}={self.getW():.1f}]"
    if ct == CompressorType.NATURAL_DITHERING_FP64: return f"Natural Dithering for fp64[s={self.s},{omega}={self.getW():.1f}]"
    if ct == CompressorType.TOPK_COMPRESSOR: return f"Top [K={self.K}]"
    if ct == CompressorType.SIGN_COMPRESSOR: return f"Sign"
    if ct == CompressorType.ONEBIT_SIGN_COMPRESSOR: return f"One Bit Sign"
    return "?"
def resetStats(self):
self.last_need_to_send_advance = 0
def makeIdenticalCompressor(self):
    """Configure as the identity (no-op) compressor."""
    self.compressorType = CompressorType.IDENTICAL
    self.resetStats()
def makeLazyCompressor(self, P):
    """Configure as a Bernoulli (lazy) compressor.

    With probability P the whole vector is transmitted, otherwise nothing.
    Variance parameter: omega = 1/P - 1.
    """
    self.P = P
    self.w = 1.0 / P - 1.0  # omega for the Bernoulli compressor
    self.compressorType = CompressorType.LAZY_COMPRESSOR
    self.resetStats()
def makeStandardDitheringFP64(self, levels, vectorNormCompressor, p = float("inf")):
    """Configure as a standard dithering quantizer acting on fp64 vectors.

    Args:
        levels: number of quantization levels s.
        vectorNormCompressor: compressor applied to the vector norm.
        p: norm degree used by the dithering scheme (default: infinity norm).
    """
    self.compressorType = CompressorType.STANDARD_DITHERING_FP64
    # BUG FIX: np.arange(0.0, 1.1, 1.0/levels) can emit extra points beyond 1.0
    # for some level counts (floating-point step accumulation, e.g. levels=20),
    # tripping the assert below. linspace yields exactly levels+1 values that
    # uniformly split [0.0, 1.0], which is the documented intent.
    self.levelsValues = np.linspace(0.0, 1.0, levels + 1)
    self.s = len(self.levelsValues) - 1  # should be equal to levels
    assert self.s == levels
    self.p = p
    self.vectorNormCompressor = vectorNormCompressor
    self.w = 0.0  # TODO: derive the true omega for standard dithering
    self.resetStats()
def makeStandardDitheringFP32(self, levels, vectorNormCompressor, p = float("inf")):
    """Configure as a standard dithering quantizer acting on fp32 vectors.

    Args:
        levels: number of quantization levels s.
        vectorNormCompressor: compressor applied to the vector norm.
        p: norm degree used by the dithering scheme (default: infinity norm).
    """
    self.compressorType = CompressorType.STANDARD_DITHERING_FP32
    # BUG FIX: torch.arange(0.0, 1.1, 1.0/levels) can emit extra points beyond
    # 1.0 for some level counts (floating-point step accumulation), tripping
    # the assert below. linspace yields exactly levels+1 values that uniformly
    # split [0.0, 1.0], which is the documented intent.
    self.levelsValues = torch.linspace(0.0, 1.0, levels + 1)
    self.s = len(self.levelsValues) - 1  # should be equal to levels
    assert self.s == levels
    self.p = p
    self.vectorNormCompressor = vectorNormCompressor
    self.w = 0.0  # TODO: derive the true omega for standard dithering
    self.resetStats()
def makeQSGD_FP64(self, levels, dInput):
    """Configure as QSGD: standard dithering with p = 2 and an identical norm compressor.

    Args:
        levels: number of quantization levels.
        dInput: dimension of the vectors that will be compressed.
    """
    aux = Compressor("norm_compressor")
    aux.makeIdenticalCompressor()
    self.makeStandardDitheringFP64(levels, aux, p = 2)
    # Variance bound from Lemma 3.1 of https://arxiv.org/pdf/1610.02132.pdf, page 5
    self.w = min(dInput / (levels * levels), dInput ** 0.5 / levels)
def makeNaturalDitheringFP64(self, levels, dInput, p = float("inf")):
    """Configure as natural dithering for fp64 component vectors.

    Args:
        levels: number of (power-of-two) quantization levels s.
        dInput: dimension of the vectors that will be compressed.
        p: norm degree used by the scheme (default: infinity norm).
    """
    self.compressorType = CompressorType.NATURAL_DITHERING_FP64
    # Build levels 2^0, 2^-1, ..., 2^-(levels-1); the extra zero slot stays 0,
    # so after the flip the values are [0, 2^-(levels-1), ..., 1/2, 1].
    values = torch.zeros(levels + 1)
    for k in range(levels):
        values[k] = (1.0/2.0) ** k
    self.levelsValues = torch.flip(values, dims = [0])
    self.s = len(self.levelsValues) - 1
    assert self.s == levels
    self.p = p
    r = min(p, 2)
    # Omega bound for natural dithering: 1/8 + t * min(1, t) with t = d^(1/r) / 2^(s-1)
    t = (dInput ** (1.0/r)) / (2 ** (self.s - 1))
    self.w = 1.0/8.0 + t * min(1, t)
    self.resetStats()
def makeNaturalDitheringFP32(self, levels, dInput, p = float("inf")):
    """Configure as natural dithering for fp32 component vectors.

    Args:
        levels: number of (power-of-two) quantization levels s.
        dInput: dimension of the vectors that will be compressed.
        p: norm degree used by the scheme (default: infinity norm).
    """
    self.compressorType = CompressorType.NATURAL_DITHERING_FP32
    # Build levels 2^0, 2^-1, ..., 2^-(levels-1); the extra zero slot stays 0,
    # so after the flip the values are [0, 2^-(levels-1), ..., 1/2, 1].
    values = torch.zeros(levels + 1)
    for k in range(levels):
        values[k] = (1.0/2.0) ** k
    self.levelsValues = torch.flip(values, dims=[0])
    self.s = len(self.levelsValues) - 1
    assert self.s == levels
    self.p = p
    r = min(p, 2)
    # Omega bound for natural dithering: 1/8 + t * min(1, t) with t = d^(1/r) / 2^(s-1)
    t = (dInput ** (1.0/r)) / (2 ** (self.s - 1))
    self.w = 1.0/8.0 + t * min(1, t)
    self.resetStats()
# K - how many components of the input vector are kept
def makeRandKCompressor(self, K):
    """Configure as a Rand-K sparsifier that keeps K components of the input."""
    self.K = K
    self.compressorType = CompressorType.RANDK_COMPRESSOR
    self.resetStats()
def makeTopKCompressor(self, K):
    """Configure as a Top-K sparsifier.

    K is interpreted by compressVector() as the fraction of components kept
    (top_size = max(int(K*d), 1)).
    """
    self.K = K
    self.compressorType = CompressorType.TOPK_COMPRESSOR
    self.resetStats()
def makeNaturalCompressorFP64(self):
    """Configure as the natural compressor for fp64 components (omega = 1/8)."""
    self.compressorType = CompressorType.NATURAL_COMPRESSOR_FP64
    self.w = 0.125  # omega = 1/8
    self.resetStats()
def makeNaturalCompressorFP32(self):
    """Configure as the natural compressor for fp32 components (omega = 1/8)."""
    self.compressorType = CompressorType.NATURAL_COMPRESSOR_FP32
    self.w = 0.125  # omega = 1/8
    self.resetStats()
def makeSignCompressor(self, freeze_iteration=0):
    """Configure as a sign compressor.

    Iterations before freeze_iteration are passed through uncompressed
    (see compressVector()).
    """
    self.freeze_iteration = freeze_iteration
    self.compressorType = CompressorType.SIGN_COMPRESSOR
    self.resetStats()
def makeOneBitSignCompressor(self, freeze_iteration=0):
    """Configure as a one-bit sign compressor (1-bit Adam style).

    Iterations before freeze_iteration are passed through uncompressed
    (see compressVector()).
    """
    self.freeze_iteration = freeze_iteration
    self.compressorType = CompressorType.ONEBIT_SIGN_COMPRESSOR
    self.resetStats()
def getW(self):
return self.w
def compressVector(self, x, iteration=0):
    """Apply the configured compressor to vector x and return the result.

    Args:
        x: 1-D torch tensor to compress (its length is taken as max(x.shape)).
        iteration: current training iteration; the sign compressors pass the
            input through uncompressed while iteration < self.freeze_iteration.

    Returns:
        A new tensor of the same shape as x with the compression applied.
        Side effect: self.last_need_to_send_advance is set to the number of
        bits a real implementation would need to transmit.

    NOTE(review): only the IDENTICAL, TOPK, SIGN and ONEBIT_SIGN branches are
    visible in this view of the file; for any other compressor type `out` is
    never bound and `return out` would raise UnboundLocalError — confirm the
    remaining branches against the full file (this view is truncated).
    """
    d = max(x.shape)
    if self.compressorType == CompressorType.IDENTICAL:
        out = x.clone()
        # the full vector is sent: d components at component_bits_size bits each
        self.last_need_to_send_advance = d * self.component_bits_size
    elif self.compressorType == CompressorType.TOPK_COMPRESSOR:
        #S = torch.arange(d)
        # np.random.shuffle(S)
        # self.K is a fraction: keep max(int(K*d), 1) largest-magnitude components
        top_size = max(int(self.K*d), 1)
        _, S = torch.topk(torch.abs(x), top_size)
        out = torch.zeros_like(x)
        out[S] = x[S]
        # !!! in a real case, one needs to send the out vector and a support set indicating the indices of the top K
        self.last_need_to_send_advance = 2 * top_size * self.component_bits_size
    elif self.compressorType == CompressorType.SIGN_COMPRESSOR:
        if iteration < self.freeze_iteration:
            # warmup phase: no compression
            out = x.clone()
            self.last_need_to_send_advance = d * self.component_bits_size
        else:
            out = torch.sign(x)
            # scale = mean absolute value (L1 norm / dimension)
            scale = torch.norm(x, p=1) / torch.numel(x)
            out.mul_(scale)  # <-- applied here just for simulation
            # !!! in a real case, one needs to send d bits for {0, 1} and 32 bits for the scale constant
            self.last_need_to_send_advance = d + self.component_bits_size
    elif self.compressorType == CompressorType.ONEBIT_SIGN_COMPRESSOR:
        # according to the one-bit Adam paper,
        # during warmup the signal is not compressed
        if iteration < self.freeze_iteration:
            out = x.clone()
            self.last_need_to_send_advance = d * self.component_bits_size
        else:
            out = torch.sign(x)
            # out.add_(1).bool().float().add_(-0.5).mul_(2.0)
            # scale = L2 norm / sqrt(dimension)
            scale = torch.norm(x) / np.sqrt(torch.numel(x))
            # out = torch.cat((scale, out), 0) <-- in a real case, only send a scale and a {0,1}^d output
            out.mul_(scale)  # <-- applied here just for simulation
            # !!! in a real case, one needs to send d bits for {0, 1} and 32 bits for the scale constant
            self.last_need_to_send_advance = d + self.component_bits_size
    return out