forked from pytorch/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
qtensor.h
260 lines (220 loc) · 6.43 KB
/
qtensor.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
#ifndef CAFFE2_CORE_QTENSOR_H_
#define CAFFE2_CORE_QTENSOR_H_
#include <algorithm>
#include <climits>
#include <cstddef>
#include <functional>
#include <limits>
#include <numeric>
#include <vector>

#include <c10/util/typeid.h>

#include "caffe2/core/common.h"
#include "caffe2/core/context.h"
#include "caffe2/core/tensor.h"
namespace caffe2 {
template <class Context>
class C10_EXPORT QTensor {
public:
QTensor() {}
virtual ~QTensor() {}
/**
* @brief Creates a quantized tensor of the given dimension.
*
* Note that the actual data allocation is not going to be carried out until
* the first time mutable_data() is called.
*
* The underlying storage of the quantized tensor interleaves elements
* by bit depth.
*
* Labeled memory for tensor of size 6, precision 3
* [ E1[0] E2[0] E3[0] E4[0] E5[0] E6[0] ] // Least significant Bits
* [ E1[1] E2[1] E3[1] E4[1] E5[1] E6[1] ]
* [ E1[2] E2[2] E3[2] E4[2] E5[2] E6[2] ]
*
* In the case of sign bits (see enable_sign argument), an extra bit
* per element is added:
*
* Labeled memory for tensor of size 6, precision 3, sign bit enabled
* [ E1[0] E2[0] E3[0] E4[0] E5[0] E6[0] ]
* [ E1[1] E2[1] E3[1] E4[1] E5[1] E6[1] ]
* [ E1[2] E2[2] E3[2] E4[2] E5[2] E6[2] ]
* [ E1[s] E2[s] E3[s] E4[s] E5[s] E6[s] ]
* Where 's' is 1 if E is negative
*
* The reason for this layout is the ability to efficiently multiply
* many low precision integers as a sum of popcnt(A & B) * 1 << bit.
* Explained here: https://arxiv.org/abs/1606.06160
*/
// TODO: changing at::ArrayRef<int> to at::ArrayRef<int64_t>?
explicit QTensor(
at::ArrayRef<int> dims,
const unsigned char precision,
const bool signbit = false)
: precision_(precision), signed_(signbit) {
Resize(dims);
}
void Resize(at::ArrayRef<int> dim_source) {
if (dims_ != dim_source) {
size_t source_size = std::accumulate(
dim_source.begin(), dim_source.end(), 1, std::multiplies<int>());
if ((source_size * (precision_ + signed_)) > capacity_) {
data_ptr_.clear();
capacity_ = 0;
}
dims_ = dim_source.vec();
size_ = source_size;
}
}
void
SetBitAtIndex(const unsigned char bit, const size_t index, const bool value) {
// Get the mutable data at bit depth `bit`.
unsigned char* d = mutable_data();
CAFFE_ENFORCE(
bit < precision_ + signed_,
"Attempted to a set a bit that is not allocated.");
CAFFE_ENFORCE(bit * aligned_size() < capacity_);
auto idx = (aligned_size() * bit) / CHAR_BIT;
d = &d[idx];
idx = index / CHAR_BIT;
auto shift = CHAR_BIT - (index % CHAR_BIT) - 1;
if (value) {
d[idx] |= 1 << shift;
} else {
d[idx] &= ~(1 << shift);
}
}
bool GetBitAtIndex(const unsigned char bit, const size_t index) const {
// Get the data at bit depth `bit`
const unsigned char* d = data();
auto idx = (aligned_size() * bit) / CHAR_BIT;
d = &d[idx];
idx = index / CHAR_BIT;
auto shift = CHAR_BIT - (index % CHAR_BIT) - 1;
return d[idx] & (1 << shift);
}
void SetPrecision(const unsigned char precision) {
precision_ = precision;
data_ptr_.clear();
}
void SetSigned(const bool make_signed = true) {
signed_ = make_signed;
data_ptr_.clear();
}
void SetScale(const double scale) {
scale_ = scale;
}
void SetBias(const double bias) {
bias_ = bias;
}
unsigned char* mutable_data() {
if (!data_ptr_) {
data_ptr_ = Context::New(nbytes());
capacity_ = nbytes() * CHAR_BIT;
}
CAFFE_ENFORCE(capacity_ == nbytes() * CHAR_BIT);
return static_cast<unsigned char*>(data_ptr_.get());
}
inline const unsigned char* data() const {
return static_cast<unsigned char*>(data_ptr_.get());
}
inline size_t size() const {
return size_;
}
inline unsigned char alignment() const {
return alignment_;
}
inline unsigned char precision() const {
return precision_;
}
inline at::ArrayRef<int> sizes() const {
return dims_;
}
// TODO: deprecate?
inline at::ArrayRef<int> dims() const {
return dims_;
}
inline bool is_signed() const {
return signed_;
}
/**
* Returns the number of dimensions of the data.
*/
inline int ndim() const {
return dims_.size();
}
inline size_t aligned_size() const {
return alignment_ * ((size_ + alignment_ - 1) / alignment_);
}
inline size_t nbytes() const {
return (aligned_size() * (precision_ + signed_)) / CHAR_BIT;
}
inline double scale() const {
return scale_;
}
inline double bias() const {
return bias_;
}
/**
* Returns the i-th dimension of the qtensor in int.
*/
inline int dim32(const int i) const {
DCHECK_LT(i, dims_.size()) << "Exceeding ndim limit " << dims_.size();
DCHECK_GE(i, 0) << "Cannot have negative index";
CAFFE_ENFORCE_LT(dims_[i], std::numeric_limits<int>::max());
return static_cast<int>(dims_[i]);
}
/**
* Returns the 'canonical' version of a (usually) user-specified axis,
* allowing for negative indexing (e.g., -1 for the last axis).
*
* @param axis_index the axis index.
* If 0 <= index < ndim(), return index.
* If -ndim <= index <= -1, return (ndim() - (-index)),
* e.g., the last axis index (ndim() - 1) if index == -1,
* the second to last if index == -2, etc.
* Dies on out of range index.
*/
inline int canonical_axis_index(int axis_index) const {
CAFFE_ENFORCE_GE(axis_index, -ndim());
CAFFE_ENFORCE_LT(axis_index, ndim());
if (axis_index < 0) {
return axis_index + ndim();
}
return axis_index;
}
/**
* Return product of all dimensions starting from K.
*/
inline int64_t size_from_dim(int k) const {
int64_t r = 1;
for (int i = k; i < dims_.size(); ++i) {
r *= dims_[i];
}
return r;
}
/**
* Product of all dims up to.
*/
inline int64_t size_to_dim(int k) const {
CAFFE_ENFORCE(k < dims_.size());
int64_t r = 1;
for (int i = 0; i < k; ++i) {
r *= dims_[i];
}
return r;
}
protected:
std::vector<int> dims_;
size_t size_ = 0;
// Precision in bits.
unsigned char precision_ = CHAR_BIT;
// Bit alignment.
unsigned char alignment_ = CHAR_BIT;
// Allocated data.
at::DataPtr data_ptr_;
// value = scale_ * (x + bias_)
double scale_;
double bias_;
bool signed_ = false;
// Capacity in bits.
size_t capacity_ = 0;
};
} // namespace caffe2
#endif // CAFFE2_CORE_QTENSOR_H_