Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions python/singa/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,14 +233,23 @@ def reset_like(self, t):
self.device = t.device
self.dtype = t.dtype

def as_type(self, dtype):
    '''Change the data type of this tensor in place.

    Args:
        dtype: the target type, either a SINGA type constant
            (singa.kInt or singa.kFloat32) or the string 'int' or
            'float'.

    Raises:
        TypeError: if dtype is none of the accepted values.
    '''
    if dtype == singa.kInt:
        pass
    elif dtype == singa.kFloat32:
        pass
    elif dtype == 'int':
        dtype = singa.kInt
    elif dtype == 'float':
        dtype = singa.kFloat32
    else:
        raise TypeError("invalid data type %s" % dtype)
    self.data.AsType(dtype)
    # Keep the Python-level dtype attribute in sync with the underlying
    # C++ tensor (reset_like() maintains the same invariant).
    self.dtype = dtype

def to_device(self, device):
'''Move the tensor data onto a given device.
Expand Down
22 changes: 22 additions & 0 deletions src/core/tensor/math_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,20 @@ __global__ void KernelAbs(const size_t n, const float *in, float *out) {
}
}

// Element-wise cast of `n` floats in `in` to ints in `out` (C-style
// truncation toward zero). Grid-stride loop, so any launch configuration
// is correct; size_t indices avoid the signed/unsigned mismatch and
// overflow of the previous `int` counter when n > INT_MAX.
__global__ void KernelCastFloat2Int(const size_t n, const float *in, int *out) {
  for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
       i < n; i += static_cast<size_t>(blockDim.x) * gridDim.x) {
    out[i] = int(in[i]);
  }
}

// Element-wise cast of `n` ints in `in` to floats in `out`.
// Grid-stride loop, so any launch configuration is correct; size_t
// indices avoid the signed/unsigned mismatch and overflow of the
// previous `int` counter when n > INT_MAX.
__global__ void KernelCastInt2Float(const size_t n, const int *in, float *out) {
  for (size_t i = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
       i < n; i += static_cast<size_t>(blockDim.x) * gridDim.x) {
    out[i] = float(in[i]);
  }
}

__global__ void KernelSoftplus(const size_t n, const float *in, float *out) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n;
i += blockDim.x * gridDim.x) {
Expand Down Expand Up @@ -509,6 +523,14 @@ void abs(const size_t n, const float *in, float *out, cudaStream_t s) {
KernelAbs <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF, 0, s>>> (n, in, out);
}

// Host wrapper: launches KernelCastFloat2Int on stream `s` to cast `n`
// floats from `src` into ints in `dst`. Launch shape follows the file's
// convention (ceil(n / CU1DBLOCKF) blocks of CU1DBLOCKF threads).
void cast_float_2_int(const size_t n, const float *src, int *dst, cudaStream_t s) {
  KernelCastFloat2Int <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF, 0, s>>> (n, src, dst);
}

// Host wrapper: launches KernelCastInt2Float on stream `s` to cast `n`
// ints from `src` into floats in `dst`. Launch shape follows the file's
// convention (ceil(n / CU1DBLOCKF) blocks of CU1DBLOCKF threads).
void cast_int_2_float(const size_t n, const int *src, float *dst, cudaStream_t s) {
  KernelCastInt2Float <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF, 0, s>>> (n, src, dst);
}

void sign(const size_t n, const float *in, float *out, cudaStream_t s) {
KernelSign <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF, 0, s>>> (n, in, out);
}
Expand Down
4 changes: 4 additions & 0 deletions src/core/tensor/math_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ void abs(const size_t n, const float *in, float *out, cudaStream_t s);
void sign(const size_t n, const float *in, float *out, cudaStream_t s);
void exp(const size_t n, const float *in, float *out, cudaStream_t s);
void ceil2(const size_t n, const float *in, float *out, cudaStream_t s);
void cast_float_2_int(const size_t n, const float *src, int *dst,
cudaStream_t s);
void cast_int_2_float(const size_t n, const int *src, float *dst,
cudaStream_t s);
void log(const size_t n, const float *in, float *out, cudaStream_t s);
void sqrt(const size_t n, const float *in, float *out, cudaStream_t s);
void square(const size_t n, const float *in, float *out, cudaStream_t s);
Expand Down
66 changes: 64 additions & 2 deletions src/core/tensor/tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,73 @@ Tensor Resize(const Tensor &in, const Shape &shape) {
return out;
}

// Dispatches on the triple (left dtype, right dtype, language) and binds the
// typedefs LDType, RDType and Lang for the code passed in __VA_ARGS__.
// The three enum values are packed into one switch key by shifting each by
// _SwitchShift bits.  Only float<->int on kCuda and kCpp are handled; every
// other combination aborts via LOG(FATAL).
#define TYPE_TYPE_LANG_SWITCH(ldtype, LDType, rdtype, RDType, ltype, Lang, \
                              ...)                                         \
  do {                                                                     \
    const int _SwitchShift = 3;                                            \
    int _SwitchHash =                                                      \
        ((ldtype) << _SwitchShift * 2) + ((rdtype) << _SwitchShift) + (ltype); \
    switch (_SwitchHash) {                                                 \
      case (((kFloat32) << _SwitchShift * 2) + (kInt << _SwitchShift) +    \
            kCuda): {                                                      \
        typedef float LDType;                                              \
        typedef int RDType;                                                \
        typedef lang::Cuda Lang;                                           \
        { __VA_ARGS__ }                                                    \
        break;                                                             \
      }                                                                    \
      case (((kInt) << _SwitchShift * 2) + (kFloat32 << _SwitchShift) +    \
            kCuda): {                                                      \
        typedef int LDType;                                                \
        typedef float RDType;                                              \
        typedef lang::Cuda Lang;                                           \
        { __VA_ARGS__ }                                                    \
        break;                                                             \
      }                                                                    \
      case (((kFloat32) << _SwitchShift * 2) + (kInt << _SwitchShift) +    \
            kCpp): {                                                       \
        typedef float LDType;                                              \
        typedef int RDType;                                                \
        typedef lang::Cpp Lang;                                            \
        { __VA_ARGS__ }                                                    \
        break;                                                             \
      }                                                                    \
      case (((kInt) << _SwitchShift * 2) + (kFloat32 << _SwitchShift) +    \
            kCpp): {                                                       \
        typedef int LDType;                                                \
        typedef float RDType;                                              \
        typedef lang::Cpp Lang;                                            \
        { __VA_ARGS__ }                                                    \
        break;                                                             \
      }                                                                    \
      default:                                                             \
        LOG(FATAL) << "Unknown combination of left data type "             \
                   << DataType_Name(ldtype) << " and right data type "     \
                   << DataType_Name(rdtype) << " and language "            \
                   << LangType_Name(ltype);                                \
    }                                                                      \
  } while (0)

Tensor &Tensor::AsType(const DataType type) {
if (data_type_ != type) {
if (block_ != nullptr && block_->DecRefCount() == 0)
if (block_ != nullptr && block_->DecRefCount() == 0) {
auto offset = Product(shape_);
auto new_block_ =
device_->NewBlock((int)(Product(shape_) * SizeOf(type)));
TYPE_TYPE_LANG_SWITCH(
data_type_, LDType, type, RDType, device_->lang(), Lang, {
device_->Exec(
[this, new_block_, offset, type](Context *ctx) {
CastAsType<LDType, RDType, Lang>(this, new_block_, offset,
ctx);
},
{}, {});
});
device_->FreeBlock(block_);
block_ = device_->NewBlock((int)(Product(shape_) * SizeOf(type)));
block_ = new_block_;
} else {
block_ = device_->NewBlock((int)(Product(shape_) * SizeOf(type)));
}
data_type_ = type;
}
return *this;
Expand Down
5 changes: 5 additions & 0 deletions src/core/tensor/tensor_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ void Abs(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Abs Not Implemented";
}

// Fallback for CastAsType: concrete (source type, destination type, language)
// combinations are supplied as template specializations elsewhere; any
// combination without a specialization aborts at runtime.
template <typename DTypeSrc, typename DTypeDst, typename Lang>
void CastAsType(const Tensor *src, Block *dst, int offset, Context *ctx) {
  LOG(FATAL) << "CastAsType Not Implemented";
}

template <typename DType, typename Lang>
void Ceil(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Ceil Not Implemented";
Expand Down
16 changes: 16 additions & 0 deletions src/core/tensor/tensor_math_cpp.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,22 @@ void Abs<float, lang::Cpp>(const Tensor &in, Tensor *out, Context *ctx) {
traverse_unary<float>(in, out, [](float x) { return fabs(x); });
}

// CPU cast: copies `offset` float elements from src into dst as ints,
// truncating toward zero.
template <>
void CastAsType<float, int, lang::Cpp>(const Tensor *src, Block *dst,
                                       int offset, Context *ctx) {
  const float *in = static_cast<const float *>(src->block()->data());
  int *out = static_cast<int *>(dst->mutable_data());
  for (int idx = 0; idx < offset; ++idx) {
    out[idx] = static_cast<int>(in[idx]);
  }
}

// CPU cast: copies `offset` int elements from src into dst as floats.
template <>
void CastAsType<int, float, lang::Cpp>(const Tensor *src, Block *dst,
                                       int offset, Context *ctx) {
  const int *in = static_cast<const int *>(src->block()->data());
  float *out = static_cast<float *>(dst->mutable_data());
  for (int idx = 0; idx < offset; ++idx) {
    out[idx] = static_cast<float>(in[idx]);
  }
}

template <>
void Ceil<float, lang::Cpp>(const Tensor &in, Tensor *out, Context *ctx) {
traverse_unary<float>(in, out, [](float x) { return std::ceil(x); });
Expand Down
17 changes: 17 additions & 0 deletions src/core/tensor/tensor_math_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,23 @@ void Abs<float, lang::Cuda>(const Tensor& in, Tensor* out, Context* ctx) {
cudnnDestroyTensorDescriptor(in_desc);
}

// GPU cast: converts `offset` float elements of src into int elements of
// dst, asynchronously on ctx->stream.
template <>
void CastAsType<float, int, lang::Cuda>(const Tensor* src, Block* dst,
                                        int offset, Context* ctx) {
  const float* srcPtr = static_cast<const float*>(src->block()->data());
  int* dstPtr = static_cast<int*>(dst->mutable_data());
  // Use the `offset` element count like the int->float specialization does,
  // rather than re-deriving it from src->Size(); the caller passes the total
  // element count for both.
  cuda::cast_float_2_int(offset, srcPtr, dstPtr, ctx->stream);
}

// GPU cast: converts `offset` int elements of src into float elements of
// dst, asynchronously on ctx->stream.
template <>
void CastAsType<int, float, lang::Cuda>(const Tensor* src, Block* dst,
                                        int offset, Context* ctx) {
  const int* srcPtr = static_cast<const int*>(src->block()->data());
  float* dstPtr = static_cast<float*>(dst->mutable_data());
  cuda::cast_int_2_float(offset, srcPtr, dstPtr, ctx->stream);
}

template <>
void Set<float, lang::Cuda>(const float x, Tensor* out, Context* ctx) {
float* outPtr = static_cast<float*>(out->block()->mutable_data());
Expand Down
27 changes: 27 additions & 0 deletions test/python/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,33 @@ def test_ceil(self):
np.testing.assert_array_almost_equal(
tensor.to_numpy(_cTensor_to_pyTensor(t2_ct)), np2)

def test_as_type(self):
    # Round trip float32 -> int32 -> float32 through the C tensor AsType API
    # and compare against numpy's astype at each step.
    src = np.random.random([3]).astype(np.float32) * 10 - 5
    as_int = src.astype(np.int32)
    back_to_float = as_int.astype(np.float32)

    for dev in [cpu_dev, gpu_dev]:
        ct = tensor.Tensor(device=dev, data=src).data
        self.assertEqual(ct.data_type(), singa_api.kFloat32)

        ct.AsType(singa_api.kInt)
        self.assertEqual(ct.data_type(), singa_api.kInt)
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(_cTensor_to_pyTensor(ct)), as_int)

        ct.AsType(singa_api.kFloat32)
        self.assertEqual(ct.data_type(), singa_api.kFloat32)
        np.testing.assert_array_almost_equal(
            tensor.to_numpy(_cTensor_to_pyTensor(ct)), back_to_float)


if __name__ == '__main__':
unittest.main()
15 changes: 15 additions & 0 deletions test/python/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,21 @@ def test_ceil(self):

np.testing.assert_array_almost_equal(tensor.to_numpy(t2), np2)

def test_astype(self):
    # Round trip float32 -> int32 -> float32 through the string-based
    # as_type API and compare against numpy's astype at each step.
    for dev in [cpu_dev, gpu_dev]:
        shifted = np.random.random([5, 6, 7, 8]).astype(np.float32) * 10 - 5
        truncated = shifted.astype(np.int32)
        restored = truncated.astype(np.float32)

        t = tensor.Tensor(device=dev, data=shifted)
        t.as_type('int')
        np.testing.assert_array_almost_equal(tensor.to_numpy(t), truncated)
        t.as_type('float')
        np.testing.assert_array_almost_equal(tensor.to_numpy(t), restored)


if __name__ == '__main__':
unittest.main()
78 changes: 74 additions & 4 deletions test/singa/test_tensor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,81 @@ TEST(TensorClass, Reshape) {
EXPECT_TRUE(o.shape() != t.shape());
}

TEST(TensorClass, AsType) {
Tensor t;
#ifdef USE_CUDA

TEST(TensorClass, FloatAsTypeIntCuda) {
  // A float32 tensor cast to kInt on the GPU must hold the truncated values.
  auto dev = std::make_shared<singa::CudaGPU>();
  Tensor t(Shape{3}, dev);
  float values[] = {1.0f, 2.0f, 3.0f};
  t.CopyDataFromHostPtr(values, 3);
  EXPECT_EQ(singa::kFloat32, t.data_type());

  t.AsType(singa::kInt);
  EXPECT_EQ(singa::kInt, t.data_type());

  // Bring the data back to the host before reading the raw block.
  t.ToHost();
  const int* out = static_cast<const int*>(t.block()->data());
  EXPECT_EQ(1, out[0]);
  EXPECT_EQ(2, out[1]);
  EXPECT_EQ(3, out[2]);
}

TEST(TensorClass, IntAsTypeFloatCuda) {
  // An int tensor cast to kFloat32 on the GPU must keep the same values.
  auto dev = std::make_shared<singa::CudaGPU>();
  Tensor t(Shape{3}, dev, singa::kInt);
  int values[] = {1, 2, 3};
  t.CopyDataFromHostPtr(values, 3);
  EXPECT_EQ(singa::kInt, t.data_type());

  t.AsType(singa::kFloat32);
  EXPECT_EQ(singa::kFloat32, t.data_type());

  // Bring the data back to the host before reading the raw block.
  t.ToHost();
  const float* out = static_cast<const float*>(t.block()->data());
  EXPECT_EQ(1.0f, out[0]);
  EXPECT_EQ(2.0f, out[1]);
  EXPECT_EQ(3.0f, out[2]);
}

#endif // USE_CUDA

TEST(TensorClass, FloatAsTypeIntCPU) {
  // A float32 host tensor cast to kInt must hold the truncated values.
  Tensor t(Shape{3});
  float values[] = {1.0f, 2.0f, 3.0f};
  t.CopyDataFromHostPtr(values, 3);
  EXPECT_EQ(singa::kFloat32, t.data_type());

  // Sanity-check the original float contents before the cast.
  const float* before = static_cast<const float*>(t.block()->data());
  EXPECT_FLOAT_EQ(1.0f, before[0]);
  EXPECT_FLOAT_EQ(2.0f, before[1]);
  EXPECT_FLOAT_EQ(3.0f, before[2]);

  t.AsType(singa::kInt);
  EXPECT_EQ(singa::kInt, t.data_type());

  const int* after = static_cast<const int*>(t.block()->data());
  EXPECT_EQ(1, after[0]);
  EXPECT_EQ(2, after[1]);
  EXPECT_EQ(3, after[2]);
}

TEST(TensorClass, IntAsTypeFloatCPU) {
  // An int tensor cast to kFloat32 on the CPU must keep the same values.
  Tensor t(Shape{3}, singa::kInt);
  int data[] = {1, 2, 3};
  t.CopyDataFromHostPtr(data, 3);
  EXPECT_EQ(singa::kInt, t.data_type());

  t.AsType(singa::kFloat32);
  EXPECT_EQ(singa::kFloat32, t.data_type());

  // Removed the kFloat16 detour: the int/float16 combination has no case in
  // TYPE_TYPE_LANG_SWITCH (it would LOG(FATAL)), and reading the block as
  // float* after a kFloat16 conversion would misinterpret the bytes.
  const float* dptr2 = static_cast<const float*>(t.block()->data());
  EXPECT_FLOAT_EQ(1.0f, dptr2[0]);
  EXPECT_FLOAT_EQ(2.0f, dptr2[1]);
  EXPECT_FLOAT_EQ(3.0f, dptr2[2]);
}

TEST(TensorClass, ToDevice) {
Expand Down