Skip to content

Commit 5db5af1

Browse files
alexeyserbin authored and achennaka committed
KUDU-1261 add GetArray/SetArray C++ client API
This patch adds methods to the Kudu C++ client API to set an array of values for all the supported scalar types except for INT128 and DECIMAL128. It also adds the corresponding KuduScanBatch::RowPtr::GetArrayXxx() methods. In addition, it adds src/kudu/client/array_cell.h with the KuduArrayCellView API to access raw data in array cells. The latter is oriented toward working with raw data access APIs such as KuduScanBatch::RowPtr::cell() and KuduScanBatch::direct_data(); AFAIK, that's what Impala uses to fetch scan results from Kudu. This patch is made to unblock the integration work that depends on the new Kudu client API for array columns (i.e., integration with Impala). So, it contains only partial test coverage for the newly added functionality, primarily in partial_row-test.cc. The end-to-end tests for writing/reading array data are coming in a follow-up changelist. They require server-side support for array data in both the MRS and the array data blocks persisted on disk, which is part of another changelist [1]. Adding Doxygen documentation comments for the newly introduced methods will be taken care of in a separate changelist as well. [1] https://gerrit.cloudera.org/#/c/22868/ Change-Id: Idba29337563cefefa8cf1000663ec7e27885cb6f Reviewed-on: http://gerrit.cloudera.org:8080/23474 Tested-by: Alexey Serbin <alexey@apache.org> Reviewed-by: Abhishek Chennaka <achennaka@cloudera.com>
1 parent d86f992 commit 5db5af1

File tree

9 files changed

+991
-31
lines changed

9 files changed

+991
-31
lines changed

src/kudu/client/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ ADD_EXPORTABLE_LIBRARY(client_proto
3030
NONLINK_DEPS ${CLIENT_PROTO_TGTS})
3131

3232
set(CLIENT_SRCS
33+
array_cell-internal.cc
3334
authz_token_cache.cc
3435
batcher.cc
3536
client.cc
@@ -183,6 +184,7 @@ install(TARGETS kudu_client_exported
183184

184185
# Headers: client
185186
install(FILES
187+
array_cell.h
186188
callbacks.h
187189
client.h
188190
columnar_scan_batch.h
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
19+
#include <cstdint>
20+
#include <cstring>
21+
22+
#include "kudu/client/array_cell.h"
23+
#include "kudu/client/schema-internal.h"
24+
#include "kudu/client/schema.h"
25+
#include "kudu/common/array_cell_view.h"
26+
#include "kudu/util/slice.h"
27+
#include "kudu/util/status.h"
28+
29+
namespace kudu {
30+
namespace client {
31+
32+
// Private implementation of KuduArrayCellView: holds an ArrayCellMetadataView
// constructed over the buffer supplied by the caller.
class KuduArrayCellView::Data {
 public:
  // 'buf' and 'size' are passed through to the ArrayCellMetadataView
  // constructor as is.
  Data(const uint8_t* buf, size_t size)
      : view_(buf, size) {
  }

  ~Data() = default;

  // The wrapped view; the KuduArrayCellView methods forward to it.
  ArrayCellMetadataView view_;
};
42+
43+
// Builds a view from a pointer to an array cell. The pointee is interpreted
// as a Slice whose data()/size() describe the cell's buffer. A null
// 'cell_ptr' results in a view over a null buffer of zero size.
KuduArrayCellView::KuduArrayCellView(const void* cell_ptr)
    : data_(nullptr) {
  if (cell_ptr) {
    const Slice* slice = static_cast<const Slice*>(cell_ptr);
    data_ = new Data(slice->data(), slice->size());
    return;
  }
  data_ = new Data(nullptr, 0);
}
51+
52+
// Builds a view directly over the raw buffer 'buf' of 'size' bytes.
KuduArrayCellView::KuduArrayCellView(const uint8_t* buf, const size_t size)
    : data_(new Data(buf, size)) {}
55+
56+
// Releases the implementation object allocated by the constructors.
KuduArrayCellView::~KuduArrayCellView() { delete data_; }
59+
60+
// Initializes the wrapped ArrayCellMetadataView and returns its status.
Status KuduArrayCellView::Init() {
  ArrayCellMetadataView& view = data_->view_;
  return view.Init();
}
63+
64+
size_t KuduArrayCellView::elem_num() const {
65+
return data_->view_.elem_num();
66+
}
67+
68+
bool KuduArrayCellView::empty() const {
69+
return data_->view_.empty();
70+
}
71+
72+
const uint8_t* KuduArrayCellView::not_null_bitmap() const {
73+
return data_->view_.not_null_bitmap();
74+
}
75+
76+
// Returns the cell's raw data for elements of the given client-side type.
// The client type and its attributes are first translated into the internal
// data type with ToInternalDataType(), then passed to data_as() of the
// wrapped view.
const void* KuduArrayCellView::data(KuduColumnSchema::DataType data_type,
                                    const KuduColumnTypeAttributes& attributes) const {
  const auto internal_type = ToInternalDataType(data_type, attributes);
  return data_->view_.data_as(internal_type);
}
80+
81+
} // namespace client
82+
} // namespace kudu

src/kudu/client/array_cell.h

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
#ifndef KUDU_CLIENT_ARRAY_CELL_H
19+
#define KUDU_CLIENT_ARRAY_CELL_H
20+
21+
#ifdef KUDU_HEADERS_NO_STUBS
22+
#include "kudu/gutil/port.h"
23+
#else
24+
#include "kudu/client/stubs.h"
25+
#endif
26+
27+
#include "kudu/client/schema.h"
28+
#include "kudu/util/kudu_export.h"
29+
#include "kudu/util/status.h"
30+
31+
namespace kudu {
32+
namespace client {
33+
34+
// TODO(aserbin): add doxygen comments
35+
class KUDU_EXPORT KuduArrayCellView {
36+
public:
37+
// cell_ptr: pointer to an array cell in scan results, returned by, e.g.,
38+
// by KuduScanBatch::RowPtr::cell(idx)
39+
explicit KuduArrayCellView(const void* cell_ptr);
40+
// buf: data raw pointer
41+
// len: size of the buffer (bytes) pointed at by the 'buf' pointer
42+
KuduArrayCellView(const uint8_t* buf, size_t size);
43+
~KuduArrayCellView();
44+
45+
// Process the input data. This method must be called once prior to calling
46+
// any other methods of this class.
47+
Status Init();
48+
49+
// Number of elements in the array.
50+
size_t elem_num() const;
51+
52+
// Whether the array cell is empty, i.e. does not contain any elements.
53+
bool empty() const;
54+
55+
// Get non-null (a.k.a. validity) bitmap for the array elements.
56+
const uint8_t* not_null_bitmap() const;
57+
58+
// Accessor for the cell's raw data in the format similar to what
59+
// KuduScanBatch::RowPtr::direct_data() provides
60+
const void* data(
61+
KuduColumnSchema::DataType data_type,
62+
const KuduColumnTypeAttributes& attributes = KuduColumnTypeAttributes()) const;
63+
64+
private:
65+
class KUDU_NO_EXPORT Data;
66+
67+
// Owned.
68+
Data* data_;
69+
70+
DISALLOW_COPY_AND_ASSIGN(KuduArrayCellView);
71+
};
72+
73+
} // namespace client
74+
} // namespace kudu
75+
76+
#endif // #ifndef KUDU_CLIENT_ARRAY_CELL_H ...

src/kudu/client/scan_batch.cc

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,17 @@
1717

1818
#include "kudu/client/scan_batch.h"
1919

20+
#include <algorithm>
21+
#include <iterator>
2022
#include <cstring>
2123
#include <string>
24+
#include <vector>
2225

2326
#include <glog/logging.h>
2427

2528
#include "kudu/client/row_result.h"
2629
#include "kudu/client/scanner-internal.h"
30+
#include "kudu/common/array_cell_view.h"
2731
#include "kudu/common/common.pb.h"
2832
#include "kudu/common/schema.h"
2933
#include "kudu/common/types.h"
@@ -33,6 +37,7 @@
3337
#include "kudu/util/logging.h"
3438

3539
using std::string;
40+
using std::vector;
3641
using strings::Substitute;
3742

3843
namespace kudu {
@@ -255,6 +260,173 @@ Status KuduScanBatch::RowPtr::Get(int col_idx, typename T::cpp_type* val) const
255260
return Status::OK();
256261
}
257262

263+
namespace {
264+
265+
Status ArrayValidation(const ColumnSchema& col,
266+
const char* type_name) {
267+
if (PREDICT_FALSE(col.type_info()->type() != NESTED)) {
268+
return BadTypeStatus(type_name, col);
269+
}
270+
const auto* descriptor = col.type_info()->nested_type_info();
271+
if (PREDICT_FALSE(!descriptor)) {
272+
return Status::InvalidArgument(Substitute(
273+
"column '$0': missing type descriptor for NESTED type", col.name()));
274+
}
275+
if (PREDICT_FALSE(!descriptor->is_array())) {
276+
return Status::InvalidArgument(Substitute(
277+
"column '$0': underlying NESTED type isn't an array", col.name()));
278+
}
279+
return Status::OK();
280+
}
281+
282+
} // anonymous namespace
283+
284+
template<typename T>
285+
Status KuduScanBatch::RowPtr::GetArray(const Slice& col_name,
286+
vector<typename T::cpp_type>* data_out,
287+
vector<bool>* validity_out) const {
288+
int col_idx;
289+
RETURN_NOT_OK(schema_->FindColumn(col_name, &col_idx));
290+
return GetArray<T>(col_idx, data_out, validity_out);
291+
}
292+
293+
template<typename T>
294+
Status KuduScanBatch::RowPtr::GetArray(int col_idx,
295+
vector<typename T::cpp_type>* data_out,
296+
vector<bool>* validity_out) const {
297+
const ColumnSchema& col = schema_->column(col_idx);
298+
RETURN_NOT_OK(ArrayValidation(col, T::name()));
299+
if (PREDICT_FALSE(col.is_nullable() && IsNull(col_idx))) {
300+
return Status::NotFound("column is NULL");
301+
}
302+
const Slice* cell_data = reinterpret_cast<const Slice*>(
303+
row_data_ + schema_->column_offset(col_idx));
304+
ArrayCellMetadataView view(cell_data->data(), cell_data->size());
305+
RETURN_NOT_OK(view.Init());
306+
307+
if (data_out) {
308+
data_out->resize(view.elem_num());
309+
if (!view.empty()) {
310+
const uint8_t* data_raw = view.data_as(T::type);
311+
DCHECK(data_raw);
312+
memcpy(data_out->data(), data_raw, view.elem_num() * sizeof(typename T::cpp_type));
313+
}
314+
}
315+
if (validity_out) {
316+
validity_out->resize(view.elem_num());
317+
if (!view.empty()) {
318+
*validity_out = BitmapToVector(view.not_null_bitmap(), view.elem_num());
319+
}
320+
}
321+
return Status::OK();
322+
}
323+
324+
// Since std::vector<bool> isn't a standard container, the data() accessor
325+
// isn't available and copying the data requires an alternative approach.
326+
template<>
327+
Status KuduScanBatch::RowPtr::GetArray<TypeTraits<BOOL>>(
328+
int col_idx,
329+
vector<bool>* data_out,
330+
vector<bool>* validity) const {
331+
const ColumnSchema& col = schema_->column(col_idx);
332+
RETURN_NOT_OK(ArrayValidation(col, TypeTraits<BOOL>::name()));
333+
if (PREDICT_FALSE(col.is_nullable() && IsNull(col_idx))) {
334+
return Status::NotFound("column is NULL");
335+
}
336+
const Slice* cell_data = reinterpret_cast<const Slice*>(
337+
row_data_ + schema_->column_offset(col_idx));
338+
ArrayCellMetadataView view(cell_data->data(), cell_data->size());
339+
RETURN_NOT_OK(view.Init());
340+
341+
if (data_out) {
342+
const size_t elem_num = view.elem_num();
343+
data_out->clear();
344+
data_out->reserve(elem_num);
345+
const uint8_t* data_raw = view.data_as(BOOL);
346+
DCHECK(data_raw);
347+
std::copy(data_raw, data_raw + elem_num, std::back_inserter(*data_out));
348+
}
349+
if (validity) {
350+
*validity = BitmapToVector(view.not_null_bitmap(), view.elem_num());
351+
}
352+
return Status::OK();
353+
}
354+
355+
// Typed wrapper: forwards to GetArray<TypeTraits<BOOL>>().
Status KuduScanBatch::RowPtr::GetArrayBool(int col_idx,
                                           vector<bool>* data,
                                           vector<bool>* validity) const {
  using Traits = TypeTraits<BOOL>;
  return GetArray<Traits>(col_idx, data, validity);
}
360+
361+
// Typed wrapper: forwards to GetArray<TypeTraits<INT8>>().
Status KuduScanBatch::RowPtr::GetArrayInt8(int col_idx,
                                           vector<int8_t>* data,
                                           vector<bool>* validity) const {
  using Traits = TypeTraits<INT8>;
  return GetArray<Traits>(col_idx, data, validity);
}
366+
367+
// Typed wrapper: forwards to GetArray<TypeTraits<INT16>>().
Status KuduScanBatch::RowPtr::GetArrayInt16(int col_idx,
                                            vector<int16_t>* data,
                                            vector<bool>* validity) const {
  using Traits = TypeTraits<INT16>;
  return GetArray<Traits>(col_idx, data, validity);
}
372+
373+
// Typed wrapper: forwards to GetArray<TypeTraits<INT32>>().
Status KuduScanBatch::RowPtr::GetArrayInt32(int col_idx,
                                            vector<int32_t>* data,
                                            vector<bool>* validity) const {
  using Traits = TypeTraits<INT32>;
  return GetArray<Traits>(col_idx, data, validity);
}
378+
379+
// Typed wrapper: forwards to GetArray<TypeTraits<INT64>>().
Status KuduScanBatch::RowPtr::GetArrayInt64(int col_idx,
                                            vector<int64_t>* data,
                                            vector<bool>* validity) const {
  using Traits = TypeTraits<INT64>;
  return GetArray<Traits>(col_idx, data, validity);
}
384+
// Typed wrapper for 32-bit unscaled decimal values: forwards to
// GetArray<TypeTraits<DECIMAL32>>().
Status KuduScanBatch::RowPtr::GetArrayUnscaledDecimal(int col_idx,
                                                      vector<int32_t>* data,
                                                      vector<bool>* validity) const {
  using Traits = TypeTraits<DECIMAL32>;
  return GetArray<Traits>(col_idx, data, validity);
}
389+
// Typed wrapper for 64-bit unscaled decimal values: forwards to
// GetArray<TypeTraits<DECIMAL64>>().
Status KuduScanBatch::RowPtr::GetArrayUnscaledDecimal(int col_idx,
                                                      vector<int64_t>* data,
                                                      vector<bool>* validity) const {
  using Traits = TypeTraits<DECIMAL64>;
  return GetArray<Traits>(col_idx, data, validity);
}
394+
// Typed wrapper: forwards to GetArray<TypeTraits<UNIXTIME_MICROS>>().
Status KuduScanBatch::RowPtr::GetArrayUnixTimeMicros(int col_idx,
                                                     vector<int64_t>* data,
                                                     vector<bool>* validity) const {
  using Traits = TypeTraits<UNIXTIME_MICROS>;
  return GetArray<Traits>(col_idx, data, validity);
}
399+
// Typed wrapper: forwards to GetArray<TypeTraits<DATE>>().
Status KuduScanBatch::RowPtr::GetArrayDate(int col_idx,
                                           vector<int32_t>* data,
                                           vector<bool>* validity) const {
  using Traits = TypeTraits<DATE>;
  return GetArray<Traits>(col_idx, data, validity);
}
404+
// Typed wrapper: forwards to GetArray<TypeTraits<FLOAT>>().
Status KuduScanBatch::RowPtr::GetArrayFloat(int col_idx,
                                            vector<float>* data,
                                            vector<bool>* validity) const {
  using Traits = TypeTraits<FLOAT>;
  return GetArray<Traits>(col_idx, data, validity);
}
409+
// Typed wrapper: forwards to GetArray<TypeTraits<DOUBLE>>().
Status KuduScanBatch::RowPtr::GetArrayDouble(int col_idx,
                                             vector<double>* data,
                                             vector<bool>* validity) const {
  using Traits = TypeTraits<DOUBLE>;
  return GetArray<Traits>(col_idx, data, validity);
}
414+
// Typed wrapper: forwards to GetArray<TypeTraits<STRING>>().
Status KuduScanBatch::RowPtr::GetArrayString(int col_idx,
                                             vector<Slice>* data,
                                             vector<bool>* validity) const {
  using Traits = TypeTraits<STRING>;
  return GetArray<Traits>(col_idx, data, validity);
}
419+
// Typed wrapper: forwards to GetArray<TypeTraits<BINARY>>().
Status KuduScanBatch::RowPtr::GetArrayBinary(int col_idx,
                                             vector<Slice>* data,
                                             vector<bool>* validity) const {
  using Traits = TypeTraits<BINARY>;
  return GetArray<Traits>(col_idx, data, validity);
}
424+
// Typed wrapper: forwards to GetArray<TypeTraits<VARCHAR>>().
Status KuduScanBatch::RowPtr::GetArrayVarchar(int col_idx,
                                              vector<Slice>* data,
                                              vector<bool>* validity) const {
  using Traits = TypeTraits<VARCHAR>;
  return GetArray<Traits>(col_idx, data, validity);
}
429+
258430
const void* KuduScanBatch::RowPtr::cell(int col_idx) const {
259431
return row_data_ + schema_->column_offset(col_idx);
260432
}

0 commit comments

Comments
 (0)