|
17 | 17 |
|
18 | 18 | #include "kudu/client/scan_batch.h"
|
19 | 19 |
|
| 20 | +#include <algorithm> |
| 21 | +#include <iterator> |
20 | 22 | #include <cstring>
|
21 | 23 | #include <string>
|
| 24 | +#include <vector> |
22 | 25 |
|
23 | 26 | #include <glog/logging.h>
|
24 | 27 |
|
25 | 28 | #include "kudu/client/row_result.h"
|
26 | 29 | #include "kudu/client/scanner-internal.h"
|
| 30 | +#include "kudu/common/array_cell_view.h" |
27 | 31 | #include "kudu/common/common.pb.h"
|
28 | 32 | #include "kudu/common/schema.h"
|
29 | 33 | #include "kudu/common/types.h"
|
|
33 | 37 | #include "kudu/util/logging.h"
|
34 | 38 |
|
35 | 39 | using std::string;
|
| 40 | +using std::vector; |
36 | 41 | using strings::Substitute;
|
37 | 42 |
|
38 | 43 | namespace kudu {
|
@@ -255,6 +260,173 @@ Status KuduScanBatch::RowPtr::Get(int col_idx, typename T::cpp_type* val) const
|
255 | 260 | return Status::OK();
|
256 | 261 | }
|
257 | 262 |
|
| 263 | +namespace { |
| 264 | + |
| 265 | +Status ArrayValidation(const ColumnSchema& col, |
| 266 | + const char* type_name) { |
| 267 | + if (PREDICT_FALSE(col.type_info()->type() != NESTED)) { |
| 268 | + return BadTypeStatus(type_name, col); |
| 269 | + } |
| 270 | + const auto* descriptor = col.type_info()->nested_type_info(); |
| 271 | + if (PREDICT_FALSE(!descriptor)) { |
| 272 | + return Status::InvalidArgument(Substitute( |
| 273 | + "column '$0': missing type descriptor for NESTED type", col.name())); |
| 274 | + } |
| 275 | + if (PREDICT_FALSE(!descriptor->is_array())) { |
| 276 | + return Status::InvalidArgument(Substitute( |
| 277 | + "column '$0': underlying NESTED type isn't an array", col.name())); |
| 278 | + } |
| 279 | + return Status::OK(); |
| 280 | +} |
| 281 | + |
| 282 | +} // anonymous namespace |
| 283 | + |
| 284 | +template<typename T> |
| 285 | +Status KuduScanBatch::RowPtr::GetArray(const Slice& col_name, |
| 286 | + vector<typename T::cpp_type>* data_out, |
| 287 | + vector<bool>* validity_out) const { |
| 288 | + int col_idx; |
| 289 | + RETURN_NOT_OK(schema_->FindColumn(col_name, &col_idx)); |
| 290 | + return GetArray<T>(col_idx, data_out, validity_out); |
| 291 | +} |
| 292 | + |
| 293 | +template<typename T> |
| 294 | +Status KuduScanBatch::RowPtr::GetArray(int col_idx, |
| 295 | + vector<typename T::cpp_type>* data_out, |
| 296 | + vector<bool>* validity_out) const { |
| 297 | + const ColumnSchema& col = schema_->column(col_idx); |
| 298 | + RETURN_NOT_OK(ArrayValidation(col, T::name())); |
| 299 | + if (PREDICT_FALSE(col.is_nullable() && IsNull(col_idx))) { |
| 300 | + return Status::NotFound("column is NULL"); |
| 301 | + } |
| 302 | + const Slice* cell_data = reinterpret_cast<const Slice*>( |
| 303 | + row_data_ + schema_->column_offset(col_idx)); |
| 304 | + ArrayCellMetadataView view(cell_data->data(), cell_data->size()); |
| 305 | + RETURN_NOT_OK(view.Init()); |
| 306 | + |
| 307 | + if (data_out) { |
| 308 | + data_out->resize(view.elem_num()); |
| 309 | + if (!view.empty()) { |
| 310 | + const uint8_t* data_raw = view.data_as(T::type); |
| 311 | + DCHECK(data_raw); |
| 312 | + memcpy(data_out->data(), data_raw, view.elem_num() * sizeof(typename T::cpp_type)); |
| 313 | + } |
| 314 | + } |
| 315 | + if (validity_out) { |
| 316 | + validity_out->resize(view.elem_num()); |
| 317 | + if (!view.empty()) { |
| 318 | + *validity_out = BitmapToVector(view.not_null_bitmap(), view.elem_num()); |
| 319 | + } |
| 320 | + } |
| 321 | + return Status::OK(); |
| 322 | +} |
| 323 | + |
| 324 | +// Since std::vector<bool> isn't a standard container, the data() accessor |
| 325 | +// isn't available and copying the data requires an alternative approach. |
| 326 | +template<> |
| 327 | +Status KuduScanBatch::RowPtr::GetArray<TypeTraits<BOOL>>( |
| 328 | + int col_idx, |
| 329 | + vector<bool>* data_out, |
| 330 | + vector<bool>* validity) const { |
| 331 | + const ColumnSchema& col = schema_->column(col_idx); |
| 332 | + RETURN_NOT_OK(ArrayValidation(col, TypeTraits<BOOL>::name())); |
| 333 | + if (PREDICT_FALSE(col.is_nullable() && IsNull(col_idx))) { |
| 334 | + return Status::NotFound("column is NULL"); |
| 335 | + } |
| 336 | + const Slice* cell_data = reinterpret_cast<const Slice*>( |
| 337 | + row_data_ + schema_->column_offset(col_idx)); |
| 338 | + ArrayCellMetadataView view(cell_data->data(), cell_data->size()); |
| 339 | + RETURN_NOT_OK(view.Init()); |
| 340 | + |
| 341 | + if (data_out) { |
| 342 | + const size_t elem_num = view.elem_num(); |
| 343 | + data_out->clear(); |
| 344 | + data_out->reserve(elem_num); |
| 345 | + const uint8_t* data_raw = view.data_as(BOOL); |
| 346 | + DCHECK(data_raw); |
| 347 | + std::copy(data_raw, data_raw + elem_num, std::back_inserter(*data_out)); |
| 348 | + } |
| 349 | + if (validity) { |
| 350 | + *validity = BitmapToVector(view.not_null_bitmap(), view.elem_num()); |
| 351 | + } |
| 352 | + return Status::OK(); |
| 353 | +} |
| 354 | + |
| 355 | +Status KuduScanBatch::RowPtr::GetArrayBool(int col_idx, |
| 356 | + vector<bool>* data, |
| 357 | + vector<bool>* validity) const { |
| 358 | + return GetArray<TypeTraits<BOOL>>(col_idx, data, validity); |
| 359 | +} |
| 360 | + |
| 361 | +Status KuduScanBatch::RowPtr::GetArrayInt8(int col_idx, |
| 362 | + vector<int8_t>* data, |
| 363 | + vector<bool>* validity) const { |
| 364 | + return GetArray<TypeTraits<INT8>>(col_idx, data, validity); |
| 365 | +} |
| 366 | + |
| 367 | +Status KuduScanBatch::RowPtr::GetArrayInt16(int col_idx, |
| 368 | + vector<int16_t>* data, |
| 369 | + vector<bool>* validity) const { |
| 370 | + return GetArray<TypeTraits<INT16>>(col_idx, data, validity); |
| 371 | +} |
| 372 | + |
| 373 | +Status KuduScanBatch::RowPtr::GetArrayInt32(int col_idx, |
| 374 | + vector<int32_t>* data, |
| 375 | + vector<bool>* validity) const { |
| 376 | + return GetArray<TypeTraits<INT32>>(col_idx, data, validity); |
| 377 | +} |
| 378 | + |
| 379 | +Status KuduScanBatch::RowPtr::GetArrayInt64(int col_idx, |
| 380 | + vector<int64_t>* data, |
| 381 | + vector<bool>* validity) const { |
| 382 | + return GetArray<TypeTraits<INT64>>(col_idx, data, validity); |
| 383 | +} |
| 384 | +Status KuduScanBatch::RowPtr::GetArrayUnscaledDecimal(int col_idx, |
| 385 | + vector<int32_t>* data, |
| 386 | + vector<bool>* validity) const { |
| 387 | + return GetArray<TypeTraits<DECIMAL32>>(col_idx, data, validity); |
| 388 | +} |
| 389 | +Status KuduScanBatch::RowPtr::GetArrayUnscaledDecimal(int col_idx, |
| 390 | + vector<int64_t>* data, |
| 391 | + vector<bool>* validity) const { |
| 392 | + return GetArray<TypeTraits<DECIMAL64>>(col_idx, data, validity); |
| 393 | +} |
| 394 | +Status KuduScanBatch::RowPtr::GetArrayUnixTimeMicros(int col_idx, |
| 395 | + vector<int64_t>* data, |
| 396 | + vector<bool>* validity) const { |
| 397 | + return GetArray<TypeTraits<UNIXTIME_MICROS>>(col_idx, data, validity); |
| 398 | +} |
| 399 | +Status KuduScanBatch::RowPtr::GetArrayDate(int col_idx, |
| 400 | + vector<int32_t>* data, |
| 401 | + vector<bool>* validity) const { |
| 402 | + return GetArray<TypeTraits<DATE>>(col_idx, data, validity); |
| 403 | +} |
| 404 | +Status KuduScanBatch::RowPtr::GetArrayFloat(int col_idx, |
| 405 | + vector<float>* data, |
| 406 | + vector<bool>* validity) const { |
| 407 | + return GetArray<TypeTraits<FLOAT>>(col_idx, data, validity); |
| 408 | +} |
| 409 | +Status KuduScanBatch::RowPtr::GetArrayDouble(int col_idx, |
| 410 | + vector<double>* data, |
| 411 | + vector<bool>* validity) const { |
| 412 | + return GetArray<TypeTraits<DOUBLE>>(col_idx, data, validity); |
| 413 | +} |
| 414 | +Status KuduScanBatch::RowPtr::GetArrayString(int col_idx, |
| 415 | + vector<Slice>* data, |
| 416 | + vector<bool>* validity) const { |
| 417 | + return GetArray<TypeTraits<STRING>>(col_idx, data, validity); |
| 418 | +} |
| 419 | +Status KuduScanBatch::RowPtr::GetArrayBinary(int col_idx, |
| 420 | + vector<Slice>* data, |
| 421 | + vector<bool>* validity) const { |
| 422 | + return GetArray<TypeTraits<BINARY>>(col_idx, data, validity); |
| 423 | +} |
| 424 | +Status KuduScanBatch::RowPtr::GetArrayVarchar(int col_idx, |
| 425 | + vector<Slice>* data, |
| 426 | + vector<bool>* validity) const { |
| 427 | + return GetArray<TypeTraits<VARCHAR>>(col_idx, data, validity); |
| 428 | +} |
| 429 | + |
258 | 430 | const void* KuduScanBatch::RowPtr::cell(int col_idx) const {
|
259 | 431 | return row_data_ + schema_->column_offset(col_idx);
|
260 | 432 | }
|
|
0 commit comments