Skip to content

Commit

Permalink
Fix parquet clippy lints (#1254) (#2377)
Browse files Browse the repository at this point in the history
* Fix parquet clippy lints (#1254)

* Fix MapArrayReader
  • Loading branch information
tustvold committed Aug 8, 2022
1 parent 613b99d commit 80a6ef7
Show file tree
Hide file tree
Showing 43 changed files with 316 additions and 451 deletions.
6 changes: 1 addition & 5 deletions .github/workflows/parquet.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,4 @@ jobs:
rustup component add clippy
- name: Run clippy
run: |
# Only run clippy for the library at this time,
# as there are clippy errors for other targets
cargo clippy -p parquet --all-features --lib -- -D warnings
# https://github.com/apache/arrow-rs/issues/1254
#cargo clippy -p parquet --all-targets --all-features -- -D warnings
cargo clippy -p parquet --all-targets --all-features -- -D warnings
5 changes: 1 addition & 4 deletions parquet/src/arrow/array_reader/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,21 +104,18 @@ fn build_list_reader(

let data_type = field.arrow_type.clone();
let item_reader = build_reader(&children[0], row_groups)?;
let item_type = item_reader.get_data_type().clone();

match is_large {
false => Ok(Box::new(ListArrayReader::<i32>::new(
item_reader,
data_type,
item_type,
field.def_level,
field.rep_level,
field.nullable,
)) as _),
true => Ok(Box::new(ListArrayReader::<i64>::new(
item_reader,
data_type,
item_type,
field.def_level,
field.rep_level,
field.nullable,
Expand Down Expand Up @@ -318,7 +315,7 @@ mod tests {
use super::*;
use crate::arrow::parquet_to_arrow_schema;
use crate::file::reader::{FileReader, SerializedFileReader};
use crate::util::test_common::get_test_file;
use crate::util::test_common::file_util::get_test_file;
use arrow::datatypes::Field;
use std::sync::Arc;

Expand Down
14 changes: 3 additions & 11 deletions parquet/src/arrow/array_reader/list_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ use std::sync::Arc;
pub struct ListArrayReader<OffsetSize: OffsetSizeTrait> {
item_reader: Box<dyn ArrayReader>,
data_type: ArrowType,
item_type: ArrowType,
/// The definition level at which this list is not null
def_level: i16,
/// The repetition level that corresponds to a new value in this array
Expand All @@ -49,15 +48,13 @@ impl<OffsetSize: OffsetSizeTrait> ListArrayReader<OffsetSize> {
pub fn new(
item_reader: Box<dyn ArrayReader>,
data_type: ArrowType,
item_type: ArrowType,
def_level: i16,
rep_level: i16,
nullable: bool,
) -> Self {
Self {
item_reader,
data_type,
item_type,
def_level,
rep_level,
nullable,
Expand Down Expand Up @@ -304,13 +301,13 @@ mod tests {
// ]

let l3_item_type = ArrowType::Int32;
let l3_type = list_type::<OffsetSize>(l3_item_type.clone(), true);
let l3_type = list_type::<OffsetSize>(l3_item_type, true);

let l2_item_type = l3_type.clone();
let l2_type = list_type::<OffsetSize>(l2_item_type.clone(), true);
let l2_type = list_type::<OffsetSize>(l2_item_type, true);

let l1_item_type = l2_type.clone();
let l1_type = list_type::<OffsetSize>(l1_item_type.clone(), false);
let l1_type = list_type::<OffsetSize>(l1_item_type, false);

let leaf = PrimitiveArray::<Int32Type>::from_iter(vec![
Some(1),
Expand Down Expand Up @@ -387,7 +384,6 @@ mod tests {
let l3 = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
l3_type,
l3_item_type,
5,
3,
true,
Expand All @@ -396,7 +392,6 @@ mod tests {
let l2 = ListArrayReader::<OffsetSize>::new(
Box::new(l3),
l2_type,
l2_item_type,
3,
2,
false,
Expand All @@ -405,7 +400,6 @@ mod tests {
let mut l1 = ListArrayReader::<OffsetSize>::new(
Box::new(l2),
l1_type,
l1_item_type,
2,
1,
true,
Expand Down Expand Up @@ -456,7 +450,6 @@ mod tests {
let mut list_array_reader = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
list_type::<OffsetSize>(ArrowType::Int32, true),
ArrowType::Int32,
1,
1,
false,
Expand Down Expand Up @@ -509,7 +502,6 @@ mod tests {
let mut list_array_reader = ListArrayReader::<OffsetSize>::new(
Box::new(item_array_reader),
list_type::<OffsetSize>(ArrowType::Int32, true),
ArrowType::Int32,
2,
1,
true,
Expand Down
2 changes: 2 additions & 0 deletions parquet/src/arrow/array_reader/map_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ pub struct MapArrayReader {
value_reader: Box<dyn ArrayReader>,
data_type: ArrowType,
map_def_level: i16,
#[allow(unused)]
map_rep_level: i16,
}

Expand All @@ -47,6 +48,7 @@ impl MapArrayReader {
key_reader,
value_reader,
data_type,
// These are the wrong way round https://github.com/apache/arrow-rs/issues/1699
map_def_level: rep_level,
map_rep_level: def_level,
}
Expand Down
4 changes: 1 addition & 3 deletions parquet/src/arrow/array_reader/null_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ where
pages: Box<dyn PageIterator>,
def_levels_buffer: Option<Buffer>,
rep_levels_buffer: Option<Buffer>,
column_desc: ColumnDescPtr,
record_reader: RecordReader<T>,
}

Expand All @@ -50,14 +49,13 @@ where
{
/// Construct null array reader.
pub fn new(pages: Box<dyn PageIterator>, column_desc: ColumnDescPtr) -> Result<Self> {
let record_reader = RecordReader::<T>::new(column_desc.clone());
let record_reader = RecordReader::<T>::new(column_desc);

Ok(Self {
data_type: ArrowType::Null,
pages,
def_levels_buffer: None,
rep_levels_buffer: None,
column_desc,
record_reader,
})
}
Expand Down
7 changes: 3 additions & 4 deletions parquet/src/arrow/array_reader/primitive_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ where
pages: Box<dyn PageIterator>,
def_levels_buffer: Option<Buffer>,
rep_levels_buffer: Option<Buffer>,
column_desc: ColumnDescPtr,
record_reader: RecordReader<T>,
}

Expand All @@ -67,14 +66,13 @@ where
.clone(),
};

let record_reader = RecordReader::<T>::new(column_desc.clone());
let record_reader = RecordReader::<T>::new(column_desc);

Ok(Self {
data_type,
pages,
def_levels_buffer: None,
rep_levels_buffer: None,
column_desc,
record_reader,
})
}
Expand Down Expand Up @@ -244,14 +242,15 @@ mod tests {
use crate::data_type::Int32Type;
use crate::schema::parser::parse_message_type;
use crate::schema::types::SchemaDescriptor;
use crate::util::test_common::make_pages;
use crate::util::test_common::rand_gen::make_pages;
use crate::util::InMemoryPageIterator;
use arrow::array::PrimitiveArray;
use arrow::datatypes::ArrowPrimitiveType;

use rand::distributions::uniform::SampleUniform;
use std::collections::VecDeque;

#[allow(clippy::too_many_arguments)]
fn make_column_chunks<T: DataType>(
column_desc: ColumnDescPtr,
encoding: Encoding,
Expand Down
1 change: 0 additions & 1 deletion parquet/src/arrow/array_reader/struct_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,6 @@ mod tests {
let list_reader = ListArrayReader::<i32>::new(
Box::new(reader),
expected_l.data_type().clone(),
ArrowType::Int32,
3,
1,
true,
Expand Down
3 changes: 1 addition & 2 deletions parquet/src/arrow/array_reader/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,7 @@ pub fn utf8_column() -> ColumnDescPtr {

/// Encode `data` with the provided `encoding`
pub fn encode_byte_array(encoding: Encoding, data: &[ByteArray]) -> ByteBufferPtr {
let descriptor = utf8_column();
let mut encoder = get_encoder::<ByteArrayType>(descriptor, encoding).unwrap();
let mut encoder = get_encoder::<ByteArrayType>(encoding).unwrap();

encoder.put(data).unwrap();
encoder.flush_buffer().unwrap()
Expand Down
7 changes: 5 additions & 2 deletions parquet/src/arrow/arrow_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ pub(crate) struct RowSelection {

impl RowSelection {
/// Select `row_count` rows
#[allow(unused)]
pub fn select(row_count: usize) -> Self {
Self {
row_count,
Expand All @@ -93,6 +94,7 @@ impl RowSelection {
}

/// Skip `row_count` rows
#[allow(unused)]
pub fn skip(row_count: usize) -> Self {
Self {
row_count,
Expand All @@ -109,7 +111,7 @@ pub struct ArrowReaderOptions {

impl ArrowReaderOptions {
/// Create a new [`ArrowReaderOptions`] with the default settings
fn new() -> Self {
pub fn new() -> Self {
Self::default()
}

Expand All @@ -129,6 +131,7 @@ impl ArrowReaderOptions {
/// Scan rows from the parquet file according to the provided `selection`
///
/// TODO: Make public once row selection fully implemented (#1792)
#[allow(unused)]
pub(crate) fn with_row_selection(
self,
selection: impl Into<Vec<RowSelection>>,
Expand Down Expand Up @@ -433,7 +436,7 @@ mod tests {
use crate::file::writer::SerializedFileWriter;
use crate::schema::parser::parse_message_type;
use crate::schema::types::{Type, TypePtr};
use crate::util::test_common::RandGen;
use crate::util::test_common::rand_gen::RandGen;

#[test]
fn test_arrow_reader_all_columns() {
Expand Down
50 changes: 19 additions & 31 deletions parquet/src/arrow/buffer/converter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,21 @@

use crate::data_type::{ByteArray, FixedLenByteArray, Int96};
use arrow::array::{
Array, ArrayRef, BasicDecimalArray, BinaryArray, BinaryBuilder, Decimal128Array,
FixedSizeBinaryArray, FixedSizeBinaryBuilder, IntervalDayTimeArray,
IntervalDayTimeBuilder, IntervalYearMonthArray, IntervalYearMonthBuilder,
LargeBinaryArray, LargeBinaryBuilder, LargeStringArray, LargeStringBuilder,
StringArray, StringBuilder, TimestampNanosecondArray,
Array, ArrayRef, BasicDecimalArray, Decimal128Array, FixedSizeBinaryArray,
FixedSizeBinaryBuilder, IntervalDayTimeArray, IntervalDayTimeBuilder,
IntervalYearMonthArray, IntervalYearMonthBuilder, TimestampNanosecondArray,
};
use std::convert::{From, TryInto};
use std::sync::Arc;

use crate::errors::Result;
use std::marker::PhantomData;

#[cfg(test)]
use arrow::array::{
BinaryArray, BinaryBuilder, LargeStringArray, LargeStringBuilder, StringArray,
StringBuilder,
};

/// A converter is used to consume record reader's content and convert it to arrow
/// primitive array.
pub trait Converter<S, T> {
Expand Down Expand Up @@ -185,8 +188,10 @@ impl Converter<Vec<Option<Int96>>, TimestampNanosecondArray> for Int96ArrayConve
}
}

#[cfg(test)]
pub struct Utf8ArrayConverter {}

#[cfg(test)]
impl Converter<Vec<Option<ByteArray>>, StringArray> for Utf8ArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<StringArray> {
let data_size = source
Expand All @@ -206,8 +211,10 @@ impl Converter<Vec<Option<ByteArray>>, StringArray> for Utf8ArrayConverter {
}
}

#[cfg(test)]
pub struct LargeUtf8ArrayConverter {}

#[cfg(test)]
impl Converter<Vec<Option<ByteArray>>, LargeStringArray> for LargeUtf8ArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<LargeStringArray> {
let data_size = source
Expand All @@ -227,8 +234,10 @@ impl Converter<Vec<Option<ByteArray>>, LargeStringArray> for LargeUtf8ArrayConve
}
}

#[cfg(test)]
pub struct BinaryArrayConverter {}

#[cfg(test)]
impl Converter<Vec<Option<ByteArray>>, BinaryArray> for BinaryArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<BinaryArray> {
let mut builder = BinaryBuilder::new(source.len());
Expand All @@ -243,33 +252,9 @@ impl Converter<Vec<Option<ByteArray>>, BinaryArray> for BinaryArrayConverter {
}
}

pub struct LargeBinaryArrayConverter {}

impl Converter<Vec<Option<ByteArray>>, LargeBinaryArray> for LargeBinaryArrayConverter {
fn convert(&self, source: Vec<Option<ByteArray>>) -> Result<LargeBinaryArray> {
let mut builder = LargeBinaryBuilder::new(source.len());
for v in source {
match v {
Some(array) => builder.append_value(array.data()),
None => builder.append_null(),
}
}

Ok(builder.finish())
}
}

#[cfg(test)]
pub type Utf8Converter =
ArrayRefConverter<Vec<Option<ByteArray>>, StringArray, Utf8ArrayConverter>;
pub type LargeUtf8Converter =
ArrayRefConverter<Vec<Option<ByteArray>>, LargeStringArray, LargeUtf8ArrayConverter>;
pub type BinaryConverter =
ArrayRefConverter<Vec<Option<ByteArray>>, BinaryArray, BinaryArrayConverter>;
pub type LargeBinaryConverter = ArrayRefConverter<
Vec<Option<ByteArray>>,
LargeBinaryArray,
LargeBinaryArrayConverter,
>;

pub type Int96Converter =
ArrayRefConverter<Vec<Option<Int96>>, TimestampNanosecondArray, Int96ArrayConverter>;
Expand Down Expand Up @@ -299,11 +284,13 @@ pub type DecimalFixedLengthByteArrayConverter = ArrayRefConverter<
pub type DecimalByteArrayConvert =
ArrayRefConverter<Vec<Option<ByteArray>>, Decimal128Array, DecimalArrayConverter>;

#[cfg(test)]
pub struct FromConverter<S, T> {
_source: PhantomData<S>,
_dest: PhantomData<T>,
}

#[cfg(test)]
impl<S, T> FromConverter<S, T>
where
T: From<S>,
Expand All @@ -316,6 +303,7 @@ where
}
}

#[cfg(test)]
impl<S, T> Converter<S, T> for FromConverter<S, T>
where
T: From<S>,
Expand Down
1 change: 1 addition & 0 deletions parquet/src/arrow/buffer/dictionary_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ impl<K: ScalarValue, V: ScalarValue> Default for DictionaryBuffer<K, V> {
impl<K: ScalarValue + ArrowNativeType + Ord, V: ScalarValue + OffsetSizeTrait>
DictionaryBuffer<K, V>
{
#[allow(unused)]
pub fn len(&self) -> usize {
match self {
Self::Dict { keys, .. } => keys.len(),
Expand Down
Loading

0 comments on commit 80a6ef7

Please sign in to comment.