Skip to content

Commit

Permalink
remove byteorder dependency from parquet (#2486)
Browse files (browse the repository at this point in the history)
  • Loading branch information
psvri committed Aug 17, 2022
1 parent 3557428 commit 9f77e4e
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 28 deletions.
1 change: 0 additions & 1 deletion parquet/Cargo.toml
Expand Up @@ -33,7 +33,6 @@ rust-version = "1.62"
ahash = "0.8"
parquet-format = { version = "4.0.0", default-features = false }
bytes = { version = "1.1", default-features = false, features = ["std"] }
byteorder = { version = "1", default-features = false }
thrift = { version = "0.13", default-features = false }
snap = { version = "1.0", default-features = false, optional = true }
brotli = { version = "3.3", default-features = false, features = ["std"], optional = true }
Expand Down
17 changes: 7 additions & 10 deletions parquet/src/data_type.rs
Expand Up @@ -23,8 +23,6 @@ use std::mem;
use std::ops::{Deref, DerefMut};
use std::str::from_utf8;

use byteorder::{BigEndian, ByteOrder};

use crate::basic::Type;
use crate::column::reader::{ColumnReader, ColumnReaderImpl};
use crate::column::writer::{ColumnWriter, ColumnWriterImpl};
Expand Down Expand Up @@ -337,8 +335,7 @@ pub enum Decimal {
impl Decimal {
/// Creates new decimal value from `i32`.
pub fn from_i32(value: i32, precision: i32, scale: i32) -> Self {
let mut bytes = [0; 4];
BigEndian::write_i32(&mut bytes, value);
let bytes = value.to_be_bytes();
Decimal::Int32 {
value: bytes,
precision,
Expand All @@ -348,8 +345,7 @@ impl Decimal {

/// Creates new decimal value from `i64`.
pub fn from_i64(value: i64, precision: i32, scale: i32) -> Self {
let mut bytes = [0; 8];
BigEndian::write_i64(&mut bytes, value);
let bytes = value.to_be_bytes();
Decimal::Int64 {
value: bytes,
precision,
Expand Down Expand Up @@ -557,7 +553,6 @@ pub(crate) mod private {
use crate::util::memory::ByteBufferPtr;

use crate::basic::Type;
use byteorder::ByteOrder;
use std::convert::TryInto;

use super::{ParquetError, Result, SliceAsBytes};
Expand Down Expand Up @@ -839,9 +834,11 @@ pub(crate) mod private {

let mut pos = 0; // position in byte array
for item in buffer.iter_mut().take(num_values) {
let elem0 = byteorder::LittleEndian::read_u32(&bytes[pos..pos + 4]);
let elem1 = byteorder::LittleEndian::read_u32(&bytes[pos + 4..pos + 8]);
let elem2 = byteorder::LittleEndian::read_u32(&bytes[pos + 8..pos + 12]);
let elem0 = u32::from_le_bytes(bytes[pos..pos + 4].try_into().unwrap());
let elem1 =
u32::from_le_bytes(bytes[pos + 4..pos + 8].try_into().unwrap());
let elem2 =
u32::from_le_bytes(bytes[pos + 8..pos + 12].try_into().unwrap());

item.set_data(elem0, elem1, elem2);
pos += 12;
Expand Down
3 changes: 1 addition & 2 deletions parquet/src/file/footer.rs
Expand Up @@ -17,7 +17,6 @@

use std::{io::Read, sync::Arc};

use byteorder::{ByteOrder, LittleEndian};
use parquet_format::{ColumnOrder as TColumnOrder, FileMetaData as TFileMetaData};
use thrift::protocol::TCompactInputProtocol;

Expand Down Expand Up @@ -101,7 +100,7 @@ pub fn decode_footer(slice: &[u8; FOOTER_SIZE]) -> Result<usize> {
}

// get the metadata length from the footer
let metadata_len = LittleEndian::read_i32(&slice[..4]);
let metadata_len = i32::from_le_bytes(slice[..4].try_into().unwrap());
metadata_len.try_into().map_err(|_| {
general_err!(
"Invalid Parquet file. Metadata length is less than zero ({})",
Expand Down
17 changes: 8 additions & 9 deletions parquet/src/file/statistics.rs
Expand Up @@ -39,7 +39,6 @@

use std::fmt;

use byteorder::{ByteOrder, LittleEndian};
use parquet_format::Statistics as TStatistics;

use crate::basic::Type;
Expand Down Expand Up @@ -163,15 +162,15 @@ pub fn from_thrift(
old_format,
),
Type::INT32 => Statistics::int32(
min.map(|data| LittleEndian::read_i32(&data)),
max.map(|data| LittleEndian::read_i32(&data)),
min.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())),
max.map(|data| i32::from_le_bytes(data[..4].try_into().unwrap())),
distinct_count,
null_count,
old_format,
),
Type::INT64 => Statistics::int64(
min.map(|data| LittleEndian::read_i64(&data)),
max.map(|data| LittleEndian::read_i64(&data)),
min.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())),
max.map(|data| i64::from_le_bytes(data[..8].try_into().unwrap())),
distinct_count,
null_count,
old_format,
Expand All @@ -191,15 +190,15 @@ pub fn from_thrift(
Statistics::int96(min, max, distinct_count, null_count, old_format)
}
Type::FLOAT => Statistics::float(
min.map(|data| LittleEndian::read_f32(&data)),
max.map(|data| LittleEndian::read_f32(&data)),
min.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())),
max.map(|data| f32::from_le_bytes(data[..4].try_into().unwrap())),
distinct_count,
null_count,
old_format,
),
Type::DOUBLE => Statistics::double(
min.map(|data| LittleEndian::read_f64(&data)),
max.map(|data| LittleEndian::read_f64(&data)),
min.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())),
max.map(|data| f64::from_le_bytes(data[..8].try_into().unwrap())),
distinct_count,
null_count,
old_format,
Expand Down
10 changes: 4 additions & 6 deletions parquet/src/file/writer.rs
Expand Up @@ -20,7 +20,6 @@

use std::{io::Write, sync::Arc};

use byteorder::{ByteOrder, LittleEndian};
use parquet_format as parquet;
use parquet_format::{ColumnIndex, OffsetIndex, RowGroup};
use thrift::protocol::{TCompactOutputProtocol, TOutputProtocol};
Expand All @@ -35,7 +34,7 @@ use crate::data_type::DataType;
use crate::errors::{ParquetError, Result};
use crate::file::{
metadata::*, properties::WriterPropertiesPtr,
statistics::to_thrift as statistics_to_thrift, FOOTER_SIZE, PARQUET_MAGIC,
statistics::to_thrift as statistics_to_thrift, PARQUET_MAGIC,
};
use crate::schema::types::{
self, ColumnDescPtr, SchemaDescPtr, SchemaDescriptor, TypePtr,
Expand Down Expand Up @@ -292,11 +291,10 @@ impl<W: Write> SerializedFileWriter<W> {
let end_pos = self.buf.bytes_written();

// Write footer
let mut footer_buffer: [u8; FOOTER_SIZE] = [0; FOOTER_SIZE];
let metadata_len = (end_pos - start_pos) as i32;
LittleEndian::write_i32(&mut footer_buffer, metadata_len);
(&mut footer_buffer[4..]).write_all(&PARQUET_MAGIC)?;
self.buf.write_all(&footer_buffer)?;

self.buf.write_all(&metadata_len.to_le_bytes())?;
self.buf.write_all(&PARQUET_MAGIC)?;
Ok(file_metadata)
}

Expand Down

0 comments on commit 9f77e4e

Please sign in to comment.