Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ jobs:
with:
rust-version: stable

- name: Install jq
  # jq is used by the "Check tests in each package" step to read package
  # names out of `cargo metadata`. `apt-get` is used instead of `apt`
  # because `apt` is the interactive front-end and warns that its CLI is not
  # stable for scripts; the index is refreshed first so the install does not
  # depend on an earlier step having populated it.
  # NOTE(review): assumes the job runs as root (no `sudo`), as the original
  # command did — confirm against the job's container settings.
  run: apt-get update && apt-get install -y jq

- name: Cache Cargo
uses: actions/cache@v3
with:
Expand All @@ -68,6 +71,12 @@ jobs:
- name: Check workspace without default features
run: cargo check --no-default-features -p datafusion

- name: Check tests in each package
# Lists the package names reported by `cargo metadata --no-deps` (extracted
# with jq) and type-checks each package's test targets on its own, stopping
# at the first package whose `cargo check --tests` fails.
# NOTE(review): if the `cargo metadata | jq` pipeline yields no names, the
# loop body never runs and the step passes — confirm this is acceptable.
run: |
for package in $(cargo metadata --no-deps --format-version 1 | jq -r '.packages[].name'); do
cargo check --tests --package "$package" || exit 1
done
Comment on lines +74 to +78
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure if we need --no-default-features to check tests 🤔

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this command verifies that `cargo test --package "$package"` will compile successfully, but it doesn't check whether `cargo test --no-default-features --package "$package"` would run successfully.

But I am not sure how common running cargo test --no-default-features --package "$package" is 🤔


- name: Check workspace in debug mode
run: cargo check

Expand Down
3 changes: 2 additions & 1 deletion datafusion/common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ path = "src/lib.rs"
[features]
avro = ["apache-avro"]
backtrace = []
pyarrow = ["pyo3", "arrow/pyarrow", "parquet"]
default = ["parquet"]
pyarrow = ["pyo3", "arrow/pyarrow"]

[dependencies]
ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] }
Expand Down
4 changes: 0 additions & 4 deletions datafusion/common/src/file_options/file_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ pub enum FileType {
/// Apache Avro file
AVRO,
/// Apache Parquet file
#[cfg(feature = "parquet")]
PARQUET,
/// CSV file
CSV,
Expand All @@ -61,7 +60,6 @@ impl GetExt for FileType {
match self {
FileType::ARROW => DEFAULT_ARROW_EXTENSION.to_owned(),
FileType::AVRO => DEFAULT_AVRO_EXTENSION.to_owned(),
#[cfg(feature = "parquet")]
FileType::PARQUET => DEFAULT_PARQUET_EXTENSION.to_owned(),
FileType::CSV => DEFAULT_CSV_EXTENSION.to_owned(),
FileType::JSON => DEFAULT_JSON_EXTENSION.to_owned(),
Expand All @@ -74,7 +72,6 @@ impl Display for FileType {
let out = match self {
FileType::CSV => "csv",
FileType::JSON => "json",
#[cfg(feature = "parquet")]
FileType::PARQUET => "parquet",
FileType::AVRO => "avro",
FileType::ARROW => "arrow",
Expand All @@ -91,7 +88,6 @@ impl FromStr for FileType {
match s.as_str() {
"ARROW" => Ok(FileType::ARROW),
"AVRO" => Ok(FileType::AVRO),
#[cfg(feature = "parquet")]
"PARQUET" => Ok(FileType::PARQUET),
"CSV" => Ok(FileType::CSV),
"JSON" | "NDJSON" => Ok(FileType::JSON),
Expand Down
11 changes: 2 additions & 9 deletions datafusion/common/src/file_options/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ pub mod avro_writer;
pub mod csv_writer;
pub mod file_type;
pub mod json_writer;
#[cfg(feature = "parquet")]
pub mod parquet_writer;
pub(crate) mod parse_utils;

Expand All @@ -38,14 +37,13 @@ use crate::{
DataFusionError, FileType, Result,
};

#[cfg(feature = "parquet")]
use self::parquet_writer::ParquetWriterOptions;

use self::{
arrow_writer::ArrowWriterOptions, avro_writer::AvroWriterOptions,
csv_writer::CsvWriterOptions, json_writer::JsonWriterOptions,
};

use self::parquet_writer::ParquetWriterOptions;

/// Represents a single arbitrary setting in a
/// [StatementOptions] where OptionTuple.0 determines
/// the specific setting to be modified and OptionTuple.1
Expand Down Expand Up @@ -148,7 +146,6 @@ impl StatementOptions {
/// plus any DataFusion specific writing options (e.g. CSV compression)
#[derive(Clone, Debug)]
pub enum FileTypeWriterOptions {
#[cfg(feature = "parquet")]
Parquet(ParquetWriterOptions),
CSV(CsvWriterOptions),
JSON(JsonWriterOptions),
Expand All @@ -168,7 +165,6 @@ impl FileTypeWriterOptions {
let options = (config_defaults, statement_options);

let file_type_write_options = match file_type {
#[cfg(feature = "parquet")]
FileType::PARQUET => {
FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?)
}
Expand Down Expand Up @@ -198,7 +194,6 @@ impl FileTypeWriterOptions {
let options = (config_defaults, &empty_statement);

let file_type_write_options = match file_type {
#[cfg(feature = "parquet")]
FileType::PARQUET => {
FileTypeWriterOptions::Parquet(ParquetWriterOptions::try_from(options)?)
}
Expand All @@ -221,7 +216,6 @@ impl FileTypeWriterOptions {

/// Tries to extract ParquetWriterOptions from this FileTypeWriterOptions enum.
/// Returns an error if a different type from parquet is set.
#[cfg(feature = "parquet")]
pub fn try_into_parquet(&self) -> Result<&ParquetWriterOptions> {
match self {
FileTypeWriterOptions::Parquet(opt) => Ok(opt),
Expand Down Expand Up @@ -288,7 +282,6 @@ impl Display for FileTypeWriterOptions {
FileTypeWriterOptions::Avro(_) => "AvroWriterOptions",
FileTypeWriterOptions::CSV(_) => "CsvWriterOptions",
FileTypeWriterOptions::JSON(_) => "JsonWriterOptions",
#[cfg(feature = "parquet")]
FileTypeWriterOptions::Parquet(_) => "ParquetWriterOptions",
};
write!(f, "{}", name)
Expand Down
28 changes: 28 additions & 0 deletions datafusion/common/src/file_options/parquet_writer/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Exactly one of the two private modules below is compiled, selected by the
// `parquet` cargo feature; whichever is compiled has its public items
// re-exported from this module.

// Implementation module, compiled only when the `parquet` feature is enabled.
#[cfg(feature = "parquet")]
mod parquet;

// `self::` makes the path name the local `parquet` module declared above.
// NOTE(review): presumably needed to avoid ambiguity with an external crate
// of the same name — confirm.
#[cfg(feature = "parquet")]
pub use self::parquet::*;

// Fallback module, compiled only when the `parquet` feature is disabled.
#[cfg(not(feature = "parquet"))]
mod parquet_stub;

// Re-export the fallback's public items in place of the implementation's.
#[cfg(not(feature = "parquet"))]
pub use parquet_stub::*;
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,7 @@ use parquet::file::properties::{WriterProperties, WriterPropertiesBuilder};

use crate::{config::ConfigOptions, DataFusionError, Result};

use super::StatementOptions;

use crate::file_options::StatementOptions;
use parquet::{
basic::{BrotliLevel, GzipLevel, ZstdLevel},
file::properties::{EnabledStatistics, WriterVersion},
Expand Down
36 changes: 36 additions & 0 deletions datafusion/common/src/file_options/parquet_writer/parquet_stub.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

use crate::config::ConfigOptions;
use crate::error::_not_impl_err;
use crate::file_options::StatementOptions;
use crate::{DataFusionError, Result};

/// Stub stand-in for `ParquetWriterOptions`, used when the `parquet` feature
/// is not activated.
///
/// The struct has no fields. Its
/// `TryFrom<(&ConfigOptions, &StatementOptions)>` conversion never produces a
/// value: it always yields the error built by `_not_impl_err!`.
#[derive(Clone, Debug)]
pub struct ParquetWriterOptions {}

/// Conversion from `(&ConfigOptions, &StatementOptions)` for the stub
/// `ParquetWriterOptions`; it never succeeds.
impl TryFrom<(&ConfigOptions, &StatementOptions)> for ParquetWriterOptions {
type Error = DataFusionError;

/// Ignores its argument and always returns the result of `_not_impl_err!`
/// with a message telling the caller that parquet support is not enabled.
///
/// # Errors
///
/// Always. NOTE(review): the exact `DataFusionError` variant is decided by
/// the `_not_impl_err!` macro, which is defined elsewhere — confirm there.
fn try_from(_: (&ConfigOptions, &StatementOptions)) -> Result<Self> {
_not_impl_err!(
"Parquet support is not enabled. Hint enable the `parquet` feature flag"
)
}
}
1 change: 0 additions & 1 deletion datafusion/common/src/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ pub fn arrow_test_data() -> String {
/// let filename = format!("{}/binary.parquet", testdata);
/// assert!(std::path::PathBuf::from(filename).exists());
/// ```
#[cfg(feature = "parquet")]
pub fn parquet_test_data() -> String {
match get_data_dir("PARQUET_TEST_DATA", "../../parquet-testing/data") {
Ok(pb) => pb.display().to_string(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,6 @@ impl FileTypeExt for FileType {
"FileCompressionType can be specified for CSV/JSON FileType.".into(),
)),
},
#[cfg(feature = "parquet")]
FileType::PARQUET => match c.variant {
UNCOMPRESSED => Ok(ext),
_ => Err(DataFusionError::Internal(
Expand Down Expand Up @@ -285,7 +284,6 @@ mod tests {

let mut ty_ext_tuple = vec![];
ty_ext_tuple.push((FileType::AVRO, ".avro"));
#[cfg(feature = "parquet")]
ty_ext_tuple.push((FileType::PARQUET, ".parquet"));

// Cannot specify compression for these file types
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/file_format/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ pub mod csv;
pub mod file_compression_type;
pub mod json;
pub mod options;
#[cfg(feature = "parquet")]
pub mod parquet;

pub mod write;

use std::any::Any;
Expand Down
1 change: 0 additions & 1 deletion datafusion/core/src/datasource/file_format/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ use datafusion_common::{plan_err, DataFusionError};

use crate::datasource::file_format::arrow::ArrowFormat;
use crate::datasource::file_format::file_compression_type::FileCompressionType;
#[cfg(feature = "parquet")]
use crate::datasource::file_format::parquet::ParquetFormat;
use crate::datasource::file_format::DEFAULT_SCHEMA_INFER_MAX_RECORD;
use crate::datasource::listing::{ListingTableInsertMode, ListingTableUrl};
Expand Down
30 changes: 30 additions & 0 deletions datafusion/core/src/datasource/file_format/parquet/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

//! FileFormat for parquet

/// Actual implementation, compiled only when the `parquet` feature is
/// enabled; all of its public items are re-exported from this module.
#[cfg(feature = "parquet")]
mod parquet_impl;
#[cfg(feature = "parquet")]
pub use parquet_impl::*;

/// Dummy implementation, compiled only when the `parquet` feature is
/// disabled.
// NOTE(review): only `ParquetFormat` is re-exported from the stub, whereas
// the real implementation is glob re-exported — any other public item of
// `parquet_impl` is therefore unavailable without the feature; confirm this
// is intended.
#[cfg(not(feature = "parquet"))]
mod parquet_stub;
#[cfg(not(feature = "parquet"))]
pub use parquet_stub::ParquetFormat;
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,20 @@ use parquet::file::metadata::ParquetMetaData;
use parquet::file::properties::WriterProperties;
use parquet::file::statistics::Statistics as ParquetStatistics;

use super::write::demux::start_demuxer_task;
use super::write::{create_writer, AbortableWrite, FileWriterMode};
use super::{FileFormat, FileScanConfig};
use crate::arrow::array::{
BooleanArray, Float32Array, Float64Array, Int32Array, Int64Array,
};
use crate::arrow::datatypes::DataType;
use crate::config::ConfigOptions;
use crate::datasource::file_format::write::demux::start_demuxer_task;
use crate::datasource::file_format::write::{
create_writer, AbortableWrite, FileWriterMode,
};
use crate::datasource::file_format::FileFormat;

use crate::datasource::physical_plan::{
FileGroupDisplay, FileMeta, FileSinkConfig, ParquetExec, SchemaAdapter,
FileGroupDisplay, FileMeta, FileScanConfig, FileSinkConfig, ParquetExec,
SchemaAdapter,
};
use crate::error::Result;
use crate::execution::context::SessionState;
Expand Down Expand Up @@ -1204,14 +1207,14 @@ pub(crate) mod test_util {

#[cfg(test)]
mod tests {
use super::super::test_util::scan_format;
use crate::physical_plan::collect;
use std::fmt::{Display, Formatter};
use std::sync::atomic::{AtomicUsize, Ordering};

use super::*;

use crate::datasource::file_format::parquet::test_util::store_parquet;
use crate::datasource::file_format::test_util::scan_format;
use crate::physical_plan::metrics::MetricValue;
use crate::prelude::{SessionConfig, SessionContext};
use arrow::array::{Array, ArrayRef, StringArray};
Expand Down
Loading