Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ARROW-7684: [Rust] Example Flight client and server for DataFusion
This PR adds DataFusion examples for a Flight client and server where the client can send a SQL query to the server and then receive the results. I have manually tested with a Java client as well to confirm that it works. Closes #6308 from andygrove/datafusion-flight-example and squashes the following commits: 788feef <Andy Grove> code cleanup 9c47338 <Neville Dipale> Complete flight client's record batch reader 1337b98 <Andy Grove> parse recordbatch 459bef3 <Andy Grove> client parses schema from ipc batches 31c894b <Andy Grove> update release test script efe05ae <Andy Grove> update release test script 5ecea83 <Andy Grove> formatting 8b419da <Andy Grove> update release test script 03d2c84 <Andy Grove> client streams results 0a39a51 <Andy Grove> client can stream batches e72c605 <Andy Grove> add starting point for flight-client example ab28da8 <Andy Grove> get schema from query plan instead of from first batch 0901a3f <Neville Dipale> Merge branch 'datafusion-flight-example' of https://github.com/andygrove/arrow into datafusion-flight-example ad2e3b0 <Neville Dipale> send schema before batches 996f2a4 <Andy Grove> Use PARQUET_TEST_DATA env var 260f9ca <Neville Dipale> fix license violation 516b66d <Neville Dipale> add helpers to convert record batch to flight data proto message 6beb4ea <Andy Grove> WIP example Flight server for DataFusion Lead-authored-by: Andy Grove <andygrove73@gmail.com> Co-authored-by: Neville Dipale <nevilledips@gmail.com> Signed-off-by: Andy Grove <andygrove73@gmail.com>
- Loading branch information
Showing
11 changed files
with
394 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
//! Utilities to assist with reading and writing Arrow data as Flight messages | ||
|
||
use std::convert::TryFrom; | ||
use std::sync::Arc; | ||
|
||
use flight::FlightData; | ||
|
||
use crate::datatypes::Schema; | ||
use crate::error::{ArrowError, Result}; | ||
use crate::ipc::{convert, reader, writer}; | ||
use crate::record_batch::RecordBatch; | ||
|
||
/// Convert a `RecordBatch` to `FlightData` by getting the header and body as bytes | ||
impl From<&RecordBatch> for FlightData { | ||
fn from(batch: &RecordBatch) -> Self { | ||
let (header, body) = writer::record_batch_to_bytes(batch); | ||
Self { | ||
flight_descriptor: None, | ||
app_metadata: vec![], | ||
data_header: header, | ||
data_body: body, | ||
} | ||
} | ||
} | ||
|
||
/// Convert a `Schema` to `FlightData` by converting to an IPC message | ||
impl From<&Schema> for FlightData { | ||
fn from(schema: &Schema) -> Self { | ||
let schema = writer::schema_to_bytes(schema); | ||
Self { | ||
flight_descriptor: None, | ||
app_metadata: vec![], | ||
data_header: schema, | ||
data_body: vec![], | ||
} | ||
} | ||
} | ||
|
||
/// Try convert `FlightData` into an Arrow Schema | ||
/// | ||
/// Returns an error if the `FlightData` header is not a valid IPC schema | ||
impl TryFrom<&FlightData> for Schema { | ||
type Error = ArrowError; | ||
fn try_from(data: &FlightData) -> Result<Self> { | ||
convert::schema_from_bytes(&data.data_header[..]).ok_or(ArrowError::ParseError( | ||
"Unable to convert flight data to Arrow schema".to_string(), | ||
)) | ||
} | ||
} | ||
|
||
/// Convert a FlightData message to a RecordBatch | ||
pub fn flight_data_to_batch( | ||
data: &FlightData, | ||
schema: Arc<Schema>, | ||
) -> Result<Option<RecordBatch>> { | ||
// check that the data_header is a record batch message | ||
let message = crate::ipc::get_root_as_message(&data.data_header[..]); | ||
let batch_header = message | ||
.header_as_record_batch() | ||
.ok_or(ArrowError::ParseError( | ||
"Unable to convert flight data header to a record batch".to_string(), | ||
))?; | ||
reader::read_record_batch(&data.data_body, batch_header, schema) | ||
} | ||
|
||
// TODO: add more explicit conversions that expose flight descriptor and metadata options |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<!--- | ||
Licensed to the Apache Software Foundation (ASF) under one | ||
or more contributor license agreements. See the NOTICE file | ||
distributed with this work for additional information | ||
regarding copyright ownership. The ASF licenses this file | ||
to you under the Apache License, Version 2.0 (the | ||
"License"); you may not use this file except in compliance | ||
with the License. You may obtain a copy of the License at | ||
http://www.apache.org/licenses/LICENSE-2.0 | ||
Unless required by applicable law or agreed to in writing, | ||
software distributed under the License is distributed on an | ||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
KIND, either express or implied. See the License for the | ||
specific language governing permissions and limitations | ||
under the License. | ||
--> | ||
|
||
# DataFusion Examples | ||
|
||
## Single Process | ||
|
||
The examples `csv_sql.rs` and `parquet_sql.rs` demonstrate building a query plan from a SQL statement and then executing the query plan against local CSV and Parquet files, respectively. | ||
|
||
## Distributed | ||
|
||
The `flight-client.rs` and `flight-server.rs` examples demonstrate how to run DataFusion as a standalone process and execute SQL queries from a client using the Flight protocol. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
use std::convert::TryFrom; | ||
use std::sync::Arc; | ||
|
||
use arrow::array::Int32Array; | ||
use arrow::datatypes::Schema; | ||
use arrow::flight::flight_data_to_batch; | ||
use flight::flight_service_client::FlightServiceClient; | ||
use flight::Ticket; | ||
|
||
#[tokio::main] | ||
async fn main() -> Result<(), Box<dyn std::error::Error>> { | ||
let mut client = FlightServiceClient::connect("http://localhost:50051").await?; | ||
|
||
let request = tonic::Request::new(Ticket { | ||
ticket: "SELECT id FROM alltypes_plain".into(), | ||
}); | ||
|
||
let mut stream = client.do_get(request).await?.into_inner(); | ||
|
||
// the schema should be the first message returned, else client should error | ||
let flight_data = stream.message().await?.unwrap(); | ||
// convert FlightData to a stream | ||
let schema = Arc::new(Schema::try_from(&flight_data)?); | ||
println!("Schema: {:?}", schema); | ||
|
||
// all the remaining stream messages should be dictionary and record batches | ||
while let Some(flight_data) = stream.message().await? { | ||
// the unwrap is infallible and thus safe | ||
let record_batch = flight_data_to_batch(&flight_data, schema.clone())?.unwrap(); | ||
|
||
println!( | ||
"record_batch has {} columns and {} rows", | ||
record_batch.num_columns(), | ||
record_batch.num_rows() | ||
); | ||
let column = record_batch.column(0); | ||
let column = column | ||
.as_any() | ||
.downcast_ref::<Int32Array>() | ||
.expect("Unable to get column"); | ||
println!("Column 1: {:?}", column); | ||
} | ||
|
||
Ok(()) | ||
} |
Oops, something went wrong.