Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion datafusion/common/src/dfschema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

use std::collections::{HashMap, HashSet};
use std::convert::TryFrom;
use std::hash::Hash;
use std::sync::Arc;

use crate::error::{DataFusionError, Result, SchemaError};
Expand Down Expand Up @@ -496,6 +497,15 @@ impl From<DFSchema> for SchemaRef {
}
}

// Hashing refers to a subset of fields considered in PartialEq.
#[allow(clippy::derive_hash_xor_eq)]
impl Hash for DFSchema {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.fields.hash(state);
self.metadata.len().hash(state); // HashMap is not hashable
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree it is ok to just use the metadata's length to hash as it satisfies the EQ constraint

https://doc.rust-lang.org/std/hash/trait.Hash.html#hash-and-eq

}
}

/// Convenience trait to convert Schema like things to DFSchema and DFSchemaRef with fewer keystrokes
pub trait ToDFSchema
where
Expand Down Expand Up @@ -587,7 +597,7 @@ impl ExprSchema for DFSchema {
}

/// DFField wraps an Arrow field and adds an optional qualifier
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct DFField {
/// Optional qualifier (usually a table or relation name)
qualifier: Option<String>,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/src/parsers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ const SECONDS_PER_HOUR: f64 = 3_600_f64;
const NANOS_PER_SECOND: f64 = 1_000_000_000_f64;

/// Readable file compression type
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CompressionTypeVariant {
/// Gzip-ed file
GZIP,
Expand Down
2 changes: 1 addition & 1 deletion datafusion/common/src/table_reference.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ pub enum TableReference<'a> {

/// Represents a path to a table that may require further resolution
/// that owns the underlying names
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum OwnedTableReference {
/// An unqualified table reference, e.g. "table"
Bare {
Expand Down
14 changes: 14 additions & 0 deletions datafusion/core/src/physical_plan/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2373,6 +2373,7 @@ Internal error: Optimizer rule 'type_coercion' failed due to unexpected error: E
}
}
/// An example extension node that doesn't do anything
///
/// `PartialEq`, `Eq` and `Hash` are derived so the node can be compared with
/// `==` and fed to a hasher.
#[derive(PartialEq, Eq, Hash)]
struct NoOpExtensionNode {
/// Schema carried by this node.
schema: DFSchemaRef,
}
Expand Down Expand Up @@ -2425,6 +2426,19 @@ Internal error: Optimizer rule 'type_coercion' failed due to unexpected error: E
) -> Arc<dyn UserDefinedLogicalNode> {
unimplemented!("NoOp");
}

/// Returns `true` only when `other` downcasts to the same concrete type as
/// `self` and the two values compare equal with `==`.
fn dyn_eq(&self, other: &dyn UserDefinedLogicalNode) -> bool {
    // A failed downcast means a different node type, which is never equal.
    matches!(
        other.as_any().downcast_ref::<Self>(),
        Some(same_type) if self == same_type
    )
}

/// Feeds this node's `Hash` implementation into the type-erased `state`.
fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
    // `Hash::hash` wants a sized hasher; `&mut dyn Hasher` is itself a
    // `Hasher`, so pass a mutable borrow of the reference.
    let mut hasher = state;
    std::hash::Hash::hash(self, &mut hasher);
}
}

#[derive(Debug)]
Expand Down
14 changes: 14 additions & 0 deletions datafusion/core/tests/user_defined_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ impl OptimizerRule for TopKOptimizerRule {
}
}

#[derive(PartialEq, Eq, Hash)]
struct TopKPlanNode {
k: usize,
input: LogicalPlan,
Expand Down Expand Up @@ -376,6 +377,19 @@ impl UserDefinedLogicalNode for TopKPlanNode {
expr: exprs[0].clone(),
})
}

/// Compares this node with `other`: equal only if `other` is the same
/// concrete type and `==` holds between the two values.
fn dyn_eq(&self, other: &dyn UserDefinedLogicalNode) -> bool {
    if let Some(peer) = other.as_any().downcast_ref::<Self>() {
        return self == peer;
    }
    // `other` is some other node type.
    false
}

/// Hashes this node into `state` through its `Hash` implementation.
fn dyn_hash(&self, state: &mut dyn std::hash::Hasher) {
    // Rebind so a `&mut` to the (sized) `&mut dyn Hasher` can be handed to
    // the generic `Hash::hash`.
    let mut erased = state;
    std::hash::Hash::hash(self, &mut erased);
}
}

/// Physical planner for TopK nodes
Expand Down
27 changes: 25 additions & 2 deletions datafusion/expr/src/logical_plan/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@
//! This module defines the interface for logical nodes
use crate::{Expr, LogicalPlan};
use datafusion_common::DFSchemaRef;
use std::{any::Any, collections::HashSet, fmt, sync::Arc};
use std::hash::{Hash, Hasher};
use std::{any::Any, cmp::Eq, collections::HashSet, fmt, sync::Arc};

/// This defines the interface for `LogicalPlan` nodes that can be
/// used to extend DataFusion with custom relational operators.
///
/// See the example in
/// [user_defined_plan.rs](../../tests/user_defined_plan.rs) for an
/// example of how to use this extension API
/// example of how to use this extension API.
pub trait UserDefinedLogicalNode: fmt::Debug + Send + Sync {
/// Return a reference to self as Any, to support dynamic downcasting
fn as_any(&self) -> &dyn Any;
Expand Down Expand Up @@ -77,4 +78,26 @@ pub trait UserDefinedLogicalNode: fmt::Debug + Send + Sync {
exprs: &[Expr],
inputs: &[LogicalPlan],
) -> Arc<dyn UserDefinedLogicalNode>;

/// Hashing respecting requirements from [std::hash::Hash].
///
/// The hasher is passed as `&mut dyn Hasher` rather than as a generic
/// parameter. The `Hash` impl for `dyn UserDefinedLogicalNode` forwards to
/// this method, so nodes that are equal according to `dyn_eq` must feed
/// identical data into `state`.
fn dyn_hash(&self, state: &mut dyn Hasher);

/// Comparison respecting requirements from [std::cmp::Eq].
///
/// When `other` has a different type than `self`, the values are *not* equal.
///
/// The `PartialEq` impl for `dyn UserDefinedLogicalNode` forwards to this
/// method.
fn dyn_eq(&self, other: &dyn UserDefinedLogicalNode) -> bool;
}

impl Hash for dyn UserDefinedLogicalNode {
fn hash<H: Hasher>(&self, state: &mut H) {
self.dyn_hash(state);
}
}

impl std::cmp::PartialEq for dyn UserDefinedLogicalNode {
fn eq(&self, other: &Self) -> bool {
self.dyn_eq(other)
}
}

// Marker impl: `dyn_eq` is documented to respect the requirements of
// `std::cmp::Eq`, so equality on the trait object is a full equivalence.
impl Eq for dyn UserDefinedLogicalNode {}
Loading