diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs index edc9c65450ec..450ccc5d0620 100644 --- a/datafusion/datasource-parquet/src/source.rs +++ b/datafusion/datasource-parquet/src/source.rs @@ -504,7 +504,7 @@ impl FileSource for ParquetSource { ) { (Some(expr_adapter_factory), Some(schema_adapter_factory)) => { // Use both the schema adapter factory and the expr adapter factory. - // This results in the the SchemaAdapter being used for projections (e.g. a column was selected that is a UInt32 in the file and a UInt64 in the table schema) + // This results in the SchemaAdapter being used for projections (e.g. a column was selected that is a UInt32 in the file and a UInt64 in the table schema) // but the PhysicalExprAdapterFactory being used for predicate pushdown and stats pruning. ( Some(Arc::clone(expr_adapter_factory)), diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs index 94dcd2a86150..13160d573ab4 100644 --- a/datafusion/expr/src/expr.rs +++ b/datafusion/expr/src/expr.rs @@ -100,7 +100,7 @@ impl From for NullTreatment { /// /// # Printing Expressions /// -/// You can print `Expr`s using the the `Debug` trait, `Display` trait, or +/// You can print `Expr`s using the `Debug` trait, `Display` trait, or /// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below. /// /// If you need SQL to pass to other systems, consider using [`Unparser`]. 
@@ -990,7 +990,7 @@ impl WindowFunctionDefinition { } } - /// Return the the inner window simplification function, if any + /// Return the inner window simplification function, if any /// /// See [`WindowFunctionSimplification`] for more information pub fn simplify(&self) -> Option { @@ -1077,7 +1077,7 @@ impl WindowFunction { } } - /// Return the the inner window simplification function, if any + /// Return the inner window simplification function, if any /// /// See [`WindowFunctionSimplification`] for more information pub fn simplify(&self) -> Option { diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs index 00fbdde53341..ffa9611d26e8 100644 --- a/datafusion/physical-plan/src/execution_plan.rs +++ b/datafusion/physical-plan/src/execution_plan.rs @@ -797,7 +797,7 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan { /// For unbounded streams, it also tracks whether the operator requires finite memory /// to process the stream or if memory usage could grow unbounded. /// -/// Boundedness of the output stream is based on the the boundedness of the input stream and the nature of +/// Boundedness of the output stream is based on the boundedness of the input stream and the nature of /// the operator. For example, limit or topk with fetch operator can convert an unbounded stream to a bounded stream. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Boundedness { diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs index 949c4e784bc3..fc32bb6fc94c 100644 --- a/datafusion/physical-plan/src/joins/cross_join.rs +++ b/datafusion/physical-plan/src/joins/cross_join.rs @@ -627,7 +627,7 @@ impl CrossJoinStream { Poll::Ready(Ok(StatefulStreamResult::Continue)) } - /// Joins the the indexed row of left data with the current probe batch. + /// Joins the indexed row of left data with the current probe batch. 
/// If all the results are produced, the state is set to fetch new probe batch. fn build_batches(&mut self) -> Result>> { let right_batch = self.state.try_as_record_batch()?; diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs index 7639e4fc5514..1185866b9f46 100644 --- a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs +++ b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs @@ -1031,7 +1031,7 @@ impl SortMergeJoinStream { let mut join_streamed = false; // Whether to join buffered rows let mut join_buffered = false; - // For Mark join we store a dummy id to indicate the the row has a match + // For Mark join we store a dummy id to indicate the row has a match let mut mark_row_as_match = false; // determine whether we need to join streamed/buffered rows @@ -1140,7 +1140,7 @@ impl SortMergeJoinStream { } else { Some(self.buffered_data.scanning_batch_idx) }; - // For Mark join we store a dummy id to indicate the the row has a match + // For Mark join we store a dummy id to indicate the row has a match let scanning_idx = mark_row_as_match.then_some(0); self.streamed_batch diff --git a/datafusion/physical-plan/src/repartition/distributor_channels.rs b/datafusion/physical-plan/src/repartition/distributor_channels.rs index 6e06c87a4821..34294d0f2326 100644 --- a/datafusion/physical-plan/src/repartition/distributor_channels.rs +++ b/datafusion/physical-plan/src/repartition/distributor_channels.rs @@ -151,7 +151,7 @@ impl Clone for DistributionSender { impl Drop for DistributionSender { fn drop(&mut self) { let n_senders_pre = self.channel.n_senders.fetch_sub(1, Ordering::SeqCst); - // is the the last copy of the sender side? + // is this the last copy of the sender side? 
if n_senders_pre > 1 { return; } diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 5b2587bdc330..5c684eb83d1a 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -1912,7 +1912,7 @@ select * from t_with_user where user = 'foo'; statement count 0 create table t_with_current_time(a int, current_time text) as values (1,'now'), (2,null), (3,'later'); -# here it's clear the the column was meant +# here it's clear the column was meant query B select t_with_current_time.current_time is not null from t_with_current_time; ---- diff --git a/docs/source/user-guide/sql/dml.md b/docs/source/user-guide/sql/dml.md index c29447f23cd9..4934bc267437 100644 --- a/docs/source/user-guide/sql/dml.md +++ b/docs/source/user-guide/sql/dml.md @@ -88,7 +88,7 @@ of hive-style partitioned parquet files: +-------+ ``` -If the the data contains values of `x` and `y` in column1 and only `a` in +If the data contains values of `x` and `y` in column1 and only `a` in column2, output files will appear in the following directory structure: ```text diff --git a/docs/source/user-guide/sql/subqueries.md b/docs/source/user-guide/sql/subqueries.md index ee75a6a1575c..692d1c4020d7 100644 --- a/docs/source/user-guide/sql/subqueries.md +++ b/docs/source/user-guide/sql/subqueries.md @@ -183,7 +183,7 @@ FROM and return _true_ or _false_. Rows that evaluate to _false_ or NULL are filtered from results. The `WHERE` clause supports correlated and non-correlated subqueries -as well as scalar and non-scalar subqueries (depending on the the operator used +as well as scalar and non-scalar subqueries (depending on the operator used in the predicate expression). ```sql @@ -293,7 +293,7 @@ returned by aggregate functions in the `SELECT` clause to the result of the subquery and return _true_ or _false_. Rows that evaluate to _false_ are filtered from results. 
The `HAVING` clause supports correlated and non-correlated subqueries -as well as scalar and non-scalar subqueries (depending on the the operator used +as well as scalar and non-scalar subqueries (depending on the operator used in the predicate expression). ```sql