diff --git a/datafusion/datasource-parquet/src/source.rs b/datafusion/datasource-parquet/src/source.rs
index edc9c65450ec..450ccc5d0620 100644
--- a/datafusion/datasource-parquet/src/source.rs
+++ b/datafusion/datasource-parquet/src/source.rs
@@ -504,7 +504,7 @@ impl FileSource for ParquetSource {
         ) {
             (Some(expr_adapter_factory), Some(schema_adapter_factory)) => {
                 // Use both the schema adapter factory and the expr adapter factory.
-                // This results in the the SchemaAdapter being used for projections (e.g. a column was selected that is a UInt32 in the file and a UInt64 in the table schema)
+                // This results in the SchemaAdapter being used for projections (e.g. a column was selected that is a UInt32 in the file and a UInt64 in the table schema)
                 // but the PhysicalExprAdapterFactory being used for predicate pushdown and stats pruning.
                 (
                     Some(Arc::clone(expr_adapter_factory)),
diff --git a/datafusion/expr/src/expr.rs b/datafusion/expr/src/expr.rs
index 94dcd2a86150..13160d573ab4 100644
--- a/datafusion/expr/src/expr.rs
+++ b/datafusion/expr/src/expr.rs
@@ -100,7 +100,7 @@ impl From<sqlparser::ast::NullTreatment> for NullTreatment {
 ///
 /// # Printing Expressions
 ///
-/// You can print `Expr`s using the the `Debug` trait, `Display` trait, or
+/// You can print `Expr`s using the `Debug` trait, `Display` trait, or
 /// [`Self::human_display`]. See the [examples](#examples-displaying-exprs) below.
 ///
 /// If you need  SQL to pass to other systems, consider using [`Unparser`].
@@ -990,7 +990,7 @@ impl WindowFunctionDefinition {
         }
     }
 
-    /// Return the the inner window simplification function, if any
+    /// Return the inner window simplification function, if any
     ///
     /// See [`WindowFunctionSimplification`] for more information
     pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
@@ -1077,7 +1077,7 @@ impl WindowFunction {
         }
     }
 
-    /// Return the the inner window simplification function, if any
+    /// Return the inner window simplification function, if any
     ///
     /// See [`WindowFunctionSimplification`] for more information
     pub fn simplify(&self) -> Option<WindowFunctionSimplification> {
diff --git a/datafusion/physical-plan/src/execution_plan.rs b/datafusion/physical-plan/src/execution_plan.rs
index 00fbdde53341..ffa9611d26e8 100644
--- a/datafusion/physical-plan/src/execution_plan.rs
+++ b/datafusion/physical-plan/src/execution_plan.rs
@@ -797,7 +797,7 @@ impl ExecutionPlanProperties for &dyn ExecutionPlan {
 /// For unbounded streams, it also tracks whether the operator requires finite memory
 /// to process the stream or if memory usage could grow unbounded.
 ///
-/// Boundedness of the output stream is based on the the boundedness of the input stream and the nature of
+/// Boundedness of the output stream is based on the boundedness of the input stream and the nature of
 /// the operator. For example, limit or topk with fetch operator can convert an unbounded stream to a bounded stream.
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum Boundedness {
diff --git a/datafusion/physical-plan/src/joins/cross_join.rs b/datafusion/physical-plan/src/joins/cross_join.rs
index 949c4e784bc3..fc32bb6fc94c 100644
--- a/datafusion/physical-plan/src/joins/cross_join.rs
+++ b/datafusion/physical-plan/src/joins/cross_join.rs
@@ -627,7 +627,7 @@ impl<T: BatchTransformer> CrossJoinStream<T> {
         Poll::Ready(Ok(StatefulStreamResult::Continue))
     }
 
-    /// Joins the the indexed row of left data with the current probe batch.
+    /// Joins the indexed row of left data with the current probe batch.
     /// If all the results are produced, the state is set to fetch new probe batch.
     fn build_batches(&mut self) -> Result<StatefulStreamResult<Option<RecordBatch>>> {
         let right_batch = self.state.try_as_record_batch()?;
diff --git a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs
index 7639e4fc5514..1185866b9f46 100644
--- a/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs
+++ b/datafusion/physical-plan/src/joins/sort_merge_join/stream.rs
@@ -1031,7 +1031,7 @@ impl SortMergeJoinStream {
         let mut join_streamed = false;
         // Whether to join buffered rows
         let mut join_buffered = false;
-        // For Mark join we store a dummy id to indicate the the row has a match
+        // For Mark join we store a dummy id to indicate the row has a match
         let mut mark_row_as_match = false;
 
         // determine whether we need to join streamed/buffered rows
@@ -1140,7 +1140,7 @@ impl SortMergeJoinStream {
             } else {
                 Some(self.buffered_data.scanning_batch_idx)
             };
-            // For Mark join we store a dummy id to indicate the the row has a match
+            // For Mark join we store a dummy id to indicate the row has a match
             let scanning_idx = mark_row_as_match.then_some(0);
 
             self.streamed_batch
diff --git a/datafusion/physical-plan/src/repartition/distributor_channels.rs b/datafusion/physical-plan/src/repartition/distributor_channels.rs
index 6e06c87a4821..34294d0f2326 100644
--- a/datafusion/physical-plan/src/repartition/distributor_channels.rs
+++ b/datafusion/physical-plan/src/repartition/distributor_channels.rs
@@ -151,7 +151,7 @@ impl<T> Clone for DistributionSender<T> {
 impl<T> Drop for DistributionSender<T> {
     fn drop(&mut self) {
         let n_senders_pre = self.channel.n_senders.fetch_sub(1, Ordering::SeqCst);
-        // is the the last copy of the sender side?
+        // is this the last copy of the sender side?
         if n_senders_pre > 1 {
             return;
         }
diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt
index 5b2587bdc330..5c684eb83d1a 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -1912,7 +1912,7 @@ select * from t_with_user where user = 'foo';
 statement count 0
 create table t_with_current_time(a int, current_time text) as values (1,'now'), (2,null), (3,'later');
 
-# here it's clear the the column was meant
+# here it's clear that the column was meant
 query B
 select t_with_current_time.current_time is not null from t_with_current_time;
 ----
diff --git a/docs/source/user-guide/sql/dml.md b/docs/source/user-guide/sql/dml.md
index c29447f23cd9..4934bc267437 100644
--- a/docs/source/user-guide/sql/dml.md
+++ b/docs/source/user-guide/sql/dml.md
@@ -88,7 +88,7 @@ of hive-style partitioned parquet files:
 +-------+
 ```
 
-If the the data contains values of `x` and `y` in column1 and only `a` in
+If the data contains values of `x` and `y` in column1 and only `a` in
 column2, output files will appear in the following directory structure:
 
 ```text
diff --git a/docs/source/user-guide/sql/subqueries.md b/docs/source/user-guide/sql/subqueries.md
index ee75a6a1575c..692d1c4020d7 100644
--- a/docs/source/user-guide/sql/subqueries.md
+++ b/docs/source/user-guide/sql/subqueries.md
@@ -183,7 +183,7 @@ FROM
 and return _true_ or _false_.
 Rows that evaluate to _false_ or NULL are filtered from results.
 The `WHERE` clause supports correlated and non-correlated subqueries
-as well as scalar and non-scalar subqueries (depending on the the operator used
+as well as scalar and non-scalar subqueries (depending on the operator used
 in the predicate expression).
 
 ```sql
@@ -293,7 +293,7 @@ returned by aggregate functions in the `SELECT` clause to the result of the
 subquery and return _true_ or _false_.
 Rows that evaluate to _false_ are filtered from results.
 The `HAVING` clause supports correlated and non-correlated subqueries
-as well as scalar and non-scalar subqueries (depending on the the operator used
+as well as scalar and non-scalar subqueries (depending on the operator used
 in the predicate expression).
 
 ```sql