MINOR: Fix typos in Python comments and docstrings
Closes #12295 from kianmeng/fix-typos-in-python

Authored-by: Kian-Meng Ang <kianmeng@cpan.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
kianmeng authored and jorisvandenbossche committed Jan 31, 2022
1 parent ab8462c commit ad073b7
Showing 15 changed files with 20 additions and 20 deletions.
2 changes: 1 addition & 1 deletion python/pyarrow/_dataset.pxd
@@ -69,7 +69,7 @@ cdef class FileFormat(_Weakrefable):
cdef _set_default_fragment_scan_options(self, FragmentScanOptions options)

# Return a WrittenFile after a file was written.
- # May be overriden by subclasses, e.g. to add metadata.
+ # May be overridden by subclasses, e.g. to add metadata.
cdef WrittenFile _finish_write(self, path, base_dir,
CFileWriter* file_writer)

2 changes: 1 addition & 1 deletion python/pyarrow/_dataset_parquet.pyx
@@ -444,7 +444,7 @@ cdef class ParquetReadOptions(_Weakrefable):
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
- and therefore INT96 timestamps will be infered as timestamps
+ and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
"""

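As an aside on the option documented in this hunk (not part of the commit itself, which only fixes the spelling): a minimal sketch of setting coerce_int96_timestamp_unit through the dataset API, assuming ParquetFileFormat accepts read_options as a plain dict and using a purely hypothetical file name.

    import pyarrow.dataset as ds

    # INT96 timestamps would otherwise be inferred as nanoseconds, which can
    # overflow for dates far outside the int64-nanosecond range.
    fmt = ds.ParquetFileFormat(read_options={"coerce_int96_timestamp_unit": "ms"})
    dataset = ds.dataset("legacy_int96.parquet", format=fmt)
    table = dataset.to_table()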
2 changes: 1 addition & 1 deletion python/pyarrow/compute.py
@@ -293,7 +293,7 @@ def _make_global_functions():
"""
Make global functions wrapping each compute function.
- Note that some of the automatically-generated wrappers may be overriden
+ Note that some of the automatically-generated wrappers may be overridden
by custom versions below.
"""
g = globals()
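To illustrate what the generated wrappers correspond to (a sketch, not code from this commit): each function registered in the compute registry becomes a module-level callable in pyarrow.compute, and calling the wrapper is equivalent to looking the function up in the registry and calling it directly.

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array(["arrow", "parquet"])

    # The generated module-level wrapper and the registry entry reach the same kernel.
    assert "ascii_upper" in pc.list_functions()
    print(pc.ascii_upper(arr))                         # generated wrapper
    print(pc.get_function("ascii_upper").call([arr]))  # registry lookup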
2 changes: 1 addition & 1 deletion python/pyarrow/fs.py
@@ -96,7 +96,7 @@ def _ensure_filesystem(
if use_mmap:
raise ValueError(
"Specifying to use memory mapping not supported for "
"filesytem specified as an URI string"
"filesystem specified as an URI string"
)
return _filesystem_from_str(filesystem)

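For the distinction behind the error message in this hunk (illustrative sketch only, not from the commit): memory mapping can be requested when the local filesystem object is constructed explicitly, but there is no place to attach that option when the filesystem is inferred from a URI string.

    import pyarrow.fs as pafs

    # Explicit construction: the use_mmap option has somewhere to live.
    local_fs = pafs.LocalFileSystem(use_mmap=True)

    # URI form: the filesystem is derived from the string, so pass a filesystem
    # object instead if memory mapping is wanted.
    fs_from_uri, path = pafs.FileSystem.from_uri("file:///tmp/example.parquet")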
2 changes: 1 addition & 1 deletion python/pyarrow/gandiva.pyx
@@ -488,7 +488,7 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,

cpdef make_filter(Schema schema, Condition condition):
"""
- Contruct a filter based on a condition.
+ Construct a filter based on a condition.
A filter is built for a specific schema and condition. Once the filter is
built, it can be used to evaluate many row batches.
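A hedged usage sketch for make_filter, based on the pyarrow.gandiva builder API as it existed around this release (the exact builder calls are recalled from the docs and may differ slightly):

    import pyarrow as pa
    import pyarrow.gandiva as gandiva

    table = pa.table({"a": pa.array([1.0, 31.0, 46.0, 3.0, 57.0])})

    builder = gandiva.TreeExprBuilder()
    node_a = builder.make_field(table.schema.field("a"))
    fifty = builder.make_literal(50.0, pa.float64())
    cond = builder.make_condition(
        builder.make_function("less_than", [node_a, fifty], pa.bool_()))

    # Built once for this schema and condition, then reusable across many batches.
    fltr = gandiva.make_filter(table.schema, cond)
    selection = fltr.evaluate(table.to_batches()[0], pa.default_memory_pool())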
6 changes: 3 additions & 3 deletions python/pyarrow/parquet.py
@@ -219,7 +219,7 @@ class ParquetFile:
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
- and therefore INT96 timestamps will be infered as timestamps
+ and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
"""

@@ -1343,7 +1343,7 @@ class ParquetDataset:
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular resolution
(e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96
- timestamps will be infered as timestamps in nanoseconds.
+ timestamps will be inferred as timestamps in nanoseconds.
""".format(_read_docstring_common, _DNF_filter_doc)

def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
@@ -1933,7 +1933,7 @@ def partitioning(self):
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
- and therefore INT96 timestamps will be infered as timestamps
+ and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
Returns
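The coerce_int96_timestamp_unit parameter described in the docstrings above is also accepted by the reader entry points; a short usage sketch (the file name is hypothetical):

    import pyarrow.parquet as pq

    # Without coercion, INT96 timestamps are inferred as nanoseconds, which can
    # overflow for timestamps outside roughly the years 1677-2262.
    table = pq.read_table("legacy_int96.parquet", coerce_int96_timestamp_unit="ms")
    pf = pq.ParquetFile("legacy_int96.parquet", coerce_int96_timestamp_unit="ms")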
2 changes: 1 addition & 1 deletion python/pyarrow/tests/parquet/test_datetime.py
@@ -303,7 +303,7 @@ def get_table(pq_reader_method, filename, **kwargs):
elif pq_reader_method == "read_table":
return pq.read_table(filename, **kwargs)

- # Recreating the initial JIRA issue referrenced in ARROW-12096
+ # Recreating the initial JIRA issue referenced in ARROW-12096
oob_dts = [
datetime.datetime(1000, 1, 1),
datetime.datetime(2000, 1, 1),
2 changes: 1 addition & 1 deletion python/pyarrow/tests/parquet/test_metadata.py
@@ -501,7 +501,7 @@ def test_parquet_metadata_empty_to_dict(tempdir):
@pytest.mark.slow
@pytest.mark.large_memory
def test_metadata_exceeds_message_size():
- # ARROW-13655: Thrift may enable a defaut message size that limits
+ # ARROW-13655: Thrift may enable a default message size that limits
# the size of Parquet metadata that can be written.
NCOLS = 1000
NREPEATS = 4000
8 changes: 4 additions & 4 deletions python/pyarrow/tests/test_dataset.py
@@ -3130,7 +3130,7 @@ def test_parquet_dataset_lazy_filtering(tempdir, open_logging_fs):
with assert_opens([]):
fragments[0].split_by_row_group(ds.field("f1") > 15)

- # ensuring metadata of splitted fragment should also not open any file
+ # ensuring metadata of split fragment should also not open any file
with assert_opens([]):
rg_fragments = fragments[0].split_by_row_group()
rg_fragments[0].ensure_complete_metadata()
@@ -4196,7 +4196,7 @@ def test_write_dataset_s3(s3_example_simple):
table, "mybucket/dataset", filesystem=fs, format="feather",
partitioning=part
)
- # check rountrip
+ # check roundtrip
result = ds.dataset(
"mybucket/dataset", filesystem=fs, format="ipc", partitioning="hive"
).to_table()
@@ -4205,7 +4205,7 @@ def test_write_dataset_s3(s3_example_simple):
# writing with URI
uri = uri_template.format("mybucket/dataset2")
ds.write_dataset(table, uri, format="feather", partitioning=part)
- # check rountrip
+ # check roundtrip
result = ds.dataset(
"mybucket/dataset2", filesystem=fs, format="ipc", partitioning="hive"
).to_table()
@@ -4216,7 +4216,7 @@ def test_write_dataset_s3(s3_example_simple):
ds.write_dataset(
table, "dataset3", filesystem=uri, format="feather", partitioning=part
)
- # check rountrip
+ # check roundtrip
result = ds.dataset(
"mybucket/dataset3", filesystem=fs, format="ipc", partitioning="hive"
).to_table()
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_flight.py
@@ -639,7 +639,7 @@ def start_call(self, info, headers):


class HeaderAuthServerMiddleware(ServerMiddleware):
"""A ServerMiddleware that transports incoming username and passowrd."""
"""A ServerMiddleware that transports incoming username and password."""

def __init__(self, token):
self.token = token
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_gdb.py
@@ -228,7 +228,7 @@ def check_heap_repr(gdb, expr, expected):
Check printing a heap-located value, given its address.
"""
s = gdb.print_value(f"*{expr}")
- # GDB may prefix the value with an adress or type specification
+ # GDB may prefix the value with an address or type specification
if s != expected:
assert s.endswith(f" {expected}")

2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_io.py
@@ -701,7 +701,7 @@ def test_compression_level(compression):
# The ability to set a seed this way is not present on older versions of
# numpy (currently in our python 3.6 CI build). Some inputs might just
# happen to compress the same between the two levels so using seeded
- # random numbers is neccesary to help get more reliable results
+ # random numbers is necessary to help get more reliable results
#
# The goal of this part is to ensure the compression_level is being
# passed down to the C++ layer, not to verify the compression algs
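The comment fixed here concerns comparing output sizes at two compression levels; a standalone sketch of the same idea, assuming pyarrow.Codec accepts a compression_level argument as the surrounding test does:

    import numpy as np
    import pyarrow as pa

    # Seeded input keeps the comparison reproducible; near-random data is used
    # because trivially compressible inputs can come out identical at both levels.
    rng = np.random.default_rng(42)
    data = rng.integers(0, 100, size=100_000, dtype=np.int64).tobytes()

    fast = pa.Codec("gzip", compression_level=1).compress(data, asbytes=True)
    small = pa.Codec("gzip", compression_level=9).compress(data, asbytes=True)
    print(len(fast), len(small))  # the higher level should not produce a larger result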
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_json.py
@@ -209,7 +209,7 @@ def test_empty_rows(self):
assert table.num_rows == 2

def test_reconcile_accross_blocks(self):
- # ARROW-12065: reconciling inferred types accross blocks
+ # ARROW-12065: reconciling inferred types across blocks
first_row = b'{ }\n'
read_options = ReadOptions(block_size=len(first_row))
for next_rows, expected_pylist in [
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_pandas.py
@@ -3390,7 +3390,7 @@ def test_array_uses_memory_pool():
arr = pa.array(np.arange(N, dtype=np.int64),
mask=np.random.randint(0, 2, size=N).astype(np.bool_))

- # In the case the gc is caught loafing
+ # In the case the gc is caught loading
gc.collect()

prior_allocation = pa.total_allocated_bytes()
2 changes: 1 addition & 1 deletion python/pyarrow/util.py
@@ -165,7 +165,7 @@ def _break_traceback_cycle_from_frame(frame):
# somewhere along the chain of execution frames).
frame.clear()
# To visit the inner frame, we need to find it among the
- # referers of this frame (while `frame.f_back` would let
+ # referrers of this frame (while `frame.f_back` would let
# us visit the outer frame).
refs = gc.get_referrers(frame)
refs = frame = this_frame = None
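The comment fixed in this hunk describes finding the frame nested inside a given frame by asking the garbage collector who refers to it; a purely illustrative sketch of that referrer trick (not the pyarrow helper itself):

    import gc
    import sys

    def inner(outer_frame):
        this_frame = sys._getframe()
        # f_back walks outward, toward the caller...
        assert this_frame.f_back is outer_frame
        # ...while nested frames are found among the referrers of the outer
        # frame, since each frame references its caller through f_back.
        return [r for r in gc.get_referrers(outer_frame)
                if getattr(r, "f_back", None) is outer_frame]

    def outer():
        return inner(sys._getframe())

    print(outer())  # contains the frame object for inner()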
