MINOR: Fix typos in Python comments and docstrings
Closes #12295 from kianmeng/fix-typos-in-python

Authored-by: Kian-Meng Ang <kianmeng@cpan.org>
Signed-off-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
kianmeng authored and jorisvandenbossche committed Jan 31, 2022
1 parent ab8462c commit ad073b7
Showing 15 changed files with 20 additions and 20 deletions.
2 changes: 1 addition & 1 deletion python/pyarrow/_dataset.pxd
@@ -69,7 +69,7 @@ cdef class FileFormat(_Weakrefable):
cdef _set_default_fragment_scan_options(self, FragmentScanOptions options)

# Return a WrittenFile after a file was written.
- # May be overriden by subclasses, e.g. to add metadata.
+ # May be overridden by subclasses, e.g. to add metadata.
cdef WrittenFile _finish_write(self, path, base_dir,
CFileWriter* file_writer)

2 changes: 1 addition & 1 deletion python/pyarrow/_dataset_parquet.pyx
@@ -444,7 +444,7 @@ cdef class ParquetReadOptions(_Weakrefable):
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
- and therefore INT96 timestamps will be infered as timestamps
+ and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
"""

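As an aside on the option documented in this hunk (not part of the commit itself, which only fixes the spelling): a minimal sketch of setting coerce_int96_timestamp_unit through the dataset API, assuming ParquetFileFormat accepts read_options as a plain dict and using a purely hypothetical file name.

    import pyarrow.dataset as ds

    # INT96 timestamps would otherwise be inferred as nanoseconds, which can
    # overflow for dates far outside the int64-nanosecond range.
    fmt = ds.ParquetFileFormat(read_options={"coerce_int96_timestamp_unit": "ms"})
    dataset = ds.dataset("legacy_int96.parquet", format=fmt)
    table = dataset.to_table()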
2 changes: 1 addition & 1 deletion python/pyarrow/compute.py
@@ -293,7 +293,7 @@ def _make_global_functions():
"""
Make global functions wrapping each compute function.
- Note that some of the automatically-generated wrappers may be overriden
+ Note that some of the automatically-generated wrappers may be overridden
by custom versions below.
"""
g = globals()
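To illustrate what the generated wrappers correspond to (a sketch, not code from this commit): each function registered in the compute registry becomes a module-level callable in pyarrow.compute, and calling the wrapper is equivalent to looking the function up in the registry and calling it directly.

    import pyarrow as pa
    import pyarrow.compute as pc

    arr = pa.array(["arrow", "parquet"])

    # The generated module-level wrapper and the registry entry reach the same kernel.
    assert "ascii_upper" in pc.list_functions()
    print(pc.ascii_upper(arr))                         # generated wrapper
    print(pc.get_function("ascii_upper").call([arr]))  # registry lookup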
2 changes: 1 addition & 1 deletion python/pyarrow/fs.py
@@ -96,7 +96,7 @@ def _ensure_filesystem(
if use_mmap:
raise ValueError(
"Specifying to use memory mapping not supported for "
"filesytem specified as an URI string"
"filesystem specified as an URI string"
)
return _filesystem_from_str(filesystem)

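For the distinction behind the error message in this hunk (illustrative sketch only, not from the commit): memory mapping can be requested when the local filesystem object is constructed explicitly, but there is no place to attach that option when the filesystem is inferred from a URI string.

    import pyarrow.fs as pafs

    # Explicit construction: the use_mmap option has somewhere to live.
    local_fs = pafs.LocalFileSystem(use_mmap=True)

    # URI form: the filesystem is derived from the string, so pass a filesystem
    # object instead if memory mapping is wanted.
    fs_from_uri, path = pafs.FileSystem.from_uri("file:///tmp/example.parquet")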
2 changes: 1 addition & 1 deletion python/pyarrow/gandiva.pyx
@@ -488,7 +488,7 @@ cpdef make_projector(Schema schema, children, MemoryPool pool,

cpdef make_filter(Schema schema, Condition condition):
"""
- Contruct a filter based on a condition.
+ Construct a filter based on a condition.
A filter is built for a specific schema and condition. Once the filter is
built, it can be used to evaluate many row batches.
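A hedged usage sketch for make_filter, based on the pyarrow.gandiva builder API as it existed around this release (the exact builder calls are recalled from the docs and may differ slightly):

    import pyarrow as pa
    import pyarrow.gandiva as gandiva

    table = pa.table({"a": pa.array([1.0, 31.0, 46.0, 3.0, 57.0])})

    builder = gandiva.TreeExprBuilder()
    node_a = builder.make_field(table.schema.field("a"))
    fifty = builder.make_literal(50.0, pa.float64())
    cond = builder.make_condition(
        builder.make_function("less_than", [node_a, fifty], pa.bool_()))

    # Built once for this schema and condition, then reusable across many batches.
    fltr = gandiva.make_filter(table.schema, cond)
    selection = fltr.evaluate(table.to_batches()[0], pa.default_memory_pool())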
6 changes: 3 additions & 3 deletions python/pyarrow/parquet.py
@@ -219,7 +219,7 @@ class ParquetFile:
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
- and therefore INT96 timestamps will be infered as timestamps
+ and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
"""

@@ -1343,7 +1343,7 @@ class ParquetDataset:
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular resolution
(e.g. 'ms'). Setting to None is equivalent to 'ns' and therefore INT96
- timestamps will be infered as timestamps in nanoseconds.
+ timestamps will be inferred as timestamps in nanoseconds.
""".format(_read_docstring_common, _DNF_filter_doc)

def __new__(cls, path_or_paths=None, filesystem=None, schema=None,
@@ -1933,7 +1933,7 @@ def partitioning(self):
coerce_int96_timestamp_unit : str, default None.
Cast timestamps that are stored in INT96 format to a particular
resolution (e.g. 'ms'). Setting to None is equivalent to 'ns'
- and therefore INT96 timestamps will be infered as timestamps
+ and therefore INT96 timestamps will be inferred as timestamps
in nanoseconds.
Returns
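The coerce_int96_timestamp_unit parameter described in the docstrings above is also accepted by the reader entry points; a short usage sketch (the file name is hypothetical):

    import pyarrow.parquet as pq

    # Without coercion, INT96 timestamps are inferred as nanoseconds, which can
    # overflow for timestamps outside roughly the years 1677-2262.
    table = pq.read_table("legacy_int96.parquet", coerce_int96_timestamp_unit="ms")
    pf = pq.ParquetFile("legacy_int96.parquet", coerce_int96_timestamp_unit="ms")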
2 changes: 1 addition & 1 deletion python/pyarrow/tests/parquet/test_datetime.py
@@ -303,7 +303,7 @@ def get_table(pq_reader_method, filename, **kwargs):
elif pq_reader_method == "read_table":
return pq.read_table(filename, **kwargs)

- # Recreating the initial JIRA issue referrenced in ARROW-12096
+ # Recreating the initial JIRA issue referenced in ARROW-12096
oob_dts = [
datetime.datetime(1000, 1, 1),
datetime.datetime(2000, 1, 1),
2 changes: 1 addition & 1 deletion python/pyarrow/tests/parquet/test_metadata.py
@@ -501,7 +501,7 @@ def test_parquet_metadata_empty_to_dict(tempdir):
@pytest.mark.slow
@pytest.mark.large_memory
def test_metadata_exceeds_message_size():
- # ARROW-13655: Thrift may enable a defaut message size that limits
+ # ARROW-13655: Thrift may enable a default message size that limits
# the size of Parquet metadata that can be written.
NCOLS = 1000
NREPEATS = 4000
8 changes: 4 additions & 4 deletions python/pyarrow/tests/test_dataset.py
@@ -3130,7 +3130,7 @@ def test_parquet_dataset_lazy_filtering(tempdir, open_logging_fs):
with assert_opens([]):
fragments[0].split_by_row_group(ds.field("f1") > 15)

- # ensuring metadata of splitted fragment should also not open any file
+ # ensuring metadata of split fragment should also not open any file
with assert_opens([]):
rg_fragments = fragments[0].split_by_row_group()
rg_fragments[0].ensure_complete_metadata()
@@ -4196,7 +4196,7 @@ def test_write_dataset_s3(s3_example_simple):
table, "mybucket/dataset", filesystem=fs, format="feather",
partitioning=part
)
- # check rountrip
+ # check roundtrip
result = ds.dataset(
"mybucket/dataset", filesystem=fs, format="ipc", partitioning="hive"
).to_table()
@@ -4205,7 +4205,7 @@ def test_write_dataset_s3(s3_example_simple):
# writing with URI
uri = uri_template.format("mybucket/dataset2")
ds.write_dataset(table, uri, format="feather", partitioning=part)
- # check rountrip
+ # check roundtrip
result = ds.dataset(
"mybucket/dataset2", filesystem=fs, format="ipc", partitioning="hive"
).to_table()
@@ -4216,7 +4216,7 @@ def test_write_dataset_s3(s3_example_simple):
ds.write_dataset(
table, "dataset3", filesystem=uri, format="feather", partitioning=part
)
- # check rountrip
+ # check roundtrip
result = ds.dataset(
"mybucket/dataset3", filesystem=fs, format="ipc", partitioning="hive"
).to_table()
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_flight.py
@@ -639,7 +639,7 @@ def start_call(self, info, headers):


class HeaderAuthServerMiddleware(ServerMiddleware):
"""A ServerMiddleware that transports incoming username and passowrd."""
"""A ServerMiddleware that transports incoming username and password."""

def __init__(self, token):
self.token = token
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_gdb.py
@@ -228,7 +228,7 @@ def check_heap_repr(gdb, expr, expected):
Check printing a heap-located value, given its address.
"""
s = gdb.print_value(f"*{expr}")
- # GDB may prefix the value with an adress or type specification
+ # GDB may prefix the value with an address or type specification
if s != expected:
assert s.endswith(f" {expected}")

2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_io.py
@@ -701,7 +701,7 @@ def test_compression_level(compression):
# The ability to set a seed this way is not present on older versions of
# numpy (currently in our python 3.6 CI build). Some inputs might just
# happen to compress the same between the two levels so using seeded
- # random numbers is neccesary to help get more reliable results
+ # random numbers is necessary to help get more reliable results
#
# The goal of this part is to ensure the compression_level is being
# passed down to the C++ layer, not to verify the compression algs
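The comment fixed here concerns comparing output sizes at two compression levels; a standalone sketch of the same idea, assuming pyarrow.Codec accepts a compression_level argument as the surrounding test does:

    import numpy as np
    import pyarrow as pa

    # Seeded input keeps the comparison reproducible; near-random data is used
    # because trivially compressible inputs can come out identical at both levels.
    rng = np.random.default_rng(42)
    data = rng.integers(0, 100, size=100_000, dtype=np.int64).tobytes()

    fast = pa.Codec("gzip", compression_level=1).compress(data, asbytes=True)
    small = pa.Codec("gzip", compression_level=9).compress(data, asbytes=True)
    print(len(fast), len(small))  # the higher level should not produce a larger result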
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_json.py
@@ -209,7 +209,7 @@ def test_empty_rows(self):
assert table.num_rows == 2

def test_reconcile_accross_blocks(self):
- # ARROW-12065: reconciling inferred types accross blocks
+ # ARROW-12065: reconciling inferred types across blocks
first_row = b'{ }\n'
read_options = ReadOptions(block_size=len(first_row))
for next_rows, expected_pylist in [
2 changes: 1 addition & 1 deletion python/pyarrow/tests/test_pandas.py
@@ -3390,7 +3390,7 @@ def test_array_uses_memory_pool():
arr = pa.array(np.arange(N, dtype=np.int64),
mask=np.random.randint(0, 2, size=N).astype(np.bool_))

- # In the case the gc is caught loafing
+ # In the case the gc is caught loading
gc.collect()

prior_allocation = pa.total_allocated_bytes()
2 changes: 1 addition & 1 deletion python/pyarrow/util.py
@@ -165,7 +165,7 @@ def _break_traceback_cycle_from_frame(frame):
# somewhere along the chain of execution frames).
frame.clear()
# To visit the inner frame, we need to find it among the
- # referers of this frame (while `frame.f_back` would let
+ # referrers of this frame (while `frame.f_back` would let
# us visit the outer frame).
refs = gc.get_referrers(frame)
refs = frame = this_frame = None
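The comment fixed in this hunk describes finding the frame nested inside a given frame by asking the garbage collector who refers to it; a purely illustrative sketch of that referrer trick (not the pyarrow helper itself):

    import gc
    import sys

    def inner(outer_frame):
        this_frame = sys._getframe()
        # f_back walks outward, toward the caller...
        assert this_frame.f_back is outer_frame
        # ...while nested frames are found among the referrers of the outer
        # frame, since each frame references its caller through f_back.
        return [r for r in gc.get_referrers(outer_frame)
                if getattr(r, "f_back", None) is outer_frame]

    def outer():
        return inner(sys._getframe())

    print(outer())  # contains the frame object for inner()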
