Skip to content

Commit

Permalink
REGR: be able to read Stata files without reading them fully into memory
Browse files Browse the repository at this point in the history
Fixes pandas-dev#48700
Regressed in pandas-dev#9245
Regressed in 2f0ada3
  • Loading branch information
akx committed Oct 3, 2022
1 parent e43d75e commit 1532991
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 6 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.5.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Fixed regressions
- Fixed :meth:`.DataFrameGroupBy.size` not returning a Series when ``axis=1`` (:issue:`48738`)
- Fixed regression in :meth:`DataFrameGroupBy.apply` when user defined function is called on an empty dataframe (:issue:`47985`)
- Fixed regression in :meth:`DataFrame.apply` when passing non-zero ``axis`` via keyword argument (:issue:`48656`)
-
- Fixed regression in Stata reading where files were needlessly buffered in memory (:issue:`48922`)

.. ---------------------------------------------------------------------------
Expand Down
18 changes: 13 additions & 5 deletions pandas/io/stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -1164,15 +1164,23 @@ def __init__(
self._lines_read = 0

self._native_byteorder = _set_endianness(sys.byteorder)
with get_handle(

handles = get_handle(
path_or_buf,
"rb",
storage_options=storage_options,
is_text=False,
compression=compression,
) as handles:
# Copy to BytesIO, and ensure no encoding
self.path_or_buf = BytesIO(handles.handle.read())
)
if hasattr(handles.handle, "seekable") and handles.handle.seekable():
# If the handle is directly seekable, use it without an extra copy.
self.path_or_buf = handles.handle
self._close_file = handles.close
else:
# Copy to memory, and ensure no encoding.
with handles:
self.path_or_buf = BytesIO(handles.handle.read())
self._close_file = self.path_or_buf.close

self._read_header()
self._setup_dtype()
Expand All @@ -1192,7 +1200,7 @@ def __exit__(

def close(self) -> None:
"""Close the handle if it's open."""
self.path_or_buf.close()
self._close_file()

def _set_encoding(self) -> None:
"""
Expand Down

0 comments on commit 1532991

Please sign in to comment.