From 2312ed100b3d83530297ea1dedb755990750e425 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 3 Oct 2022 18:25:46 +0300 Subject: [PATCH] REGR: be able to read Stata files without reading them fully into memory Fixes #48700 Regressed in pandas-dev/pandas#9245 Regressed in 2f0ada3d430f0cc49fa9ab1e7e2e2a7ec23c9616 --- doc/source/whatsnew/v1.6.0.rst | 1 + pandas/io/stata.py | 18 +++++++++++++----- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 3c7a80f0968448..8ebf77eeb84340 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -229,6 +229,7 @@ MultiIndex I/O ^^^ - Bug in :func:`read_sas` caused fragmentation of :class:`DataFrame` and raised :class:`.errors.PerformanceWarning` (:issue:`48595`) +- Regression in :class:`StataReader` caused all files to needlessly be buffered in memory (:issue:`48922`) - Period diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 6baf5f0da86124..640c0d5cd5c63f 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1164,15 +1164,23 @@ def __init__( self._lines_read = 0 self._native_byteorder = _set_endianness(sys.byteorder) - with get_handle( + + handles = get_handle( path_or_buf, "rb", storage_options=storage_options, is_text=False, compression=compression, - ) as handles: - # Copy to BytesIO, and ensure no encoding - self.path_or_buf = BytesIO(handles.handle.read()) + ) + if hasattr(handles.handle, "seekable") and handles.handle.seekable(): + # If the handle is directly seekable, use it without an extra copy. + self.path_or_buf = handles.handle + self._close_file = handles.close + else: + # Copy to memory, and ensure no encoding. + with handles: + self.path_or_buf = BytesIO(handles.handle.read()) + self._close_file = self.path_or_buf.close self._read_header() self._setup_dtype() @@ -1192,7 +1200,7 @@ def __exit__( def close(self) -> None: """close the handle if its open""" - self.path_or_buf.close() + self._close_file() def _set_encoding(self) -> None: """