Skip to content

Commit

Permalink
PERF: Break reference cycle for all Index types (pandas-dev#27840)
Browse files Browse the repository at this point in the history
* Generalize guards for index ref cycles

* add issue number
  • Loading branch information
topper-123 authored and gabriellm1 committed Aug 27, 2019
1 parent 5882ff7 commit fc436e4
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 14 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ Indexing
^^^^^^^^

- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`)
- Break reference cycle involving :class:`Index` to allow garbage collection of :class:`Index` objects without running the GC. (:issue:`27585`)
- Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`)
- Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`).
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -665,7 +665,7 @@ def _cleanup(self):
def _engine(self):
# property, for now, slow to look up

# to avoid a refernce cycle, bind `_ndarray_values` to a local variable, so
# to avoid a reference cycle, bind `_ndarray_values` to a local variable, so
# `self` is not passed into the lambda.
_ndarray_values = self._ndarray_values
return self._engine_type(lambda: _ndarray_values, len(self))
Expand Down
8 changes: 5 additions & 3 deletions pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,9 +446,11 @@ def argsort(self, *args, **kwargs):

@cache_readonly
def _engine(self):

# we are going to look things up with the codes themselves
return self._engine_type(lambda: self.codes, len(self))
# we are going to look things up with the codes themselves.
# To avoid a reference cycle, bind `codes` to a local variable, so
# `self` is not passed into the lambda.
codes = self.codes
return self._engine_type(lambda: codes, len(self))

# introspection
@cache_readonly
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datetime import datetime, timedelta
import warnings
import weakref

import numpy as np

Expand Down Expand Up @@ -441,7 +442,9 @@ def _formatter_func(self):

@cache_readonly
def _engine(self):
return self._engine_type(lambda: self, len(self))
# To avoid a reference cycle, pass a weakref of self to _engine_type.
period = weakref.ref(self)
return self._engine_type(period, len(self))

@Appender(_index_shared_docs["contains"])
def __contains__(self, key):
Expand Down
9 changes: 9 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import gc

import numpy as np
import pytest

Expand Down Expand Up @@ -908,3 +910,10 @@ def test_is_unique(self):
# multiple NA should not be unique
index_na_dup = index_na.insert(0, np.nan)
assert index_na_dup.is_unique is False

def test_engine_reference_cycle(self):
# GH27585
index = self.create_index()
nrefs_pre = len(gc.get_referrers(index))
index._engine
assert len(gc.get_referrers(index)) == nrefs_pre
8 changes: 0 additions & 8 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
from collections import defaultdict
from datetime import datetime, timedelta
import gc
from io import StringIO
import math
import operator
Expand Down Expand Up @@ -2425,13 +2424,6 @@ def test_deprecated_contains(self):
with tm.assert_produces_warning(FutureWarning):
index.contains(1)

def test_engine_reference_cycle(self):
# https://github.com/pandas-dev/pandas/issues/27585
index = pd.Index([1, 2, 3])
nrefs_pre = len(gc.get_referrers(index))
index._engine
assert len(gc.get_referrers(index)) == nrefs_pre


class TestMixedIntIndex(Base):
# Mostly the tests from common.py for which the results differ
Expand Down

0 comments on commit fc436e4

Please sign in to comment.