Skip to content

Commit

Permalink
Merge pull request #761 from gokcennurlu/lru_cache_tz
Browse files Browse the repository at this point in the history
Add LRU caching to tzoffset, tzstr and gettz
  • Loading branch information
pganssle committed Oct 17, 2018
2 parents 770b8f0 + 02ace35 commit 83df62a
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 3 deletions.
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ switch, and thus all their contributions are dual-licensed.
- Florian Rathgeber (gh: @kynan) **D**
- Gabriel Bianconi <gabriel@MASKED> (gh: @GabrielBianconi) **D**
- Gabriel Poesia <gabriel.poesia@MASKED>
- Gökçen Nurlu <gnurlu1@bloomberg.net> (gh: @gokcennurlu) **D**
- Gustavo Niemeyer <gustavo@niemeyer.net> (gh: @niemeyer)
- Holger Joukl <holger.joukl@MASKED> (gh: @hjoukl)
- Igor <mrigor83@MASKED>
Expand Down
1 change: 1 addition & 0 deletions changelog.d/761.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added a small "strong value" cache into ``tz.gettz``, ``tz.tzoffset`` and ``tz.tzstr`` to improve performance in the situation where transient references are repeatedly created to the same time zones, but no strong reference is continuously held. Patch by Gökçen Nurlu (gh issue #691, gh pr #761)
50 changes: 47 additions & 3 deletions dateutil/test/test_tz.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,15 @@ def test_tzoffset_weakref():
del UTC1
gc.collect()

assert UTC_ref() is None
assert UTC_ref() is not None # Should be in the strong cache
assert UTC_ref() is tz.tzoffset('UTC', 0)

# Fill the strong cache with other items
for offset in range(5,15):
tz.tzoffset('RandomZone', offset)

gc.collect()
assert UTC_ref() is None
assert UTC_ref() is not tz.tzoffset('UTC', 0)


Expand Down Expand Up @@ -1106,12 +1114,33 @@ def test_gettz_cache_clear():

assert NYC1 is not NYC2

@pytest.mark.gettz
@pytest.mark.xfail(IS_WIN, reason='zoneinfo separately cached')
def test_gettz_set_cache_size():
tz.gettz.cache_clear()
tz.gettz.set_cache_size(3)

MONACO_ref = weakref.ref(tz.gettz('Europe/Monaco'))
EASTER_ref = weakref.ref(tz.gettz('Pacific/Easter'))
CURRIE_ref = weakref.ref(tz.gettz('Australia/Currie'))

gc.collect()

assert MONACO_ref() is not None
assert EASTER_ref() is not None
assert CURRIE_ref() is not None

tz.gettz.set_cache_size(2)
gc.collect()

assert MONACO_ref() is None

@pytest.mark.xfail(IS_WIN, reason="Windows does not use system zoneinfo")
@pytest.mark.smoke
@pytest.mark.gettz
def test_gettz_weakref():
tz.gettz.cache_clear()
tz.gettz.set_cache_size(2)
NYC1 = tz.gettz('America/New_York')
NYC_ref = weakref.ref(tz.gettz('America/New_York'))

Expand All @@ -1120,9 +1149,17 @@ def test_gettz_weakref():
del NYC1
gc.collect()

assert NYC_ref() is None
assert tz.gettz('America/New_York') is not NYC_ref()
assert NYC_ref() is not None # Should still be in the strong cache
assert tz.gettz('America/New_York') is NYC_ref()

# Populate strong cache with other timezones
tz.gettz('Europe/Monaco')
tz.gettz('Pacific/Easter')
tz.gettz('Australia/Currie')

gc.collect()
assert NYC_ref() is None # Should have been pushed out
assert tz.gettz('America/New_York') is not NYC_ref()

class ZoneInfoGettzTest(GettzTest, WarningTestMixin):
def gettz(self, name):
Expand Down Expand Up @@ -1462,6 +1499,13 @@ def test_tzstr_weakref():
del tz_t1
gc.collect()

assert tz_t2_ref() is not None
assert tz.tzstr('EST5EDT') is tz_t2_ref()

for offset in range(5,15):
tz.tzstr('GMT+{}'.format(offset))
gc.collect()

assert tz_t2_ref() is None
assert tz.tzstr('EST5EDT') is not tz_t2_ref()

Expand Down
24 changes: 24 additions & 0 deletions dateutil/tz/_factories.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import timedelta
import weakref
from collections import OrderedDict


class _TzSingleton(type):
def __init__(cls, *args, **kwargs):
Expand All @@ -11,6 +13,7 @@ def __call__(cls):
cls.__instance = super(_TzSingleton, cls).__call__()
return cls.__instance


class _TzFactory(type):
def instance(cls, *args, **kwargs):
"""Alternate constructor that returns a fresh instance"""
Expand All @@ -20,6 +23,8 @@ def instance(cls, *args, **kwargs):
class _TzOffsetFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = weakref.WeakValueDictionary()
cls.__strong_cache = OrderedDict()
cls.__strong_cache_size = 8

def __call__(cls, name, offset):
if isinstance(offset, timedelta):
Expand All @@ -31,12 +36,22 @@ def __call__(cls, name, offset):
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(name, offset))

cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)

# Remove an item if the strong cache is overpopulated
# TODO: Maybe this should be under a lock?
if len(cls.__strong_cache) > cls.__strong_cache_size:
cls.__strong_cache.popitem(last=False)

return instance


class _TzStrFactory(_TzFactory):
def __init__(cls, *args, **kwargs):
cls.__instances = weakref.WeakValueDictionary()
cls.__strong_cache = OrderedDict()
cls.__strong_cache_size = 8

def __call__(cls, s, posix_offset=False):
key = (s, posix_offset)
Expand All @@ -45,5 +60,14 @@ def __call__(cls, s, posix_offset=False):
if instance is None:
instance = cls.__instances.setdefault(key,
cls.instance(s, posix_offset))

cls.__strong_cache[key] = cls.__strong_cache.pop(key, instance)


# Remove an item if the strong cache is overpopulated
# TODO: Maybe this should be under a lock?
if len(cls.__strong_cache) > cls.__strong_cache_size:
cls.__strong_cache.popitem(last=False)

return instance

18 changes: 18 additions & 0 deletions dateutil/tz/tz.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import os
import bisect
import weakref
from collections import OrderedDict

import six
from six import string_types
Expand Down Expand Up @@ -1538,6 +1539,8 @@ class GettzFunc(object):
def __init__(self):

self.__instances = weakref.WeakValueDictionary()
self.__strong_cache_size = 8
self.__strong_cache = OrderedDict()
self._cache_lock = _thread.allocate_lock()

def __call__(self, name=None):
Expand All @@ -1556,12 +1559,27 @@ def __call__(self, name=None):
# We also cannot store weak references to None, so we
# will also not store that.
self.__instances[name] = rv
else:
# No need for strong caching, return immediately
return rv

self.__strong_cache[name] = self.__strong_cache.pop(name, rv)

if len(self.__strong_cache) > self.__strong_cache_size:
self.__strong_cache.popitem(last=False)

return rv

def set_cache_size(self, size):
with self._cache_lock:
self.__strong_cache_size = size
while len(self.__strong_cache) > size:
self.__strong_cache.popitem(last=False)

def cache_clear(self):
with self._cache_lock:
self.__instances = weakref.WeakValueDictionary()
self.__strong_cache.clear()

@staticmethod
def nocache(name=None):
Expand Down

0 comments on commit 83df62a

Please sign in to comment.