Skip to content

Commit

Permalink
Clear inactive cache files
Browse files Browse the repository at this point in the history
Cache files that weren't accessed in the last 30 days will be automatically
garbage collected. This collection happens when `save_module` is called,
via a lock system that makes it happen only once per day.
  • Loading branch information
isidentical committed May 24, 2020
1 parent 6ecd975 commit a0126c0
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 18 deletions.
26 changes: 23 additions & 3 deletions parso/_compatibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ def u(string):


try:
    # Python 3.3+ has FileNotFoundError built in; re-export it so the rest
    # of parso can import it from here unconditionally.
    FileNotFoundError = FileNotFoundError
except NameError:
    # Python 2.7: EnvironmentError is the common ancestor of IOError and
    # OSError, which is what a missing file raises there.
    FileNotFoundError = EnvironmentError


def utf8_repr(func):
Expand All @@ -67,3 +67,23 @@ def wrapper(self):
return func
else:
return wrapper

if sys.version_info < (3, 5):
    # Python < 3.5 has no os.scandir; emulate the small part of it that
    # parso's cache code needs on top of os.listdir.
    class _DirEntry:
        """Minimal stand-in for ``os.DirEntry`` (``name``, ``path``, ``stat``)."""

        def __init__(self, name, basepath):
            self.name = name
            self.basepath = basepath

        @property
        def path(self):
            # os.DirEntry exposes the entry's full path as ``path``;
            # cache-clearing code relies on it.
            return os.path.join(self.basepath, self.name)

        def stat(self):
            # Deliberately does not follow symlinks; we only care about
            # the cache file's own access time.
            return os.lstat(self.path)

    def scandir(path):
        """
        Return a list of ``_DirEntry`` objects for *path*.

        Close enough to ``os.scandir`` for our purposes (the real one
        returns a lazy iterator; a list is fine here).
        """
        return [_DirEntry(name, path) for name in os.listdir(path)]
else:
    from os import scandir
56 changes: 55 additions & 1 deletion parso/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,22 @@
except:
import pickle

from parso._compatibility import FileNotFoundError
from parso._compatibility import FileNotFoundError, scandir
from parso.file_io import FileIO

LOG = logging.getLogger(__name__)

_CACHED_FILE_MINIMUM_SURVIVAL = 60 * 10 # 10 minutes
"""
Cached files should survive at least a few minutes.
"""

_CACHED_FILE_MAXIMUM_SURVIVAL = 60 * 60 * 24 * 30
"""
Maximum time for a cached file to survive if it is not
accessed within.
"""

_CACHED_SIZE_TRIGGER = 600
"""
This setting limits the amount of cached files. It's basically a way to start
Expand Down Expand Up @@ -82,6 +90,20 @@ def _get_default_cache_path():
``$XDG_CACHE_HOME/parso`` is used instead of the default one.
"""

_CACHE_CLEAR_THRESHOLD = 60 * 60 * 24  # seconds; garbage collect at most once a day


def _get_cache_clear_lock(cache_path=None):
    """
    Return a ``FileIO`` for the cache-clear lock file.

    The lock file's mtime records when the cache was last garbage
    collected; it prevents continuous cache clearing and only allows
    collection once per ``_CACHE_CLEAR_THRESHOLD``.

    :param cache_path: directory holding the lock; defaults to the
        standard parso cache directory.
    """
    cache_path = cache_path or _get_default_cache_path()
    return FileIO(os.path.join(cache_path, "{}-cache-lock".format(_VERSION_TAG)))


parser_cache = {}


Expand Down Expand Up @@ -173,6 +195,7 @@ def save_module(hashed_grammar, file_io, module, lines, pickling=True, cache_pat
_set_cache_item(hashed_grammar, path, item)
if pickling and path is not None:
_save_to_file_system(hashed_grammar, path, item, cache_path=cache_path)
_remove_cache_and_update_lock(cache_path = cache_path)


def _save_to_file_system(hashed_grammar, path, item, cache_path=None):
Expand All @@ -187,6 +210,37 @@ def clear_cache(cache_path=None):
parser_cache.clear()


def clear_inactive_cache(
    cache_path=None,
    inactivity_threshold=_CACHED_FILE_MAXIMUM_SURVIVAL,
):
    """
    Remove cache files that were not accessed within *inactivity_threshold*
    seconds.

    :param cache_path: cache directory to scan; defaults to the standard
        parso cache directory (the original ignored this parameter — fixed).
    :param inactivity_threshold: maximum age (by atime) a cache file may
        reach before it is removed (the original ignored this too — fixed).
    :return: ``False`` if the cache directory does not exist, ``True``
        after a full scan (the original returned early from the scan on
        the first still-active file, leaving stale files behind).
    """
    if cache_path is None:
        cache_path = _get_default_cache_path()
    if not os.path.exists(cache_path):
        return False
    # Hoist the clock read; one consistent cutoff for the whole scan.
    deadline = time.time() - inactivity_threshold
    for dirname in os.listdir(cache_path):
        version_path = os.path.join(cache_path, dirname)
        if not os.path.isdir(version_path):
            # Stray files (e.g. the cache-clear lock) live next to the
            # per-version directories; skip them.
            continue
        for file in scandir(version_path):
            # Use file.name, not file.path: the py<3.5 _DirEntry shim has
            # no ``path`` attribute, and joining with ``name`` is correct
            # for both implementations.
            if file.stat().st_atime <= deadline:
                try:
                    os.remove(os.path.join(version_path, file.name))
                except OSError:
                    # Best effort: the file may already be gone or in use.
                    continue
    return True


def _remove_cache_and_update_lock(cache_path=None):
    """
    Garbage collect inactive cache files at most once per
    ``_CACHE_CLEAR_THRESHOLD`` seconds.

    The lock file's last-modified time tells us when the cache was last
    cleared; if that was long enough ago (or the lock does not exist yet),
    clear inactive cache files and refresh the lock.

    :param cache_path: cache directory to collect; defaults to the
        standard parso cache directory.
    """
    lock = _get_cache_clear_lock(cache_path=cache_path)
    clear_lock_time = lock.get_last_modified()
    if (
        clear_lock_time is None  # first time
        or clear_lock_time + _CACHE_CLEAR_THRESHOLD <= time.time()
    ):
        if clear_inactive_cache(cache_path=cache_path):
            lock._touch()

def _get_hashed_path(hashed_grammar, path, cache_path=None):
directory = _get_cache_directory_path(cache_path=cache_path)

Expand Down
8 changes: 8 additions & 0 deletions parso/file_io.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
from parso._compatibility import FileNotFoundError


class FileIO(object):
Expand All @@ -22,6 +23,13 @@ def get_last_modified(self):
# Might raise FileNotFoundError, OSError for Python 2
return None

def _touch(self):
try:
os.utime(self.path, None)
except FileNotFoundError:
file = open(self.path, 'a')
file.close()

def __repr__(self):
return '%s(%s)' % (self.__class__.__name__, self.path)

Expand Down
55 changes: 41 additions & 14 deletions test/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,32 @@
Test all things related to the ``jedi.cache`` module.
"""

from os import unlink
import os
import os.path

import pytest
import time

from parso.cache import _NodeCacheItem, save_module, load_module, \
_get_hashed_path, parser_cache, _load_from_file_system, _save_to_file_system
from parso.cache import (_CACHED_FILE_MAXIMUM_SURVIVAL, _VERSION_TAG,
_get_cache_clear_lock, _get_hashed_path,
_load_from_file_system, _NodeCacheItem,
_remove_cache_and_update_lock, _save_to_file_system,
clear_inactive_cache, load_module, parser_cache,
save_module)
from parso import load_grammar
from parso import cache
from parso import file_io
from parso import parse


@pytest.fixture()
def isolated_jedi_cache(monkeypatch, tmpdir):
"""
Set `jedi.settings.cache_directory` to a temporary directory during test.
Same as `clean_jedi_cache`, but create the temporary directory for
each test case (scope='function').
"""
monkeypatch.setattr(cache, '_default_cache_path', str(tmpdir))

def isolated_parso_cache(monkeypatch, tmpdir):
    """
    Point ``parso.cache`` at a per-test temporary directory.

    Patches both the ``_default_cache_path`` attribute and the
    ``_get_default_cache_path`` accessor so all cache code paths see the
    isolated directory. Returns the patched cache path so tests can
    inspect its contents.
    """
    # os.path.join of str arguments already returns str; the original
    # wrapped it in a redundant extra str() call.
    cache_path = os.path.join(str(tmpdir), "__parso_cache")
    monkeypatch.setattr(cache, '_default_cache_path', cache_path)
    monkeypatch.setattr(cache, '_get_default_cache_path',
                        lambda *args, **kwargs: cache_path)
    return cache_path

def test_modulepickling_change_cache_dir(tmpdir):
"""
Expand Down Expand Up @@ -57,7 +60,7 @@ def load_stored_item(hashed_grammar, path, item, cache_path):
return item


@pytest.mark.usefixtures("isolated_jedi_cache")
@pytest.mark.usefixtures("isolated_parso_cache")
def test_modulepickling_simulate_deleted_cache(tmpdir):
"""
Tests loading from a cache file after it is deleted.
Expand All @@ -84,7 +87,7 @@ def test_modulepickling_simulate_deleted_cache(tmpdir):
save_module(grammar._hashed, io, module, lines=[])
assert load_module(grammar._hashed, io) == module

unlink(_get_hashed_path(grammar._hashed, path))
os.unlink(_get_hashed_path(grammar._hashed, path))
parser_cache.clear()

cached2 = load_module(grammar._hashed, io)
Expand Down Expand Up @@ -139,3 +142,27 @@ def test_cache_last_used_update(diff_cache, use_file_io):

node_cache_item = next(iter(parser_cache.values()))[p]
assert now < node_cache_item.last_used < time.time()

def test_inactive_cache(tmpdir, isolated_parso_cache):
    """End-to-end check of time-based cache garbage collection."""
    parser_cache.clear()
    test_subjects = "abcdef"
    for name in test_subjects:
        parse('somecode', cache=True, path=os.path.join(str(tmpdir), name))
    raw_cache_path = os.path.join(isolated_parso_cache, _VERSION_TAG)
    assert os.path.exists(raw_cache_path)

    # Age half of the cache files past the survival threshold.
    a_while_ago = time.time() - _CACHED_FILE_MAXIMUM_SURVIVAL
    half = len(test_subjects) // 2
    old_paths = set(os.listdir(raw_cache_path)[:half])
    for stale in old_paths:
        os.utime(os.path.join(raw_cache_path, stale),
                 (a_while_ago, a_while_ago))

    # The lock was refreshed by save_module above, so nothing is cleared.
    assert os.path.exists(_get_cache_clear_lock().path)
    _remove_cache_and_update_lock()  # it shouldn't clear anything
    assert len(os.listdir(raw_cache_path)) == len(test_subjects)
    assert old_paths.issubset(os.listdir(raw_cache_path))

    # Expire the lock as well; now the stale half must be collected.
    os.utime(_get_cache_clear_lock().path, (a_while_ago, a_while_ago))
    _remove_cache_and_update_lock()
    assert len(os.listdir(raw_cache_path)) == len(test_subjects) - half
    assert not old_paths.intersection(os.listdir(raw_cache_path))

0 comments on commit a0126c0

Please sign in to comment.