Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Rename DATASET_CACHE to CACHE_DIRECTORY. (#2000)
Browse files Browse the repository at this point in the history
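This also changes the default cache location from `~/.allennlp/datasets` to `~/.allennlp/cache`, since we use it as a general caching mechanism (not just for datasets). Existing files in the old directory are not migrated; a warning is logged if that directory is still present.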
  • Loading branch information
schmmd committed Nov 29, 2018
1 parent 3842820 commit 44269a1
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 8 deletions.
19 changes: 15 additions & 4 deletions allennlp/common/file_utils.py
Expand Up @@ -22,7 +22,18 @@
logger = logging.getLogger(__name__) # pylint: disable=invalid-name

CACHE_ROOT = Path(os.getenv('ALLENNLP_CACHE_ROOT', Path.home() / '.allennlp'))
DATASET_CACHE = str(CACHE_ROOT / "datasets")
CACHE_DIRECTORY = str(CACHE_ROOT / "cache")
DEPRECATED_CACHE_DIRECTORY = str(CACHE_ROOT / "datasets")

# DATASET_CACHE was deprecated in 0.7.2 since we use a single folder for caching
# all types of files (datasets, models, etc.). It is kept as an alias of CACHE_DIRECTORY.
DATASET_CACHE = CACHE_DIRECTORY

# Warn if the deprecated cache directory still exists on disk so the user can delete it.
if os.path.exists(DEPRECATED_CACHE_DIRECTORY):
logger = logging.getLogger(__name__) # pylint: disable=invalid-name
logger.warning(f"Deprecated cache directory found ({DEPRECATED_CACHE_DIRECTORY}). "
f"Please remove this directory from your system to free up space.")


def url_to_filename(url: str, etag: str = None) -> str:
Expand All @@ -49,7 +60,7 @@ def filename_to_url(filename: str, cache_dir: str = None) -> Tuple[str, str]:
Raise ``FileNotFoundError`` if `filename` or its stored metadata do not exist.
"""
if cache_dir is None:
cache_dir = DATASET_CACHE
cache_dir = CACHE_DIRECTORY

cache_path = os.path.join(cache_dir, filename)
if not os.path.exists(cache_path):
Expand All @@ -75,7 +86,7 @@ def cached_path(url_or_filename: Union[str, Path], cache_dir: str = None) -> str
make sure the file exists and then return the path.
"""
if cache_dir is None:
cache_dir = DATASET_CACHE
cache_dir = CACHE_DIRECTORY
if isinstance(url_or_filename, Path):
url_or_filename = str(url_or_filename)

Expand Down Expand Up @@ -163,7 +174,7 @@ def get_from_cache(url: str, cache_dir: str = None) -> str:
If it's not there, download it. Then return the path to the cached file.
"""
if cache_dir is None:
cache_dir = DATASET_CACHE
cache_dir = CACHE_DIRECTORY

os.makedirs(cache_dir, exist_ok=True)

Expand Down
8 changes: 4 additions & 4 deletions scripts/inspect_cache.py
Expand Up @@ -4,14 +4,14 @@

sys.path.insert(0, os.path.dirname(os.path.abspath(os.path.join(__file__, os.pardir))))
from allennlp.common.file_utils import filename_to_url
from allennlp.common.file_utils import DATASET_CACHE
from allennlp.common.file_utils import CACHE_DIRECTORY

print(f"Looking for datasets in {DATASET_CACHE}...")
if not os.path.exists(DATASET_CACHE):
print(f"Looking for datasets in {CACHE_DIRECTORY}...")
if not os.path.exists(CACHE_DIRECTORY):
print('Directory does not exist.')
print('No cached datasets found.')

cached_files = os.listdir(DATASET_CACHE)
cached_files = os.listdir(CACHE_DIRECTORY)

if not cached_files:
print('Directory is empty.')
Expand Down

0 comments on commit 44269a1

Please sign in to comment.