Skip to content

Commit

Permalink
override cache dir if PYENSEMBL_CACHE_DIR is set
Browse files Browse the repository at this point in the history
  • Loading branch information
armish committed Jul 21, 2016
1 parent 3108c6e commit da9a92a
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 3 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,25 @@ human reference data from Ensembl releases 75 and 76.
Alternatively, you can create the `EnsemblRelease` object from inside a Python
process and call `ensembl_object.download()` followed by `ensembl_object.index()`.

## Cache Location
By default, PyEnsembl uses the platform-specific `Cache` folder
and caches files in its `pyensembl` sub-directory.
You can override this default by setting the environment variable `PYENSEMBL_CACHE_DIR`
to your preferred cache location:

```sh
export PYENSEMBL_CACHE_DIR=/custom/cache/dir
```

or

```python
import os

os.environ['PYENSEMBL_CACHE_DIR'] = '/custom/cache/dir'
# ... PyEnsembl API usage
```

# Non-Ensembl Data

PyEnsembl also allows arbitrary genomes via the specification
Expand Down
11 changes: 8 additions & 3 deletions pyensembl/download_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from os import listdir, remove
from os import listdir, remove, environ
from os.path import join, exists, split, abspath
from shutil import copy2, rmtree
import logging

import datacache

CACHE_BASE_SUBDIR = "pyensembl"
ENV_KEY = "PYENSEMBL_CACHE_DIR"

def cache_subdirectory(
reference_name=None,
Expand Down Expand Up @@ -111,8 +112,12 @@ def __init__(
annotation_name=annotation_name,
annotation_version=annotation_version)

self._cache_directory_path = datacache.get_data_dir(
subdir=self.cache_subdirectory)
if environ.get(ENV_KEY) is None:
self._cache_directory_path = datacache.get_data_dir(
subdir=self.cache_subdirectory)
else:
self._cache_directory_path = datacache.get_data_dir(
envkey=ENV_KEY)

self.decompress_on_download = decompress_on_download
self.copy_local_files_to_cache = copy_local_files_to_cache
Expand Down
15 changes: 15 additions & 0 deletions test/test_download_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
MissingRemoteFile
)

import os
import tempfile

from .data import data_path

download_cache = DownloadCache(
reference_name="__test_reference",
annotation_name="__test_annotation",
Expand All @@ -25,3 +30,13 @@ def test_download_cache_missing_remote_file():
with assert_raises(MissingRemoteFile):
download_cache.download_or_copy_if_necessary(
path_or_url="ftp://NOTAURL.NOTAURL.NOTAURL")

def test_download_cache_custom_location():
    """Exercise the download cache while PYENSEMBL_CACHE_DIR is set.

    Points PYENSEMBL_CACHE_DIR at the system temp directory, clears the
    cache directory of the module-level ``download_cache`` and asks it to
    fetch a local test file. The environment variable is restored to its
    previous state afterwards, even if one of the calls raises, so this
    test cannot leak the override into other tests.
    """
    test_file = "refseq.ucsc.small.gtf"
    tmp_dir = tempfile.gettempdir()
    # Remember any pre-existing value so it can be restored, not clobbered.
    previous_value = os.environ.get('PYENSEMBL_CACHE_DIR')
    os.environ['PYENSEMBL_CACHE_DIR'] = tmp_dir
    try:
        download_cache.delete_cache_directory()
        download_cache.download_or_copy_if_necessary(
            path_or_url=data_path(test_file))
        # NOTE(review): the result of this check is discarded, so the test
        # does not verify that the file landed in tmp_dir. It is left
        # unasserted on purpose: `download_cache` is created at import time,
        # before the variable is set, so it presumably does not pick up the
        # override -- confirm, then build a fresh DownloadCache here (using a
        # private temp directory) and turn this into an `assert`.
        os.path.exists(os.path.join(tmp_dir, test_file))
    finally:
        if previous_value is None:
            os.environ.pop('PYENSEMBL_CACHE_DIR', None)
        else:
            os.environ['PYENSEMBL_CACHE_DIR'] = previous_value

0 comments on commit da9a92a

Please sign in to comment.