Skip to content

Commit

Permalink
Merge 75ffb6a into 3108c6e
Browse files Browse the repository at this point in the history
  • Loading branch information
armish committed Jul 22, 2016
2 parents 3108c6e + 75ffb6a commit dd404eb
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 3 deletions.
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,25 @@ human reference data from Ensembl releases 75 and 76.
Alternatively, you can create the `EnsemblRelease` object from inside a Python
process and call `ensembl_object.download()` followed by `ensembl_object.index()`.

## Cache Location
By default, PyEnsembl uses the platform-specific `Cache` folder
and caches the files into the `pyensembl` sub-directory.
You can override this default by setting the environment variable `PYENSEMBL_CACHE_DIR`
to your preferred location for caching:

```sh
export PYENSEMBL_CACHE_DIR=/custom/cache/dir
```

or

```python
import os

os.environ['PYENSEMBL_CACHE_DIR'] = '/custom/cache/dir'
# ... PyEnsembl API usage
```

# Non-Ensembl Data

PyEnsembl also allows arbitrary genomes via the specification
Expand Down
7 changes: 5 additions & 2 deletions pyensembl/download_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from os import listdir, remove
from os import listdir, remove, environ
from os.path import join, exists, split, abspath
from shutil import copy2, rmtree
import logging

import datacache

CACHE_BASE_SUBDIR = "pyensembl"
CACHE_DIR_ENV_KEY = "PYENSEMBL_CACHE_DIR"

def cache_subdirectory(
reference_name=None,
Expand Down Expand Up @@ -111,8 +112,10 @@ def __init__(
annotation_name=annotation_name,
annotation_version=annotation_version)

# If the environment variable named by `CACHE_DIR_ENV_KEY` is set, the cache will be saved there
self._cache_directory_path = datacache.get_data_dir(
subdir=self.cache_subdirectory)
subdir=self.cache_subdirectory,
envkey=CACHE_DIR_ENV_KEY)

self.decompress_on_download = decompress_on_download
self.copy_local_files_to_cache = copy_local_files_to_cache
Expand Down
25 changes: 24 additions & 1 deletion test/test_download_cache.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from __future__ import absolute_import

from nose.tools import assert_raises
from nose.tools import assert_raises, ok_
from pyensembl.download_cache import (
DownloadCache,
MissingLocalFile,
MissingRemoteFile
)

import os
import tempfile

from .data import data_path

download_cache = DownloadCache(
reference_name="__test_reference",
annotation_name="__test_annotation",
Expand All @@ -25,3 +30,21 @@ def test_download_cache_missing_remote_file():
with assert_raises(MissingRemoteFile):
download_cache.download_or_copy_if_necessary(
path_or_url="ftp://NOTAURL.NOTAURL.NOTAURL")

def test_download_cache_custom_location():
    # Verifies that, with PYENSEMBL_CACHE_DIR pointing at a custom directory,
    # a local file copied into the cache ends up inside that directory.
    test_file = "refseq.ucsc.small.gtf"
    tmp_dir = os.path.join(tempfile.gettempdir(), "pyensembl")
    env_key = 'PYENSEMBL_CACHE_DIR'
    # Remember whatever the caller had configured so it can be restored
    # afterwards instead of being unconditionally deleted.
    previous_value = os.environ.get(env_key)
    os.environ[env_key] = tmp_dir
    try:
        # We need another instance of DownloadCache
        # that copies files over to cache folder
        download_cache = DownloadCache(
            reference_name="test",
            annotation_name="test",
            copy_local_files_to_cache=True)
        # Start from an empty cache directory so the existence check below
        # can only pass because of the copy performed by this test.
        download_cache.delete_cache_directory()
        download_cache.download_or_copy_if_necessary(
            download_if_missing=True,
            path_or_url=data_path(test_file))
        ok_(os.path.exists(os.path.join(tmp_dir, test_file)))
    finally:
        # Always undo the environment change, even when the copy or the
        # assertion fails, so later tests do not inherit the override.
        if previous_value is None:
            os.environ.pop(env_key, None)
        else:
            os.environ[env_key] = previous_value

0 comments on commit dd404eb

Please sign in to comment.