Skip to content

Commit 3737d26

Browse files
gidden authored and shoyer committed
dl tutorial files to tmp directory, then move them once successful (pydata#1393)
* dl tutorial files to tmp directory, then move them once successful (closes pydata#1392)
* redo attempt at tutorial checking to use md5 checksums instead (depends on pydata/xarray-data#9)
* rm extraneous import
* update md5 function name
* update whats-new.rst
* fix issue link in whats-new
* adding tutorial dataset test, adds conditional --run-network-tests flag to pytest cli
* one suppress block per file
1 parent 5f92955 commit 3737d26

File tree

6 files changed

+57
-8
lines changed

6 files changed

+57
-8
lines changed

.travis.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ matrix:
2323
- python: 3.6
2424
env:
2525
- CONDA_ENV=py36
26-
- EXTRA_FLAGS="--run-flaky"
26+
- EXTRA_FLAGS="--run-flaky --run-network-tests"
2727
- python: 3.6
2828
env: CONDA_ENV=py36-pydap
2929
- python: 3.6
@@ -45,7 +45,7 @@ matrix:
4545
- python: 3.6
4646
env:
4747
- CONDA_ENV=py36
48-
- EXTRA_FLAGS="--run-flaky"
48+
- EXTRA_FLAGS="--run-flaky --run-network-tests"
4949
- python: 3.6
5050
env: CONDA_ENV=py36-pydap
5151
- python: 3.6

conftest.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,5 @@ def pytest_addoption(parser):
55
"""Add command-line flags for pytest."""
66
parser.addoption("--run-flaky", action="store_true",
77
help="runs flaky tests")
8+
parser.addoption("--run-network-tests", action="store_true",
9+
help="runs tests requiring a network connection")

doc/whats-new.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ Enhancements
3030
By `Chun-Wei Yuan <https://github.com/chunweiyuan>`_ and
3131
`Kyle Heuton <https://github.com/kheuton>`_.
3232

33+
- Enhanced the test suite by use of a ``@network`` decorator, which is
34+
controlled via ``--run-network-tests`` command line argument
35+
to ``py.test`` (:issue:`1393`).
36+
By `Matthew Gidden <https://github.com/gidden>`_.
37+
3338
Bug fixes
3439
~~~~~~~~~
3540

@@ -40,6 +45,10 @@ By `Ryan Abernathey <https://github.com/rabernat>`_.
4045
``data_vars``.
4146
By `Keisuke Fujii <https://github.com/fujiisoup>`_.
4247

48+
- Tutorial datasets are now checked against a reference MD5 sum to confirm
49+
successful download (:issue:`1392`). By `Matthew Gidden
50+
<https://github.com/gidden>`_.
51+
4352
.. _whats-new.0.9.5:
4453

4554
v0.9.5 (17 April, 2017)

xarray/tests/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,14 +100,20 @@
100100

101101
try:
102102
_SKIP_FLAKY = not pytest.config.getoption("--run-flaky")
103+
_SKIP_NETWORK_TESTS = not pytest.config.getoption("--run-network-tests")
103104
except ValueError:
104105
# Can't get config from pytest, e.g., because xarray is installed instead
105106
# of being run from a development version (and hence conftest.py is not
106107
# available). Don't run flaky or network tests.
107108
_SKIP_FLAKY = True
109+
_SKIP_NETWORK_TESTS = True
108110

109111
flaky = pytest.mark.skipif(
110112
_SKIP_FLAKY, reason="set --run-flaky option to run flaky tests")
113+
network = pytest.mark.skipif(
114+
_SKIP_NETWORK_TESTS,
115+
reason="set --run-network-tests option to run tests requiring an "
116+
"internet connection")
111117

112118

113119
class TestCase(unittest.TestCase):
@@ -173,6 +179,7 @@ class UnexpectedDataAccess(Exception):
173179

174180

175181
class InaccessibleArray(utils.NDArrayMixin):
182+
176183
def __init__(self, array):
177184
self.array = array
178185

@@ -181,6 +188,7 @@ def __getitem__(self, key):
181188

182189

183190
class ReturnItem(object):
191+
184192
def __getitem__(self, key):
185193
return key
186194

xarray/tests/test_tutorial.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,27 @@
11
from __future__ import absolute_import
22
from __future__ import division
33
from __future__ import print_function
4+
45
import os
6+
import pytest
57

68
from xarray import tutorial, DataArray
79
from xarray.core.pycompat import suppress
810

9-
from . import TestCase, unittest
11+
from . import TestCase, network
1012

1113

12-
@unittest.skip('TODO: make this conditional on network availability')
13-
class Test_load_dataset(TestCase):
14+
@network
15+
class TestLoadDataset(TestCase):
1416

1517
def setUp(self):
1618
self.testfile = 'tiny'
1719
self.testfilepath = os.path.expanduser(os.sep.join(
1820
('~', '.xarray_tutorial_data', self.testfile)))
1921
with suppress(OSError):
20-
os.remove(self.testfilepath)
22+
os.remove('{}.nc'.format(self.testfilepath))
23+
with suppress(OSError):
24+
os.remove('{}.md5'.format(self.testfilepath))
2125

2226
def test_download_from_github(self):
2327
ds = tutorial.load_dataset(self.testfile)

xarray/tutorial.py

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from __future__ import division
1010
from __future__ import print_function
1111

12+
import hashlib
13+
1214
import os as _os
1315

1416
from .backends.api import open_dataset as _open_dataset
@@ -18,9 +20,17 @@
1820
_default_cache_dir = _os.sep.join(('~', '.xarray_tutorial_data'))
1921

2022

23+
def file_md5_checksum(fname, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and hashed in fixed-size chunks, so
    the whole file never has to be held in memory at once.

    Parameters
    ----------
    fname : str
        Path of the file to hash.
    chunk_size : int, optional
        Number of bytes to read per iteration.

    Returns
    -------
    str
        The MD5 checksum as a string of hexadecimal digits.
    """
    hash_md5 = hashlib.md5()
    with open(fname, "rb") as f:
        # Two-argument iter() keeps calling f.read() until it returns the
        # empty bytes sentinel, i.e. until end of file.
        for chunk in iter(lambda: f.read(chunk_size), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
28+
29+
2130
# idea borrowed from Seaborn
2231
def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
23-
github_url='https://github.com/pydata/xarray-data', **kws):
32+
github_url='https://github.com/pydata/xarray-data',
33+
branch='master', **kws):
2434
"""
2535
Load a dataset from the online repository (requires internet).
2636
@@ -37,13 +47,17 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
3747
If True, then cache data locally for use on subsequent calls
3848
github_url : string
3949
Github repository where the data is stored
50+
branch : string
51+
The git branch to download from
4052
kws : dict, optional
4153
Passed to xarray.open_dataset
4254
4355
"""
4456
longdir = _os.path.expanduser(cache_dir)
4557
fullname = name + '.nc'
4658
localfile = _os.sep.join((longdir, fullname))
59+
md5name = name + '.md5'
60+
md5file = _os.sep.join((longdir, md5name))
4761

4862
if not _os.path.exists(localfile):
4963

@@ -52,8 +66,20 @@ def load_dataset(name, cache=True, cache_dir=_default_cache_dir,
5266
if not _os.path.isdir(longdir):
5367
_os.mkdir(longdir)
5468

55-
url = '/'.join((github_url, 'raw', 'master', fullname))
69+
url = '/'.join((github_url, 'raw', branch, fullname))
5670
_urlretrieve(url, localfile)
71+
url = '/'.join((github_url, 'raw', branch, md5name))
72+
_urlretrieve(url, md5file)
73+
74+
localmd5 = file_md5_checksum(localfile)
75+
with open(md5file, 'r') as f:
76+
remotemd5 = f.read()
77+
if localmd5 != remotemd5:
78+
_os.remove(localfile)
79+
msg = """
80+
MD5 checksum does not match, try downloading dataset again.
81+
"""
82+
raise IOError(msg)
5783

5884
ds = _open_dataset(localfile, **kws).load()
5985

0 commit comments

Comments
 (0)